1 /**
2  * SROA structured replacement of aggregate optimization
3  *
4  * This 'slices' a two register wide aggregate into two separate register-sized variables,
5  * enabling much better enregistering.
6  * SROA (Scalar Replacement Of Aggregates) is the common term for this.
7  *
8  * Compiler implementation of the
9  * $(LINK2 https://www.dlang.org, D programming language).
10  *
11  * Copyright:   Copyright (C) 2016-2023 by The D Language Foundation, All Rights Reserved
12  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
13  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
14  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/gsroa.c, backend/gsroa.d)
15  */
16 
17 module dmd.backend.gsroa;
18 
19 import core.stdc.stdio;
20 import core.stdc.stdlib;
21 import core.stdc.string;
22 import core.stdc.time;
23 
24 import dmd.backend.cc;
25 import dmd.backend.cdef;
26 import dmd.backend.code_x86;
27 import dmd.backend.oper;
28 import dmd.backend.global;
29 import dmd.backend.el;
30 import dmd.backend.symtab;
31 import dmd.backend.ty;
32 import dmd.backend.type;
33 
34 import dmd.backend.dlist;
35 import dmd.backend.dvec;
36 
37 
38 nothrow:
39 @safe:
40 
41 private enum log = false;       // print logging info
42 private enum enable = true;     // enable SROA
43 
44 alias SLICESIZE = REGSIZE;  // slices are all register-sized
45 enum MAXSLICES = 2;         // max # of pieces we can slice an aggregate into
46 
47 struct SymInfo
48 {
49     bool canSlice;
50     bool accessSlice;   // if Symbol was accessed as a slice
51     tym_t[MAXSLICES] ty; // type of each slice
52     SYMIDX si0;          // index of first slice, the rest follow sequentially
53 }
54 
55 /********************************
56  * Gather information about slice-able variables by scanning e.
57  * Params:
58  *      symtab = symbol table
59  *      e = expression to scan
60  *      sia = where to put gathered information
61  */
62 @trusted
63 extern (D) private void sliceStructs_Gather(ref const symtab_t symtab, SymInfo[] sia, const(elem)* e)
64 {
65     while (1)
66     {
67         switch (e.Eoper)
68         {
69             case OPvar:
70             {
71                 const si = e.EV.Vsym.Ssymnum;
72                 if (si != SYMIDX.max && sia[si].canSlice)
73                 {
74                     assert(si < symtab.length);
75                     const n = nthSlice(e);
76                     const sz = getSize(e);
77                     if (sz == 2 * SLICESIZE && !tyfv(e.Ety) &&
78                         tybasic(e.Ety) != TYldouble && tybasic(e.Ety) != TYildouble)
79                     {
80                         // Rewritten as OPpair later
81                     }
82                     else if (n != NOTSLICE)
83                     {
84                         if (!sia[si].accessSlice)
85                         {
86                             /* [1] default as pointer type
87                              */
88                             foreach (ref ty; sia[si].ty)
89                                 ty = TYnptr;
90 
91                             const s = e.EV.Vsym;
92                             const t = s.Stype;
93                             if (tybasic(t.Tty) == TYstruct)
94                             {
95                                 if (t.Ttag.Sstruct.Sflags & STRbitfields)
96                                 {
97                                     // Can get "used before set" errors from slicing this
98                                     // Would be workable if the symbol was flagged instead of the type
99                                     sia[si].canSlice = false;
100                                     return;
101                                 }
102 
103                                 if (const targ1 = t.Ttag.Sstruct.Sarg1type)
104                                     if (const targ2 = t.Ttag.Sstruct.Sarg2type)
105                                     {
106                                         sia[si].ty[0] = targ1.Tty;
107                                         sia[si].ty[1] = targ2.Tty;
108 
109                                         if (config.fpxmmregs &&
110                                              tyxmmreg(targ1.Tty) && !tyxmmreg(targ2.Tty) ||
111                                             !tyxmmreg(targ1.Tty) &&  tyxmmreg(targ2.Tty))
112 
113                                         {
114                                             /* https://issues.dlang.org/show_bug.cgi?22438
115                                              * disable till fixed
116                                              */
117                                             if (log) printf(" [%d] can't because xmmgpr or gprxmm\n", cast(int)si);
118                                             sia[si].canSlice = false;
119                                             return;
120                                         }
121                                     }
122                             }
123                             else if (tybasic(t.Tty) == TYarray)
124                             {
125                                 // could be an array of floats, deal with this later
126                                 if (log) printf(" [%d] can't because array of floats\n", cast(int)si);
127                                 sia[si].canSlice = false;
128                                 return;
129                             }
130                         }
131                         if (sz == SLICESIZE)
132                         {
133                             sia[si].ty[n] = tybasic(e.Ety);
134                             if (SLICESIZE == 4 && config.fpxmmregs && tyxmmreg(e.Ety))
135                             {
136                                 /* for 32 bits, OPstreq is converted to a TYllong.
137                                  * It needs to be converted to cfloat, otherwise XMM
138                                  * registers cannot be handled. This fails:
139                                  *   struct F { float x, y; }
140                                  *   void foo(F p1, ref F rfp) { rfp = F(p.x, p.y); }
141                                  */
142                                 if (log) printf(" [%d] can't because 32 bit XMM\n", cast(int)si);
143                                 sia[si].canSlice = false;
144                                 return;
145                             }
146                             if (config.fpxmmregs && tyxmmreg(e.Ety))
147                             {
148                                 /* Too many issues with mixing XMM with non-XMM
149                                  * One problem is an OPpair with one operand a long, the other XMM.
150                                  * Giving it up for now.
151                                  */
152                                 if (log) printf(" [%d] can't because XMM\n", cast(int)si);
153                                 sia[si].canSlice = false;
154                                 return;
155                             }
156                         }
157                         sia[si].accessSlice = true;
158                     }
159                     else
160                     {
161                         if (log) printf(" [%d] can't because NOTSLICE 1\n", cast(int)si);
162                         sia[si].canSlice = false;
163                     }
164                 }
165                 return;
166             }
167 
168             default:
169                 if (OTassign(e.Eoper))
170                 {
171                     if (OTbinary(e.Eoper))
172                         sliceStructs_Gather(symtab, sia, e.EV.E2);
173 
174                     // Assignment to a whole var will disallow SROA
175                     if (e.EV.E1.Eoper == OPvar)
176                     {
177                         const e1 = e.EV.E1;
178                         const si = e1.EV.Vsym.Ssymnum;
179                         if (si != SYMIDX.max && sia[si].canSlice)
180                         {
181                             assert(si < symtab.length);
182                             if (nthSlice(e1) == NOTSLICE)
183                             {
184                                 if (log)
185                                 {
186                                     printf(" [%d] can't because NOTSLICE 2\n", cast(int)si);
187                                     elem_print(e);
188                                 }
189                                 sia[si].canSlice = false;
190                             }
191                             // Disable SROA on OSX32 (because XMM registers?)
192                             // https://issues.dlang.org/show_bug.cgi?id=15206
193                             // https://github.com/dlang/dmd/pull/8034
194                             else if (!(config.exe & EX_OSX))
195                             {
196                                 sliceStructs_Gather(symtab, sia, e.EV.E1);
197                             }
198                         }
199                         return;
200                     }
201                     e = e.EV.E1;
202                     break;
203                 }
204                 if (OTunary(e.Eoper))
205                 {
206                     e = e.EV.E1;
207                     break;
208                 }
209                 if (OTbinary(e.Eoper))
210                 {
211                     sliceStructs_Gather(symtab, sia, e.EV.E2);
212                     e = e.EV.E1;
213                     break;
214                 }
215                 return;
216         }
217     }
218 }
219 
220 /***********************************
221  * Rewrite expression tree e based on info in sia[].
222  * Params:
223  *      symtab = symbol table
224  *      sia = slicing info
225  *      e = expression tree to rewrite in place
226  */
227 @trusted
228 extern (D) private void sliceStructs_Replace(ref symtab_t symtab, const SymInfo[] sia, elem *e)
229 {
230     while (1)
231     {
232         switch (e.Eoper)
233         {
234             case OPvar:
235             {
236                 Symbol *s = e.EV.Vsym;
237                 const si = s.Ssymnum;
238                 //printf("e: %d %d\n", si, sia[si].canSlice);
239                 //elem_print(e);
240                 if (si != SYMIDX.max && sia[si].canSlice)
241                 {
242                     const n = nthSlice(e);
243                     if (getSize(e) == 2 * SLICESIZE)
244                     {
245                         if (log) { printf("slicing struct before "); elem_print(e); }
246                         // Rewrite e as (si0 OPpair si0+1)
247                         elem *e1 = el_calloc();
248                         el_copy(e1, e);
249                         e1.Ety = sia[si].ty[0];
250 
251                         elem *e2 = el_calloc();
252                         el_copy(e2, e);
253                         Symbol *s1 = symtab[sia[si].si0 + 1]; // +1 for second slice
254                         e2.Ety = sia[si].ty[1];
255                         e2.EV.Vsym = s1;
256                         e2.EV.Voffset = 0;
257 
258                         e.Eoper = OPpair;
259                         e.EV.E1 = e1;
260                         e.EV.E2 = e2;
261 
262                         if (tycomplex(e.Ety))
263                         {
264                             /* Ensure complex OPpair operands are floating point types
265                              * because [1] may have defaulted them to a pointer type.
266                              * https://issues.dlang.org/show_bug.cgi?id=18936
267                              */
268                             tym_t tyop;
269                             switch (tybasic(e.Ety))
270                             {
271                                 case TYcfloat:   tyop = TYfloat;   break;
272                                 case TYcdouble:  tyop = TYdouble;  break;
273                                 case TYcldouble: tyop = TYldouble; break;
274                                 default:
275                                     assert(0);
276                             }
277                             if (!tyfloating(e1.Ety))
278                                 e1.Ety = tyop;
279                             if (!tyfloating(e2.Ety))
280                                 e2.Ety = tyop;
281                         }
282                         if (log) { printf("slicing struct after\n"); elem_print(e); }
283                     }
284                     else if (n == 0)  // the first slice of the symbol is the same as the original
285                     {
286                         if (log) { printf("slicing slice 0 "); elem_print(e); }
287                     }
288                     else // the nth slice
289                     {
290                         if (log) { printf("slicing slice %d ", n); elem_print(e); }
291                         e.EV.Vsym = symtab[sia[si].si0 + n];
292                         e.EV.Voffset -= n * SLICESIZE;
293                         //printf("replaced with:\n");
294                         //elem_print(e);
295                     }
296                 }
297                 return;
298             }
299 
300             case OPrelconst:
301             {
302                 Symbol *s = e.EV.Vsym;
303                 const si = s.Ssymnum;
304                 //printf("e: %d %d\n", si, sia[si].canSlice);
305                 //elem_print(e);
306                 if (si != SYMIDX.max && sia[si].canSlice)
307                 {
308                     printf("shouldn't be slicing %s\n", s.Sident.ptr);
309                     assert(0);
310                 }
311                 return;
312             }
313 
314             default:
315                 if (OTunary(e.Eoper))
316                 {
317                     e = e.EV.E1;
318                     break;
319                 }
320                 if (OTbinary(e.Eoper))
321                 {
322                     sliceStructs_Replace(symtab, sia, e.EV.E2);
323                     e = e.EV.E1;
324                     break;
325                 }
326                 return;
327         }
328     }
329 }
330 
331 @trusted
332 void sliceStructs(ref symtab_t symtab, block* startblock)
333 {
334 if (enable) // disable while we test the inliner
335 {
336     if (log) printf("\n************ sliceStructs() %s *******************\n", funcsym_p.Sident.ptr);
337     const sia_length = symtab.length;
338     /* 3 is because it is used for two arrays, sia[] and sia2[].
339      * sia2[] can grow to twice the size of sia[], as symbols can get split into two.
340      */
341     debug
342         enum tmp_length = 3;
343     else
344         enum tmp_length = 6;
345     SymInfo[tmp_length] tmp = void;
346 
347     import dmd.common.string : SmallBuffer;
348     auto sb = SmallBuffer!(SymInfo)(3 * sia_length, tmp[]);
349     SymInfo* sip = sb.ptr;
350     memset(sip, 0, 3 * sia_length * SymInfo.sizeof);
351     SymInfo[] sia = sip[0 .. sia_length];
352     SymInfo[] sia2 = sip[sia_length .. sia_length * 3];
353 
354     if (log) foreach (si; 0 .. symtab.length)
355     {
356         Symbol *s = symtab[si];
357         printf("[%d]: %p %d %s %s\n", cast(int)si, s, cast(int)type_size(s.Stype), s.Sident.ptr, tym_str(s.Stype.Tty));
358     }
359 
360     bool anySlice = false;
361     foreach (si; 0 .. symtab.length)
362     {
363         Symbol *s = symtab[si];
364         if (log) printf("slice1 [%d]: %s\n", cast(int)si, s.Sident.ptr);
365 
366         //if (strcmp(s.Sident.ptr, "__inlineretval3".ptr) == 0) { printf("can't\n"); sia[si].canSlice = false; continue; }
367         if (!(s.Sflags & SFLunambig))   // if somebody took the address of s
368         {
369             if (log) printf(" can't because SFLunambig\n");
370             sia[si].canSlice = false;
371             continue;
372         }
373 
374         const sz = type_size(s.Stype);
375         if (sz != 2 * SLICESIZE ||
376             tyvector(s.Stype.Tty) ||            // SIMD types
377             tyfv(s.Stype.Tty) || tybasic(s.Stype.Tty) == TYhptr)    // because there is no TYseg
378         {
379             if (log) printf(" can't because size or pointer type\n");
380             sia[si].canSlice = false;
381             continue;
382         }
383 
384         switch (s.Sclass)
385         {
386             case SC.fastpar:
387             case SC.register:
388             case SC.auto_:
389             case SC.shadowreg:
390             case SC.parameter:
391                 anySlice = true;
392                 sia[si].canSlice = true;
393                 sia[si].accessSlice = false;
394                 // We can't slice whole XMM registers
395                 if (tyxmmreg(s.Stype.Tty) &&
396                     isXMMreg(s.Spreg) && s.Spreg2 == NOREG)
397                 {
398                     if (log) printf(" can't because XMM reg\n");
399                     sia[si].canSlice = false;
400                 }
401                 break;
402 
403             case SC.stack:
404             case SC.pseudo:
405             case SC.static_:
406             case SC.bprel:
407                 if (log) printf(" can't because Sclass\n");
408                 sia[si].canSlice = false;
409                 break;
410 
411             default:
412                 symbol_print(s);
413                 assert(0);
414         }
415     }
416 
417     if (!anySlice)
418         return;
419 
420     foreach (b; BlockRange(startblock))
421     {
422         if (b.BC == BCasm)
423             return;
424         if (b.Belem)
425             sliceStructs_Gather(symtab, sia, b.Belem);
426     }
427 
428     {   // scope needed because of goto skipping declarations
429         bool any = false;
430         int n = 0;              // the number of symbols added
431         foreach (si; 0 .. sia_length)
432         {
433             sia2[si + n].canSlice = false;
434             if (sia[si].canSlice)
435             {
436                 // If never did access it as a slice, don't slice
437                 if (!sia[si].accessSlice)
438                 {
439                     if (log) printf(" can't slice %s because no accessSlice\n", symtab[si].Sident.ptr);
440                     sia[si].canSlice = false;
441                     continue;
442                 }
443 
444                 /* Split slice-able symbol sold into two symbols,
445                  * (sold,snew) in adjacent slots in the symbol table.
446                  */
447                 Symbol *sold = symtab[si + n];
448 
449                 const idlen = 2 + strlen(sold.Sident.ptr) + 2;
450                 char *id = cast(char *)malloc(idlen + 1);
451                 if (!id)
452                     err_nomem();
453                 const len = snprintf(id, idlen + 1, "__%s_%d", sold.Sident.ptr, SLICESIZE);
454                 assert(len == idlen);
455                 if (log) printf("retyping slice symbol %s %s\n", sold.Sident.ptr, tym_str(sia[si].ty[0]));
456                 if (log) printf("creating slice symbol %s %s\n", id, tym_str(sia[si].ty[1]));
457                 Symbol *snew = symbol_calloc(id[0 .. idlen]);
458                 free(id);
459                 snew.Sclass = sold.Sclass;
460                 snew.Sfl = sold.Sfl;
461                 snew.Sflags = sold.Sflags;
462                 if (snew.Sclass == SC.fastpar || snew.Sclass == SC.shadowreg)
463                 {
464                     snew.Spreg = sold.Spreg2;
465                     snew.Spreg2 = NOREG;
466                     sold.Spreg2 = NOREG;
467                 }
468                 type_free(sold.Stype);
469                 sold.Stype = type_fake(sia[si].ty[0]);
470                 sold.Stype.Tcount++;
471                 snew.Stype = type_fake(sia[si].ty[1]);
472                 snew.Stype.Tcount++;
473 
474                 // insert snew into symtab[si + n + 1]
475                 symbol_insert(symtab, snew, si + n + 1);
476 
477                 sia2[si + n].canSlice = true;
478                 sia2[si + n].si0 = si + n;
479                 sia2[si + n].ty[] = sia[si].ty[];
480                 ++n;
481                 any = true;
482             }
483         }
484         if (!any)
485             return;
486     }
487 
488     foreach (si; 0 .. symtab.length)
489     {
490         Symbol *s = symtab[si];
491         assert(s.Ssymnum == si);
492     }
493 
494     foreach (b; BlockRange(startblock))
495     {
496         if (b.Belem)
497             sliceStructs_Replace(symtab, sia2, b.Belem);
498     }
499 
500     static if (0)
501     {
502         printf("after slicing:\n");
503         foreach (b; BlockRange(startblock))
504         {
505             if (b.Belem)
506                 elem_print(b.Belem);
507         }
508         printf("after slicing done\n");
509     }
510 
511 }
512 }
513 
514 
515 /*************************************
516  * Determine if `e` is a slice.
517  * Params:
518  *      e = elem that may be a slice
519  * Returns:
520  *      slice number if it is, NOTSLICE if not
521  */
522 enum NOTSLICE = -1;
523 int nthSlice(const(elem)* e)
524 {
525     const sz = tysize(e.Ety); // not getSize(e) because type_fake(TYstruct) doesn't work
526     if (sz == -1)
527         return NOTSLICE;
528     const sliceSize = SLICESIZE;
529 
530     /* if sz is less than sliceSize, this causes problems because if, say,
531      * sz is 4 while sliceSize is 8, and sz gets enregistered, then assigning to
532      * the lower 4 bytes of sz will zero out the upper 4 bytes.
533      * https://github.com/dlang/dmd/pull/13220
534      */
535     if (sz != sliceSize)
536         return NOTSLICE;
537 
538     /* See if e fits in a slice
539      */
540     const lwr = e.EV.Voffset;
541     const upr = lwr + sz;
542     if (0 <= lwr && upr <= sliceSize)
543         return 0;
544     if (sliceSize <= lwr && upr <= sliceSize * 2)
545         return 1;
546 
547     return NOTSLICE;
548 }
549 
550 /******************************************
551  * Get size of an elem e.
552  */
553 private int getSize(const(elem)* e)
554 {
555     int sz = tysize(e.Ety);
556     if (sz == -1 && e.ET && (tybasic(e.Ety) == TYstruct || tybasic(e.Ety) == TYarray))
557         sz = cast(int)type_size(e.ET);
558     return sz;
559 }