1 /**
2  * Global loop optimizations
3  *
4  * Compiler implementation of the
5  * $(LINK2 https://www.dlang.org, D programming language).
6  *
7  * Copyright:   Copyright (C) 1985-1998 by Symantec
8  *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
9  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
10  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
11  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/gloop.d, backend/gloop.d)
12  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/gloop.d
13  */
14 
15 module dmd.backend.gloop;
16 
17 import core.stdc.stdio;
18 import core.stdc.stdlib;
19 import core.stdc.string;
20 
21 import dmd.backend.cc;
22 import dmd.backend.cdef;
23 import dmd.backend.code_x86;
24 import dmd.backend.evalu8 : el_toldoubled;
25 import dmd.backend.oper;
26 import dmd.backend.global;
27 import dmd.backend.goh;
28 import dmd.backend.el;
29 import dmd.backend.symtab;
30 import dmd.backend.ty;
31 import dmd.backend.type;
32 
33 import dmd.backend.barray;
34 import dmd.backend.dlist;
35 import dmd.backend.dvec;
36 import dmd.backend.mem;
37 
38 nothrow:
39 @safe:
40 
41 @trusted
42 char symbol_isintab(const Symbol *s) { return sytab[s.Sclass] & SCSS; }
43 
44 
45 import dmd.backend.gother : findloopparameters;
46 
47 alias Loops = Rarray!Loop;
48 
49 
50 /*********************************
51  * Loop data structure.
52  */
53 
54 struct Loop
55 {
56 nothrow:
57     vec_t Lloop;        // Vector of blocks in this loop
58     vec_t Lexit;        // Vector of exit blocks of loop
59     block *Lhead;       // Pointer to header of loop
60     block *Ltail;       // Pointer to tail
61     block *Lpreheader;  // Pointer to preheader (if any)
62     Barray!(elem*) Llis; // loop invariant elems moved to Lpreheader, so
63                         // redundant temporaries aren't created
64     Rarray!Iv Livlist;        // basic induction variables
65     Rarray!Iv Lopeqlist;      // list of other op= variables
66 
67     /*************************
68      * Reset memory so this allocation can be re-used.
69      */
70     @trusted
71     void reset()
72     {
73         vec_free(Lloop);
74         vec_free(Lexit);
75 
76         foreach (ref iv; Livlist)
77             iv.reset();
78         foreach (ref iv; Lopeqlist)
79             iv.reset();
80 
81         Llis.reset();
82         Livlist.reset();
83         Lopeqlist.reset();
84     }
85 
86     /***********************
87      * Write loop.
88      */
89 
90     @trusted
91     void print()
92     {
93         debug
94         {
95             Loop *l = &this;
96             printf("loop %p\n", l);
97             printf("\thead: B%d, tail: B%d, prehead: B%d\n",l.Lhead.Bdfoidx,
98                 l.Ltail.Bdfoidx,(l.Lpreheader ) ? l.Lpreheader.Bdfoidx
99                                                 : cast(uint)-1);
100             printf("\tLloop "); vec_println(l.Lloop);
101             printf("\tLexit "); vec_println(l.Lexit);
102         }
103     }
104 }
105 
106 struct famlist
107 {
108 nothrow:
109     elem **FLpelem;         /* parent of elem in the family         */
110     elem *c1;
111     elem *c2;               // c1*(basic IV) + c2
112     Symbol *FLtemp;         // symbol index of temporary (FLELIM if */
113                             /* this entry has no temporary)         */
114     tym_t FLty;             /* type of this induction variable      */
115     tym_t FLivty;           /* type of the basic IV elem (which is  */
116                             /* not necessarilly the type of the IV  */
117                             /* elem!)                               */
118 
119     void reset()
120     {
121         el_free(c1);
122         el_free(c2);
123     }
124 
125     @trusted
126     void print() const
127     {
128         debug
129         {
130             printf("famlist:\n");
131             printf("*FLpelem:\n");
132             elem_print(*FLpelem);
133             printf("c1:");
134             elem_print(c1);
135             printf("c2:");
136             elem_print(c2);
137             printf("FLty = %s\n", tym_str(FLty));
138             printf("FLivty = %s\n", tym_str(FLivty));
139         }
140     }
141 }
142 
143 @system
144 enum FLELIM = cast(Symbol *)-1;
145 
146 struct Iv
147 {
148 nothrow:
149     Symbol *IVbasic;        // symbol of basic IV
150     elem **IVincr;          // pointer to parent of IV increment elem
151     Barray!famlist IVfamily;      // variables in this family
152 
153     @trusted
154     void reset()
155     {
156         foreach (ref fl; IVfamily)
157         {
158             fl.reset();
159         }
160         IVfamily.reset();
161     }
162 
163     @trusted
164     void print() const
165     {
166         debug
167         {
168             printf("IV: '%s'\n",IVbasic.Sident.ptr);
169             printf("*IVincr:\n");
170             elem_print(*IVincr);
171         }
172     }
173 }
174 
175 
176 private __gshared bool addblk;                    /* if TRUE, then we added a block */
177 
178 /* is elem loop invariant?      */
179 int isLI(const elem* n) { return n.Nflags & NFLli; }
180 
181 /* make elem loop invariant     */
182 void makeLI(elem* n) { n.Nflags |= NFLli; }
183 
184 /******************************
185  *      Only variables that could only be unambiguously defined
186  *      are candidates for loop invariant removal and induction
187  *      variables.
188  *      This means only variables that have the SFLunambig flag
189  *      set for them.
190  *      Doing this will still cover 90% (I hope) of the cases, and
191  *      is a lot faster to compute.
192  */
193 
194 /*************
195  * Free loops.
196  */
197 
198 private void freeloop(ref Loops loops)
199 {
200     foreach (ref loop; loops)
201         loop.reset();
202     loops.reset();
203 }
204 
205 
206 /**********************************
207  * Initialize block information.
208  * Returns:
209  *      true if there is a BCasm block
210  */
211 
212 @trusted
213 bool blockinit()
214 {
215     bool hasasm = false;
216 
217     assert(dfo);
218     foreach (b; BlockRange(startblock))
219     {
220         debug                   /* check integrity of Bpred and Bsucc   */
221           L1:
222             foreach (blp; ListRange(b.Bpred))
223             {
224                 foreach (bls; ListRange(list_block(blp).Bsucc))
225                     if (list_block(bls) == b)
226                         continue L1;
227                 assert(0);
228             }
229 
230         hasasm |= (b.BC == BCasm);
231     }
232     foreach (j, b; dfo[])
233     {
234         assert(b.Bdfoidx == j);
235         b.Bdom = vec_realloc(b.Bdom, dfo.length); /* alloc Bdom vectors */
236         vec_clear(b.Bdom);
237     }
238     return hasasm;
239 }
240 
241 /****************************************
242  * Compute dominators (Bdom) for each block.
243  * See Aho & Ullman Fig. 13.5.
244  * Note that flow graph is reducible if there is only one
245  * pass through the loop.
246  * Params:
247  *   dfo = depth first order array of blocks,
248  *         Bdom vector is filled in for each block
249  */
250 
251 @trusted
252 void compdom()
253 {
254     compdom(dfo[]);
255 }
256 
257 @trusted
258 private extern (D) void compdom(block*[] dfo)
259 {
260     assert(dfo.length);
261     block* sb = dfo[0];                  // starting block
262 
263     vec_clear(sb.Bdom);
264     vec_setbit(0,sb.Bdom);               // starting block only doms itself
265     foreach (b; dfo)                     // for all except startblock
266     {
267         if (b != sb)
268             vec_set(b.Bdom);             // dominate all blocks
269     }
270 
271     vec_t t1 = vec_calloc(vec_numbits(sb.Bdom));       // allocate a temporary
272     uint counter = 0;                    // # of times thru loop
273     bool changes;
274     do
275     {
276         changes = false;
277         foreach (i, b; dfo)              // for each block in dfo[]
278         {
279             if (i == 0)
280                 continue;                // except startblock
281             if (b.Bpred)                 // if there are predecessors
282             {
283                 vec_set(t1);
284                 foreach (bl; ListRange(b.Bpred))
285                 {
286                     vec_andass(t1,list_block(bl).Bdom);
287                 }
288             }
289             else
290                 vec_clear(t1);      // no predecessors to dominate
291             vec_setbit(i,t1);       // each block doms itself
292             if (changes)
293                 vec_copy(b.Bdom,t1);
294             else if (!vec_equal(b.Bdom,t1))   // if any changes
295             {
296                 vec_copy(b.Bdom,t1);
297                 changes = true;
298             }
299         }
300         counter++;
301         assert(counter < 50);              // should have converged by now
302     } while (changes);
303     vec_free(t1);
304 
305     debug if (debugc)
306     {
307         printf("Flow graph is%s reducible\n", counter <= 2 ? "".ptr : " not".ptr);
308     }
309 }
310 
311 /***************************
312  * Test if block A dominates block B.
313  * Params:
314  *      A = block
315  *      B = another block
316  * Returns:
317  *      true if A dominates B.
318  */
319 
320 @trusted
321 bool dom(const block* A, const block* B)
322 {
323     assert(A && B && dfo && dfo[A.Bdfoidx] == A);
324     return vec_testbit(A.Bdfoidx,B.Bdom) != 0;
325 }
326 
327 /**********************
328  * Find all the loops.
329  */
330 
331 private extern (D) void findloops(block*[] dfo, ref Loops loops)
332 {
333     freeloop(loops);
334 
335     //printf("findloops()\n");
336     foreach (b; dfo)
337         b.Bweight = 1;             // reset Bweights
338     foreach_reverse (b; dfo)       // for each block (note reverse
339                                    // dfo order, so most nested
340                                    // loops are found first)
341     {
342         assert(b);
343         foreach (bl; ListRange(b.Bsucc))
344         {
345             block *s = list_block(bl);      // each successor s to b
346             assert(s);
347             if (dom(s,b))                   // if s dominates b
348                 buildloop(loops, s, b);     // we found a loop
349         }
350     }
351 
352     debug if (debugc)
353     {
354         foreach (ref l; loops)
355             l.print();
356     }
357 }
358 
359 /********************************
360  * Increase weight of a variable by a factor.
361  * Params:
362  *      weight = how important a variable is
363  *      factor = loops scale the importance
364  * Returns:
365  *      new weight
366  */
367 
368 private uint loop_weight(uint weight, int factor) pure
369 {
370     // Be careful not to overflow
371     if (weight < 0x1_0000)
372         weight *= 10 * factor;
373     else if (weight < 0x10_0000)
374         weight *= 2 * factor;
375     else
376         weight += factor;
377     assert(cast(int)weight > 0); // overflow check
378     return weight;
379 }
380 
381 /*****************************
382  * Construct natural loop.
383  * Algorithm 13.1 from Aho & Ullman.
384  * Note that head dom tail.
385  * Params:
386  *      ploops = collection of existing loops to add to
387  *      head = head of loop; head dominates tail
388  *      tail = tail of loop
389  */
390 
391 @trusted
392 private void buildloop(ref Loops ploops,block *head,block *tail)
393 {
394     //printf("buildloop()\n");
395 
396      /* Add a block b and all its predecessors to vector v.
397       */
398     static void insert(block *b, vec_t v)
399     {
400         assert(b && v);
401         if (!vec_testbit(b.Bdfoidx,v))      // if block is not in loop
402         {
403             vec_setbit(b.Bdfoidx,v);        // add block to loop
404             b.Bweight = loop_weight(b.Bweight,1);   // *10 usage count
405             foreach (bl; ListRange(b.Bpred))
406                 insert(list_block(bl), v);  // insert all its predecessors
407         }
408     }
409 
410     Loop* l;
411 
412     /* See if this is part of an existing loop. If so, merge the two.     */
413     foreach (ref lp; ploops)
414         if (lp.Lhead == head)           /* two loops with same header   */
415         {
416             // Calculate loop contents separately so we get the Bweights
417             // done accurately.
418 
419             vec_t v = vec_calloc(dfo.length);
420             vec_setbit(head.Bdfoidx,v);
421             head.Bweight = loop_weight(head.Bweight, 1);
422             insert(tail,v);
423 
424             vec_orass(lp.Lloop,v);      // merge into existing loop
425             vec_free(v);
426 
427             vec_clear(lp.Lexit);        // recompute exit blocks
428             l = &lp;
429             goto L1;
430         }
431 
432     /* Allocate loop entry        */
433     l = ploops.push();
434 
435     l.Lloop = vec_calloc(dfo.length);    // allocate loop bit vector
436     l.Lexit = vec_calloc(dfo.length);    // bit vector for exit blocks
437     l.Lhead = head;
438     l.Ltail = tail;
439     l.Lpreheader = null;
440 
441     vec_setbit(head.Bdfoidx,l.Lloop);    /* add head to the loop         */
442     head.Bweight = loop_weight(head.Bweight, 2);  // *20 usage for loop header
443 
444     insert(tail,l.Lloop);                /* insert tail in loop          */
445 
446 L1:
447     /* Find all the exit blocks (those blocks with
448      * successors outside the loop).
449      */
450 
451     // for each block in this loop
452     foreach (i; VecRange(l.Lloop))
453     {
454         if (dfo[i].BC == BCret || dfo[i].BC == BCretexp || dfo[i].BC == BCexit)
455             vec_setbit(i,l.Lexit); /* ret blocks are exit blocks */
456         else
457         {
458             foreach (bl; ListRange(dfo[i].Bsucc))
459                 if (!vec_testbit(list_block(bl).Bdfoidx,l.Lloop))
460                 {
461                     vec_setbit(i,l.Lexit);
462                     break;
463                 }
464         }
465     }
466 
467     /*  Find preheader, if any, to the loop.
468         The preheader is a block that has only the head as a successor.
469         All other predecessors of head must be inside the loop.
470      */
471     l.Lpreheader = null;
472     foreach (bl; ListRange(head.Bpred))
473     {
474         block *b = list_block(bl);
475 
476         if (!vec_testbit(b.Bdfoidx,l.Lloop))  /* if not in loop       */
477         {
478             if (l.Lpreheader)                 /* if already one       */
479             {
480                 l.Lpreheader = null;          /* can only be one      */
481                 break;
482             }
483             else
484             {
485                 if (list_next(b.Bsucc))       // if more than 1 successor
486                     break;                    // b can't be a preheader
487                 l.Lpreheader = b;
488             }
489         }
490     }
491 }
492 
493 /**************************************
494  * Perform loop rotations.
495  * Loop starts as:
496  *
497  *         prehead
498  *          |
499  *          v
500  *      +->head---->
501  *      |   |
502  *      |   v
503  *      |  body
504  *      |   |
505  *      |   v
506  *      +--tail
507  *
508  * Two types are done:
509  *      1) Header is moved to be past the tail.
510  *
511  *         prehead
512  *          |
513  *      +---+
514  *      |
515  *      |  body<-+
516  *      |   |    |
517  *      |   v    |
518  *      |  tail  |
519  *      |   |    |
520  *      |   v    |
521  *      +->head--+
522  *          |
523  *          v
524  *
525  *      2) Header is copied past the tail (done only if MFtime is set).
526  *
527  *         prehead
528  *          |
529  *          v
530  *         head1-----+
531  *          |        |
532  *          v        |
533  *         body<--+  |
534  *          |     |  |
535  *          v     |  |
536  *         tail   |  |
537  *          |     |  |
538  *          v     |  |
539  *         head2--+  |
540  *          |        |
541  *          +--------+
542  *          v
543  *
544  * Input:
545  *      Loop information (do not depend on the preheader information)
546  * Output:
547  *      Revised list of blocks, a new dfo and new loop information
548  * Returns:
549  *      true need to recompute loop data
550  */
551 
552 @trusted
553 private bool looprotate(ref Loop l)
554 {
555     block *tail = l.Ltail;
556     block *head = l.Lhead;
557 
558     //printf("looprotate(%p)\n",l);
559 
560     // Do not rotate loop if:
561     if (head == tail ||                         // loop is only one block big
562         !vec_testbit(head.Bdfoidx,l.Lexit))   // header is not an exit block
563         goto Lret;
564 
565     if (//iter != 1 &&
566         vec_testbit(tail.Bdfoidx,l.Lexit))    // tail is an exit block
567         goto Lret;
568 
569     // Do not rotate if already rotated
570     foreach (b; BlockRange(tail.Bnext))
571         if (b == head)                  // if loop already rotated
572             goto Lret;
573 
574     if (head.BC == BCtry)
575          goto Lret;
576     if (head.BC == BC_try)
577          goto Lret;
578 
579     //if (debugc) { printf("looprotate: "); l.print(); }
580 
581     if ((go.mfoptim & MFtime) && head.BC != BCswitch && head.BC != BCasm)
582     {   // Duplicate the header past the tail (but doing
583         // switches would be too expensive in terms of code
584         // generated).
585 
586         auto head2 = block_calloc(); // create new head block
587         head2.Btry = head.Btry;
588         head2.Bflags = head.Bflags;
589         head.Bflags = BFLnomerg;       // move flags over to head2
590         head2.Bflags |= BFLnomerg;
591         head2.BC = head.BC;
592         assert(head2.BC != BCswitch);
593         if (head.Belem)                // copy expression tree
594             head2.Belem = el_copytree(head.Belem);
595         head2.Bnext = tail.Bnext;
596         tail.Bnext = head2;
597 
598         // pred(head1) = pred(head) outside loop
599         // pred(head2) = pred(head) inside loop
600         list_t *pbln;
601         auto pbl2 = &(head2.Bpred);
602         for (list_t *pbl = &(head.Bpred); *pbl; pbl = pbln)
603         {
604             if (vec_testbit(list_block(*pbl).Bdfoidx, l.Lloop))
605             {   // if this predecessor is inside the loop
606 
607                 *pbl2 = *pbl;
608                 *pbl = list_next(*pbl);
609                 pbln = pbl;                     // don't skip this next one
610                 (*pbl2).next = null;
611                 auto bsucc = list_block(*pbl2).Bsucc;
612                 pbl2 = &((*pbl2).next);
613                 foreach (bl; ListRange(bsucc))
614                     if (list_block(bl) == head)
615                     {
616                         bl.ptr = cast(void *)head2;
617                         goto L2;
618                     }
619                 assert(0);
620         L2:
621             }
622             else
623                 pbln = &((*pbl).next);      // next predecessor in list
624         } // for each pred(head)
625 
626         // succ(head2) = succ(head)
627         foreach (bl; ListRange(head.Bsucc))
628         {
629             list_append(&(head2.Bsucc),list_block(bl));
630             list_append(&(list_block(bl).Bpred),head2);
631         }
632         if (debugc) printf("1Rotated loop %p\n", &l);
633         go.changes++;
634         return true;
635     }
636     else if (startblock != head
637             /* This screws up the OPctor/OPdtor sequence for:
638              *   struct CString
639              *   {   CString();
640              *      ~CString();
641              *      int GetLength();
642              *   };
643              *
644              *   void f(void)
645              *   {  for(;;)
646              *      {   CString s ;
647              *    if(s.GetLength()!=0)
648              *       break ;
649              *      }
650              *   }
651              */
652             && !(config.flags3 & CFG3eh)
653             )
654     {   // optimize for space
655         // Simply position the header past the tail
656         foreach (b; BlockRange(startblock))
657         {
658             if (b.Bnext == head)
659             {   // found parent b of head
660                 b.Bnext = head.Bnext;
661                 head.Bnext = tail.Bnext;
662                 tail.Bnext = head;
663                 if (debugc) printf("2Rotated loop %p\n", &l);
664                 go.changes++;
665                 return false;
666             }
667         }
668         assert(0);
669     }
670 Lret:
671     return false;
672 }
673 
674 private __gshared
675 {
676     bool doflow;             // true if flow analysis has to be redone
677 }
678 
679 /*********************************
680  * Loop invariant and induction variable elimination.
681  * Input:
682  *      iter    which optimization iteration we are on
683  */
684 
685 @trusted
686 void loopopt()
687 {
688     __gshared Loops startloop_cache;
689 
690     Loops startloop = startloop_cache;
691 
692     if (debugc) printf("loopopt()\n");
693 restart:
694     if (blockinit())                    // init block data
695     {
696         findloops(dfo[], startloop);    // Compute Bweights
697         freeloop(startloop);            // free existing loops
698         startloop_cache = startloop;
699         return;                         // can't handle ASM blocks
700     }
701     compdom();                          // compute dominators
702     findloops(dfo[], startloop);              // find the loops
703 
704   L3:
705     while (1)
706     {
707         foreach (ref l; startloop)
708         {
709             if (looprotate(l))              // rotate the loop
710             {
711                 compdfo(dfo, startblock);
712                 blockinit();
713                 compdom();
714                 findloops(dfo[], startloop);
715                 continue L3;
716             }
717         }
718         break;
719     }
720 
721     // Make sure there is a preheader for each loop.
722 
723     addblk = false;                     /* assume no blocks added        */
724     foreach (ref l; startloop)
725     {
726         //if (debugc) l.print();
727 
728         if (!l.Lpreheader)             /* if no preheader               */
729         {
730             if (debugc) printf("Generating preheader for loop\n");
731             addblk = true;              // add one
732             block* p = block_calloc();  // the preheader
733             block* h = l.Lhead;         // loop header
734 
735             /* Find parent of h */
736             if (h == startblock)
737                 startblock = p;
738             else
739             {
740                 for (auto ph = startblock; 1; ph = ph.Bnext)
741                 {
742                     assert(ph);         /* should have found it         */
743                     if (ph.Bnext == h)
744                     {
745                         // Link p into block list between ph and h
746                         ph.Bnext = p;
747                         break;
748                     }
749                 }
750             }
751             p.Bnext = h;
752 
753             l.Lpreheader = p;
754             p.BC = BCgoto;
755             assert(p.Bsucc == null);
756             list_append(&(p.Bsucc),h); /* only successor is h          */
757             p.Btry = h.Btry;
758 
759             if (debugc) printf("Adding preheader %p to loop %p\n",p,&l);
760 
761             // Move preds of h that aren't in the loop to preds of p
762             for (list_t bl = h.Bpred; bl;)
763             {
764                 block *b = list_block(bl);
765 
766                 if (!vec_testbit (b.Bdfoidx, l.Lloop))
767                 {
768                     list_append(&(p.Bpred), b);
769                     list_subtract(&(h.Bpred), b);
770                     bl = h.Bpred;      /* dunno what subtract did      */
771 
772                     /* Fix up successors of predecessors        */
773                     foreach (bls; ListRange(b.Bsucc))
774                         if (list_block(bls) == h)
775                                 bls.ptr = cast(void *)p;
776                 }
777                 else
778                     bl = list_next(bl);
779             }
780             list_append(&(h.Bpred),p); /* p is a predecessor to h      */
781         }
782     } /* for */
783     if (addblk)                         /* if any blocks were added      */
784     {
785         compdfo(dfo, startblock);                      /* compute depth-first order    */
786         blockinit();
787         compdom();
788         findloops(dfo[], startloop);          // recompute block info
789         addblk = false;
790     }
791 
792     /* Do the loop optimizations.
793      */
794 
795     doflow = true;                      /* do flow analysis             */
796 
797     if (go.mfoptim & MFtime)
798     {
799         if (debugc) printf("Starting loop unrolling\n");
800     L2:
801         while (1)
802         {
803             foreach (ref l; startloop)
804             {
805                 if (loopunroll(l))
806                 {
807                     compdfo(dfo, startblock);                      // compute depth-first order
808                     blockinit();
809                     compdom();
810                     findloops(dfo[], startloop);    // recompute block info
811                     doflow = true;
812                     continue L2;
813                 }
814             }
815             break;
816         }
817     }
818 
819     /* Note that accessing the loops
820      * starting from startloop will access them in least nested
821      * one first, thus moving LIs out as far as possible
822      */
823     if (debugc) printf("Starting loop invariants\n");
824 
825     foreach_reverse (ref l; startloop)
826     {
827         //if (debugc) l.print();
828 
829         assert(l.Lpreheader);
830         if (doflow)
831         {
832             flowrd();               /* compute reaching definitions  */
833             flowlv();               /* compute live variables        */
834             flowae();               // compute available expressions
835             doflow = false;         /* no need to redo it           */
836             if (go.defnod.length == 0)     /* if no definition elems       */
837                 break;              /* no need to optimize          */
838         }
839         vec_t lv = l.Lloop;
840         if (debugc) printf("...Loop %p start...\n",&l);
841 
842         /* Unmark all elems in this loop         */
843         for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i)
844             if (dfo[i].Belem)
845                 unmarkall(dfo[i].Belem);       /* unmark all elems     */
846 
847         /* Find & mark all LIs   */
848         vec_t gin = vec_clone(l.Lpreheader.Bout);
849         vec_t rd = vec_calloc(go.defnod.length);        /* allocate our running RD vector */
850         for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) // for each block in loop
851         {
852             block *b = dfo[i];
853 
854             if (debugc) printf("B%d\n",i);
855             if (b.Belem)
856             {
857                 vec_copy(rd, b.Binrd); // IN reaching defs
858                 static if (0)
859                 {
860                     printf("i = %d\n",i);
861                     {
862                         for (int j = 0; j < go.defnod.length; j++)
863                             elem_print(go.defnod[j].DNelem);
864                     }
865                     printf("rd    : "); vec_println(rd);
866                 }
867                 markInvariants(b != l.Lhead, b, lv, gin, b.Belem, rd);
868                 static if (0)
869                 {
870                     printf("B%d\n", i);
871                     {
872                         foreach (j; 0 .. go.defnod.length)
873                         {
874                             printf("  [%2d] ", j);
875                             WReqn(go.defnod[j].DNelem);
876                             printf("\n");
877                         }
878                     }
879                     printf("rd    : "); vec_println(rd);
880                     printf("Boutrd: "); vec_println(b.Boutrd);
881                 }
882                 assert(vec_equal(rd, b.Boutrd));
883             }
884             else
885                 assert(vec_equal(b.Binrd, b.Boutrd));
886         }
887         vec_free(rd);
888         vec_free(gin);
889 
890         /* Move loop invariants  */
891         for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i)
892         {
893             uint domexit;               // true if this block dominates all
894                                         // exit blocks of the loop
895 
896             for (uint j = 0; (j = cast(uint) vec_index(j, l.Lexit)) < dfo.length; ++j) // for each exit block
897             {
898                 if (!vec_testbit (i, dfo[j].Bdom))
899                 {
900                     domexit = 0;
901                     goto L1;                // break if !(i dom j)
902                 }
903             }
904             // if i dom (all exit blocks)
905             domexit = 1;
906         L1:
907             if (dfo[i].Belem)
908             {   // If there is any hope of making an improvement
909                 if (domexit || l.Llis.length)
910                 {
911                     //if (dfo[i] != l.Lhead)
912                         //domexit |= 2;
913                     movelis(dfo[i].Belem, dfo[i], l, domexit);
914                 }
915             }
916         }
917         if (debugc) printf("...Loop %p done...\n",&l);
918 
919         if (go.mfoptim & MFliv)
920         {
921             loopiv(l);              /* induction variables          */
922             if (addblk)             /* if we added a block          */
923             {
924                 compdfo(dfo, startblock);
925                 goto restart;       /* play it safe and start over  */
926             }
927         }
928     } /* for */
929     freeloop(startloop);
930     startloop_cache = startloop;
931 }
932 
933 /*****************************
934  * If elem is loop invariant, mark it.
935  * Input:
936  *      lv =    vector of all the blocks in this loop.
937  *      rd =    vector of loop invariants for this elem. This must be
938  *              continually updated.
939  * Note that we do not iterate until no more LIs are found. The only
940  * thing this would buy us is stuff that depends on LI assignments.
941  */
942 
943 @trusted
944 private void markInvariants(int gref, block* gblock, vec_t lv, vec_t gin, elem *n, vec_t rd)
945 {
946 
947     void markinvar(elem *n,vec_t rd)
948     {
949         vec_t tmp;
950         uint i;
951         Symbol *v;
952         elem *n1;
953 
954         assert(n && rd);
955         assert(vec_numbits(rd) == go.defnod.length);
956         switch (n.Eoper)
957         {
958             case OPaddass:  case OPminass:  case OPmulass:  case OPandass:
959             case OPorass:   case OPxorass:  case OPdivass:  case OPmodass:
960             case OPshlass:  case OPshrass:  case OPashrass:
961             case OPpostinc: case OPpostdec:
962             case OPcall:
963             case OPvecsto:
964             case OPcmpxchg:
965                 markinvar(n.EV.E2,rd);
966                 goto case OPnegass;
967 
968             case OPnegass:
969                 n1 = n.EV.E1;
970                 if (n1.Eoper == OPind)
971                         markinvar(n1.EV.E1,rd);
972                 else if (OTbinary(n1.Eoper))
973                 {   markinvar(n1.EV.E1,rd);
974                     markinvar(n1.EV.E2,rd);
975                 }
976             L2:
977                 if (n.Eoper == OPcall ||
978                     gblock.Btry ||
979                     !(n1.Eoper == OPvar &&
980                         symbol_isintab(n1.EV.Vsym)))
981                 {
982                     gref = 1;
983                 }
984 
985                 updaterd(n,rd,null);
986                 break;
987 
988             case OPcallns:
989                 markinvar(n.EV.E2,rd);
990                 markinvar(n.EV.E1,rd);
991                 break;
992 
993             case OPstrcpy:
994             case OPstrcat:
995             case OPmemcpy:
996             case OPmemset:
997                 markinvar(n.EV.E2,rd);
998                 markinvar(n.EV.E1,rd);
999                 updaterd(n,rd,null);
1000                 break;
1001 
1002             case OPbtc:
1003             case OPbtr:
1004             case OPbts:
1005                 markinvar(n.EV.E1,rd);
1006                 markinvar(n.EV.E2,rd);
1007                 updaterd(n,rd,null);
1008                 break;
1009 
1010             case OPucall:
1011                 markinvar(n.EV.E1,rd);
1012                 goto case OPasm;
1013 
1014             case OPasm:
1015                 gref = 1;
1016                 updaterd(n,rd,null);
1017                 break;
1018 
1019             case OPucallns:
1020             case OPstrpar:
1021             case OPstrctor:
1022             case OPvector:
1023             case OPvoid:
1024             case OPstrlen:
1025             case OPddtor:
1026             case OPinp:
1027             case OPprefetch:                // don't mark E2
1028                 markinvar(n.EV.E1,rd);
1029                 break;
1030 
1031             case OPcond:
1032             case OPparam:
1033             case OPstrcmp:
1034             case OPmemcmp:
1035             case OPbt:                      // OPbt is like OPind, assume not LI
1036             case OPoutp:
1037                 markinvar(n.EV.E1,rd);
1038                 markinvar(n.EV.E2,rd);
1039                 break;
1040 
1041             case OPandand:
1042             case OPoror:
1043                 markinvar(n.EV.E1,rd);
1044                 tmp = vec_clone(rd);
1045                 markinvar(n.EV.E2,tmp);
1046                 if (el_returns(n.EV.E2))
1047                     vec_orass(rd,tmp);              // rd |= tmp
1048                 vec_free(tmp);
1049                 break;
1050 
1051             case OPcolon:
1052             case OPcolon2:
1053                 tmp = vec_clone(rd);
1054                 switch (el_returns(n.EV.E1) * 2 | int(el_returns(n.EV.E2)))
1055                 {
1056                     case 3: // E1 and E2 return
1057                         markinvar(n.EV.E1,rd);
1058                         markinvar(n.EV.E2,tmp);
1059                         vec_orass(rd,tmp);              // rd |= tmp
1060                         break;
1061                     case 2: // E1 returns
1062                         markinvar(n.EV.E1,rd);
1063                         markinvar(n.EV.E2,tmp);
1064                         break;
1065                     case 1: // E2 returns
1066                         markinvar(n.EV.E1,tmp);
1067                         markinvar(n.EV.E2,rd);
1068                         break;
1069                     case 0: // neither returns
1070                         markinvar(n.EV.E1,tmp);
1071                         vec_copy(tmp,rd);
1072                         markinvar(n.EV.E2,tmp);
1073                         break;
1074                     default:
1075                         assert(0);
1076                 }
1077                 vec_free(tmp);
1078                 break;
1079 
1080             case OPaddr:            // mark addresses of OPvars as LI
1081                 markinvar(n.EV.E1,rd);
1082                 if (n.EV.E1.Eoper == OPvar || isLI(n.EV.E1))
1083                     makeLI(n);
1084                 break;
1085 
1086             case OPmsw:
1087             case OPneg:     case OPbool:    case OPnot:     case OPcom:
1088             case OPs16_32:  case OPd_s32:   case OPs32_d:
1089             case OPd_s16:   case OPs16_d:   case OPd_f:     case OPf_d:
1090             case OP32_16:   case OPu8_16:
1091             case OPld_d:    case OPd_ld:
1092             case OPld_u64:
1093             case OPc_r:     case OPc_i:
1094             case OPu16_32:
1095             case OPu16_d:   case OPd_u16:
1096             case OPs8_16:   case OP16_8:
1097             case OPd_u32:   case OPu32_d:
1098 
1099             case OPs32_64:  case OPu32_64:
1100             case OP64_32:
1101             case OPd_s64:   case OPd_u64:
1102             case OPs64_d:
1103             case OPu64_d:
1104             case OP128_64:
1105             case OPs64_128:
1106             case OPu64_128:
1107 
1108             case OPabs:
1109             case OPtoprec:
1110             case OPrndtol:
1111             case OPrint:
1112             case OPsetjmp:
1113             case OPbsf:
1114             case OPbsr:
1115             case OPbswap:
1116             case OPpopcnt:
1117             case OPsqrt:
1118             case OPsin:
1119             case OPcos:
1120             case OPvp_fp: /* BUG for MacHandles */
1121             case OPnp_f16p: case OPf16p_np: case OPoffset: case OPnp_fp:
1122             case OPcvp_fp:
1123             case OPvecfill:
1124                 markinvar(n.EV.E1,rd);
1125                 if (isLI(n.EV.E1))        /* if child is LI               */
1126                     makeLI(n);
1127                 break;
1128 
1129             case OPeq:
1130             case OPstreq:
1131                 markinvar(n.EV.E2,rd);
1132                 n1 = n.EV.E1;
1133                 markinvar(n1,rd);
1134 
1135                 /* Determine if assignment is LI. Conditions are:       */
1136                 /* 1) Rvalue is LI                                      */
1137                 /* 2) Lvalue is a variable (simplifies things a lot)    */
1138                 /* 3) Lvalue can only be affected by unambiguous defs   */
1139                 /* 4) No rd's of lvalue that are within the loop (other */
1140                 /*    than the current def)                             */
1141                 if (isLI(n.EV.E2) && n1.Eoper == OPvar)          /* 1 & 2 */
1142                 {
1143                     v = n1.EV.Vsym;
1144                     if (v.Sflags & SFLunambig)
1145                     {
1146                         tmp = vec_calloc(go.defnod.length);
1147                         //filterrd(tmp,rd,v);
1148                         listrds(rd,n1,tmp,null);
1149                         for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i)
1150                             if (go.defnod[i].DNelem != n &&
1151                                 vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv))
1152                                     goto L3;
1153                         makeLI(n);      // then the def is LI
1154                     L3: vec_free(tmp);
1155                     }
1156                 }
1157                 goto L2;
1158 
1159             case OPadd:     case OPmin:     case OPmul:     case OPand:
1160             case OPor:      case OPxor:     case OPdiv:     case OPmod:
1161             case OPshl:     case OPshr:     case OPeqeq:    case OPne:
1162             case OPlt:      case OPle:      case OPgt:      case OPge:
1163             case OPashr:
1164             case OPror:     case OProl:
1165             case OPbtst:
1166 
1167             case OPunord:   case OPlg:      case OPleg:     case OPule:
1168             case OPul:      case OPuge:     case OPug:      case OPue:
1169             case OPngt:     case OPnge:     case OPnlt:     case OPnle:
1170             case OPord:     case OPnlg:     case OPnleg:    case OPnule:
1171             case OPnul:     case OPnuge:    case OPnug:     case OPnue:
1172 
1173             case OPcomma:
1174             case OPpair:
1175             case OPrpair:
1176             case OPremquo:
1177             case OPscale:
1178             case OPyl2x:
1179             case OPyl2xp1:
1180                 markinvar(n.EV.E1,rd);
1181                 markinvar(n.EV.E2,rd);
1182                 if (isLI(n.EV.E2) && isLI(n.EV.E1))
1183                         makeLI(n);
1184                 break;
1185 
1186             case OPind:                     /* must assume this is not LI   */
1187                 markinvar(n.EV.E1,rd);
1188                 if (isLI(n.EV.E1))
1189                 {
1190                     static if (0)
1191                     {
1192                         // This doesn't work with C++, because exp2_ptrtocomtype() will
1193                         // transfer const to where it doesn't belong.
1194                         if (n.Ety & mTYconst)
1195                         {
1196                             makeLI(n);
1197                         }
1198 
1199                         // This was disabled because it was marking as LI
1200                         // the loop dimension for the [i] array if
1201                         // a[j][i] was in a loop. This meant the a[j] array bounds
1202                         // check for the a[j].length was skipped.
1203                         else if (n.Ejty)
1204                         {
1205                             tmp = vec_calloc(go.defnod.length);
1206                             filterrdind(tmp,rd,n);  // only the RDs pertaining to n
1207 
1208                             // if (no RDs within loop)
1209                             //      then it's loop invariant
1210 
1211                             for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i)  // for each RD
1212                                 if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv))
1213                                     goto L10;       // found a RD in the loop
1214 
1215                             // If gref has occurred, this can still be LI
1216                             // if n is an AE that was also an AE at the
1217                             // point of gref.
1218                             // We can catch a subset of these cases by looking
1219                             // at the AEs at the start of the loop.
1220                             if (gref)
1221                             {
1222                                 int j;
1223 
1224                                 //printf("\tn is: "); WReqn(n); printf("\n");
1225                                 for (j = 0; (j = cast(uint) vec_index(j, gin)) < go.exptop; ++j)
1226                                 {
1227                                     elem *e = go.expnod[j];
1228 
1229                                     //printf("\t\tgo.expnod[%d] = %p\n",j,e);
1230                                     //printf("\t\tAE is: "); WReqn(e); printf("\n");
1231                                     if (el_match2(n,e))
1232                                     {
1233                                         makeLI(n);
1234                                         //printf("Ind LI: "); WReqn(n); printf("\n");
1235                                         break;
1236                                     }
1237                                 }
1238                             }
1239                             else
1240                                 makeLI(n);
1241                       L10:
1242                             vec_free(tmp);
1243                             break;
1244                         }
1245                     }
1246                 }
1247                 break;
1248 
1249             case OPvar:
1250                 v = n.EV.Vsym;
1251                 if (v.Sflags & SFLunambig)     // must be unambiguous to be LI
1252                 {
1253                     tmp = vec_calloc(go.defnod.length);
1254                     //filterrd(tmp,rd,v);       // only the RDs pertaining to v
1255                     listrds(rd,n,tmp,null);  // only the RDs pertaining to v
1256 
1257                     // if (no RDs within loop)
1258                     //  then it's loop invariant
1259 
1260                     for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i)  // for each RD
1261                         if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv))
1262                             goto L1;    // found a RD in the loop
1263                     makeLI(n);
1264 
1265                 L1: vec_free(tmp);
1266                 }
1267                 break;
1268 
1269             case OPstring:
1270             case OPrelconst:
1271             case OPconst:                   /* constants are always LI      */
1272             case OPframeptr:
1273                 makeLI(n);
1274                 break;
1275 
1276             case OPinfo:
1277                 markinvar(n.EV.E2,rd);
1278                 break;
1279 
1280             case OPstrthis:
1281             case OPmark:
1282             case OPctor:
1283             case OPdtor:
1284             case OPdctor:
1285             case OPhalt:
1286             case OPgot:                     // shouldn't OPgot be makeLI ?
1287                 break;
1288 
1289             default:
1290                 //printf("n.Eoper = %s, OPconst = %d\n", oper_str(n.Eoper), OPconst);
1291                 assert(0);
1292         }
1293 
1294         debug
1295         {
1296             if (debugc && isLI(n))
1297             {
1298                 printf("  LI elem: ");
1299                 WReqn(n);
1300                 printf("\n");
1301             }
1302         }
1303     }
1304 
1305     markinvar(n, rd);
1306 }
1307 
1308 /********************
1309  * Update rd vector.
1310  * Input:
1311  *      n       assignment elem or function call elem or OPasm elem
1312  *      rd      reaching def vector to update
1313  *              (clear bits for defs we kill, set bit for n (which is the
1314  *               def we are genning))
1315  *      vecdim  go.defnod.length
1316  */
1317 
1318 extern (C) {
1319 @trusted
1320 void updaterd(elem *n,vec_t GEN,vec_t KILL)
1321 {
1322     const op = n.Eoper;
1323     elem *t;
1324 
1325     assert(OTdef(op));
1326     assert(GEN);
1327     elem_debug(n);
1328 
1329     uint ni = n.Edef;
1330     assert(ni != -1);
1331 
1332     // If unambiguous def
1333     if (OTassign(op) && (t = n.EV.E1).Eoper == OPvar)
1334     {
1335         vec_t v = go.defnod[ni].DNunambig;
1336         assert(v);
1337         if (KILL)
1338             vec_orass(KILL, v);
1339         vec_subass(GEN, v);
1340     }
1341     else
1342     {
1343         static if (0)
1344         {
1345             if (OTassign(op) && t.Eoper != OPvar && t.Ejty)
1346             {
1347                 // for all unambig defs in go.defnod[]
1348                 foreach (uint i; 0 .. go.defnod.length)
1349                 {
1350                     elem *tn = go.defnod[i].DNelem;
1351                     elem *tn1;
1352 
1353                     if (tn == n)
1354                         ni = i;
1355 
1356                     if (!OTassign(tn.Eoper))
1357                         continue;
1358 
1359                     // If def of same variable, kill that def
1360                     tn1 = tn.EV.E1;
1361                     if (tn1.Eoper != OPind || t.Ejty != tn1.Ejty)
1362                         continue;
1363 
1364                     if (KILL)
1365                         vec_setbit(i,KILL);
1366                     vec_clearbit(i,GEN);
1367                 }
1368             }
1369         }
1370     }
1371 
1372     vec_setbit(ni,GEN);                 // set bit in GEN for this def
1373 }
1374 }
1375 
1376 /***************************
1377  * Mark all elems as not being loop invariant.
1378  */
1379 
1380 @trusted
1381 private void unmarkall(elem *e)
1382 {
1383     for (; 1; e = e.EV.E1)
1384     {
1385         assert(e);
1386         e.Nflags &= ~NFLli;            /* unmark this elem             */
1387         if (OTunary(e.Eoper))
1388             continue;
1389         else if (OTbinary(e.Eoper))
1390         {
1391             unmarkall(e.EV.E2);
1392             continue;
1393         }
1394         return;
1395     }
1396 }
1397 
1398 
1399 /********************************
1400  * Search for references to v in tree n before nstop is encountered.
1401  * Params:
1402  *      v = symbol to search for
1403  *      n = tree to search
1404  *      nstop = stop searching tree when reaching this elem
1405  * Returns:
1406  *    true if there are any refs of v in n before nstop is encountered
1407  */
1408 
1409 @trusted
1410 private bool refs(Symbol *v,elem *n,elem *nstop)
1411 {
1412     symbol_debug(v);
1413     assert(symbol_isintab(v));
1414     assert(v.Ssymnum < globsym.length);
1415     bool stop = false;
1416 
1417     // Walk tree in evaluation order
1418     bool walk(elem* n)
1419     {
1420         elem_debug(n);
1421         assert(n);
1422 
1423         if (stop)
1424             return false;
1425         bool f = false;
1426         const op = n.Eoper;
1427         if (OTunary(op))
1428             f = walk(n.EV.E1);
1429         else if (OTbinary(op))
1430         {
1431             if (ERTOL(n))                   /* watch order of evaluation    */
1432             {
1433                 /* Note that (OPvar = e) is not a ref of OPvar, whereas     */
1434                 /* ((OPbit OPvar) = e) is a ref of OPvar, and (OPvar op= e) is */
1435                 /* a ref of OPvar, etc.                                     */
1436                 f = walk(n.EV.E2);
1437                 if (!f)
1438                 {
1439                     if (op == OPeq)
1440                     {
1441                         if (n.EV.E1.Eoper != OPvar)
1442                             f = walk(n.EV.E1.EV.E1);
1443                     }
1444                     else
1445                         f = walk(n.EV.E1);
1446                 }
1447             }
1448             else
1449                 f = walk(n.EV.E1) || walk(n.EV.E2);
1450         }
1451 
1452         if (n == nstop)
1453             stop = true;
1454         else if (n.Eoper == OPvar)           /* if variable reference        */
1455             return v == n.EV.Vsym;
1456         else if (op == OPasm)                /* everything is referenced     */
1457             return true;
1458         return f;
1459     }
1460 
1461     return walk(n);
1462 }
1463 
1464 /*************************
1465  * Move LIs to preheader.
1466  * Conditions to be satisfied for code motion are:
1467  *      1) All exit blocks are dominated (true before this is called).
1468  *                      -- OR --
1469  *      2) Variable assigned by a statement is not live on entering
1470  *         any successor outside the loop of any exit block of the
1471  *         loop.
1472  *
1473  *      3) Cannot move assignment to variable if there are any other
1474  *         assignments to that variable within the loop (true or
1475  *         assignment would not have been marked LI).
1476  *      4) Cannot move assignments to a variable if there is a use
1477  *         of that variable in this loop that is reached by any other
1478  *         def of it.
1479  *      5) Cannot move expressions that have side effects.
1480  *      6) Do not move assignments to variables that could be affected
1481  *         by ambiguous defs.
1482  *      7) It is not worth it to move expressions of the form:
1483  *              (var == const)
1484  * Input:
1485  *      n       the elem we're considering moving
1486  *      b       the block this elem is in
1487  *      l       the loop we're in
1488  *      domexit flags
1489  *      bit 0:  1       this branch is always executed
1490  *              0       this branch is only sometimes executed
1491  *      bit 1:  1       do not move LIs that could throw exceptions
1492  *                      or cannot be moved past possibly thrown exceptions
1493  * Returns:
1494  *      revised domexit
1495  */
1496 
1497 @trusted
1498 private void movelis(elem* n, block* b, ref Loop l, ref uint pdomexit)
1499 {
1500     vec_t tmp;
1501     elem *ne;
1502     elem *t;
1503     elem *n2;
1504     Symbol *v;
1505     tym_t ty;
1506 
1507 Lnextlis:
1508     //if (isLI(n)) { printf("movelis(B%d, ", b.Bdfoidx); WReqn(n); printf(")\n"); }
1509     assert(n);
1510     elem_debug(n);
1511     const op = n.Eoper;
1512     switch (op)
1513     {
1514         case OPvar:
1515         case OPconst:
1516         case OPrelconst:
1517             goto Lret;
1518 
1519         case OPandand:
1520         case OPoror:
1521         case OPcond:
1522         {
1523             uint domexit;
1524 
1525             movelis(n.EV.E1,b,l,pdomexit);        // always executed
1526             domexit = pdomexit & ~1;   // sometimes executed
1527             movelis(n.EV.E2,b,l,domexit);
1528             pdomexit |= domexit & 2;
1529             goto Lret;
1530         }
1531 
1532         case OPeq:
1533             // Do loop invariant assignments
1534             if (isLI(n) && n.EV.E1.Eoper == OPvar)
1535             {
1536                 v = n.EV.E1.EV.Vsym;          // variable index number
1537 
1538                 if (!(v.Sflags & SFLunambig)) goto L3;         // case 6
1539 
1540                 // If case 4 is not satisfied, return
1541 
1542                 // Function parameters have an implied definition prior to the
1543                 // first block of the function. Unfortunately, the rd vector
1544                 // does not take this into account. Therefore, we assume the
1545                 // worst and reject assignments to function parameters.
1546                 if (v.Sclass == SC.parameter || v.Sclass == SC.regpar ||
1547                     v.Sclass == SC.fastpar || v.Sclass == SC.shadowreg)
1548                         goto L3;
1549 
1550                 if (el_sideeffect(n.EV.E2)) goto L3;              // case 5
1551 
1552                 // If case 1 or case 2 is not satisfied, return
1553 
1554                 if (!(pdomexit & 1))                   // if not case 1
1555                 {
1556                     uint i;
1557                     for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
1558                     {
1559                         foreach (bl; ListRange(dfo[i].Bsucc))
1560                         {
1561                             block *s;           // successor to exit block
1562 
1563                             s = list_block(bl);
1564                             if (!vec_testbit(s.Bdfoidx,l.Lloop) &&
1565                                 (!symbol_isintab(v) ||
1566                                  vec_testbit(v.Ssymnum,s.Binlv))) // if v is live on exit
1567                                     goto L3;
1568                         }
1569                     }
1570                 }
1571 
1572                 tmp = vec_calloc(go.defnod.length);
1573                 uint i;
1574                 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
1575                 {
1576                     if (dfo[i] == b)        // except this one
1577                         continue;
1578 
1579                     //<if there are any RDs of v in Binrd other than n>
1580                     //    <if there are any refs of v in that block>
1581                     //        return;
1582 
1583                     //filterrd(tmp,dfo[i].Binrd,v);
1584                     listrds(dfo[i].Binrd,n.EV.E1,tmp,null);
1585                     uint j;
1586                     for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j)  // for each RD of v in Binrd
1587                     {
1588                         if (go.defnod[j].DNelem == n)
1589                             continue;
1590                         if (dfo[i].Belem &&
1591                             refs(v,dfo[i].Belem,cast(elem *)null)) //if refs of v
1592                         {
1593                             vec_free(tmp);
1594                             goto L3;
1595                         }
1596                         break;
1597                     }
1598                 } // foreach
1599 
1600                 // <if there are any RDs of v in b.Binrd other than n>
1601                 //     <if there are any references to v before the
1602                 //      assignment to v>
1603                 //         <can't move this assignment>
1604 
1605                 //filterrd(tmp,b.Binrd,v);
1606                 listrds(b.Binrd,n.EV.E1,tmp,null);
1607                 uint j;
1608                 for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j)  // for each RD of v in Binrd
1609                 {
1610                     if (go.defnod[j].DNelem == n)
1611                         continue;
1612                     if (b.Belem && refs(v,b.Belem,n))
1613                     {
1614                         vec_free(tmp);
1615                         goto L3;            // can't move it
1616                     }
1617                     break;                  // avoid redundant looping
1618                 }
1619                 vec_free(tmp);
1620 
1621                 // We have an LI assignment, n.
1622                 // Check to see if the rvalue is already in the preheader.
1623                 foreach (e; l.Llis)
1624                 {
1625                     if (el_match(n.EV.E2, e.EV.E2))
1626                     {
1627                         el_free(n.EV.E2);
1628                         n.EV.E2 = el_calloc();
1629                         el_copy(n.EV.E2, e.EV.E1);
1630                         if (debugc) printf("LI assignment rvalue was replaced\n");
1631                         doflow = true;
1632                         go.changes++;
1633                         break;
1634                     }
1635                 }
1636 
1637                 // move assignment elem to preheader
1638                 if (debugc) printf("Moved LI assignment ");
1639 
1640                 debug
1641                     if (debugc)
1642                     {
1643                         WReqn(n);
1644                         printf(";\n");
1645                     }
1646 
1647                 go.changes++;
1648                 doflow = true;                  // redo flow analysis
1649                 ne = el_calloc();
1650                 el_copy(ne,n);                  // create assignment elem
1651                 assert(l.Lpreheader);          // make sure there is one
1652                 appendelem(ne,&(l.Lpreheader.Belem)); // append ne to preheader
1653                 l.Llis.push(ne);
1654 
1655                 el_copy(n,ne.EV.E1);      // replace n with just a reference to v
1656                 goto Lret;
1657             } // if
1658             break;
1659 
1660         case OPcall:
1661         case OPucall:
1662             pdomexit |= 2;
1663             break;
1664 
1665         case OPpair:
1666         case OPrpair:                   // don't move these, as they do not do computation
1667             movelis(n.EV.E1,b,l,pdomexit);
1668             n = n.EV.E2;
1669             goto Lnextlis;
1670 
1671         default:
1672             break;
1673     }
1674 
1675 L3:
1676     // Do leaves of non-LI expressions, leaves of = elems that didn't
1677     // meet the invariant assignment removal criteria, and don't do leaves
1678     if (OTleaf(op))
1679         goto Lret;
1680     if (!isLI(n) || op == OPeq || op == OPcomma || OTrel(op) || op == OPnot ||
1681       // These are usually addressing modes, so moving them is a net loss
1682       (I32 && op == OPshl && n.EV.E2.Eoper == OPconst && el_tolong(n.EV.E2) <= 3UL)
1683      )
1684     {
1685         if (OTassign(op))
1686         {
1687             elem *n1 = n.EV.E1;
1688             elem *n11;
1689 
1690             if (OTbinary(op))
1691                 movelis(n.EV.E2,b,l,pdomexit);
1692 
1693             // Do lvalue only if it is an expression
1694             if (n1.Eoper == OPvar)
1695                 goto Lret;
1696             n11 = n1.EV.E1;
1697             if (OTbinary(n1.Eoper))
1698             {
1699                 movelis(n11,b,l,pdomexit);
1700                 n = n1.EV.E2;
1701             }
1702             // If *(x + c), just make x the LI, not the (x + c).
1703             // The +c comes free with the addressing mode.
1704             else if (n1.Eoper == OPind &&
1705                     isLI(n11) &&
1706                     n11.Eoper == OPadd &&
1707                     n11.EV.E2.Eoper == OPconst
1708                     )
1709             {
1710                 n = n11.EV.E1;
1711             }
1712             else
1713                 n = n11;
1714             movelis(n,b,l,pdomexit);
1715             if (b.Btry || !(n1.Eoper == OPvar && symbol_isintab(n1.EV.Vsym)))
1716             {
1717                 //printf("assign to global => domexit |= 2\n");
1718                 pdomexit |= 2;
1719             }
1720         }
1721         else if (OTunary(op))
1722         {
1723             elem *e1 = n.EV.E1;
1724 
1725             // If *(x + c), just make x the LI, not the (x + c).
1726             // The +c comes free with the addressing mode.
1727             if (op == OPind &&
1728                 isLI(e1) &&
1729                 e1.Eoper == OPadd &&
1730                 e1.EV.E2.Eoper == OPconst
1731                )
1732             {
1733                 n = e1.EV.E1;
1734             }
1735             else
1736                 n = e1;
1737         }
1738         else if (OTbinary(op))
1739         {
1740             movelis(n.EV.E1,b,l,pdomexit);
1741             n = n.EV.E2;
1742         }
1743         goto Lnextlis;
1744   }
1745 
1746   if (el_sideeffect(n))
1747         goto Lret;
1748 
1749     static if (0)
1750     {
1751         printf("*pdomexit = %u\n", pdomexit);
1752         if (pdomexit & 2)
1753         {
1754             // If any indirections, can't LI it
1755 
1756             // If this operand has already been indirected, we can let
1757             // it pass.
1758             Symbol *s;
1759 
1760             printf("looking at:\n");
1761             elem_print(n);
1762             s = el_basesym(n.EV.E1);
1763             if (s)
1764             {
1765                 foreach (el; l.Llis)
1766                 {
1767                     el = el.EV.E2;
1768                     elem_print(el);
1769                     if (el.Eoper == OPind && el_basesym(el.EV.E1) == s)
1770                     {
1771                         printf("  pass!\n");
1772                         goto Lpass;
1773                     }
1774                 }
1775             }
1776             printf("  skip!\n");
1777             goto Lret;
1778 
1779         Lpass:
1780         }
1781     }
1782 
1783     // Move the LI expression to the preheader
1784     if (debugc) printf("Moved LI expression ");
1785 
1786     debug
1787         if (debugc)
1788         {
1789             WReqn(n);
1790             printf(";\n");
1791         }
1792 
1793     // See if it's already been moved
1794     ty = n.Ety;
1795     foreach (el; l.Llis)
1796     {
1797         tym_t ty2;
1798 
1799         //printf("existing LI: "); WReqn(el); printf("\n");
1800         ty2 = el.EV.E2.Ety;
1801         if (tysize(ty) == tysize(ty2))
1802         {
1803             el.EV.E2.Ety = ty;
1804             if (el_match(n,el.EV.E2))
1805             {
1806                 el.EV.E2.Ety = ty2;
1807                 if (!OTleaf(n.Eoper))
1808                 {       el_free(n.EV.E1);
1809                         if (OTbinary(n.Eoper))
1810                                 el_free(n.EV.E2);
1811                 }
1812                 el_copy(n,el.EV.E1);      // make copy of temp
1813                 n.Ety = ty;
1814 
1815                 debug
1816                     if (debugc)
1817                     {   printf("Already moved: LI expression replaced with ");
1818                         WReqn(n);
1819                         printf("\nPreheader %d expression %p ",
1820                         l.Lpreheader.Bdfoidx,l.Lpreheader.Belem);
1821                         WReqn(l.Lpreheader.Belem);
1822                         printf("\n");
1823                     }
1824 
1825                 go.changes++;
1826                 doflow = true;                  // redo flow analysis
1827                 goto Lret;
1828             }
1829             el.EV.E2.Ety = ty2;
1830         }
1831     }
1832 
1833     if (!(pdomexit & 1))                       // if only sometimes executed
1834     {
1835         if (debugc) printf(" doesn't dominate exit\n");
1836         goto Lret;                              // don't move LI
1837     }
1838 
1839     if (tyaggregate(n.Ety))
1840         goto Lret;
1841 
1842     go.changes++;
1843     doflow = true;                              // redo flow analysis
1844 
1845     t = el_alloctmp(n.Ety);                     /* allocate temporary t */
1846 
1847     debug
1848     {
1849         if (debugc) printf("movelis() introduced new variable '%s' of type ",t.EV.Vsym.Sident.ptr);
1850         if (debugc) printf("%s\n", tym_str(t.Ety));
1851         if (debugc) printf("\n");
1852     }
1853 
1854     n2 = el_calloc();
1855     el_copy(n2,n);                              /* create copy n2 of n  */
1856     ne = el_bin(OPeq,t.Ety,t,n2);               /* create elem t=n2     */
1857     assert(l.Lpreheader);                       /* make sure there is one */
1858     appendelem(ne,&(l.Lpreheader.Belem));       /* append ne to preheader */
1859 
1860     debug
1861         if (debugc)
1862         {
1863             printf("Preheader %d expression %p\n\t",
1864             l.Lpreheader.Bdfoidx,l.Lpreheader.Belem);
1865             WReqn(l.Lpreheader.Belem);
1866             printf("\nLI expression replaced with "); WReqn(t);
1867             printf("\n");
1868         }
1869 
1870     el_copy(n,t);                                 /* replace this elem with t */
1871 
1872     // Remember LI expression in elem list
1873     l.Llis.push(ne);
1874 
1875 Lret:
1876 
1877 }
1878 
1879 /***************************
1880  * Append elem to existing elem using an OPcomma elem.
1881  * Input:
1882  *      n       elem to append
1883  *      *pn     elem to append to
1884  */
1885 
1886 @trusted
1887 private void appendelem(elem *n,elem **pn)
1888 {
1889     assert(n && pn);
1890     if (*pn)                                    /* if this elem exists  */
1891     {
1892         while ((*pn).Eoper == OPcomma)          /* while we see OPcomma elems */
1893         {
1894             (*pn).Ety = n.Ety;
1895             pn = &((*pn).EV.E2);                /* cruise down right side */
1896         }
1897         *pn = el_bin(OPcomma,n.Ety,*pn,n);
1898     }
1899     else
1900         *pn = n;                                /* else create a elem   */
1901 }
1902 
1903 /************** LOOP INDUCTION VARIABLES **********************/
1904 
1905 /****************************
1906  * Create a new famlist entry.
1907  */
1908 
1909 @trusted
1910 private void newfamlist(famlist* fl, tym_t ty)
1911 {
1912     eve c = void;
1913     memset(&c,0,c.sizeof);
1914 
1915     fl.FLty = ty;
1916     switch (tybasic(ty))
1917     {   case TYfloat:
1918             c.Vfloat = 1;
1919             break;
1920 
1921         case TYdouble:
1922         case TYdouble_alias:
1923             c.Vdouble = 1;
1924             break;
1925 
1926         case TYldouble:
1927             c.Vldouble = 1;
1928             break;
1929 
1930         case TYbool:
1931         case TYchar:
1932         case TYschar:
1933         case TYuchar:
1934             c.Vchar = 1;
1935             break;
1936 
1937         case TYshort:
1938         case TYushort:
1939         case TYchar16:
1940         case TYwchar_t:             // BUG: what about 4 byte wchar_t's?
1941             c.Vshort = 1;
1942             break;
1943 
1944         case TYsptr:
1945         case TYcptr:
1946         case TYfptr:
1947         case TYvptr:
1948         case TYnptr:
1949         case TYnullptr:
1950         case TYimmutPtr:
1951         case TYsharePtr:
1952         case TYrestrictPtr:
1953         case TYfgPtr:
1954             ty = TYint;
1955             if (I64)
1956                 ty = TYllong;
1957             goto case TYuint;
1958 
1959         case TYint:
1960         case TYuint:
1961             c.Vint = 1;
1962             break;
1963 
1964         case TYhptr:
1965             ty = TYlong;
1966             goto case TYlong;
1967 
1968         case TYlong:
1969         case TYulong:
1970         case TYdchar:
1971         default:
1972             c.Vlong = 1;
1973             break;
1974     }
1975     fl.c1 = el_const(ty,&c);               /* c1 = 1               */
1976     c.Vldouble = 0;
1977     if (typtr(ty))
1978     {
1979         ty = TYint;
1980         if (tybasic(ty) == TYhptr)
1981             ty = TYlong;
1982         if (I64)
1983             ty = TYllong;
1984     }
1985     fl.c2 = el_const(ty,&c);               /* c2 = 0               */
1986 }
1987 
1988 /***************************
1989  * Remove induction variables from loop l.
1990  * Loop invariant removal should have been done just previously.
1991  */
1992 
1993 @trusted
1994 private void loopiv(ref Loop l)
1995 {
1996     if (debugc) printf("loopiv(%p)\n", &l);
1997     assert(l.Livlist.length == 0 && l.Lopeqlist.length == 0);
1998     elimspec(l, dfo);
1999     if (doflow)
2000     {
2001         flowrd();               /* compute reaching defs                */
2002         flowlv();               /* compute live variables               */
2003         flowae();               // compute available expressions
2004         doflow = false;
2005     }
2006     findbasivs(l);              /* find basic induction variables       */
2007     findopeqs(l);               // find op= variables
2008     findivfams(l);              /* find IV families                     */
2009     elimfrivivs(l);             /* eliminate less useful family IVs     */
2010     intronvars(l);              /* introduce new variables              */
2011     elimbasivs(l);              /* eliminate basic IVs                  */
2012     if (!addblk)                // adding a block changes the Binlv
2013         elimopeqs(l);           // eliminate op= variables
2014 
2015     foreach (ref iv; l.Livlist)
2016         iv.reset();
2017     l.Livlist.reset();          // reset for reuse
2018 
2019     foreach (ref iv; l.Lopeqlist)
2020         iv.reset();
2021     l.Lopeqlist.reset();        // reset for reuse
2022 
2023     /* Do copy propagation and dead assignment elimination        */
2024     /* upon return to optfunc()                                   */
2025 }
2026 
2027 /*************************************
2028  * Find basic IVs of loop l.
2029  * A basic IV x of loop l is a variable x which has
2030  * exactly one assignment within l of the form:
2031  * x += c or x -= c, where c is either a constant
2032  * or a LI.
2033  * Input:
2034  *      go.defnod[] loaded with all the definition elems of the loop
2035  */
2036 
2037 @trusted
2038 private void findbasivs(ref Loop l)
2039 {
2040     vec_t poss,notposs;
2041     elem *n;
2042     bool ambdone;
2043 
2044     ambdone = false;
2045     poss = vec_calloc(globsym.length);
2046     notposs = vec_calloc(globsym.length);  /* vector of all variables      */
2047                                         /* (initially all unmarked)     */
2048 
2049     /* for each def in go.defnod[] that is within loop l     */
2050 
2051     foreach (const i; 0 .. go.defnod.length)
2052     {
2053         if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop))
2054             continue;               /* def is not in the loop       */
2055 
2056         n = go.defnod[i].DNelem;
2057         elem_debug(n);
2058         if (OTassign(n.Eoper) && n.EV.E1.Eoper == OPvar)
2059         {
2060             Symbol *s;                  /* if unambiguous def           */
2061 
2062             s = n.EV.E1.EV.Vsym;
2063             if (symbol_isintab(s))
2064             {
2065                 SYMIDX v;
2066 
2067                 v = n.EV.E1.EV.Vsym.Ssymnum;
2068                 if ((n.Eoper == OPaddass || n.Eoper == OPminass ||
2069                      n.Eoper == OPpostinc || n.Eoper == OPpostdec) &&
2070                         (n.EV.E2.Eoper == OPconst || /* if x += c or x -= c          */
2071                          n.EV.E2.Eoper == OPvar && isLI(n.EV.E2)))
2072                 {
2073                     if (vec_testbit(v,poss))
2074                         /* We've already seen this def elem,    */
2075                         /* therefore there is more than one     */
2076                         /* def of v within the loop, therefore  */
2077                         /* v is not a basic IV.                 */
2078                         vec_setbit(v,notposs);
2079                     else
2080                         vec_setbit(v,poss);
2081                 }
2082                 else                    /* else mark as not possible    */
2083                     vec_setbit(v,notposs);
2084             }
2085         }
2086         else                            /* else ambiguous def           */
2087         {
2088             /* mark any vars that could be affected by              */
2089             /* this def as not possible                             */
2090 
2091             if (!ambdone)           /* avoid redundant loops        */
2092             {
2093                 foreach (j; 0 .. globsym.length)
2094                 {
2095                     if (!(globsym[j].Sflags & SFLunambig))
2096                         vec_setbit(j,notposs);
2097                 }
2098                 ambdone = true;
2099             }
2100         }
2101     }
2102     static if (0)
2103     {
2104         printf("poss    "); vec_println(poss);
2105         printf("notposs "); vec_println(notposs);
2106     }
2107     vec_subass(poss,notposs);             /* poss = poss - notposs        */
2108 
2109     /* create list of IVs */
2110     uint i;
2111     for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.length; ++i)  // for each basic IV
2112     {
2113         Symbol *s;
2114 
2115         /* Skip if we don't want it to be a basic IV (see funcprev())   */
2116         s = globsym[i];
2117         assert(symbol_isintab(s));
2118         if (s.Sflags & SFLnotbasiciv)
2119             continue;
2120 
2121         // Do not use aggregates as basic IVs. This is because the other loop
2122         // code doesn't check offsets into symbols, (assuming everything
2123         // is at offset 0). We could perhaps amend this by allowing basic IVs
2124         // if the struct consists of only one data member.
2125         if (tyaggregate(s.ty()))
2126             continue;
2127 
2128         // Do not use complex types as basic IVs, as the code gen isn't up to it
2129         if (tycomplex(s.ty()))
2130                 continue;
2131 
2132         auto biv = l.Livlist.push();
2133         biv.IVbasic = s;               // symbol of basic IV
2134         biv.IVincr = null;
2135 
2136         if (debugc) printf("Symbol '%s' (%d) is a basic IV, ", s.Sident.ptr, i);
2137 
2138         /* We have the sym idx of the basic IV. We need to find         */
2139         /* the parent of the increment elem for it.                     */
2140 
2141         /* First find the go.defnod[]      */
2142         foreach (j; 0 .. go.defnod.length)
2143         {
2144             /* If go.defnod is a def of i and it is in the loop        */
2145             if (go.defnod[j].DNelem.EV.E1 &&     /* OPasm are def nodes  */
2146                 go.defnod[j].DNelem.EV.E1.EV.Vsym == s &&
2147                 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop))
2148             {
2149                 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem));
2150                 assert(s == (*biv.IVincr).EV.E1.EV.Vsym);
2151 
2152                 debug if (debugc)
2153                 {
2154                     printf("Increment elem is: "); WReqn(*biv.IVincr);     printf("\n");
2155                 }
2156                 goto L1;
2157             }
2158         }
2159         assert(0);                      /* should have found it         */
2160         /* NOTREACHED */
2161 
2162     L1:
2163     }
2164 
2165     vec_free(poss);
2166     vec_free(notposs);
2167 }
2168 
2169 /*************************************
2170  * Find op= elems of loop l.
2171  * Analogous to findbasivs().
2172  * Used to eliminate useless loop code normally found in benchmark programs.
2173  * Input:
2174  *      go.defnod[] loaded with all the definition elems of the loop
2175  */
2176 
2177 @trusted
2178 private void findopeqs(ref Loop l)
2179 {
2180     vec_t poss,notposs;
2181     elem *n;
2182     bool ambdone;
2183 
2184     ambdone = false;
2185     poss = vec_calloc(globsym.length);
2186     notposs = vec_calloc(globsym.length);  // vector of all variables
2187                                         // (initially all unmarked)
2188 
2189     // for each def in go.defnod[] that is within loop l
2190 
2191     foreach (i; 0 .. go.defnod.length)
2192     {
2193         if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop))
2194             continue;               // def is not in the loop
2195 
2196         n = go.defnod[i].DNelem;
2197         elem_debug(n);
2198         if (OTopeq(n.Eoper) && n.EV.E1.Eoper == OPvar)
2199         {
2200             Symbol *s;                  // if unambiguous def
2201 
2202             s = n.EV.E1.EV.Vsym;
2203             if (symbol_isintab(s))
2204             {
2205                 SYMIDX v;
2206 
2207                 v = n.EV.E1.EV.Vsym.Ssymnum;
2208                 {
2209                     if (vec_testbit(v,poss))
2210                         // We've already seen this def elem,
2211                         // therefore there is more than one
2212                         // def of v within the loop, therefore
2213                         // v is not a basic IV.
2214                         vec_setbit(v,notposs);
2215                     else
2216                         vec_setbit(v,poss);
2217                 }
2218             }
2219         }
2220         else                            // else ambiguous def
2221         {
2222             // mark any vars that could be affected by
2223             // this def as not possible
2224 
2225             if (!ambdone)           // avoid redundant loops
2226             {
2227                 foreach (j; 0 .. globsym.length)
2228                 {
2229                     if (!(globsym[j].Sflags & SFLunambig))
2230                         vec_setbit(j,notposs);
2231                 }
2232                 ambdone = true;
2233             }
2234         }
2235     }
2236 
2237     // Don't use symbols already in Livlist
2238     foreach (ref iv; l.Livlist)
2239     {
2240         vec_setbit(iv.IVbasic.Ssymnum,notposs);
2241     }
2242 
2243 
2244     static if (0)
2245     {
2246         printf("poss    "); vec_println(poss);
2247         printf("notposs "); vec_println(notposs);
2248     }
2249 
2250     vec_subass(poss,notposs);           // poss = poss - notposs
2251 
2252     // create list of IVs
2253     uint i;
2254     for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.length; ++i)  // for each opeq IV
2255     {
2256         Symbol *s;
2257 
2258         s = globsym[i];
2259         assert(symbol_isintab(s));
2260 
2261         // Do not use aggregates as basic IVs. This is because the other loop
2262         // code doesn't check offsets into symbols, (assuming everything
2263         // is at offset 0). We could perhaps amend this by allowing basic IVs
2264         // if the struct consists of only one data member.
2265         if (tyaggregate(s.ty()))
2266             continue;
2267 
2268         auto biv = l.Lopeqlist.push();
2269         biv.IVbasic = s;               // symbol of basic IV
2270         biv.IVincr = null;
2271 
2272         if (debugc) printf("Symbol '%s' (%d) is an opeq IV, ",s.Sident.ptr,i);
2273 
2274         // We have the sym idx of the basic IV. We need to find
2275         // the parent of the increment elem for it.
2276 
2277         // First find the go.defnod[]
2278         foreach (j; 0 .. go.defnod.length)
2279         {
2280             // If go.defnod is a def of i and it is in the loop
2281             if (go.defnod[j].DNelem.EV.E1 &&     // OPasm are def nodes
2282                 go.defnod[j].DNelem.EV.E1.EV.Vsym == s &&
2283                 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop))
2284             {
2285                 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem));
2286                 assert(s == (*biv.IVincr).EV.E1.EV.Vsym);
2287 
2288                 debug if (debugc)
2289                 {
2290                     printf("Opeq elem is: "); WReqn(*biv.IVincr);  printf("\n");
2291                 }
2292                 goto Lcont;
2293             }
2294         }
2295         assert(0);                      // should have found it
2296         // NOTREACHED
2297 
2298     Lcont:
2299     }
2300 
2301     vec_free(poss);
2302     vec_free(notposs);
2303 }
2304 
2305 /*****************************
2306  * Find families for each basic IV.
2307  * An IV family is a list of elems of the form
2308  * c1*X+c2, where X is a basic induction variable.
2309  * Note that we do not do divides, because of roundoff error problems.
2310  */
2311 
2312 @trusted
2313 private void findivfams(ref Loop l)
2314 {
2315     if (debugc) printf("findivfams(%p)\n", &l);
2316     foreach (ref biv; l.Livlist)
2317     {
2318         for (uint i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
2319             if (dfo[i].Belem)
2320                 ivfamelems(&biv,&(dfo[i].Belem));
2321         /* Fold all the constant expressions in c1 and c2.      */
2322         foreach (ref fl; biv.IVfamily)
2323         {
2324             fl.c1 = doptelem(fl.c1,GOALvalue | GOALagain);
2325             fl.c2 = doptelem(fl.c2,GOALvalue | GOALagain);
2326         }
2327     }
2328 }
2329 
2330 /*************************
2331  * Tree walking support routine for findivfams().
2332  *      biv =   basic induction variable pointer
2333  *      pn      pointer to elem
2334  */
2335 
2336 @trusted
2337 private void ivfamelems(Iv *biv,elem **pn)
2338 {
2339     tym_t ty,c2ty;
2340     elem *n1;
2341     elem *n2;
2342 
2343     assert(pn);
2344     elem* n = *pn;
2345     assert(biv && n);
2346     const op = n.Eoper;
2347     if (OTunary(op))
2348     {
2349        ivfamelems(biv,&n.EV.E1);
2350         n1 = n.EV.E1;
2351         n2 = null;
2352     }
2353     else if (OTbinary(op))
2354     {
2355         ivfamelems(biv,&n.EV.E1);
2356         ivfamelems(biv,&n.EV.E2); /* LTOR or RTOL order is unimportant */
2357         n1 = n.EV.E1;
2358         n2 = n.EV.E2;
2359     }
2360     else                                /* else leaf elem               */
2361         return;                         /* which can't be in the family */
2362 
2363     if (tycomplex(n.Ety))
2364         return;
2365 
2366     if (op == OPmul || op == OPadd || op == OPmin ||
2367         op == OPneg || op == OPshl)
2368     {   /* Note that we are wimping out and not considering             */
2369         /* LI variables as part of c1 and c2, but only constants.       */
2370 
2371         ty = n.Ety;
2372 
2373         /* Since trees are canonicalized, basic induction variables     */
2374         /* will only appear on the left.                                */
2375 
2376         /* Improvement:                                                 */
2377         /* We wish to pick up the cases (biv + li), (biv - li) and      */
2378         /* (li + biv). OPmul and LS with bivs are out, since if we      */
2379         /* try to eliminate the biv, and the loop test is a >, >=,      */
2380         /* <, <=, we have a problem since we don't know if the li       */
2381         /* is negative. (Would have to call swaprel() on it.)           */
2382 
2383         /* If we have (li + var), swap the leaves.                      */
2384         if (op == OPadd && isLI(n1) && n1.Eoper == OPvar && n2.Eoper == OPvar)
2385         {
2386             n.EV.E1 = n2;
2387             n2 = n.EV.E2 = n1;
2388             n1 = n.EV.E1;
2389         }
2390 
2391         // Get rid of case where we painted a far pointer to a long
2392         if (op == OPadd || op == OPmin)
2393         {
2394             int sz;
2395 
2396             sz = tysize(ty);
2397             if (sz == tysize(TYfptr) && !tyfv(ty) &&
2398                 (sz != tysize(n1.Ety) || sz != tysize(n2.Ety)))
2399                 return;
2400         }
2401 
2402         /* Look for function of basic IV (-biv or biv op const)         */
2403         if (n1.Eoper == OPvar && n1.EV.Vsym == biv.IVbasic)
2404         {
2405             if (op == OPneg)
2406             {
2407                 if (debugc) printf("found (-biv), elem %p\n",n);
2408                 auto fl = biv.IVfamily.push();
2409                 newfamlist(fl, ty);
2410                 fl.FLivty = n1.Ety;
2411                 fl.FLpelem = pn;
2412                 fl.c1 = el_una(op,ty,fl.c1); /* c1 = -1       */
2413             }
2414             else if (n2.Eoper == OPconst ||
2415                      isLI(n2) && (op == OPadd || op == OPmin))
2416             {
2417                 debug
2418                     if (debugc)
2419                     {   printf("found (biv op const), elem (");
2420                             WReqn(n);
2421                             printf(");\n");
2422                             printf("Types: n1=%s", tym_str(n1.Ety));
2423                             printf(" ty=%s", tym_str(ty));
2424                             printf(" n2=%s\n", tym_str(n2.Ety));
2425                     }
2426 
2427                 auto fl = biv.IVfamily.push();
2428                 newfamlist(fl, ty);
2429                 fl.FLivty = n1.Ety;
2430                 fl.FLpelem = pn;
2431                 switch (op)
2432                 {
2433                     case OPadd:           /* c2 = right           */
2434                         c2ty = n2.Ety;
2435                         if (typtr(fl.c2.Ety))
2436                                 c2ty = fl.c2.Ety;
2437                         goto L1;
2438 
2439                     case OPmin:           /* c2 = -right          */
2440                         c2ty = fl.c2.Ety;
2441                         /* Check for subtracting two pointers */
2442                         if (typtr(c2ty) && typtr(n2.Ety))
2443                         {
2444                             if (tybasic(c2ty) == TYhptr)
2445                                 c2ty = TYlong;
2446                             else
2447                                 c2ty = I64 ? TYllong : TYint;
2448                         }
2449                   L1:
2450                         fl.c2 = el_bin(op,c2ty,fl.c2,el_copytree(n2));
2451                         break;
2452 
2453                     case OPmul:           /* c1 = right           */
2454                     case OPshl:           /* c1 = 1 << right      */
2455                         fl.c1 = el_bin(op,ty,fl.c1,el_copytree(n2));
2456                         break;
2457 
2458                     default:
2459                         assert(0);
2460                 }
2461             }
2462         }
2463 
2464         /* Look for function of existing IV                             */
2465 
2466         foreach (ref fl; biv.IVfamily)
2467         {
2468             if (*fl.FLpelem != n1)          /* not it               */
2469                 continue;
2470 
2471             /* Look for (fl op constant)     */
2472             if (op == OPneg)
2473             {
2474                 if (debugc) printf("found (-fl), elem %p\n",n);
2475                 /* c1 = -c1; c2 = -c2; */
2476                 fl.c1 = el_una(OPneg,ty,fl.c1);
2477                 fl.c2 = el_una(OPneg,ty,fl.c2);
2478                 fl.FLty = ty;
2479                 fl.FLpelem = pn;        /* replace with new IV  */
2480             }
2481             else if (n2.Eoper == OPconst ||
2482                      isLI(n2) && (op == OPadd || op == OPmin))
2483             {
2484                 debug
2485                     if (debugc)
2486                     {
2487                         printf("found (fl op const), elem (");
2488                         WReqn(n);
2489                         assert(*pn == n);
2490                         printf(");\n");
2491                         elem_print(n);
2492                     }
2493 
2494                 switch (op)
2495                 {
2496                     case OPmul:
2497                     case OPshl:
2498                         fl.c1 = el_bin(op,ty,fl.c1,el_copytree(n2));
2499                         break;
2500 
2501                     case OPadd:
2502                     case OPmin:
2503                         break;
2504 
2505                     default:
2506                         assert(0);
2507                 }
2508                 fl.c2 = el_bin(op,ty,fl.c2,el_copytree(n2));
2509                 fl.FLty = ty;
2510                 fl.FLpelem = pn;        /* replace with new IV  */
2511             } /* else if */
2512         } /* for */
2513     } /* if */
2514 }
2515 
2516 /*********************************
2517  * Eliminate frivolous family ivs, that is,
2518  * if we can't eliminate the BIV, then eliminate family ivs that
2519  * differ from it only by a constant.
2520  */
2521 
2522 @trusted
2523 private void elimfrivivs(ref Loop l)
2524 {
2525     foreach (ref biv; l.Livlist)
2526     {
2527         int nrefs;
2528 
2529         if (debugc) printf("elimfrivivs()\n");
2530         /* Compute number of family ivs for biv */
2531         const nfams = biv.IVfamily.length;
2532         if (debugc) printf("nfams = %d\n", cast(int)nfams);
2533 
2534         /* Compute number of references to biv  */
2535         if (onlyref(biv.IVbasic,l,*biv.IVincr,nrefs))
2536                 nrefs--;
2537         if (debugc) printf("nrefs = %d\n",nrefs);
2538         assert(nrefs + 1 >= nfams);
2539         if (nrefs > nfams ||            // if we won't eliminate the biv
2540             (!I16 && nrefs == nfams))
2541         {   /* Eliminate any family ivs that only differ by a constant  */
2542             /* from biv                                                 */
2543             foreach (ref fl; biv.IVfamily)
2544             {
2545                 elem *ec1 = fl.c1;
2546                 targ_llong c;
2547 
2548                 if (elemisone(ec1) ||
2549                     // Eliminate fl's that can be represented by
2550                     // an addressing mode
2551                     (!I16 && ec1.Eoper == OPconst && tyintegral(ec1.Ety) &&
2552                      ((c = el_tolong(ec1)) == 2 || c == 4 || c == 8)
2553                     )
2554                    )
2555                 {
2556                     fl.FLtemp = FLELIM;
2557 
2558                     debug
2559                         if (debugc)
2560                         {
2561                             printf("Eliminating frivolous IV ");
2562                             WReqn(*fl.FLpelem);
2563                             printf("\n");
2564                         }
2565                 }
2566             }
2567         }
2568     }
2569 }
2570 
2571 
2572 /******************************
2573  * Introduce new variables.
2574  */
2575 
2576 @trusted
2577 private void intronvars(ref Loop l)
2578 {
2579     elem *T;
2580     elem *ne;
2581     elem *t2;
2582     elem *C2;
2583     elem *cmul;
2584     tym_t ty,tyr;
2585 
2586     if (debugc) printf("intronvars(%p)\n", &l);
2587     foreach (ref biv; l.Livlist)
2588     {
2589         elem *bivinc = *biv.IVincr;   /* ptr to increment elem */
2590 
2591         foreach (ref fl; biv.IVfamily)
2592         {                               /* for each IV in family of biv  */
2593             if (fl.FLtemp == FLELIM)   /* if already eliminated         */
2594                 continue;
2595 
2596             /* If induction variable can be written as a simple function */
2597             /* of a previous induction variable, skip it.                */
2598             if (funcprev(biv,fl))
2599                 continue;
2600 
2601             ty = fl.FLty;
2602             T = el_alloctmp(ty);        /* allocate temporary T          */
2603             fl.FLtemp = T.EV.Vsym;
2604 
2605             debug
2606             {
2607                 if (debugc) printf("intronvars() introduced new variable '%s' of type ",T.EV.Vsym.Sident.ptr);
2608                 if (debugc) printf("%s\n", tym_str(ty));
2609                 if (debugc) printf("\n");
2610             }
2611 
2612             /* append elem T=biv*C1+C2 to preheader */
2613             /* ne = biv*C1      */
2614             tyr = fl.FLivty;                   /* type of biv              */
2615             ne = el_var(biv.IVbasic);
2616             ne.Ety = tyr;
2617             if (!elemisone(fl.c1))             /* don't multiply ptrs by 1 */
2618                 ne = el_bin(OPmul,tyr,ne,el_copytree(fl.c1));
2619             if (tyfv(tyr) && tysize(ty) == SHORTSIZE)
2620                 ne = el_una(OP32_16,ty,ne);
2621             C2 = el_copytree(fl.c2);
2622             t2 = el_bin(OPadd,ty,ne,C2);        /* t2 = ne + C2         */
2623             ne = el_bin(OPeq,ty,el_copytree(T),t2);
2624             appendelem(ne, &(l.Lpreheader.Belem));
2625 
2626             /* prefix T+=C1*C to elem biv+=C                            */
2627             /* Must prefix in case the value of the expression (biv+=C) */
2628             /* is used by somebody up the tree.                         */
2629             cmul = el_bin(OPmul,fl.c1.Ety,el_copytree(fl.c1),
2630                                           el_copytree(bivinc.EV.E2));
2631             t2 = el_bin(bivinc.Eoper,ty,el_copytree(T),cmul);
2632             t2 = doptelem(t2,GOALvalue | GOALagain);
2633             *biv.IVincr = el_bin(OPcomma,bivinc.Ety,t2,bivinc);
2634             biv.IVincr = &((*biv.IVincr).EV.E2);
2635 
2636             debug
2637                 if (debugc)
2638                 {
2639                     printf("Replacing elem (");
2640                     WReqn(*fl.FLpelem);
2641                     printf(") with '%s'\n",T.EV.Vsym.Sident.ptr);
2642                     printf("The init elem is (");
2643                     WReqn(ne);
2644                     printf(");\nThe increment elem is (");
2645                     WReqn(t2);
2646                     printf(")\n");
2647                 }
2648 
2649             el_free(*fl.FLpelem);
2650             *fl.FLpelem = T;           /* replace elem n with ref to T  */
2651             doflow = true;              /* redo flow analysis           */
2652             go.changes++;
2653         } /* for */
2654     } /* for */
2655 }
2656 
2657 /*******************************
2658  * Determine if induction variable can be rewritten as a simple
2659  * function of a previously generated temporary.
2660  * This can frequently
2661  * generate less code than that of an all-new temporary (especially
2662  * if it is the same as a previous temporary!).
2663  * Input:
2664  *      biv             Basic induction variable
2665  *      fl              Item in biv's family list we are looking at
2666  * Returns:
2667  *      false           Caller should create a new induction variable.
2668  *      true            *FLpelem is replaced with function of a previous
2669  *                      induction variable. FLtemp is set to FLELIM to
2670  *                      indicate this.
2671  */
2672 
2673 @trusted
2674 private bool funcprev(ref Iv biv, ref famlist fl)
2675 {
2676     tym_t tymin;
2677     int sz;
2678     elem *e1;
2679     elem *e2;
2680     elem *flse1;
2681 
2682     debug if (debugc)
2683         printf("funcprev\n");
2684 
2685     foreach (ref fls; biv.IVfamily)
2686     {
2687         if (!fls.FLtemp)                // haven't generated a temporary yet
2688             continue;
2689         if (fls.FLtemp == FLELIM)      /* no iv we can use here        */
2690             continue;
2691 
2692         /* The multipliers must match   */
2693         if (!el_match(fls.c1,fl.c1))
2694             continue;
2695 
2696         /* If the c2's match also, we got it easy */
2697         if (el_match(fls.c2,fl.c2))
2698         {
2699             if (tysize(fl.FLty) > tysize(fls.FLtemp.ty()))
2700                 continue;              /* can't increase size of var   */
2701             flse1 = el_var(fls.FLtemp);
2702             flse1.Ety = fl.FLty;
2703             goto L2;
2704         }
2705 
2706         /* The difference is only in the addition. Therefore, replace
2707            *fl.FLpelem with:
2708                 case 1:         (fl.c2 + (fls.FLtemp - fls.c2))
2709                 case 2:         (fls.FLtemp + (fl.c2 - fls.c2))
2710          */
2711         e1 = fl.c2;
2712         /* Subtracting relocatables usually generates slow code for     */
2713         /* linkers that can't handle arithmetic on relocatables.        */
2714         if (typtr(fls.c2.Ety))
2715         {
2716             if (fls.c2.Eoper == OPrelconst &&
2717                 !(fl.c2.Eoper == OPrelconst &&
2718                   fl.c2.EV.Vsym == fls.c2.EV.Vsym)
2719                )
2720                 continue;
2721         }
2722         flse1 = el_var(fls.FLtemp);
2723         e2 = flse1;                             /* assume case 1        */
2724         tymin = e2.Ety;
2725         if (typtr(fls.c2.Ety))
2726         {
2727             if (!typtr(tymin))
2728             {
2729                 if (typtr(e1.Ety))
2730                 {
2731                     e1 = e2;
2732                     e2 = fl.c2;            /* case 2               */
2733                 }
2734                 else                        /* can't subtract fptr  */
2735                     goto L1;
2736             }
2737             if (tybasic(fls.c2.Ety) == TYhptr)
2738                 tymin = TYlong;
2739             else
2740                 tymin = I64 ? TYllong : TYint;         /* type of (ptr - ptr) */
2741         }
2742 
2743         /* If e1 and fls.c2 are fptrs, and are not from the same       */
2744         /* segment, we cannot subtract them.                            */
2745         if (tyfv(e1.Ety) && tyfv(fls.c2.Ety))
2746         {
2747             if (e1.Eoper != OPrelconst || fls.c2.Eoper != OPrelconst)
2748                 goto L1;                /* assume expressions have diff segs */
2749             if (e1.EV.Vsym.Sclass != fls.c2.EV.Vsym.Sclass)
2750             {
2751                L1:
2752                 el_free(flse1);
2753                 continue;
2754             }
2755         }
2756 
2757         /* Some more type checking...   */
2758         sz = tysize(fl.FLty);
2759         if (sz != tysize(e1.Ety) &&
2760             sz != tysize(tymin))
2761             goto L1;
2762 
2763         /* Do some type checking (can't add pointers and get a pointer!) */
2764         //if (typtr(fl.FLty) && typtr(e1.Ety) && typtr(tymin))
2765             //goto L1;
2766         /* Construct (e1 + (e2 - fls.c2))      */
2767         flse1 = el_bin(OPadd,fl.FLty,
2768                             e1,
2769                             el_bin(OPmin,tymin,
2770                                     e2,
2771                                     el_copytree(fls.c2)));
2772         if (sz < tysize(tymin) && sz == tysize(e1.Ety))
2773         {
2774             assert(I16);
2775             flse1.EV.E2 = el_una(OPoffset,fl.FLty,flse1.EV.E2);
2776         }
2777 
2778         flse1 = doptelem(flse1,GOALvalue | GOALagain);
2779         fl.c2 = null;
2780     L2:
2781         debug if (debugc)
2782         {
2783             printf("Replacing ");
2784             WReqn(*fl.FLpelem);
2785             printf(" with ");
2786             WReqn(flse1);
2787             printf("\n");
2788         }
2789 
2790         el_free(*fl.FLpelem);
2791         *fl.FLpelem = flse1;
2792 
2793         /* Fix the iv so when we do loops again, we won't create        */
2794         /* yet another iv, which is just what funcprev() is supposed    */
2795         /* to prevent.                                                  */
2796         fls.FLtemp.Sflags |= SFLnotbasiciv;
2797 
2798         fl.FLtemp = FLELIM;            /* mark iv as being gone        */
2799         go.changes++;
2800         doflow = true;
2801         return true;                    /* it was replaced              */
2802     }
2803     return false;                       /* need to create a new variable */
2804 }
2805 
2806 /***********************
2807  * Eliminate basic IVs.
2808  */
2809 
2810 @trusted
2811 private void elimbasivs(ref Loop l)
2812 {
2813     if (debugc) printf("elimbasivs(%p)\n", &l);
2814     foreach (ref biv; l.Livlist)
2815     {
2816         /* Can't eliminate this basic IV if we have a goal for the      */
2817         /* increment elem.                                              */
2818         // Be careful about Nflags being in a union...
2819         elem* einc = *biv.IVincr;
2820         if (!(einc.Nflags & NFLnogoal))
2821             continue;
2822 
2823         Symbol* X = biv.IVbasic;
2824         assert(symbol_isintab(X));
2825         tym_t ty = X.ty();
2826         int refcount;
2827         elem** pref = onlyref(X,l,einc,refcount);
2828 
2829         /* if only ref of X is of the form (X) or (X relop e) or (e relop X) */
2830         if (pref != null && refcount <= 1)
2831         {
2832             if (!biv.IVfamily.length)
2833                 continue;
2834 
2835             if (catchRef(X, l))
2836                 continue;
2837 
2838             elem* ref_ = *pref;
2839 
2840             /* Replace (X) with (X != 0)                            */
2841             if (ref_.Eoper == OPvar)
2842                 ref_ = *pref = el_bin(OPne,TYint,ref_,el_long(ref_.Ety,0L));
2843 
2844             const fi = simfl(biv.IVfamily, ty); // find simplest elem in family
2845             if (fi == biv.IVfamily.length)
2846                 continue;
2847             famlist* fl = &biv.IVfamily[fi];
2848 
2849             // Don't do the replacement if we would replace a
2850             // signed comparison with an unsigned one
2851             tym_t flty = fl.FLty;
2852             if (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety))
2853                 flty = touns(flty);
2854 
2855             if (ref_.Eoper >= OPle && ref_.Eoper <= OPge &&
2856                 !(tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)) &&
2857                  tyuns(flty))
2858                     continue;
2859 
2860             /* if we have (e relop X), replace it with (X relop e)  */
2861             if (ref_.EV.E2.Eoper == OPvar && ref_.EV.E2.EV.Vsym == X)
2862             {
2863                 elem* tmp = ref_.EV.E2;
2864                 ref_.EV.E2 = ref_.EV.E1;
2865                 ref_.EV.E1 = tmp;
2866                 ref_.Eoper = cast(ubyte)swaprel(ref_.Eoper);
2867             }
2868 
2869             // If e*c1+c2 would result in a sign change or an overflow
2870             // then we can't do it
2871             if (fl.c1.Eoper == OPconst)
2872             {
2873                 targ_llong c1 = el_tolong(fl.c1);
2874                 const int sz = tysize(ty);
2875                 if (sz == SHORTSIZE &&
2876                     ((ref_.EV.E2.Eoper == OPconst &&
2877                     c1 * el_tolong(ref_.EV.E2) & ~0x7FFFL) ||
2878                      c1 & ~0x7FFFL)
2879                    )
2880                     continue;
2881 
2882                 if (sz == LONGSIZE &&
2883                     ((ref_.EV.E2.Eoper == OPconst &&
2884                     c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFL) ||
2885                      c1 & ~0x7FFFFFFFL)
2886                    )
2887                     continue;
2888                 if (sz == LLONGSIZE &&
2889                     ((ref_.EV.E2.Eoper == OPconst &&
2890                     c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFFFFFFFFFL) ||
2891                      c1 & ~0x7FFFFFFFFFFFFFFFL)
2892                    )
2893                     continue;
2894             }
2895 
2896             /* If the incr is a decrement, and the relational is < or <=,
2897              * and its unsigned, then don't do it because it could drop below 0.
2898              * https://issues.dlang.org/show_bug.cgi?id=16189
2899              */
2900             if ((einc.Eoper == OPminass || einc.EV.E2.Eoper == OPconst && el_tolong(einc.EV.E2) < 0) &&
2901                 (ref_.Eoper == OPlt || ref_.Eoper == OPle) &&
2902                 (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)))
2903                 continue;
2904 
2905             /* If loop started out with a signed conditional that was
2906              * replaced with an unsigned one, don't do it if c2
2907              * is less than 0.
2908              */
2909             if (ref_.Nflags & NFLtouns && fl.c2.Eoper == OPconst)
2910             {
2911                 targ_llong c2 = el_tolong(fl.c2);
2912                 if (c2 < 0)
2913                     continue;
2914             }
2915 
2916             elem *refE2 = el_copytree(ref_.EV.E2);
2917             int refEoper = ref_.Eoper;
2918 
2919             /* if c1 < 0 and relop is < <= > >=
2920                then adjust relop as if both sides were multiplied
2921                by -1
2922              */
2923             if (!tyuns(ty) &&
2924                 (tyintegral(ty) && el_tolong(fl.c1) < 0 ||
2925                  tyfloating(ty) && el_toldoubled(fl.c1) < 0.0))
2926                 refEoper = swaprel(refEoper);
2927 
2928             /* Replace (X relop e) with (X relop (short)e)
2929                if T is 1 word but e is 2
2930              */
2931             if (tysize(flty) == SHORTSIZE &&
2932                 tysize(refE2.Ety) == LONGSIZE)
2933                 refE2 = el_una(OP32_16,flty,refE2);
2934 
2935             /* replace e with e*c1 + c2             */
2936             elem* C2 = el_copytree(fl.c2);
2937             elem* fofe = el_bin(OPadd,flty,
2938                             el_bin(OPmul,refE2.Ety,
2939                                     refE2,
2940                                     el_copytree(fl.c1)),
2941                             C2);
2942             fofe = doptelem(fofe,GOALvalue | GOALagain);    // fold any constants
2943 
2944             if (tyuns(flty) && refEoper == OPge &&
2945                 fofe.Eoper == OPconst && el_allbits(fofe, 0) &&
2946                 fl.c2.Eoper == OPconst && !el_allbits(fl.c2, 0))
2947             {
2948                 /* Don't do it if replacement will result in
2949                  * an unsigned T>=0 which will be an infinite loop.
2950                  */
2951                 el_free(fofe);
2952                 continue;
2953             }
2954 
2955             if (debugc) printf("Eliminating basic IV '%s'\n",X.Sident.ptr);
2956 
2957             debug if (debugc)
2958             {
2959                 printf("Comparison replaced: ");
2960                 WReqn(ref_);
2961                 printf(" with ");
2962             }
2963 
2964             el_free(ref_.EV.E2);
2965             ref_.EV.E2 = refE2;
2966             ref_.Eoper = cast(ubyte)refEoper;
2967 
2968             elimass(einc);          // dump the increment elem
2969 
2970             // replace X with T
2971             assert(ref_.EV.E1.EV.Voffset == 0);
2972             ref_.EV.E1.EV.Vsym = fl.FLtemp;
2973             ref_.EV.E1.Ety = flty;
2974             ref_.EV.E2 = fofe;
2975 
2976             /* If sizes of expression worked out wrong...
2977                Which can happen if we have (int)ptr==e
2978              */
2979             if (OTbinary(fofe.Eoper))         // if didn't optimize it away
2980             {
2981                 const tym_t fofety = fofe.Ety;
2982                 const int sz = tysize(fofety);
2983                 tym_t ty1 = fofe.EV.E1.Ety;
2984                 const tym_t ty2 = fofe.EV.E2.Ety;
2985                 /* Sizes of + expression must all be the same       */
2986                 if (sz != tysize(ty1) &&
2987                     sz != tysize(ty2)
2988                    )
2989                 {
2990                     if (tyuns(fofety))      // if unsigned comparison
2991                         ty1 = touns(ty1);   /* to unsigned type     */
2992                     fofe.Ety = ty1;
2993                     ref_.EV.E1.Ety = ty1;
2994                 }
2995             }
2996 
2997             /* Fix if leaves of compare are TYfptrs and the compare */
2998             /* operator is < <= > >=.                               */
2999             if (ref_.Eoper >= OPle && ref_.Eoper <= OPge && tyfv(ref_.EV.E1.Ety))
3000             {
3001                 assert(tyfv(ref_.EV.E2.Ety));
3002                 ref_.EV.E1 = el_una(OPoffset,TYuint,ref_.EV.E1);
3003                 ref_.EV.E2 = el_una(OPoffset,TYuint,fofe);
3004             }
3005 
3006             debug if (debugc)
3007             {
3008                 WReqn(ref_);
3009                 printf("\n");
3010             }
3011 
3012             go.changes++;
3013             doflow = true;                  /* redo flow analysis   */
3014 
3015             /* if X is live on entry to any successor S outside loop */
3016             /*      prepend elem X=(T-c2)/c1 to S.Belem     */
3017 
3018             for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
3019             {
3020                 elem *ne;
3021                 block *b;
3022 
3023                 foreach (bl; ListRange(dfo[i].Bsucc))
3024                 {   /* for each successor   */
3025                     b = list_block(bl);
3026                     if (vec_testbit(b.Bdfoidx,l.Lloop))
3027                         continue;       /* inside loop  */
3028                     if (!vec_testbit(X.Ssymnum,b.Binlv))
3029                         continue;       /* not live     */
3030 
3031                     C2 = el_copytree(fl.c2);
3032                     ne = el_bin(OPmin,ty,
3033                             el_var(fl.FLtemp),
3034                             C2);
3035                     if (tybasic(ne.EV.E1.Ety) == TYfptr &&
3036                         tybasic(ne.EV.E2.Ety) == TYfptr)
3037                     {
3038                         ne.Ety = I64 ? TYllong : TYint;
3039                         if (tylong(ty) && _tysize[TYint] == 2)
3040                             ne = el_una(OPs16_32,ty,ne);
3041                     }
3042 
3043                     ne = el_bin(OPeq,X.ty(),
3044                             el_var(X),
3045                             el_bin(OPdiv,ne.Ety,
3046                                 ne,
3047                                 el_copytree(fl.c1)));
3048 
3049                     debug if (debugc)
3050                     {
3051                         printf("Adding (");
3052                         WReqn(ne);
3053                         printf(") to exit block B%d\n",b.Bdfoidx);
3054                         //elem_print(ne);
3055                     }
3056 
3057                     /* We have to add a new block if there is */
3058                     /* more than one predecessor to b.      */
3059                     if (list_next(b.Bpred))
3060                     {
3061                         block *bn = block_calloc();
3062                         bn.Btry = b.Btry;
3063                         bn.BC = BCgoto;
3064                         bn.Bnext = dfo[i].Bnext;
3065                         dfo[i].Bnext = bn;
3066                         list_append(&(bn.Bsucc),b);
3067                         list_append(&(bn.Bpred),dfo[i]);
3068                         bl.ptr = cast(void *)bn;
3069                         foreach (bl2; ListRange(b.Bpred))
3070                             if (list_block(bl2) == dfo[i])
3071                             {
3072                                 bl2.ptr = cast(void *)bn;
3073                                 goto L2;
3074                             }
3075                         assert(0);
3076                     L2:
3077                         b = bn;
3078                         addblk = true;
3079                     }
3080 
3081                     if (b.Belem)
3082                         b.Belem =
3083                             el_bin(OPcomma,b.Belem.Ety,
3084                                 ne,b.Belem);
3085                     else
3086                         b.Belem = ne;
3087                     go.changes++;
3088                     doflow = true;  /* redo flow analysis   */
3089                 } /* for each successor */
3090             } /* foreach exit block */
3091             if (addblk)
3092                 return;
3093         }
3094         else if (refcount == 0)                 /* if no uses of IV in loop  */
3095         {
3096             /* Eliminate the basic IV if it is not live on any successor */
3097             for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
3098             {
3099                 foreach (bl; ListRange(dfo[i].Bsucc))
3100                 {   /* for each successor   */
3101                     block *b = list_block(bl);
3102                     if (vec_testbit(b.Bdfoidx,l.Lloop))
3103                         continue;       /* inside loop  */
3104                     if (vec_testbit(X.Ssymnum,b.Binlv))
3105                         goto L1;        /* live         */
3106                 }
3107             }
3108 
3109             if (debugc) printf("No uses, eliminating basic IV '%s' (%p)\n",X.Sident.ptr,X);
3110 
3111             /* Remove the (s op= e2) by replacing it with (1 , e2)
3112              * and let later passes remove the (1 ,) nodes.
3113              * Don't remove those nodes here because other biv's may refer
3114              * to them.
3115              */
3116             {
3117                 elem* ei = *biv.IVincr;
3118                 ei.Eoper = OPcomma;
3119                 ei.EV.E1.Eoper = OPconst;
3120                 ei.EV.E1.Ety = TYint;
3121             }
3122 
3123             go.changes++;
3124             doflow = true;                  /* redo flow analysis   */
3125           L1:
3126         }
3127     } /* for */
3128 }
3129 
3130 /***********************
3131  * Eliminate opeq IVs that are not used outside the loop.
3132  */
3133 
3134 @trusted
3135 private void elimopeqs(ref Loop l)
3136 {
3137     elem **pref;
3138     Symbol *X;
3139     int refcount;
3140 
3141     if (debugc) printf("elimopeqs(%p)\n", &l);
3142     //foreach (ref biv; l.Lopeqlist) elem_print(*(biv.IVincr));
3143 
3144     foreach (ref biv; l.Lopeqlist)
3145     {
3146         // Can't eliminate this basic IV if we have a goal for the
3147         // increment elem.
3148         // Be careful about Nflags being in a union...
3149         if (!((*biv.IVincr).Nflags & NFLnogoal))
3150             continue;
3151 
3152         X = biv.IVbasic;
3153         assert(symbol_isintab(X));
3154         pref = onlyref(X,l,*biv.IVincr,refcount);
3155 
3156         // if only ref of X is of the form (X) or (X relop e) or (e relop X)
3157         if (pref != null && refcount <= 1)
3158         { }
3159         else if (refcount == 0)                 // if no uses of IV in loop
3160         {   // Eliminate the basic IV if it is not live on any successor
3161             uint i;
3162             for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
3163             {
3164                 foreach (bl; ListRange(dfo[i].Bsucc))
3165                 {   // for each successor
3166                     block *b = list_block(bl);
3167                     if (vec_testbit(b.Bdfoidx,l.Lloop))
3168                         continue;       // inside loop
3169                     if (vec_testbit(X.Ssymnum,b.Binlv))
3170                         goto L1;        // live
3171                 }
3172             }
3173 
3174             if (debugc) printf("No uses, eliminating opeq IV '%s' (%p)\n",X.Sident.ptr,X);
3175 
3176             /* Remove the (s op= e2) by replacing it with (1 , e2)
3177              * and let later passes remove the (1 ,) nodes.
3178              * Don't remove those nodes here because other biv's may refer
3179              * to them, for nodes like (sum += i++)
3180              */
3181             {
3182                 elem* einc = *(biv.IVincr);
3183                 einc.Eoper = OPcomma;
3184                 einc.EV.E1.Eoper = OPconst;
3185                 einc.EV.E1.Ety = TYint;
3186             }
3187 
3188             go.changes++;
3189             doflow = true;                      // redo flow analysis
3190         L1:
3191         }
3192     }
3193 }
3194 
3195 /**************************
3196  * Find simplest elem in family.
3197  * Params:
3198  *      fams = array of famlist's
3199  *      tym  = type of basic IV
3200  * Returns: index into fams[] of simplest; fams.length if none found.
3201  */
3202 
3203 @trusted
3204 extern (D)
3205 private size_t simfl(famlist[] fams, tym_t tym)
3206 {
3207     size_t sofar = fams.length;
3208 
3209     foreach (i, ref fl; fams)
3210     {
3211         if (fl.FLtemp == FLELIM)       /* no variable, so skip it      */
3212             continue;
3213         /* If size of replacement is less than size of biv, we could    */
3214         /* be in trouble due to loss of precision.                      */
3215         if (size(fl.FLtemp.ty()) < size(tym))
3216             continue;
3217 
3218         // pick simplest
3219         sofar = sofar == fams.length ? i
3220                                      : (flcmp(fams[sofar], fams[i]) ? sofar : i);
3221     }
3222     return sofar;
3223 }
3224 
3225 /**************************
3226  * Return simpler of two family elems.
3227  * There is room for improvement, namely if
3228  *      f1.c1 = 2, f2.c1 = 27
3229  * then pick f1 because it is a shift.
3230  * Returns:
3231  *      true for f1 is simpler, false  for f2 is simpler
3232  */
3233 
3234 @trusted
3235 private bool flcmp(const ref famlist f1, const ref famlist f2)
3236 {
3237     auto t1 = &(f1.c1.EV);
3238     auto t2 = &(f2.c1.EV);
3239     auto ty = (*f1.FLpelem).Ety;           // type of elem
3240 
3241     static if (0)
3242     {
3243         printf("f1: c1 = %d, c2 = %d\n",t1.Vshort,f1.c2.EV.Vshort);
3244         printf("f2: c1 = %d, c2 = %d\n",t2.Vshort,f2.c2.EV.Vshort);
3245         printf("%s %s\n", tym_str((*f1.FLpelem).Ety), tym_str((*f2.FLpelem).Ety));
3246     }
3247 
3248     /* Wimp out and just pick f1 if the types don't match               */
3249     if (tysize(ty) == tysize((*f2.FLpelem).Ety))
3250     {
3251         switch (tybasic(ty))
3252         {   case TYbool:
3253             case TYchar:
3254             case TYschar:
3255             case TYuchar:
3256                 if (t2.Vuchar == 1 ||
3257                     t1.Vuchar != 1 && f2.c2.EV.Vuchar == 0)
3258                         goto Lf2;
3259                 break;
3260 
3261             case TYshort:
3262             case TYushort:
3263             case TYchar16:
3264             case TYwchar_t:     // BUG: what about 4 byte wchar_t's?
3265             case_short:
3266                 if (t2.Vshort == 1 ||
3267                     t1.Vshort != 1 && f2.c2.EV.Vshort == 0)
3268                         goto Lf2;
3269                 break;
3270 
3271             case TYsptr:
3272             case TYcptr:
3273             case TYnptr:        // BUG: 64 bit pointers?
3274             case TYimmutPtr:
3275             case TYsharePtr:
3276             case TYrestrictPtr:
3277             case TYfgPtr:
3278             case TYnullptr:
3279             case TYint:
3280             case TYuint:
3281                 if (_tysize[TYint] == SHORTSIZE)
3282                     goto case_short;
3283                 else
3284                     goto case_long;
3285 
3286             case TYlong:
3287             case TYulong:
3288             case TYdchar:
3289             case TYfptr:
3290             case TYvptr:
3291             case TYhptr:
3292             case_long:
3293                 if (t2.Vlong == 1 ||
3294                     t1.Vlong != 1 && f2.c2.EV.Vlong == 0)
3295                         goto Lf2;
3296                 break;
3297 
3298             case TYfloat:
3299                 if (t2.Vfloat == 1 ||
3300                     t1.Vfloat != 1 && f2.c2.EV.Vfloat == 0)
3301                         goto Lf2;
3302                 break;
3303 
3304             case TYdouble:
3305             case TYdouble_alias:
3306                 if (t2.Vdouble == 1.0 ||
3307                     t1.Vdouble != 1.0 && f2.c2.EV.Vdouble == 0)
3308                         goto Lf2;
3309                 break;
3310 
3311             case TYldouble:
3312                 if (t2.Vldouble == 1.0 ||
3313                     t1.Vldouble != 1.0 && f2.c2.EV.Vldouble == 0)
3314                         goto Lf2;
3315                 break;
3316 
3317             case TYllong:
3318             case TYullong:
3319                 if (t2.Vllong == 1 ||
3320                     t1.Vllong != 1 && f2.c2.EV.Vllong == 0)
3321                         goto Lf2;
3322                 break;
3323 
3324             default:
3325                 assert(0);
3326         }
3327     }
3328     //printf("picking f1\n");
3329     return true;
3330 
3331 Lf2:
3332     //printf("picking f2\n");
3333     return false;
3334 }
3335 
3336 /*****************************
3337  * If loop is in a try block, see if there are references to x in an enclosing
3338  * try block. This is because the loop may throw and transfer control
3339  * outside the try block, and that will count as a use of x.
3340  * Params:
3341  *      x = basic induction variable symbol
3342  *      l = loop x is in
3343  * Returns:
3344  *      true if x is used outside the try block
3345  */
3346 @trusted
3347 private bool catchRef(Symbol* x, ref Loop l)
3348 {
3349     block* btry = l.Lhead.Btry;
3350     if (!btry)
3351         return false;   // not in a try block
3352 
3353     foreach (i, b; dfo[])
3354     {
3355         if (vec_testbit(b.Bdfoidx, l.Lloop))
3356             continue;
3357         /* this is conservative, just checking if x is used outside the loop.
3358          * A better check would see if the body of the loop throws, and would
3359          * check the enclosing catch/finally blocks and their exit blocks.
3360          * https://issues.dlang.org/show_bug.cgi?22104
3361          */
3362         if (vec_testbit(x.Ssymnum, b.Binlv))
3363             return true;
3364     }
3365 
3366     return false;
3367 }
3368 
3369 /************************************
3370  * Params:
3371  *      x    = basic IV symbol
3372  *      incn = increment elem for basic IV X.
3373  *      refcount = set to # of references to X other than the increment elem
3374  * Returns:
3375  *      If ref of X in loop l is of the form (X relop e) or (e relop X)
3376  *              Return the relop elem
3377  *      Else
3378  *              Return null
3379  */
3380 
3381 private __gshared
3382 {
3383     elem **nd;
3384     elem *sincn;
3385     Symbol *X;
3386 }
3387 
3388 @trusted
3389 private elem ** onlyref(Symbol *x, ref Loop l,elem *incn, out int refcount)
3390 {
3391     uint i;
3392 
3393     //printf("onlyref('%s')\n", x.Sident.ptr);
3394     X = x;                                /* save some parameter passing  */
3395     assert(symbol_isintab(x));
3396     sincn = incn;
3397 
3398     debug
3399       if (!(X.Ssymnum < globsym.length && incn))
3400           printf("X = %d, globsym.length = %d, l = %p, incn = %p\n",cast(int) X.Ssymnum,cast(int) globsym.length,&l,incn);
3401 
3402     assert(X.Ssymnum < globsym.length && incn);
3403     int count = 0;
3404     nd = null;
3405     for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
3406     {
3407         block* b = dfo[i];
3408         if (b.Belem)
3409         {
3410             count += countrefs(&b.Belem,b.BC == BCiftrue);
3411         }
3412     }
3413 
3414     static if (0)
3415     {
3416         printf("count = %d, nd = (", count);
3417         if (nd) WReqn(*nd);
3418         printf(")\n");
3419     }
3420 
3421     refcount = count;
3422     return nd;
3423 }
3424 
3425 
3426 /******************************
3427  * Count elems of the form (X relop e) or (e relop X).
3428  * Do not count the node if it is the increment node (sincn).
3429  * Params:
3430  *      pn = pointer to start of elem tree
3431  *      flag = true if block wants to test the elem
3432  * Returns:
3433  *      number of elems of the form
3434  */
3435 
3436 @trusted
3437 private int countrefs(elem **pn,bool flag)
3438 {
3439     elem *n = *pn;
3440 
3441     assert(n);
3442     if (n == sincn)                       /* if it is the increment elem  */
3443     {
3444         return OTbinary(n.Eoper)
3445             ? countrefs(&n.EV.E2, false)
3446             : 0;                          // don't count lvalue
3447     }
3448     if (OTunary(n.Eoper))
3449         return countrefs(&n.EV.E1,false);
3450     else if (OTbinary(n.Eoper))
3451     {
3452         if (OTrel(n.Eoper))
3453         {
3454             elem *e1 = n.EV.E1;
3455 
3456             assert(e1.Eoper != OPcomma);
3457             if (e1 == sincn &&
3458                 (e1.Eoper == OPeq || OTopeq(e1.Eoper)))
3459                 goto L1;
3460 
3461             /* Check both subtrees to see if n is the comparison node,
3462              * that is, if X is a leaf of the comparison.
3463              */
3464             if (e1.Eoper == OPvar && e1.EV.Vsym == X && !countrefs2(n.EV.E2, X) ||
3465                 n.EV.E2.Eoper == OPvar && n.EV.E2.EV.Vsym == X && !countrefs2(e1, X))
3466                 nd = pn;                /* found the relop node */
3467         }
3468     L1:
3469         return countrefs(&n.EV.E1,false) +
3470                countrefs(&n.EV.E2,(flag && n.Eoper == OPcomma));
3471     }
3472     else if ((n.Eoper == OPvar || n.Eoper == OPrelconst) && n.EV.Vsym == X)
3473     {
3474         if (flag)
3475             nd = pn;                    /* comparing it with 0          */
3476         return 1;                       // found another reference
3477     }
3478     return 0;
3479 }
3480 
3481 /*******************************
3482  * Count number of times symbol X appears in elem tree e.
3483  */
3484 
3485 @trusted
3486 private int countrefs2(const(elem)* e, const Symbol* s)
3487 {
3488     debug elem_debug(e);
3489     while (OTunary(e.Eoper))
3490         e = e.EV.E1;
3491     if (OTbinary(e.Eoper))
3492         return countrefs2(e.EV.E1, s) + countrefs2(e.EV.E2, s);
3493     return ((e.Eoper == OPvar || e.Eoper == OPrelconst) &&
3494             e.EV.Vsym == s);
3495 }
3496 
3497 /****************************
3498  * Eliminate some special cases.
3499  */
3500 
3501 @trusted
3502 private
3503 extern(D) void elimspec(const ref Loop loop, block*[] dfo)
3504 {
3505     // Visit each block in loop
3506     for (size_t i = 0; (i = vec_index(i, loop.Lloop)) < dfo.length; ++i)
3507     {
3508         auto b = dfo[i];
3509         if (b.Belem)
3510             elimspecwalk(&b.Belem);
3511     }
3512 }
3513 
3514 
3515 /******************************
3516  */
3517 
3518 @trusted
3519 private void elimspecwalk(elem **pn)
3520 {
3521     elem *n;
3522 
3523     n = *pn;
3524     assert(n);
3525     if (OTunary(n.Eoper))
3526         elimspecwalk(&n.EV.E1);
3527     else if (OTbinary(n.Eoper))
3528     {
3529         elimspecwalk(&n.EV.E1);
3530         elimspecwalk(&n.EV.E2);
3531         if (OTrel(n.Eoper))
3532         {
3533             elem *e1 = n.EV.E1;
3534 
3535             /* Replace ((e1,e2) rel e3) with (e1,(e2 rel e3).
3536              * This will reduce the number of cases for elimbasivs().
3537              * Don't do equivalent with (e1 rel (e2,e3)) because
3538              * of potential side effects in e1.
3539              */
3540             if (e1.Eoper == OPcomma)
3541             {
3542                 elem *e;
3543 
3544                 debug if (debugc)
3545                 {   printf("3rewriting ("); WReqn(n); printf(")\n"); }
3546 
3547                 e = n.EV.E2;
3548                 n.EV.E2 = e1;
3549                 n.EV.E1 = n.EV.E2.EV.E1;
3550                 n.EV.E2.EV.E1 = n.EV.E2.EV.E2;
3551                 n.EV.E2.EV.E2 = e;
3552                 n.EV.E2.Eoper = n.Eoper;
3553                 n.EV.E2.Ety = n.Ety;
3554                 n.Eoper = OPcomma;
3555 
3556                 go.changes++;
3557                 doflow = true;
3558 
3559                 elimspecwalk(&n.EV.E1);
3560                 elimspecwalk(&n.EV.E2);
3561             }
3562 
3563             /* Rewrite ((X op= e2) rel e3) into ((X op= e2),(X rel e3))
3564              * Rewrite ((X ++  e2) rel e3) into ((X +=  e2),(X-e2 rel e3))
3565              * so that the op= will not have a goal, so elimbasivs()
3566              * will work on it.
3567              */
3568             if ((OTopeq(e1.Eoper)
3569                  || OTpost(e1.Eoper)
3570                 ) &&
3571                 !el_sideeffect(e1.EV.E1))
3572             {
3573                 elem *e;
3574                 OPER op;
3575 
3576                 debug if (debugc)
3577                 {   printf("4rewriting ("); WReqn(n); printf(")\n"); }
3578 
3579                 e = el_calloc();
3580                 el_copy(e,n);
3581                 e.EV.E1 = el_copytree(e1.EV.E1);
3582                 e.EV.E1.Ety = n.EV.E1.Ety;
3583                 n.EV.E2 = e;
3584                 switch (e1.Eoper)
3585                 {
3586                     case OPpostinc:
3587                         e1.Eoper = OPaddass;
3588                         op = OPmin;
3589                         goto L3;
3590 
3591                     case OPpostdec:
3592                         e1.Eoper = OPminass;
3593                         op = OPadd;
3594                     L3: e.EV.E1 = el_bin(op,e.EV.E1.Ety,e.EV.E1,el_copytree(e1.EV.E2));
3595                         break;
3596 
3597                     default:
3598                         break;
3599                 }
3600                 /* increment node is now guaranteed to have no goal */
3601                 e1.Nflags |= NFLnogoal;
3602                 n.Eoper = OPcomma;
3603                 //go.changes++;
3604                 doflow = true;
3605 
3606                 elimspecwalk(&n.EV.E1);
3607                 elimspecwalk(&n.EV.E2);
3608             }
3609         }
3610   }
3611 }
3612 
3613 /********
3614  * Walk e in execution order.
3615  * When eincrement is found, remove it.
3616  * Continue, replacing instances of `v` with `v+increment`
3617  * When second eincrement is found, stop.
3618  * Params:
3619  *      e = expression to walk
3620  *      defnum = index of eincrement
3621  *      v = increment variable
3622  *      increment = amount to increment v
3623  *      unrolls = number of times loop has been unrolled
3624  */
3625 
3626 private void unrollWalker(elem* e, uint defnum, Symbol* v, targ_llong increment, int unrolls) nothrow
3627 {
3628     int state = 0;
3629 
3630     /***********************************
3631      * Walk e in execution order, fixing it according to state.
3632      * state == 0..unrolls-1: when eincrement is found, remove it, advance to next state
3633      * state == 1..unrolls-1: replacing instances of v with v+(state*increment),
3634      * state == unrolls-1: leave eincrement alone, advance to next state
3635      * state == unrolls: done
3636      */
3637 
3638     void walker(elem *e) @trusted
3639     {
3640         assert(e);
3641         const op = e.Eoper;
3642         if (ERTOL(e))
3643         {
3644             if (e.Edef != defnum)
3645             {
3646                 walker(e.EV.E2); // this function is @trusted because of this union access
3647                 walker(e.EV.E1);
3648             }
3649         }
3650         else if (OTbinary(op))
3651         {
3652             if (e.Edef != defnum)
3653             {
3654                 walker(e.EV.E1);
3655                 walker(e.EV.E2);
3656             }
3657         }
3658         else if (OTunary(op))
3659         {
3660             assert(e.Edef != defnum);
3661             walker(e.EV.E1);
3662         }
3663         else if (op == OPvar &&
3664                  state &&
3665                  e.EV.Vsym == v)
3666         {
3667             // overwrite e with (v+increment)
3668             elem *e1 = el_calloc();
3669             el_copy(e1,e);
3670             e.Eoper = OPadd;
3671             e.EV.E1 = e1;
3672             e.EV.E2 = el_long(e.Ety, increment * state);
3673         }
3674         if (OTdef(op) && e.Edef == defnum)
3675         {
3676             // found the increment elem; neuter all but the last one
3677             if (state + 1 < unrolls)
3678             {
3679                 el_free(e.EV.E1);
3680                 el_free(e.EV.E2);
3681                 e.Eoper = OPconst;
3682                 e.EV.Vllong = 0;
3683             }
3684             ++state;
3685         }
3686     }
3687 
3688     walker(e);
3689     assert(state == unrolls);
3690 }
3691 
3692 
3693 /*********************************
3694  * Unroll loop if possible.
3695  * Params:
3696  *      l = loop to unroll
3697  * Returns:
3698  *      true if loop was unrolled
3699  */
3700 @trusted
3701 bool loopunroll(ref Loop l)
3702 {
3703     const bool log = false;
3704     if (log) printf("loopunroll(%p)\n", &l);
3705 
3706     /* Do not repeatedly unroll the same loop,
3707      * or waste time attempting to
3708      */
3709     if (l.Lhead.Bflags & BFLnounroll)
3710         return false;
3711     l.Lhead.Bflags |= BFLnounroll;
3712     if (log)
3713         WRfunc("loop", funcsym_p, startblock);
3714 
3715     if (l.Lhead.Btry || l.Ltail.Btry)
3716         return false;
3717 
3718     /* For simplification, only unroll loops that consist only
3719      * of a head and tail, and the tail is the exit block.
3720      */
3721     const numblocks = vec_numBitsSet(l.Lloop);
3722 {   // Ensure no changes
3723     if (dfo.length != vec_numbits(l.Lloop)) assert(0);
3724     int n = 0;
3725     for (int i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
3726         ++n;
3727     if (n != numblocks) assert(0);
3728 }
3729     if (numblocks != 2)
3730     {
3731         if (log) printf("\tnot 2 blocks, but %d\n", numblocks);
3732         return false;
3733     }
3734     assert(l.Lhead != l.Ltail);
3735 
3736     /* tail must be the sole exit block
3737      */
3738     if (vec_testbit(l.Lhead.Bdfoidx, l.Lexit) ||
3739         !vec_testbit(l.Ltail.Bdfoidx, l.Lexit))
3740     {
3741         if (log) printf("\ttail not sole exit block\n");
3742         return false;
3743     }
3744 
3745     elem *ehead = l.Lhead.Belem;
3746     elem *etail = l.Ltail.Belem;
3747 
3748     if (log)
3749     {
3750         printf("Unroll candidate:\n");
3751         printf("  head B%d:\t", l.Lhead.Bdfoidx); WReqn(l.Lhead.Belem); printf("\n");
3752         printf("  tail B%d:\t", l.Ltail.Bdfoidx); WReqn(l.Ltail.Belem); printf("\n");
3753     }
3754 
3755     /* Tail must be of the form: (v < c) or (v <= c) where v is an unsigned integer
3756      */
3757     if ((etail.Eoper != OPlt && etail.Eoper != OPle) ||
3758         etail.EV.E1.Eoper != OPvar ||
3759         etail.EV.E2.Eoper != OPconst)
3760     {
3761         if (log) printf("\tnot (v < c)\n");
3762         return false;
3763     }
3764 
3765     elem *e1 = etail.EV.E1;
3766     elem *e2 = etail.EV.E2;
3767 
3768     if (!tyintegral(e1.Ety) ||
3769         tysize(e1.Ety) > targ_llong.sizeof ||
3770         !(tyuns(e1.Ety) || tyuns(e2.Ety)))
3771     {
3772         if (log) printf("\tnot (integral unsigned)\n");
3773         return false;
3774     }
3775 
3776     int cost = el_length(ehead);
3777     //printf("test4 cost: %d\n", cost);
3778 
3779     if (cost > 100)
3780     {
3781         if (log) printf("\tcost %d\n", cost);
3782         return false;
3783     }
3784     if (log) printf("cost %d\n", cost);
3785 
3786     Symbol* v = e1.EV.Vsym;
3787 
3788     // RD info is only reliable for registers and autos
3789     if (!(sytab[v.Sclass] & SCRD) || !(v.Sflags & SFLunambig))
3790     {
3791         if (log) printf("\tnot SCRD\n");
3792         return false;
3793     }
3794 
3795     /* Find the initial, increment elem, and final value of s
3796      */
3797     elem *einitial;
3798     elem *eincrement;
3799     if (!findloopparameters(etail, einitial, eincrement))
3800     {
3801         if (log) printf("\tnot findloopparameters()\n");
3802         return false;
3803     }
3804 
3805     targ_llong initial = el_tolong(einitial.EV.E2);
3806     targ_llong increment = el_tolong(eincrement.EV.E2);
3807     if (eincrement.Eoper == OPpostdec || eincrement.Eoper == OPminass)
3808         increment = -increment;
3809     targ_llong final_ = el_tolong(e2);
3810 
3811     if (log) printf("initial = %lld, increment = %lld, final = %lld\n",cast(long)initial,cast(long)increment,cast(long)final_);
3812 
3813     if (etail.Eoper == OPle)
3814         ++final_;
3815 
3816     if (initial < 0 ||
3817         final_ < initial ||
3818         increment <= 0 ||
3819         (final_ - initial) % increment)
3820     {
3821         if (log) printf("\tnot (evenly divisible)\n");
3822         return false;
3823     }
3824 
3825     /* If loop would only execute once anyway, just remove the test at the end
3826      */
3827     if (initial + increment == final_)
3828     {
3829         if (log) printf("\tjust once\n");
3830         etail.Eoper = OPcomma;
3831         e2.EV.Vllong = 0;
3832         e2.Ety = etail.Ety;
3833         return false;
3834     }
3835 
3836     /* number of times the loop is unrolled
3837      */
3838     targ_ullong numIterations = (final_ - initial) / increment;
3839     const int unrolls = (numIterations < 1000 / cost)
3840         ? cast(int)numIterations
3841         : 2;
3842 
3843     if (unrolls == 0 || (final_ - initial) % unrolls)
3844     {
3845         if (log) printf("\tnot (divisible by %d)\n", unrolls);
3846         return false;
3847     }
3848 
3849     if (log) printf("Unrolling starting\n");
3850 
3851     // Double the increment
3852     eincrement.EV.E2.EV.Vllong *= unrolls;
3853     //printf("  4head:\t"); WReqn(l.Lhead.Belem); printf("\n");
3854 
3855     elem* e = null;
3856     foreach (i; 0 .. unrolls)
3857         e = el_combine(e, el_copytree(ehead));
3858 
3859     /* Walk e in execution order.
3860      * When eincrement is found, remove it.
3861      * Continue, replacing instances of `v` with `v+increment`
3862      * When last eincrement is found, stop.
3863      */
3864     unrollWalker(e, eincrement.Edef, v, increment, unrolls);
3865 
3866     l.Lhead.Belem = e;
3867 
3868     /* If unrolled loop would only execute once anyway, just remove the test at the end
3869      */
3870     if (initial + unrolls * increment == final_)
3871     {
3872         if (log) printf("\tcompletely unrolled\n");
3873         etail.Eoper = OPcomma;
3874         e2.EV.Vllong = 0;
3875         e2.Ety = etail.Ety;
3876     }
3877 
3878     //WRfunc("loopunroll done", funcsym_p, startblock);
3879     return true;
3880 }
3881 
3882 /******************************
3883  * Count number of elems in a tree
3884  * Params:
3885  *  e = tree
3886  * Returns:
3887  *  number of elems in tree
3888  */
3889 @trusted
3890 private int el_length(elem *e)
3891 {
3892     int n = 0;
3893     while (e)
3894     {
3895         n += 1;
3896         if (!OTleaf(e.Eoper))
3897         {
3898             if (e.Eoper == OPctor || e.Eoper == OPdtor)
3899                 return 10_000;
3900             n += el_length(e.EV.E2);
3901             e = e.EV.E1;
3902         }
3903         else
3904             break;
3905     }
3906     return n;
3907 }