1 /**
2  * x87 FPU code generation
3  *
4  * Compiler implementation of the
5  * $(LINK2 https://www.dlang.org, D programming language).
6  *
7  * Copyright:   Copyright (C) 1987-1995 by Symantec
8  *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
9  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
10  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
11  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cg87.d, backend/cg87.d)
12  */
13 
14 module dmd.backend.cg87;
15 
16 import core.stdc.stdio;
17 import core.stdc.stdlib;
18 import core.stdc.string;
19 
20 import dmd.backend.barray;
21 import dmd.backend.cc;
22 import dmd.backend.cdef;
23 import dmd.backend.code;
24 import dmd.backend.code_x86;
25 import dmd.backend.codebuilder;
26 import dmd.backend.mem;
27 import dmd.backend.el;
28 import dmd.backend.global;
29 import dmd.backend.oper;
30 import dmd.backend.ty;
31 import dmd.backend.evalu8 : el_toldoubled;
32 
33 
34 nothrow:
35 @safe:
36 
// NOTE: this could be a TLS global which would allow this variable to be used in
//       a multi-threaded version of the backend
// Holds the compiler's model of the x87 register stack (stack[], stackused)
// and of the memory save area (save[]) that the routines below maintain.
__gshared Globals87 global87;

// Constants that the 8087 supports directly
// BUG: rewrite for 80 bit long doubles
enum PI            = 3.14159265358979323846;  // pi, loaded by FLDPI
enum LOG2          = 0.30102999566398119521;  // log10(2), loaded by FLDLG2
enum LN2           = 0.6931471805599453094172321; // ln(2), loaded by FLDLN2
enum LOG2T         = 3.32192809488736234787;  // log2(10), loaded by FLDL2T
enum LOG2E         = 1.4426950408889634074;   // 1/LN2

enum FWAIT = 0x9B;            // FWAIT opcode
50 
/**
 * Mark the variable referenced by e as not a register candidate.
 * Params:
 *      e = elem whose symbol (e.EV.Vsym) gets its GTregcand flag cleared
 * Returns:
 *      the symbol's flags after the bit has been cleared
 */
@trusted
uint notreg(elem* e)
{
    auto sym = e.EV.Vsym;
    sym.Sflags &= ~GTregcand;
    return sym.Sflags;
}
54 
/**
 * Generate the appropriate ESC instruction opcode byte.
 * Params:
 *      MF = one of the MFxxx values
 *      b = value added on top of the MF-selected opcode
 * Returns:
 *      0xD8 + 2 * MF + b, truncated to a byte
 */
ubyte ESC(uint MF, uint b)
{
    const uint opcode = 0xD8 + MF * 2 + b;
    return cast(ubyte) opcode;
}
// Selector passed as the MF argument of ESC(); it contributes 2 * MF to the opcode byte.
enum
{   // Values for MF
    MFfloat         = 0,    // chosen in this file for FLOATSIZE operands
    MFlong          = 1,    // presumably a 32 bit integer operand - TODO confirm
    MFdouble        = 2,    // chosen in this file for operands that are not FLOATSIZE (doubles)
    MFword          = 3     // presumably a 16 bit integer operand - TODO confirm
}
64 
/*********************************
 * Groups a `round` state value with two symbols named after the
 * round-to-zero and round-to-nearest modes.
 * NOTE(review): the code using these fields is outside this part of the
 * file; presumably the symbols refer to emitted control word constants
 * (see enum CW) - confirm against the users of `oldd`.
 */

struct Dconst
{
    int round;                  // NOTE(review): meaning not visible here - confirm
    Symbol *roundto0;           // symbol associated with round-to-zero
    Symbol *roundtonearest;     // symbol associated with round-to-nearest
}
74 
// Module-private Dconst instance; its users are not in this part of the file.
private __gshared Dconst oldd;

// Compile-time switch: when non-zero, the stack bookkeeping routines
// (pop87, push87, note87, makesure87, save87) print trace output.
enum NDPP = 0;       // print out debugging info
78 
/**
 * Tell whether the SAHF instruction can't be used.
 * Returns:
 *      true if I64 is set or config.fpxmmregs is set
 */
@trusted
bool NOSAHF()
{
    if (I64)
        return true;
    return config.fpxmmregs ? true : false;
}
81 
/** 87 Control Word rounding modes */
enum CW : ushort
{
    // The two values differ only in bits 10 and 11 (mask 0xC00).
    roundto0       = 0xFBF,     // control word for rounding toward zero
    roundtonearest = 0x3BF,     // control word for rounding to nearest
}
88 
89 /**********************************
 * When we need to temporarily save 8087 registers, we record information
91  * about the save into an array of NDP structs.
92  */
93 
/**
 * getlvalue() for the memory operand of an x87 instruction.
 * Params:
 *      cdb = code sink
 *      pcs = instruction being built; receives the addressing mode and has its
 *            flags adjusted afterwards
 *      e = elem to address
 *      keepmsk = passed through to getlvalue()
 */
@trusted
private void getlvalue87(ref CodeBuilder cdb, ref code pcs,elem *e,regm_t keepmsk)
{
    // the x87 instructions cannot read XMM registers, so a symbol that is
    // referenced directly must not remain a register candidate
    switch (e.Eoper)
    {
        case OPvar:
        case OPrelconst:
            e.EV.Vsym.Sflags &= ~GTregcand;
            break;

        default:
            break;
    }

    getlvalue(cdb, &pcs, e, keepmsk);

    if (ADDFWAIT())
        pcs.Iflags |= CFwait;           // instruction needs an FWAIT

    // remove the operand size markers from the instruction
    if (I32)
        pcs.Iflags &= ~CFopsize;
    else if (I64)
        pcs.Irex &= ~REX_W;
}
109 
/****************************************
 * Generate an FSTP to ndp save location i.
 * Only code is generated; global87 is not touched.
 * Params:
 *      cdb = code sink
 *      i = save location, emitted as the FLndp operand
 *      ty = type of the value, selects the 32, 64 or 80 bit form
 */

@trusted
private void ndp_fstp(ref CodeBuilder cdb, size_t i, tym_t ty)
{
    uint op;            // opcode byte
    uint reg;           // reg field of the mod r/m byte

    switch (tybasic(ty))
    {
        case TYfloat, TYifloat, TYcfloat:
            op = 0xD9;                  // FSTP m32real i[BP]
            reg = 3;
            break;

        case TYdouble, TYdouble_alias, TYidouble, TYcdouble:
            op = 0xDD;                  // FSTP m64real i[BP]
            reg = 3;
            break;

        case TYldouble, TYildouble, TYcldouble:
            op = 0xDB;                  // FSTP m80real i[BP]
            reg = 7;
            break;

        default:
            assert(0);
    }

    cdb.genc1(op,modregrm(2,reg,BPRM),FLndp,i);
}
142 
/****************************************
 * Generate an FLD from ndp save location i.
 * Only code is generated; global87 is not touched.
 * Params:
 *      cdb = code sink
 *      i = save location, emitted as the FLndp operand
 *      ty = type of the value, selects the 32, 64 or 80 bit form
 */
@trusted
private void ndp_fld(ref CodeBuilder cdb, size_t i, tym_t ty)
{
    uint op;            // opcode byte
    uint reg;           // reg field of the mod r/m byte

    switch (tybasic(ty))
    {
        case TYfloat, TYifloat, TYcfloat:
            op = 0xD9;                  // FLD m32real i[BP]
            reg = 0;
            break;

        case TYdouble, TYdouble_alias, TYidouble, TYcdouble:
            op = 0xDD;                  // FLD m64real i[BP]
            reg = 0;
            break;

        case TYldouble, TYildouble, TYcldouble:
            op = 0xDB;                  // FLD m80real i[BP]
            reg = 5;
            break;

        default:
            assert(0);
    }

    cdb.genc1(op,modregrm(2,reg,BPRM),FLndp,i);
}
171 
/**************************
 * Insert ndp into the next available slot in save[].
 * A slot counts as available when its `e` field is null; if there is
 * none, ndp is pushed onto the end of save[].
 * Params:
 *    save = array of NDP
 *    ndp = NDP to insert into save[]
 * Returns:
 *    index of slot in save[] where ndp was inserted
 */

@safe
private size_t getemptyslot(T)(ref T save, ref NDP ndp)
{
    auto slots = save[];
    for (size_t k = 0; k < slots.length; ++k)
    {
        if (slots[k].e != null)
            continue;                   // slot is taken

        slots[k] = ndp;
        return k;
    }

    // nothing free, so grow the array by one
    save.push(ndp);
    return save.length - 1;
}
194 
/*********************************
 * Pop 8087 stack.
 * This updates only the compiler's model in global87; no code is generated.
 */

// NOTE(review): __LINE__ and __FILE__ expand here, so the trace output
// identifies this wrapper rather than the caller.
void pop87() { pop87(__LINE__, __FILE__); }

/**
 * Params:
 *      line = line number shown in the trace output
 *      file = file name shown in the trace output
 */
@trusted
void pop87(int line, const(char)* file)
{
    if (NDPP)
        printf("pop87(%s(%d): stackused=%d)\n", file, line, global87.stackused);

    global87.stackused -= 1;
    assert(global87.stackused >= 0);

    // every entry moves one place toward ST(0)
    foreach (k; 1 .. global87.stack.length)
        global87.stack[k - 1] = global87.stack[k];

    // end of stack is nothing
    global87.stack[$ - 1] = NDP();
}
216 
217 
/*******************************
 * Push 8087 stack. Generate into cdb any code necessary to preserve
 * anything that might run off the end of the stack.
 */

// NOTE(review): __LINE__ and __FILE__ expand here, so the trace output
// identifies this wrapper rather than the caller.
void push87(ref CodeBuilder cdb) { push87(cdb,__LINE__,__FILE__); }

/**
 * Params:
 *      cdb = code sink
 *      line = line number shown in the trace output
 *      file = file name shown in the trace output
 */
@trusted
void push87(ref CodeBuilder cdb, int line, const(char)* file)
{
    if (global87.stack[7].e == null)
    {
        // the last register is free, only the count changes
        if (NDPP) printf("push87(%s(%d): %d)\n", file, line, global87.stackused);
        global87.stackused++;
        assert(global87.stackused <= 8);
    }
    else
    {
        // we would lose the top register off of the stack, so store it
        // into a save slot first
        const slot = getemptyslot(global87.save, global87.stack[7]);
        cdb.genf2(0xD9,0xF6);                           // FDECSTP
        genfwait(cdb);
        ndp_fstp(cdb, slot, global87.stack[7].e.Ety);   // FSTP slot[BP]
        assert(global87.stackused == 8);
        if (NDPP) printf("push87() : overflow\n");
    }

    // Shift the stack up; the new ST(0) starts out empty
    foreach_reverse (k; 1 .. 8)
        global87.stack[k] = global87.stack[k - 1];
    global87.stack[0] = NDP();
}
249 
/*****************************
 * Record that ST(i) holds elem e (at the given offset), a value we want to keep.
 * If e is a comma expression, its rightmost operand is recorded instead.
 */

void note87(elem *e, uint offset, int i)
{
    // NOTE(review): __LINE__ expands here, not at the caller
    note87(e, offset, i, __LINE__);
}

/**
 * Params:
 *      e = elem whose value is in ST(i)
 *      offset = offset recorded along with e
 *      i = 8087 register number, must be less than the number of registers in use
 *      linnum = line number shown in the trace output
 */
@trusted
void note87(elem *e, uint offset, int i, int linnum)
{
    if (NDPP)
        printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,global87.stackused,linnum);

    debug
    {
        // show what went wrong before the assert below fires
        if (i >= global87.stackused)
        {
            printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,global87.stackused,linnum);
            elem_print(e);
        }
    }
    assert(i < global87.stackused);

    // skip down to the rightmost operand of any comma expressions
    elem* value = e;
    for (; value.Eoper == OPcomma; value = value.EV.E2)
    {
    }

    global87.stack[i].e = value;
    global87.stack[i].offset = offset;
}
283 
/****************************************************
 * Exchange two entries in 8087 stack.
 * Only the bookkeeping in global87 is changed; no code is generated.
 * Params:
 *      i = index of one entry
 *      j = index of the other entry
 */

@trusted
void xchg87(int i, int j)
{
    NDP held = global87.stack[j];
    global87.stack[j] = global87.stack[i];
    global87.stack[i] = held;
}
297 
/****************************
 * Make sure that elem e is in register ST(i). Reload it if necessary.
 *
 * If ST(i) is not already recorded as holding (e, offset), the value is
 * looked up in global87.save[], loaded back with FLD and, unless bit 0 of
 * flag is set, moved into place with FXCH instructions. The save slot is
 * then released.
 * Params:
 *      cdb = code sink
 *      e = elem wanted; comma expressions are reduced to their rightmost operand
 *      offset = offset which together with e identifies the value
 *      i = 0..3, 8087 register number
 *      flag = 1: don't bother with FXCH
 */

private void makesure87(ref CodeBuilder cdb,elem *e,uint offset,int i,uint flag)
{
    // NOTE(review): __LINE__ expands here, not at the caller
    makesure87(cdb,e,offset,i,flag,__LINE__);
}

@trusted
private void makesure87(ref CodeBuilder cdb,elem *e,uint offset,int i,uint flag,int linnum)
{
    debug if (NDPP) printf("makesure87(e=%p, offset=%d, i=%d, flag=%d, line=%d)\n",e,offset,i,flag,linnum);

    // Check the arguments before e is dereferenced. Done after the loop, the
    // test on e could never catch a null argument.
    assert(e && i < 4);
    while (e.Eoper == OPcomma)
        e = e.EV.E2;
L1:
    if (global87.stack[i].e != e || global87.stack[i].offset != offset)
    {
        debug if (global87.stack[i].e)
            printf("global87.stack[%d].e = %p, .offset = %d\n",i,global87.stack[i].e,global87.stack[i].offset);

        // ST(i) must be empty, holding some other value there is an error
        assert(global87.stack[i].e == null);

        // find the save slot that holds (e, offset)
        int j;
        for (j = 0; 1; j++)
        {
            // NOTE(review): e can't be an OPcomma here since the loop above
            // skipped them all, so this retry looks unreachable - confirm
            if (j >= global87.save.length && e.Eoper == OPcomma)
            {
                e = e.EV.E2;              // try right side
                goto L1;
            }

            debug if (j >= global87.save.length)
                printf("e = %p, global87.save.length = %llu\n",e, cast(ulong) global87.save.length);

            assert(j < global87.save.length);   // the value has to be somewhere
            //printf("\tglobal87.save[%d] = %p, .offset = %d\n", j, global87.save[j].e, global87.save[j].offset);
            if (e == global87.save[j].e && offset == global87.save[j].offset)
                break;
        }
        push87(cdb);
        genfwait(cdb);
        ndp_fld(cdb, j, e.Ety);         // FLD j[BP]
        if (!(flag & 1))
        {
            // emits FXCH ST(i), FXCH ST(i-1), ... FXCH ST(1)
            while (i != 0)
            {
                cdb.genf2(0xD9,0xC8 + i);       // FXCH ST(i)
                i--;
            }
        }
        global87.save[j] = NDP();               // back in 8087
    }
    //global87.stack[i].e = null;
}
357 
/****************************
 * Save in memory any values in the 8087 that we want to keep.
 * Entries are stored and popped from ST(0) for as long as ST(0) holds a
 * noted value and the stack is in use.
 * Params:
 *      cdb = code sink
 */

@trusted
void save87(ref CodeBuilder cdb)
{
    bool stored = false;
    for (;;)
    {
        if (!global87.stack[0].e || !global87.stackused)
            break;

        // Save it
        const slot = getemptyslot(global87.save, global87.stack[0]);
        if (NDPP) printf("saving %p in temporary global87.save[%d]\n",global87.stack[0].e, cast(int)slot);

        genfwait(cdb);
        ndp_fstp(cdb,slot,global87.stack[0].e.Ety);     // FSTP slot[BP]
        pop87();
        stored = true;
    }

    // wait for the last store to finish
    if (stored)
        genfwait(cdb);
}
380 
/******************************************
 * Save any noted values that would be destroyed by n pushes.
 * Params:
 *      cdb = code sink
 *      n = number of pushes to make room for, at most 7
 */

@trusted
void save87regs(ref CodeBuilder cdb, uint n)
{
    assert(n <= 7);
    const uint keep = 8 - n;            // how many registers may stay in use
    if (!(global87.stackused > keep))
        return;                         // enough room already

    // First pass: one FDECSTP per register position above `keep`,
    // storing those positions that are in use
    for (uint pos = 8; pos > keep; pos--)
    {
        cdb.genf2(0xD9,0xF6);           // FDECSTP
        genfwait(cdb);
        if (pos > global87.stackused)
            continue;                   // nothing there to store

        const slot = getemptyslot(global87.save, global87.stack[pos - 1]);
        ndp_fstp(cdb, slot, global87.stack[pos - 1].e.Ety);   // FSTP slot[BP]
        global87.stack[pos - 1] = NDP();
    }

    // Second pass: one FINCSTP for each position that was not stored
    for (uint pos = 8; pos > keep; pos--)
    {
        if (pos <= global87.stackused)
            continue;

        cdb.genf2(0xD9,0xF7);           // FINCSTP
        genfwait(cdb);
    }

    global87.stackused = keep;
}
414 
/*****************************************************
 * Save/restore ST0 or ST01.
 * The values are stored as long doubles into save slots; the loads are
 * appended to cdbrestore in the reverse order of the stores.
 * Params:
 *      regm = mST0 or mST01
 *      cdbsave = receives the code that saves the register(s)
 *      cdbrestore = receives the code that restores them
 */

@trusted
void gensaverestore87(regm_t regm, ref CodeBuilder cdbsave, ref CodeBuilder cdbrestore)
{
    //printf("gensaverestore87(%s)\n", regm_str(regm));
    assert(regm == mST0 || regm == mST01);
    const bool pair = (regm == mST01);

    // a freshly allocated elem makes the slot look occupied
    auto first = NDP(el_calloc());
    const slot0 = getemptyslot(global87.save, first);  // this blocks slot [slot0] for the life of this function
    ndp_fstp(cdbsave, slot0, TYldouble);

    // the load of the first slot is built on the side since it has to go last
    CodeBuilder reload0;
    reload0.ctor();
    ndp_fld(reload0, slot0, TYldouble);

    if (pair)
    {
        auto second = NDP(el_calloc());
        const slot1 = getemptyslot(global87.save, second);
        ndp_fstp(cdbsave, slot1, TYldouble);
        ndp_fld(cdbrestore, slot1, TYldouble);
    }

    cdbrestore.append(reload0);
}
443 
/*************************************
 * Find which, if any, slot on stack holds elem e.
 * Params:
 *      e = elem to look for
 *      offset = offset that has to match as well
 * Returns:
 *      index into global87.stack[] of the first match among the entries in use,
 *      -1 if there is none
 */

@trusted
private int cse_get(elem *e, uint offset)
{
    for (int k = 0; k != global87.stackused; ++k)
    {
        if (global87.stack[k].e == e &&
            global87.stack[k].offset == offset)
        {
            return k;                   // cse found
        }
    }
    return -1;                          // cse not found
}
471 
/*************************************
 * Reload common subexpression.
 * If the value is still noted on the 8087 stack it is duplicated with
 * FLD ST(n), otherwise it is reloaded with loaddata().
 * Params:
 *      cdb = code sink
 *      e = the common subexpression
 *      pretregs = where the result is wanted
 */

void comsub87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("comsub87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    // Look on 8087 stack
    const int i = cse_get(e, 0);

    if (!tycomplex(e.Ety))
    {
        if (i < 0)
        {
            loaddata(cdb,e,pretregs);           // Reload
            return;
        }

        push87(cdb);
        cdb.genf2(0xD9,0xC0 + i);               // FLD ST(i)
        if (*pretregs & XMMREGS)
            fixresult87(cdb,e,mST0,pretregs);
        else
            fixresult(cdb,e,mST0,pretregs);
        return;
    }

    // complex: the second part is noted at offset size/2
    const uint sz = tysize(e.Ety);
    const int j = cse_get(e, sz / 2);
    if (i < 0 || j < 0)
    {
        loaddata(cdb,e,pretregs);               // Reload
        return;
    }

    push87(cdb);
    push87(cdb);
    cdb.genf2(0xD9,0xC0 + i);                   // FLD ST(i)
    cdb.genf2(0xD9,0xC0 + j + 1);               // FLD ST(j + 1)
    fixresult_complex87(cdb,e,mST01,pretregs);
}
514 
515 
/*******************************
 * Generate an FWAIT, but only if ADDFWAIT() says one is needed.
 * Params:
 *      cdb = code sink
 */

public void genfwait(ref CodeBuilder cdb)
{
    if (!ADDFWAIT())
        return;

    cdb.gen1(FWAIT);
}
525 
526 
/***************************
 * Put the 8087 flags into the CPU flags.
 * Uses AX and must not be called when NOSAHF is true.
 * Params:
 *      cdb = code sink
 */

@trusted
private void cg87_87topsw(ref CodeBuilder cdb)
{
    /* Note that SAHF is not available on some early I64 processors
     * and will cause a seg fault
     */
    assert(!NOSAHF);
    getregs(cdb,mAX);

    if (config.target_cpu < TARGET_80286)
    {
        // go through memory when FSTSW AX is not used
        cdb.genfltreg(0xDD,7,0);          // FSTSW floatreg[BP]
        genfwait(cdb);                    // FWAIT
        cdb.genfltreg(0x8A,4,1);          // MOV AH,floatreg+1[BP]
    }
    else
        cdb.genf2(0xDF,0xE0);             // FSTSW AX

    cdb.gen1(0x9E);                       // SAHF
    code_orflag(cdb.last(),CFpsw);
}
550 
/*****************************************
 * Jump to ctarget if condition code C2 is set.
 * Params:
 *      cdb = code sink
 *      ctarget = jump target, passed to genjmp() as an FLcode operand
 */

@trusted
private void genjmpifC2(ref CodeBuilder cdb, code *ctarget)
{
    auto target = cast(block *)ctarget;

    if (!NOSAHF)
    {
        // move the 8087 flags into the CPU flags and test it there
        cg87_87topsw(cdb);
        genjmp(cdb, JP, FLcode, target);         // JP ctarget
        return;
    }

    // without SAHF, test the status word in AH directly
    getregs(cdb,mAX);
    cdb.genf2(0xDF,0xE0);                        // FSTSW AX
    cdb.genc2(0xF6,modregrm(3,0,4),4);           // TEST AH,4
    genjmp(cdb, JNE, FLcode, target);            // JNE ctarget
}
571 
/***************************
 * Set the PSW based on the state of ST0.
 * The code is appended to cdb; nothing is returned.
 * Params:
 *      cdb = code sink
 *      e = elem passed to the library call made for targets below the 80386
 *      pop = if stack should be popped after test
 */

@trusted
private void genftst(ref CodeBuilder cdb,elem *e,int pop)
{
    if (NOSAHF)
    {
        // compare against 0 with a result that goes into the CPU flags directly
        push87(cdb);
        cdb.gen2(0xD9,0xEE);                 // FLDZ
        cdb.gen2(0xDF,0xE9);                 // FUCOMIP ST1
        pop87();
    }
    else if (config.flags4 & CFG4fastfloat)  // if fast floating point
    {
        cdb.genf2(0xD9,0xE4);                // FTST
        cg87_87topsw(cdb);                   // put 8087 flags in CPU flags
    }
    else if (config.target_cpu >= TARGET_80386)
    {
        // FUCOMP doesn't raise exceptions on QNANs, unlike FTST
        push87(cdb);
        cdb.gen2(0xD9,0xEE);                 // FLDZ
        cdb.gen2(pop ? 0xDA : 0xDD,0xE9);    // FUCOMPP / FUCOMP
        pop87();
        if (pop)
            pop87();
        cg87_87topsw(cdb);                   // put 8087 flags in CPU flags
        return;                              // the compare did the popping already
    }
    else
    {
        // Call library function which does not raise exceptions
        regm_t regm = 0;

        callclib(cdb,e,CLIB.ftest,&regm,0);
    }

    // the remaining cases pop ST0 with a separate instruction
    if (pop)
    {
        cdb.genf2(0xDD,modregrm(3,3,0));     // FPOP
        pop87();
    }
}
629 
/*************************************
 * Determine if there is a special 8087 instruction to load
 * constant e.
 *
 * The value returned is the second byte of the load-constant instruction
 * (see the `opcode` table below), not a complete instruction.
 * Input:
 *      e       constant elem; its type selects float, double or long double
 *      im      0       load real part
 *              1       load imaginary part
 * Returns:
 *      opcode if found
 *      0 if not
 */

@trusted
ubyte loadconst(elem *e, int im)
{
    elem_debug(e);
    assert(im == 0 || im == 1);

    // The same seven constants in each precision. The entries line up with
    // the entries of opcode[] below.
    immutable float[7] fval =
        [0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2];
    immutable double[7] dval =
        [0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2];

    // The long double table depends on whether the host `real` is wide
    // enough to hold the values.
    static if (real.sizeof < 10)
    {
        import dmd.root.longdouble;
        immutable targ_ldouble[7] ldval =
        [ld_zero,ld_one,ld_pi,ld_log2t,ld_log2e,ld_log2,ld_ln2];
    }
    else
    {
        enum M_PI_L        = 0x1.921fb54442d1846ap+1L;       // 3.14159 fldpi
        enum M_LOG2T_L     = 0x1.a934f0979a3715fcp+1L;       // 3.32193 fldl2t
        enum M_LOG2E_L     = 0x1.71547652b82fe178p+0L;       // 1.4427 fldl2e
        enum M_LOG2_L      = 0x1.34413509f79fef32p-2L;       // 0.30103 fldlg2
        enum M_LN2_L       = 0x1.62e42fefa39ef358p-1L;       // 0.693147 fldln2
        immutable targ_ldouble[7] ldval =
        [0.0,1.0,M_PI_L,M_LOG2T_L,M_LOG2E_L,M_LOG2_L,M_LN2_L];
    }

    // One entry more than the value tables: the final 0 is what gets
    // returned when the search loop at the end finds no match.
    immutable ubyte[7 + 1] opcode =
        /* FLDZ,FLD1,FLDPI,FLDL2T,FLDL2E,FLDLG2,FLDLN2,0 */
        [0xEE,0xE8,0xEB,0xE9,0xEA,0xEC,0xED,0];

    int i;
    targ_float f;
    targ_double d;
    targ_ldouble ld;
    int sz;             // number of bytes of the value that are significant
    int zero;           // non-zero if all of those bytes are 0
    void *p;            // points at whichever of f, d, ld was set
    immutable ubyte[16] zeros;

    // Fetch the value; only one of f, d, ld gets set, and sz tells which
    if (im == 0)
    {
        switch (tybasic(e.Ety))
        {
            case TYfloat:
            case TYifloat:
            case TYcfloat:
                f = e.EV.Vfloat;
                sz = 4;
                p = &f;
                break;

            case TYdouble:
            case TYdouble_alias:
            case TYidouble:
            case TYcdouble:
                d = e.EV.Vdouble;
                sz = 8;
                p = &d;
                break;

            case TYldouble:
            case TYildouble:
            case TYcldouble:
                ld = e.EV.Vldouble;
                sz = 10;
                p = &ld;
                break;

            default:
                assert(0);
        }
    }
    else
    {
        // the imaginary part exists for the complex types only
        switch (tybasic(e.Ety))
        {
            case TYcfloat:
                f = e.EV.Vcfloat.im;
                sz = 4;
                p = &f;
                break;

            case TYcdouble:
                d = e.EV.Vcdouble.im;
                sz = 8;
                p = &d;
                break;

            case TYcldouble:
                ld = e.EV.Vcldouble.im;
                sz = 10;
                p = &ld;
                break;

            default:
                assert(0);
        }
    }

    // Note that for this purpose, -0 is not regarded as +0,
    // since FLDZ loads a +0
    // (hence the comparison of the bytes rather than of the values)
    assert(sz <= zeros.length);
    zero = (memcmp(p, zeros.ptr, sz) == 0);
    if (zero && config.target_cpu >= TARGET_PentiumPro)
        return 0xEE;            // FLDZ is the only one with 1 micro-op

    // For some reason, these instructions take more clocks
    if (config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
        return 0;

    if (zero)
        return 0xEE;

    // Look for one of the remaining constants. Index 0 (zero) was dealt with
    // above. If nothing matches, i ends up as fval.length, which selects the
    // trailing 0 of opcode[].
    for (i = 1; i < fval.length; i++)
    {
        switch (sz)
        {
            case 4:
                if (fval[i] != f)
                    continue;
                break;
            case 8:
                if (dval[i] != d)
                    continue;
                break;
            case 10:
                if (ldval[i] != ld)
                    continue;
                break;
            default:
                assert(0);
        }
        break;
    }
    return opcode[i];
}
779 
/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 *
 * Transfers between the 8087, the general registers and the XMM registers
 * go through the floatreg memory temporary.
 * Params:
 *      cdb = code sink
 *      e = expression that produced the result; its type determines the size
 *      retregs = where the result is now
 *      pretregs = where the result is wanted; passed to allocreg() when a
 *                 register has to be picked
 *      isReturnValue = only passed on to fixresult_complex87()
 */

@trusted
void fixresult87(ref CodeBuilder cdb,elem *e,regm_t retregs,regm_t *pretregs, bool isReturnValue = false)
{
    //printf("fixresult87(e = %p, retregs = x%x, *pretregs = x%x)\n", e,retregs,*pretregs);
    //printf("fixresult87(e = %p, retregs = %s, *pretregs = %s)\n", e,regm_str(retregs),regm_str(*pretregs));
    assert(!*pretregs || retregs);

    // anything involving the ST01 pair is handled by the complex variant
    if ((*pretregs | retregs) & mST01)
    {
        fixresult_complex87(cdb, e, retregs, pretregs, isReturnValue);
        return;
    }

    tym_t tym = tybasic(e.Ety);
    uint sz = _tysize[tym];
    //printf("tym = x%x, sz = %d\n", tym, sz);

    /* if retregs needs to be transferred into the 8087 */
    if (*pretregs & mST0 && retregs & (mBP | ALLREGS))
    {
        debug if (sz > DOUBLESIZE)
        {
            elem_print(e);
            printf("retregs = %s\n", regm_str(retregs));
        }
        assert(sz <= DOUBLESIZE);
        if (!I16)
        {

            if (*pretregs & mPSW)
            {   // Set flags
                regm_t r = retregs | mPSW;
                fixresult(cdb,e,retregs,&r);
            }
            push87(cdb);
            if (sz == REGSIZE || (I64 && sz == 4))
            {
                // value is in a single register
                const reg = findreg(retregs);
                cdb.genfltreg(STO,reg,0);           // MOV fltreg,reg
                cdb.genfltreg(0xD9,0,0);            // FLD float ptr fltreg
            }
            else
            {
                // value is in a register pair
                const msreg = findregmsw(retregs);
                const lsreg = findreglsw(retregs);
                cdb.genfltreg(STO,lsreg,0);         // MOV fltreg,lsreg
                cdb.genfltreg(STO,msreg,4);         // MOV fltreg+4,msreg
                cdb.genfltreg(0xDD,0,0);            // FLD double ptr fltreg
            }
        }
        else
        {
            // 16 bit code: get the value into the registers the library
            // routine expects, then let it do the load
            regm_t regm = (sz == FLOATSIZE) ? FLOATREGS : DOUBLEREGS;
            regm |= *pretregs & mPSW;
            fixresult(cdb,e,retregs,&regm);
            regm = 0;           // don't worry about result from CLIB.xxx
            callclib(cdb,e,
                    ((sz == FLOATSIZE) ? CLIB.fltto87 : CLIB.dblto87),
                    &regm,0);
        }
    }
    /* if the result is in the 8087 and is wanted in general registers */
    else if (*pretregs & (mBP | ALLREGS) && retregs & mST0)
    {
        assert(sz <= DOUBLESIZE);
        uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
        // the flags have to be set while the value is still in ST0
        if (*pretregs & mPSW && !(retregs & mPSW))
            genftst(cdb,e,0);
        // FSTP floatreg
        pop87();
        cdb.genfltreg(ESC(mf,1),3,0);
        genfwait(cdb);
        reg_t reg;
        allocreg(cdb,pretregs,&reg,(sz == FLOATSIZE) ? TYfloat : TYdouble);
        if (sz == FLOATSIZE)
        {
            if (!I16)
                cdb.genfltreg(LOD,reg,0);
            else
            {
                // 16 bit code loads the float as two halves
                cdb.genfltreg(LOD,reg,REGSIZE);
                cdb.genfltreg(LOD,findreglsw(*pretregs),0);
            }
        }
        else
        {   assert(sz == DOUBLESIZE);
            if (I16)
            {
                // the double is spread over AX, BX, CX, DX
                cdb.genfltreg(LOD,AX,6);
                cdb.genfltreg(LOD,BX,4);
                cdb.genfltreg(LOD,CX,2);
                cdb.genfltreg(LOD,DX,0);
            }
            else if (I32)
            {
                // the double goes into a register pair
                cdb.genfltreg(LOD,reg,REGSIZE);
                cdb.genfltreg(LOD,findreglsw(*pretregs),0);
            }
            else // I64
            {
                cdb.genfltreg(LOD,reg,0);
                code_orrex(cdb.last(), REX_W);
            }
        }
    }
    /* if the result is in ST0 and is not wanted at all */
    else if (*pretregs == 0 && retregs == mST0)
    {
        cdb.genf2(0xDD,modregrm(3,3,0));    // FPOP
        pop87();
    }
    else
    {
        if (*pretregs & mPSW)
        {
            if (!(retregs & mPSW))
            {
                // pop ST0 after the test unless the value itself is wanted
                // in ST0 or in an XMM register
                genftst(cdb,e,!(*pretregs & (mST0 | XMMREGS))); // FTST
            }
        }
        /* XMM register to 8087 */
        if (*pretregs & mST0 && retregs & XMMREGS)
        {
            assert(sz <= DOUBLESIZE);
            uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
            // MOVD floatreg,XMM?
            const reg = findreg(retregs);
            cdb.genxmmreg(xmmstore(tym),reg,0,tym);
            push87(cdb);
            cdb.genfltreg(ESC(mf,1),0,0);                 // FLD float/double ptr fltreg
        }
        /* 8087 to XMM register */
        else if (retregs & mST0 && *pretregs & XMMREGS)
        {
            assert(sz <= DOUBLESIZE);
            uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
            // FSTP floatreg
            pop87();
            cdb.genfltreg(ESC(mf,1),3,0);
            genfwait(cdb);
            // MOVD XMM?,floatreg
            reg_t reg;
            allocreg(cdb,pretregs,&reg,(sz == FLOATSIZE) ? TYfloat : TYdouble);
            cdb.genxmmreg(xmmload(tym),reg,0,tym);
        }
        else
            // nothing to move: a result wanted in ST0 has to be there already
            assert(!(*pretregs & mST0) || (retregs & mST0));
    }
    // the value now in ST0 is one we want to keep track of
    if (*pretregs & mST0)
        note87(e,0,0);
}
932 
933 /********************************
934  * Generate in-line 8087 code for the following operators:
935  *      add
936  *      min
937  *      mul
938  *      div
939  *      cmp
940  */
941 
// Reverse the order that the op is done in
// The table leaves 0..3 alone and swaps 4 with 5 and 6 with 7; the leading
// entry is -1 (0xFF as a ubyte).
// NOTE(review): the users are outside this part of the file; presumably it is
// indexed with op + 1 so that an op of -1 maps onto the first entry - confirm.
__gshared const ubyte[9] oprev = [ cast(ubyte)-1,0,1,2,3,5,4,7,6 ];
944 
945 @trusted
946 void orth87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
947 {
948     //printf("orth87(+e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
949     // we could be evaluating / for side effects only
950     assert(*pretregs != 0);
951 
952     elem *e1 = e.EV.E1;
953     elem *e2 = e.EV.E2;
954     uint sz2 = tysize(e1.Ety);
955     if (tycomplex(e1.Ety))
956         sz2 /= 2;
957 
958     OPER eoper = e.Eoper;
959     if (eoper == OPmul && e2.Eoper == OPconst && el_toldoubled(e.EV.E2) == 2.0L)
960     {
961         // Perform "mul 2.0" as fadd ST(0), ST
962         regm_t retregs = mST0;
963         codelem(cdb,e1,&retregs,false);
964         cdb.genf2(0xDC, 0xC0);                    // fadd ST(0), ST;
965         fixresult87(cdb,e,mST0,pretregs);         // result is in ST(0).
966         freenode(e2);
967         return;
968     }
969 
970     uint op;
971     if (OTrel(eoper))
972         eoper = OPeqeq;
973     bool imaginary;
974     static uint X(OPER op, uint ty1, uint ty2) { return (op << 16) + ty1 * 256 + ty2; }
975     switch (X(eoper, tybasic(e1.Ety), tybasic(e2.Ety)))
976     {
977         case X(OPadd, TYfloat, TYfloat):
978         case X(OPadd, TYdouble, TYdouble):
979         case X(OPadd, TYdouble_alias, TYdouble_alias):
980         case X(OPadd, TYldouble, TYldouble):
981         case X(OPadd, TYldouble, TYdouble):
982         case X(OPadd, TYdouble, TYldouble):
983         case X(OPadd, TYifloat, TYifloat):
984         case X(OPadd, TYidouble, TYidouble):
985         case X(OPadd, TYildouble, TYildouble):
986             op = 0;                             // FADDP
987             break;
988 
989         case X(OPmin, TYfloat, TYfloat):
990         case X(OPmin, TYdouble, TYdouble):
991         case X(OPmin, TYdouble_alias, TYdouble_alias):
992         case X(OPmin, TYldouble, TYldouble):
993         case X(OPmin, TYldouble, TYdouble):
994         case X(OPmin, TYdouble, TYldouble):
995         case X(OPmin, TYifloat, TYifloat):
996         case X(OPmin, TYidouble, TYidouble):
997         case X(OPmin, TYildouble, TYildouble):
998             op = 4;                             // FSUBP
999             break;
1000 
1001         case X(OPmul, TYfloat, TYfloat):
1002         case X(OPmul, TYdouble, TYdouble):
1003         case X(OPmul, TYdouble_alias, TYdouble_alias):
1004         case X(OPmul, TYldouble, TYldouble):
1005         case X(OPmul, TYldouble, TYdouble):
1006         case X(OPmul, TYdouble, TYldouble):
1007         case X(OPmul, TYifloat, TYifloat):
1008         case X(OPmul, TYidouble, TYidouble):
1009         case X(OPmul, TYildouble, TYildouble):
1010         case X(OPmul, TYfloat, TYifloat):
1011         case X(OPmul, TYdouble, TYidouble):
1012         case X(OPmul, TYldouble, TYildouble):
1013         case X(OPmul, TYifloat, TYfloat):
1014         case X(OPmul, TYidouble, TYdouble):
1015         case X(OPmul, TYildouble, TYldouble):
1016             op = 1;                             // FMULP
1017             break;
1018 
1019         case X(OPdiv, TYfloat, TYfloat):
1020         case X(OPdiv, TYdouble, TYdouble):
1021         case X(OPdiv, TYdouble_alias, TYdouble_alias):
1022         case X(OPdiv, TYldouble, TYldouble):
1023         case X(OPdiv, TYldouble, TYdouble):
1024         case X(OPdiv, TYdouble, TYldouble):
1025         case X(OPdiv, TYifloat, TYifloat):
1026         case X(OPdiv, TYidouble, TYidouble):
1027         case X(OPdiv, TYildouble, TYildouble):
1028             op = 6;                             // FDIVP
1029             break;
1030 
1031         case X(OPmod, TYfloat, TYfloat):
1032         case X(OPmod, TYdouble, TYdouble):
1033         case X(OPmod, TYdouble_alias, TYdouble_alias):
1034         case X(OPmod, TYldouble, TYldouble):
1035         case X(OPmod, TYfloat, TYifloat):
1036         case X(OPmod, TYdouble, TYidouble):
1037         case X(OPmod, TYldouble, TYildouble):
1038         case X(OPmod, TYifloat, TYifloat):
1039         case X(OPmod, TYidouble, TYidouble):
1040         case X(OPmod, TYildouble, TYildouble):
1041         case X(OPmod, TYifloat, TYfloat):
1042         case X(OPmod, TYidouble, TYdouble):
1043         case X(OPmod, TYildouble, TYldouble):
1044             op = cast(uint) -1;
1045             break;
1046 
1047         case X(OPeqeq, TYfloat, TYfloat):
1048         case X(OPeqeq, TYdouble, TYdouble):
1049         case X(OPeqeq, TYdouble_alias, TYdouble_alias):
1050         case X(OPeqeq, TYldouble, TYldouble):
1051         case X(OPeqeq, TYifloat, TYifloat):
1052         case X(OPeqeq, TYidouble, TYidouble):
1053         case X(OPeqeq, TYildouble, TYildouble):
1054         {
1055             assert(OTrel(e.Eoper));
1056             assert((*pretregs & mST0) == 0);
1057             regm_t retregs = mST0;
1058             codelem(cdb,e1,&retregs,false);
1059             note87(e1,0,0);
1060             regm_t resregm = mPSW;
1061 
1062             if (rel_exception(e.Eoper) || config.flags4 & CFG4fastfloat)
1063             {
1064                 if (e2.Eoper == OPconst && !boolres(e2))
1065                 {
1066                     if (NOSAHF)
1067                     {
1068                         push87(cdb);
1069                         cdb.gen2(0xD9,0xEE);             // FLDZ
1070                         cdb.gen2(0xDF,0xF1);             // FCOMIP ST1
1071                         pop87();
1072                     }
1073                     else
1074                     {
1075                         cdb.genf2(0xD9,0xE4);            // FTST
1076                         cg87_87topsw(cdb);
1077                     }
1078                     cdb.genf2(0xDD,modregrm(3,3,0));     // FPOP
1079                     pop87();
1080                 }
1081                 else if (NOSAHF)
1082                 {
1083                     note87(e1,0,0);
1084                     load87(cdb,e2,0,&retregs,e1,-1);
1085                     makesure87(cdb,e1,0,1,0);
1086                     resregm = 0;
1087                     //cdb.genf2(0xD9,0xC8 + 1);          // FXCH ST1
1088                     cdb.gen2(0xDF,0xF1);                 // FCOMIP ST1
1089                     pop87();
1090                     cdb.genf2(0xDD,modregrm(3,3,0));     // FPOP
1091                     pop87();
1092                 }
1093                 else
1094                 {
1095                     load87(cdb,e2, 0, pretregs, e1, 3);  // FCOMPP
1096                 }
1097             }
1098             else
1099             {
1100                 if (e2.Eoper == OPconst && !boolres(e2) &&
1101                     config.target_cpu < TARGET_80386)
1102                 {
1103                     regm_t regm = 0;
1104 
1105                     callclib(cdb,e,CLIB.ftest0,&regm,0);
1106                     pop87();
1107                 }
1108                 else
1109                 {
1110                     note87(e1,0,0);
1111                     load87(cdb,e2,0,&retregs,e1,-1);
1112                     makesure87(cdb,e1,0,1,0);
1113                     resregm = 0;
1114                     if (NOSAHF)
1115                     {
1116                         cdb.gen2(0xDF,0xE9);              // FUCOMIP ST1
1117                         pop87();
1118                         cdb.genf2(0xDD,modregrm(3,3,0));  // FPOP
1119                         pop87();
1120                     }
1121                     else if (config.target_cpu >= TARGET_80386)
1122                     {
1123                         cdb.gen2(0xDA,0xE9);      // FUCOMPP
1124                         cg87_87topsw(cdb);
1125                         pop87();
1126                         pop87();
1127                     }
1128                     else
1129                         // Call a function instead so that exceptions
1130                         // are not generated.
1131                         callclib(cdb,e,CLIB.fcompp,&resregm,0);
1132                 }
1133             }
1134 
1135             freenode(e2);
1136             return;
1137         }
1138 
1139         case X(OPadd, TYcfloat, TYcfloat):
1140         case X(OPadd, TYcdouble, TYcdouble):
1141         case X(OPadd, TYcldouble, TYcldouble):
1142         case X(OPadd, TYcfloat, TYfloat):
1143         case X(OPadd, TYcdouble, TYdouble):
1144         case X(OPadd, TYcldouble, TYldouble):
1145         case X(OPadd, TYfloat, TYcfloat):
1146         case X(OPadd, TYdouble, TYcdouble):
1147         case X(OPadd, TYldouble, TYcldouble):
1148             goto Lcomplex;
1149 
1150         case X(OPadd, TYifloat, TYcfloat):
1151         case X(OPadd, TYidouble, TYcdouble):
1152         case X(OPadd, TYildouble, TYcldouble):
1153             goto Lcomplex2;
1154 
1155         case X(OPmin, TYcfloat, TYcfloat):
1156         case X(OPmin, TYcdouble, TYcdouble):
1157         case X(OPmin, TYcldouble, TYcldouble):
1158         case X(OPmin, TYcfloat, TYfloat):
1159         case X(OPmin, TYcdouble, TYdouble):
1160         case X(OPmin, TYcldouble, TYldouble):
1161         case X(OPmin, TYfloat, TYcfloat):
1162         case X(OPmin, TYdouble, TYcdouble):
1163         case X(OPmin, TYldouble, TYcldouble):
1164             goto Lcomplex;
1165 
1166         case X(OPmin, TYifloat, TYcfloat):
1167         case X(OPmin, TYidouble, TYcdouble):
1168         case X(OPmin, TYildouble, TYcldouble):
1169             goto Lcomplex2;
1170 
1171         case X(OPmul, TYcfloat, TYcfloat):
1172         case X(OPmul, TYcdouble, TYcdouble):
1173         case X(OPmul, TYcldouble, TYcldouble):
1174             goto Lcomplex;
1175 
1176         case X(OPdiv, TYcfloat, TYcfloat):
1177         case X(OPdiv, TYcdouble, TYcdouble):
1178         case X(OPdiv, TYcldouble, TYcldouble):
1179         case X(OPdiv, TYfloat, TYcfloat):
1180         case X(OPdiv, TYdouble, TYcdouble):
1181         case X(OPdiv, TYldouble, TYcldouble):
1182         case X(OPdiv, TYifloat, TYcfloat):
1183         case X(OPdiv, TYidouble, TYcdouble):
1184         case X(OPdiv, TYildouble, TYcldouble):
1185             goto Lcomplex;
1186 
1187         case X(OPdiv, TYifloat,   TYfloat):
1188         case X(OPdiv, TYidouble,  TYdouble):
1189         case X(OPdiv, TYildouble, TYldouble):
1190             op = 6;                             // FDIVP
1191             break;
1192 
1193         Lcomplex:
1194         {
1195             loadComplex(cdb,e1);
1196             loadComplex(cdb,e2);
1197             makesure87(cdb, e1, sz2, 2, 0);
1198             makesure87(cdb, e1, 0, 3, 0);
1199             regm_t retregs = mST01;
1200             if (eoper == OPadd)
1201             {
1202                 cdb.genf2(0xDE, 0xC0+2);    // FADDP ST(2),ST
1203                 cdb.genf2(0xDE, 0xC0+2);    // FADDP ST(2),ST
1204                 pop87();
1205                 pop87();
1206             }
1207             else if (eoper == OPmin)
1208             {
1209                 cdb.genf2(0xDE, 0xE8+2);    // FSUBP ST(2),ST
1210                 cdb.genf2(0xDE, 0xE8+2);    // FSUBP ST(2),ST
1211                 pop87();
1212                 pop87();
1213             }
1214             else
1215             {
1216                 int clib = eoper == OPmul ? CLIB.cmul : CLIB.cdiv;
1217                 callclib(cdb, e, clib, &retregs, 0);
1218             }
1219             fixresult_complex87(cdb, e, retregs, pretregs);
1220             return;
1221         }
1222 
1223         Lcomplex2:
1224         {
1225             regm_t retregs = mST0;
1226             codelem(cdb,e1, &retregs, false);
1227             note87(e1, 0, 0);
1228             loadComplex(cdb,e2);
1229             makesure87(cdb, e1, 0, 2, 0);
1230             retregs = mST01;
1231             if (eoper == OPadd)
1232             {
1233                 cdb.genf2(0xDE, 0xC0+2);   // FADDP ST(2),ST
1234             }
1235             else if (eoper == OPmin)
1236             {
1237                 cdb.genf2(0xDE, 0xE8+2);   // FSUBP ST(2),ST
1238                 cdb.genf2(0xD9, 0xE0);     // FCHS
1239             }
1240             else
1241                 assert(0);
1242             pop87();
1243             cdb.genf2(0xD9, 0xC8 + 1);     // FXCH ST(1)
1244             fixresult_complex87(cdb, e, retregs, pretregs);
1245             return;
1246         }
1247 
1248         case X(OPeqeq, TYcfloat, TYcfloat):
1249         case X(OPeqeq, TYcdouble, TYcdouble):
1250         case X(OPeqeq, TYcldouble, TYcldouble):
1251         case X(OPeqeq, TYcfloat, TYifloat):
1252         case X(OPeqeq, TYcdouble, TYidouble):
1253         case X(OPeqeq, TYcldouble, TYildouble):
1254         case X(OPeqeq, TYcfloat, TYfloat):
1255         case X(OPeqeq, TYcdouble, TYdouble):
1256         case X(OPeqeq, TYcldouble, TYldouble):
1257         case X(OPeqeq, TYifloat, TYcfloat):
1258         case X(OPeqeq, TYidouble, TYcdouble):
1259         case X(OPeqeq, TYildouble, TYcldouble):
1260         case X(OPeqeq, TYfloat, TYcfloat):
1261         case X(OPeqeq, TYdouble, TYcdouble):
1262         case X(OPeqeq, TYldouble, TYcldouble):
1263         case X(OPeqeq, TYfloat, TYifloat):
1264         case X(OPeqeq, TYdouble, TYidouble):
1265         case X(OPeqeq, TYldouble, TYildouble):
1266         case X(OPeqeq, TYifloat, TYfloat):
1267         case X(OPeqeq, TYidouble, TYdouble):
1268         case X(OPeqeq, TYildouble, TYldouble):
1269         {
1270             loadComplex(cdb,e1);
1271             loadComplex(cdb,e2);
1272             makesure87(cdb, e1, sz2, 2, 0);
1273             makesure87(cdb, e1, 0, 3, 0);
1274             regm_t retregs = 0;
1275             callclib(cdb, e, CLIB.ccmp, &retregs, 0);
1276             return;
1277         }
1278 
1279         case X(OPadd, TYfloat, TYifloat):
1280         case X(OPadd, TYdouble, TYidouble):
1281         case X(OPadd, TYldouble, TYildouble):
1282         case X(OPadd, TYifloat, TYfloat):
1283         case X(OPadd, TYidouble, TYdouble):
1284         case X(OPadd, TYildouble, TYldouble):
1285 
1286         case X(OPmin, TYfloat, TYifloat):
1287         case X(OPmin, TYdouble, TYidouble):
1288         case X(OPmin, TYldouble, TYildouble):
1289         case X(OPmin, TYifloat, TYfloat):
1290         case X(OPmin, TYidouble, TYdouble):
1291         case X(OPmin, TYildouble, TYldouble):
1292         {
1293             regm_t retregs = mST0;
1294             codelem(cdb,e1, &retregs, false);
1295             note87(e1, 0, 0);
1296             codelem(cdb,e2, &retregs, false);
1297             makesure87(cdb, e1, 0, 1, 0);
1298             if (eoper == OPmin)
1299                 cdb.genf2(0xD9, 0xE0);     // FCHS
1300             if (tyimaginary(e1.Ety))
1301                 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1)
1302             retregs = mST01;
1303             fixresult_complex87(cdb, e, retregs, pretregs);
1304             return;
1305         }
1306 
1307         case X(OPadd, TYcfloat, TYifloat):
1308         case X(OPadd, TYcdouble, TYidouble):
1309         case X(OPadd, TYcldouble, TYildouble):
1310             op = 0;
1311             goto Lci;
1312 
1313         case X(OPmin, TYcfloat, TYifloat):
1314         case X(OPmin, TYcdouble, TYidouble):
1315         case X(OPmin, TYcldouble, TYildouble):
1316             op = 4;
1317             goto Lci;
1318 
1319         Lci:
1320         {
1321             loadComplex(cdb,e1);
1322             regm_t retregs = mST0;
1323             load87(cdb,e2,sz2,&retregs,e1,op);
1324             freenode(e2);
1325             retregs = mST01;
1326             makesure87(cdb, e1,0,1,0);
1327             fixresult_complex87(cdb,e, retregs, pretregs);
1328             return;
1329         }
1330 
1331         case X(OPmul, TYcfloat, TYfloat):
1332         case X(OPmul, TYcdouble, TYdouble):
1333         case X(OPmul, TYcldouble, TYldouble):
1334             imaginary = false;
1335             goto Lcmul;
1336 
1337         case X(OPmul, TYcfloat, TYifloat):
1338         case X(OPmul, TYcdouble, TYidouble):
1339         case X(OPmul, TYcldouble, TYildouble):
1340             imaginary = true;
1341         Lcmul:
1342         {
1343             loadComplex(cdb,e1);
1344             if (imaginary)
1345             {
1346                 cdb.genf2(0xD9, 0xE0);          // FCHS
1347                 cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
1348                 if (elemisone(e2))
1349                 {
1350                     freenode(e2);
1351                     fixresult_complex87(cdb, e, mST01, pretregs);
1352                     return;
1353                 }
1354             }
1355             regm_t retregs = mST0;
1356             codelem(cdb,e2, &retregs, false);
1357             makesure87(cdb, e1, sz2, 1, 0);
1358             makesure87(cdb, e1, 0, 2, 0);
1359             cdb.genf2(0xDC,0xC8 + 2);           // FMUL ST(2), ST
1360             cdb.genf2(0xDE,0xC8 + 1);           // FMULP ST(1), ST
1361             pop87();
1362             fixresult_complex87(cdb, e, mST01, pretregs);
1363             return;
1364         }
1365 
1366         case X(OPmul, TYfloat, TYcfloat):
1367         case X(OPmul, TYdouble, TYcdouble):
1368         case X(OPmul, TYldouble, TYcldouble):
1369             imaginary = false;
1370             goto Lcmul2;
1371 
1372         case X(OPmul, TYifloat, TYcfloat):
1373         case X(OPmul, TYidouble, TYcdouble):
1374         case X(OPmul, TYildouble, TYcldouble):
1375             imaginary = true;
1376         Lcmul2:
1377         {
1378             regm_t retregs = mST0;
1379             codelem(cdb,e1, &retregs, false);
1380             note87(e1, 0, 0);
1381             loadComplex(cdb,e2);
1382             makesure87(cdb, e1, 0, 2, 0);
1383             cdb.genf2(0xD9, imaginary ? 0xE0 : 0xC8 + 1); // FCHS / FXCH ST(1)
1384             cdb.genf2(0xD9,0xC8 + 2);        // FXCH ST(2)
1385             cdb.genf2(0xDC,0xC8 + 2);        // FMUL ST(2), ST
1386             cdb.genf2(0xDE,0xC8 + 1);        // FMULP ST(1), ST
1387             pop87();
1388             fixresult_complex87(cdb, e, mST01, pretregs);
1389             return;
1390         }
1391 
1392         case X(OPdiv, TYcfloat, TYfloat):
1393         case X(OPdiv, TYcdouble, TYdouble):
1394         case X(OPdiv, TYcldouble, TYldouble):
1395         {
1396             loadComplex(cdb,e1);
1397             regm_t retregs = mST0;
1398             codelem(cdb,e2, &retregs, false);
1399             makesure87(cdb, e1, sz2, 1, 0);
1400             makesure87(cdb, e1, 0, 2, 0);
1401             cdb.genf2(0xDC,0xF8 + 2);            // FDIV ST(2), ST
1402             cdb.genf2(0xDE,0xF8 + 1);            // FDIVP ST(1), ST
1403             pop87();
1404             fixresult_complex87(cdb, e, mST01, pretregs);
1405             return;
1406         }
1407 
1408         case X(OPdiv, TYcfloat, TYifloat):
1409         case X(OPdiv, TYcdouble, TYidouble):
1410         case X(OPdiv, TYcldouble, TYildouble):
1411         {
1412             loadComplex(cdb,e1);
1413             cdb.genf2(0xD9,0xC8 + 1);        // FXCH ST(1)
1414             xchg87(0, 1);
1415             cdb.genf2(0xD9, 0xE0);               // FCHS
1416             regm_t retregs = mST0;
1417             codelem(cdb,e2, &retregs, false);
1418             makesure87(cdb, e1, 0, 1, 0);
1419             makesure87(cdb, e1, sz2, 2, 0);
1420             cdb.genf2(0xDC,0xF8 + 2);        // FDIV ST(2), ST
1421             cdb.genf2(0xDE,0xF8 + 1);             // FDIVP ST(1), ST
1422             pop87();
1423             fixresult_complex87(cdb, e, mST01, pretregs);
1424             return;
1425         }
1426 
1427         case X(OPmod, TYcfloat, TYfloat):
1428         case X(OPmod, TYcdouble, TYdouble):
1429         case X(OPmod, TYcldouble, TYldouble):
1430         case X(OPmod, TYcfloat, TYifloat):
1431         case X(OPmod, TYcdouble, TYidouble):
1432         case X(OPmod, TYcldouble, TYildouble):
1433         {
1434             /*
1435                         fld     E1.re
1436                         fld     E1.im
1437                         fld     E2
1438                         fxch    ST(1)
1439                 FM1:    fprem
1440                         fstsw   word ptr sw
1441                         fwait
1442                         mov     AH, byte ptr sw+1
1443                         jp      FM1
1444                         fxch    ST(2)
1445                 FM2:    fprem
1446                         fstsw   word ptr sw
1447                         fwait
1448                         mov     AH, byte ptr sw+1
1449                         jp      FM2
1450                         fstp    ST(1)
1451                         fxch    ST(1)
1452              */
1453             loadComplex(cdb,e1);
1454             regm_t retregs = mST0;
1455             codelem(cdb,e2, &retregs, false);
1456             makesure87(cdb, e1, sz2, 1, 0);
1457             makesure87(cdb, e1, 0, 2, 0);
1458             cdb.genf2(0xD9, 0xC8 + 1);             // FXCH ST(1)
1459 
1460             cdb.gen2(0xD9, 0xF8);                  // FPREM
1461             code *cfm1 = cdb.last();
1462             genjmpifC2(cdb, cfm1);                 // JC2 FM1
1463             cdb.genf2(0xD9, 0xC8 + 2);             // FXCH ST(2)
1464 
1465             cdb.gen2(0xD9, 0xF8);                  // FPREM
1466             code *cfm2 = cdb.last();
1467 
1468             genjmpifC2(cdb, cfm2);                 // JC2 FM2
1469             cdb.genf2(0xDD,0xD8 + 1);              // FSTP ST(1)
1470             cdb.genf2(0xD9, 0xC8 + 1);             // FXCH ST(1)
1471 
1472             pop87();
1473             fixresult_complex87(cdb, e, mST01, pretregs);
1474             return;
1475         }
1476 
1477         default:
1478 
1479             debug
1480             elem_print(e);
1481 
1482             assert(0);
1483     }
1484 
1485     int reverse = 0;
1486     int e2oper = e2.Eoper;
1487 
1488     /* Move double-sized operand into the second position if there's a chance
1489      * it will allow combining a load with an operation (DMD Bugzilla 2905)
1490      */
1491     if ( ((tybasic(e1.Ety) == TYdouble)
1492           && ((e1.Eoper == OPvar) || (e1.Eoper == OPconst))
1493           && (tybasic(e2.Ety) != TYdouble)) ||
1494         (e1.Eoper == OPconst) ||
1495         (e1.Eoper == OPvar &&
1496          ((e1.Ety & (mTYconst | mTYimmutable) && !OTleaf(e2oper)) ||
1497           (e2oper == OPd_f &&
1498             (e2.EV.E1.Eoper == OPs32_d || e2.EV.E1.Eoper == OPs64_d || e2.EV.E1.Eoper == OPs16_d) &&
1499             e2.EV.E1.EV.E1.Eoper == OPvar
1500           ) ||
1501           ((e2oper == OPs32_d || e2oper == OPs64_d || e2oper == OPs16_d) &&
1502             e2.EV.E1.Eoper == OPvar
1503           )
1504          )
1505         )
1506        )
1507     {   // Reverse order of evaluation
1508         e1 = e.EV.E2;
1509         e2 = e.EV.E1;
1510         op = oprev[op + 1];
1511         reverse ^= 1;
1512     }
1513 
1514     regm_t retregs1 = mST0;
1515     codelem(cdb,e1,&retregs1,false);
1516     note87(e1,0,0);
1517 
1518     if (config.flags4 & CFG4fdivcall && e.Eoper == OPdiv)
1519     {
1520         regm_t retregs = mST0;
1521         load87(cdb,e2,0,&retregs,e1,-1);
1522         makesure87(cdb, e1,0,1,0);
1523         if (op == 7)                    // if reverse divide
1524             cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
1525         callclib(cdb,e,CLIB.fdiv87,&retregs,0);
1526         pop87();
1527         regm_t resregm = mST0;
1528         freenode(e2);
1529         fixresult87(cdb,e,resregm,pretregs);
1530     }
1531     else if (e.Eoper == OPmod)
1532     {
1533         /*
1534          *              fld     tbyte ptr y
1535          *              fld     tbyte ptr x             // ST = x, ST1 = y
1536          *      FM1:    // We don't use fprem1 because for some inexplicable
1537          *              // reason we get -5 when we do _modulo(15, 10)
1538          *              fprem                           // ST = ST % ST1
1539          *              fstsw   word ptr sw
1540          *              fwait
1541          *              mov     AH,byte ptr sw+1        // get msb of status word in AH
1542          *              sahf                            // transfer to flags
1543          *              jp      FM1                     // continue till ST < ST1
1544          *              fstp    ST(1)                   // leave remainder on stack
1545          */
1546         regm_t retregs = mST0;
1547         load87(cdb,e2,0,&retregs,e1,-1);
1548         makesure87(cdb,e1,0,1,0);       // now have x,y on stack; need y,x
1549         if (!reverse)                           // if not reverse modulo
1550             cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
1551 
1552         cdb.gen2(0xD9, 0xF8);                   // FM1: FPREM
1553         code *cfm1 = cdb.last();
1554         genjmpifC2(cdb, cfm1);                  // JC2 FM1
1555         cdb.genf2(0xDD,0xD8 + 1);               // FSTP ST(1)
1556 
1557         pop87();
1558         freenode(e2);
1559         fixresult87(cdb,e,mST0,pretregs);
1560     }
1561     else
1562     {
1563         load87(cdb,e2,0,pretregs,e1,op);
1564         freenode(e2);
1565     }
1566     if (*pretregs & mST0)
1567         note87(e,0,0);
1568     //printf("orth87(-e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
1569 }
1570 
/*****************************
 * Load e onto the x87 stack as a complex pair occupying ST01.
 *
 * A real operand is evaluated first and a zero is pushed after it to act
 * as the imaginary part; an imaginary operand has a zero pushed before it
 * to act as the real part; a complex operand is evaluated directly into
 * ST01.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = real, imaginary or complex floating point elem to load
 */

@trusted
private void loadComplex(ref CodeBuilder cdb,elem *e)
{
    const tym_t ty = tybasic(e.Ety);

    // Offset handed to note87() for the value left in ST0: the full operand
    // size for real/imaginary operands, half of it for complex operands.
    int topOffset = tysize(e.Ety);

    regm_t target;
    switch (ty)
    {
        case TYfloat, TYdouble, TYldouble:
            // Real operand: evaluate it, then supply 0 as the imaginary part
            target = mST0;
            codelem(cdb, e, &target, false);
            push87(cdb);
            cdb.gen2(0xD9, 0xEE);               // FLDZ
            break;

        case TYifloat, TYidouble, TYildouble:
            // Imaginary operand: supply 0 as the real part, then evaluate it
            push87(cdb);
            cdb.gen2(0xD9, 0xEE);               // FLDZ
            target = mST0;
            codelem(cdb, e, &target, false);
            break;

        case TYcfloat, TYcdouble, TYcldouble:
            // Complex operand: both halves come from e itself, so each half
            // is only half of the total size
            topOffset /= 2;
            target = mST01;
            codelem(cdb, e, &target, false);
            break;

        default:
            assert(0);
    }

    // Record both halves of e in the x87 stack bookkeeping
    // (presumably offset 0 lives in ST1 and offset topOffset in ST0 -
    // confirm against note87())
    note87(e, 0, 1);
    note87(e, topOffset, 0);
}
1617 
1618 /*************************
1619  * If op == -1, load expression e into ST0.
1620  * else compute (eleft op e), eleft is in ST0.
1621  * Must follow same logic as cmporder87();
1622  */
1623 
1624 @trusted
1625 void load87(ref CodeBuilder cdb,elem *e,uint eoffset,regm_t *pretregs,elem *eleft,OPER op)
1626 {
1627     code cs;
1628     regm_t retregs;
1629     reg_t reg;
1630     uint mf1;
1631     ubyte ldop;
1632     int i;
1633 
1634     if (NDPP)
1635         printf("+load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,global87.stackused);
1636 
1637     assert(!(NOSAHF && op == 3));
1638     elem_debug(e);
1639     if (ADDFWAIT())
1640         cs.Iflags = CFwait;
1641     else
1642         cs.Iflags = 0;
1643     cs.Irex = 0;
1644     OPER opr = oprev[op + 1];
1645     tym_t ty = tybasic(e.Ety);
1646     uint mf = (ty == TYfloat || ty == TYifloat || ty == TYcfloat) ? MFfloat : MFdouble;
1647     bool noted = false;
1648     if ((ty == TYldouble || ty == TYildouble) &&
1649         op != -1 && e.Eoper != OPd_ld)
1650         goto Ldefault;
1651 L5:
1652     switch (e.Eoper)
1653     {
1654         case OPcomma:
1655             if (op != -1)
1656             {
1657                 note87(eleft,eoffset,0);
1658                 noted = true;
1659             }
1660             docommas(cdb,e);
1661             goto L5;
1662 
1663         case OPvar:
1664             notreg(e);
1665             goto L2;
1666 
1667         case OPind:
1668         L2:
1669             if (op != -1)
1670             {
1671                 if (e.Ecount && e.Ecount != e.Ecomsub &&
1672                     (i = cse_get(e, 0)) >= 0)
1673                 {
1674                     immutable ubyte[8] b2 = [0xC0,0xC8,0xD0,0xD8,0xE0,0xE8,0xF0,0xF8];
1675 
1676                     cdb.genf2(0xD8,b2[op] + i);        // Fop ST(i)
1677                 }
1678                 else
1679                 {
1680                     getlvalue87(cdb,cs,e,0);
1681                     makesure87(cdb,eleft,eoffset,0,0);
1682                     cs.Iop = ESC(mf,0);
1683                     cs.Irm |= modregrm(0,op,0);
1684                     cdb.gen(&cs);
1685                 }
1686             }
1687             else
1688             {
1689                 push87(cdb);
1690                 switch (ty)
1691                 {
1692                     case TYfloat:
1693                     case TYdouble:
1694                     case TYifloat:
1695                     case TYidouble:
1696                     case TYcfloat:
1697                     case TYcdouble:
1698                     case TYdouble_alias:
1699                         loadea(cdb,e,&cs,ESC(mf,1),0,0,0,0); // FLD var
1700                         break;
1701                     case TYldouble:
1702                     case TYildouble:
1703                     case TYcldouble:
1704                         loadea(cdb,e,&cs,0xDB,5,0,0,0);      // FLD var
1705                         break;
1706                     default:
1707                         printf("ty = x%x\n", ty);
1708                         assert(0);
1709                 }
1710                 note87(e,0,0);
1711             }
1712             break;
1713 
1714         case OPd_f:
1715         case OPf_d:
1716         case OPd_ld:
1717             mf1 = (tybasic(e.EV.E1.Ety) == TYfloat || tybasic(e.EV.E1.Ety) == TYifloat)
1718                     ? MFfloat : MFdouble;
1719             if (op != -1 && global87.stackused && !noted)
1720                 note87(eleft,eoffset,0);    // don't trash this value
1721             if (e.EV.E1.Eoper == OPvar || e.EV.E1.Eoper == OPind)
1722             {
1723                 static if (1)
1724                 {
1725                   L4:
1726                     getlvalue87(cdb,cs,e.EV.E1,0);
1727                     cs.Iop = ESC(mf1,0);
1728                     if (op != -1)
1729                     {
1730                         cs.Irm |= modregrm(0,op,0);
1731                         makesure87(cdb,eleft,eoffset,0,0);
1732                     }
1733                     else
1734                     {
1735                         cs.Iop |= 1;
1736                         push87(cdb);
1737                     }
1738                     cdb.gen(&cs);                     // FLD / Fop
1739                 }
1740                 else
1741                 {
1742                     loadea(cdb,e.EV.E1,&cs,ESC(mf1,1),0,0,0,0); /* FLD e.EV.E1 */
1743                 }
1744 
1745                 // Variable cannot be put into a register anymore
1746                 if (e.EV.E1.Eoper == OPvar)
1747                     notreg(e.EV.E1);
1748                 freenode(e.EV.E1);
1749             }
1750             else
1751             {
1752                 retregs = mST0;
1753                 codelem(cdb,e.EV.E1,&retregs,false);
1754                 if (op != -1)
1755                 {
1756                     makesure87(cdb,eleft,eoffset,1,0);
1757                     cdb.genf2(0xDE,modregrm(3,opr,1)); // FopRP
1758                     pop87();
1759                 }
1760             }
1761             break;
1762 
1763         case OPs64_d:
1764             if (e.EV.E1.Eoper == OPvar ||
1765                 (e.EV.E1.Eoper == OPind && e.EV.E1.Ecount == 0))
1766             {
1767                 getlvalue87(cdb,cs,e.EV.E1,0);
1768                 cs.Iop = 0xDF;
1769                 push87(cdb);
1770                 cs.Irm |= modregrm(0,5,0);
1771                 cdb.gen(&cs);                     // FILD m64
1772                 // Variable cannot be put into a register anymore
1773                 if (e.EV.E1.Eoper == OPvar)
1774                     notreg(e.EV.E1);
1775                 freenode(e.EV.E1);
1776             }
1777             else if (I64)
1778             {
1779                 retregs = ALLREGS;
1780                 codelem(cdb,e.EV.E1,&retregs,false);
1781                 reg = findreg(retregs);
1782                 cdb.genfltreg(STO,reg,0);         // MOV floatreg,reg
1783                 code_orrex(cdb.last(), REX_W);
1784                 push87(cdb);
1785                 cdb.genfltreg(0xDF,5,0);          // FILD long long ptr floatreg
1786             }
1787             else
1788             {
1789                 retregs = ALLREGS;
1790                 codelem(cdb,e.EV.E1,&retregs,false);
1791                 reg = findreglsw(retregs);
1792                 cdb.genfltreg(STO,reg,0);         // MOV floatreg,reglsw
1793                 reg = findregmsw(retregs);
1794                 cdb.genfltreg(STO,reg,4);         // MOV floatreg+4,regmsw
1795                 push87(cdb);
1796                 cdb.genfltreg(0xDF,5,0);          // FILD long long ptr floatreg
1797             }
1798             if (op != -1)
1799             {
1800                 makesure87(cdb,eleft,eoffset,1,0);
1801                 cdb.genf2(0xDE,modregrm(3,opr,1)); // FopRP
1802                 pop87();
1803             }
1804             break;
1805 
1806         case OPconst:
1807             ldop = loadconst(e, 0);
1808             if (ldop)
1809             {
1810                 push87(cdb);
1811                 cdb.genf2(0xD9,ldop);          // FLDx
1812                 if (op != -1)
1813                 {
1814                     cdb.genf2(0xDE,modregrm(3,opr,1));        // FopRP
1815                     pop87();
1816                 }
1817             }
1818             else
1819             {
1820                 assert(0);
1821             }
1822             break;
1823 
1824         case OPu16_d:
1825         {
1826             /* This opcode should never be generated        */
1827             /* (probably shouldn't be for 16 bit code too)  */
1828             assert(!I32);
1829 
1830             if (op != -1 && !noted)
1831                 note87(eleft,eoffset,0);    // don't trash this value
1832             retregs = ALLREGS & mLSW;
1833             codelem(cdb,e.EV.E1,&retregs,false);
1834             regwithvalue(cdb,ALLREGS & mMSW,0,reg,0);  // 0-extend
1835             retregs |= mask(reg);
1836             mf1 = MFlong;
1837             goto L3;
1838         }
1839 
1840         case OPs16_d:       mf1 = MFword;   goto L6;
1841         case OPs32_d:       mf1 = MFlong;   goto L6;
1842         L6:
1843             if (e.Ecount)
1844                 goto Ldefault;
1845             if (op != -1 && !noted)
1846                 note87(eleft,eoffset,0);    // don't trash this value
1847             if (e.EV.E1.Eoper == OPvar ||
1848                 (e.EV.E1.Eoper == OPind && e.EV.E1.Ecount == 0))
1849             {
1850                 goto L4;
1851             }
1852             else
1853             {
1854                 retregs = ALLREGS;
1855                 codelem(cdb,e.EV.E1,&retregs,false);
1856             L3:
1857                 if (I16 && e.Eoper != OPs16_d)
1858                 {
1859                     /* MOV floatreg+2,reg   */
1860                     reg = findregmsw(retregs);
1861                     cdb.genfltreg(STO,reg,REGSIZE);
1862                     retregs &= mLSW;
1863                 }
1864                 reg = findreg(retregs);
1865                 cdb.genfltreg(STO,reg,0);         // MOV floatreg,reg
1866                 if (op != -1)
1867                 {
1868                     makesure87(cdb,eleft,eoffset,0,0);
1869                     cdb.genfltreg(ESC(mf1,0),op,0);   // Fop floatreg
1870                 }
1871                 else
1872                 {
1873                     /* FLD long ptr floatreg        */
1874                     push87(cdb);
1875                     cdb.genfltreg(ESC(mf1,1),0,0);
1876                 }
1877             }
1878             break;
1879         default:
1880         Ldefault:
1881             retregs = mST0;
1882             codelem(cdb,e,&retregs,2);
1883 
1884             if (op != -1)
1885             {
1886                 makesure87(cdb,eleft,eoffset,1,(op == 0 || op == 1));
1887                 pop87();
1888                 if (op == 4 || op == 6)     // sub or div
1889                 {
1890                     code *cl = cdb.last();
1891                     if (cl && cl.Iop == 0xD9 && cl.Irm == 0xC9)   // FXCH ST(1)
1892                     {   cl.Iop = NOP;
1893                         opr = op;           // reverse operands
1894                     }
1895                 }
1896                 cdb.genf2(0xDE,modregrm(3,opr,1));        // FopRP
1897             }
1898             break;
1899     }
1900     if (op == 3)                    // FCOMP
1901     {   pop87();                    // extra pop was done
1902         cg87_87topsw(cdb);
1903     }
1904     fixresult87(cdb,e,((op == 3) ? mPSW : mST0),pretregs);
1905     if (NDPP)
1906         printf("-load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,global87.stackused);
1907 }
1908 
/********************************
 * Decide the operand order of an x87 compare.
 * Must follow same logic as load87().
 * Params:
 *      e = operand expression; a chain of OPcomma nodes is followed
 *          down to its rightmost operand before deciding
 * Returns:
 *      0 if the compare is to be done forwards,
 *      1 if it is to be done backwards
 */

@trusted
int cmporder87(elem *e)
{
    //printf("cmporder87(%p)\n",e);

    // Only the value-producing operand of a comma expression matters
    while (e.Eoper == OPcomma)
        e = e.EV.E2;

    const tym_t ty = tybasic(e.Ety);
    const bool is80bit = (ty == TYldouble || ty == TYildouble);

    switch (e.Eoper)
    {
        case OPd_f:
        case OPf_d:
        case OPd_ld:
        {
            // Conversions of a variable or an indirection go forwards
            const operand = e.EV.E1.Eoper;
            return (operand == OPvar || operand == OPind) ? 0 : 1;
        }

        case OPconst:
            // Constants with a dedicated load instruction, and all
            // 80 bit constants, go backwards
            //printf("ret 1, loadconst(e) = %d\n", loadconst(e));
            return (loadconst(e, 0) || is80bit) ? 1 : 0;

        case OPvar:
        case OPind:
            return is80bit ? 1 : 0;

        case OPu16_d:
        case OPs16_d:
        case OPs32_d:
            return 0;

        case OPs64_d:
        default:
            return 1;
    }
}
1968 
/*******************************
 * Generate code for an assignment (OPeq) whose destination is a
 * long double/double/float, real or imaginary.
 * The right operand is evaluated into ST0 and stored into the left
 * operand. If the caller wants the value as well, it is kept in ST0.
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = the OPeq node (E1 = destination, E2 = value)
 *      pretregs = registers/flags the caller wants the result in
 */

@trusted
void eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("+eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(e.Eoper == OPeq);

    // Evaluate the right hand side into ST0 (and flags if requested)
    regm_t retregs = mST0 | (*pretregs & mPSW);
    codelem(cdb,e.EV.E2,&retregs,false);

    // Select the store opcode and its reg field; start with the popping form
    opcode_t storeOp;
    uint storeReg;
    const tym_t tyDest = tybasic(e.EV.E1.Ety);
    switch (tyDest)
    {
        case TYdouble_alias:
        case TYidouble:
        case TYdouble:
            storeOp = ESC(MFdouble,1);
            storeReg = 3;
            break;

        case TYifloat:
        case TYfloat:
            storeOp = ESC(MFfloat,1);
            storeReg = 3;
            break;

        case TYildouble:
        case TYldouble:
            storeOp = 0xDB;
            storeReg = 7;
            break;

        default:
            assert(0);
    }

    // Does the caller want the result on the stack too?
    const bool keepValue = (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS)) != 0;

    if (!keepValue)
    {
        pop87();                            // FSTP e.EV.E1
    }
    else if (tyDest == TYldouble || tyDest == TYildouble)
    {
        // Duplicate the value; the store below pops the copy
        push87(cdb);
        cdb.genf2(0xD9,0xC0);               // FLD ST(0)
        pop87();
    }
    else
    {
        storeReg = 2;                       // FST e.EV.E1
    }

    // The store is built by hand rather than with
    //     loadea(cdb,e.EV.E1,&cs,op1,op2,0,0,0);
    // because that doesn't work if ST(0) gets saved to the stack by getlvalue()
    code cs;
    cs.Irex = 0;
    cs.Iflags = 0;
    cs.Iop = storeOp;
    if (keepValue)
    {
        // Make sure it's still there
        elem *value = e.EV.E2;
        while (value.Eoper == OPcomma)
            value = value.EV.E2;
        note87(value,0,0);
        getlvalue87(cdb, cs, e.EV.E1, 0);
        makesure87(cdb,value,0,0,1);
    }
    else
    {
        getlvalue87(cdb, cs, e.EV.E1, 0);
    }
    cs.Irm |= modregrm(0,storeReg,0);       // OR in reg field
    cdb.gen(&cs);

    /* 10 byte reals really occupy 12 or 16 bytes in memory,
     * so zero the extra 2 or 6 bytes.
     */
    const realSize = tysize(TYldouble);
    if (storeOp == 0xDB && (realSize == 12 || realSize == 16))
    {
        if (realSize == 16)
            cs.Irex &= ~REX_W;
        cs.Iop = 0xC7;                      // MOV EA+10,0
        NEWREG(cs.Irm, 0);
        cs.IEV1.Voffset += 10;
        cs.IFL2 = FLconst;
        cs.IEV2.Vint = 0;
        cs.Iflags |= CFopsize;
        cdb.gen(&cs);

        if (realSize == 16)
        {
            cs.IEV1.Voffset += 2;           // MOV EA+12,0
            cs.Iflags &= ~CFopsize;
            cdb.gen(&cs);
        }
    }

    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult87(cdb,e,mST0 | mPSW,pretregs);
}
2083 
/*******************************
 * Perform an assignment to a complex long double/double/float.
 * The right operand is evaluated into ST0,ST1 and both halves are
 * stored into the left operand: the half in ST0 goes to offset
 * size/2, the half in ST1 to offset 0.
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = the OPeq node (E1 = destination, E2 = value)
 *      pretregs = registers/flags the caller wants the result in
 */

@trusted
void complex_eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    opcode_t op1;
    uint op2;
    uint sz;
    int fxch = 0;

    //printf("complex_eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(e.Eoper == OPeq);
    cs.Iflags = ADDFWAIT() ? CFwait : 0;
    cs.Irex = 0;
    regm_t retregs = mST01 | (*pretregs & mPSW);
    codelem(cdb,e.EV.E2,&retregs,false);
    tym_t ty1 = tybasic(e.EV.E1.Ety);
    switch (ty1)
    {
        case TYcdouble:     op1 = ESC(MFdouble,1);  op2 = 3; break;
        case TYcfloat:      op1 = ESC(MFfloat,1);   op2 = 3; break;
        case TYcldouble:    op1 = 0xDB;             op2 = 7; break;
        default:
            assert(0);
    }

    // Does the caller want the result on the stack too?
    const bool wantValue = (*pretregs & (mST01 | mXMM0 | mXMM1)) != 0;

    if (wantValue)
    {
        if (ty1 == TYcldouble)
        {
            // The 80 bit store always pops, so store duplicates instead
            push87(cdb);
            push87(cdb);
            cdb.genf2(0xD9,0xC0 + 1);       // FLD ST(1)
            cdb.genf2(0xD9,0xC0 + 1);       // FLD ST(1)
            pop87();
            pop87();
        }
        else
        {   op2 = 2;                        // FST e.EV.E1
            fxch = 1;                       // non-popping store: swap to reach the other half
        }
    }
    else
    {   // FSTP e.EV.E1
        pop87();
        pop87();
    }
    sz = tysize(ty1) / 2;                   // size of one half
    if (wantValue)
    {
        cs.Iflags = 0;
        cs.Irex = 0;
        cs.Iop = op1;
        getlvalue87(cdb, cs, e.EV.E1, 0);
        cs.IEV1.Voffset += sz;
        cs.Irm |= modregrm(0, op2, 0);
        makesure87(cdb,e.EV.E2, sz, 0, 0);
        cdb.gen(&cs);                       // store ST0 at EA+sz
        genfwait(cdb);
        makesure87(cdb,e.EV.E2,  0, 1, 0);
    }
    else
    {
        loadea(cdb,e.EV.E1,&cs,op1,op2,sz,0,0);
        genfwait(cdb);
    }
    if (fxch)
        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
    cs.IEV1.Voffset -= sz;
    cdb.gen(&cs);                       // store other half at EA
    if (fxch)
        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
    if (tysize(TYldouble) == 12)
    {
        /* 10 byte reals really occupy 12 bytes,
         * zero the extra 2 bytes of both halves.
         */
        if (op1 == 0xDB)
        {
            cs.Iop = 0xC7;              // MOV EA+10,0
            NEWREG(cs.Irm, 0);
            cs.IEV1.Voffset += 10;
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = 0;
            cs.Iflags |= CFopsize;
            cdb.gen(&cs);
            cs.IEV1.Voffset += 12;
            cdb.gen(&cs);               // MOV EA+22,0
        }
    }
    else if (tysize(TYldouble) == 16)
    {
        /* 10 byte reals really occupy 16 bytes,
         * zero the extra 6 bytes of both halves.
         */
        if (op1 == 0xDB)
        {
            /* As in eq87(): the stores below must be 16 and 32 bits wide.
             * A 64 bit store at EA+12 would run into the second half.
             */
            cs.Irex &= ~REX_W;
            cs.Iop = 0xC7;              // MOV EA+10,0
            NEWREG(cs.Irm, 0);
            cs.IEV1.Voffset += 10;
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = 0;
            cs.Iflags |= CFopsize;
            cdb.gen(&cs);

            cs.IEV1.Voffset += 2;       // MOV EA+12,0
            cs.Iflags &= ~CFopsize;
            cdb.gen(&cs);

            cs.IEV1.Voffset += 14;      // MOV EA+26,0
            cs.Iflags |= CFopsize;
            cdb.gen(&cs);

            cs.IEV1.Voffset += 2;       // MOV EA+28,0
            cs.Iflags &= ~CFopsize;
            cdb.gen(&cs);
        }
    }
    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult_complex87(cdb, e,mST01 | mPSW,pretregs);
}
2202 
/*******************************
 * Perform an assignment while converting to integral type,
 * i.e. handle (e1 = (int) e2)
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = the OPeq node; E2 is the conversion node, E2.E1 the
 *          floating point value being converted
 *      pretregs = must be 0, no result is returned
 */

@trusted
private void cnvteq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPeq);
    assert(!*pretregs);

    elem *conv = e.EV.E2;                   // the conversion node
    elem_debug(conv);

    // Evaluate the floating point operand into ST0
    regm_t retregs = mST0;
    codelem(cdb,conv.EV.E1,&retregs,false);

    // Opcode and reg field of the integer store for the target width
    opcode_t storeOp;
    uint storeReg = 3;
    switch (conv.Eoper)
    {
        case OPd_s16:
            storeOp = ESC(MFword,1);
            break;

        case OPd_s32:
        case OPd_u16:
            storeOp = ESC(MFlong,1);
            break;

        case OPd_s64:
            storeOp = 0xDF;
            storeReg = 7;
            break;

        default:
            assert(0);
    }
    freenode(conv);

    genfwait(cdb);
    genSetRoundingMode(cdb, CW.roundto0);   // FLDCW roundto0

    pop87();                                // the store pops ST0

    code cs;
    cs.Iflags = ADDFWAIT() ? CFwait : 0;
    elem *dest = e.EV.E1;
    if (dest.Eoper == OPvar)
        notreg(dest);                       // cannot be put in register anymore
    loadea(cdb,dest,&cs,storeOp,storeReg,0,0,0);

    genfwait(cdb);
    genSetRoundingMode(cdb, CW.roundtonearest);   // FLDCW roundtonearest

    freenode(dest);
}
2254 
/**********************************
 * Perform +=, -=, *=, /=, %=, post++ and post-- for
 * long double/double/float lvalues.
 * Complex lvalues are forwarded to opmod_complex87() (for %=) or
 * opass_complex87() (everything else).
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = the op-assign node (E1 = lvalue, E2 = right operand)
 *      pretregs = registers/flags the caller wants the result in
 */

@trusted
public void opass87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    uint op = 0;            // reg field of the memory operand form, not used for %=
    opcode_t opld = 0;      // second byte of the FopP ST(1) form, not used for %=
    opcode_t op1;           // opcode for load from / store to E1
    uint op2;               // reg field of the store to E1
    tym_t ty1 = tybasic(e.EV.E1.Ety);

    switch (ty1)
    {
        case TYdouble_alias:
        case TYidouble:
        case TYdouble:      op1 = ESC(MFdouble,1);  op2 = 3; break;
        case TYifloat:
        case TYfloat:       op1 = ESC(MFfloat,1);   op2 = 3; break;
        case TYildouble:
        case TYldouble:     op1 = 0xDB;             op2 = 7; break;

        case TYcfloat:
        case TYcdouble:
        case TYcldouble:
            if (e.Eoper == OPmodass)
                opmod_complex87(cdb, e, pretregs);
            else
                opass_complex87(cdb, e, pretregs);
            return;

        default:
            assert(0);
    }
    switch (e.Eoper)
    {
        case OPpostinc:
        case OPaddass:      op = 0 << 3;    opld = 0xC1;    break;  // FADD
        case OPpostdec:
        case OPminass:      op = 5 << 3;    opld = 0xE1; /*0xE9;*/  break;  // FSUBR
        case OPmulass:      op = 1 << 3;    opld = 0xC9;    break;  // FMUL
        case OPdivass:      op = 7 << 3;    opld = 0xF1;    break;  // FDIVR
        case OPmodass:      break;
        default:            assert(0);
    }
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E2,&retregs,false);     // evaluate rvalue
    note87(e.EV.E2,0,0);
    getlvalue87(cdb,cs,e.EV.E1,e.Eoper==OPmodass?mAX:0);
    makesure87(cdb,e.EV.E2,0,0,0);
    if (config.flags4 & CFG4fdivcall && e.Eoper == OPdivass)
    {
        // Divide through the runtime library helper
        push87(cdb);
        cs.Iop = op1;
        if (ty1 == TYldouble || ty1 == TYildouble)
            cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
        cdb.gen(&cs);                       // FLD   e.EV.E1
        cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
        callclib(cdb,e,CLIB.fdiv87,&retregs,0);
        pop87();
    }
    else if (e.Eoper == OPmodass)
    {
        /*
         *          fld     tbyte ptr y
         *          fld     tbyte ptr x             // ST = x, ST1 = y
         *  FM1:    // We don't use fprem1 because for some inexplicable
         *          // reason we get -5 when we do _modulo(15, 10)
         *          fprem                           // ST = ST % ST1
         *          fstsw   word ptr sw
         *          fwait
         *          mov     AH,byte ptr sw+1        // get msb of status word in AH
         *          sahf                            // transfer to flags
         *          jp      FM1                     // continue till ST < ST1
         *          fstp    ST(1)                   // leave remainder on stack
         */
        push87(cdb);
        cs.Iop = op1;
        if (ty1 == TYldouble || ty1 == TYildouble)
            cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
        cdb.gen(&cs);                       // FLD   e.EV.E1

        cdb.gen2(0xD9, 0xF8);               // FPREM
        code *cfm1 = cdb.last();
        genjmpifC2(cdb, cfm1);              // JC2 FM1
        cdb.genf2(0xDD,0xD8 + 1);           // FSTP ST(1)

        pop87();
    }
    else if (ty1 == TYldouble || ty1 == TYildouble)
    {
        // No memory operand form for 80 bit reals: load E1, then operate on the stack
        push87(cdb);
        cs.Iop = op1;
        cs.Irm |= modregrm(0, 5, 0);        // FLD tbyte ptr ...
        cdb.gen(&cs);                       // FLD   e.EV.E1
        cdb.genf2(0xDE,opld);               // FopP  ST(1)
        pop87();
    }
    else
    {
        cs.Iop = op1 & ~1;
        cs.Irm |= op;
        cdb.gen(&cs);                       // Fop e.EV.E1
    }
    if (*pretregs & mPSW)
        genftst(cdb,e,0);                   // FTST ST0
    // if want result in registers
    if (*pretregs & (mST0 | ALLREGS | mBP))
    {
        if (ty1 == TYldouble || ty1 == TYildouble)
        {
            // The 80 bit store always pops, so store a duplicate
            push87(cdb);
            cdb.genf2(0xD9,0xC0);           // FLD ST(0)
            pop87();
        }
        else
            op2 = 2;                        // FST e.EV.E1
    }
    else
    {   // FSTP
        pop87();
    }
    cs.Iop = op1;
    NEWREG(cs.Irm,op2);                     // FSTx e.EV.E1
    freenode(e.EV.E1);
    cdb.gen(&cs);
    genfwait(cdb);
    fixresult87(cdb,e,mST0 | mPSW,pretregs);
}
2388 
/***********************************
 * Perform %= where E1 is complex and E2 is real or imaginary.
 * Both halves of E1 are reduced modulo E2 and stored back.
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = the OPmodass node (E1 = complex lvalue, E2 = right operand)
 *      pretregs = registers/flags the caller wants the result in
 */

@trusted
private void opmod_complex87(ref CodeBuilder cdb, elem *e,regm_t *pretregs)
{

    /*          fld     E2
                fld     E1.re
        FM1:    fprem
                fstsw   word ptr sw
                fwait
                mov     AH, byte ptr sw+1
                sahf
                jp      FM1
                fxch    ST(1)
                fld     E1.im
        FM2:    fprem
                fstsw   word ptr sw
                fwait
                mov     AH, byte ptr sw+1
                sahf
                jp      FM2
                fstp    ST(1)
     */

    code cs;

    tym_t ty1 = tybasic(e.EV.E1.Ety);
    uint sz2 = _tysize[ty1] / 2;                // size of one half of E1

    regm_t retregs = mST0;
    codelem(cdb,e.EV.E2,&retregs,false);         // FLD E2
    note87(e.EV.E2,0,0);
    getlvalue87(cdb,cs,e.EV.E1,0);
    makesure87(cdb,e.EV.E2,0,0,0);

    push87(cdb);
    switch (ty1)
    {
        case TYcdouble:  cs.Iop = ESC(MFdouble,1);      break;
        case TYcfloat:   cs.Iop = ESC(MFfloat,1);       break;
        case TYcldouble: cs.Iop = 0xDB; cs.Irm |= modregrm(0, 5, 0); break;
        default:
            assert(0);
    }
    cdb.gen(&cs);                               // FLD E1.re

    cdb.gen2(0xD9, 0xF8);                       // FPREM
    code *cfm1 = cdb.last();
    genjmpifC2(cdb, cfm1);                      // JC2 FM1
    cdb.genf2(0xD9, 0xC8 + 1);                  // FXCH ST(1)

    push87(cdb);
    cs.IEV1.Voffset += sz2;
    cdb.gen(&cs);                               // FLD E1.im

    cdb.gen2(0xD9, 0xF8);                       // FPREM
    code *cfm2 = cdb.last();
    genjmpifC2(cdb, cfm2);                      // JC2 FM2
    cdb.genf2(0xDD,0xD8 + 1);                   // FSTP ST(1)

    pop87();

    // Now ST0 = E1.im % E2, ST1 = E1.re % E2, and cs addresses E1.im

    if (*pretregs & (mST01 | mPSW))
    {
        if (ty1 == TYcldouble)
        {
            /* There is no non-popping 80 bit store, so store
             * duplicates of both halves and keep the originals.
             */
            push87(cdb);
            push87(cdb);
            cdb.genf2(0xD9,0xC1);               // FLD ST(1)
            cdb.genf2(0xD9,0xC1);               // FLD ST(1)
            cs.Irm |= modregrm(0, 7, 0);
            cdb.gen(&cs);                       // FSTP mreal.im
            cs.IEV1.Voffset -= sz2;
            cdb.gen(&cs);                       // FSTP mreal.re
            pop87();
            pop87();
        }
        else
        {
            cs.Irm |= modregrm(0, 2, 0);
            cdb.gen(&cs);                       // FST mreal.im
            // FST only stores ST0, so bring the real half to the top
            cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
            cs.IEV1.Voffset -= sz2;
            cdb.gen(&cs);                       // FST mreal.re
            cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
        }
        retregs = mST01;
    }
    else
    {
        // reg field 7 is the popping store for 80 bit reals, 3 for float/double
        cs.Irm |= modregrm(0, (ty1 == TYcldouble) ? 7 : 3, 0);
        cdb.gen(&cs);            // FSTP mreal.im
        cs.IEV1.Voffset -= sz2;
        cdb.gen(&cs);            // FSTP mreal.re
        pop87();
        pop87();
        retregs = 0;
    }
    freenode(e.EV.E1);
    genfwait(cdb);
    fixresult_complex87(cdb,e,retregs,pretregs);
}
2474 
/**********************************
 * Perform +=, -=, *= and /= for the lvalue being complex.
 * Throughout, a complex value on the x87 stack has its imaginary half
 * in ST0 and its real half in ST1.
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = the op-assign node (E1 = complex lvalue, E2 = right operand)
 *      pretregs = registers/flags the caller wants the result in
 */

@trusted
private void opass_complex87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    regm_t idxregs;
    code cs;
    uint op;            // reg field of the memory operand form
    opcode_t op2;       // second byte of the FopP ST(i),ST form

    tym_t ty1 = tybasic(e.EV.E1.Ety);
    uint sz2 = _tysize[ty1] / 2;            // size of one half of E1
    switch (e.Eoper)
    {
        case OPpostinc:
        case OPaddass:  op = 0 << 3;            // FADD
                        op2 = 0xC0;             // FADDP ST(i),ST
                        break;

        case OPpostdec:
        case OPminass:  op = 5 << 3;            // FSUBR
                        op2 = 0xE0;             // FSUBRP ST(i),ST
                        break;

        case OPmulass:  op = 1 << 3;            // FMUL
                        op2 = 0xC8;             // FMULP ST(i),ST
                        break;

        case OPdivass:  op = 7 << 3;            // FDIVR
                        op2 = 0xF0;             // FDIVRP ST(i),ST
                        break;

        default:        assert(0);
    }

    if (!tycomplex(e.EV.E2.Ety) &&
        (e.Eoper == OPmulass || e.Eoper == OPdivass))
    {
        /* Multiplying/dividing by a real or imaginary value applies the
         * same operand to both halves, so duplicate it and reuse the
         * componentwise code at L1.
         */
        retregs = mST0;
        codelem(cdb,e.EV.E2, &retregs, false);
        note87(e.EV.E2, 0, 0);
        getlvalue87(cdb,cs, e.EV.E1, 0);
        makesure87(cdb,e.EV.E2,0,0,0);
        push87(cdb);
        cdb.genf2(0xD9,0xC0);                   // FLD ST(0)
        goto L1;
    }
    else
    {
        loadComplex(cdb,e.EV.E2);
        getlvalue87(cdb,cs,e.EV.E1,0);
        makesure87(cdb,e.EV.E2,sz2,0,0);
        makesure87(cdb,e.EV.E2,0,1,0);
    }

    switch (e.Eoper)
    {
        case OPpostinc:
        case OPaddass:
        case OPpostdec:
        case OPminass:
        L1:
            if (ty1 == TYcldouble)
            {
                push87(cdb);
                push87(cdb);
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                cdb.genf2(0xDE, op2 + 2);       // FADDP/FSUBRP ST(2),ST
                cdb.genf2(0xDE, op2 + 2);       // FADDP/FSUBRP ST(2),ST
                pop87();
                pop87();
                if (tyimaginary(e.EV.E2.Ety))
                {
                    // An imaginary operand swaps the halves and negates one of them
                    if (e.Eoper == OPmulass)
                    {
                        cdb.genf2(0xD9, 0xE0);   // FCHS
                        cdb.genf2(0xD9, 0xC8+1); // FXCH ST(1)
                    }
                    else if (e.Eoper == OPdivass)
                    {
                        cdb.genf2(0xD9, 0xC8+1); // FXCH ST(1)
                        cdb.genf2(0xD9, 0xE0);   // FCHS
                    }
                }
            L2:
                // Store the 80 bit result; the store always pops, so
                // store duplicates if the value is wanted as well
                if (*pretregs & (mST01 | mPSW))
                {
                    push87(cdb);
                    push87(cdb);
                    cdb.genf2(0xD9,0xC1);       // FLD ST(1)
                    cdb.genf2(0xD9,0xC1);       // FLD ST(1)
                    retregs = mST01;
                }
                else
                    retregs = 0;
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0,7,0);
                cdb.gen(&cs);                   // FSTP e.EV.E1.im
                cs.IEV1.Voffset -= sz2;
                cdb.gen(&cs);                   // FSTP e.EV.E1.re
                pop87();
                pop87();

            }
            else
            {
                ubyte rmop = cast(ubyte)(cs.Irm | op);
                ubyte rmfst = cs.Irm | modregrm(0,2,0);
                ubyte rmfstp = cs.Irm | modregrm(0,3,0);
                ubyte iopfst = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
                opcode_t iop = (ty1 == TYcfloat) ? 0xD8 : 0xDC;

                cs.Iop = iop;
                cs.Irm = rmop;
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                           // Fop mreal.im
                if (tyimaginary(e.EV.E2.Ety) && (e.Eoper == OPmulass || e.Eoper == OPdivass))
                {
                    if (e.Eoper == OPmulass)
                        cdb.genf2(0xD9, 0xE0);          // FCHS
                    cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);                       // Fop mreal.re
                    if (e.Eoper == OPdivass)
                        cdb.genf2(0xD9, 0xE0);          // FCHS
                    if (*pretregs & (mST01 | mPSW))
                    {
                        cs.Iop = iopfst;
                        cs.Irm = rmfst;
                        cs.IEV1.Voffset += sz2;
                        cdb.gen(&cs);                   // FST mreal.im
                        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
                        cs.IEV1.Voffset -= sz2;
                        cdb.gen(&cs);                   // FST mreal.re
                        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
                        retregs = mST01;
                    }
                    else
                    {
                        cs.Iop = iopfst;
                        cs.Irm = rmfstp;
                        cs.IEV1.Voffset += sz2;
                        cdb.gen(&cs);                   // FSTP mreal.im
                        pop87();
                        cs.IEV1.Voffset -= sz2;
                        cdb.gen(&cs);                   // FSTP mreal.re
                        pop87();
                        retregs = 0;
                    }
                    goto L3;
                }

                if (*pretregs & (mST01 | mPSW))
                {
                    cs.Iop = iopfst;
                    cs.Irm = rmfst;
                    cdb.gen(&cs);               // FST mreal.im
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    cs.Iop = iop;
                    cs.Irm = rmop;
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // Fop mreal.re
                    cs.Iop = iopfst;
                    cs.Irm = rmfst;
                    cdb.gen(&cs);               // FST mreal.re
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    retregs = mST01;
                }
                else
                {
                    cs.Iop = iopfst;
                    cs.Irm = rmfstp;
                    cdb.gen(&cs);               // FSTP mreal.im
                    pop87();
                    cs.Iop = iop;
                    cs.Irm = rmop;
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // Fop mreal.re
                    cs.Iop = iopfst;
                    cs.Irm = rmfstp;
                    cdb.gen(&cs);               // FSTP mreal.re
                    pop87();
                    retregs = 0;
                }
            }
        L3:
            freenode(e.EV.E1);
            genfwait(cdb);
            fixresult_complex87(cdb,e,retregs,pretregs);
            return;

        case OPmulass:
            // complex *= complex, done by the runtime library helper
            push87(cdb);
            push87(cdb);
            if (ty1 == TYcldouble)
            {
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                retregs = mST01;
                callclib(cdb, e, CLIB.cmul, &retregs, 0);
                goto L2;
            }
            else
            {
                cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
                cs.Irm |= modregrm(0, 0, 0);    // FLD dword/qword ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                retregs = mST01;
                callclib(cdb, e, CLIB.cmul, &retregs, 0);
                if (*pretregs & (mST01 | mPSW))
                {
                    cs.Irm |= modregrm(0, 2, 0);
                    cdb.gen(&cs);               // FST mreal.im
                    // FST only stores ST0, so bring the real half to the top
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FST mreal.re
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    retregs = mST01;
                }
                else
                {
                    cs.Irm |= modregrm(0, 3, 0);
                    cdb.gen(&cs);               // FSTP mreal.im
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FSTP mreal.re
                    pop87();
                    pop87();
                    retregs = 0;
                }
                goto L3;
            }

        case OPdivass:
            // complex /= complex, done by the runtime library helper
            push87(cdb);
            push87(cdb);
            idxregs = idxregm(&cs);             // mask of index regs used
            if (ty1 == TYcldouble)
            {
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                retregs = mST01;
                callclib(cdb, e, CLIB.cdiv, &retregs, idxregs);
                goto L2;
            }
            else
            {
                cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
                cs.Irm |= modregrm(0, 0, 0);    // FLD dword/qword ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                retregs = mST01;
                callclib(cdb, e, CLIB.cdiv, &retregs, idxregs);
                if (*pretregs & (mST01 | mPSW))
                {
                    cs.Irm |= modregrm(0, 2, 0);
                    cdb.gen(&cs);               // FST mreal.im
                    // FST only stores ST0, so bring the real half to the top
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FST mreal.re
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    retregs = mST01;
                }
                else
                {
                    cs.Irm |= modregrm(0, 3, 0);
                    cdb.gen(&cs);               // FSTP mreal.im
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FSTP mreal.re
                    pop87();
                    pop87();
                    retregs = 0;
                }
                goto L3;
            }

        default:
            assert(0);
    }
}
2770 
/**************************
 * OPnegass
 *
 * Negate a floating point lvalue in place by XORing 0x80 into the byte that
 * holds its sign bit (both halves for complex types). If the result is
 * wanted, the negated value is then loaded onto the x87 stack.
 * Params:
 *      cdb = code sink
 *      e = OPnegass elem; e.EV.E1 is the lvalue being negated
 *      pretregs = where the result is wanted; 0 if the value is not used
 */

@trusted
void cdnegass87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdnegass87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    tym_t tyml = tybasic(e1.Ety);            // type of lvalue
    int sz = _tysize[tyml];

    code cs;
    getlvalue87(cdb,cs,e1,0);

    /* If the EA is really an XMM register, modEA() will fail.
     * So disallow putting e1 into a register.
     * A better way would be to negate the XMM register in place.
     */
    if (e1.Eoper == OPvar)
        e1.EV.Vsym.Sflags &= ~GTregcand;

    modEA(cdb,&cs);

    /* Remember where the operand starts. The sign byte is not always at
     * offset sz - 1 (padded 80 bit reals keep it at offset 9), so undoing the
     * adjustment below with `-= sz - 1` would land on the wrong address.
     */
    const eaOffset = cs.IEV1.Voffset;

    cs.Irm |= modregrm(0,6,0);
    cs.Iop = 0x80;
    if (tysize(TYldouble) > 10)
    {
        // long double is padded: the sign lives in byte 9 of the 10 data bytes
        if (tyml == TYldouble || tyml == TYildouble)
            cs.IEV1.Voffset += 10 - 1;
        else if (tyml == TYcldouble)
            cs.IEV1.Voffset += tysize(TYldouble) + 10 - 1;  // sign byte of the second half
        else
            cs.IEV1.Voffset += sz - 1;
    }
    else
        cs.IEV1.Voffset += sz - 1;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 0x80;
    cdb.gen(&cs);                       // XOR 7[EA],0x80
    if (tycomplex(tyml))
    {
        // step back one component to flip the sign of the first half as well
        cs.IEV1.Voffset -= sz / 2;
        cdb.gen(&cs);                   // XOR 7[EA],0x80
    }

    regm_t retregs = 0;
    if (*pretregs)
    {
        // Result is wanted: reload the negated value onto the x87 stack
        uint op;
        switch (tyml)
        {
            case TYifloat:
            case TYfloat:               cs.Iop = 0xD9;  op = 0; break;
            case TYidouble:
            case TYdouble:
            case TYdouble_alias:        cs.Iop = 0xDD;  op = 0; break;
            case TYildouble:
            case TYldouble:             cs.Iop = 0xDB;  op = 5; break;
            default:
                assert(0);
        }
        NEWREG(cs.Irm,op);
        cs.IEV1.Voffset = eaOffset;     // back to the start of the operand
        push87(cdb);
        cdb.gen(&cs);                   // FLD EA
        retregs = mST0;
    }

    freenode(e1);
    fixresult87(cdb,e,retregs,pretregs);
}
2845 
/************************
 * Take care of OPpostinc and OPpostdec.
 *
 * The old value of the lvalue is loaded onto the x87 stack, the rvalue is
 * added to (or subtracted from) it, and the sum is stored back to the lvalue.
 * For complex lvalues only the real part is updated.
 * Params:
 *      cdb = code sink
 *      e = OPpostinc / OPpostdec elem
 *      pretregs = where the result is wanted; must not be 0
 */

@trusted
void post87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("post87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(*pretregs);

    code cs;
    getlvalue87(cdb,cs,e.EV.E1,0);
    const tym_t ty1 = tybasic(e.EV.E1.Ety);

    // Pick the FLD opcode and its reg field from the operand type
    opcode_t loadOp;
    reg_t loadField;
    switch (ty1)
    {
        case TYifloat:
        case TYfloat:
        case TYcfloat:
            loadOp = ESC(MFfloat,1);
            loadField = 0;
            break;

        case TYdouble_alias:
        case TYidouble:
        case TYdouble:
        case TYcdouble:
            loadOp = ESC(MFdouble,1);
            loadField = 0;
            break;

        case TYildouble:
        case TYldouble:
        case TYcldouble:
            loadOp = 0xDB;
            loadField = 5;
            break;

        default:
            assert(0);
    }
    NEWREG(cs.Irm, loadField);

    // reg field of the FSTP that matches the load chosen above
    reg_t storeField = 3;
    if (loadField == 5)
        storeField = 7;

    cs.Iop = loadOp;
    push87(cdb);
    cdb.gen(&cs);                   // FLD e.EV.E1

    if (tycomplex(ty1))
    {
        uint halfSize = _tysize[ty1] / 2;

        push87(cdb);
        cs.IEV1.Voffset += halfSize;
        cdb.gen(&cs);               // FLD e.EV.E1 (second half)

        regm_t rvregs = mST0;       // note kludge to only load real part
        codelem(cdb,e.EV.E2,&rvregs,false); // load rvalue

        const uint arith = (e.Eoper == OPpostinc) ? 0xC0 + 2 : 0xE8 + 2;
        cdb.genf2(0xD8,arith);      // FADD/FSUBR ST,ST2

        NEWREG(cs.Irm,storeField);
        pop87();
        cs.IEV1.Voffset -= halfSize;
        cdb.gen(&cs);               // FSTP e.EV.E1 (first half)
        genfwait(cdb);
        freenode(e.EV.E1);
        fixresult_complex87(cdb, e, mST01, pretregs);
        return;
    }

    if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS))
    {
        // Want the result in a register: keep a copy of the old value
        push87(cdb);
        cdb.genf2(0xD9,0xC0);       // FLD ST0
    }
    if (*pretregs & mPSW)           // if result in flags
        genftst(cdb,e,0);           // FTST ST0

    regm_t rvregs = mST0;
    codelem(cdb,e.EV.E2,&rvregs,false);     // load rvalue
    pop87();

    uint arith;
    if (e.Eoper == OPpostinc)
        arith = modregrm(3,0,1);
    else
        arith = modregrm(3,5,1);
    cdb.genf2(0xDE,arith);          // FADDP/FSUBRP ST1

    NEWREG(cs.Irm,storeField);
    pop87();
    cdb.gen(&cs);                   // FSTP e.EV.E1
    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult87(cdb,e,mPSW | mST0,pretregs);
}
2925 
/************************
 * Do the following opcodes:
 *      OPd_u64
 *      OPld_u64
 *
 * Dispatches to the 32 bit or the 64 bit code generator.
 * Params:
 *      cdb = code sink
 *      e = conversion elem
 *      pretregs = where the result is wanted; must not be 0
 */
void cdd_u64(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    assert(I32 || I64);
    assert(*pretregs);

    if (I32)
    {
        cdd_u64_I32(cdb, e, pretregs);
        return;
    }
    cdd_u64_I64(cdb, e, pretregs);
}
2940 
@trusted
private void cdd_u64_I32(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Floating point to unsigned 64 bit conversion for 32 bit targets.
     * The result is returned in a register pair (lsw, msw); msw is shown as
     * EDX and lsw as EAX below.
     *
     * Generate:
            mov         EDX,0x8000_0000
            mov         floatreg+0,0
            mov         floatreg+4,EDX
            mov         floatreg+8,0x0FBF403e       // (roundTo0<<16) | adjust
            fld         real ptr floatreg           // adjust (= 1/real.epsilon)
            fcomp
            fstsw       AX
            fstcw       floatreg+12                 // save control word
            fldcw       floatreg+10                 // roundTo0
            test        AH,1
            jz          L1                          // jae L1

            fld         real ptr floatreg           // adjust
            fsubp       ST(1), ST
            fistp       floatreg                    // 64 bit store
            mov         EAX,floatreg
            add         EDX,floatreg+4
            fldcw       floatreg+12                 // restore control word
            jmp         L2

    L1:
            fistp       floatreg                    // 64 bit store
            mov         EAX,floatreg
            mov         EDX,floatreg+4
            fldcw       floatreg+12                 // restore control word
    L2:
     */

    // Evaluate the operand onto the x87 stack
    regm_t st0 = mST0;
    codelem(cdb,e.EV.E1, &st0, false);

    // Choose the register pair that receives the result
    const tym_t tym = e.Ety;
    regm_t retregs = *pretregs;
    if (!retregs)
        retregs = ALLREGS;
    reg_t allocated;
    allocreg(cdb,&retregs,&allocated,tym);
    const reg_t lsw = findreglsw(retregs);
    const reg_t msw = findregmsw(retregs);

    movregconst(cdb,msw,0x80000000,0);
    getregs(cdb,mask(msw) | mAX);       // AX is clobbered by FSTSW below

    // Build the 80 bit `adjust` constant and the roundTo0 control word in floatreg
    cdb.genfltreg(0xC7,0,0);
    code *lowStore = cdb.last();
    lowStore.IFL2 = FLconst;
    lowStore.IEV2.Vint = 0;                         // MOV floatreg+0,0
    cdb.genfltreg(STO,msw,4);                       // MOV floatreg+4,msw
    cdb.genfltreg(0xC7,0,8);
    code *highStore = cdb.last();
    highStore.IFL2 = FLconst;
    highStore.IEV2.Vint = 0xFBF403E;                // MOV floatreg+8,(roundTo0<<16)|adjust

    // Compare the operand against `adjust`
    push87(cdb);
    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.gen2(0xD8,0xD9);                            // FCOMP
    pop87();
    cdb.gen2(0xDF,0xE0);                            // FSTSW AX
    cdb.genfltreg(0xD9,7,12);                       // FSTCW floatreg+12
    cdb.genfltreg(0xD9,5,10);                       // FLDCW floatreg+10
    cdb.genc2(0xF6,modregrm(3,0,4),1);              // TEST AH,1
    code *labelL1 = gennop(null);
    genjmp(cdb,JE,FLcode,cast(block *)labelL1);     // JZ L1

    // Fall through: subtract `adjust`, convert, then add 0x8000_0000 to the msw
    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.genf2(0xDE,0xE8+1);                         // FSUBP ST(1),ST
    cdb.genfltreg(0xDF,7,0);                        // FISTP floatreg (64 bit)
    cdb.genfltreg(LOD,lsw,0);                       // MOV lsw,floatreg
    cdb.genfltreg(0x03,msw,4);                      // ADD msw,floatreg+4
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    code *labelL2 = gennop(null);
    genjmp(cdb,JMP,FLcode,cast(block *)labelL2);    // JMP L2

    // L1: convert directly
    cdb.append(labelL1);
    cdb.genfltreg(0xDF,7,0);                        // FISTP floatreg (64 bit)
    cdb.genfltreg(LOD,lsw,0);                       // MOV lsw,floatreg
    cdb.genfltreg(LOD,msw,4);                       // MOV msw,floatreg+4
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    cdb.append(labelL2);                            // L2:

    pop87();                                        // operand was consumed by FISTP
    fixresult(cdb,e,retregs,pretregs);
}
3025 
@trusted
private void cdd_u64_I64(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Floating point to unsigned 64 bit conversion for 64 bit targets.
     * The result register is shown as RAX and the scratch register as RDX
     * below; the actual registers are allocated.
     *
     * Generate:
            mov         EDX,0x8000_0000
            mov         floatreg+0,0
            mov         floatreg+4,EDX
            mov         floatreg+8,0x0FBF403e       // (roundTo0<<16) | adjust
            fld         real ptr floatreg           // adjust
            fcomp
            fstsw       AX
            fstcw       floatreg+12                 // save control word
            fldcw       floatreg+10                 // roundTo0
            test        AH,1
            jz          L1                          // jae L1

            fld         real ptr floatreg           // adjust
            fsubp       ST(1), ST
            fistp       floatreg                    // 64 bit store
            mov         RAX,floatreg
            shl         RDX,32
            add         RAX,RDX
            fldcw       floatreg+12                 // restore control word
            jmp         L2

    L1:
            fistp       floatreg                    // 64 bit store
            mov         RAX,floatreg
            fldcw       floatreg+12                 // restore control word
    L2:
     */

    // Evaluate the operand onto the x87 stack
    regm_t st0 = mST0;
    codelem(cdb,e.EV.E1, &st0, false);

    // Result register
    const tym_t tym = e.Ety;
    regm_t retregs = *pretregs;
    if (!retregs)
        retregs = ALLREGS;
    reg_t result;
    allocreg(cdb,&retregs,&result,tym);

    // Scratch register: neither the result register(s) nor AX
    regm_t scratchMask = ALLREGS & ~retregs & ~mAX;
    reg_t scratch;
    allocreg(cdb,&scratchMask,&scratch,tym);
    movregconst(cdb,scratch,0x80000000,0);
    getregs(cdb,mask(scratch) | mAX);   // AX is clobbered by FSTSW below

    // Build the 80 bit `adjust` constant and the roundTo0 control word in floatreg
    cdb.genfltreg(0xC7,0,0);
    code *lowStore = cdb.last();
    lowStore.IFL2 = FLconst;
    lowStore.IEV2.Vint = 0;                         // MOV floatreg+0,0
    cdb.genfltreg(STO,scratch,4);                   // MOV floatreg+4,scratch
    cdb.genfltreg(0xC7,0,8);
    code *highStore = cdb.last();
    highStore.IFL2 = FLconst;
    highStore.IEV2.Vint = 0xFBF403E;                // MOV floatreg+8,(roundTo0<<16)|adjust

    // Compare the operand against `adjust`
    push87(cdb);
    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.gen2(0xD8,0xD9);                            // FCOMP
    pop87();
    cdb.gen2(0xDF,0xE0);                            // FSTSW AX
    cdb.genfltreg(0xD9,7,12);                       // FSTCW floatreg+12
    cdb.genfltreg(0xD9,5,10);                       // FLDCW floatreg+10
    cdb.genc2(0xF6,modregrm(3,0,4),1);              // TEST AH,1
    code *labelL1 = gennop(null);
    genjmp(cdb,JE,FLcode,cast(block *)labelL1);     // JZ L1

    // Fall through: subtract `adjust`, convert, then add scratch << 32
    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.genf2(0xDE,0xE8+1);                         // FSUBP ST(1),ST
    cdb.genfltreg(0xDF,7,0);                        // FISTP floatreg (64 bit)
    cdb.genfltreg(LOD,result,0);                    // MOV result,floatreg
    code_orrex(cdb.last(), REX_W);
    cdb.genc2(0xC1,(REX_W << 16) | modregrmx(3,4,scratch),32);    // SHL scratch,32
    cdb.gen2(0x03,(REX_W << 16) | modregxrmx(3,result,scratch));  // ADD result,scratch
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    code *labelL2 = gennop(null);
    genjmp(cdb,JMP,FLcode,cast(block *)labelL2);    // JMP L2

    // L1: convert directly
    cdb.append(labelL1);
    cdb.genfltreg(0xDF,7,0);                        // FISTP floatreg (64 bit)
    cdb.genfltreg(LOD,result,0);                    // MOV result,floatreg
    code_orrex(cdb.last(), REX_W);
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    cdb.append(labelL2);                            // L2:

    pop87();                                        // operand was consumed by FISTP
    fixresult(cdb,e,retregs,pretregs);
}
3113 
/************************
 * Do the following opcodes:
 *      OPd_u32
 *
 * The operand is stored to floatreg with the rounding mode temporarily set
 * to round-to-zero, and the low 32 bits are then read back into a register.
 * Params:
 *      cdb = code sink
 *      e = OPd_u32 elem
 *      pretregs = where the result is wanted
 */
@trusted
void cdd_u32(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    assert(I32 || I64);

    /* Generate:
            mov         floatreg+8,0x0FBF0000   // (roundTo0<<16)
            fstcw       floatreg+12             // save control word
            fldcw       floatreg+10             // roundTo0
            fistp       floatreg
            fldcw       floatreg+12             // restore control word
            mov         EAX,floatreg
     */

    // Evaluate the operand onto the x87 stack
    regm_t st0 = mST0;
    codelem(cdb,e.EV.E1, &st0, false);

    // Allocate the result register
    const tym_t tym = e.Ety;
    regm_t retregs = *pretregs & ALLREGS;
    if (!retregs)
        retregs = ALLREGS;
    reg_t result;
    allocreg(cdb,&retregs,&result,tym);

    cdb.genfltreg(0xC7,0,8);
    code *cwStore = cdb.last();
    cwStore.IFL2 = FLconst;
    cwStore.IEV2.Vint = 0x0FBF0000;             // MOV floatreg+8,(roundTo0<<16)

    cdb.genfltreg(0xD9,7,12);                   // FSTCW floatreg+12
    cdb.genfltreg(0xD9,5,10);                   // FLDCW floatreg+10

    cdb.genfltreg(0xDF,7,0);                    // FISTP floatreg
    cdb.genfltreg(0xD9,5,12);                   // FLDCW floatreg+12
    cdb.genfltreg(LOD,result,0);                // MOV result,floatreg

    pop87();                                    // operand was consumed by FISTP
    fixresult(cdb,e,retregs,pretregs);
}
3155 
/************************
 * Do the following opcodes:
 *      OPd_s16
 *      OPd_s32
 *      OPd_u16
 *      OPd_s64
 *
 * For 16 bit targets the conversion is done by a runtime library call.
 * Otherwise the value is stored with FISTP into scratch space on the CPU
 * stack while the rounding mode is temporarily round-to-zero, and the
 * result is popped into registers.
 * Params:
 *      cdb = code sink
 *      e = conversion elem
 *      pretregs = where the result is wanted; must not be 0
 */

@trusted
void cnvt87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cnvt87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(*pretregs);

    const tym_t tym = e.Ety;
    const int sz = tysize(tym);
    int szoff = sz;             // offset of the saved control word in the scratch area

    int clib;                   // library routine for the 16 bit path
    uint mf;                    // FISTP opcode
    uint rf;                    // reg field of the FISTP
    switch (e.Eoper)
    {
        case OPd_s16:
            clib = CLIB.dblint87;
            mf = ESC(MFword,1);
            rf = 3;
            break;

        case OPd_u16:
            szoff = 4;          // converted as a 32 bit signed value
            goto case OPd_s32;

        case OPd_s32:
            clib = CLIB.dbllng87;
            mf = ESC(MFlong,1);
            rf = 3;
            break;

        case OPd_s64:
            clib = CLIB.dblllng;
            mf = 0xDF;
            rf = 7;
            break;

        default:
            assert(0);
    }

    regm_t retregs;
    reg_t reg;

    if (I16)                       // C may change the default control word
    {
        const bool argInRegs = (clib == CLIB.dblllng);
        if (argInRegs)
            retregs = I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
        else
            retregs = mST0; //I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
        codelem(cdb,e.EV.E1,&retregs,false);
        callclib(cdb,e,clib,pretregs,0);
        if (!argInRegs)
            pop87();
    }
    else if (1)
    {   //  Generate:
        //  sub     ESP,12
        //  fstcw   8[ESP]
        //  fldcw   roundto0
        //  fistp   long64 ptr [ESP]
        //  fldcw   8[ESP]
        //  pop     lsw
        //  pop     msw
        //  add     ESP,4

        const bool pic = (config.flags3 & CFG3pic) != 0;

        // Scratch area: the integer result, the saved control word and,
        // for PIC, a second word holding the roundto0 control word
        uint szpush = szoff + 2;
        if (pic)
            szpush += 2;
        szpush = (szpush + REGSIZE - 1) & ~(REGSIZE - 1);

        retregs = mST0;
        codelem(cdb,e.EV.E1,&retregs,false);

        if (szpush == REGSIZE)
            cdb.gen1(0x50 + AX);                // PUSH EAX
        else
            cod3_stackadj(cdb, szpush);
        genfwait(cdb);
        cdb.genc1(0xD9,modregrm(2,7,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FSTCW szoff[ESP]

        genfwait(cdb);

        if (pic)
        {
            cdb.genc(0xC7,modregrm(2,0,4) + 256*modregrm(0,4,SP),FLconst,szoff+2,FLconst,CW.roundto0); // MOV szoff+2[ESP], CW.roundto0
            code_orflag(cdb.last(), CFopsize);
            cdb.genc1(0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff+2); // FLDCW szoff+2[ESP]
        }
        else
            genSetRoundingMode(cdb, CW.roundto0);   // FLDCW roundto0

        pop87();

        genfwait(cdb);
        cdb.gen2sib(mf,modregrm(0,rf,4),modregrm(0,4,SP));                   // FISTP [ESP]

        retregs = *pretregs & (ALLREGS | mBP);
        if (!retregs)
            retregs = ALLREGS;
        allocreg(cdb,&retregs,&reg,tym);

        genfwait(cdb);                                           // FWAIT
        cdb.genc1(0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FLDCW szoff[ESP]

        if (szoff > REGSIZE)
        {
            szpush -= REGSIZE;
            genpop(cdb,findreglsw(retregs));       // POP lsw
        }
        szpush -= REGSIZE;
        genpop(cdb,reg);                           // POP reg

        // Release whatever part of the scratch area was not popped
        if (szpush)
            cod3_stackadj(cdb, -szpush);
        fixresult(cdb,e,retregs,pretregs);
    }
    else
    {
        // This is incorrect. For -inf and nan, the 8087 returns the largest
        // negative int (0x80000....). For -inf, 0x7FFFF... should be returned,
        // and for nan, 0 should be returned.
        retregs = mST0;
        codelem(cdb,e.EV.E1,&retregs,false);

        genfwait(cdb);
        genSetRoundingMode(cdb, CW.roundto0);      // FLDCW roundto0

        pop87();
        cdb.genfltreg(mf,rf,0);                    // FISTP floatreg
        retregs = *pretregs & (ALLREGS | mBP);
        if (!retregs)
            retregs = ALLREGS;
        allocreg(cdb,&retregs,&reg,tym);

        genfwait(cdb);

        if (sz > REGSIZE)
        {
            cdb.genfltreg(LOD,reg,REGSIZE);          // MOV reg,floatreg + REGSIZE
                                                     // MOV lsreg,floatreg
            cdb.genfltreg(LOD,findreglsw(retregs),0);
        }
        else
            cdb.genfltreg(LOD,reg,0);                // MOV reg,floatreg
        genSetRoundingMode(cdb, CW.roundtonearest);  // FLDCW roundtonearest
        fixresult(cdb,e,retregs,pretregs);
    }
}
3313 
/************************
 * Do OPrndtol.
 *
 * The operand is stored to floatreg as an integer with FISTP and then
 * loaded from there into the result register(s).
 * Params:
 *      cdb = code sink
 *      e = OPrndtol elem
 *      pretregs = where the result is wanted; if 0 the operand is only
 *                 evaluated for its side effects
 */

@trusted
void cdrndtol(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    if (*pretregs == 0)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    // Evaluate the operand onto the x87 stack
    regm_t st0 = mST0;
    codelem(cdb,e.EV.E1,&st0,false);

    const tym_t tym = e.Ety;
    const uint sz = tysize(tym);

    // FISTP opcode and reg field for the result size
    ubyte fistOp;
    ubyte fistField;
    switch (sz)
    {
        case 2:
            fistOp = 0xDF;
            fistField = 3;
            break;

        case 4:
            fistOp = 0xDB;
            fistField = 3;
            break;

        case 8:
            fistOp = 0xDF;
            fistField = 7;
            break;

        default:
            assert(0);
    }

    pop87();
    cdb.genfltreg(fistOp,fistField,0);  // FISTP floatreg

    regm_t retregs = *pretregs & (ALLREGS | mBP);
    if (!retregs)
        retregs = ALLREGS;
    reg_t reg;
    allocreg(cdb,&retregs,&reg,tym);
    genfwait(cdb);                      // FWAIT

    if (tysize(tym) > REGSIZE)
    {
        // Result needs a register pair
        cdb.genfltreg(LOD,reg,REGSIZE);             // MOV reg,floatreg + REGSIZE
        cdb.genfltreg(LOD,findreglsw(retregs),0);   // MOV lsreg,floatreg
    }
    else
    {
        cdb.genfltreg(LOD,reg,0);       // MOV reg,floatreg
        if (tysize(tym) == 8 && I64)
            code_orrex(cdb.last(), REX_W);
    }
    fixresult(cdb,e,retregs,pretregs);
}
3371 
/*************************
 * Do OPscale, OPyl2x, OPyl2xp1.
 *
 * Both operands are evaluated onto the x87 stack and combined with the
 * matching x87 instruction, leaving the result in ST0.
 * Params:
 *      cdb = code sink
 *      e = binary elem
 *      pretregs = where the result is wanted; must not be 0
 */

@trusted
void cdscale(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs != 0);

    regm_t st0 = mST0;
    codelem(cdb,e.EV.E1,&st0,false);
    note87(e.EV.E1,0,0);
    codelem(cdb,e.EV.E2,&st0,false);
    makesure87(cdb,e.EV.E1,0,1,0);       // now have x,y on stack; need y,x

    const op = e.Eoper;
    if (op == OPscale)
    {
        cdb.genf2(0xD9,0xFD);           // FSCALE
        cdb.genf2(0xDD,0xD8 + 1);       // FSTP ST(1)
    }
    else if (op == OPyl2x)
        cdb.genf2(0xD9,0xF1);           // FYL2X
    else if (op == OPyl2xp1)
        cdb.genf2(0xD9,0xF9);           // FYL2XP1
    else
        assert(0);

    pop87();                            // two operands became one result
    fixresult87(cdb,e,mST0,pretregs);
}
3407 
3408 
/**********************************
 * Unary -, absolute value, square root, sine, cosine, round to integer
 *
 * Evaluates the operand into ST0 and applies the single x87 instruction
 * that corresponds to e.Eoper.
 * Params:
 *      cdb = code sink
 *      e = unary elem
 *      pretregs = where the result is wanted; must not be 0
 */

@trusted
void neg87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("neg87()\n");

    assert(*pretregs);

    // Second opcode byte of the 0xD9-prefixed instruction
    opcode_t fpuOp;
    switch (e.Eoper)
    {
        case OPneg:
            fpuOp = 0xE0;       // FCHS
            break;

        case OPabs:
            fpuOp = 0xE1;       // FABS
            break;

        case OPsqrt:
            fpuOp = 0xFA;       // FSQRT
            break;

        case OPsin:
            fpuOp = 0xFE;       // FSIN
            break;

        case OPcos:
            fpuOp = 0xFF;       // FCOS
            break;

        case OPrint:
            fpuOp = 0xFC;       // FRNDINT
            break;

        default:
            assert(0);
    }

    regm_t operand = mST0;
    codelem(cdb,e.EV.E1,&operand,false);
    cdb.genf2(0xD9,fpuOp);              // FCHS/FABS/FSQRT/FSIN/FCOS/FRNDINT
    fixresult87(cdb,e,mST0,pretregs);
}
3435 
/**********************************
 * Unary - for complex operands
 *
 * The operand is evaluated into ST0/ST1 and the sign of each is changed
 * in turn, swapping the two so that they end up in their original order.
 * Params:
 *      cdb = code sink
 *      e = OPneg elem
 *      pretregs = where the result is wanted
 */

@trusted
void neg_complex87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPneg);

    regm_t operand = mST01;
    codelem(cdb,e.EV.E1,&operand,false);

    // Negate ST0, swap, negate the other half, swap back
    foreach (_; 0 .. 2)
    {
        cdb.genf2(0xD9,0xE0);           // FCHS
        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
    }
    fixresult_complex87(cdb,e,mST01,pretregs);
}
3452 
/*********************************
 * Load the floating point value addressed by e onto the x87 stack.
 *
 * The addressing mode for e is always computed; the FLD is only generated
 * when a result is wanted.
 * Params:
 *      cdb = code sink
 *      e = elem whose value is to be loaded
 *      pretregs = where the result is wanted; 0 if the value is not used
 */

@trusted
void cdind87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdind87(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    code cs;

    getlvalue87(cdb,cs,e,0);           // get addressing mode
    if (!*pretregs)
        return;                        // value not needed, nothing to load

    // Select the FLD form for the operand type
    switch (tybasic(e.Ety))
    {
        case TYifloat:
        case TYfloat:
            cs.Iop = 0xD9;
            break;

        case TYdouble_alias:
        case TYidouble:
        case TYdouble:
            cs.Iop = 0xDD;
            break;

        case TYldouble:
        case TYildouble:
            cs.Iop = 0xDB;
            cs.Irm |= modregrm(0,5,0);
            break;

        default:
            assert(0);
    }

    push87(cdb);
    cdb.gen(&cs);                      // FLD EA
    fixresult87(cdb,e,mST0,pretregs);
}
3491 
/************************************
 * Reset statics for another .obj file.
 *
 * Zeroes `oldd`, which (among other things) makes genSetRoundingMode()
 * emit its control word symbols again on next use.
 */

@trusted
void cg87_reset()
{
    memset(&oldd, 0, typeof(oldd).sizeof);
}
3501 
3502 
/*****************************************
 * Set rounding mode.
 *
 * For position independent code the control word is written to floatreg
 * and loaded from there. Otherwise it is loaded from a read-only symbol;
 * the symbols for both control words are created on first use.
 * Params:
 *      cdb = code sink
 *      cw = control word specifying rounding mode
 */

@trusted
private void genSetRoundingMode(ref CodeBuilder cdb, CW cw)
{
    if (config.flags3 & CFG3pic)
    {
        cdb.genfltreg(0xC7, 0, 0);      // MOV floatreg, cw
        code *store = cdb.last();
        store.IFL2 = FLconst;
        store.IEV2.Vuns = cw;

        cdb.genfltreg(0xD9, 5, 0);      // FLDCW floatreg
        return;
    }

    if (!oldd.round)                    // if not initialized
    {
        oldd.round = 1;

        auto cwi = CW.roundto0;         // round to 0
        oldd.roundto0 = out_readonly_sym(TYshort,&cwi,2);
        cwi = CW.roundtonearest;        // round to nearest
        oldd.roundtonearest = out_readonly_sym(TYshort,&cwi,2);
    }

    // Anything other than roundto0 selects the round-to-nearest symbol
    Symbol *rnddir = oldd.roundtonearest;
    if (cw == CW.roundto0)
        rnddir = oldd.roundto0;

    code cs;
    cs.Iop = 0xD9;
    cs.Irm = modregrm(0,5,BPRM);
    cs.Iflags = CFoff;
    cs.Irex = 0;
    cs.IFL1 = rnddir.Sfl;
    cs.IEV1.Vsym = rnddir;
    cs.IEV1.Voffset = 0;
    cdb.gen(&cs);                       // FLDCW rnddir
}
3545 
3546 /************************* Complex Numbers *********************/
3547 
/***************************
 * Set the PSW based on the state of ST01.
 *
 * Each of ST0 and ST1 is compared against 0. The second comparison is
 * skipped (or its result ignored) when the first one already came out
 * not-equal or unordered.
 * Params:
 *      cdb = code sink
 *      e = expression being tested (not referenced by this function)
 *      pop = 1 if ST0 and ST1 should be popped after the test, else 0
 */

@trusted
private void genctst(ref CodeBuilder cdb,elem *e,int pop)
{
    assert(pop == 0 || pop == 1);

    // Generate:
    //  if (NOSAHF)
    //          FLDZ
    //          FUCOMIP
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FLDZ
    //          FUCOMIP ST(2)
    //      L1:
    //        if (pop)
    //          FPOP
    //          FPOP
    //  else if (pop)
    //          FLDZ
    //          FUCOMPP
    //          FSTSW   AX
    //          SAHF
    //          FLDZ
    //          FUCOMPP
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FSTSW   AX
    //          SAHF
    //      L1:
    //  else
    //          FLDZ
    //          FUCOM
    //          FSTSW   AX
    //          SAHF
    //          FUCOMP  ST(2)
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FSTSW   AX
    //          SAHF
    //      L1:
    // FUCOMP doesn't raise exceptions on QNANs, unlike FTST

    // Code starting at label L1; appended to cdb at the very end
    CodeBuilder tail;
    tail.ctor();
    tail.gennop();
    code *labelL1 = tail.peek();
    block *target = cast(block *) labelL1;

    push87(cdb);
    cdb.gen2(0xD9,0xEE);                       // FLDZ

    if (NOSAHF)
    {
        cdb.gen2(0xDF,0xE9);                   // FUCOMIP
        pop87();
        genjmp(cdb,JNE,FLcode,target);         // JNE     L1
        genjmp(cdb,JP, FLcode,target);         // JP      L1
        cdb.gen2(0xD9,0xEE);                   // FLDZ
        cdb.gen2(0xDF,0xEA);                   // FUCOMIP ST(2)
        if (pop)
        {
            // The pops go after L1 so that both paths execute them
            tail.genf2(0xDD,modregrm(3,3,0));  // FPOP
            tail.genf2(0xDD,modregrm(3,3,0));  // FPOP
            pop87();
            pop87();
        }
    }
    else
    {
        if (pop)
        {
            cdb.gen2(0xDA,0xE9);               // FUCOMPP
            pop87();
            pop87();
            cg87_87topsw(cdb);                 // put 8087 flags in CPU flags
            cdb.gen2(0xD9,0xEE);               // FLDZ
            cdb.gen2(0xDA,0xE9);               // FUCOMPP
        }
        else
        {
            cdb.gen2(0xDD,0xE1);               // FUCOM
            cg87_87topsw(cdb);                 // put 8087 flags in CPU flags
            cdb.gen2(0xDD,0xEA);               // FUCOMP ST(2)
        }
        // Common tail of both SAHF variants
        pop87();
        genjmp(cdb,JNE,FLcode,target);         // JNE     L1
        genjmp(cdb,JP, FLcode,target);         // JP      L1
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
    }

    cdb.append(tail);                          // L1:
}
3643 
/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 *
 * This is the complex-number variant: the value is either a pair on the
 * x87 stack (mST01), or a cfloat/cdouble held in AX/DX or XMM0/XMM1, and
 * code is emitted to move it between those representations.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = expression whose type (e.Ety) selects the conversion
 *      retregs = where the result currently is
 *      pretregs = where the result is wanted; 0 means the value is discarded
 *      isReturnValue = true if the result is a function return value, in
 *                      which case ST0 and ST1 are exchanged for 64 bit Posix
 *                      targets
 */

@trusted
void fixresult_complex87(ref CodeBuilder cdb,elem *e,regm_t retregs,regm_t *pretregs, bool isReturnValue = false)
{
    static if (0)
    {
        printf("fixresult_complex87(e = %p, retregs = %s, *pretregs = %s)\n",
            e,regm_str(retregs),regm_str(*pretregs));
    }

    // If a result is requested, there must be a result to convert from
    assert(!*pretregs || retregs);
    tym_t tym = tybasic(e.Ety);
    uint sz = _tysize[tym];     // size of the whole value; each half is sz/2

    if (isReturnValue)
    {
        // In loadComplex and complex_eq87, complex numbers have the real part
        // pushed to the FPU stack first (ST1), then the imaginary part (ST0).
        // However, the Intel 64 bit ABI scheme requires that types classified
        // as complex x87 instead have the real part returned in ST0, and the
        // imaginary part in ST1.
        if (retregs == mST01 && I64 && (config.exe & EX_posix))
            cdb.genf2(0xD9, 0xC8 + 1);          // FXCH ST(1)
    }

    if (*pretregs == 0 && retregs == mST01)
    {
        // Result is not wanted: discard both halves from the x87 stack
        cdb.genf2(0xDD,modregrm(3,3,0));        // FPOP
        pop87();
        cdb.genf2(0xDD,modregrm(3,3,0));        // FPOP
        pop87();
    }
    else if (tym == TYllong)
    {
        // passing cfloat through register for I64
        assert(retregs & mST01, "this float expression is not implemented");
        // Both halves are stored as floats into floatreg, then fetched together
        // with a single REX.W load.
        // NOTE(review): the previous comments on the two stores looked swapped
        // relative to the offset arguments (4, then 0) - confirm
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),BX,4);     // FSTP floatreg+4
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),BX,0);     // FSTP floatreg
        genfwait(cdb);
        const reg = findreg(*pretregs);
        getregs(cdb,reg);
        cdb.genfltreg(LOD, reg, 0);             // MOV reg,floatreg
        code_orrex(cdb.last(), REX_W);          // extend to 64 bits
    }
    else if (tym == TYcfloat && *pretregs & (mAX|mDX) && retregs & mST01)
    {
        // cfloat on the x87 stack is wanted in DX:AX
        if (*pretregs & mPSW && !(retregs & mPSW))
            genctst(cdb,e,0);                   // FTST
        // First value popped (ST0) goes to DX
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),3,0);      // FSTP floatreg
        genfwait(cdb);
        getregs(cdb,mDX|mAX);
        cdb.genfltreg(LOD, DX, 0);              // MOV EDX,floatreg

        // Second value popped goes to AX
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),3,0);      // FSTP floatreg
        genfwait(cdb);
        cdb.genfltreg(LOD, AX, 0);              // MOV EAX,floatreg
    }
    else if (tym == TYcfloat && retregs & (mAX|mDX) && *pretregs & mST01)
    {
        // cfloat in DX:AX is wanted on the x87 stack.
        // AX is loaded first, so it ends up in ST1 once DX is loaded into ST0.
        push87(cdb);
        cdb.genfltreg(STO, AX, 0);              // MOV floatreg, EAX
        cdb.genfltreg(0xD9, 0, 0);              // FLD float ptr floatreg

        push87(cdb);
        cdb.genfltreg(STO, DX, 0);              // MOV floatreg, EDX
        cdb.genfltreg(0xD9, 0, 0);              // FLD float ptr floatreg

        if (*pretregs & mPSW)
            genctst(cdb,e,0);                   // FTST
    }
    else if ((tym == TYcfloat || tym == TYcdouble) &&
             *pretregs & (mXMM0|mXMM1) && retregs & mST01)
    {
        // cfloat/cdouble on the x87 stack is wanted in XMM0/XMM1
        tym_t tyf = tym == TYcfloat ? TYfloat : TYdouble;   // type of each half
        uint xop = xmmload(tyf);
        uint mf = tyf == TYfloat ? MFfloat : MFdouble;
        if (*pretregs & mPSW && !(retregs & mPSW))
            genctst(cdb,e,0);                   // FTST
        // First value popped (ST0) goes to XMM1
        pop87();
        cdb.genfltreg(ESC(mf,1),3,0);           // FSTP floatreg
        genfwait(cdb);
        getregs(cdb,mXMM0|mXMM1);
        cdb.genxmmreg(xop,XMM1,0,tyf);          // load XMM1 from floatreg

        // Second value popped goes to XMM0
        pop87();
        cdb.genfltreg(ESC(mf,1),3,0);           // FSTP floatreg
        genfwait(cdb);
        cdb.genxmmreg(xop, XMM0, 0, tyf);       // MOVD XMM0,floatreg
    }
    else if ((tym == TYcfloat || tym == TYcdouble) &&
             retregs & (mXMM0|mXMM1) && *pretregs & mST01)
    {
        // cfloat/cdouble in XMM0/XMM1 is wanted on the x87 stack.
        // XMM0 is loaded first, so it ends up in ST1 once XMM1 is loaded into ST0.
        tym_t tyf = tym == TYcfloat ? TYfloat : TYdouble;   // type of each half
        uint xop = xmmstore(tyf);
        uint fop = tym == TYcfloat ? 0xD9 : 0xDD;           // FLD float / FLD double opcode
        push87(cdb);
        cdb.genfltreg(xop, XMM0-XMM0, 0);       // STOS(SD) floatreg, XMM0
        checkSetVex(cdb.last(),tyf);
        cdb.genfltreg(fop, 0, 0);               // FLD float/double ptr floatreg

        push87(cdb);
        cdb.genxmmreg(xop, XMM1, 0, tyf);       // MOV floatreg, XMM1
        cdb.genfltreg(fop, 0, 0);               // FLD float/double ptr floatreg

        if (*pretregs & mPSW)
            genctst(cdb,e,0);                   // FTST
    }
    else
    {   // No register transfer needed; at most the flags have to be set
        if (*pretregs & mPSW)
        {   if (!(retregs & mPSW))
            {   assert(retregs & mST01);
                // Third argument is non-zero when the value itself is not wanted in ST01
                genctst(cdb,e,!(*pretregs & mST01));        // FTST
            }
        }
        // A result wanted in ST01 must already be there
        assert(!(*pretregs & mST01) || (retregs & mST01));
    }
    if (*pretregs & mST01)
    {   // Register both halves of e with note87: offset 0 with index 1,
        // offset sz/2 with index 0
        note87(e,0,1);
        note87(e,sz/2,0);
    }
}
3773 
/*****************************************
 * Operators OPc_r and OPc_i
 *
 * Evaluates the complex operand into ST01, then drops one of the two
 * x87 stack entries so that a single value remains in ST0:
 *  - OPc_r pops ST0
 *  - OPc_i stores ST0 over ST1 and pops
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPc_r / OPc_i expression
 *      pretregs = where the result is wanted
 */

@trusted
void cdconvt87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    // Get the complex operand onto the x87 stack as a pair
    regm_t pairRegs = mST01;
    codelem(cdb, e.EV.E1, &pairRegs, false);

    // Pick the destination ST(i) of the FSTP that removes one entry
    uint sti;
    switch (e.Eoper)
    {
        case OPc_r:
            sti = 0;            // FSTP ST(0), i.e. FPOP
            break;

        case OPc_i:
            sti = 1;            // FSTP ST(1)
            break;

        default:
            assert(0);
    }
    cdb.genf2(0xDD, 0xD8 + sti);
    pop87();

    // What is left is an ordinary scalar in ST0
    regm_t scalarRegs = mST0;
    fixresult87(cdb, e, scalarRegs, pretregs);
}
3801 
/**************************************
 * Load complex operand into ST01 or flags or both.
 *
 * Handles OPvar/OPind (two FLDs from memory, the second at an offset of half
 * the type size), OPconst (two constant-load instructions), and the
 * OPd_ld/OPld_d/OPf_d/OPd_f conversions (which simply load their operand).
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = complex expression to load
 *      pretregs = where the result is wanted
 */

@trusted
void cload87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cload87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(!I16);
    debug
    if (I32)
    {
        assert(config.inline8087);
        elem_debug(e);
        assert(*pretregs & (mST01 | mPSW));
        assert(!(*pretregs & ~(mST01 | mPSW)));
    }

    const tym_t ty = tybasic(e.Ety);
    const uint halfSize = _tysize[ty] / 2;      // size of one component

    // Instruction template shared by both FLDs of a memory operand
    code cs = void;
    memset(&cs, 0, cs.sizeof);
    if (ADDFWAIT())
        cs.Iflags = CFwait;

    // Opcode and reg field of the FLD matching the component type
    uint fldOp;
    uint fldReg;
    switch (ty)
    {
        case TYcfloat:
            fldOp = ESC(MFfloat,1);
            fldReg = 0;
            break;

        case TYcdouble:
            fldOp = ESC(MFdouble,1);
            fldReg = 0;
            break;

        case TYcldouble:
            fldOp = 0xDB;
            fldReg = 5;
            break;

        default:
            assert(0);
    }

    switch (e.Eoper)
    {
        case OPvar:
            notreg(e);                  // never enregister this variable
            goto case OPind;

        case OPind:
            push87(cdb);
            push87(cdb);
            loadea(cdb,e,&cs,fldOp,fldReg,0,0,0);       // FLD var
            cs.IEV1.Voffset += halfSize;                // same FLD, second component
            cdb.gen(&cs);
            break;

        case OPd_ld:
        case OPld_d:
        case OPf_d:
        case OPd_f:
            // Load the operand directly; no extra code is emitted for the conversion
            cload87(cdb,e.EV.E1, pretregs);
            freenode(e.EV.E1);
            return;

        case OPconst:
            push87(cdb);
            push87(cdb);
            foreach (part; 0 .. 2)
            {
                const ubyte ldop = loadconst(e, part);
                if (!ldop)
                    assert(0);          // constant has no direct load instruction
                cdb.genf2(0xD9,ldop);   // FLDx
            }
            break;

        default:
            debug elem_print(e);
            assert(0);
    }
    fixresult_complex87(cdb, e, mST01, pretregs);
}
3903 
/**********************************************
 * Load OPpair or OPrpair into mST01
 *
 * E1 is evaluated first, then E2; for OPrpair the two x87 stack
 * entries are exchanged afterwards.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPpair / OPrpair expression
 *      pretregs = where the result is wanted
 */
@trusted
void loadPair87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    const bool reversed = e.Eoper == OPrpair;
    assert(reversed || e.Eoper == OPpair);

    elem* first = e.EV.E1;
    regm_t regs = mST0;

    // First operand into ST0, and note it while the second is evaluated
    codelem(cdb, first, &regs, false);
    note87(first, 0, 0);

    // Second operand into ST0
    codelem(cdb, e.EV.E2, &regs, false);
    makesure87(cdb, first, 0, 1, 0);

    if (reversed)
    {
        cdb.genf2(0xD9, 0xC8 + 1);      // FXCH ST(1)
    }

    regs = mST01;
    fixresult_complex87(cdb, e, regs, pretregs);
}
3921 
/**********************************************
 * Round 80 bit precision to 32 or 64 bits.
 * OPtoprec
 *
 * The rounding is done by storing ST0 to floatreg as a float or double
 * (chosen by the size of e's type) and loading it straight back.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPtoprec expression
 *      pretregs = where the result is wanted; if 0, only the operand is evaluated
 */
@trusted
void cdtoprec(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    //printf("cdtoprec: *pretregs = %s\n", regm_str(*pretregs));
    if (*pretregs == 0)
    {
        // No result wanted, just evaluate the operand
        codelem(cdb, e.EV.E1, pretregs, false);
        return;
    }

    assert(config.inline8087);

    regm_t operandRegs = mST0;
    codelem(cdb, e.EV.E1, &operandRegs, false);

    if (*pretregs & mST0)
    {
        // Same ESC opcode serves both the store and the reload
        const bool isFloat = _tysize[tybasic(e.Ety)] == FLOATSIZE;
        const ubyte escOp = ESC(isFloat ? MFfloat : MFdouble, 1);

        cdb.genfltreg(escOp, 3, 0);     // FSTP float/double ptr fltreg
        genfwait(cdb);
        cdb.genfltreg(escOp, 0, 0);     // FLD float/double ptr fltreg
    }
    fixresult87(cdb, e, operandRegs, pretregs);
}