1 /**
2  * Constants and data structures specific to the x86 platform.
3  *
4  * Copyright:   Copyright (C) 1985-1998 by Symantec
5  *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
6  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
7  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
8  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/code_x86.d, backend/code_x86.d)
9  * Documentation:  https://dlang.org/phobos/dmd_backend_code_x86.html
10  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/code_x86.d
11  */
12 
13 module dmd.backend.code_x86;
14 
15 // Online documentation: https://dlang.org/phobos/dmd_backend_code_x86.html
16 
17 import dmd.backend.cdef;
18 import dmd.backend.cc : config, FL;
19 import dmd.backend.code;
20 import dmd.backend.codebuilder : CodeBuilder;
21 import dmd.backend.el : elem;
22 import dmd.backend.ty : I64;
23 import dmd.backend.barray;
24 
25 nothrow:
26 @safe:
27 
/// Integer type used to hold a CPU opcode (the type of `code.Iop`).
alias opcode_t = uint;          // CPU opcode
/// Sentinel meaning "no opcode".
enum opcode_t NoOpcode = 0xFFFF;              // not a valid opcode_t
30 
31 /* Register definitions */
32 
/* Register numbers.
 *   0..7   : AX..DI
 *   8..15  : R8..R15
 *   16..31 : XMM0..XMM15 (only XMM0..XMM7 and XMM15 are named here)
 *
 * Note: the unnamed XMM8..XMM14 slots would be 24..30. Those numeric values are
 * also used by ES, PSW, STACK, ST0, ST01 and NOREG declared later in this module,
 * so a register number in that range is ambiguous by value alone.
 */
enum
{
    AX      = 0,
    CX      = 1,
    DX      = 2,
    BX      = 3,
    SP      = 4,
    BP      = 5,
    SI      = 6,
    DI      = 7,

    // #defining R12-R15 interfere with setjmps' _JUMP_BUFFER members

    R8       = 8,
    R9       = 9,
    R10      = 10,
    R11      = 11,
    R12      = 12,
    R13      = 13,
    R14      = 14,
    R15      = 15,

    XMM0    = 16,
    XMM1    = 17,
    XMM2    = 18,
    XMM3    = 19,
    XMM4    = 20,
    XMM5    = 21,
    XMM6    = 22,
    XMM7    = 23,
/* There are also XMM8..XMM14 */
    XMM15   = 31,
}
66 
/**
 * Test whether a register number lies in the XMM register range.
 *
 * This is purely a numeric range check on [XMM0, XMM15].
 *
 * Params:
 *      reg = register number to classify
 * Returns:
 *      true if XMM0 <= reg <= XMM15
 */
bool isXMMreg(reg_t reg) pure
{
    return XMM0 <= reg && reg <= XMM15;
}
68 
// Register set aside for position independent code (see ALLREGS_INIT_PIC,
// which is ALLREGS_INIT without BX)
enum PICREG = BX;

// Register number of ES
enum ES     = 24;

// Count of the register numbers 0..15 (AX..DI and R8..R15)
enum NUMGENREGS = 16;

// fishy naming as it covers XMM7 but not XMM15
// currently only used as a replacement for mES in cgcod.c
enum NUMREGS = 25;

// Pseudo register numbers that follow ES
enum PSW     = 25;
enum STACK   = 26;      // top of stack
enum ST0     = 27;      // 8087 top of stack register
enum ST01    = 28;      // top two 8087 registers; for complex types

enum reg_t NOREG   = 29;     // no register
85 
/* Byte register numbers.
 * AL..BL have the same values (0..3) as AX..BX; AH..BH take 4..7.
 */
enum
{
    AL      = 0,
    CL      = 1,
    DL      = 2,
    BL      = 3,
    AH      = 4,
    CH      = 5,
    DH      = 6,
    BH      = 7,
}
97 
/* Register masks: each mXXX has the single bit (1 << XXX) set, where XXX is the
 * register number of the same name, so sets of registers can be OR'ed together.
 */
enum
{
    mAX     = 1,
    mCX     = 2,
    mDX     = 4,
    mBX     = 8,
    mSP     = 0x10,
    mBP     = 0x20,
    mSI     = 0x40,
    mDI     = 0x80,

    mR8     = (1 << R8),
    mR9     = (1 << R9),
    mR10    = (1 << R10),
    mR11    = (1 << R11),
    mR12    = (1 << R12),
    mR13    = (1 << R13),
    mR14    = (1 << R14),
    mR15    = (1 << R15),

    mXMM0   = (1 << XMM0),
    mXMM1   = (1 << XMM1),
    mXMM2   = (1 << XMM2),
    mXMM3   = (1 << XMM3),
    mXMM4   = (1 << XMM4),
    mXMM5   = (1 << XMM5),
    mXMM6   = (1 << XMM6),
    mXMM7   = (1 << XMM7),
    // Only XMM0..XMM7 are included; there is no mask bit here for XMM8..XMM15
    XMMREGS = (mXMM0 |mXMM1 |mXMM2 |mXMM3 |mXMM4 |mXMM5 |mXMM6 |mXMM7),

    mES     = (1 << ES),      // 0x1000000
    mPSW    = (1 << PSW),     // 0x2000000

    mSTACK  = (1 << STACK),   // 0x4000000

    mST0    = (1 << ST0),     // 0x8000000
    mST01   = (1 << ST01),    // 0x10000000
}
136 
// Flags for getlvalue (must fit in regm_t)
// They use bits 30 and 31, above every register mask bit defined in this module.
enum RMload  = (1 << 30);
enum RMstore = (1 << 31);

    // To support position independent code,
    // must be able to remove BX from available registers
    // (the _PIC variants are the same sets without mBX; BX is PICREG)
    enum ALLREGS_INIT          = (mAX|mBX|mCX|mDX|mSI|mDI);
    enum ALLREGS_INIT_PIC      = (mAX|mCX|mDX|mSI|mDI);
    enum BYTEREGS_INIT         = (mAX|mBX|mCX|mDX);
    enum BYTEREGS_INIT_PIC     = (mAX|mCX|mDX);

/* We use the same IDXREGS for the 386 as the 8088, because if
   we used ALLREGS, it would interfere with mMSW
 */
enum IDXREGS         = (mBX|mSI|mDI);

// Register sets for float/double values.
// NOTE(review): the _64/_32/_16 suffixes presumably select the 64-, 32- and
// 16-bit code generation models -- confirm against the users of these masks.
enum FLOATREGS_64    = mAX;
enum FLOATREGS2_64   = mDX;
enum DOUBLEREGS_64   = mAX;
enum DOUBLEREGS2_64  = mDX;

enum FLOATREGS_32    = mAX;
enum FLOATREGS2_32   = mDX;
enum DOUBLEREGS_32   = (mAX|mDX);
enum DOUBLEREGS2_32  = (mCX|mBX);

enum FLOATREGS_16    = (mDX|mAX);
enum FLOATREGS2_16   = (mCX|mBX);
enum DOUBLEREGS_16   = (mAX|mBX|mCX|mDX);
167 /*#define _8087REGS (mST0|mST1|mST2|mST3|mST4|mST5|mST6|mST7)*/
168 
169 /* Segment registers    */
// Indices identifying the four segment registers ES, CS, SS, DS (0..3).
enum
{
    SEG_ES  = 0,
    SEG_CS  = 1,
    SEG_SS  = 2,
    SEG_DS  = 3,
}
177 
178 /*********************
179  * Masks for register pairs.
180  * Note that index registers are always LSWs. This is for the convenience
181  * of implementing far pointers.
182  */
183 
// The first branch is disabled by `static if (0)`; the definitions in the
// `else` branch are the ones in effect. The two variants differ only in
// whether mDI is counted in mMSW or in mLSW.
static if (0)
{
// Give us an extra one so we can enregister a long
enum mMSW = mCX|mDX|mDI|mES;       // most significant regs
enum mLSW = mAX|mBX|mSI;           // least significant regs
}
else
{
enum mMSW = mCX|mDX|mES;           // most significant regs
enum mLSW = mAX|mBX|mSI|mDI;       // least significant regs
}
195 
196 /* Return !=0 if there is a SIB byte   */
/**
 * Determine whether a mod/reg/rm byte is followed by a SIB byte.
 *
 * Params:
 *      rm = mod/reg/rm byte (mod in bits 6..7, r/m in bits 0..2)
 * Returns:
 *      1 if the r/m field is 4 and the mod field is not 3, otherwise 0
 */
uint issib(uint rm)
{
    // mod == 3: never a SIB byte, whatever the r/m field holds
    if ((rm & 0xC0) == 0xC0)
        return 0;

    return (rm & 7) == 4;
}
198 
static if (0)
{
// relocation field size is always 32bits
//enum is32bitaddr(x,Iflags) (1)
}
else
{
/**
 * Decide whether an instruction uses 32 bit addressing.
 *
 * Note: even for linux targets, CFaddrsize can be set by the inline
 * assembler.
 *
 * Params:
 *      x = the default addressing choice (historically the value of I32,
 *          which had to be 0 or 1 for this to work)
 *      Iflags = instruction flags; CFaddrsize inverts the default
 * Returns:
 *      true if I64 is true; otherwise x with its sense flipped when
 *      CFaddrsize is set in Iflags
 */
bool is32bitaddr(bool x,code_flags_t Iflags)
{
    if (I64)
        return true;

    const bool hasAddrSizePrefix = (Iflags & CFaddrsize) != 0;
    return x != hasAddrSizePrefix;
}
}
215 
216 
217 /**********************
218  * C library routines.
219  * See callclib().
220  */
221 
/// Identifiers of the C library routines (see callclib()).
/// Members take consecutive values starting at 0, so MAX, the last member,
/// equals the number of routines listed before it.
enum CLIB
{
    lcmp,
    lmul,
    ldiv,
    lmod,
    uldiv,
    ulmod,

    dmul,ddiv,dtst0,dtst0exc,dcmp,dcmpexc,dneg,dadd,dsub,
    fmul,fdiv,ftst0,ftst0exc,fcmp,fcmpexc,fneg,fadd,fsub,

    dbllng,lngdbl,dblint,intdbl,
    dbluns,unsdbl,
    dblulng,
    ulngdbl,
    dblflt,fltdbl,
    dblllng,
    llngdbl,
    dblullng,
    ullngdbl,
    dtst,
    vptrfptr,cvptrfptr,

    _87topsw,fltto87,dblto87,dblint87,dbllng87,
    ftst,
    fcompp,
    ftest,
    ftest0,
    fdiv87,

    // Complex numbers
    cmul,
    cdiv,
    ccmp,

    u64_ldbl,
    ld_u64,
    MAX             // number of entries above; not a routine itself
}
262 
/// Type of the per-instruction flag word `code.Iflags`; holds the CFxxxx bits below.
alias code_flags_t = uint;
enum
{
    CFes        =        1,     // generate an ES: segment override for this instr
    CFjmp16     =        2,     // need 16 bit jump offset (long branch)
    CFtarg      =        4,     // this code is the target of a jump
    CFseg       =        8,     // get segment of immediate value
    CFoff       =     0x10,     // get offset of immediate value
    CFss        =     0x20,     // generate an SS: segment override (not with
                                // CFes at the same time, though!)
    CFpsw       =     0x40,     // we need the flags result after this instruction
    CFopsize    =     0x80,     // prefix with operand size
    CFaddrsize  =    0x100,     // prefix with address size
    CFds        =    0x200,     // need DS override (not with ES, SS, or CS )
    CFcs        =    0x400,     // need CS override
    CFfs        =    0x800,     // need FS override
    // CFgs is both the CFcs and CFfs bits set, so a plain `& CFcs` or `& CFfs`
    // test is also non-zero for a GS override
    CFgs        =   CFcs | CFfs,   // need GS override
    CFwait      =   0x1000,     // If I32 it indicates when to output a WAIT
    CFselfrel   =   0x2000,     // if self-relative
    CFunambig   =   0x4000,     // indicates cannot be accessed by other addressing
                                // modes
    CFtarg2     =   0x8000,     // like CFtarg, but we can't optimize this away
    CFvolatile  =  0x10000,     // volatile reference, do not schedule
    CFclassinit =  0x20000,     // class init code
    CFoffset64  =  0x40000,     // offset is 64 bits
    CFpc32      =  0x80000,     // I64: PC relative 32 bit fixup

    CFvex       =  0x10_0000,    // vex prefix
    CFvex3      =  0x20_0000,    // 3 byte vex prefix

    CFjmp5      =  0x40_0000,    // always a 5 byte jmp
    CFswitch    =  0x80_0000,    // kludge for switch table fixups

    CFindirect  = 0x100_0000,    // OSX32: indirect fixups

    /* These are for CFpc32 fixups, they're the negative of the offset of the fixup
     * from the program counter
     */
    // 3 bit field occupying bits 24..26; its lowest bit has the same value as CFindirect
    CFREL       = 0x700_0000,

    CFSEG       = CFes | CFss | CFds | CFcs | CFfs | CFgs,     // all segment override flags
    CFPREFIX    = CFSEG | CFopsize | CFaddrsize,               // all flags described as prefixes/overrides
}
306 
/**
 * One generated instruction; instances are chained through `next`.
 */
struct code
{
    code *next;                 // next instruction in the list
    code_flags_t Iflags;        // CFxxxx flags

    /* The opcode, viewable either as a plain opcode_t (Iop) or, through Ivex,
     * as the individual fields of a VEX prefix.
     */
    union
    {
        opcode_t Iop;
        struct Svex
        {
          nothrow:
          align(1):
            ubyte  op;

            /* Packed VEX fields, most significant bit first:
             *   [R X B m-mmmm]  [W vvvv L pp]
             * i.e. pp = bits 0..1, L = bit 2, vvvv = bits 3..6, W = bit 7,
             *      mmmmm = bits 8..12, B = bit 13, X = bit 14, R = bit 15
             */
            ushort _pp;

            // Read the field of _pp that starts at bit `shift` and is `mask` wide.
            private ushort getField(uint shift, uint mask) const
            {
                return cast(ushort)((_pp >> shift) & mask);
            }

            // Store the low bits of v into that field; all other bits of _pp are kept.
            private void setField(uint shift, uint mask, uint v)
            {
                const uint others = _pp & ~(mask << shift);
                _pp = cast(ushort)(others | ((v & mask) << shift));
            }

            @property ushort pp() const    { return getField(0, 3); }
            @property void pp(ushort v)    { setField(0, 3, v); }

            @property ushort l() const     { return getField(2, 1); }
            @property void l(ushort v)     { setField(2, 1, v); }

            @property ushort vvvv() const  { return getField(3, 0x0F); }
            @property void vvvv(ushort v)  { setField(3, 0x0F, v); }

            @property ushort w() const     { return getField(7, 1); }
            @property void w(ushort v)     { setField(7, 1, v); }

            @property ushort mmmm() const  { return getField(8, 0x1F); }
            @property void mmmm(ushort v)  { setField(8, 0x1F, v); }

            @property ushort b() const     { return getField(13, 1); }
            @property void b(ushort v)     { setField(13, 1, v); }

            @property ushort x() const     { return getField(14, 1); }
            @property void x(ushort v)     { setField(14, 1, v); }

            @property ushort r() const     { return getField(15, 1); }
            @property void r(ushort v)     { setField(15, 1, v); }

            ubyte pfx; // always 0xC4
        }
        Svex Ivex;
    }

    /* The _EA is the "effective address" for the instruction, and consists of the modregrm byte,
     * the sib byte, and the REX prefix byte. The 16 bit code generator just used the modregrm,
     * the 32 bit x86 added the sib, and the 64 bit one added the rex.
     */
    union
    {
        uint Iea;               // all three bytes viewed as a single value
        struct
        {
            ubyte Irm;          // reg/mode
            ubyte Isib;         // SIB byte
            ubyte Irex;         // REX prefix
        }
    }

    /* IFL1 and IEV1 are the first operand, which usually winds up being the offset to the Effective
     * Address. IFL1 is the tag saying which variant type is in IEV1. IFL2 and IEV2 is the second
     * operand, usually for immediate instructions.
     */

    FL IFL1,IFL2;         // FLavors of 1st, 2nd operands
    evc IEV1;             // 1st operand, if any
    evc IEV2;             // 2nd operand, if any

  nothrow:
    /**
     * OR register number `reg` into the reg field of Irm; bit 3 of `reg`
     * goes into REX_R of Irex. Bits already set are left set.
     */
    void orReg(uint reg)
    {
        const bool needsRexR = (reg & 8) != 0;
        if (needsRexR)
            Irex |= REX_R;
        Irm |= modregrm(0, reg & 7, 0);
    }

    /**
     * Replace the reg field of Irm (and REX_R of Irex) with register number `reg`.
     */
    void setReg(uint reg)
    {
        // clear the old register first, then OR in the new one
        const ubyte regField = modregrm(0, 7, 0);
        Irex &= ~REX_R;
        Irm &= cast(ubyte)~cast(uint)regField;
        orReg(reg);
    }

    /// Returns: true if the opcode is JMP or JMPS
    bool isJumpOP()
    {
        const op = Iop;
        return op == JMP || op == JMPS;
    }

    /// Pretty-print this instruction by passing it to code_print().
    void print()
    {
        code_print(&this);
    }
}
398 
399 /*******************
400  * Some instructions.
401  */
402 
/* Opcode values, as stored in code.Iop.
 * Multi-byte opcodes are written with their bytes concatenated, first byte in
 * the most significant position (e.g. MOVSXb = 0x0FBE).
 */
enum
{
    // Segment override prefixes
    SEGES   = 0x26,
    SEGCS   = 0x2E,
    SEGSS   = 0x36,
    SEGDS   = 0x3E,
    SEGFS   = 0x64,
    SEGGS   = 0x65,

    CMP     = 0x3B,
    CALL    = 0xE8,
    JMP     = 0xE9,    // Intra-Segment Direct
    JMPS    = 0xEB,    // JMP SHORT
    JCXZ    = 0xE3,
    LOOP    = 0xE2,
    LES     = 0xC4,
    LEA     = 0x8D,
    LOCK    = 0xF0,
    INT3    = 0xCC,
    HLT     = 0xF4,
    ENTER   = 0xC8,
    LEAVE   = 0xC9,
    MOVSXb  = 0x0FBE,
    MOVSXw  = 0x0FBF,
    MOVZXb  = 0x0FB6,
    MOVZXw  = 0x0FB7,

    STOSB   = 0xAA,
    STOS    = 0xAB,

    STO     = 0x89,
    LOD     = 0x8B,

    // Conditional jumps 0x70..0x7F.
    // JC/JB are two names for 0x72, JNC/JAE two names for 0x73.
    JO      = 0x70,
    JNO     = 0x71,
    JC      = 0x72,
    JB      = 0x72,
    JNC     = 0x73,
    JAE     = 0x73,
    JE      = 0x74,
    JNE     = 0x75,
    JBE     = 0x76,
    JA      = 0x77,
    JS      = 0x78,
    JNS     = 0x79,
    JP      = 0x7A,
    JNP     = 0x7B,
    JL      = 0x7C,
    JGE     = 0x7D,
    JLE     = 0x7E,
    JG      = 0x7F,

    UD2     = 0x0F0B,
    PAUSE   = 0xF390,  // aka REP NOP

    // The next three reuse segment prefix values as pseudo opcodes.

    // NOP is used as a placeholder in the linked list of instructions, no
    // actual code will be generated for it.
    NOP     = SEGCS,   // don't use 0x90 because the
                       // Windows stuff wants to output 0x90's

    ASM     = SEGSS,   // string of asm bytes

    ESCAPE  = SEGDS,   // marker that special information is here
                       // (Iop2 is the type of special information)
    ENDBR32 = 0xF30F1EFB,
    ENDBR64 = 0xF30F1EFA,
}
470 
471 
// Mask selecting the low byte of an opcode; the ESCxxxx kinds below are all
// multiples of 0x100, so they sit above the bits this mask keeps.
enum ESCAPEmask = 0xFF; // code.Iop & ESCAPEmask ==> actual Iop

// Kinds of special information for ESCAPE. Each value is (n << 8), n = 1..14.
enum
{
    ESClinnum   = (1 << 8),      // line number information
    ESCctor     = (2 << 8),      // object is constructed
    ESCdtor     = (3 << 8),      // object is destructed
    ESCmark     = (4 << 8),      // mark eh stack
    ESCrelease  = (5 << 8),      // release eh stack
    ESCoffset   = (6 << 8),      // set code offset for eh
    ESCadjesp   = (7 << 8),      // adjust ESP by IEV2.Vint
    ESCmark2    = (8 << 8),      // mark eh stack
    ESCrelease2 = (9 << 8),      // release eh stack
    ESCframeptr = (10 << 8),     // replace with load of frame pointer
    ESCdctor    = (11 << 8),     // D object is constructed
    ESCddtor    = (12 << 8),     // D object is destructed
    ESCadjfpu   = (13 << 8),     // adjust fpustackused by IEV2.Vint
    ESCfixesp   = (14 << 8),     // reset ESP to end of local frame
}
491 
492 /*********************************
493  * Macros to ease generating code
494  * modregrm:    generate mod reg r/m field
495  * modregxrm:   reg could be R8..R15
496  * modregrmx:   rm could be R8..R15
497  * modregxrmx:  reg or rm could be R8..R15
498  * NEWREG:      change reg field of x to r
499  * genorreg:    OR  t,f
500  */
501 
/**
 * Build a mod/reg/rm byte.
 *
 * Params:
 *      m  = mod field, placed in bits 6..7
 *      r  = reg field, placed in bits 3..5
 *      rm = r/m field, placed in bits 0..2
 * Returns:
 *      the three fields OR'ed together and truncated to one byte;
 *      the arguments are not masked first
 */
ubyte modregrm(uint m, uint r, uint rm)
{
    const uint bits = (m << 6) | (r << 3) | rm;
    return cast(ubyte) bits;
}

/**
 * Like modregrm(), but `r` may be 8..15: bit 3 of `r` is returned as bit 18
 * of the result and only the low 3 bits of `r` go into the mod/reg/rm byte.
 * `rm` is passed through unmasked.
 */
uint modregxrm(uint m, uint r, uint rm)
{
    const uint regHigh = (r & 8) << 15;
    return regHigh | modregrm(m, r & 7, rm);
}

/**
 * Like modregrm(), but `rm` may be 8..15: bit 3 of `rm` is returned as bit 16
 * of the result and only the low 3 bits of `rm` go into the mod/reg/rm byte.
 * `r` is passed through unmasked.
 */
uint modregrmx(uint m, uint r, uint rm)
{
    const uint rmHigh = (rm & 8) << 13;
    return rmHigh | modregrm(m, r, rm & 7);
}

/**
 * Like modregrm(), but both `r` and `rm` may be 8..15: bit 3 of `r` goes to
 * bit 18 and bit 3 of `rm` goes to bit 16 of the result.
 */
uint modregxrmx(uint m, uint r, uint rm)
{
    const uint regHigh = (r & 8) << 15;
    const uint rmHigh  = (rm & 8) << 13;
    return regHigh | rmHigh | modregrm(m, r & 7, rm & 7);
}
506 
/**
 * Replace the REX_R bit of REX byte `x` with bit 3 of register number `r`.
 * All other bits of `x` are kept.
 */
void NEWREXR(ref ubyte x, uint r)
{
    const uint others = x & ~REX_R;
    const uint rexR   = (r & 8) >> 1;      // bit 3 of r moved to the REX_R position
    x = cast(ubyte)(others | rexR);
}
/**
 * Change the reg field (bits 3..5) of mod/reg/rm byte `x` to `r`.
 * `r` is not masked: bits of `r` above its low 3 are OR'ed into bits 6..7 of `x`.
 */
void NEWREG (ref ubyte x, uint r)
{
    enum uint regField = 7 << 3;
    const uint others = x & ~regField;
    x = cast(ubyte)(others | (r << 3));
}
/**
 * Set the register number in the reg field of instruction `c` to `r`:
 * bit 3 of `r` goes into REX_R of c.Irex, the low 3 bits into c.Irm.
 */
void code_newreg(code* c, uint r)
{
    const uint low3 = r & 7;
    NEWREG(c.Irm, low3);
    NEWREXR(c.Irex, r);
}
510 
511 //#define genorreg(c,t,f)         genregs((c),0x09,(f),(t))
512 
// REX prefix: the base value REX OR'ed with any of the REX_x bits.
// code.Irex is manipulated with the REX_x bits (see code.orReg / code.setReg).
enum
{
    REX     = 0x40,        // REX prefix byte, OR'd with the following bits:
    REX_W   = 8,           // 0 = default operand size, 1 = 64 bit operand size
    REX_R   = 4,           // high bit of reg field of modregrm
    REX_X   = 2,           // high bit of sib index reg
    REX_B   = 1,           // high bit of rm field, sib base reg, or opcode reg
}
521 
/**
 * Pack the fields for the byte [R vvvv L pp] of a VEX prefix.
 *
 * Returns:
 *      r in bit 7, vvvv in bits 3..6, l in bit 2, pp in bits 0..1
 */
uint VEX2_B1(code.Svex ivex)
{
    uint result = ivex.pp;
    result |= ivex.l    << 2;
    result |= ivex.vvvv << 3;
    result |= ivex.r    << 7;
    return result;
}
530 
/**
 * Pack the fields for the byte [R X B m-mmmm] of a VEX prefix.
 *
 * Returns:
 *      r in bit 7, x in bit 6, b in bit 5, mmmm in bits 0..4
 */
uint VEX3_B1(code.Svex ivex)
{
    uint result = ivex.mmmm;
    result |= ivex.b << 5;
    result |= ivex.x << 6;
    result |= ivex.r << 7;
    return result;
}
539 
/**
 * Pack the fields for the byte [W vvvv L pp] of a VEX prefix.
 *
 * Returns:
 *      w in bit 7, vvvv in bits 3..6, l in bit 2, pp in bits 0..1
 */
uint VEX3_B2(code.Svex ivex)
{
    uint result = ivex.pp;
    result |= ivex.l    << 2;
    result |= ivex.vvvv << 3;
    result |= ivex.w    << 7;
    return result;
}
548 
/**
 * Returns: true if config.target_cpu is TARGET_80286 or lower.
 *
 * NOTE(review): going by the name, this presumably tells the code generator
 * that an FWAIT has to be added for such CPUs -- confirm at the call sites.
 */
@trusted
bool ADDFWAIT()
{
    return !(config.target_cpu > TARGET_80286);
}
551 
552 /************************************
553  */
554 
555 
/// Describes what one 8087 stack slot (or spilled 8087 value) holds; see Globals87.
struct NDP
{
    elem *e;                    // which elem is stored here (NULL if none)
    uint offset;            // offset from e (used for complex numbers)
}
561 
/// Bookkeeping for the 8087 register stack: what each of the 8 slots holds,
/// how many slots are in use, and the values that were spilled to memory.
struct Globals87
{
    NDP[8] stack;              // 8087 stack
    int stackused = 0;         // number of items on the 8087 stack

    Barray!NDP save;           // 8087 values spilled to memory
}