/**
 * Constants and data structures specific to the x86 platform.
 *
 * Copyright:   Copyright (C) 1985-1998 by Symantec
 *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/code_x86.d, backend/code_x86.d)
 * Documentation:  https://dlang.org/phobos/dmd_backend_code_x86.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/code_x86.d
 */

module dmd.backend.code_x86;

// Online documentation: https://dlang.org/phobos/dmd_backend_code_x86.html

import dmd.backend.cdef;
import dmd.backend.cc : config, FL;
import dmd.backend.code;
import dmd.backend.codebuilder : CodeBuilder;
import dmd.backend.el : elem;
import dmd.backend.ty : I64;
import dmd.backend.barray;

nothrow:
@safe:

alias opcode_t = uint;                  // CPU opcode
enum opcode_t NoOpcode = 0xFFFF;        // not a valid opcode_t

/* Register definitions
 *
 * 0..15 are the general purpose registers, 16..23 are XMM0..XMM7.
 * NOTE(review): XMM15 is 31, so XMM8..XMM14 presumably occupy 24..30, which
 * are the same numbers used for ES, PSW, STACK, ST0, ST01 and NOREG further
 * down - confirm how callers keep the two meanings apart.
 */

enum
{
    AX      = 0,
    CX      = 1,
    DX      = 2,
    BX      = 3,
    SP      = 4,
    BP      = 5,
    SI      = 6,
    DI      = 7,

    // #defining R12-R15 interfere with setjmps' _JUMP_BUFFER members

    R8      = 8,
    R9      = 9,
    R10     = 10,
    R11     = 11,
    R12     = 12,
    R13     = 13,
    R14     = 14,
    R15     = 15,

    XMM0    = 16,
    XMM1    = 17,
    XMM2    = 18,
    XMM3    = 19,
    XMM4    = 20,
    XMM5    = 21,
    XMM6    = 22,
    XMM7    = 23,
    /* There are also XMM8..XMM14 */
    XMM15   = 31,
}

/**
 * Test whether a register number lies in the XMM range.
 * Params:
 *      reg = register number
 * Returns:
 *      true if XMM0 <= reg <= XMM15, i.e. 16..31 inclusive
 * NOTE(review): ES, PSW, STACK, ST0, ST01 and NOREG (24..29) fall inside that
 * range, so they also yield true.
 */
bool isXMMreg(reg_t reg) pure { return reg >= XMM0 && reg <= XMM15; }

enum PICREG = BX;       // the register taken out of the allocatable sets by the *_PIC constants below

enum ES     = 24;

enum NUMGENREGS = 16;

// fishy naming as it covers XMM7 but not XMM15
// currently only used as a replacement for mES in cgcod.c
enum NUMREGS = 25;

enum PSW     = 25;
enum STACK   = 26;      // top of stack
enum ST0     = 27;      // 8087 top of stack register
enum ST01    = 28;      // top two 8087 registers; for complex types

enum reg_t NOREG   = 29;     // no register

/* Byte registers */
enum
{
    AL      = 0,
    CL      = 1,
    DL      = 2,
    BL      = 3,
    AH      = 4,
    CH      = 5,
    DH      = 6,
    BH      = 7,
}

/* Register masks: every mXX is (1 << XX), one bit per register number.
 * The trailing hex values are the evaluated results of the shifts.
 */
enum
{
    mAX     = (1 << AX),      // 0x01
    mCX     = (1 << CX),      // 0x02
    mDX     = (1 << DX),      // 0x04
    mBX     = (1 << BX),      // 0x08
    mSP     = (1 << SP),      // 0x10
    mBP     = (1 << BP),      // 0x20
    mSI     = (1 << SI),      // 0x40
    mDI     = (1 << DI),      // 0x80

    mR8     = (1 << R8),
    mR9     = (1 << R9),
    mR10    = (1 << R10),
    mR11    = (1 << R11),
    mR12    = (1 << R12),
    mR13    = (1 << R13),
    mR14    = (1 << R14),
    mR15    = (1 << R15),

    mXMM0   = (1 << XMM0),
    mXMM1   = (1 << XMM1),
    mXMM2   = (1 << XMM2),
    mXMM3   = (1 << XMM3),
    mXMM4   = (1 << XMM4),
    mXMM5   = (1 << XMM5),
    mXMM6   = (1 << XMM6),
    mXMM7   = (1 << XMM7),
    // only XMM0..XMM7 are part of this set
    XMMREGS = (mXMM0 |mXMM1 |mXMM2 |mXMM3 |mXMM4 |mXMM5 |mXMM6 |mXMM7),

    mES     = (1 << ES),      // 0x1000000
    mPSW    = (1 << PSW),     // 0x2000000

    mSTACK  = (1 << STACK),   // 0x4000000

    mST0    = (1 << ST0),     // 0x8000000
    mST01   = (1 << ST01),    // 0x10000000
}

// Flags for getlvalue (must fit in regm_t)
enum RMload  = (1 << 30);
enum RMstore = (1 << 31);

// To support position independent code,
// must be able to remove BX from available registers
enum ALLREGS_INIT          = (mAX|mBX|mCX|mDX|mSI|mDI);
enum ALLREGS_INIT_PIC      = (mAX|mCX|mDX|mSI|mDI);
enum BYTEREGS_INIT         = (mAX|mBX|mCX|mDX);
enum BYTEREGS_INIT_PIC     = (mAX|mCX|mDX);

/* We use the same IDXREGS for the 386 as the 8088, because if
   we used ALLREGS, it would interfere with mMSW
 */
enum IDXREGS               = (mBX|mSI|mDI);

enum FLOATREGS_64          = mAX;
enum FLOATREGS2_64         = mDX;
enum DOUBLEREGS_64         = mAX;
enum DOUBLEREGS2_64        = mDX;

enum FLOATREGS_32          = mAX;
enum FLOATREGS2_32         = mDX;
enum DOUBLEREGS_32         = (mAX|mDX);
enum DOUBLEREGS2_32        = (mCX|mBX);

enum FLOATREGS_16          = (mDX|mAX);
enum FLOATREGS2_16         = (mCX|mBX);
enum DOUBLEREGS_16         = (mAX|mBX|mCX|mDX);

/*#define _8087REGS
(mST0|mST1|mST2|mST3|mST4|mST5|mST6|mST7)*/

/* Segment registers */
enum
{
    SEG_ES  = 0,
    SEG_CS  = 1,
    SEG_SS  = 2,
    SEG_DS  = 3,
}

/*********************
 * Masks for register pairs.
 * Index registers are always placed in the least significant word (LSW) set,
 * which keeps the implementation of far pointers convenient.
 */

static if (0)
{
    // Variant with DI moved to the MSW set: an extra one so we can enregister a long
    enum mMSW = mCX|mDX|mDI|mES;       // most significant regs
    enum mLSW = mAX|mBX|mSI;           // least significant regs
}
else
{
    enum mMSW = mCX|mDX|mES;           // most significant regs
    enum mLSW = mAX|mBX|mSI|mDI;       // least significant regs
}

/**
 * Decide whether a modregrm byte is followed by a SIB byte.
 * Params:
 *      rm = modregrm byte
 * Returns:
 *      non-zero (1) when the low three bits of rm are 4 and the top two bits
 *      are not both set; 0 otherwise
 */
uint issib(uint rm)
{
    const bool rmFieldIs4   = (rm & 7) == 4;
    const bool topBitsAreC0 = (rm & 0xC0) == 0xC0;
    return rmFieldIs4 && !topBitsAreC0;
}

static if (0)
{
    // relocation field size is always 32bits
    //enum is32bitaddr(x,Iflags) (1)
}
else
{
    /**
     * Params:
     *      x = baseline answer; it is inverted when CFaddrsize is set
     *      Iflags = instruction flags, only CFaddrsize is examined
     * Returns:
     *      true whenever I64 is true; otherwise x exclusive-or'ed with
     *      "CFaddrsize is set in Iflags"
     *
     * This stems from a macro that only worked when x was 0 or 1, which held
     * for the definition of I32; if that definition changes this needs to be
     * revisited as well.
     *
     * Note: even for linux targets, CFaddrsize can be set by the inline
     * assembler.
     */
    bool is32bitaddr(bool x,code_flags_t Iflags)
    {
        if (I64)
            return true;
        const bool addrSizeFlag = (Iflags & CFaddrsize) != 0;
        return x != addrSizeFlag;       // bool inequality is exclusive-or
    }
}


/**********************
 * C library routines.
 * See callclib().
*/

// The ordinal of every member is significant: keep the order, append only before MAX.
enum CLIB
{
    lcmp,
    lmul,
    ldiv,
    lmod,
    uldiv,
    ulmod,

    dmul,ddiv,dtst0,dtst0exc,dcmp,dcmpexc,dneg,dadd,dsub,
    fmul,fdiv,ftst0,ftst0exc,fcmp,fcmpexc,fneg,fadd,fsub,

    dbllng,lngdbl,dblint,intdbl,
    dbluns,unsdbl,
    dblulng,
    ulngdbl,
    dblflt,fltdbl,
    dblllng,
    llngdbl,
    dblullng,
    ullngdbl,
    dtst,
    vptrfptr,cvptrfptr,

    _87topsw,fltto87,dblto87,dblint87,dbllng87,
    ftst,
    fcompp,
    ftest,
    ftest0,
    fdiv87,

    // Complex numbers
    cmul,
    cdiv,
    ccmp,

    u64_ldbl,
    ld_u64,
    MAX
}

/* Per-instruction flag bits, stored in code.Iflags */
alias code_flags_t = uint;
enum
{
    CFes        =        1,     // generate an ES: segment override for this instr
    CFjmp16     =        2,     // need 16 bit jump offset (long branch)
    CFtarg      =        4,     // this code is the target of a jump
    CFseg       =        8,     // get segment of immediate value
    CFoff       =     0x10,     // get offset of immediate value
    CFss        =     0x20,     // generate an SS: segment override (not with
                                // CFes at the same time, though!)
    CFpsw       =     0x40,     // we need the flags result after this instruction
    CFopsize    =     0x80,     // prefix with operand size
    CFaddrsize  =    0x100,     // prefix with address size
    CFds        =    0x200,     // need DS override (not with ES, SS, or CS )
    CFcs        =    0x400,     // need CS override
    CFfs        =    0x800,     // need FS override
    CFgs        = CFcs | CFfs,  // need GS override (both the CS and FS bits set)
    CFwait      =   0x1000,     // If I32 it indicates when to output a WAIT
    CFselfrel   =   0x2000,     // if self-relative
    CFunambig   =   0x4000,     // indicates cannot be accessed by other addressing
                                // modes
    CFtarg2     =   0x8000,     // like CFtarg, but we can't optimize this away
    CFvolatile  =  0x10000,     // volatile reference, do not schedule
    CFclassinit =  0x20000,     // class init code
    CFoffset64  =  0x40000,     // offset is 64 bits
    CFpc32      =  0x80000,     // I64: PC relative 32 bit fixup

    CFvex       =  0x10_0000,   // vex prefix
    CFvex3      =  0x20_0000,   // 3 byte vex prefix

    CFjmp5      =  0x40_0000,   // always a 5 byte jmp
    CFswitch    =  0x80_0000,   // kludge for switch table fixups

    CFindirect  = 0x100_0000,   // OSX32: indirect fixups

    /* These are for CFpc32 fixups, they're the negative of the offset of the fixup
     * from the program counter
     * NOTE(review): this mask includes the CFindirect bit (0x100_0000) -
     * confirm the two are never needed on the same instruction.
     */
    CFREL       = 0x700_0000,

    CFSEG       = CFes | CFss | CFds | CFcs | CFfs | CFgs,     // every segment override bit
    CFPREFIX    = CFSEG | CFopsize | CFaddrsize,               // every prefix-producing bit
}

/* One node of the singly linked instruction list (see `next`). */
struct code
{
    code *next;
    code_flags_t Iflags;

    /* The opcode, viewable either as a plain opcode_t or as the fields of a VEX prefix */
    union
    {
        opcode_t Iop;
        struct Svex
        {
          nothrow:
          align(1):
            ubyte  op;

            // [R X B m-mmmm] [W vvvv L pp]
            ushort _pp;

            ubyte pfx; // always 0xC4

            /* Bit positions inside _pp, as read and written by the accessors below:
             *   pp    bits 0..1
             *   l     bit  2
             *   vvvv  bits 3..6
             *   w     bit  7
             *   mmmm  bits 8..12
             *   b     bit  13
             *   x     bit  14
             *   r     bit  15
             */

            // Read the field of _pp that is `mask` wide and starts at bit `shift`
            private ushort getBits(uint shift, uint mask) const
            {
                return cast(ushort)((_pp >> shift) & mask);
            }

            // Replace the field of _pp that is `mask` wide and starts at bit `shift`;
            // bits of `value` outside `mask` are discarded
            private void setBits(uint shift, uint mask, ushort value)
            {
                const uint cleared  = _pp & ~(mask << shift);
                const uint inserted = (value & mask) << shift;
                _pp = cast(ushort)(cleared | inserted);
            }

            @property ushort pp() const { return getBits(0, 3); }
            @property void pp(ushort v) { setBits(0, 3, v); }

            @property ushort l() const { return getBits(2, 1); }
            @property void l(ushort v) { setBits(2, 1, v); }

            @property ushort vvvv() const { return getBits(3, 0x0F); }
            @property void vvvv(ushort v) { setBits(3, 0x0F, v); }

            @property ushort w() const { return getBits(7, 1); }
            @property void w(ushort v) { setBits(7, 1, v); }

            @property ushort mmmm() const { return getBits(8, 0x1F); }
            @property void mmmm(ushort v) { setBits(8, 0x1F, v); }

            @property ushort b() const { return getBits(13, 1); }
            @property void b(ushort v) { setBits(13, 1, v); }

            @property ushort x() const { return getBits(14, 1); }
            @property void x(ushort v) { setBits(14, 1, v); }

            @property ushort r() const { return getBits(15, 1); }
            @property void r(ushort v) { setBits(15, 1, v); }
        }
        Svex Ivex;
    }

    /* The _EA is the "effective address" for the instruction, and consists of the modregrm byte,
     * the sib byte, and the REX prefix byte. The 16 bit code generator just used the modregrm,
     * the 32 bit x86 added the sib, and the 64 bit one added the rex.
     */
    union
    {
        uint Iea;               // all of the bytes below, viewed as one value
        struct
        {
            ubyte Irm;          // reg/mode
            ubyte Isib;         // SIB byte
            ubyte Irex;         // REX prefix
        }
    }

    /* IFL1 and IEV1 are the first operand, which usually winds up being the offset to the Effective
     * Address. IFL1 is the tag saying which variant type is in IEV1. IFL2 and IEV2 is the second
     * operand, usually for immediate instructions.
     */

    FL IFL1,IFL2;               // FLavors of 1st, 2nd operands
    evc IEV1;                   // 1st operand, if any
    evc IEV2;                   // 2nd operand, if any

  nothrow:

    /* OR register number `reg` (0..15) into the instruction: bit 3 sets REX_R in Irex,
     * the low three bits are OR'ed into the reg field of Irm. Existing bits are kept.
     */
    void orReg(uint reg)
    {
        if ((reg & 8) != 0)
            Irex |= REX_R;
        Irm |= modregrm(0, reg & 7, 0);
    }

    /* Clear REX_R and the reg field of Irm, then store register number `reg` there */
    void setReg(uint reg)
    {
        const ubyte keepMask = cast(ubyte)~cast(uint)modregrm(0, 7, 0);   // everything but the reg field
        Irm &= keepMask;
        Irex &= ~REX_R;
        orReg(reg);
    }

    /* true when the opcode is JMP or JMPS */
    bool isJumpOP()
    {
        if (Iop == JMP)
            return true;
        return Iop == JMPS;
    }

    void print()             // pretty-printer
    {
        code_print(&this);
    }
}

/*******************
 * Some instructions.
 */

enum
{
    SEGES   = 0x26,
    SEGCS   = 0x2E,
    SEGSS   = 0x36,
    SEGDS   = 0x3E,
    SEGFS   = 0x64,
    SEGGS   = 0x65,

    CMP     = 0x3B,
    CALL    = 0xE8,
    JMP     = 0xE9,      // Intra-Segment Direct
    JMPS    = 0xEB,      // JMP SHORT
    JCXZ    = 0xE3,
    LOOP    = 0xE2,
    LES     = 0xC4,
    LEA     = 0x8D,
    LOCK    = 0xF0,
    INT3    = 0xCC,
    HLT     = 0xF4,
    ENTER   = 0xC8,
    LEAVE   = 0xC9,
    MOVSXb  = 0x0FBE,
    MOVSXw  = 0x0FBF,
    MOVZXb  = 0x0FB6,
    MOVZXw  = 0x0FB7,

    STOSB   = 0xAA,
    STOS    = 0xAB,

    STO     = 0x89,
    LOD     = 0x8B,

    JO      = 0x70,
    JNO     = 0x71,
    JC      = 0x72,
    JB      = 0x72,      // same opcode as JC
    JNC     = 0x73,
    JAE     = 0x73,      // same opcode as JNC
    JE      = 0x74,
    JNE     = 0x75,
    JBE     = 0x76,
    JA      = 0x77,
    JS      = 0x78,
    JNS     = 0x79,
    JP      = 0x7A,
    JNP     = 0x7B,
    JL      = 0x7C,
    JGE     = 0x7D,
    JLE     = 0x7E,
    JG      = 0x7F,

    UD2     = 0x0F0B,
    PAUSE   = 0xF390,    // aka REP NOP

    // NOP is used as a placeholder in the linked list of instructions, no
    // actual code will be generated for it.
    NOP     = SEGCS,     // don't use 0x90 because the
                         // Windows stuff wants to output 0x90's

    ASM     = SEGSS,     // string of asm bytes

    ESCAPE  = SEGDS,     // marker that special information is here
                         // (Iop2 is the type of special information)
    ENDBR32 = 0xF30F1EFB,
    ENDBR64 = 0xF30F1EFA,
}


enum ESCAPEmask = 0xFF; // code.Iop & ESCAPEmask ==> actual Iop

/* Kinds of special information; each value sits above the low 8 bits covered by ESCAPEmask */
enum
{
    ESClinnum   = (1 << 8),      // line number information
    ESCctor     = (2 << 8),      // object is constructed
    ESCdtor     = (3 << 8),      // object is destructed
    ESCmark     = (4 << 8),      // mark eh stack
    ESCrelease  = (5 << 8),      // release eh stack
    ESCoffset   = (6 << 8),      // set code offset for eh
    ESCadjesp   = (7 << 8),      // adjust ESP by IEV2.Vint
    ESCmark2    = (8 << 8),      // mark eh stack
    ESCrelease2 = (9 << 8),      // release eh stack
    ESCframeptr = (10 << 8),     // replace with load of frame pointer
    ESCdctor    = (11 << 8),     // D object is constructed
    ESCddtor    = (12 << 8),     // D object is destructed
    ESCadjfpu   = (13 << 8),     // adjust fpustackused by IEV2.Vint
    ESCfixesp   = (14 << 8),     // reset ESP to end of local frame
}

/*********************************
 * Helpers (formerly macros) to ease generating code
 * modregrm:    generate mod reg r/m field
 * modregxrm:   reg could be R8..R15
 * modregrmx:   rm could be R8..R15
 * modregxrmx:  reg or rm could be R8..R15
 * NEWREG:      change reg field of x to r
 * genorreg:    OR  t,f
 *
 * The three "x" variants return a value laid out like code.Iea: the modregrm
 * byte in bits 0..7, with bit 3 of reg placed at bit 18 and bit 3 of rm at bit 16.
 */

ubyte modregrm (uint m, uint r, uint rm)
{
    const uint packed = (m << 6) | (r << 3) | rm;
    return cast(ubyte) packed;          // only the low 8 bits survive
}

uint modregxrm (uint m, uint r, uint rm)
{
    const uint regHigh = (r & 8) << 15;
    return regHigh | modregrm(m, r & 7, rm);
}

uint modregrmx (uint m, uint r, uint rm)
{
    const uint rmHigh = (rm & 8) << 13;
    return rmHigh | modregrm(m, r, rm & 7);
}

uint modregxrmx(uint m, uint r, uint rm)
{
    const uint regHigh = (r & 8) << 15;
    const uint rmHigh  = (rm & 8) << 13;
    return regHigh | rmHigh | modregrm(m, r & 7, rm & 7);
}

/* Replace the REX_R bit of x with bit 3 of r */
void NEWREXR(ref ubyte x, uint r)
{
    x = cast(ubyte)((x & ~REX_R) | ((r & 8) >> 1));
}

/* Replace bits 3..5 of x with r; r is not masked here, so callers pass 0..7 */
void NEWREG (ref ubyte x, uint r)
{
    const uint others = x & ~(7 << 3);
    x = cast(ubyte)(others | (r << 3));
}

/* Make register number r (0..15) the reg operand of *c: the low three bits replace
 * the reg field of Irm, bit 3 replaces REX_R in Irex.
 */
void code_newreg(code* c, uint r)
{
    NEWREG(c.Irm, r & 7);
    NEWREXR(c.Irex, r);
}

//#define genorreg(c,t,f)         genregs((c),0x09,(f),(t))

enum
{
    REX     = 0x40,         // REX prefix byte, OR'd with the following bits:
    REX_W   = 8,            // 0 = default operand size, 1 = 64 bit operand size
    REX_R   = 4,            // high bit of reg field of modregrm
    REX_X   = 2,            // high bit of sib index reg
    REX_B   = 1,            // high bit of rm field, sib base reg, or opcode reg
}

/* Assemble a byte from the VEX fields as [r vvvv l pp]:
 * r in bit 7, vvvv in bits 3..6, l in bit 2, pp in bits 0..1.
 */
uint VEX2_B1(code.Svex ivex)
{
    uint payload = ivex.pp;
    payload |= ivex.l    << 2;
    payload |= ivex.vvvv << 3;
    payload |= ivex.r    << 7;
    return payload;
}

/* Assemble a byte from the VEX fields as [r x b mmmmm]:
 * r in bit 7, x in bit 6, b in bit 5, mmmm in bits 0..4.
 */
uint VEX3_B1(code.Svex ivex)
{
    uint payload = ivex.mmmm;
    payload |= ivex.b << 5;
    payload |= ivex.x << 6;
    payload |= ivex.r << 7;
    return payload;
}

/* Assemble a byte from the VEX fields as [w vvvv l pp]:
 * w in bit 7, vvvv in bits 3..6, l in bit 2, pp in bits 0..1.
 */
uint VEX3_B2(code.Svex ivex)
{
    uint payload = ivex.pp;
    payload |= ivex.l    << 2;
    payload |= ivex.vvvv << 3;
    payload |= ivex.w    << 7;
    return payload;
}

/* Returns: true when the configured target CPU is TARGET_80286 or lower.
 * NOTE(review): judging by the name this decides whether an FWAIT has to be
 * emitted - confirm at the call sites.
 */
@trusted
bool ADDFWAIT()
{
    const bool atMost286 = config.target_cpu <= TARGET_80286;
    return atMost286;
}

/************************************
 */


/* Describes what one 8087 stack slot (or one spilled value) holds */
struct NDP
{
    elem *e;                    // which elem is stored here (NULL if none)
    uint offset;                // offset from e (used for complex numbers)
}

/* Bookkeeping for the 8087 register stack */
struct Globals87
{
    NDP[8] stack;              // 8087 stack
    int stackused = 0;         // number of items on the 8087 stack

    Barray!NDP save;           // 8087 values spilled to memory
}