/**
 * Code generation 4
 *
 * Includes:
 * - assignment variations of operators (+= -= *= /= %= <<= >>=)
 * - integer comparison (< > <= >=)
 * - converting integers to a different size (e.g. short to int)
 * - bit instructions (bit scan, population count)
 *
 * Compiler implementation of the
 * $(LINK2 https://www.dlang.org, D programming language).
 *
 * Mostly code generation for assignment operators.
 *
 * Copyright:   Copyright (C) 1985-1998 by Symantec
 *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod4.d, backend/cod4.d)
 * Documentation:  https://dlang.org/phobos/dmd_backend_cod4.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod4.d
 */

module dmd.backend.cod4;

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.global;
import dmd.backend.oper;
import dmd.backend.ty;
import dmd.backend.evalu8 : el_toldoubled;
import dmd.backend.xmm;


nothrow:
@safe:

import dmd.backend.cg : datafl;

// Indexed by register number; used by the 16 bit double store loop in cdeq()
// to step from one register of the double to the next (reg = dblreg[reg]).
                        /* AX,CX,DX,BX                  */
__gshared const reg_t[4] dblreg = [ BX,DX,NOREG,CX ];

import dmd.backend.divcoeff : choose_multiplier, udiv_coefficients;

/*******************************
 * Return number of times symbol s appears in tree e.
 * Params:
 *      s = symbol to look for
 *      e = expression tree to search
 * Returns:
 *      count of OPvar leaves in e whose Vsym is s
 */

@trusted
private int intree(Symbol *s,elem *e)
{
    // Interior node: always descend into E1, and into E2 only for binary operators
    if (!OTleaf(e.Eoper))
        return intree(s,e.EV.E1) + (OTbinary(e.Eoper) ? intree(s,e.EV.E2) : 0);
    // Leaf: contributes 1 if it is a reference to s, else 0
    return e.Eoper == OPvar && e.EV.Vsym == s;
}

/***********************************
 * Determine if expression e can be evaluated directly into register
 * variable s.
 * Have to be careful about things like x=x+x+x, and x=a+x.
 * Params:
 *      s = the register variable that would receive the result
 *      e = expression to be evaluated
 * Returns:
 *      !=0     can
 *      0       can't
 */

@trusted
int doinreg(Symbol *s, elem *e)
{
    int in_ = 0;
    OPER op;

 L1:
    op = e.Eoper;
    // Accept immediately if e is an indirection, a call, a leaf, does not
    // mention s at all, or is a unary operator applied directly to a leaf.
    if (op == OPind ||
        OTcall(op)  ||
        OTleaf(op) ||
        (in_ = intree(s,e)) == 0 ||
        (OTunary(op) && OTleaf(e.EV.E1.Eoper))
       )
        return 1;
    if (in_ == 1)
    {
        // s appears exactly once: for these operators it is acceptable as
        // long as the single occurrence is in the left operand, so keep
        // walking down the left side.
        switch (op)
        {
            case OPadd:
            case OPmin:
            case OPand:
            case OPor:
            case OPxor:
            case OPshl:
            case OPmul:
                if (!intree(s,e.EV.E2))
                {
                    e = e.EV.E1;
                    goto L1;
                }
                break;

            default:
                break;
        }
    }
    return 0;
}

/****************************
 * Return code for saving common subexpressions if EA
 * turns out to be a register.
 * This is called just before modifying an EA.
 * Params:
 *      cdb = code sink passed through to getregs()
 *      c = instruction whose Irm/Irex describe the EA
 */

void modEA(ref CodeBuilder cdb,code *c)
{
    if ((c.Irm & 0xC0) == 0xC0)        // addressing mode refers to a register
    {
        reg_t reg = c.Irm & 7;
        if (c.Irex & REX_B)            // REX.B supplies bit 3 of the register number
        {   reg |= 8;
            assert(I64);
        }
        getregs(cdb,mask(reg));
    }
}


/****************************
 * Gen code for op= for doubles.
*/
// Handles the floating point op= operators listed in clibtab below. When the
// 8087 is not used inline, the operation is done by a runtime library call
// selected from clibtab, and the result is stored back into the lvalue.
@trusted
private void opassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs,OPER op)
{
    assert(config.exe & EX_windos);  // for targets that may not have an 8087

    // Library routine for each operator, indexed by (op - OPpostinc)
    static immutable uint[OPdivass - OPpostinc + 1] clibtab =
    /* OPpostinc,OPpostdec,OPeq,OPaddass,OPminass,OPmulass,OPdivass       */
    [  CLIB.dadd, CLIB.dsub, cast(uint)-1,  CLIB.dadd,CLIB.dsub,CLIB.dmul,CLIB.ddiv ];

    if (config.inline8087)
    {
        opass87(cdb,e,pretregs);
        return;
    }

    code cs;
    regm_t retregs2,retregs,idxregs;

    uint clib = clibtab[op - OPpostinc];
    elem *e1 = e.EV.E1;
    tym_t tym = tybasic(e1.Ety);
    getlvalue(cdb,&cs,e1,DOUBLEREGS | mBX | mCX);

    if (tym == TYfloat)
    {
        clib += CLIB.fadd - CLIB.dadd;    /* convert to float operation   */

        // Load EA into FLOATREGS
        getregs(cdb,FLOATREGS);
        cs.Iop = LOD;
        cs.Irm |= modregrm(0,AX,0);
        cdb.gen(&cs);

        if (!I32)
        {
            cs.Irm |= modregrm(0,DX,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);

        }
        retregs2 = FLOATREGS2;
        idxregs = FLOATREGS | idxregm(&cs);
        retregs = FLOATREGS;
    }
    else
    {
        if (I32)
        {
            // Load EA into DOUBLEREGS
            getregs(cdb,DOUBLEREGS_32);
            cs.Iop = LOD;
            cs.Irm |= modregrm(0,AX,0);
            cdb.gen(&cs);
            cs.Irm |= modregrm(0,DX,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);

            retregs2 = DOUBLEREGS2_32;
            idxregs = DOUBLEREGS_32 | idxregm(&cs);
        }
        else
        {
            // Push EA onto stack
            cs.Iop = 0xFF;
            cs.Irm |= modregrm(0,6,0);
            cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            stackpush += DOUBLESIZE;

            retregs2 = DOUBLEREGS_16;
            idxregs = idxregm(&cs);
        }
        retregs = DOUBLEREGS;
    }

    // ES must also be preserved across the rvalue if the EA uses an ES override
    if ((cs.Iflags & CFSEG) == CFes)
        idxregs |= mES;
    cgstate.stackclean++;
    scodelem(cdb,e.EV.E2,&retregs2,idxregs,false);
    cgstate.stackclean--;
    callclib(cdb,e,clib,&retregs,0);
    if (e1.Ecount)
        cssave(e1,retregs,!OTleaf(e1.Eoper));             // if lvalue is a CSE
    freenode(e1);
    cs.Iop = STO;                              // MOV EA,DOUBLEREGS
    fltregs(cdb,&cs,tym);
    fixresult(cdb,e,retregs,pretregs);
}

/****************************
 * Gen code for OPnegass for doubles.
 * Flips the sign by XORing 0x80 into the last byte of the operand
 * (and also of the first half, for complex types), then reloads the
 * value into registers if a result or CSE is wanted.
 */

@trusted
private void opnegassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(config.exe & EX_windos);  // for targets that may not have an 8087

    if (config.inline8087)
    {
        cdnegass87(cdb,e,pretregs);
        return;
    }
    elem *e1 = e.EV.E1;
    tym_t tym = tybasic(e1.Ety);
    int sz = _tysize[tym];
    code cs;

    getlvalue(cdb,&cs,e1,*pretregs ? DOUBLEREGS | mBX | mCX : 0);
    modEA(cdb,&cs);
    cs.Irm |= modregrm(0,6,0);
    cs.Iop = 0x80;
    cs.IEV1.Voffset += sz - 1;          // address the byte holding the sign bit
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 0x80;
    cdb.gen(&cs);                       // XOR 7[EA],0x80
    if (tycomplex(tym))
    {
        cs.IEV1.Voffset -= sz / 2;
        cdb.gen(&cs);                   // XOR 7[EA],0x80
    }

    regm_t retregs;
    if (*pretregs || e1.Ecount)
    {
        cs.IEV1.Voffset -= sz - 1;      // undo the sign-byte adjustment made above

        if (tym == TYfloat)
        {
            // Load EA into FLOATREGS
            getregs(cdb,FLOATREGS);
            cs.Iop = LOD;
            NEWREG(cs.Irm, AX);
            cdb.gen(&cs);

            if (!I32)
            {
                NEWREG(cs.Irm, DX);
                getlvalue_msw(&cs);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);

            }
            retregs = FLOATREGS;
        }
        else
        {
            if (I32)
            {
                // Load EA into DOUBLEREGS
                getregs(cdb,DOUBLEREGS_32);
                cs.Iop = LOD;
                cs.Irm &= ~cast(uint)modregrm(0,7,0);
                cs.Irm |= modregrm(0,AX,0);
                cdb.gen(&cs);
                cs.Irm |= modregrm(0,DX,0);
                getlvalue_msw(&cs);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);
            }
            else
            {
                static if (1)
                {
                    cs.Iop = LOD;
                    fltregs(cdb,&cs,TYdouble);     // MOV DOUBLEREGS, EA
                }
                else
                {
                    // Push EA onto stack
                    cs.Iop = 0xFF;
                    cs.Irm |= modregrm(0,6,0);
                    cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    stackpush += DOUBLESIZE;
                }
            }
            retregs = DOUBLEREGS;
        }
        if (e1.Ecount)
            cssave(e1,retregs,!OTleaf(e1.Eoper)); /* if lvalue is a CSE  */
    }
    else
    {
        retregs = 0;
        assert(e1.Ecount == 0);
    }

    freenode(e1);
    fixresult(cdb,e,retregs,pretregs);
}



/************************
 * Generate code for an assignment.
 * Params:
 *      cdb = code sink
 *      e = assignment node; e.EV.E1 is the lvalue, e.EV.E2 the rvalue
 *      pretregs = where the result is wanted; 0 means no return value
 */

@trusted
void cdeq(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    tym_t tymll;
    reg_t reg;
    code cs;
    elem *e11;
    bool regvar;                  // true means evaluate into register variable
    regm_t varregm;
    reg_t varreg;
    targ_int postinc;

    //printf("cdeq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    int e2oper = e2.Eoper;
    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    regm_t retregs = *pretregs;

    if (tyxmmreg(tyml) && config.fpxmmregs)
    {
        xmmeq(cdb, e, CMP, e1, e2, pretregs);
        return;
    }

    if (tyfloating(tyml) && config.inline8087)
    {
        if (tycomplex(tyml))
        {
            complex_eq87(cdb, e, pretregs);
            return;
        }

        if (!(retregs == 0 &&
              (e2oper == OPconst || e2oper == OPvar || e2oper == OPind))
           )
        {
            eq87(cdb,e,pretregs);
            return;
        }
        if (config.target_cpu >= TARGET_PentiumPro &&
            (e2oper == OPvar || e2oper == OPind)
           )
        {
            eq87(cdb,e,pretregs);
            return;
        }
        if (tyml == TYldouble || tyml == TYildouble)
        {
            eq87(cdb,e,pretregs);
            return;
        }
    }

    uint sz = _tysize[tyml];           // # of bytes to transfer
    assert(cast(int)sz > 0);

    if (retregs == 0)                     // if no return value
    {
        int fl;

        /* If
registers are tight, and we might need them for the lvalue,
         * prefer to not use them for the rvalue
         */
        bool plenty = true;
        if (e1.Eoper == OPind)
        {
            /* Will need 1 register for evaluation, +2 registers for
             * e1's addressing mode
             */
            regm_t m = allregs & ~regcon.mvar;  // mask of non-register variables
            m &= m - 1;         // clear least significant bit
            m &= m - 1;         // clear least significant bit
            plenty = m != 0;    // at least 3 registers
        }

        if ((e2oper == OPconst ||       // if rvalue is a constant
             e2oper == OPrelconst &&
             !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) &&
             ((fl = el_fl(e2)) == FLdata ||
              fl==FLudata || fl == FLextern)
              && !(e2.EV.Vsym.ty() & mTYcs)
            ) &&
            !(evalinregister(e2) && plenty) &&
            !e1.Ecount)        // and no CSE headaches
        {
            // Look for special case of (*p++ = ...), where p is a register variable
            if (e1.Eoper == OPind &&
                ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) &&
                e11.EV.E1.Eoper == OPvar &&
                e11.EV.E1.EV.Vsym.Sfl == FLreg &&
                (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS)
               )
            {
                Symbol *s = e11.EV.E1.EV.Vsym;
                if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
                {
                    regcon.params &= ~s.Spregm();
                }
                // Remember the increment; it is applied at label Lp after the store
                postinc = e11.EV.E2.EV.Vint;
                if (e11.Eoper == OPpostdec)
                    postinc = -postinc;
                getlvalue(cdb,&cs,e1,RMstore);
                freenode(e11.EV.E2);
            }
            else
            {
                postinc = 0;
                getlvalue(cdb,&cs,e1,RMstore);

                if (e2oper == OPconst &&
                    config.flags4 & CFG4speed &&
                    (config.target_cpu == TARGET_Pentium ||
                     config.target_cpu == TARGET_PentiumMMX) &&
                    (cs.Irm & 0xC0) == 0x80
                   )
                {
                    if (I64 && sz == 8 && e2.EV.Vpointer)
                    {
                        // MOV reg,imm64
                        // MOV EA,reg
                        regm_t rregm = allregs & ~idxregm(&cs);
                        reg_t regx;
                        regwithvalue(cdb,rregm,e2.EV.Vpointer,regx,64);
                        cs.Iop = STO;
                        cs.Irm |= modregrm(0,regx & 7,0);
                        if (regx & 8)
                            cs.Irex |= REX_R;
                        cdb.gen(&cs);
                        freenode(e2);
                        goto Lp;
                    }
                    if ((sz == REGSIZE || (I64 && sz == 4)) && e2.EV.Vint)
                    {
                        // MOV reg,imm
                        // MOV EA,reg
                        regm_t rregm = allregs & ~idxregm(&cs);
                        reg_t regx;
                        regwithvalue(cdb,rregm,e2.EV.Vint,regx,0);
                        cs.Iop = STO;
                        cs.Irm |= modregrm(0,regx & 7,0);
                        if (regx & 8)
                            cs.Irex |= REX_R;
                        cdb.gen(&cs);
                        freenode(e2);
                        goto Lp;
                    }
                    if (sz == 2 * REGSIZE && e2.EV.Vllong == 0)
                    {
                        // MOV reg,imm
                        // MOV EA,reg
                        // MOV EA+2,reg
                        regm_t rregm = getscratch() & ~idxregm(&cs);
                        if (rregm)
                        {
                            reg_t regx;
                            regwithvalue(cdb,rregm,e2.EV.Vint,regx,0);
                            cs.Iop = STO;
                            cs.Irm |= modregrm(0,regx,0);
                            cdb.gen(&cs);
                            getlvalue_msw(&cs);
                            cdb.gen(&cs);
                            freenode(e2);
                            goto Lp;
                        }
                    }
                }
            }

            // If loading result into a register
            if ((cs.Irm & 0xC0) == 0xC0)
            {
                modEA(cdb,&cs);
                if (sz == 2 * REGSIZE && cs.IFL1 == FLreg)
                    getregs(cdb,cs.IEV1.Vsym.Sregm);
            }
            cs.Iop = (sz == 1) ? 0xC6 : 0xC7;

            if (e2oper == OPrelconst)
            {
                cs.IEV2.Voffset = e2.EV.Voffset;
                cs.IFL2 = cast(ubyte)fl;
                cs.IEV2.Vsym = e2.EV.Vsym;
                cs.Iflags |= CFoff;
                cdb.gen(&cs);       // MOV EA,&variable
                if (I64 && sz == 8)
                    code_orrex(cdb.last(), REX_W);
                if (sz > REGSIZE)
                {
                    cs.Iop = 0x8C;
                    getlvalue_msw(&cs);
                    cs.Irm |= modregrm(0,3,0);
                    cdb.gen(&cs);   // MOV EA+2,DS
                }
            }
            else
            {
                assert(e2oper == OPconst);
                cs.IFL2 = FLconst;
                // p walks over the raw bytes of the constant, REGSIZE at a time
                targ_size_t *p = cast(targ_size_t *) &(e2.EV);
                cs.IEV2.Vsize_t = *p;
                // Look for loading a register variable
                if ((cs.Irm & 0xC0) == 0xC0)
                {
                    reg_t regx = cs.Irm & 7;

                    if (cs.Irex & REX_B)
                        regx |= 8;
                    if (I64 && sz == 8)
                        movregconst(cdb,regx,*p,64);
                    else
                        movregconst(cdb,regx,*p,1 ^ (cs.Iop & 1));
                    if (sz == 2 * REGSIZE)
                    {   getlvalue_msw(&cs);
                        if (REGSIZE == 2)
                            movregconst(cdb,cs.Irm & 7,(cast(ushort *)p)[1],0);
                        else if (REGSIZE == 4)
                            movregconst(cdb,cs.Irm & 7,(cast(uint *)p)[1],0);
                        else if (REGSIZE == 8)
                            movregconst(cdb,cs.Irm & 7,p[1],0);
                        else
                            assert(0);
                    }
                }
                else if (I64 && sz == 8 && *p >= 0x80000000)
                {   // Use 64 bit MOV, as the 32 bit one gets sign extended
                    // MOV reg,imm64
                    // MOV EA,reg
                    regm_t rregm = allregs & ~idxregm(&cs);
                    reg_t regx;
                    regwithvalue(cdb,rregm,*p,regx,64);
                    cs.Iop = STO;
                    cs.Irm |= modregrm(0,regx & 7,0);
                    if (regx & 8)
                        cs.Irex |= REX_R;
                    cdb.gen(&cs);
                }
                else
                {
                    // Store the constant one register-sized piece at a time
                    int off = sz;
                    do
                    {   int regsize = REGSIZE;
                        if (off >= 4 && I16 && config.target_cpu >= TARGET_80386)
                        {
                            regsize = 4;
                            cs.Iflags |= CFopsize;      // use opsize to do 32 bit operation
                        }
                        else if (I64 && sz == 16 && *p >= 0x80000000)
                        {
                            regm_t rregm = allregs & ~idxregm(&cs);
                            reg_t regx;
                            regwithvalue(cdb,rregm,*p,regx,64);
                            cs.Iop = STO;
                            cs.Irm |= modregrm(0,regx & 7,0);
                            if (regx & 8)
                                cs.Irex |= REX_R;
                        }
                        else
                        {
                            regm_t retregsx = (sz == 1) ? BYTEREGS : allregs;
                            reg_t regx;
                            if (reghasvalue(retregsx,*p,regx))
                            {
                                cs.Iop = (cs.Iop & 1) | 0x88;
                                cs.Irm |= modregrm(0,regx & 7,0); // MOV EA,regx
                                if (regx & 8)
                                    cs.Irex |= REX_R;
                                if (I64 && sz == 1 && regx >= 4)
                                    cs.Irex |= REX;
                            }
                            if (!I16 && off == 2)      // if 16 bit operand
                                cs.Iflags |= CFopsize;
                            if (I64 && sz == 8)
                                cs.Irex |= REX_W;
                        }
                        cdb.gen(&cs);           // MOV EA,const

                        // Advance to the next piece and reset cs to the
                        // immediate-store form for it
                        p = cast(targ_size_t *)(cast(char *) p + regsize);
                        cs.Iop = (cs.Iop & 1) | 0xC6;
                        cs.Irm &= cast(ubyte)~cast(int)modregrm(0,7,0);
                        cs.Irex &= ~REX_R;
                        cs.IEV1.Voffset += regsize;
                        cs.IEV2.Vint = cast(int)*p;
                        off -= regsize;
                    } while (off > 0);
                }
            }
            freenode(e2);
            goto Lp;
        }
        retregs = allregs;        // pick a reg, any reg
        if (sz == 2 * REGSIZE)
            retregs &= ~mBP;      // BP cannot be used for register pair
    }
    if (retregs == mPSW)
    {
        retregs = allregs;
        if (sz == 2 * REGSIZE)
            retregs &= ~mBP;      // BP cannot be used for register pair
    }
    cs.Iop = STO;
    if (sz == 1)                  // must have byte regs
    {
        cs.Iop = 0x88;
        retregs &= BYTEREGS;
        if (!retregs)
            retregs = BYTEREGS;
    }
    else if (retregs & mES &&
           (
             (e1.Eoper == OPind &&
             ((tymll = tybasic(e1.EV.E1.Ety)) == TYfptr || tymll == TYhptr)) ||
             (e1.Eoper == OPvar && e1.EV.Vsym.Sfl == FLfardata)
           )
          )
        // getlvalue() needs ES, so we can't return it
        retregs = allregs;              // no conflicts with ES
    else if (tyml == TYdouble || tyml == TYdouble_alias || retregs & mST0)
        retregs = DOUBLEREGS;

    regvar = false;
    varregm = 0;
    if (config.flags4 & CFG4optimized)
    {
        // Be careful of cases like (x = x+x+x). We cannot evaluate in
        // x if x is in a register.
        if (isregvar(e1,varregm,varreg) &&    // if lvalue is register variable
            doinreg(e1.EV.Vsym,e2) &&           // and we can compute directly into it
            !(sz == 1 && e1.EV.Voffset == 1)
           )
        {
            if (varregm & XMMREGS)
            {
                // Could be an integer vector in the XMMREGS
                xmmeq(cdb, e, CMP, e1, e2, pretregs);
                return;
            }
            regvar = true;
            retregs = varregm;
            reg = varreg;       // evaluate directly in target register
            if (tysize(e1.Ety) == REGSIZE &&
                tysize(e1.EV.Vsym.Stype.Tty) == 2 * REGSIZE)
            {
                // Assigning to one half of a register-pair variable:
                // a nonzero offset selects the MSW, zero the LSW
                if (e1.EV.Voffset)
                    retregs &= mMSW;
                else
                    retregs &= mLSW;
                reg = findreg(retregs);
            }
        }
    }
    if (*pretregs & mPSW && OTleaf(e1.Eoper))     // if evaluating e1 couldn't change flags
    {   // Be careful that this lines up with jmpopcode()
        retregs |= mPSW;
        *pretregs &= ~mPSW;
    }
    scodelem(cdb,e2,&retregs,0,true);    // get rvalue

    // Look for special case of (*p++ = ...), where p is a register variable
    if (e1.Eoper == OPind &&
        ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) &&
        e11.EV.E1.Eoper == OPvar &&
        e11.EV.E1.EV.Vsym.Sfl == FLreg &&
        (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS)
       )
    {
        Symbol *s = e11.EV.E1.EV.Vsym;
        if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
        {
            regcon.params &= ~s.Spregm();
        }

        postinc = e11.EV.E2.EV.Vint;
        if (e11.Eoper == OPpostdec)
            postinc = -postinc;
        getlvalue(cdb,&cs,e1,RMstore | retregs);
        freenode(e11.EV.E2);
    }
    else
    {
        postinc = 0;
        getlvalue(cdb,&cs,e1,RMstore | retregs);     // get lvalue (cl == null if regvar)
    }

    getregs(cdb,varregm);

    assert(!(retregs & mES && (cs.Iflags & CFSEG) == CFes));
    if ((tyml == TYfptr || tyml == TYhptr) && retregs & mES)
    {
        reg = findreglsw(retregs);
        cs.Irm |= modregrm(0,reg,0);
        cdb.gen(&cs);                   // MOV EA,reg
        getlvalue_msw(&cs);             // point to where segment goes
        cs.Iop = 0x8C;
        NEWREG(cs.Irm,0);
        cdb.gen(&cs);                   // MOV EA+2,ES
    }
    else
    {
        if (!I16)
        {
            reg = findreg(retregs &
                    ((sz > REGSIZE) ? mBP | mLSW : mBP | ALLREGS));
            cs.Irm |= modregrm(0,reg & 7,0);
            if (reg & 8)
                cs.Irex |= REX_R;
            for (; true; sz -= REGSIZE)
            {
                // Do not generate mov from register onto itself
                if (regvar && reg == ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0)))
                    break;
                if (sz == 2)            // if 16 bit operand
                    cs.Iflags |= CFopsize;
                else if (sz == 1 && reg >= 4)
                    cs.Irex |= REX;
                cdb.gen(&cs);           // MOV EA+offset,reg
                if (sz <= REGSIZE)
                    break;
                getlvalue_msw(&cs);
                reg = findregmsw(retregs);
                code_newreg(&cs, reg);
            }
        }
        else
        {
            if (sz > REGSIZE)
                cs.IEV1.Voffset += sz - REGSIZE;  // 0,2,6
            reg = findreg(retregs &
                    (sz > REGSIZE ? mMSW : ALLREGS));
            if (tyml == TYdouble || tyml == TYdouble_alias)
                reg = AX;
            cs.Irm |= modregrm(0,reg,0);
            // Do not generate mov from register onto itself
            if (!regvar || reg != (cs.Irm & 7))
                for (; true; sz -= REGSIZE)             // 1,2,4
                {
                    cdb.gen(&cs);             // MOV EA+offset,reg
                    if (sz <= REGSIZE)
                        break;
                    cs.IEV1.Voffset -= REGSIZE;
                    if (tyml == TYdouble || tyml == TYdouble_alias)
                            reg = dblreg[reg];
                    else
                            reg = findreglsw(retregs);
                    NEWREG(cs.Irm,reg);
                }
        }
    }
    if (e1.Ecount ||                    // if lvalue is a CSE or
        regvar)                         // rvalue can't be a CSE
    {
        getregs_imm(cdb,retregs);        // necessary if both lvalue and
                                        //  rvalue are CSEs (since a reg
                                        //  can hold only one e at a time)
        cssave(e1,retregs,!OTleaf(e1.Eoper));     // if lvalue is a CSE
    }

    fixresult(cdb,e,retregs,pretregs);
Lp:
    // Apply the deferred p++ / p-- of the (*p++ = ...) special case
    if (postinc)
    {
        reg_t ireg = findreg(idxregm(&cs));
        if (*pretregs & mPSW)
        {   // Use LEA to avoid touching the flags
            uint rm = cs.Irm & 7;
            if (cs.Irex & REX_B)
                rm |= 8;
            cdb.genc1(LEA,buildModregrm(2,ireg,rm),FLconst,postinc);
            if (tysize(e11.EV.E1.Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else if (I64)
        {
            cdb.genc2(0x81,modregrmx(3,0,ireg),postinc);
            if (tysize(e11.EV.E1.Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else
        {
            if (postinc == 1)
                cdb.gen1(0x40 + ireg);            // INC ireg
            else if (postinc == -cast(targ_int)1)
                cdb.gen1(0x48 + ireg);            // DEC ireg
            else
            {
                cdb.genc2(0x81,modregrm(3,0,ireg),postinc);
            }
        }
    }
    freenode(e1);
}


/************************
 * Generate code for += -= &= |= ^= negass
 * Params:
 *      cdb = code sink
 *      e = op= node; e.EV.E1 is the lvalue
 *      pretregs = where the result is wanted (registers and/or mPSW)
 */

@trusted
void cdaddass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdaddass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    OPER op = e.Eoper;
    regm_t retregs = 0;
    uint reverse = 0;
    elem *e1 = e.EV.E1;
    tym_t tyml = tybasic(e1.Ety);            // type of lvalue
    int sz = _tysize[tyml];
    int isbyte = (sz == 1);                     // 1 for byte operation, else 0

    // See if evaluate in XMM registers
    if (config.fpxmmregs && tyxmmreg(tyml) && op != OPnegass && !(*pretregs & mST0))
    {
        xmmopass(cdb,e,pretregs);
        return;
    }

    if (tyfloating(tyml))
    {
        if (config.exe & EX_posix)
        {
            if (op == OPnegass)
                cdnegass87(cdb,e,pretregs);
            else
                opass87(cdb,e,pretregs);
        }
        else
        {
            if (op == OPnegass)
                opnegassdbl(cdb,e,pretregs);
            else
                opassdbl(cdb,e,pretregs,op);
        }
        return;
    }
    uint opsize = (I16 && tylong(tyml) && config.target_cpu >= TARGET_80386)
        ?
CFopsize : 0; 886 uint cflags = 0; 887 regm_t forccs = *pretregs & mPSW; // return result in flags 888 regm_t forregs = *pretregs & ~mPSW; // return result in regs 889 // true if we want the result in a register 890 uint wantres = forregs || (e1.Ecount && !OTleaf(e1.Eoper)); 891 892 reg_t reg; 893 uint op1,op2,mode; 894 code cs; 895 elem *e2; 896 regm_t varregm; 897 reg_t varreg; 898 uint jop; 899 900 901 switch (op) // select instruction opcodes 902 { 903 case OPpostinc: op = OPaddass; // i++ => += 904 goto case OPaddass; 905 906 case OPaddass: op1 = 0x01; op2 = 0x11; 907 cflags = CFpsw; 908 mode = 0; break; // ADD, ADC 909 910 case OPpostdec: op = OPminass; // i-- => -= 911 goto case OPminass; 912 913 case OPminass: op1 = 0x29; op2 = 0x19; 914 cflags = CFpsw; 915 mode = 5; break; // SUB, SBC 916 917 case OPandass: op1 = op2 = 0x21; 918 mode = 4; break; // AND, AND 919 920 case OPorass: op1 = op2 = 0x09; 921 mode = 1; break; // OR , OR 922 923 case OPxorass: op1 = op2 = 0x31; 924 mode = 6; break; // XOR, XOR 925 926 case OPnegass: op1 = 0xF7; // NEG 927 break; 928 929 default: 930 assert(0); 931 } 932 op1 ^= isbyte; // bit 0 is 0 for byte operation 933 934 if (op == OPnegass) 935 { 936 getlvalue(cdb,&cs,e1,0); 937 modEA(cdb,&cs); 938 cs.Irm |= modregrm(0,3,0); 939 cs.Iop = op1; 940 switch (_tysize[tyml]) 941 { 942 case CHARSIZE: 943 cdb.gen(&cs); 944 break; 945 946 case SHORTSIZE: 947 cdb.gen(&cs); 948 if (!I16 && *pretregs & mPSW) 949 cdb.last().Iflags |= CFopsize | CFpsw; 950 break; 951 952 case LONGSIZE: 953 if (!I16 || opsize) 954 { cdb.gen(&cs); 955 cdb.last().Iflags |= opsize; 956 break; 957 } 958 neg_2reg: 959 getlvalue_msw(&cs); 960 cdb.gen(&cs); // NEG EA+2 961 getlvalue_lsw(&cs); 962 cdb.gen(&cs); // NEG EA 963 code_orflag(cdb.last(),CFpsw); 964 cs.Iop = 0x81; 965 getlvalue_msw(&cs); 966 cs.IFL2 = FLconst; 967 cs.IEV2.Vuns = 0; 968 cdb.gen(&cs); // SBB EA+2,0 969 break; 970 971 case LLONGSIZE: 972 if (I16) 973 assert(0); // not implemented yet 974 if 
(I32) 975 goto neg_2reg; 976 cdb.gen(&cs); 977 break; 978 979 default: 980 assert(0); 981 } 982 forccs = 0; // flags already set by NEG 983 *pretregs &= ~mPSW; 984 } 985 else if ((e2 = e.EV.E2).Eoper == OPconst && // if rvalue is a const 986 el_signx32(e2) && 987 // Don't evaluate e2 in register if we can use an INC or DEC 988 (((sz <= REGSIZE || tyfv(tyml)) && 989 (op == OPaddass || op == OPminass) && 990 (el_allbits(e2, 1) || el_allbits(e2, -1)) 991 ) || 992 (!evalinregister(e2) 993 && tyml != TYhptr 994 ) 995 ) 996 ) 997 { 998 getlvalue(cdb,&cs,e1,0); 999 modEA(cdb,&cs); 1000 cs.IFL2 = FLconst; 1001 cs.IEV2.Vsize_t = e2.EV.Vint; 1002 if (sz <= REGSIZE || tyfv(tyml) || opsize) 1003 { 1004 targ_int i = cs.IEV2.Vint; 1005 1006 // Handle shortcuts. Watch out for if result has 1007 // to be in flags. 1008 1009 if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,reg) && i != 1 && i != -1 && 1010 !opsize) 1011 { 1012 cs.Iop = op1; 1013 cs.Irm |= modregrm(0,reg & 7,0); 1014 if (I64) 1015 { if (isbyte && reg >= 4) 1016 cs.Irex |= REX; 1017 if (reg & 8) 1018 cs.Irex |= REX_R; 1019 } 1020 } 1021 else 1022 { 1023 cs.Iop = 0x81; 1024 cs.Irm |= modregrm(0,mode,0); 1025 switch (op) 1026 { 1027 case OPminass: // convert to += 1028 cs.Irm ^= modregrm(0,5,0); 1029 i = -i; 1030 cs.IEV2.Vsize_t = i; 1031 goto case OPaddass; 1032 1033 case OPaddass: 1034 if (i == 1) // INC EA 1035 goto L1; 1036 else if (i == -1) // DEC EA 1037 { cs.Irm |= modregrm(0,1,0); 1038 L1: cs.Iop = 0xFF; 1039 } 1040 break; 1041 1042 default: 1043 break; 1044 } 1045 cs.Iop ^= isbyte; // for byte operations 1046 } 1047 cs.Iflags |= opsize; 1048 if (forccs) 1049 cs.Iflags |= CFpsw; 1050 else if (!I16 && cs.Iflags & CFopsize) 1051 { 1052 switch (op) 1053 { case OPorass: 1054 case OPxorass: 1055 cs.IEV2.Vsize_t &= 0xFFFF; 1056 cs.Iflags &= ~CFopsize; // don't worry about MSW 1057 break; 1058 1059 case OPandass: 1060 cs.IEV2.Vsize_t |= ~0xFFFFL; 1061 cs.Iflags &= ~CFopsize; // don't worry about MSW 1062 break; 1063 1064 
case OPminass: 1065 case OPaddass: 1066 static if (1) 1067 { 1068 if ((cs.Irm & 0xC0) == 0xC0) // EA is register 1069 cs.Iflags &= ~CFopsize; 1070 } 1071 else 1072 { 1073 if ((cs.Irm & 0xC0) == 0xC0 && // EA is register and 1074 e1.Eoper == OPind) // not a register var 1075 cs.Iflags &= ~CFopsize; 1076 } 1077 break; 1078 1079 default: 1080 assert(0); 1081 } 1082 } 1083 1084 // For scheduling purposes, we wish to replace: 1085 // OP EA 1086 // with: 1087 // MOV reg,EA 1088 // OP reg 1089 // MOV EA,reg 1090 if (forregs && sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 && 1091 (config.target_cpu == TARGET_Pentium || 1092 config.target_cpu == TARGET_PentiumMMX) && 1093 config.flags4 & CFG4speed) 1094 { 1095 regm_t sregm; 1096 code cs2; 1097 1098 // Determine which registers to use 1099 sregm = allregs & ~idxregm(&cs); 1100 if (isbyte) 1101 sregm &= BYTEREGS; 1102 if (sregm & forregs) 1103 sregm &= forregs; 1104 1105 allocreg(cdb,&sregm,®,tyml); // allocate register 1106 1107 cs2 = cs; 1108 cs2.Iflags &= ~CFpsw; 1109 cs2.Iop = LOD ^ isbyte; 1110 code_newreg(&cs2, reg); 1111 cdb.gen(&cs2); // MOV reg,EA 1112 1113 cs.Irm = (cs.Irm & modregrm(0,7,0)) | modregrm(3,0,reg & 7); 1114 if (reg & 8) 1115 cs.Irex |= REX_B; 1116 cdb.gen(&cs); // OP reg 1117 1118 cs2.Iop ^= 2; 1119 cdb.gen(&cs2); // MOV EA,reg 1120 1121 retregs = sregm; 1122 wantres = 0; 1123 if (e1.Ecount) 1124 cssave(e1,retregs,!OTleaf(e1.Eoper)); 1125 } 1126 else 1127 { 1128 cdb.gen(&cs); 1129 cs.Iflags &= ~opsize; 1130 cs.Iflags &= ~CFpsw; 1131 if (I16 && opsize) // if DWORD operand 1132 cs.IEV1.Voffset += 2; // compensate for wantres code 1133 } 1134 } 1135 else if (sz == 2 * REGSIZE) 1136 { 1137 targ_uns msw; 1138 1139 cs.Iop = 0x81; 1140 cs.Irm |= modregrm(0,mode,0); 1141 cs.Iflags |= cflags; 1142 cdb.gen(&cs); 1143 cs.Iflags &= ~CFpsw; 1144 1145 getlvalue_msw(&cs); // point to msw 1146 msw = cast(uint)MSREG(e.EV.E2.EV.Vllong); 1147 cs.IEV2.Vuns = msw; // msw of constant 1148 switch (op) 1149 { 1150 case OPminass: 
1151 cs.Irm ^= modregrm(0,6,0); // SUB => SBB 1152 break; 1153 1154 case OPaddass: 1155 cs.Irm |= modregrm(0,2,0); // ADD => ADC 1156 break; 1157 1158 default: 1159 break; 1160 } 1161 cdb.gen(&cs); 1162 } 1163 else 1164 assert(0); 1165 freenode(e.EV.E2); // don't need it anymore 1166 } 1167 else if (isregvar(e1,varregm,varreg) && 1168 (e2.Eoper == OPvar || e2.Eoper == OPind) && 1169 !evalinregister(e2) && 1170 sz <= REGSIZE) // deal with later 1171 { 1172 getlvalue(cdb,&cs,e2,0); 1173 freenode(e2); 1174 getregs(cdb,varregm); 1175 code_newreg(&cs, varreg); 1176 if (I64 && sz == 1 && varreg >= 4) 1177 cs.Irex |= REX; 1178 cs.Iop = op1 ^ 2; // toggle direction bit 1179 if (forccs) 1180 cs.Iflags |= CFpsw; 1181 reverse = 2; // remember we toggled it 1182 cdb.gen(&cs); 1183 retregs = 0; // to trigger a bug if we attempt to use it 1184 } 1185 else if ((op == OPaddass || op == OPminass) && 1186 sz <= REGSIZE && 1187 !e2.Ecount && 1188 ((jop = jmpopcode(e2)) == JC || jop == JNC || 1189 (OTconv(e2.Eoper) && !e2.EV.E1.Ecount && ((jop = jmpopcode(e2.EV.E1)) == JC || jop == JNC))) 1190 ) 1191 { 1192 /* e1 += (x < y) ADC EA,0 1193 * e1 -= (x < y) SBB EA,0 1194 * e1 += (x >= y) SBB EA,-1 1195 * e1 -= (x >= y) ADC EA,-1 1196 */ 1197 getlvalue(cdb,&cs,e1,0); // get lvalue 1198 modEA(cdb,&cs); 1199 regm_t keepmsk = idxregm(&cs); 1200 retregs = mPSW; 1201 if (OTconv(e2.Eoper)) 1202 { 1203 scodelem(cdb,e2.EV.E1,&retregs,keepmsk,true); 1204 freenode(e2); 1205 } 1206 else 1207 scodelem(cdb,e2,&retregs,keepmsk,true); 1208 cs.Iop = 0x81 ^ isbyte; // ADC EA,imm16/32 1209 uint regop = 2; // ADC 1210 if ((op == OPaddass) ^ (jop == JC)) 1211 regop = 3; // SBB 1212 code_newreg(&cs,regop); 1213 cs.Iflags |= opsize; 1214 if (forccs) 1215 cs.Iflags |= CFpsw; 1216 cs.IFL2 = FLconst; 1217 cs.IEV2.Vsize_t = (jop == JC) ? 
0 : ~cast(targ_size_t)0; 1218 cdb.gen(&cs); 1219 retregs = 0; // to trigger a bug if we attempt to use it 1220 } 1221 else // evaluate e2 into register 1222 { 1223 retregs = (isbyte) ? BYTEREGS : ALLREGS; // pick working reg 1224 if (tyml == TYhptr) 1225 retregs &= ~mCX; // need CX for shift count 1226 scodelem(cdb,e.EV.E2,&retregs,0,true); // get rvalue 1227 getlvalue(cdb,&cs,e1,retregs); // get lvalue 1228 modEA(cdb,&cs); 1229 cs.Iop = op1; 1230 if (sz <= REGSIZE || tyfv(tyml)) 1231 { 1232 reg = findreg(retregs); 1233 code_newreg(&cs, reg); // OP1 EA,reg 1234 if (sz == 1 && reg >= 4 && I64) 1235 cs.Irex |= REX; 1236 if (forccs) 1237 cs.Iflags |= CFpsw; 1238 } 1239 else if (tyml == TYhptr) 1240 { 1241 uint mreg = findregmsw(retregs); 1242 uint lreg = findreglsw(retregs); 1243 getregs(cdb,retregs | mCX); 1244 1245 // If h -= l, convert to h += -l 1246 if (e.Eoper == OPminass) 1247 { 1248 cdb.gen2(0xF7,modregrm(3,3,mreg)); // NEG mreg 1249 cdb.gen2(0xF7,modregrm(3,3,lreg)); // NEG lreg 1250 code_orflag(cdb.last(),CFpsw); 1251 cdb.genc2(0x81,modregrm(3,3,mreg),0); // SBB mreg,0 1252 } 1253 cs.Iop = 0x01; 1254 cs.Irm |= modregrm(0,lreg,0); 1255 cdb.gen(&cs); // ADD EA,lreg 1256 code_orflag(cdb.last(),CFpsw); 1257 cdb.genc2(0x81,modregrm(3,2,mreg),0); // ADC mreg,0 1258 genshift(cdb); // MOV CX,offset __AHSHIFT 1259 cdb.gen2(0xD3,modregrm(3,4,mreg)); // SHL mreg,CL 1260 NEWREG(cs.Irm,mreg); // ADD EA+2,mreg 1261 getlvalue_msw(&cs); 1262 } 1263 else if (sz == 2 * REGSIZE) 1264 { 1265 cs.Irm |= modregrm(0,findreglsw(retregs),0); 1266 cdb.gen(&cs); // OP1 EA,reg+1 1267 code_orflag(cdb.last(),cflags); 1268 cs.Iop = op2; 1269 NEWREG(cs.Irm,findregmsw(retregs)); // OP2 EA+1,reg 1270 getlvalue_msw(&cs); 1271 } 1272 else 1273 assert(0); 1274 cdb.gen(&cs); 1275 retregs = 0; // to trigger a bug if we attempt to use it 1276 } 1277 1278 // See if we need to reload result into a register. 
1279 // Need result in registers in case we have a 32 bit 1280 // result and we want the flags as a result. 1281 if (wantres || (sz > REGSIZE && forccs)) 1282 { 1283 if (sz <= REGSIZE) 1284 { 1285 regm_t possregs; 1286 1287 possregs = ALLREGS; 1288 if (isbyte) 1289 possregs = BYTEREGS; 1290 retregs = forregs & possregs; 1291 if (!retregs) 1292 retregs = possregs; 1293 1294 // If reg field is destination 1295 if (cs.Iop & 2 && cs.Iop < 0x40 && (cs.Iop & 7) <= 5) 1296 { 1297 reg = (cs.Irm >> 3) & 7; 1298 if (cs.Irex & REX_R) 1299 reg |= 8; 1300 retregs = mask(reg); 1301 allocreg(cdb,&retregs,®,tyml); 1302 } 1303 // If lvalue is a register, just use that register 1304 else if ((cs.Irm & 0xC0) == 0xC0) 1305 { 1306 reg = cs.Irm & 7; 1307 if (cs.Irex & REX_B) 1308 reg |= 8; 1309 retregs = mask(reg); 1310 allocreg(cdb,&retregs,®,tyml); 1311 } 1312 else 1313 { 1314 allocreg(cdb,&retregs,®,tyml); 1315 cs.Iop = LOD ^ isbyte ^ reverse; 1316 code_newreg(&cs, reg); 1317 if (I64 && isbyte && reg >= 4) 1318 cs.Irex |= REX_W; 1319 cdb.gen(&cs); // MOV reg,EA 1320 } 1321 } 1322 else if (tyfv(tyml) || tyml == TYhptr) 1323 { 1324 regm_t idxregs; 1325 1326 if (tyml == TYhptr) 1327 getlvalue_lsw(&cs); 1328 idxregs = idxregm(&cs); 1329 retregs = forregs & ~idxregs; 1330 if (!(retregs & IDXREGS)) 1331 retregs |= IDXREGS & ~idxregs; 1332 if (!(retregs & mMSW)) 1333 retregs |= mMSW & ALLREGS; 1334 allocreg(cdb,&retregs,®,tyml); 1335 NEWREG(cs.Irm,findreglsw(retregs)); 1336 if (retregs & mES) // if want ES loaded 1337 { 1338 cs.Iop = 0xC4; 1339 cdb.gen(&cs); // LES lreg,EA 1340 } 1341 else 1342 { 1343 cs.Iop = LOD; 1344 cdb.gen(&cs); // MOV lreg,EA 1345 getlvalue_msw(&cs); 1346 if (I32) 1347 cs.Iflags |= CFopsize; 1348 NEWREG(cs.Irm,reg); 1349 cdb.gen(&cs); // MOV mreg,EA+2 1350 } 1351 } 1352 else if (sz == 2 * REGSIZE) 1353 { 1354 regm_t idx = idxregm(&cs); 1355 retregs = forregs; 1356 if (!retregs) 1357 retregs = ALLREGS; 1358 allocreg(cdb,&retregs,®,tyml); 1359 cs.Iop = LOD; 1360 
NEWREG(cs.Irm,reg);

            code csl = cs;
            NEWREG(csl.Irm,findreglsw(retregs));
            getlvalue_lsw(&csl);

            if (mask(reg) & idx)
            {
                cdb.gen(&csl);             // MOV reg+1,EA
                cdb.gen(&cs);              // MOV reg,EA+2
            }
            else
            {
                cdb.gen(&cs);              // MOV reg,EA+2
                cdb.gen(&csl);             // MOV reg+1,EA
            }
        }
        else
            assert(0);
        if (e1.Ecount)                 // if we gen a CSE
            cssave(e1,retregs,!OTleaf(e1.Eoper));
    }
    freenode(e1);
    if (sz <= REGSIZE)
        *pretregs &= ~mPSW;            // flags are already set
    fixresult(cdb,e,retregs,pretregs);
}

/********************************
 * Generate code for *=
 *
 * Dispatch, in order:
 *  - XMM types go to xmmopass() (unless the result is wanted in ST0)
 *  - other floating types go to opass87() (posix) or opassdbl()
 *  - operands of at most REGSIZE bytes: LEA sequences for selected
 *    constant factors, IMUL with immediate, two-operand IMUL, or
 *    the one-operand [I]MUL through AX
 *  - operands of 2*REGSIZE bytes: open-coded multiply of the DX:AX pair,
 *    or a call to CLIB.lmul
 * Any other size asserts.
 *
 * Params:
 *      cdb = code is appended here
 *      e = the *= node; e.EV.E1 is the lvalue, e.EV.E2 the multiplier
 *      pretregs = mask of registers/flags in which the result is wanted
 */

@trusted
void cdmulass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    regm_t retregs;
    reg_t resreg;
    uint opr,isbyte;

    //printf("cdmulass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    OPER op = e.Eoper;                     // OPxxxx

    tym_t tyml = tybasic(e1.Ety);          // type of lvalue
    char uns = tyuns(tyml) || tyuns(e2.Ety);
    uint sz = _tysize[tyml];

    uint rex = (I64 && sz == 8) ? REX_W : 0;
    uint grex = rex << 16;          // 64 bit operands

    // See if evaluate in XMM registers
    if (config.fpxmmregs && tyxmmreg(tyml) && !(*pretregs & mST0))
    {
        xmmopass(cdb,e,pretregs);
        return;
    }

    if (tyfloating(tyml))
    {
        if (config.exe & EX_posix)
        {
            opass87(cdb,e,pretregs);
        }
        else
        {
            opassdbl(cdb,e,pretregs,op);
        }
        return;
    }

    if (sz <= REGSIZE)                  // if word or byte
    {
        if (e2.Eoper == OPconst &&
            (I32 || I64) &&
            el_signx32(e2) &&
            sz >= 4)
        {
            // See if we can use an LEA instruction

            int ss;             // scale field of the first LEA
            int ss2 = 0;        // scale field of the optional second LEA
            int shift;          // extra SHL count used by the L5 sequence

            targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
            switch (e2factor)
            {
                case 12:    ss = 1; ss2 = 2; goto L4;
                case 24:    ss = 1; ss2 = 3; goto L4;

                case 6:
                case 3:     ss = 1; goto L4;

                case 20:    ss = 2; ss2 = 2; goto L4;
                case 40:    ss = 2; ss2 = 3; goto L4;

                case 10:
                case 5:     ss = 2; goto L4;

                case 36:    ss = 3; ss2 = 2; goto L4;
                case 72:    ss = 3; ss2 = 3; goto L4;

                case 18:
                case 9:     ss = 3; goto L4;
                L4:
                {
                    // reg = EA * (2**ss + 1), then either scale by 2**ss2
                    // or double once for the even factors
                    getlvalue(cdb,&cs,e1,0);           // get EA
                    modEA(cdb,&cs);
                    freenode(e2);
                    regm_t idxregs = idxregm(&cs);
                    regm_t regm = *pretregs & ~(idxregs | mBP | mR13);  // don't use EBP
                    if (!regm)
                        regm = allregs & ~(idxregs | mBP | mR13);
                    reg_t reg;
                    allocreg(cdb,&regm,&reg,tyml);
                    cs.Iop = LOD;
                    code_newreg(&cs,reg);
                    cs.Irex |= rex;
                    cdb.gen(&cs);                       // MOV reg,EA

                    assert((reg & 7) != BP);
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                modregxrmx(ss,reg,reg));  // LEA reg,[ss*reg][reg]
                    if (ss2)
                    {
                        cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                       modregxrm(ss2,reg,5));
                        cdb.last().IFL1 = FLconst;
                        cdb.last().IEV1.Vint = 0;       // LEA reg,0[ss2*reg]
                    }
                    else if (!(e2factor & 1))    // if even factor
                    {
                        genregs(cdb,0x03,reg,reg); // ADD reg,reg
                        code_orrex(cdb.last(),rex);
                    }
                    opAssStoreReg(cdb,cs,e,reg,pretregs);
                    return;
                }

                case 37:
                case 74:    shift = 2;
                            goto L5;
                case 13:
                case 26:    shift = 0;
                            goto L5;
                L5:
                {
                    // reg = sreg*5, sreg <<= shift, reg += sreg*8,
                    // then double once for the even factors
                    getlvalue(cdb,&cs,e1,0);            // get EA
                    modEA(cdb,&cs);
                    freenode(e2);
                    regm_t idxregs = idxregm(&cs);
                    regm_t regm = *pretregs & ~(idxregs | mBP | mR13);  // don't use EBP
                    if (!regm)
                        regm = allregs & ~(idxregs | mBP | mR13);
                    reg_t reg;                          // return register
                    allocreg(cdb,&regm,&reg,tyml);

                    reg_t sreg = allocScratchReg(cdb, allregs & ~(regm | idxregs | mBP | mR13));

                    cs.Iop = LOD;
                    code_newreg(&cs,sreg);
                    cs.Irex |= rex;
                    cdb.gen(&cs);                                         // MOV sreg,EA

                    assert((sreg & 7) != BP);
                    assert((reg & 7) != BP);
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                          modregxrmx(2,sreg,sreg));       // LEA reg,[sreg*4][sreg]
                    if (shift)
                        cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift); // SHL sreg,shift
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                          modregxrmx(3,sreg,reg));        // LEA reg,[sreg*8][reg]
                    if (!(e2factor & 1))                                  // if even factor
                    {
                        genregs(cdb,0x03,reg,reg);                        // ADD reg,reg
                        code_orrex(cdb.last(),rex);
                    }
                    opAssStoreReg(cdb,cs,e,reg,pretregs);
                    return;
                }

                default:
                    break;
            }
        }

        isbyte = (sz == 1);             // 1 for byte operation

        if (config.target_cpu >= TARGET_80286 &&
            e2.Eoper == OPconst && !isbyte)
        {
            targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
            // the immediate is stored as an int below, so a 64 bit factor
            // that does not survive the round trip takes the register path
            if (I64 && sz == 8 && e2factor != cast(int)e2factor)
                goto L1;
            freenode(e2);
            getlvalue(cdb,&cs,e1,0);     // get EA
            regm_t idxregs = idxregm(&cs);
            retregs = *pretregs & (ALLREGS | mBP) & ~idxregs;
            if (!retregs)
                retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&resreg,tyml);
            cs.Iop = 0x69;                  // IMUL reg,EA,e2value
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = cast(int)e2factor;
            opr = resreg;
        }
        else if (!I16 && !isbyte)
        {
         L1:
            retregs = *pretregs & (ALLREGS | mBP);
            if (!retregs)
                retregs = ALLREGS;
            codelem(cdb,e2,&retregs,false); // load rvalue in reg
            getlvalue(cdb,&cs,e1,retregs);  // get EA
            getregs(cdb,retregs);           // destroy these regs
            cs.Iop = 0x0FAF;                        // IMUL resreg,EA
            resreg = findreg(retregs);
            opr = resreg;
        }
        else
        {
            retregs = mAX;
            codelem(cdb,e2,&retregs,false);         // load rvalue in AX
            getlvalue(cdb,&cs,e1,mAX);              // get EA
            getregs(cdb,isbyte ? mAX : mAX | mDX);  // destroy these regs
            cs.Iop = 0xF7 ^ isbyte;                 // [I]MUL EA
            opr = uns ? 4 : 5;                      // MUL/IMUL
            resreg = AX;                            // result register for *
        }
        // opr is either the destination register or the MUL/IMUL opcode extension
        code_newreg(&cs,opr);
        cdb.gen(&cs);

        opAssStoreReg(cdb, cs, e, resreg, pretregs);
        return;
    }
    else if (sz == 2 * REGSIZE)
    {
        if (e2.Eoper == OPconst && I32)
        {
            /*  if (msw)
                  IMUL    EDX,EDX,lsw
                  IMUL    reg,EAX,msw
                  ADD     reg,EDX
                else
                  IMUL    reg,EDX,lsw
                MOV       EDX,lsw
                MUL       EDX
                ADD       EDX,reg
             */
            // NOTE(review): e2 is still read by el_tolong() below, after this
            // freenode(e2); presumably freenode does not invalidate the node - confirm
            freenode(e2);
            retregs = mDX|mAX;
            reg_t rhi, rlo;
            opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
            const regm_t keepmsk = idxregm(&cs);

            reg_t reg = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));

            targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
            const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1));
            const msw = cast(targ_int)(e2factor >> (REGSIZE * 8));

            if (msw)
            {
                genmulimm(cdb,DX,DX,lsw);           // IMUL EDX,EDX,lsw
                genmulimm(cdb,reg,AX,msw);          // IMUL reg,EAX,msw
                cdb.gen2(0x03,modregrm(3,reg,DX));  // ADD  reg,EDX
            }
            else
                genmulimm(cdb,reg,DX,lsw);          // IMUL reg,EDX,lsw

            movregconst(cdb,DX,lsw,0);              // MOV EDX,lsw
            getregs(cdb,mDX);
            cdb.gen2(0xF7,modregrm(3,4,DX));        // MUL EDX
            cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD EDX,reg
        }
        else
        {
            retregs = mDX | mAX;
            regm_t rretregs = (config.target_cpu >= TARGET_PentiumPro) ? allregs & ~retregs : mCX | mBX;
            codelem(cdb,e2,&rretregs,false);
            getlvalue(cdb,&cs,e1,retregs | rretregs);
            getregs(cdb,retregs);
            cs.Iop = LOD;
            cdb.gen(&cs);                   // MOV AX,EA
            getlvalue_msw(&cs);
            cs.Irm |= modregrm(0,DX,0);
            cdb.gen(&cs);                   // MOV DX,EA+2
            getlvalue_lsw(&cs);
            if (config.target_cpu >= TARGET_PentiumPro)
            {
                regm_t rlo = findreglsw(rretregs);
                regm_t rhi = findregmsw(rretregs);
                /*  IMUL    rhi,EAX
                    IMUL    EDX,rlo
                    ADD     rhi,EDX
                    MUL     rlo
                    ADD     EDX,Erhi
                 */
                getregs(cdb,mAX|mDX|mask(rhi));
                cdb.gen2(0x0FAF,modregrm(3,rhi,AX));
                cdb.gen2(0x0FAF,modregrm(3,DX,rlo));
                cdb.gen2(0x03,modregrm(3,rhi,DX));
                cdb.gen2(0xF7,modregrm(3,4,rlo));
                cdb.gen2(0x03,modregrm(3,DX,rhi));
            }
            else
            {
                callclib(cdb,e,CLIB.lmul,&retregs,idxregm(&cs));
            }
        }

        opAssStorePair(cdb, cs, e, findregmsw(retregs), findreglsw(retregs), pretregs);
        return;
    }
    else
    {
        assert(0);
    }
}


/********************************
 * Generate code for /= %=
 *
 * Dispatch, in order:
 *  - XMM types (not for %=) go to xmmopass()
 *  - other floating types go to opass87() (posix) or opassdbl()
 *  - operands of at most REGSIZE bytes (byte operands assert):
 *      - signed divide by a non power of 2 constant, via multiply
 *      - unsigned divide by a non power of 2 constant, via multiply
 *      - signed divide/modulo by a power of 2, via shifts and masks
 *      - otherwise DIV/IDIV on DX:AX
 *  - operands of 2*REGSIZE bytes: a call to the CLIB long divide routines
 *    (the open-coded power of 2 sequences are currently unreachable,
 *    see the NOTE(review) comments below)
 *
 * Params:
 *      cdb = code is appended here
 *      e = the /= or %= node; e.EV.E1 is the lvalue, e.EV.E2 the divisor
 *      pretregs = mask of registers/flags in which the result is wanted
 */

@trusted
void cddivass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;

    tym_t tyml = tybasic(e1.Ety);          // type of lvalue
    OPER op = e.Eoper;                     // OPxxxx

    // See if evaluate in XMM registers
    if (config.fpxmmregs && tyxmmreg(tyml) && op != OPmodass && !(*pretregs & mST0))
    {
        xmmopass(cdb,e,pretregs);
        return;
    }

    if (tyfloating(tyml))
    {
        if (config.exe & EX_posix)
        {
            opass87(cdb,e,pretregs);
        }
        else
        {
            opassdbl(cdb,e,pretregs,op);
        }
        return;
    }

    code cs = void;

    //printf("cddivass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    char uns = tyuns(tyml) || tyuns(e2.Ety);
    uint sz = _tysize[tyml];

    uint rex = (I64 && sz == 8) ? REX_W : 0;
    uint grex = rex << 16;          // 64 bit operands

    if (sz <= REGSIZE)                  // if word or byte
    {
        uint isbyte = (sz == 1);        // 1 for byte operation
        reg_t resreg;
        targ_size_t e2factor;           // value of e2, set only when e2 is a constant
        targ_size_t d;                  // magnitude of the divisor for the signed case
        bool neg;                       // true if the signed constant divisor is negative
        int pow2;                       // ispow2(e2factor), set only when e2 is a constant

        assert(!isbyte);                      // should never happen
        assert(I16 || sz != SHORTSIZE);

        if (e2.Eoper == OPconst)
        {
            e2factor = cast(targ_size_t)el_tolong(e2);
            pow2 = ispow2(e2factor);
            d = e2factor;
            if (!uns && cast(targ_llong)e2factor < 0)
            {
                neg = true;
                d = -d;
            }
        }

        // Signed divide by a constant
        if (config.flags4 & CFG4speed &&
            e2.Eoper == OPconst &&
            !uns &&
            (d & (d - 1)) &&
            ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
        {
            /* R1 / 10
             *
             *  MOV     EAX,m
             *  IMUL    R1
             *  MOV     EAX,R1
             *  SAR     EAX,31
             *  SAR     EDX,shpost
             *  SUB     EDX,EAX
             *  IMUL    EAX,EDX,d
             *  SUB     R1,EAX
             *
             *  EDX = quotient
             *  R1 = remainder
             */
            assert(sz == 4 || sz == 8);

            ulong m;
            int shpost;
            const int N = sz * 8;
            const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost);

            freenode(e2);

            getlvalue(cdb,&cs,e1,mAX | mDX);
            reg_t reg;
            opAssLoadReg(cdb, cs, e, reg, allregs & ~( mAX | mDX | idxregm(&cs)));    // MOV reg,EA
            getregs(cdb, mAX|mDX);

            /* Algorithm 5.2
             * if m>=2**(N-1)
             *    q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n)
             * else
             *    q = SRA(MULSH(m,n), shpost) - XSIGN(n)
             * if (neg)
             *    q = -q
             */
            const bool mgt = mhighbit || m >= (1UL << (N - 1));
            movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EAX,m
            cdb.gen2(0xF7,grex | modregrmx(3,5,reg));               // IMUL reg
            if (mgt)
                cdb.gen2(0x03,grex | modregrmx(3,DX,reg));          // ADD EDX,reg
            getregsNoSave(mAX);                                     // EAX no longer contains 'm'
            genmovreg(cdb, AX, reg);                                // MOV EAX,reg
            cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1);     // SAR EAX,31
            if (shpost)
                cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost);     // SAR EDX,shpost
            reg_t r3;
            if (neg && op == OPdivass)
            {
                // operands swapped, so the quotient comes out negated
                cdb.gen2(0x2B,grex | modregrm(3,AX,DX));            // SUB EAX,EDX
                r3 = AX;
            }
            else
            {
                cdb.gen2(0x2B,grex | modregrm(3,DX,AX));            // SUB EDX,EAX
                r3 = DX;
            }

            // r3 is quotient
            reg_t resregx;
            switch (op)
            {   case OPdivass:
                    resregx = r3;
                    break;

                case OPmodass:
                    assert(reg != AX && r3 == DX);
                    if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
                    {
                        cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);      // IMUL EAX,EDX,d
                    }
                    else
                    {
                        movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0);      // MOV EAX,d
                        cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));      // IMUL EAX,EDX
                        getregsNoSave(mAX);                              // EAX no longer contains 'd'
                    }
                    cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));           // SUB R1,EAX
                    resregx = reg;
                    break;

                default:
                    assert(0);
            }

            opAssStoreReg(cdb, cs, e, resregx, pretregs);
            return;
        }

        // Unsigned divide by a constant
        // (nested function: uses e, e1, e2, e2factor, op, sz, grex and
        //  pretregs of the enclosing function; declares its own cs)
        void unsignedDivideByConstant(ref CodeBuilder cdb)
        {
            assert(sz == 4 || sz == 8);

            reg_t r3;
            reg_t reg;
            ulong m;
            int shpre;
            int shpost;
            code cs = void;

            if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost))
            {
                /* t1 = MULUH(m, n)
                 * q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
                 *   MOV   EAX,reg
                 *   MOV   EDX,m
                 *   MUL   EDX
                 *   MOV   EAX,reg
                 *   SUB   EAX,EDX
                 *   SHR   EAX,1
                 *   LEA   R3,[EAX][EDX]
                 *   SHR   R3,shpost-1
                 */
                assert(shpre == 0);

                freenode(e2);
                getlvalue(cdb,&cs,e1,mAX | mDX);
                regm_t idxregs = idxregm(&cs);
                opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs));   // MOV reg,EA
                getregs(cdb, mAX|mDX);

                genmovreg(cdb,AX,reg);                                // MOV EAX,reg
                movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
                getregs(cdb,mask(reg) | mDX | mAX);
                cdb.gen2(0xF7,grex | modregrmx(3,4,DX));              // MUL EDX
                genmovreg(cdb,AX,reg);                                // MOV EAX,reg
                cdb.gen2(0x2B,grex | modregrm(3,AX,DX));              // SUB EAX,EDX
                cdb.genc2(0xC1,grex | modregrm(3,5,AX),1);            // SHR EAX,1
                regm_t regm3 = allregs & ~idxregs;
                if (op == OPmodass)
                {
                    // %= below still needs reg (original value), and AX as
                    // well when the divisor must be loaded into it
                    regm3 &= ~mask(reg);
                    if (!el_signx32(e2))
                        regm3 &= ~mAX;
                }
                allocreg(cdb,&regm3,&r3,TYint);
                cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX]
                if (shpost != 1)
                    cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1);   // SHR R3,shpost-1
            }
            else
            {
                /* q = SRL(MULUH(m, SRL(n, shpre)), shpost)
                 *   SHR   EAX,shpre
                 *   MOV   reg,m
                 *   MUL   reg
                 *   SHR   EDX,shpost
                 */

                freenode(e2);
                getlvalue(cdb,&cs,e1,mAX | mDX);
                regm_t idxregs = idxregm(&cs);
                opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs));   // MOV reg,EA
                getregs(cdb, mAX|mDX);

                if (reg != AX)
                {
                    getregs(cdb,mAX);
                    genmovreg(cdb,AX,reg);                              // MOV EAX,reg
                }
                if (shpre)
                {
                    getregs(cdb,mAX);
                    cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre);      // SHR EAX,shpre
                }
                getregs(cdb,mDX);
                movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
                getregs(cdb,mDX | mAX);
                cdb.gen2(0xF7,grex | modregrmx(3,4,DX));                // MUL EDX
                if (shpost)
                    cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost);     // SHR EDX,shpost
                r3 = DX;
            }

            reg_t resregx;
            switch (op)
            {
                case OPdivass:
                    // r3 = quotient
                    resregx = r3;
                    break;

                case OPmodass:
                    /* reg = original value
                     * r3  = quotient
                     */
                    assert(reg != AX);
                    if (el_signx32(e2))
                    {
                        cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
                    }
                    else
                    {
                        assert(!(mask(r3) & mAX));
                        movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0);  // MOV EAX,e2factor
                        getregs(cdb,mAX);
                        cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
                    }
                    getregs(cdb,mask(reg));
                    cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
                    resregx = reg;
                    break;

                default:
                    assert(0);
            }

            opAssStoreReg(cdb, cs, e, resregx, pretregs);
            return;
        }

        if (config.flags4 & CFG4speed &&
            e2.Eoper == OPconst &&
            uns &&
            e2factor > 2 && (e2factor & (e2factor - 1)) &&
            ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
        {
            unsignedDivideByConstant(cdb);
            return;
        }

        if (config.flags4 & CFG4speed &&
            e2.Eoper == OPconst && !uns &&
            (sz == REGSIZE || (I64 && sz == 4)) &&
            pow2 != -1 &&
            e2factor == cast(int)e2factor &&
            !(config.target_cpu < TARGET_80286 && pow2 != 1 && op == OPdivass)
           )
        {
            freenode(e2);
            if (pow2 == 1 && op == OPdivass && config.target_cpu > TARGET_80386)
            {
                /* This is better than the code further down because it is
                 * not constrained to using AX and DX.
                 */
                getlvalue(cdb,&cs,e1,0);
                regm_t idxregs = idxregm(&cs);
                reg_t reg;
                opAssLoadReg(cdb,cs,e,reg,allregs & ~idxregs); // MOV reg,EA

                reg_t r = allocScratchReg(cdb, allregs & ~(idxregs | mask(reg)));
                genmovreg(cdb,r,reg);                        // MOV r,reg
                cdb.genc2(0xC1,grex | modregxrmx(3,5,r),(sz * 8 - 1)); // SHR r,31
                cdb.gen2(0x03,grex | modregxrmx(3,reg,r));   // ADD reg,r
                cdb.gen2(0xD1,grex | modregrmx(3,7,reg));    // SAR reg,1

                opAssStoreReg(cdb, cs, e, reg, pretregs);
                return;
            }

            // Signed divide or modulo by power of 2
            getlvalue(cdb,&cs,e1,mAX | mDX);
            reg_t reg;
            opAssLoadReg(cdb,cs,e,reg,mAX);

            getregs(cdb,mDX);                   // DX is scratch register
            cdb.gen1(0x99);                     // CWD
            code_orrex(cdb.last(), rex);
            if (pow2 == 1)
            {
                if (op == OPdivass)
                {
                    cdb.gen2(0x2B,grex | modregrm(3,AX,DX));      // SUB AX,DX
                    cdb.gen2(0xD1,grex | modregrm(3,7,AX));       // SAR AX,1
                    resreg = AX;
                }
                else // OPmod
                {
                    cdb.gen2(0x33,grex | modregrm(3,AX,DX));      // XOR AX,DX
                    cdb.genc2(0x81,grex | modregrm(3,4,AX),1);    // AND AX,1
                    cdb.gen2(0x03,grex | modregrm(3,DX,AX));      // ADD DX,AX
                    resreg = DX;
                }
            }
            else
            {
                assert(pow2 < 32);
                targ_ulong m = (1 << pow2) - 1;
                if (op == OPdivass)
                {
                    cdb.genc2(0x81,grex | modregrm(3,4,DX),m);    // AND DX,m
                    cdb.gen2(0x03,grex | modregrm(3,AX,DX));      // ADD AX,DX
                    // Be careful not to generate this for 8088
                    assert(config.target_cpu >= TARGET_80286);
                    cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2); // SAR AX,pow2
                    resreg = AX;
                }
                else // OPmodass
                {
                    cdb.gen2(0x33,grex | modregrm(3,AX,DX));      // XOR AX,DX
                    cdb.gen2(0x2B,grex | modregrm(3,AX,DX));      // SUB AX,DX
                    cdb.genc2(0x81,grex | modregrm(3,4,AX),m);    // AND AX,m
                    cdb.gen2(0x33,grex | modregrm(3,AX,DX));      // XOR AX,DX
                    cdb.gen2(0x2B,grex | modregrm(3,AX,DX));      // SUB AX,DX
                    resreg = AX;
                }
            }
        }
        else
        {
            regm_t retregs = ALLREGS & ~(mAX|mDX);      // DX gets sign extension
            codelem(cdb,e2,&retregs,false);             // load rvalue in retregs
            reg_t reg = findreg(retregs);
            getlvalue(cdb,&cs,e1,mAX | mDX | retregs);  // get EA
            getregs(cdb,mAX | mDX);         // destroy these regs
            cs.Irm |= modregrm(0,AX,0);
            cs.Iop = LOD;
            cdb.gen(&cs);                   // MOV AX,EA
            if (uns)                        // if uint
                movregconst(cdb,DX,0,0);    // CLR DX
            else                            // else signed
            {
                cdb.gen1(0x99);             // CWD
                code_orrex(cdb.last(),rex);
            }
            getregs(cdb,mDX | mAX);         // DX and AX will be destroyed
            const uint opr = uns ? 6 : 7;   // DIV/IDIV
            genregs(cdb,0xF7,opr,reg);      // OPR reg
            code_orrex(cdb.last(),rex);
            resreg = (op == OPmodass) ? DX : AX;        // result register
        }
        opAssStoreReg(cdb, cs, e, resreg, pretregs);
        return;
    }

    assert(sz == 2 * REGSIZE);

    targ_size_t e2factor;
    int pow2;
    if (e2.Eoper == OPconst)
    {
        e2factor = cast(targ_size_t)el_tolong(e2);
        pow2 = ispow2(e2factor);
    }

    // Register pair signed divide by power of 2
    // NOTE(review): op is e.Eoper, so requiring both op == OPdivass and
    // e.Eoper == OPconst makes this branch unreachable; e2.Eoper was
    // probably intended. Before enabling it, check the two further notes
    // inside the branch.
    if (op == OPdivass &&
        !uns &&
        e.Eoper == OPconst &&
        pow2 != -1 &&
        I32 // not set up for I16 or I64 cent
       )
    {
        freenode(e2);
        regm_t retregs = mDX|mAX | mCX|mBX;     // LSW must be byte reg because of later SETZ
        reg_t rhi, rlo;
        opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
        const regm_t keepmsk = idxregm(&cs);
        retregs = mask(rhi) | mask(rlo);

        if (pow2 < 32)
        {
            reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));

            genmovreg(cdb,r1,rhi);                                        // MOV  r1,rhi
            if (pow2 == 1)
                cdb.genc2(0xC1,grex | modregrmx(3,5,r1),REGSIZE * 8 - 1); // SHR  r1,31
            else
            {
                cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR  r1,31
                cdb.genc2(0x81,grex | modregrmx(3,4,r1),(1 << pow2) - 1); // AND  r1,mask
            }
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD  rlo,r1
            cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC  rhi,0
            cdb.genc2(0x0FAC,grex | modregrm(3,rhi,rlo),pow2);            // SHRD rlo,rhi,pow2
            cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),pow2);               // SAR  rhi,pow2
        }
        else if (pow2 == 32)
        {
            reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));

            genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD rlo,r1
            cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC rhi,0
            cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
        }
        else if (pow2 < 63)
        {
            reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
            reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk | mask(r1)));

            genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
            cdb.genmovreg(r2,r1);                                         // MOV r2,r1

            if (pow2 == 33)
            {
                cdb.gen2(0xF7,modregrmx(3,3,r1));                         // NEG r1
                cdb.gen2(0x03,grex | modregxrmx(3,rlo,r2));               // ADD rlo,r2
                cdb.gen2(0x13,grex | modregxrmx(3,rhi,r1));               // ADC rhi,r1
            }
            else
            {
                cdb.genc2(0x81,grex | modregrmx(3,4,r2),(1 << (pow2-32)) - 1); // AND r2,mask
                cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                    // ADD rlo,r1
                cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                    // ADC rhi,r2
            }

            cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,rlo),pow2 - 32);          // SAR rlo,pow2-32
            cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
        }
        else
        {
            // This may be better done by cgelem.d
            assert(pow2 == 63);
            assert(mask(rlo) & BYTEREGS);                          // for SETZ
            // NOTE(review): the comment says ADD, but 0x81 with reg field 4
            // is commented as AND everywhere else in this function - confirm
            // which is intended before enabling this branch
            cdb.genc2(0x81,grex | modregrmx(3,4,rhi),0x8000_0000); // ADD rhi,0x8000_0000
            cdb.genregs(0x09,rlo,rhi);                             // OR  rlo,rhi
            cdb.gen2(0x0F94,modregrmx(3,0,rlo));                   // SETZ rlo
            cdb.genregs(MOVZXb,rlo,rlo);                           // MOVZX rlo,rloL
            movregconst(cdb,rhi,0,0);                              // MOV rhi,0
        }

        // NOTE(review): the other opAssStorePair calls in this file pass
        // (msw, lsw); here the order is (rlo, rhi) - confirm
        opAssStorePair(cdb, cs, e, rlo, rhi, pretregs);
        return;
    }

    // Register pair signed modulo by power of 2
    // NOTE(review): unreachable for the same reason as the divide case
    // above (e.Eoper cannot be both OPmodass and OPconst)
    if (op == OPmodass &&
        !uns &&
        e.Eoper == OPconst &&
        pow2 != -1 &&
        I32 // not set up for I64 cent yet
       )
    {
        freenode(e2);
        regm_t retregs = mDX|mAX;
        reg_t rhi, rlo;
        opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
        const regm_t keepmsk = idxregm(&cs);

        regm_t scratchm = allregs & ~(retregs | keepmsk);
        if (pow2 == 63)
            scratchm &= BYTEREGS;               // because of SETZ
        reg_t r1 = allocScratchReg(cdb, scratchm);

        if (pow2 < 32)
        {
            cdb.genmovreg(r1,rhi);                                    // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31
            cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
            cdb.genc2(0x81,grex | modregrmx(3,4,rlo),(1<<pow2)-1);    // AND rlo,(1<<pow2)-1
            cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
            cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));              // SBB rhi,rhi
        }
        else if (pow2 == 32)
        {
            cdb.genmovreg(r1,rhi);                                      // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR r1,31
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD rlo,r1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB rlo,r1
            cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));                // SBB rhi,rhi
        }
        else if (pow2 < 63)
        {
            scratchm = allregs & ~(retregs | scratchm);
            reg_t r2;
            allocreg(cdb,&scratchm,&r2,TYint);

            cdb.genmovreg(r1,rhi);                                      // MOV  r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR  r1,31
            cdb.genmovreg(r2,r1);                                       // MOV  r2,r1
            cdb.genc2(0x0FAC,grex | modregrm(3,r2,r1),64-pow2);         // SHRD r1,r2,64-pow2
            cdb.genc2(0xC1,grex | modregrmx(3,5,r2),64-pow2);           // SHR  r2,64-pow2
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD  rlo,r1
            cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                 // ADC  rhi,r2
            cdb.genc2(0x81,grex | modregrmx(3,4,rhi),(1<<(pow2-32))-1); // AND  rhi,(1<<(pow2-32))-1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB  rlo,r1
            cdb.gen2(0x1B,grex | modregxrmx(3,rhi,r2));                 // SBB  rhi,r2
        }
        else
        {
            // This may be better done by cgelem.d
            assert(pow2 == 63);

            cdb.genc1(LEA,grex | modregxrmx(2,r1,rhi), FLconst, 0x8000_0000); // LEA r1,0x8000_0000[rhi]
            cdb.gen2(0x0B,grex | modregxrmx(3,r1,rlo));               // OR   r1,rlo
            cdb.gen2(0x0F94,modregrmx(3,0,r1));                       // SETZ r1
            cdb.genc2(0xC1,grex | modregrmx(3,4,r1),REGSIZE * 8 - 1); // SHL  r1,31
            cdb.gen2(0x2B,grex | modregxrmx(3,rhi,r1));               // SUB  rhi,r1
        }

        // NOTE(review): argument order (rlo, rhi) differs from the
        // (msw, lsw) order used by the call at the end of this function - confirm
        opAssStorePair(cdb, cs, e, rlo, rhi, pretregs);
        return;
    }

    regm_t rretregs = mCX|mBX;
    codelem(cdb,e2,&rretregs,false);    // load e2 into CX|BX

    reg_t rlo;
    reg_t rhi;
    opAssLoadPair(cdb, cs, e, rhi, rlo, mDX|mAX, rretregs);

    // the library routine's result is taken from CX|BX for %=, DX|AX for /=
    regm_t retregs = (op == OPmodass) ? mCX|mBX : mDX|mAX;
    uint lib = uns ? CLIB.uldiv : CLIB.ldiv;
    if (op == OPmodass)
        ++lib;                          // select the CLIB entry following the divide one
    callclib(cdb,e,lib,&retregs,idxregm(&cs));

    opAssStorePair(cdb, cs, e, findregmsw(retregs), findreglsw(retregs), pretregs);
}


/********************************
 * Generate code for <<= and >>=
 *
 * The shift is done directly on the lvalue in memory when no register
 * result is wanted, when only the flags of a constant shift of at most
 * REGSIZE bytes are wanted, or when the operand is larger than REGSIZE
 * (in which case the result is reloaded afterwards if needed).
 * Otherwise the lvalue is loaded into a register, shifted there and
 * stored back with opAssStoreReg().
 *
 * Params:
 *      cdb = code is appended here
 *      e = the shift-assign node (OPshlass, OPshrass or OPashrass);
 *          e.EV.E1 is the lvalue, e.EV.E2 the shift count
 *      pretregs = mask of registers/flags in which the result is wanted;
 *                 updated with where the result actually is
 */

@trusted
void cdshass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    uint op1,op2;

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;

    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    uint sz = _tysize[tyml];
    uint isbyte = tybyte(e.Ety) != 0;          // 1 for byte operations
    tym_t tym = tybasic(e.Ety);                // type of result
    OPER oper = e.Eoper;
    assert(tysize(e2.Ety) <= REGSIZE);

    uint rex = (I64 && sz == 8) ? REX_W : 0;

    // if our lvalue is a cse, make sure we evaluate for result in register
    regm_t retregs;
    reg_t reg;
    if (e1.Ecount && !(*pretregs & (ALLREGS | mBP)) && !isregvar(e1,retregs,reg))
        *pretregs |= ALLREGS;

    // Select opcodes. op2 is used for msw for long shifts.

    switch (oper)
    {
        case OPshlass:
            op1 = 4;                    // SHL
            op2 = 2;                    // RCL
            break;

        case OPshrass:
            op1 = 5;                    // SHR
            op2 = 3;                    // RCR
            break;

        case OPashrass:
            op1 = 7;                    // SAR
            op2 = 3;                    // RCR
            break;

        default:
            assert(0);
    }


    uint v = 0xD3;                  // for SHIFT xx,CL cases
    uint loopcnt = 1;
    uint conste2 = false;
    uint shiftcnt = 0;              // avoid "use before initialized" warnings
    if (e2.Eoper == OPconst)
    {
        conste2 = true;             // e2 is a constant
        shiftcnt = e2.EV.Vint;      // byte ordering of host
        if (config.target_cpu >= TARGET_80286 &&
            sz <= REGSIZE &&
            shiftcnt != 1)
            v = 0xC1;               // SHIFT xx,shiftcnt
        else if (shiftcnt <= 3)
        {
            loopcnt = shiftcnt;
            v = 0xD1;               // SHIFT xx,1
        }
    }

    if (v == 0xD3)                  // if COUNT == CL
    {
        retregs = mCX;
        codelem(cdb,e2,&retregs,false);
    }
    else
        freenode(e2);
    getlvalue(cdb,&cs,e1,mCX);          // get lvalue, preserve CX
    modEA(cdb,&cs);             // check for modifying register

    if (*pretregs == 0 ||               // if don't return result
        (*pretregs == mPSW && conste2 && _tysize[tym] <= REGSIZE) ||
        sz > REGSIZE
       )
    {
        retregs = 0;            // value not returned in a register
        cs.Iop = v ^ isbyte;
        while (loopcnt--)
        {
            NEWREG(cs.Irm,op1);           // make sure op1 is first
            if (sz <= REGSIZE)
            {
                if (conste2)
                {
                    cs.IFL2 = FLconst;
                    cs.IEV2.Vint = shiftcnt;
                }
                cdb.gen(&cs);             // SHIFT EA,[CL|1]
                if (*pretregs & mPSW && !loopcnt && conste2)
                    code_orflag(cdb.last(),CFpsw);
            }
            else // TYlong
            {
                cs.Iop = 0xD1;            // plain shift
                code *ce = gennop(null);                  // ce: NOP
                if (v == 0xD3)
                {
                    getregs(cdb,mCX);
                    if (!conste2)
                    {
                        assert(loopcnt == 0);
                        genjmp(cdb,JCXZ,FLcode,cast(block *) ce);   // JCXZ ce
                    }
                }
                code *cg;
                if (oper == OPshlass)
                {
                    // left shift: lsw first, then rotate the carry into the msw
                    cdb.gen(&cs);               // cg: SHIFT EA
                    cg = cdb.last();
                    code_orflag(cg,CFpsw);
                    getlvalue_msw(&cs);
                    NEWREG(cs.Irm,op2);
                    cdb.gen(&cs);               // SHIFT EA
                    getlvalue_lsw(&cs);
                }
                else
                {
                    // right shift: msw first, then rotate the carry into the lsw
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);
                    cg = cdb.last();
                    code_orflag(cg,CFpsw);
                    NEWREG(cs.Irm,op2);
                    getlvalue_lsw(&cs);
                    cdb.gen(&cs);
                }
                if (v == 0xD3)                    // if building a loop
                {
                    genjmp(cdb,LOOP,FLcode,cast(block *) cg); // LOOP cg
                    regimmed_set(CX,0);           // note that now CX == 0
                }
                cdb.append(ce);
            }
        }

        // If we want the result, we must load it from the EA
        // into a register.

        if (sz == 2 * REGSIZE && *pretregs)
        {
            retregs = *pretregs & (ALLREGS | mBP);
            if (retregs)
            {
                retregs &= ~idxregm(&cs);
                allocreg(cdb,&retregs,&reg,tym);
                cs.Iop = LOD;

                // be careful not to trash any index regs
                // do MSW first (which can't be an index reg)
                getlvalue_msw(&cs);
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);
                reg = findreglsw(retregs);
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);
                if (*pretregs & mPSW)
                    tstresult(cdb,retregs,tyml,true);
            }
            else        // flags only
            {
                retregs = ALLREGS & ~idxregm(&cs);
                allocreg(cdb,&retregs,&reg,TYint);
                cs.Iop = LOD;
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);           // MOV reg,EA
                cs.Iop = 0x0B;          // OR reg,EA+2
                cs.Iflags |= CFpsw;
                getlvalue_msw(&cs);
                cdb.gen(&cs);
            }
        }
        if (e1.Ecount && !(retregs & regcon.mvar))   // if lvalue is a CSE
            cssave(e1,retregs,!OTleaf(e1.Eoper));
        freenode(e1);
        *pretregs = retregs;
        return;
    }
    else                                // else must evaluate in register
    {
        if (sz <= REGSIZE)
        {
            regm_t possregs = ALLREGS & ~mCX & ~idxregm(&cs);
            if (isbyte)
                possregs &= BYTEREGS;
            retregs = *pretregs & possregs;
            if (retregs == 0)
                retregs = possregs;
            allocreg(cdb,&retregs,&reg,tym);
            cs.Iop = LOD ^ isbyte;
            code_newreg(&cs, reg);
            if (isbyte && I64 && (reg >= 4))
                cs.Irex |= REX;
            cdb.gen(&cs);                     // MOV reg,EA
            if (!I16)
            {
                assert(!isbyte || (mask(reg) & BYTEREGS));
                cdb.genc2(v ^ isbyte,modregrmx(3,op1,reg),shiftcnt);
                if (isbyte && I64 && (reg >= 4))
                    cdb.last().Irex |= REX;
                code_orrex(cdb.last(), rex);
                // We can do a 32 bit shift on a 16 bit operand if
                // it's a left shift and we're not concerned about
                // the flags. Remember that flags are not set if
                // a shift of 0 occurs.
                if (_tysize[tym] == SHORTSIZE &&
                    (oper == OPshrass || oper == OPashrass ||
                     (*pretregs & mPSW && conste2)))
                     cdb.last().Iflags |= CFopsize;            // 16 bit operand
            }
            else
            {
                while (loopcnt--)
                {   // Generate shift instructions.
                    cdb.genc2(v ^ isbyte,modregrm(3,op1,reg),shiftcnt);
                }
            }
            if (*pretregs & mPSW && conste2)
            {
                assert(shiftcnt);
                *pretregs &= ~mPSW;     // result is already in flags
                code_orflag(cdb.last(),CFpsw);
            }

            opAssStoreReg(cdb,cs,e,reg,pretregs);
            return;
        }
        assert(0);
    }
}


/**********************************
 * Generate code for compares.
 * Handles lt,gt,le,ge,eqeq,ne for all data types.
 */

@trusted
void cdcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs,rretregs;
    reg_t reg,rreg;
    int fl;

    //printf("cdcmp(e = %p, pretregs = %s)\n",e,regm_str(*pretregs));
    // Collect extra parameter. This is pretty ugly...
2520 int flag = cdcmp_flag; 2521 cdcmp_flag = 0; 2522 2523 elem *e1 = e.EV.E1; 2524 elem *e2 = e.EV.E2; 2525 if (*pretregs == 0) // if don't want result 2526 { 2527 codelem(cdb,e1,pretregs,false); 2528 *pretregs = 0; // in case e1 changed it 2529 codelem(cdb,e2,pretregs,false); 2530 return; 2531 } 2532 2533 if (tyvector(tybasic(e1.Ety))) 2534 return orthxmm(cdb,e,pretregs); 2535 2536 uint jop = jmpopcode(e); // must be computed before 2537 // leaves are free'd 2538 uint reverse = 0; 2539 2540 OPER op = e.Eoper; 2541 assert(OTrel(op)); 2542 bool eqorne = (op == OPeqeq) || (op == OPne); 2543 2544 tym_t tym = tybasic(e1.Ety); 2545 uint sz = _tysize[tym]; 2546 uint isbyte = sz == 1; 2547 2548 uint rex = (I64 && sz == 8) ? REX_W : 0; 2549 uint grex = rex << 16; // 64 bit operands 2550 2551 code cs; 2552 code *ce; 2553 if (tyfloating(tym)) // if floating operation 2554 { 2555 if (config.fpxmmregs) 2556 { 2557 retregs = mPSW; 2558 if (tyxmmreg(tym)) 2559 orthxmm(cdb,e,&retregs); 2560 else 2561 orth87(cdb,e,&retregs); 2562 } 2563 else if (config.inline8087) 2564 { retregs = mPSW; 2565 orth87(cdb,e,&retregs); 2566 } 2567 else 2568 { 2569 if (config.exe & EX_windos) 2570 { 2571 int clib; 2572 2573 retregs = 0; /* skip result for now */ 2574 if (iffalse(e2)) /* second operand is constant 0 */ 2575 { 2576 assert(!eqorne); /* should be OPbool or OPnot */ 2577 if (tym == TYfloat) 2578 { 2579 retregs = FLOATREGS; 2580 clib = CLIB.ftst0; 2581 } 2582 else 2583 { 2584 retregs = DOUBLEREGS; 2585 clib = CLIB.dtst0; 2586 } 2587 if (rel_exception(op)) 2588 clib += CLIB.dtst0exc - CLIB.dtst0; 2589 codelem(cdb,e1,&retregs,false); 2590 retregs = 0; 2591 callclib(cdb,e,clib,&retregs,0); 2592 freenode(e2); 2593 } 2594 else 2595 { 2596 clib = CLIB.dcmp; 2597 if (rel_exception(op)) 2598 clib += CLIB.dcmpexc - CLIB.dcmp; 2599 opdouble(cdb,e,&retregs,clib); 2600 } 2601 } 2602 else 2603 { 2604 assert(0); 2605 } 2606 } 2607 goto L3; 2608 } 2609 2610 /* If it's a signed comparison of longs, we have 
to call a library */ 2611 /* routine, because we don't know the target of the signed branch */ 2612 /* (have to set up flags so that jmpopcode() will do it right) */ 2613 if (!eqorne && 2614 (I16 && tym == TYlong && tybasic(e2.Ety) == TYlong || 2615 I32 && tym == TYllong && tybasic(e2.Ety) == TYllong) 2616 ) 2617 { 2618 assert(jop != JC && jop != JNC); 2619 retregs = mDX | mAX; 2620 codelem(cdb,e1,&retregs,false); 2621 retregs = mCX | mBX; 2622 scodelem(cdb,e2,&retregs,mDX | mAX,false); 2623 2624 if (I16) 2625 { 2626 retregs = 0; 2627 callclib(cdb,e,CLIB.lcmp,&retregs,0); // gross, but it works 2628 } 2629 else 2630 { 2631 /* Generate: 2632 * CMP EDX,ECX 2633 * JNE C1 2634 * XOR EDX,EDX 2635 * CMP EAX,EBX 2636 * JZ C1 2637 * JA C3 2638 * DEC EDX 2639 * JMP C1 2640 * C3: INC EDX 2641 * C1: 2642 */ 2643 getregs(cdb,mDX); 2644 genregs(cdb,0x39,CX,DX); // CMP EDX,ECX 2645 code *c1 = gennop(null); 2646 genjmp(cdb,JNE,FLcode,cast(block *)c1); // JNE C1 2647 movregconst(cdb,DX,0,0); // XOR EDX,EDX 2648 genregs(cdb,0x39,BX,AX); // CMP EAX,EBX 2649 genjmp(cdb,JE,FLcode,cast(block *)c1); // JZ C1 2650 code *c3 = gen1(null,0x40 + DX); // INC EDX 2651 genjmp(cdb,JA,FLcode,cast(block *)c3); // JA C3 2652 cdb.gen1(0x48 + DX); // DEC EDX 2653 genjmp(cdb,JMPS,FLcode,cast(block *)c1); // JMP C1 2654 cdb.append(c3); 2655 cdb.append(c1); 2656 getregs(cdb,mDX); 2657 retregs = mPSW; 2658 } 2659 goto L3; 2660 } 2661 2662 /* See if we should reverse the comparison, so a JA => JC, and JBE => JNC 2663 * (This is already reflected in the jop) 2664 */ 2665 if ((jop == JC || jop == JNC) && 2666 (op == OPgt || op == OPle) && 2667 (tyuns(tym) || tyuns(e2.Ety)) 2668 ) 2669 { // jmpopcode() sez comparison should be reversed 2670 assert(e2.Eoper != OPconst && e2.Eoper != OPrelconst); 2671 reverse ^= 2; 2672 } 2673 2674 /* See if we should swap operands */ 2675 if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2)) 2676 { 2677 e1 = e.EV.E2; 2678 e2 = e.EV.E1; 2679 reverse ^= 2; 2680 } 
2681 2682 retregs = allregs; 2683 if (isbyte) 2684 retregs = BYTEREGS; 2685 2686 ce = null; 2687 cs.Iflags = (!I16 && sz == SHORTSIZE) ? CFopsize : 0; 2688 cs.Irex = cast(ubyte)rex; 2689 if (sz > REGSIZE) 2690 ce = gennop(ce); 2691 2692 switch (e2.Eoper) 2693 { 2694 default: 2695 L2: 2696 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 2697 rretregs = allregs & ~retregs; 2698 if (isbyte) 2699 rretregs &= BYTEREGS; 2700 scodelem(cdb,e2,&rretregs,retregs,true); // get right leaf 2701 if (sz <= REGSIZE) // CMP reg,rreg 2702 { 2703 reg = findreg(retregs); // get reg that e1 is in 2704 rreg = findreg(rretregs); 2705 genregs(cdb,0x3B ^ isbyte ^ reverse,reg,rreg); 2706 code_orrex(cdb.last(), rex); 2707 if (!I16 && sz == SHORTSIZE) 2708 cdb.last().Iflags |= CFopsize; // compare only 16 bits 2709 if (I64 && isbyte && (reg >= 4 || rreg >= 4)) 2710 cdb.last().Irex |= REX; // address byte registers 2711 } 2712 else 2713 { 2714 assert(sz <= 2 * REGSIZE); 2715 2716 // Compare MSW, if they're equal then compare the LSW 2717 reg = findregmsw(retregs); 2718 rreg = findregmsw(rretregs); 2719 genregs(cdb,0x3B ^ reverse,reg,rreg); // CMP reg,rreg 2720 if (I32 && sz == 6) 2721 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 2722 else if (I64) 2723 code_orrex(cdb.last(), REX_W); 2724 genjmp(cdb,JNE,FLcode,cast(block *) ce); // JNE nop 2725 2726 reg = findreglsw(retregs); 2727 rreg = findreglsw(rretregs); 2728 genregs(cdb,0x3B ^ reverse,reg,rreg); // CMP reg,rreg 2729 if (I64) 2730 code_orrex(cdb.last(), REX_W); 2731 } 2732 break; 2733 2734 case OPrelconst: 2735 if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) 2736 goto L2; 2737 fl = el_fl(e2); 2738 switch (fl) 2739 { 2740 case FLfunc: 2741 fl = FLextern; // so it won't be self-relative 2742 break; 2743 2744 case FLdata: 2745 case FLudata: 2746 case FLextern: 2747 if (sz > REGSIZE) // compare against DS, not DGROUP 2748 goto L2; 2749 break; 2750 2751 case FLfardata: 2752 break; 2753 2754 default: 2755 goto L2; 
2756 } 2757 cs.IFL2 = cast(ubyte)fl; 2758 cs.IEV2.Vsym = e2.EV.Vsym; 2759 if (sz > REGSIZE) 2760 { 2761 cs.Iflags |= CFseg; 2762 cs.IEV2.Voffset = 0; 2763 } 2764 else 2765 { 2766 cs.Iflags |= CFoff; 2767 cs.IEV2.Voffset = e2.EV.Voffset; 2768 } 2769 goto L4; 2770 2771 case OPconst: 2772 // If compare against 0 2773 { 2774 if (sz <= REGSIZE && *pretregs == mPSW && !boolres(e2) && 2775 isregvar(e1,retregs,reg) 2776 ) 2777 { // Just do a TEST instruction 2778 genregs(cdb,0x85 ^ isbyte,reg,reg); // TEST reg2,reg2 2779 cdb.last().Iflags |= (cs.Iflags & CFopsize) | CFpsw; 2780 code_orrex(cdb.last(), rex); 2781 if (I64 && isbyte && reg >= 4) 2782 cdb.last().Irex |= REX; // address byte registers 2783 retregs = mPSW; 2784 break; 2785 } 2786 } 2787 2788 if (!tyuns(tym) && !tyuns(e2.Ety) && 2789 !boolres(e2) && !(*pretregs & mPSW) && 2790 (sz == REGSIZE || (I64 && sz == 4)) && 2791 (!I16 || op == OPlt || op == OPge)) 2792 { 2793 assert(*pretregs & (allregs)); 2794 codelem(cdb,e1,pretregs,false); 2795 reg = findreg(*pretregs); 2796 getregs(cdb,mask(reg)); 2797 switch (op) 2798 { 2799 case OPle: 2800 cdb.genc2(0x81,grex | modregrmx(3,0,reg),cast(uint)-1); // ADD reg,-1 2801 code_orflag(cdb.last(), CFpsw); 2802 cdb.genc2(0x81,grex | modregrmx(3,2,reg),0); // ADC reg,0 2803 goto oplt; 2804 2805 case OPgt: 2806 cdb.gen2(0xF7,grex | modregrmx(3,3,reg)); // NEG reg 2807 /* Flips the sign bit unless the value is 0 or int.min. 2808 Also sets the carry bit when the value is not 0. */ 2809 code_orflag(cdb.last(), CFpsw); 2810 cdb.genc2(0x81,grex | modregrmx(3,3,reg),0); // SBB reg,0 2811 /* Subtracts the carry bit. This turns int.min into 2812 int.max, flipping the sign bit. 2813 For other negative and positive values, subtracting 1 2814 doesn't affect the sign bit. 2815 For 0, the carry bit is not set, so this does nothing 2816 and the sign bit is not affected. */ 2817 goto oplt; 2818 2819 case OPlt: 2820 oplt: 2821 // Get the sign bit, i.e. 1 if the value is negative. 
2822 if (!I16) 2823 cdb.genc2(0xC1,grex | modregrmx(3,5,reg),sz * 8 - 1); // SHR reg,31 2824 else 2825 { /* 8088-286 do not have a barrel shifter, so use this 2826 faster sequence 2827 */ 2828 genregs(cdb,0xD1,0,reg); // ROL reg,1 2829 reg_t regi; 2830 if (reghasvalue(allregs,1,regi)) 2831 genregs(cdb,0x23,reg,regi); // AND reg,regi 2832 else 2833 cdb.genc2(0x81,modregrm(3,4,reg),1); // AND reg,1 2834 } 2835 break; 2836 2837 case OPge: 2838 genregs(cdb,0xD1,4,reg); // SHL reg,1 2839 code_orrex(cdb.last(),rex); 2840 code_orflag(cdb.last(), CFpsw); 2841 genregs(cdb,0x19,reg,reg); // SBB reg,reg 2842 code_orrex(cdb.last(),rex); 2843 if (I64) 2844 { 2845 cdb.gen2(0xFF,modregrmx(3,0,reg)); // INC reg 2846 code_orrex(cdb.last(), rex); 2847 } 2848 else 2849 cdb.gen1(0x40 + reg); // INC reg 2850 break; 2851 2852 default: 2853 assert(0); 2854 } 2855 freenode(e2); 2856 goto ret; 2857 } 2858 2859 cs.IFL2 = FLconst; 2860 if (sz == 16) 2861 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vcent.hi; 2862 else if (sz > REGSIZE) 2863 cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong); 2864 else 2865 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vllong; 2866 2867 // The cmp immediate relies on sign extension of the 32 bit immediate value 2868 if (I64 && sz >= REGSIZE && cs.IEV2.Vsize_t != cast(int)cs.IEV2.Vint) 2869 goto L2; 2870 L4: 2871 cs.Iop = 0x81 ^ isbyte; 2872 2873 /* if ((e1 is data or a '*' reference) and it's not a 2874 * common subexpression 2875 */ 2876 2877 if ((e1.Eoper == OPvar && datafl[el_fl(e1)] || 2878 e1.Eoper == OPind) && 2879 !evalinregister(e1)) 2880 { 2881 getlvalue(cdb,&cs,e1,RMload); 2882 freenode(e1); 2883 if (evalinregister(e2)) 2884 { 2885 retregs = idxregm(&cs); 2886 if ((cs.Iflags & CFSEG) == CFes) 2887 retregs |= mES; // take no chances 2888 rretregs = allregs & ~retregs; 2889 if (isbyte) 2890 rretregs &= BYTEREGS; 2891 scodelem(cdb,e2,&rretregs,retregs,true); 2892 cs.Iop = 0x39 ^ isbyte ^ reverse; 2893 if (sz > REGSIZE) 2894 { 2895 rreg = findregmsw(rretregs); 2896 
cs.Irm |= modregrm(0,rreg,0); 2897 getlvalue_msw(&cs); 2898 cdb.gen(&cs); // CMP EA+2,rreg 2899 if (I32 && sz == 6) 2900 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 2901 if (I64 && isbyte && rreg >= 4) 2902 cdb.last().Irex |= REX; 2903 genjmp(cdb,JNE,FLcode,cast(block *) ce); // JNE nop 2904 rreg = findreglsw(rretregs); 2905 NEWREG(cs.Irm,rreg); 2906 getlvalue_lsw(&cs); 2907 } 2908 else 2909 { 2910 rreg = findreg(rretregs); 2911 code_newreg(&cs, rreg); 2912 if (I64 && isbyte && rreg >= 4) 2913 cs.Irex |= REX; 2914 } 2915 } 2916 else 2917 { 2918 cs.Irm |= modregrm(0,7,0); 2919 if (sz > REGSIZE) 2920 { 2921 if (sz == 6) 2922 assert(0); 2923 if (e2.Eoper == OPrelconst) 2924 { cs.Iflags = (cs.Iflags & ~(CFoff | CFseg)) | CFseg; 2925 cs.IEV2.Voffset = 0; 2926 } 2927 getlvalue_msw(&cs); 2928 cdb.gen(&cs); // CMP EA+2,const 2929 if (!I16 && sz == 6) 2930 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 2931 genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE nop 2932 if (e2.Eoper == OPconst) 2933 cs.IEV2.Vint = cast(int)e2.EV.Vllong; 2934 else if (e2.Eoper == OPrelconst) 2935 { // Turn off CFseg, on CFoff 2936 cs.Iflags ^= CFseg | CFoff; 2937 cs.IEV2.Voffset = e2.EV.Voffset; 2938 } 2939 else 2940 assert(0); 2941 getlvalue_lsw(&cs); 2942 } 2943 freenode(e2); 2944 } 2945 cdb.gen(&cs); 2946 break; 2947 } 2948 2949 regm_t regmx; 2950 reg_t regx; 2951 if (evalinregister(e2) && !OTassign(e1.Eoper) && 2952 !isregvar(e1,regmx,regx)) 2953 { 2954 regm_t m; 2955 2956 m = allregs & ~regcon.mvar; 2957 if (isbyte) 2958 m &= BYTEREGS; 2959 if (m & (m - 1)) // if more than one free register 2960 goto L2; 2961 } 2962 if ((e1.Eoper == OPstrcmp || (OTassign(e1.Eoper) && sz <= REGSIZE)) && 2963 !boolres(e2) && !evalinregister(e1)) 2964 { 2965 retregs = mPSW; 2966 scodelem(cdb,e1,&retregs,0,false); 2967 freenode(e2); 2968 break; 2969 } 2970 if (sz <= REGSIZE && !boolres(e2) && e1.Eoper == OPadd && *pretregs == mPSW) 2971 { 2972 retregs |= mPSW; 2973 
scodelem(cdb,e1,&retregs,0,false); 2974 freenode(e2); 2975 break; 2976 } 2977 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 2978 if (sz == 1) 2979 { 2980 reg = findreg(retregs & allregs); // get reg that e1 is in 2981 cs.Irm = modregrm(3,7,reg & 7); 2982 if (reg & 8) 2983 cs.Irex |= REX_B; 2984 if (e1.Eoper == OPvar && e1.EV.Voffset == 1 && e1.EV.Vsym.Sfl == FLreg) 2985 { assert(reg < 4); 2986 cs.Irm |= 4; // use upper register half 2987 } 2988 if (I64 && reg >= 4) 2989 cs.Irex |= REX; // address byte registers 2990 } 2991 else if (sz <= REGSIZE) 2992 { // CMP reg,const 2993 reg = findreg(retregs & allregs); // get reg that e1 is in 2994 rretregs = allregs & ~retregs; 2995 if (cs.IFL2 == FLconst && reghasvalue(rretregs,cs.IEV2.Vint,rreg)) 2996 { 2997 genregs(cdb,0x3B,reg,rreg); 2998 code_orrex(cdb.last(), rex); 2999 if (!I16) 3000 cdb.last().Iflags |= cs.Iflags & CFopsize; 3001 freenode(e2); 3002 break; 3003 } 3004 cs.Irm = modregrm(3,7,reg & 7); 3005 if (reg & 8) 3006 cs.Irex |= REX_B; 3007 } 3008 else if (sz <= 2 * REGSIZE) 3009 { 3010 reg = findregmsw(retregs); // get reg that e1 is in 3011 cs.Irm = modregrm(3,7,reg); 3012 cdb.gen(&cs); // CMP reg,MSW 3013 if (I32 && sz == 6) 3014 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 3015 genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE ce 3016 3017 reg = findreglsw(retregs); 3018 cs.Irm = modregrm(3,7,reg); 3019 if (e2.Eoper == OPconst) 3020 cs.IEV2.Vint = e2.EV.Vlong; 3021 else if (e2.Eoper == OPrelconst) 3022 { // Turn off CFseg, on CFoff 3023 cs.Iflags ^= CFseg | CFoff; 3024 cs.IEV2.Voffset = e2.EV.Voffset; 3025 } 3026 else 3027 assert(0); 3028 } 3029 else 3030 assert(0); 3031 cdb.gen(&cs); // CMP sucreg,LSW 3032 freenode(e2); 3033 break; 3034 3035 case OPind: 3036 if (e2.Ecount) 3037 goto L2; 3038 goto L5; 3039 3040 case OPvar: 3041 if (config.exe & (EX_OSX | EX_OSX64)) 3042 { 3043 if (movOnly(e2)) 3044 goto L2; 3045 } 3046 if ((e1.Eoper == OPvar && 3047 isregvar(e2,rretregs,reg) && 3048 sz <= 
REGSIZE 3049 ) || 3050 (e1.Eoper == OPind && 3051 isregvar(e2,rretregs,reg) && 3052 !evalinregister(e1) && 3053 sz <= REGSIZE 3054 ) 3055 ) 3056 { 3057 // CMP EA,e2 3058 getlvalue(cdb,&cs,e1,RMload); 3059 freenode(e1); 3060 cs.Iop = 0x39 ^ isbyte ^ reverse; 3061 code_newreg(&cs,reg); 3062 if (I64 && isbyte && reg >= 4) 3063 cs.Irex |= REX; // address byte registers 3064 cdb.gen(&cs); 3065 freenode(e2); 3066 break; 3067 } 3068 L5: 3069 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 3070 if (sz <= REGSIZE) // CMP reg,EA 3071 { 3072 reg = findreg(retregs & allregs); // get reg that e1 is in 3073 uint opsize = cs.Iflags & CFopsize; 3074 loadea(cdb,e2,&cs,0x3B ^ isbyte ^ reverse,reg,0,RMload | retregs,0); 3075 code_orflag(cdb.last(),opsize); 3076 } 3077 else if (sz <= 2 * REGSIZE) 3078 { 3079 reg = findregmsw(retregs); // get reg that e1 is in 3080 // CMP reg,EA 3081 loadea(cdb,e2,&cs,0x3B ^ reverse,reg,REGSIZE,RMload | retregs,0); 3082 if (I32 && sz == 6) 3083 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 3084 genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE ce 3085 reg = findreglsw(retregs); 3086 if (e2.Eoper == OPind) 3087 { 3088 NEWREG(cs.Irm,reg); 3089 getlvalue_lsw(&cs); 3090 cdb.gen(&cs); 3091 } 3092 else 3093 loadea(cdb,e2,&cs,0x3B ^ reverse,reg,0,RMload | retregs,0); 3094 } 3095 else 3096 assert(0); 3097 freenode(e2); 3098 break; 3099 } 3100 cdb.append(ce); 3101 3102 L3: 3103 if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register 3104 { 3105 if (config.target_cpu >= TARGET_80386 && !flag && !(jop & 0xFF00)) 3106 { 3107 regm_t resregs = retregs; 3108 if (!I64) 3109 { 3110 resregs &= BYTEREGS; 3111 if (!resregs) 3112 resregs = BYTEREGS; 3113 } 3114 allocreg(cdb,&resregs,®,TYint); 3115 cdb.gen2(0x0F90 + (jop & 0x0F),modregrmx(3,0,reg)); // SETcc reg 3116 if (I64 && reg >= 4) 3117 code_orrex(cdb.last(),REX); 3118 if (tysize(e.Ety) > 1) 3119 { 3120 genregs(cdb,MOVZXb,reg,reg); // MOVZX reg,reg 3121 if (I64 && sz == 8) 
3122 code_orrex(cdb.last(),REX_W); 3123 if (I64 && reg >= 4) 3124 code_orrex(cdb.last(),REX); 3125 } 3126 *pretregs &= ~mPSW; 3127 fixresult(cdb,e,resregs,pretregs); 3128 } 3129 else 3130 { 3131 code *nop = null; 3132 regm_t save = regcon.immed.mval; 3133 allocreg(cdb,&retregs,®,TYint); 3134 regcon.immed.mval = save; 3135 if ((*pretregs & mPSW) == 0 && 3136 (jop == JC || jop == JNC)) 3137 { 3138 getregs(cdb,retregs); 3139 genregs(cdb,0x19,reg,reg); // SBB reg,reg 3140 if (rex || flag & REX_W) 3141 code_orrex(cdb.last(), REX_W); 3142 if (flag) 3143 { } // cdcond() will handle it 3144 else if (jop == JNC) 3145 { 3146 if (I64) 3147 { 3148 cdb.gen2(0xFF,modregrmx(3,0,reg)); // INC reg 3149 code_orrex(cdb.last(), rex); 3150 } 3151 else 3152 cdb.gen1(0x40 + reg); // INC reg 3153 } 3154 else 3155 { 3156 cdb.gen2(0xF7,modregrmx(3,3,reg)); // NEG reg 3157 code_orrex(cdb.last(), rex); 3158 } 3159 } 3160 else if (I64 && sz == 8) 3161 { 3162 assert(!flag); 3163 movregconst(cdb,reg,1,64|8); // MOV reg,1 3164 nop = gennop(nop); 3165 genjmp(cdb,jop,FLcode,cast(block *) nop); // Jtrue nop 3166 // MOV reg,0 3167 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 64|8 : 64); 3168 regcon.immed.mval &= ~mask(reg); 3169 } 3170 else 3171 { 3172 assert(!flag); 3173 movregconst(cdb,reg,1,8); // MOV reg,1 3174 nop = gennop(nop); 3175 genjmp(cdb,jop,FLcode,cast(block *) nop); // Jtrue nop 3176 // MOV reg,0 3177 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 8 : 0); 3178 regcon.immed.mval &= ~mask(reg); 3179 } 3180 *pretregs = retregs; 3181 cdb.append(nop); 3182 } 3183 } 3184 ret: 3185 { } 3186 } 3187 3188 3189 /********************************** 3190 * Generate code for signed compare of longs. 
3191 * Input: 3192 * targ block* or code* 3193 */ 3194 3195 @trusted 3196 void longcmp(ref CodeBuilder cdb,elem *e,bool jcond,uint fltarg,code *targ) 3197 { 3198 // <= > < >= 3199 static immutable ubyte[4] jopmsw = [JL, JG, JL, JG ]; 3200 static immutable ubyte[4] joplsw = [JBE, JA, JB, JAE ]; 3201 3202 //printf("longcmp(e = %p)\n", e); 3203 elem *e1 = e.EV.E1; 3204 elem *e2 = e.EV.E2; 3205 OPER op = e.Eoper; 3206 3207 // See if we should swap operands 3208 if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2)) 3209 { 3210 e1 = e.EV.E2; 3211 e2 = e.EV.E1; 3212 op = swaprel(op); 3213 } 3214 3215 code cs; 3216 cs.Iflags = 0; 3217 cs.Irex = 0; 3218 3219 code *ce = gennop(null); 3220 regm_t retregs = ALLREGS; 3221 regm_t rretregs; 3222 reg_t reg,rreg; 3223 3224 uint jop = jopmsw[op - OPle]; 3225 if (!(jcond & 1)) jop ^= (JL ^ JG); // toggle jump condition 3226 CodeBuilder cdbjmp; 3227 cdbjmp.ctor(); 3228 genjmp(cdbjmp,jop,fltarg, cast(block *) targ); // Jx targ 3229 genjmp(cdbjmp,jop ^ (JL ^ JG),FLcode, cast(block *) ce); // Jy nop 3230 3231 switch (e2.Eoper) 3232 { 3233 default: 3234 L2: 3235 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 3236 rretregs = ALLREGS & ~retregs; 3237 scodelem(cdb,e2,&rretregs,retregs,true); // get right leaf 3238 cse_flush(cdb,1); 3239 // Compare MSW, if they're equal then compare the LSW 3240 reg = findregmsw(retregs); 3241 rreg = findregmsw(rretregs); 3242 genregs(cdb,0x3B,reg,rreg); // CMP reg,rreg 3243 cdb.append(cdbjmp); 3244 3245 reg = findreglsw(retregs); 3246 rreg = findreglsw(rretregs); 3247 genregs(cdb,0x3B,reg,rreg); // CMP reg,rreg 3248 break; 3249 3250 case OPconst: 3251 cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong); // MSW first 3252 cs.IFL2 = FLconst; 3253 cs.Iop = 0x81; 3254 3255 /* if ((e1 is data or a '*' reference) and it's not a 3256 * common subexpression 3257 */ 3258 3259 if ((e1.Eoper == OPvar && datafl[el_fl(e1)] || 3260 e1.Eoper == OPind) && 3261 !evalinregister(e1)) 3262 { 3263 
getlvalue(cdb,&cs,e1,0); 3264 freenode(e1); 3265 if (evalinregister(e2)) 3266 { 3267 retregs = idxregm(&cs); 3268 if ((cs.Iflags & CFSEG) == CFes) 3269 retregs |= mES; // take no chances 3270 rretregs = ALLREGS & ~retregs; 3271 scodelem(cdb,e2,&rretregs,retregs,true); 3272 cse_flush(cdb,1); 3273 rreg = findregmsw(rretregs); 3274 cs.Iop = 0x39; 3275 cs.Irm |= modregrm(0,rreg,0); 3276 getlvalue_msw(&cs); 3277 cdb.gen(&cs); // CMP EA+2,rreg 3278 cdb.append(cdbjmp); 3279 rreg = findreglsw(rretregs); 3280 NEWREG(cs.Irm,rreg); 3281 } 3282 else 3283 { 3284 cse_flush(cdb,1); 3285 cs.Irm |= modregrm(0,7,0); 3286 getlvalue_msw(&cs); 3287 cdb.gen(&cs); // CMP EA+2,const 3288 cdb.append(cdbjmp); 3289 cs.IEV2.Vint = e2.EV.Vlong; 3290 freenode(e2); 3291 } 3292 getlvalue_lsw(&cs); 3293 cdb.gen(&cs); // CMP EA,rreg/const 3294 break; 3295 } 3296 if (evalinregister(e2)) 3297 goto L2; 3298 3299 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 3300 cse_flush(cdb,1); 3301 reg = findregmsw(retregs); // get reg that e1 is in 3302 cs.Irm = modregrm(3,7,reg); 3303 3304 cdb.gen(&cs); // CMP reg,MSW 3305 cdb.append(cdbjmp); 3306 reg = findreglsw(retregs); 3307 cs.Irm = modregrm(3,7,reg); 3308 cs.IEV2.Vint = e2.EV.Vlong; 3309 cdb.gen(&cs); // CMP sucreg,LSW 3310 freenode(e2); 3311 break; 3312 3313 case OPvar: 3314 if (!e1.Ecount && e1.Eoper == OPs32_64) 3315 { 3316 reg_t msreg; 3317 3318 retregs = allregs; 3319 scodelem(cdb,e1.EV.E1,&retregs,0,true); 3320 freenode(e1); 3321 reg = findreg(retregs); 3322 retregs = allregs & ~retregs; 3323 allocreg(cdb,&retregs,&msreg,TYint); 3324 genmovreg(cdb,msreg,reg); // MOV msreg,reg 3325 cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1); // SAR msreg,31 3326 cse_flush(cdb,1); 3327 loadea(cdb,e2,&cs,0x3B,msreg,REGSIZE,mask(reg),0); 3328 cdb.append(cdbjmp); 3329 loadea(cdb,e2,&cs,0x3B,reg,0,mask(reg),0); 3330 freenode(e2); 3331 } 3332 else 3333 { 3334 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 3335 cse_flush(cdb,1); 3336 reg = 
findregmsw(retregs); // get reg that e1 is in 3337 loadea(cdb,e2,&cs,0x3B,reg,REGSIZE,retregs,0); 3338 cdb.append(cdbjmp); 3339 reg = findreglsw(retregs); 3340 loadea(cdb,e2,&cs,0x3B,reg,0,retregs,0); 3341 freenode(e2); 3342 } 3343 break; 3344 } 3345 3346 jop = joplsw[op - OPle]; 3347 if (!(jcond & 1)) jop ^= 1; // toggle jump condition 3348 genjmp(cdb,jop,fltarg,cast(block *) targ); // Jcond targ 3349 3350 cdb.append(ce); 3351 freenode(e); 3352 } 3353 3354 /***************************** 3355 * Do conversions. 3356 * Depends on OPd_s32 and CLIB.dbllng being in sequence. 3357 */ 3358 3359 @trusted 3360 void cdcnvt(ref CodeBuilder cdb,elem *e, regm_t *pretregs) 3361 { 3362 //printf("cdcnvt: %p *pretregs = %s\n", e, regm_str(*pretregs)); 3363 //elem_print(e); 3364 3365 static immutable ubyte[2][16] clib = 3366 [ 3367 [ OPd_s32, CLIB.dbllng ], 3368 [ OPs32_d, CLIB.lngdbl ], 3369 [ OPd_s16, CLIB.dblint ], 3370 [ OPs16_d, CLIB.intdbl ], 3371 [ OPd_u16, CLIB.dbluns ], 3372 [ OPu16_d, CLIB.unsdbl ], 3373 [ OPd_u32, CLIB.dblulng ], 3374 [ OPu32_d, CLIB.ulngdbl ], 3375 [ OPd_s64, CLIB.dblllng ], 3376 [ OPs64_d, CLIB.llngdbl ], 3377 [ OPd_u64, CLIB.dblullng ], 3378 [ OPu64_d, CLIB.ullngdbl ], 3379 [ OPd_f, CLIB.dblflt ], 3380 [ OPf_d, CLIB.fltdbl ], 3381 [ OPvp_fp, CLIB.vptrfptr ], 3382 [ OPcvp_fp, CLIB.cvptrfptr] 3383 ]; 3384 3385 if (!*pretregs) 3386 { 3387 codelem(cdb,e.EV.E1,pretregs,false); 3388 return; 3389 } 3390 3391 regm_t retregs; 3392 if (config.inline8087) 3393 { 3394 switch (e.Eoper) 3395 { 3396 case OPld_d: 3397 case OPd_ld: 3398 { 3399 if (tycomplex(e.EV.E1.Ety)) 3400 { 3401 Lcomplex: 3402 regm_t retregsx = mST01 | (*pretregs & mPSW); 3403 codelem(cdb,e.EV.E1, &retregsx, false); 3404 fixresult_complex87(cdb, e, retregsx, pretregs); 3405 return; 3406 } 3407 regm_t retregsx = mST0 | (*pretregs & mPSW); 3408 codelem(cdb,e.EV.E1, &retregsx, false); 3409 fixresult87(cdb, e, retregsx, pretregs); 3410 return; 3411 } 3412 3413 case OPf_d: 3414 case OPd_f: 3415 if 
(tycomplex(e.EV.E1.Ety)) 3416 goto Lcomplex; 3417 if (config.fpxmmregs && *pretregs & XMMREGS) 3418 { 3419 xmmcnvt(cdb, e, pretregs); 3420 return; 3421 } 3422 3423 /* if won't do us much good to transfer back and */ 3424 /* forth between 8088 registers and 8087 registers */ 3425 if (OTcall(e.EV.E1.Eoper) && !(*pretregs & allregs)) 3426 { 3427 retregs = regmask(e.EV.E1.Ety, e.EV.E1.EV.E1.Ety); 3428 if (retregs & (mXMM1 | mXMM0 |mST01 | mST0)) // if return in ST0 3429 { 3430 codelem(cdb,e.EV.E1,pretregs,false); 3431 if (*pretregs & mST0) 3432 note87(e, 0, 0); 3433 return; 3434 } 3435 else 3436 break; 3437 } 3438 goto Lload87; 3439 3440 case OPs64_d: 3441 if (!I64) 3442 goto Lload87; 3443 goto case OPs32_d; 3444 3445 case OPs32_d: 3446 if (config.fpxmmregs && *pretregs & XMMREGS) 3447 { 3448 xmmcnvt(cdb, e, pretregs); 3449 return; 3450 } 3451 goto Lload87; 3452 3453 case OPs16_d: 3454 case OPu16_d: 3455 Lload87: 3456 load87(cdb,e,0,pretregs,null,-1); 3457 return; 3458 3459 case OPu32_d: 3460 if (I64 && config.fpxmmregs && *pretregs & XMMREGS) 3461 { 3462 xmmcnvt(cdb,e,pretregs); 3463 return; 3464 } 3465 else if (!I16) 3466 { 3467 regm_t retregsx = ALLREGS; 3468 codelem(cdb,e.EV.E1, &retregsx, false); 3469 reg_t reg = findreg(retregsx); 3470 cdb.genfltreg(STO, reg, 0); 3471 regwithvalue(cdb,ALLREGS,0,reg,0); 3472 cdb.genfltreg(STO, reg, 4); 3473 3474 push87(cdb); 3475 cdb.genfltreg(0xDF,5,0); // FILD m64int 3476 3477 regm_t retregsy = mST0 /*| (*pretregs & mPSW)*/; 3478 fixresult87(cdb, e, retregsy, pretregs); 3479 return; 3480 } 3481 break; 3482 3483 case OPd_s64: 3484 if (!I64) 3485 goto Lcnvt87; 3486 goto case OPd_s32; 3487 3488 case OPd_s16: 3489 case OPd_s32: 3490 if (config.fpxmmregs) 3491 { 3492 xmmcnvt(cdb,e,pretregs); 3493 return; 3494 } 3495 goto Lcnvt87; 3496 3497 case OPd_u16: 3498 Lcnvt87: 3499 cnvt87(cdb,e,pretregs); 3500 return; 3501 3502 case OPd_u32: // use subroutine, not 8087 3503 if (I64 && config.fpxmmregs) 3504 { 3505 xmmcnvt(cdb,e,pretregs); 3506 
return; 3507 } 3508 if (I32 || I64) 3509 { 3510 cdd_u32(cdb,e,pretregs); 3511 return; 3512 } 3513 if (config.exe & EX_posix) 3514 { 3515 retregs = mST0; 3516 } 3517 else 3518 { 3519 retregs = DOUBLEREGS; 3520 } 3521 goto L1; 3522 3523 case OPd_u64: 3524 if (I32 || I64) 3525 { 3526 cdd_u64(cdb,e,pretregs); 3527 return; 3528 } 3529 retregs = DOUBLEREGS; 3530 goto L1; 3531 3532 case OPu64_d: 3533 if (*pretregs & mST0) 3534 { 3535 regm_t retregsx = I64 ? mAX : mAX|mDX; 3536 codelem(cdb,e.EV.E1,&retregsx,false); 3537 callclib(cdb,e,CLIB.u64_ldbl,pretregs,0); 3538 return; 3539 } 3540 break; 3541 3542 case OPld_u64: 3543 { 3544 if (I32 || I64) 3545 { 3546 cdd_u64(cdb,e,pretregs); 3547 return; 3548 } 3549 regm_t retregsx = mST0; 3550 codelem(cdb,e.EV.E1,&retregsx,false); 3551 callclib(cdb,e,CLIB.ld_u64,pretregs,0); 3552 return; 3553 } 3554 3555 default: 3556 break; 3557 } 3558 } 3559 retregs = regmask(e.EV.E1.Ety, TYnfunc); 3560 L1: 3561 codelem(cdb,e.EV.E1,&retregs,false); 3562 for (int i = 0; 1; i++) 3563 { 3564 assert(i < clib.length); 3565 if (clib[i][0] == e.Eoper) 3566 { 3567 callclib(cdb,e,clib[i][1],pretregs,0); 3568 break; 3569 } 3570 } 3571 } 3572 3573 3574 /*************************** 3575 * Convert short to long. 3576 * For OPs16_32, OPu16_32, OPnp_fp, OPu32_64, OPs32_64, 3577 * OPu64_128, OPs64_128 3578 */ 3579 3580 @trusted 3581 void cdshtlng(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 3582 { 3583 reg_t reg; 3584 regm_t retregs; 3585 3586 //printf("cdshtlng(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 3587 int e1comsub = e.EV.E1.Ecount; 3588 ubyte op = e.Eoper; 3589 if ((*pretregs & (ALLREGS | mBP)) == 0) // if don't need result in regs 3590 { 3591 codelem(cdb,e.EV.E1,pretregs,false); // then conversion isn't necessary 3592 return; 3593 } 3594 else if ( 3595 op == OPnp_fp || 3596 (I16 && op == OPu16_32) || 3597 (I32 && op == OPu32_64) || 3598 (I64 && op == OPu64_128) 3599 ) 3600 { 3601 /* Result goes into a register pair. 
3602 * Zero extend by putting a zero into most significant reg. 3603 */ 3604 3605 regm_t retregsx = *pretregs & mLSW; 3606 assert(retregsx); 3607 tym_t tym1 = tybasic(e.EV.E1.Ety); 3608 codelem(cdb,e.EV.E1,&retregsx,false); 3609 3610 regm_t regm = *pretregs & (mMSW & ALLREGS); 3611 if (regm == 0) // *pretregs could be mES 3612 regm = mMSW & ALLREGS; 3613 allocreg(cdb,®m,®,TYint); 3614 if (e1comsub) 3615 getregs(cdb,retregsx); 3616 if (op == OPnp_fp) 3617 { 3618 int segreg; 3619 3620 // BUG: what about pointers to functions? 3621 switch (tym1) 3622 { 3623 case TYimmutPtr: 3624 case TYnptr: segreg = SEG_DS; break; 3625 case TYcptr: segreg = SEG_CS; break; 3626 case TYsptr: segreg = SEG_SS; break; 3627 default: assert(0); 3628 } 3629 cdb.gen2(0x8C,modregrm(3,segreg,reg)); // MOV reg,segreg 3630 } 3631 else 3632 movregconst(cdb,reg,0,0); // 0 extend 3633 3634 fixresult(cdb,e,retregsx | regm,pretregs); 3635 return; 3636 } 3637 else if (I64 && op == OPu32_64) 3638 { 3639 elem *e1 = e.EV.E1; 3640 retregs = *pretregs; 3641 if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount)) 3642 { 3643 code cs; 3644 3645 allocreg(cdb,&retregs,®,TYint); 3646 loadea(cdb,e1,&cs,LOD,reg,0,retregs,retregs); // MOV Ereg,EA 3647 freenode(e1); 3648 } 3649 else 3650 { 3651 *pretregs &= ~mPSW; // flags are set by eval of e1 3652 codelem(cdb,e1,&retregs,false); 3653 /* Determine if high 32 bits are already 0 3654 */ 3655 if (e1.Eoper == OPu16_32 && !e1.Ecount) 3656 { 3657 } 3658 else 3659 { 3660 // Zero high 32 bits 3661 getregs(cdb,retregs); 3662 reg = findreg(retregs); 3663 // Don't use x89 because that will get optimized away 3664 genregs(cdb,LOD,reg,reg); // MOV Ereg,Ereg 3665 } 3666 } 3667 fixresult(cdb,e,retregs,pretregs); 3668 return; 3669 } 3670 else if (I64 && op == OPs32_64 && OTrel(e.EV.E1.Eoper) && !e.EV.E1.Ecount) 3671 { 3672 /* Due to how e1 is calculated, the high 32 bits of the register 3673 * are already 0. 
3674 */ 3675 retregs = *pretregs; 3676 codelem(cdb,e.EV.E1,&retregs,false); 3677 fixresult(cdb,e,retregs,pretregs); 3678 return; 3679 } 3680 else if (!I16 && (op == OPs16_32 || op == OPu16_32) || 3681 I64 && op == OPs32_64) 3682 { 3683 elem *e11; 3684 elem *e1 = e.EV.E1; 3685 3686 if (e1.Eoper == OPu8_16 && !e1.Ecount && 3687 ((e11 = e1.EV.E1).Eoper == OPvar || (e11.Eoper == OPind && !e11.Ecount)) 3688 ) 3689 { 3690 code cs; 3691 3692 retregs = *pretregs & BYTEREGS; 3693 if (!retregs) 3694 retregs = BYTEREGS; 3695 allocreg(cdb,&retregs,®,TYint); 3696 movregconst(cdb,reg,0,0); // XOR reg,reg 3697 loadea(cdb,e11,&cs,0x8A,reg,0,retregs,retregs); // MOV regL,EA 3698 freenode(e11); 3699 freenode(e1); 3700 } 3701 else if (e1.Eoper == OPvar || 3702 (e1.Eoper == OPind && !e1.Ecount)) 3703 { 3704 code cs = void; 3705 3706 if (I32 && op == OPu16_32 && config.flags4 & CFG4speed) 3707 goto L2; 3708 retregs = *pretregs; 3709 allocreg(cdb,&retregs,®,TYint); 3710 const opcode = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,EA 3711 if (op == OPs32_64) 3712 { 3713 assert(I64); 3714 // MOVSXD reg,e1 3715 loadea(cdb,e1,&cs,0x63,reg,0,0,retregs); 3716 code_orrex(cdb.last(), REX_W); 3717 } 3718 else 3719 loadea(cdb,e1,&cs,opcode,reg,0,0,retregs); 3720 freenode(e1); 3721 } 3722 else 3723 { 3724 L2: 3725 retregs = *pretregs; 3726 if (op == OPs32_64) 3727 retregs = mAX | (*pretregs & mPSW); 3728 *pretregs &= ~mPSW; // flags are already set 3729 CodeBuilder cdbx; 3730 cdbx.ctor(); 3731 codelem(cdbx,e1,&retregs,false); 3732 code *cx = cdbx.finish(); 3733 cdb.append(cdbx); 3734 getregs(cdb,retregs); 3735 if (op == OPu16_32 && cx) 3736 { 3737 cx = code_last(cx); 3738 if (cx.Iop == 0x81 && (cx.Irm & modregrm(3,7,0)) == modregrm(3,4,0) && 3739 mask(cx.Irm & 7) == retregs) 3740 { 3741 // Convert AND of a word to AND of a dword, zeroing upper word 3742 if (cx.Irex & REX_B) 3743 retregs = mask(8 | (cx.Irm & 7)); 3744 cx.Iflags &= ~CFopsize; 3745 cx.IEV2.Vint &= 0xFFFF; 3746 goto L1; 3747 
} 3748 } 3749 if (op == OPs16_32 && retregs == mAX) 3750 cdb.gen1(0x98); // CWDE 3751 else if (op == OPs32_64 && retregs == mAX) 3752 { 3753 cdb.gen1(0x98); // CDQE 3754 code_orrex(cdb.last(), REX_W); 3755 } 3756 else 3757 { 3758 reg = findreg(retregs); 3759 if (config.flags4 & CFG4speed && op == OPu16_32) 3760 { // AND reg,0xFFFF 3761 cdb.genc2(0x81,modregrmx(3,4,reg),0xFFFFu); 3762 } 3763 else 3764 { 3765 opcode_t iop = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,reg 3766 genregs(cdb,iop,reg,reg); 3767 } 3768 } 3769 L1: 3770 if (e1comsub) 3771 getregs(cdb,retregs); 3772 } 3773 fixresult(cdb,e,retregs,pretregs); 3774 return; 3775 } 3776 else if (*pretregs & mPSW || config.target_cpu < TARGET_80286) 3777 { 3778 // OPs16_32, OPs32_64 3779 // CWD doesn't affect flags, so we can depend on the integer 3780 // math to provide the flags. 3781 retregs = mAX | mPSW; // want integer result in AX 3782 *pretregs &= ~mPSW; // flags are already set 3783 codelem(cdb,e.EV.E1,&retregs,false); 3784 getregs(cdb,mDX); // sign extend into DX 3785 cdb.gen1(0x99); // CWD/CDQ 3786 if (e1comsub) 3787 getregs(cdb,retregs); 3788 fixresult(cdb,e,mDX | retregs,pretregs); 3789 return; 3790 } 3791 else 3792 { 3793 // OPs16_32, OPs32_64, OPs64_128 3794 uint msreg,lsreg; 3795 3796 retregs = *pretregs & mLSW; 3797 assert(retregs); 3798 codelem(cdb,e.EV.E1,&retregs,false); 3799 retregs |= *pretregs & mMSW; 3800 allocreg(cdb,&retregs,®,e.Ety); 3801 msreg = findregmsw(retregs); 3802 lsreg = findreglsw(retregs); 3803 genmovreg(cdb,msreg,lsreg); // MOV msreg,lsreg 3804 assert(config.target_cpu >= TARGET_80286); // 8088 can't handle SAR reg,imm8 3805 cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1); // SAR msreg,31 3806 fixresult(cdb,e,retregs,pretregs); 3807 return; 3808 } 3809 } 3810 3811 3812 /*************************** 3813 * Convert byte to int. 3814 * For OPu8_16 and OPs8_16. 
* Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the conversion elem; the value being converted is e.EV.E1
 *      pretregs = on entry, the registers (and possibly mPSW) wanted for
 *                 the result; handed on to fixresult() at the end
 */

@trusted
void cdbyteint(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    char size;                  // only assigned and read on the !I16 path

    if ((*pretregs & (ALLREGS | mBP | XMMREGS)) == 0)   // if don't need result in regs
    {
        codelem(cdb,e.EV.E1,pretregs,false);    // then conversion isn't necessary
        return;
    }

    //printf("cdbyteint(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));
    char op = e.Eoper;
    elem *e1 = e.EV.E1;
    if (e1.Eoper == OPcomma)
        docommas(cdb,e1);
    if (!I16)
    {
        // Operand is addressable directly: load and extend in one instruction
        if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        {
            code cs;

            regm_t retregsx = *pretregs;
            reg_t reg;
            allocreg(cdb,&retregsx,&reg,TYint);
            if (config.flags4 & CFG4speed &&
                op == OPu8_16 && mask(reg) & BYTEREGS &&
                config.target_cpu < TARGET_PentiumPro)
            {
                movregconst(cdb,reg,0,0);                           // XOR reg,reg
                loadea(cdb,e1,&cs,0x8A,reg,0,retregsx,retregsx);    // MOV regL,EA
            }
            else
            {
                const opcode = (op == OPu8_16) ? MOVZXb : MOVSXb;   // MOVZX/MOVSX reg,EA
                loadea(cdb,e1,&cs,opcode,reg,0,0,retregsx);
            }
            freenode(e1);
            fixresult(cdb,e,retregsx,pretregs);
            return;
        }
        size = tysize(e.Ety);
        retregs = *pretregs & BYTEREGS;
        if (retregs == 0)
            retregs = BYTEREGS;
        retregs |= *pretregs & mPSW;
        *pretregs &= ~mPSW;
    }
    else
    {
        if (op == OPu8_16)              // if uint conversion
        {
            retregs = *pretregs & BYTEREGS;
            if (retregs == 0)
                retregs = BYTEREGS;
        }
        else
        {
            // CBW doesn't affect flags, so we can depend on the integer
            // math to provide the flags.
            retregs = mAX | (*pretregs & mPSW); // want integer result in AX
        }
    }

    // Evaluate the operand into its own builder so we can tell whether
    // any code was generated for it (c1 != null) before appending it.
    CodeBuilder cdb1;
    cdb1.ctor();
    codelem(cdb1,e1,&retregs,false);
    code *c1 = cdb1.finish();
    cdb.append(cdb1);
    reg_t reg = findreg(retregs);
    code *c;
    if (!c1)
        goto L1;

    // If previous instruction is an AND bytereg,value
    c = cdb.last();
    if (c.Iop == 0x80 && c.Irm == modregrm(3,4,reg & 7) &&
        (op == OPu8_16 || (c.IEV2.Vuns & 0x80) == 0))
    {
        if (*pretregs & mPSW)
            c.Iflags |= CFpsw;
        c.Iop |= 1;                     // convert to word operation
        c.IEV2.Vuns &= 0xFF;            // dump any high order bits
        *pretregs &= ~mPSW;             // flags already set
    }
    else
    {
     L1:
        if (!I16)
        {
            if (op == OPs8_16 && reg == AX && size == 2)
            {
                cdb.gen1(0x98);                     // CBW
                cdb.last().Iflags |= CFopsize;      // don't do a CWDE
            }
            else
            {
                // We could do better by not forcing the src and dst
                // registers to be the same.

                if (config.flags4 & CFG4speed && op == OPu8_16)
                {   // AND reg,0xFF
                    cdb.genc2(0x81,modregrmx(3,4,reg),0xFF);
                }
                else
                {
                    opcode_t iop = (op == OPu8_16) ? MOVZXb : MOVSXb; // MOVZX/MOVSX reg,reg
                    genregs(cdb,iop,reg,reg);
                    if (I64 && reg >= 4)
                        code_orrex(cdb.last(), REX);
                }
            }
        }
        else
        {
            if (op == OPu8_16)
                genregs(cdb,0x30,reg+4,reg+4);      // XOR regH,regH
            else
            {
                cdb.gen1(0x98);                     // CBW
                *pretregs &= ~mPSW;                 // flags already set
            }
        }
    }
    getregs(cdb,retregs);
    fixresult(cdb,e,retregs,pretregs);
}


/***************************
 * Convert long to short (OP32_16).
 * Get offset of far pointer (OPoffset).
 * Convert int to byte (OP16_8).
 * Convert long long to long (OP64_32).
* OP128_64
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the narrowing elem; the wide value is e.EV.E1
 *      pretregs = registers wanted for the result; 0 means the value is not needed
 */

@trusted
void cdlngsht(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    debug
    {
        switch (e.Eoper)
        {
            case OP32_16:
            case OPoffset:
            case OP16_8:
            case OP64_32:
            case OP128_64:
                break;

            default:
                assert(0);
        }
    }

    regm_t retregs;
    if (e.Eoper == OP16_8)
    {
        retregs = *pretregs ? BYTEREGS : 0;
        codelem(cdb,e.EV.E1,&retregs,false);
    }
    else
    {
        if (e.EV.E1.Eoper == OPrelconst)
            offsetinreg(cdb,e.EV.E1,&retregs);
        else
        {
            retregs = *pretregs ? ALLREGS : 0;
            codelem(cdb,e.EV.E1,&retregs,false);
            bool isOff = e.Eoper == OPoffset;
            if (I16 ||
                I32 && (isOff || e.Eoper == OP64_32) ||
                I64 && (isOff || e.Eoper == OP128_64))
                retregs &= mLSW;                // want LSW only
        }
    }

    /* We "destroy" a reg by assigning it the result of a new e, even
     * though the values are the same. Weakness of our CSE strategy that
     * a register can only hold the contents of one elem at a time.
     */
    if (e.Ecount)
        getregs(cdb,retregs);
    else
        useregs(retregs);

    debug
    if (!(!*pretregs || retregs))
    {
        printf("%s *pretregs = %s, retregs = %s, e = %p\n",oper_str(e.Eoper),regm_str(*pretregs),regm_str(retregs),e);
    }

    assert(!*pretregs || retregs);
    fixresult(cdb,e,retregs,pretregs);  // lsw only
}

/**********************************************
 * Get top 32 bits of 64 bit value (I32)
 * or top 16 bits of 32 bit value (I16)
 * or top 64 bits of 128 bit value (I64).
 * OPmsw
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPmsw elem; the wide value is e.EV.E1
 *      pretregs = registers wanted for the result; 0 means the value is not needed
 */

@trusted
void cdmsw(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPmsw);

    regm_t retregs = *pretregs ? ALLREGS : 0;
    codelem(cdb,e.EV.E1,&retregs,false);
    retregs &= mMSW;                    // want MSW only

    /* We "destroy" a reg by assigning it the result of a new e, even
     * though the values are the same. Weakness of our CSE strategy that
     * a register can only hold the contents of one elem at a time.
     */
    if (e.Ecount)
        getregs(cdb,retregs);
    else
        useregs(retregs);

    debug
    if (!(!*pretregs || retregs))
    {
        printf("%s *pretregs = %s, retregs = %s\n",oper_str(e.Eoper),regm_str(*pretregs),regm_str(retregs));
        elem_print(e);
    }

    assert(!*pretregs || retregs);
    fixresult(cdb,e,retregs,pretregs);  // msw only
}



/******************************
 * Handle operators OPinp and OPoutp.
 * The port number is e.EV.E1; for OPoutp the value to write is e.EV.E2.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPinp or OPoutp elem
 *      pretregs = registers wanted for the result (the value ends up in AX
 *                 and is moved by fixresult())
 */

@trusted
void cdport(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdport\n");
    ubyte op = 0xE4;            // root of all IN/OUT opcodes
    elem *e1 = e.EV.E1;

    // See if we can use immediate mode of IN/OUT opcodes
    ubyte port;
    if (e1.Eoper == OPconst && e1.EV.Vuns <= 255 &&
        (!evalinregister(e1) || regcon.mvar & mDX))
    {
        port = cast(ubyte)e1.EV.Vuns;
        freenode(e1);
    }
    else
    {
        regm_t retregs = mDX;   // port number is always DX
        codelem(cdb,e1,&retregs,false);
        op |= 0x08;             // DX version of opcode
        port = 0;               // not logically needed, but
                                // quiets "uninitialized var" complaints
    }

    uint sz;
    if (e.Eoper == OPoutp)
    {
        sz = tysize(e.EV.E2.Ety);
        regm_t retregs = mAX;   // byte/word to output is in AL/AX
        // keep DX intact while evaluating the value if DX holds the port
        scodelem(cdb,e.EV.E2,&retregs,((op & 0x08) ? mDX : 0),true);
        op |= 0x02;             // OUT opcode
    }
    else // OPinp
    {
        getregs(cdb,mAX);
        sz = tysize(e.Ety);
    }

    if (sz != 1)
        op |= 1;                        // word operation
    cdb.genc2(op,0,port);               // IN/OUT AL/AX,DX/port
    if (op & 1 && sz != REGSIZE)        // if need size override
        cdb.last().Iflags |= CFopsize;
    regm_t retregs = mAX;
    fixresult(cdb,e,retregs,pretregs);
}

/************************
 * Generate code for an asm elem.
 * The bytes in e.EV.Vstring[0 .. e.EV.Vstrlen] are emitted as-is.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the asm elem
 *      pretregs = registers wanted for the result
 */

@trusted
void cdasm(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Assume only regs normally destroyed by a function are destroyed
    getregs(cdb,(ALLREGS | mES) & ~fregsaved);
    cdb.genasm(cast(ubyte[])e.EV.Vstring[0 .. e.EV.Vstrlen]);
    fixresult(cdb,e,(I16 ? mDX | mAX : mAX),pretregs);
}

/************************
 * Generate code for OPnp_f16p and OPf16p_np.
 * Only valid for 32 bit code generation (asserts I32).
 *
 * NOTE(review): for OPnp_f16p, `cnop` is used as the jump target but is never
 * appended to cdb, and the trailing `ROL ereg,16` shown in the listing below
 * is not emitted after the conditional sequence. This looks unfinished;
 * confirm against the intended instruction sequence before relying on it.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the conversion elem; the pointer value is e.EV.E1
 *      pretregs = registers for the result; the operand is evaluated
 *                 directly into them
 */

@trusted
void cdfar16(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    code *cnop;
    code cs;

    assert(I32);
    codelem(cdb,e.EV.E1,pretregs,false);
    reg_t reg = findreg(*pretregs);
    getregs(cdb,*pretregs);             // we will destroy the regs

    cs.Iop = 0xC1;
    cs.Irm = modregrm(3,0,reg);
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 16;

    cdb.gen(&cs);                       // ROL ereg,16
    cs.Irm |= modregrm(0,1,0);
    cdb.gen(&cs);                       // ROR ereg,16
    cs.IEV2.Vuns = 3;
    cs.Iflags |= CFopsize;

    if (e.Eoper == OPnp_f16p)
    {
        /*      OR  ereg,ereg
                JE  L1
                ROR ereg,16
                SHL reg,3
                MOV rx,SS
                AND rx,3                ;mask off CPL bits
                OR  rl,4                ;run on LDT bit
                OR  regl,rl
                ROL ereg,16
            L1: NOP
         */
        reg_t rx;

        regm_t retregs = BYTEREGS & ~*pretregs;
        allocreg(cdb,&retregs,&rx,TYint);
        cnop = gennop(null);
        int jop = JCXZ;
        if (reg != CX)
        {
            gentstreg(cdb,reg);
            jop = JE;
        }
        genjmp(cdb,jop,FLcode, cast(block *)cnop);      // Jop L1
        NEWREG(cs.Irm,4);
        cdb.gen(&cs);                                   // SHL reg,3
        genregs(cdb,0x8C,2,rx);                         // MOV rx,SS
        int isbyte = (mask(reg) & BYTEREGS) == 0;
        cdb.genc2(0x80 | isbyte,modregrm(3,4,rx),3);    // AND rl,3
        cdb.genc2(0x80,modregrm(3,1,rx),4);             // OR  rl,4
        genregs(cdb,0x0A | isbyte,reg,rx);              // OR  regl,rl
    }
    else    // OPf16p_np
    {
        /*      ROR ereg,16
                SHR reg,3
                ROL ereg,16
         */

        cs.Irm |= modregrm(0,5,0);
        cdb.gen(&cs);                   // SHR reg,3
        cnop = null;
    }
}

/*************************
 * Generate code for OPbtst
 *
 * Emits a BT of e.EV.E1 (memory operand or register) against the bit index
 * e.EV.E2, then optionally materializes the carry flag as the result.
 * A constant bit index is masked to the operand width; unlike cdbt() the
 * operand address is not adjusted.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPbtst elem
 *      pretregs = registers/flags wanted for the result; 0 means only
 *                 evaluate the operands for their side effects
 */

@trusted
void cdbtst(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    regm_t retregs;
    reg_t reg;

    //printf("cdbtst(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));

    opcode_t op = 0xA3;                 // BT EA,value
    int mode = 4;

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    if (*pretregs == 0)                 // if don't want result
    {
        codelem(cdb,e1,pretregs,false); // eval left leaf
        *pretregs = 0;                  // in case they got set
        codelem(cdb,e2,pretregs,false);
        return;
    }

    regm_t idxregs;
    if ((e1.Eoper == OPind && !e1.Ecount) || e1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e1, RMload);    // get addressing mode
        idxregs = idxregm(&cs);             // mask if index regs used
    }
    else
    {
        // Operand must be computed: put it in a register and address that
        retregs = tysize(e1.Ety) == 1 ? BYTEREGS : allregs;
        codelem(cdb,e1, &retregs, false);
        reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)
            cs.Irex |= REX_B;
        idxregs = retregs;
    }

    tym_t ty1 = tybasic(e1.Ety);
    const sz = tysize(e1.Ety);
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)      // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        if (sz <= SHORTSIZE)
        {
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (sz == 4)
        {
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        retregs = ALLREGS & ~idxregs;

        /* A register variable may not have its upper 32
         * bits 0, so pick a different register to force
         * a MOV which will clear it
         */
        if (I64 && sz == 8 && tysize(e2.Ety) == 4)
        {
            regm_t rregm;
            reg_t rreg;
            if (isregvar(e2, rregm, rreg))
                retregs &= ~rregm;
        }

        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;           // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        if (I64 && _tysize[ty1] == 8)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        if (tysize(e.Ety) == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            // allocreg() must not disturb the record of immediate values
            regm_t save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);              // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));      // NEG reg
            }
            else
            {
                movregconst(cdb,reg,1,8);               // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);  // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                regcon.immed.mval &= ~mask(reg);
            }
            *pretregs = retregs;
            cdb.append(cnop);
        }
    }
}

/*************************
 * Generate code for OPbt, OPbtc, OPbtr, OPbts
 *
 * Emits BT/BTC/BTR/BTS on the operand addressed via getlvalue(). For a
 * constant bit index the operand offset is advanced to the unit containing
 * the bit and the index is reduced to the position within that unit.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the bit test elem; e.EV.E1 is the operand, e.EV.E2 the bit index
 *      pretregs = registers/flags wanted for the result
 */

@trusted
void cdbt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdbt(%p, %s)\n", e, regm_str(*pretregs));
    regm_t retregs;
    reg_t reg;
    opcode_t op;
    int mode;

    switch (e.Eoper)
    {
        case OPbt:      op = 0xA3; mode = 4; break;
        case OPbtc:     op = 0xBB; mode = 7; break;
        case OPbtr:     op = 0xB3; mode = 6; break;
        case OPbts:     op = 0xAB; mode = 5; break;

        default:
            assert(0);
    }

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    getlvalue(cdb, &cs, e, RMload);     // get addressing mode
    if (e.Eoper == OPbt && *pretregs == 0)
    {
        // Plain test with unused result: only evaluate the index operand
        codelem(cdb,e2,pretregs,false);
        return;
    }

    const ty1 = tybasic(e1.Ety);
    const ty2 = tybasic(e2.Ety);
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;
    regm_t idxregs = idxregm(&cs);      // mask if index regs used

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)      // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        if (_tysize[ty1] == SHORTSIZE)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~15) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (_tysize[ty1] == 4)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~31) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~63) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        retregs = ALLREGS & ~idxregs;
        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;           // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        if (_tysize[ty2] == 8 && I64)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        // Use tysize() as cdbtst() does: e.Ety has not been reduced with
        // tybasic(), so it must not be used to index _tysize directly.
        if (tysize(e.Ety) == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            // allocreg() must not disturb the record of immediate values
            const save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);              // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));      // NEG reg
            }
            else
            {
                movregconst(cdb,reg,1,8);               // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);  // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                regcon.immed.mval &= ~mask(reg);
            }
            *pretregs = retregs;
            cdb.append(cnop);
        }
    }
}

/*************************************
 * Generate code for OPbsf and
OPbsr.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the bit scan elem; the value scanned is e.EV.E1 (2, 4 or 8 bytes)
 *      pretregs = registers wanted for the result; 0 means only evaluate
 *                 the operand
 */

@trusted
void cdbscan(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cdbscan()\n");
    //elem_print(e);
    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    const tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    assert(sz == 2 || sz == 4 || sz == 8);
    code cs = void;

    if ((e.EV.E1.Eoper == OPind && !e.EV.E1.Ecount) || e.EV.E1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e.EV.E1, RMload);   // get addressing mode
    }
    else
    {
        // Operand must be computed: put it in a register and address that
        regm_t retregs = allregs;
        codelem(cdb,e.EV.E1, &retregs, false);
        const reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)
            cs.Irex |= REX_B;
    }

    regm_t retregs = *pretregs & allregs;
    if (!retregs)
        retregs = allregs;
    reg_t reg;
    allocreg(cdb,&retregs, &reg, e.Ety);

    cs.Iop = (e.Eoper == OPbsf) ? 0x0FBC : 0x0FBD;      // BSF/BSR reg,EA
    code_newreg(&cs, reg);
    if (!I16 && sz == SHORTSIZE)
        cs.Iflags |= CFopsize;
    cdb.gen(&cs);
    if (sz == 8)
        code_orrex(cdb.last(), REX_W);

    fixresult(cdb,e,retregs,pretregs);
}

/************************
 * OPpopcnt operator
 *
 * Not available for 16 bit code generation (asserts !I16); operand sizes
 * are 2, 4, or (I64 only) 8 bytes.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPpopcnt elem; the value counted is e.EV.E1
 *      pretregs = registers/flags wanted for the result; 0 means only
 *                 evaluate the operand
 */

@trusted
void cdpopcnt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpopcnt()\n");
    //elem_print(e);
    assert(!I16);
    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    const tyml = tybasic(e.EV.E1.Ety);

    const sz = _tysize[tyml];
    assert(sz == 2 || sz == 4 || (sz == 8 && I64));     // no byte op

    code cs = void;
    if ((e.EV.E1.Eoper == OPind && !e.EV.E1.Ecount) || e.EV.E1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e.EV.E1, RMload);   // get addressing mode
    }
    else
    {
        // Operand must be computed: put it in a register and address that
        regm_t retregs = allregs;
        codelem(cdb,e.EV.E1, &retregs, false);
        const reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)
            cs.Irex |= REX_B;
    }

    regm_t retregs = *pretregs & allregs;
    if (!retregs)
        retregs = allregs;
    reg_t reg;
    allocreg(cdb,&retregs, &reg, e.Ety);

    cs.Iop = POPCNT;            // POPCNT reg,EA
    code_newreg(&cs, reg);
    if (sz == SHORTSIZE)
        cs.Iflags |= CFopsize;
    if (*pretregs & mPSW)
        cs.Iflags |= CFpsw;
    cdb.gen(&cs);
    if (sz == 8)
        code_orrex(cdb.last(), REX_W);
    *pretregs &= mBP | ALLREGS;             // flags already set

    fixresult(cdb,e,retregs,pretregs);
}


/*******************************************
 * Generate code for OPpair, OPrpair.
 *
 * Evaluates e.EV.E1 and e.EV.E2 into the two halves of a register pair
 * (or into two XMM registers, or hands off to loadPair87() for the x87
 * stack). OPrpair swaps which half each operand goes to.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPpair or OPrpair elem
 *      pretregs = registers wanted for the result; 0 means only evaluate
 *                 both operands
 */

@trusted
void cdpair(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    if (*pretregs == 0)                         // if don't want result
    {
        codelem(cdb,e.EV.E1,pretregs,false);    // eval left leaf
        *pretregs = 0;                          // in case they got set
        codelem(cdb,e.EV.E2,pretregs,false);
        return;
    }

    //printf("\ncdpair(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    //WRTYxx(e.Ety);printf("\n");
    //printf("Ecount = %d\n", e.Ecount);

    regm_t retregs = *pretregs;
    if (retregs == mPSW && tycomplex(e.Ety) && config.inline8087)
    {
        if (config.fpxmmregs)
            retregs |= mXMM0 | mXMM1;
        else
            retregs |= mST01;
    }

    if (retregs & mST01)
    {
        loadPair87(cdb, e, pretregs);
        return;
    }

    regm_t regs1;
    regm_t regs2;
    if (retregs & XMMREGS)
    {
        retregs &= XMMREGS;
        const reg = findreg(retregs);
        regs1 = mask(reg);
        regs2 = mask(findreg(retregs & ~regs1));
    }
    else
    {
        retregs &= allregs;
        if  (!retregs)
            retregs = allregs;
        regs1 = retregs & mLSW;
        regs2 = retregs & mMSW;
    }
    if (e.Eoper == OPrpair)
    {
        // swap
        regs1 ^= regs2;
        regs2 ^= regs1;
        regs1 ^= regs2;
    }
    //printf("1: regs1 = %s, regs2 = %s\n", regm_str(regs1), regm_str(regs2));

    codelem(cdb,e.EV.E1, &regs1, false);
    scodelem(cdb,e.EV.E2, &regs2, regs1, false);    // preserve regs1 while evaluating E2

    if (e.EV.E1.Ecount)
        getregs(cdb,regs1);
    if (e.EV.E2.Ecount)
        getregs(cdb,regs2);

    fixresult(cdb,e,regs1 | regs2,pretregs);
}

/*************************
 * Generate code for OPcmpxchg
 *
 * Uses CMPXCHG8B for an 8 byte lvalue in 32 bit code, otherwise CMPXCHG.
 * A LOCK prefix is emitted when the lvalue type is volatile. If a register
 * result is requested it is a byte set from the zero flag (SETZ).
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPcmpxchg elem, shaped as shown in the diagram below
 *      pretregs = registers wanted for the result
 */

@trusted
void cdcmpxchg(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* The form is:
     *     OPcmpxchg
     *    /     \
     * lvalue   OPparam
     *          /     \
     *        old     new
     */

    //printf("cdcmpxchg(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    assert(e2.Eoper == OPparam);
    assert(!e2.Ecount);

    const tyml = tybasic(e1.Ety);               // type of lvalue
    const sz = _tysize[tyml];

    if (I32 && sz == 8)
    {
        regm_t retregsx = mDX|mAX;
        codelem(cdb,e2.EV.E1,&retregsx,false);          // [DX,AX] = e2.EV.E1

        regm_t retregs = mCX|mBX;
        scodelem(cdb,e2.EV.E2,&retregs,mDX|mAX,false);  // [CX,BX] = e2.EV.E2

        code cs = void;
        getlvalue(cdb,&cs,e1,mCX|mBX|mAX|mDX);          // get EA

        getregs(cdb,mDX|mAX);                   // CMPXCHG destroys these regs

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);                     // LOCK prefix
        cs.Iop = 0x0FC7;                        // CMPXCHG8B EA
        cs.Iflags |= CFpsw;
        code_newreg(&cs,1);
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }
    else
    {
        const uint isbyte = (sz == 1);          // 1 for byte operation
        const ubyte word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
        const uint rex = (I64 && sz == 8) ? REX_W : 0;

        regm_t retregsx = mAX;
        codelem(cdb,e2.EV.E1,&retregsx,false);  // AX = e2.EV.E1

        regm_t retregs = (ALLREGS | mBP) & ~mAX;
        scodelem(cdb,e2.EV.E2,&retregs,mAX,false);      // load rvalue in reg

        code cs = void;
        getlvalue(cdb,&cs,e1,mAX | retregs);    // get EA

        getregs(cdb,mAX);                       // CMPXCHG destroys AX

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);                     // LOCK prefix
        cs.Iop = 0x0FB1 ^ isbyte;               // CMPXCHG EA,reg
        cs.Iflags |= CFpsw | word;
        cs.Irex |= rex;
        const reg = findreg(retregs);
        code_newreg(&cs,reg);
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }

    if (regm_t retregs = *pretregs & (ALLREGS | mBP))   // if return result in register
    {
        assert(tysize(e.Ety) == 1);
        assert(I64 || retregs & BYTEREGS);
        reg_t reg;
        allocreg(cdb,&retregs,&reg,TYint);
        uint ea = modregrmx(3,0,reg);
        if (I64 && reg >= 4)
            ea |= REX << 16;
        cdb.gen2(0x0F94,ea);                    // SETZ reg
        *pretregs = retregs;
    }
}

/*************************
 * Generate code for OPprefetch
 *
 * e.EV.E1 is the address to prefetch; e.EV.E2 must be a constant selecting
 * the prefetch variant. No result is produced (asserts *pretregs == 0).
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPprefetch elem
 *      pretregs = must point to 0
 */

@trusted
void cdprefetch(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate the following based on e2:
     *    0: prefetcht0
     *    1: prefetcht1
     *    2: prefetcht2
     *    3: prefetchnta
     *    4: prefetchw
     *    5: prefetchwt1
     */
    //printf("cdprefetch\n");
    elem *e1 = e.EV.E1;

    assert(*pretregs == 0);
    assert(e.EV.E2.Eoper == OPconst);
    opcode_t op;
    reg_t reg;          // value for the reg field of the ModR/M byte
    switch (e.EV.E2.EV.Vuns)
    {
        case 0: op = PREFETCH; reg = 1; break;  // PREFETCHT0
        case 1: op = PREFETCH; reg = 2; break;  // PREFETCHT1
        case 2: op = PREFETCH; reg = 3; break;  // PREFETCHT2
        case 3: op = PREFETCH; reg = 0; break;  // PREFETCHNTA
        case 4: op = 0x0F0D;   reg = 1; break;  // PREFETCHW
        case 5: op = 0x0F0D;   reg = 2; break;  // PREFETCHWT1
        default: assert(0);
    }

    freenode(e.EV.E2);

    code cs = void;
    getlvalue(cdb,&cs,e1,0);
    cs.Iop = op;
    cs.Irm |= modregrm(0,reg,0);
    cs.Iflags |= CFvolatile;            // do not schedule
    cdb.gen(&cs);
}


/*********************
 * Load register from EA of assignment operation.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that will be evaluated (not referenced here)
 *      reg = set to register loaded from EA
 *      retregs = register candidates for reg
 */
@trusted
private
void opAssLoadReg(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t reg, regm_t retregs)
{
    modEA(cdb, &cs);
    allocreg(cdb,&retregs,&reg,TYoffset);

    cs.Iop = LOD;
    code_newreg(&cs,reg);
    cdb.gen(&cs);                       // MOV reg,EA
}

/*********************
 * Load register pair from EA of assignment operation.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction that receives the EA; it is filled in here by
 *           calling getlvalue() on e.EV.E1, and is left addressing the
 *           least significant part on return
 *      e = assignment expression that will be evaluated
 *      rhi = set to most significant register of the pair
 *      rlo = set to least significant register of the pair
 *      retregs = register candidates for rhi, rlo
 *      keepmsk = registers to not modify
 */
@trusted
private
void opAssLoadPair(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t rhi, out reg_t rlo, regm_t retregs, regm_t keepmsk)
{
    getlvalue(cdb,&cs,e.EV.E1,retregs | keepmsk);
    const tym_t tyml = tybasic(e.EV.E1.Ety);    // type of lvalue
    reg_t reg;
    allocreg(cdb,&retregs,&reg,tyml);

    rhi = findregmsw(retregs);
    rlo = findreglsw(retregs);

    cs.Iop = LOD;
    code_newreg(&cs,rlo);
    cdb.gen(&cs);                       // MOV rlo,EA
    getlvalue_msw(&cs);
    code_newreg(&cs,rhi);
    cdb.gen(&cs);                       // MOV rhi,EA (most significant part)
    getlvalue_lsw(&cs);                 // point cs back at the least significant part
}


/*********************************************************
 * Store register
result of assignment operation EA.
 * The store is a byte store when the lvalue is one byte wide.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that was evaluated
 *      reg = register of result
 *      pretregs = registers to store result in
 */
@trusted
private
void opAssStoreReg(ref CodeBuilder cdb, ref code cs, elem* e, reg_t reg, regm_t* pretregs)
{
    elem* lvalue = e.EV.E1;

    // Bit 0 of the store opcode is flipped for a one-byte lvalue
    const bool byteStore = _tysize[tybasic(lvalue.Ety)] == 1;
    cs.Iop = byteStore ? (STO ^ 1) : STO;
    code_newreg(&cs,reg);
    cdb.gen(&cs);                       // MOV EA,resreg

    // The lvalue is a common subexpression: remember which register holds it
    if (lvalue.Ecount)
        cssave(lvalue,mask(reg),!OTleaf(lvalue.Eoper));

    freenode(lvalue);
    fixresult(cdb,e,mask(reg),pretregs);
}

/*********************************************************
 * Store register pair result of assignment operation EA.
 * The least significant register is stored first, then the most
 * significant one after stepping the EA with getlvalue_msw().
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that was evaluated
 *      rhi = most significant register of the pair
 *      rlo = least significant register of the pair
 *      pretregs = registers to store result in
 */
@trusted
private
void opAssStorePair(ref CodeBuilder cdb, ref code cs, elem* e, reg_t rhi, reg_t rlo, regm_t* pretregs)
{
    // Low half
    cs.Iop = STO;
    code_newreg(&cs,rlo);
    cdb.gen(&cs);                       // MOV EA,lsreg

    // High half
    code_newreg(&cs,rhi);
    getlvalue_msw(&cs);
    cdb.gen(&cs);                       // MOV EA+REGSIZE,msreg

    const regm_t pairMask = mask(rhi) | mask(rlo);
    elem* lvalue = e.EV.E1;

    // The lvalue is a common subexpression: remember which registers hold it
    if (lvalue.Ecount)
        cssave(lvalue,pairMask,!OTleaf(lvalue.Eoper));

    freenode(lvalue);
    fixresult(cdb,e,pairMask,pretregs);
}