1 /**
2  * Support for 64-bit longs.
3  *
4  * Copyright: Copyright Digital Mars 2000 - 2012.
5  * License: Distributed under the
6  *      $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0).
7  *    (See accompanying file LICENSE)
8  * Authors:   Walter Bright, Sean Kelly
9  * Source: $(DRUNTIMESRC rt/_llmath.d)
10  */
11 
12 module rt.llmath;
13 
14 extern (C):
15 
16 
17 /***************************************
18  * Unsigned long divide.
19  * Input:
20  *      [EDX,EAX],[ECX,EBX]
21  * Output:
22  *      [EDX,EAX] = [EDX,EAX] / [ECX,EBX]
23  *      [ECX,EBX] = [EDX,EAX] % [ECX,EBX]
24  */
25 
26 void __ULDIV2__()
27 {
28     version (D_InlineAsm_X86)
29     {
30         asm
31         {
32             naked                   ;
33             mov     EBX,4[ESP]      ;   // the only difference between this and __ULDIV__
34             test    ECX,ECX         ;
35             jz      uldiv           ;
36 
37             // if ECX > EDX, then quotient is 0 and remainder is [EDX,EAX]
38             cmp     ECX,EDX         ;
39             ja      quo0            ;
40 
41             test    ECX,ECX         ;
42             js      Lleft           ;
43 
44             /* We have n>d, and know that n/d will fit in 32 bits.
45              * d will be left justified if we shift it left s bits.
46              * [d1,d0] <<= s
47              * [n2,n1,n0] = [n1,n0] << s
48              *
49              * Use one divide, by this reasoning:
50              * ([n2,n1]<<32 + n0)/(d1<<32 + d0)
51              * becomes:
52              * ([n2,n1]<<32)/(d1<<32 + d0) + n0/(d1<<32 + d0)
53              * The second divide is always 0.
54              * Ignore the d0 in the first divide, which will yield a quotient
55              * that might be too high by 1 (because d1 is left justified).
56              * We can tell if it's too big if:
57              *  q*[d1,d0] > [n2,n1,n0]
58              * which is:
59              *  q*[d1,d0] > [[q*[d1,0]+q%[d1,0],n1,n0]
60              * If we subtract q*[d1,0] from both sides, we get:
61              *  q*d0 > [[n2,n1]%d1,n0]
62              * So if it is too big by one, reduce q by one to q'=q-one.
63              * Compute remainder as:
64              *  r = ([n1,n0] - q'*[d1,d0]) >> s
65              * Again, we can subtract q*[d1,0]:
66              *  r = ([n1,n0] - q*[d1,0] - (q'*[d1,d0] - q*[d1,0])) >> s
67              *  r = ([[n2,n1]%d1,n0] + (q*[d1,0] - (q - one)*[d1,d0])) >> s
68              *  r = ([[n2,n1]%d1,n0] + (q*[d1,0] - [d1 *(q-one),d0*(1-q)])) >> s
69              *  r = ([[n2,n1]%d1,n0] + [d1 *one,d0*(one-q)])) >> s
70              */
71 
72             push    EBP             ;
73             push    ESI             ;
74             push    EDI             ;
75 
76             mov     ESI,EDX         ;
77             mov     EDI,EAX         ;
78             mov     EBP,ECX         ;
79 
80             bsr     EAX,ECX         ;       // EAX is now 30..0
81             xor     EAX,0x1F        ;       // EAX is now 1..31
82             mov     CH,AL           ;
83             neg     EAX             ;
84             add     EAX,32          ;
85             mov     CL,AL           ;
86 
87             mov     EAX,EBX         ;
88             shr     EAX,CL          ;
89             xchg    CH,CL           ;
90             shl     EBP,CL          ;
91             or      EBP,EAX         ;
92             shl     EBX,CL          ;
93 
94             mov     EDX,ESI         ;
95             xchg    CH,CL           ;
96             shr     EDX,CL          ;
97 
98             mov     EAX,EDI         ;
99             shr     EAX,CL          ;
100             xchg    CH,CL           ;
101             shl     EDI,CL          ;
102             shl     ESI,CL          ;
103             or      EAX,ESI         ;
104 
105             div     EBP             ;
106             push    EBP             ;
107             mov     EBP,EAX         ;
108             mov     ESI,EDX         ;
109 
110             mul     EBX             ;
111             cmp     EDX,ESI         ;
112             ja      L1              ;
113             jb      L2              ;
114             cmp     EAX,EDI         ;
115             jbe     L2              ;
116 L1:         dec     EBP             ;
117             sub     EAX,EBX         ;
118             sbb     EDX,0[ESP]      ;
119 L2:
120             add     ESP,4           ;
121             sub     EDI,EAX         ;
122             sbb     ESI,EDX         ;
123             mov     EAX,ESI         ;
124             xchg    CH,CL           ;
125             shl     EAX,CL          ;
126             xchg    CH,CL           ;
127             shr     EDI,CL          ;
128             or      EDI,EAX         ;
129             shr     ESI,CL          ;
130             mov     EBX,EDI         ;
131             mov     ECX,ESI         ;
132             mov     EAX,EBP         ;
133             xor     EDX,EDX         ;
134 
135             pop     EDI             ;
136             pop     ESI             ;
137             pop     EBP             ;
138             ret                     ;
139 
140 uldiv:      test    EDX,EDX         ;
141             jnz     D3              ;
142             // Both high words are 0, we can use the DIV instruction
143             div     EBX             ;
144             mov     EBX,EDX         ;
145             mov     EDX,ECX         ;       // EDX = ECX = 0
146             ret                     ;
147 
148             even                    ;
149 D3:         // Divide [EDX,EAX] by EBX
150             mov     ECX,EAX         ;
151             mov     EAX,EDX         ;
152             xor     EDX,EDX         ;
153             div     EBX             ;
154             xchg    ECX,EAX         ;
155             div     EBX             ;
156             // ECX,EAX = result
157             // EDX = remainder
158             mov     EBX,EDX         ;
159             mov     EDX,ECX         ;
160             xor     ECX,ECX         ;
161             ret                     ;
162 
163 quo0:       // Quotient is 0
164             // Remainder is [EDX,EAX]
165             mov     EBX,EAX         ;
166             mov     ECX,EDX         ;
167             xor     EAX,EAX         ;
168             xor     EDX,EDX         ;
169             ret                     ;
170 
171 Lleft:      // The quotient is 0 or 1 and EDX >= ECX
172             cmp     EDX,ECX         ;
173             ja      quo1            ;       // [EDX,EAX] > [ECX,EBX]
174             // EDX == ECX
175             cmp     EAX,EBX         ;
176             jb      quo0            ;
177 
178 quo1:       // Quotient is 1
179             // Remainder is [EDX,EAX] - [ECX,EBX]
180             sub     EAX,EBX         ;
181             sbb     EDX,ECX         ;
182             mov     EBX,EAX         ;
183             mov     ECX,EDX         ;
184             mov     EAX,1           ;
185             xor     EDX,EDX         ;
186             ret                     ;
187         }
188     }
189     else version (D_InlineAsm_X86_64)
190         assert(0);
191     else
192         static assert(0);
193 }
194 
195 void __ULDIV__()
196 {
197     version (D_InlineAsm_X86)
198     {
199         asm
200         {
201             naked                   ;
202             test    ECX,ECX         ;
203             jz      uldiv           ;
204 
205             // if ECX > EDX, then quotient is 0 and remainder is [EDX,EAX]
206             cmp     ECX,EDX         ;
207             ja      quo0            ;
208 
209             test    ECX,ECX         ;
210             js      Lleft           ;
211 
212             /* We have n>d, and know that n/d will fit in 32 bits.
213              * d will be left justified if we shift it left s bits.
214              * [d1,d0] <<= s
215              * [n2,n1,n0] = [n1,n0] << s
216              *
217              * Use one divide, by this reasoning:
218              * ([n2,n1]<<32 + n0)/(d1<<32 + d0)
219              * becomes:
220              * ([n2,n1]<<32)/(d1<<32 + d0) + n0/(d1<<32 + d0)
221              * The second divide is always 0.
222              * Ignore the d0 in the first divide, which will yield a quotient
223              * that might be too high by 1 (because d1 is left justified).
224              * We can tell if it's too big if:
225              *  q*[d1,d0] > [n2,n1,n0]
226              * which is:
227              *  q*[d1,d0] > [[q*[d1,0]+q%[d1,0],n1,n0]
228              * If we subtract q*[d1,0] from both sides, we get:
229              *  q*d0 > [[n2,n1]%d1,n0]
230              * So if it is too big by one, reduce q by one to q'=q-one.
231              * Compute remainder as:
232              *  r = ([n1,n0] - q'*[d1,d0]) >> s
233              * Again, we can subtract q*[d1,0]:
234              *  r = ([n1,n0] - q*[d1,0] - (q'*[d1,d0] - q*[d1,0])) >> s
235              *  r = ([[n2,n1]%d1,n0] + (q*[d1,0] - (q - one)*[d1,d0])) >> s
236              *  r = ([[n2,n1]%d1,n0] + (q*[d1,0] - [d1 *(q-one),d0*(1-q)])) >> s
237              *  r = ([[n2,n1]%d1,n0] + [d1 *one,d0*(one-q)])) >> s
238              */
239 
240             push    EBP             ;
241             push    ESI             ;
242             push    EDI             ;
243 
244             mov     ESI,EDX         ;
245             mov     EDI,EAX         ;
246             mov     EBP,ECX         ;
247 
248             bsr     EAX,ECX         ;       // EAX is now 30..0
249             xor     EAX,0x1F        ;       // EAX is now 1..31
250             mov     CH,AL           ;
251             neg     EAX             ;
252             add     EAX,32          ;
253             mov     CL,AL           ;
254 
255             mov     EAX,EBX         ;
256             shr     EAX,CL          ;
257             xchg    CH,CL           ;
258             shl     EBP,CL          ;
259             or      EBP,EAX         ;
260             shl     EBX,CL          ;
261 
262             mov     EDX,ESI         ;
263             xchg    CH,CL           ;
264             shr     EDX,CL          ;
265 
266             mov     EAX,EDI         ;
267             shr     EAX,CL          ;
268             xchg    CH,CL           ;
269             shl     EDI,CL          ;
270             shl     ESI,CL          ;
271             or      EAX,ESI         ;
272 
273             div     EBP             ;
274             push    EBP             ;
275             mov     EBP,EAX         ;
276             mov     ESI,EDX         ;
277 
278             mul     EBX             ;
279             cmp     EDX,ESI         ;
280             ja      L1              ;
281             jb      L2              ;
282             cmp     EAX,EDI         ;
283             jbe     L2              ;
284 L1:         dec     EBP             ;
285             sub     EAX,EBX         ;
286             sbb     EDX,0[ESP]      ;
287 L2:
288             add     ESP,4           ;
289             sub     EDI,EAX         ;
290             sbb     ESI,EDX         ;
291             mov     EAX,ESI         ;
292             xchg    CH,CL           ;
293             shl     EAX,CL          ;
294             xchg    CH,CL           ;
295             shr     EDI,CL          ;
296             or      EDI,EAX         ;
297             shr     ESI,CL          ;
298             mov     EBX,EDI         ;
299             mov     ECX,ESI         ;
300             mov     EAX,EBP         ;
301             xor     EDX,EDX         ;
302 
303             pop     EDI             ;
304             pop     ESI             ;
305             pop     EBP             ;
306             ret                     ;
307 
308 uldiv:      test    EDX,EDX         ;
309             jnz     D3              ;
310             // Both high words are 0, we can use the DIV instruction
311             div     EBX             ;
312             mov     EBX,EDX         ;
313             mov     EDX,ECX         ;       // EDX = ECX = 0
314             ret                     ;
315 
316             even                    ;
317 D3:         // Divide [EDX,EAX] by EBX
318             mov     ECX,EAX         ;
319             mov     EAX,EDX         ;
320             xor     EDX,EDX         ;
321             div     EBX             ;
322             xchg    ECX,EAX         ;
323             div     EBX             ;
324             // ECX,EAX = result
325             // EDX = remainder
326             mov     EBX,EDX         ;
327             mov     EDX,ECX         ;
328             xor     ECX,ECX         ;
329             ret                     ;
330 
331 quo0:       // Quotient is 0
332             // Remainder is [EDX,EAX]
333             mov     EBX,EAX         ;
334             mov     ECX,EDX         ;
335             xor     EAX,EAX         ;
336             xor     EDX,EDX         ;
337             ret                     ;
338 
339 Lleft:      // The quotient is 0 or 1 and EDX >= ECX
340             cmp     EDX,ECX         ;
341             ja      quo1            ;       // [EDX,EAX] > [ECX,EBX]
342             // EDX == ECX
343             cmp     EAX,EBX         ;
344             jb      quo0            ;
345 
346 quo1:       // Quotient is 1
347             // Remainder is [EDX,EAX] - [ECX,EBX]
348             sub     EAX,EBX         ;
349             sbb     EDX,ECX         ;
350             mov     EBX,EAX         ;
351             mov     ECX,EDX         ;
352             mov     EAX,1           ;
353             xor     EDX,EDX         ;
354             ret                     ;
355         }
356     }
357     else version (D_InlineAsm_X86_64)
358         assert(0);
359     else
360         static assert(0);
361 }
362 
363 
364 /***************************************
365  * Signed long divide.
366  * Input:
367  *      [EDX,EAX],[ECX,EBX]
368  * Output:
369  *      [EDX,EAX] = [EDX,EAX] / [ECX,EBX]
370  *      [ECX,EBX] = [EDX,EAX] % [ECX,EBX]
371  *      ESI,EDI destroyed
372  */
373 
374 void __LDIV2__()
375 {
376     version (D_InlineAsm_X86)
377     {
378         asm
379         {
380             naked                   ;
381             test    EDX,EDX         ;       // [EDX,EAX] negative?
382             jns     L10             ;       // no
383             //neg64 EDX,EAX         ;       // [EDX,EAX] = -[EDX,EAX]
384               neg   EDX             ;
385               neg   EAX             ;
386               sbb   EDX,0           ;
387             test    ECX,ECX         ;       // [ECX,EBX] negative?
388             jns     L11             ;       // no
389             //neg64 ECX,EBX         ;
390               neg   ECX             ;
391               neg   dword ptr 4[ESP] ;
392               sbb   ECX,0           ;
393             push    dword ptr 4[ESP] ;
394             call    __ULDIV2__      ;
395             add     ESP,4           ;
396             //neg64 ECX,EBX         ;       // remainder same sign as dividend
397               neg   ECX             ;
398               neg   EBX             ;
399               sbb   ECX,0           ;
400             ret                     ;
401 
402 L11:
403             push    dword ptr 4[ESP] ;
404             call    __ULDIV2__      ;
405             add     ESP,4           ;
406             //neg64 ECX,EBX         ;       // remainder same sign as dividend
407               neg   ECX             ;
408               neg   EBX             ;
409               sbb   ECX,0           ;
410             //neg64 EDX,EAX         ;       // quotient is negative
411               neg   EDX             ;
412               neg   EAX             ;
413               sbb   EDX,0           ;
414             ret                     ;
415 
416 L10:        test    ECX,ECX         ;       // [ECX,EBX] negative?
417             jns     L12             ;       // no (all is positive)
418             //neg64 ECX,EBX         ;
419               neg   ECX             ;
420               neg   dword ptr 4[ESP] ;
421               sbb   ECX,0           ;
422             push    dword ptr 4[ESP] ;
423             call    __ULDIV2__      ;
424             add     ESP,4           ;
425             //neg64 EDX,EAX         ;       // quotient is negative
426               neg   EDX             ;
427               neg   EAX             ;
428               sbb   EDX,0           ;
429             ret                     ;
430 
431 L12:        jmp     __ULDIV2__      ;
432         }
433     }
434     else version (D_InlineAsm_X86_64)
435         assert(0);
436     else
437         static assert(0);
438 }
439 
440 void __LDIV__()
441 {
442     version (D_InlineAsm_X86)
443     {
444         asm
445         {
446             naked                   ;
447             test    EDX,EDX         ;       // [EDX,EAX] negative?
448             jns     L10             ;       // no
449             //neg64 EDX,EAX         ;       // [EDX,EAX] = -[EDX,EAX]
450               neg   EDX             ;
451               neg   EAX             ;
452               sbb   EDX,0           ;
453             test    ECX,ECX         ;       // [ECX,EBX] negative?
454             jns     L11             ;       // no
455             //neg64 ECX,EBX         ;
456               neg   ECX             ;
457               neg   EBX             ;
458               sbb   ECX,0           ;
459             call    __ULDIV__       ;
460             //neg64 ECX,EBX         ;       // remainder same sign as dividend
461               neg   ECX             ;
462               neg   EBX             ;
463               sbb   ECX,0           ;
464             ret                     ;
465 
466 L11:        call    __ULDIV__       ;
467             //neg64 ECX,EBX         ;       // remainder same sign as dividend
468               neg   ECX             ;
469               neg   EBX             ;
470               sbb   ECX,0           ;
471             //neg64 EDX,EAX         ;       // quotient is negative
472               neg   EDX             ;
473               neg   EAX             ;
474               sbb   EDX,0           ;
475             ret                     ;
476 
477 L10:        test    ECX,ECX         ;       // [ECX,EBX] negative?
478             jns     L12             ;       // no (all is positive)
479             //neg64 ECX,EBX         ;
480               neg   ECX             ;
481               neg   EBX             ;
482               sbb   ECX,0           ;
483             call    __ULDIV__       ;
484             //neg64 EDX,EAX         ;       // quotient is negative
485               neg   EDX             ;
486               neg   EAX             ;
487               sbb   EDX,0           ;
488             ret                     ;
489 
490 L12:        jmp     __ULDIV__       ;
491         }
492     }
493     else version (D_InlineAsm_X86_64)
494         assert(0);
495     else
496         static assert(0);
497 }
498 
499 
500 version (Win32) version (CRuntime_Microsoft)
501 {
502     extern(C) void _alldiv();
503     extern(C) void _aulldiv();
504     extern(C) void _allrem();
505     extern(C) void _aullrem();
506 
507     void _ms_alldiv()
508     {
509         asm
510         {
511             naked            ;
512             push ECX         ;
513             push EBX         ;
514             push EDX         ;
515             push EAX         ;
516             call _alldiv     ;
517             ret              ;
518         }
519     }
520 
521     void _ms_aulldiv()
522     {
523         asm
524         {
525             naked            ;
526             push ECX         ;
527             push EBX         ;
528             push EDX         ;
529             push EAX         ;
530             call _aulldiv    ;
531             ret              ;
532         }
533     }
534 
535     void _ms_allrem()
536     {
537         asm
538         {
539             naked            ;
540             push ECX         ;
541             push EBX         ;
542             push EDX         ;
543             push EAX         ;
544             call _allrem     ;
545             mov EBX,EAX      ;
546             mov ECX,EDX      ;
547             ret              ;
548         }
549     }
550 
551     void _ms_aullrem()
552     {
553         asm
554         {
555             naked            ;
556             push ECX         ;
557             push EBX         ;
558             push EDX         ;
559             push EAX         ;
560             call _aullrem    ;
561             mov EBX,EAX      ;
562             mov ECX,EDX      ;
563             ret              ;
564         }
565     }
566 }