1 /**
2  * Algorithms from "Division by Invariant Integers using Multiplication"
3  * by Torbjoern Granlund and Peter L. Montgomery
4  *
5  * Compiler implementation of the
6  * $(LINK2 https://www.dlang.org, D programming language).
7  *
8  * Copyright:   Copyright (C) 2013-2023 by The D Language Foundation, All Rights Reserved
9  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
10  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
11  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/divcoeff.d, backend/divcoeff.d)
12  */
13 module dmd.backend.divcoeff;
14 
15 import core.stdc.stdio;
16 
17 
18 nothrow:
19 @safe:
20 
21 alias ullong = ulong;
22 
23 /* unsigned 128 bit math
24  */
25 
26 bool SIGN64(ullong x)
27 {
28     return cast(long)x < 0;
29 }
30 
31 void SHL128(out ullong dh, out ullong dl, ullong xh,ullong xl)
32 {
33     dh = (xh << 1) | SIGN64(xl);
34     dl = xl << 1;
35 }
36 
37 void SHR128(out ullong dh, out ullong dl, ullong xh,ullong xl)
38 {
39     dl = (xl >> 1) | ((xh & 1) << 63);
40     dh = xh >> 1;
41 }
42 
43 bool XltY128(ullong xh, ullong xl, ullong yh, ullong yl)
44 {
45     return xh < yh || (xh == yh && xl < yl);
46 }
47 
48 void u128Div(ullong xh, ullong xl, ullong yh, ullong yl, ullong *pqh, ullong *pql)
49 {
50     /* Use auld skool shift & subtract algorithm.
51      * Not very efficient.
52      */
53 
54     //ullong xxh = xh, xxl = xl, yyh = yh, yyl = yl;
55 
56     assert(yh || yl);           // no div-by-0 bugs
57 
58     // left justify y
59     uint shiftcount = 1;
60     if (!yh)
61     {   yh = yl;
62         yl = 0;
63         shiftcount += 64;
64     }
65     while (!SIGN64(yh))
66     {
67         SHL128(yh,yl, yh,yl);
68         shiftcount += 1;
69     }
70 
71     ullong qh = 0;
72     ullong ql = 0;
73     do
74     {
75         SHL128(qh,ql, qh,ql);
76         if (XltY128(yh,yl,xh,xl))
77         {
78             // x -= y;
79             if (xl < yl)
80             {   xl -= yl;
81                 xh -= yh + 1;
82             }
83             else
84             {   xl -= yl;
85                 xh -= yh;
86             }
87 
88             ql |= 1;
89         }
90         SHR128(yh,yl, yh,yl);
91     } while (--shiftcount);
92 
93     *pqh = qh;
94     *pql = ql;
95 
96     // Remainder is xh,xl
97 
98     version (none)
99     {
100         printf("%016llx_%016llx / %016llx_%016llx = %016llx_%016llx\n", xxh,xxl,yyh,yyl,qh,ql);
101         if (xxh == 0 && yyh == 0)
102             printf("should be %llx\n", xxl / yyl);
103     }
104 }
105 
106 /************************************
107  * Implement Algorithm 6.2: Selection of multiplier and shift count
108  * Params:
109  *      N =     32 or 64
110  *      d =     divisor (must not be 0 or a power of 2)
111  *      prec =  bits of precision desired
112  * Output:
113  *      *pm =      factor
114  *      *pshpost = post shift
115  * Returns:
116  *      true    m >= 2**N
117  */
118 
119 @trusted
120 extern (C) bool choose_multiplier(int N, ullong d, int prec, ullong *pm, int *pshpost)
121 {
122     assert(N == 32 || N == 64);
123     assert(prec <= N);
124     assert(d > 1 && (d & (d - 1)));
125 
126     // Compute b such that 2**(b-1) < d <= 2**b
127     // which is the number of significant bits in d
128     int b = 0;
129     ullong d1 = d;
130     while (d1)
131     {
132         ++b;
133         d1 >>= 1;
134     }
135 
136     int shpost = b;
137 
138     bool mhighbit = false;
139     if (N == 32)
140     {
141         // mlow = (2**(N + b)) / d
142         ullong mlow = (1UL << (N + b)) / d;
143 
144         // uhigh = (2**(N + b) + 2**(N + b - prec)) / d
145         ullong mhigh = ((1UL << (N + b)) + (1UL << (N + b - prec))) / d;
146 
147         while (mlow/2 < mhigh/2 && shpost)
148         {
149             mlow /= 2;
150             mhigh /= 2;
151             --shpost;
152         }
153 
154         *pm = mhigh & 0xFFFFFFFF;
155         mhighbit = (mhigh >> N) != 0;
156     }
157     else if (N == 64)
158     {
159         // Same as for N==32, but use 128 bit unsigned arithmetic
160 
161         // mlow = (2**(N + b)) / d
162         ullong mlowl = 0;
163         ullong mlowh = 1UL << b;
164 
165         // mlow /= d
166         u128Div(mlowh, mlowl, 0, d, &mlowh, &mlowl);
167 
168         // mhigh = (2**(N + b) + 2**(N + b - prec)) / d
169         ullong mhighl = 0;
170         ullong mhighh = 1UL << b;
171         int e = N + b - prec;
172         if (e < 64)
173             mhighl = 1UL << e;
174         else
175             mhighh |= 1UL << (e - 64);
176 
177         // mhigh /= d
178         u128Div(mhighh, mhighl, 0, d, &mhighh, &mhighl);
179 
180         while (1)
181         {
182             // mlowb = mlow / 2
183             ullong mlowbh,mlowbl;
184             SHR128(mlowbh,mlowbl, mlowh,mlowl);
185 
186             // mhighb = mhigh / 2
187             ullong mhighbh,mhighbl;
188             SHR128(mhighbh,mhighbl, mhighh,mhighl);
189 
190             // if (mlowb < mhighb && shpost)
191             if (XltY128(mlowbh,mlowbl, mhighbh,mhighbl) && shpost)
192             {
193                 // mlow = mlowb
194                 mlowl = mlowbl;
195                 mlowh = mlowbh;
196 
197                 // mhigh = mhighb
198                 mhighl = mhighbl;
199                 mhighh = mhighbh;
200 
201                 --shpost;
202             }
203             else
204                 break;
205         }
206 
207         *pm = mhighl;
208         mhighbit = mhighh & 1;
209     }
210     else
211         assert(0);
212 
213     *pshpost = shpost;
214     return mhighbit;
215 }
216 
217 /*************************************
218  * Find coefficients for Algorithm 4.2:
219  * Optimized code generation of unsigned q=n/d for constant nonzero d
220  * Input:
221  *      N       32 or 64 (width of divide)
222  *      d       divisor (not a power of 2)
223  * Output:
224  *      *pshpre  pre-shift
225  *      *pm      factor
226  *      *pshpost post-shift
227  * Returns:
228  *      true    Use algorithm:
229  *              t1 = MULUH(m, n)
230  *              q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
231  *
232  *      false   Use algorithm:
233  *              q = SRL(MULUH(m, SRL(n, shpre)), shpost)
234  */
235 
236 extern (C) bool udiv_coefficients(int N, ullong d, int *pshpre, ullong *pm, int *pshpost)
237 {
238     bool mhighbit = choose_multiplier(N, d, N, pm, pshpost);
239     if (mhighbit && (d & 1) == 0)
240     {
241         int e = 0;
242         while ((d & 1) == 0)
243         {   ++e;
244             d >>= 1;
245         }
246         *pshpre = e;
247         mhighbit = choose_multiplier(N, d, N - e, pm, pshpost);
248         assert(mhighbit == false);
249     }
250     else
251         *pshpre = 0;
252     return mhighbit;
253 }
254 
255 @trusted
256 unittest
257 {
258     struct S
259     {
260         int N;
261         ullong d;
262         int shpre;
263         int highbit;
264         ullong m;
265         int shpost;
266     }
267 
268     static immutable S[14] table =
269     [
270         { 32, 10,     0, 0, 0xCCCCCCCD, 3 },
271         { 32, 13,     0, 0, 0x4EC4EC4F, 2 },
272         { 32, 14,     1, 0, 0x92492493, 2 },
273         { 32, 15,     0, 0, 0x88888889, 3 },
274         { 32, 17,     0, 0, 0xF0F0F0F1, 4 },
275         { 32, 14_007, 0, 1, 0x2B71840D, 14 },
276 
277         { 64, 7,      0, 1, 0x2492492492492493, 3 },
278         { 64, 10,     0, 0, 0xCCCCCCCCCCCCCCCD, 3 },
279         { 64, 13,     0, 0, 0x4EC4EC4EC4EC4EC5, 2 },
280         { 64, 14,     1, 0, 0x4924924924924925, 1 },
281         { 64, 15,     0, 0, 0x8888888888888889, 3 },
282         { 64, 17,     0, 0, 0xF0F0F0F0F0F0F0F1, 4 },
283         { 64, 100 ,   2, 0, 0x28F5C28F5C28F5C3, 2 },
284         { 64, 14_007, 0, 1, 0x2B71840C5ADF02C3, 14 },
285     ];
286 
287     for (int i = 0; i < table.length; i++)
288     {   const ps = &table[i];
289 
290         ullong m;
291         int shpre;
292         int shpost;
293         bool mhighbit = udiv_coefficients(ps.N, ps.d, &shpre, &m, &shpost);
294 
295         //printf("[%d] %d %d %llx %d\n", i, shpre, mhighbit, m, shpost);
296         assert(shpre == ps.shpre);
297         assert(mhighbit == ps.highbit);
298         assert(m == ps.m);
299         assert(shpost == ps.shpost);
300     }
301 }
302 
303 version (none)
304 {
305     import core.stdc.stdlib;
306 
307     extern (D) int main(string[] args)
308     {
309         if (args.length == 2)
310         {
311             ullong d = atoi(args[1].ptr);
312             ullong m;
313             int shpre;
314             int shpost;
315             bool mhighbit = udiv_coefficients(64, d, &shpre, &m, &shpost);
316 
317             printf("%d %d %llx, %d\n", shpre, mhighbit, m, shpost);
318         }
319         return 0;
320     }
321 }