1 /**
2  * This module contains a collection of bit-level operations.
3  *
4  * Copyright: Copyright Don Clugston 2005 - 2013.
5  * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  * Authors:   Don Clugston, Sean Kelly, Walter Bright, Alex Rønne Petersen, Thomas Stuart Bockman
7  * Source:    $(DRUNTIMESRC core/_bitop.d)
8  */
9 
10 module core.bitop;
11 
12 nothrow:
13 @safe:
14 @nogc:
15 
16 version (D_InlineAsm_X86_64)
17     version = AsmX86;
18 else version (D_InlineAsm_X86)
19     version = AsmX86;
20 
21 version (X86_64)
22     version = AnyX86;
23 else version (X86)
24     version = AnyX86;
25 
26 // Use to implement 64-bit bitops on 32-bit arch.
27 private union Split64
28 {
29     ulong u64;
30     struct
31     {
32         version (LittleEndian)
33         {
34             uint lo;
35             uint hi;
36         }
37         else
38         {
39             uint hi;
40             uint lo;
41         }
42     }
43 
44     pragma(inline, true)
45     this(ulong u64) @safe pure nothrow @nogc
46     {
47         if (__ctfe)
48         {
49             lo = cast(uint) u64;
50             hi = cast(uint) (u64 >>> 32);
51         }
52         else
53             this.u64 = u64;
54     }
55 }
56 
57 unittest
58 {
59     const rt = Split64(1);
60     assert((rt.lo == 1) && (rt.hi == 0));
61 
62     enum ct = Split64(1);
63     assert((ct.lo == rt.lo) && (ct.hi == rt.hi));
64 }
65 
66 /**
67  * Scans the bits in v starting with bit 0, looking
68  * for the first set bit.
69  * Returns:
70  *      The bit number of the first bit set.
71  *      The return value is undefined if v is zero.
72  */
73 int bsf(uint v) pure
74 {
75     pragma(inline, false);  // so intrinsic detection will work
76     return softBsf!uint(v);
77 }
78 
79 /// ditto
80 int bsf(ulong v) pure
81 {
82     static if (size_t.sizeof == ulong.sizeof)  // 64 bit code gen
83     {
84         pragma(inline, false);   // so intrinsic detection will work
85         return softBsf!ulong(v);
86     }
87     else
88     {
89         /* intrinsic not available for 32 bit code,
90          * make do with 32 bit bsf
91          */
92         const sv = Split64(v);
93         return (sv.lo == 0)?
94             bsf(sv.hi) + 32 :
95             bsf(sv.lo);
96     }
97 }
98 
99 ///
100 unittest
101 {
102     assert(bsf(0x21) == 0);
103     assert(bsf(ulong.max << 39) == 39);
104 }
105 
106 unittest
107 {
108     // Make sure bsf() is available at CTFE
109     enum test_ctfe = bsf(ulong.max);
110     assert(test_ctfe == 0);
111 }
112 
113 /**
114  * Scans the bits in v from the most significant bit
115  * to the least significant bit, looking
116  * for the first set bit.
117  * Returns:
118  *      The bit number of the first bit set.
119  *      The return value is undefined if v is zero.
120  */
121 int bsr(uint v) pure
122 {
123     pragma(inline, false);  // so intrinsic detection will work
124     return softBsr!uint(v);
125 }
126 
127 /// ditto
128 int bsr(ulong v) pure
129 {
130     static if (size_t.sizeof == ulong.sizeof)  // 64 bit code gen
131     {
132         pragma(inline, false);   // so intrinsic detection will work
133         return softBsr!ulong(v);
134     }
135     else
136     {
137         /* intrinsic not available for 32 bit code,
138          * make do with 32 bit bsr
139          */
140         const sv = Split64(v);
141         return (sv.hi == 0)?
142             bsr(sv.lo) :
143             bsr(sv.hi) + 32;
144     }
145 }
146 
147 ///
148 unittest
149 {
150     assert(bsr(0x21) == 5);
151     assert(bsr((ulong.max >> 15) - 1) == 48);
152 }
153 
154 unittest
155 {
156     // Make sure bsr() is available at CTFE
157     enum test_ctfe = bsr(ulong.max);
158     assert(test_ctfe == 63);
159 }
160 
161 private alias softBsf(N) = softScan!(N, true);
162 private alias softBsr(N) = softScan!(N, false);
163 
164 /* Shared software fallback implementation for bit scan foward and reverse.
165 
166 If forward is true, bsf is computed (the index of the first set bit).
167 If forward is false, bsr is computed (the index of the last set bit).
168 
169 -1 is returned if no bits are set (v == 0).
170 */
171 private int softScan(N, bool forward)(N v) pure
172     if (is(N == uint) || is(N == ulong))
173 {
174     // bsf() and bsr() are officially undefined for v == 0.
175     if (!v)
176         return -1;
177 
178     // This is essentially an unrolled binary search:
179     enum mask(ulong lo) = forward ? cast(N) lo : cast(N)~lo;
180     enum inc(int up) = forward ? up : -up;
181 
182     N x;
183     int ret;
184     static if (is(N == ulong))
185     {
186         x = v & mask!0x0000_0000_FFFF_FFFFL;
187         if (x)
188         {
189             v = x;
190             ret = forward ? 0 : 63;
191         }
192         else
193             ret = forward ? 32 : 31;
194 
195         x = v & mask!0x0000_FFFF_0000_FFFFL;
196         if (x)
197             v = x;
198         else
199             ret += inc!16;
200     }
201     else static if (is(N == uint))
202     {
203         x = v & mask!0x0000_FFFF;
204         if (x)
205         {
206             v = x;
207             ret = forward ? 0 : 31;
208         }
209         else
210             ret = forward ? 16 : 15;
211     }
212     else
213         static assert(false);
214 
215     x = v & mask!0x00FF_00FF_00FF_00FFL;
216     if (x)
217         v = x;
218     else
219         ret += inc!8;
220 
221     x = v & mask!0x0F0F_0F0F_0F0F_0F0FL;
222     if (x)
223         v = x;
224     else
225         ret += inc!4;
226 
227     x = v & mask!0x3333_3333_3333_3333L;
228     if (x)
229         v = x;
230     else
231         ret += inc!2;
232 
233     x = v & mask!0x5555_5555_5555_5555L;
234     if (!x)
235         ret += inc!1;
236 
237     return ret;
238 }
239 
240 unittest
241 {
242     assert(softBsf!uint(0u) == -1);
243     assert(softBsr!uint(0u) == -1);
244     assert(softBsf!ulong(0uL) == -1);
245     assert(softBsr!ulong(0uL) == -1);
246 
247     assert(softBsf!uint(0x0031_A000) == 13);
248     assert(softBsr!uint(0x0031_A000) == 21);
249     assert(softBsf!ulong(0x0000_0001_8000_0000L) == 31);
250     assert(softBsr!ulong(0x0000_0001_8000_0000L) == 32);
251 
252     foreach (b; 0 .. 64)
253     {
254         if (b < 32)
255         {
256             assert(softBsf!uint(1u << b) == b);
257             assert(softBsr!uint(1u << b) == b);
258         }
259 
260         assert(softBsf!ulong(1uL << b) == b);
261         assert(softBsr!ulong(1uL << b) == b);
262     }
263 }
264 
265 /**
266  * Tests the bit.
267  * (No longer an intrisic - the compiler recognizes the patterns
268  * in the body.)
269  */
270 int bt(const scope size_t* p, size_t bitnum) pure @system
271 {
272     static if (size_t.sizeof == 8)
273         return ((p[bitnum >> 6] & (1L << (bitnum & 63)))) != 0;
274     else static if (size_t.sizeof == 4)
275         return ((p[bitnum >> 5] & (1  << (bitnum & 31)))) != 0;
276     else
277         static assert(0);
278 }
279 ///
280 @system pure unittest
281 {
282     size_t[2] array;
283 
284     array[0] = 2;
285     array[1] = 0x100;
286 
287     assert(bt(array.ptr, 1));
288     assert(array[0] == 2);
289     assert(array[1] == 0x100);
290 }
291 
292 /**
293  * Tests and complements the bit.
294  */
295 int btc(size_t* p, size_t bitnum) pure @system;
296 
297 
298 /**
299  * Tests and resets (sets to 0) the bit.
300  */
301 int btr(size_t* p, size_t bitnum) pure @system;
302 
303 
304 /**
305  * Tests and sets the bit.
306  * Params:
307  * p = a non-NULL pointer to an array of size_ts.
308  * bitnum = a bit number, starting with bit 0 of p[0],
309  * and progressing. It addresses bits like the expression:
310 ---
311 p[index / (size_t.sizeof*8)] & (1 << (index & ((size_t.sizeof*8) - 1)))
312 ---
313  * Returns:
314  *      A non-zero value if the bit was set, and a zero
315  *      if it was clear.
316  */
317 int bts(size_t* p, size_t bitnum) pure @system;
318 
319 ///
320 @system pure unittest
321 {
322     size_t[2] array;
323 
324     array[0] = 2;
325     array[1] = 0x100;
326 
327     assert(btc(array.ptr, 35) == 0);
328     if (size_t.sizeof == 8)
329     {
330         assert(array[0] == 0x8_0000_0002);
331         assert(array[1] == 0x100);
332     }
333     else
334     {
335         assert(array[0] == 2);
336         assert(array[1] == 0x108);
337     }
338 
339     assert(btc(array.ptr, 35));
340     assert(array[0] == 2);
341     assert(array[1] == 0x100);
342 
343     assert(bts(array.ptr, 35) == 0);
344     if (size_t.sizeof == 8)
345     {
346         assert(array[0] == 0x8_0000_0002);
347         assert(array[1] == 0x100);
348     }
349     else
350     {
351         assert(array[0] == 2);
352         assert(array[1] == 0x108);
353     }
354 
355     assert(btr(array.ptr, 35));
356     assert(array[0] == 2);
357     assert(array[1] == 0x100);
358 }
359 
360 /**
361  * Range over bit set. Each element is the bit number that is set.
362  *
363  * This is more efficient than testing each bit in a sparsely populated bit
364  * set. Note that the first bit in the bit set would be bit 0.
365  */
366 struct BitRange
367 {
368     /// Number of bits in each size_t
369     enum bitsPerWord = size_t.sizeof * 8;
370 
371     private
372     {
373         const(size_t)*bits; // points at next word of bits to check
374         size_t cur; // needed to allow searching bits using bsf
375         size_t idx; // index of current set bit
376         size_t len; // number of bits in the bit set.
377     }
378     @nogc nothrow pure:
379 
380     /**
381      * Construct a BitRange.
382      *
383      * Params:
384      *   bitarr = The array of bits to iterate over
385      *   numBits = The total number of valid bits in the given bit array
386      */
387     this(const(size_t)* bitarr, size_t numBits) @system
388     {
389         bits = bitarr;
390         len = numBits;
391         if (len)
392         {
393             // prime the first bit
394             cur = *bits++ ^ 1;
395             popFront();
396         }
397     }
398 
399     /// Range functions
400     size_t front()
401     {
402         assert(!empty);
403         return idx;
404     }
405 
406     /// ditto
407     bool empty() const
408     {
409         return idx >= len;
410     }
411 
412     /// ditto
413     void popFront() @system
414     {
415         // clear the current bit
416         auto curbit = idx % bitsPerWord;
417         cur ^= size_t(1) << curbit;
418         if (!cur)
419         {
420             // find next size_t with set bit
421             idx -= curbit;
422             while (!cur)
423             {
424                 if ((idx += bitsPerWord) >= len)
425                     // now empty
426                     return;
427                 cur = *bits++;
428             }
429             idx += bsf(cur);
430         }
431         else
432         {
433             idx += bsf(cur) - curbit;
434         }
435     }
436 }
437 
438 ///
439 @system unittest
440 {
441     import core.stdc.stdlib : malloc, free;
442     import core.stdc.string : memset;
443 
444     // initialize a bit array
445     enum nBytes = (100 + BitRange.bitsPerWord - 1) / 8;
446     size_t *bitArr = cast(size_t *)malloc(nBytes);
447     scope(exit) free(bitArr);
448     memset(bitArr, 0, nBytes);
449 
450     // set some bits
451     bts(bitArr, 48);
452     bts(bitArr, 24);
453     bts(bitArr, 95);
454     bts(bitArr, 78);
455 
456     enum sum = 48 + 24 + 95 + 78;
457 
458     // iterate
459     size_t testSum;
460     size_t nBits;
461     foreach (b; BitRange(bitArr, 100))
462     {
463         testSum += b;
464         ++nBits;
465     }
466 
467     assert(testSum == sum);
468     assert(nBits == 4);
469 }
470 
471 @system unittest
472 {
473     void testIt(size_t numBits, size_t[] bitsToTest...)
474     {
475         import core.stdc.stdlib : malloc, free;
476         import core.stdc.string : memset;
477         immutable numBytes = (numBits + size_t.sizeof * 8 - 1) / 8;
478         size_t* bitArr = cast(size_t *)malloc(numBytes);
479         scope(exit) free(bitArr);
480         memset(bitArr, 0, numBytes);
481         foreach (b; bitsToTest)
482             bts(bitArr, b);
483         auto br = BitRange(bitArr, numBits);
484         foreach (b; bitsToTest)
485         {
486             assert(!br.empty);
487             assert(b == br.front);
488             br.popFront();
489         }
490         assert(br.empty);
491     }
492 
493     testIt(100, 0, 1, 31, 63, 85);
494     testIt(100, 6, 45, 89, 92, 99);
495 }
496 
497 /**
498  * Swaps bytes in a 2 byte ushort.
499  * Params:
500  *      x = value
501  * Returns:
502  *      `x` with bytes swapped
503  */
504 pragma(inline, false)
505 ushort byteswap(ushort x) pure
506 {
507     /* Calling it bswap(ushort) would break existing code that calls bswap(uint).
508      *
509      * This pattern is meant to be recognized by the dmd code generator.
510      * Don't change it without checking that an XCH instruction is still
511      * used to implement it.
512      * Inlining may also throw it off.
513      */
514     return cast(ushort) (((x >> 8) & 0xFF) | ((x << 8) & 0xFF00u));
515 }
516 
517 ///
518 unittest
519 {
520     assert(byteswap(cast(ushort)0xF234) == 0x34F2);
521     static ushort xx = 0xF234;
522     assert(byteswap(xx) == 0x34F2);
523 }
524 
525 /**
526  * Swaps bytes in a 4 byte uint end-to-end, i.e. byte 0 becomes
527  * byte 3, byte 1 becomes byte 2, byte 2 becomes byte 1, byte 3
528  * becomes byte 0.
529  */
530 uint bswap(uint v) pure;
531 
532 ///
533 unittest
534 {
535     assert(bswap(0x01020304u) == 0x04030201u);
536     static uint xx = 0x10203040u;
537     assert(bswap(xx) == 0x40302010u);
538 }
539 
540 /**
541  * Swaps bytes in an 8 byte ulong end-to-end, i.e. byte 0 becomes
542  * byte 7, byte 1 becomes byte 6, etc.
543  * This is meant to be recognized by the compiler as an intrinsic.
544  */
545 ulong bswap(ulong v) pure;
546 
547 ///
548 unittest
549 {
550     assert(bswap(0x01020304_05060708uL) == 0x08070605_04030201uL);
551     static ulong xx = 0x10203040_50607080uL;
552     assert(bswap(xx) == 0x80706050_40302010uL);
553 }
554 
555 version (DigitalMars) version (AnyX86) @system // not pure
556 {
557     /**
558      * Reads I/O port at port_address.
559      */
560     ubyte inp(uint port_address);
561 
562 
563     /**
564      * ditto
565      */
566     ushort inpw(uint port_address);
567 
568 
569     /**
570      * ditto
571      */
572     uint inpl(uint port_address);
573 
574 
575     /**
576      * Writes and returns value to I/O port at port_address.
577      */
578     ubyte outp(uint port_address, ubyte value);
579 
580 
581     /**
582      * ditto
583      */
584     ushort outpw(uint port_address, ushort value);
585 
586 
587     /**
588      * ditto
589      */
590     uint outpl(uint port_address, uint value);
591 }
592 
593 
594 /**
595  *  Calculates the number of set bits in an integer.
596  */
597 int popcnt(uint x) pure
598 {
599     // Select the fastest method depending on the compiler and CPU architecture
600     version (DigitalMars)
601     {
602         static if (is(typeof(_popcnt(uint.max))))
603         {
604             import core.cpuid;
605             if (!__ctfe && hasPopcnt)
606                 return _popcnt(x);
607         }
608     }
609 
610     return softPopcnt!uint(x);
611 }
612 
613 unittest
614 {
615     assert( popcnt( 0 ) == 0 );
616     assert( popcnt( 7 ) == 3 );
617     assert( popcnt( 0xAA )== 4 );
618     assert( popcnt( 0x8421_1248 ) == 8 );
619     assert( popcnt( 0xFFFF_FFFF ) == 32 );
620     assert( popcnt( 0xCCCC_CCCC ) == 16 );
621     assert( popcnt( 0x7777_7777 ) == 24 );
622 
623     // Make sure popcnt() is available at CTFE
624     enum test_ctfe = popcnt(uint.max);
625     assert(test_ctfe == 32);
626 }
627 
628 /// ditto
629 int popcnt(ulong x) pure
630 {
631     // Select the fastest method depending on the compiler and CPU architecture
632     import core.cpuid;
633 
634     static if (size_t.sizeof == uint.sizeof)
635     {
636         const sx = Split64(x);
637         version (DigitalMars)
638         {
639             static if (is(typeof(_popcnt(uint.max))))
640             {
641                 if (!__ctfe && hasPopcnt)
642                     return _popcnt(sx.lo) + _popcnt(sx.hi);
643             }
644         }
645 
646         return softPopcnt!uint(sx.lo) + softPopcnt!uint(sx.hi);
647     }
648     else static if (size_t.sizeof == ulong.sizeof)
649     {
650         version (DigitalMars)
651         {
652             static if (is(typeof(_popcnt(ulong.max))))
653             {
654                 if (!__ctfe && hasPopcnt)
655                     return _popcnt(x);
656             }
657         }
658 
659         return softPopcnt!ulong(x);
660     }
661     else
662         static assert(false);
663 }
664 
665 unittest
666 {
667     assert(popcnt(0uL) == 0);
668     assert(popcnt(1uL) == 1);
669     assert(popcnt((1uL << 32) - 1) == 32);
670     assert(popcnt(0x48_65_6C_6C_6F_3F_21_00uL) == 28);
671     assert(popcnt(ulong.max) == 64);
672 
673     // Make sure popcnt() is available at CTFE
674     enum test_ctfe = popcnt(ulong.max);
675     assert(test_ctfe == 64);
676 }
677 
678 private int softPopcnt(N)(N x) pure
679     if (is(N == uint) || is(N == ulong))
680 {
681     // Avoid branches, and the potential for cache misses which
682     // could be incurred with a table lookup.
683 
684     // We need to mask alternate bits to prevent the
685     // sum from overflowing.
686     // add neighbouring bits. Each bit is 0 or 1.
687     enum mask1 = cast(N) 0x5555_5555_5555_5555L;
688     x = x - ((x>>1) & mask1);
689     // now each two bits of x is a number 00,01 or 10.
690     // now add neighbouring pairs
691     enum mask2a = cast(N) 0xCCCC_CCCC_CCCC_CCCCL;
692     enum mask2b = cast(N) 0x3333_3333_3333_3333L;
693     x = ((x & mask2a)>>2) + (x & mask2b);
694     // now each nibble holds 0000-0100. Adding them won't
695     // overflow any more, so we don't need to mask any more
696 
697     enum mask4 = cast(N) 0x0F0F_0F0F_0F0F_0F0FL;
698     x = (x + (x >> 4)) & mask4;
699 
700     enum shiftbits = is(N == uint)? 24 : 56;
701     enum maskMul = cast(N) 0x0101_0101_0101_0101L;
702     x = (x * maskMul) >> shiftbits;
703 
704     return cast(int) x;
705 }
706 
707 version (DigitalMars) version (AnyX86)
708 {
709     /**
710      * Calculates the number of set bits in an integer
711      * using the X86 SSE4 POPCNT instruction.
712      * POPCNT is not available on all X86 CPUs.
713      */
714     ushort _popcnt( ushort x ) pure;
715     /// ditto
716     int _popcnt( uint x ) pure;
717     version (X86_64)
718     {
719         /// ditto
720         int _popcnt( ulong x ) pure;
721     }
722 
723     unittest
724     {
725         // Not everyone has SSE4 instructions
726         import core.cpuid;
727         if (!hasPopcnt)
728             return;
729 
730         static int popcnt_x(ulong u) nothrow @nogc
731         {
732             int c;
733             while (u)
734             {
735                 c += u & 1;
736                 u >>= 1;
737             }
738             return c;
739         }
740 
741         for (uint u = 0; u < 0x1_0000; ++u)
742         {
743             //writefln("%x %x %x", u,   _popcnt(cast(ushort)u), popcnt_x(cast(ushort)u));
744             assert(_popcnt(cast(ushort)u) == popcnt_x(cast(ushort)u));
745 
746             assert(_popcnt(cast(uint)u) == popcnt_x(cast(uint)u));
747             uint ui = u * 0x3_0001;
748             assert(_popcnt(ui) == popcnt_x(ui));
749 
750             version (X86_64)
751             {
752                 assert(_popcnt(cast(ulong)u) == popcnt_x(cast(ulong)u));
753                 ulong ul = u * 0x3_0003_0001;
754                 assert(_popcnt(ul) == popcnt_x(ul));
755             }
756         }
757     }
758 }
759 
760 
761 /**
762  * Reverses the order of bits in a 32-bit integer.
763  */
764 pragma(inline, true)
765 uint bitswap( uint x ) pure
766 {
767     if (!__ctfe)
768     {
769         static if (is(typeof(asmBitswap32(x))))
770             return asmBitswap32(x);
771     }
772 
773     return softBitswap!uint(x);
774 }
775 
776 unittest
777 {
778     static void test(alias impl)()
779     {
780         assert (impl( 0x8000_0100 ) == 0x0080_0001);
781         foreach (i; 0 .. 32)
782             assert (impl(1 << i) == 1 << 32 - i - 1);
783     }
784 
785     test!(bitswap)();
786     test!(softBitswap!uint)();
787     static if (is(typeof(asmBitswap32(0u))))
788         test!(asmBitswap32)();
789 
790     // Make sure bitswap() is available at CTFE
791     enum test_ctfe = bitswap(1U);
792     assert(test_ctfe == (1U << 31));
793 }
794 
795 /**
796  * Reverses the order of bits in a 64-bit integer.
797  */
798 pragma(inline, true)
799 ulong bitswap ( ulong x ) pure
800 {
801     if (!__ctfe)
802     {
803         static if (is(typeof(asmBitswap64(x))))
804             return asmBitswap64(x);
805     }
806 
807     return softBitswap!ulong(x);
808 }
809 
810 unittest
811 {
812     static void test(alias impl)()
813     {
814         assert (impl( 0b1000000000000000000000010000000000000000100000000000000000000001)
815             == 0b1000000000000000000000010000000000000000100000000000000000000001);
816         assert (impl( 0b1110000000000000000000010000000000000000100000000000000000000001)
817             == 0b1000000000000000000000010000000000000000100000000000000000000111);
818         foreach (i; 0 .. 64)
819             assert (impl(1UL << i) == 1UL << 64 - i - 1);
820     }
821 
822     test!(bitswap)();
823     test!(softBitswap!ulong)();
824     static if (is(typeof(asmBitswap64(0uL))))
825         test!(asmBitswap64)();
826 
827     // Make sure bitswap() is available at CTFE
828     enum test_ctfe = bitswap(1UL);
829     assert(test_ctfe == (1UL << 63));
830 }
831 
832 private N softBitswap(N)(N x) pure
833     if (is(N == uint) || is(N == ulong))
834 {
835     // swap 1-bit pairs:
836     enum mask1 = cast(N) 0x5555_5555_5555_5555L;
837     x = ((x >> 1) & mask1) | ((x & mask1) << 1);
838     // swap 2-bit pairs:
839     enum mask2 = cast(N) 0x3333_3333_3333_3333L;
840     x = ((x >> 2) & mask2) | ((x & mask2) << 2);
841     // swap 4-bit pairs:
842     enum mask4 = cast(N) 0x0F0F_0F0F_0F0F_0F0FL;
843     x = ((x >> 4) & mask4) | ((x & mask4) << 4);
844 
845     // reverse the order of all bytes:
846     x = bswap(x);
847 
848     return x;
849 }
850 
851 version (AsmX86)
852 {
853     private uint asmBitswap32(uint x) @trusted pure
854     {
855         asm pure nothrow @nogc { naked; }
856 
857         version (D_InlineAsm_X86_64)
858         {
859             version (Win64)
860                 asm pure nothrow @nogc { mov EAX, ECX; }
861             else
862                 asm pure nothrow @nogc { mov EAX, EDI; }
863         }
864 
865         asm pure nothrow @nogc
866         {
867             // Author: Tiago Gasiba.
868             mov EDX, EAX;
869             shr EAX, 1;
870             and EDX, 0x5555_5555;
871             and EAX, 0x5555_5555;
872             shl EDX, 1;
873             or  EAX, EDX;
874             mov EDX, EAX;
875             shr EAX, 2;
876             and EDX, 0x3333_3333;
877             and EAX, 0x3333_3333;
878             shl EDX, 2;
879             or  EAX, EDX;
880             mov EDX, EAX;
881             shr EAX, 4;
882             and EDX, 0x0f0f_0f0f;
883             and EAX, 0x0f0f_0f0f;
884             shl EDX, 4;
885             or  EAX, EDX;
886             bswap EAX;
887             ret;
888         }
889     }
890 }
891 
892 version (D_InlineAsm_X86_64)
893 {
894     private ulong asmBitswap64(ulong x) @trusted pure
895     {
896         asm pure nothrow @nogc { naked; }
897 
898         version (Win64)
899             asm pure nothrow @nogc { mov RAX, RCX; }
900         else
901             asm pure nothrow @nogc { mov RAX, RDI; }
902 
903         asm pure nothrow @nogc
904         {
905             // Author: Tiago Gasiba.
906             mov RDX, RAX;
907             shr RAX, 1;
908             mov RCX, 0x5555_5555_5555_5555L;
909             and RDX, RCX;
910             and RAX, RCX;
911             shl RDX, 1;
912             or  RAX, RDX;
913 
914             mov RDX, RAX;
915             shr RAX, 2;
916             mov RCX, 0x3333_3333_3333_3333L;
917             and RDX, RCX;
918             and RAX, RCX;
919             shl RDX, 2;
920             or  RAX, RDX;
921 
922             mov RDX, RAX;
923             shr RAX, 4;
924             mov RCX, 0x0f0f_0f0f_0f0f_0f0fL;
925             and RDX, RCX;
926             and RAX, RCX;
927             shl RDX, 4;
928             or  RAX, RDX;
929             bswap RAX;
930             ret;
931         }
932     }
933 }
934 
935 /**
936  *  Bitwise rotate `value` left (`rol`) or right (`ror`) by
937  *  `count` bit positions.
938  */
939 pure T rol(T)(const T value, const uint count)
940     if (__traits(isIntegral, T) && __traits(isUnsigned, T))
941 {
942     assert(count < 8 * T.sizeof);
943     if (count == 0)
944         return cast(T) value;
945 
946     return cast(T) ((value << count) | (value >> (T.sizeof * 8 - count)));
947 }
948 /// ditto
949 pure T ror(T)(const T value, const uint count)
950     if (__traits(isIntegral, T) && __traits(isUnsigned, T))
951 {
952     assert(count < 8 * T.sizeof);
953     if (count == 0)
954         return cast(T) value;
955 
956     return cast(T) ((value >> count) | (value << (T.sizeof * 8 - count)));
957 }
958 /// ditto
959 pure T rol(uint count, T)(const T value)
960     if (__traits(isIntegral, T) && __traits(isUnsigned, T))
961 {
962     static assert(count < 8 * T.sizeof);
963     static if (count == 0)
964         return cast(T) value;
965 
966     return cast(T) ((value << count) | (value >> (T.sizeof * 8 - count)));
967 }
968 /// ditto
969 pure T ror(uint count, T)(const T value)
970     if (__traits(isIntegral, T) && __traits(isUnsigned, T))
971 {
972     static assert(count < 8 * T.sizeof);
973     static if (count == 0)
974         return cast(T) value;
975 
976     return cast(T) ((value >> count) | (value << (T.sizeof * 8 - count)));
977 }
978 
979 ///
980 unittest
981 {
982     ubyte a = 0b11110000U;
983     ulong b = ~1UL;
984 
985     assert(rol(a, 1) == 0b11100001);
986     assert(ror(a, 1) == 0b01111000);
987     assert(rol(a, 3) == 0b10000111);
988     assert(ror(a, 3) == 0b00011110);
989 
990     assert(rol(a, 0) == a);
991     assert(ror(a, 0) == a);
992 
993     assert(rol(b, 63) == ~(1UL << 63));
994     assert(ror(b, 63) == ~2UL);
995 
996     assert(rol!3(a) == 0b10000111);
997     assert(ror!3(a) == 0b00011110);
998 
999     enum c = rol(uint(1), 0);
1000     enum d = ror(uint(1), 0);
1001     assert(c == uint(1));
1002     assert(d == uint(1));
1003 }