1 /**
2  * This code handles decoding UTF strings for foreach loops.
3  *
4  * Copyright: Copyright Digital Mars 2004 - 2010.
5  * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
6  * Authors:   Walter Bright
7  * Source: $(DRUNTIMESRC rt/_aApply.d)
8  */
9 module rt.aApply;
10 
11 import core.internal.utf : decode, toUTF8;
12 
13 /**********************************************/
14 /* 1 argument versions */
15 
/**
Delegate type corresponding to the transformed body of a one-variable `foreach` loop

The parameter is an untyped pointer to the current `char`, `wchar` or `dchar`;
which of the three it points to depends on the `_aApply` function the delegate
is handed to.

Returns: non-zero when a `break` statement is hit
*/
extern (D) alias dg_t = int delegate(void* c);
24 
// Note: dg is extern(D), but _aApplycd1() is extern(C)
26 
27 /**
28 Loop over a string while changing the UTF encoding
29 
There are 6 combinations of conversions between `char`, `wchar`, and `dchar`,
and 2 variants of each of those (with and without a loop index).
32 
33 The naming convention is as follows:
34 
35 _aApply{c,d,w}{c,d,w}{1,2}
36 
37 The first letter corresponds to the input string encoding, and the second letter corresponds to the target character type.
38 
39 - c = `char`
40 - w = `wchar`
41 - d = `dchar`
42 
43 The `1` variant only produces the character, the `2` variant also produces a loop index.
44 
45 Examples:
46 ---
47 void main()
48 {
49     string str;
    wstring wstr;
51     dstring dstr;
52 
53     foreach (dchar c; str) {}
54     // _aApplycd1
55 
56     foreach (wchar c; dstr) {}
57     // _aApplydw1
58 
59     foreach (i, wchar c; str) {}
60     // _aApplycw2
61 
62     foreach (wchar w; wstr) {}
63     // no conversion
64 }
65 ---
66 
67 Params:
68     aa = input string
69     dg = foreach body transformed into a delegate, similar to `opApply`
70 
71 Returns:
72     non-zero when the loop was exited through a `break`
73 */
extern (C) int _aApplycd1(scope const(char)[] aa, dg_t dg)
{
    // Walk the `char` string and hand every code point to `dg` as a `dchar`.
    const size_t len = aa.length;

    debug(apply) printf("_aApplycd1(), len = %d\n", len);

    size_t pos = 0;
    while (pos < len)
    {
        dchar ch = aa[pos];

        if ((ch & 0x80) == 0)
            ++pos;                  // high bit clear: the code unit is the character
        else
            ch = decode(aa, pos);   // multi-unit sequence; decode() moves pos forward

        // A non-zero return from the loop body stops the iteration.
        const rc = dg(cast(void*) &ch);
        if (rc != 0)
            return rc;
    }
    return 0;
}
93 
unittest
{
    debug(apply) printf("_aApplycd1.unittest\n");

    // ASCII only: every code unit yields exactly one dchar.
    immutable dstring asciiExpected = "hello"d;
    string asciiText = "hello";
    size_t asciiSeen;

    foreach (dchar ch; asciiText)
    {
        assert(asciiSeen < asciiExpected.length);
        assert(ch == asciiExpected[asciiSeen]);
        ++asciiSeen;
    }
    assert(asciiSeen == asciiExpected.length);

    // Mixed input: 1-, 3- and 4-unit sequences each collapse into one dchar.
    immutable dchar[4] mixedExpected = ['a', '\u1234', '\U000A0456', 'b'];
    string mixedText = "a\u1234\U000A0456b";
    size_t mixedSeen;

    foreach (dchar ch; mixedText)
    {
        assert(mixedSeen < mixedExpected.length);
        assert(ch == mixedExpected[mixedSeen]);
        ++mixedSeen;
    }
    assert(mixedSeen == mixedExpected.length);
}
133 
/// ditto
extern (C) int _aApplywd1(scope const(wchar)[] aa, dg_t dg)
{
    // Walk the `wchar` string and hand every code point to `dg` as a `dchar`.
    const size_t len = aa.length;

    debug(apply) printf("_aApplywd1(), len = %d\n", len);

    size_t pos = 0;
    while (pos < len)
    {
        dchar ch = aa[pos];

        if (ch < 0xD800)
            ++pos;                  // below 0xD800: the code unit is used as-is
        else
            ch = decode(aa, pos);   // everything else goes through decode(), which moves pos

        // A non-zero return from the loop body stops the iteration.
        const rc = dg(cast(void*) &ch);
        if (rc != 0)
            return rc;
    }
    return 0;
}
154 
unittest
{
    debug(apply) printf("_aApplywd1.unittest\n");

    // ASCII only: every code unit yields exactly one dchar.
    immutable dstring asciiExpected = "hello"d;
    wstring asciiText = "hello"w;
    size_t asciiSeen;

    foreach (dchar ch; asciiText)
    {
        assert(asciiSeen < asciiExpected.length);
        assert(ch == asciiExpected[asciiSeen]);
        ++asciiSeen;
    }
    assert(asciiSeen == asciiExpected.length);

    // Mixed input: a BMP character and a surrogate pair each become one dchar.
    immutable dchar[4] mixedExpected = ['a', '\u1234', '\U000A0456', 'b'];
    wstring mixedText = "a\u1234\U000A0456b"w;
    size_t mixedSeen;

    foreach (dchar ch; mixedText)
    {
        assert(mixedSeen < mixedExpected.length);
        assert(ch == mixedExpected[mixedSeen]);
        ++mixedSeen;
    }
    assert(mixedSeen == mixedExpected.length);
}
194 
/// ditto
extern (C) int _aApplycw1(scope const(char)[] aa, dg_t dg)
{
    // Walk the `char` string and hand it to `dg` one `wchar` at a time.
    // Code points above 0xFFFF are delivered as two calls (surrogate pair).
    const size_t len = aa.length;

    debug(apply) printf("_aApplycw1(), len = %d\n", len);

    size_t pos = 0;
    while (pos < len)
    {
        wchar unit = aa[pos];

        if ((unit & 0x80) == 0)
        {
            ++pos;      // high bit clear: pass the code unit through unchanged
        }
        else
        {
            const dchar cp = decode(aa, pos);   // decode() moves pos forward

            if (cp > 0xFFFF)
            {
                // Split into a surrogate pair; the high half goes out first.
                const uint offset = cp - 0x10000;

                unit = cast(wchar)(((offset >> 10) & 0x3FF) + 0xD800);
                const hi = dg(cast(void*) &unit);
                if (hi != 0)
                    return hi;

                unit = cast(wchar)((offset & 0x3FF) + 0xDC00);
            }
            else
                unit = cast(wchar) cp;
        }

        const rc = dg(cast(void*) &unit);
        if (rc != 0)
            return rc;
    }
    return 0;
}
227 
unittest
{
    debug(apply) printf("_aApplycw1.unittest\n");

    // ASCII only: every code unit yields exactly one wchar.
    immutable wstring asciiExpected = "hello"w;
    string asciiText = "hello";
    size_t asciiSeen;

    foreach (wchar unit; asciiText)
    {
        assert(asciiSeen < asciiExpected.length);
        assert(unit == asciiExpected[asciiSeen]);
        ++asciiSeen;
    }
    assert(asciiSeen == asciiExpected.length);

    // Mixed input: U+A0456 must arrive as the surrogate pair DA41 DC56.
    immutable wchar[5] mixedExpected = ['a', 0x1234, 0xDA41, 0xDC56, 'b'];
    string mixedText = "a\u1234\U000A0456b";
    size_t mixedSeen;

    foreach (wchar unit; mixedText)
    {
        assert(mixedSeen < mixedExpected.length);
        assert(unit == mixedExpected[mixedSeen]);
        ++mixedSeen;
    }
    assert(mixedSeen == mixedExpected.length);
}
268 
/// ditto
extern (C) int _aApplywc1(scope const(wchar)[] aa, dg_t dg)
{
    // Walk the `wchar` string and hand it to `dg` one `char` at a time.
    // Non-ASCII code points are re-encoded with toUTF8() and delivered unit by unit.
    const size_t len = aa.length;

    debug(apply) printf("_aApplywc1(), len = %d\n", len);

    size_t pos = 0;
    while (pos < len)
    {
        const wchar unit = aa[pos];

        if (unit <= 0x7F)
        {
            // ASCII: a single `char` carries the whole character.
            char ascii = cast(char) unit;
            ++pos;
            const rc = dg(cast(void*) &ascii);
            if (rc != 0)
                return rc;
            continue;
        }

        char[4] buf = void;                 // scratch space filled by toUTF8()
        const dchar cp = decode(aa, pos);   // decode() moves pos forward
        auto encoded = toUTF8(buf, cp);

        for (size_t k = 0; k < encoded.length; ++k)
        {
            char piece = encoded[k];
            const rc = dg(cast(void*) &piece);
            if (rc != 0)
                return rc;
        }
    }
    return 0;
}
303 
unittest
{
    debug(apply) printf("_aApplywc1.unittest\n");

    // ASCII only: every code unit yields exactly one char.
    immutable string asciiExpected = "hello";
    wstring asciiText = "hello"w;
    size_t asciiSeen;

    foreach (char unit; asciiText)
    {
        assert(asciiSeen < asciiExpected.length);
        assert(unit == asciiExpected[asciiSeen]);
        ++asciiSeen;
    }
    assert(asciiSeen == asciiExpected.length);

    // Mixed input: U+1234 -> E1 88 B4, U+A0456 -> F2 A0 91 96.
    immutable char[9] mixedExpected =
        ['a', '\xE1', '\x88', '\xB4', '\xF2', '\xA0', '\x91', '\x96', 'b'];
    wstring mixedText = "a\u1234\U000A0456b"w;
    size_t mixedSeen;

    foreach (char unit; mixedText)
    {
        assert(mixedSeen < mixedExpected.length);
        assert(unit == mixedExpected[mixedSeen]);
        ++mixedSeen;
    }
    assert(mixedSeen == mixedExpected.length);
}
348 
/// ditto
extern (C) int _aApplydc1(scope const(dchar)[] aa, dg_t dg)
{
    // Walk the `dchar` string and hand it to `dg` one `char` at a time.
    // Non-ASCII code points are encoded with toUTF8() and delivered unit by unit.
    debug(apply) printf("_aApplydc1(), len = %d\n", aa.length);

    for (size_t n = 0; n < aa.length; ++n)
    {
        const dchar cp = aa[n];

        if (cp <= 0x7F)
        {
            // ASCII: a single `char` carries the whole character.
            char ascii = cast(char) cp;
            const rc = dg(cast(void*) &ascii);
            if (rc != 0)
                return rc;
            continue;
        }

        char[4] buf = void;     // scratch space filled by toUTF8()
        auto encoded = toUTF8(buf, cp);

        for (size_t k = 0; k < encoded.length; ++k)
        {
            char piece = encoded[k];
            const rc = dg(cast(void*) &piece);
            if (rc != 0)
                return rc;
        }
    }
    return 0;
}
379 
unittest
{
    // The trace message names the function under test (it previously said
    // "_aApplyRdc1", a name that does not exist in this module).
    debug(apply) printf("_aApplydc1.unittest\n");

    // ASCII only: every dchar yields exactly one char.
    auto s = "hello"d[];
    int i;

    foreach (char d; s)
    {
        switch (i)
        {
            case 0:     assert(d == 'h'); break;
            case 1:     assert(d == 'e'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'l'); break;
            case 4:     assert(d == 'o'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    // Mixed input: U+1234 -> E1 88 B4, U+A0456 -> F2 A0 91 96.
    s = "a\u1234\U000A0456b";
    i = 0;
    foreach (char d; s)
    {
        //printf("i = %d, d = %x\n", i, d);
        switch (i)
        {
            case 0:     assert(d == 'a'); break;
            case 1:     assert(d == 0xE1); break;
            case 2:     assert(d == 0x88); break;
            case 3:     assert(d == 0xB4); break;
            case 4:     assert(d == 0xF2); break;
            case 5:     assert(d == 0xA0); break;
            case 6:     assert(d == 0x91); break;
            case 7:     assert(d == 0x96); break;
            case 8:     assert(d == 'b'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 9);
}
424 
/// ditto
extern (C) int _aApplydw1(scope const(dchar)[] aa, dg_t dg)
{
    // Walk the `dchar` string and hand it to `dg` one `wchar` at a time.
    // Code points above 0xFFFF are delivered as two calls (surrogate pair).
    debug(apply) printf("_aApplydw1(), len = %d\n", aa.length);

    for (size_t n = 0; n < aa.length; ++n)
    {
        const dchar cp = aa[n];
        wchar unit;

        if (cp > 0xFFFF)
        {
            // Split into a surrogate pair; the high half goes out first.
            const uint offset = cp - 0x10000;

            unit = cast(wchar)(((offset >> 10) & 0x3FF) + 0xD800);
            const hi = dg(cast(void*) &unit);
            if (hi != 0)
                return hi;

            unit = cast(wchar)((offset & 0x3FF) + 0xDC00);
        }
        else
            unit = cast(wchar) cp;

        const rc = dg(cast(void*) &unit);
        if (rc != 0)
            return rc;
    }
    return 0;
}
451 
unittest
{
    debug(apply) printf("_aApplydw1.unittest\n");

    // ASCII only: every dchar yields exactly one wchar.
    immutable wstring asciiExpected = "hello"w;
    dstring asciiText = "hello"d;
    size_t asciiSeen;

    foreach (wchar unit; asciiText)
    {
        assert(asciiSeen < asciiExpected.length);
        assert(unit == asciiExpected[asciiSeen]);
        ++asciiSeen;
    }
    assert(asciiSeen == asciiExpected.length);

    // Mixed input: U+A0456 must arrive as the surrogate pair DA41 DC56.
    immutable wchar[5] mixedExpected = ['a', 0x1234, 0xDA41, 0xDC56, 'b'];
    dstring mixedText = "a\u1234\U000A0456b"d;
    size_t mixedSeen;

    foreach (wchar unit; mixedText)
    {
        assert(mixedSeen < mixedExpected.length);
        assert(unit == mixedExpected[mixedSeen]);
        ++mixedSeen;
    }
    assert(mixedSeen == mixedExpected.length);
}
492 
493 
494 /****************************************************************************/
495 /* 2 argument versions */
496 
/**
Delegate type corresponding to the transformed body of a two-variable `foreach` loop

Parameters are pointers to a `size_t` loop index, and the current `char`, `wchar` or `dchar`.
Both are passed as untyped pointers; which character type the second one points
to depends on the `_aApply` function the delegate is handed to.

Returns: non-zero when a `break` statement is hit
*/
extern (D) alias dg2_t = int delegate(void* i, void* c);
505 
506 // Note: dg is extern(D), but _aApplycd2() is extern(C)
507 
/**
Variants of _aApplyXXX that additionally pass a loop index to the delegate.

The index handed to `dg` is the position in `aa`, counted in code units of the
input string, at which the current character starts.

Params:
    aa = input string
    dg = foreach body transformed into a delegate

Returns:
    the first non-zero value returned by `dg`, or 0 when the whole string was traversed
*/
extern (C) int _aApplycd2(scope const(char)[] aa, dg2_t dg)
{
    const size_t len = aa.length;

    debug(apply) printf("_aApplycd2(), len = %d\n", len);

    size_t pos = 0;
    while (pos < len)
    {
        size_t width = 1;       // number of input code units this character occupies
        dchar ch = aa[pos];

        if (ch & 0x80)
        {
            // Decode on a copy of the index so that `pos` still points at the
            // start of the character when the delegate sees it.
            size_t next = pos;
            ch = decode(aa, next);
            width = next - pos;
        }

        const rc = dg(&pos, cast(void*) &ch);
        if (rc != 0)
            return rc;

        pos += width;
    }
    return 0;
}
535 
unittest
{
    debug(apply) printf("_aApplycd2.unittest\n");

    // ASCII only: the index advances by one per character.
    immutable dstring asciiExpected = "hello"d;
    string asciiText = "hello";
    size_t asciiSeen;

    foreach (offset, dchar ch; asciiText)
    {
        assert(offset == asciiSeen);
        assert(asciiSeen < asciiExpected.length);
        assert(ch == asciiExpected[asciiSeen]);
        ++asciiSeen;
    }
    assert(asciiSeen == asciiExpected.length);

    // Mixed input: the index jumps by the width of each encoded character.
    immutable dchar[4] mixedExpected = ['a', '\u1234', '\U000A0456', 'b'];
    immutable size_t[4] mixedOffsets = [0, 1, 4, 8];
    string mixedText = "a\u1234\U000A0456b";
    size_t mixedSeen;

    foreach (offset, dchar ch; mixedText)
    {
        assert(mixedSeen < mixedExpected.length);
        assert(ch == mixedExpected[mixedSeen]);
        assert(offset == mixedOffsets[mixedSeen]);
        ++mixedSeen;
    }
    assert(mixedSeen == mixedExpected.length);
}
577 
/// ditto
extern (C) int _aApplywd2(scope const(wchar)[] aa, dg2_t dg)
{
    // Walks the `wchar` string and hands every code point to `dg` as a `dchar`,
    // together with the index of the code unit at which it starts.
    int result;
    size_t len = aa.length;

    debug(apply) printf("_aApplywd2(), len = %d\n", len);
    size_t n;   // number of input code units occupied by the current character
    for (size_t i = 0; i < len; i += n)
    {
        dchar d = aa[i];
        // Same threshold as _aApplywd1: a code unit below 0xD800 already is
        // the code point, so only higher values need to go through decode().
        if (d >= 0xD800)
        {
            // Decode on a copy of the index so that `i` still points at the
            // start of the character when the delegate sees it.
            n = i;
            d = decode(aa, n);
            n -= i;
        }
        else
            n = 1;
        result = dg(&i, cast(void *)&d);
        if (result)
            break;      // non-zero means the loop body asked to stop
    }
    return result;
}
603 
unittest
{
    debug(apply) printf("_aApplywd2.unittest\n");

    // ASCII only: the index advances by one per character.
    immutable dstring asciiExpected = "hello"d;
    wstring asciiText = "hello"w;
    size_t asciiSeen;

    foreach (offset, dchar ch; asciiText)
    {
        assert(offset == asciiSeen);
        assert(asciiSeen < asciiExpected.length);
        assert(ch == asciiExpected[asciiSeen]);
        ++asciiSeen;
    }
    assert(asciiSeen == asciiExpected.length);

    // Mixed input: the surrogate pair occupies two code units, so 'b' is at 4.
    immutable dchar[4] mixedExpected = ['a', '\u1234', '\U000A0456', 'b'];
    immutable size_t[4] mixedOffsets = [0, 1, 2, 4];
    wstring mixedText = "a\u1234\U000A0456b"w;
    size_t mixedSeen;

    foreach (offset, dchar ch; mixedText)
    {
        assert(mixedSeen < mixedExpected.length);
        assert(offset == mixedOffsets[mixedSeen]);
        assert(ch == mixedExpected[mixedSeen]);
        ++mixedSeen;
    }
    assert(mixedSeen == mixedExpected.length);
}
645 
/// ditto
extern (C) int _aApplycw2(scope const(char)[] aa, dg2_t dg)
{
    // Walks the `char` string and hands it to `dg` one `wchar` at a time,
    // together with the index of the code unit at which the character starts.
    // Both halves of a surrogate pair are reported with the same index.
    const size_t len = aa.length;

    debug(apply) printf("_aApplycw2(), len = %d\n", len);

    size_t pos = 0;
    while (pos < len)
    {
        size_t width = 1;       // number of input code units this character occupies
        wchar unit = aa[pos];

        if (unit & 0x80)
        {
            // Decode on a copy of the index so that `pos` keeps pointing at
            // the start of the character.
            size_t next = pos;
            const dchar cp = decode(aa, next);
            width = next - pos;

            if (cp > 0xFFFF)
            {
                // Split into a surrogate pair; the high half goes out first.
                const uint offset = cp - 0x10000;

                unit = cast(wchar)(((offset >> 10) & 0x3FF) + 0xD800);
                const hi = dg(&pos, cast(void*) &unit);
                if (hi != 0)
                    return hi;

                unit = cast(wchar)((offset & 0x3FF) + 0xDC00);
            }
            else
                unit = cast(wchar) cp;
        }

        const rc = dg(&pos, cast(void*) &unit);
        if (rc != 0)
            return rc;

        pos += width;
    }
    return 0;
}
681 
unittest
{
    debug(apply) printf("_aApplycw2.unittest\n");

    // ASCII only: the index advances by one per character.
    immutable wstring asciiExpected = "hello"w;
    string asciiText = "hello";
    size_t asciiSeen;

    foreach (offset, wchar unit; asciiText)
    {
        assert(offset == asciiSeen);
        assert(asciiSeen < asciiExpected.length);
        assert(unit == asciiExpected[asciiSeen]);
        ++asciiSeen;
    }
    assert(asciiSeen == asciiExpected.length);

    // Mixed input: both surrogate halves report the index of the same source character.
    immutable wchar[5] mixedExpected = ['a', 0x1234, 0xDA41, 0xDC56, 'b'];
    immutable size_t[5] mixedOffsets = [0, 1, 4, 4, 8];
    string mixedText = "a\u1234\U000A0456b";
    size_t mixedSeen;

    foreach (offset, wchar unit; mixedText)
    {
        assert(mixedSeen < mixedExpected.length);
        assert(offset == mixedOffsets[mixedSeen]);
        assert(unit == mixedExpected[mixedSeen]);
        ++mixedSeen;
    }
    assert(mixedSeen == mixedExpected.length);
}
724 
/// ditto
extern (C) int _aApplywc2(scope const(wchar)[] aa, dg2_t dg)
{
    // Walks the `wchar` string and hands it to `dg` one `char` at a time,
    // together with the index of the code unit at which the character starts.
    // Every `char` produced for one character is reported with the same index.
    const size_t len = aa.length;

    debug(apply) printf("_aApplywc2(), len = %d\n", len);

    size_t pos = 0;
    while (pos < len)
    {
        size_t width = 1;       // number of input code units this character occupies
        const wchar unit = aa[pos];

        if (unit <= 0x7F)
        {
            // ASCII: a single `char` carries the whole character.
            char ascii = cast(char) unit;
            const rc = dg(&pos, cast(void*) &ascii);
            if (rc != 0)
                return rc;
        }
        else
        {
            char[4] buf = void;     // scratch space filled by toUTF8()

            // Decode on a copy of the index so that `pos` keeps pointing at
            // the start of the character.
            size_t next = pos;
            const dchar cp = decode(aa, next);
            width = next - pos;

            auto encoded = toUTF8(buf, cp);
            for (size_t k = 0; k < encoded.length; ++k)
            {
                char piece = encoded[k];
                const rc = dg(&pos, cast(void*) &piece);
                if (rc != 0)
                    return rc;
            }
        }

        pos += width;
    }
    return 0;
}
762 
unittest
{
    debug(apply) printf("_aApplywc2.unittest\n");

    // ASCII only: the index advances by one per character.
    immutable string asciiExpected = "hello";
    wstring asciiText = "hello"w;
    size_t asciiSeen;

    foreach (offset, char unit; asciiText)
    {
        assert(offset == asciiSeen);
        assert(asciiSeen < asciiExpected.length);
        assert(unit == asciiExpected[asciiSeen]);
        ++asciiSeen;
    }
    assert(asciiSeen == asciiExpected.length);

    // Mixed input: every char of one character reports that character's index.
    immutable char[9] mixedExpected =
        ['a', '\xE1', '\x88', '\xB4', '\xF2', '\xA0', '\x91', '\x96', 'b'];
    immutable size_t[9] mixedOffsets = [0, 1, 1, 1, 2, 2, 2, 2, 4];
    wstring mixedText = "a\u1234\U000A0456b"w;
    size_t mixedSeen;

    foreach (offset, char unit; mixedText)
    {
        assert(mixedSeen < mixedExpected.length);
        assert(offset == mixedOffsets[mixedSeen]);
        assert(unit == mixedExpected[mixedSeen]);
        ++mixedSeen;
    }
    assert(mixedSeen == mixedExpected.length);
}
809 
/// ditto
extern (C) int _aApplydc2(scope const(dchar)[] aa, dg2_t dg)
{
    // Walks the `dchar` string and hands it to `dg` one `char` at a time,
    // together with the index of the source `dchar`.
    // Every `char` produced for one character is reported with the same index.
    const size_t len = aa.length;

    debug(apply) printf("_aApplydc2(), len = %d\n", len);

    size_t pos = 0;
    while (pos < len)
    {
        const dchar cp = aa[pos];

        if (cp <= 0x7F)
        {
            // ASCII: a single `char` carries the whole character.
            char ascii = cast(char) cp;
            const rc = dg(&pos, cast(void*) &ascii);
            if (rc != 0)
                return rc;
        }
        else
        {
            char[4] buf = void;     // scratch space filled by toUTF8()

            auto encoded = toUTF8(buf, cp);
            for (size_t k = 0; k < encoded.length; ++k)
            {
                char piece = encoded[k];
                const rc = dg(&pos, cast(void*) &piece);
                if (rc != 0)
                    return rc;
            }
        }

        pos++;
    }
    return 0;
}
842 
unittest
{
    debug(apply) printf("_aApplydc2.unittest\n");

    // ASCII only: the index advances by one per character.
    immutable string asciiExpected = "hello";
    dstring asciiText = "hello"d;
    size_t asciiSeen;

    foreach (offset, char unit; asciiText)
    {
        assert(offset == asciiSeen);
        assert(asciiSeen < asciiExpected.length);
        assert(unit == asciiExpected[asciiSeen]);
        ++asciiSeen;
    }
    assert(asciiSeen == asciiExpected.length);

    // Mixed input: every char of one character reports that character's index.
    immutable char[9] mixedExpected =
        ['a', '\xE1', '\x88', '\xB4', '\xF2', '\xA0', '\x91', '\x96', 'b'];
    immutable size_t[9] mixedOffsets = [0, 1, 1, 1, 2, 2, 2, 2, 3];
    dstring mixedText = "a\u1234\U000A0456b"d;
    size_t mixedSeen;

    foreach (offset, char unit; mixedText)
    {
        assert(mixedSeen < mixedExpected.length);
        assert(offset == mixedOffsets[mixedSeen]);
        assert(unit == mixedExpected[mixedSeen]);
        ++mixedSeen;
    }
    assert(mixedSeen == mixedExpected.length);
}
889 
/// ditto
extern (C) int _aApplydw2(scope const(dchar)[] aa, dg2_t dg)
{
    // Walks the `dchar` string and hands it to `dg` one `wchar` at a time,
    // together with the index of the source `dchar`.
    // Both halves of a surrogate pair are reported with the same index.
    debug(apply) printf("_aApplydw2(), len = %d\n", aa.length);

    for (size_t n = 0; n < aa.length; ++n)
    {
        const dchar cp = aa[n];
        size_t idx = n;     // per-iteration copy of the index handed to the delegate
        wchar unit;

        if (cp > 0xFFFF)
        {
            // Split into a surrogate pair; the high half goes out first.
            const uint offset = cp - 0x10000;

            unit = cast(wchar)(((offset >> 10) & 0x3FF) + 0xD800);
            const hi = dg(&idx, cast(void*) &unit);
            if (hi != 0)
                return hi;

            unit = cast(wchar)((offset & 0x3FF) + 0xDC00);
        }
        else
            unit = cast(wchar) cp;

        const rc = dg(&idx, cast(void*) &unit);
        if (rc != 0)
            return rc;
    }
    return 0;
}
916 
unittest
{
    debug(apply) printf("_aApplydw2.unittest\n");

    // ASCII only: the index advances by one per character.
    immutable wstring asciiExpected = "hello"w;
    dstring asciiText = "hello"d;
    size_t asciiSeen;

    foreach (offset, wchar unit; asciiText)
    {
        assert(offset == asciiSeen);
        assert(asciiSeen < asciiExpected.length);
        assert(unit == asciiExpected[asciiSeen]);
        ++asciiSeen;
    }
    assert(asciiSeen == asciiExpected.length);

    // Mixed input: both surrogate halves report the index of the same source dchar.
    immutable wchar[5] mixedExpected = ['a', 0x1234, 0xDA41, 0xDC56, 'b'];
    immutable size_t[5] mixedOffsets = [0, 1, 2, 2, 3];
    dstring mixedText = "a\u1234\U000A0456b"d;
    size_t mixedSeen;

    foreach (offset, wchar unit; mixedText)
    {
        assert(mixedSeen < mixedExpected.length);
        assert(offset == mixedOffsets[mixedSeen]);
        assert(unit == mixedExpected[mixedSeen]);
        ++mixedSeen;
    }
    assert(mixedSeen == mixedExpected.length);
}