1 /**
2  * Associative Array implementation
3  *
4  * Compiler implementation of the
5  * $(LINK2 https://www.dlang.org, D programming language).
6  *
7  * Copyright:   Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
8  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright), Dave Fladebo
9  * License:     Distributed under the Boost Software License, Version 1.0.
10  *              https://www.boost.org/LICENSE_1_0.txt
11  * Source:      https://github.com/dlang/dmd/blob/master/src/dmd/backend/aarray.d
12  */
13 
14 module dmd.backend.aarray;
15 
16 import core.stdc.stdio;
17 import core.stdc.stdlib;
18 import core.stdc.string;
19 
20 alias hash_t = size_t;
21 
22 import dmd.root.hash;
23 import dmd.backend.global : err_nomem;
24 
25 nothrow:
26 @safe:
27 
28 /*********************
29  * This is the "bucket" used by the AArray.
30  */
31 private struct aaA
32 {
33     aaA *next;
34     hash_t hash;        // hash of the key
35     /* key   */         // key value goes here
36     /* value */         // value value goes here
37 }
38 
39 /**************************
40  * Associative Array type.
41  * Params:
42  *      TKey = type that has members Key, getHash(), and equals()
43  *      Value = value type
44  */
45 
46 struct AArray(TKey, Value)
47 {
48 nothrow:
49     alias Key = TKey.Key;       // key type
50 
51     ~this()
52     {
53         destroy();
54     }
55 
56     /****
57      * Frees all the data used by AArray
58      */
59     @trusted
60     void destroy()
61     {
62         if (buckets)
63         {
64             foreach (e; buckets)
65             {
66                 while (e)
67                 {
68                     auto en = e;
69                     e = e.next;
70                     free(en);
71                 }
72             }
73             free(buckets.ptr);
74             buckets = null;
75             nodes = 0;
76         }
77     }
78 
79     /********
80      * Returns:
81      *   Number of entries in the AArray
82      */
83     size_t length()
84     {
85         return nodes;
86     }
87 
88     /*************************************************
89      * Get pointer to value in associative array indexed by key.
90      * Add entry for key if it is not already there.
91      * Params:
92      *  pKey = pointer to key
93      * Returns:
94      *  pointer to Value
95      */
96     @trusted
97     Value* get(Key* pkey)
98     {
99         //printf("AArray::get()\n");
100         const aligned_keysize = aligntsize(Key.sizeof);
101 
102         if (!buckets.length)
103         {
104             alias aaAp = aaA*;
105             const len = prime_list[0];
106             auto p = cast(aaAp*)calloc(len, aaAp.sizeof);
107             if (!p)
108                 err_nomem();
109             buckets = p[0 .. len];
110         }
111 
112         hash_t key_hash = tkey.getHash(pkey);
113         const i = key_hash % buckets.length;
114         //printf("key_hash = %x, buckets.length = %d, i = %d\n", key_hash, buckets.length, i);
115         aaA* e;
116         auto pe = &buckets[i];
117         while ((e = *pe) != null)
118         {
119             if (key_hash == e.hash &&
120                 tkey.equals(pkey, cast(Key*)(e + 1)))
121             {
122                 goto Lret;
123             }
124             pe = &e.next;
125         }
126 
127         // Not found, create new elem
128         //printf("create new one\n");
129         e = cast(aaA *) malloc(aaA.sizeof + aligned_keysize + Value.sizeof);
130         if (!e)
131             err_nomem();
132         memcpy(e + 1, pkey, Key.sizeof);
133         memset(cast(void *)(e + 1) + aligned_keysize, 0, Value.sizeof);
134         e.hash = key_hash;
135         e.next = null;
136         *pe = e;
137 
138         ++nodes;
139         //printf("length = %d, nodes = %d\n", buckets_length, nodes);
140         if (nodes > buckets.length * 4)
141         {
142             //printf("rehash()\n");
143             rehash();
144         }
145 
146     Lret:
147         return cast(Value*)(cast(void*)(e + 1) + aligned_keysize);
148     }
149 
150     /*************************************************
151      * Determine if key is in aa.
152      * Params:
153      *  pKey = pointer to key
154      * Returns:
155      *  null    not in aa
156      *  !=null  in aa, return pointer to value
157      */
158 
159     @trusted
160     Value* isIn(Key* pkey)
161     {
162         //printf("AArray.isIn(), .length = %d, .ptr = %p\n", nodes, buckets.ptr);
163         if (!nodes)
164             return null;
165 
166         const key_hash = tkey.getHash(pkey);
167         //printf("hash = %d\n", key_hash);
168         const i = key_hash % buckets.length;
169         auto e = buckets[i];
170         while (e != null)
171         {
172             if (key_hash == e.hash &&
173                 tkey.equals(pkey, cast(Key*)(e + 1)))
174             {
175                 return cast(Value*)(cast(void*)(e + 1) + aligntsize(Key.sizeof));
176             }
177 
178             e = e.next;
179         }
180 
181         // Not found
182         return null;
183     }
184 
185 
186     /*************************************************
187      * Delete key entry in aa[].
188      * If key is not in aa[], do nothing.
189      * Params:
190      *  pKey = pointer to key
191      */
192 
193     @trusted
194     void del(Key *pkey)
195     {
196         if (!nodes)
197             return;
198 
199         const key_hash = tkey.getHash(pkey);
200         //printf("hash = %d\n", key_hash);
201         const i = key_hash % buckets.length;
202         auto pe = &buckets[i];
203         aaA* e;
204         while ((e = *pe) != null)       // null means not found
205         {
206             if (key_hash == e.hash &&
207                 tkey.equals(pkey, cast(Key*)(e + 1)))
208             {
209                 *pe = e.next;
210                 --nodes;
211                 free(e);
212                 break;
213             }
214             pe = &e.next;
215         }
216     }
217 
218 
219     /********************************************
220      * Produce array of keys from aa.
221      * Returns:
222      *  malloc'd array of keys
223      */
224 
225     @trusted
226     Key[] keys()
227     {
228         if (!nodes)
229             return null;
230 
231         if (nodes >= size_t.max / Key.sizeof)
232             err_nomem();
233         auto p = cast(Key *)malloc(nodes * Key.sizeof);
234         if (!p)
235             err_nomem();
236         auto q = p;
237         foreach (e; buckets)
238         {
239             while (e)
240             {
241                 memcpy(q, e + 1, Key.sizeof);
242                 ++q;
243                 e = e.next;
244             }
245         }
246         return p[0 .. nodes];
247     }
248 
249     /********************************************
250      * Produce array of values from aa.
251      * Returns:
252      *  malloc'd array of values
253      */
254 
255     @trusted
256     Value[] values()
257     {
258         if (!nodes)
259             return null;
260 
261         const aligned_keysize = aligntsize(Key.sizeof);
262         if (nodes >= size_t.max / Key.sizeof)
263             err_nomem();
264         auto p = cast(Value *)malloc(nodes * Value.sizeof);
265         if (!p)
266             err_nomem();
267         auto q = p;
268         foreach (e; buckets)
269         {
270             while (e)
271             {
272                 memcpy(q, cast(void*)(e + 1) + aligned_keysize, Value.sizeof);
273                 ++q;
274                 e = e.next;
275             }
276         }
277         return p[0 .. nodes];
278     }
279 
280     /********************************************
281      * Rehash an array.
282      */
283 
284     @trusted
285     void rehash()
286     {
287         //printf("Rehash\n");
288         if (!nodes)
289             return;
290 
291         size_t newbuckets_length = prime_list[$ - 1];
292 
293         foreach (prime; prime_list[0 .. $ - 1])
294         {
295             if (nodes <= prime)
296             {
297                 newbuckets_length = prime;
298                 break;
299             }
300         }
301         auto newbuckets = cast(aaA**)calloc(newbuckets_length, (aaA*).sizeof);
302         if (!newbuckets)
303             err_nomem();
304 
305         foreach (e; buckets)
306         {
307             while (e)
308             {
309                 auto en = e.next;
310                 auto b = &newbuckets[e.hash % newbuckets_length];
311                 e.next = *b;
312                 *b = e;
313                 e = en;
314             }
315         }
316 
317         free(buckets.ptr);
318         buckets = null;
319         buckets = newbuckets[0 .. newbuckets_length];
320     }
321 
322     alias applyDg = nothrow int delegate(Key*, Value*);
323     /*********************************************
324      * For each element in the AArray,
325      * call dg(Key* pkey, Value* pvalue)
326      * If dg returns !=0, stop and return that value.
327      * Params:
328      *  dg = delegate to call for each key/value pair
329      * Returns:
330      *  !=0 : value returned by first dg() call that returned non-zero
331      *  0   : no entries in aa, or all dg() calls returned 0
332      */
333 
334     @trusted
335     int apply(applyDg dg)
336     {
337         if (!nodes)
338             return 0;
339 
340         //printf("AArray.apply(aa = %p, keysize = %d, dg = %p)\n", &this, Key.sizeof, dg);
341 
342         const aligned_keysize = aligntsize(Key.sizeof);
343 
344         foreach (e; buckets)
345         {
346             while (e)
347             {
348                 auto result = dg(cast(Key*)(e + 1), cast(Value*)(cast(void*)(e + 1) + aligned_keysize));
349                 if (result)
350                     return result;
351                 e = e.next;
352             }
353         }
354 
355         return 0;
356     }
357 
358   private:
359 
360     aaA*[] buckets;
361     size_t nodes;               // number of nodes
362     TKey tkey;
363 }
364 
365 private:
366 
367 /**********************************
368  * Align to next pointer boundary, so value
369  * will be aligned.
370  * Params:
371  *      tsize = offset to be aligned
372  * Returns:
373  *      aligned offset
374  */
375 
376 size_t aligntsize(size_t tsize)
377 {
378     // Is pointer alignment on the x64 4 bytes or 8?
379     return (tsize + size_t.sizeof - 1) & ~(size_t.sizeof - 1);
380 }
381 
382 immutable uint[14] prime_list =
383 [
384                97,           389,
385              1543,          6151,
386            24_593,        98_317,
387           393_241,     1_572_869,
388         6_291_469,    25_165_843,
389       100_663_319,   402_653_189,
390     1_610_612_741, 4_294_967_291U,
391 ];
392 
393 /***************************************************************/
394 
395 /***
396  * A TKey for basic types
397  * Params:
398  *      K = a basic type
399  */
400 public struct Tinfo(K)
401 {
402 nothrow:
403     alias Key = K;
404 
405     static hash_t getHash(Key* pk)
406     {
407         return cast(hash_t)*pk;
408     }
409 
410     static bool equals(Key* pk1, Key* pk2)
411     {
412         return *pk1 == *pk2;
413     }
414 }
415 
416 /***************************************************************/
417 
418 /****
419  * A TKey that is a string
420  */
421 public struct TinfoChars
422 {
423 nothrow:
424     alias Key = const(char)[];
425 
426     static hash_t getHash(Key* pk)
427     {
428         auto buf = *pk;
429         return calcHash(cast(const(ubyte[]))buf);
430     }
431 
432     @trusted
433     static bool equals(Key* pk1, Key* pk2)
434     {
435         auto buf1 = *pk1;
436         auto buf2 = *pk2;
437         return buf1.length == buf2.length &&
438                memcmp(buf1.ptr, buf2.ptr, buf1.length) == 0;
439     }
440 }
441 
442 // Interface for C++ code
443 public struct AAchars
444 {
445 nothrow:
446     alias AA = AArray!(TinfoChars, uint);
447     AA aa;
448 
449     @trusted
450     static AAchars* create()
451     {
452         auto a = cast(AAchars*)calloc(1, AAchars.sizeof);
453         if (!a)
454             err_nomem();
455         return a;
456     }
457 
458     @trusted
459     static void destroy(AAchars* aac)
460     {
461         aac.aa.destroy();
462         free(aac);
463     }
464 
465     @trusted
466     extern(D) uint* get(const(char)[] buf)
467     {
468         return aa.get(&buf);
469     }
470 
471     uint length()
472     {
473         return cast(uint)aa.length();
474     }
475 }
476 
477 /***************************************************************/
478 
479 // Key is the slice specified by (*TinfoPair.pbase)[Pair.start .. Pair.end]
480 
481 public struct Pair { uint start, end; }
482 
483 public struct TinfoPair
484 {
485 nothrow:
486     alias Key = Pair;
487 
488     ubyte** pbase;
489 
490     @trusted
491     hash_t getHash(Key* pk)
492     {
493         auto buf = (*pbase)[pk.start .. pk.end];
494         return calcHash(buf);
495     }
496 
497     @trusted
498     bool equals(Key* pk1, Key* pk2)
499     {
500         const len1 = pk1.end - pk1.start;
501         const len2 = pk2.end - pk2.start;
502 
503         auto buf1 = *pk1;
504         auto buf2 = *pk2;
505         return len1 == len2 &&
506                memcmp(*pbase + pk1.start, *pbase + pk2.start, len1) == 0;
507     }
508 }
509 
510 // Interface for C++ code
511 public struct AApair
512 {
513 nothrow:
514     alias AA = AArray!(TinfoPair, uint);
515     AA aa;
516 
517     @trusted
518     static AApair* create(ubyte** pbase)
519     {
520         auto a = cast(AApair*)calloc(1, AApair.sizeof);
521         if (!a)
522             err_nomem();
523         a.aa.tkey.pbase = pbase;
524         return a;
525     }
526 
527     @trusted
528     static void destroy(AApair* aap)
529     {
530         aap.aa.destroy();
531         free(aap);
532     }
533 
534     @trusted
535     uint* get(uint start, uint end)
536     {
537         auto p = Pair(start, end);
538         return aa.get(&p);
539     }
540 
541     uint length()
542     {
543         return cast(uint)aa.length();
544     }
545 }
546 
547 // Interface for C++ code
548 public struct AApair2
549 {
550 nothrow:
551     alias AA = AArray!(TinfoPair, Pair);
552     AA aa;
553 
554     @trusted
555     static AApair2* create(ubyte** pbase)
556     {
557         auto a = cast(AApair2*)calloc(1, AApair2.sizeof);
558         if (!a)
559             err_nomem();
560         a.aa.tkey.pbase = pbase;
561         return a;
562     }
563 
564     @trusted
565     static void destroy(AApair2* aap)
566     {
567         aap.aa.destroy();
568         free(aap);
569     }
570 
571     @trusted
572     Pair* get(uint start, uint end)
573     {
574         auto p = Pair(start, end);
575         return aa.get(&p);
576     }
577 
578     uint length()
579     {
580         return cast(uint)aa.length();
581     }
582 }
583 
584 /*************************************************************/
585 
586 @system unittest
587 {
588     int dg(int* pk, bool* pv) { return 3; }
589     int dgz(int* pk, bool* pv) { return 0; }
590 
591     AArray!(Tinfo!int, bool) aa;
592     aa.rehash();
593     assert(aa.keys() == null);
594     assert(aa.values() == null);
595     assert(aa.apply(&dg) == 0);
596 
597     assert(aa.length == 0);
598     int k = 8;
599     aa.del(&k);
600     bool v = true;
601     assert(!aa.isIn(&k));
602     bool *pv = aa.get(&k);
603     *pv = true;
604     int j = 9;
605     pv = aa.get(&j);
606     *pv = false;
607     aa.rehash();
608 
609     assert(aa.length() == 2);
610     assert(*aa.get(&k) == true);
611     assert(*aa.get(&j) == false);
612 
613     assert(aa.apply(&dg) == 3);
614     assert(aa.apply(&dgz) == 0);
615 
616     aa.del(&k);
617     assert(aa.length() == 1);
618     assert(!aa.isIn(&k));
619     assert(*aa.isIn(&j) == false);
620 
621     auto keys = aa.keys();
622     assert(keys.length == 1);
623     assert(keys[0] == 9);
624 
625     auto values = aa.values();
626     assert(values.length == 1);
627     assert(values[0] == false);
628 
629     AArray!(Tinfo!int, bool) aa2;
630     int key = 10;
631     bool* getpv = aa2.get(&key);
632     aa2.apply(delegate(int* pk, bool* pv) @trusted {
633         assert(pv is getpv);
634         return 0;
635     });
636 }
637 
638 @system unittest
639 {
640     const(char)* buf = "abcb";
641     auto aap = AApair.create(cast(ubyte**)&buf);
642     auto pu = aap.get(1,2);
643     *pu = 10;
644     assert(aap.length == 1);
645     pu = aap.get(3,4);
646     assert(*pu == 10);
647     AApair.destroy(aap);
648 }