1 /**
2  * Text macro processor for Ddoc.
3  *
4  * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/dmacro.d, _dmacro.d)
8  * Documentation:  https://dlang.org/phobos/dmd_dmacro.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/dmacro.d
10  */
11 
12 module dmd.dmacro;
13 
14 import core.stdc.ctype;
15 import core.stdc.string;
16 import dmd.common.outbuffer;
17 import dmd.root.rmem;
18 
19 @trusted:
20 
struct MacroTable
{
    /**********************************
     * Define name=text macro.
     * If macro `name` already exists, replace the text for it.
     * Params:
     *  name = name of macro
     *  text = text of macro
     */
    void define(const(char)[] name, const(char)[] text) nothrow pure @safe
    {
        //printf("MacroTable::define('%.*s' = '%.*s')\n", cast(int)name.length, name.ptr, text.length, text.ptr);
        if (auto table = name in mactab)
        {
            (*table).text = text;
            return;
        }
        mactab[name] = new Macro(name, text);
    }

    /// Predicate applied to the character at `p` while scanning for a macro name.
    alias fp_t = bool function(const(char)* p) @nogc nothrow pure;

    /*****************************************************
     * Look for macros in buf and expand them in place.
     * Only look at the text in buf from start to pend.
     *
     * The first pass substitutes `$0` (the whole argument), `$1`..`$9`
     * (the n-th argument) and `$+` (everything after the first argument).
     * The second pass expands `$(NAME args)` invocations.
     * A doubled dollar sign (`$$0`, `$$(NAME)`) is not expanded; it is
     * reduced to a single `$`.
     *
     * Params:
     *  buf = buffer holding the text; modified in place
     *  start = index in buf of the first character to scan
     *  pend = index one past the last character to scan; on success it is
     *         updated to account for the text that was inserted and removed
     *  arg = argument text substituted for `$0`, `$1`..`$9` and `$+`
     *  recursionLimit = number of nested expansion levels still allowed
     *  isIdStart = tells whether the character at the pointer starts a macro name
     *  isIdTail = tells whether the character at the pointer continues a macro name
     *
     * Returns: `true` on success, `false` when the recursion limit was reached
     *      (`pend` is then left unchanged)
     */
    bool expand(ref OutBuffer buf, size_t start, ref size_t pend, const(char)[] arg, int recursionLimit,
        fp_t isIdStart, fp_t isIdTail) nothrow pure
    {
        version (none)
        {
            printf("Macro::expand(buf[%d..%d], arg = '%.*s')\n", start, pend, cast(int)arg.length, arg.ptr);
            printf("Buf is: '%.*s'\n", cast(int)(pend - start), buf.data + start);
        }
        // limit recursive expansion
        recursionLimit--;
        if (recursionLimit < 0)
            return false;

        size_t end = pend;
        assert(start <= end);
        assert(end <= buf.length);
        /* First pass - replace $0
         */
        // Work on a private copy of arg; it is released on every exit path,
        // including the early `return false` when a nested expansion fails.
        arg = memdup(arg);
        scope (exit) mem.xfree(cast(char*)arg.ptr);
        for (size_t u = start; u + 1 < end;)
        {
            char* p = cast(char*)buf[].ptr; // buf.data is not loop invariant
            /* Look for $0, but not $$0, and replace it with arg.
             */
            if (p[u] == '$' && (isdigit(p[u + 1]) || p[u + 1] == '+'))
            {
                if (u > start && p[u - 1] == '$')
                {
                    // Don't expand $$0, but replace it with $0
                    buf.remove(u - 1, 1);
                    end--;
                    u += 1; // now u is one past the closing '1'
                    continue;
                }
                char c = p[u + 1];
                int n = (c == '+') ? -1 : c - '0';
                const(char)[] marg;
                if (n == 0)
                {
                    marg = arg;
                }
                else
                    extractArgN(arg, marg, n);
                if (marg.length == 0)
                {
                    // Just remove macro invocation
                    //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], cast(int)marg.length, marg.ptr);
                    buf.remove(u, 2);
                    end -= 2;
                }
                else if (c == '+')
                {
                    // Replace '$+' with 'arg'
                    //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], cast(int)marg.length, marg.ptr);
                    buf.remove(u, 2);
                    buf.insert(u, marg);
                    end += marg.length - 2;
                    // Scan replaced text for further expansion
                    size_t mend = u + marg.length;
                    const success = expand(buf, u, mend, null, recursionLimit, isIdStart, isIdTail);
                    if (!success)
                        return false;
                    end += mend - (u + marg.length);
                    u = mend;
                }
                else
                {
                    // Replace '$1' with '\xFF{arg\xFF}'
                    //printf("Replacing '$%c' with '\xFF{%.*s\xFF}'\n", p[u + 1], cast(int)marg.length, marg.ptr);
                    ubyte[] slice = cast(ubyte[])buf[];
                    slice[u] = 0xFF;
                    slice[u + 1] = '{';
                    buf.insert(u + 2, marg);
                    buf.insert(u + 2 + marg.length, "\xFF}");
                    end += -2 + 2 + marg.length + 2;
                    // Scan replaced text for further expansion
                    size_t mend = u + 2 + marg.length;
                    const success = expand(buf, u + 2, mend, null, recursionLimit, isIdStart, isIdTail);
                    if (!success)
                        return false;
                    end += mend - (u + 2 + marg.length);
                    u = mend;
                }
                //printf("u = %d, end = %d\n", u, end);
                //printf("#%.*s#\n", cast(int)end, &buf.data[0]);
                continue;
            }
            u++;
        }
        /* Second pass - replace other macros
         */
        for (size_t u = start; u + 4 < end;)
        {
            char* p = cast(char*)buf[].ptr; // buf.data is not loop invariant
            /* A valid start of macro expansion is $(c, where c is
             * an id start character, and not $$(c.
             */
            if (p[u] == '$' && p[u + 1] == '(' && isIdStart(p + u + 2))
            {
                //printf("\tfound macro start '%c'\n", p[u + 2]);
                char* name = p + u + 2;
                size_t namelen = 0;
                const(char)[] marg;
                size_t v;
                /* Scan forward to find end of macro name and
                 * beginning of macro argument (marg).
                 */
                for (v = u + 2; v < end; v += utfStride(p[v]))
                {
                    if (!isIdTail(p + v))
                    {
                        // We've gone past the end of the macro name.
                        namelen = v - (u + 2);
                        break;
                    }
                }
                // A multi-byte stride taken near the end of the range can step
                // past `end`; clamp so the slice below never has v > end.
                if (v > end)
                    v = end;
                v += extractArgN(p[v .. end], marg, 0);
                assert(v <= end);
                if (v < end)
                {
                    // v is on the closing ')'
                    if (u > start && p[u - 1] == '$')
                    {
                        // Don't expand $$(NAME), but replace it with $(NAME)
                        buf.remove(u - 1, 1);
                        end--;
                        u = v; // now u is one past the closing ')'
                        continue;
                    }
                    // Heap buffer holding "NAME,marg" when DDOC_UNDEFINED_MACRO needs
                    // one; released whichever way this invocation is finished.
                    char* undefArg = null;
                    scope (exit) mem.xfree(undefArg);
                    Macro* m = search(name[0 .. namelen]);
                    if (!m)
                    {
                        immutable undef = "DDOC_UNDEFINED_MACRO";
                        m = search(undef);
                        if (m)
                        {
                            // Macro was not defined, so this is an expansion of
                            //   DDOC_UNDEFINED_MACRO. Prepend macro name to args.
                            // marg = name[ ] ~ "," ~ marg[ ];
                            if (marg.length)
                            {
                                undefArg = cast(char*)mem.xmalloc(namelen + 1 + marg.length);
                                assert(undefArg);
                                memcpy(undefArg, name, namelen);
                                undefArg[namelen] = ',';
                                memcpy(undefArg + namelen + 1, marg.ptr, marg.length);
                                marg = undefArg[0 .. marg.length + namelen + 1];
                            }
                            else
                            {
                                marg = name[0 .. namelen];
                            }
                        }
                    }
                    if (m)
                    {
                        if (m.inuse && marg.length == 0)
                        {
                            // Remove macro invocation
                            buf.remove(u, v + 1 - u);
                            end -= v + 1 - u;
                        }
                        else if (m.inuse && ((arg.length == marg.length && memcmp(arg.ptr, marg.ptr, arg.length) == 0) ||
                                             (arg.length + 4 == marg.length && marg[0] == 0xFF && marg[1] == '{' && memcmp(arg.ptr, marg.ptr + 2, arg.length) == 0 && marg[marg.length - 2] == 0xFF && marg[marg.length - 1] == '}')))
                        {
                            /* Recursive expansion:
                             *   marg is same as arg (with blue paint added)
                             * Just leave in place.
                             */
                        }
                        else
                        {
                            //printf("\tmacro '%.*s'(%.*s) = '%.*s'\n", cast(int)m.namelen, m.name, cast(int)marg.length, marg.ptr, cast(int)m.textlen, m.text);
                            // Copy marg before buf is modified below: it may be a
                            // slice of buf's own storage. The copy is released on every
                            // way out of this branch, including the failure return.
                            marg = memdup(marg);
                            scope (exit) mem.xfree(cast(char*)marg.ptr);
                            // Insert replacement text
                            buf.spread(v + 1, 2 + m.text.length + 2);
                            ubyte[] slice = cast(ubyte[])buf[];
                            slice[v + 1] = 0xFF;
                            slice[v + 2] = '{';
                            slice[v + 3 .. v + 3 + m.text.length] = cast(ubyte[])m.text[];
                            slice[v + 3 + m.text.length] = 0xFF;
                            slice[v + 3 + m.text.length + 1] = '}';
                            end += 2 + m.text.length + 2;
                            // Scan replaced text for further expansion
                            m.inuse++;
                            size_t mend = v + 1 + 2 + m.text.length + 2;
                            const success = expand(buf, v + 1, mend, marg, recursionLimit, isIdStart, isIdTail);
                            // Undo the in-use mark even when the nested expansion failed,
                            // so the macro is not left permanently flagged as in use.
                            m.inuse--;
                            if (!success)
                                return false;
                            end += mend - (v + 1 + 2 + m.text.length + 2);
                            buf.remove(u, v + 1 - u);
                            end -= v + 1 - u;
                            u += mend - (v + 1);
                            //printf("u = %d, end = %d\n", u, end);
                            //printf("#%.*s#\n", cast(int)(end - u), &buf.data[u]);
                            continue;
                        }
                    }
                    else
                    {
                        // Replace $(NAME) with nothing
                        buf.remove(u, v + 1 - u);
                        end -= (v + 1 - u);
                        continue;
                    }
                }
            }
            u++;
        }
        pend = end;
        return true;
    }

  private:

    /**********************************
     * Look up a macro by name.
     * Params:
     *  name = name of macro
     * Returns: the macro's table entry, or `null` if `name` is not defined
     */
    Macro* search(const(char)[] name) @nogc nothrow pure @safe
    {
        //printf("Macro::search(%.*s)\n", cast(int)name.length, name.ptr);
        if (auto table = name in mactab)
        {
            //printf("\tfound %d\n", table.textlen);
            return *table;
        }
        return null;
    }

    // All defined macros, keyed by macro name
    private Macro*[const(char)[]] mactab;
}
280 
281 /* ************************************************************************ */
282 
283 private:
284 
/// One macro definition: its name, its replacement text and an in-use counter.
struct Macro
{
    /// Name the macro is defined under
    const(char)[] name;
    /// Replacement text for the macro
    const(char)[] text;
    /// Non-zero while the macro is in use (don't expand)
    int inuse = 0;

    /// Create a macro `name` with replacement `text`; `inuse` starts at zero.
    this(const(char)[] name, const(char)[] text) @nogc nothrow pure @safe
    {
        this.text = text;
        this.name = name;
    }
}
297 
/************************
 * Make mutable copy of slice p.
 * Params:
 *      p = slice to copy; may be empty
 * Returns:
 *      copy allocated with mem.xmalloc(); `null` when `p` is empty
 */

char[] memdup(const(char)[] p) nothrow pure
{
    const size_t len = p.length;
    // Nothing to copy: return early rather than hand memcpy() a pointer that
    // may be null, which C does not permit even for a length of zero.
    if (len == 0)
        return null;
    return (cast(char*)memcpy(mem.xmalloc(len), p.ptr, len))[0 .. len];
}
311 
/**********************************************************
 * Given buffer buf[], extract argument marg[].
 *
 * Scanning starts at the beginning of buf[] with one parenthesis already
 * counted as open, and stops at the ',' that ends the requested argument,
 * at the matching ')', or at the end of buf[].
 * Params:
 *      buf = source string
 *      marg = set to slice of buf[]
 *      n =     0:      get entire argument
 *              1..9:   get nth argument
 *              -1:     get 2nd through end
 * Returns:
 *      index into buf[] at which scanning stopped
 */
size_t extractArgN(const(char)[] buf, out const(char)[] marg, int n) @nogc nothrow pure
{
    /* Scan forward for matching right parenthesis.
     * Nest parentheses.
     * Skip over "..." and '...' strings inside HTML tags.
     * Skip over <!-- ... --> comments.
     * Skip over previous macro insertions
     * Set marg.
     */
    const text = buf.ptr;
    const limit = buf.length;

    uint depth = 1;         // open parentheses; reaching 0 ends the scan
    ubyte quote = 0;        // quote character of the string being skipped, else 0
    bool inComment = false; // between <!-- and -->
    bool inTag = false;     // between '<' + letter and '>'
    uint painted = 0;       // nesting of \xFF{ ... \xFF} insertions
    uint commas = 0;        // argument separators seen so far
    size_t pos = 0;
    size_t argStart;

Lnextarg:
    // Skip first space, if any, to find the start of the macro argument
    if (n != 1 && pos < limit && isspace(text[pos]))
        ++pos;
    argStart = pos;

Lscan:
    for (; pos < limit; ++pos)
    {
        char c = text[pos];
        // True when not inside an insertion, a string or a comment
        const bool plain = !painted && !quote && !inComment;
        switch (c)
        {
        case ',':
            // Only a comma at the outermost nesting level separates arguments
            if (!plain || depth != 1)
                break;
            ++commas;
            if (commas == n)
                break Lscan; // end of the requested argument
            if ((commas == 1 && n == -1) || commas + 1 == n)
            {
                // The wanted argument begins right after this comma
                ++pos;
                goto Lnextarg;
            }
            break;

        case '(':
            if (plain)
                ++depth;
            break;

        case ')':
            if (plain && --depth == 0)
                break Lscan; // matching right parenthesis
            break;

        case '"':
        case '\'':
            if (!painted && !inComment && inTag)
            {
                if (c == quote)
                    quote = 0;
                else if (!quote)
                    quote = c;
            }
            break;

        case '<':
            if (plain)
            {
                if (pos + 6 < limit && text[pos + 1] == '!' && text[pos + 2] == '-' && text[pos + 3] == '-')
                {
                    inComment = true;
                    pos += 3;
                }
                else if (pos + 2 < limit && isalpha(text[pos + 1]))
                    inTag = true;
            }
            break;

        case '>':
            if (!painted)
                inTag = false;
            break;

        case '-':
            if (!painted && !quote && inComment && pos + 2 < limit && text[pos + 1] == '-' && text[pos + 2] == '>')
            {
                inComment = false;
                pos += 2;
            }
            break;

        case 0xFF:
            if (pos + 1 < limit)
            {
                if (text[pos + 1] == '{')
                    ++painted;
                else if (text[pos + 1] == '}')
                    --painted;
            }
            break;

        default:
            break;
        }
    }

    // Asking for "2nd through end" when no separator was seen yields an
    // empty slice at the stop position.
    marg = (commas == 0 && n == -1) ? text[pos .. pos] : text[argStart .. pos];
    //printf("extractArg%d('%.*s') = '%.*s'\n", n, cast(int)limit, text, cast(int)marg.length, marg.ptr);
    return pos;
}
431 
/*****************************************
 * Get number of UTF-8 code units in code point that starts with `c`
 * Params:
 *      c = starting code unit
 * Returns: number of UTF-8 code units (i.e. bytes), else 1 on invalid UTF start
 */
@safe
int utfStride(char c) @nogc nothrow pure
{
    // Below 0x80 is a single-unit code point; 0x80..0xBF is an invalid UTF start
    if (c < 0xC0)
        return 1;
    if (c < 0xE0)
        return 2;
    if (c < 0xF0)
        return 3;
    if (c < 0xF8)
        return 4;
    if (c < 0xFC)
        return 5;
    if (c < 0xFE)
        return 6;
    // 0xFE and 0xFF: invalid UTF start
    return 1;
}
451 
unittest
{
    // Lowest code unit of each range, paired with the stride expected for it
    static struct Case
    {
        char lead;
        int stride;
    }

    static immutable Case[] cases = [
        Case(0x00, 1),
        Case(0x80, 1),
        Case(0xC0, 2),
        Case(0xE0, 3),
        Case(0xF0, 4),
        Case(0xF8, 5),
        Case(0xFC, 6),
        Case(0xFE, 1),
    ];

    foreach (ref sample; cases)
        assert(utfStride(sample.lead) == sample.stride);
}