/**
 * Text macro processor for Ddoc.
 *
 * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/dmacro.d, _dmacro.d)
 * Documentation:  https://dlang.org/phobos/dmd_dmacro.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/dmacro.d
 */

module dmd.dmacro;

import core.stdc.ctype;
import core.stdc.string;
import dmd.common.outbuffer;
import dmd.root.rmem;

@trusted:

struct MacroTable
{
    /**********************************
     * Define name=text macro.
     * If macro `name` already exists, replace the text for it.
     * The existing `Macro` object is kept in that case, only its text changes.
     * Params:
     *  name = name of macro
     *  text = text of macro
     */
    void define(const(char)[] name, const(char)[] text) nothrow pure @safe
    {
        //printf("MacroTable::define('%.*s' = '%.*s')\n", cast(int)name.length, name.ptr, text.length, text.ptr);
        if (auto table = name in mactab)
        {
            (*table).text = text;
            return;
        }
        mactab[name] = new Macro(name, text);
    }

    alias fp_t = bool function(const(char)* p) @nogc nothrow pure;

    /*****************************************************
     * Look for macros in buf and expand them in place.
     * Only look at the text in buf from start to pend.
     *
     * The first pass substitutes `$0`..`$9` and `$+` with (parts of) `arg`;
     * the second pass expands `$(NAME args)` invocations, recursing into the
     * inserted text. `$$0` and `$$(NAME)` are not expanded; one `$` is dropped.
     *
     * Params:
     *  buf = buffer holding the text; modified in place
     *  start = index in `buf` of the first character to examine
     *  pend = index one past the last character to examine; set to the new
     *         end of the expanded region on success, left untouched on failure
     *  arg = argument text of the macro being expanded (source of `$0`, `$1`, ...)
     *  recursionLimit = remaining nesting depth; decremented on each call
     *  isIdStart = predicate: does the text at `p` start a macro name
     *  isIdTail = predicate: does the text at `p` continue a macro name
     *
     * Returns: `true` on success, `false` when the recursion limit was reached
     */
    bool expand(ref OutBuffer buf, size_t start, ref size_t pend, const(char)[] arg, int recursionLimit,
        fp_t isIdStart, fp_t isIdTail) nothrow pure
    {
        version (none)
        {
            printf("Macro::expand(buf[%d..%d], arg = '%.*s')\n", start, pend, cast(int)arg.length, arg.ptr);
            printf("Buf is: '%.*s'\n", cast(int)(pend - start), buf.data + start);
        }
        // limit recursive expansion
        recursionLimit--;
        if (recursionLimit < 0)
            return false;

        size_t end = pend;
        assert(start <= end);
        assert(end <= buf.length);
        /* First pass - replace $0
         */
        // Work on a private copy: the caller's `arg` may point into `buf`,
        // which is edited (and possibly reallocated) below.
        arg = memdup(arg);
        // Release the copy on every exit path, including the `return false` ones.
        scope (exit) mem.xfree(cast(char*)arg.ptr);
        for (size_t u = start; u + 1 < end;)
        {
            char* p = cast(char*)buf[].ptr; // buf.data is not loop invariant
            /* Look for $0, but not $$0, and replace it with arg.
             */
            if (p[u] == '$' && (isdigit(p[u + 1]) || p[u + 1] == '+'))
            {
                if (u > start && p[u - 1] == '$')
                {
                    // Don't expand $$0, but replace it with $0
                    buf.remove(u - 1, 1);
                    end--;
                    u += 1; // now u is one past the closing '1'
                    continue;
                }
                char c = p[u + 1];
                int n = (c == '+') ? -1 : c - '0';
                const(char)[] marg;
                if (n == 0)
                {
                    marg = arg;
                }
                else
                    extractArgN(arg, marg, n);
                if (marg.length == 0)
                {
                    // Just remove macro invocation
                    //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], cast(int)marg.length, marg.ptr);
                    buf.remove(u, 2);
                    end -= 2;
                }
                else if (c == '+')
                {
                    // Replace '$+' with 'arg'
                    //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], cast(int)marg.length, marg.ptr);
                    buf.remove(u, 2);
                    buf.insert(u, marg);
                    end += marg.length - 2;
                    // Scan replaced text for further expansion
                    size_t mend = u + marg.length;
                    const success = expand(buf, u, mend, null, recursionLimit, isIdStart, isIdTail);
                    if (!success)
                        return false;
                    end += mend - (u + marg.length);
                    u = mend;
                }
                else
                {
                    // Replace '$1' with '\xFF{arg\xFF}'
                    //printf("Replacing '$%c' with '\xFF{%.*s\xFF}'\n", p[u + 1], cast(int)marg.length, marg.ptr);
                    ubyte[] slice = cast(ubyte[])buf[];
                    slice[u] = 0xFF;
                    slice[u + 1] = '{';
                    buf.insert(u + 2, marg);
                    buf.insert(u + 2 + marg.length, "\xFF}");
                    end += -2 + 2 + marg.length + 2;
                    // Scan replaced text for further expansion
                    size_t mend = u + 2 + marg.length;
                    const success = expand(buf, u + 2, mend, null, recursionLimit, isIdStart, isIdTail);
                    if (!success)
                        return false;
                    end += mend - (u + 2 + marg.length);
                    u = mend;
                }
                //printf("u = %d, end = %d\n", u, end);
                //printf("#%.*s#\n", cast(int)end, &buf.data[0]);
                continue;
            }
            u++;
        }
        /* Second pass - replace other macros
         */
        for (size_t u = start; u + 4 < end;)
        {
            char* p = cast(char*)buf[].ptr; // buf.data is not loop invariant
            /* A valid start of macro expansion is $(c, where c is
             * an id start character, and not $$(c.
             */
            if (p[u] == '$' && p[u + 1] == '(' && isIdStart(p + u + 2))
            {
                //printf("\tfound macro start '%c'\n", p[u + 2]);
                char* name = p + u + 2;
                size_t namelen = 0;
                const(char)[] marg;
                size_t v;
                /* Scan forward to find end of macro name and
                 * beginning of macro argument (marg).
                 */
                for (v = u + 2; v < end; v += utfStride(p[v]))
                {
                    if (!isIdTail(p + v))
                    {
                        // We've gone past the end of the macro name.
                        namelen = v - (u + 2);
                        break;
                    }
                }
                // A multi-byte stride can step past `end` when the text stops in the
                // middle of a UTF-8 sequence; clamp so the slice below stays valid.
                if (v > end)
                    v = end;
                v += extractArgN(p[v .. end], marg, 0);
                assert(v <= end);
                if (v < end)
                {
                    // v is on the closing ')'
                    if (u > start && p[u - 1] == '$')
                    {
                        // Don't expand $$(NAME), but replace it with $(NAME)
                        buf.remove(u - 1, 1);
                        end--;
                        u = v; // now u is one past the closing ')'
                        continue;
                    }
                    // Buffer owned by this iteration when "NAME,args" has to be built
                    // for DDOC_UNDEFINED_MACRO; released when this block is left.
                    char* undefArgs = null;
                    scope (exit)
                    {
                        if (undefArgs)
                            mem.xfree(undefArgs);
                    }
                    Macro* m = search(name[0 .. namelen]);
                    if (!m)
                    {
                        immutable undef = "DDOC_UNDEFINED_MACRO";
                        m = search(undef);
                        if (m)
                        {
                            // Macro was not defined, so this is an expansion of
                            //   DDOC_UNDEFINED_MACRO. Prepend macro name to args.
                            // marg = name[ ] ~ "," ~ marg[ ];
                            if (marg.length)
                            {
                                char* q = cast(char*)mem.xmalloc(namelen + 1 + marg.length);
                                assert(q);
                                memcpy(q, name, namelen);
                                q[namelen] = ',';
                                memcpy(q + namelen + 1, marg.ptr, marg.length);
                                marg = q[0 .. marg.length + namelen + 1];
                                undefArgs = q;
                            }
                            else
                            {
                                marg = name[0 .. namelen];
                            }
                        }
                    }
                    if (m)
                    {
                        if (m.inuse && marg.length == 0)
                        {
                            // Remove macro invocation
                            buf.remove(u, v + 1 - u);
                            end -= v + 1 - u;
                        }
                        else if (m.inuse && ((arg.length == marg.length && memcmp(arg.ptr, marg.ptr, arg.length) == 0) ||
                                             (arg.length + 4 == marg.length && marg[0] == 0xFF && marg[1] == '{' && memcmp(arg.ptr, marg.ptr + 2, arg.length) == 0 && marg[marg.length - 2] == 0xFF && marg[marg.length - 1] == '}')))
                        {
                            /* Recursive expansion:
                             *   marg is same as arg (with blue paint added)
                             * Just leave in place.
                             */
                        }
                        else
                        {
                            //printf("\tmacro '%.*s'(%.*s) = '%.*s'\n", cast(int)m.namelen, m.name, cast(int)marg.length, marg.ptr, cast(int)m.textlen, m.text);
                            // Copy marg: it may point into buf, which is about to be edited
                            marg = memdup(marg);
                            // Insert replacement text
                            buf.spread(v + 1, 2 + m.text.length + 2);
                            ubyte[] slice = cast(ubyte[])buf[];
                            slice[v + 1] = 0xFF;
                            slice[v + 2] = '{';
                            slice[v + 3 .. v + 3 + m.text.length] = cast(ubyte[])m.text[];
                            slice[v + 3 + m.text.length] = 0xFF;
                            slice[v + 3 + m.text.length + 1] = '}';
                            end += 2 + m.text.length + 2;
                            // Scan replaced text for further expansion
                            m.inuse++;
                            size_t mend = v + 1 + 2 + m.text.length + 2;
                            const success = expand(buf, v + 1, mend, marg, recursionLimit, isIdStart, isIdTail);
                            // Undo the in-use mark and free the argument copy whether or not the
                            // nested expansion succeeded; otherwise a failure would leave the
                            // macro permanently flagged as in use in this table.
                            m.inuse--;
                            mem.xfree(cast(char*)marg.ptr);
                            if (!success)
                                return false;
                            end += mend - (v + 1 + 2 + m.text.length + 2);
                            buf.remove(u, v + 1 - u);
                            end -= v + 1 - u;
                            u += mend - (v + 1);
                            //printf("u = %d, end = %d\n", u, end);
                            //printf("#%.*s#\n", cast(int)(end - u), &buf.data[u]);
                            continue;
                        }
                    }
                    else
                    {
                        // Replace $(NAME) with nothing
                        buf.remove(u, v + 1 - u);
                        end -= (v + 1 - u);
                        continue;
                    }
                }
            }
            u++;
        }
        pend = end;
        return true;
    }

  private:

    /**********************************
     * Look up a macro by name.
     * Params:
     *  name = name of macro
     * Returns: the macro, or `null` if `name` is not defined
     */
    Macro* search(const(char)[] name) @nogc nothrow pure @safe
    {
        //printf("Macro::search(%.*s)\n", cast(int)name.length, name.ptr);
        if (auto table = name in mactab)
        {
            //printf("\tfound %d\n", table.textlen);
            return *table;
        }
        return null;
    }

    private Macro*[const(char)[]] mactab;   // macro name => macro
}

/* ************************************************************************ */

private:

struct Macro
{
    const(char)[] name;     // macro name
    const(char)[] text;     // macro replacement text
    int inuse;              // macro is in use (don't expand)

    this(const(char)[] name, const(char)[] text) @nogc nothrow pure @safe
    {
        this.name = name;
        this.text = text;
    }
}

/************************
 * Make mutable copy of slice p.
 * Params:
 *      p = slice
 * Returns:
 *      copy allocated with mem.xmalloc()
 */

char[] memdup(const(char)[] p) nothrow pure
{
    size_t len = p.length;
    return (cast(char*)memcpy(mem.xmalloc(len), p.ptr, len))[0 .. len];
}

/**********************************************************
 * Given buffer buf[], extract argument marg[].
 * Params:
 *      buf = source string
 *      marg = set to slice of buf[]
 *      n =     0:      get entire argument
 *              1..9:   get nth argument
 *              -1:     get 2nd through end
 * Returns:
 *      index into buf[] at which scanning stopped: the unnested closing ')',
 *      the ',' that terminates argument n, or buf.length if neither was found
 */
size_t extractArgN(const(char)[] buf, out const(char)[] marg, int n) @nogc nothrow pure
{
    /* Scan forward for matching right parenthesis.
     * Nest parentheses.
     * Skip over "..." and '...' strings inside HTML tags.
     * Skip over <!-- ... --> comments.
     * Skip over previous macro insertions
     * Set marg.
     */
    uint parens = 1;
    ubyte instring = 0;
    uint incomment = 0;
    uint intag = 0;
    uint inexp = 0;
    uint argn = 0;
    size_t v = 0;
    const p = buf.ptr;
    const end = buf.length;
Largstart:
    // Skip first space, if any, to find the start of the macro argument
    if (n != 1 && v < end && isspace(p[v]))
        v++;
    size_t vstart = v;
    for (; v < end; v++)
    {
        char c = p[v];
        switch (c)
        {
        case ',':
            if (!inexp && !instring && !incomment && parens == 1)
            {
                argn++;
                if (argn == 1 && n == -1)
                {
                    v++;
                    goto Largstart;
                }
                if (argn == n)
                    break;
                if (argn + 1 == n)
                {
                    v++;
                    goto Largstart;
                }
            }
            continue;
        case '(':
            if (!inexp && !instring && !incomment)
                parens++;
            continue;
        case ')':
            if (!inexp && !instring && !incomment && --parens == 0)
            {
                break;
            }
            continue;
        case '"':
        case '\'':
            if (!inexp && !incomment && intag)
            {
                if (c == instring)
                    instring = 0;
                else if (!instring)
                    instring = c;
            }
            continue;
        case '<':
            if (!inexp && !instring && !incomment)
            {
                if (v + 6 < end && p[v + 1] == '!' && p[v + 2] == '-' && p[v + 3] == '-')
                {
                    incomment = 1;
                    v += 3;
                }
                else if (v + 2 < end && isalpha(p[v + 1]))
                    intag = 1;
            }
            continue;
        case '>':
            if (!inexp)
                intag = 0;
            continue;
        case '-':
            if (!inexp && !instring && incomment && v + 2 < end && p[v + 1] == '-' && p[v + 2] == '>')
            {
                incomment = 0;
                v += 2;
            }
            continue;
        case 0xFF:
            // 0xFF followed by '{' / '}' brackets text inserted by an earlier expansion
            if (v + 1 < end)
            {
                if (p[v + 1] == '{')
                    inexp++;
                else if (p[v + 1] == '}')
                    inexp--;
            }
            continue;
        default:
            continue;
        }
        break;
    }
    if (argn == 0 && n == -1)
        marg = p[v .. v];
    else
        marg = p[vstart .. v];
    //printf("extractArg%d('%.*s') = '%.*s'\n", n, cast(int)end, p, cast(int)marg.length, marg.ptr);
    return v;
}

/*****************************************
 * Get number of UTF-8 code units in code point that starts with `c`
 * Params:
 *      c = starting code unit
 * Returns: number of UTF-8 code units (i.e. bytes), else 1 on invalid UTF start
 */
@safe
int utfStride(char c) @nogc nothrow pure
{
    return
        c < 0x80 ? 1 :
        c < 0xC0 ? 1 : // invalid UTF start
        c < 0xE0 ? 2 :
        c < 0xF0 ? 3 :
        c < 0xF8 ? 4 :
        c < 0xFC ? 5 :
        c < 0xFE ? 6 :
                   1; // invalid UTF start
}

unittest
{
    assert(utfStride(0) == 1);
    assert(utfStride(0x80) == 1);
    assert(utfStride(0xC0) == 2);
    assert(utfStride(0xE0) == 3);
    assert(utfStride(0xF0) == 4);
    assert(utfStride(0xF8) == 5);
    assert(utfStride(0xFC) == 6);
    assert(utfStride(0xFE) == 1);
}