1 /**
2  * Defines a package and module.
3  *
4  * Specification: $(LINK2 https://dlang.org/spec/module.html, Modules)
5  *
6  * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/dmodule.d, _dmodule.d)
10  * Documentation:  https://dlang.org/phobos/dmd_dmodule.html
11  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/dmodule.d
12  */
13 
14 module dmd.dmodule;
15 
16 import core.stdc.stdio;
17 import core.stdc.stdlib;
18 import core.stdc.string;
19 import dmd.aggregate;
20 import dmd.arraytypes;
21 import dmd.astcodegen;
22 import dmd.astenums;
23 import dmd.compiler;
24 import dmd.gluelayer;
25 import dmd.dimport;
26 import dmd.dmacro;
27 import dmd.doc;
28 import dmd.dscope;
29 import dmd.dsymbol;
30 import dmd.dsymbolsem;
31 import dmd.errors;
32 import dmd.errorsink;
33 import dmd.expression;
34 import dmd.expressionsem;
35 import dmd.file_manager;
36 import dmd.globals;
37 import dmd.id;
38 import dmd.identifier;
39 import dmd.location;
40 import dmd.parse;
41 import dmd.cparse;
42 import dmd.root.array;
43 import dmd.root.file;
44 import dmd.root.filename;
45 import dmd.common.outbuffer;
46 import dmd.root.port;
47 import dmd.root.rmem;
48 import dmd.rootobject;
49 import dmd.root.string;
50 import dmd.semantic2;
51 import dmd.semantic3;
52 import dmd.target;
53 import dmd.utils;
54 import dmd.visitor;
55 
// Define the `MARS` version identifier only when neither `IN_GCC` nor
// `IN_LLVM` is set for this build.
version (IN_GCC)
{
}
else version (IN_LLVM)
{
}
else
{
    version = MARS;
}
59 
/**
 * Runs the semantic3 pass on a module and then, recursively, on the
 * entries of its `aimports` list.
 *
 * Nothing is done for a `null` module or for a module whose `semanticRun`
 * is already past `PASS.semantic3`.
 *
 * Params:
 *  m = module to process; may be `null`
 */
void semantic3OnDependencies(Module m)
{
    if (m is null || m.semanticRun > PASS.semantic3)
        return;

    m.semantic3(null);

    // The bound is read once up front and entry 0 is intentionally skipped.
    const importCount = m.aimports.length;
    for (size_t idx = 1; idx < importCount; ++idx)
        semantic3OnDependencies(m.aimports[idx]);
}
74 
/**
 * Removes the header file of every module that is not itself a header
 * (`FileType.dhdr`) when header output is enabled, then calls `fatal()`.
 *
 * Params:
 *  params = compiler parameters; `params.dihdr.doOutput` enables the cleanup
 *  modules = modules whose `hdrfile` should be removed
 */
void removeHdrFilesAndFail(ref Param params, ref Modules modules) nothrow
{
    if (params.dihdr.doOutput)
    {
        foreach (mod; modules)
        {
            // Modules that were read from a header file are left untouched.
            if (mod.filetype != FileType.dhdr)
                File.remove(mod.hdrfile.toChars());
        }
    }

    fatal();
}
92 
/**
 * Builds the file name of a module from its chain of identifiers.
 *
 * The package names are joined with the platform's directory separator and
 * followed by `ident`. When `global.params.modFileAliasStrings` is not empty,
 * each dotted prefix of the module name is compared against the left-hand
 * side of the `lhs=rhs` alias entries; on a match, everything accumulated so
 * far is replaced by the entry's right-hand side (minus one trailing `/` or
 * `\`).
 *
 * Params:
 *  packages = the names of the "parent" packages
 *  ident = the name of the child package or module
 *
 * Returns:
 *  the filename of the child package or module
 */
private const(char)[] getFilename(Identifier[] packages, Identifier ident) nothrow
{
    version (Windows)
        enum dirSeparator = '\\';
    else
        enum dirSeparator = '/';

    const(char)[] leafName = ident.toString();

    OutBuffer path;     // file name being assembled
    OutBuffer dotted;   // dotted module name seen so far, e.g. "a.b."
    auto aliasSpecs = &global.params.modFileAliasStrings;
    const bool haveAliases = aliasSpecs.length != 0;

    if (packages.length == 0 && !haveAliases)
        return leafName;

    /* Appends `component` to the dotted name and, if the result equals the
     * left-hand side of an alias entry, replaces the contents of `path`
     * with that entry's right-hand side.
     */
    void applyAlias(const(char)[] component)
    {
        dotted.writestring(component);
        foreach_reverse (const spec; *aliasSpecs)
        {
            const eq = strchr(spec, '=');
            assert(eq);
            const lhsLength = eq - spec;
            if (dotted.length == lhsLength && memcmp(dotted.peekChars(), spec, lhsLength) == 0)
            {
                auto replacement = eq[1 .. strlen(eq)];
                if (replacement.length > 0)
                {
                    const last = replacement[$ - 1];
                    if (last == '/' || last == '\\')
                        replacement = replacement[0 .. $ - 1]; // drop trailing separator
                }
                path.setsize(0);
                path.writestring(replacement);
                break; // scanning in reverse: the last matching entry wins
            }
        }
        dotted.writeByte('.');
    }

    foreach (pkgId; packages)
    {
        const component = pkgId.toString();
        path.writestring(component);
        if (haveAliases)
            applyAlias(component);
        path.writeByte(dirSeparator);
    }

    path.writestring(leafName);
    if (haveAliases)
        applyAlias(leafName);

    // Zero-terminate the buffer, but do not include the terminator in the slice.
    path.writeByte(0);
    return path.extractSlice()[0 .. $ - 1];
}
157 
/***********************************************************
 * Symbol for a package. `Module` derives from this class; a plain
 * package may additionally be backed by a module (see `mod` and
 * `isPkgMod`).
 */
extern (C++) class Package : ScopeDsymbol
{
    PKG isPkgMod = PKG.unknown;
    uint tag;        // auto incremented tag, used to mask package tree in scopes
    Module mod;     // !=null if isPkgMod == PKG.module_

    /// Creates the package and gives it the next value of a global counter as `tag`.
    final extern (D) this(const ref Loc loc, Identifier ident) nothrow
    {
        super(loc, ident);
        __gshared uint packageTag;
        this.tag = packageTag;
        ++packageTag;
    }

    /// Returns: the string "package"
    override const(char)* kind() const nothrow
    {
        return "package";
    }

    /**
     * Custom comparison for bug 17441: "package a" and "module a" are
     * not equal.
     *
     * Params:
     *  o = object to compare with
     * Returns:
     *  `true` if `o` is this object, or a `Package` with the same
     *  `isModule()` result and an equal identifier
     */
    override bool equals(const RootObject o) const
    {
        if (this == o)
            return true;

        auto other = cast(Package)o;
        if (!other)
            return false;
        if (isModule() != other.isModule())
            return false;
        return ident.equals(other.ident);
    }

    /****************************************************
     * Walks (and creates where missing) the chain of packages named by
     * `packages`, starting at `Module.modules`.
     *
     * Input:
     *      packages[]      the pkg1.pkg2 of pkg1.pkg2.mod
     * Returns:
     *      the symbol table that mod should be inserted into
     * Output:
     *      *pparent        the rightmost package, i.e. pkg2, or NULL if no packages
     *      *ppkg           the leftmost package, i.e. pkg1, or NULL if no packages
     */
    extern (D) static DsymbolTable resolve(Identifier[] packages, Dsymbol* pparent, Package* ppkg)
    {
        //printf("Package::resolve()\n");
        if (ppkg)
            *ppkg = null;

        DsymbolTable table = Module.modules;
        Dsymbol owner = null;

        foreach (pid; packages)
        {
            Dsymbol found = table.lookup(pid);
            Package current;
            if (found)
            {
                current = found.isPackage();
                assert(current);
                // It might already be a module, not a package, but that needs
                // to be checked at a higher level, where a nice error message
                // can be generated.
                // dot net needs modules and packages with same name
                // But we still need a symbol table for it
                if (!current.symtab)
                    current.symtab = new DsymbolTable();
            }
            else
            {
                current = new Package(Loc.initial, pid);
                table.insert(current);
                current.parent = owner;
                current.symtab = new DsymbolTable();
            }

            owner = current;
            table = current.symtab;

            // Only the first package visited is recorded here.
            if (ppkg && !*ppkg)
                *ppkg = current;

            if (current.isModule())
            {
                // Return the module so that a nice error message can be generated
                if (ppkg)
                    *ppkg = cast(Package)found;
                break;
            }
        }

        if (pparent)
            *pparent = owner;
        return table;
    }

    /// Returns: `this`
    override final inout(Package) isPackage() inout
    {
        return this;
    }

    /**
     * Checks if pkg is a sub-package of this
     *
     * For example, if this qualifies to 'a1.a2' and pkg - to 'a1.a2.a3',
     * this function returns 'true'. If it is other way around or qualified
     * package paths conflict function returns 'false'.
     *
     * Params:
     *  pkg = possible subpackage
     *
     * Returns:
     *  see description
     */
    final bool isAncestorPackageOf(const Package pkg) const
    {
        if (this == pkg)
            return true;
        // Climb one level in pkg's parent chain, if there is one.
        if (pkg && pkg.parent)
            return isAncestorPackageOf(pkg.parent.isPackage());
        return false;
    }

    /**
     * Looks up `ident` in this package.
     *
     * `SearchLocalsOnly` is always cleared from `flags`. For a non-module
     * package that has `mod` set, the package's own symbol table is tried
     * first and the search then continues in `mod`; otherwise the lookup is
     * forwarded to `ScopeDsymbol.search`.
     */
    override Dsymbol search(const ref Loc loc, Identifier ident, int flags = SearchLocalsOnly)
    {
        //printf("%s Package.search('%s', flags = x%x)\n", toChars(), ident.toChars(), flags);
        flags &= ~SearchLocalsOnly;  // searching an import is always transitive

        if (isModule() || !mod)
            return ScopeDsymbol.search(loc, ident, flags);

        // Prefer full package name.
        if (symtab)
        {
            if (Dsymbol s = symtab.lookup(ident))
                return s;
        }
        //printf("[%s] through pkdmod: %s\n", loc.toChars(), toChars());
        return mod.search(loc, ident, flags);
    }

    override void accept(Visitor v)
    {
        v.visit(this);
    }

    /// Returns: `mod` if `isPkgMod` is `PKG.module_`, otherwise `null`
    final Module isPackageMod()
    {
        return isPkgMod == PKG.module_ ? mod : null;
    }

    /**
     * Checks for the existence of a package.d to set isPkgMod appropriately
     * if isPkgMod == PKG.unknown
     */
    final void resolvePKGunknown()
    {
        if (isModule() || isPkgMod != PKG.unknown)
            return;

        // Collect the identifiers of the enclosing symbols, innermost first,
        // then flip them into outermost-first order.
        Identifier[] packages;
        Dsymbol s = this.parent;
        while (s)
        {
            packages ~= s.ident;
            s = s.parent;
        }
        reverse(packages);

        if (Module.find(getFilename(packages, ident)))
            Module.load(Loc.initial, packages, this.ident);
        else
            isPkgMod = PKG.package_;
    }
}
323 
324 /***********************************************************
325  */
326 extern (C++) final class Module : Package
327 {
328     extern (C++) __gshared Module rootModule;
329     extern (C++) __gshared DsymbolTable modules; // symbol table of all modules
330     extern (C++) __gshared Modules amodules;     // array of all modules
331     extern (C++) __gshared Dsymbols deferred;    // deferred Dsymbol's needing semantic() run on them
332     extern (C++) __gshared Dsymbols deferred2;   // deferred Dsymbol's needing semantic2() run on them
333     extern (C++) __gshared Dsymbols deferred3;   // deferred Dsymbol's needing semantic3() run on them
334 
335     static void _init()
336     {
337         modules = new DsymbolTable();
338     }
339 
340     /**
341      * Deinitializes the global state of the compiler.
342      *
343      * This can be used to restore the state set by `_init` to its original
344      * state.
345      */
346     static void deinitialize()
347     {
348         modules = modules.init;
349     }
350 
351     extern (C++) __gshared AggregateDeclaration moduleinfo;
352 
353     const(char)[] arg;           // original argument name
354     ModuleDeclaration* md;      // if !=null, the contents of the ModuleDeclaration declaration
355     const FileName srcfile;     // input source file
356     const FileName objfile;     // output .obj file
357     const FileName hdrfile;     // 'header' file
358     FileName docfile;           // output documentation file
359     const(ubyte)[] src;         /// Raw content of the file
360     uint errors;                // if any errors in file
361     uint numlines;              // number of lines in source file
362     FileType filetype;          // source file type
363     bool hasAlwaysInlines;      // contains references to functions that must be inlined
364     bool isPackageFile;         // if it is a package.d
365     Package pkg;                // if isPackageFile is true, the Package that contains this package.d
366     Strings contentImportedFiles; // array of files whose content was imported
367     int needmoduleinfo;
368     private ThreeState selfimports;
369     private ThreeState rootimports;
370     Dsymbol[void*] tagSymTab;   /// ImportC: tag symbols that conflict with other symbols used as the index
371 
372     private OutBuffer defines;  // collect all the #define lines here
373 
374 
375     /*************************************
376      * Return true if module imports itself.
377      */
378     bool selfImports()
379     {
380         //printf("Module::selfImports() %s\n", toChars());
381         if (selfimports == ThreeState.none)
382         {
383             foreach (Module m; amodules)
384                 m.insearch = false;
385             selfimports = imports(this) ? ThreeState.yes : ThreeState.no;
386             foreach (Module m; amodules)
387                 m.insearch = false;
388         }
389         return selfimports == ThreeState.yes;
390     }
391 
392     /*************************************
393      * Return true if module imports root module.
394      */
395     bool rootImports()
396     {
397         //printf("Module::rootImports() %s\n", toChars());
398         if (rootimports == ThreeState.none)
399         {
400             foreach (Module m; amodules)
401                 m.insearch = false;
402             rootimports = ThreeState.no;
403             foreach (Module m; amodules)
404             {
405                 if (m.isRoot() && imports(m))
406                 {
407                     rootimports = ThreeState.yes;
408                     break;
409                 }
410             }
411             foreach (Module m; amodules)
412                 m.insearch = false;
413         }
414         return rootimports == ThreeState.yes;
415     }
416 
417     private Identifier searchCacheIdent;
418     private Dsymbol searchCacheSymbol;  // cached value of search
419     private int searchCacheFlags;       // cached flags
420     private bool insearch;
421 
422     /**
423      * A root module is one that will be compiled all the way to
424      * object code.  This field holds the root module that caused
425      * this module to be loaded.  If this module is a root module,
426      * then it will be set to `this`.  This is used to determine
427      * ownership of template instantiation.
428      */
429     Module importedFrom;
430 
431     Dsymbols* decldefs;         // top level declarations for this Module
432 
433     Modules aimports;           // all imported modules
434 
435     uint debuglevel;            // debug level
436     Identifiers* debugids;      // debug identifiers
437     Identifiers* debugidsNot;   // forward referenced debug identifiers
438 
439     uint versionlevel;          // version level
440     Identifiers* versionids;    // version identifiers
441     Identifiers* versionidsNot; // forward referenced version identifiers
442 
443     MacroTable macrotable;      // document comment macros
444     Escape* _escapetable;       // document comment escapes
445 
446     size_t nameoffset;          // offset of module name from start of ModuleInfo
447     size_t namelen;             // length of module name in characters
448 
449     extern (D) this(const ref Loc loc, const(char)[] filename, Identifier ident, int doDocComment, int doHdrGen)
450     {
451         super(loc, ident);
452         const(char)[] srcfilename;
453         //printf("Module::Module(filename = '%.*s', ident = '%s')\n", cast(int)filename.length, filename.ptr, ident.toChars());
454         this.arg = filename;
455         srcfilename = FileName.defaultExt(filename, mars_ext);
456         if (target.run_noext && global.params.run &&
457             !FileName.ext(filename) &&
458             FileName.exists(srcfilename) == 0 &&
459             FileName.exists(filename) == 1)
460         {
461             FileName.free(srcfilename.ptr);
462             srcfilename = FileName.removeExt(filename); // just does a mem.strdup(filename)
463         }
464         else if (!FileName.equalsExt(srcfilename, mars_ext) &&
465                  !FileName.equalsExt(srcfilename, hdr_ext) &&
466                  !FileName.equalsExt(srcfilename, c_ext) &&
467                  !FileName.equalsExt(srcfilename, i_ext) &&
468                  !FileName.equalsExt(srcfilename, dd_ext))
469         {
470 
471             error(loc, "%s `%s` source file name '%.*s' must have .%.*s extension",
472                   kind, toPrettyChars,
473                   cast(int)srcfilename.length, srcfilename.ptr,
474                   cast(int)mars_ext.length, mars_ext.ptr);
475             fatal();
476         }
477 
478         srcfile = FileName(srcfilename);
479         objfile = setOutfilename(global.params.objname, global.params.objdir, filename, target.obj_ext);
480         if (doDocComment)
481             setDocfile();
482         if (doHdrGen)
483             hdrfile = setOutfilename(global.params.dihdr.name, global.params.dihdr.dir, arg, hdr_ext);
484     }
485 
486     extern (D) this(const(char)[] filename, Identifier ident, int doDocComment, int doHdrGen)
487     {
488         this(Loc.initial, filename, ident, doDocComment, doHdrGen);
489     }
490 
491     static Module create(const(char)* filename, Identifier ident, int doDocComment, int doHdrGen)
492     {
493         return create(filename.toDString, ident, doDocComment, doHdrGen);
494     }
495 
496     extern (D) static Module create(const(char)[] filename, Identifier ident, int doDocComment, int doHdrGen)
497     {
498         return new Module(Loc.initial, filename, ident, doDocComment, doHdrGen);
499     }
500 
501     static const(char)* find(const(char)* filename)
502     {
503         return find(filename.toDString).ptr;
504     }
505 
506     extern (D) static const(char)[] find(const(char)[] filename)
507     {
508         return global.fileManager.lookForSourceFile(filename, global.path ? (*global.path)[] : null);
509     }
510 
511     extern (C++) static Module load(const ref Loc loc, Identifiers* packages, Identifier ident)
512     {
513         return load(loc, packages ? (*packages)[] : null, ident);
514     }
515 
516     extern (D) static Module load(const ref Loc loc, Identifier[] packages, Identifier ident)
517     {
518         //printf("Module::load(ident = '%s')\n", ident.toChars());
519         // Build module filename by turning:
520         //  foo.bar.baz
521         // into:
522         //  foo\bar\baz
523         const(char)[] filename = getFilename(packages, ident);
524         // Look for the source file
525         if (const result = find(filename))
526             filename = result; // leaks
527 
528         auto m = new Module(loc, filename, ident, 0, 0);
529 
530         if (!m.read(loc))
531             return null;
532         if (global.params.v.verbose)
533         {
534             OutBuffer buf;
535             foreach (pid; packages)
536             {
537                 buf.writestring(pid.toString());
538                 buf.writeByte('.');
539             }
540             buf.printf("%s\t(%s)", ident.toChars(), m.srcfile.toChars());
541             message("import    %s", buf.peekChars());
542         }
543         if((m = m.parse()) is null) return null;
544 
545         return m;
546     }
547 
548     override const(char)* kind() const
549     {
550         return "module";
551     }
552 
553     /*********************************************
554      * Combines things into output file name for .html and .di files.
555      * Input:
556      *      name    Command line name given for the file, NULL if none
557      *      dir     Command line directory given for the file, NULL if none
558      *      arg     Name of the source file
559      *      ext     File name extension to use if 'name' is NULL
560      *      global.params.preservePaths     get output path from arg
561      *      srcfile Input file - output file name must not match input file
562      */
563     extern(D) FileName setOutfilename(const(char)[] name, const(char)[] dir, const(char)[] arg, const(char)[] ext)
564     {
565         const(char)[] docfilename;
566         if (name)
567         {
568             docfilename = name;
569         }
570         else
571         {
572             const(char)[] argdoc;
573             OutBuffer buf;
574             if (arg == "__stdin.d")
575             {
576                 version (Posix)
577                     import core.sys.posix.unistd : getpid;
578                 else version (Windows)
579                     import core.sys.windows.winbase : getpid = GetCurrentProcessId;
580                 buf.printf("__stdin_%d.d", getpid());
581                 arg = buf[];
582             }
583             if (global.params.preservePaths)
584                 argdoc = arg;
585             else
586                 argdoc = FileName.name(arg);
587             // If argdoc doesn't have an absolute path, make it relative to dir
588             if (!FileName.absolute(argdoc))
589             {
590                 //FileName::ensurePathExists(dir);
591                 argdoc = FileName.combine(dir, argdoc);
592             }
593             docfilename = FileName.forceExt(argdoc, ext);
594         }
595         if (FileName.equals(docfilename, srcfile.toString()))
596         {
597             error(loc, "%s `%s` source file and output file have same name '%s'",
598                 kind, toPrettyChars, srcfile.toChars());
599             fatal();
600         }
601         return FileName(docfilename);
602     }
603 
604     extern (D) void setDocfile()
605     {
606         docfile = setOutfilename(global.params.ddoc.name, global.params.ddoc.dir, arg, doc_ext);
607     }
608 
609     /**
610      * Trigger the relevant semantic error when a file cannot be read
611      *
612      * We special case `object.d` as a failure is likely to be a rare
613      * but difficult to diagnose case for the user. Packages also require
614      * special handling to avoid exposing the compiler's internals.
615      *
616      * Params:
617      *  loc = The location at which the file read originated (e.g. import)
618      */
619     private void onFileReadError(const ref Loc loc)
620     {
621         if (FileName.equals(srcfile.toString(), "object.d"))
622         {
623             .error(loc, "cannot find source code for runtime library file 'object.d'");
624             version (IN_LLVM)
625             {
626                 errorSupplemental(loc, "ldc2 might not be correctly installed.");
627                 errorSupplemental(loc, "Please check your ldc2.conf configuration file.");
628                 errorSupplemental(loc, "Installation instructions can be found at http://wiki.dlang.org/LDC.");
629             }
630             version (MARS)
631             {
632                 errorSupplemental(loc, "dmd might not be correctly installed. Run 'dmd -man' for installation instructions.");
633                 const dmdConfFile = global.inifilename.length ? FileName.canonicalName(global.inifilename) : "not found";
634                 errorSupplemental(loc, "config file: %.*s", cast(int)dmdConfFile.length, dmdConfFile.ptr);
635             }
636         }
637         else if (FileName.ext(this.arg) || !loc.isValid())
638         {
639             // Modules whose original argument name has an extension, or do not
640             // have a valid location come from the command-line.
641             // Error that their file cannot be found and return early.
642             .error(loc, "cannot find input file `%s`", srcfile.toChars());
643         }
644         else
645         {
646             // if module is not named 'package' but we're trying to read 'package.d', we're looking for a package module
647             bool isPackageMod = (strcmp(toChars(), "package") != 0) && isPackageFileName(srcfile);
648             if (isPackageMod)
649                 .error(loc, "importing package '%s' requires a 'package.d' file which cannot be found in '%s'", toChars(), srcfile.toChars());
650             else
651             {
652                 .error(loc, "unable to read module `%s`", toChars());
653                 const pkgfile = FileName.combine(FileName.removeExt(srcfile.toString()), package_d);
654                 .errorSupplemental(loc, "Expected '%s' or '%s' in one of the following import paths:",
655                     srcfile.toChars(), pkgfile.ptr);
656             }
657         }
658         if (!global.gag)
659         {
660             /* Print path
661              */
662             if (global.path)
663             {
664                 foreach (i, p; *global.path)
665                     fprintf(stderr, "import path[%llu] = %s\n", cast(ulong)i, p);
666             }
667             else
668             {
669                 fprintf(stderr, "Specify path to file '%s' with -I switch\n", srcfile.toChars());
670             }
671 
672             removeHdrFilesAndFail(global.params, Module.amodules);
673         }
674     }
675 
676     /**
677      * Reads the file from `srcfile` and loads the source buffer.
678      *
679      * If makefile module dependency is requested, we add this module
680      * to the list of dependencies from here.
681      *
682      * Params:
683      *  loc = the location
684      *
685      * Returns: `true` if successful
686      */
687     bool read(const ref Loc loc)
688     {
689         if (this.src)
690             return true; // already read
691 
692         //printf("Module::read('%s') file '%s'\n", toChars(), srcfile.toChars());
693 
694         /* Preprocess the file if it's a .c file
695          */
696         FileName filename = srcfile;
697         bool ifile = false;             // did we generate a .i file
698         scope (exit)
699         {
700             if (ifile)
701                 File.remove(filename.toChars());        // remove generated file
702         }
703 
704         if (global.preprocess &&
705             FileName.equalsExt(srcfile.toString(), c_ext) &&
706             FileName.exists(srcfile.toString()))
707         {
708             filename = global.preprocess(srcfile, loc, ifile, &defines);  // run C preprocessor
709         }
710 
711         if (auto result = global.fileManager.lookup(filename))
712         {
713             this.src = result;
714             if (global.params.makeDeps.doOutput)
715                 global.params.makeDeps.files.push(srcfile.toChars());
716             return true;
717         }
718 
719         this.onFileReadError(loc);
720         return false;
721     }
722 
723     /// syntactic parse
724     Module parse()
725     {
726         return parseModule!ASTCodegen();
727     }
728 
729     /// ditto
730     extern (D) Module parseModule(AST)()
731     {
732         const(char)* srcname = srcfile.toChars();
733         //printf("Module::parse(srcname = '%s')\n", srcname);
734         isPackageFile = isPackageFileName(srcfile);
735         const(char)[] buf = processSource(src, this);
736         // an error happened on UTF conversion
737         if (buf is null) return null;
738 
739         /* If it starts with the string "Ddoc", then it's a documentation
740          * source file.
741          */
742         if (buf.length>= 4 && buf[0..4] == "Ddoc")
743         {
744             comment = buf.ptr + 4;
745             filetype = FileType.ddoc;
746             if (!docfile)
747                 setDocfile();
748             return this;
749         }
750         /* If it has the extension ".dd", it is also a documentation
751          * source file. Documentation source files may begin with "Ddoc"
752          * but do not have to if they have the .dd extension.
753          * https://issues.dlang.org/show_bug.cgi?id=15465
754          */
755         if (FileName.equalsExt(arg, dd_ext))
756         {
757             comment = buf.ptr; // the optional Ddoc, if present, is handled above.
758             filetype = FileType.ddoc;
759             if (!docfile)
760                 setDocfile();
761             return this;
762         }
763         /* If it has the extension ".di", it is a "header" file.
764          */
765         if (FileName.equalsExt(arg, hdr_ext))
766             filetype = FileType.dhdr;
767 
768         /// Promote `this` to a root module if requested via `-i`
769         void checkCompiledImport()
770         {
771             if (!this.isRoot() && Compiler.onImport(this))
772                 this.importedFrom = this;
773         }
774 
775         DsymbolTable dst;
776         Package ppack = null;
777 
778         /* If it has the extension ".c", it is a "C" file.
779          * If it has the extension ".i", it is a preprocessed "C" file.
780          */
781         if (FileName.equalsExt(arg, c_ext) || FileName.equalsExt(arg, i_ext))
782         {
783             filetype = FileType.c;
784 
785             global.compileEnv.masm = target.os == Target.OS.Windows && !target.omfobj; // Microsoft inline assembler format
786             scope p = new CParser!AST(this, buf, cast(bool) docfile, global.errorSink, target.c, &defines, &global.compileEnv);
787             global.compileEnv.masm = false;
788             p.nextToken();
789             checkCompiledImport();
790             members = p.parseModule();
791             assert(!p.md); // C doesn't have module declarations
792             numlines = p.scanloc.linnum;
793         }
794         else
795         {
796             const bool doUnittests = global.params.useUnitTests || global.params.ddoc.doOutput || global.params.dihdr.doOutput;
797             scope p = new Parser!AST(this, buf, cast(bool) docfile, global.errorSink, &global.compileEnv, doUnittests);
798             p.transitionIn = global.params.v.vin;
799             p.nextToken();
800             p.parseModuleDeclaration();
801             md = p.md;
802 
803             if (md)
804             {
805                 /* A ModuleDeclaration, md, was provided.
806                 * The ModuleDeclaration sets the packages this module appears in, and
807                 * the name of this module.
808                 */
809                 this.ident = md.id;
810                 dst = Package.resolve(md.packages, &this.parent, &ppack);
811             }
812 
813             // Done after parsing the module header because `module x.y.z` may override the file name
814             checkCompiledImport();
815 
816             members = p.parseModuleContent();
817             numlines = p.scanloc.linnum;
818         }
819 
820         /* The symbol table into which the module is to be inserted.
821          */
822 
823         if (md)
824         {
825             // Mark the package path as accessible from the current module
826             // https://issues.dlang.org/show_bug.cgi?id=21661
827             // Code taken from Import.addPackageAccess()
828             if (md.packages.length > 0)
829             {
830                 // module a.b.c.d;
831                 auto p = ppack; // a
832                 addAccessiblePackage(p, Visibility(Visibility.Kind.private_));
833                 foreach (id; md.packages[1 .. $]) // [b, c]
834                 {
835                     p = cast(Package) p.symtab.lookup(id);
836                     if (p is null)
837                         break;
838                     addAccessiblePackage(p, Visibility(Visibility.Kind.private_));
839                 }
840             }
841             assert(dst);
842             Module m = ppack ? ppack.isModule() : null;
843             if (m && !isPackageFileName(m.srcfile))
844             {
845                 .error(md.loc, "package name '%s' conflicts with usage as a module name in file %s", ppack.toPrettyChars(), m.srcfile.toChars());
846             }
847         }
848         else
849         {
850             /* The name of the module is set to the source file name.
851              * There are no packages.
852              */
853             dst = modules; // and so this module goes into global module symbol table
854             /* Check to see if module name is a valid identifier
855              */
856             if (!Identifier.isValidIdentifier(this.ident.toChars()))
857                 error(loc, "%s `%s` has non-identifier characters in filename, use module declaration instead", kind, toPrettyChars);
858         }
859         // Insert module into the symbol table
860         Dsymbol s = this;
861         if (isPackageFile)
862         {
863             /* If the source tree is as follows:
864              *     pkg/
865              *     +- package.d
866              *     +- common.d
867              * the 'pkg' will be incorporated to the internal package tree in two ways:
868              *     import pkg;
869              * and:
870              *     import pkg.common;
871              *
872              * If both are used in one compilation, 'pkg' as a module (== pkg/package.d)
873              * and a package name 'pkg' will conflict each other.
874              *
875              * To avoid the conflict:
876              * 1. If preceding package name insertion had occurred by Package::resolve,
877              *    reuse the previous wrapping 'Package' if it exists
878              * 2. Otherwise, 'package.d' wrapped by 'Package' is inserted to the internal tree in here.
879              *
880              * Then change Package::isPkgMod to PKG.module_ and set Package::mod.
881              *
882              * Note that the 'wrapping Package' is the Package that contains package.d and other submodules,
883              * the one inserted to the symbol table.
884              */
885             auto ps = dst.lookup(ident);
886             Package p = ps ? ps.isPackage() : null;
887             if (p is null)
888             {
889                 p = new Package(Loc.initial, ident);
890                 p.tag = this.tag; // reuse the same package tag
891                 p.symtab = new DsymbolTable();
892             }
893             this.tag = p.tag; // reuse the 'older' package tag
894             this.pkg = p;
895             p.parent = this.parent;
896             p.isPkgMod = PKG.module_;
897             p.mod = this;
898             s = p;
899         }
900         if (!dst.insert(s))
901         {
902             /* It conflicts with a name that is already in the symbol table.
903              * Figure out what went wrong, and issue error message.
904              */
905             Dsymbol prev = dst.lookup(ident);
906             assert(prev);
907             if (Module mprev = prev.isModule())
908             {
909                 if (!FileName.equals(srcname, mprev.srcfile.toChars()))
910                     error(loc, "%s `%s` from file %s conflicts with another module %s from file %s", kind, toPrettyChars, srcname, mprev.toChars(), mprev.srcfile.toChars());
911                 else if (isRoot() && mprev.isRoot())
912                     error(loc, "%s `%s` from file %s is specified twice on the command line", kind, toPrettyChars, srcname);
913                 else
914                     error(loc, "%s `%s` from file %s must be imported with 'import %s;'", kind, toPrettyChars, srcname, toPrettyChars());
915                 // https://issues.dlang.org/show_bug.cgi?id=14446
916                 // Return previously parsed module to avoid AST duplication ICE.
917                 return mprev;
918             }
919             else if (Package pkg = prev.isPackage())
920             {
921                 // 'package.d' loaded after a previous 'Package' insertion
922                 if (isPackageFile)
923                     amodules.push(this); // Add to global array of all modules
924                 else
925                     error(md ? md.loc : loc, "%s `%s` from file %s conflicts with package name %s", kind, toPrettyChars, srcname, pkg.toChars());
926             }
927             else
928                 assert(global.errors);
929         }
930         else
931         {
932             // Add to global array of all modules
933             amodules.push(this);
934         }
935         Compiler.onParseModule(this);
936         return this;
937     }
938 
939     override void importAll(Scope* prevsc)
940     {
941         //printf("+Module::importAll(this = %p, '%s'): parent = %p\n", this, toChars(), parent);
942         if (_scope)
943             return; // already done
944         if (filetype == FileType.ddoc)
945         {
946             error(loc, "%s `%s` is a Ddoc file, cannot import it", kind, toPrettyChars);
947             return;
948         }
949 
950         /* Note that modules get their own scope, from scratch.
951          * This is so regardless of where in the syntax a module
952          * gets imported, it is unaffected by context.
953          * Ignore prevsc.
954          */
955         Scope* sc = Scope.createGlobal(this, global.errorSink); // create root scope
956 
957         if (md && md.msg)
958             md.msg = semanticString(sc, md.msg, "deprecation message");
959 
960         // Add import of "object", even for the "object" module.
961         // If it isn't there, some compiler rewrites, like
962         //    classinst == classinst -> .object.opEquals(classinst, classinst)
963         // would fail inside object.d.
964         if (filetype != FileType.c &&
965             (members.length == 0 ||
966              (*members)[0].ident != Id.object ||
967              (*members)[0].isImport() is null))
968         {
969             auto im = new Import(Loc.initial, null, Id.object, null, 0);
970             members.shift(im);
971         }
972         if (!symtab)
973         {
974             // Add all symbols into module's symbol table
975             symtab = new DsymbolTable();
976             for (size_t i = 0; i < members.length; i++)
977             {
978                 Dsymbol s = (*members)[i];
979                 s.addMember(sc, sc.scopesym);
980             }
981         }
982         // anything else should be run after addMember, so version/debug symbols are defined
983         /* Set scope for the symbols so that if we forward reference
984          * a symbol, it can possibly be resolved on the spot.
985          * If this works out well, it can be extended to all modules
986          * before any semantic() on any of them.
987          */
988         setScope(sc); // remember module scope for semantic
989         for (size_t i = 0; i < members.length; i++)
990         {
991             Dsymbol s = (*members)[i];
992             s.setScope(sc);
993         }
994         for (size_t i = 0; i < members.length; i++)
995         {
996             Dsymbol s = (*members)[i];
997             s.importAll(sc);
998         }
999         sc = sc.pop();
1000         sc.pop(); // 2 pops because Scope.createGlobal() created 2
1001     }
1002 
1003     /**********************************
1004      * Determine if we need to generate an instance of ModuleInfo
1005      * for this Module.
1006      */
1007     int needModuleInfo()
1008     {
1009         //printf("needModuleInfo() %s, %d, %d\n", toChars(), needmoduleinfo, global.params.cov);
1010         return needmoduleinfo || global.params.cov;
1011     }
1012 
1013     /*******************************************
1014      * Print deprecation warning if we're deprecated, when
1015      * this module is imported from scope sc.
1016      *
1017      * Params:
1018      *  sc = the scope into which we are imported
1019      *  loc = the location of the import statement
1020      */
1021     void checkImportDeprecation(const ref Loc loc, Scope* sc)
1022     {
1023         if (md && md.isdeprecated && !sc.isDeprecated)
1024         {
1025             Expression msg = md.msg;
1026             if (StringExp se = msg ? msg.toStringExp() : null)
1027             {
1028                 const slice = se.peekString();
1029                 if (slice.length)
1030                 {
1031                     deprecation(loc, "%s `%s` is deprecated - %.*s", kind, toPrettyChars, cast(int)slice.length, slice.ptr);
1032                     return;
1033                 }
1034             }
1035             deprecation(loc, "%s `%s` is deprecated", kind, toPrettyChars);
1036         }
1037     }
1038 
1039     override Dsymbol search(const ref Loc loc, Identifier ident, int flags = SearchLocalsOnly)
1040     {
1041         /* Since modules can be circularly referenced,
1042          * need to stop infinite recursive searches.
1043          * This is done with the cache.
1044          */
1045         //printf("%s Module.search('%s', flags = x%x) insearch = %d\n", toChars(), ident.toChars(), flags, insearch);
1046         if (insearch)
1047             return null;
1048 
1049         /* Qualified module searches always search their imports,
1050          * even if SearchLocalsOnly
1051          */
1052         if (!(flags & SearchUnqualifiedModule))
1053             flags &= ~(SearchUnqualifiedModule | SearchLocalsOnly);
1054 
1055         if (searchCacheIdent == ident && searchCacheFlags == flags)
1056         {
1057             //printf("%s Module::search('%s', flags = %d) insearch = %d searchCacheSymbol = %s\n",
1058             //        toChars(), ident.toChars(), flags, insearch, searchCacheSymbol ? searchCacheSymbol.toChars() : "null");
1059             return searchCacheSymbol;
1060         }
1061 
1062         uint errors = global.errors;
1063 
1064         insearch = true;
1065         Dsymbol s = ScopeDsymbol.search(loc, ident, flags);
1066         insearch = false;
1067 
1068         if (errors == global.errors)
1069         {
1070             // https://issues.dlang.org/show_bug.cgi?id=10752
1071             // Can cache the result only when it does not cause
1072             // access error so the side-effect should be reproduced in later search.
1073             searchCacheIdent = ident;
1074             searchCacheSymbol = s;
1075             searchCacheFlags = flags;
1076         }
1077         return s;
1078     }
1079 
1080     override bool isPackageAccessible(Package p, Visibility visibility, int flags = 0)
1081     {
1082         if (insearch) // don't follow import cycles
1083             return false;
1084         insearch = true;
1085         scope (exit)
1086             insearch = false;
1087         if (flags & IgnorePrivateImports)
1088             visibility = Visibility(Visibility.Kind.public_); // only consider public imports
1089         return super.isPackageAccessible(p, visibility);
1090     }
1091 
1092     override Dsymbol symtabInsert(Dsymbol s)
1093     {
1094         searchCacheIdent = null; // symbol is inserted, so invalidate cache
1095         return Package.symtabInsert(s);
1096     }
1097 
1098     extern (D) void deleteObjFile()
1099     {
1100         if (global.params.obj)
1101             File.remove(objfile.toChars());
1102         if (docfile)
1103             File.remove(docfile.toChars());
1104     }
1105 
1106     /*******************************************
1107      * Can't run semantic on s now, try again later.
1108      */
1109     extern (D) static void addDeferredSemantic(Dsymbol s)
1110     {
1111         //printf("Module::addDeferredSemantic('%s')\n", s.toChars());
1112         if (!deferred.contains(s))
1113             deferred.push(s);
1114     }
1115 
1116     extern (D) static void addDeferredSemantic2(Dsymbol s)
1117     {
1118         //printf("Module::addDeferredSemantic2('%s')\n", s.toChars());
1119         if (!deferred2.contains(s))
1120             deferred2.push(s);
1121     }
1122 
1123     extern (D) static void addDeferredSemantic3(Dsymbol s)
1124     {
1125         //printf("Module::addDeferredSemantic3('%s')\n", s.toChars());
1126         if (!deferred.contains(s))
1127             deferred3.push(s);
1128     }
1129 
1130     /******************************************
1131      * Run semantic() on deferred symbols.
1132      */
1133     static void runDeferredSemantic()
1134     {
1135         __gshared int nested;
1136         if (nested)
1137             return;
1138         //if (deferred.length) printf("+Module::runDeferredSemantic(), len = %ld\n", deferred.length);
1139         nested++;
1140 
1141         size_t len;
1142         do
1143         {
1144             len = deferred.length;
1145             if (!len)
1146                 break;
1147 
1148             Dsymbol* todo;
1149             Dsymbol* todoalloc = null;
1150             Dsymbol tmp;
1151             if (len == 1)
1152             {
1153                 todo = &tmp;
1154             }
1155             else
1156             {
1157                 todo = cast(Dsymbol*)Mem.check(malloc(len * Dsymbol.sizeof));
1158                 todoalloc = todo;
1159             }
1160             memcpy(todo, deferred.tdata(), len * Dsymbol.sizeof);
1161             deferred.setDim(0);
1162 
1163             foreach (i; 0..len)
1164             {
1165                 Dsymbol s = todo[i];
1166                 s.dsymbolSemantic(null);
1167                 //printf("deferred: %s, parent = %s\n", s.toChars(), s.parent.toChars());
1168             }
1169             //printf("\tdeferred.length = %ld, len = %ld\n", deferred.length, len);
1170             if (todoalloc)
1171                 free(todoalloc);
1172         }
1173         while (deferred.length != len); // while making progress
1174         nested--;
1175         //printf("-Module::runDeferredSemantic(), len = %ld\n", deferred.length);
1176     }
1177 
1178     static void runDeferredSemantic2()
1179     {
1180         Module.runDeferredSemantic();
1181 
1182         Dsymbols* a = &Module.deferred2;
1183         for (size_t i = 0; i < a.length; i++)
1184         {
1185             Dsymbol s = (*a)[i];
1186             //printf("[%d] %s semantic2a\n", i, s.toPrettyChars());
1187             s.semantic2(null);
1188 
1189             if (global.errors)
1190                 break;
1191         }
1192         a.setDim(0);
1193     }
1194 
1195     static void runDeferredSemantic3()
1196     {
1197         Module.runDeferredSemantic2();
1198 
1199         Dsymbols* a = &Module.deferred3;
1200         for (size_t i = 0; i < a.length; i++)
1201         {
1202             Dsymbol s = (*a)[i];
1203             //printf("[%d] %s semantic3a\n", i, s.toPrettyChars());
1204             s.semantic3(null);
1205 
1206             if (global.errors)
1207                 break;
1208         }
1209         a.setDim(0);
1210     }
1211 
1212     extern (D) static void clearCache() nothrow
1213     {
1214         foreach (Module m; amodules)
1215             m.searchCacheIdent = null;
1216     }
1217 
1218     /************************************
1219      * Recursively look at every module this module imports,
1220      * return true if it imports m.
1221      * Can be used to detect circular imports.
1222      */
1223     int imports(Module m) nothrow
1224     {
1225         //printf("%s Module::imports(%s)\n", toChars(), m.toChars());
1226         version (none)
1227         {
1228             foreach (i, Module mi; aimports)
1229                 printf("\t[%d] %s\n", cast(int) i, mi.toChars());
1230         }
1231         foreach (Module mi; aimports)
1232         {
1233             if (mi == m)
1234                 return true;
1235             if (!mi.insearch)
1236             {
1237                 mi.insearch = true;
1238                 int r = mi.imports(m);
1239                 if (r)
1240                     return r;
1241             }
1242         }
1243         return false;
1244     }
1245 
1246     bool isRoot() nothrow
1247     {
1248         return this.importedFrom == this;
1249     }
1250 
1251     /// Returns: Whether this module is in the `core` package and has name `ident`
1252     bool isCoreModule(Identifier ident) nothrow
1253     {
1254         return this.ident == ident && parent && parent.ident == Id.core && !parent.parent;
1255     }
1256 
    // Back end
    // NOTE(review): none of the methods visible around these declarations read or
    // write the fields below; presumably they are filled in and consumed by the
    // code generator - confirm against the glue layer.
    int doppelganger; // sub-module
    Symbol* cov; // private uint[] __coverage;
    uint[] covb; // bit array of valid code line numbers
    Symbol* sictor; // module order independent constructor
    Symbol* sctor; // module constructor
    Symbol* sdtor; // module destructor
    Symbol* ssharedctor; // module shared constructor
    Symbol* sshareddtor; // module shared destructor
    Symbol* stest; // module unit test
    Symbol* sfilename; // symbol for filename

    uint[uint] ctfe_cov; /// coverage information from ctfe execution_count[line]
1270 
    override inout(Module) isModule() inout nothrow
    {
        // This symbol is a Module, so the query answers with the object itself.
        return this;
    }
1275 
    override void accept(Visitor v)
    {
        // Pass this module to the visitor's `visit`.
        v.visit(this);
    }
1280 
1281     /***********************************************
1282      * Writes this module's fully-qualified name to buf
1283      * Params:
1284      *    buf = The buffer to write to
1285      */
1286     void fullyQualifiedName(ref OutBuffer buf) nothrow
1287     {
1288         buf.writestring(ident.toString());
1289 
1290         for (auto package_ = parent; package_ !is null; package_ = package_.parent)
1291         {
1292             buf.prependstring(".");
1293             buf.prependstring(package_.ident.toChars());
1294         }
1295     }
1296 
1297     /** Lazily initializes and returns the escape table.
1298     Turns out it eats a lot of memory.
1299     */
1300     extern(D) Escape* escapetable() nothrow
1301     {
1302         if (!_escapetable)
1303             _escapetable = new Escape();
1304         return _escapetable;
1305     }
1306 
1307     /****************************
1308      * A Singleton that loads core.stdc.config
1309      * Returns:
1310      *  Module of core.stdc.config, null if couldn't find it
1311      */
1312     extern (D) static Module loadCoreStdcConfig()
1313     {
1314         __gshared Module core_stdc_config;
1315         auto pkgids = new Identifier[2];
1316         pkgids[0] = Id.core;
1317         pkgids[1] = Id.stdc;
1318         return loadModuleFromLibrary(core_stdc_config, pkgids, Id.config);
1319     }
1320 
1321     /****************************
1322      * A Singleton that loads core.atomic
1323      * Returns:
1324      *  Module of core.atomic, null if couldn't find it
1325      */
1326     extern (D) static Module loadCoreAtomic()
1327     {
1328         __gshared Module core_atomic;
1329         auto pkgids = new Identifier[1];
1330         pkgids[0] = Id.core;
1331         return loadModuleFromLibrary(core_atomic, pkgids, Id.atomic);
1332     }
1333 
1334     /****************************
1335      * A Singleton that loads std.math
1336      * Returns:
1337      *  Module of std.math, null if couldn't find it
1338      */
1339     extern (D) static Module loadStdMath()
1340     {
1341         __gshared Module std_math;
1342         auto pkgids = new Identifier[1];
1343         pkgids[0] = Id.std;
1344         return loadModuleFromLibrary(std_math, pkgids, Id.math);
1345     }
1346 
1347     /**********************************
1348      * Load a Module from the library.
1349      * Params:
1350      *  mod = cached return value of this call
1351      *  pkgids = package identifiers
1352      *  modid = module id
1353      * Returns:
1354      *  Module loaded, null if cannot load it
1355      */
1356     extern (D) private static Module loadModuleFromLibrary(ref Module mod, Identifier[] pkgids, Identifier modid)
1357     {
1358         if (mod)
1359             return mod;
1360 
1361         auto imp = new Import(Loc.initial, pkgids[], modid, null, true);
1362         // Module.load will call fatal() if there's no module available.
1363         // Gag the error here, pushing the error handling to the caller.
1364         const errors = global.startGagging();
1365         imp.load(null);
1366         if (imp.mod)
1367         {
1368             imp.mod.importAll(null);
1369             imp.mod.dsymbolSemantic(null);
1370         }
1371         global.endGagging(errors);
1372         mod = imp.mod;
1373         return mod;
1374     }
1375 }
1376 
1377 /***********************************************************
1378  */
extern (C++) struct ModuleDeclaration
{
    Loc loc;
    Identifier id;
    Identifier[] packages;  // array of Identifier's representing packages
    bool isdeprecated;      // if it is a deprecated module
    Expression msg;

    /// Store the given values in the fields of the same name.
    extern (D) this(const ref Loc loc, Identifier[] packages, Identifier id, Expression msg, bool isdeprecated) @safe
    {
        this.loc = loc;
        this.id = id;
        this.packages = packages;
        this.isdeprecated = isdeprecated;
        this.msg = msg;
    }

    /// Build the dotted name: every package followed by '.', then the module identifier.
    extern (C++) const(char)* toChars() const @safe
    {
        OutBuffer dotted;
        for (size_t idx = 0; idx < packages.length; ++idx)
        {
            dotted.writestring(packages[idx].toString());
            dotted.writestring(".");
        }
        dotted.writestring(id.toString());
        return dotted.extractChars();
    }

    /// Provide a human readable representation
    extern (D) const(char)[] toString() const
    {
        const(char)* cstr = this.toChars();
        return cstr.toDString;
    }
}
1414 
1415 /****************************************
1416  * Create array of the local classes in the Module, suitable
1417  * for inclusion in ModuleInfo
1418  * Params:
1419  *      mod = the Module
1420  *      aclasses = array to fill in
 * Returns: nothing; the local classes are appended to `aclasses`
1422  */
extern (C++) void getLocalClasses(Module mod, ref ClassDeclarations aclasses)
{
    //printf("members.length = %d\n", mod.members.length);

    // Visitor for ScopeDsymbol._foreach; it always returns 0.
    int collectClass(size_t n, Dsymbol sm)
    {
        auto cd = sm ? sm.isClassDeclaration() : null;
        if (cd is null)
            return 0;

        // compatibility with previous algorithm: classes whose parent is a
        // template mixin are skipped
        const bool fromTemplateMixin = cd.parent && cd.parent.isTemplateMixin();

        // Objective-C classes are left out as well.
        if (!fromTemplateMixin && cd.classKind != ClassKind.objc)
            aclasses.push(cd);
        return 0;
    }

    ScopeDsymbol._foreach(null, mod.members, &collectClass);
}
1445 
1446 /**
1447  * Process the content of a source file
1448  *
 * Detects the encoding of the source, from its BOM if it has one,
 * and then normalizes the source to UTF-8. If no conversion is required,
 * a slice of `src` is returned without extra allocation.
1452  *
1453  * Params:
1454  *  src = Content of the source file to process
1455  *  mod = Module matching `src`, used for error handling
1456  *
1457  * Returns:
1458  *   UTF-8 encoded variant of `src`, stripped of any BOM,
1459  *   or `null` if an error happened.
1460  */
private const(char)[] processSource (const(ubyte)[] src, Module mod)
{
    enum SourceEncoding { utf16, utf32}
    enum Endian { little, big}

    /*
     * Re-encode a UTF-32 buffer as UTF-8.
     * Params:
     *    endian = byte order of the code units in `buf`
     *    buf = the UTF-32 data (without BOM)
     * Returns:
     *    the 0-terminated UTF-8 text, or null after reporting an error
     */
    char[] UTF32ToUTF8(Endian endian)(const(char)[] buf)
    {
        static if (endian == Endian.big)
            alias readNext = Port.readlongBE;
        else
            alias readNext = Port.readlongLE;

        // The byte count has to be a whole number of 4-byte units.
        if (buf.length & 3)
        {
            .error(mod.loc, "%s `%s` odd length of UTF-32 char source %llu",
                mod.kind, mod.toPrettyChars, cast(ulong) buf.length);
            return null;
        }

        const(uint)[] units = cast(const(uint)[]) buf;

        OutBuffer utf8;
        utf8.reserve(units.length);

        foreach (pos; 0 .. units.length)
        {
            const cp = readNext(&units[pos]);

            // ASCII is copied through as a single byte.
            if ((cp & ~0x7F) == 0)
            {
                utf8.writeByte(cp);
                continue;
            }
            if (cp > 0x10FFFF)
            {
                .error(mod.loc, "%s `%s` UTF-32 value %08x greater than 0x10FFFF", mod.kind, mod.toPrettyChars, cp);
                return null;
            }
            utf8.writeUTF8(cp);
        }
        utf8.writeByte(0); // terminating 0 byte
        return utf8.extractSlice();
    }

    /*
     * Re-encode a UTF-16 buffer as UTF-8.
     * Params:
     *    endian = byte order of the code units in `buf`
     *    buf = the UTF-16 data (without BOM)
     * Returns:
     *    the 0-terminated UTF-8 text, or null after reporting an error
     */
    char[] UTF16ToUTF8(Endian endian)(const(char)[] buf)
    {
        static if (endian == Endian.big)
            alias readNext = Port.readwordBE;
        else
            alias readNext = Port.readwordLE;

        // The byte count has to be a whole number of 2-byte units.
        if (buf.length & 1)
        {
            .error(mod.loc, "%s `%s` odd length of UTF-16 char source %llu", mod.kind, mod.toPrettyChars, cast(ulong) buf.length);
            return null;
        }

        const(ushort)[] units = cast(const(ushort)[]) buf;

        OutBuffer utf8;
        utf8.reserve(units.length);

        // `pos` is advanced an extra step inside the body whenever a
        // surrogate pair consumes two code units.
        foreach (ref pos; 0 .. units.length)
        {
            uint cp = readNext(&units[pos]);

            // ASCII is copied through as a single byte.
            if ((cp & ~0x7F) == 0)
            {
                utf8.writeByte(cp);
                continue;
            }

            if (0xD800 <= cp && cp < 0xDC00)
            {
                // High surrogate: the following unit must be a low surrogate.
                pos++;
                if (pos >= units.length)
                {
                    .error(mod.loc, "%s `%s` surrogate UTF-16 high value %04x at end of file", mod.kind, mod.toPrettyChars, cp);
                    return null;
                }
                const low = readNext(&units[pos]);
                if (low < 0xDC00 || 0xE000 <= low)
                {
                    .error(mod.loc, "%s `%s` surrogate UTF-16 low value %04x out of range", mod.kind, mod.toPrettyChars, low);
                    return null;
                }
                // Combine the pair; 0xD7C0 is 0xD800 - 0x40, which folds in the 0x10000 offset.
                cp = ((cp - 0xD7C0) << 10) | (low - 0xDC00);
            }
            else if (cp >= 0xDC00 && cp <= 0xDFFF)
            {
                .error(mod.loc, "%s `%s` unpaired surrogate UTF-16 value %04x", mod.kind, mod.toPrettyChars, cp);
                return null;
            }
            else if (cp == 0xFFFE || cp == 0xFFFF)
            {
                .error(mod.loc, "%s `%s` illegal UTF-16 value %04x", mod.kind, mod.toPrettyChars, cp);
                return null;
            }
            utf8.writeUTF8(cp);
        }
        utf8.writeByte(0); // terminating 0 byte
        return utf8.extractSlice();
    }

    const(char)[] buf = cast(const(char)[]) src;

    // Assume the buffer is from memory and has not been read from disk. Assume UTF-8.
    if (buf.length < 2)
        return buf;

    const bool atLeast4 = buf.length >= 4;

    /* Convert all non-UTF-8 formats to UTF-8.
     * BOM : https://www.unicode.org/faq/utf_bom.html
     * 00 00 FE FF  UTF-32BE, big-endian
     * FF FE 00 00  UTF-32LE, little-endian
     * FE FF        UTF-16BE, big-endian
     * FF FE        UTF-16LE, little-endian
     * EF BB BF     UTF-8
     */

    // FF FE starts both the UTF-16LE and the UTF-32LE BOM; two 0 bytes after it select UTF-32.
    if (buf[0] == 0xFF && buf[1] == 0xFE)
    {
        const bool isUtf32 = atLeast4 && buf[2] == 0 && buf[3] == 0;
        if (isUtf32)
            return UTF32ToUTF8!(Endian.little)(buf[4 .. $]);
        return UTF16ToUTF8!(Endian.little)(buf[2 .. $]);
    }

    if (buf[0] == 0xFE && buf[1] == 0xFF)
        return UTF16ToUTF8!(Endian.big)(buf[2 .. $]);

    if (atLeast4 && buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && buf[3] == 0xFF)
        return UTF32ToUTF8!(Endian.big)(buf[4 .. $]);

    // UTF-8 BOM: just skip it.
    if (buf.length >= 3 && buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF)
        return buf[3 .. $];

    /* There is no BOM. Make use of Arcane Jill's insight that
     * the first char of D source must be ASCII to
     * figure out the encoding from where the 0 bytes are.
     */
    if (atLeast4)
    {
        if (buf[1] == 0 && buf[2] == 0 && buf[3] == 0)
            return UTF32ToUTF8!(Endian.little)(buf);
        if (buf[0] == 0 && buf[1] == 0 && buf[2] == 0)
            return UTF32ToUTF8!(Endian.big)(buf);
    }

    // try to check for UTF-16 (at least 2 bytes are present at this point)
    if (buf[1] == 0)
        return UTF16ToUTF8!(Endian.little)(buf);
    if (buf[0] == 0)
        return UTF16ToUTF8!(Endian.big)(buf);

    // It's UTF-8: the only remaining requirement is an ASCII first character.
    if (buf[0] < 0x80)
        return buf;

    auto loc = mod.getLoc();
    .error(loc, "%s `%s` source file must start with BOM or ASCII character, not \\x%02X", mod.kind, mod.toPrettyChars, buf[0]);
    return null;
}