1 /**
2  * A library in the Mach-O format, used on macOS.
3  *
4  * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/libmach.d, _libmach.d)
8  * Documentation:  https://dlang.org/phobos/dmd_libmach.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/libmach.d
10  */
11 
12 module dmd.libmach;
13 
14 import core.stdc.time;
15 import core.stdc.string;
16 import core.stdc.stdlib;
17 import core.stdc.stdio;
18 import core.stdc.config;
19 
20 version (Posix)
21 {
22     import core.sys.posix.sys.stat;
23     import core.sys.posix.unistd;
24 }
25 version (Windows)
26 {
27     import core.sys.windows.stat;
28 }
29 
30 import dmd.lib;
31 import dmd.location;
32 import dmd.utils;
33 
34 import dmd.root.array;
35 import dmd.root.filename;
36 import dmd.common.outbuffer;
37 import dmd.root.port;
38 import dmd.root.rmem;
39 import dmd.root.string;
40 import dmd.root.stringtable;
41 
42 import dmd.scanmach;
43 
44 // Entry point (only public symbol in this module).
/// Creates a fresh, empty Mach-O library builder and hands it back through
/// the generic `Library` interface.
public extern (C++) Library LibMach_factory()
{
    Library machLibrary = new LibMach();
    return machLibrary;
}
49 
private: // for the remainder of this module

// Compile-time switch: when true, the `static if (LOG)` blocks in this module
// are compiled in and print trace output via printf.
enum LOG = false;
53 
/// One entry of the library's symbol dictionary: a symbol name and the
/// object module it is attributed to.
struct MachObjSymbol
{
    // Symbol name. Original note: "still has a terminating 0".
    // NOTE(review): LibMach.addSymbol stores xarraydup(name); confirm that the
    // copy really carries a terminator before relying on `name.ptr` as a C string.
    const(char)[] name;
    MachObjModule* om;          // module the symbol was recorded for (see LibMach.addSymbol)
}
59 
// Growable arrays of pointers, used by LibMach for its module list and
// symbol list respectively.
alias MachObjModules = Array!(MachObjModule*);
alias MachObjSymbols = Array!(MachObjSymbol*);
62 
/**
 * Builder for a static library in the `ar` archive format used with Mach-O
 * object files.
 *
 * Modules are collected with `addObject` (either single object modules or
 * the members of an existing archive) and the finished archive is produced
 * by `writeLibToBuffer`.
 */
final class LibMach : Library
{
    MachObjModules objmodules; // MachObjModule[]
    MachObjSymbols objsymbols; // MachObjSymbol[]
    StringTable!(MachObjSymbol*) tab; // only referenced by the disabled duplicate check in addSymbol

    extern (D) this()
    {
        tab._init(14_000);
    }

    /***************************************
     * Add object module or library to the library.
     * Examine the buffer to see which it is.
     * If the buffer is NULL, use module_name as the file name
     * and load the file.
     *
     * For an archive, every member except the `__.SYMDEF` symbol table is
     * appended to `objmodules` (not rescanned), and the archive's own symbol
     * table is imported through `addSymbol`. Anything else is treated as a
     * single object module that will be scanned when the library is written.
     *
     * Malformed input is reported through `eSink.error` as
     * "corrupt Mach object module <name> <source line>" and the call returns.
     *
     * Params:
     *      module_name = file name of the module or archive
     *      buffer = contents of the file, or null to read `module_name` from disk
     */
    override void addObject(const(char)[] module_name, const ubyte[] buffer)
    {
        static if (LOG)
        {
            printf("LibMach::addObject(%.*s)\n",
                   cast(int)module_name.length, module_name.ptr);
        }

        // Report corruption; `reason` is the source line of the failed check.
        void corrupt(int reason)
        {
            eSink.error(loc, "corrupt Mach object module %.*s %d",
                  cast(int)module_name.length, module_name.ptr, reason);
        }

        int fromfile = 0;
        auto buf = buffer.ptr;
        auto buflen = buffer.length;
        if (!buf)
        {
            // check the length first so an empty name cannot be indexed
            assert(module_name.length && module_name[0]);
            // read file and take buffer ownership
            auto data = readFile(Loc.initial, module_name).extractSlice();
            buf = data.ptr;
            buflen = data.length;
            fromfile = 1;
        }
        if (buflen < 16)
        {
            static if (LOG)
            {
                printf("buf = %p, buflen = %zu\n", buf, buflen);
            }
            return corrupt(__LINE__);
        }
        if (memcmp(buf, "!<arch>\n".ptr, 8) == 0)
        {
            /* Library file.
             * Pull each object module out of the library and add it
             * to the object module array.
             */
            static if (LOG)
            {
                printf("archive, buf = %p, buflen = %zu\n", buf, buflen);
            }
            uint offset = 8;
            char* symtab = null;
            uint symtab_size = 0;
            uint mstart = cast(uint)objmodules.length;
            while (offset < buflen)
            {
                if (offset + MachLibHeader.sizeof >= buflen)
                    return corrupt(__LINE__);
                MachLibHeader* header = cast(MachLibHeader*)(cast(ubyte*)buf + offset);
                offset += MachLibHeader.sizeof;
                char* endptr = null;
                uint size = cast(uint)strtoul(header.file_size.ptr, &endptr, 10);
                if (endptr >= header.file_size.ptr + 10 || *endptr != ' ')
                    return corrupt(__LINE__);
                /* offset < buflen is guaranteed by the header check above, so
                 * comparing against the remaining byte count cannot wrap the
                 * way `offset + size` could in 32-bit arithmetic.
                 */
                if (size > buflen - offset)
                    return corrupt(__LINE__);
                if (memcmp(header.object_name.ptr, "__.SYMDEF       ".ptr, 16) == 0 ||
                    memcmp(header.object_name.ptr, "__.SYMDEF SORTED".ptr, 16) == 0)
                {
                    /* Instead of rescanning the object modules we pull from a
                     * library, just use the already created symbol table.
                     */
                    if (symtab)
                        return corrupt(__LINE__);       // more than one symbol table
                    symtab = cast(char*)buf + offset;
                    symtab_size = size;
                    if (size < 4)
                        return corrupt(__LINE__);
                }
                else
                {
                    auto om = new MachObjModule();
                    // base/length cover the member header too, so the member
                    // can later be copied out verbatim
                    om.base = cast(ubyte*)buf + offset - MachLibHeader.sizeof;
                    om.length = cast(uint)(size + MachLibHeader.sizeof);
                    om.offset = 0;
                    // NOTE(review): assumes the member name is stored as a
                    // 0-terminated string directly after the header — confirm
                    const n = cast(const(char)*)(om.base + MachLibHeader.sizeof);
                    om.name = n.toDString();
                    om.file_time = cast(uint)strtoul(header.file_time.ptr, &endptr, 10);
                    om.user_id = cast(uint)strtoul(header.user_id.ptr, &endptr, 10);
                    om.group_id = cast(uint)strtoul(header.group_id.ptr, &endptr, 10);
                    om.file_mode = cast(uint)strtoul(header.file_mode.ptr, &endptr, 8);
                    om.scan = 0; // don't scan object module for symbols
                    objmodules.push(om);
                }
                offset += (size + 1) & ~1;      // member data is padded to an even size
            }
            if (offset != buflen)
                return corrupt(__LINE__);
            /* Scan the library's symbol table, and insert it into our own.
             * We use this instead of rescanning the object module, because
             * the library's creator may have a different idea of what symbols
             * go into the symbol table than we do.
             * This is also probably faster.
             */
            if (!symtab)
                return corrupt(__LINE__);       // archive has no __.SYMDEF member
            const uint nsymbols = Port.readlongLE(symtab) / 8;
            /* Layout: 4-byte table size, nsymbols 8-byte entries
             * (string offset, module offset), 4-byte string table size, strings.
             * Computed in 64 bits so a huge count cannot wrap the check.
             */
            const ulong strtabStart = 4 + cast(ulong)nsymbols * 8 + 4;
            if (strtabStart > symtab_size)
                return corrupt(__LINE__);
            char* s = symtab + cast(size_t)strtabStart;
            const size_t strtabLen = symtab_size - cast(size_t)strtabStart;
            for (uint i = 0; i < nsymbols; i++)
            {
                uint soff = Port.readlongLE(symtab + 4 + i * 8);
                // the name must start inside the string area ...
                if (soff >= strtabLen)
                    return corrupt(__LINE__);
                const(char)* name = s + soff;
                // ... and be terminated before the symbol table ends
                auto terminator = cast(const(char)*)memchr(name, 0, strtabLen - soff);
                if (!terminator)
                    return corrupt(__LINE__);
                size_t namelen = terminator - name;
                //printf("soff = x%x name = %s\n", soff, name);
                uint moff = Port.readlongLE(symtab + 4 + i * 8 + 4);
                //printf("symtab[%d] moff = x%x  x%x, name = %s\n", i, moff, moff + MachLibHeader.sizeof, name);
                // find the module (among those just added) whose header sits at moff
                for (uint m = mstart; 1; m++)
                {
                    if (m == objmodules.length)
                        return corrupt(__LINE__);       // didn't find it
                    MachObjModule* om = objmodules[m];
                    //printf("\tom offset = x%x\n", cast(char *)om.base - cast(char *)buf);
                    if (moff == cast(char*)om.base - cast(char*)buf)
                    {
                        addSymbol(om, name[0 .. namelen], 1);
                        //if (mstart == m)
                        //    mstart++;
                        break;
                    }
                }
            }
            return;
        }
        /* It's an object module
         */
        auto om = new MachObjModule();
        om.base = cast(ubyte*)buf;
        om.length = cast(uint)buflen;
        om.offset = 0;
        const n = FileName.name(module_name); // remove path, but not extension
        om.name = n;
        om.scan = 1;
        if (fromfile)
        {
            version (Posix)
                stat_t statbuf;
            version (Windows)
                struct_stat statbuf;
            int i = module_name.toCStringThen!(slice => stat(slice.ptr, &statbuf));
            if (i == -1) // error, errno is set
                return corrupt(__LINE__);
            om.file_time = statbuf.st_ctime;
            om.user_id = statbuf.st_uid;
            om.group_id = statbuf.st_gid;
            om.file_mode = statbuf.st_mode;
        }
        else
        {
            /* Mock things up for the object module file that never was
             * actually written out.
             */
            version (Posix)
            {
                // query the ids once and cache them for later modules
                __gshared uid_t uid;
                __gshared gid_t gid;
                __gshared int _init;
                if (!_init)
                {
                    _init = 1;
                    uid = getuid();
                    gid = getgid();
                }
                om.user_id = uid;
                om.group_id = gid;
            }
            version (Windows)
            {
                om.user_id = 0; // meaningless on Windows
                om.group_id = 0;        // meaningless on Windows
            }
            time(&om.file_time);
            om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
        }
        objmodules.push(om);
    }

    /*****************************************************************************/

    /**
     * Record `name` as a dictionary symbol defined by module `om`.
     *
     * The name is copied. Duplicates are not diagnosed here; the duplicate
     * check is compiled out (`version (none)`) and left to the linker.
     *
     * Params:
     *      om = module the symbol belongs to
     *      name = symbol name
     *      pickAny = only consulted by the disabled duplicate check
     */
    void addSymbol(MachObjModule* om, const(char)[] name, int pickAny = 0)
    {
        static if (LOG)
        {
            printf("LibMach::addSymbol(%.*s, %.*s, %d)\n",
                   cast(int)om.name.length, om.name.ptr,
                   cast(int)name.length, name.ptr, pickAny);
        }
        version (none)
        {
            // let linker sort out duplicates
            StringValue* s = tab.insert(name.ptr, name.length, null);
            if (!s)
            {
                // already in table
                if (!pickAny)
                {
                    s = tab.lookup(name.ptr, name.length);
                    assert(s);
                    MachObjSymbol* os = cast(MachObjSymbol*)s.ptrvalue;
                    eSink.error(loc, "multiple definition of %s: %s and %s: %s", om.name.ptr, name.ptr, os.om.name.ptr, os.name.ptr);
                }
            }
            else
            {
                auto os = new MachObjSymbol();
                os.name = xarraydup(name);
                os.om = om;
                s.ptrvalue = cast(void*)os;
                objsymbols.push(os);
            }
        }
        else
        {
            auto os = new MachObjSymbol();
            os.name = xarraydup(name);
            os.om = om;
            objsymbols.push(os);
        }
    }

private:
    /************************************
     * Scan single object module for dictionary symbols.
     * Send those symbols to LibMach::addSymbol().
     */
    void scanObjModule(MachObjModule* om)
    {
        static if (LOG)
        {
            printf("LibMach::scanObjModule(%.*s)\n", cast(int)om.name.length, om.name.ptr);
        }

        // callback for the scanner: binds the module being scanned
        extern (D) void addSymbol(const(char)[] name, int pickAny)
        {
            this.addSymbol(om, name, pickAny);
        }

        // NOTE(review): om.name.ptr is passed as a C string here; confirm the
        // slice stored in om.name is 0-terminated
        scanMachObjModule(&addSymbol, om.base[0 .. om.length], om.name.ptr, loc, eSink);
    }

    /*****************************************************************************/
    /*****************************************************************************/
    /**********************************************
     * Create and write library to libbuf.
     * The library consists of:
     *      !<arch>\n
     *      header
     *      dictionary
     *      object modules...
     *
     * Works in three passes: scan the modules flagged for scanning, compute
     * the archive offset of every module, then emit the symbol table member
     * followed by the modules. The asserts cross-check the emitted size
     * against the computed offsets.
     */
    protected override void writeLibToBuffer(ref OutBuffer libbuf)
    {
        static if (LOG)
        {
            printf("LibMach::WriteLibToBuffer()\n");
        }
        __gshared char* pad = [0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A];
        /************* Scan Object Modules for Symbols ******************/
        for (size_t i = 0; i < objmodules.length; i++)
        {
            MachObjModule* om = objmodules[i];
            if (om.scan)
            {
                scanObjModule(om);
            }
        }
        /************* Determine module offsets ******************/
        // magic + symbol table header + table size word + string size word
        uint moffset = 8 + MachLibHeader.sizeof + 4 + 4;
        for (size_t i = 0; i < objsymbols.length; i++)
        {
            MachObjSymbol* os = objsymbols[i];
            moffset += 8 + os.name.length + 1;  // table entry + name + terminator
        }
        moffset = (moffset + 3) & ~3;
        //if (moffset & 4)
        //    moffset += 4;
        uint hoffset = moffset;         // end of the symbol table member
        static if (LOG)
        {
            printf("\tmoffset = x%x\n", moffset);
        }
        for (size_t i = 0; i < objmodules.length; i++)
        {
            MachObjModule* om = objmodules[i];
            moffset += moffset & 1;     // modules start on an even offset
            om.offset = moffset;
            if (om.scan)
            {
                // fresh module: header + padded name + contents padded to 8 bytes
                const slen = om.name.length;
                int nzeros = 8 - ((slen + 4) & 7);
                if (nzeros < 4)
                    nzeros += 8; // emulate mysterious behavior of ar
                int filesize = om.length;
                filesize = (filesize + 7) & ~7;
                moffset += MachLibHeader.sizeof + slen + nzeros + filesize;
            }
            else
            {
                // member taken from an existing archive: length includes its header
                moffset += om.length;
            }
        }
        libbuf.reserve(moffset);
        /************* Write the library ******************/
        libbuf.write("!<arch>\n");
        // pseudo module describing the symbol table member
        MachObjModule om;
        om.base = null;
        om.length = cast(uint)(hoffset - (8 + MachLibHeader.sizeof));
        om.offset = 8;
        om.name = "";
        .time(&om.file_time);
        version (Posix)
        {
            om.user_id = getuid();
            om.group_id = getgid();
        }
        version (Windows)
        {
            om.user_id = 0;
            om.group_id = 0;
        }
        om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
        MachLibHeader h;
        MachOmToHeader(&h, &om);
        memcpy(h.object_name.ptr, "__.SYMDEF".ptr, 9);
        /* Format the size in a scratch buffer with room for the terminator:
         * printing straight into the 10-byte field would truncate a 10-digit
         * value and leave a NUL inside the header.
         */
        char[MACH_FILE_SIZE_SIZE + 1] sizetext = void;
        const int len = snprintf(sizetext.ptr, sizetext.length, "%u", om.length);
        assert(len > 0 && len <= MACH_FILE_SIZE_SIZE);
        h.file_size[] = ' ';
        h.file_size[0 .. len] = sizetext[0 .. len];
        libbuf.write((&h)[0 .. 1]);
        char[4] buf;
        Port.writelongLE(cast(uint)(objsymbols.length * 8), buf.ptr);
        libbuf.write(buf[0 .. 4]);
        int stringoff = 0;
        for (size_t i = 0; i < objsymbols.length; i++)
        {
            MachObjSymbol* os = objsymbols[i];
            Port.writelongLE(stringoff, buf.ptr);       // offset of name in string table
            libbuf.write(buf[0 .. 4]);
            Port.writelongLE(os.om.offset, buf.ptr);    // offset of defining module
            libbuf.write(buf[0 .. 4]);
            stringoff += os.name.length + 1;
        }
        Port.writelongLE(stringoff, buf.ptr);           // total string table size
        libbuf.write(buf[0 .. 4]);
        for (size_t i = 0; i < objsymbols.length; i++)
        {
            MachObjSymbol* os = objsymbols[i];
            libbuf.writestring(os.name);
            libbuf.writeByte(0);
        }
        while (libbuf.length & 3)
            libbuf.writeByte(0);
        //if (libbuf.length & 4)
        //    libbuf.write(pad[0 .. 4]);
        static if (LOG)
        {
            printf("\tlibbuf.moffset = x%zx\n", libbuf.length);
        }
        assert(libbuf.length == hoffset);
        /* Write out each of the object modules
         */
        for (size_t i = 0; i < objmodules.length; i++)
        {
            MachObjModule* om2 = objmodules[i];
            if (libbuf.length & 1)
                libbuf.writeByte('\n'); // module alignment
            assert(libbuf.length == om2.offset);
            if (om2.scan)
            {
                MachOmToHeader(&h, om2);
                libbuf.write((&h)[0 .. 1]); // module header
                libbuf.write(om2.name.ptr[0 .. om2.name.length]);
                int nzeros = 8 - ((om2.name.length + 4) & 7);
                if (nzeros < 4)
                    nzeros += 8; // emulate mysterious behavior of ar
                libbuf.fill0(nzeros);
                libbuf.write(om2.base[0 .. om2.length]); // module contents
                // obj modules are padded out to 8 bytes in length with 0x0A
                int filealign = om2.length & 7;
                if (filealign)
                {
                    libbuf.write(pad[0 .. 8 - filealign]);
                }
            }
            else
            {
                libbuf.write(om2.base[0 .. om2.length]); // module contents
            }
        }
        static if (LOG)
        {
            printf("moffset = x%x, libbuf.length = x%zx\n", moffset, libbuf.length);
        }
        assert(libbuf.length == moffset);
    }
}
478 
479 /*****************************************************************************/
480 /*****************************************************************************/
/// Bookkeeping for one member of the library being built.
struct MachObjModule
{
    ubyte* base; // where are we holding it in memory
    uint length; // in bytes
    uint offset; // offset from start of library
    // module name (file name) with terminating 0
    // NOTE(review): for modules added from a file name this is a slice of the
    // caller's string (FileName.name); confirm it is really 0-terminated
    const(char)[] name;
    c_long file_time; // file time
    uint user_id;     // owner id written to the member header
    uint group_id;    // group id written to the member header
    uint file_mode;   // file mode bits, written to the member header in octal
    int scan; // 1 means scan for symbols
}
493 
// Widths, in bytes, of the fixed-size text fields of MachLibHeader.
enum MACH_OBJECT_NAME_SIZE = 16;
enum MACH_FILE_TIME_SIZE = 12;
enum MACH_USER_ID_SIZE = 6;
enum MACH_GROUP_ID_SIZE = 6;
enum MACH_FILE_MODE_SIZE = 8;
enum MACH_FILE_SIZE_SIZE = 10;
enum MACH_TRAILER_SIZE = 2;
501 
/// Header that precedes every archive member. It is read from and written
/// to the archive as raw bytes, so the field order and widths are the file
/// layout. MachOmToHeader fills the text fields left-justified and padded
/// with blanks.
struct MachLibHeader
{
    char[MACH_OBJECT_NAME_SIZE] object_name; // member name; MachOmToHeader writes "#1/<n>" here
    char[MACH_FILE_TIME_SIZE] file_time;     // decimal
    char[MACH_USER_ID_SIZE] user_id;         // decimal
    char[MACH_GROUP_ID_SIZE] group_id;       // decimal
    char[MACH_FILE_MODE_SIZE] file_mode; // in octal
    char[MACH_FILE_SIZE_SIZE] file_size;     // decimal size of the data following the header
    char[MACH_TRAILER_SIZE] trailer;         // MachOmToHeader sets this to "`\n"
}
512 
/**
 * Fill in the archive member header `h` for object module `om`.
 *
 * The name field receives "#1/<n>", where <n> is the length of the module
 * name plus its zero padding; the size field counts that padded name plus
 * the module contents rounded up to a multiple of 8. Every text field is
 * left-justified and padded with blanks, with no terminating NUL.
 *
 * Each value is formatted in a scratch buffer and then copied into its
 * field. Printing straight into a field with `snprintf(field, FIELD_SIZE, ...)`
 * reserves one byte for the terminator, which drops the last digit of any
 * value that fills the field exactly (e.g. a 6-digit user id) and leaves a
 * NUL inside the header.
 *
 * Side effect: a user or group id above 999_999 does not fit its field and
 * is reset to 0 in `om` itself.
 *
 * Params:
 *      h = header to fill in
 *      om = module to describe
 */
extern (C++) void MachOmToHeader(MachLibHeader* h, MachObjModule* om)
{
    // Store the first `len` characters of `text` left-justified in `field`,
    // padded with blanks. A value wider than the field is a logic error; the
    // copy is clamped so it can never write outside the field.
    extern (D) static void putField(char[] field, const(char)[] text, int len)
    {
        assert(len >= 0 && cast(size_t)len <= field.length, "value too wide for archive header field");
        size_t n = len < 0 ? 0 : len;
        if (n > field.length)
            n = field.length;
        if (n > text.length)
            n = text.length;
        field[] = ' ';
        field[0 .. n] = text[0 .. n];
    }

    char[32] scratch = void; // large enough for "#1/" plus any 64-bit decimal

    const slen = om.name.length;
    int nzeros = 8 - ((slen + 4) & 7);
    if (nzeros < 4)
        nzeros += 8; // emulate mysterious behavior of ar

    int len = snprintf(scratch.ptr, scratch.length, "#1/%lld", cast(long)(slen + nzeros));
    putField(h.object_name[], scratch[], len);

    len = snprintf(scratch.ptr, scratch.length, "%llu", cast(ulong)om.file_time);
    putField(h.file_time[], scratch[], len);

    if (om.user_id > 999_999) // yes, it happens
        om.user_id = 0; // don't really know what to do here
    len = snprintf(scratch.ptr, scratch.length, "%u", om.user_id);
    putField(h.user_id[], scratch[], len);

    if (om.group_id > 999_999) // yes, it happens
        om.group_id = 0; // don't really know what to do here
    len = snprintf(scratch.ptr, scratch.length, "%u", om.group_id);
    putField(h.group_id[], scratch[], len);

    len = snprintf(scratch.ptr, scratch.length, "%o", om.file_mode);
    putField(h.file_mode[], scratch[], len);

    // contents are padded to a multiple of 8; done in 64 bits so lengths
    // above 2^31 do not turn negative
    const ulong filesize = (cast(ulong)om.length + 7) & ~7UL;
    len = snprintf(scratch.ptr, scratch.length, "%llu", cast(ulong)(slen + nzeros + filesize));
    putField(h.file_size[], scratch[], len);

    h.trailer[0] = '`';
    h.trailer[1] = '\n';
}