1 /**
2  * A library in the ELF format, used on Unix.
3  *
4  * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/libelf.d, _libelf.d)
8  * Documentation:  https://dlang.org/phobos/dmd_libelf.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/libelf.d
10  */
11 
12 module dmd.libelf;
13 
14 import core.stdc.time;
15 import core.stdc.string;
16 import core.stdc.stdlib;
17 import core.stdc.stdio;
18 version (Posix)
19 {
20     import core.sys.posix.sys.stat;
21     import core.sys.posix.unistd;
22 }
23 version (Windows)
24 {
25     import core.sys.windows.stat;
26 }
27 
28 import dmd.lib;
29 import dmd.location;
30 import dmd.utils;
31 
32 import dmd.root.array;
33 import dmd.root.filename;
34 import dmd.common.outbuffer;
35 import dmd.root.port;
36 import dmd.root.rmem;
37 import dmd.root.string;
38 import dmd.root.stringtable;
39 
40 import dmd.scanelf;
41 
/**
 * Entry point (only public symbol in this module).
 *
 * Returns: a freshly constructed `LibElf`, typed as its `Library` base class
 */
public extern (C++) Library LibElf_factory()
{
    Library lib = new LibElf();
    return lib;
}
47 
48 private: // for the remainder of this module
49 
// Compile-time switch: when true, the printf tracing guarded by
// `static if (LOG)` throughout this module is compiled in.
enum LOG = false;
51 
/// One entry of the library's symbol dictionary: a symbol name and the
/// object module it was registered for (see LibElf.addSymbol).
struct ElfObjSymbol
{
    const(char)[] name; // symbol name (addSymbol stores a copy made with xarraydup)
    ElfObjModule* om;   // module this symbol was registered for
}
57 
// Growable arrays of pointers to the modules / symbols collected by LibElf.
alias ElfObjModules = Array!(ElfObjModule*);
alias ElfObjSymbols = Array!(ElfObjSymbol*);
60 
61 final class LibElf : Library
62 {
63     ElfObjModules objmodules; // ElfObjModule[]
64     ElfObjSymbols objsymbols; // ElfObjSymbol[]
65     StringTable!(ElfObjSymbol*) tab;
66 
    /// Construct an empty library and initialize the symbol lookup table.
    extern (D) this()
    {
        // NOTE(review): 14_000 is presumably an initial size hint for the
        // string table - confirm against StringTable._init.
        tab._init(14_000);
    }
71 
72     /***************************************
73      * Add object module or library to the library.
74      * Examine the buffer to see which it is.
75      * If the buffer is NULL, use module_name as the file name
76      * and load the file.
77      */
78     override void addObject(const(char)[] module_name, const ubyte[] buffer)
79     {
80         static if (LOG)
81         {
82             printf("LibElf::addObject(%.*s)\n",
83                    cast(int)module_name.length, module_name.ptr);
84         }
85 
86         void corrupt(int reason)
87         {
88             eSink.error(loc, "corrupt ELF object module %.*s %d",
89                   cast(int)module_name.length, module_name.ptr, reason);
90         }
91 
92         int fromfile = 0;
93         auto buf = buffer.ptr;
94         auto buflen = buffer.length;
95         if (!buf)
96         {
97             assert(module_name.length);
98             // read file and take buffer ownership
99             auto data = readFile(Loc.initial, module_name).extractSlice();
100             buf = data.ptr;
101             buflen = data.length;
102             fromfile = 1;
103         }
104         if (buflen < 16)
105         {
106             static if (LOG)
107             {
108                 printf("buf = %p, buflen = %d\n", buf, buflen);
109             }
110             return corrupt(__LINE__);
111         }
112         if (memcmp(buf, "!<arch>\n".ptr, 8) == 0)
113         {
114             /* Library file.
115              * Pull each object module out of the library and add it
116              * to the object module array.
117              */
118             static if (LOG)
119             {
120                 printf("archive, buf = %p, buflen = %d\n", buf, buflen);
121             }
122             uint offset = 8;
123             char* symtab = null;
124             uint symtab_size = 0;
125             char* filenametab = null;
126             uint filenametab_size = 0;
127             uint mstart = cast(uint)objmodules.length;
128             while (offset < buflen)
129             {
130                 if (offset + ElfLibHeader.sizeof >= buflen)
131                     return corrupt(__LINE__);
132                 ElfLibHeader* header = cast(ElfLibHeader*)(cast(ubyte*)buf + offset);
133                 offset += ElfLibHeader.sizeof;
134                 char* endptr = null;
135                 uint size = cast(uint)strtoul(header.file_size.ptr, &endptr, 10);
136                 if (endptr >= header.file_size.ptr + 10 || *endptr != ' ')
137                     return corrupt(__LINE__);
138                 if (offset + size > buflen)
139                     return corrupt(__LINE__);
140                 if (header.object_name[0] == '/' && header.object_name[1] == ' ')
141                 {
142                     /* Instead of rescanning the object modules we pull from a
143                      * library, just use the already created symbol table.
144                      */
145                     if (symtab)
146                         return corrupt(__LINE__);
147                     symtab = cast(char*)buf + offset;
148                     symtab_size = size;
149                     if (size < 4)
150                         return corrupt(__LINE__);
151                 }
152                 else if (header.object_name[0] == '/' && header.object_name[1] == '/')
153                 {
154                     /* This is the file name table, save it for later.
155                      */
156                     if (filenametab)
157                         return corrupt(__LINE__);
158                     filenametab = cast(char*)buf + offset;
159                     filenametab_size = size;
160                 }
161                 else
162                 {
163                     auto om = new ElfObjModule();
164                     om.base = cast(ubyte*)buf + offset; /*- sizeof(ElfLibHeader)*/
165                     om.length = size;
166                     om.offset = 0;
167                     if (header.object_name[0] == '/')
168                     {
169                         /* Pick long name out of file name table
170                          */
171                         uint foff = cast(uint)strtoul(header.object_name.ptr + 1, &endptr, 10);
172                         uint i;
173                         for (i = 0; 1; i++)
174                         {
175                             if (foff + i >= filenametab_size)
176                                 return corrupt(__LINE__);
177                             char c = filenametab[foff + i];
178                             if (c == '/')
179                                 break;
180                         }
181                         auto n = cast(char*)Mem.check(malloc(i + 1));
182                         memcpy(n, filenametab + foff, i);
183                         n[i] = 0;
184                         om.name = n[0 .. i];
185                     }
186                     else
187                     {
188                         /* Pick short name out of header
189                          */
190                         auto n = cast(char*)Mem.check(malloc(ELF_OBJECT_NAME_SIZE));
191                         for (int i = 0; 1; i++)
192                         {
193                             if (i == ELF_OBJECT_NAME_SIZE)
194                                 return corrupt(__LINE__);
195                             char c = header.object_name[i];
196                             if (c == '/')
197                             {
198                                 n[i] = 0;
199                                 om.name = n[0 .. i];
200                                 break;
201                             }
202                             n[i] = c;
203                         }
204                     }
205                     om.name_offset = -1;
206                     om.file_time = strtoul(header.file_time.ptr, &endptr, 10);
207                     om.user_id = cast(uint)strtoul(header.user_id.ptr, &endptr, 10);
208                     om.group_id = cast(uint)strtoul(header.group_id.ptr, &endptr, 10);
209                     om.file_mode = cast(uint)strtoul(header.file_mode.ptr, &endptr, 8);
210                     om.scan = 0; // don't scan object module for symbols
211                     objmodules.push(om);
212                 }
213                 offset += (size + 1) & ~1;
214             }
215             if (offset != buflen)
216                 return corrupt(__LINE__);
217             /* Scan the library's symbol table, and insert it into our own.
218              * We use this instead of rescanning the object module, because
219              * the library's creator may have a different idea of what symbols
220              * go into the symbol table than we do.
221              * This is also probably faster.
222              */
223             uint nsymbols = Port.readlongBE(symtab);
224             char* s = symtab + 4 + nsymbols * 4;
225             if (4 + nsymbols * (4 + 1) > symtab_size)
226                 return corrupt(__LINE__);
227             for (uint i = 0; i < nsymbols; i++)
228             {
229                 const(char)[] name = s.toDString();
230                 s += name.length + 1;
231                 if (s - symtab > symtab_size)
232                     return corrupt(__LINE__);
233                 uint moff = Port.readlongBE(symtab + 4 + i * 4);
234                 //printf("symtab[%d] moff = %x  %x, name = %s\n", i, moff, moff + ElfLibHeader.sizeof, name.ptr);
235                 for (uint m = mstart; 1; m++)
236                 {
237                     if (m == objmodules.length)
238                         return corrupt(__LINE__);  // didn't find it
239                     ElfObjModule* om = objmodules[m];
240                     //printf("\t%x\n", cast(char *)om.base - cast(char *)buf);
241                     if (moff + ElfLibHeader.sizeof == cast(char*)om.base - cast(char*)buf)
242                     {
243                         addSymbol(om, name, 1);
244                         //if (mstart == m)
245                         //    mstart++;
246                         break;
247                     }
248                 }
249             }
250             return;
251         }
252         /* It's an object module
253          */
254         auto om = new ElfObjModule();
255         om.base = cast(ubyte*)buf;
256         om.length = cast(uint)buflen;
257         om.offset = 0;
258         // remove path, but not extension
259         om.name = FileName.name(module_name);
260         om.name_offset = -1;
261         om.scan = 1;
262         if (fromfile)
263         {
264             version (Posix)
265                 stat_t statbuf;
266             version (Windows)
267                 struct_stat statbuf;
268             int i = module_name.toCStringThen!(name => stat(name.ptr, &statbuf));
269             if (i == -1) // error, errno is set
270                 return corrupt(__LINE__);
271             om.file_time = statbuf.st_ctime;
272             om.user_id = statbuf.st_uid;
273             om.group_id = statbuf.st_gid;
274             om.file_mode = statbuf.st_mode;
275         }
276         else
277         {
278             /* Mock things up for the object module file that never was
279              * actually written out.
280              */
281             version (Posix)
282             {
283                 __gshared uid_t uid;
284                 __gshared gid_t gid;
285                 __gshared int _init;
286                 if (!_init)
287                 {
288                     _init = 1;
289                     uid = getuid();
290                     gid = getgid();
291                 }
292                 om.user_id = uid;
293                 om.group_id = gid;
294             }
295             version (Windows)
296             {
297                 om.user_id = 0;  // meaningless on Windows
298                 om.group_id = 0; // meaningless on Windows
299             }
300             time_t file_time = 0;
301             time(&file_time);
302             om.file_time = cast(long)file_time;
303             om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
304         }
305         objmodules.push(om);
306     }
307 
308     /*****************************************************************************/
309 
310     void addSymbol(ElfObjModule* om, const(char)[] name, int pickAny = 0)
311     {
312         static if (LOG)
313         {
314             printf("LibElf::addSymbol(%s, %s, %d)\n", om.name.ptr, name.ptr, pickAny);
315         }
316         auto s = tab.insert(name.ptr, name.length, null);
317         if (!s)
318         {
319             // already in table
320             if (!pickAny)
321             {
322                 s = tab.lookup(name.ptr, name.length);
323                 assert(s);
324                 ElfObjSymbol* os = s.value;
325                 eSink.error(loc, "multiple definition of %s: %s and %s: %s", om.name.ptr, name.ptr, os.om.name.ptr, os.name.ptr);
326             }
327         }
328         else
329         {
330             auto os = new ElfObjSymbol();
331             os.name = xarraydup(name);
332             os.om = om;
333             s.value = os;
334             objsymbols.push(os);
335         }
336     }
337 
338 private:
339     /************************************
340      * Scan single object module for dictionary symbols.
341      * Send those symbols to LibElf::addSymbol().
342      */
343     void scanObjModule(ElfObjModule* om)
344     {
345         static if (LOG)
346         {
347             printf("LibElf::scanObjModule(%s)\n", om.name.ptr);
348         }
349 
350         extern (D) void addSymbol(const(char)[] name, int pickAny)
351         {
352             this.addSymbol(om, name, pickAny);
353         }
354 
355         scanElfObjModule(&addSymbol, om.base[0 .. om.length], om.name.ptr, loc, eSink);
356     }
357 
358     /*****************************************************************************/
359     /*****************************************************************************/
360     /**********************************************
361      * Create and write library to libbuf.
362      * The library consists of:
363      *      !<arch>\n
364      *      header
365      *      dictionary
366      *      object modules...
367      */
368     protected override void writeLibToBuffer(ref OutBuffer libbuf)
369     {
370         static if (LOG)
371         {
372             printf("LibElf::WriteLibToBuffer()\n");
373         }
374         /************* Scan Object Modules for Symbols ******************/
375         foreach (om; objmodules)
376         {
377             if (om.scan)
378             {
379                 scanObjModule(om);
380             }
381         }
382         /************* Determine string section ******************/
383         /* The string section is where we store long file names.
384          */
385         uint noffset = 0;
386         foreach (om; objmodules)
387         {
388             size_t len = om.name.length;
389             if (len >= ELF_OBJECT_NAME_SIZE)
390             {
391                 om.name_offset = noffset;
392                 noffset += len + 2;
393             }
394             else
395                 om.name_offset = -1;
396         }
397         static if (LOG)
398         {
399             printf("\tnoffset = x%x\n", noffset);
400         }
401         /************* Determine module offsets ******************/
402         uint moffset = 8 + ElfLibHeader.sizeof + 4;
403         foreach (os; objsymbols)
404         {
405             moffset += 4 + os.name.length + 1;
406         }
407         uint hoffset = moffset;
408         static if (LOG)
409         {
410             printf("\tmoffset = x%x\n", moffset);
411         }
412         moffset += moffset & 1;
413         if (noffset)
414             moffset += ElfLibHeader.sizeof + noffset;
415         foreach (om; objmodules)
416         {
417             moffset += moffset & 1;
418             om.offset = moffset;
419             moffset += ElfLibHeader.sizeof + om.length;
420         }
421         libbuf.reserve(moffset);
422         /************* Write the library ******************/
423         libbuf.write("!<arch>\n");
424         ElfObjModule om;
425         om.name_offset = -1;
426         om.base = null;
427         om.length = cast(uint)(hoffset - (8 + ElfLibHeader.sizeof));
428         om.offset = 8;
429         om.name = "";
430         .time(&om.file_time);
431         om.user_id = 0;
432         om.group_id = 0;
433         om.file_mode = 0;
434         ElfLibHeader h;
435         ElfOmToHeader(&h, &om);
436         libbuf.write((&h)[0 .. 1]);
437         char[4] buf;
438         Port.writelongBE(cast(uint)objsymbols.length, buf.ptr);
439         libbuf.write(buf[0 .. 4]);
440         foreach (os; objsymbols)
441         {
442             Port.writelongBE(os.om.offset, buf.ptr);
443             libbuf.write(buf[0 .. 4]);
444         }
445         foreach (os; objsymbols)
446         {
447             libbuf.writestring(os.name);
448             libbuf.writeByte(0);
449         }
450         static if (LOG)
451         {
452             printf("\tlibbuf.moffset = x%x\n", libbuf.length);
453         }
454         /* Write out the string section
455          */
456         if (noffset)
457         {
458             if (libbuf.length & 1)
459                 libbuf.writeByte('\n');
460             // header
461             memset(&h, ' ', ElfLibHeader.sizeof);
462             h.object_name[0] = '/';
463             h.object_name[1] = '/';
464             size_t len = snprintf(h.file_size.ptr, ELF_FILE_SIZE_SIZE, "%u", noffset);
465             assert(len < 10);
466             h.file_size[len] = ' ';
467             h.trailer[0] = '`';
468             h.trailer[1] = '\n';
469             libbuf.write((&h)[0 .. 1]);
470             foreach (om2; objmodules)
471             {
472                 if (om2.name_offset >= 0)
473                 {
474                     libbuf.writestring(om2.name);
475                     libbuf.writeByte('/');
476                     libbuf.writeByte('\n');
477                 }
478             }
479         }
480         /* Write out each of the object modules
481          */
482         foreach (om2; objmodules)
483         {
484             if (libbuf.length & 1)
485                 libbuf.writeByte('\n'); // module alignment
486             assert(libbuf.length == om2.offset);
487             ElfOmToHeader(&h, om2);
488             libbuf.write((&h)[0 .. 1]); // module header
489             libbuf.write(om2.base[0 .. om2.length]); // module contents
490         }
491         static if (LOG)
492         {
493             printf("moffset = x%x, libbuf.length = x%x\n", moffset, libbuf.length);
494         }
495         assert(libbuf.length == moffset);
496     }
497 }
498 
499 /*****************************************************************************/
500 /*****************************************************************************/
/// In-memory description of one archive member: where its bytes are and
/// the metadata that goes into its archive header (see ElfOmToHeader).
struct ElfObjModule
{
    ubyte* base; // where are we holding it in memory
    uint length; // in bytes
    uint offset; // offset from start of library
    const(char)[] name; // module name (file name) with terminating 0
    int name_offset; // if not -1, offset into string table of name
    time_t file_time; // file time
    uint user_id;   // owner id; ElfOmToHeader squashes values above 999_999 to 0
    uint group_id;  // group id; ElfOmToHeader squashes values above 999_999 to 0
    uint file_mode; // file mode bits, written to the header in octal
    int scan; // 1 means scan for symbols
}
514 
// Widths in bytes of the fields of ElfLibHeader, in field order.
// They add up to 60, the size of one archive member header.
enum ELF_OBJECT_NAME_SIZE = 16;
enum ELF_FILE_TIME_SIZE = 12;
enum ELF_USER_ID_SIZE = 6;
enum ELF_GROUP_ID_SIZE = 6;
enum ELF_FILE_MODE_SIZE = 8;
enum ELF_FILE_SIZE_SIZE = 10;
enum ELF_TRAILER_SIZE = 2;
522 
/// Header preceding every archive member. It is read and written as raw
/// bytes, so the field order and widths are the file layout. All fields
/// are text; this module reads and writes the numeric ones in decimal,
/// except file_mode, which is octal.
struct ElfLibHeader
{
    char[ELF_OBJECT_NAME_SIZE] object_name; // "name/", "/<offset>" (long name), "/ " (symbol table) or "//" (name table)
    char[ELF_FILE_TIME_SIZE] file_time;
    char[ELF_USER_ID_SIZE] user_id;
    char[ELF_GROUP_ID_SIZE] group_id;
    char[ELF_FILE_MODE_SIZE] file_mode; // in octal
    char[ELF_FILE_SIZE_SIZE] file_size; // size of the member contents that follow the header
    char[ELF_TRAILER_SIZE] trailer;     // written as "`\n"
}
533 
/**
 * Format the archive member header for `om` into `h`.
 *
 * If `om.name_offset` is -1 the name itself is stored in the header
 * ("name/"), which requires it to be shorter than ELF_OBJECT_NAME_SIZE;
 * otherwise a reference into the long name table ("/<offset>") is stored.
 *
 * Side effect: `om.user_id` and `om.group_id` are reset to 0 when they do
 * not fit their 6 character fields.
 *
 * Params:
 *      h = header to fill in; every byte of it is overwritten
 *      om = module to describe
 */
extern (C++) void ElfOmToHeader(ElfLibHeader* h, ElfObjModule* om)
{
    char* buffer = cast(char*)h;
    // user_id and group_id are padded on 6 characters in Header struct.
    // Squashing to 0 if more than 999999.
    if (om.user_id > 999_999)
        om.user_id = 0;
    if (om.group_id > 999_999)
        om.group_id = 0;
    size_t len;
    if (om.name_offset == -1)
    {
        // "name/           1423563789  5000  5000  100640  3068      `\n"
        //  |^^^^^^^^^^^^^^^|^^^^^^^^^^^|^^^^^|^^^^^|^^^^^^^|^^^^^^^^^|^^
        //        name       file_time   u_id gr_id  fmode    fsize   trailer
        // The name is printed with an explicit length (%.*s), so the slice
        // does not have to be 0-terminated.
        len = snprintf(buffer, ElfLibHeader.sizeof, "%-16.*s%-12llu%-6u%-6u%-8o%-10u`",
                       cast(int)om.name.length, om.name.ptr, cast(ulong)om.file_time,
                       om.user_id, om.group_id, om.file_mode, om.length);
        // adding '/' after the name field
        const(size_t) name_length = om.name.length;
        assert(name_length < ELF_OBJECT_NAME_SIZE);
        buffer[name_length] = '/';
    }
    else
    {
        // "/162007         1423563789  5000  5000  100640  3068      `\n"
        //  |^^^^^^^^^^^^^^^|^^^^^^^^^^^|^^^^^|^^^^^|^^^^^^^|^^^^^^^^^|^^
        //     name_offset   file_time   u_id gr_id  fmode    fsize   trailer
        len = snprintf(buffer, ElfLibHeader.sizeof, "/%-15d%-12llu%-6u%-6u%-8o%-10u`",
                       om.name_offset, cast(ulong)om.file_time,
                       om.user_id, om.group_id, om.file_mode, om.length);
    }
    // every field must have fit its column exactly
    assert(ElfLibHeader.sizeof > 0 && len == ElfLibHeader.sizeof - 1);
    // Replace trailing \0 with \n. snprintf puts that \0 in the last byte of
    // the header; indexing the last byte directly (instead of buffer[len])
    // keeps the write in bounds even when the assert above is compiled out
    // and a field was wider than its column.
    buffer[ElfLibHeader.sizeof - 1] = '\n';
}