1 /**
2  * Extract symbols from an ELF object file.
3  *
4  * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/scanelf.d, _scanelf.d)
8  * Documentation:  https://dlang.org/phobos/dmd_scanelf.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/scanelf.d
10  */
11 
12 module dmd.scanelf;
13 
14 import core.stdc.string;
15 import core.stdc.stdint;
16 import core.checkedint;
17 
18 import dmd.errorsink;
19 import dmd.location;
20 
21 enum LOG = false;
22 
/*****************************************
 * Reads an ELF object module from base[] and passes the names
 * of any exported symbols to pAddSymbol().
 *
 * Only little-endian ELF32/ELF64 relocatable files are accepted. A symbol
 * is reported when its binding is STB_GLOBAL or STB_WEAK and its section
 * index is not SHN_UNDEF. Scanning stops at the first problem found, which
 * is reported through eSink; names already passed to pAddSymbol before that
 * point are not retracted.
 *
 * Params:
 *      pAddSymbol =  delegate that receives each symbol name (a slice into
 *                    base[], without the terminating 0) and a pickAny flag,
 *                    which is always 1
 *      base =        array of contents of object module
 *      module_name = name of the object module (used for error messages)
 *      loc =         location to use for error printing
 *      eSink =       where the error messages go
 */
void scanElfObjModule(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        const(ubyte)[] base, const(char)* module_name, Loc loc, ErrorSink eSink)
{
    static if (LOG)
    {
        import core.stdc.stdio : printf; // not imported at module scope
        printf("scanElfObjModule(%s)\n", module_name);
    }

    // Report a malformed file; `reason` is the source line of the failed check.
    void corrupt(int reason)
    {
        eSink.error(loc, "corrupt ELF object module `%s` %d", module_name, reason);
    }

    if (base.length < Elf32_Ehdr.sizeof)
        return corrupt(__LINE__); // must be at least large enough for ELF32
    static immutable ubyte[4] elf = [0x7F, 'E', 'L', 'F']; // ELF file signature
    if (base[0 .. elf.length] != elf[])
        return corrupt(__LINE__);

    if (base[EI_VERSION] != EV_CURRENT)
    {
        return eSink.error(loc, "ELF object module `%s` has EI_VERSION = %d, should be %d",
            module_name, base[EI_VERSION], EV_CURRENT);
    }
    if (base[EI_DATA] != ELFDATA2LSB)
    {
        return eSink.error(loc, "ELF object module `%s` is byte swapped and unsupported", module_name);
    }
    if (base[EI_CLASS] != ELFCLASS32 && base[EI_CLASS] != ELFCLASS64)
    {
        return eSink.error(loc, "ELF object module `%s` is unrecognized class %d", module_name, base[EI_CLASS]);
    }

    /* Scan the file as ELF32 (model == 32) or ELF64 (model == 64).
     * The two variants differ only in the header/section/symbol struct types.
     */
    void scanELF(uint model)()
    {
        static if (model == 32)
        {
            alias ElfXX_Ehdr = Elf32_Ehdr;
            alias ElfXX_Shdr = Elf32_Shdr;
            alias ElfXX_Sym = Elf32_Sym;
        }
        else
        {
            static assert(model == 64);
            alias ElfXX_Ehdr = Elf64_Ehdr;
            alias ElfXX_Shdr = Elf64_Shdr;
            alias ElfXX_Sym = Elf64_Sym;
        }

        // The caller only guaranteed room for the (smaller) ELF32 header
        if (base.length < ElfXX_Ehdr.sizeof)
            return corrupt(__LINE__);

        const eh = cast(const(ElfXX_Ehdr)*) base.ptr;
        if (eh.e_type != ET_REL)
            return eSink.error(loc, "ELF object module `%s` is not relocatable", module_name);
        if (eh.e_version != EV_CURRENT)
            return corrupt(__LINE__);

        /* The section header table is indexed below as an array of
         * ElfXX_Shdr, so the entry size recorded in the file must equal
         * ElfXX_Shdr.sizeof. Without this check a too-small e_shentsize
         * would let the bounds test pass while the slice extends past the
         * end of base[].
         */
        if (eh.e_shnum != 0 && eh.e_shentsize != ElfXX_Shdr.sizeof)
            return corrupt(__LINE__);

        // The whole section header table must lie inside base[]
        bool overflow;
        const end = addu(eh.e_shoff, mulu(eh.e_shentsize, eh.e_shnum, overflow), overflow);
        if (overflow || end > base.length)
            return corrupt(__LINE__);

        // e_shoff <= base.length was established above, so it fits in size_t
        const sections = (cast(const(ElfXX_Shdr)*)(base.ptr + cast(size_t)eh.e_shoff))[0 .. eh.e_shnum];

        // true if [sh_offset, sh_offset + sh_size) is not entirely inside base[]
        bool outOfBounds(const ref ElfXX_Shdr shdr)
        {
            bool wrapped;
            const limit = addu(shdr.sh_offset, shdr.sh_size, wrapped);
            return wrapped || limit > base.length;
        }

        /* For each Section
         */
        foreach (ref const section; sections)
        {
            if (section.sh_type != SHT_SYMTAB)
                continue;

            if (outOfBounds(section))
                return corrupt(__LINE__);

            /* sh_link gives the particular string table section
             * used for the symbol names.
             */
            if (section.sh_link >= eh.e_shnum)
                return corrupt(__LINE__);

            const string_section = &sections[section.sh_link];
            if (string_section.sh_type != SHT_STRTAB)
                return corrupt(__LINE__);

            if (outOfBounds(*string_section))
                return corrupt(__LINE__);

            const string_tab = (cast(const(char)[])base)
                [cast(size_t)string_section.sh_offset ..
                 cast(size_t)(string_section.sh_offset + string_section.sh_size)];

            /* Get the array of symbols this section refers to.
             * A trailing partial entry (sh_size not a multiple of the
             * symbol size) is ignored by the integer division.
             */
            const symbols = (cast(const(ElfXX_Sym)*)(base.ptr + cast(size_t)section.sh_offset))
                [0 .. cast(size_t)(section.sh_size / ElfXX_Sym.sizeof)];

            foreach (ref const sym; symbols)
            {
                const stb = sym.st_info >> 4; // binding is the high nibble of st_info
                if (stb != STB_GLOBAL && stb != STB_WEAK || sym.st_shndx == SHN_UNDEF)
                    continue; // it's extern

                if (sym.st_name >= string_tab.length)
                    return corrupt(__LINE__);

                const name = &string_tab[sym.st_name];
                //printf("sym st_name = x%x\n", sym.st_name);
                // Search only up to the end of the string table for the terminator
                const pend = cast(const(char*)) memchr(name, 0, string_tab.length - sym.st_name);
                if (!pend)       // if didn't find terminating 0 inside the string section
                    return corrupt(__LINE__);
                pAddSymbol(name[0 .. pend - name], 1);
            }
        }
    }

    if (base[EI_CLASS] == ELFCLASS32)
    {
        scanELF!32;
    }
    else
    {
        assert(base[EI_CLASS] == ELFCLASS64);
        scanELF!64;
    }
}
164 
/* Integer types used for the fields of the ELF structs in this module,
 * spelled with D's built-in fixed-size types and grouped by width.
 */

// 16-bit unsigned
alias Elf32_Half    = ushort;
alias Elf64_Half    = ushort;
alias Elf32_Section = ushort;
alias Elf64_Section = ushort;
alias Elf32_Versym  = Elf32_Half;
alias Elf64_Versym  = Elf64_Half;

// 32-bit unsigned / signed
alias Elf32_Word  = uint;
alias Elf64_Word  = uint;
alias Elf32_Sword = int;
alias Elf64_Sword = int;

// 64-bit unsigned / signed
alias Elf32_Xword  = ulong;
alias Elf64_Xword  = ulong;
alias Elf32_Sxword = long;
alias Elf64_Sxword = long;

// Addresses and file offsets: 32 bits wide for ELF32, 64 bits wide for ELF64
alias Elf32_Addr = uint;
alias Elf32_Off  = uint;
alias Elf64_Addr = ulong;
alias Elf64_Off  = ulong;
189 
/**
 * File header of an ELF32 object, overlaid on the first bytes of the file.
 * Field order and types define the on-disk layout and must not be changed.
 * scanElfObjModule reads only e_type, e_version, e_shoff, e_shentsize and
 * e_shnum through this struct; the other fields are present for layout.
 */
struct Elf32_Ehdr
{
    char[EI_NIDENT] e_ident = 0;  /// identification bytes (signature and the EI_* indexed bytes)
    Elf32_Half    e_type;         /// must be ET_REL for the module to be scanned
    Elf32_Half    e_machine;
    Elf32_Word    e_version;      /// must be EV_CURRENT
    Elf32_Addr    e_entry;
    Elf32_Off     e_phoff;
    Elf32_Off     e_shoff;        /// byte offset of the section header table from the start of the file
    Elf32_Word    e_flags;
    Elf32_Half    e_ehsize;
    Elf32_Half    e_phentsize;
    Elf32_Half    e_phnum;
    Elf32_Half    e_shentsize;    /// multiplied by e_shnum to bounds-check the section header table
    Elf32_Half    e_shnum;        /// number of entries in the section header table
    Elf32_Half    e_shstrndx;
}
207 
/**
 * File header of an ELF64 object, overlaid on the first bytes of the file.
 * Same field names as Elf32_Ehdr, with 64-bit address and offset fields.
 * Field order and types define the on-disk layout and must not be changed.
 * scanElfObjModule reads only e_type, e_version, e_shoff, e_shentsize and
 * e_shnum through this struct; the other fields are present for layout.
 */
struct Elf64_Ehdr
{
    char[EI_NIDENT] e_ident = 0;  /// identification bytes (signature and the EI_* indexed bytes)
    Elf64_Half    e_type;         /// must be ET_REL for the module to be scanned
    Elf64_Half    e_machine;
    Elf64_Word    e_version;      /// must be EV_CURRENT
    Elf64_Addr    e_entry;
    Elf64_Off     e_phoff;
    Elf64_Off     e_shoff;        /// byte offset of the section header table from the start of the file
    Elf64_Word    e_flags;
    Elf64_Half    e_ehsize;
    Elf64_Half    e_phentsize;
    Elf64_Half    e_phnum;
    Elf64_Half    e_shentsize;    /// multiplied by e_shnum to bounds-check the section header table
    Elf64_Half    e_shnum;        /// number of entries in the section header table
    Elf64_Half    e_shstrndx;
}
225 
/// Length of the e_ident[] array at the start of the ELF header
enum EI_NIDENT = 16;

/// Byte indices into the start of the file that scanElfObjModule inspects
enum
{
    EI_CLASS   = 4,
    EI_DATA    = 5,
    EI_VERSION = 6,
}

/// Expected value of both the EI_VERSION byte and the e_version header field
enum EV_CURRENT = 1;

/// Values of the EI_DATA byte; only ELFDATA2LSB is accepted by scanElfObjModule
enum
{
    ELFDATANONE = 0,
    ELFDATA2LSB = 1,
    ELFDATA2MSB = 2,
    ELFDATANUM  = 3,
}

/// Values of the EI_CLASS byte; ELFCLASS32 and ELFCLASS64 select the 32/64-bit scan
enum
{
    ELFCLASSNONE = 0,
    ELFCLASS32   = 1,
    ELFCLASS64   = 2,
    ELFCLASSNUM  = 3,
}

/// The only e_type value scanElfObjModule accepts; others are reported as "not relocatable"
enum ET_REL = 1;
242 
/**
 * One entry of the ELF32 section header table.
 * Field order and types define the on-disk layout and must not be changed.
 * scanElfObjModule reads only sh_type, sh_offset, sh_size and sh_link.
 */
struct Elf32_Shdr
{
    Elf32_Word    sh_name;
    Elf32_Word    sh_type;       /// compared against SHT_SYMTAB / SHT_STRTAB
    Elf32_Word    sh_flags;
    Elf32_Addr    sh_addr;
    Elf32_Off     sh_offset;     /// byte offset of the section's contents from the start of the file
    Elf32_Word    sh_size;       /// size in bytes of the section's contents
    Elf32_Word    sh_link;       /// for a symbol table: index of the string table section holding its names
    Elf32_Word    sh_info;
    Elf32_Word    sh_addralign;
    Elf32_Word    sh_entsize;
}
256 
/**
 * One entry of the ELF64 section header table.
 * Same field names as Elf32_Shdr, with several fields widened to 64 bits.
 * Field order and types define the on-disk layout and must not be changed.
 * scanElfObjModule reads only sh_type, sh_offset, sh_size and sh_link.
 */
struct Elf64_Shdr
{
    Elf64_Word    sh_name;
    Elf64_Word    sh_type;       /// compared against SHT_SYMTAB / SHT_STRTAB
    Elf64_Xword   sh_flags;
    Elf64_Addr    sh_addr;
    Elf64_Off     sh_offset;     /// byte offset of the section's contents from the start of the file
    Elf64_Xword   sh_size;       /// size in bytes of the section's contents
    Elf64_Word    sh_link;       /// for a symbol table: index of the string table section holding its names
    Elf64_Word    sh_info;
    Elf64_Xword   sh_addralign;
    Elf64_Xword   sh_entsize;
}
270 
/// sh_type values tested by scanElfObjModule
enum
{
    SHT_SYMTAB = 2,   // section scanned for symbols
    SHT_STRTAB = 3,   // required type of the section a symbol table links to
}
273 
/**
 * One entry of an ELF32 symbol table.
 * Field order and types define the on-disk layout and must not be changed.
 * scanElfObjModule reads only st_name, st_info and st_shndx.
 */
struct Elf32_Sym
{
    Elf32_Word    st_name;   /// byte offset of the 0-terminated name within the linked string table
    Elf32_Addr    st_value;
    Elf32_Word    st_size;
    ubyte st_info;           /// the high 4 bits are compared against STB_GLOBAL / STB_WEAK
    ubyte st_other;
    Elf32_Section st_shndx;  /// symbols with SHN_UNDEF here are skipped
}
283 
/**
 * One entry of an ELF64 symbol table.
 * Note that the fields are in a different order than in Elf32_Sym.
 * Field order and types define the on-disk layout and must not be changed.
 * scanElfObjModule reads only st_name, st_info and st_shndx.
 */
struct Elf64_Sym
{
    Elf64_Word    st_name;   /// byte offset of the 0-terminated name within the linked string table
    ubyte st_info;           /// the high 4 bits are compared against STB_GLOBAL / STB_WEAK
    ubyte st_other;
    Elf64_Section st_shndx;  /// symbols with SHN_UNDEF here are skipped
    Elf64_Addr    st_value;
    Elf64_Xword   st_size;
}
293 
/// Symbol bindings (high 4 bits of st_info) that scanElfObjModule reports
enum
{
    STB_GLOBAL = 1,
    STB_WEAK   = 2,
}

/// st_shndx value of a symbol that scanElfObjModule skips as extern
enum
{
    SHN_UNDEF = 0,
}