1 /**
2  * Extract symbols from an OMF object file.
3  *
4  * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/scanomf.d, _scanomf.d)
8  * Documentation:  https://dlang.org/phobos/dmd_scanomf.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/scanomf.d
10  */
11 
12 module dmd.scanomf;
13 
14 import core.stdc.string;
15 import core.stdc.stdlib;
16 
17 import dmd.arraytypes;
18 import dmd.common.outbuffer;
19 import dmd.errorsink;
20 import dmd.location;
21 import dmd.root.rmem;
22 import dmd.root.string;
23 
24 private enum LOG = false;
25 
/*****************************************
 * Reads an object module from base[] and passes the names
 * of any exported symbols to pAddSymbol().
 *
 * The records are walked in order until a MODEND or M386END record is
 * reached. Names are reported for PUBDEF/PUB386, COMDAT, COMDEF and ALIAS
 * records and for .IMPDEF COMENT records. LNAMES/LLNAMES entries are
 * collected so that COMDAT records can refer to them by index.
 *
 * Params:
 *      pAddSymbol =  function to pass the names to; pickAny is 1 for COMDEF
 *                    names and for COMDATs whose attribute byte has any of
 *                    the 0xF0 bits set, 0 otherwise
 *      base =        array of contents of object module
 *      module_name = name of the object module (only printed when LOG is enabled)
 *      loc =         location to use for error printing
 *      eSink =       where the error messages go
 */
void scanOmfObjModule(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        scope const ubyte[] base, scope const char* module_name, Loc loc, ErrorSink eSink)
{
    static if (LOG)
    {
        printf("scanOmfObjModule(%s)\n", module_name);
    }

    /* Byte patterns looked for in COMENT records. They are fixed-size arrays
     * (not pointers) so that .length is the number of signature bytes.
     */
    // Phar Lap EASY-OMF marker: the whole record payload must be these bytes
    static immutable ubyte[7] easyOmfSig = [0x80, 0xAA, '8', '0', '3', '8', '6'];
    // .IMPDEF marker: payload bytes 1 and 2 (byte 0 is not examined)
    static immutable ubyte[2] impdefSig = [0xA0, 1];

    char[LIBIDMAX + 1] name = void;
    Strings names;
    scope(exit)
        for (size_t u = 1; u < names.length; u++)
            free(cast(void*)names[u]);
    names.push(null); // don't use index 0
    bool easyomf = false; // assume not EASY-OMF
    const pend = base.ptr + base.length;
    const(ubyte)* pnext;
    for (auto p = base.ptr; 1; p = pnext)
    {
        assert(p < pend);
        ubyte recTyp = *p++;
        // 16-bit little-endian record length, read bytewise so the result
        // does not depend on host alignment rules or byte order
        ushort recLen = cast(ushort)(p[0] | (p[1] << 8));
        p += 2;
        pnext = p + recLen;
        recLen--; // forget the checksum
        switch (recTyp)
        {
        case LNAMES:
        case LLNAMES:
            // Remember every name; COMDAT records refer to them by index
            while (p + 1 < pnext)
            {
                parseName(p, name.ptr);
                char* copy = cast(char*)Mem.check(strdup(name.ptr));
                names.push(copy);
            }
            break;
        case PUBDEF:
            if (easyomf)
                recTyp = PUB386; // convert to MS format
            goto case;
        case PUB386:
            // Both indices must be parsed, hence `|` rather than `||`
            if (!(parseIdx(p) | parseIdx(p)))
                p += 2; // skip seg, grp, frame
            while (p + 1 < pnext)
            {
                parseName(p, name.ptr);
                p += (recTyp == PUBDEF) ? 2 : 4; // skip offset
                parseIdx(p); // skip type index
                pAddSymbol(name[0 .. strlen(name.ptr)], 0);
            }
            break;
        case COMDAT:
            if (easyomf)
                recTyp = COMDAT + 1; // convert to MS format
            goto case;
        case COMDAT + 1:
            {
                int pickAny = 0;
                if (*p++ & 5) // if continuation or local comdat
                    break;
                ubyte attr = *p++;
                if (attr & 0xF0) // attr: if multiple instances allowed
                    pickAny = 1;
                p++; // align
                p += 2; // enum data offset
                if (recTyp == COMDAT + 1)
                    p += 2; // the COMDAT + 1 form has two more offset bytes
                parseIdx(p); // type index
                if ((attr & 0x0F) == 0) // if explicit allocation
                {
                    parseIdx(p); // base group
                    parseIdx(p); // base segment
                }
                uint idx = parseIdx(p); // public name index
                if (idx == 0 || idx >= names.length)
                {
                    //debug(printf("[s] name idx=%d, uCntNames=%d\n", idx, uCntNames));
                    eSink.error(loc, "corrupt COMDAT");
                    return;
                }
                //printf("[s] name='%s'\n",name);
                const(char)* n = names[idx];
                pAddSymbol(n.toDString(), pickAny);
                break;
            }
        case COMDEF:
            {
                while (p + 1 < pnext)
                {
                    parseName(p, name.ptr);
                    parseIdx(p); // type index
                    skipDataType(p); // data type
                    pAddSymbol(name[0 .. strlen(name.ptr)], 1);
                }
                break;
            }
        case ALIAS:
            // Pairs of names: the first of each pair is reported,
            // the second is parsed only to step over it
            while (p + 1 < pnext)
            {
                parseName(p, name.ptr);
                pAddSymbol(name[0 .. strlen(name.ptr)], 0);
                parseName(p, name.ptr);
            }
            break;
        case MODEND:
        case M386END:
            return;
        case COMENT:
            /* Both checks look at the payload starting at p and do not move
             * p while comparing, so a failed EASY-OMF match cannot shift the
             * position used for the .IMPDEF test.
             */
            if (recLen == easyOmfSig.length &&
                memcmp(p, easyOmfSig.ptr, easyOmfSig.length) == 0)
            {
                // Recognize Phar Lap EASY-OMF format
                easyomf = true;
            }
            else if (recLen >= 7 &&
                     memcmp(p + 1, impdefSig.ptr, impdefSig.length) == 0)
            {
                // Recognize .IMPDEF Import Definition Records
                p += 1 + impdefSig.length; // unexamined byte 0 + signature
                p++; // skip OrdFlag field
                parseName(p, name.ptr);
                pAddSymbol(name[0 .. strlen(name.ptr)], 0);
            }
            break;
        default:
            // ignore
        }
    }
}
178 
/*************************************************
 * Scan a block of memory buf[0..buflen], pulling out each
 * OMF object module in it and sending the info in it to pAddObjModule().
 *
 * A module starts at the first LHEADR/THEADR record seen while no module is
 * open, and ends with the next MODEND/M386END record. After each module end
 * the scan resumes at the next multiple of pagesize, measured from buf.
 *
 * Params:
 *      pAddObjModule = called once per module with the name taken from its
 *                      header record, the address of the module's first
 *                      byte and the module's length in bytes; name points
 *                      into a local buffer of this function that is
 *                      overwritten when the next header record is parsed
 *      buf =           start of the memory block
 *      buflen =        number of bytes in the memory block
 *      pagesize =      alignment of the modules inside buf; the rounding is
 *                      done with a bit mask, so it must be a power of 2
 * Returns:
 *      true for corrupt OMF data
 */
bool scanOmfLib(void delegate(char* name, void* base, size_t length) pAddObjModule, scope void* buf, size_t buflen, uint pagesize)
{
    /* Split up the buffer buf[0..buflen] into multiple object modules,
     * each aligned on a pagesize boundary.
     */
    const(ubyte)* base = null; // start of the module being collected, if any
    char[LIBIDMAX + 1] name = void;
    auto p = cast(const(ubyte)*)buf;
    auto pend = p + buflen;
    const(ubyte)* pnext;
    for (; p < pend; p = pnext) // for each OMF record
    {
        // Need the 3 byte record header plus at least one more byte
        if (p + 3 >= pend)
            return true; // corrupt
        const ubyte recTyp = *p;
        // 16-bit little-endian record length, read bytewise so the result
        // does not depend on host alignment rules or byte order
        const uint recLen = p[1] | (p[2] << 8);
        pnext = p + 3 + recLen;
        if (pnext > pend)
            return true; // corrupt
        switch (recTyp)
        {
        case LHEADR:
        case THEADR:
            if (!base)
            {
                base = p;
                p += 3;
                parseName(p, name.ptr);
                if (name[0] == 'C' && name[1] == 0) // old C compilers did this
                    base = pnext; // skip past THEADR
            }
            break;
        case MODEND:
        case M386END:
            {
                if (base)
                {
                    pAddObjModule(name.ptr, cast(ubyte*)base, pnext - base);
                    base = null;
                }
                // Round up to next page
                uint t = cast(uint)(pnext - cast(const(ubyte)*)buf);
                t = (t + pagesize - 1) & ~cast(uint)(pagesize - 1);
                pnext = cast(const(ubyte)*)buf + t;
                break;
            }
        default:
            // ignore
        }
    }
    return (base !is null); // missing MODEND record
}
238 
/*************************************************
 * Compute the number of bytes writeOMFObj() emits for an object module.
 *
 * Params:
 *      base =   start of the object module; its first byte is inspected
 *      length = number of bytes in the object module
 *      name =   0-terminated module name, used when the module does not
 *               begin with a THEADR or LHEADR record
 * Returns:
 *      length, plus strlen(name) + 5 when the module does not begin with a
 *      THEADR or LHEADR record, truncated to uint
 */
uint OMFObjSize(scope const void* base, size_t length, scope const char* name)
{
    const ubyte firstRecord = *cast(const(ubyte)*)base;
    if (firstRecord == THEADR || firstRecord == LHEADR)
        return cast(uint)length; // module already carries its own header

    // Room for the header record that gets put in front of the module:
    // 4 bytes before the name, the name itself, and 1 checksum byte
    const size_t nameLen = strlen(name);
    assert(nameLen <= LIBIDMAX);
    return cast(uint)(length + nameLen + 5);
}
250 
/*************************************************
 * Append an object module to buf, preceded by a generated THEADR record
 * when the module does not already begin with a THEADR or LHEADR record.
 *
 * Params:
 *      buf =    where the bytes are written
 *      base =   start of the object module; its first byte is inspected
 *      length = number of bytes in the object module
 *      name =   0-terminated module name to store in the generated THEADR
 */
void writeOMFObj(ref OutBuffer buf, scope const void* base, size_t length, scope const char* name)
{
    const ubyte firstRecord = *cast(const(ubyte)*)base;
    const bool hasHeader = (firstRecord == THEADR || firstRecord == LHEADR);
    if (!hasHeader)
    {
        const nameLen = strlen(name);
        assert(nameLen <= LIBIDMAX);

        /* Record layout: type, 2 length bytes (low, high), name length,
         * name characters, checksum.
         */
        ubyte[4 + LIBIDMAX + 1] theadr = void;
        theadr[0] = THEADR;
        theadr[1] = cast(ubyte)(2 + nameLen); // name length byte + name + checksum
        theadr[2] = 0;
        theadr[3] = cast(ubyte)nameLen;
        assert(nameLen <= 0xFF - 2); // 2 + nameLen has to fit in theadr[1]
        memcpy(theadr.ptr + 4, name, nameLen);

        // Checksum: chosen so that all bytes of the record sum to 0 (mod 256)
        ubyte sum = 0;
        foreach (b; theadr.ptr[0 .. nameLen + 4])
            sum -= b;
        theadr.ptr[nameLen + 4] = sum;

        buf.write(theadr.ptr[0 .. nameLen + 5]);
    }
    buf.write(base[0 .. length]);
}
279 
280 private: // for the remainder of this module
281 
/**************************
 * Record types:
 * the value of the first byte of a record, which is what the scanning
 * functions of this module switch on.
 */
enum RHEADR = 0x6E;
enum REGINT = 0x70;
enum REDATA = 0x72;
enum RIDATA = 0x74;
enum OVLDEF = 0x76;
enum ENDREC = 0x78;
enum BLKDEF = 0x7A;
enum BLKEND = 0x7C;
enum DEBSYM = 0x7E;
enum THEADR = 0x80;
enum LHEADR = 0x82;
enum PEDATA = 0x84;
enum PIDATA = 0x86;
enum COMENT = 0x88;
enum MODEND = 0x8A;
enum M386END = 0x8B; /* 32 bit module end record */
enum EXTDEF = 0x8C;
enum TYPDEF = 0x8E;
enum PUBDEF = 0x90;
enum PUB386 = 0x91;
enum LOCSYM = 0x92;
enum LINNUM = 0x94;
enum LNAMES = 0x96;
enum SEGDEF = 0x98;
enum GRPDEF = 0x9A;
enum FIXUPP = 0x9C;
/* 0x9E: (none) */
enum LEDATA = 0xA0;
enum LIDATA = 0xA2;
enum LIBHED = 0xA4;
enum LIBNAM = 0xA6;
enum LIBLOC = 0xA8;
enum LIBDIC = 0xAA;
enum COMDEF = 0xB0;
enum LEXTDEF = 0xB4;
enum LPUBDEF = 0xB6;
enum LCOMDEF = 0xB8;
enum CEXTDEF = 0xBC;
enum COMDAT = 0xC2;
enum LINSYM = 0xC4;
enum ALIAS = 0xC6;
enum LLNAMES = 0xCA;
// Not a record type: the max size of a name that will fit in dictionary.
// The name buffers of this module are declared with LIBIDMAX + 1 elements.
enum LIBIDMAX = (512 - 0x25 - 3 - 4);
328 
/**************************
 * Read the length-prefixed name at pp, store it in name as a 0-terminated
 * string and advance pp past it.
 *
 * Two encodings are accepted:
 *  - one length byte followed by that many characters
 *  - the bytes 0xFF, 0x00, then a 16-bit little-endian length, followed by
 *    that many characters (long name)
 *
 * Params:
 *      pp =   position of the name; set to the first byte after the name
 *      name = receives the 0-terminated name; must have room for
 *             LIBIDMAX + 1 characters, LIBIDMAX being the max size that
 *             will fit in dictionary
 */
void parseName(ref scope const(ubyte)* pp, char* name)
{
    auto p = pp;
    uint len = *p++;
    if (len == 0xFF && *p == 0) // if long name
    {
        len = p[1] | (cast(uint)p[2] << 8);
        p += 3;
        assert(len <= LIBIDMAX);
    }
    /* A long name can claim up to 0xFFFF characters. When the assert above
     * is compiled out, never store more than the destination is required to
     * hold; the input position still advances by the full encoded length.
     */
    const uint copyLen = (len <= LIBIDMAX) ? len : LIBIDMAX;
    memcpy(name, p, copyLen);
    name[copyLen] = 0;
    pp = p + len;
}
345 
/**************************
 * Read an index field at pp and advance pp past it.
 *
 * If the first byte has its 0x80 bit clear, that byte is the index.
 * Otherwise the low 7 bits of the first byte are the high part and the
 * following byte is the low part of the index.
 *
 * Params:
 *      pp = position of the index field; set to the first byte after it
 * Returns:
 *      the index value
 */
ushort parseIdx(ref scope const(ubyte)* pp)
{
    auto cursor = pp;
    const ubyte first = *cursor++;
    ushort result = first;
    if (first & 0x80)
    {
        // two byte form
        const ubyte low = *cursor++;
        result = cast(ushort)(((first & 0x7F) << 8) | low);
    }
    pp = cursor;
    return result;
}
354 
/**************************
 * Skip numeric field of a data type of a COMDEF record.
 *
 * The first byte selects how many more bytes belong to the field:
 * 0x81 is followed by 2 bytes, 0x84 by 3 bytes and 0x88 by 4 bytes.
 * Any other first byte must be <= 0x80 and is the whole field.
 *
 * Params:
 *      pp = position of the field; set to the first byte after it
 */
void skipNumericField(ref scope const(ubyte)* pp)
{
    auto cursor = pp;
    const lead = *cursor++;
    switch (lead)
    {
    case 0x81:
        cursor += 2;
        break;
    case 0x84:
        cursor += 3;
        break;
    case 0x88:
        cursor += 4;
        break;
    default:
        assert(lead <= 0x80);
        break;
    }
    pp = cursor;
}
370 
/**************************
 * Skip data type of a COMDEF record.
 *
 * The first byte selects what follows: 0x61 (FAR data) is followed by two
 * numeric fields and 0x62 (NEAR data) by one. Any other first byte must be
 * in the range 1 .. 0x5F and has nothing following it.
 *
 * Params:
 *      pp = position of the data type; set to the first byte after it
 */
void skipDataType(ref scope const(ubyte)* pp)
{
    auto cursor = pp;
    const kind = *cursor++;
    switch (kind)
    {
    case 0x61:
        // FAR data
        skipNumericField(cursor);
        skipNumericField(cursor);
        break;
    case 0x62:
        // NEAR data
        skipNumericField(cursor);
        break;
    default:
        assert(1 <= kind && kind <= 0x5f); // Borland segment indices
        break;
    }
    pp = cursor;
}