1 /**
2  * Generates a human-readable stack-trace on POSIX targets using DWARF
3  *
4  * The common use case for printing a stack trace is when `toString` is called
5  * on a `Throwable` (see `object.d`). It will iterate on what is likely to be
6  * the default trace handler (see `core.runtime : defaultTraceHandler`).
7  * The class returned by `defaultTraceHandler` is what ends up calling into
8  * this module, through the use of `core.internal.traits : externDFunc`.
9  *
 * The entry point of this module is `traceHandlerOpApplyImpl`, which is
 * also its only really "public" symbol (since all `rt` symbols are private).
12  * In the future, this implementation should probably be made idiomatic,
13  * so that it can for example work with attributes.
14  *
15  * Resilience:
 * As this module is used for diagnostics, it should handle failures
17  * as gracefully as possible. Having the runtime error out on printing
18  * the stack trace one is trying to debug would be quite a terrible UX.
19  * For this reason, this module works on a "best effort" basis and will
20  * sometimes print mangled symbols, or "???" when it cannot do anything
21  * more useful.
22  *
23  * Source_of_data:
24  * This module uses two main sources for generating human-readable data.
25  * First, it uses `backtrace_symbols` to obtain the name of the symbols
26  * (functions or methods) associated with the addresses.
27  * Since the names are mangled, it will also call into `core.demangle`,
28  * and doesn't need to use any DWARF information for this,
29  * however a future extension  could make use of the call frame information
30  * (See DWARF4 "6.4 Call Frame Information", PDF page 126).
31  *
 * The other piece of data used is the DWARF `.debug_line` section,
 * which contains the line information of a program, necessary to associate
 * the instruction address with its (file, line) information.
 *
 * Since debug line information is quite large, it is encoded using a
 * program that is to be fed to a finite state machine.
38  * See `runStateMachine` and `readLineNumberProgram` for more details.
39  *
40  * DWARF_Version:
41  * This module only supports DWARF 3, 4 and 5.
42  *
43  * Reference: http://www.dwarfstd.org/
44  * Copyright: Copyright Digital Mars 2015 - 2015.
45  * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
46  * Authors:   Yazan Dabain, Sean Kelly
 * Source: $(DRUNTIMESRC core/internal/backtrace/dwarf.d)
48  */
49 
50 module core.internal.backtrace.dwarf;
51 
52 import core.internal.execinfo;
53 import core.internal.string;
54 
55 version (Posix):
56 
57 version (OSX)
58     version = Darwin;
59 else version (iOS)
60     version = Darwin;
61 else version (TVOS)
62     version = Darwin;
63 else version (WatchOS)
64     version = Darwin;
65 
66 version (Darwin)
67     import core.internal.backtrace.macho;
68 else
69     import core.internal.backtrace.elf;
70 
71 import core.internal.container.array;
72 import core.stdc.string : strlen, memcpy;
73 
74 //debug = DwarfDebugMachine;
75 debug(DwarfDebugMachine) import core.stdc.stdio : printf;
76 
/**
 * Everything known about one frame of a stack trace: its address, the
 * symbol it belongs to and, when debug info is available, its position
 * in the source code.
 */
struct Location
{
    /// Address of the instruction this location describes.
    const(void)* address;

    /// Name of the procedure (or function) that contains `address`.
    const(char)[] procedure;

    /**
     * Path of the source file, relative to `directory`.
     *
     * Depending on the implementation this can be a bare file name,
     * a relative path or an absolute path.
     * It may be `null` when no debug info is present.
     */
    const(char)[] file;

    /**
     * Directory in which `file` resides.
     *
     * It may be `null`, either because there is no debug info or because
     * the compiler implementation doesn't use this feature (e.g. DMD).
     */
    const(char)[] directory;

    /**
     * Line of `file` that corresponds to this location.
     *
     * Besides positive values, consumers must expect `0` (the code cannot
     * be attributed to a specific line, e.g. module-specific generated code)
     * and `-1` (no debug info could be found).
     */
    int line = -1;

    /// Writes a human-readable rendering of this location to `sink`
    void toString (scope void delegate(scope const char[]) sink) const
    {
        import core.demangle;

        // Source position. Without a file name any directory information
        // is meaningless, so it is simply ignored in that case.
        if (!this.file.length)
        {
            // Most likely, no debug information
            sink("??");
        }
        else
        {
            // Note: `sink` has to cope with an empty directory
            sink(this.directory);
            // This module is never used on Windows, so '/' is the only separator
            const bool missingSeparator =
                this.directory.length != 0 && this.directory[$ - 1] != '/';
            if (missingSeparator)
                sink("/");
            sink(this.file);
        }

        // Line number: negative means "no debug info", while 0 (e.g. a frame
        // in generated code) is not printed at all.
        if (this.line < 0)
        {
            sink(":?");
        }
        else if (this.line != 0)
        {
            sink(":");
            sink(signedToTempString(this.line));
        }

        // With the execinfo style, `procedure` can be null if the format of
        // the line could not be read, although that should be rare.
        if (this.procedure.length)
        {
            char[1024] demangleBuffer = void;
            sink(" ");
            sink(demangle(this.procedure, demangleBuffer, getCXXDemangler()));
        }

        sink(" [0x");
        sink(unsignedToTempString!16(cast(size_t) this.address));
        sink("]");
    }
}
161 
/**
 * Entry point of this module: collects the frames of a backtrace, resolves
 * them and forwards one formatted line per frame to `dg`.
 *
 * Params:
 *   numFrames = Number of frames in the backtrace
 *   getNthAddress = Returns the address of the frame at the given index
 *   getNthFuncName = Returns the symbol name of the frame at the given index
 *   dg = Receives the index and the formatted text of each frame
 *
 * Returns:
 *   The value produced by `processCallstack`, either directly or through
 *   `processDebugLineSectionData`.
 */
int traceHandlerOpApplyImpl(size_t numFrames,
                            scope const(void)* delegate(size_t) getNthAddress,
                            scope const(char)[] delegate(size_t) getNthFuncName,
                            scope int delegate(ref size_t, ref const(char[])) dg)
{
    // Name of the runtime function every `throw` goes through.
    version (LDC) enum throwEntryPoint = "_d_throw_exception";
    else          enum throwEntryPoint = "_d_throwdwarf";

    auto image = Image.openSelf();

    Array!Location locations;
    locations.length = numFrames;

    // NOTE: With the current implementation the first few frames are inside
    //       core.runtime and the object code, so they are dropped for
    //       readability. How many there are might depend on build
    //       parameters, hence the search for a known symbol instead of
    //       skipping a fixed number of frames.
    size_t firstFrame;
    foreach (frameNo; 0 .. numFrames)
    {
        locations[frameNo].address = getNthAddress(frameNo);
        const name = getNthFuncName(frameNo);
        locations[frameNo].procedure = name;

        if (firstFrame == 0 && name == throwEntryPoint)
            firstFrame = frameNo + 1;
    }

    if (image.isValid())
    {
        // find address -> file, line mapping using dwarf debug_line
        return image.processDebugLineSectionData(
            (line) => locations[firstFrame .. $].processCallstack(line, image.baseAddress, dg));
    }

    return locations[firstFrame .. $].processCallstack(null, 0, dg);
}
196 
/**
 * Fixed-size, allocation-free text buffer that one formatted stack trace
 * line is written into.
 *
 * Text that does not fit is cut off and the end of the buffer is replaced
 * by "..." to make the truncation visible.
 */
struct TraceInfoBuffer
{
    private char[1536] buf = void;
    private size_t position;

    // BUG: https://issues.dlang.org/show_bug.cgi?id=21285
    @safe pure nothrow @nogc
    {
        /// Returns: The text written since the last `reset`
        inout(char)[] opSlice() inout return
        {
            // `position` is one past the capacity once truncation happened
            const end = this.position > this.buf.length
                ? this.buf.length : this.position;
            return this.buf[0 .. end];
        }

        /// Discards the content so that the buffer can be reused
        void reset()
        {
            this.position = 0;
        }
    }

    /// Appends `data`; used as `sink` argument to `Location.toString`
    void put(scope const char[] data)
    {
        const capacity = this.buf.length;

        // Already truncated: nothing more can be written
        if (this.position > capacity)
            return;

        const room = capacity - this.position;
        if (data.length > room)
        {
            this.buf[this.position .. $] = data[0 .. room];
            this.buf[$ - 3 .. $] = "...";
            // One past the capacity marks the buffer as truncated.
            // Without that marker, a symbol name filling the buffer exactly
            // would make us drop everything that follows without ever
            // printing the '...'.
            this.position = capacity + 1;
            return;
        }

        const end = this.position + data.length;
        this.buf[this.position .. end] = data;
        this.position = end;
    }
}
240 
241 private:
242 
/**
 * Resolves `locations`, then formats them one by one and hands each
 * formatted line to `dg`.
 *
 * Iteration stops after the frame whose procedure is `_Dmain`, or as soon
 * as `dg` returns a non-zero value.
 *
 * Params:
 *   locations = The frames to resolve and print
 *   debugLineSectionData = Content of `.debug_line`, `null` if unavailable
 *   baseAddress = The offset to apply to every address
 *   dg = Receives the index and the formatted text of each frame
 *
 * Returns:
 *   The first non-zero value returned by `dg`, `0` otherwise
 */
int processCallstack(Location[] locations, const(ubyte)[] debugLineSectionData,
                     size_t baseAddress, scope int delegate(ref size_t, ref const(char[])) dg)
{
    if (debugLineSectionData)
        resolveAddresses(debugLineSectionData, locations, baseAddress);
    else
    {
        // Without DWARF data, Darwin targets ask `atos` instead
        version (Darwin)
            resolveAddressesWithAtos(locations);
    }

    TraceInfoBuffer formatted;
    foreach (frameNo, const ref frame; locations)
    {
        formatted.reset();
        frame.toString(&formatted.put);

        // `dg` takes its arguments by reference, so it needs an lvalue
        auto text = formatted[];
        const status = dg(frameNo, text);
        if (status != 0)
            return status;

        if (frame.procedure == "_Dmain")
            break;
    }

    return 0;
}
270 
version (Darwin) {
    /**
     * Resolve the addresses of `locations` using `atos` (executable that ships with Xcode)
     *
     * Spawns a child process that calls `atos`. Communication is through stdin/stdout pipes.
     * The child is reaped and every descriptor is released before returning,
     * on the error paths as well.
     *
     * After this function successfully completes, `locations` will contain
     * file / line information.
     *
     * The lifetime of the `Location` data surpasses function return (strndup is used).
     *
     * Params:
     * locations = The locations to resolve
     */
    private void resolveAddressesWithAtos(Location[] locations) @nogc nothrow
    {
        import core.stdc.stdio : fclose, fflush, fgets, fprintf, printf, snprintf;
        import core.sys.posix.stdio : fdopen;
        import core.sys.posix.sys.wait : waitpid;
        import core.sys.posix.unistd : _exit, close, dup2, execlp, fork, getpid, pipe;

        // Closes whichever ends of a pipe pair are still open.
        static void closePair(ref int[2] fds) @nogc nothrow
        {
            foreach (ref fd; fds)
            {
                if (fd >= 0)
                    close(fd);
                fd = -1;
            }
        }

        // Create in/out pipes to communicate with the forked exec.
        // The dummy pipes are there to prevent funny issues when stdin/stdout
        // is closed and pipe returns id 0 or 1.
        int[2] dummy_pipes = -1;
        int[2] pipes_to_atos = -1;
        int[2] pipes_from_atos = -1;
        const pipes_ok = pipe(dummy_pipes) >= 0
            && pipe(pipes_to_atos) >= 0
            && pipe(pipes_from_atos) >= 0;
        closePair(dummy_pipes);
        if (!pipes_ok)
        {
            // Do not leak the pipes that were created before the failure
            closePair(pipes_to_atos);
            closePair(pipes_from_atos);
            printf("some pipe creation error!\n");
            return;
        }
        auto write_to_atos = pipes_to_atos[1];
        auto read_from_atos = pipes_from_atos[0];
        auto atos_stdin = pipes_to_atos[0];
        auto atos_stdout = pipes_from_atos[1];
        auto self_pid = cast(int) getpid();
        // Spawn a child process that calls atos, reads/writes from the pipes, and then exits.
        auto child_id = fork();
        if (child_id == -1)
        {
            closePair(pipes_to_atos);
            closePair(pipes_from_atos);
            printf("some fork error!\n");
            return;
        }
        else if (child_id == 0)
        {
            // We are in the child process, spawn atos and link pipes
            // Close unused read/write ends of pipes
            close(write_to_atos);
            close(read_from_atos);
            // Link pipes to stdin/stdout
            dup2(atos_stdin, 0);
            close(atos_stdin);
            dup2(atos_stdout, 1);
            close(atos_stdout);
            // Large enough for any 32-bit pid plus the terminator
            char[16] pid_str;
            snprintf(pid_str.ptr, pid_str.sizeof, "%d", self_pid);
            const(char)* atos_executable = "atos";
            const(char)* atos_p_arg = "-p";
            const(char)* atos_fullpath_arg = "-fullPath";
            execlp(atos_executable, atos_executable, atos_fullpath_arg, atos_p_arg, pid_str.ptr, null);
            // If exec returns, an error occurred and the forked process has
            // to end here. `_exit` is used so that neither the stdio buffers
            // inherited from the parent nor its exit handlers are run, and
            // nothing is printed because stdout is the pipe read by the
            // parent: it notices the failure through the end-of-file.
            _exit(127);
        }
        // Parent process just continues from here.
        // Close unused pipes
        close(atos_stdin);
        close(atos_stdout);
        auto to_atos = fdopen(write_to_atos, "w");
        auto from_atos = fdopen(read_from_atos, "r");
        if (!to_atos || !from_atos)
        {
            // `fclose` releases the descriptor of a stream that was opened,
            // the other descriptor has to be closed directly.
            if (to_atos) fclose(to_atos); else close(write_to_atos);
            if (from_atos) fclose(from_atos); else close(read_from_atos);
            waitpid(child_id, null, 0);
            printf("\nDid not succeed in using 'atos' for extra debug information.\n");
            return;
        }
        // buffer for atos reading. Note that symbol names can be super large...
        static char[16 * 1024] read_buffer = void;
        char* status_ptr = null;
        foreach (ref loc; locations)
        {
            // NOTE(review): if the child is already gone, this write raises
            // SIGPIPE unless the process ignores that signal.
            fprintf(to_atos, "%p\n", loc.address);
            fflush(to_atos);
            read_buffer[0] = '\0';
            status_ptr = fgets(read_buffer.ptr, read_buffer.sizeof, from_atos);
            if (!status_ptr)
                break;
            Location parsed_loc = parseAtosLine(read_buffer.ptr);
            if (parsed_loc.line != -1)
            {
                // Only update the file:line info, keep the procedure name as found before (preserving the standard truncation).
                loc.file = parsed_loc.file;
                loc.line = parsed_loc.line;
            }
        }
        // Nothing was asked when there are no locations, so nothing failed
        if (!status_ptr && locations.length)
            printf("\nDid not succeed in using 'atos' for extra debug information.\n");
        // `fclose` also closes the underlying descriptors, so they must not
        // be passed to `close` again. Closing `to_atos` first signals
        // end-of-input to the child.
        fclose(to_atos);
        fclose(from_atos);
        // Reap the child so that it does not linger as a zombie
        waitpid(child_id, null, 0);
    }

    /**
     * Parses one line of `atos` output.
     *
     * Params:
     * buffer = NUL-terminated line as printed by `atos`, may be `null`
     *
     * Returns:
     * A `Location` whose `line` is `-1` unless a complete
     * `(file:line)` suffix was found. `procedure` and `file` are copies
     * allocated with `strndup` and are never freed by this module.
     */
    private Location parseAtosLine(char* buffer) @nogc nothrow
    {
        // The line from `atos` is in one of these formats:
        // myfunction (in library.dylib) (sourcefile.c:17)
        // myfunction (in library.dylib) + 0x1fe
        // myfunction (in library.dylib) + 15
        // 0xdeadbeef (in library.dylib) + 0x1fe
        // 0xdeadbeef (in library.dylib) + 15
        // 0xdeadbeef (in library.dylib)
        // 0xdeadbeef
        import core.stdc.stdlib : atoi;
        import core.stdc.string : strchr, strstr;
        import core.sys.posix.string : strndup;
        Location loc;
        if (!buffer)
            return loc;
        if (buffer[0] == '0' && buffer[1] == 'x')
            // no named symbol found
            return loc;
        const symbolname_end = strstr(buffer, " (in ");
        if (!symbolname_end)
            return loc;
        const symbolname_size = symbolname_end - buffer;
        auto symbolname = strndup(buffer, symbolname_size);
        if (!symbolname)
            // out of memory
            return loc;
        loc.procedure = symbolname[0..symbolname_size];
        // Only the first format carries a "(file:line)" suffix
        const library_end = strstr(symbolname_end, ") (");
        if (!library_end)
            return loc;
        const filename_start = library_end + 3;
        const colon_location = strchr(filename_start, ':');
        if (!colon_location)
            return loc;
        const filename_size = colon_location - filename_start;
        auto filename = strndup(filename_start, filename_size);
        if (!filename)
            // out of memory
            return loc;
        loc.file = filename[0..filename_size];
        const final_paren = strchr(colon_location+1, ')');
        if (!final_paren)
            return loc;
        loc.line = atoi(colon_location+1);
        return loc;
    }
}
405 
/**
 * Resolve the addresses of `locations` using `debugLineSectionData`
 *
 * Runs the DWARF state machine on `debugLineSectionData`,
 * assuming it represents a debugging program describing the addresses
 * in a continuous and increasing manner.
 *
 * After this function successfully completes, `locations` will contain
 * file / line information.
 *
 * Rows referring to a file index that is not part of the file table of
 * their line number program are ignored, and a directory index outside of
 * the directory table leaves `Location.directory` untouched, so that
 * inconsistent debug data cannot make the stack trace printing fail.
 *
 * Note that the lifetime of the `Location` data is bound to the lifetime
 * of `debugLineSectionData`.
 *
 * Params:
 *   debugLineSectionData = A DWARF program to feed the state machine
 *   locations = The locations to resolve
 *   baseAddress = The offset to apply to every address
 */
void resolveAddresses(const(ubyte)[] debugLineSectionData, Location[] locations, size_t baseAddress) @nogc nothrow
{
    debug(DwarfDebugMachine) import core.stdc.stdio;

    size_t numberOfLocationsFound = 0;

    const(ubyte)[] dbg = debugLineSectionData;
    while (dbg.length > 0)
    {
        debug(DwarfDebugMachine) printf("new debug program\n");
        const lp = readLineNumberProgram(dbg);

        LocationInfo lastLoc = LocationInfo(-1, -1);
        const(void)* lastAddress;

        debug(DwarfDebugMachine) printf("program:\n");
        runStateMachine(lp,
            (const(void)* address, LocationInfo locInfo, bool isEndSequence)
            {
                // adjust to ASLR offset
                address += baseAddress;
                debug (DwarfDebugMachine)
                    printf("-- offsetting %p to %p\n", address - baseAddress, address);

                foreach (ref loc; locations)
                {
                    // If loc.line != -1, then it has been set previously.
                    // Some implementations (eg. dmd) write an address to
                    // the debug data multiple times, but so far I have found
                    // that the first occurrence to be the correct one.
                    if (loc.line != -1)
                        continue;

                    // Can be called with either `locInfo` or `lastLoc`
                    void update(const ref LocationInfo match)
                    {
                        // File indices are 1-based for DWARF < 5.
                        // Going through `size_t` turns an index that ends up
                        // below zero into a huge value, which the range
                        // check below rejects as well.
                        const fileIndex = cast(size_t) (match.file - (lp.dwarfVersion < 5 ? 1 : 0));
                        if (fileIndex >= lp.sourceFiles.length)
                            return;
                        const sourceFile = lp.sourceFiles[fileIndex];
                        debug (DwarfDebugMachine)
                        {
                            printf("-- found for [%p]:\n", loc.address);
                            printf("--   file: %.*s\n",
                                   cast(int) sourceFile.file.length, sourceFile.file.ptr);
                            printf("--   line: %d\n", match.line);
                        }
                        // DMD emits entries with FQN, but other implementations
                        // (e.g. LDC) make use of directories
                        // See https://github.com/dlang/druntime/pull/2945
                        if (sourceFile.dirIndex != 0 &&
                            sourceFile.dirIndex - 1 < lp.includeDirectories.length)
                            loc.directory = lp.includeDirectories[sourceFile.dirIndex - 1];

                        loc.file = sourceFile.file;
                        loc.line = match.line;
                        numberOfLocationsFound++;
                    }

                    // The state machine will not contain an entry for each
                    // address, as consecutive addresses with the same file/line
                    // are merged together to save on space, so we need to
                    // check if our address is within two addresses we get
                    // called with.
                    //
                    // Specs (DWARF v4, Section 6.2, PDF p.109) says:
                    // "We shrink it with two techniques. First, we delete from
                    // the matrix each row whose file, line, source column and
                    // discriminator information is identical with that of its
                    // predecessors.
                    if (loc.address == address)
                        update(locInfo);
                    else if (lastAddress &&
                             loc.address > lastAddress && loc.address < address)
                        update(lastLoc);
                }

                if (isEndSequence)
                {
                    // Addresses of different sequences are unrelated, so the
                    // "between two rows" check must not span over them
                    lastAddress = null;
                }
                else
                {
                    lastAddress = address;
                    lastLoc = locInfo;
                }

                // Returning `false` stops the state machine early
                return numberOfLocationsFound < locations.length;
            }
        );

        if (numberOfLocationsFound == locations.length) return;
    }
}
516 
/**
 * A callback type for `runStateMachine`
 *
 * The callback is called when certain specific opcode are encountered
 * (a.k.a when a complete `LocationInfo` is encountered).
 * See `runStateMachine` implementation and the DWARF specs for more detail.
 *
 * Params:
 *   address = The address that the `LocationInfo` describes
 *   info = The `LocationInfo` itself, describing `address`
 *   isEndSequence = Whether the end of a sequence has been reached
 *
 * Returns:
 *   `true` to keep the state machine running, `false` to make
 *   `runStateMachine` stop and return immediately.
 */
alias RunStateMachineCallback =
    bool delegate(const(void)* address, LocationInfo info, bool isEndSequence)
    @nogc nothrow;
532 
/**
 * Run the state machine to generate line number matrix
 *
 * Line number information generated by the compiler is stored in the
 * `.debug_line` section. Conceptually, it can be seen as a large matrix,
 * with row such as "file", "line", "column", "is_statement", etc...
 * However such a matrix would be too big to store in an object file,
 * so DWARF instead generate this matrix using bytecode fed to a state machine.
 *
 * Note:
 * Each compilation unit can have its own line number program.
 *
 * See_Also:
 * - DWARF v4, Section 6.2: Line Number Information
 *
 * Params:
 *   lp = Program to execute
 *   callback = Delegate to call whenever a LocationInfo is completed.
 *              Execution stops as soon as it returns `false`.
 *
 * Returns:
 *   `false` if an error happened (e.g. unknown opcode, or a header that
 *   would lead to a division by zero), `true` otherwise, including when
 *   `callback` requested an early stop.
 */
bool runStateMachine(ref const(LineNumberProgram) lp, scope RunStateMachineCallback callback) @nogc nothrow
{
    // Both values are used as divisors below: a corrupted header must not
    // be able to bring down the program that is printing its stack trace.
    if (lp.lineRange == 0 || lp.maximumOperationsPerInstruction == 0)
        return false;

    StateMachine machine;
    machine.isStatement = lp.defaultIsStatement;

    const(ubyte)[] program = lp.program;
    while (program.length > 0)
    {
        size_t advanceAddressAndOpIndex(size_t operationAdvance)
        {
            const addressIncrement = lp.minimumInstructionLength * ((machine.operationIndex + operationAdvance) / lp.maximumOperationsPerInstruction);
            machine.address += addressIncrement;
            machine.operationIndex = (machine.operationIndex + operationAdvance) % lp.maximumOperationsPerInstruction;
            return addressIncrement;
        }

        // Skips the operands of an extended opcode that is not interpreted.
        // `length` is the size announced by the instruction: it covers the
        // extended opcode byte, which has already been consumed, and the
        // operands. The amount is clamped so that a length of 0, or one that
        // goes past the end of the program, cannot cause a range violation.
        void skipExtendedOperands(size_t length)
        {
            const operands = length > 0 ? length - 1 : 0;
            const skipped = operands < program.length ? operands : program.length;
            program = program[skipped .. $];
        }

        ubyte opcode = program.read!ubyte();
        if (opcode < lp.opcodeBase)
        {
            switch (opcode) with (StandardOpcode)
            {
                case extendedOp:
                    size_t len = cast(size_t) program.readULEB128();
                    ubyte eopcode = program.read!ubyte();

                    switch (eopcode) with (ExtendedOpcode)
                    {
                        case endSequence:
                            machine.isEndSequence = true;
                            debug(DwarfDebugMachine) printf("endSequence %p\n", machine.address);
                            if (!callback(machine.address, LocationInfo(machine.fileIndex, machine.line), true)) return true;
                            // Every sequence starts from the initial state
                            machine = StateMachine.init;
                            machine.isStatement = lp.defaultIsStatement;
                            break;

                        case setAddress:
                            const address = program.read!(void*)();
                            debug(DwarfDebugMachine) printf("setAddress %p\n", address);
                            machine.address = address;
                            machine.operationIndex = 0;
                            break;

                        case defineFile: // TODO: add proper implementation
                            debug(DwarfDebugMachine) printf("defineFile\n");
                            skipExtendedOperands(len);
                            break;

                        case setDiscriminator:
                            const discriminator = cast(uint) program.readULEB128();
                            debug(DwarfDebugMachine) printf("setDiscriminator %d\n", discriminator);
                            machine.discriminator = discriminator;
                            break;

                        default:
                            // unknown opcode
                            debug(DwarfDebugMachine) printf("unknown extended opcode %d\n", cast(int) eopcode);
                            skipExtendedOperands(len);
                            break;
                    }

                    break;

                case copy:
                    debug(DwarfDebugMachine) printf("copy %p\n", machine.address);
                    if (!callback(machine.address, LocationInfo(machine.fileIndex, machine.line), false)) return true;
                    machine.isBasicBlock = false;
                    machine.isPrologueEnd = false;
                    machine.isEpilogueBegin = false;
                    machine.discriminator = 0;
                    break;

                case advancePC:
                    const operationAdvance = cast(size_t) readULEB128(program);
                    advanceAddressAndOpIndex(operationAdvance);
                    debug(DwarfDebugMachine) printf("advancePC %d to %p\n", cast(int) operationAdvance, machine.address);
                    break;

                case advanceLine:
                    long ad = readSLEB128(program);
                    machine.line += ad;
                    debug(DwarfDebugMachine) printf("advanceLine %d to %d\n", cast(int) ad, cast(int) machine.line);
                    break;

                case setFile:
                    uint index = cast(uint) readULEB128(program);
                    debug(DwarfDebugMachine) printf("setFile to %d\n", cast(int) index);
                    machine.fileIndex = index;
                    break;

                case setColumn:
                    uint col = cast(uint) readULEB128(program);
                    debug(DwarfDebugMachine) printf("setColumn %d\n", cast(int) col);
                    machine.column = col;
                    break;

                case negateStatement:
                    debug(DwarfDebugMachine) printf("negateStatement\n");
                    machine.isStatement = !machine.isStatement;
                    break;

                case setBasicBlock:
                    debug(DwarfDebugMachine) printf("setBasicBlock\n");
                    machine.isBasicBlock = true;
                    break;

                case constAddPC:
                    // Same advance as the special opcode 255, without
                    // touching the line and without emitting a row
                    const operationAdvance = (255 - lp.opcodeBase) / lp.lineRange;
                    advanceAddressAndOpIndex(operationAdvance);
                    debug(DwarfDebugMachine) printf("constAddPC %p\n", machine.address);
                    break;

                case fixedAdvancePC:
                    const add = program.read!ushort();
                    machine.address += add;
                    machine.operationIndex = 0;
                    debug(DwarfDebugMachine) printf("fixedAdvancePC %d to %p\n", cast(int) add, machine.address);
                    break;

                case setPrologueEnd:
                    machine.isPrologueEnd = true;
                    debug(DwarfDebugMachine) printf("setPrologueEnd\n");
                    break;

                case setEpilogueBegin:
                    machine.isEpilogueBegin = true;
                    debug(DwarfDebugMachine) printf("setEpilogueBegin\n");
                    break;

                case setISA:
                    machine.isa = cast(uint) readULEB128(program);
                    debug(DwarfDebugMachine) printf("setISA %d\n", cast(int) machine.isa);
                    break;

                default:
                    debug(DwarfDebugMachine) printf("unknown opcode %d\n", cast(int) opcode);
                    return false;
            }
        }
        else
        {
            // Special opcode: a single byte that advances both the address
            // and the line, then emits a row
            opcode -= lp.opcodeBase;
            const operationAdvance = opcode / lp.lineRange;
            const addressIncrement = advanceAddressAndOpIndex(operationAdvance);
            const lineIncrement = lp.lineBase + (opcode % lp.lineRange);
            machine.line += lineIncrement;

            debug (DwarfDebugMachine)
                printf("special %d %d to %p line %d\n", cast(int) addressIncrement,
                       cast(int) lineIncrement, machine.address, machine.line);

            if (!callback(machine.address, LocationInfo(machine.fileIndex, machine.line), false)) return true;

            machine.isBasicBlock = false;
            machine.isPrologueEnd = false;
            machine.isEpilogueBegin = false;
            machine.discriminator = 0;
        }
    }

    return true;
}
716 
/**
 * Reads a value of type `T` from the front of `buffer` and advances
 * `buffer` past it.
 *
 * The bytes are interpreted in the byte order of the host.
 *
 * Params:
 *   buffer = The data to read from, shortened by `T.sizeof` on success
 *
 * Returns:
 *   The value read. If `buffer` holds fewer than `T.sizeof` bytes (truncated
 *   or corrupted data), `T.init` is returned and `buffer` is emptied, so that
 *   callers looping until their input is exhausted terminate instead of
 *   reading past the end of the data.
 */
T read(T)(ref const(ubyte)[] buffer) @nogc nothrow
{
    version (X86)         enum hasUnalignedLoads = true;
    else version (X86_64) enum hasUnalignedLoads = true;
    else                  enum hasUnalignedLoads = false;

    if (buffer.length < T.sizeof)
    {
        buffer = buffer[$ .. $];
        return T.init;
    }

    static if (hasUnalignedLoads || T.alignof == 1)
    {
        T result = *(cast(T*) buffer.ptr);
    }
    else
    {
        // The data is not necessarily aligned for `T`
        T result = void;
        memcpy(&result, buffer.ptr, T.sizeof);
    }

    buffer = buffer[T.sizeof .. $];
    return result;
}
736 
/**
 * Reads a null-terminated string from `buffer`.
 *
 * The search for the terminator never goes past the end of `buffer`:
 * if there is none, the rest of `buffer` is returned as the string.
 *
 * Params:
 *   buffer = The data to read from. On return it starts right after the
 *            terminator, or is empty if no terminator was found.
 *
 * Returns:
 *   The string, without its terminator. It is a slice of `buffer`.
 */
const(char)[] readStringz(ref const(ubyte)[] buffer) @nogc nothrow
{
    size_t len = 0;
    while (len < buffer.length && buffer[len] != 0)
        ++len;

    const str = (cast(const(char)*) buffer.ptr)[0 .. len];
    // The terminator is skipped too, provided that there is one
    const consumed = len < buffer.length ? len + 1 : len;
    buffer = buffer[consumed .. $];
    return str;
}
745 
/**
 * Reads an unsigned LEB128 encoded integer from the front of `buffer`.
 *
 * Each byte carries 7 bits of the value, least significant group first,
 * and has its high bit set when another byte follows.
 *
 * Params:
 *   buffer = The data to read from, advanced past the bytes consumed
 *
 * Returns:
 *   The decoded value. Bits that do not fit in 64 bits are discarded, and
 *   decoding stops at the end of `buffer` if the data is truncated.
 */
ulong readULEB128(ref const(ubyte)[] buffer) @nogc nothrow
{
    ulong val = 0;
    uint shift = 0;

    while (buffer.length > 0)
    {
        const ubyte b = buffer[0];
        buffer = buffer[1 .. $];

        // The group has to be widened before shifting: as an `int` it would
        // lose every bit from the 32nd on, and be sign-extended when merged.
        if (shift < 64)
            val |= cast(ulong) (b & 0x7f) << shift;
        if ((b & 0x80) == 0) break;
        shift += 7;
    }

    return val;
}

unittest
{
    const(ubyte)[] data = [0xe5, 0x8e, 0x26, 0xDE, 0xAD, 0xBE, 0xEF];
    assert(readULEB128(data) == 624_485);
    assert(data[] == [0xDE, 0xAD, 0xBE, 0xEF]);

    // Values that do not fit in 31 bits
    const(ubyte)[] allOnes = [0xFF, 0xFF, 0xFF, 0xFF, 0x0F];
    assert(readULEB128(allOnes) == 0xFFFF_FFFF);
    assert(allOnes.length == 0);

    const(ubyte)[] wide = [0x80, 0x80, 0x80, 0x80, 0x80, 0x01];
    assert(readULEB128(wide) == 1UL << 35);

    // Truncated data
    const(ubyte)[] truncated = [0x80];
    assert(readULEB128(truncated) == 0);
    assert(truncated.length == 0);
}
769 
/**
 * Reads a signed LEB128 encoded integer from the front of `buffer`.
 *
 * Each byte carries 7 bits of the value, least significant group first,
 * and has its high bit set when another byte follows. Bit 6 of the last
 * byte is the sign of the value.
 *
 * Params:
 *   buffer = The data to read from, advanced past the bytes consumed
 *
 * Returns:
 *   The decoded value. Bits that do not fit in 64 bits are discarded, and
 *   decoding stops at the end of `buffer` if the data is truncated.
 */
long readSLEB128(ref const(ubyte)[] buffer) @nogc nothrow
{
    enum uint size = long.sizeof * 8;
    long val = 0;
    uint shift = 0;
    ubyte b;

    while (buffer.length > 0)
    {
        b = buffer[0];
        buffer = buffer[1 .. $];

        // The group has to be widened before shifting, as an `int` would
        // lose every bit from the 32nd on.
        if (shift < size)
            val |= cast(long) (b & 0x7f) << shift;
        shift += 7;
        if ((b & 0x80) == 0)
            break;
    }

    // Sign-extend, using a 64-bit mask so that `shift` can exceed 31
    if (shift < size && (b & 0x40) != 0)
        val |= -(1L << shift);

    return val;
}

unittest
{
    const(ubyte)[] data = [0x7e, 0x02, 0x80, 0x7f];
    assert(readSLEB128(data) == -2);
    assert(readSLEB128(data) == 2);
    assert(readSLEB128(data) == -128);
    assert(data.length == 0);

    // Values that do not fit in 32 bits
    const(ubyte)[] negative = [0x80, 0x80, 0x80, 0x80, 0x70];
    assert(readSLEB128(negative) == -0x1_0000_0000L);
    const(ubyte)[] positive = [0x80, 0x80, 0x80, 0x80, 0x10];
    assert(readSLEB128(positive) == 0x1_0000_0000L);
}
791 
/// Content type codes (`DW_LNCT_*`) used in the `type` half of an
/// `EntryFormatPair`.
enum DW_LNCT : ushort
{
    path           = 0x0001,
    directoryIndex = 0x0002,
    timestamp      = 0x0003,
    size           = 0x0004,
    md5            = 0x0005,

    // Bounds of the user-defined range
    loUser         = 0x2000,
    hiUser         = 0x3fff,
}
802 
/// Attribute form codes (`DW_FORM_*`) used in the `form` half of an
/// `EntryFormatPair`; the form determines how the entry's data is encoded.
enum DW_FORM : ubyte
{
    addr           = 0x01,
    // 0x02 is intentionally absent
    block2         = 0x03,
    block4         = 0x04,
    data2          = 0x05,
    data4          = 0x06,
    data8          = 0x07,
    string_        = 0x08,
    block          = 0x09,
    block1         = 0x0a,
    data1          = 0x0b,
    flag           = 0x0c,
    sdata          = 0x0d,
    strp           = 0x0e,
    udata          = 0x0f,
    ref_addr       = 0x10,
    ref1           = 0x11,
    ref2           = 0x12,
    ref4           = 0x13,
    ref8           = 0x14,
    ref_udata      = 0x15,
    indirect       = 0x16,
    sec_offset     = 0x17,
    exprloc        = 0x18,
    flag_present   = 0x19,
    strx           = 0x1a,
    addrx          = 0x1b,
    ref_sup4       = 0x1c,
    strp_sup       = 0x1d,
    data16         = 0x1e,
    line_strp      = 0x1f,
    ref_sig8       = 0x20,
    implicit_const = 0x21,
    loclistx       = 0x22,
    rnglistx       = 0x23,
    ref_sup8       = 0x24,
    strx1          = 0x25,
    strx2          = 0x26,
    strx3          = 0x27,
    strx4          = 0x28,
    addrx1         = 0x29,
    addrx2         = 0x2a,
    addrx3         = 0x2b,
    addrx4         = 0x2c,
}
849 
/// One (content type, form) descriptor of an entry format table, as produced
/// by `readEntryFormat`.
struct EntryFormatPair
{
    /// Which piece of information the field carries (path, directory index, ...)
    DW_LNCT type;
    /// How that field is encoded in the byte stream
    DW_FORM form;
}
855 
/**
 * Reads a DWARF v5 directory/file name entry format.
 *
 * The table starts with a one-byte pair count, followed by that many
 * (content type, form) pairs, each half encoded as ULEB128.
 *
 * Params:
 *   buffer = bytes to decode from; advanced past the table
 *
 * Returns: The decoded pairs, in the order they appear in the stream.
 */
Array!EntryFormatPair readEntryFormat(ref const(ubyte)[] buffer) @nogc nothrow
{
    const ubyte pairCount = buffer.read!ubyte();

    Array!EntryFormatPair formats;
    formats.length = pairCount;

    foreach (ref entry; formats)
    {
        // The type precedes the form in the stream
        const rawType = buffer.readULEB128();
        const rawForm = buffer.readULEB128();

        entry.type = cast(DW_LNCT) rawType;
        entry.form = cast(DW_FORM) rawForm;
    }

    debug (DwarfDebugMachine)
    {
        printf("entryFormat: (%d)\n", cast(int) formats.length);
        foreach (ref entry; formats)
        {
            printf("\t- type: %d, form: %d\n", cast(int) entry.type, cast(int) entry.form);
        }
    }

    return formats;
}
879 
/// Opcodes of the line number program with a fixed meaning.
/// `extendedOp` (zero) introduces an `ExtendedOpcode` instead.
enum StandardOpcode : ubyte
{
    extendedOp       = 0x00,
    copy             = 0x01,
    advancePC        = 0x02,
    advanceLine      = 0x03,
    setFile          = 0x04,
    setColumn        = 0x05,
    negateStatement  = 0x06,
    setBasicBlock    = 0x07,
    constAddPC       = 0x08,
    fixedAdvancePC   = 0x09,
    setPrologueEnd   = 0x0a,
    setEpilogueBegin = 0x0b,
    setISA           = 0x0c,
}
896 
/// Opcodes of the line number program that follow a
/// `StandardOpcode.extendedOp` marker.
enum ExtendedOpcode : ubyte
{
    endSequence      = 0x01,
    setAddress       = 0x02,
    defineFile       = 0x03,
    setDiscriminator = 0x04,
}
904 
/// Registers of the line number state machine. The initializers below (and
/// D's zero/`false`/`null` defaults for the others) define the start state.
struct StateMachine
{
    // Position registers
    const(void)* address;
    uint operationIndex;
    uint fileIndex = 1; // starts at 1, not 0
    uint line = 1;      // starts at 1, not 0
    uint column;
    uint isa;
    uint discriminator;

    // Flag registers, all initially cleared
    bool isStatement;
    bool isBasicBlock;
    bool isEndSequence;
    bool isPrologueEnd;
    bool isEpilogueBegin;
}
920 
/// Source position attached to an address: the state machine's file index
/// and line number, in that order.
struct LocationInfo
{
    int file, line;
}
926 
/// Decoded header of one line number program plus a slice of its opcodes,
/// as filled in by `readLineNumberProgram`.
struct LineNumberProgram
{
    /// Initial length field; taken from the 64-bit field when the 32-bit one
    /// holds the `uint.max` escape value
    ulong unitLength;
    /// DWARF version of this unit
    ushort dwarfVersion;
    /// Only read from the header for DWARF v5+; left at 0 otherwise
    ubyte addressSize;
    /// Only read from the header for DWARF v5+; left at 0 otherwise
    ubyte segmentSelectorSize;
    /// Header length field; 64 bits wide in the 64-bit DWARF format, else 32
    ulong headerLength;
    ubyte minimumInstructionLength;
    /// Read from the header for DWARF v4+; set to 1 for older versions
    ubyte maximumOperationsPerInstruction;
    /// `true` when the corresponding header byte is non-zero
    bool defaultIsStatement;
    byte lineBase;
    ubyte lineRange;
    ubyte opcodeBase;
    /// The `opcodeBase - 1` bytes following `opcodeBase` in the header
    const(ubyte)[] standardOpcodeLengths;
    /// Include directories listed in the header
    Array!(const(char)[]) includeDirectories;
    /// Source files listed in the header
    Array!SourceFile sourceFiles;
    /// The opcode bytes: everything between the end of the header and the
    /// end of the unit
    const(ubyte)[] program;
}
945 
/// A file name entry of a line number program header.
struct SourceFile
{
    /// File name as stored in the header (or a fallback placeholder)
    const(char)[] file;
    /// Index into `LineNumberProgram.includeDirectories`, offset by one:
    /// entry `dirIndex - 1` is the directory, and 0 means no directory entry.
    size_t dirIndex; // 1-based
}
951 
/**
 * Decodes the header of the line number program at the front of `data` and
 * locates its opcode stream.
 *
 * Both the 32-bit and 64-bit DWARF formats are handled. For DWARF v5 the
 * directory and file tables are decoded through their entry formats; only
 * inline strings (`DW_FORM.string_`) are resolved for paths, any other field
 * is skipped and the path keeps a placeholder. Older versions use the
 * NUL-terminated sequence layout.
 *
 * Params:
 *   data = bytes starting at the unit's initial length field; on return it
 *          starts right after the end of this unit
 *
 * Returns: The decoded header; `program` slices the opcode bytes of the unit.
 */
LineNumberProgram readLineNumberProgram(ref const(ubyte)[] data) @nogc nothrow
{
    const originalData = data;

    LineNumberProgram lp;

    // An initial length of 0xffffffff announces the 64-bit DWARF format, in
    // which the real length follows as a 64-bit value.
    bool is64bitDwarf = false;
    lp.unitLength = data.read!uint();
    if (lp.unitLength == uint.max)
    {
        is64bitDwarf = true;
        lp.unitLength = data.read!ulong();
    }

    // `unitLength` is counted from here (see `programEnd` below).
    const dwarfVersionFieldOffset = cast(size_t) (data.ptr - originalData.ptr);
    lp.dwarfVersion = data.read!ushort();
    assert(lp.dwarfVersion < 6, "DWARF v6+ not supported yet");

    if (lp.dwarfVersion >= 5)
    {
        lp.addressSize = data.read!ubyte();
        lp.segmentSelectorSize = data.read!ubyte();
    }

    lp.headerLength = (is64bitDwarf ? data.read!ulong() : data.read!uint());

    // `headerLength` is counted from here (see `programStart` below).
    const minimumInstructionLengthFieldOffset = cast(size_t) (data.ptr - originalData.ptr);
    lp.minimumInstructionLength = data.read!ubyte();

    lp.maximumOperationsPerInstruction = (lp.dwarfVersion >= 4 ? data.read!ubyte() : 1);
    lp.defaultIsStatement = (data.read!ubyte() != 0);
    lp.lineBase = data.read!byte();
    lp.lineRange = data.read!ubyte();
    lp.opcodeBase = data.read!ubyte();

    lp.standardOpcodeLengths = data[0 .. lp.opcodeBase - 1];
    data = data[lp.opcodeBase - 1 .. $];

    if (lp.dwarfVersion >= 5)
    {
        // Skips over one field whose content is of no interest, based solely
        // on its form. Every form with a size that is fixed or encoded in the
        // data itself is supported, so that vendor-defined content types do
        // not abort the parsing.
        static void consumeGenericForm(ref const(ubyte)[] data, DW_FORM form, bool is64bitDwarf)
        {
            with (DW_FORM) switch (form)
            {
                // Section offsets: their width depends on the DWARF format
                case strp, strp_sup, line_strp, sec_offset, ref_addr:
                    data = data[is64bitDwarf ? 8 : 4 .. $]; break;
                case data1, strx1, flag, ref1, addrx1:
                    data = data[1 .. $]; break;
                case data2, strx2, ref2, addrx2:
                    data = data[2 .. $]; break;
                case strx3, addrx3:
                    data = data[3 .. $]; break;
                case data4, strx4, ref4, ref_sup4, addrx4:
                    data = data[4 .. $]; break;
                case data8, ref8, ref_sig8, ref_sup8:
                    data = data[8 .. $]; break;
                case data16:
                    data = data[16 .. $]; break;
                case flag_present:
                    break; // the flag is implied; no bytes are stored
                // LEB128 values: signed and unsigned encodings are delimited
                // the same way, so the unsigned reader skips both.
                case udata, strx, sdata, ref_udata, addrx, loclistx, rnglistx:
                    data.readULEB128(); break;
                case string_:
                    data.readStringz(); break;
                case block, exprloc:
                    const length = cast(size_t) data.readULEB128();
                    data = data[length .. $];
                    break;
                case block1:
                {
                    const size_t len = data.read!ubyte();
                    data = data[len .. $];
                    break;
                }
                case block2:
                {
                    const size_t len = data.read!ushort();
                    data = data[len .. $];
                    break;
                }
                case block4:
                {
                    const size_t len = data.read!uint();
                    data = data[len .. $];
                    break;
                }
                default:
                    assert(0); // TODO: support the remaining forms (addr, indirect, ...)
            }
        }

        const dirFormat = data.readEntryFormat();
        lp.includeDirectories.length = cast(size_t) data.readULEB128();
        foreach (ref dir; lp.includeDirectories)
        {
            dir = "<unknown dir>"; // fallback
            foreach (ref pair; dirFormat)
            {
                if (pair.type == DW_LNCT.path &&
                    // TODO: support other forms too (offsets in other sections)
                    pair.form == DW_FORM.string_)
                {
                    dir = data.readStringz();
                }
                else // uninteresting type
                    consumeGenericForm(data, pair.form, is64bitDwarf);
            }
        }

        const fileFormat = data.readEntryFormat();
        lp.sourceFiles.length = cast(size_t) data.readULEB128();
        foreach (ref sf; lp.sourceFiles)
        {
            sf.file = "<unknown file>"; // fallback
            foreach (ref pair; fileFormat)
            {
                if (pair.type == DW_LNCT.path &&
                    // TODO: support other forms too (offsets in other sections)
                    pair.form == DW_FORM.string_)
                {
                    sf.file = data.readStringz();
                }
                else if (pair.type == DW_LNCT.directoryIndex)
                {
                    if (pair.form == DW_FORM.data1)
                        sf.dirIndex = data.read!ubyte();
                    else if (pair.form == DW_FORM.data2)
                        sf.dirIndex = data.read!ushort();
                    else if (pair.form == DW_FORM.udata)
                        sf.dirIndex = cast(size_t) data.readULEB128();
                    else
                        assert(0); // not allowed by DWARF 5 spec
                    sf.dirIndex++; // DWARF v5 indices are 0-based
                }
                else // uninteresting type
                    consumeGenericForm(data, pair.form, is64bitDwarf);
            }
        }
    }
    else
    {
        // A sequence ends with a null-byte.
        // Entries are decoded twice: once on a copy of the slice to count
        // them, then for real once the result array has been sized.
        static auto readSequence(alias ReadEntry)(ref const(ubyte)[] data)
        {
            alias ResultType = typeof(ReadEntry(data));

            // Takes the slice by value, so the caller's position is untouched
            static size_t count(const(ubyte)[] data)
            {
                size_t count = 0;
                while (data.length && data[0] != 0)
                {
                    ReadEntry(data);
                    ++count;
                }
                return count;
            }

            const numEntries = count(data);

            Array!ResultType result;
            result.length = numEntries;

            foreach (i; 0 .. numEntries)
                result[i] = ReadEntry(data);

            data = data[1 .. $]; // skip over sequence-terminating null

            return result;
        }

        /// Directories are simply a sequence of NUL-terminated strings
        static const(char)[] readIncludeDirectoryEntry(ref const(ubyte)[] data)
        {
            return data.readStringz();
        }
        lp.includeDirectories = readSequence!readIncludeDirectoryEntry(data);

        /// Files are a NUL-terminated name followed by three ULEB128 values,
        /// of which only the directory index is kept
        static SourceFile readFileNameEntry(ref const(ubyte)[] data)
        {
            const file = data.readStringz();
            const dirIndex = cast(size_t) data.readULEB128();
            data.readULEB128(); // last mod
            data.readULEB128(); // file len

            return SourceFile(
                file,
                dirIndex,
            );
        }
        lp.sourceFiles = readSequence!readFileNameEntry(data);
    }

    debug (DwarfDebugMachine)
    {
        printf("include_directories: (%d)\n", cast(int) lp.includeDirectories.length);
        foreach (dir; lp.includeDirectories)
            printf("\t- %.*s\n", cast(int) dir.length, dir.ptr);
        printf("source_files: (%d)\n", cast(int) lp.sourceFiles.length);
        foreach (ref sf; lp.sourceFiles)
        {
            // `%llu` expects a 64-bit argument, whereas `size_t` is only
            // 32 bits wide on 32-bit targets, hence the explicit casts.
            if (sf.dirIndex > lp.includeDirectories.length)
                printf("\t- Out of bound directory! (%llu): %.*s\n",
                       cast(ulong) sf.dirIndex, cast(int) sf.file.length, sf.file.ptr);
            else if (sf.dirIndex > 0)
            {
                const dir = lp.includeDirectories[sf.dirIndex - 1];
                printf("\t- (Dir:%llu:%.*s/)%.*s\n", cast(ulong) sf.dirIndex,
                       cast(int) dir.length, dir.ptr,
                       cast(int) sf.file.length, sf.file.ptr);
            }
            else
                printf("\t- %.*s\n", cast(int) sf.file.length, sf.file.ptr);
        }
    }

    // Both lengths are relative to the byte following their own field.
    const programStart = cast(size_t) (minimumInstructionLengthFieldOffset + lp.headerLength);
    const programEnd = cast(size_t) (dwarfVersionFieldOffset + lp.unitLength);
    lp.program = originalData[programStart .. programEnd];

    data = originalData[programEnd .. $];

    return lp;
}