/**
 * Generates a human-readable stack-trace on POSIX targets using DWARF
 *
 * The common use case for printing a stack trace is when `toString` is called
 * on a `Throwable` (see `object.d`). It will iterate on what is likely to be
 * the default trace handler (see `core.runtime : defaultTraceHandler`).
 * The class returned by `defaultTraceHandler` is what ends up calling into
 * this module, through the use of `core.internal.traits : externDFunc`.
 *
 * The entry point of this module is `traceHandlerOpApplyImpl`,
 * and the only really "public" symbol (since all `rt` symbols are private).
 * In the future, this implementation should probably be made idiomatic,
 * so that it can for example work with attributes.
 *
 * Resilience:
 * As this module is used for diagnostic, it should handle failures
 * as gracefully as possible. Having the runtime error out on printing
 * the stack trace one is trying to debug would be quite a terrible UX.
 * For this reason, this module works on a "best effort" basis and will
 * sometimes print mangled symbols, or "???" when it cannot do anything
 * more useful.
 *
 * Source_of_data:
 * This module uses two main sources for generating human-readable data.
 * First, it uses `backtrace_symbols` to obtain the name of the symbols
 * (functions or methods) associated with the addresses.
 * Since the names are mangled, it will also call into `core.demangle`,
 * and doesn't need to use any DWARF information for this,
 * however a future extension could make use of the call frame information
 * (See DWARF4 "6.4 Call Frame Information", PDF page 126).
 *
 * The other piece of data used is the DWARF `.debug_line` section,
 * which contains the line information of a program, necessary to associate
 * the instruction address with its (file, line) information.
 *
 * Since debug line information is quite large, it is encoded using a
 * program that is to be fed to a finite state machine.
 * See `runStateMachine` and `readLineNumberProgram` for more details.
 *
 * DWARF_Version:
 * This module only supports DWARF 3, 4 and 5.
 *
 * Reference: http://www.dwarfstd.org/
 * Copyright: Copyright Digital Mars 2015 - 2015.
 * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors:   Yazan Dabain, Sean Kelly
 * Source: $(DRUNTIMESRC rt/backtrace/dwarf.d)
 */

module core.internal.backtrace.dwarf;

import core.internal.execinfo;
import core.internal.string;

version (Posix):

version (OSX)
    version = Darwin;
else version (iOS)
    version = Darwin;
else version (TVOS)
    version = Darwin;
else version (WatchOS)
    version = Darwin;

version (Darwin)
    import core.internal.backtrace.macho;
else
    import core.internal.backtrace.elf;

import core.internal.container.array;
import core.stdc.string : strlen, memcpy;

//debug = DwarfDebugMachine;
debug(DwarfDebugMachine) import core.stdc.stdio : printf;

struct Location
{
    /**
     * Address of the instruction for which this location is for.
     */
    const(void)* address;

    /**
     * The name of the procedure, or function, this address is in.
     */
    const(char)[] procedure;

    /**
     * Path to the file this location references, relative to `directory`
     *
     * Note that depending on implementation, this could be just a name,
     * a relative path, or an absolute path.
     *
     * If no debug info is present, this may be `null`.
     */
    const(char)[] file;

    /**
     * Directory where `file` resides
     *
     * This may be `null`, either if there is no debug info,
     * or if the compiler implementation doesn't use this feature (e.g. DMD).
     */
    const(char)[] directory;

    /**
     * Line within the file that correspond to this `location`.
     *
     * Note that in addition to a positive value, the values `0` and `-1`
     * are to be expected by consumers. A value of `0` means that the code
     * is not attributable to a specific line in the file, e.g. module-specific
     * generated code, and `-1` means that no debug info could be found.
     */
    int line = -1;

    /// Format this location into a human-readable string
    void toString (scope void delegate(scope const char[]) sink) const
    {
        import core.demangle;

        // If there's no file information, there shouldn't be any directory
        // information. If there is we will simply ignore it.
        if (this.file.length)
        {
            // Note: Sink needs to handle empty data
            sink(this.directory);
            // Only POSIX path because this module is not used on Windows
            if (this.directory.length && this.directory[$ - 1] != '/')
                sink("/");
            sink(this.file);
        }
        else
            // Most likely, no debug information
            sink("??");

        // Also no debug infos
        if (this.line < 0)
            sink(":?");
        // Line can be 0, e.g. if the frame is in generated code
        else if (this.line)
        {
            sink(":");
            sink(signedToTempString(this.line));
        }

        char[1024] symbolBuffer = void;
        // When execinfo style is used, procedure can be null if the format
        // of the line cannot be read, but it generally should not happen
        if (this.procedure.length)
        {
            sink(" ");
            sink(demangle(this.procedure, symbolBuffer, getCXXDemangler()));
        }

        sink(" [0x");
        sink(unsignedToTempString!16(cast(size_t) this.address));
        sink("]");
    }
}

/**
 * Entry point of this module: resolves and formats every frame of a trace
 *
 * Collects the address and symbol name of each frame, drops the leading
 * frames up to and including the runtime's "throw" function, resolves
 * file / line information when debug info is available, and hands every
 * formatted frame to `dg`.
 *
 * Params:
 *   numFrames      = Number of frames in the trace
 *   getNthAddress  = Returns the return address of the n-th frame
 *   getNthFuncName = Returns the (mangled) symbol name of the n-th frame
 *   dg             = `opApply`-style delegate receiving (index, text)
 *
 * Returns:
 *   The first non-zero value returned by `dg`, or `0`.
 */
int traceHandlerOpApplyImpl(size_t numFrames,
                            scope const(void)* delegate(size_t) getNthAddress,
                            scope const(char)[] delegate(size_t) getNthFuncName,
                            scope int delegate(ref size_t, ref const(char[])) dg)
{
    auto image = Image.openSelf();

    Array!Location locations;
    locations.length = numFrames;
    size_t startIdx;
    foreach (idx; 0 .. numFrames)
    {
        locations[idx].address = getNthAddress(idx);
        locations[idx].procedure = getNthFuncName(idx);

        // NOTE: The first few frames with the current implementation are
        //       inside core.runtime and the object code, so eliminate
        //       these for readability.
        //       They also might depend on build parameters, which would make
        //       using a fixed number of frames otherwise brittle.
        version (LDC) enum BaseExceptionFunctionName = "_d_throw_exception";
        else          enum BaseExceptionFunctionName = "_d_throwdwarf";
        if (!startIdx && locations[idx].procedure == BaseExceptionFunctionName)
            startIdx = idx + 1;
    }


    if (!image.isValid())
        return locations[startIdx .. $].processCallstack(null, 0, dg);

    // find address -> file, line mapping using dwarf debug_line
    return image.processDebugLineSectionData(
        (line) => locations[startIdx .. $].processCallstack(line, image.baseAddress, dg));
}

/// Fixed-size text buffer that truncates (with a trailing "...") on overflow
struct TraceInfoBuffer
{
    private char[1536] buf = void;
    // Number of bytes written; `buf.length + 1` marks a truncated buffer
    private size_t position;

    // BUG: https://issues.dlang.org/show_bug.cgi?id=21285
    @safe pure nothrow @nogc
    {
        ///
        inout(char)[] opSlice() inout return
        {
            return this.buf[0 .. this.position > $ ? $ : this.position];
        }

        ///
        void reset()
        {
            this.position = 0;
        }
    }

    /// Used as `sink` argument to `Location.toString`
    void put(scope const char[] data)
    {
        // We cannot write anymore
        if (this.position > this.buf.length)
            return;

        if (this.position + data.length > this.buf.length)
        {
            this.buf[this.position .. $] = data[0 .. this.buf.length - this.position];
            this.buf[$ - 3 .. $] = "...";
            // +1 is a marker for the '...', otherwise if the symbol
            // name was to exactly fill the buffer,
            // we'd discard anything else without printing the '...'.
            this.position = this.buf.length + 1;
            return;
        }

        this.buf[this.position .. this.position + data.length] = data;
        this.position += data.length;
    }
}

private:

/**
 * Resolve `locations`, format each of them and feed the result to `dg`
 *
 * Iteration stops after the frame whose procedure is `_Dmain`,
 * or as soon as `dg` returns a non-zero value.
 *
 * Params:
 *   locations            = The frames to process
 *   debugLineSectionData = Content of `.debug_line`, or `null` if unavailable
 *   baseAddress          = The offset to apply to every address
 *   dg                   = `opApply`-style delegate receiving (index, text)
 *
 * Returns:
 *   The first non-zero value returned by `dg`, or `0`.
 */
int processCallstack(Location[] locations, const(ubyte)[] debugLineSectionData,
                     size_t baseAddress, scope int delegate(ref size_t, ref const(char[])) dg)
{
    if (debugLineSectionData)
        resolveAddresses(debugLineSectionData, locations, baseAddress);
    version (Darwin)
    {
        if (!debugLineSectionData)
            resolveAddressesWithAtos(locations);
    }

    TraceInfoBuffer buffer;
    foreach (idx, const ref loc; locations)
    {
        buffer.reset();
        loc.toString(&buffer.put);

        auto lvalue = buffer[];
        if (auto ret = dg(idx, lvalue))
            return ret;

        if (loc.procedure == "_Dmain")
            break;
    }

    return 0;
}

version (Darwin) {
    /**
     * Resolve the addresses of `locations` using `atos` (executable that ships with XCode)
     *
     * Spawns a child process that calls `atos`. Communication is through stdin/stdout pipes.
     *
     * After this function successfully completes, `locations` will contain
     * file / line information.
     *
     * The lifetime of the `Location` data surpasses function return (strndup is used).
     *
     * Params:
     *   locations = The locations to resolve
     */
    private void resolveAddressesWithAtos(Location[] locations) @nogc nothrow
    {
        import core.stdc.stdio : fclose, fflush, fgets, fprintf, printf, snprintf;
        import core.stdc.stdlib : exit;
        import core.sys.posix.stdio : fdopen;
        import core.sys.posix.unistd : close, dup2, execlp, fork, getpid, pipe;

        static void closePipe(ref int[2] fds) @nogc nothrow
        {
            close(fds[0]);
            close(fds[1]);
        }

        // Create in/out pipes to communicate with the forked exec.
        // The dummy pipes are there to prevent funny issues when stdin/stdout
        // is closed and pipe returns id 0 or 1.
        int[2] dummy_pipes;
        int[2] pipes_to_atos;
        int[2] pipes_from_atos;
        if (pipe(dummy_pipes) < 0)
        {
            printf("some pipe creation error!\n");
            return;
        }
        if (pipe(pipes_to_atos) < 0)
        {
            closePipe(dummy_pipes);
            printf("some pipe creation error!\n");
            return;
        }
        if (pipe(pipes_from_atos) < 0)
        {
            closePipe(dummy_pipes);
            closePipe(pipes_to_atos);
            printf("some pipe creation error!\n");
            return;
        }
        closePipe(dummy_pipes);

        auto write_to_atos = pipes_to_atos[1];
        auto read_from_atos = pipes_from_atos[0];
        auto atos_stdin = pipes_to_atos[0];
        auto atos_stdout = pipes_from_atos[1];
        auto self_pid = cast(int) getpid();

        // Spawn a child process that calls atos, reads/writes from the pipes, and then exits.
        auto child_id = fork();
        if (child_id == -1)
        {
            // Do not leak the descriptors when no child could be created
            closePipe(pipes_to_atos);
            closePipe(pipes_from_atos);
            printf("some fork error!\n");
            return;
        }
        else if (child_id == 0)
        {
            // We are in the child process, spawn atos and link pipes
            // Close unused read/write ends of pipes
            close(write_to_atos);
            close(read_from_atos);
            // Link pipes to stdin/stdout
            dup2(atos_stdin, 0);
            close(atos_stdin);
            dup2(atos_stdout, 1);
            close(atos_stdout);
            // Large enough for any `int` ("-2147483648" plus the terminator)
            char[12] pid_str;
            snprintf(pid_str.ptr, pid_str.sizeof, "%d", cast(int) self_pid);
            const(char)* atos_executable = "atos";
            const(char)* atos_p_arg = "-p";
            const(char)* atos_fullpath_arg = "-fullPath";
            execlp(atos_executable, atos_executable, atos_fullpath_arg, atos_p_arg, pid_str.ptr, null);
            // If exec returns, an error occurred, need to exit the forked process here.
            printf("some exec error!\n");
            exit(0);
        }

        // Parent process just continues from here.
        // Close unused pipes
        close(atos_stdin);
        close(atos_stdout);
        auto to_atos = fdopen(write_to_atos, "w");
        auto from_atos = fdopen(read_from_atos, "r");
        if (!to_atos || !from_atos)
        {
            // A stream owns its descriptor once `fdopen` succeeded,
            // otherwise the raw descriptor is still ours to close.
            if (to_atos)
                fclose(to_atos);
            else
                close(write_to_atos);
            if (from_atos)
                fclose(from_atos);
            else
                close(read_from_atos);
            printf("\nDid not succeed in using 'atos' for extra debug information.\n");
            return;
        }

        // buffer for atos reading. Note that symbol names can be super large...
        static char[16 * 1024] read_buffer = void;
        char* status_ptr = null;
        foreach (ref loc; locations)
        {
            fprintf(to_atos, "%p\n", loc.address);
            fflush(to_atos);
            read_buffer[0] = '\0';
            status_ptr = fgets(read_buffer.ptr, read_buffer.sizeof, from_atos);
            if (!status_ptr)
                break;
            Location parsed_loc = parseAtosLine(read_buffer.ptr);
            if (parsed_loc.line != -1)
            {
                // Only update the file:line info, keep the procedure name as found before
                // (preserving the standard truncation).
                loc.file = parsed_loc.file;
                loc.line = parsed_loc.line;
            }
        }
        if (!status_ptr)
            printf("\nDid not succeed in using 'atos' for extra debug information.\n");

        // `fclose` also closes the underlying descriptor: calling `close`
        // on it afterwards would be a double close.
        fclose(to_atos);
        fclose(from_atos);
    }

    /**
     * Parse one line of `atos` output
     *
     * Params:
     *   buffer = NUL-terminated line as printed by `atos`, may be `null`
     *
     * Returns:
     *   A `Location` whose `line` is `-1` unless a `(file:line)` suffix
     *   was found. `procedure` and `file` are allocated with `strndup`.
     */
    private Location parseAtosLine(char* buffer) @nogc nothrow
    {
        // The line from `atos` is in one of these formats:
        // myfunction (in library.dylib) (sourcefile.c:17)
        // myfunction (in library.dylib) + 0x1fe
        // myfunction (in library.dylib) + 15
        // 0xdeadbeef (in library.dylib) + 0x1fe
        // 0xdeadbeef (in library.dylib) + 15
        // 0xdeadbeef (in library.dylib)
        // 0xdeadbeef
        import core.stdc.stdlib : atoi;
        import core.stdc.string : strchr, strstr;
        import core.sys.posix.string : strndup;

        Location loc;
        if (!buffer)
            return loc;
        if (buffer[0] == '0' && buffer[1] == 'x')
            // no named symbol found
            return loc;

        const symbolname_end = strstr(buffer, " (in ");
        if (!symbolname_end)
            return loc;
        const symbolname_size = symbolname_end - buffer;
        if (auto name = strndup(buffer, symbolname_size))
            loc.procedure = name[0 .. symbolname_size];

        const filename_marker = strstr(symbolname_end, ") (");
        if (!filename_marker)
            return loc;
        const filename_start = filename_marker + 3;

        const colon_location = strchr(filename_start, ':');
        if (!colon_location)
            return loc;
        const filename_size = colon_location - filename_start;
        if (auto name = strndup(filename_start, filename_size))
            loc.file = name[0 .. filename_size];

        const final_paren = strchr(colon_location + 1, ')');
        if (!final_paren)
            return loc;
        loc.line = atoi(colon_location + 1);
        return loc;
    }
}

/**
 * Resolve the addresses of `locations` using `debugLineSectionData`
 *
 * Runs the DWARF state machine on `debugLineSectionData`,
 * assuming it represents a debugging program describing the addresses
 * in a continuous and increasing manner.
 *
 * After this function successfully completes, `locations` will contain
 * file / line information.
 *
 * Note that the lifetime of the `Location` data is bound to the lifetime
 * of `debugLineSectionData`.
 *
 * Params:
 *   debugLineSectionData = A DWARF program to feed the state machine
 *   locations = The locations to resolve
 *   baseAddress = The offset to apply to every address
 */
void resolveAddresses(const(ubyte)[] debugLineSectionData, Location[] locations, size_t baseAddress) @nogc nothrow
{
    debug(DwarfDebugMachine) import core.stdc.stdio;

    size_t numberOfLocationsFound = 0;

    const(ubyte)[] dbg = debugLineSectionData;
    while (dbg.length > 0)
    {
        debug(DwarfDebugMachine) printf("new debug program\n");
        const lp = readLineNumberProgram(dbg);

        LocationInfo lastLoc = LocationInfo(-1, -1);
        const(void)* lastAddress;

        debug(DwarfDebugMachine) printf("program:\n");
        runStateMachine(lp,
            (const(void)* address, LocationInfo locInfo, bool isEndSequence)
            {
                // adjust to ASLR offset
                address += baseAddress;
                debug (DwarfDebugMachine)
                    printf("-- offsetting %p to %p\n", address - baseAddress, address);

                foreach (ref loc; locations)
                {
                    // If loc.line != -1, then it has been set previously.
                    // Some implementations (eg. dmd) write an address to
                    // the debug data multiple times, but so far I have found
                    // that the first occurrence to be the correct one.
                    if (loc.line != -1)
                        continue;

                    // Can be called with either `locInfo` or `lastLoc`
                    void update(const ref LocationInfo match)
                    {
                        // File indices are 1-based for DWARF < 5
                        const size_t fileIndex = match.file - (lp.dwarfVersion < 5 ? 1 : 0);
                        // Best effort: an index outside of the file table
                        // (e.g. file 0 with DWARF < 5) cannot be resolved,
                        // leave the location untouched instead of reading
                        // out of bounds.
                        if (fileIndex >= lp.sourceFiles.length)
                            return;
                        const sourceFile = lp.sourceFiles[fileIndex];
                        debug (DwarfDebugMachine)
                        {
                            printf("-- found for [%p]:\n", loc.address);
                            printf("--   file: %.*s\n",
                                   cast(int) sourceFile.file.length, sourceFile.file.ptr);
                            printf("--   line: %d\n", match.line);
                        }
                        // DMD emits entries with FQN, but other implementations
                        // (e.g. LDC) make use of directories
                        // See https://github.com/dlang/druntime/pull/2945
                        if (sourceFile.dirIndex != 0 &&
                            sourceFile.dirIndex <= lp.includeDirectories.length)
                            loc.directory = lp.includeDirectories[sourceFile.dirIndex - 1];

                        loc.file = sourceFile.file;
                        loc.line = match.line;
                        numberOfLocationsFound++;
                    }

                    // The state machine will not contain an entry for each
                    // address, as consecutive addresses with the same file/line
                    // are merged together to save on space, so we need to
                    // check if our address is within two addresses we get
                    // called with.
                    //
                    // Specs (DWARF v4, Section 6.2, PDF p.109) says:
                    // "We shrink it with two techniques. First, we delete from
                    // the matrix each row whose file, line, source column and
                    // discriminator information is identical with that of its
                    // predecessors.
                    if (loc.address == address)
                        update(locInfo);
                    else if (lastAddress &&
                             loc.address > lastAddress && loc.address < address)
                        update(lastLoc);
                }

                if (isEndSequence)
                {
                    lastAddress = null;
                }
                else
                {
                    lastAddress = address;
                    lastLoc = locInfo;
                }

                return numberOfLocationsFound < locations.length;
            }
        );

        if (numberOfLocationsFound == locations.length) return;
    }
}

/**
 * A callback type for `runStateMachine`
 *
 * The callback is called when certain specific opcode are encountered
 * (a.k.a when a complete `LocationInfo` is encountered).
 * See `runStateMachine` implementation and the DWARF specs for more detail.
 *
 * Params:
 *   address = The address that the `LocationInfo` describes
 *   info = The `LocationInfo` itself, describing `address`
 *   isEndSequence = Whether the end of a sequence has been reached
 */
alias RunStateMachineCallback =
    bool delegate(const(void)* address, LocationInfo info, bool isEndSequence)
    @nogc nothrow;

/**
 * Run the state machine to generate line number matrix
 *
 * Line number information generated by the compiler is stored in the
 * `.debug_line` section. Conceptually, it can be seen as a large matrix,
 * with row such as "file", "line", "column", "is_statement", etc...
 * However such a matrix would be too big to store in an object file,
 * so DWARF instead generate this matrix using bytecode fed to a state machine.
 *
 * Note:
 * Each compilation unit can have its own line number program.
 *
 * See_Also:
 * - DWARF v4, Section 6.2: Line Number Information
 *
 * Params:
 *   lp = Program to execute
 *   callback = Delegate to call whenever a LocationInfo is completed
 *
 * Returns:
 *   `false` if an error happened (e.g. unknown opcode or malformed program)
 */
bool runStateMachine(ref const(LineNumberProgram) lp, scope RunStateMachineCallback callback) @nogc nothrow
{
    // Both values are used as divisors below: a header carrying a zero
    // is malformed, report it instead of faulting.
    if (lp.lineRange == 0 || lp.maximumOperationsPerInstruction == 0)
        return false;

    StateMachine machine;
    machine.isStatement = lp.defaultIsStatement;

    const(ubyte)[] program = lp.program;
    while (program.length > 0)
    {
        size_t advanceAddressAndOpIndex(size_t operationAdvance)
        {
            const addressIncrement = lp.minimumInstructionLength * ((machine.operationIndex + operationAdvance) / lp.maximumOperationsPerInstruction);
            machine.address += addressIncrement;
            machine.operationIndex = (machine.operationIndex + operationAdvance) % lp.maximumOperationsPerInstruction;
            return addressIncrement;
        }

        ubyte opcode = program.read!ubyte();
        if (opcode < lp.opcodeBase)
        {
            switch (opcode) with (StandardOpcode)
            {
                case extendedOp:
                    // `len` counts the extended opcode byte plus its operands
                    size_t len = cast(size_t) program.readULEB128();
                    ubyte eopcode = program.read!ubyte();

                    switch (eopcode) with (ExtendedOpcode)
                    {
                        case endSequence:
                            machine.isEndSequence = true;
                            debug(DwarfDebugMachine) printf("endSequence %p\n", machine.address);
                            if (!callback(machine.address, LocationInfo(machine.fileIndex, machine.line), true)) return true;
                            machine = StateMachine.init;
                            machine.isStatement = lp.defaultIsStatement;
                            break;

                        case setAddress:
                            const address = program.read!(void*)();
                            debug(DwarfDebugMachine) printf("setAddress %p\n", address);
                            machine.address = address;
                            machine.operationIndex = 0;
                            break;

                        case defineFile: // TODO: add proper implementation
                            debug(DwarfDebugMachine) printf("defineFile\n");
                            // Malformed length: cannot skip the operands
                            if (len == 0 || len - 1 > program.length)
                                return false;
                            program = program[len - 1 .. $];
                            break;

                        case setDiscriminator:
                            const discriminator = cast(uint) program.readULEB128();
                            debug(DwarfDebugMachine) printf("setDiscriminator %d\n", discriminator);
                            machine.discriminator = discriminator;
                            break;

                        default:
                            // unknown opcode
                            debug(DwarfDebugMachine) printf("unknown extended opcode %d\n", cast(int) eopcode);
                            // Malformed length: cannot skip the operands
                            if (len == 0 || len - 1 > program.length)
                                return false;
                            program = program[len - 1 .. $];
                            break;
                    }

                    break;

                case copy:
                    debug(DwarfDebugMachine) printf("copy %p\n", machine.address);
                    if (!callback(machine.address, LocationInfo(machine.fileIndex, machine.line), false)) return true;
                    machine.isBasicBlock = false;
                    machine.isPrologueEnd = false;
                    machine.isEpilogueBegin = false;
                    machine.discriminator = 0;
                    break;

                case advancePC:
                    const operationAdvance = cast(size_t) readULEB128(program);
                    advanceAddressAndOpIndex(operationAdvance);
                    debug(DwarfDebugMachine) printf("advancePC %d to %p\n", cast(int) operationAdvance, machine.address);
                    break;

                case advanceLine:
                    long ad = readSLEB128(program);
                    machine.line += ad;
                    debug(DwarfDebugMachine) printf("advanceLine %d to %d\n", cast(int) ad, cast(int) machine.line);
                    break;

                case setFile:
                    uint index = cast(uint) readULEB128(program);
                    debug(DwarfDebugMachine) printf("setFile to %d\n", cast(int) index);
                    machine.fileIndex = index;
                    break;

                case setColumn:
                    uint col = cast(uint) readULEB128(program);
                    debug(DwarfDebugMachine) printf("setColumn %d\n", cast(int) col);
                    machine.column = col;
                    break;

                case negateStatement:
                    debug(DwarfDebugMachine) printf("negateStatement\n");
                    machine.isStatement = !machine.isStatement;
                    break;

                case setBasicBlock:
                    debug(DwarfDebugMachine) printf("setBasicBlock\n");
                    machine.isBasicBlock = true;
                    break;

                case constAddPC:
                    const operationAdvance = (255 - lp.opcodeBase) / lp.lineRange;
                    advanceAddressAndOpIndex(operationAdvance);
                    debug(DwarfDebugMachine) printf("constAddPC %p\n", machine.address);
                    break;

                case fixedAdvancePC:
                    const add = program.read!ushort();
                    machine.address += add;
                    machine.operationIndex = 0;
                    debug(DwarfDebugMachine) printf("fixedAdvancePC %d to %p\n", cast(int) add, machine.address);
                    break;

                case setPrologueEnd:
                    machine.isPrologueEnd = true;
                    debug(DwarfDebugMachine) printf("setPrologueEnd\n");
                    break;

                case setEpilogueBegin:
                    machine.isEpilogueBegin = true;
                    debug(DwarfDebugMachine) printf("setEpilogueBegin\n");
                    break;

                case setISA:
                    machine.isa = cast(uint) readULEB128(program);
                    debug(DwarfDebugMachine) printf("setISA %d\n", cast(int) machine.isa);
                    break;

                default:
                    debug(DwarfDebugMachine) printf("unknown opcode %d\n", cast(int) opcode);
                    return false;
            }
        }
        else
        {
            // Special opcode: advances both address and line, then emits a row
            opcode -= lp.opcodeBase;
            const operationAdvance = opcode / lp.lineRange;
            const addressIncrement = advanceAddressAndOpIndex(operationAdvance);
            const lineIncrement = lp.lineBase + (opcode % lp.lineRange);
            machine.line += lineIncrement;

            debug (DwarfDebugMachine)
                printf("special %d %d to %p line %d\n", cast(int) addressIncrement,
                       cast(int) lineIncrement, machine.address, machine.line);

            if (!callback(machine.address, LocationInfo(machine.fileIndex, machine.line), false)) return true;

            machine.isBasicBlock = false;
            machine.isPrologueEnd = false;
            machine.isEpilogueBegin = false;
            machine.discriminator = 0;
        }
    }

    return true;
}

/// Reads a `T` from the front of `buffer` and advances `buffer` past it.
T read(T)(ref const(ubyte)[] buffer) @nogc nothrow
{
    version (X86)         enum hasUnalignedLoads = true;
    else version (X86_64) enum hasUnalignedLoads = true;
    else                  enum hasUnalignedLoads = false;

    static if (hasUnalignedLoads || T.alignof == 1)
    {
        T result = *(cast(T*) buffer.ptr);
    }
    else
    {
        T result = void;
        memcpy(&result, buffer.ptr, T.sizeof);
    }

    buffer = buffer[T.sizeof .. $];
    return result;
}

// Reads a null-terminated string from `buffer`.
const(char)[] readStringz(ref const(ubyte)[] buffer) @nogc nothrow
{
    const p = cast(char*) buffer.ptr;
    const str = p[0 .. strlen(p)];
    buffer = buffer[str.length+1 .. $];
    return str;
}

/**
 * Reads an unsigned LEB128 value from the front of `buffer`
 *
 * `buffer` is advanced past the last byte of the encoding.
 * Payload bits that do not fit in 64 bits are discarded.
 */
ulong readULEB128(ref const(ubyte)[] buffer) @nogc nothrow
{
    ulong val = 0;
    uint shift = 0;

    while (true)
    {
        ubyte b = buffer.read!ubyte();

        // The payload must be widened *before* shifting: `b & 0x7f` is an
        // `int`, so a 32-bit shift would truncate or sign-extend the result.
        if (shift < 64)
            val |= cast(ulong) (b & 0x7f) << shift;
        if ((b & 0x80) == 0) break;
        shift += 7;
    }

    return val;
}

unittest
{
    const(ubyte)[] data = [0xe5, 0x8e, 0x26, 0xDE, 0xAD, 0xBE, 0xEF];
    assert(readULEB128(data) == 624_485);
    assert(data[] == [0xDE, 0xAD, 0xBE, 0xEF]);

    // Values wider than 31 bits must neither be truncated nor sign-extended
    const(ubyte)[] wide = [0x80, 0x80, 0x80, 0x80, 0x08];
    assert(readULEB128(wide) == 0x8000_0000UL);
    assert(wide.length == 0);

    const(ubyte)[] max = [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01];
    assert(readULEB128(max) == ulong.max);
}

/**
 * Reads a signed LEB128 value from the front of `buffer`
 *
 * `buffer` is advanced past the last byte of the encoding.
 * Payload bits that do not fit in 64 bits are discarded.
 */
long readSLEB128(ref const(ubyte)[] buffer) @nogc nothrow
{
    long val = 0;
    uint shift = 0;
    enum size = long.sizeof << 3;
    ubyte b;

    while (true)
    {
        b = buffer.read!ubyte();
        // See `readULEB128`: shift in 64 bits, not in `int`
        if (shift < size)
            val |= cast(long) (b & 0x7f) << shift;
        shift += 7;
        if ((b & 0x80) == 0)
            break;
    }

    // Sign-extend when the sign bit of the last byte is set
    if (shift < size && (b & 0x40) != 0)
        val |= -(1L << shift);

    return val;
}

unittest
{
    const(ubyte)[] two = [0x02];
    assert(readSLEB128(two) == 2);

    const(ubyte)[] minusTwo = [0x7e];
    assert(readSLEB128(minusTwo) == -2);

    const(ubyte)[] data = [0xc0, 0xbb, 0x78, 0xDE];
    assert(readSLEB128(data) == -123_456);
    assert(data[] == [0xDE]);

    // Values wider than 31 bits
    const(ubyte)[] wide = [0x80, 0x80, 0x80, 0x80, 0x78];
    assert(readSLEB128(wide) == -(1L << 31));
}

enum DW_LNCT : ushort
{
    path = 1,
    directoryIndex = 2,
    timestamp = 3,
    size = 4,
    md5 = 5,
    loUser = 0x2000,
    hiUser = 0x3fff,
}

enum DW_FORM : ubyte
{
    addr = 1,
    block2 = 3,
    block4 = 4,
    data2 = 5,
    data4 = 6,
    data8 = 7,
    string_ = 8,
    block = 9,
    block1 = 10,
    data1 = 11,
    flag = 12,
    sdata = 13,
    strp = 14,
    udata = 15,
    ref_addr = 16,
    ref1 = 17,
    ref2 = 18,
    ref4 = 19,
    ref8 = 20,
    ref_udata = 21,
    indirect = 22,
    sec_offset = 23,
    exprloc = 24,
    flag_present = 25,
    strx = 26,
    addrx = 27,
    ref_sup4 = 28,
    strp_sup = 29,
    data16 = 30,
    line_strp = 31,
    ref_sig8 = 32,
    implicit_const = 33,
    loclistx = 34,
    rnglistx = 35,
    ref_sup8 = 36,
    strx1 = 37,
    strx2 = 38,
    strx3 = 39,
    strx4 = 40,
    addrx1 = 41,
    addrx2 = 42,
    addrx3 = 43,
    addrx4 = 44,
}

/// One (content type, form) pair of a DWARF v5 entry format description
struct EntryFormatPair
{
    DW_LNCT type;
    DW_FORM form;
}

/// Reads a DWARF v5 directory/file name entry format.
Array!EntryFormatPair readEntryFormat(ref const(ubyte)[] buffer) @nogc nothrow
{
    const numPairs = buffer.read!ubyte();

    Array!EntryFormatPair pairs;
    pairs.length = numPairs;

    foreach (ref pair; pairs)
    {
        pair.type = cast(DW_LNCT) buffer.readULEB128();
        pair.form = cast(DW_FORM) buffer.readULEB128();
    }

    debug (DwarfDebugMachine)
    {
        printf("entryFormat: (%d)\n", cast(int) pairs.length);
        foreach (ref pair; pairs)
            printf("\t- type: %d, form: %d\n", cast(int) pair.type, cast(int) pair.form);
    }

    return pairs;
}

enum StandardOpcode : ubyte
{
    extendedOp = 0,
    copy = 1,
    advancePC = 2,
    advanceLine = 3,
    setFile = 4,
    setColumn = 5,
    negateStatement = 6,
    setBasicBlock = 7,
    constAddPC = 8,
    fixedAdvancePC = 9,
    setPrologueEnd = 10,
    setEpilogueBegin = 11,
    setISA = 12,
}

enum ExtendedOpcode : ubyte
{
    endSequence = 1,
    setAddress = 2,
    defineFile = 3,
    setDiscriminator = 4,
}

/// Registers of the line number state machine (see `runStateMachine`)
struct StateMachine
{
    const(void)* address;
    uint operationIndex = 0;
    uint fileIndex = 1;
    uint line = 1;
    uint column = 0;
    uint isa = 0;
    uint discriminator = 0;
    bool isStatement;
    bool isBasicBlock = false;
    bool isEndSequence = false;
    bool isPrologueEnd = false;
    bool isEpilogueBegin = false;
}

/// A (file index, line) pair as produced by the state machine
struct LocationInfo
{
    int file;
    int line;
}

/// Decoded header of a line number program, plus its bytecode (`program`)
struct LineNumberProgram
{
    ulong unitLength;
    ushort dwarfVersion;
    ubyte addressSize;
    ubyte segmentSelectorSize;
    ulong headerLength;
    ubyte minimumInstructionLength;
    ubyte maximumOperationsPerInstruction;
    bool defaultIsStatement;
    byte lineBase;
    ubyte lineRange;
    ubyte opcodeBase;
    const(ubyte)[] standardOpcodeLengths;
    Array!(const(char)[]) includeDirectories;
    Array!SourceFile sourceFiles;
    const(ubyte)[] program;
}

struct SourceFile
{
    const(char)[] file;
    size_t dirIndex; // 1-based
}

/**
 * Reads the line number program located at the front of `data`
 *
 * Decodes the program header (DWARF 3, 4 or 5), including the include
 * directories and the file names tables, and slices out the bytecode.
 * On return, `data` has been advanced to the start of the next unit.
 *
 * Params:
 *   data = Content of `.debug_line`, starting at a unit header
 *
 * Returns:
 *   The decoded program; the strings and slices it holds point into `data`.
 */
LineNumberProgram readLineNumberProgram(ref const(ubyte)[] data) @nogc nothrow
{
    const originalData = data;

    LineNumberProgram lp;

    bool is64bitDwarf = false;
    lp.unitLength = data.read!uint();
    if (lp.unitLength == uint.max)
    {
        is64bitDwarf = true;
        lp.unitLength = data.read!ulong();
    }

    const dwarfVersionFieldOffset = cast(size_t) (data.ptr - originalData.ptr);
    lp.dwarfVersion = data.read!ushort();
    assert(lp.dwarfVersion < 6, "DWARF v6+ not supported yet");

    if (lp.dwarfVersion >= 5)
    {
        lp.addressSize = data.read!ubyte();
        lp.segmentSelectorSize = data.read!ubyte();
    }

    lp.headerLength = (is64bitDwarf ? data.read!ulong() : data.read!uint());

    const minimumInstructionLengthFieldOffset = cast(size_t) (data.ptr - originalData.ptr);
    lp.minimumInstructionLength = data.read!ubyte();

    lp.maximumOperationsPerInstruction = (lp.dwarfVersion >= 4 ? data.read!ubyte() : 1);
    lp.defaultIsStatement = (data.read!ubyte() != 0);
    lp.lineBase = data.read!byte();
    lp.lineRange = data.read!ubyte();
    lp.opcodeBase = data.read!ubyte();

    lp.standardOpcodeLengths = data[0 .. lp.opcodeBase - 1];
    data = data[lp.opcodeBase - 1 .. $];

    if (lp.dwarfVersion >= 5)
    {
        // Skips over an attribute this module has no use for
        static void consumeGenericForm(ref const(ubyte)[] data, DW_FORM form, bool is64bitDwarf)
        {
            with (DW_FORM) switch (form)
            {
                case strp, strp_sup, line_strp:
                    data = data[is64bitDwarf ? 8 : 4 .. $]; break;
                case data1, strx1:
                    data = data[1 .. $]; break;
                case data2, strx2:
                    data = data[2 .. $]; break;
                case strx3:
                    data = data[3 .. $]; break;
                case data4, strx4:
                    data = data[4 .. $]; break;
                case data8:
                    data = data[8 .. $]; break;
                case data16:
                    data = data[16 .. $]; break;
                case udata, strx:
                    data.readULEB128(); break;
                case block:
                    const length = cast(size_t) data.readULEB128();
                    data = data[length .. $];
                    break;
                default:
                    assert(0); // TODO: support other forms for vendor extensions
            }
        }

        const dirFormat = data.readEntryFormat();
        lp.includeDirectories.length = cast(size_t) data.readULEB128();
        foreach (ref dir; lp.includeDirectories)
        {
            dir = "<unknown dir>"; // fallback
            foreach (ref pair; dirFormat)
            {
                if (pair.type == DW_LNCT.path &&
                    // TODO: support other forms too (offsets in other sections)
                    pair.form == DW_FORM.string_)
                {
                    dir = data.readStringz();
                }
                else // uninteresting type
                    consumeGenericForm(data, pair.form, is64bitDwarf);
            }
        }

        const fileFormat = data.readEntryFormat();
        lp.sourceFiles.length = cast(size_t) data.readULEB128();
        foreach (ref sf; lp.sourceFiles)
        {
            sf.file = "<unknown file>"; // fallback
            foreach (ref pair; fileFormat)
            {
                if (pair.type == DW_LNCT.path &&
                    // TODO: support other forms too (offsets in other sections)
                    pair.form == DW_FORM.string_)
                {
                    sf.file = data.readStringz();
                }
                else if (pair.type == DW_LNCT.directoryIndex)
                {
                    if (pair.form == DW_FORM.data1)
                        sf.dirIndex = data.read!ubyte();
                    else if (pair.form == DW_FORM.data2)
                        sf.dirIndex = data.read!ushort();
                    else if (pair.form == DW_FORM.udata)
                        sf.dirIndex = cast(size_t) data.readULEB128();
                    else
                        assert(0); // not allowed by DWARF 5 spec
                    sf.dirIndex++; // DWARF v5 indices are 0-based
                }
                else // uninteresting type
                    consumeGenericForm(data, pair.form, is64bitDwarf);
            }
        }
    }
    else
    {
        // A sequence ends with a null-byte.
        static auto readSequence(alias ReadEntry)(ref const(ubyte)[] data)
        {
            alias ResultType = typeof(ReadEntry(data));

            // First pass on a copy of the slice, to size the result
            static size_t count(const(ubyte)[] data)
            {
                size_t count = 0;
                while (data.length && data[0] != 0)
                {
                    ReadEntry(data);
                    ++count;
                }
                return count;
            }

            const numEntries = count(data);

            Array!ResultType result;
            result.length = numEntries;

            foreach (i; 0 .. numEntries)
                result[i] = ReadEntry(data);

            data = data[1 .. $]; // skip over sequence-terminating null

            return result;
        }

        /// Directories are simply a sequence of NUL-terminated strings
        static const(char)[] readIncludeDirectoryEntry(ref const(ubyte)[] data)
        {
            return data.readStringz();
        }
        lp.includeDirectories = readSequence!readIncludeDirectoryEntry(data);

        static SourceFile readFileNameEntry(ref const(ubyte)[] data)
        {
            const file = data.readStringz();
            const dirIndex = cast(size_t) data.readULEB128();
            data.readULEB128(); // last mod
            data.readULEB128(); // file len

            return SourceFile(
                file,
                dirIndex,
            );
        }
        lp.sourceFiles = readSequence!readFileNameEntry(data);
    }

    debug (DwarfDebugMachine)
    {
        printf("include_directories: (%d)\n", cast(int) lp.includeDirectories.length);
        foreach (dir; lp.includeDirectories)
            printf("\t- %.*s\n", cast(int) dir.length, dir.ptr);
        printf("source_files: (%d)\n", cast(int) lp.sourceFiles.length);
        foreach (ref sf; lp.sourceFiles)
        {
            if (sf.dirIndex > lp.includeDirectories.length)
                printf("\t- Out of bound directory! (%llu): %.*s\n",
                       cast(ulong) sf.dirIndex, cast(int) sf.file.length, sf.file.ptr);
            else if (sf.dirIndex > 0)
            {
                const dir = lp.includeDirectories[sf.dirIndex - 1];
                printf("\t- (Dir:%llu:%.*s/)%.*s\n", cast(ulong) sf.dirIndex,
                       cast(int) dir.length, dir.ptr,
                       cast(int) sf.file.length, sf.file.ptr);
            }
            else
                printf("\t- %.*s\n", cast(int) sf.file.length, sf.file.ptr);
        }
    }

    // `headerLength` is counted from the field following it, and
    // `unitLength` from the field following it (the version).
    const programStart = cast(size_t) (minimumInstructionLengthFieldOffset + lp.headerLength);
    const programEnd = cast(size_t) (dwarfVersionFieldOffset + lp.unitLength);
    lp.program = originalData[programStart .. programEnd];

    data = originalData[programEnd .. $];

    return lp;
}