dwarf_cu_to_module.cc
1 // Copyright 2010 Google LLC 2 // 3 // Redistribution and use in source and binary forms, with or without 4 // modification, are permitted provided that the following conditions are 5 // met: 6 // 7 // * Redistributions of source code must retain the above copyright 8 // notice, this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above 10 // copyright notice, this list of conditions and the following disclaimer 11 // in the documentation and/or other materials provided with the 12 // distribution. 13 // * Neither the name of Google LLC nor the names of its 14 // contributors may be used to endorse or promote products derived from 15 // this software without specific prior written permission. 16 // 17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> 30 31 // Implement the DwarfCUToModule class; see dwarf_cu_to_module.h. 32 33 // For <inttypes.h> PRI* macros, before anything else might #include it. 34 #ifndef __STDC_FORMAT_MACROS 35 #define __STDC_FORMAT_MACROS 36 #endif /* __STDC_FORMAT_MACROS */ 37 38 #ifdef HAVE_CONFIG_H 39 #include <config.h> // Must come first 40 #endif 41 42 #include "common/dwarf_cu_to_module.h" 43 44 #include <assert.h> 45 #include <inttypes.h> 46 #include <stdint.h> 47 #include <stdio.h> 48 49 #include <algorithm> 50 #include <memory> 51 #include <numeric> 52 #include <utility> 53 54 #include "common/string_view.h" 55 #include "common/dwarf_line_to_module.h" 56 #include "google_breakpad/common/breakpad_types.h" 57 58 namespace google_breakpad { 59 60 using std::accumulate; 61 using std::map; 62 using std::pair; 63 using std::sort; 64 using std::vector; 65 using std::unique_ptr; 66 67 // Data provided by a DWARF specification DIE. 68 // 69 // In DWARF, the DIE for a definition may contain a DW_AT_specification 70 // attribute giving the offset of the corresponding declaration DIE, and 71 // the definition DIE may omit information given in the declaration. For 72 // example, it's common for a function's address range to appear only in 73 // its definition DIE, but its name to appear only in its declaration 74 // DIE. 75 // 76 // The dumper needs to be able to follow DW_AT_specification links to 77 // bring all this information together in a FUNC record. Conveniently, 78 // DIEs that are the target of such links have a DW_AT_declaration flag 79 // set, so we can identify them when we first see them, and record their 80 // contents for later reference. 81 // 82 // A Specification holds information gathered from a declaration DIE that 83 // we may need if we find a DW_AT_specification link pointing to it. 84 struct DwarfCUToModule::Specification { 85 // The qualified name that can be found by demangling DW_AT_MIPS_linkage_name. 86 StringView qualified_name; 87 88 // The name of the enclosing scope, or the empty string if there is none. 89 StringView enclosing_name; 90 91 // The name for the specification DIE itself, without any enclosing 92 // name components. 93 StringView unqualified_name; 94 }; 95 96 // An abstract origin -- base definition of an inline function. 97 struct AbstractOrigin { 98 explicit AbstractOrigin(StringView name) : name(name) {} 99 100 StringView name; 101 }; 102 103 typedef map<uint64_t, AbstractOrigin> AbstractOriginByOffset; 104 105 // Data global to the DWARF-bearing file that is private to the 106 // DWARF-to-Module process. 107 struct DwarfCUToModule::FilePrivate { 108 // A map from offsets of DIEs within the .debug_info section to 109 // Specifications describing those DIEs. Specification references can 110 // cross compilation unit boundaries. 111 SpecificationByOffset specifications; 112 113 AbstractOriginByOffset origins; 114 115 // Keep a list of forward references from DW_AT_abstract_origin and 116 // DW_AT_specification attributes so names can be fixed up. 117 std::map<uint64_t, Module::Function*> forward_ref_die_to_func; 118 }; 119 120 DwarfCUToModule::FileContext::FileContext(const string& filename, 121 Module* module, 122 bool handle_inter_cu_refs) 123 : filename_(filename), 124 module_(module), 125 handle_inter_cu_refs_(handle_inter_cu_refs), 126 file_private_(new FilePrivate()) { 127 } 128 129 DwarfCUToModule::FileContext::~FileContext() { 130 for (std::vector<uint8_t *>::iterator i = uncompressed_sections_.begin(); 131 i != uncompressed_sections_.end(); ++i) { 132 delete[] *i; 133 } 134 } 135 136 void DwarfCUToModule::FileContext::AddSectionToSectionMap( 137 const string& name, const uint8_t* contents, uint64_t length) { 138 section_map_[name] = std::make_pair(contents, length); 139 } 140 141 void DwarfCUToModule::FileContext::AddManagedSectionToSectionMap( 142 const string& name, uint8_t* contents, uint64_t length) { 143 section_map_[name] = std::make_pair(contents, length); 144 uncompressed_sections_.push_back(contents); 145 } 146 147 void DwarfCUToModule::FileContext::ClearSectionMapForTest() { 148 section_map_.clear(); 149 } 150 151 const SectionMap& 152 DwarfCUToModule::FileContext::section_map() const { 153 return section_map_; 154 } 155 156 void DwarfCUToModule::FileContext::ClearSpecifications() { 157 if (!handle_inter_cu_refs_) 158 file_private_->specifications.clear(); 159 } 160 161 bool DwarfCUToModule::FileContext::IsUnhandledInterCUReference( 162 uint64_t offset, uint64_t compilation_unit_start) const { 163 if (handle_inter_cu_refs_) 164 return false; 165 return offset < compilation_unit_start; 166 } 167 168 // Information global to the particular compilation unit we're 169 // parsing. This is for data shared across the CU's entire DIE tree, 170 // and parameters from the code invoking the CU parser. 171 struct DwarfCUToModule::CUContext { 172 CUContext(FileContext* file_context_arg, 173 WarningReporter* reporter_arg, 174 RangesHandler* ranges_handler_arg, 175 uint64_t low_pc, 176 uint64_t addr_base) 177 : version(0), 178 file_context(file_context_arg), 179 reporter(reporter_arg), 180 ranges_handler(ranges_handler_arg), 181 language(Language::CPlusPlus), 182 low_pc(low_pc), 183 high_pc(0), 184 ranges_form(DW_FORM_sec_offset), 185 ranges_data(0), 186 ranges_base(0), 187 addr_base(addr_base), 188 str_offsets_base(0) {} 189 190 ~CUContext() { 191 for (vector<Module::Function*>::iterator it = functions.begin(); 192 it != functions.end(); ++it) { 193 delete *it; 194 } 195 }; 196 197 // Dwarf version of the source CU. 198 uint8_t version; 199 200 // The DWARF-bearing file into which this CU was incorporated. 201 FileContext* file_context; 202 203 // For printing error messages. 204 WarningReporter* reporter; 205 206 // For reading ranges from the .debug_ranges section 207 RangesHandler* ranges_handler; 208 209 // The source language of this compilation unit. 210 const Language* language; 211 212 // Addresses covered by this CU. If high_pc_ is non-zero then the CU covers 213 // low_pc to high_pc, otherwise ranges_data is non-zero and low_pc represents 214 // the base address of the ranges covered by the CU. ranges_data will define 215 // the CU's actual ranges. 216 uint64_t low_pc; 217 uint64_t high_pc; 218 219 // Ranges for this CU are read according to this form. 220 enum DwarfForm ranges_form; 221 uint64_t ranges_data; 222 223 // Offset into .debug_rngslists where this CU's ranges are stored. 224 // Data in DW_FORM_rnglistx is relative to this offset. 225 uint64_t ranges_base; 226 227 // Offset into .debug_addr where this CU's addresses are stored. Data in 228 // form DW_FORM_addrxX is relative to this offset. 229 uint64_t addr_base; 230 231 // Offset into this CU's contribution to .debug_str_offsets. 232 uint64_t str_offsets_base; 233 234 // Collect all the data from the CU that a RangeListReader needs to read a 235 // range. 236 bool AssembleRangeListInfo( 237 RangeListReader::CURangesInfo* info) { 238 const SectionMap& section_map 239 = file_context->section_map(); 240 info->version_ = version; 241 info->base_address_ = low_pc; 242 info->ranges_base_ = ranges_base; 243 const char* section_name = (version <= 4 ? 244 ".debug_ranges" : ".debug_rnglists"); 245 SectionMap::const_iterator map_entry 246 = GetSectionByName(section_map, section_name); 247 if (map_entry == section_map.end()) { 248 return false; 249 } 250 info->buffer_ = map_entry->second.first; 251 info->size_ = map_entry->second.second; 252 if (version > 4) { 253 SectionMap::const_iterator map_entry 254 = GetSectionByName(section_map, ".debug_addr"); 255 if (map_entry == section_map.end()) { 256 return false; 257 } 258 info->addr_buffer_ = map_entry->second.first; 259 info->addr_buffer_size_ = map_entry->second.second; 260 info->addr_base_ = addr_base; 261 } 262 return true; 263 } 264 265 // The functions defined in this compilation unit. We accumulate 266 // them here during parsing. Then, in DwarfCUToModule::Finish, we 267 // assign them lines and add them to file_context->module. 268 // 269 // Destroying this destroys all the functions this vector points to. 270 vector<Module::Function*> functions; 271 272 // A map of function pointers to the its forward specification DIE's offset. 273 map<Module::Function*, uint64_t> spec_function_offsets; 274 }; 275 276 // Information about the context of a particular DIE. This is for 277 // information that changes as we descend the tree towards the leaves: 278 // the containing classes/namespaces, etc. 279 struct DwarfCUToModule::DIEContext { 280 // The fully-qualified name of the context. For example, for a 281 // tree like: 282 // 283 // DW_TAG_namespace Foo 284 // DW_TAG_class Bar 285 // DW_TAG_subprogram Baz 286 // 287 // in a C++ compilation unit, the DIEContext's name for the 288 // DW_TAG_subprogram DIE would be "Foo::Bar". The DIEContext's 289 // name for the DW_TAG_namespace DIE would be "". 290 StringView name; 291 }; 292 293 // An abstract base class for all the dumper's DIE handlers. 294 class DwarfCUToModule::GenericDIEHandler: public DIEHandler { 295 public: 296 // Create a handler for the DIE at OFFSET whose compilation unit is 297 // described by CU_CONTEXT, and whose immediate context is described 298 // by PARENT_CONTEXT. 299 GenericDIEHandler(CUContext* cu_context, DIEContext* parent_context, 300 uint64_t offset) 301 : cu_context_(cu_context), 302 parent_context_(parent_context), 303 offset_(offset), 304 declaration_(false), 305 specification_(NULL), 306 no_specification(false), 307 abstract_origin_(NULL), 308 forward_ref_die_offset_(0), specification_offset_(0) { } 309 310 // Derived classes' ProcessAttributeUnsigned can defer to this to 311 // handle DW_AT_declaration, or simply not override it. 312 void ProcessAttributeUnsigned(enum DwarfAttribute attr, 313 enum DwarfForm form, 314 uint64_t data); 315 316 // Derived classes' ProcessAttributeReference can defer to this to 317 // handle DW_AT_specification, or simply not override it. 318 void ProcessAttributeReference(enum DwarfAttribute attr, 319 enum DwarfForm form, 320 uint64_t data); 321 322 // Derived classes' ProcessAttributeReference can defer to this to 323 // handle DW_AT_specification, or simply not override it. 324 void ProcessAttributeString(enum DwarfAttribute attr, 325 enum DwarfForm form, 326 const string& data); 327 328 protected: 329 // Compute and return the fully-qualified name of the DIE. If this 330 // DIE is a declaration DIE, to be cited by other DIEs' 331 // DW_AT_specification attributes, record its enclosing name and 332 // unqualified name in the specification table. 333 // 334 // Use this from EndAttributes member functions, not ProcessAttribute* 335 // functions; only the former can be sure that all the DIE's attributes 336 // have been seen. 337 // 338 // On return, if has_qualified_name is non-NULL, *has_qualified_name is set to 339 // true if the DIE includes a fully-qualified name, false otherwise. 340 StringView ComputeQualifiedName(bool* has_qualified_name); 341 342 CUContext* cu_context_; 343 DIEContext* parent_context_; 344 uint64_t offset_; 345 346 // If this DIE has a DW_AT_declaration attribute, this is its value. 347 // It is false on DIEs with no DW_AT_declaration attribute. 348 bool declaration_; 349 350 // If this DIE has a DW_AT_specification attribute, this is the 351 // Specification structure for the DIE the attribute refers to. 352 // Otherwise, this is NULL. 353 Specification* specification_; 354 355 // If this DIE has DW_AT_specification with offset smaller than this DIE and 356 // we can't find that in the specification map. 357 bool no_specification; 358 359 // If this DIE has a DW_AT_abstract_origin attribute, this is the 360 // AbstractOrigin structure for the DIE the attribute refers to. 361 // Otherwise, this is NULL. 362 const AbstractOrigin* abstract_origin_; 363 364 // If this DIE has a DW_AT_specification or DW_AT_abstract_origin and it is a 365 // forward reference, no Specification will be available. Track the reference 366 // to be fixed up when the DIE is parsed. 367 uint64_t forward_ref_die_offset_; 368 369 // The root offset of Specification or abstract origin. 370 uint64_t specification_offset_; 371 372 // The value of the DW_AT_name attribute, or the empty string if the 373 // DIE has no such attribute. 374 StringView name_attribute_; 375 376 // The demangled value of the DW_AT_MIPS_linkage_name attribute, or the empty 377 // string if the DIE has no such attribute or its content could not be 378 // demangled. 379 StringView demangled_name_; 380 381 // The non-demangled value of the DW_AT_MIPS_linkage_name attribute, 382 // it its content count not be demangled. 383 StringView raw_name_; 384 }; 385 386 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeUnsigned( 387 enum DwarfAttribute attr, 388 enum DwarfForm form, 389 uint64_t data) { 390 switch (attr) { 391 case DW_AT_declaration: declaration_ = (data != 0); break; 392 default: break; 393 } 394 } 395 396 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference( 397 enum DwarfAttribute attr, 398 enum DwarfForm form, 399 uint64_t data) { 400 switch (attr) { 401 case DW_AT_specification: { 402 FileContext* file_context = cu_context_->file_context; 403 if (file_context->IsUnhandledInterCUReference( 404 data, cu_context_->reporter->cu_offset())) { 405 cu_context_->reporter->UnhandledInterCUReference(offset_, data); 406 break; 407 } 408 // Find the Specification to which this attribute refers, and 409 // set specification_ appropriately. We could do more processing 410 // here, but it's better to leave the real work to our 411 // EndAttribute member function, at which point we know we have 412 // seen all the DIE's attributes. 413 SpecificationByOffset* specifications = 414 &file_context->file_private_->specifications; 415 SpecificationByOffset::iterator spec = specifications->find(data); 416 if (spec != specifications->end()) { 417 specification_ = &spec->second; 418 } else if (data > offset_) { 419 forward_ref_die_offset_ = data; 420 } else { 421 no_specification = true; 422 } 423 specification_offset_ = data; 424 break; 425 } 426 case DW_AT_abstract_origin: { 427 const AbstractOriginByOffset& origins = 428 cu_context_->file_context->file_private_->origins; 429 AbstractOriginByOffset::const_iterator origin = origins.find(data); 430 if (origin != origins.end()) { 431 abstract_origin_ = &(origin->second); 432 } else if (data > offset_) { 433 forward_ref_die_offset_ = data; 434 } 435 specification_offset_ = data; 436 break; 437 } 438 default: break; 439 } 440 } 441 442 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString( 443 enum DwarfAttribute attr, 444 enum DwarfForm form, 445 const string& data) { 446 switch (attr) { 447 case DW_AT_name: 448 name_attribute_ = 449 cu_context_->file_context->module_->AddStringToPool(data); 450 break; 451 case DW_AT_MIPS_linkage_name: 452 case DW_AT_linkage_name: { 453 string demangled; 454 Language::DemangleResult result = 455 cu_context_->language->DemangleName(data, &demangled); 456 switch (result) { 457 case Language::kDemangleSuccess: 458 demangled_name_ = 459 cu_context_->file_context->module_->AddStringToPool(demangled); 460 break; 461 462 case Language::kDemangleFailure: 463 cu_context_->reporter->DemangleError(data); 464 // fallthrough 465 case Language::kDontDemangle: 466 demangled_name_ = StringView(); 467 raw_name_ = cu_context_->file_context->module_->AddStringToPool(data); 468 break; 469 } 470 break; 471 } 472 default: break; 473 } 474 } 475 476 StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName( 477 bool* has_qualified_name) { 478 // Use the demangled name, if one is available. Demangled names are 479 // preferable to those inferred from the DWARF structure because they 480 // include argument types. 481 StringView* qualified_name = nullptr; 482 if (!demangled_name_.empty()) { 483 // Found it is this DIE. 484 qualified_name = &demangled_name_; 485 } else if (specification_ && !specification_->qualified_name.empty()) { 486 // Found it on the specification. 487 qualified_name = &specification_->qualified_name; 488 } 489 490 StringView* unqualified_name = nullptr; 491 StringView* enclosing_name = nullptr; 492 if (!qualified_name) { 493 if (has_qualified_name) { 494 // dSYMs built with -gmlt do not include the DW_AT_linkage_name 495 // with the unmangled symbol, but rather include it in the 496 // LC_SYMTAB STABS, which end up in the externs of the module. 497 // 498 // Remember this so the Module can copy over the extern name later. 499 *has_qualified_name = false; 500 } 501 502 // Find the unqualified name. If the DIE has its own DW_AT_name 503 // attribute, then use that; otherwise, check the specification. 504 if (!name_attribute_.empty()) { 505 unqualified_name = &name_attribute_; 506 } else if (specification_) { 507 unqualified_name = &specification_->unqualified_name; 508 } else if (!raw_name_.empty()) { 509 unqualified_name = &raw_name_; 510 } 511 512 // Find the name of the enclosing context. If this DIE has a 513 // specification, it's the specification's enclosing context that 514 // counts; otherwise, use this DIE's context. 515 if (specification_) { 516 enclosing_name = &specification_->enclosing_name; 517 } else if (parent_context_) { 518 enclosing_name = &parent_context_->name; 519 } 520 } else { 521 if (has_qualified_name) { 522 *has_qualified_name = true; 523 } 524 } 525 526 // Prepare the return value before upcoming mutations possibly invalidate the 527 // existing pointers. 528 string return_value; 529 if (qualified_name) { 530 return_value = qualified_name->str(); 531 } else if (unqualified_name && enclosing_name) { 532 // Combine the enclosing name and unqualified name to produce our 533 // own fully-qualified name. 534 return_value = cu_context_->language->MakeQualifiedName( 535 enclosing_name->str(), unqualified_name->str()); 536 } 537 538 // If this DIE was marked as a declaration, record its names in the 539 // specification table. 540 if ((declaration_ && qualified_name) || 541 (unqualified_name && enclosing_name)) { 542 Specification spec; 543 if (qualified_name) { 544 spec.qualified_name = *qualified_name; 545 } else { 546 spec.enclosing_name = *enclosing_name; 547 spec.unqualified_name = *unqualified_name; 548 } 549 cu_context_->file_context->file_private_->specifications[offset_] = spec; 550 } 551 552 return cu_context_->file_context->module_->AddStringToPool(return_value); 553 } 554 555 static bool IsEmptyRange(const vector<Module::Range>& ranges) { 556 uint64_t size = accumulate(ranges.cbegin(), ranges.cend(), 0, 557 [](uint64_t total, Module::Range entry) { 558 return total + entry.size; 559 } 560 ); 561 562 return size == 0; 563 } 564 565 566 // A handler for DW_TAG_inlined_subroutine DIEs. 567 class DwarfCUToModule::InlineHandler : public GenericDIEHandler { 568 public: 569 InlineHandler(CUContext* cu_context, 570 DIEContext* parent_context, 571 uint64_t offset, 572 int inline_nest_level, 573 vector<unique_ptr<Module::Inline>>& inlines) 574 : GenericDIEHandler(cu_context, parent_context, offset), 575 low_pc_(0), 576 high_pc_(0), 577 high_pc_form_(DW_FORM_addr), 578 ranges_form_(DW_FORM_sec_offset), 579 ranges_data_(0), 580 call_site_line_(0), 581 inline_nest_level_(inline_nest_level), 582 has_range_data_(false), 583 inlines_(inlines) {} 584 585 void ProcessAttributeUnsigned(enum DwarfAttribute attr, 586 enum DwarfForm form, 587 uint64_t data); 588 DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag); 589 bool EndAttributes(); 590 void Finish(); 591 592 private: 593 // The fully-qualified name, as derived from name_attribute_, 594 // specification_, parent_context_. Computed in EndAttributes. 595 StringView name_; 596 uint64_t low_pc_; // DW_AT_low_pc 597 uint64_t high_pc_; // DW_AT_high_pc 598 DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address. 599 DwarfForm ranges_form_; // DW_FORM_sec_offset or DW_FORM_rnglistx 600 uint64_t ranges_data_; // DW_AT_ranges 601 int call_site_line_; // DW_AT_call_line 602 int call_site_file_id_; // DW_AT_call_file 603 int inline_nest_level_; 604 bool has_range_data_; 605 // A vector of inlines in the same nest level. It's owned by its parent 606 // function/inline. At Finish(), add this inline into the vector. 607 vector<unique_ptr<Module::Inline>>& inlines_; 608 // A vector of child inlines. 609 vector<unique_ptr<Module::Inline>> child_inlines_; 610 }; 611 612 void DwarfCUToModule::InlineHandler::ProcessAttributeUnsigned( 613 enum DwarfAttribute attr, 614 enum DwarfForm form, 615 uint64_t data) { 616 switch (attr) { 617 case DW_AT_low_pc: 618 low_pc_ = data; 619 break; 620 case DW_AT_high_pc: 621 high_pc_form_ = form; 622 high_pc_ = data; 623 break; 624 case DW_AT_ranges: 625 has_range_data_ = true; 626 ranges_data_ = data; 627 ranges_form_ = form; 628 break; 629 case DW_AT_call_line: 630 call_site_line_ = data; 631 break; 632 case DW_AT_call_file: 633 call_site_file_id_ = data; 634 break; 635 default: 636 GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data); 637 break; 638 } 639 } 640 641 DIEHandler* DwarfCUToModule::InlineHandler::FindChildHandler( 642 uint64_t offset, 643 enum DwarfTag tag) { 644 switch (tag) { 645 case DW_TAG_inlined_subroutine: 646 return new InlineHandler(cu_context_, nullptr, offset, 647 inline_nest_level_ + 1, child_inlines_); 648 default: 649 return NULL; 650 } 651 } 652 653 bool DwarfCUToModule::InlineHandler::EndAttributes() { 654 if (abstract_origin_) 655 name_ = abstract_origin_->name; 656 if (name_.empty()) { 657 // We haven't seen the abstract origin yet, which might appears later and we 658 // will fix the name after calling 659 // InlineOriginMap::GetOrCreateInlineOrigin with right name. 660 name_ = 661 cu_context_->file_context->module_->AddStringToPool("<name omitted>"); 662 } 663 return true; 664 } 665 666 void DwarfCUToModule::InlineHandler::Finish() { 667 vector<Module::Range> ranges; 668 669 if (!has_range_data_) { 670 if (high_pc_form_ != DW_FORM_addr && 671 high_pc_form_ != DW_FORM_GNU_addr_index && 672 high_pc_form_ != DW_FORM_addrx && 673 high_pc_form_ != DW_FORM_addrx1 && 674 high_pc_form_ != DW_FORM_addrx2 && 675 high_pc_form_ != DW_FORM_addrx3 && 676 high_pc_form_ != DW_FORM_addrx4) { 677 high_pc_ += low_pc_; 678 } 679 680 Module::Range range(low_pc_, high_pc_ - low_pc_); 681 ranges.push_back(range); 682 } else { 683 RangesHandler* ranges_handler = cu_context_->ranges_handler; 684 if (ranges_handler) { 685 RangeListReader::CURangesInfo cu_info; 686 if (cu_context_->AssembleRangeListInfo(&cu_info)) { 687 if (!ranges_handler->ReadRanges(ranges_form_, ranges_data_, 688 &cu_info, &ranges)) { 689 ranges.clear(); 690 cu_context_->reporter->MalformedRangeList(ranges_data_); 691 } 692 } else { 693 cu_context_->reporter->MissingRanges(); 694 } 695 } 696 } 697 698 // Ignore DW_TAG_inlined_subroutine with empty range. 699 if (ranges.empty()) { 700 return; 701 } 702 703 // Every DW_TAG_inlined_subroutine should have a DW_AT_abstract_origin. 704 assert(specification_offset_ != 0); 705 706 Module::InlineOriginMap& inline_origin_map = 707 cu_context_->file_context->module_ 708 ->inline_origin_maps[cu_context_->file_context->filename_]; 709 inline_origin_map.SetReference(specification_offset_, specification_offset_); 710 Module::InlineOrigin* origin = 711 inline_origin_map.GetOrCreateInlineOrigin(specification_offset_, name_); 712 unique_ptr<Module::Inline> in( 713 new Module::Inline(origin, ranges, call_site_line_, call_site_file_id_, 714 inline_nest_level_, std::move(child_inlines_))); 715 inlines_.push_back(std::move(in)); 716 } 717 718 // A handler for DIEs that contain functions and contribute a 719 // component to their names: namespaces, classes, etc. 720 class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler { 721 public: 722 NamedScopeHandler(CUContext* cu_context, 723 DIEContext* parent_context, 724 uint64_t offset, 725 bool handle_inline) 726 : GenericDIEHandler(cu_context, parent_context, offset), 727 handle_inline_(handle_inline) {} 728 bool EndAttributes(); 729 DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag); 730 731 private: 732 DIEContext child_context_; // A context for our children. 733 bool handle_inline_; 734 }; 735 736 // A handler class for DW_TAG_subprogram DIEs. 737 class DwarfCUToModule::FuncHandler: public GenericDIEHandler { 738 public: 739 FuncHandler(CUContext* cu_context, 740 DIEContext* parent_context, 741 uint64_t offset, 742 bool handle_inline) 743 : GenericDIEHandler(cu_context, parent_context, offset), 744 low_pc_(0), 745 high_pc_(0), 746 high_pc_form_(DW_FORM_addr), 747 ranges_form_(DW_FORM_sec_offset), 748 ranges_data_(0), 749 inline_(false), 750 handle_inline_(handle_inline), 751 has_qualified_name_(false), 752 has_range_data_(false) {} 753 754 void ProcessAttributeUnsigned(enum DwarfAttribute attr, 755 enum DwarfForm form, 756 uint64_t data); 757 void ProcessAttributeSigned(enum DwarfAttribute attr, 758 enum DwarfForm form, 759 int64_t data); 760 DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag); 761 bool EndAttributes(); 762 void Finish(); 763 764 private: 765 // The fully-qualified name, as derived from name_attribute_, 766 // specification_, parent_context_. Computed in EndAttributes. 767 StringView name_; 768 uint64_t low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc 769 DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address. 770 DwarfForm ranges_form_; // DW_FORM_sec_offset or DW_FORM_rnglistx 771 uint64_t ranges_data_; // DW_AT_ranges 772 bool inline_; 773 vector<unique_ptr<Module::Inline>> child_inlines_; 774 bool handle_inline_; 775 bool has_qualified_name_; 776 bool has_range_data_; 777 DIEContext child_context_; // A context for our children. 778 }; 779 780 void DwarfCUToModule::FuncHandler::ProcessAttributeUnsigned( 781 enum DwarfAttribute attr, 782 enum DwarfForm form, 783 uint64_t data) { 784 switch (attr) { 785 // If this attribute is present at all --- even if its value is 786 // DW_INL_not_inlined --- then GCC may cite it as someone else's 787 // DW_AT_abstract_origin attribute. 788 case DW_AT_inline: inline_ = true; break; 789 790 case DW_AT_low_pc: low_pc_ = data; break; 791 case DW_AT_high_pc: 792 high_pc_form_ = form; 793 high_pc_ = data; 794 break; 795 case DW_AT_ranges: 796 has_range_data_ = true; 797 ranges_data_ = data; 798 ranges_form_ = form; 799 break; 800 default: 801 GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data); 802 break; 803 } 804 } 805 806 void DwarfCUToModule::FuncHandler::ProcessAttributeSigned( 807 enum DwarfAttribute attr, 808 enum DwarfForm form, 809 int64_t data) { 810 switch (attr) { 811 // If this attribute is present at all --- even if its value is 812 // DW_INL_not_inlined --- then GCC may cite it as someone else's 813 // DW_AT_abstract_origin attribute. 814 case DW_AT_inline: inline_ = true; break; 815 816 default: 817 break; 818 } 819 } 820 821 DIEHandler* DwarfCUToModule::FuncHandler::FindChildHandler( 822 uint64_t offset, 823 enum DwarfTag tag) { 824 switch (tag) { 825 case DW_TAG_inlined_subroutine: 826 if (handle_inline_) 827 return new InlineHandler(cu_context_, nullptr, offset, 0, 828 child_inlines_); 829 case DW_TAG_class_type: 830 case DW_TAG_structure_type: 831 case DW_TAG_union_type: 832 return new NamedScopeHandler(cu_context_, &child_context_, offset, 833 handle_inline_); 834 default: 835 return NULL; 836 } 837 } 838 839 bool DwarfCUToModule::FuncHandler::EndAttributes() { 840 // Compute our name, and record a specification, if appropriate. 841 name_ = ComputeQualifiedName(&has_qualified_name_); 842 if (name_.empty() && abstract_origin_) { 843 name_ = abstract_origin_->name; 844 } 845 child_context_.name = name_; 846 if (name_.empty() && no_specification) { 847 cu_context_->reporter->UnknownSpecification(offset_, specification_offset_); 848 } 849 return true; 850 } 851 852 void DwarfCUToModule::FuncHandler::Finish() { 853 vector<Module::Range> ranges; 854 855 // Check if this DIE was one of the forward references that was not able 856 // to be processed, and fix up the name of the appropriate Module::Function. 857 // "name_" will have already been fixed up in EndAttributes(). 858 if (!name_.empty()) { 859 auto iter = 860 cu_context_->file_context->file_private_->forward_ref_die_to_func.find( 861 offset_); 862 if (iter != 863 cu_context_->file_context->file_private_->forward_ref_die_to_func.end()) 864 iter->second->name = name_; 865 } 866 867 if (!has_range_data_) { 868 // Make high_pc_ an address, if it isn't already. 869 if (high_pc_form_ != DW_FORM_addr && 870 high_pc_form_ != DW_FORM_GNU_addr_index && 871 high_pc_form_ != DW_FORM_addrx && 872 high_pc_form_ != DW_FORM_addrx1 && 873 high_pc_form_ != DW_FORM_addrx2 && 874 high_pc_form_ != DW_FORM_addrx3 && 875 high_pc_form_ != DW_FORM_addrx4) { 876 high_pc_ += low_pc_; 877 } 878 879 Module::Range range(low_pc_, high_pc_ - low_pc_); 880 ranges.push_back(range); 881 } else { 882 RangesHandler* ranges_handler = cu_context_->ranges_handler; 883 if (ranges_handler) { 884 RangeListReader::CURangesInfo cu_info; 885 if (cu_context_->AssembleRangeListInfo(&cu_info)) { 886 if (!ranges_handler->ReadRanges(ranges_form_, ranges_data_, 887 &cu_info, &ranges)) { 888 ranges.clear(); 889 cu_context_->reporter->MalformedRangeList(ranges_data_); 890 } 891 } else { 892 cu_context_->reporter->MissingRanges(); 893 } 894 } 895 } 896 897 StringView name_omitted = 898 cu_context_->file_context->module_->AddStringToPool("<name omitted>"); 899 bool empty_range = IsEmptyRange(ranges); 900 // Did we collect the information we need? Not all DWARF function 901 // entries are non-empty (for example, inlined functions that were never 902 // used), but all the ones we're interested in cover a non-empty range of 903 // bytes. 904 if (!empty_range) { 905 low_pc_ = ranges.front().address; 906 // Malformed DWARF may omit the name, but all Module::Functions must 907 // have names. 908 StringView name = name_.empty() ? name_omitted : name_; 909 // Create a Module::Function based on the data we've gathered, and 910 // add it to the functions_ list. 911 scoped_ptr<Module::Function> func(new Module::Function(name, low_pc_)); 912 func->ranges = ranges; 913 func->parameter_size = 0; 914 // If the name was unqualified, prefer the Extern name if there's a mismatch 915 // (the Extern name will be fully-qualified in that case). 916 func->prefer_extern_name = !has_qualified_name_; 917 if (func->address) { 918 // If the function address is zero this is a sign that this function 919 // description is just empty debug data and should just be discarded. 920 cu_context_->functions.push_back(func.release()); 921 if (forward_ref_die_offset_ != 0) { 922 cu_context_->file_context->file_private_ 923 ->forward_ref_die_to_func[forward_ref_die_offset_] = 924 cu_context_->functions.back(); 925 926 cu_context_->spec_function_offsets[cu_context_->functions.back()] = 927 forward_ref_die_offset_; 928 } 929 930 cu_context_->functions.back()->inlines.swap(child_inlines_); 931 } 932 } else if (inline_) { 933 AbstractOrigin origin(name_); 934 cu_context_->file_context->file_private_->origins.insert({offset_, origin}); 935 } 936 937 // Only keep track of DW_TAG_subprogram which have the attributes we are 938 // interested. 939 if (handle_inline_ && (!empty_range || inline_)) { 940 StringView name = name_.empty() ? name_omitted : name_; 941 uint64_t offset = 942 specification_offset_ != 0 ? specification_offset_ : offset_; 943 Module::InlineOriginMap& inline_origin_map = 944 cu_context_->file_context->module_ 945 ->inline_origin_maps[cu_context_->file_context->filename_]; 946 inline_origin_map.SetReference(offset_, offset); 947 inline_origin_map.GetOrCreateInlineOrigin(offset_, name); 948 } 949 } 950 951 bool DwarfCUToModule::NamedScopeHandler::EndAttributes() { 952 child_context_.name = ComputeQualifiedName(NULL); 953 if (child_context_.name.empty() && no_specification) { 954 cu_context_->reporter->UnknownSpecification(offset_, specification_offset_); 955 } 956 return true; 957 } 958 959 DIEHandler* DwarfCUToModule::NamedScopeHandler::FindChildHandler( 960 uint64_t offset, 961 enum DwarfTag tag) { 962 switch (tag) { 963 case DW_TAG_subprogram: 964 return new FuncHandler(cu_context_, &child_context_, offset, 965 handle_inline_); 966 case DW_TAG_namespace: 967 case DW_TAG_class_type: 968 case DW_TAG_structure_type: 969 case DW_TAG_union_type: 970 return new NamedScopeHandler(cu_context_, &child_context_, offset, 971 handle_inline_); 972 default: 973 return NULL; 974 } 975 } 976 977 void DwarfCUToModule::WarningReporter::CUHeading() { 978 if (printed_cu_header_) 979 return; 980 fprintf(stderr, "%s: in compilation unit '%s' (offset 0x%" PRIx64 "):\n", 981 filename_.c_str(), cu_name_.c_str(), cu_offset_); 982 printed_cu_header_ = true; 983 } 984 985 void DwarfCUToModule::WarningReporter::UnknownSpecification(uint64_t offset, 986 uint64_t target) { 987 CUHeading(); 988 fprintf(stderr, "%s: the DIE at offset 0x%" PRIx64 " has a " 989 "DW_AT_specification attribute referring to the DIE at offset 0x%" 990 PRIx64 ", which was not marked as a declaration\n", 991 filename_.c_str(), offset, target); 992 } 993 994 void DwarfCUToModule::WarningReporter::UnknownAbstractOrigin(uint64_t offset, 995 uint64_t target) { 996 CUHeading(); 997 fprintf(stderr, "%s: the DIE at offset 0x%" PRIx64 " has a " 998 "DW_AT_abstract_origin attribute referring to the DIE at offset 0x%" 999 PRIx64 ", which was not marked as an inline\n", 1000 filename_.c_str(), offset, target); 1001 } 1002 1003 void DwarfCUToModule::WarningReporter::MissingSection(const string& name) { 1004 CUHeading(); 1005 fprintf(stderr, "%s: warning: couldn't find DWARF '%s' section\n", 1006 filename_.c_str(), name.c_str()); 1007 } 1008 1009 void DwarfCUToModule::WarningReporter::BadLineInfoOffset(uint64_t offset) { 1010 CUHeading(); 1011 fprintf(stderr, "%s: warning: line number data offset beyond end" 1012 " of '.debug_line' section\n", 1013 filename_.c_str()); 1014 } 1015 1016 void DwarfCUToModule::WarningReporter::UncoveredHeading() { 1017 if (printed_unpaired_header_) 1018 return; 1019 CUHeading(); 1020 fprintf(stderr, "%s: warning: skipping unpaired lines/functions:\n", 1021 filename_.c_str()); 1022 printed_unpaired_header_ = true; 1023 } 1024 1025 void DwarfCUToModule::WarningReporter::UncoveredFunction( 1026 const Module::Function& function) { 1027 if (!uncovered_warnings_enabled_) 1028 return; 1029 UncoveredHeading(); 1030 fprintf(stderr, " function%s: %s\n", 1031 IsEmptyRange(function.ranges) ? " (zero-length)" : "", 1032 function.name.str().c_str()); 1033 } 1034 1035 void DwarfCUToModule::WarningReporter::UncoveredLine(const Module::Line& line) { 1036 if (!uncovered_warnings_enabled_) 1037 return; 1038 UncoveredHeading(); 1039 fprintf(stderr, " line%s: %s:%d at 0x%" PRIx64 "\n", 1040 (line.size == 0 ? " (zero-length)" : ""), 1041 line.file->name.c_str(), line.number, line.address); 1042 } 1043 1044 void DwarfCUToModule::WarningReporter::UnnamedFunction(uint64_t offset) { 1045 CUHeading(); 1046 fprintf(stderr, "%s: warning: function at offset 0x%" PRIx64 " has no name\n", 1047 filename_.c_str(), offset); 1048 } 1049 1050 void DwarfCUToModule::WarningReporter::DemangleError(const string& input) { 1051 CUHeading(); 1052 fprintf(stderr, "%s: warning: failed to demangle %s\n", 1053 filename_.c_str(), input.c_str()); 1054 } 1055 1056 void DwarfCUToModule::WarningReporter::UnhandledInterCUReference( 1057 uint64_t offset, uint64_t target) { 1058 CUHeading(); 1059 fprintf(stderr, "%s: warning: the DIE at offset 0x%" PRIx64 " has a " 1060 "DW_FORM_ref_addr attribute with an inter-CU reference to " 1061 "0x%" PRIx64 ", but inter-CU reference handling is turned " 1062 " off.\n", filename_.c_str(), offset, target); 1063 } 1064 1065 void DwarfCUToModule::WarningReporter::MalformedRangeList(uint64_t offset) { 1066 CUHeading(); 1067 fprintf(stderr, "%s: warning: the range list at offset 0x%" PRIx64 " falls " 1068 " out of the .debug_ranges section.\n", 1069 filename_.c_str(), offset); 1070 } 1071 1072 void DwarfCUToModule::WarningReporter::MissingRanges() { 1073 CUHeading(); 1074 fprintf(stderr, "%s: warning: A DW_AT_ranges attribute was encountered but " 1075 "the .debug_ranges section is missing.\n", filename_.c_str()); 1076 } 1077 1078 DwarfCUToModule::DwarfCUToModule(FileContext* file_context, 1079 LineToModuleHandler* line_reader, 1080 RangesHandler* ranges_handler, 1081 WarningReporter* reporter, 1082 bool handle_inline, 1083 uint64_t low_pc, 1084 uint64_t addr_base, 1085 bool has_source_line_info, 1086 uint64_t source_line_offset) 1087 : RootDIEHandler(handle_inline), 1088 line_reader_(line_reader), 1089 cu_context_(new CUContext(file_context, 1090 reporter, 1091 ranges_handler, 1092 low_pc, 1093 addr_base)), 1094 child_context_(new DIEContext()), 1095 has_source_line_info_(has_source_line_info), 1096 source_line_offset_(source_line_offset) {} 1097 1098 DwarfCUToModule::~DwarfCUToModule() { 1099 } 1100 1101 void DwarfCUToModule::ProcessAttributeSigned(enum DwarfAttribute attr, 1102 enum DwarfForm form, 1103 int64_t data) { 1104 switch (attr) { 1105 case DW_AT_language: // source language of this CU 1106 SetLanguage(static_cast<DwarfLanguage>(data)); 1107 break; 1108 default: 1109 break; 1110 } 1111 } 1112 1113 void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr, 1114 enum DwarfForm form, 1115 uint64_t data) { 1116 switch (attr) { 1117 case DW_AT_stmt_list: // Line number information. 1118 has_source_line_info_ = true; 1119 source_line_offset_ = data; 1120 break; 1121 case DW_AT_language: // source language of this CU 1122 SetLanguage(static_cast<DwarfLanguage>(data)); 1123 break; 1124 case DW_AT_low_pc: 1125 cu_context_->low_pc = data; 1126 break; 1127 case DW_AT_high_pc: 1128 cu_context_->high_pc = data; 1129 break; 1130 case DW_AT_ranges: 1131 cu_context_->ranges_data = data; 1132 cu_context_->ranges_form = form; 1133 break; 1134 case DW_AT_rnglists_base: 1135 cu_context_->ranges_base = data; 1136 break; 1137 case DW_AT_addr_base: 1138 case DW_AT_GNU_addr_base: 1139 cu_context_->addr_base = data; 1140 break; 1141 case DW_AT_str_offsets_base: 1142 cu_context_->str_offsets_base = data; 1143 break; 1144 default: 1145 break; 1146 } 1147 } 1148 1149 void DwarfCUToModule::ProcessAttributeString(enum DwarfAttribute attr, 1150 enum DwarfForm form, 1151 const string& data) { 1152 switch (attr) { 1153 case DW_AT_name: 1154 cu_context_->reporter->SetCUName(data); 1155 break; 1156 case DW_AT_comp_dir: 1157 line_reader_->StartCompilationUnit(data); 1158 break; 1159 default: 1160 break; 1161 } 1162 } 1163 1164 bool DwarfCUToModule::EndAttributes() { 1165 return true; 1166 } 1167 1168 DIEHandler* DwarfCUToModule::FindChildHandler( 1169 uint64_t offset, 1170 enum DwarfTag tag) { 1171 switch (tag) { 1172 case DW_TAG_subprogram: 1173 return new FuncHandler(cu_context_.get(), child_context_.get(), offset, 1174 handle_inline); 1175 case DW_TAG_namespace: 1176 case DW_TAG_class_type: 1177 case DW_TAG_structure_type: 1178 case DW_TAG_union_type: 1179 case DW_TAG_module: 1180 return new NamedScopeHandler(cu_context_.get(), child_context_.get(), 1181 offset, handle_inline); 1182 default: 1183 return NULL; 1184 } 1185 } 1186 1187 void DwarfCUToModule::SetLanguage(DwarfLanguage language) { 1188 switch (language) { 1189 case DW_LANG_Java: 1190 cu_context_->language = Language::Java; 1191 break; 1192 1193 case DW_LANG_Swift: 1194 cu_context_->language = Language::Swift; 1195 break; 1196 1197 case DW_LANG_Rust: 1198 cu_context_->language = Language::Rust; 1199 break; 1200 1201 // DWARF has no generic language code for assembly language; this is 1202 // what the GNU toolchain uses. 1203 case DW_LANG_Mips_Assembler: 1204 cu_context_->language = Language::Assembler; 1205 break; 1206 1207 // C++ covers so many cases that it probably has some way to cope 1208 // with whatever the other languages throw at us. So make it the 1209 // default. 1210 // 1211 // Objective C and Objective C++ seem to create entries for 1212 // methods whose DW_AT_name values are already fully-qualified: 1213 // "-[Classname method:]". These appear at the top level. 1214 // 1215 // DWARF data for C should never include namespaces or functions 1216 // nested in struct types, but if it ever does, then C++'s 1217 // notation is probably not a bad choice for that. 1218 default: 1219 case DW_LANG_ObjC: 1220 case DW_LANG_ObjC_plus_plus: 1221 case DW_LANG_C: 1222 case DW_LANG_C89: 1223 case DW_LANG_C99: 1224 case DW_LANG_C_plus_plus: 1225 cu_context_->language = Language::CPlusPlus; 1226 break; 1227 } 1228 } 1229 1230 void DwarfCUToModule::ReadSourceLines(uint64_t offset) { 1231 const SectionMap& section_map 1232 = cu_context_->file_context->section_map(); 1233 SectionMap::const_iterator map_entry 1234 = GetSectionByName(section_map, ".debug_line"); 1235 if (map_entry == section_map.end()) { 1236 cu_context_->reporter->MissingSection(".debug_line"); 1237 return; 1238 } 1239 const uint8_t* line_section_start = map_entry->second.first + offset; 1240 uint64_t line_section_length = map_entry->second.second; 1241 if (offset >= line_section_length) { 1242 cu_context_->reporter->BadLineInfoOffset(offset); 1243 return; 1244 } 1245 line_section_length -= offset; 1246 // When reading line tables, string sections are never needed for dwarf4, and 1247 // may or may not be needed by dwarf5, so no error if they are missing. 1248 const uint8_t* string_section_start = nullptr; 1249 uint64_t string_section_length = 0; 1250 map_entry = GetSectionByName(section_map, ".debug_str"); 1251 if (map_entry != section_map.end()) { 1252 string_section_start = map_entry->second.first; 1253 string_section_length = map_entry->second.second; 1254 } 1255 const uint8_t* line_string_section_start = nullptr; 1256 uint64_t line_string_section_length = 0; 1257 map_entry = GetSectionByName(section_map, ".debug_line_str"); 1258 if (map_entry != section_map.end()) { 1259 line_string_section_start = map_entry->second.first; 1260 line_string_section_length = map_entry->second.second; 1261 } 1262 line_reader_->ReadProgram( 1263 line_section_start, line_section_length, 1264 string_section_start, string_section_length, 1265 line_string_section_start, line_string_section_length, 1266 cu_context_->file_context->module_, &lines_, &files_); 1267 } 1268 1269 namespace { 1270 class FunctionRange { 1271 public: 1272 FunctionRange(const Module::Range& range, Module::Function* function) : 1273 address(range.address), size(range.size), function(function) { } 1274 1275 void AddLine(Module::Line& line) { 1276 function->lines.push_back(line); 1277 } 1278 1279 Module::Address address; 1280 Module::Address size; 1281 Module::Function* function; 1282 }; 1283 1284 // Fills an array of ranges with pointers to the functions which owns 1285 // them. The array is sorted in ascending order and the ranges are non 1286 // empty and non-overlapping. 1287 1288 static void FillSortedFunctionRanges(vector<FunctionRange>& dest_ranges, 1289 vector<Module::Function*>* functions) { 1290 for (vector<Module::Function*>::const_iterator func_it = functions->cbegin(); 1291 func_it != functions->cend(); 1292 func_it++) 1293 { 1294 Module::Function* func = *func_it; 1295 vector<Module::Range>& ranges = func->ranges; 1296 for (vector<Module::Range>::const_iterator ranges_it = ranges.cbegin(); 1297 ranges_it != ranges.cend(); 1298 ++ranges_it) { 1299 FunctionRange range(*ranges_it, func); 1300 if (range.size != 0) { 1301 dest_ranges.push_back(range); 1302 } 1303 } 1304 } 1305 1306 sort(dest_ranges.begin(), dest_ranges.end(), 1307 [](const FunctionRange& fr1, const FunctionRange& fr2) { 1308 return fr1.address < fr2.address; 1309 } 1310 ); 1311 } 1312 1313 // Return true if ADDRESS falls within the range of ITEM. 1314 template <class T> 1315 inline bool within(const T& item, Module::Address address) { 1316 // Because Module::Address is unsigned, and unsigned arithmetic 1317 // wraps around, this will be false if ADDRESS falls before the 1318 // start of ITEM, or if it falls after ITEM's end. 1319 return address - item.address < item.size; 1320 } 1321 } 1322 1323 void DwarfCUToModule::AssignLinesToFunctions() { 1324 vector<Module::Function*>* functions = &cu_context_->functions; 1325 WarningReporter* reporter = cu_context_->reporter; 1326 1327 // This would be simpler if we assumed that source line entries 1328 // don't cross function boundaries. However, there's no real reason 1329 // to assume that (say) a series of function definitions on the same 1330 // line wouldn't get coalesced into one line number entry. The 1331 // DWARF spec certainly makes no such promises. 1332 // 1333 // So treat the functions and lines as peers, and take the trouble 1334 // to compute their ranges' intersections precisely. In any case, 1335 // the hair here is a constant factor for performance; the 1336 // complexity from here on out is linear. 1337 1338 // Put both our functions and lines in order by address. 1339 std::sort(functions->begin(), functions->end(), 1340 Module::Function::CompareByAddress); 1341 std::sort(lines_.begin(), lines_.end(), Module::Line::CompareByAddress); 1342 1343 // The last line that we used any piece of. We use this only for 1344 // generating warnings. 1345 const Module::Line* last_line_used = NULL; 1346 1347 // The last function and line we warned about --- so we can avoid 1348 // doing so more than once. 1349 const Module::Function* last_function_cited = NULL; 1350 const Module::Line* last_line_cited = NULL; 1351 1352 // Prepare a sorted list of ranges with range-to-function mapping 1353 vector<FunctionRange> sorted_ranges; 1354 FillSortedFunctionRanges(sorted_ranges, functions); 1355 1356 // Make a single pass through both the range and line vectors from lower to 1357 // higher addresses, populating each range's function lines vector with lines 1358 // from our lines_ vector that fall within the range. 1359 vector<FunctionRange>::iterator range_it = sorted_ranges.begin(); 1360 vector<Module::Line>::const_iterator line_it = lines_.begin(); 1361 1362 Module::Address current; 1363 1364 // Pointers to the referents of func_it and line_it, or NULL if the 1365 // iterator is at the end of the sequence. 1366 FunctionRange* range; 1367 const Module::Line* line; 1368 1369 // Start current at the beginning of the first line or function, 1370 // whichever is earlier. 1371 if (range_it != sorted_ranges.end() && line_it != lines_.end()) { 1372 range = &*range_it; 1373 line = &*line_it; 1374 current = std::min(range->address, line->address); 1375 } else if (line_it != lines_.end()) { 1376 range = NULL; 1377 line = &*line_it; 1378 current = line->address; 1379 } else if (range_it != sorted_ranges.end()) { 1380 range = &*range_it; 1381 line = NULL; 1382 current = range->address; 1383 } else { 1384 return; 1385 } 1386 1387 // Some dwarf producers handle linker-removed functions by using -1 as a 1388 // tombstone in the line table. So the end marker can be -1. 1389 if (current == Module::kMaxAddress) 1390 return; 1391 1392 while (range || line) { 1393 // This loop has two invariants that hold at the top. 1394 // 1395 // First, at least one of the iterators is not at the end of its 1396 // sequence, and those that are not refer to the earliest 1397 // range or line that contains or starts after CURRENT. 1398 // 1399 // Note that every byte is in one of four states: it is covered 1400 // or not covered by a range, and, independently, it is 1401 // covered or not covered by a line. 1402 // 1403 // The second invariant is that CURRENT refers to a byte whose 1404 // state is different from its predecessor, or it refers to the 1405 // first byte in the address space. In other words, CURRENT is 1406 // always the address of a transition. 1407 // 1408 // Note that, although each iteration advances CURRENT from one 1409 // transition address to the next in each iteration, it might 1410 // not advance the iterators. Suppose we have a range that 1411 // starts with a line, has a gap, and then a second line, and 1412 // suppose that we enter an iteration with CURRENT at the end of 1413 // the first line. The next transition address is the start of 1414 // the second line, after the gap, so the iteration should 1415 // advance CURRENT to that point. At the head of that iteration, 1416 // the invariants require that the line iterator be pointing at 1417 // the second line. But this is also true at the head of the 1418 // next. And clearly, the iteration must not change the range 1419 // iterator. So neither iterator moves. 1420 1421 // Assert the first invariant (see above). 1422 assert(!range || current < range->address || within(*range, current)); 1423 assert(!line || current < line->address || within(*line, current)); 1424 1425 // The next transition after CURRENT. 1426 Module::Address next_transition; 1427 1428 // Figure out which state we're in, add lines or warn, and compute 1429 // the next transition address. 1430 if (range && current >= range->address) { 1431 if (line && current >= line->address) { 1432 // Covered by both a line and a range. 1433 Module::Address range_left = range->size - (current - range->address); 1434 Module::Address line_left = line->size - (current - line->address); 1435 // This may overflow, but things work out. 1436 next_transition = current + std::min(range_left, line_left); 1437 Module::Line l = *line; 1438 l.address = current; 1439 l.size = next_transition - current; 1440 range->AddLine(l); 1441 last_line_used = line; 1442 } else { 1443 // Covered by a range, but no line. 1444 if (range->function != last_function_cited) { 1445 reporter->UncoveredFunction(*(range->function)); 1446 last_function_cited = range->function; 1447 } 1448 if (line && within(*range, line->address)) 1449 next_transition = line->address; 1450 else 1451 // If this overflows, we'll catch it below. 1452 next_transition = range->address + range->size; 1453 } 1454 } else { 1455 if (line && current >= line->address) { 1456 // Covered by a line, but no range. 1457 // 1458 // If GCC emits padding after one function to align the start 1459 // of the next, then it will attribute the padding 1460 // instructions to the last source line of function (to reduce 1461 // the size of the line number info), but omit it from the 1462 // DW_AT_{low,high}_pc range given in .debug_info (since it 1463 // costs nothing to be precise there). If we did use at least 1464 // some of the line we're about to skip, and it ends at the 1465 // start of the next function, then assume this is what 1466 // happened, and don't warn. 1467 if (line != last_line_cited 1468 && !(range 1469 && line == last_line_used 1470 && range->address - line->address == line->size)) { 1471 reporter->UncoveredLine(*line); 1472 last_line_cited = line; 1473 } 1474 if (range && within(*line, range->address)) 1475 next_transition = range->address; 1476 else 1477 // If this overflows, we'll catch it below. 1478 next_transition = line->address + line->size; 1479 } else { 1480 // Covered by neither a range nor a line. By the invariant, 1481 // both range and line begin after CURRENT. The next transition 1482 // is the start of the next range or next line, whichever 1483 // is earliest. 1484 assert(range || line); 1485 if (range && line) 1486 next_transition = std::min(range->address, line->address); 1487 else if (range) 1488 next_transition = range->address; 1489 else 1490 next_transition = line->address; 1491 } 1492 } 1493 1494 // If a function or line abuts the end of the address space, then 1495 // next_transition may end up being zero, in which case we've completed 1496 // our pass. Handle that here, instead of trying to deal with it in 1497 // each place we compute next_transition. 1498 1499 // Some dwarf producers handle linker-removed functions by using -1 as a 1500 // tombstone in the line table. So the end marker can be -1. 1501 if (!next_transition || next_transition == Module::kMaxAddress) 1502 break; 1503 1504 // Advance iterators as needed. If lines overlap or functions overlap, 1505 // then we could go around more than once. We don't worry too much 1506 // about what result we produce in that case, just as long as we don't 1507 // hang or crash. 1508 while (range_it != sorted_ranges.end() 1509 && next_transition >= range_it->address 1510 && !within(*range_it, next_transition)) 1511 range_it++; 1512 range = (range_it != sorted_ranges.end()) ? &(*range_it) : NULL; 1513 while (line_it != lines_.end() 1514 && next_transition >= line_it->address 1515 && !within(*line_it, next_transition)) 1516 line_it++; 1517 line = (line_it != lines_.end()) ? &*line_it : NULL; 1518 1519 // We must make progress. 1520 assert(next_transition > current); 1521 current = next_transition; 1522 } 1523 } 1524 1525 void DwarfCUToModule::AssignFilesToInlines() { 1526 // Assign File* to Inlines inside this CU. 1527 auto assignFile = [this](unique_ptr<Module::Inline>& in) { 1528 in->call_site_file = files_[in->call_site_file_id]; 1529 }; 1530 for (auto func : cu_context_->functions) { 1531 Module::Inline::InlineDFS(func->inlines, assignFile); 1532 } 1533 } 1534 1535 void DwarfCUToModule::Finish() { 1536 // Assembly language files have no function data, and that gives us 1537 // no place to store our line numbers (even though the GNU toolchain 1538 // will happily produce source line info for assembly language 1539 // files). To avoid spurious warnings about lines we can't assign 1540 // to functions, skip CUs in languages that lack functions. 1541 if (!cu_context_->language->HasFunctions()) 1542 return; 1543 1544 // Read source line info, if we have any. 1545 if (has_source_line_info_) 1546 ReadSourceLines(source_line_offset_); 1547 1548 vector<Module::Function*>* functions = &cu_context_->functions; 1549 1550 // Dole out lines to the appropriate functions. 1551 AssignLinesToFunctions(); 1552 1553 AssignFilesToInlines(); 1554 1555 // Add our functions, which now have source lines assigned to them, 1556 // to module_, and remove duplicate functions. 1557 for (Module::Function* func : *functions) 1558 if (!cu_context_->file_context->module_->AddFunction(func)) { 1559 auto iter = cu_context_->spec_function_offsets.find(func); 1560 if (iter != cu_context_->spec_function_offsets.end()) 1561 cu_context_->file_context->file_private_->forward_ref_die_to_func.erase( 1562 iter->second); 1563 delete func; 1564 } 1565 1566 // Ownership of the function objects has shifted from cu_context to 1567 // the Module. 1568 functions->clear(); 1569 1570 cu_context_->file_context->ClearSpecifications(); 1571 } 1572 1573 bool DwarfCUToModule::StartCompilationUnit(uint64_t offset, 1574 uint8_t address_size, 1575 uint8_t offset_size, 1576 uint64_t cu_length, 1577 uint8_t dwarf_version) { 1578 cu_context_->version = dwarf_version; 1579 return dwarf_version >= 2; 1580 } 1581 1582 bool DwarfCUToModule::StartRootDIE(uint64_t offset, enum DwarfTag tag) { 1583 // We don't deal with partial compilation units (the only other tag 1584 // likely to be used for root DIE). 1585 return (tag == DW_TAG_compile_unit 1586 || tag == DW_TAG_skeleton_unit); 1587 } 1588 1589 } // namespace google_breakpad