/ src / common / linux / dump_symbols.cc
dump_symbols.cc
   1  // Copyright 2011 Google LLC
   2  //
   3  // Redistribution and use in source and binary forms, with or without
   4  // modification, are permitted provided that the following conditions are
   5  // met:
   6  //
   7  //     * Redistributions of source code must retain the above copyright
   8  // notice, this list of conditions and the following disclaimer.
   9  //     * Redistributions in binary form must reproduce the above
  10  // copyright notice, this list of conditions and the following disclaimer
  11  // in the documentation and/or other materials provided with the
  12  // distribution.
  13  //     * Neither the name of Google LLC nor the names of its
  14  // contributors may be used to endorse or promote products derived from
  15  // this software without specific prior written permission.
  16  //
  17  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  18  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  19  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  20  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  21  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  22  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  23  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  24  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  25  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  27  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28  
  29  // Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
  30  
  31  // dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
  32  // Find all the debugging info in a file and dump it as a Breakpad symbol file.
  33  
  34  #ifdef HAVE_CONFIG_H
  35  #include <config.h>  // Must come first
  36  #endif
  37  
  38  #include "common/linux/dump_symbols.h"
  39  
  40  #include <assert.h>
  41  #include <elf.h>
  42  #include <errno.h>
  43  #include <fcntl.h>
  44  #include <limits.h>
  45  #include <link.h>
  46  #include <stdint.h>
  47  #include <stdio.h>
  48  #include <stdlib.h>
  49  #include <string.h>
  50  #include <sys/mman.h>
  51  #include <sys/stat.h>
  52  #include <unistd.h>
  53  #include <zlib.h>
  54  #ifdef HAVE_LIBZSTD
  55  #include <zstd.h>
  56  #endif
  57  
  58  #include <set>
  59  #include <string>
  60  #include <utility>
  61  #include <vector>
  62  
  63  #include "common/dwarf/bytereader-inl.h"
  64  #include "common/dwarf/dwarf2diehandler.h"
  65  #include "common/dwarf_cfi_to_module.h"
  66  #include "common/dwarf_cu_to_module.h"
  67  #include "common/dwarf_line_to_module.h"
  68  #include "common/dwarf_range_list_handler.h"
  69  #include "common/linux/crc32.h"
  70  #include "common/linux/eintr_wrapper.h"
  71  #include "common/linux/elfutils.h"
  72  #include "common/linux/elfutils-inl.h"
  73  #include "common/linux/elf_symbols_to_module.h"
  74  #include "common/linux/file_id.h"
  75  #include "common/memory_allocator.h"
  76  #include "common/module.h"
  77  #include "common/path_helper.h"
  78  #include "common/scoped_ptr.h"
  79  #ifndef NO_STABS_SUPPORT
  80  #include "common/stabs_reader.h"
  81  #include "common/stabs_to_module.h"
  82  #endif
  83  #include "common/using_std_string.h"
  84  
  85  // This namespace contains helper functions.
  86  namespace {
  87  
  88  using google_breakpad::DumpOptions;
  89  using google_breakpad::DwarfCFIToModule;
  90  using google_breakpad::DwarfCUToModule;
  91  using google_breakpad::DwarfLineToModule;
  92  using google_breakpad::DwarfRangeListHandler;
  93  using google_breakpad::ElfClass;
  94  using google_breakpad::ElfClass32;
  95  using google_breakpad::ElfClass64;
  96  using google_breakpad::elf::FileID;
  97  using google_breakpad::FindElfSectionByName;
  98  using google_breakpad::GetOffset;
  99  using google_breakpad::IsValidElf;
 100  using google_breakpad::elf::kDefaultBuildIdSize;
 101  using google_breakpad::Module;
 102  using google_breakpad::PageAllocator;
 103  #ifndef NO_STABS_SUPPORT
 104  using google_breakpad::StabsToModule;
 105  #endif
 106  using google_breakpad::scoped_ptr;
 107  using google_breakpad::wasteful_vector;
 108  
 109  // Define AARCH64 ELF architecture if host machine does not include this define.
 110  #ifndef EM_AARCH64
 111  #define EM_AARCH64      183
 112  #endif
 113  
 114  // Define ZStd compression if host machine does not include this define.
 115  #ifndef ELFCOMPRESS_ZSTD
 116  #define ELFCOMPRESS_ZSTD 2
 117  #endif
 118  
 119  //
 120  // FDWrapper
 121  //
 122  // Wrapper class to make sure opened file is closed.
 123  //
 124  class FDWrapper {
 125   public:
 126    explicit FDWrapper(int fd) :
 127      fd_(fd) {}
 128    ~FDWrapper() {
 129      if (fd_ != -1)
 130        close(fd_);
 131    }
 132    int get() {
 133      return fd_;
 134    }
 135    int release() {
 136      int fd = fd_;
 137      fd_ = -1;
 138      return fd;
 139    }
 140   private:
 141    int fd_;
 142  };
 143  
 144  //
 145  // MmapWrapper
 146  //
 147  // Wrapper class to make sure mapped regions are unmapped.
 148  //
 149  class MmapWrapper {
 150   public:
 151    MmapWrapper() : is_set_(false) {}
 152    ~MmapWrapper() {
 153      if (is_set_ && base_ != NULL) {
 154        assert(size_ > 0);
 155        munmap(base_, size_);
 156      }
 157    }
 158    void set(void* mapped_address, size_t mapped_size) {
 159      is_set_ = true;
 160      base_ = mapped_address;
 161      size_ = mapped_size;
 162    }
 163    void release() {
 164      assert(is_set_);
 165      is_set_ = false;
 166      base_ = NULL;
 167      size_ = 0;
 168    }
 169  
 170   private:
 171    bool is_set_;
 172    void* base_;
 173    size_t size_;
 174  };
 175  
 176  // Find the preferred loading address of the binary.
 177  template<typename ElfClass>
 178  typename ElfClass::Addr GetLoadingAddress(
 179      const typename ElfClass::Phdr* program_headers,
 180      int nheader) {
 181    typedef typename ElfClass::Phdr Phdr;
 182  
 183    // For non-PIC executables (e_type == ET_EXEC), the load address is
 184    // the start address of the first PT_LOAD segment.  (ELF requires
 185    // the segments to be sorted by load address.)  For PIC executables
 186    // and dynamic libraries (e_type == ET_DYN), this address will
 187    // normally be zero.
 188    for (int i = 0; i < nheader; ++i) {
 189      const Phdr& header = program_headers[i];
 190      if (header.p_type == PT_LOAD)
 191        return header.p_vaddr;
 192    }
 193    return 0;
 194  }
 195  
 196  // Find the set of address ranges for all PT_LOAD segments.
 197  template <typename ElfClass>
 198  vector<Module::Range> GetPtLoadSegmentRanges(
 199      const typename ElfClass::Phdr* program_headers,
 200      int nheader) {
 201    typedef typename ElfClass::Phdr Phdr;
 202    vector<Module::Range> ranges;
 203  
 204    for (int i = 0; i < nheader; ++i) {
 205      const Phdr& header = program_headers[i];
 206      if (header.p_type == PT_LOAD) {
 207        ranges.push_back(Module::Range(header.p_vaddr, header.p_memsz));
 208      }
 209    }
 210    return ranges;
 211  }
 212  
 213  #ifndef NO_STABS_SUPPORT
 214  template<typename ElfClass>
 215  bool LoadStabs(const typename ElfClass::Ehdr* elf_header,
 216                 const typename ElfClass::Shdr* stab_section,
 217                 const typename ElfClass::Shdr* stabstr_section,
 218                 const bool big_endian,
 219                 Module* module) {
 220    // A callback object to handle data from the STABS reader.
 221    StabsToModule handler(module);
 222    // Find the addresses of the STABS data, and create a STABS reader object.
 223    // On Linux, STABS entries always have 32-bit values, regardless of the
 224    // address size of the architecture whose code they're describing, and
 225    // the strings are always "unitized".
 226    const uint8_t* stabs =
 227        GetOffset<ElfClass, uint8_t>(elf_header, stab_section->sh_offset);
 228    const uint8_t* stabstr =
 229        GetOffset<ElfClass, uint8_t>(elf_header, stabstr_section->sh_offset);
 230    google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
 231                                        stabstr, stabstr_section->sh_size,
 232                                        big_endian, 4, true, &handler);
 233    // Read the STABS data, and do post-processing.
 234    if (!reader.Process())
 235      return false;
 236    handler.Finalize();
 237    return true;
 238  }
 239  #endif  // NO_STABS_SUPPORT
 240  
 241  // A range handler that accepts rangelist data parsed by
 242  // google_breakpad::RangeListReader and populates a range vector (typically
 243  // owned by a function) with the results.
 244  class DumperRangesHandler : public DwarfCUToModule::RangesHandler {
 245   public:
 246    DumperRangesHandler(google_breakpad::ByteReader* reader) :
 247        reader_(reader) { }
 248  
 249    bool ReadRanges(
 250        enum google_breakpad::DwarfForm form, uint64_t data,
 251        google_breakpad::RangeListReader::CURangesInfo* cu_info,
 252        vector<Module::Range>* ranges) {
 253      DwarfRangeListHandler handler(ranges);
 254      google_breakpad::RangeListReader range_list_reader(reader_, cu_info,
 255                                                      &handler);
 256      return range_list_reader.ReadRanges(form, data);
 257    }
 258  
 259   private:
 260    google_breakpad::ByteReader* reader_;
 261  };
 262  
 263  // A line-to-module loader that accepts line number info parsed by
 264  // google_breakpad::LineInfo and populates a Module and a line vector
 265  // with the results.
 266  class DumperLineToModule: public DwarfCUToModule::LineToModuleHandler {
 267   public:
 268    // Create a line-to-module converter using BYTE_READER.
 269    explicit DumperLineToModule(google_breakpad::ByteReader* byte_reader)
 270        : byte_reader_(byte_reader) { }
 271    void StartCompilationUnit(const string& compilation_dir) {
 272      compilation_dir_ = compilation_dir;
 273    }
 274    void ReadProgram(const uint8_t* program,
 275                     uint64_t length,
 276                     const uint8_t* string_section,
 277                     uint64_t string_section_length,
 278                     const uint8_t* line_string_section,
 279                     uint64_t line_string_section_length,
 280                     Module* module,
 281                     std::vector<Module::Line>* lines,
 282                     std::map<uint32_t, Module::File*>* files) {
 283      DwarfLineToModule handler(module, compilation_dir_, lines, files);
 284      google_breakpad::LineInfo parser(program, length, byte_reader_,
 285                                    string_section, string_section_length,
 286                                    line_string_section,
 287                                    line_string_section_length,
 288                                    &handler);
 289      parser.Start();
 290    }
 291   private:
 292    string compilation_dir_;
 293    google_breakpad::ByteReader* byte_reader_;
 294  };
 295  
 296  template<typename ElfClass>
 297  bool IsCompressedHeader(const typename ElfClass::Shdr* section) {
 298    return (section->sh_flags & SHF_COMPRESSED) != 0;
 299  }
 300  
 301  template<typename ElfClass>
 302  uint32_t GetCompressionHeader(
 303      typename ElfClass::Chdr& compression_header,
 304      const uint8_t* content, uint64_t size) {
 305    const typename ElfClass::Chdr* header =
 306        reinterpret_cast<const typename ElfClass::Chdr *>(content);
 307  
 308    if (size < sizeof (*header)) {
 309      return 0;
 310    }
 311  
 312    compression_header = *header;
 313    return sizeof (*header);
 314  }
 315  
 316  std::pair<uint8_t *, uint64_t> UncompressZlibSectionContents(
 317      const uint8_t* compressed_buffer, uint64_t compressed_size, uint64_t uncompressed_size) {
 318    z_stream stream;
 319    memset(&stream, 0, sizeof stream);
 320  
 321    stream.avail_in = compressed_size;
 322    stream.avail_out = uncompressed_size;
 323    stream.next_in = const_cast<uint8_t *>(compressed_buffer);
 324  
 325    google_breakpad::scoped_array<uint8_t> uncompressed_buffer(
 326      new uint8_t[uncompressed_size]);
 327  
 328    int status = inflateInit(&stream);
 329    while (stream.avail_in != 0 && status == Z_OK) {
 330      stream.next_out =
 331        uncompressed_buffer.get() + uncompressed_size - stream.avail_out;
 332  
 333      if ((status = inflate(&stream, Z_FINISH)) != Z_STREAM_END) {
 334        break;
 335      }
 336  
 337      status = inflateReset(&stream);
 338    }
 339  
 340    return inflateEnd(&stream) != Z_OK || status != Z_OK || stream.avail_out != 0
 341      ? std::make_pair(nullptr, 0)
 342      : std::make_pair(uncompressed_buffer.release(), uncompressed_size);
 343  }
 344  
 345  #ifdef HAVE_LIBZSTD
 346  std::pair<uint8_t *, uint64_t> UncompressZstdSectionContents(
 347      const uint8_t* compressed_buffer, uint64_t compressed_size,uint64_t uncompressed_size) {
 348  
 349    google_breakpad::scoped_array<uint8_t> uncompressed_buffer(new uint8_t[uncompressed_size]);
 350    size_t out_size = ZSTD_decompress(uncompressed_buffer.get(), uncompressed_size,
 351      compressed_buffer, compressed_size);
 352    if (ZSTD_isError(out_size)) {
 353      return std::make_pair(nullptr, 0);
 354    }
 355    assert(out_size == uncompressed_size);
 356    return std::make_pair(uncompressed_buffer.release(), uncompressed_size);
 357  }
 358  #endif
 359  
 360  std::pair<uint8_t *, uint64_t> UncompressSectionContents(
 361      uint64_t compression_type, const uint8_t* compressed_buffer,
 362      uint64_t compressed_size, uint64_t uncompressed_size) {
 363    if (compression_type == ELFCOMPRESS_ZLIB) {
 364      return UncompressZlibSectionContents(compressed_buffer, compressed_size, uncompressed_size);
 365    }
 366  
 367  #ifdef HAVE_LIBZSTD
 368    if (compression_type == ELFCOMPRESS_ZSTD) {
 369      return UncompressZstdSectionContents(compressed_buffer, compressed_size, uncompressed_size);
 370    }
 371  #endif
 372  
 373    return std::make_pair(nullptr, 0);
 374  }
 375  
 376  void StartProcessSplitDwarf(google_breakpad::CompilationUnit* reader,
 377                              Module* module,
 378                              google_breakpad::Endianness endianness,
 379                              bool handle_inter_cu_refs,
 380                              bool handle_inline) {
 381    std::string split_file;
 382    google_breakpad::SectionMap split_sections;
 383    google_breakpad::ByteReader split_byte_reader(endianness);
 384    uint64_t cu_offset = 0;
 385    if (!reader->ProcessSplitDwarf(split_file, split_sections, split_byte_reader,
 386                                   cu_offset))
 387      return;
 388    DwarfCUToModule::FileContext file_context(split_file, module,
 389                                              handle_inter_cu_refs);
 390    for (auto section : split_sections)
 391      file_context.AddSectionToSectionMap(section.first, section.second.first,
 392                                          section.second.second);
 393    // Because DWP/DWO file doesn't have .debug_addr/.debug_line/.debug_line_str,
 394    // its debug info will refer to .debug_addr/.debug_line in the main binary.
 395    if (file_context.section_map().find(".debug_addr") ==
 396        file_context.section_map().end())
 397      file_context.AddSectionToSectionMap(".debug_addr", reader->GetAddrBuffer(),
 398                                          reader->GetAddrBufferLen());
 399    if (file_context.section_map().find(".debug_line") ==
 400        file_context.section_map().end())
 401      file_context.AddSectionToSectionMap(".debug_line", reader->GetLineBuffer(),
 402                                          reader->GetLineBufferLen());
 403    if (file_context.section_map().find(".debug_line_str") ==
 404        file_context.section_map().end())
 405      file_context.AddSectionToSectionMap(".debug_line_str",
 406                                          reader->GetLineStrBuffer(),
 407                                          reader->GetLineStrBufferLen());
 408  
 409    DumperRangesHandler ranges_handler(&split_byte_reader);
 410    DumperLineToModule line_to_module(&split_byte_reader);
 411    DwarfCUToModule::WarningReporter reporter(split_file, cu_offset);
 412    DwarfCUToModule root_handler(
 413        &file_context, &line_to_module, &ranges_handler, &reporter, handle_inline,
 414        reader->GetLowPC(), reader->GetAddrBase(), reader->HasSourceLineInfo(),
 415        reader->GetSourceLineOffset());
 416    google_breakpad::DIEDispatcher die_dispatcher(&root_handler);
 417    google_breakpad::CompilationUnit split_reader(
 418        split_file, file_context.section_map(), cu_offset, &split_byte_reader,
 419        &die_dispatcher);
 420    split_reader.SetSplitDwarf(reader->GetAddrBase(), reader->GetDWOID());
 421    split_reader.Start();
 422    // Normally, it won't happen unless we have transitive reference.
 423    if (split_reader.ShouldProcessSplitDwarf()) {
 424      StartProcessSplitDwarf(&split_reader, module, endianness,
 425                             handle_inter_cu_refs, handle_inline);
 426    }
 427  }
 428  
 429  template<typename ElfClass>
 430  bool LoadDwarf(const string& dwarf_filename,
 431                 const typename ElfClass::Ehdr* elf_header,
 432                 const bool big_endian,
 433                 bool handle_inter_cu_refs,
 434                 bool handle_inline,
 435                 Module* module) {
 436    typedef typename ElfClass::Shdr Shdr;
 437  
 438    const google_breakpad::Endianness endianness = big_endian ?
 439        google_breakpad::ENDIANNESS_BIG : google_breakpad::ENDIANNESS_LITTLE;
 440    google_breakpad::ByteReader byte_reader(endianness);
 441  
 442    // Construct a context for this file.
 443    DwarfCUToModule::FileContext file_context(dwarf_filename,
 444                                              module,
 445                                              handle_inter_cu_refs);
 446  
 447    // Build a map of the ELF file's sections.
 448    const Shdr* sections =
 449        GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
 450    int num_sections = elf_header->e_shnum;
 451    const Shdr* section_names = sections + elf_header->e_shstrndx;
 452    for (int i = 0; i < num_sections; i++) {
 453      const Shdr* section = &sections[i];
 454      string name = GetOffset<ElfClass, char>(elf_header,
 455                                              section_names->sh_offset) +
 456                    section->sh_name;
 457      const uint8_t* contents = GetOffset<ElfClass, uint8_t>(elf_header,
 458                                                             section->sh_offset);
 459      uint64_t size = section->sh_size;
 460  
 461      if (!IsCompressedHeader<ElfClass>(section)) {
 462        file_context.AddSectionToSectionMap(name, contents, size);
 463        continue;
 464      }
 465  
 466      typename ElfClass::Chdr chdr;
 467  
 468      uint32_t compression_header_size =
 469        GetCompressionHeader<ElfClass>(chdr, contents, size);
 470  
 471      if (compression_header_size == 0 || chdr.ch_size == 0) {
 472        continue;
 473      }
 474  
 475      contents += compression_header_size;
 476      size -= compression_header_size;
 477  
 478      std::pair<uint8_t *, uint64_t> uncompressed =
 479        UncompressSectionContents(chdr.ch_type, contents, size, chdr.ch_size);
 480  
 481      if (uncompressed.first != nullptr && uncompressed.second != 0) {
 482        file_context.AddManagedSectionToSectionMap(name, uncompressed.first, uncompressed.second);
 483      }
 484    }
 485  
 486    // .debug_ranges and .debug_rnglists reader
 487    DumperRangesHandler ranges_handler(&byte_reader);
 488  
 489    // Parse all the compilation units in the .debug_info section.
 490    DumperLineToModule line_to_module(&byte_reader);
 491    google_breakpad::SectionMap::const_iterator debug_info_entry =
 492        file_context.section_map().find(".debug_info");
 493    assert(debug_info_entry != file_context.section_map().end());
 494    const std::pair<const uint8_t*, uint64_t>& debug_info_section =
 495        debug_info_entry->second;
 496    // This should never have been called if the file doesn't have a
 497    // .debug_info section.
 498    assert(debug_info_section.first);
 499    uint64_t debug_info_length = debug_info_section.second;
 500    for (uint64_t offset = 0; offset < debug_info_length;) {
 501      // Make a handler for the root DIE that populates MODULE with the
 502      // data that was found.
 503      DwarfCUToModule::WarningReporter reporter(dwarf_filename, offset);
 504      DwarfCUToModule root_handler(&file_context, &line_to_module,
 505                                   &ranges_handler, &reporter, handle_inline);
 506      // Make a Dwarf2Handler that drives the DIEHandler.
 507      google_breakpad::DIEDispatcher die_dispatcher(&root_handler);
 508      // Make a DWARF parser for the compilation unit at OFFSET.
 509      google_breakpad::CompilationUnit reader(dwarf_filename,
 510                                           file_context.section_map(),
 511                                           offset,
 512                                           &byte_reader,
 513                                           &die_dispatcher);
 514      // Process the entire compilation unit; get the offset of the next.
 515      offset += reader.Start();
 516      // Start to process split dwarf file.
 517      if (reader.ShouldProcessSplitDwarf()) {
 518        StartProcessSplitDwarf(&reader, module, endianness, handle_inter_cu_refs,
 519                               handle_inline);
 520      }
 521    }
 522    return true;
 523  }
 524  
 525  // Fill REGISTER_NAMES with the register names appropriate to the
 526  // machine architecture given in HEADER, indexed by the register
 527  // numbers used in DWARF call frame information. Return true on
 528  // success, or false if HEADER's machine architecture is not
 529  // supported.
 530  template<typename ElfClass>
 531  bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
 532                             std::vector<string>* register_names) {
 533    switch (elf_header->e_machine) {
 534      case EM_386:
 535        *register_names = DwarfCFIToModule::RegisterNames::I386();
 536        return true;
 537      case EM_ARM:
 538        *register_names = DwarfCFIToModule::RegisterNames::ARM();
 539        return true;
 540      case EM_AARCH64:
 541        *register_names = DwarfCFIToModule::RegisterNames::ARM64();
 542        return true;
 543      case EM_MIPS:
 544        *register_names = DwarfCFIToModule::RegisterNames::MIPS();
 545        return true;
 546      case EM_X86_64:
 547        *register_names = DwarfCFIToModule::RegisterNames::X86_64();
 548        return true;
 549      case EM_RISCV:
 550        *register_names = DwarfCFIToModule::RegisterNames::RISCV();
 551        return true;
 552      default:
 553        return false;
 554    }
 555  }
 556  
 557  template<typename ElfClass>
 558  bool LoadDwarfCFI(const string& dwarf_filename,
 559                    const typename ElfClass::Ehdr* elf_header,
 560                    const char* section_name,
 561                    const typename ElfClass::Shdr* section,
 562                    const bool eh_frame,
 563                    const typename ElfClass::Shdr* got_section,
 564                    const typename ElfClass::Shdr* text_section,
 565                    const bool big_endian,
 566                    Module* module) {
 567    // Find the appropriate set of register names for this file's
 568    // architecture.
 569    std::vector<string> register_names;
 570    if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &register_names)) {
 571      fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';"
 572              " cannot convert DWARF call frame information\n",
 573              dwarf_filename.c_str(), elf_header->e_machine);
 574      return false;
 575    }
 576  
 577    const google_breakpad::Endianness endianness = big_endian ?
 578        google_breakpad::ENDIANNESS_BIG : google_breakpad::ENDIANNESS_LITTLE;
 579  
 580    // Find the call frame information and its size.
 581    const uint8_t* cfi =
 582        GetOffset<ElfClass, uint8_t>(elf_header, section->sh_offset);
 583    size_t cfi_size = section->sh_size;
 584  
 585    // Plug together the parser, handler, and their entourages.
 586    DwarfCFIToModule::Reporter module_reporter(dwarf_filename, section_name);
 587    DwarfCFIToModule handler(module, register_names, &module_reporter);
 588    google_breakpad::ByteReader byte_reader(endianness);
 589  
 590    byte_reader.SetAddressSize(ElfClass::kAddrSize);
 591  
 592    // Provide the base addresses for .eh_frame encoded pointers, if
 593    // possible.
 594    byte_reader.SetCFIDataBase(section->sh_addr, cfi);
 595    if (got_section)
 596      byte_reader.SetDataBase(got_section->sh_addr);
 597    if (text_section)
 598      byte_reader.SetTextBase(text_section->sh_addr);
 599  
 600    google_breakpad::CallFrameInfo::Reporter dwarf_reporter(dwarf_filename,
 601                                                         section_name);
 602    if (!IsCompressedHeader<ElfClass>(section)) {
 603      google_breakpad::CallFrameInfo parser(cfi, cfi_size,
 604                                            &byte_reader, &handler,
 605                                            &dwarf_reporter, eh_frame);
 606      parser.Start();
 607      return true;
 608    }
 609  
 610    typename ElfClass::Chdr chdr;
 611    uint32_t compression_header_size =
 612      GetCompressionHeader<ElfClass>(chdr, cfi, cfi_size);
 613  
 614    if (compression_header_size == 0 || chdr.ch_size == 0) {
 615      fprintf(stderr, "%s: decompression failed at header\n",
 616              dwarf_filename.c_str());
 617      return false;
 618    }
 619    if (compression_header_size > cfi_size) {
 620      fprintf(stderr, "%s: decompression error, compression_header too large\n",
 621              dwarf_filename.c_str());
 622      return false;
 623    }
 624  
 625    cfi += compression_header_size;
 626    cfi_size -= compression_header_size;
 627  
 628    std::pair<uint8_t *, uint64_t> uncompressed =
 629      UncompressSectionContents(chdr.ch_type, cfi, cfi_size, chdr.ch_size);
 630  
 631    if (uncompressed.first == nullptr || uncompressed.second == 0) {
 632      fprintf(stderr, "%s: decompression failed\n", dwarf_filename.c_str());
 633      return false;
 634    }
 635    google_breakpad::CallFrameInfo parser(uncompressed.first, uncompressed.second,
 636                                          &byte_reader, &handler, &dwarf_reporter,
 637                                          eh_frame);
 638    parser.Start();
 639    return true;
 640  }
 641  
 642  bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
 643               void** elf_header) {
 644    int obj_fd = open(obj_file.c_str(), O_RDONLY);
 645    if (obj_fd < 0) {
 646      fprintf(stderr, "Failed to open ELF file '%s': %s\n",
 647              obj_file.c_str(), strerror(errno));
 648      return false;
 649    }
 650    FDWrapper obj_fd_wrapper(obj_fd);
 651    struct stat st;
 652    if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
 653      fprintf(stderr, "Unable to fstat ELF file '%s': %s\n",
 654              obj_file.c_str(), strerror(errno));
 655      return false;
 656    }
 657    void* obj_base = mmap(NULL, st.st_size,
 658                          PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0);
 659    if (obj_base == MAP_FAILED) {
 660      fprintf(stderr, "Failed to mmap ELF file '%s': %s\n",
 661              obj_file.c_str(), strerror(errno));
 662      return false;
 663    }
 664    map_wrapper->set(obj_base, st.st_size);
 665    *elf_header = obj_base;
 666    if (!IsValidElf(*elf_header)) {
 667      fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
 668      return false;
 669    }
 670    return true;
 671  }
 672  
 673  // Get the endianness of ELF_HEADER. If it's invalid, return false.
 674  template<typename ElfClass>
 675  bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
 676                     bool* big_endian) {
 677    if (elf_header->e_ident[EI_DATA] == ELFDATA2LSB) {
 678      *big_endian = false;
 679      return true;
 680    }
 681    if (elf_header->e_ident[EI_DATA] == ELFDATA2MSB) {
 682      *big_endian = true;
 683      return true;
 684    }
 685  
 686    fprintf(stderr, "bad data encoding in ELF header: %d\n",
 687            elf_header->e_ident[EI_DATA]);
 688    return false;
 689  }
 690  
 691  // Given |left_abspath|, find the absolute path for |right_path| and see if the
 692  // two absolute paths are the same.
 693  bool IsSameFile(const char* left_abspath, const string& right_path) {
 694    char right_abspath[PATH_MAX];
 695    if (!realpath(right_path.c_str(), right_abspath))
 696      return false;
 697    return strcmp(left_abspath, right_abspath) == 0;
 698  }
 699  
 700  // Read the .gnu_debuglink and get the debug file name. If anything goes
 701  // wrong, return an empty string.
 702  string ReadDebugLink(const uint8_t* debuglink,
 703                       const size_t debuglink_size,
 704                       const bool big_endian,
 705                       const string& obj_file,
 706                       const std::vector<string>& debug_dirs) {
 707    // Include '\0' + CRC32 (4 bytes).
 708    size_t debuglink_len = strlen(reinterpret_cast<const char*>(debuglink)) + 5;
 709    debuglink_len = 4 * ((debuglink_len + 3) / 4);  // Round up to 4 bytes.
 710  
 711    // Sanity check.
 712    if (debuglink_len != debuglink_size) {
 713      fprintf(stderr, "Mismatched .gnu_debuglink string / section size: "
 714              "%zx %zx\n", debuglink_len, debuglink_size);
 715      return string();
 716    }
 717  
 718    char obj_file_abspath[PATH_MAX];
 719    if (!realpath(obj_file.c_str(), obj_file_abspath)) {
 720      fprintf(stderr, "Cannot resolve absolute path for %s\n", obj_file.c_str());
 721      return string();
 722    }
 723  
 724    std::vector<string> searched_paths;
 725    string debuglink_path;
 726    std::vector<string>::const_iterator it;
 727    for (it = debug_dirs.begin(); it < debug_dirs.end(); ++it) {
 728      const string& debug_dir = *it;
 729      debuglink_path = debug_dir + "/" +
 730                       reinterpret_cast<const char*>(debuglink);
 731  
 732      // There is the annoying case of /path/to/foo.so having foo.so as the
 733      // debug link file name. Thus this may end up opening /path/to/foo.so again,
 734      // and there is a small chance of the two files having the same CRC.
 735      if (IsSameFile(obj_file_abspath, debuglink_path))
 736        continue;
 737  
 738      searched_paths.push_back(debug_dir);
 739      int debuglink_fd = open(debuglink_path.c_str(), O_RDONLY);
 740      if (debuglink_fd < 0)
 741        continue;
 742  
 743      FDWrapper debuglink_fd_wrapper(debuglink_fd);
 744  
 745      // The CRC is the last 4 bytes in |debuglink|.
 746      const google_breakpad::Endianness endianness = big_endian ?
 747          google_breakpad::ENDIANNESS_BIG : google_breakpad::ENDIANNESS_LITTLE;
 748      google_breakpad::ByteReader byte_reader(endianness);
 749      uint32_t expected_crc =
 750          byte_reader.ReadFourBytes(&debuglink[debuglink_size - 4]);
 751  
 752      uint32_t actual_crc = 0;
 753      while (true) {
 754        const size_t kReadSize = 4096;
 755        char buf[kReadSize];
 756        ssize_t bytes_read = HANDLE_EINTR(read(debuglink_fd, &buf, kReadSize));
 757        if (bytes_read < 0) {
 758          fprintf(stderr, "Error reading debug ELF file %s.\n",
 759                  debuglink_path.c_str());
 760          return string();
 761        }
 762        if (bytes_read == 0)
 763          break;
 764        actual_crc = google_breakpad::UpdateCrc32(actual_crc, buf, bytes_read);
 765      }
 766      if (actual_crc != expected_crc) {
 767        fprintf(stderr, "Error reading debug ELF file - CRC32 mismatch: %s\n",
 768                debuglink_path.c_str());
 769        continue;
 770      }
 771  
 772      // Found debug file.
 773      return debuglink_path;
 774    }
 775  
 776    // Not found case.
 777    fprintf(stderr, "Failed to find debug ELF file for '%s' after trying:\n",
 778            obj_file.c_str());
 779    for (it = searched_paths.begin(); it < searched_paths.end(); ++it) {
 780      const string& debug_dir = *it;
 781      fprintf(stderr, "  %s/%s\n", debug_dir.c_str(), debuglink);
 782    }
 783    return string();
 784  }
 785  
 786  //
 787  // LoadSymbolsInfo
 788  //
 789  // Holds the state between the two calls to LoadSymbols() in case it's necessary
 790  // to follow the .gnu_debuglink section and load debug information from a
 791  // different file.
 792  //
 793  template<typename ElfClass>
 794  class LoadSymbolsInfo {
 795   public:
 796    typedef typename ElfClass::Addr Addr;
 797  
 798    explicit LoadSymbolsInfo(const std::vector<string>& dbg_dirs) :
 799      debug_dirs_(dbg_dirs),
 800      has_loading_addr_(false) {}
 801  
 802    // Keeps track of which sections have been loaded so sections don't
 803    // accidentally get loaded twice from two different files.
 804    void LoadedSection(const string& section) {
 805      if (loaded_sections_.count(section) == 0) {
 806        loaded_sections_.insert(section);
 807      } else {
 808        fprintf(stderr, "Section %s has already been loaded.\n",
 809                section.c_str());
 810      }
 811    }
 812  
 813    // The ELF file and linked debug file are expected to have the same preferred
 814    // loading address.
 815    void set_loading_addr(Addr addr, const string& filename) {
 816      if (!has_loading_addr_) {
 817        loading_addr_ = addr;
 818        loaded_file_ = filename;
 819        return;
 820      }
 821  
 822      if (addr != loading_addr_) {
 823        fprintf(stderr,
 824                "ELF file '%s' and debug ELF file '%s' "
 825                "have different load addresses.\n",
 826                loaded_file_.c_str(), filename.c_str());
 827        assert(false);
 828      }
 829    }
 830  
 831    // Setters and getters
 832    const std::vector<string>& debug_dirs() const {
 833      return debug_dirs_;
 834    }
 835  
 836    string debuglink_file() const {
 837      return debuglink_file_;
 838    }
 839    void set_debuglink_file(string file) {
 840      debuglink_file_ = file;
 841    }
 842  
 843   private:
 844    const std::vector<string>& debug_dirs_; // Directories in which to
 845                                            // search for the debug ELF file.
 846  
 847    string debuglink_file_;  // Full path to the debug ELF file.
 848  
 849    bool has_loading_addr_;  // Indicate if LOADING_ADDR_ is valid.
 850  
 851    Addr loading_addr_;  // Saves the preferred loading address from the
 852                         // first call to LoadSymbols().
 853  
 854    string loaded_file_;  // Name of the file loaded from the first call to
 855                          // LoadSymbols().
 856  
 857    std::set<string> loaded_sections_;  // Tracks the Loaded ELF sections
 858                                        // between calls to LoadSymbols().
 859  };
 860  
 861  template<typename ElfClass>
 862  bool LoadSymbols(const string& obj_file,
 863                   const bool big_endian,
 864                   const typename ElfClass::Ehdr* elf_header,
 865                   const bool read_gnu_debug_link,
 866                   LoadSymbolsInfo<ElfClass>* info,
 867                   const DumpOptions& options,
 868                   Module* module) {
 869    typedef typename ElfClass::Addr Addr;
 870    typedef typename ElfClass::Phdr Phdr;
 871    typedef typename ElfClass::Shdr Shdr;
 872  
 873    Addr loading_addr = GetLoadingAddress<ElfClass>(
 874        GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
 875        elf_header->e_phnum);
 876    module->SetLoadAddress(loading_addr);
 877    info->set_loading_addr(loading_addr, obj_file);
 878  
 879    // Allow filtering of extraneous debug information in partitioned libraries.
 880    // Such libraries contain debug information for all libraries extracted from
 881    // the same combined library, implying extensive duplication.
 882    vector<Module::Range> address_ranges = GetPtLoadSegmentRanges<ElfClass>(
 883        GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
 884        elf_header->e_phnum);
 885    module->SetAddressRanges(address_ranges);
 886  
 887    const Shdr* sections =
 888        GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
 889    const Shdr* section_names = sections + elf_header->e_shstrndx;
 890    const char* names =
 891        GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
 892    const char* names_end = names + section_names->sh_size;
 893    bool found_debug_info_section = false;
 894    bool found_usable_info = false;
 895  
 896    if ((options.symbol_data & SYMBOLS_AND_FILES) ||
 897        (options.symbol_data & INLINES)) {
 898  #ifndef NO_STABS_SUPPORT
 899      // Look for STABS debugging information, and load it if present.
 900      const Shdr* stab_section =
 901        FindElfSectionByName<ElfClass>(".stab", SHT_PROGBITS,
 902                                       sections, names, names_end,
 903                                       elf_header->e_shnum);
 904      if (stab_section) {
 905        const Shdr* stabstr_section = stab_section->sh_link + sections;
 906        if (stabstr_section) {
 907          found_debug_info_section = true;
 908          found_usable_info = true;
 909          info->LoadedSection(".stab");
 910          if (!LoadStabs<ElfClass>(elf_header, stab_section, stabstr_section,
 911                                   big_endian, module)) {
 912            fprintf(stderr, "%s: \".stab\" section found, but failed to load"
 913                    " STABS debugging information\n", obj_file.c_str());
 914          }
 915        }
 916      }
 917  #endif  // NO_STABS_SUPPORT
 918  
 919      // See if there are export symbols available.
 920      const Shdr* symtab_section =
 921          FindElfSectionByName<ElfClass>(".symtab", SHT_SYMTAB,
 922                                         sections, names, names_end,
 923                                         elf_header->e_shnum);
 924      const Shdr* strtab_section =
 925          FindElfSectionByName<ElfClass>(".strtab", SHT_STRTAB,
 926                                         sections, names, names_end,
 927                                         elf_header->e_shnum);
 928      if (symtab_section && strtab_section) {
 929        info->LoadedSection(".symtab");
 930  
 931        const uint8_t* symtab =
 932            GetOffset<ElfClass, uint8_t>(elf_header,
 933                                         symtab_section->sh_offset);
 934        const uint8_t* strtab =
 935            GetOffset<ElfClass, uint8_t>(elf_header,
 936                                         strtab_section->sh_offset);
 937        bool result =
 938            ELFSymbolsToModule(symtab,
 939                               symtab_section->sh_size,
 940                               strtab,
 941                               strtab_section->sh_size,
 942                               big_endian,
 943                               ElfClass::kAddrSize,
 944                               module);
 945        found_usable_info = found_usable_info || result;
 946      } else {
 947        // Look in dynsym only if full symbol table was not available.
 948        const Shdr* dynsym_section =
 949            FindElfSectionByName<ElfClass>(".dynsym", SHT_DYNSYM,
 950                                           sections, names, names_end,
 951                                           elf_header->e_shnum);
 952        const Shdr* dynstr_section =
 953            FindElfSectionByName<ElfClass>(".dynstr", SHT_STRTAB,
 954                                           sections, names, names_end,
 955                                           elf_header->e_shnum);
 956        if (dynsym_section && dynstr_section) {
 957          info->LoadedSection(".dynsym");
 958  
 959          const uint8_t* dynsyms =
 960              GetOffset<ElfClass, uint8_t>(elf_header,
 961                                           dynsym_section->sh_offset);
 962          const uint8_t* dynstrs =
 963              GetOffset<ElfClass, uint8_t>(elf_header,
 964                                           dynstr_section->sh_offset);
 965          bool result =
 966              ELFSymbolsToModule(dynsyms,
 967                                 dynsym_section->sh_size,
 968                                 dynstrs,
 969                                 dynstr_section->sh_size,
 970                                 big_endian,
 971                                 ElfClass::kAddrSize,
 972                                 module);
 973          found_usable_info = found_usable_info || result;
 974        }
 975      }
 976  
 977      // Only Load .debug_info after loading symbol table to avoid duplicate
 978      // PUBLIC records.
 979      // Look for DWARF debugging information, and load it if present.
 980      const Shdr* dwarf_section =
 981        FindElfSectionByName<ElfClass>(".debug_info", SHT_PROGBITS,
 982                                       sections, names, names_end,
 983                                       elf_header->e_shnum);
 984  
 985      // .debug_info section type is SHT_PROGBITS for mips on pnacl toolchains,
 986      // but MIPS_DWARF for regular gnu toolchains, so both need to be checked
 987      if (elf_header->e_machine == EM_MIPS && !dwarf_section) {
 988        dwarf_section =
 989          FindElfSectionByName<ElfClass>(".debug_info", SHT_MIPS_DWARF,
 990                                         sections, names, names_end,
 991                                         elf_header->e_shnum);
 992      }
 993  
 994      if (dwarf_section) {
 995        found_debug_info_section = true;
 996        found_usable_info = true;
 997        info->LoadedSection(".debug_info");
 998        if (!LoadDwarf<ElfClass>(obj_file, elf_header, big_endian,
 999                                 options.handle_inter_cu_refs,
1000                                 options.symbol_data & INLINES, module)) {
1001          fprintf(stderr, "%s: \".debug_info\" section found, but failed to load "
1002                  "DWARF debugging information\n", obj_file.c_str());
1003        }
1004      }
1005    }
1006  
1007    if (options.symbol_data & CFI) {
1008      // Dwarf Call Frame Information (CFI) is actually independent from
1009      // the other DWARF debugging information, and can be used alone.
1010      const Shdr* dwarf_cfi_section =
1011          FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS,
1012                                         sections, names, names_end,
1013                                         elf_header->e_shnum);
1014  
1015      // .debug_frame section type is SHT_PROGBITS for mips on pnacl toolchains,
1016      // but MIPS_DWARF for regular gnu toolchains, so both need to be checked
1017      if (elf_header->e_machine == EM_MIPS && !dwarf_cfi_section) {
1018        dwarf_cfi_section =
1019            FindElfSectionByName<ElfClass>(".debug_frame", SHT_MIPS_DWARF,
1020                                          sections, names, names_end,
1021                                          elf_header->e_shnum);
1022      }
1023  
1024      if (dwarf_cfi_section) {
1025        // Ignore the return value of this function; even without call frame
1026        // information, the other debugging information could be perfectly
1027        // useful.
1028        info->LoadedSection(".debug_frame");
1029        bool result =
1030            LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
1031                                   dwarf_cfi_section, false, 0, 0, big_endian,
1032                                   module);
1033        found_usable_info = found_usable_info || result;
1034      }
1035  
1036      // Linux C++ exception handling information can also provide
1037      // unwinding data.
1038      const Shdr* eh_frame_section =
1039          FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS,
1040                                         sections, names, names_end,
1041                                         elf_header->e_shnum);
1042      if (eh_frame_section) {
1043        // Pointers in .eh_frame data may be relative to the base addresses of
1044        // certain sections. Provide those sections if present.
1045        const Shdr* got_section =
1046            FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS,
1047                                           sections, names, names_end,
1048                                           elf_header->e_shnum);
1049        const Shdr* text_section =
1050            FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS,
1051                                           sections, names, names_end,
1052                                           elf_header->e_shnum);
1053        info->LoadedSection(".eh_frame");
1054        // As above, ignore the return value of this function.
1055        bool result =
1056            LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame",
1057                                   eh_frame_section, true,
1058                                   got_section, text_section, big_endian, module);
1059        found_usable_info = found_usable_info || result;
1060      }
1061    }
1062  
1063    if (!found_debug_info_section) {
1064      fprintf(stderr, "%s: file contains no debugging information"
1065              " (no \".stab\" or \".debug_info\" sections)\n",
1066              obj_file.c_str());
1067  
1068      // Failed, but maybe there's a .gnu_debuglink section?
1069      if (read_gnu_debug_link) {
1070        const Shdr* gnu_debuglink_section
1071            = FindElfSectionByName<ElfClass>(".gnu_debuglink", SHT_PROGBITS,
1072                                             sections, names,
1073                                             names_end, elf_header->e_shnum);
1074        if (gnu_debuglink_section) {
1075          if (!info->debug_dirs().empty()) {
1076            const uint8_t* debuglink_contents =
1077                GetOffset<ElfClass, uint8_t>(elf_header,
1078                                             gnu_debuglink_section->sh_offset);
1079            string debuglink_file =
1080                ReadDebugLink(debuglink_contents,
1081                              gnu_debuglink_section->sh_size,
1082                              big_endian,
1083                              obj_file,
1084                              info->debug_dirs());
1085            info->set_debuglink_file(debuglink_file);
1086          } else {
1087            fprintf(stderr, ".gnu_debuglink section found in '%s', "
1088                    "but no debug path specified.\n", obj_file.c_str());
1089          }
1090        } else {
1091          fprintf(stderr, "%s does not contain a .gnu_debuglink section.\n",
1092                  obj_file.c_str());
1093        }
1094      } else {
1095        // Return true if some usable information was found, since the caller
1096        // doesn't want to use .gnu_debuglink.
1097        return found_usable_info;
1098      }
1099  
1100      // No debug info was found, let the user try again with .gnu_debuglink
1101      // if present.
1102      return false;
1103    }
1104  
1105    return true;
1106  }
1107  
// Translate the e_machine field of ELF_HEADER into the architecture name
// written to Breakpad symbol files. Returns NULL when the machine type is
// not one of the entries listed below.
template<typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;

  struct MachineName {
    Half machine;
    const char* name;
  };
  static const MachineName kKnownMachines[] = {
    { EM_386,     "x86" },
    { EM_ARM,     "arm" },
    { EM_AARCH64, "arm64" },
    { EM_MIPS,    "mips" },
    { EM_PPC64,   "ppc64" },
    { EM_PPC,     "ppc" },
    { EM_S390,    "s390" },
    { EM_SPARC,   "sparc" },
    { EM_SPARCV9, "sparcv9" },
    { EM_X86_64,  "x86_64" },
    { EM_RISCV,   "riscv" },
  };
  const size_t kKnownMachineCount =
      sizeof(kKnownMachines) / sizeof(kKnownMachines[0]);

  const Half machine = elf_header->e_machine;
  for (size_t i = 0; i < kKnownMachineCount; ++i) {
    if (kKnownMachines[i].machine == machine)
      return kKnownMachines[i].name;
  }
  return NULL;
}
1129  
1130  template<typename ElfClass>
1131  bool SanitizeDebugFile(const typename ElfClass::Ehdr* debug_elf_header,
1132                         const string& debuglink_file,
1133                         const string& obj_filename,
1134                         const char* obj_file_architecture,
1135                         const bool obj_file_is_big_endian) {
1136    const char* debug_architecture =
1137        ElfArchitecture<ElfClass>(debug_elf_header);
1138    if (!debug_architecture) {
1139      fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
1140              debuglink_file.c_str(), debug_elf_header->e_machine);
1141      return false;
1142    }
1143    if (strcmp(obj_file_architecture, debug_architecture)) {
1144      fprintf(stderr, "%s with ELF machine architecture %s does not match "
1145              "%s with ELF architecture %s\n",
1146              debuglink_file.c_str(), debug_architecture,
1147              obj_filename.c_str(), obj_file_architecture);
1148      return false;
1149    }
1150    bool debug_big_endian;
1151    if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
1152      return false;
1153    if (debug_big_endian != obj_file_is_big_endian) {
1154      fprintf(stderr, "%s and %s does not match in endianness\n",
1155              obj_filename.c_str(), debuglink_file.c_str());
1156      return false;
1157    }
1158    return true;
1159  }
1160  
1161  template<typename ElfClass>
1162  bool InitModuleForElfClass(const typename ElfClass::Ehdr* elf_header,
1163                             const string& obj_filename,
1164                             const string& obj_os,
1165                             scoped_ptr<Module>& module,
1166                             bool enable_multiple_field) {
1167    PageAllocator allocator;
1168    wasteful_vector<uint8_t> identifier(&allocator, kDefaultBuildIdSize);
1169    if (!FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) {
1170      fprintf(stderr, "%s: unable to generate file identifier\n",
1171              obj_filename.c_str());
1172      return false;
1173    }
1174  
1175    const char* architecture = ElfArchitecture<ElfClass>(elf_header);
1176    if (!architecture) {
1177      fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
1178              obj_filename.c_str(), elf_header->e_machine);
1179      return false;
1180    }
1181  
1182    char name_buf[NAME_MAX] = {};
1183    std::string name = google_breakpad::ElfFileSoNameFromMappedFile(
1184                           elf_header, name_buf, sizeof(name_buf))
1185                           ? name_buf
1186                           : google_breakpad::BaseName(obj_filename);
1187  
1188    // Add an extra "0" at the end.  PDB files on Windows have an 'age'
1189    // number appended to the end of the file identifier; this isn't
1190    // really used or necessary on other platforms, but be consistent.
1191    string id = FileID::ConvertIdentifierToUUIDString(identifier) + "0";
1192    // This is just the raw Build ID in hex.
1193    string code_id = FileID::ConvertIdentifierToString(identifier);
1194  
1195    module.reset(new Module(name, obj_os, architecture, id, code_id,
1196                            enable_multiple_field));
1197  
1198    return true;
1199  }
1200  
1201  template<typename ElfClass>
1202  bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
1203                              const string& obj_filename,
1204                              const string& obj_os,
1205                              const std::vector<string>& debug_dirs,
1206                              const DumpOptions& options,
1207                              Module** out_module) {
1208    typedef typename ElfClass::Ehdr Ehdr;
1209  
1210    *out_module = NULL;
1211  
1212    scoped_ptr<Module> module;
1213    if (!InitModuleForElfClass<ElfClass>(elf_header, obj_filename, obj_os, module,
1214                                         options.enable_multiple_field)) {
1215      return false;
1216    }
1217  
1218    // Figure out what endianness this file is.
1219    bool big_endian;
1220    if (!ElfEndianness<ElfClass>(elf_header, &big_endian))
1221      return false;
1222  
1223    LoadSymbolsInfo<ElfClass> info(debug_dirs);
1224    if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
1225                               !debug_dirs.empty(), &info,
1226                               options, module.get())) {
1227      const string debuglink_file = info.debuglink_file();
1228      if (debuglink_file.empty())
1229        return false;
1230  
1231      // Load debuglink ELF file.
1232      fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
1233      MmapWrapper debug_map_wrapper;
1234      Ehdr* debug_elf_header = NULL;
1235      if (!LoadELF(debuglink_file, &debug_map_wrapper,
1236                   reinterpret_cast<void**>(&debug_elf_header)) ||
1237          !SanitizeDebugFile<ElfClass>(debug_elf_header, debuglink_file,
1238                                       obj_filename,
1239                                       module->architecture().c_str(),
1240                                       big_endian)) {
1241        return false;
1242      }
1243  
1244      if (!LoadSymbols<ElfClass>(debuglink_file, big_endian,
1245                                 debug_elf_header, false, &info,
1246                                 options, module.get())) {
1247        return false;
1248      }
1249    }
1250  
1251    *out_module = module.release();
1252    return true;
1253  }
1254  
1255  }  // namespace
1256  
1257  namespace google_breakpad {
1258  
1259  // Not explicitly exported, but not static so it can be used in unit tests.
1260  bool ReadSymbolDataInternal(const uint8_t* obj_file,
1261                              const string& obj_filename,
1262                              const string& obj_os,
1263                              const std::vector<string>& debug_dirs,
1264                              const DumpOptions& options,
1265                              Module** module) {
1266    if (!IsValidElf(obj_file)) {
1267      fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
1268      return false;
1269    }
1270  
1271    int elfclass = ElfClass(obj_file);
1272    if (elfclass == ELFCLASS32) {
1273      return ReadSymbolDataElfClass<ElfClass32>(
1274          reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, obj_os,
1275          debug_dirs, options, module);
1276    }
1277    if (elfclass == ELFCLASS64) {
1278      return ReadSymbolDataElfClass<ElfClass64>(
1279          reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, obj_os,
1280          debug_dirs, options, module);
1281    }
1282  
1283    return false;
1284  }
1285  
1286  bool WriteSymbolFile(const string& load_path,
1287                       const string& obj_file,
1288                       const string& obj_os,
1289                       const std::vector<string>& debug_dirs,
1290                       const DumpOptions& options,
1291                       std::ostream& sym_stream) {
1292    Module* module;
1293    if (!ReadSymbolData(load_path, obj_file, obj_os, debug_dirs, options,
1294                        &module))
1295      return false;
1296  
1297    bool result = module->Write(sym_stream, options.symbol_data);
1298    delete module;
1299    return result;
1300  }
1301  
1302  // Read the selected object file's debugging information, and write out the
1303  // header only to |stream|. Return true on success; if an error occurs, report
1304  // it and return false.
1305  bool WriteSymbolFileHeader(const string& load_path,
1306                             const string& obj_file,
1307                             const string& obj_os,
1308                             std::ostream& sym_stream) {
1309    MmapWrapper map_wrapper;
1310    void* elf_header = NULL;
1311    if (!LoadELF(load_path, &map_wrapper, &elf_header)) {
1312      fprintf(stderr, "Could not load ELF file: %s\n", obj_file.c_str());
1313      return false;
1314    }
1315  
1316    if (!IsValidElf(elf_header)) {
1317      fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
1318      return false;
1319    }
1320  
1321    int elfclass = ElfClass(elf_header);
1322    scoped_ptr<Module> module;
1323    if (elfclass == ELFCLASS32) {
1324      if (!InitModuleForElfClass<ElfClass32>(
1325          reinterpret_cast<const Elf32_Ehdr*>(elf_header), obj_file, obj_os,
1326          module, /*enable_multiple_field=*/false)) {
1327        fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str());
1328        return false;
1329      }
1330    } else if (elfclass == ELFCLASS64) {
1331      if (!InitModuleForElfClass<ElfClass64>(
1332          reinterpret_cast<const Elf64_Ehdr*>(elf_header), obj_file, obj_os,
1333          module, /*enable_multiple_field=*/false)) {
1334        fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str());
1335        return false;
1336      }
1337    } else {
1338      fprintf(stderr, "Unsupported module file: %s\n", obj_file.c_str());
1339      return false;
1340    }
1341  
1342    return module->Write(sym_stream, ALL_SYMBOL_DATA);
1343  }
1344  
1345  bool ReadSymbolData(const string& load_path,
1346                      const string& obj_file,
1347                      const string& obj_os,
1348                      const std::vector<string>& debug_dirs,
1349                      const DumpOptions& options,
1350                      Module** module) {
1351    MmapWrapper map_wrapper;
1352    void* elf_header = NULL;
1353    if (!LoadELF(load_path, &map_wrapper, &elf_header))
1354      return false;
1355  
1356    return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header),
1357                                  obj_file, obj_os, debug_dirs, options, module);
1358  }
1359  
1360  }  // namespace google_breakpad