/ src / common / linux / elf_symbols_to_module.cc
elf_symbols_to_module.cc
  1  // -*- mode: c++ -*-
  2  
  3  // Copyright 2011 Google LLC
  4  //
  5  // Redistribution and use in source and binary forms, with or without
  6  // modification, are permitted provided that the following conditions are
  7  // met:
  8  //
  9  //     * Redistributions of source code must retain the above copyright
 10  // notice, this list of conditions and the following disclaimer.
 11  //     * Redistributions in binary form must reproduce the above
 12  // copyright notice, this list of conditions and the following disclaimer
 13  // in the documentation and/or other materials provided with the
 14  // distribution.
 15  //     * Neither the name of Google LLC nor the names of its
 16  // contributors may be used to endorse or promote products derived from
 17  // this software without specific prior written permission.
 18  //
 19  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 20  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 21  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 22  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 23  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 24  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 25  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 26  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 27  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 28  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 29  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30  
 31  // Original author: Ted Mielczarek <ted.mielczarek@gmail.com>
 32  
 33  #ifdef HAVE_CONFIG_H
 34  #include <config.h>  // Must come first
 35  #endif
 36  
 37  #include "common/linux/elf_symbols_to_module.h"
 38  
 39  #include <cxxabi.h>
 40  #include <elf.h>
 41  #include <string.h>
 42  
 43  #include <memory>
 44  #include <utility>
 45  
 46  #include "common/byte_cursor.h"
 47  #include "common/module.h"
 48  
 49  namespace google_breakpad {
 50  
 51  class ELFSymbolIterator {
 52  public:
 53    // The contents of an ELF symbol, adjusted for the host's endianness,
 54    // word size, and so on. Corresponds to the data in Elf32_Sym / Elf64_Sym.
 55    struct Symbol {
 56      // True if this iterator has reached the end of the symbol array. When
 57      // this is set, the other members of this structure are not valid.
 58      bool at_end;
 59  
 60      // The number of this symbol within the list.
 61      size_t index;
 62  
 63      // The current symbol's name offset. This is the offset within the
 64      // string table.
 65      size_t name_offset;
 66  
 67      // The current symbol's value, size, info and shndx fields.
 68      uint64_t value;
 69      uint64_t size;
 70      unsigned char info;
 71      uint16_t shndx;
 72    };
 73  
 74    // Create an ELFSymbolIterator walking the symbols in BUFFER. Treat the
 75    // symbols as big-endian if BIG_ENDIAN is true, as little-endian
 76    // otherwise. Assume each symbol has a 'value' field whose size is
 77    // VALUE_SIZE.
 78    //
 79    ELFSymbolIterator(const ByteBuffer* buffer, bool big_endian,
 80                      size_t value_size)
 81      : value_size_(value_size), cursor_(buffer, big_endian) {
 82      // Actually, weird sizes could be handled just fine, but they're
 83      // probably mistakes --- expressed in bits, say.
 84      assert(value_size == 4 || value_size == 8);
 85      symbol_.index = 0;
 86      Fetch();
 87    }
 88  
 89    // Move to the next symbol. This function's behavior is undefined if
 90    // at_end() is true when it is called.
 91    ELFSymbolIterator& operator++() { Fetch(); symbol_.index++; return *this; }
 92  
 93    // Dereferencing this iterator produces a reference to an Symbol structure
 94    // that holds the current symbol's values. The symbol is owned by this
 95    // SymbolIterator, and will be invalidated at the next call to operator++.
 96    const Symbol& operator*() const { return symbol_; }
 97    const Symbol* operator->() const { return &symbol_; }
 98  
 99  private:
100    // Read the symbol at cursor_, and set symbol_ appropriately.
101    void Fetch() {
102      // Elf32_Sym and Elf64_Sym have different layouts.
103      unsigned char other;
104      if (value_size_ == 4) {
105        // Elf32_Sym
106        cursor_
107          .Read(4, false, &symbol_.name_offset)
108          .Read(4, false, &symbol_.value)
109          .Read(4, false, &symbol_.size)
110          .Read(1, false, &symbol_.info)
111          .Read(1, false, &other)
112          .Read(2, false, &symbol_.shndx);
113      } else {
114        // Elf64_Sym
115        cursor_
116          .Read(4, false, &symbol_.name_offset)
117          .Read(1, false, &symbol_.info)
118          .Read(1, false, &other)
119          .Read(2, false, &symbol_.shndx)
120          .Read(8, false, &symbol_.value)
121          .Read(8, false, &symbol_.size);
122      }
123      symbol_.at_end = !cursor_;
124    }
125  
126    // The size of symbols' value field, in bytes.
127    size_t value_size_;
128  
129    // A byte cursor traversing buffer_.
130    ByteCursor cursor_;
131  
132    // Values for the symbol this iterator refers to.
133    Symbol symbol_;
134  };
135  
136  const char* SymbolString(ptrdiff_t offset, ByteBuffer& strings) {
137    if (offset < 0 || (size_t) offset >= strings.Size()) {
138      // Return the null string.
139      offset = 0;
140    }
141    return reinterpret_cast<const char*>(strings.start + offset);
142  }
143  
144  bool ELFSymbolsToModule(const uint8_t* symtab_section,
145                          size_t symtab_size,
146                          const uint8_t* string_section,
147                          size_t string_size,
148                          const bool big_endian,
149                          size_t value_size,
150                          Module* module) {
151    ByteBuffer symbols(symtab_section, symtab_size);
152    // Ensure that the string section is null-terminated.
153    if (string_section[string_size - 1] != '\0') {
154      const void* null_terminator = memrchr(string_section, '\0', string_size);
155      string_size = reinterpret_cast<const uint8_t*>(null_terminator)
156        - string_section;
157    }
158    ByteBuffer strings(string_section, string_size);
159  
160    // The iterator walking the symbol table.
161    ELFSymbolIterator iterator(&symbols, big_endian, value_size);
162  
163    while(!iterator->at_end) {
164      if (ELF32_ST_TYPE(iterator->info) == STT_FUNC &&
165          iterator->shndx != SHN_UNDEF) {
166        auto ext = std::make_unique<Module::Extern>(iterator->value);
167        ext->name = SymbolString(iterator->name_offset, strings);
168  #if !defined(__ANDROID__)  // Android NDK doesn't provide abi::__cxa_demangle.
169        int status = 0;
170        char* demangled =
171            abi::__cxa_demangle(ext->name.c_str(), NULL, NULL, &status);
172        if (demangled) {
173          if (status == 0)
174            ext->name = demangled;
175          free(demangled);
176        }
177  #endif
178        module->AddExtern(std::move(ext));
179      }
180      ++iterator;
181    }
182    return true;
183  }
184  
185  }  // namespace google_breakpad