/ src / common / linux / file_id.cc
file_id.cc
  1  // Copyright 2006 Google LLC
  2  //
  3  // Redistribution and use in source and binary forms, with or without
  4  // modification, are permitted provided that the following conditions are
  5  // met:
  6  //
  7  //     * Redistributions of source code must retain the above copyright
  8  // notice, this list of conditions and the following disclaimer.
  9  //     * Redistributions in binary form must reproduce the above
 10  // copyright notice, this list of conditions and the following disclaimer
 11  // in the documentation and/or other materials provided with the
 12  // distribution.
 13  //     * Neither the name of Google LLC nor the names of its
 14  // contributors may be used to endorse or promote products derived from
 15  // this software without specific prior written permission.
 16  //
 17  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 21  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 22  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 23  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 27  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28  //
 29  // file_id.cc: Return a unique identifier for a file
 30  //
 31  // See file_id.h for documentation
 32  //
 33  
 34  #ifdef HAVE_CONFIG_H
 35  #include <config.h>  // Must come first
 36  #endif
 37  
 38  #include "common/linux/file_id.h"
 39  
 40  #include <arpa/inet.h>
 41  #include <assert.h>
 42  #include <string.h>
 43  
 44  #include <algorithm>
 45  #include <string>
 46  
 47  #include "common/linux/elf_gnu_compat.h"
 48  #include "common/linux/elfutils.h"
 49  #include "common/linux/linux_libc_support.h"
 50  #include "common/linux/memory_mapped_file.h"
 51  #include "common/using_std_string.h"
 52  #include "third_party/lss/linux_syscall_support.h"
 53  
 54  namespace google_breakpad {
 55  namespace elf {
 56  
 57  // Used in a few places for backwards-compatibility.
 58  const size_t kMDGUIDSize = sizeof(MDGUID);
 59  
 60  FileID::FileID(const char* path) : path_(path) {}
 61  
 62  // ELF note name and desc are 32-bits word padded.
 63  #define NOTE_PADDING(a) ((a + 3) & ~3)
 64  
 65  // These functions are also used inside the crashed process, so be safe
 66  // and use the syscall/libc wrappers instead of direct syscalls or libc.
 67  
 68  static bool ElfClassBuildIDNoteIdentifier(const void* section, size_t length,
 69                                            wasteful_vector<uint8_t>& identifier) {
 70    static_assert(sizeof(ElfClass32::Nhdr) == sizeof(ElfClass64::Nhdr),
 71                  "Elf32_Nhdr and Elf64_Nhdr should be the same");
 72    typedef typename ElfClass32::Nhdr Nhdr;
 73  
 74    const void* section_end = reinterpret_cast<const char*>(section) + length;
 75    const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section);
 76    while (reinterpret_cast<const void*>(note_header) < section_end) {
 77      if (note_header->n_type == NT_GNU_BUILD_ID)
 78        break;
 79      note_header = reinterpret_cast<const Nhdr*>(
 80                    reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) +
 81                    NOTE_PADDING(note_header->n_namesz) +
 82                    NOTE_PADDING(note_header->n_descsz));
 83    }
 84    if (reinterpret_cast<const void*>(note_header) >= section_end ||
 85        note_header->n_descsz == 0) {
 86      return false;
 87    }
 88  
 89    const uint8_t* build_id = reinterpret_cast<const uint8_t*>(note_header) +
 90      sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz);
 91    identifier.insert(identifier.end(),
 92                      build_id,
 93                      build_id + note_header->n_descsz);
 94  
 95    return true;
 96  }
 97  
 98  // Attempt to locate a .note.gnu.build-id section in an ELF binary
 99  // and copy it into |identifier|.
100  static bool FindElfBuildIDNote(const void* elf_mapped_base,
101                                 wasteful_vector<uint8_t>& identifier) {
102    PageAllocator allocator;
103    // lld normally creates 2 PT_NOTEs, gold normally creates 1.
104    auto_wasteful_vector<ElfSegment, 2> segs(&allocator);
105    if (FindElfSegments(elf_mapped_base, PT_NOTE, &segs)) {
106      for (ElfSegment& seg : segs) {
107        if (ElfClassBuildIDNoteIdentifier(seg.start, seg.size, identifier)) {
108          return true;
109        }
110      }
111    }
112  
113    void* note_section;
114    size_t note_size;
115    if (FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
116                       (const void**)&note_section, &note_size)) {
117      return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier);
118    }
119  
120    return false;
121  }
122  
123  // Attempt to locate the .text section of an ELF binary and generate
124  // a simple hash by XORing the first page worth of bytes into |identifier|.
125  static bool HashElfTextSection(const void* elf_mapped_base,
126                                 wasteful_vector<uint8_t>& identifier) {
127    identifier.resize(kMDGUIDSize);
128  
129    void* text_section;
130    size_t text_size;
131    if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
132                        (const void**)&text_section, &text_size) ||
133        text_size == 0) {
134      return false;
135    }
136  
137    // Only provide |kMDGUIDSize| bytes to keep identifiers produced by this
138    // function backwards-compatible.
139    my_memset(&identifier[0], 0, kMDGUIDSize);
140    const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
141    const uint8_t* ptr_end = ptr + std::min(text_size, static_cast<size_t>(4096));
142    while (ptr < ptr_end) {
143      for (unsigned i = 0; i < kMDGUIDSize; i++)
144        identifier[i] ^= ptr[i];
145      ptr += kMDGUIDSize;
146    }
147    return true;
148  }
149  
150  // static
151  bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
152                                               wasteful_vector<uint8_t>& identifier) {
153    // Look for a build id note first.
154    if (FindElfBuildIDNote(base, identifier))
155      return true;
156  
157    // Fall back on hashing the first page of the text section.
158    return HashElfTextSection(base, identifier);
159  }
160  
161  bool FileID::ElfFileIdentifier(wasteful_vector<uint8_t>& identifier) {
162    MemoryMappedFile mapped_file(path_.c_str(), 0);
163    if (!mapped_file.data())  // Should probably check if size >= ElfW(Ehdr)?
164      return false;
165  
166    return ElfFileIdentifierFromMappedFile(mapped_file.data(), identifier);
167  }
168  
// These three functions are not ever called in an unsafe context, so it's OK
// to allocate memory and use libc.

// Returns the |count| bytes starting at |bytes| rendered as upper-case
// hexadecimal, two digits per byte and with no separators. Returns an empty
// string when |count| is 0.
static std::string bytes_to_hex_string(const uint8_t* bytes, size_t count) {
  static const char kHexDigits[] = "0123456789ABCDEF";

  std::string result;
  result.reserve(count * 2);
  // The index has the same type as |count| so it cannot wrap before reaching
  // it.
  for (size_t idx = 0; idx < count; ++idx) {
    const uint8_t byte = bytes[idx];
    result.push_back(kHexDigits[byte >> 4]);
    result.push_back(kHexDigits[byte & 0x0F]);
  }
  return result;
}
180  
181  // static
182  string FileID::ConvertIdentifierToUUIDString(
183      const wasteful_vector<uint8_t>& identifier) {
184    uint8_t identifier_swapped[kMDGUIDSize] = { 0 };
185  
186    // Endian-ness swap to match dump processor expectation.
187    memcpy(identifier_swapped, &identifier[0],
188           std::min(kMDGUIDSize, identifier.size()));
189    uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped);
190    *data1 = htonl(*data1);
191    uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4);
192    *data2 = htons(*data2);
193    uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6);
194    *data3 = htons(*data3);
195  
196    return bytes_to_hex_string(identifier_swapped, kMDGUIDSize);
197  }
198  
199  // static
200  string FileID::ConvertIdentifierToString(
201      const wasteful_vector<uint8_t>& identifier) {
202    return bytes_to_hex_string(&identifier[0], identifier.size());
203  }
204  
205  }  // elf
206  }  // namespace google_breakpad