file_id.cc
1 // Copyright 2006 Google LLC 2 // 3 // Redistribution and use in source and binary forms, with or without 4 // modification, are permitted provided that the following conditions are 5 // met: 6 // 7 // * Redistributions of source code must retain the above copyright 8 // notice, this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above 10 // copyright notice, this list of conditions and the following disclaimer 11 // in the documentation and/or other materials provided with the 12 // distribution. 13 // * Neither the name of Google LLC nor the names of its 14 // contributors may be used to endorse or promote products derived from 15 // this software without specific prior written permission. 16 // 17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 // 29 // file_id.cc: Return a unique identifier for a file 30 // 31 // See file_id.h for documentation 32 // 33 34 #ifdef HAVE_CONFIG_H 35 #include <config.h> // Must come first 36 #endif 37 38 #include "common/linux/file_id.h" 39 40 #include <arpa/inet.h> 41 #include <assert.h> 42 #include <string.h> 43 44 #include <algorithm> 45 #include <string> 46 47 #include "common/linux/elf_gnu_compat.h" 48 #include "common/linux/elfutils.h" 49 #include "common/linux/linux_libc_support.h" 50 #include "common/linux/memory_mapped_file.h" 51 #include "common/using_std_string.h" 52 #include "third_party/lss/linux_syscall_support.h" 53 54 namespace google_breakpad { 55 namespace elf { 56 57 // Used in a few places for backwards-compatibility. 58 const size_t kMDGUIDSize = sizeof(MDGUID); 59 60 FileID::FileID(const char* path) : path_(path) {} 61 62 // ELF note name and desc are 32-bits word padded. 63 #define NOTE_PADDING(a) ((a + 3) & ~3) 64 65 // These functions are also used inside the crashed process, so be safe 66 // and use the syscall/libc wrappers instead of direct syscalls or libc. 67 68 static bool ElfClassBuildIDNoteIdentifier(const void* section, size_t length, 69 wasteful_vector<uint8_t>& identifier) { 70 static_assert(sizeof(ElfClass32::Nhdr) == sizeof(ElfClass64::Nhdr), 71 "Elf32_Nhdr and Elf64_Nhdr should be the same"); 72 typedef typename ElfClass32::Nhdr Nhdr; 73 74 const void* section_end = reinterpret_cast<const char*>(section) + length; 75 const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section); 76 while (reinterpret_cast<const void*>(note_header) < section_end) { 77 if (note_header->n_type == NT_GNU_BUILD_ID) 78 break; 79 note_header = reinterpret_cast<const Nhdr*>( 80 reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) + 81 NOTE_PADDING(note_header->n_namesz) + 82 NOTE_PADDING(note_header->n_descsz)); 83 } 84 if (reinterpret_cast<const void*>(note_header) >= section_end || 85 note_header->n_descsz == 0) { 86 return false; 87 } 88 89 const uint8_t* build_id = reinterpret_cast<const uint8_t*>(note_header) + 90 sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz); 91 identifier.insert(identifier.end(), 92 build_id, 93 build_id + note_header->n_descsz); 94 95 return true; 96 } 97 98 // Attempt to locate a .note.gnu.build-id section in an ELF binary 99 // and copy it into |identifier|. 100 static bool FindElfBuildIDNote(const void* elf_mapped_base, 101 wasteful_vector<uint8_t>& identifier) { 102 PageAllocator allocator; 103 // lld normally creates 2 PT_NOTEs, gold normally creates 1. 104 auto_wasteful_vector<ElfSegment, 2> segs(&allocator); 105 if (FindElfSegments(elf_mapped_base, PT_NOTE, &segs)) { 106 for (ElfSegment& seg : segs) { 107 if (ElfClassBuildIDNoteIdentifier(seg.start, seg.size, identifier)) { 108 return true; 109 } 110 } 111 } 112 113 void* note_section; 114 size_t note_size; 115 if (FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE, 116 (const void**)¬e_section, ¬e_size)) { 117 return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier); 118 } 119 120 return false; 121 } 122 123 // Attempt to locate the .text section of an ELF binary and generate 124 // a simple hash by XORing the first page worth of bytes into |identifier|. 125 static bool HashElfTextSection(const void* elf_mapped_base, 126 wasteful_vector<uint8_t>& identifier) { 127 identifier.resize(kMDGUIDSize); 128 129 void* text_section; 130 size_t text_size; 131 if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS, 132 (const void**)&text_section, &text_size) || 133 text_size == 0) { 134 return false; 135 } 136 137 // Only provide |kMDGUIDSize| bytes to keep identifiers produced by this 138 // function backwards-compatible. 139 my_memset(&identifier[0], 0, kMDGUIDSize); 140 const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section); 141 const uint8_t* ptr_end = ptr + std::min(text_size, static_cast<size_t>(4096)); 142 while (ptr < ptr_end) { 143 for (unsigned i = 0; i < kMDGUIDSize; i++) 144 identifier[i] ^= ptr[i]; 145 ptr += kMDGUIDSize; 146 } 147 return true; 148 } 149 150 // static 151 bool FileID::ElfFileIdentifierFromMappedFile(const void* base, 152 wasteful_vector<uint8_t>& identifier) { 153 // Look for a build id note first. 154 if (FindElfBuildIDNote(base, identifier)) 155 return true; 156 157 // Fall back on hashing the first page of the text section. 158 return HashElfTextSection(base, identifier); 159 } 160 161 bool FileID::ElfFileIdentifier(wasteful_vector<uint8_t>& identifier) { 162 MemoryMappedFile mapped_file(path_.c_str(), 0); 163 if (!mapped_file.data()) // Should probably check if size >= ElfW(Ehdr)? 164 return false; 165 166 return ElfFileIdentifierFromMappedFile(mapped_file.data(), identifier); 167 } 168 169 // These three functions are not ever called in an unsafe context, so it's OK 170 // to allocate memory and use libc. 171 static string bytes_to_hex_string(const uint8_t* bytes, size_t count) { 172 string result; 173 for (unsigned int idx = 0; idx < count; ++idx) { 174 char buf[3]; 175 snprintf(buf, sizeof(buf), "%02X", bytes[idx]); 176 result.append(buf); 177 } 178 return result; 179 } 180 181 // static 182 string FileID::ConvertIdentifierToUUIDString( 183 const wasteful_vector<uint8_t>& identifier) { 184 uint8_t identifier_swapped[kMDGUIDSize] = { 0 }; 185 186 // Endian-ness swap to match dump processor expectation. 187 memcpy(identifier_swapped, &identifier[0], 188 std::min(kMDGUIDSize, identifier.size())); 189 uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped); 190 *data1 = htonl(*data1); 191 uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4); 192 *data2 = htons(*data2); 193 uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6); 194 *data3 = htons(*data3); 195 196 return bytes_to_hex_string(identifier_swapped, kMDGUIDSize); 197 } 198 199 // static 200 string FileID::ConvertIdentifierToString( 201 const wasteful_vector<uint8_t>& identifier) { 202 return bytes_to_hex_string(&identifier[0], identifier.size()); 203 } 204 205 } // elf 206 } // namespace google_breakpad