macho_reader.cc
1 // Copyright 2010 Google LLC 2 // 3 // Redistribution and use in source and binary forms, with or without 4 // modification, are permitted provided that the following conditions are 5 // met: 6 // 7 // * Redistributions of source code must retain the above copyright 8 // notice, this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above 10 // copyright notice, this list of conditions and the following disclaimer 11 // in the documentation and/or other materials provided with the 12 // distribution. 13 // * Neither the name of Google LLC nor the names of its 14 // contributors may be used to endorse or promote products derived from 15 // this software without specific prior written permission. 16 // 17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> 30 31 // macho_reader.cc: Implementation of google_breakpad::Mach_O::FatReader and 32 // google_breakpad::Mach_O::Reader. See macho_reader.h for details. 33 34 #ifdef HAVE_CONFIG_H 35 #include <config.h> // Must come first 36 #endif 37 38 #include "common/mac/macho_reader.h" 39 40 #include <assert.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 44 #include <limits> 45 46 // Unfortunately, CPU_TYPE_ARM is not define for 10.4. 47 #if !defined(CPU_TYPE_ARM) 48 #define CPU_TYPE_ARM 12 49 #endif 50 51 #if !defined(CPU_TYPE_ARM_64) 52 #define CPU_TYPE_ARM_64 16777228 53 #endif 54 55 namespace google_breakpad { 56 namespace mach_o { 57 58 // If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its 59 // arguments, so you can't place expressions that do necessary work in 60 // the argument of an assert. Nor can you assign the result of the 61 // expression to a variable and assert that the variable's value is 62 // true: you'll get unused variable warnings when NDEBUG is #defined. 63 // 64 // ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that 65 // the result is true if NDEBUG is not #defined. 66 #if defined(NDEBUG) 67 #define ASSERT_ALWAYS_EVAL(x) (x) 68 #else 69 #define ASSERT_ALWAYS_EVAL(x) assert(x) 70 #endif 71 72 void FatReader::Reporter::BadHeader() { 73 fprintf(stderr, "%s: file is neither a fat binary file" 74 " nor a Mach-O object file\n", filename_.c_str()); 75 } 76 77 void FatReader::Reporter::TooShort() { 78 fprintf(stderr, "%s: file too short for the data it claims to contain\n", 79 filename_.c_str()); 80 } 81 82 void FatReader::Reporter::MisplacedObjectFile() { 83 fprintf(stderr, "%s: file too short for the object files it claims" 84 " to contain\n", filename_.c_str()); 85 } 86 87 bool FatReader::Read(const uint8_t* buffer, size_t size) { 88 buffer_.start = buffer; 89 buffer_.end = buffer + size; 90 ByteCursor cursor(&buffer_); 91 92 // Fat binaries always use big-endian, so read the magic number in 93 // that endianness. To recognize Mach-O magic numbers, which can use 94 // either endianness, check for both the proper and reversed forms 95 // of the magic numbers. 96 cursor.set_big_endian(true); 97 if (cursor >> magic_) { 98 if (magic_ == FAT_MAGIC) { 99 // How many object files does this fat binary contain? 100 uint32_t object_files_count; 101 if (!(cursor >> object_files_count)) { // nfat_arch 102 reporter_->TooShort(); 103 return false; 104 } 105 106 // Read the list of object files. 107 object_files_.resize(object_files_count); 108 for (size_t i = 0; i < object_files_count; i++) { 109 struct fat_arch objfile; 110 111 // Read this object file entry, byte-swapping as appropriate. 112 cursor >> objfile.cputype 113 >> objfile.cpusubtype 114 >> objfile.offset 115 >> objfile.size 116 >> objfile.align; 117 118 SuperFatArch super_fat_arch(objfile); 119 object_files_[i] = super_fat_arch; 120 121 if (!cursor) { 122 reporter_->TooShort(); 123 return false; 124 } 125 // Does the file actually have the bytes this entry refers to? 126 size_t fat_size = buffer_.Size(); 127 if (objfile.offset > fat_size || 128 objfile.size > fat_size - objfile.offset) { 129 reporter_->MisplacedObjectFile(); 130 return false; 131 } 132 } 133 134 return true; 135 } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 || 136 magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) { 137 // If this is a little-endian Mach-O file, fix the cursor's endianness. 138 if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) 139 cursor.set_big_endian(false); 140 // Record the entire file as a single entry in the object file list. 141 object_files_.resize(1); 142 143 // Get the cpu type and subtype from the Mach-O header. 144 if (!(cursor >> object_files_[0].cputype 145 >> object_files_[0].cpusubtype)) { 146 reporter_->TooShort(); 147 return false; 148 } 149 150 object_files_[0].offset = 0; 151 object_files_[0].size = static_cast<uint64_t>(buffer_.Size()); 152 // This alignment is correct for 32 and 64-bit x86 and ppc. 153 // See get_align in the lipo source for other architectures: 154 // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c 155 object_files_[0].align = 12; // 2^12 == 4096 156 return true; 157 } 158 } 159 reporter_->BadHeader(); 160 return false; 161 } 162 163 void Reader::Reporter::BadHeader() { 164 fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str()); 165 } 166 167 void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type, 168 cpu_subtype_t cpu_subtype, 169 cpu_type_t expected_cpu_type, 170 cpu_subtype_t expected_cpu_subtype) { 171 fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected" 172 " type %d, subtype %d\n", 173 filename_.c_str(), cpu_type, cpu_subtype, 174 expected_cpu_type, expected_cpu_subtype); 175 } 176 177 void Reader::Reporter::HeaderTruncated() { 178 fprintf(stderr, "%s: file does not contain a complete Mach-O header\n", 179 filename_.c_str()); 180 } 181 182 void Reader::Reporter::LoadCommandRegionTruncated() { 183 fprintf(stderr, "%s: file too short to hold load command region" 184 " given in Mach-O header\n", filename_.c_str()); 185 } 186 187 void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i, 188 LoadCommandType type) { 189 fprintf(stderr, "%s: file's header claims there are %zu" 190 " load commands, but load command #%zu", 191 filename_.c_str(), claimed, i); 192 if (type) fprintf(stderr, ", of type %d,", type); 193 fprintf(stderr, " extends beyond the end of the load command region\n"); 194 } 195 196 void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) { 197 fprintf(stderr, "%s: the contents of load command #%zu, of type %d," 198 " extend beyond the size given in the load command's header\n", 199 filename_.c_str(), i, type); 200 } 201 202 void Reader::Reporter::SectionsMissing(const string& name) { 203 fprintf(stderr, "%s: the load command for segment '%s'" 204 " is too short to hold the section headers it claims to have\n", 205 filename_.c_str(), name.c_str()); 206 } 207 208 void Reader::Reporter::MisplacedSegmentData(const string& name) { 209 fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond" 210 " the end of the file\n", filename_.c_str(), name.c_str()); 211 } 212 213 void Reader::Reporter::MisplacedSectionData(const string& section, 214 const string& segment) { 215 fprintf(stderr, "%s: the section '%s' in segment '%s'" 216 " claims its contents lie outside the segment's contents\n", 217 filename_.c_str(), section.c_str(), segment.c_str()); 218 } 219 220 void Reader::Reporter::MisplacedSymbolTable() { 221 fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol" 222 " table's contents are located beyond the end of the file\n", 223 filename_.c_str()); 224 } 225 226 void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) { 227 fprintf(stderr, "%s: CPU type %d is not supported\n", 228 filename_.c_str(), cpu_type); 229 } 230 231 bool Reader::Read(const uint8_t* buffer, 232 size_t size, 233 cpu_type_t expected_cpu_type, 234 cpu_subtype_t expected_cpu_subtype) { 235 assert(!buffer_.start); 236 buffer_.start = buffer; 237 buffer_.end = buffer + size; 238 ByteCursor cursor(&buffer_, true); 239 uint32_t magic; 240 if (!(cursor >> magic)) { 241 reporter_->HeaderTruncated(); 242 return false; 243 } 244 245 if (expected_cpu_type != CPU_TYPE_ANY) { 246 uint32_t expected_magic; 247 // validate that magic matches the expected cpu type 248 switch (expected_cpu_type) { 249 case CPU_TYPE_ARM: 250 case CPU_TYPE_I386: 251 expected_magic = MH_CIGAM; 252 break; 253 case CPU_TYPE_POWERPC: 254 expected_magic = MH_MAGIC; 255 break; 256 case CPU_TYPE_ARM_64: 257 case CPU_TYPE_X86_64: 258 expected_magic = MH_CIGAM_64; 259 break; 260 case CPU_TYPE_POWERPC64: 261 expected_magic = MH_MAGIC_64; 262 break; 263 default: 264 reporter_->UnsupportedCPUType(expected_cpu_type); 265 return false; 266 } 267 268 if (expected_magic != magic) { 269 reporter_->BadHeader(); 270 return false; 271 } 272 } 273 274 // Since the byte cursor is in big-endian mode, a reversed magic number 275 // always indicates a little-endian file, regardless of our own endianness. 276 switch (magic) { 277 case MH_MAGIC: big_endian_ = true; bits_64_ = false; break; 278 case MH_CIGAM: big_endian_ = false; bits_64_ = false; break; 279 case MH_MAGIC_64: big_endian_ = true; bits_64_ = true; break; 280 case MH_CIGAM_64: big_endian_ = false; bits_64_ = true; break; 281 default: 282 reporter_->BadHeader(); 283 return false; 284 } 285 cursor.set_big_endian(big_endian_); 286 uint32_t commands_size, reserved; 287 cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_ 288 >> commands_size >> flags_; 289 if (bits_64_) 290 cursor >> reserved; 291 if (!cursor) { 292 reporter_->HeaderTruncated(); 293 return false; 294 } 295 296 if (expected_cpu_type != CPU_TYPE_ANY && 297 (expected_cpu_type != cpu_type_ || 298 expected_cpu_subtype != cpu_subtype_)) { 299 reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_, 300 expected_cpu_type, expected_cpu_subtype); 301 return false; 302 } 303 304 cursor 305 .PointTo(&load_commands_.start, commands_size) 306 .PointTo(&load_commands_.end, 0); 307 if (!cursor) { 308 reporter_->LoadCommandRegionTruncated(); 309 return false; 310 } 311 312 return true; 313 } 314 315 bool Reader::WalkLoadCommands(Reader::LoadCommandHandler* handler) const { 316 ByteCursor list_cursor(&load_commands_, big_endian_); 317 318 for (size_t index = 0; index < load_command_count_; ++index) { 319 // command refers to this load command alone, so that cursor will 320 // refuse to read past the load command's end. But since we haven't 321 // read the size yet, let command initially refer to the entire 322 // remainder of the load command series. 323 ByteBuffer command(list_cursor.here(), list_cursor.Available()); 324 ByteCursor cursor(&command, big_endian_); 325 326 // Read the command type and size --- fields common to all commands. 327 uint32_t type, size; 328 if (!(cursor >> type)) { 329 reporter_->LoadCommandsOverrun(load_command_count_, index, 0); 330 return false; 331 } 332 if (!(cursor >> size) || size > command.Size()) { 333 reporter_->LoadCommandsOverrun(load_command_count_, index, type); 334 return false; 335 } 336 337 // Now that we've read the length, restrict command's range to this 338 // load command only. 339 command.end = command.start + size; 340 341 switch (type) { 342 case LC_SEGMENT: 343 case LC_SEGMENT_64: { 344 Segment segment; 345 segment.bits_64 = (type == LC_SEGMENT_64); 346 size_t word_size = segment.bits_64 ? 8 : 4; 347 cursor.CString(&segment.name, 16); 348 cursor 349 .Read(word_size, false, &segment.vmaddr) 350 .Read(word_size, false, &segment.vmsize) 351 .Read(word_size, false, &segment.fileoff) 352 .Read(word_size, false, &segment.filesize); 353 cursor >> segment.maxprot 354 >> segment.initprot 355 >> segment.nsects 356 >> segment.flags; 357 if (!cursor) { 358 reporter_->LoadCommandTooShort(index, type); 359 return false; 360 } 361 if (segment.fileoff > buffer_.Size() || 362 segment.filesize > buffer_.Size() - segment.fileoff) { 363 reporter_->MisplacedSegmentData(segment.name); 364 return false; 365 } 366 // Mach-O files in .dSYM bundles have the contents of the loaded 367 // segments removed, and their file offsets and file sizes zeroed 368 // out. To help us handle this special case properly, give such 369 // segments' contents NULL starting and ending pointers. 370 if (segment.fileoff == 0 && segment.filesize == 0) { 371 segment.contents.start = segment.contents.end = NULL; 372 } else { 373 segment.contents.start = buffer_.start + segment.fileoff; 374 segment.contents.end = segment.contents.start + segment.filesize; 375 } 376 // The section list occupies the remainder of this load command's space. 377 segment.section_list.start = cursor.here(); 378 segment.section_list.end = command.end; 379 380 if (!handler->SegmentCommand(segment)) 381 return false; 382 break; 383 } 384 385 case LC_SYMTAB: { 386 uint32_t symoff, nsyms, stroff, strsize; 387 cursor >> symoff >> nsyms >> stroff >> strsize; 388 if (!cursor) { 389 reporter_->LoadCommandTooShort(index, type); 390 return false; 391 } 392 // How big are the entries in the symbol table? 393 // sizeof(struct nlist_64) : sizeof(struct nlist), 394 // but be paranoid about alignment vs. target architecture. 395 size_t symbol_size = bits_64_ ? 16 : 12; 396 // How big is the entire symbol array? 397 size_t symbols_size = nsyms * symbol_size; 398 if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff || 399 stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) { 400 reporter_->MisplacedSymbolTable(); 401 return false; 402 } 403 ByteBuffer entries(buffer_.start + symoff, symbols_size); 404 ByteBuffer names(buffer_.start + stroff, strsize); 405 if (!handler->SymtabCommand(entries, names)) 406 return false; 407 break; 408 } 409 410 default: { 411 if (!handler->UnknownCommand(type, command)) 412 return false; 413 break; 414 } 415 } 416 417 list_cursor.set_here(command.end); 418 } 419 420 return true; 421 } 422 423 // A load command handler that looks for a segment of a given name. 424 class Reader::SegmentFinder : public LoadCommandHandler { 425 public: 426 // Create a load command handler that looks for a segment named NAME, 427 // and sets SEGMENT to describe it if found. 428 SegmentFinder(const string& name, Segment* segment) 429 : name_(name), segment_(segment), found_() { } 430 431 // Return true if the traversal found the segment, false otherwise. 432 bool found() const { return found_; } 433 434 bool SegmentCommand(const Segment& segment) { 435 if (segment.name == name_) { 436 *segment_ = segment; 437 found_ = true; 438 return false; 439 } 440 return true; 441 } 442 443 private: 444 // The name of the segment our creator is looking for. 445 const string& name_; 446 447 // Where we should store the segment if found. (WEAK) 448 Segment* segment_; 449 450 // True if we found the segment. 451 bool found_; 452 }; 453 454 bool Reader::FindSegment(const string& name, Segment* segment) const { 455 SegmentFinder finder(name, segment); 456 WalkLoadCommands(&finder); 457 return finder.found(); 458 } 459 460 bool Reader::WalkSegmentSections(const Segment& segment, 461 SectionHandler* handler) const { 462 size_t word_size = segment.bits_64 ? 8 : 4; 463 ByteCursor cursor(&segment.section_list, big_endian_); 464 465 for (size_t i = 0; i < segment.nsects; i++) { 466 Section section; 467 section.bits_64 = segment.bits_64; 468 uint64_t size, offset; 469 uint32_t dummy32; 470 cursor 471 .CString(§ion.section_name, 16) 472 .CString(§ion.segment_name, 16) 473 .Read(word_size, false, §ion.address) 474 .Read(word_size, false, &size) 475 .Read(sizeof(uint32_t), false, &offset) // clears high bits of |offset| 476 >> section.align 477 >> dummy32 478 >> dummy32 479 >> section.flags 480 >> dummy32 481 >> dummy32; 482 if (section.bits_64) 483 cursor >> dummy32; 484 if (!cursor) { 485 reporter_->SectionsMissing(segment.name); 486 return false; 487 } 488 489 // Even 64-bit Mach-O isn’t a true 64-bit format in that it doesn’t handle 490 // 64-bit file offsets gracefully. Segment load commands do contain 64-bit 491 // file offsets, but sections within do not. Because segments load 492 // contiguously, recompute each section’s file offset on the basis of its 493 // containing segment’s file offset and the difference between the section’s 494 // and segment’s load addresses. If truncation is detected, honor the 495 // recomputed offset. 496 if (segment.bits_64 && 497 segment.fileoff + segment.filesize > 498 std::numeric_limits<uint32_t>::max()) { 499 const uint64_t section_offset_recomputed = 500 segment.fileoff + section.address - segment.vmaddr; 501 if (offset == static_cast<uint32_t>(section_offset_recomputed)) { 502 offset = section_offset_recomputed; 503 } 504 } 505 506 const uint32_t section_type = section.flags & SECTION_TYPE; 507 if (section_type == S_ZEROFILL || section_type == S_THREAD_LOCAL_ZEROFILL || 508 section_type == S_GB_ZEROFILL) { 509 // Zero-fill sections have a size, but no contents. 510 section.contents.start = section.contents.end = NULL; 511 } else if (segment.contents.start == NULL && 512 segment.contents.end == NULL) { 513 // Mach-O files in .dSYM bundles have the contents of the loaded 514 // segments removed, and their file offsets and file sizes zeroed 515 // out. However, the sections within those segments still have 516 // non-zero sizes. There's no reason to call MisplacedSectionData in 517 // this case; the caller may just need the section's load 518 // address. But do set the contents' limits to NULL, for safety. 519 section.contents.start = section.contents.end = NULL; 520 } else { 521 if (offset < size_t(segment.contents.start - buffer_.start) || 522 offset > size_t(segment.contents.end - buffer_.start) || 523 size > size_t(segment.contents.end - buffer_.start - offset)) { 524 if (offset > 0) { 525 reporter_->MisplacedSectionData(section.section_name, 526 section.segment_name); 527 return false; 528 } else { 529 // Mach-O files in .dSYM bundles have the contents of the loaded 530 // segments partially removed. The removed sections will have zero as 531 // their offset. MisplacedSectionData should not be called in this 532 // case. 533 section.contents.start = section.contents.end = NULL; 534 } 535 } else { 536 section.contents.start = buffer_.start + offset; 537 section.contents.end = section.contents.start + size; 538 } 539 } 540 if (!handler->HandleSection(section)) 541 return false; 542 } 543 return true; 544 } 545 546 // A SectionHandler that builds a SectionMap for the sections within a 547 // given segment. 548 class Reader::SectionMapper: public SectionHandler { 549 public: 550 // Create a SectionHandler that populates MAP with an entry for 551 // each section it is given. 552 SectionMapper(SectionMap* map) : map_(map) { } 553 bool HandleSection(const Section& section) { 554 (*map_)[section.section_name] = section; 555 return true; 556 } 557 private: 558 // The map under construction. (WEAK) 559 SectionMap* map_; 560 }; 561 562 bool Reader::MapSegmentSections(const Segment& segment, 563 SectionMap* section_map) const { 564 section_map->clear(); 565 SectionMapper mapper(section_map); 566 return WalkSegmentSections(segment, &mapper); 567 } 568 569 } // namespace mach_o 570 } // namespace google_breakpad