/ src / common / mac / macho_reader.cc
macho_reader.cc
  1  // Copyright 2010 Google LLC
  2  //
  3  // Redistribution and use in source and binary forms, with or without
  4  // modification, are permitted provided that the following conditions are
  5  // met:
  6  //
  7  //     * Redistributions of source code must retain the above copyright
  8  // notice, this list of conditions and the following disclaimer.
  9  //     * Redistributions in binary form must reproduce the above
 10  // copyright notice, this list of conditions and the following disclaimer
 11  // in the documentation and/or other materials provided with the
 12  // distribution.
 13  //     * Neither the name of Google LLC nor the names of its
 14  // contributors may be used to endorse or promote products derived from
 15  // this software without specific prior written permission.
 16  //
 17  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 21  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 22  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 23  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 27  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28  
 29  // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
 30  
// macho_reader.cc: Implementation of google_breakpad::mach_o::FatReader and
// google_breakpad::mach_o::Reader. See macho_reader.h for details.
 33  
 34  #ifdef HAVE_CONFIG_H
 35  #include <config.h>  // Must come first
 36  #endif
 37  
 38  #include "common/mac/macho_reader.h"
 39  
 40  #include <assert.h>
 41  #include <stdio.h>
 42  #include <stdlib.h>
 43  
 44  #include <limits>
 45  
// Unfortunately, CPU_TYPE_ARM is not defined for 10.4.
 47  #if !defined(CPU_TYPE_ARM)
 48  #define CPU_TYPE_ARM 12
 49  #endif
 50  
 51  #if !defined(CPU_TYPE_ARM_64)
 52  #define CPU_TYPE_ARM_64 16777228
 53  #endif
 54  
 55  namespace google_breakpad {
 56  namespace mach_o {
 57  
 58  // If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its
 59  // arguments, so you can't place expressions that do necessary work in
 60  // the argument of an assert. Nor can you assign the result of the
 61  // expression to a variable and assert that the variable's value is
 62  // true: you'll get unused variable warnings when NDEBUG is #defined.
 63  //
 64  // ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that
 65  // the result is true if NDEBUG is not #defined.
 66  #if defined(NDEBUG)
 67  #define ASSERT_ALWAYS_EVAL(x) (x)
 68  #else
 69  #define ASSERT_ALWAYS_EVAL(x) assert(x)
 70  #endif
 71  
 72  void FatReader::Reporter::BadHeader() {
 73    fprintf(stderr, "%s: file is neither a fat binary file"
 74            " nor a Mach-O object file\n", filename_.c_str());
 75  }
 76  
 77  void FatReader::Reporter::TooShort() {
 78    fprintf(stderr, "%s: file too short for the data it claims to contain\n",
 79            filename_.c_str());
 80  }
 81  
 82  void FatReader::Reporter::MisplacedObjectFile() {
 83    fprintf(stderr, "%s: file too short for the object files it claims"
 84            " to contain\n", filename_.c_str());
 85  }
 86  
 87  bool FatReader::Read(const uint8_t* buffer, size_t size) {
 88    buffer_.start = buffer;
 89    buffer_.end = buffer + size;
 90    ByteCursor cursor(&buffer_);
 91  
 92    // Fat binaries always use big-endian, so read the magic number in
 93    // that endianness. To recognize Mach-O magic numbers, which can use
 94    // either endianness, check for both the proper and reversed forms
 95    // of the magic numbers.
 96    cursor.set_big_endian(true);
 97    if (cursor >> magic_) {
 98      if (magic_ == FAT_MAGIC) {
 99        // How many object files does this fat binary contain?
100        uint32_t object_files_count;
101        if (!(cursor >> object_files_count)) {  // nfat_arch
102          reporter_->TooShort();
103          return false;
104        }
105  
106        // Read the list of object files.
107        object_files_.resize(object_files_count);
108        for (size_t i = 0; i < object_files_count; i++) {
109          struct fat_arch objfile;
110  
111          // Read this object file entry, byte-swapping as appropriate.
112          cursor >> objfile.cputype
113                 >> objfile.cpusubtype
114                 >> objfile.offset
115                 >> objfile.size
116                 >> objfile.align;
117  
118          SuperFatArch super_fat_arch(objfile);
119          object_files_[i] = super_fat_arch;
120  
121          if (!cursor) {
122            reporter_->TooShort();
123            return false;
124          }
125          // Does the file actually have the bytes this entry refers to?
126          size_t fat_size = buffer_.Size();
127          if (objfile.offset > fat_size ||
128              objfile.size > fat_size - objfile.offset) {
129            reporter_->MisplacedObjectFile();
130            return false;
131          }
132        }
133  
134        return true;
135      } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 ||
136                 magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) {
137        // If this is a little-endian Mach-O file, fix the cursor's endianness.
138        if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64)
139          cursor.set_big_endian(false);
140        // Record the entire file as a single entry in the object file list.
141        object_files_.resize(1);
142  
143        // Get the cpu type and subtype from the Mach-O header.
144        if (!(cursor >> object_files_[0].cputype
145                     >> object_files_[0].cpusubtype)) {
146          reporter_->TooShort();
147          return false;
148        }
149  
150        object_files_[0].offset = 0;
151        object_files_[0].size = static_cast<uint64_t>(buffer_.Size());
152        // This alignment is correct for 32 and 64-bit x86 and ppc.
153        // See get_align in the lipo source for other architectures:
154        // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c
155        object_files_[0].align = 12;  // 2^12 == 4096
156        return true;
157      }
158    }
159    reporter_->BadHeader();
160    return false;
161  }
162  
163  void Reader::Reporter::BadHeader() {
164    fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str());
165  }
166  
167  void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type,
168                                         cpu_subtype_t cpu_subtype,
169                                         cpu_type_t expected_cpu_type,
170                                         cpu_subtype_t expected_cpu_subtype) {
171    fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected"
172            " type %d, subtype %d\n",
173            filename_.c_str(), cpu_type, cpu_subtype,
174            expected_cpu_type, expected_cpu_subtype);
175  }
176  
177  void Reader::Reporter::HeaderTruncated() {
178    fprintf(stderr, "%s: file does not contain a complete Mach-O header\n",
179            filename_.c_str());
180  }
181  
182  void Reader::Reporter::LoadCommandRegionTruncated() {
183    fprintf(stderr, "%s: file too short to hold load command region"
184            " given in Mach-O header\n", filename_.c_str());
185  }
186  
187  void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i,
188                                             LoadCommandType type) {
189    fprintf(stderr, "%s: file's header claims there are %zu"
190            " load commands, but load command #%zu",
191            filename_.c_str(), claimed, i);
192    if (type) fprintf(stderr, ", of type %d,", type);
193    fprintf(stderr, " extends beyond the end of the load command region\n");
194  }
195  
196  void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) {
197    fprintf(stderr, "%s: the contents of load command #%zu, of type %d,"
198            " extend beyond the size given in the load command's header\n",
199            filename_.c_str(), i, type);
200  }
201  
202  void Reader::Reporter::SectionsMissing(const string& name) {
203    fprintf(stderr, "%s: the load command for segment '%s'"
204            " is too short to hold the section headers it claims to have\n",
205            filename_.c_str(), name.c_str());
206  }
207  
208  void Reader::Reporter::MisplacedSegmentData(const string& name) {
209    fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond"
210            " the end of the file\n", filename_.c_str(), name.c_str());
211  }
212  
213  void Reader::Reporter::MisplacedSectionData(const string& section,
214                                              const string& segment) {
215    fprintf(stderr, "%s: the section '%s' in segment '%s'"
216            " claims its contents lie outside the segment's contents\n",
217            filename_.c_str(), section.c_str(), segment.c_str());
218  }
219  
220  void Reader::Reporter::MisplacedSymbolTable() {
221    fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol"
222            " table's contents are located beyond the end of the file\n",
223            filename_.c_str());
224  }
225  
226  void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) {
227    fprintf(stderr, "%s: CPU type %d is not supported\n",
228            filename_.c_str(), cpu_type);
229  }
230  
231  bool Reader::Read(const uint8_t* buffer,
232                    size_t size,
233                    cpu_type_t expected_cpu_type,
234                    cpu_subtype_t expected_cpu_subtype) {
235    assert(!buffer_.start);
236    buffer_.start = buffer;
237    buffer_.end = buffer + size;
238    ByteCursor cursor(&buffer_, true);
239    uint32_t magic;
240    if (!(cursor >> magic)) {
241      reporter_->HeaderTruncated();
242      return false;
243    }
244  
245    if (expected_cpu_type != CPU_TYPE_ANY) {
246      uint32_t expected_magic;
247      // validate that magic matches the expected cpu type
248      switch (expected_cpu_type) {
249        case CPU_TYPE_ARM:
250        case CPU_TYPE_I386:
251          expected_magic = MH_CIGAM;
252          break;
253        case CPU_TYPE_POWERPC:
254          expected_magic = MH_MAGIC;
255          break;
256        case CPU_TYPE_ARM_64:
257        case CPU_TYPE_X86_64:
258          expected_magic = MH_CIGAM_64;
259          break;
260        case CPU_TYPE_POWERPC64:
261          expected_magic = MH_MAGIC_64;
262          break;
263        default:
264          reporter_->UnsupportedCPUType(expected_cpu_type);
265          return false;
266      }
267  
268      if (expected_magic != magic) {
269        reporter_->BadHeader();
270        return false;
271      }
272    }
273  
274    // Since the byte cursor is in big-endian mode, a reversed magic number
275    // always indicates a little-endian file, regardless of our own endianness.
276    switch (magic) {
277      case MH_MAGIC:    big_endian_ = true;  bits_64_ = false; break;
278      case MH_CIGAM:    big_endian_ = false; bits_64_ = false; break;
279      case MH_MAGIC_64: big_endian_ = true;  bits_64_ = true;  break;
280      case MH_CIGAM_64: big_endian_ = false; bits_64_ = true;  break;
281      default:
282        reporter_->BadHeader();
283        return false;
284    }
285    cursor.set_big_endian(big_endian_);
286    uint32_t commands_size, reserved;
287    cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_
288           >> commands_size >> flags_;
289    if (bits_64_)
290      cursor >> reserved;
291    if (!cursor) {
292      reporter_->HeaderTruncated();
293      return false;
294    }
295  
296    if (expected_cpu_type != CPU_TYPE_ANY &&
297        (expected_cpu_type != cpu_type_ ||
298         expected_cpu_subtype != cpu_subtype_)) {
299      reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_,
300                                expected_cpu_type, expected_cpu_subtype);
301      return false;
302    }
303  
304    cursor
305        .PointTo(&load_commands_.start, commands_size)
306        .PointTo(&load_commands_.end, 0);
307    if (!cursor) {
308      reporter_->LoadCommandRegionTruncated();
309      return false;
310    }
311  
312    return true;
313  }
314  
315  bool Reader::WalkLoadCommands(Reader::LoadCommandHandler* handler) const {
316    ByteCursor list_cursor(&load_commands_, big_endian_);
317  
318    for (size_t index = 0; index < load_command_count_; ++index) {
319      // command refers to this load command alone, so that cursor will
320      // refuse to read past the load command's end. But since we haven't
321      // read the size yet, let command initially refer to the entire
322      // remainder of the load command series.
323      ByteBuffer command(list_cursor.here(), list_cursor.Available());
324      ByteCursor cursor(&command, big_endian_);
325  
326      // Read the command type and size --- fields common to all commands.
327      uint32_t type, size;
328      if (!(cursor >> type)) {
329        reporter_->LoadCommandsOverrun(load_command_count_, index, 0);
330        return false;
331      }
332      if (!(cursor >> size) || size > command.Size()) {
333        reporter_->LoadCommandsOverrun(load_command_count_, index, type);
334        return false;
335      }
336  
337      // Now that we've read the length, restrict command's range to this
338      // load command only.
339      command.end = command.start + size;
340  
341      switch (type) {
342        case LC_SEGMENT:
343        case LC_SEGMENT_64: {
344          Segment segment;
345          segment.bits_64 = (type == LC_SEGMENT_64);
346          size_t word_size = segment.bits_64 ? 8 : 4;
347          cursor.CString(&segment.name, 16);
348          cursor
349              .Read(word_size, false, &segment.vmaddr)
350              .Read(word_size, false, &segment.vmsize)
351              .Read(word_size, false, &segment.fileoff)
352              .Read(word_size, false, &segment.filesize);
353          cursor >> segment.maxprot
354                 >> segment.initprot
355                 >> segment.nsects
356                 >> segment.flags;
357          if (!cursor) {
358            reporter_->LoadCommandTooShort(index, type);
359            return false;
360          }
361          if (segment.fileoff > buffer_.Size() ||
362              segment.filesize > buffer_.Size() - segment.fileoff) {
363            reporter_->MisplacedSegmentData(segment.name);
364            return false;
365          }
366          // Mach-O files in .dSYM bundles have the contents of the loaded
367          // segments removed, and their file offsets and file sizes zeroed
368          // out. To help us handle this special case properly, give such
369          // segments' contents NULL starting and ending pointers.
370          if (segment.fileoff == 0 && segment.filesize == 0) {
371            segment.contents.start = segment.contents.end = NULL;
372          } else {
373            segment.contents.start = buffer_.start + segment.fileoff;
374            segment.contents.end = segment.contents.start + segment.filesize;
375          }
376          // The section list occupies the remainder of this load command's space.
377          segment.section_list.start = cursor.here();
378          segment.section_list.end = command.end;
379  
380          if (!handler->SegmentCommand(segment))
381            return false;
382          break;
383        }
384  
385        case LC_SYMTAB: {
386          uint32_t symoff, nsyms, stroff, strsize;
387          cursor >> symoff >> nsyms >> stroff >> strsize;
388          if (!cursor) {
389            reporter_->LoadCommandTooShort(index, type);
390            return false;
391          }
392          // How big are the entries in the symbol table?
393          // sizeof(struct nlist_64) : sizeof(struct nlist),
394          // but be paranoid about alignment vs. target architecture.
395          size_t symbol_size = bits_64_ ? 16 : 12;
396          // How big is the entire symbol array?
397          size_t symbols_size = nsyms * symbol_size;
398          if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff ||
399              stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) {
400            reporter_->MisplacedSymbolTable();
401            return false;
402          }
403          ByteBuffer entries(buffer_.start + symoff, symbols_size);
404          ByteBuffer names(buffer_.start + stroff, strsize);
405          if (!handler->SymtabCommand(entries, names))
406            return false;
407          break;
408        }
409  
410        default: {
411          if (!handler->UnknownCommand(type, command))
412            return false;
413          break;
414        }
415      }
416  
417      list_cursor.set_here(command.end);
418    }
419  
420    return true;
421  }
422  
423  // A load command handler that looks for a segment of a given name.
424  class Reader::SegmentFinder : public LoadCommandHandler {
425   public:
426    // Create a load command handler that looks for a segment named NAME,
427    // and sets SEGMENT to describe it if found.
428    SegmentFinder(const string& name, Segment* segment)
429        : name_(name), segment_(segment), found_() { }
430  
431    // Return true if the traversal found the segment, false otherwise.
432    bool found() const { return found_; }
433  
434    bool SegmentCommand(const Segment& segment) {
435      if (segment.name == name_) {
436        *segment_ = segment;
437        found_ = true;
438        return false;
439      }
440      return true;
441    }
442  
443   private:
444    // The name of the segment our creator is looking for.
445    const string& name_;
446  
447    // Where we should store the segment if found. (WEAK)
448    Segment* segment_;
449  
450    // True if we found the segment.
451    bool found_;
452  };
453  
454  bool Reader::FindSegment(const string& name, Segment* segment) const {
455    SegmentFinder finder(name, segment);
456    WalkLoadCommands(&finder);
457    return finder.found();
458  }
459  
460  bool Reader::WalkSegmentSections(const Segment& segment,
461                                   SectionHandler* handler) const {
462    size_t word_size = segment.bits_64 ? 8 : 4;
463    ByteCursor cursor(&segment.section_list, big_endian_);
464  
465    for (size_t i = 0; i < segment.nsects; i++) {
466      Section section;
467      section.bits_64 = segment.bits_64;
468      uint64_t size, offset;
469      uint32_t dummy32;
470      cursor
471          .CString(&section.section_name, 16)
472          .CString(&section.segment_name, 16)
473          .Read(word_size, false, &section.address)
474          .Read(word_size, false, &size)
475          .Read(sizeof(uint32_t), false, &offset)  // clears high bits of |offset|
476          >> section.align
477          >> dummy32
478          >> dummy32
479          >> section.flags
480          >> dummy32
481          >> dummy32;
482      if (section.bits_64)
483        cursor >> dummy32;
484      if (!cursor) {
485        reporter_->SectionsMissing(segment.name);
486        return false;
487      }
488  
489      // Even 64-bit Mach-O isn’t a true 64-bit format in that it doesn’t handle
490      // 64-bit file offsets gracefully. Segment load commands do contain 64-bit
491      // file offsets, but sections within do not. Because segments load
492      // contiguously, recompute each section’s file offset on the basis of its
493      // containing segment’s file offset and the difference between the section’s
494      // and segment’s load addresses. If truncation is detected, honor the
495      // recomputed offset.
496      if (segment.bits_64 &&
497          segment.fileoff + segment.filesize >
498              std::numeric_limits<uint32_t>::max()) {
499        const uint64_t section_offset_recomputed =
500            segment.fileoff + section.address - segment.vmaddr;
501        if (offset == static_cast<uint32_t>(section_offset_recomputed)) {
502          offset = section_offset_recomputed;
503        }
504      }
505  
506      const uint32_t section_type = section.flags & SECTION_TYPE;
507      if (section_type == S_ZEROFILL || section_type == S_THREAD_LOCAL_ZEROFILL ||
508              section_type == S_GB_ZEROFILL) {
509        // Zero-fill sections have a size, but no contents.
510        section.contents.start = section.contents.end = NULL;
511      } else if (segment.contents.start == NULL &&
512                 segment.contents.end == NULL) {
513        // Mach-O files in .dSYM bundles have the contents of the loaded
514        // segments removed, and their file offsets and file sizes zeroed
515        // out.  However, the sections within those segments still have
516        // non-zero sizes.  There's no reason to call MisplacedSectionData in
517        // this case; the caller may just need the section's load
518        // address. But do set the contents' limits to NULL, for safety.
519        section.contents.start = section.contents.end = NULL;
520      } else {
521        if (offset < size_t(segment.contents.start - buffer_.start) ||
522            offset > size_t(segment.contents.end - buffer_.start) ||
523            size > size_t(segment.contents.end - buffer_.start - offset)) {
524          if (offset > 0) {
525            reporter_->MisplacedSectionData(section.section_name,
526                                            section.segment_name);
527            return false;
528          } else {
529            // Mach-O files in .dSYM bundles have the contents of the loaded
530            // segments partially removed. The removed sections will have zero as
531            // their offset. MisplacedSectionData should not be called in this
532            // case.
533            section.contents.start = section.contents.end = NULL;
534          }
535        } else {
536          section.contents.start = buffer_.start + offset;
537          section.contents.end = section.contents.start + size;
538        }
539      }
540      if (!handler->HandleSection(section))
541        return false;
542    }
543    return true;
544  }
545  
546  // A SectionHandler that builds a SectionMap for the sections within a
547  // given segment.
548  class Reader::SectionMapper: public SectionHandler {
549   public:
550    // Create a SectionHandler that populates MAP with an entry for
551    // each section it is given.
552    SectionMapper(SectionMap* map) : map_(map) { }
553    bool HandleSection(const Section& section) {
554      (*map_)[section.section_name] = section;
555      return true;
556    }
557   private:
558    // The map under construction. (WEAK)
559    SectionMap* map_;
560  };
561  
562  bool Reader::MapSegmentSections(const Segment& segment,
563                                  SectionMap* section_map) const {
564    section_map->clear();
565    SectionMapper mapper(section_map);
566    return WalkSegmentSections(segment, &mapper);
567  }
568  
569  }  // namespace mach_o
570  }  // namespace google_breakpad