/ src / common / mac / dump_syms.h
dump_syms.h
  1  // -*- mode: c++ -*-
  2  
  3  // Copyright 2011 Google LLC
  4  //
  5  // Redistribution and use in source and binary forms, with or without
  6  // modification, are permitted provided that the following conditions are
  7  // met:
  8  //
  9  //     * Redistributions of source code must retain the above copyright
 10  // notice, this list of conditions and the following disclaimer.
 11  //     * Redistributions in binary form must reproduce the above
 12  // copyright notice, this list of conditions and the following disclaimer
 13  // in the documentation and/or other materials provided with the
 14  // distribution.
 15  //     * Neither the name of Google LLC nor the names of its
 16  // contributors may be used to endorse or promote products derived from
 17  // this software without specific prior written permission.
 18  //
 19  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 20  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 21  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 22  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 23  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 24  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 25  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 26  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 27  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 28  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 29  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30  
 31  // Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
 32  
 33  // dump_syms.h: Declaration of google_breakpad::DumpSymbols, a class for
 34  // reading debugging information from Mach-O files and writing it out as a
 35  // Breakpad symbol file.
 36  
 37  #include <mach-o/loader.h>
 38  #include <stdio.h>
 39  #include <stdlib.h>
 40  
 41  #include <ostream>
 42  #include <string>
 43  #include <vector>
 44  
 45  #include "common/byte_cursor.h"
 46  #include "common/dwarf/dwarf2reader.h"
 47  #include "common/mac/arch_utilities.h"
 48  #include "common/mac/macho_reader.h"
 49  #include "common/mac/super_fat_arch.h"
 50  #include "common/module.h"
 51  #include "common/scoped_ptr.h"
 52  #include "common/symbol_data.h"
 53  
 54  namespace google_breakpad {
 55  
 56  class DumpSymbols {
 57   public:
 58    DumpSymbols(SymbolData symbol_data,
 59                bool handle_inter_cu_refs,
 60                bool enable_multiple = false,
 61                const std::string& module_name = "",
 62                bool prefer_extern_name = false)
 63        : symbol_data_(symbol_data),
 64          handle_inter_cu_refs_(handle_inter_cu_refs),
 65          object_filename_(),
 66          contents_(),
 67          size_(0),
 68          from_disk_(false),
 69          object_files_(),
 70          selected_object_file_(),
 71          selected_object_name_(),
 72          enable_multiple_(enable_multiple),
 73          module_name_(module_name),
 74          prefer_extern_name_(prefer_extern_name) {}
 75    ~DumpSymbols() = default;
 76  
 77    // Prepare to read debugging information from |filename|. |filename| may be
 78    // the name of a fat file, a Mach-O file, or a dSYM bundle containing either
 79    // of the above.
 80    //
 81    // If |module_name_| is empty, uses the basename of |filename| as the module
 82    // name. Otherwise, uses |module_name_| as the module name.
 83    //
 84    // On success, return true; if there is a problem reading
 85    // |filename|, report it and return false.
 86    bool Read(const std::string& filename);
 87  
 88    // Prepare to read debugging information from |contents|. |contents| is
 89    // expected to be the data obtained from reading a fat file, or a Mach-O file.
 90    // |filename| is used to determine the object filename in the generated
 91    // output; there will not be an attempt to open this file as the data
 92    // is already expected to be in memory. On success, return true; if there is a
 93    // problem reading |contents|, report it and return false.
 94    bool ReadData(uint8_t* contents, size_t size, const std::string& filename);
 95  
 96    // If this dumper's file includes an object file for `info`, then select that
 97    // object file for dumping, and return true. Otherwise, return false, and
 98    // leave this dumper's selected architecture unchanged.
 99    //
100    // By default, if this dumper's file contains only one object file, then
101    // the dumper will dump those symbols; and if it contains more than one
102    // object file, then the dumper will dump the object file whose
103    // architecture matches that of this dumper program.
104    bool SetArchitecture(const ArchInfo& info);
105  
106    // Return a pointer to an array of SuperFatArch structures describing the
107    // object files contained in this dumper's file. Set *|count| to the number
108    // of elements in the array. The returned array is owned by this DumpSymbols
109    // instance.
110    //
111    // If there are no available architectures, this function
112    // may return NULL.
113    const SuperFatArch* AvailableArchitectures(size_t* count) {
114      *count = object_files_.size();
115      if (object_files_.size() > 0)
116        return &object_files_[0];
117      return NULL;
118    }
119  
120    // Read the selected object file's debugging information, and write out the
121    // header only to |stream|. Return true on success; if an error occurs, report
122    // it and return false.
123    bool WriteSymbolFileHeader(std::ostream& stream);
124  
125    // Read the selected object file's debugging information and store it in
126    // `module`. The caller owns the resulting module object and must delete
127    // it when finished.
128    bool ReadSymbolData(Module** module);
129  
130    // Return an identifier string for the file this DumpSymbols is dumping.
131    std::string Identifier();
132  
133   private:
134    // Used internally.
135    class DumperLineToModule;
136    class DumperRangesHandler;
137    class LoadCommandDumper;
138  
139    // This method behaves similarly to NXFindBestFatArch, but it supports
140    // SuperFatArch.
141    SuperFatArch* FindBestMatchForArchitecture(
142        cpu_type_t cpu_type, cpu_subtype_t cpu_subtype);
143  
144    // Creates an empty module object.
145    bool CreateEmptyModule(scoped_ptr<Module>& module);
146  
147    // Process the split dwarf file referenced by reader.
148    void StartProcessSplitDwarf(google_breakpad::CompilationUnit* reader,
149                                Module* module,
150                                google_breakpad::Endianness endianness,
151                                bool handle_inter_cu_refs,
152                                bool handle_inline) const;
153  
154    // Read debugging information from |dwarf_sections|, which was taken from
155    // |macho_reader|, and add it to |module|.
156    void ReadDwarf(google_breakpad::Module* module,
157                   const mach_o::Reader& macho_reader,
158                   const mach_o::SectionMap& dwarf_sections,
159                   bool handle_inter_cu_refs) const;
160  
161    // Read DWARF CFI or .eh_frame data from |section|, belonging to
162    // |macho_reader|, and record it in |module|.  If |eh_frame| is true,
163    // then the data is .eh_frame-format data; otherwise, it is standard DWARF
164    // .debug_frame data. On success, return true; on failure, report
165    // the problem and return false.
166    bool ReadCFI(google_breakpad::Module* module,
167                 const mach_o::Reader& macho_reader,
168                 const mach_o::Section& section,
169                 bool eh_frame) const;
170  
171    // The selection of what type of symbol data to read/write.
172    const SymbolData symbol_data_;
173  
174    // Whether to handle references between compilation units.
175    const bool handle_inter_cu_refs_;
176  
177    // The name of the file this DumpSymbols will actually read debugging
178    // information from. If the filename passed to Read refers to a dSYM bundle,
179    // then this is the resource file within that bundle.
180    std::string object_filename_;
181  
182    // The complete contents of object_filename_, mapped into memory.
183    scoped_array<uint8_t> contents_;
184  
185    // The size of contents_.
186    size_t size_;
187  
188    // Indicates which entry point to DumpSymbols was used, i.e. Read vs ReadData.
189    // This is used to indicate that downstream code paths can/should also read
190    // from disk or not.
191    bool from_disk_;
192  
193    // A vector of SuperFatArch structures describing the object files
194    // object_filename_ contains. If object_filename_ refers to a fat binary,
195    // this may have more than one element; if it refers to a Mach-O file, this
196    // has exactly one element.
197    vector<SuperFatArch> object_files_;
198  
199    // The object file in object_files_ selected to dump, or NULL if
200    // SetArchitecture hasn't been called yet.
201    const SuperFatArch* selected_object_file_;
202  
203    // A string that identifies the selected object file, for use in error
204    // messages.  This is usually object_filename_, but if that refers to a
205    // fat binary, it includes an indication of the particular architecture
206    // within that binary.
207    string selected_object_name_;
208  
209    // Whether symbols sharing an address should be collapsed into a single entry
210    // and marked with an `m` in the output. 
211    // See: https://crbug.com/google-breakpad/751 and docs at 
212    // docs/symbol_files.md#records-3
213    bool enable_multiple_;
214  
215    // If non-empty, used as the module name. Otherwise, the basename of
216    // |object_filename_| is used as the module name.
217    const std::string module_name_;
218  
219    // If a Function and an Extern share the same address but have a different
220    // name, prefer the name of the Extern.
221    //
222    // Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables),
223    // as the Function's fully-qualified name will only be present in the STABS
224    // (which are placed in the Extern), not in the DWARF symbols (which are
225    // placed in the Function).
226    bool prefer_extern_name_;
227  };
228  
229  }  // namespace google_breakpad