/ src / common / module.h
module.h
  1  // -*- mode: c++ -*-
  2  
  3  // Copyright 2010 Google LLC
  4  //
  5  // Redistribution and use in source and binary forms, with or without
  6  // modification, are permitted provided that the following conditions are
  7  // met:
  8  //
  9  //     * Redistributions of source code must retain the above copyright
 10  // notice, this list of conditions and the following disclaimer.
 11  //     * Redistributions in binary form must reproduce the above
 12  // copyright notice, this list of conditions and the following disclaimer
 13  // in the documentation and/or other materials provided with the
 14  // distribution.
 15  //     * Neither the name of Google LLC nor the names of its
 16  // contributors may be used to endorse or promote products derived from
 17  // this software without specific prior written permission.
 18  //
 19  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 20  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 21  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 22  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 23  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 24  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 25  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 26  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 27  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 28  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 29  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30  
 31  // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
 32  
 33  // module.h: Define google_breakpad::Module. A Module holds debugging
 34  // information, and can write that information out as a Breakpad
 35  // symbol file.
 36  
 37  #ifndef COMMON_LINUX_MODULE_H__
 38  #define COMMON_LINUX_MODULE_H__
 39  
 40  #include <functional>
 41  #include <iostream>
 42  #include <limits>
 43  #include <map>
 44  #include <memory>
 45  #include <set>
 46  #include <string>
 47  #include <vector>
 48  
 49  #include "common/string_view.h"
 50  #include "common/symbol_data.h"
 51  #include "common/unordered.h"
 52  #include "common/using_std_string.h"
 53  #include "google_breakpad/common/breakpad_types.h"
 54  
 55  namespace google_breakpad {
 56  
 57  using std::set;
 58  using std::vector;
 59  using std::map;
 60  
 61  // A Module represents the contents of a module, and supports methods
 62  // for adding information produced by parsing STABS or DWARF data
 63  // --- possibly both from the same file --- and then writing out the
 64  // unified contents as a Breakpad-format symbol file.
 65  class Module {
 66   public:
 67    // The type of addresses and sizes in a symbol table.
 68    typedef uint64_t Address;
 69    static constexpr uint64_t kMaxAddress = std::numeric_limits<Address>::max();
 70    struct File;
 71    struct Function;
 72    struct InlineOrigin;
 73    struct Inline;
 74    struct Line;
 75    struct Extern;
 76  
  // Addresses appearing in File, Function, and Line structures are
  // absolute, not relative to the module's load address.  That
  // is, if the module were loaded at its nominal load address, the
  // addresses would be correct.
 81  
 82    // A source file.
 83    struct File {
 84      explicit File(const string& name_input) : name(name_input), source_id(0) {}
 85  
 86      // The name of the source file.
 87      const string name;
 88  
 89      // The file's source id.  The Write member function clears this
 90      // field and assigns source ids a fresh, so any value placed here
 91      // before calling Write will be lost.
 92      int source_id;
 93    };
 94  
 95    // An address range.
 96    struct Range {
 97      Range(const Address address_input, const Address size_input) :
 98          address(address_input), size(size_input) { }
 99  
100      Address address;
101      Address size;
102    };
103  
104    // A function.
105    struct Function {
106      Function(StringView name_input, const Address& address_input) :
107          name(name_input), address(address_input), parameter_size(0) {}
108  
109      // For sorting by address.  (Not style-guide compliant, but it's
110      // stupid not to put this in the struct.)
111      static bool CompareByAddress(const Function* x, const Function* y) {
112        return x->address < y->address;
113      }
114  
115      // The function's name.
116      StringView name;
117  
118      // The start address and the address ranges covered by the function.
119      const Address address;
120      vector<Range> ranges;
121  
122      // The function's parameter size.
123      Address parameter_size;
124  
125      // Source lines belonging to this function, sorted by increasing
126      // address.
127      vector<Line> lines;
128  
129      // Inlined call sites belonging to this functions.
130      vector<std::unique_ptr<Inline>> inlines;
131  
132      // If this symbol has been folded with other symbols in the linked binary.
133      bool is_multiple = false;
134  
135      // If the function's name should be filled out from a matching Extern,
136      // should they not match.
137      bool prefer_extern_name = false;
138    };
139  
140    struct InlineOrigin {
141      explicit InlineOrigin(StringView name) : id(-1), name(name) {}
142  
143      // A unique id for each InlineOrigin object. INLINE records use the id to
144      // refer to its INLINE_ORIGIN record.
145      int id;
146  
147      // The inlined function's name.
148      StringView name;
149    };
150  
151    // A inlined call site.
152    struct Inline {
153      Inline(InlineOrigin* origin,
154             const vector<Range>& ranges,
155             int call_site_line,
156             int call_site_file_id,
157             int inline_nest_level,
158             vector<std::unique_ptr<Inline>> child_inlines)
159          : origin(origin),
160            ranges(ranges),
161            call_site_line(call_site_line),
162            call_site_file_id(call_site_file_id),
163            call_site_file(nullptr),
164            inline_nest_level(inline_nest_level),
165            child_inlines(std::move(child_inlines)) {}
166  
167      InlineOrigin* origin;
168  
169      // The list of addresses and sizes.
170      vector<Range> ranges;
171  
172      int call_site_line;
173  
174      // The id is only meanful inside a CU. It's only used for looking up real
175      // File* after scanning a CU.
176      int call_site_file_id;
177  
178      File* call_site_file;
179  
180      int inline_nest_level;
181  
182      // A list of inlines which are children of this inline.
183      vector<std::unique_ptr<Inline>> child_inlines;
184  
185      int getCallSiteFileID() const {
186        return call_site_file ? call_site_file->source_id : -1;
187      }
188  
189      static void InlineDFS(
190          vector<std::unique_ptr<Module::Inline>>& inlines,
191          std::function<void(std::unique_ptr<Module::Inline>&)> const& forEach) {
192        for (std::unique_ptr<Module::Inline>& in : inlines) {
193          forEach(in);
194          InlineDFS(in->child_inlines, forEach);
195        }
196      }
197    };
198  
199    typedef map<uint64_t, InlineOrigin*> InlineOriginByOffset;
200  
201    class InlineOriginMap {
202     public:
203      // Add INLINE ORIGIN to the module. Return a pointer to origin .
204      InlineOrigin* GetOrCreateInlineOrigin(uint64_t offset, StringView name);
205  
206      // offset is the offset of a DW_TAG_subprogram. specification_offset is the
207      // value of its DW_AT_specification or equals to offset if
208      // DW_AT_specification doesn't exist in that DIE.
209      void SetReference(uint64_t offset, uint64_t specification_offset);
210  
211      ~InlineOriginMap() {
212        for (const auto& iter : inline_origins_) {
213          delete iter.second;
214        }
215      }
216  
217     private:
218      // A map from a DW_TAG_subprogram's offset to the DW_TAG_subprogram.
219      InlineOriginByOffset inline_origins_;
220  
221      // A map from a DW_TAG_subprogram's offset to the offset of its
222      // specification or abstract origin subprogram. The set of values in this
223      // map should always be the same set of keys in inline_origins_.
224      map<uint64_t, uint64_t> references_;
225    };
226  
227    map<std::string, InlineOriginMap> inline_origin_maps;
228  
229    // A source line.
230    struct Line {
231      // For sorting by address.  (Not style-guide compliant, but it's
232      // stupid not to put this in the struct.)
233      static bool CompareByAddress(const Module::Line& x, const Module::Line& y) {
234        return x.address < y.address;
235      }
236  
237      Address address, size;    // The address and size of the line's code.
238      File* file;                // The source file.
239      int number;                // The source line number.
240    };
241  
242    // An exported symbol.
243    struct Extern {
244      explicit Extern(const Address& address_input) : address(address_input) {}
245      const Address address;
246      string name;
247      // If this symbol has been folded with other symbols in the linked binary.
248      bool is_multiple = false;
249    };
250  
  // A map from register names to postfix expressions that recover
  // their values. This can represent a complete set of rules to
  // follow at some address, or a set of changes to be applied to an
  // extant set of rules.
255    typedef map<string, string> RuleMap;
256  
257    // A map from addresses to RuleMaps, representing changes that take
258    // effect at given addresses.
259    typedef map<Address, RuleMap> RuleChangeMap;
260  
261    // A range of 'STACK CFI' stack walking information. An instance of
262    // this structure corresponds to a 'STACK CFI INIT' record and the
263    // subsequent 'STACK CFI' records that fall within its range.
264    struct StackFrameEntry {
265      // The starting address and number of bytes of machine code this
266      // entry covers.
267      Address address, size;
268  
269      // The initial register recovery rules, in force at the starting
270      // address.
271      RuleMap initial_rules;
272  
273      // A map from addresses to rule changes. To find the rules in
274      // force at a given address, start with initial_rules, and then
275      // apply the changes given in this map for all addresses up to and
276      // including the address you're interested in.
277      RuleChangeMap rule_changes;
278    };
279  
280    struct FunctionCompare {
281      bool operator() (const Function* lhs, const Function* rhs) const {
282        if (lhs->address == rhs->address)
283          return lhs->name < rhs->name;
284        return lhs->address < rhs->address;
285      }
286    };
287  
288    struct InlineOriginCompare {
289      bool operator()(const InlineOrigin* lhs, const InlineOrigin* rhs) const {
290        return lhs->name < rhs->name;
291      }
292    };
293  
294    struct ExternCompare {
295      // Defining is_transparent allows
296      // std::set<std::unique_ptr<Extern>, ExternCompare>::find() to be called
297      // with an Extern* and have set use the overloads below.
298      using is_transparent = void;
299      bool operator() (const std::unique_ptr<Extern>& lhs,
300                       const std::unique_ptr<Extern>& rhs) const {
301        return lhs->address < rhs->address;
302      }
303      bool operator() (const Extern* lhs, const std::unique_ptr<Extern>& rhs) const {
304        return lhs->address < rhs->address;
305      }
306      bool operator() (const std::unique_ptr<Extern>& lhs, const Extern* rhs) const {
307        return lhs->address < rhs->address;
308      }
309    };
310  
311    // Create a new module with the given name, operating system,
312    // architecture, and ID string.
313    // NB: `enable_multiple_field` is temporary while transitioning to enabling
314    // writing the multiple field permanently.
315    Module(const string& name,
316           const string& os,
317           const string& architecture,
318           const string& id,
319           const string& code_id = "",
320           bool enable_multiple_field = false,
321           bool prefer_extern_name = false);
322    ~Module();
323  
324    // Set the module's load address to LOAD_ADDRESS; addresses given
325    // for functions and lines will be written to the Breakpad symbol
326    // file as offsets from this address.  Construction initializes this
327    // module's load address to zero: addresses written to the symbol
328    // file will be the same as they appear in the Function, Line, and
329    // StackFrameEntry structures.
330    //
331    // Note that this member function has no effect on addresses stored
332    // in the data added to this module; the Write member function
333    // simply subtracts off the load address from addresses before it
334    // prints them. Only the last load address given before calling
335    // Write is used.
336    void SetLoadAddress(Address load_address);
337  
338    // Sets address filtering on elements added to the module.  This allows
339    // libraries with extraneous debug symbols to generate symbol files containing
340    // only relevant symbols.  For example, an LLD-generated partition library may
341    // contain debug information pertaining to all partitions derived from a
342    // single "combined" library.  Filtering applies only to elements added after
343    // this method is called.
344    void SetAddressRanges(const vector<Range>& ranges);
345  
346    // Add FUNCTION to the module. FUNCTION's name must not be empty.
347    // This module owns all Function objects added with this function:
348    // destroying the module destroys them as well.
349    // Return false if the function is duplicate and needs to be freed.
350    bool AddFunction(Function* function);
351  
352    // Add STACK_FRAME_ENTRY to the module.
353    // This module owns all StackFrameEntry objects added with this
354    // function: destroying the module destroys them as well.
355    void AddStackFrameEntry(std::unique_ptr<StackFrameEntry> stack_frame_entry);
356  
357    // Add PUBLIC to the module.
358    // This module owns all Extern objects added with this function:
359    // destroying the module destroys them as well.
360    void AddExtern(std::unique_ptr<Extern> ext);
361  
362    // If this module has a file named NAME, return a pointer to it. If
363    // it has none, then create one and return a pointer to the new
364    // file. This module owns all File objects created using these
365    // functions; destroying the module destroys them as well.
366    File* FindFile(const string& name);
367    File* FindFile(const char* name);
368  
369    // If this module has a file named NAME, return a pointer to it.
370    // Otherwise, return NULL.
371    File* FindExistingFile(const string& name);
372  
373    // Insert pointers to the functions added to this module at I in
374    // VEC. The pointed-to Functions are still owned by this module.
375    // (Since this is effectively a copy of the function list, this is
376    // mostly useful for testing; other uses should probably get a more
377    // appropriate interface.)
378    void GetFunctions(vector<Function*>* vec, vector<Function*>::iterator i);
379  
380    // Insert pointers to the externs added to this module at I in
381    // VEC. The pointed-to Externs are still owned by this module.
382    // (Since this is effectively a copy of the extern list, this is
383    // mostly useful for testing; other uses should probably get a more
384    // appropriate interface.)
385    void GetExterns(vector<Extern*>* vec, vector<Extern*>::iterator i);
386  
387    // Clear VEC and fill it with pointers to the Files added to this
388    // module, sorted by name. The pointed-to Files are still owned by
389    // this module. (Since this is effectively a copy of the file list,
390    // this is mostly useful for testing; other uses should probably get
391    // a more appropriate interface.)
392    void GetFiles(vector<File*>* vec);
393  
394    // Clear VEC and fill it with pointers to the StackFrameEntry
395    // objects that have been added to this module. (Since this is
396    // effectively a copy of the stack frame entry list, this is mostly
397    // useful for testing; other uses should probably get
398    // a more appropriate interface.)
399    void GetStackFrameEntries(vector<StackFrameEntry*>* vec) const;
400  
401    // Find those files in this module that are actually referred to by
402    // functions' line number data, and assign them source id numbers.
403    // Set the source id numbers for all other files --- unused by the
404    // source line data --- to -1.  We do this before writing out the
405    // symbol file, at which point we omit any unused files.
406    void AssignSourceIds();
407  
408    // This function should be called before AssignSourceIds() to get the set of
409    // valid InlineOrigins*.
410    void CreateInlineOrigins(
411        set<InlineOrigin*, InlineOriginCompare>& inline_origins);
412  
413    // Call AssignSourceIds, and write this module to STREAM in the
414    // breakpad symbol format. Return true if all goes well, or false if
415    // an error occurs. This method writes out:
416    // - a header based on the values given to the constructor,
417    // If symbol_data is not CFI then:
418    // - the source files added via FindFile,
  // - the functions added via AddFunction, each with its lines,
420    // - all public records,
421    // If symbol_data is CFI then:
422    // - all CFI records.
423    // Addresses in the output are all relative to the load address
424    // established by SetLoadAddress.
425    bool Write(std::ostream& stream, SymbolData symbol_data);
426  
427    // Place the name in the global set of strings. Return a StringView points to
428    // a string inside the pool.
429    StringView AddStringToPool(const string& str) {
430      auto result = common_strings_.insert(str);
431      return *(result.first);
432    }
433  
434    string name() const { return name_; }
435    string os() const { return os_; }
436    string architecture() const { return architecture_; }
437    string identifier() const { return id_; }
438    string code_identifier() const { return code_id_; }
439  
440   private:
441    // Report an error that has occurred writing the symbol file, using
442    // errno to find the appropriate cause.  Return false.
443    static bool ReportError();
444  
445    // Write RULE_MAP to STREAM, in the form appropriate for 'STACK CFI'
446    // records, without a final newline. Return true if all goes well;
447    // if an error occurs, return false, and leave errno set.
448    static bool WriteRuleMap(const RuleMap& rule_map, std::ostream& stream);
449  
  // Returns true if the specified address resides within a specified address
  // range, or if no ranges have been specified.
452    bool AddressIsInModule(Address address) const;
453  
454    // Module header entries.
455    string name_, os_, architecture_, id_, code_id_;
456  
457    // The module's nominal load address.  Addresses for functions and
458    // lines are absolute, assuming the module is loaded at this
459    // address.
460    Address load_address_;
461  
462    // The set of valid address ranges of the module.  If specified, attempts to
463    // add elements residing outside these ranges will be silently filtered.
464    vector<Range> address_ranges_;
465  
466    // Relation for maps whose keys are strings shared with some other
467    // structure.
468    struct CompareStringPtrs {
469      bool operator()(const string* x, const string* y) const { return *x < *y; }
470    };
471  
472    // A map from filenames to File structures.  The map's keys are
473    // pointers to the Files' names.
474    typedef map<const string*, File*, CompareStringPtrs> FileByNameMap;
475  
476    // A set containing Function structures, sorted by address.
477    typedef set<Function*, FunctionCompare> FunctionSet;
478  
479    // A set containing Extern structures, sorted by address.
480    typedef set<std::unique_ptr<Extern>, ExternCompare> ExternSet;
481  
482    // The module owns all the files and functions that have been added
483    // to it; destroying the module frees the Files and Functions these
484    // point to.
485    FileByNameMap files_;    // This module's source files.
486    FunctionSet functions_;  // This module's functions.
487    // Used to quickly look up whether a function exists at a particular address.
488    unordered_set<Address> function_addresses_;
489  
490    // The module owns all the call frame info entries that have been
491    // added to it.
492    vector<std::unique_ptr<StackFrameEntry>> stack_frame_entries_;
493  
494    // The module owns all the externs that have been added to it;
495    // destroying the module frees the Externs these point to.
496    ExternSet externs_;
497  
498    unordered_set<string> common_strings_;
499  
500    // Whether symbols sharing an address should be collapsed into a single entry
501    // and marked with an `m` in the output. See
502    // https://bugs.chromium.org/p/google-breakpad/issues/detail?id=751 and docs
503    // at
504    // https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md#records-3
505    bool enable_multiple_field_;
506  
507    // If a Function and an Extern share the same address but have a different
508    // name, prefer the name of the Extern.
509    //
510    // Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables),
511    // as the Function's fully-qualified name will only be present in the STABS
512    // (which are placed in the Extern), not in the DWARF symbols (which are
513    // placed in the Function).
514    bool prefer_extern_name_;
515  };
516  
517  }  // namespace google_breakpad
518  
519  #endif  // COMMON_LINUX_MODULE_H__