/ src / common / dwarf_cu_to_module.h
dwarf_cu_to_module.h
  1  // -*- mode: c++ -*-
  2  
  3  // Copyright 2010 Google LLC
  4  //
  5  // Redistribution and use in source and binary forms, with or without
  6  // modification, are permitted provided that the following conditions are
  7  // met:
  8  //
  9  //     * Redistributions of source code must retain the above copyright
 10  // notice, this list of conditions and the following disclaimer.
 11  //     * Redistributions in binary form must reproduce the above
 12  // copyright notice, this list of conditions and the following disclaimer
 13  // in the documentation and/or other materials provided with the
 14  // distribution.
 15  //     * Neither the name of Google LLC nor the names of its
 16  // contributors may be used to endorse or promote products derived from
 17  // this software without specific prior written permission.
 18  //
 19  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 20  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 21  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 22  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 23  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 24  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 25  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 26  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 27  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 28  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 29  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30  
 31  // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
 32  
 33  // Add DWARF debugging information to a Breakpad symbol file. This
 34  // file defines the DwarfCUToModule class, which accepts parsed DWARF
 35  // data and populates a google_breakpad::Module with the results; the
 36  // Module can then write its contents as a Breakpad symbol file.
 37  
 38  #ifndef COMMON_LINUX_DWARF_CU_TO_MODULE_H__
 39  #define COMMON_LINUX_DWARF_CU_TO_MODULE_H__
 40  
#include <stdint.h>

#include <map>
#include <string>
#include <vector>

#include "common/language.h"
#include "common/module.h"
#include "common/dwarf/dwarf2diehandler.h"
#include "common/dwarf/dwarf2reader.h"
#include "common/scoped_ptr.h"
#include "common/using_std_string.h"
 52  
 53  namespace google_breakpad {
 54  
 55  // Populate a google_breakpad::Module with DWARF debugging information.
 56  //
 57  // An instance of this class can be provided as a handler to a
 58  // DIEDispatcher, which can in turn be a handler for a
 59  // CompilationUnit DWARF parser. The handler uses the results
 60  // of parsing to populate a google_breakpad::Module with source file,
 61  // function, and source line information.
 62  class DwarfCUToModule: public RootDIEHandler {
 63    struct FilePrivate;
 64   public:
 65    // Information global to the DWARF-bearing file we are processing,
 66    // for use by DwarfCUToModule. Each DwarfCUToModule instance deals
 67    // with a single compilation unit within the file, but information
 68    // global to the whole file is held here. The client is responsible
 69    // for filling it in appropriately (except for the 'file_private'
 70    // field, which the constructor and destructor take care of), and
 71    // then providing it to the DwarfCUToModule instance for each
 72    // compilation unit we process in that file. Set HANDLE_INTER_CU_REFS
 73    // to true to handle debugging symbols with DW_FORM_ref_addr entries.
 74    class FileContext {
 75     public:
 76      FileContext(const string& filename,
 77                  Module* module,
 78                  bool handle_inter_cu_refs);
 79      ~FileContext();
 80  
 81      // Add CONTENTS of size LENGTH to the section map as NAME.
 82      void AddSectionToSectionMap(const string& name,
 83                                  const uint8_t* contents,
 84                                  uint64_t length);
 85  
 86      void AddManagedSectionToSectionMap(const string& name,
 87                                  uint8_t* contents,
 88                                  uint64_t length);
 89  
 90      // Clear the section map for testing.
 91      void ClearSectionMapForTest();
 92  
 93      const SectionMap& section_map() const;
 94  
 95     private:
 96      friend class DwarfCUToModule;
 97  
 98      // Clears all the Specifications if HANDLE_INTER_CU_REFS_ is false.
 99      void ClearSpecifications();
100  
101      // Given an OFFSET and a CU that starts at COMPILATION_UNIT_START, returns
102      // true if this is an inter-compilation unit reference that is not being
103      // handled.
104      bool IsUnhandledInterCUReference(uint64_t offset,
105                                       uint64_t compilation_unit_start) const;
106  
107      // The name of this file, for use in error messages.
108      const string filename_;
109  
110      // A map of this file's sections, used for finding other DWARF
111      // sections that the .debug_info section may refer to.
112      SectionMap section_map_;
113  
114      // The Module to which we're contributing definitions.
115      Module* module_;
116  
117      // True if we are handling references between compilation units.
118      const bool handle_inter_cu_refs_;
119  
120      // Inter-compilation unit data used internally by the handlers.
121      scoped_ptr<FilePrivate> file_private_;
122      std::vector<uint8_t *> uncompressed_sections_;
123    };
124  
125    // An abstract base class for handlers that handle DWARF range lists for
126    // DwarfCUToModule.
127    class RangesHandler {
128     public:
129      RangesHandler() { }
130      virtual ~RangesHandler() { }
131  
132      // Called when finishing a function to populate the function's ranges.
133      // The entries are read according to the form and data.
134      virtual bool ReadRanges(
135          enum DwarfForm form, uint64_t data,
136          RangeListReader::CURangesInfo* cu_info,
137          vector<Module::Range>* ranges) = 0;
138    };
139  
140    // An abstract base class for handlers that handle DWARF line data
141    // for DwarfCUToModule. DwarfCUToModule could certainly just use
142    // LineInfo itself directly, but decoupling things
143    // this way makes unit testing a little easier.
144    class LineToModuleHandler {
145     public:
146      LineToModuleHandler() { }
147      virtual ~LineToModuleHandler() { }
148  
149      // Called at the beginning of a new compilation unit, prior to calling
150      // ReadProgram(). compilation_dir will indicate the path that the
151      // current compilation unit was compiled in, consistent with the
152      // DW_AT_comp_dir DIE.
153      virtual void StartCompilationUnit(const string& compilation_dir) = 0;
154  
155      // Populate MODULE and LINES with source file names and code/line
156      // mappings, given a pointer to some DWARF line number data
157      // PROGRAM, and an overestimate of its size. Add no zero-length
158      // lines to LINES.
159      virtual void ReadProgram(const uint8_t* program, uint64_t length,
160                               const uint8_t* string_section,
161                               uint64_t string_section_length,
162                               const uint8_t* line_string_section,
163                               uint64_t line_string_length,
164                               Module* module, vector<Module::Line>* lines,
165                               map<uint32_t, Module::File*>* files) = 0;
166    };
167  
168    // The interface DwarfCUToModule uses to report warnings. The member
169    // function definitions for this class write messages to stderr, but
170    // you can override them if you'd like to detect or report these
171    // conditions yourself.
172    class WarningReporter {
173     public:
174      // Warn about problems in the DWARF file FILENAME, in the
175      // compilation unit at OFFSET.
176      WarningReporter(const string& filename, uint64_t cu_offset)
177          : filename_(filename), cu_offset_(cu_offset), printed_cu_header_(false),
178            printed_unpaired_header_(false),
179            uncovered_warnings_enabled_(false) { }
180      virtual ~WarningReporter() { }
181  
182      // Set the name of the compilation unit we're processing to NAME.
183      virtual void SetCUName(const string& name) { cu_name_ = name; }
184  
185      // Accessor and setter for uncovered_warnings_enabled_.
186      // UncoveredFunction and UncoveredLine only report a problem if that is
187      // true. By default, these warnings are disabled, because those
188      // conditions occur occasionally in healthy code.
189      virtual bool uncovered_warnings_enabled() const {
190        return uncovered_warnings_enabled_;
191      }
192      virtual void set_uncovered_warnings_enabled(bool value) {
193        uncovered_warnings_enabled_ = value;
194      }
195  
196      // A DW_AT_specification in the DIE at OFFSET refers to a DIE we
197      // haven't processed yet, or that wasn't marked as a declaration,
198      // at TARGET.
199      virtual void UnknownSpecification(uint64_t offset, uint64_t target);
200  
201      // A DW_AT_abstract_origin in the DIE at OFFSET refers to a DIE we
202      // haven't processed yet, or that wasn't marked as inline, at TARGET.
203      virtual void UnknownAbstractOrigin(uint64_t offset, uint64_t target);
204  
205      // We were unable to find the DWARF section named SECTION_NAME.
206      virtual void MissingSection(const string& section_name);
207  
208      // The CU's DW_AT_stmt_list offset OFFSET is bogus.
209      virtual void BadLineInfoOffset(uint64_t offset);
210  
211      // FUNCTION includes code covered by no line number data.
212      virtual void UncoveredFunction(const Module::Function& function);
213  
214      // Line number NUMBER in LINE_FILE, of length LENGTH, includes code
215      // covered by no function.
216      virtual void UncoveredLine(const Module::Line& line);
217  
218      // The DW_TAG_subprogram DIE at OFFSET has no name specified directly
219      // in the DIE, nor via a DW_AT_specification or DW_AT_abstract_origin
220      // link.
221      virtual void UnnamedFunction(uint64_t offset);
222  
223      // __cxa_demangle() failed to demangle INPUT.
224      virtual void DemangleError(const string& input);
225  
226      // The DW_FORM_ref_addr at OFFSET to TARGET was not handled because
227      // FilePrivate did not retain the inter-CU specification data.
228      virtual void UnhandledInterCUReference(uint64_t offset, uint64_t target);
229  
230      // The DW_AT_ranges at offset is malformed (truncated or outside of the
231      // .debug_ranges section's bound).
232      virtual void MalformedRangeList(uint64_t offset);
233  
234      // A DW_AT_ranges attribute was encountered but the no .debug_ranges
235      // section was found.
236      virtual void MissingRanges();
237  
238      uint64_t cu_offset() const {
239        return cu_offset_;
240      }
241  
242     protected:
243      const string filename_;
244      const uint64_t cu_offset_;
245      string cu_name_;
246      bool printed_cu_header_;
247      bool printed_unpaired_header_;
248      bool uncovered_warnings_enabled_;
249  
250     private:
251      // Print a per-CU heading, once.
252      void CUHeading();
253      // Print an unpaired function/line heading, once.
254      void UncoveredHeading();
255    };
256  
257    // Create a DWARF debugging info handler for a compilation unit
258    // within FILE_CONTEXT. This uses information received from the
259    // CompilationUnit DWARF parser to populate
260    // FILE_CONTEXT->module. Use LINE_READER to handle the compilation
261    // unit's line number data. Use REPORTER to report problems with the
262    // data we find.
263    DwarfCUToModule(FileContext* file_context,
264                    LineToModuleHandler* line_reader,
265                    RangesHandler* ranges_handler,
266                    WarningReporter* reporter,
267                    bool handle_inline = false,
268                    uint64_t low_pc = 0,
269                    uint64_t addr_base = 0,
270                    bool has_source_line_info = false,
271                    uint64_t source_line_offset = 0);
272    ~DwarfCUToModule();
273  
274    void ProcessAttributeSigned(enum DwarfAttribute attr,
275                                enum DwarfForm form,
276                                int64_t data);
277    void ProcessAttributeUnsigned(enum DwarfAttribute attr,
278                                  enum DwarfForm form,
279                                  uint64_t data);
280    void ProcessAttributeString(enum DwarfAttribute attr,
281                                enum DwarfForm form,
282                                const string& data);
283    bool EndAttributes();
284    DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag);
285  
286    // Assign all our source Lines to the Functions that cover their
287    // addresses, and then add them to module_.
288    void Finish();
289  
290    bool StartCompilationUnit(uint64_t offset, uint8_t address_size,
291                              uint8_t offset_size, uint64_t cu_length,
292                              uint8_t dwarf_version);
293    bool StartRootDIE(uint64_t offset, enum DwarfTag tag);
294  
295   private:
296    // Used internally by the handler. Full definitions are in
297    // dwarf_cu_to_module.cc.
298    struct CUContext;
299    struct DIEContext;
300    struct Specification;
301    class GenericDIEHandler;
302    class FuncHandler;
303    class InlineHandler;
304    class NamedScopeHandler;
305  
306    // A map from section offsets to specifications.
307    typedef map<uint64_t, Specification> SpecificationByOffset;
308  
309    // Set this compilation unit's source language to LANGUAGE.
310    void SetLanguage(DwarfLanguage language);
311  
312    // Read source line information at OFFSET in the .debug_line
313    // section.  Record source files in module_, but record source lines
314    // in lines_; we apportion them to functions in
315    // AssignLinesToFunctions.
316    void ReadSourceLines(uint64_t offset);
317  
318    // Assign the lines in lines_ to the individual line lists of the
319    // functions in functions_.  (DWARF line information maps an entire
320    // compilation unit at a time, and gives no indication of which
321    // lines belong to which functions, beyond their addresses.)
322    void AssignLinesToFunctions();
323  
324    void AssignFilesToInlines();
325  
326    // The only reason cu_context_ and child_context_ are pointers is
327    // that we want to keep their definitions private to
328    // dwarf_cu_to_module.cc, instead of listing them all here. They are
329    // owned by this DwarfCUToModule: the constructor sets them, and the
330    // destructor deletes them.
331  
332    // The handler to use to handle line number data.
333    LineToModuleHandler* line_reader_;
334  
335    // This compilation unit's context.
336    scoped_ptr<CUContext> cu_context_;
337  
338    // A context for our children.
339    scoped_ptr<DIEContext> child_context_;
340  
341    // True if this compilation unit has source line information.
342    bool has_source_line_info_;
343  
344    // The offset of this compilation unit's line number information in
345    // the .debug_line section.
346    uint64_t source_line_offset_;
347  
348    // The line numbers we have seen thus far.  We accumulate these here
349    // during parsing.  Then, in Finish, we call AssignLinesToFunctions
350    // to dole them out to the appropriate functions.
351    vector<Module::Line> lines_;
352  
353    // The map from file index to File* in this CU.
354    std::map<uint32_t, Module::File*> files_;
355  };
356  
357  }  // namespace google_breakpad
358  
359  #endif  // COMMON_LINUX_DWARF_CU_TO_MODULE_H__