/ src / common / dwarf_line_to_module.h
dwarf_line_to_module.h
  1  // -*- mode: c++ -*-
  2  
  3  // Copyright 2010 Google LLC
  4  //
  5  // Redistribution and use in source and binary forms, with or without
  6  // modification, are permitted provided that the following conditions are
  7  // met:
  8  //
  9  //     * Redistributions of source code must retain the above copyright
 10  // notice, this list of conditions and the following disclaimer.
 11  //     * Redistributions in binary form must reproduce the above
 12  // copyright notice, this list of conditions and the following disclaimer
 13  // in the documentation and/or other materials provided with the
 14  // distribution.
 15  //     * Neither the name of Google LLC nor the names of its
 16  // contributors may be used to endorse or promote products derived from
 17  // this software without specific prior written permission.
 18  //
 19  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 20  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 21  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 22  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 23  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 24  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 25  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 26  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 27  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 28  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 29  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30  
 31  // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
 32  
 33  // The DwarfLineToModule class accepts line number information from a
 34  // DWARF parser and adds it to a google_breakpad::Module. The Module
 35  // can write that data out as a Breakpad symbol file.
 36  
 37  #ifndef COMMON_LINUX_DWARF_LINE_TO_MODULE_H
 38  #define COMMON_LINUX_DWARF_LINE_TO_MODULE_H
 39  
 40  #include <string>
 41  
 42  #include "common/module.h"
 43  #include "common/dwarf/dwarf2reader.h"
 44  #include "common/using_std_string.h"
 45  
 46  namespace google_breakpad {
 47  
 48  // A class for producing a vector of google_breakpad::Module::Line
 49  // instances from parsed DWARF line number data.  
 50  //
 51  // An instance of this class can be provided as a handler to a
 52  // LineInfo DWARF line number information parser. The
 53  // handler accepts source location information from the parser and
 54  // uses it to produce a vector of google_breakpad::Module::Line
 55  // objects, referring to google_breakpad::Module::File objects added
 56  // to a particular google_breakpad::Module.
 57  //
 58  // GNU toolchain omitted sections support:
 59  // ======================================
 60  //
 61  // Given the right options, the GNU toolchain will omit unreferenced
 62  // functions from the final executable. Unfortunately, when it does so, it
 63  // does not remove the associated portions of the DWARF line number
 64  // program; instead, it gives the DW_LNE_set_address instructions referring
 65  // to the now-deleted code addresses of zero. Given this input, the DWARF
 66  // line parser will call AddLine with a series of lines starting at address
 67  // zero. For example, here is the output from 'readelf -wl' for a program
 68  // with four functions, the first three of which have been omitted:
 69  //
 70  //   Line Number Statements:
 71  //    Extended opcode 2: set Address to 0x0
 72  //    Advance Line by 14 to 15
 73  //    Copy
 74  //    Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 16
 75  //    Special opcode 119: advance Address by 8 to 0xb and Line by 2 to 18
 76  //    Advance PC by 2 to 0xd
 77  //    Extended opcode 1: End of Sequence
 78  // 
 79  //    Extended opcode 2: set Address to 0x0
 80  //    Advance Line by 14 to 15
 81  //    Copy
 82  //    Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 16
 83  //    Special opcode 119: advance Address by 8 to 0xb and Line by 2 to 18
 84  //    Advance PC by 2 to 0xd
 85  //    Extended opcode 1: End of Sequence
 86  // 
 87  //    Extended opcode 2: set Address to 0x0
 88  //    Advance Line by 19 to 20
 89  //    Copy
 90  //    Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 21
 91  //    Special opcode 76: advance Address by 5 to 0x8 and Line by 1 to 22
 92  //    Advance PC by 2 to 0xa
 93  //    Extended opcode 1: End of Sequence
 94  // 
 95  //    Extended opcode 2: set Address to 0x80483a4
 96  //    Advance Line by 23 to 24
 97  //    Copy
 98  //    Special opcode 202: advance Address by 14 to 0x80483b2 and Line by 1 to 25
 99  //    Special opcode 76: advance Address by 5 to 0x80483b7 and Line by 1 to 26
100  //    Advance PC by 6 to 0x80483bd
101  //    Extended opcode 1: End of Sequence
102  //
103  // Instead of collecting runs of lines describing code that is not there,
104  // we try to recognize and drop them. Since the linker doesn't explicitly
105  // distinguish references to dropped sections from genuine references to
106  // code at address zero, we must use a heuristic. We have chosen:
107  //
108  // - If a line starts at address zero, omit it. (On the platforms
109  //   breakpad targets, it is extremely unlikely that there will be code
110  //   at address zero.)
111  //
112  // - If a line starts immediately after an omitted line, omit it too.
113  class DwarfLineToModule: public LineInfoHandler {
114   public:
115    // As the DWARF line info parser passes us line records, add source
116    // files to MODULE, and add all lines to the end of LINES. LINES
117    // need not be empty. If the parser hands us a zero-length line, we
118    // omit it. If the parser hands us a line that extends beyond the
119    // end of the address space, we clip it. It's up to our client to
120    // sort out which lines belong to which functions; we don't add them
121    // to any particular function in MODULE ourselves.
122    DwarfLineToModule(Module* module,
123                      const string& compilation_dir,
124                      vector<Module::Line>* lines,
125                      std::map<uint32_t, Module::File*>* files)
126        : module_(module),
127          compilation_dir_(compilation_dir),
128          lines_(lines),
129          files_(files),
130          highest_file_number_(-1),
131          omitted_line_end_(0),
132          warned_bad_file_number_(false),
133          warned_bad_directory_number_(false) { }
134  
135    ~DwarfLineToModule() { }
136  
137    void DefineDir(const string& name, uint32_t dir_num);
138    void DefineFile(const string& name, int32_t file_num,
139                    uint32_t dir_num, uint64_t mod_time,
140                    uint64_t length);
141    void AddLine(uint64_t address, uint64_t length,
142                 uint32_t file_num, uint32_t line_num, uint32_t column_num);
143  
144   private:
145  
146    typedef std::map<uint32_t, string> DirectoryTable;
147    typedef std::map<uint32_t, Module::File*> FileTable;
148  
149    // The module we're contributing debugging info to. Owned by our
150    // client.
151    Module *module_;
152  
153    // The compilation directory for the current compilation unit whose
154    // lines are being accumulated.
155    string compilation_dir_;
156  
157    // The vector of lines we're accumulating. Owned by our client.
158    //
159    // In a Module, as in a breakpad symbol file, lines belong to
160    // specific functions, but DWARF simply assigns lines to addresses;
161    // one must infer the line/function relationship using the
162    // functions' beginning and ending addresses. So we can't add these
163    // to the appropriate function from module_ until we've read the
164    // function info as well. Instead, we accumulate lines here, and let
165    // whoever constructed this sort it all out.
166    vector<Module::Line>* lines_;
167  
168    // A table mapping directory numbers to paths.
169    DirectoryTable directories_;
170  
171    // A table mapping file numbers to Module::File pointers.
172    FileTable* files_;
173  
174    // The highest file number we've seen so far, or -1 if we've seen
175    // none.  Used for dynamically defined file numbers.
176    int32_t highest_file_number_;
177  
178    // This is the ending address of the last line we omitted, or zero if we
179    // didn't omit the previous line. It is zero before we have received any
180    // AddLine calls.
181    uint64_t omitted_line_end_;
182  
183    // True if we've warned about:
184    bool warned_bad_file_number_; // bad file numbers
185    bool warned_bad_directory_number_; // bad directory numbers
186  };
187  
188  } // namespace google_breakpad
189  
190  #endif // COMMON_LINUX_DWARF_LINE_TO_MODULE_H