/ src / common / windows / pdb_source_line_writer.h
pdb_source_line_writer.h
  1  // Copyright 2006 Google LLC
  2  //
  3  // Redistribution and use in source and binary forms, with or without
  4  // modification, are permitted provided that the following conditions are
  5  // met:
  6  //
  7  //     * Redistributions of source code must retain the above copyright
  8  // notice, this list of conditions and the following disclaimer.
  9  //     * Redistributions in binary form must reproduce the above
 10  // copyright notice, this list of conditions and the following disclaimer
 11  // in the documentation and/or other materials provided with the
 12  // distribution.
 13  //     * Neither the name of Google LLC nor the names of its
 14  // contributors may be used to endorse or promote products derived from
 15  // this software without specific prior written permission.
 16  //
 17  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 21  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 22  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 23  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 27  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28  
 29  // PDBSourceLineWriter uses a pdb file produced by Visual C++ to output
 30  // a line/address map for use with BasicSourceLineResolver.
 31  
 32  #ifndef COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
 33  #define COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
 34  
 35  #include <atlcomcli.h>
 36  
 37  #include <map>
 38  #include <memory>
 39  #include <string>
 40  #include <unordered_map>
 41  #include <vector>
 42  
 43  #include "common/windows/module_info.h"
 44  #include "common/windows/omap.h"
 45  
 // Forward declarations of the DIA SDK interfaces that appear in the
 // signatures below.  Only pointers to these types are used in this header,
 // so the full DIA definitions are not needed here.
 struct IDiaEnumLineNumbers;
 struct IDiaLineNumber;
 struct IDiaSession;
 struct IDiaSymbol;
 49  
 50  namespace google_breakpad {
 51  
 52  using std::map;
 53  using std::vector;
 54  using std::wstring;
 55  using std::unordered_map;
 56  
 57  class PDBSourceLineWriter {
 58   public:
 59    enum FileFormat {
 60      PDB_FILE,  // a .pdb file containing debug symbols
 61      EXE_FILE,  // a .exe or .dll file
 62      ANY_FILE   // try PDB_FILE and then EXE_FILE
 63    };
 64  
 65    explicit PDBSourceLineWriter(bool handle_inline = false);
 66    ~PDBSourceLineWriter();
 67  
 68    // Opens the given file.  For executable files, the corresponding pdb
 69    // file must be available; Open will be if it is not.
 70    // If there is already a pdb file open, it is automatically closed.
 71    // Returns true on success.
 72    bool Open(const wstring& file, FileFormat format);
 73  
 74    // Closes the current pdb file and its associated resources.
 75    void Close();
 76  
 77    // Sets the code file full path.  This is optional for 32-bit modules.  It is
 78    // also optional for 64-bit modules when there is an executable file stored
 79    // in the same directory as the PDB file.  It is only required for 64-bit
 80    // modules when the executable file is not in the same location as the PDB
 81    // file and it must be called after Open() and before WriteMap().
 82    // If Open() was called for an executable file, then it is an error to call
 83    // SetCodeFile() with a different file path and it will return false.
 84    bool SetCodeFile(const wstring& exe_file);
 85  
 86    // Writes a Breakpad symbol file from the current pdb file to |symbol_file|.
 87    // Returns true on success.
 88    bool WriteSymbols(FILE *symbol_file);
 89  
 90    // Retrieves information about the module's debugging file.  Returns
 91    // true on success and false on failure.
 92    bool GetModuleInfo(PDBModuleInfo *info);
 93  
 94    // Retrieves information about the module's PE file.  Returns
 95    // true on success and false on failure.
 96    bool GetPEInfo(PEModuleInfo *info);
 97  
 98    // Sets uses_guid to true if the opened file uses a new-style CodeView
 99    // record with a 128-bit GUID, or false if the opened file uses an old-style
100    // CodeView record.  When no GUID is available, a 32-bit signature should be
101    // used to identify the module instead.  If the information cannot be
102    // determined, this method returns false.
103    bool UsesGUID(bool *uses_guid);
104  
105   private:
106    // InlineOrigin represents INLINE_ORIGIN record in a symbol file. It's an
107    // inlined function.
108    struct InlineOrigin {
109      // The unique id for an InlineOrigin.
110      int id;
111      // The name of the inlined function.
112      wstring name;
113    };
114  
115    // Line represents LINE record in a symbol file. It represents a source code
116    // line.
117    struct Line {
118      // The relative address of a line.
119      DWORD rva;
120      // The number bytes this line has.
121      DWORD length;
122      // The source line number.
123      DWORD line_num;
124      // The source file id where the source line is located at.
125      DWORD file_id;
126    };
127  
128    // Inline represents INLINE record in a symbol file.
129    class Inline {
130     public:
131      explicit Inline(int inline_nest_level);
132  
133      void SetOriginId(int origin_id);
134  
135      // Adding inlinee line's range into ranges. If line is adjacent with any
136      // existing lines, extend the range. Otherwise, add line as a new range.
137      void ExtendRanges(const Line& line);
138  
139      void SetCallSiteLine(DWORD call_site_line);
140  
141      void SetCallSiteFileId(DWORD call_site_file_id);
142  
143      void SetChildInlines(std::vector<std::unique_ptr<Inline>> child_inlines);
144  
145      void Print(FILE* output) const;
146  
147     private:
148      // The nest level of this inline record.
149      int inline_nest_level_;
150      // The source line number at where this inlined function is called.
151      DWORD call_site_line_ = 0;
152      // The call site file id at where this inlined function is called.
153      DWORD call_site_file_id_ = 0;
154      // The id used for referring to an InlineOrigin.
155      int origin_id_ = 0;
156      // A map from rva to length. This is the address ranges covered by this
157      // Inline.
158      map<DWORD, DWORD> ranges_;
159      // The list of direct Inlines inlined inside this Inline.
160      vector<std::unique_ptr<Inline>> child_inlines_;
161    };
162  
163    // Lines represents a map of lines inside a function with rva as the key.
164    // AddLine function adds a line into the map and ensures that there is no
165    // overlap between any two lines in the map.
166    class Lines {
167     public:
168      const map<DWORD, Line>& GetLineMap() const { return line_map_; }
169  
170      // Finds the line from line_map_ that contains the given rva returns its
171      // line_num. If not found, return 0.
172      DWORD GetLineNum(DWORD rva) const;
173  
174      // Finds the line from line_map_ that contains the given rva returns its
175      // file_id. If not found, return 0.
176      DWORD GetFileId(DWORD rva) const;
177  
178      // Add the `line` into line_map_. If the `line` overlaps with existing
179      // lines, truncate the existing lines and add the given line. It ensures
180      // that all lines in line_map_ do not overlap with each other. For example,
181      // suppose there is a line A in the map and we call AddLine with Line B.
182      // Line A: rva: 100, length: 20, line_num: 10, file_id: 1
183      // Line B: rva: 105, length: 10, line_num: 4, file_id: 2
184      // After calling AddLine with Line B, we will have the following lines:
185      // Line 1: rva: 100, length: 5, line_num: 10, file_id: 1
186      // Line 2: rva: 105, length: 10, line_num: 4, file_id: 2
187      // Line 3: rva: 115, length: 5, line_num: 10, file_id: 1
188      void AddLine(const Line& line);
189  
190     private:
191      // Finds the line from line_map_ that contains the given rva. If not found,
192      // return nullptr.
193      const Line* GetLine(DWORD rva) const;
194      // The key is rva. AddLine function ensures that any two lines in the map do
195      // not overlap.
196      map<DWORD, Line> line_map_;
197    };
198  
199    // Construct Line from IDiaLineNumber. The output Line is stored at line.
200    // Return true on success.
201    bool GetLine(IDiaLineNumber* dia_line, Line* line) const;
202  
203    // Construct Lines from IDiaEnumLineNumbers. The list of Lines are stored at
204    // line_list.
205    // Returns true on success.
206    bool GetLines(IDiaEnumLineNumbers* lines, Lines* line_list) const;
207  
208    // Outputs the line/address pairs for each line in the enumerator.
209    void PrintLines(const Lines& lines) const;
210  
211    // Outputs a function address and name, followed by its source line list.
212    // block can be the same object as function, or it can be a reference to a
213    // code block that is lexically part of this function, but resides at a
214    // separate address. If has_multiple_symbols is true, this function's
215    // instructions correspond to multiple symbols. Returns true on success.
216    bool PrintFunction(IDiaSymbol *function, IDiaSymbol *block,
217                       bool has_multiple_symbols);
218  
219    // Outputs all functions as described above.  Returns true on success.
220    bool PrintFunctions();
221  
222    // Outputs all of the source files in the session's pdb file.
223    // Returns true on success.
224    bool PrintSourceFiles();
225  
226    // Output all inline origins.
227    void PrintInlineOrigins() const;
228  
229    // Retrieve inlines inside the given block. It also adds inlinee lines to
230    // `line_list` since inner lines are more precise source location. If the
231    // block has children wih SymTagInlineSite Tag, it will recursively (DFS) call
232    // itself with each child as first argument. Returns true on success.
233    // `block`: the IDiaSymbol that may have inline sites.
234    // `line_list`: the list of lines inside current function.
235    // `inline_nest_level`: the nest level of block's Inlines.
236    // `inlines`: the vector to store the list of inlines for the block.
237    bool GetInlines(IDiaSymbol* block,
238                    Lines* line_list,
239                    int inline_nest_level,
240                    vector<std::unique_ptr<Inline>>* inlines);
241  
242    // Outputs all inlines.
243    void PrintInlines(const vector<std::unique_ptr<Inline>>& inlines) const;
244  
245    // Outputs all of the frame information necessary to construct stack
246    // backtraces in the absence of frame pointers. For x86 data stored in
247    // .pdb files. Returns true on success.
248    bool PrintFrameDataUsingPDB();
249  
250    // Outputs all of the frame information necessary to construct stack
251    // backtraces in the absence of frame pointers. For x64 data stored in
252    // .exe, .dll files. Returns true on success.
253    bool PrintFrameDataUsingEXE();
254  
255    // Outputs all of the frame information necessary to construct stack
256    // backtraces in the absence of frame pointers.  Returns true on success.
257    bool PrintFrameData();
258  
259    // Outputs a single public symbol address and name, if the symbol corresponds
260    // to a code address.  Returns true on success.  If symbol is does not
261    // correspond to code, returns true without outputting anything. If
262    // has_multiple_symbols is true, the symbol corresponds to a code address and
263    // the instructions correspond to multiple symbols.
264    bool PrintCodePublicSymbol(IDiaSymbol *symbol, bool has_multiple_symbols);
265  
266    // Outputs a line identifying the PDB file that is being dumped, along with
267    // its uuid and age.
268    bool PrintPDBInfo();
269  
270    // Outputs a line identifying the PE file corresponding to the PDB
271    // file that is being dumped, along with its code identifier,
272    // which consists of its timestamp and file size.
273    bool PrintPEInfo();
274  
275    // Returns true if this filename has already been seen,
276    // and an ID is stored for it, or false if it has not.
277    bool FileIDIsCached(const wstring& file) {
278      return unique_files_.find(file) != unique_files_.end();
279    }
280  
281    // Cache this filename and ID for later reuse.
282    void CacheFileID(const wstring& file, DWORD id) {
283      unique_files_[file] = id;
284    }
285  
286    // Store this ID in the cache as a duplicate for this filename.
287    void StoreDuplicateFileID(const wstring& file, DWORD id) {
288      unordered_map<wstring, DWORD>::iterator iter = unique_files_.find(file);
289      if (iter != unique_files_.end()) {
290        // map this id to the previously seen one
291        file_ids_[id] = iter->second;
292      }
293    }
294  
295    // Given a file's unique ID, return the ID that should be used to
296    // reference it. There may be multiple files with identical filenames
297    // but different unique IDs. The cache attempts to coalesce these into
298    // one ID per unique filename.
299    DWORD GetRealFileID(DWORD id) const {
300      unordered_map<DWORD, DWORD>::const_iterator iter = file_ids_.find(id);
301      if (iter == file_ids_.end())
302        return id;
303      return iter->second;
304    }
305  
306    // Find the PE file corresponding to the loaded PDB file, and
307    // set the code_file_ member. Returns false on failure.
308    bool FindPEFile();
309  
310    // Returns the function name for a symbol.  If possible, the name is
311    // undecorated.  If the symbol's decorated form indicates the size of
312    // parameters on the stack, this information is returned in stack_param_size.
313    // Returns true on success.  If the symbol doesn't encode parameter size
314    // information, stack_param_size is set to -1.
315    static bool GetSymbolFunctionName(IDiaSymbol *function, BSTR *name,
316                                      int *stack_param_size);
317  
318    // Returns the number of bytes of stack space used for a function's
319    // parameters.  function must have the tag SymTagFunction.  In the event of
320    // a failure, returns 0, which is also a valid number of bytes.
321    static int GetFunctionStackParamSize(IDiaSymbol *function);
322  
323    // The filename of the PE file corresponding to the currently-open
324    // pdb file.
325    wstring code_file_;
326  
327    // The session for the currently-open pdb file.
328    CComPtr<IDiaSession> session_;
329  
330    // The current output file for this WriteMap invocation.
331    FILE *output_;
332  
333    // There may be many duplicate filenames with different IDs.
334    // This maps from the DIA "unique ID" to a single ID per unique
335    // filename.
336    unordered_map<DWORD, DWORD> file_ids_;
337    // This maps unique filenames to file IDs.
338    unordered_map<wstring, DWORD> unique_files_;
339  
340    // The INLINE_ORIGINS records. The key is the function name.
341    std::map<wstring, InlineOrigin> inline_origins_;
342  
343    // This is used for calculating post-transform symbol addresses and lengths.
344    ImageMap image_map_;
345  
346    // If we should output INLINE/INLINE_ORIGIN records
347    bool handle_inline_;
348  
349    // Disallow copy ctor and operator=
350    PDBSourceLineWriter(const PDBSourceLineWriter&);
351    void operator=(const PDBSourceLineWriter&);
352  };
353  
354  }  // namespace google_breakpad
355  
356  #endif  // COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_