/ src / common / stabs_reader.h
stabs_reader.h
  1  // -*- mode: c++ -*-
  2  
  3  // Copyright 2010 Google LLC
  4  //
  5  // Redistribution and use in source and binary forms, with or without
  6  // modification, are permitted provided that the following conditions are
  7  // met:
  8  //
  9  //     * Redistributions of source code must retain the above copyright
 10  // notice, this list of conditions and the following disclaimer.
 11  //     * Redistributions in binary form must reproduce the above
 12  // copyright notice, this list of conditions and the following disclaimer
 13  // in the documentation and/or other materials provided with the
 14  // distribution.
 15  //     * Neither the name of Google LLC nor the names of its
 16  // contributors may be used to endorse or promote products derived from
 17  // this software without specific prior written permission.
 18  //
 19  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 20  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 21  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 22  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 23  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 24  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 25  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 26  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 27  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 28  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 29  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30  
 31  // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
 32  
 33  // stabs_reader.h: Define StabsReader, a parser for STABS debugging
 34  // information. A description of the STABS debugging format can be
 35  // found at:
 36  //
 37  //    http://sourceware.org/gdb/current/onlinedocs/stabs_toc.html
 38  //
 39  // The comments here assume you understand the format.
 40  //
 41  // This parser can handle big-endian and little-endian data, and the symbol
 42  // values may be either 32 or 64 bits long. It handles both STABS in
 43  // sections (as used on Linux) and STABS appearing directly in an
 44  // a.out-like symbol table (as used in Darwin OS X Mach-O files).
 45  
 46  #ifndef COMMON_STABS_READER_H__
 47  #define COMMON_STABS_READER_H__
 48  
 49  #include <stddef.h>
 50  #include <stdint.h>
 51  
 52  #ifdef HAVE_MACH_O_NLIST_H
 53  #include <mach-o/nlist.h>
 54  #elif defined(HAVE_A_OUT_H)
 55  #include <a.out.h>
 56  #endif
 57  
 58  #include <string>
 59  #include <vector>
 60  
 61  #include "common/byte_cursor.h"
 62  #include "common/using_std_string.h"
 63  
 64  namespace google_breakpad {
 65  
 66  class StabsHandler;
 67  
 68  class StabsReader {
 69   public:
 70    // Create a reader for the STABS debug information whose .stab section is
 71    // being traversed by ITERATOR, and whose .stabstr section is referred to
 72    // by STRINGS. The reader will call the member functions of HANDLER to
 73    // report the information it finds, when the reader's 'Process' member
 74    // function is called.
 75    //
 76    // BIG_ENDIAN should be true if the entries in the .stab section are in
 77    // big-endian form, or false if they are in little-endian form.
 78    //
 79    // VALUE_SIZE should be either 4 or 8, indicating the size of the 'value'
 80    // field in each entry in bytes.
 81    //
 82    // UNITIZED should be true if the STABS data is stored in units with
 83    // N_UNDF headers. This is usually the case for STABS stored in sections,
 84    // like .stab/.stabstr, and usually not the case for STABS stored in the
 85    // actual symbol table; UNITIZED should be true when parsing Linux stabs,
 86    // false when parsing Mac OS X STABS. For details, see:
 87    // http://sourceware.org/gdb/current/onlinedocs/stabs/Stab-Section-Basics.html
 88    // 
 89    // Note that, in ELF, the .stabstr section should be found using the
 90    // 'sh_link' field of the .stab section header, not by name.
 91    StabsReader(const uint8_t* stab,    size_t stab_size,
 92                const uint8_t* stabstr, size_t stabstr_size,
 93                bool big_endian, size_t value_size, bool unitized,
 94                StabsHandler* handler);
 95  
 96    // Process the STABS data, calling the handler's member functions to
 97    // report what we find.  While the handler functions return true,
 98    // continue to process until we reach the end of the section.  If we
 99    // processed the entire section and all handlers returned true,
100    // return true.  If any handler returned false, return false.
101    // 
102    // This is only meant to be called once per StabsReader instance;
103    // resuming a prior processing pass that stopped abruptly isn't supported.
104    bool Process();
105  
106   private:
107  
108    // An class for walking arrays of STABS entries. This isolates the main
109    // STABS reader from the exact format (size; endianness) of the entries
110    // themselves.
111    class EntryIterator {
112     public:
113      // The contents of a STABS entry, adjusted for the host's endianness,
114      // word size, 'struct nlist' layout, and so on.
115      struct Entry {
116        // True if this iterator has reached the end of the entry array. When
117        // this is set, the other members of this structure are not valid.
118        bool at_end;
119  
120        // The number of this entry within the list.
121        size_t index;
122  
123        // The current entry's name offset. This is the offset within the
124        // current compilation unit's strings, as establish by the N_UNDF entries.
125        size_t name_offset;
126  
127        // The current entry's type, 'other' field, descriptor, and value.
128        unsigned char type;
129        unsigned char other;
130        short descriptor;
131        uint64_t value;
132      };
133  
134      // Create a EntryIterator walking the entries in BUFFER. Treat the
135      // entries as big-endian if BIG_ENDIAN is true, as little-endian
136      // otherwise. Assume each entry has a 'value' field whose size is
137      // VALUE_SIZE.
138      //
139      // This would not be terribly clean to extend to other format variations,
140      // but it's enough to handle Linux and Mac, and we'd like STABS to die
141      // anyway.
142      //
143      // For the record: on Linux, STABS entry values are always 32 bits,
144      // regardless of the architecture address size (don't ask me why); on
145      // Mac, they are 32 or 64 bits long. Oddly, the section header's entry
146      // size for a Linux ELF .stab section varies according to the ELF class
147      // from 12 to 20 even as the actual entries remain unchanged.
148      EntryIterator(const ByteBuffer* buffer, bool big_endian, size_t value_size);
149  
150      // Move to the next entry. This function's behavior is undefined if
151      // at_end() is true when it is called.
152      EntryIterator& operator++() { Fetch(); entry_.index++; return *this; }
153  
154      // Dereferencing this iterator produces a reference to an Entry structure
155      // that holds the current entry's values. The entry is owned by this
156      // EntryIterator, and will be invalidated at the next call to operator++.
157      const Entry& operator*() const { return entry_; }
158      const Entry* operator->() const { return &entry_; }
159  
160     private:
161      // Read the STABS entry at cursor_, and set entry_ appropriately.
162      void Fetch();
163  
164      // The size of entries' value field, in bytes.
165      size_t value_size_;
166  
167      // A byte cursor traversing buffer_.
168      ByteCursor cursor_;
169  
170      // Values for the entry this iterator refers to.
171      Entry entry_;
172    };
173  
174    // A source line, saved to be reported later.
175    struct Line {
176      uint64_t address;
177      const char* filename;
178      int number;
179    };
180  
181    // Return the name of the current symbol.
182    const char* SymbolString();
183  
184    // Process a compilation unit starting at symbol_.  Return true
185    // to continue processing, or false to abort.
186    bool ProcessCompilationUnit();
187  
188    // Process a function in current_source_file_ starting at symbol_.
189    // Return true to continue processing, or false to abort.
190    bool ProcessFunction();
191  
192    // Process an exported function symbol.
193    // Return true to continue processing, or false to abort.
194    bool ProcessExtern();
195  
196    // The STABS entries being parsed.
197    ByteBuffer entries_;
198  
199    // The string section to which the entries refer.
200    ByteBuffer strings_;
201  
202    // The iterator walking the STABS entries.
203    EntryIterator iterator_;
204  
205    // True if the data is "unitized"; see the explanation in the comment for
206    // StabsReader::StabsReader.
207    bool unitized_;
208  
209    StabsHandler* handler_;
210  
211    // The offset of the current compilation unit's strings within stabstr_.
212    size_t string_offset_;
213  
214    // The value string_offset_ should have for the next compilation unit,
215    // as established by N_UNDF entries.
216    size_t next_cu_string_offset_;
217  
218    // The current source file name.
219    const char* current_source_file_;
220  
221    // Mac OS X STABS place SLINE records before functions; we accumulate a
222    // vector of these until we see the FUN record, and then report them
223    // after the StartFunction call.
224    std::vector<Line> queued_lines_;
225  };
226  
227  // Consumer-provided callback structure for the STABS reader.  Clients
228  // of the STABS reader provide an instance of this structure.  The
229  // reader then invokes the member functions of that instance to report
230  // the information it finds.
231  //
232  // The default definitions of the member functions do nothing, and return
233  // true so processing will continue.
class StabsHandler {
 public:
  StabsHandler() {}
  virtual ~StabsHandler() {}

  // General notes that apply to every callback below:
  //
  // The reader keeps going until it reaches the end of the .stabs
  // section or until one of these functions returns false. Every
  // callback except Warning has a default definition that does nothing
  // and returns true.
  //
  // Addresses are passed along exactly as the STABS info records them.
  // No allowance is made for a module (a shared library, say) being
  // loaded at different addresses at different times. For STABS taken
  // from an ELF shared library, every address assumes the library sits
  // at its nominal load address; the values are *not* offsets from that
  // address. A consumer that wants offsets must subtract the library's
  // nominal load address itself.
  //
  // Every argument named FILENAME points at a string stored in the
  // .stabstr section. The Linux and Solaris linkers both factor
  // duplicate strings out of .stabstr, so two FILENAME values with
  // different addresses can be assumed to name different files.
  //
  // It is therefore safe to key a container on the pointer itself ---
  // std::map<const char*, ...>, say --- which compares string addresses
  // rather than string contents. All the strings live in one array of
  // characters (the .stabstr section), so comparing their addresses
  // gives predictable, if not lexicographically meaningful, results.

  // A compilation unit begins. FILENAME names its main source file and
  // ADDRESS is its base address. BUILD_DIRECTORY, when non-NULL, names
  // the build directory in which the compilation took place.
  virtual bool StartCompilationUnit(const char* filename,
                                    uint64_t address,
                                    const char* build_directory) {
    return true;
  }

  // The current compilation unit ends. A non-zero ADDRESS is the unit's
  // ending address. A zero ADDRESS means the ending address is not
  // available and the consumer has to infer it by other means.
  virtual bool EndCompilationUnit(uint64_t address) {
    return true;
  }

  // A function named NAME begins at ADDRESS. It belongs to the
  // compilation unit most recently started and not yet ended.
  //
  // Unlike the filenames, NAME does not point into the .stabstr section:
  // in the STABS data the name is followed by type information, and what
  // is passed here is the function name alone.
  //
  // For languages with name mangling, such as C++, NAME is the mangled
  // form.
  virtual bool StartFunction(const string& name,
                             uint64_t address) {
    return true;
  }

  // The current function ends. A non-zero ADDRESS is the function's
  // ending address. A zero ADDRESS means the ending address is not
  // available and the consumer has to infer it by other means.
  virtual bool EndFunction(uint64_t address) {
    return true;
  }

  // The code at ADDRESS comes from line NUMBER of the source file named
  // FILENAME. Where the line's code ends is left for the caller to
  // infer.
  virtual bool Line(uint64_t address,
                    const char* filename,
                    int number) {
    return true;
  }

  // An exported function called NAME is present at ADDRESS. Its size is
  // unknown.
  virtual bool Extern(const string& name,
                      uint64_t address) {
    return true;
  }

  // Report a warning. FORMAT is a printf-like format string that says
  // how the arguments after it are to be formatted. This is the one
  // callback without a default definition.
  virtual void Warning(const char* format, ...) = 0;
};
318  
319  } // namespace google_breakpad
320  
321  #endif  // COMMON_STABS_READER_H__