/ src / common / stabs_reader.cc
stabs_reader.cc
  1  // Copyright 2010 Google LLC
  2  //
  3  // Redistribution and use in source and binary forms, with or without
  4  // modification, are permitted provided that the following conditions are
  5  // met:
  6  //
  7  //     * Redistributions of source code must retain the above copyright
  8  // notice, this list of conditions and the following disclaimer.
  9  //     * Redistributions in binary form must reproduce the above
 10  // copyright notice, this list of conditions and the following disclaimer
 11  // in the documentation and/or other materials provided with the
 12  // distribution.
 13  //     * Neither the name of Google LLC nor the names of its
 14  // contributors may be used to endorse or promote products derived from
 15  // this software without specific prior written permission.
 16  //
 17  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 21  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 22  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 23  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 27  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28  
 29  // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
 30  
 31  // This file implements the google_breakpad::StabsReader class.
 32  // See stabs_reader.h.
 33  
 34  #ifdef HAVE_CONFIG_H
 35  #include <config.h>  // Must come first
 36  #endif
 37  
 38  #include "common/stabs_reader.h"
 39  
 40  #include <assert.h>
 41  #include <stab.h>
 42  #include <string.h>
 43  
 44  #include <string>
 45  
 46  #include "common/using_std_string.h"
 47  
 48  using std::vector;
 49  
 50  namespace google_breakpad {
 51  
 52  StabsReader::EntryIterator::EntryIterator(const ByteBuffer* buffer,
 53                                            bool big_endian, size_t value_size)
 54      : value_size_(value_size), cursor_(buffer, big_endian) {
 55    // Actually, we could handle weird sizes just fine, but they're
 56    // probably mistakes --- expressed in bits, say.
 57    assert(value_size == 4 || value_size == 8);
 58    entry_.index = 0;
 59    Fetch();
 60  }
 61  
 62  void StabsReader::EntryIterator::Fetch() {
 63    cursor_
 64        .Read(4, false, &entry_.name_offset)
 65        .Read(1, false, &entry_.type)
 66        .Read(1, false, &entry_.other)
 67        .Read(2, false, &entry_.descriptor)
 68        .Read(value_size_, false, &entry_.value);
 69    entry_.at_end = !cursor_;
 70  }
 71  
 72  StabsReader::StabsReader(const uint8_t* stab,    size_t stab_size,
 73                           const uint8_t* stabstr, size_t stabstr_size,
 74                           bool big_endian, size_t value_size, bool unitized,
 75                           StabsHandler* handler)
 76      : entries_(stab, stab_size),
 77        strings_(stabstr, stabstr_size),
 78        iterator_(&entries_, big_endian, value_size),
 79        unitized_(unitized),
 80        handler_(handler),
 81        string_offset_(0),
 82        next_cu_string_offset_(0),
 83        current_source_file_(NULL) { }
 84  
 85  const char* StabsReader::SymbolString() {
 86    ptrdiff_t offset = string_offset_ + iterator_->name_offset;
 87    if (offset < 0 || (size_t) offset >= strings_.Size()) {
 88      handler_->Warning("symbol %d: name offset outside the string section\n",
 89                        iterator_->index);
 90      // Return our null string, to keep our promise about all names being
 91      // taken from the string section.
 92      offset = 0;
 93    }
 94    return reinterpret_cast<const char*>(strings_.start + offset);
 95  }
 96  
 97  bool StabsReader::Process() {
 98    while (!iterator_->at_end) {
 99      if (iterator_->type == N_SO) {
100        if (! ProcessCompilationUnit())
101          return false;
102      } else if (iterator_->type == N_UNDF && unitized_) {
103        // In unitized STABS (including Linux STABS, and pretty much anything
104        // else that puts STABS data in sections), at the head of each
105        // compilation unit's entries there is an N_UNDF stab giving the
106        // number of symbols in the compilation unit, and the number of bytes
107        // that compilation unit's strings take up in the .stabstr section.
108        // Each CU's strings are separate; the n_strx values are offsets
109        // within the current CU's portion of the .stabstr section.
110        //
111        // As an optimization, the GNU linker combines all the
112        // compilation units into one, with a single N_UNDF at the
113        // beginning. However, other linkers, like Gold, do not perform
114        // this optimization.
115        string_offset_ = next_cu_string_offset_;
116        next_cu_string_offset_ = iterator_->value;
117        ++iterator_;
118      }
119  #if defined(HAVE_MACH_O_NLIST_H)
120      // Export symbols in Mach-O binaries look like this.
121      // This is necessary in order to be able to dump symbols
122      // from OS X system libraries.
123      else if ((iterator_->type & N_STAB) == 0 &&
124                 (iterator_->type & N_TYPE) == N_SECT) {
125        ProcessExtern();
126      }
127  #endif
128      else {
129        ++iterator_;
130      }
131    }
132    return true;
133  }
134  
135  bool StabsReader::ProcessCompilationUnit() {
136    assert(!iterator_->at_end && iterator_->type == N_SO);
137  
138    // There may be an N_SO entry whose name ends with a slash,
139    // indicating the directory in which the compilation occurred.
140    // The build directory defaults to NULL.
141    const char* build_directory = NULL;
142    {
143      const char* name = SymbolString();
144      if (name[0] && name[strlen(name) - 1] == '/') {
145        build_directory = name;
146        ++iterator_;
147      }
148    }
149  
150    // We expect to see an N_SO entry with a filename next, indicating
151    // the start of the compilation unit.
152    {
153      if (iterator_->at_end || iterator_->type != N_SO)
154        return true;
155      const char* name = SymbolString();
156      if (name[0] == '\0') {
157        // This seems to be a stray end-of-compilation-unit marker;
158        // consume it, but don't report the end, since we didn't see a
159        // beginning.
160        ++iterator_;
161        return true;
162      }
163      current_source_file_ = name;
164    }
165  
166    if (! handler_->StartCompilationUnit(current_source_file_,
167                                         iterator_->value,
168                                         build_directory))
169      return false;
170  
171    ++iterator_;
172  
173    // The STABS documentation says that some compilers may emit
174    // additional N_SO entries with names immediately following the
175    // first, and that they should be ignored.  However, the original
176    // Breakpad STABS reader doesn't ignore them, so we won't either.
177  
178    // Process the body of the compilation unit, up to the next N_SO.
179    while (!iterator_->at_end && iterator_->type != N_SO) {
180      if (iterator_->type == N_FUN) {
181        if (! ProcessFunction())
182          return false;
183      } else if (iterator_->type == N_SLINE) {
184        // Mac OS X STABS place SLINE records before functions.
185        Line line;
186        // The value of an N_SLINE entry that appears outside a function is
187        // the absolute address of the line.
188        line.address = iterator_->value;
189        line.filename = current_source_file_;
190        // The n_desc of a N_SLINE entry is the line number.  It's a
191        // signed 16-bit field; line numbers from 32768 to 65535 are
192        // stored as n-65536.
193        line.number = (uint16_t) iterator_->descriptor;
194        queued_lines_.push_back(line);
195        ++iterator_;
196      } else if (iterator_->type == N_SOL) {
197        current_source_file_ = SymbolString();
198        ++iterator_;
199      } else {
200        // Ignore anything else.
201        ++iterator_;
202      }
203    }
204  
205    // An N_SO with an empty name indicates the end of the compilation
206    // unit.  Default to zero.
207    uint64_t ending_address = 0;
208    if (!iterator_->at_end) {
209      assert(iterator_->type == N_SO);
210      const char* name = SymbolString();
211      if (name[0] == '\0') {
212        ending_address = iterator_->value;
213        ++iterator_;
214      }
215    }
216  
217    if (! handler_->EndCompilationUnit(ending_address))
218      return false;
219  
220    queued_lines_.clear();
221  
222    return true;
223  }
224  
225  bool StabsReader::ProcessFunction() {
226    assert(!iterator_->at_end && iterator_->type == N_FUN);
227  
228    uint64_t function_address = iterator_->value;
229    // The STABS string for an N_FUN entry is the name of the function,
230    // followed by a colon, followed by type information for the
231    // function.  We want to pass the name alone to StartFunction.
232    const char* stab_string = SymbolString();
233    const char* name_end = strchr(stab_string, ':');
234    if (! name_end)
235      name_end = stab_string + strlen(stab_string);
236    string name(stab_string, name_end - stab_string);
237    if (! handler_->StartFunction(name, function_address))
238      return false;
239    ++iterator_;
240  
241    // If there were any SLINE records given before the function, report them now.
242    for (vector<Line>::const_iterator it = queued_lines_.begin();
243         it != queued_lines_.end(); it++) {
244      if (!handler_->Line(it->address, it->filename, it->number))
245        return false;
246    }
247    queued_lines_.clear();
248  
249    while (!iterator_->at_end) {
250      if (iterator_->type == N_SO || iterator_->type == N_FUN)
251        break;
252      else if (iterator_->type == N_SLINE) {
253        // The value of an N_SLINE entry is the offset of the line from
254        // the function's start address.
255        uint64_t line_address = function_address + iterator_->value;
256        // The n_desc of a N_SLINE entry is the line number.  It's a
257        // signed 16-bit field; line numbers from 32768 to 65535 are
258        // stored as n-65536.
259        uint16_t line_number = iterator_->descriptor;
260        if (! handler_->Line(line_address, current_source_file_, line_number))
261          return false;
262        ++iterator_;
263      } else if (iterator_->type == N_SOL) {
264        current_source_file_ = SymbolString();
265        ++iterator_;
266      } else
267        // Ignore anything else.
268        ++iterator_;
269    }
270  
271    // We've reached the end of the function. See if we can figure out its
272    // ending address.
273    uint64_t ending_address = 0;
274    if (!iterator_->at_end) {
275      assert(iterator_->type == N_SO || iterator_->type == N_FUN);
276      if (iterator_->type == N_FUN) {
277        const char* symbol_name = SymbolString();
278        if (symbol_name[0] == '\0') {
279          // An N_FUN entry with no name is a terminator for this function;
280          // its value is the function's size.
281          ending_address = function_address + iterator_->value;
282          ++iterator_;
283        } else {
284          // An N_FUN entry with a name is the next function, and we can take
285          // its value as our ending address. Don't advance the iterator, as
286          // we'll use this symbol to start the next function as well.
287          ending_address = iterator_->value;
288        }
289      } else {
290        // An N_SO entry could be an end-of-compilation-unit marker, or the
291        // start of the next compilation unit, but in either case, its value
292        // is our ending address. We don't advance the iterator;
293        // ProcessCompilationUnit will decide what to do with this symbol.
294        ending_address = iterator_->value;
295      }
296    }
297  
298    if (! handler_->EndFunction(ending_address))
299      return false;
300  
301    return true;
302  }
303  
304  bool StabsReader::ProcessExtern() {
305  #if defined(HAVE_MACH_O_NLIST_H)
306    assert(!iterator_->at_end &&
307           (iterator_->type & N_STAB) == 0 &&
308           (iterator_->type & N_TYPE) == N_SECT);
309  #endif
310  
311    // TODO(mark): only do symbols in the text section?
312    if (!handler_->Extern(SymbolString(), iterator_->value))
313      return false;
314  
315    ++iterator_;
316    return true;
317  }
318  
319  } // namespace google_breakpad