stabs_reader.cc
// Copyright 2010 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// This file implements the google_breakpad::StabsReader class.
// See stabs_reader.h.
33 34 #ifdef HAVE_CONFIG_H 35 #include <config.h> // Must come first 36 #endif 37 38 #include "common/stabs_reader.h" 39 40 #include <assert.h> 41 #include <stab.h> 42 #include <string.h> 43 44 #include <string> 45 46 #include "common/using_std_string.h" 47 48 using std::vector; 49 50 namespace google_breakpad { 51 52 StabsReader::EntryIterator::EntryIterator(const ByteBuffer* buffer, 53 bool big_endian, size_t value_size) 54 : value_size_(value_size), cursor_(buffer, big_endian) { 55 // Actually, we could handle weird sizes just fine, but they're 56 // probably mistakes --- expressed in bits, say. 57 assert(value_size == 4 || value_size == 8); 58 entry_.index = 0; 59 Fetch(); 60 } 61 62 void StabsReader::EntryIterator::Fetch() { 63 cursor_ 64 .Read(4, false, &entry_.name_offset) 65 .Read(1, false, &entry_.type) 66 .Read(1, false, &entry_.other) 67 .Read(2, false, &entry_.descriptor) 68 .Read(value_size_, false, &entry_.value); 69 entry_.at_end = !cursor_; 70 } 71 72 StabsReader::StabsReader(const uint8_t* stab, size_t stab_size, 73 const uint8_t* stabstr, size_t stabstr_size, 74 bool big_endian, size_t value_size, bool unitized, 75 StabsHandler* handler) 76 : entries_(stab, stab_size), 77 strings_(stabstr, stabstr_size), 78 iterator_(&entries_, big_endian, value_size), 79 unitized_(unitized), 80 handler_(handler), 81 string_offset_(0), 82 next_cu_string_offset_(0), 83 current_source_file_(NULL) { } 84 85 const char* StabsReader::SymbolString() { 86 ptrdiff_t offset = string_offset_ + iterator_->name_offset; 87 if (offset < 0 || (size_t) offset >= strings_.Size()) { 88 handler_->Warning("symbol %d: name offset outside the string section\n", 89 iterator_->index); 90 // Return our null string, to keep our promise about all names being 91 // taken from the string section. 
92 offset = 0; 93 } 94 return reinterpret_cast<const char*>(strings_.start + offset); 95 } 96 97 bool StabsReader::Process() { 98 while (!iterator_->at_end) { 99 if (iterator_->type == N_SO) { 100 if (! ProcessCompilationUnit()) 101 return false; 102 } else if (iterator_->type == N_UNDF && unitized_) { 103 // In unitized STABS (including Linux STABS, and pretty much anything 104 // else that puts STABS data in sections), at the head of each 105 // compilation unit's entries there is an N_UNDF stab giving the 106 // number of symbols in the compilation unit, and the number of bytes 107 // that compilation unit's strings take up in the .stabstr section. 108 // Each CU's strings are separate; the n_strx values are offsets 109 // within the current CU's portion of the .stabstr section. 110 // 111 // As an optimization, the GNU linker combines all the 112 // compilation units into one, with a single N_UNDF at the 113 // beginning. However, other linkers, like Gold, do not perform 114 // this optimization. 115 string_offset_ = next_cu_string_offset_; 116 next_cu_string_offset_ = iterator_->value; 117 ++iterator_; 118 } 119 #if defined(HAVE_MACH_O_NLIST_H) 120 // Export symbols in Mach-O binaries look like this. 121 // This is necessary in order to be able to dump symbols 122 // from OS X system libraries. 123 else if ((iterator_->type & N_STAB) == 0 && 124 (iterator_->type & N_TYPE) == N_SECT) { 125 ProcessExtern(); 126 } 127 #endif 128 else { 129 ++iterator_; 130 } 131 } 132 return true; 133 } 134 135 bool StabsReader::ProcessCompilationUnit() { 136 assert(!iterator_->at_end && iterator_->type == N_SO); 137 138 // There may be an N_SO entry whose name ends with a slash, 139 // indicating the directory in which the compilation occurred. 140 // The build directory defaults to NULL. 
141 const char* build_directory = NULL; 142 { 143 const char* name = SymbolString(); 144 if (name[0] && name[strlen(name) - 1] == '/') { 145 build_directory = name; 146 ++iterator_; 147 } 148 } 149 150 // We expect to see an N_SO entry with a filename next, indicating 151 // the start of the compilation unit. 152 { 153 if (iterator_->at_end || iterator_->type != N_SO) 154 return true; 155 const char* name = SymbolString(); 156 if (name[0] == '\0') { 157 // This seems to be a stray end-of-compilation-unit marker; 158 // consume it, but don't report the end, since we didn't see a 159 // beginning. 160 ++iterator_; 161 return true; 162 } 163 current_source_file_ = name; 164 } 165 166 if (! handler_->StartCompilationUnit(current_source_file_, 167 iterator_->value, 168 build_directory)) 169 return false; 170 171 ++iterator_; 172 173 // The STABS documentation says that some compilers may emit 174 // additional N_SO entries with names immediately following the 175 // first, and that they should be ignored. However, the original 176 // Breakpad STABS reader doesn't ignore them, so we won't either. 177 178 // Process the body of the compilation unit, up to the next N_SO. 179 while (!iterator_->at_end && iterator_->type != N_SO) { 180 if (iterator_->type == N_FUN) { 181 if (! ProcessFunction()) 182 return false; 183 } else if (iterator_->type == N_SLINE) { 184 // Mac OS X STABS place SLINE records before functions. 185 Line line; 186 // The value of an N_SLINE entry that appears outside a function is 187 // the absolute address of the line. 188 line.address = iterator_->value; 189 line.filename = current_source_file_; 190 // The n_desc of a N_SLINE entry is the line number. It's a 191 // signed 16-bit field; line numbers from 32768 to 65535 are 192 // stored as n-65536. 
193 line.number = (uint16_t) iterator_->descriptor; 194 queued_lines_.push_back(line); 195 ++iterator_; 196 } else if (iterator_->type == N_SOL) { 197 current_source_file_ = SymbolString(); 198 ++iterator_; 199 } else { 200 // Ignore anything else. 201 ++iterator_; 202 } 203 } 204 205 // An N_SO with an empty name indicates the end of the compilation 206 // unit. Default to zero. 207 uint64_t ending_address = 0; 208 if (!iterator_->at_end) { 209 assert(iterator_->type == N_SO); 210 const char* name = SymbolString(); 211 if (name[0] == '\0') { 212 ending_address = iterator_->value; 213 ++iterator_; 214 } 215 } 216 217 if (! handler_->EndCompilationUnit(ending_address)) 218 return false; 219 220 queued_lines_.clear(); 221 222 return true; 223 } 224 225 bool StabsReader::ProcessFunction() { 226 assert(!iterator_->at_end && iterator_->type == N_FUN); 227 228 uint64_t function_address = iterator_->value; 229 // The STABS string for an N_FUN entry is the name of the function, 230 // followed by a colon, followed by type information for the 231 // function. We want to pass the name alone to StartFunction. 232 const char* stab_string = SymbolString(); 233 const char* name_end = strchr(stab_string, ':'); 234 if (! name_end) 235 name_end = stab_string + strlen(stab_string); 236 string name(stab_string, name_end - stab_string); 237 if (! handler_->StartFunction(name, function_address)) 238 return false; 239 ++iterator_; 240 241 // If there were any SLINE records given before the function, report them now. 242 for (vector<Line>::const_iterator it = queued_lines_.begin(); 243 it != queued_lines_.end(); it++) { 244 if (!handler_->Line(it->address, it->filename, it->number)) 245 return false; 246 } 247 queued_lines_.clear(); 248 249 while (!iterator_->at_end) { 250 if (iterator_->type == N_SO || iterator_->type == N_FUN) 251 break; 252 else if (iterator_->type == N_SLINE) { 253 // The value of an N_SLINE entry is the offset of the line from 254 // the function's start address. 
255 uint64_t line_address = function_address + iterator_->value; 256 // The n_desc of a N_SLINE entry is the line number. It's a 257 // signed 16-bit field; line numbers from 32768 to 65535 are 258 // stored as n-65536. 259 uint16_t line_number = iterator_->descriptor; 260 if (! handler_->Line(line_address, current_source_file_, line_number)) 261 return false; 262 ++iterator_; 263 } else if (iterator_->type == N_SOL) { 264 current_source_file_ = SymbolString(); 265 ++iterator_; 266 } else 267 // Ignore anything else. 268 ++iterator_; 269 } 270 271 // We've reached the end of the function. See if we can figure out its 272 // ending address. 273 uint64_t ending_address = 0; 274 if (!iterator_->at_end) { 275 assert(iterator_->type == N_SO || iterator_->type == N_FUN); 276 if (iterator_->type == N_FUN) { 277 const char* symbol_name = SymbolString(); 278 if (symbol_name[0] == '\0') { 279 // An N_FUN entry with no name is a terminator for this function; 280 // its value is the function's size. 281 ending_address = function_address + iterator_->value; 282 ++iterator_; 283 } else { 284 // An N_FUN entry with a name is the next function, and we can take 285 // its value as our ending address. Don't advance the iterator, as 286 // we'll use this symbol to start the next function as well. 287 ending_address = iterator_->value; 288 } 289 } else { 290 // An N_SO entry could be an end-of-compilation-unit marker, or the 291 // start of the next compilation unit, but in either case, its value 292 // is our ending address. We don't advance the iterator; 293 // ProcessCompilationUnit will decide what to do with this symbol. 294 ending_address = iterator_->value; 295 } 296 } 297 298 if (! 
handler_->EndFunction(ending_address)) 299 return false; 300 301 return true; 302 } 303 304 bool StabsReader::ProcessExtern() { 305 #if defined(HAVE_MACH_O_NLIST_H) 306 assert(!iterator_->at_end && 307 (iterator_->type & N_STAB) == 0 && 308 (iterator_->type & N_TYPE) == N_SECT); 309 #endif 310 311 // TODO(mark): only do symbols in the text section? 312 if (!handler_->Extern(SymbolString(), iterator_->value)) 313 return false; 314 315 ++iterator_; 316 return true; 317 } 318 319 } // namespace google_breakpad