basic_source_line_resolver.cc
// Copyright 2010 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// basic_source_line_resolver.cc: BasicSourceLineResolver implementation.
//
// See basic_source_line_resolver.h and basic_source_line_resolver_types.h
// for documentation.
33 34 #ifdef HAVE_CONFIG_H 35 #include <config.h> // Must come first 36 #endif 37 38 #include <assert.h> 39 #include <stdio.h> 40 #include <string.h> 41 #include <sys/types.h> 42 #include <sys/stat.h> 43 44 #include <limits> 45 #include <map> 46 #include <memory> 47 #include <utility> 48 #include <vector> 49 50 #include "google_breakpad/processor/basic_source_line_resolver.h" 51 #include "processor/basic_source_line_resolver_types.h" 52 #include "processor/module_factory.h" 53 54 #include "processor/tokenize.h" 55 56 using std::deque; 57 using std::make_pair; 58 using std::map; 59 using std::unique_ptr; 60 using std::vector; 61 62 namespace google_breakpad { 63 64 #ifdef _WIN32 65 #ifdef _MSC_VER 66 #define strtok_r strtok_s 67 #endif 68 #define strtoull _strtoui64 69 #endif 70 71 namespace { 72 73 // Utility function to tokenize given the presence of an optional initial 74 // field. In this case, optional_field is the expected string for the optional 75 // field, and max_tokens is the maximum number of tokens including the optional 76 // field. Refer to the documentation for Tokenize for descriptions of the other 77 // arguments. 78 bool TokenizeWithOptionalField(char* line, 79 const char* optional_field, 80 const char* separators, 81 int max_tokens, 82 vector<char*>* tokens) { 83 // First tokenize assuming the optional field is not present. If we then see 84 // the optional field, additionally tokenize the last token into two tokens. 85 if (!Tokenize(line, separators, max_tokens - 1, tokens)) { 86 return false; 87 } 88 89 if (strcmp(tokens->front(), optional_field) == 0) { 90 // The optional field is present. Split the last token in two to recover the 91 // field prior to the last. 92 vector<char*> last_tokens; 93 if (!Tokenize(tokens->back(), separators, 2, &last_tokens)) { 94 return false; 95 } 96 // Replace the previous last token with the two new tokens. 
97 tokens->pop_back(); 98 tokens->push_back(last_tokens[0]); 99 tokens->push_back(last_tokens[1]); 100 } 101 102 return true; 103 } 104 105 } // namespace 106 107 static const char* kWhitespace = " \r\n"; 108 static const int kMaxErrorsPrinted = 5; 109 static const int kMaxErrorsBeforeBailing = 100; 110 111 BasicSourceLineResolver::BasicSourceLineResolver() : 112 SourceLineResolverBase(new BasicModuleFactory) { } 113 114 // static 115 void BasicSourceLineResolver::Module::LogParseError( 116 const string& message, 117 int line_number, 118 int* num_errors) { 119 if (++(*num_errors) <= kMaxErrorsPrinted) { 120 if (line_number > 0) { 121 BPLOG(ERROR) << "Line " << line_number << ": " << message; 122 } else { 123 BPLOG(ERROR) << message; 124 } 125 } 126 } 127 128 bool BasicSourceLineResolver::Module::LoadMapFromMemory( 129 char* memory_buffer, 130 size_t memory_buffer_size) { 131 linked_ptr<Function> cur_func; 132 int line_number = 0; 133 int num_errors = 0; 134 int inline_num_errors = 0; 135 char* save_ptr; 136 137 // If the length is 0, we can still pretend we have a symbol file. This is 138 // for scenarios that want to test symbol lookup, but don't necessarily care 139 // if certain modules do not have any information, like system libraries. 140 if (memory_buffer_size == 0) { 141 return true; 142 } 143 144 // Make sure the last character is null terminator. 145 size_t last_null_terminator = memory_buffer_size - 1; 146 if (memory_buffer[last_null_terminator] != '\0') { 147 memory_buffer[last_null_terminator] = '\0'; 148 } 149 150 // Skip any null terminators at the end of the memory buffer, and make sure 151 // there are no other null terminators in the middle of the memory buffer. 
152 bool has_null_terminator_in_the_middle = false; 153 while (last_null_terminator > 0 && 154 memory_buffer[last_null_terminator - 1] == '\0') { 155 last_null_terminator--; 156 } 157 for (size_t i = 0; i < last_null_terminator; i++) { 158 if (memory_buffer[i] == '\0') { 159 memory_buffer[i] = '_'; 160 has_null_terminator_in_the_middle = true; 161 } 162 } 163 if (has_null_terminator_in_the_middle) { 164 LogParseError( 165 "Null terminator is not expected in the middle of the symbol data", 166 line_number, 167 &num_errors); 168 } 169 170 char* buffer; 171 buffer = strtok_r(memory_buffer, "\r\n", &save_ptr); 172 173 while (buffer != NULL) { 174 ++line_number; 175 176 if (strncmp(buffer, "FILE ", 5) == 0) { 177 if (!ParseFile(buffer)) { 178 LogParseError("ParseFile on buffer failed", line_number, &num_errors); 179 } 180 } else if (strncmp(buffer, "STACK ", 6) == 0) { 181 if (!ParseStackInfo(buffer)) { 182 LogParseError("ParseStackInfo failed", line_number, &num_errors); 183 } 184 } else if (strncmp(buffer, "FUNC ", 5) == 0) { 185 cur_func.reset(ParseFunction(buffer)); 186 if (!cur_func.get()) { 187 LogParseError("ParseFunction failed", line_number, &num_errors); 188 } else { 189 // StoreRange will fail if the function has an invalid address or size. 190 // We'll silently ignore this, the function and any corresponding lines 191 // will be destroyed when cur_func is released. 192 functions_.StoreRange(cur_func->address, cur_func->size, cur_func); 193 } 194 } else if (strncmp(buffer, "PUBLIC ", 7) == 0) { 195 // Clear cur_func: public symbols don't contain line number information. 196 cur_func.reset(); 197 198 if (!ParsePublicSymbol(buffer)) { 199 LogParseError("ParsePublicSymbol failed", line_number, &num_errors); 200 } 201 } else if (strncmp(buffer, "MODULE ", 7) == 0) { 202 // Ignore these. They're not of any use to BasicSourceLineResolver, 203 // which is fed modules by a SymbolSupplier. 
These lines are present to 204 // aid other tools in properly placing symbol files so that they can 205 // be accessed by a SymbolSupplier. 206 // 207 // MODULE <guid> <age> <filename> 208 } else if (strncmp(buffer, "INFO ", 5) == 0) { 209 // Ignore these as well, they're similarly just for housekeeping. 210 // 211 // INFO CODE_ID <code id> <filename> 212 } else if (strncmp(buffer, "INLINE ", 7) == 0) { 213 linked_ptr<Inline> in = ParseInline(buffer); 214 if (!in.get()) 215 LogParseError("ParseInline failed", line_number, &inline_num_errors); 216 else 217 cur_func->AppendInline(in); 218 } else if (strncmp(buffer, "INLINE_ORIGIN ", 14) == 0) { 219 if (!ParseInlineOrigin(buffer)) { 220 LogParseError("ParseInlineOrigin failed", line_number, 221 &inline_num_errors); 222 } 223 } else { 224 if (!cur_func.get()) { 225 LogParseError("Found source line data without a function", 226 line_number, &num_errors); 227 } else { 228 Line* line = ParseLine(buffer); 229 if (!line) { 230 LogParseError("ParseLine failed", line_number, &num_errors); 231 } else { 232 cur_func->lines.StoreRange(line->address, line->size, 233 linked_ptr<Line>(line)); 234 } 235 } 236 } 237 if (num_errors > kMaxErrorsBeforeBailing) { 238 break; 239 } 240 buffer = strtok_r(NULL, "\r\n", &save_ptr); 241 } 242 is_corrupt_ = num_errors > 0; 243 return true; 244 } 245 246 void BasicSourceLineResolver::Module::ConstructInlineFrames( 247 StackFrame* frame, 248 MemAddr address, 249 const ContainedRangeMap<uint64_t, linked_ptr<Inline>>& inline_map, 250 deque<unique_ptr<StackFrame>>* inlined_frames) const { 251 vector<const linked_ptr<Inline>*> inlines; 252 if (!inline_map.RetrieveRanges(address, inlines)) { 253 return; 254 } 255 256 for (const linked_ptr<Inline>* const in : inlines) { 257 unique_ptr<StackFrame> new_frame = 258 unique_ptr<StackFrame>(new StackFrame(*frame)); 259 auto origin = inline_origins_.find(in->get()->origin_id); 260 if (origin != inline_origins_.end()) { 261 new_frame->function_name = 
origin->second->name; 262 } else { 263 new_frame->function_name = "<name omitted>"; 264 } 265 266 // Store call site file and line in current frame, which will be updated 267 // later. 268 new_frame->source_line = in->get()->call_site_line; 269 if (in->get()->has_call_site_file_id) { 270 auto file = files_.find(in->get()->call_site_file_id); 271 if (file != files_.end()) { 272 new_frame->source_file_name = file->second; 273 } 274 } 275 276 // Use the starting address of the inlined range as inlined function base. 277 new_frame->function_base = new_frame->module->base_address(); 278 for (const auto& range : in->get()->inline_ranges) { 279 if (address >= range.first && address < range.first + range.second) { 280 new_frame->function_base += range.first; 281 break; 282 } 283 } 284 new_frame->trust = StackFrame::FRAME_TRUST_INLINE; 285 286 // The inlines vector has an order from innermost entry to outermost entry. 287 // By push_back, we will have inlined_frames with the same order. 288 inlined_frames->push_back(std::move(new_frame)); 289 } 290 291 // Update the source file and source line for each inlined frame. 292 if (!inlined_frames->empty()) { 293 string parent_frame_source_file_name = frame->source_file_name; 294 int parent_frame_source_line = frame->source_line; 295 frame->source_file_name = inlined_frames->back()->source_file_name; 296 frame->source_line = inlined_frames->back()->source_line; 297 for (unique_ptr<StackFrame>& inlined_frame : *inlined_frames) { 298 std::swap(inlined_frame->source_file_name, parent_frame_source_file_name); 299 std::swap(inlined_frame->source_line, parent_frame_source_line); 300 } 301 } 302 } 303 304 void BasicSourceLineResolver::Module::LookupAddress( 305 StackFrame* frame, 306 deque<unique_ptr<StackFrame>>* inlined_frames) const { 307 MemAddr address = frame->instruction - frame->module->base_address(); 308 309 // First, look for a FUNC record that covers address. 
Use 310 // RetrieveNearestRange instead of RetrieveRange so that, if there 311 // is no such function, we can use the next function to bound the 312 // extent of the PUBLIC symbol we find, below. This does mean we 313 // need to check that address indeed falls within the function we 314 // find; do the range comparison in an overflow-friendly way. 315 linked_ptr<Function> func; 316 linked_ptr<PublicSymbol> public_symbol; 317 MemAddr function_base; 318 MemAddr function_size; 319 MemAddr public_address; 320 if (functions_.RetrieveNearestRange(address, &func, &function_base, 321 NULL /* delta */, &function_size) && 322 address >= function_base && address - function_base < function_size) { 323 frame->function_name = func->name; 324 frame->function_base = frame->module->base_address() + function_base; 325 frame->is_multiple = func->is_multiple; 326 327 linked_ptr<Line> line; 328 MemAddr line_base; 329 if (func->lines.RetrieveRange(address, &line, &line_base, NULL /* delta */, 330 NULL /* size */)) { 331 FileMap::const_iterator it = files_.find(line->source_file_id); 332 if (it != files_.end()) { 333 frame->source_file_name = files_.find(line->source_file_id)->second; 334 } 335 frame->source_line = line->line; 336 frame->source_line_base = frame->module->base_address() + line_base; 337 } 338 339 // Check if this is inlined function call. 
340 if (inlined_frames) { 341 ConstructInlineFrames(frame, address, func->inlines, inlined_frames); 342 } 343 } else if (public_symbols_.Retrieve(address, 344 &public_symbol, &public_address) && 345 (!func.get() || public_address > function_base)) { 346 frame->function_name = public_symbol->name; 347 frame->function_base = frame->module->base_address() + public_address; 348 frame->is_multiple = public_symbol->is_multiple; 349 } 350 } 351 352 WindowsFrameInfo* BasicSourceLineResolver::Module::FindWindowsFrameInfo( 353 const StackFrame* frame) const { 354 MemAddr address = frame->instruction - frame->module->base_address(); 355 scoped_ptr<WindowsFrameInfo> result(new WindowsFrameInfo()); 356 357 // We only know about WindowsFrameInfo::STACK_INFO_FRAME_DATA and 358 // WindowsFrameInfo::STACK_INFO_FPO. Prefer them in this order. 359 // WindowsFrameInfo::STACK_INFO_FRAME_DATA is the newer type that 360 // includes its own program string. 361 // WindowsFrameInfo::STACK_INFO_FPO is the older type 362 // corresponding to the FPO_DATA struct. See stackwalker_x86.cc. 363 linked_ptr<WindowsFrameInfo> frame_info; 364 if ((windows_frame_info_[WindowsFrameInfo::STACK_INFO_FRAME_DATA] 365 .RetrieveRange(address, &frame_info)) 366 || (windows_frame_info_[WindowsFrameInfo::STACK_INFO_FPO] 367 .RetrieveRange(address, &frame_info))) { 368 result->CopyFrom(*frame_info.get()); 369 return result.release(); 370 } 371 372 // Even without a relevant STACK line, many functions contain 373 // information about how much space their parameters consume on the 374 // stack. Use RetrieveNearestRange instead of RetrieveRange, so that 375 // we can use the function to bound the extent of the PUBLIC symbol, 376 // below. However, this does mean we need to check that ADDRESS 377 // falls within the retrieved function's range; do the range 378 // comparison in an overflow-friendly way. 
379 linked_ptr<Function> function; 380 MemAddr function_base, function_size; 381 if (functions_.RetrieveNearestRange(address, &function, &function_base, 382 NULL /* delta */, &function_size) && 383 address >= function_base && address - function_base < function_size) { 384 result->parameter_size = function->parameter_size; 385 result->valid |= WindowsFrameInfo::VALID_PARAMETER_SIZE; 386 return result.release(); 387 } 388 389 // PUBLIC symbols might have a parameter size. Use the function we 390 // found above to limit the range the public symbol covers. 391 linked_ptr<PublicSymbol> public_symbol; 392 MemAddr public_address; 393 if (public_symbols_.Retrieve(address, &public_symbol, &public_address) && 394 (!function.get() || public_address > function_base)) { 395 result->parameter_size = public_symbol->parameter_size; 396 } 397 398 return NULL; 399 } 400 401 CFIFrameInfo* BasicSourceLineResolver::Module::FindCFIFrameInfo( 402 const StackFrame* frame) const { 403 MemAddr address = frame->instruction - frame->module->base_address(); 404 MemAddr initial_base, initial_size; 405 string initial_rules; 406 407 // Find the initial rule whose range covers this address. That 408 // provides an initial set of register recovery rules. Then, walk 409 // forward from the initial rule's starting address to frame's 410 // instruction address, applying delta rules. 411 if (!cfi_initial_rules_.RetrieveRange(address, &initial_rules, &initial_base, 412 NULL /* delta */, &initial_size)) { 413 return NULL; 414 } 415 416 // Create a frame info structure, and populate it with the rules from 417 // the STACK CFI INIT record. 418 scoped_ptr<CFIFrameInfo> rules(new CFIFrameInfo()); 419 if (!ParseCFIRuleSet(initial_rules, rules.get())) 420 return NULL; 421 422 // Find the first delta rule that falls within the initial rule's range. 
423 map<MemAddr, string>::const_iterator delta = 424 cfi_delta_rules_.lower_bound(initial_base); 425 426 // Apply delta rules up to and including the frame's address. 427 while (delta != cfi_delta_rules_.end() && delta->first <= address) { 428 ParseCFIRuleSet(delta->second, rules.get()); 429 delta++; 430 } 431 432 return rules.release(); 433 } 434 435 bool BasicSourceLineResolver::Module::ParseFile(char* file_line) { 436 long index; 437 char* filename; 438 if (SymbolParseHelper::ParseFile(file_line, &index, &filename)) { 439 files_.insert(make_pair(index, string(filename))); 440 return true; 441 } 442 return false; 443 } 444 445 bool BasicSourceLineResolver::Module::ParseInlineOrigin( 446 char* inline_origin_line) { 447 bool has_file_id; 448 long origin_id; 449 long source_file_id; 450 char* origin_name; 451 if (SymbolParseHelper::ParseInlineOrigin(inline_origin_line, &has_file_id, 452 &origin_id, &source_file_id, 453 &origin_name)) { 454 inline_origins_.insert(make_pair( 455 origin_id, 456 new InlineOrigin(has_file_id, source_file_id, origin_name))); 457 return true; 458 } 459 return false; 460 } 461 462 linked_ptr<BasicSourceLineResolver::Inline> 463 BasicSourceLineResolver::Module::ParseInline(char* inline_line) { 464 bool has_call_site_file_id; 465 long inline_nest_level; 466 long call_site_line; 467 long call_site_file_id; 468 long origin_id; 469 vector<std::pair<MemAddr, MemAddr>> ranges; 470 if (SymbolParseHelper::ParseInline(inline_line, &has_call_site_file_id, 471 &inline_nest_level, &call_site_line, 472 &call_site_file_id, &origin_id, &ranges)) { 473 return linked_ptr<Inline>(new Inline(has_call_site_file_id, 474 inline_nest_level, call_site_line, 475 call_site_file_id, origin_id, ranges)); 476 } 477 return linked_ptr<Inline>(); 478 } 479 480 BasicSourceLineResolver::Function* 481 BasicSourceLineResolver::Module::ParseFunction(char* function_line) { 482 bool is_multiple; 483 uint64_t address; 484 uint64_t size; 485 long stack_param_size; 486 char* name; 
487 if (SymbolParseHelper::ParseFunction(function_line, &is_multiple, &address, 488 &size, &stack_param_size, &name)) { 489 return new Function(name, address, size, stack_param_size, is_multiple); 490 } 491 return NULL; 492 } 493 494 BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine( 495 char* line_line) { 496 uint64_t address; 497 uint64_t size; 498 long line_number; 499 long source_file; 500 501 if (SymbolParseHelper::ParseLine(line_line, &address, &size, &line_number, 502 &source_file)) { 503 return new Line(address, size, source_file, line_number); 504 } 505 return NULL; 506 } 507 508 bool BasicSourceLineResolver::Module::ParsePublicSymbol(char* public_line) { 509 bool is_multiple; 510 uint64_t address; 511 long stack_param_size; 512 char* name; 513 514 if (SymbolParseHelper::ParsePublicSymbol(public_line, &is_multiple, &address, 515 &stack_param_size, &name)) { 516 // A few public symbols show up with an address of 0. This has been seen 517 // in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow, 518 // RtlDescribeChunkLZNT1, and RtlReserveChunkLZNT1. They would conflict 519 // with one another if they were allowed into the public_symbols_ map, 520 // but since the address is obviously invalid, gracefully accept them 521 // as input without putting them into the map. 522 if (address == 0) { 523 return true; 524 } 525 526 linked_ptr<PublicSymbol> symbol(new PublicSymbol(name, address, 527 stack_param_size, 528 is_multiple)); 529 return public_symbols_.Store(address, symbol); 530 } 531 return false; 532 } 533 534 bool BasicSourceLineResolver::Module::ParseStackInfo(char* stack_info_line) { 535 // Skip "STACK " prefix. 536 stack_info_line += 6; 537 538 // Find the token indicating what sort of stack frame walking 539 // information this is. 
540 while (*stack_info_line == ' ') 541 stack_info_line++; 542 const char* platform = stack_info_line; 543 while (!strchr(kWhitespace, *stack_info_line)) 544 stack_info_line++; 545 *stack_info_line++ = '\0'; 546 547 // MSVC stack frame info. 548 if (strcmp(platform, "WIN") == 0) { 549 int type = 0; 550 uint64_t rva, code_size; 551 linked_ptr<WindowsFrameInfo> 552 stack_frame_info(WindowsFrameInfo::ParseFromString(stack_info_line, 553 type, 554 rva, 555 code_size)); 556 if (stack_frame_info == NULL) 557 return false; 558 559 // TODO(mmentovai): I wanted to use StoreRange's return value as this 560 // method's return value, but MSVC infrequently outputs stack info that 561 // violates the containment rules. This happens with a section of code 562 // in strncpy_s in test_app.cc (testdata/minidump2). There, problem looks 563 // like this: 564 // STACK WIN 4 4242 1a a 0 ... (STACK WIN 4 base size prolog 0 ...) 565 // STACK WIN 4 4243 2e 9 0 ... 566 // ContainedRangeMap treats these two blocks as conflicting. In reality, 567 // when the prolog lengths are taken into account, the actual code of 568 // these blocks doesn't conflict. However, we can't take the prolog lengths 569 // into account directly here because we'd wind up with a different set 570 // of range conflicts when MSVC outputs stack info like this: 571 // STACK WIN 4 1040 73 33 0 ... 572 // STACK WIN 4 105a 59 19 0 ... 573 // because in both of these entries, the beginning of the code after the 574 // prolog is at 0x1073, and the last byte of contained code is at 0x10b2. 575 // Perhaps we could get away with storing ranges by rva + prolog_size 576 // if ContainedRangeMap were modified to allow replacement of 577 // already-stored values. 578 579 windows_frame_info_[type].StoreRange(rva, code_size, stack_frame_info); 580 return true; 581 } else if (strcmp(platform, "CFI") == 0) { 582 // DWARF CFI stack frame info 583 return ParseCFIFrameInfo(stack_info_line); 584 } else { 585 // Something unrecognized. 
586 return false; 587 } 588 } 589 590 bool BasicSourceLineResolver::Module::ParseCFIFrameInfo( 591 char* stack_info_line) { 592 char* cursor; 593 594 // Is this an INIT record or a delta record? 595 char* init_or_address = strtok_r(stack_info_line, " \r\n", &cursor); 596 if (!init_or_address) 597 return false; 598 599 if (strcmp(init_or_address, "INIT") == 0) { 600 // This record has the form "STACK INIT <address> <size> <rules...>". 601 char* address_field = strtok_r(NULL, " \r\n", &cursor); 602 if (!address_field) return false; 603 604 char* size_field = strtok_r(NULL, " \r\n", &cursor); 605 if (!size_field) return false; 606 607 char* initial_rules = strtok_r(NULL, "\r\n", &cursor); 608 if (!initial_rules) return false; 609 610 MemAddr address = strtoul(address_field, NULL, 16); 611 MemAddr size = strtoul(size_field, NULL, 16); 612 cfi_initial_rules_.StoreRange(address, size, initial_rules); 613 return true; 614 } 615 616 // This record has the form "STACK <address> <rules...>". 617 char* address_field = init_or_address; 618 char* delta_rules = strtok_r(NULL, "\r\n", &cursor); 619 if (!delta_rules) return false; 620 MemAddr address = strtoul(address_field, NULL, 16); 621 cfi_delta_rules_[address] = delta_rules; 622 return true; 623 } 624 625 bool BasicSourceLineResolver::Function::AppendInline(linked_ptr<Inline> in) { 626 // This happends if in's parent wasn't added due to a malformed INLINE record. 627 if (in->inline_nest_level > last_added_inline_nest_level + 1) 628 return false; 629 630 last_added_inline_nest_level = in->inline_nest_level; 631 632 // Store all ranges into current level of inlines. 
633 for (auto range : in->inline_ranges) 634 inlines.StoreRange(range.first, range.second, in); 635 return true; 636 } 637 638 // static 639 bool SymbolParseHelper::ParseFile(char* file_line, long* index, 640 char** filename) { 641 // FILE <id> <filename> 642 assert(strncmp(file_line, "FILE ", 5) == 0); 643 file_line += 5; // skip prefix 644 645 vector<char*> tokens; 646 if (!Tokenize(file_line, kWhitespace, 2, &tokens)) { 647 return false; 648 } 649 650 char* after_number; 651 *index = strtol(tokens[0], &after_number, 10); 652 if (!IsValidAfterNumber(after_number) || *index < 0 || 653 *index == std::numeric_limits<long>::max()) { 654 return false; 655 } 656 657 *filename = tokens[1]; 658 if (!*filename) { 659 return false; 660 } 661 662 return true; 663 } 664 665 // static 666 bool SymbolParseHelper::ParseInlineOrigin(char* inline_origin_line, 667 bool* has_file_id, 668 long* origin_id, 669 long* file_id, 670 char** name) { 671 // Old INLINE_ORIGIN format: 672 // INLINE_ORIGIN <origin_id> <file_id> <name> 673 // New INLINE_ORIGIN format: 674 // INLINE_ORIGIN <origin_id> <name> 675 assert(strncmp(inline_origin_line, "INLINE_ORIGIN ", 14) == 0); 676 inline_origin_line += 14; // skip prefix 677 vector<char*> tokens; 678 // Split the line into two parts so that the first token is "<origin_id>", and 679 // second token is either "<file_id> <name>"" or "<name>"" depending on the 680 // format version. 681 if (!Tokenize(inline_origin_line, kWhitespace, 2, &tokens)) { 682 return false; 683 } 684 685 char* after_number; 686 *origin_id = strtol(tokens[0], &after_number, 10); 687 if (!IsValidAfterNumber(after_number) || *origin_id < 0 || 688 *origin_id == std::numeric_limits<long>::max()) { 689 return false; 690 } 691 692 // If the field after origin_id is a number, then it's old format. 
693 char* remaining_line = tokens[1]; 694 *has_file_id = true; 695 for (size_t i = 0; 696 i < strlen(remaining_line) && remaining_line[i] != ' ' && *has_file_id; 697 ++i) { 698 // If the file id is -1, it might be an artificial function that doesn't 699 // have file id. So, we consider -1 as a valid special case. 700 if (remaining_line[i] == '-' && i == 0) { 701 continue; 702 } 703 *has_file_id = isdigit(remaining_line[i]); 704 } 705 706 if (*has_file_id) { 707 // If it's old format, split "<file_id> <name>" to {"<field_id>", "<name>"}. 708 if (!Tokenize(remaining_line, kWhitespace, 2, &tokens)) { 709 return false; 710 } 711 *file_id = strtol(tokens[0], &after_number, 10); 712 // If the file id is -1, it might be an artificial function that doesn't 713 // have file id. So, we consider -1 as a valid special case. 714 if (!IsValidAfterNumber(after_number) || *file_id < -1 || 715 *file_id == std::numeric_limits<long>::max()) { 716 return false; 717 } 718 } 719 720 *name = tokens[1]; 721 if (!*name) { 722 return false; 723 } 724 725 return true; 726 } 727 728 // static 729 bool SymbolParseHelper::ParseInline( 730 char* inline_line, 731 bool* has_call_site_file_id, 732 long* inline_nest_level, 733 long* call_site_line, 734 long* call_site_file_id, 735 long* origin_id, 736 vector<std::pair<MemAddr, MemAddr>>* ranges) { 737 // Old INLINE format: 738 // INLINE <inline_nest_level> <call_site_line> <origin_id> [<address> <size>]+ 739 // New INLINE format: 740 // INLINE <inline_nest_level> <call_site_line> <call_site_file_id> <origin_id> 741 // [<address> <size>]+ 742 assert(strncmp(inline_line, "INLINE ", 7) == 0); 743 inline_line += 7; // skip prefix 744 745 vector<char*> tokens; 746 // Increase max_tokens if necessary. 747 Tokenize(inline_line, kWhitespace, 512, &tokens); 748 749 // Determine the version of INLINE record by parity of the vector length. 750 *has_call_site_file_id = tokens.size() % 2 == 0; 751 752 // The length of the vector should be at least 5. 
753 if (tokens.size() < 5) { 754 return false; 755 } 756 757 char* after_number; 758 size_t next_idx = 0; 759 760 *inline_nest_level = strtol(tokens[next_idx++], &after_number, 10); 761 if (!IsValidAfterNumber(after_number) || *inline_nest_level < 0 || 762 *inline_nest_level == std::numeric_limits<long>::max()) { 763 return false; 764 } 765 766 *call_site_line = strtol(tokens[next_idx++], &after_number, 10); 767 if (!IsValidAfterNumber(after_number) || *call_site_line < 0 || 768 *call_site_line == std::numeric_limits<long>::max()) { 769 return false; 770 } 771 772 if (*has_call_site_file_id) { 773 *call_site_file_id = strtol(tokens[next_idx++], &after_number, 10); 774 // If the file id is -1, it might be an artificial function that doesn't 775 // have file id. So, we consider -1 as a valid special case. 776 if (!IsValidAfterNumber(after_number) || *call_site_file_id < -1 || 777 *call_site_file_id == std::numeric_limits<long>::max()) { 778 return false; 779 } 780 } 781 782 *origin_id = strtol(tokens[next_idx++], &after_number, 10); 783 if (!IsValidAfterNumber(after_number) || *origin_id < 0 || 784 *origin_id == std::numeric_limits<long>::max()) { 785 return false; 786 } 787 788 while (next_idx < tokens.size()) { 789 MemAddr address = strtoull(tokens[next_idx++], &after_number, 16); 790 if (!IsValidAfterNumber(after_number) || 791 address == std::numeric_limits<unsigned long long>::max()) { 792 return false; 793 } 794 MemAddr size = strtoull(tokens[next_idx++], &after_number, 16); 795 if (!IsValidAfterNumber(after_number) || 796 size == std::numeric_limits<unsigned long long>::max()) { 797 return false; 798 } 799 ranges->push_back({address, size}); 800 } 801 802 return true; 803 } 804 805 // static 806 bool SymbolParseHelper::ParseFunction(char* function_line, bool* is_multiple, 807 uint64_t* address, uint64_t* size, 808 long* stack_param_size, char** name) { 809 // FUNC [<multiple>] <address> <size> <stack_param_size> <name> 810 assert(strncmp(function_line, "FUNC 
", 5) == 0); 811 function_line += 5; // skip prefix 812 813 vector<char*> tokens; 814 if (!TokenizeWithOptionalField(function_line, "m", kWhitespace, 5, &tokens)) { 815 return false; 816 } 817 818 *is_multiple = strcmp(tokens[0], "m") == 0; 819 int next_token = *is_multiple ? 1 : 0; 820 821 char* after_number; 822 *address = strtoull(tokens[next_token++], &after_number, 16); 823 if (!IsValidAfterNumber(after_number) || 824 *address == std::numeric_limits<unsigned long long>::max()) { 825 return false; 826 } 827 *size = strtoull(tokens[next_token++], &after_number, 16); 828 if (!IsValidAfterNumber(after_number) || 829 *size == std::numeric_limits<unsigned long long>::max()) { 830 return false; 831 } 832 *stack_param_size = strtol(tokens[next_token++], &after_number, 16); 833 if (!IsValidAfterNumber(after_number) || 834 *stack_param_size == std::numeric_limits<long>::max() || 835 *stack_param_size < 0) { 836 return false; 837 } 838 *name = tokens[next_token++]; 839 840 return true; 841 } 842 843 // static 844 bool SymbolParseHelper::ParseLine(char* line_line, uint64_t* address, 845 uint64_t* size, long* line_number, 846 long* source_file) { 847 // <address> <size> <line number> <source file id> 848 vector<char*> tokens; 849 if (!Tokenize(line_line, kWhitespace, 4, &tokens)) { 850 return false; 851 } 852 853 char* after_number; 854 *address = strtoull(tokens[0], &after_number, 16); 855 if (!IsValidAfterNumber(after_number) || 856 *address == std::numeric_limits<unsigned long long>::max()) { 857 return false; 858 } 859 *size = strtoull(tokens[1], &after_number, 16); 860 if (!IsValidAfterNumber(after_number) || 861 *size == std::numeric_limits<unsigned long long>::max()) { 862 return false; 863 } 864 *line_number = strtol(tokens[2], &after_number, 10); 865 if (!IsValidAfterNumber(after_number) || 866 *line_number == std::numeric_limits<long>::max()) { 867 return false; 868 } 869 *source_file = strtol(tokens[3], &after_number, 10); 870 if 
(!IsValidAfterNumber(after_number) || *source_file < 0 || 871 *source_file == std::numeric_limits<long>::max()) { 872 return false; 873 } 874 875 // Valid line numbers normally start from 1, however there are functions that 876 // are associated with a source file but not associated with any line number 877 // (block helper function) and for such functions the symbol file contains 0 878 // for the line numbers. Hence, 0 should be treated as a valid line number. 879 // For more information on block helper functions, please, take a look at: 880 // http://clang.llvm.org/docs/Block-ABI-Apple.html 881 if (*line_number < 0) { 882 return false; 883 } 884 885 return true; 886 } 887 888 // static 889 bool SymbolParseHelper::ParsePublicSymbol(char* public_line, bool* is_multiple, 890 uint64_t* address, 891 long* stack_param_size, 892 char** name) { 893 // PUBLIC [<multiple>] <address> <stack_param_size> <name> 894 assert(strncmp(public_line, "PUBLIC ", 7) == 0); 895 public_line += 7; // skip prefix 896 897 vector<char*> tokens; 898 if (!TokenizeWithOptionalField(public_line, "m", kWhitespace, 4, &tokens)) { 899 return false; 900 } 901 902 *is_multiple = strcmp(tokens[0], "m") == 0; 903 int next_token = *is_multiple ? 
1 : 0; 904 905 char* after_number; 906 *address = strtoull(tokens[next_token++], &after_number, 16); 907 if (!IsValidAfterNumber(after_number) || 908 *address == std::numeric_limits<unsigned long long>::max()) { 909 return false; 910 } 911 *stack_param_size = strtol(tokens[next_token++], &after_number, 16); 912 if (!IsValidAfterNumber(after_number) || 913 *stack_param_size == std::numeric_limits<long>::max() || 914 *stack_param_size < 0) { 915 return false; 916 } 917 *name = tokens[next_token++]; 918 919 return true; 920 } 921 922 // static 923 bool SymbolParseHelper::IsValidAfterNumber(char* after_number) { 924 if (after_number != NULL && strchr(kWhitespace, *after_number) != NULL) { 925 return true; 926 } 927 return false; 928 } 929 930 } // namespace google_breakpad