pdb_source_line_writer.cc
// Copyright 2006 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 29 #ifdef HAVE_CONFIG_H 30 #include <config.h> // Must come first 31 #endif 32 33 #include "common/windows/pdb_source_line_writer.h" 34 35 #include <windows.h> 36 #include <winnt.h> 37 #include <atlbase.h> 38 #include <dia2.h> 39 #include <diacreate.h> 40 #include <ImageHlp.h> 41 #include <stdio.h> 42 43 #include <algorithm> 44 #include <limits> 45 #include <map> 46 #include <memory> 47 #include <set> 48 #include <utility> 49 50 #include "common/windows/dia_util.h" 51 #include "common/windows/guid_string.h" 52 #include "common/windows/pe_util.h" 53 #include "common/windows/string_utils-inl.h" 54 55 // This constant may be missing from DbgHelp.h. See the documentation for 56 // IDiaSymbol::get_undecoratedNameEx. 57 #ifndef UNDNAME_NO_ECSU 58 #define UNDNAME_NO_ECSU 0x8000 // Suppresses enum/class/struct/union. 59 #endif // UNDNAME_NO_ECSU 60 61 namespace google_breakpad { 62 63 namespace { 64 65 using std::set; 66 using std::unique_ptr; 67 using std::vector; 68 69 // The symbol (among possibly many) selected to represent an rva. 70 struct SelectedSymbol { 71 SelectedSymbol(const CComPtr<IDiaSymbol>& symbol, bool is_public) 72 : symbol(symbol), is_public(is_public), is_multiple(false) {} 73 74 // The symbol to use for an rva. 75 CComPtr<IDiaSymbol> symbol; 76 // Whether this is a public or function symbol. 77 bool is_public; 78 // Whether the rva has multiple associated symbols. An rva will correspond to 79 // multiple symbols in the case of linker identical symbol folding. 80 bool is_multiple; 81 }; 82 83 // Maps rva to the symbol to use for that address. 84 typedef std::map<DWORD, SelectedSymbol> SymbolMap; 85 86 // Record this in the map as the selected symbol for the rva if it satisfies the 87 // necessary conditions. 
88 void MaybeRecordSymbol(DWORD rva, 89 const CComPtr<IDiaSymbol> symbol, 90 bool is_public, 91 SymbolMap* map) { 92 SymbolMap::iterator loc = map->find(rva); 93 if (loc == map->end()) { 94 map->insert(std::make_pair(rva, SelectedSymbol(symbol, is_public))); 95 return; 96 } 97 98 // Prefer function symbols to public symbols. 99 if (is_public && !loc->second.is_public) { 100 return; 101 } 102 103 loc->second.is_multiple = true; 104 105 // Take the 'least' symbol by lexicographical order of the decorated name. We 106 // use the decorated rather than undecorated name because computing the latter 107 // is expensive. 108 BSTR current_name, new_name; 109 loc->second.symbol->get_name(¤t_name); 110 symbol->get_name(&new_name); 111 if (wcscmp(new_name, current_name) < 0) { 112 loc->second.symbol = symbol; 113 loc->second.is_public = is_public; 114 } 115 } 116 117 118 119 bool SymbolsMatch(IDiaSymbol* a, IDiaSymbol* b) { 120 DWORD a_section, a_offset, b_section, b_offset; 121 if (FAILED(a->get_addressSection(&a_section)) || 122 FAILED(a->get_addressOffset(&a_offset)) || 123 FAILED(b->get_addressSection(&b_section)) || 124 FAILED(b->get_addressOffset(&b_offset))) 125 return false; 126 return a_section == b_section && a_offset == b_offset; 127 } 128 129 bool CreateDiaDataSourceInstance(CComPtr<IDiaDataSource>& data_source) { 130 if (SUCCEEDED(data_source.CoCreateInstance(CLSID_DiaSource))) { 131 return true; 132 } 133 134 class DECLSPEC_UUID("B86AE24D-BF2F-4ac9-B5A2-34B14E4CE11D") DiaSource100; 135 class DECLSPEC_UUID("761D3BCD-1304-41D5-94E8-EAC54E4AC172") DiaSource110; 136 class DECLSPEC_UUID("3BFCEA48-620F-4B6B-81F7-B9AF75454C7D") DiaSource120; 137 class DECLSPEC_UUID("E6756135-1E65-4D17-8576-610761398C3C") DiaSource140; 138 139 // If the CoCreateInstance call above failed, msdia*.dll is not registered. 140 // We can try loading the DLL corresponding to the #included DIA SDK, but 141 // the DIA headers don't provide a version. 
Lets try to figure out which DIA 142 // version we're compiling against by comparing CLSIDs. 143 const wchar_t* msdia_dll = nullptr; 144 if (CLSID_DiaSource == _uuidof(DiaSource100)) { 145 msdia_dll = L"msdia100.dll"; 146 } else if (CLSID_DiaSource == _uuidof(DiaSource110)) { 147 msdia_dll = L"msdia110.dll"; 148 } else if (CLSID_DiaSource == _uuidof(DiaSource120)) { 149 msdia_dll = L"msdia120.dll"; 150 } else if (CLSID_DiaSource == _uuidof(DiaSource140)) { 151 msdia_dll = L"msdia140.dll"; 152 } 153 154 if (msdia_dll && 155 SUCCEEDED(NoRegCoCreate(msdia_dll, CLSID_DiaSource, IID_IDiaDataSource, 156 reinterpret_cast<void**>(&data_source)))) { 157 return true; 158 } 159 160 return false; 161 } 162 163 const DWORD kUndecorateOptions = UNDNAME_NO_MS_KEYWORDS | 164 UNDNAME_NO_FUNCTION_RETURNS | 165 UNDNAME_NO_ALLOCATION_MODEL | 166 UNDNAME_NO_ALLOCATION_LANGUAGE | 167 UNDNAME_NO_THISTYPE | 168 UNDNAME_NO_ACCESS_SPECIFIERS | 169 UNDNAME_NO_THROW_SIGNATURES | 170 UNDNAME_NO_MEMBER_TYPE | 171 UNDNAME_NO_RETURN_UDT_MODEL | 172 UNDNAME_NO_ECSU; 173 174 #define arraysize(f) (sizeof(f) / sizeof(*f)) 175 176 void StripLlvmSuffixAndUndecorate(BSTR* name) { 177 // LLVM sometimes puts a suffix on symbols to give them a globally unique 178 // name. The suffix is either some string preceded by a period (like in the 179 // Itanium ABI; also on Windows this is safe since periods are otherwise 180 // never part of mangled names), or a dollar sign followed by a 32-char hex 181 // string (this should go away in future LLVM versions). Strip such suffixes 182 // and try demangling again. 183 // 184 // 185 // Example symbol names with such suffixes: 186 // 187 // ?foo@@YAXXZ$5520c83448162c04f2b239db4b5a2c61 188 // ?foo@@YAXXZ.llvm.13040715209719948753 189 190 if (**name != L'?') 191 return; // The name is already demangled. 192 193 for (size_t i = 0, len = wcslen(*name); i < len; i++) { 194 wchar_t c = (*name)[i]; 195 196 if (c == L'.' 
|| (c == L'$' && len - i == 32 + 1)) { 197 (*name)[i] = L'\0'; 198 wchar_t undecorated[1024]; 199 DWORD res = UnDecorateSymbolNameW(*name, undecorated, 200 arraysize(undecorated), 201 kUndecorateOptions); 202 if (res == 0 || undecorated[0] == L'?') { 203 // Demangling failed; restore the symbol name and return. 204 (*name)[i] = c; 205 return; 206 } 207 208 SysFreeString(*name); 209 *name = SysAllocString(undecorated); 210 return; 211 } 212 } 213 } 214 215 // Prints the error message related to the error code as seen in 216 // Microsoft's MSVS documentation for loadDataFromPdb and loadDataForExe. 217 void PrintOpenError(HRESULT hr, const char* fn_name, const wchar_t* file) { 218 switch (hr) { 219 case E_PDB_NOT_FOUND: 220 fprintf(stderr, "%s: Failed to open %ws, or the file has an " 221 "invalid format.\n", fn_name, file); 222 break; 223 case E_PDB_FORMAT: 224 fprintf(stderr, "%s: Attempted to access %ws with an obsolete " 225 "format.\n", fn_name, file); 226 break; 227 case E_PDB_INVALID_SIG: 228 fprintf(stderr, "%s: Signature does not match for %ws.\n", fn_name, 229 file); 230 break; 231 case E_PDB_INVALID_AGE: 232 fprintf(stderr, "%s: Age does not match for %ws.\n", fn_name, file); 233 break; 234 case E_INVALIDARG: 235 fprintf(stderr, "%s: Invalid parameter for %ws.\n", fn_name, file); 236 break; 237 case E_UNEXPECTED: 238 fprintf(stderr, "%s: Data source has already been prepared for %ws.\n", 239 fn_name, file); 240 break; 241 default: 242 fprintf(stderr, "%s: Unexpected error 0x%lx, file: %ws.\n", 243 fn_name, hr, file); 244 break; 245 } 246 } 247 248 } // namespace 249 250 PDBSourceLineWriter::Inline::Inline(int inline_nest_level) 251 : inline_nest_level_(inline_nest_level) {} 252 253 void PDBSourceLineWriter::Inline::SetOriginId(int origin_id) { 254 origin_id_ = origin_id; 255 } 256 257 void PDBSourceLineWriter::Inline::ExtendRanges(const Line& line) { 258 if (ranges_.empty()) { 259 ranges_[line.rva] = line.length; 260 return; 261 } 262 auto iter = 
ranges_.lower_bound(line.rva); 263 // There is no overlap if this function is called with inlinee lines from 264 // the same callsite. 265 if (iter == ranges_.begin()) { 266 return; 267 } 268 if (line.rva + line.length == iter->first) { 269 // If they are connected, merge their ranges into one. 270 DWORD length = line.length + iter->second; 271 ranges_.erase(iter); 272 ranges_[line.rva] = length; 273 } else { 274 --iter; 275 if (iter->first + iter->second == line.rva) { 276 ranges_[iter->first] = iter->second + line.length; 277 } else { 278 ranges_[line.rva] = line.length; 279 } 280 } 281 } 282 283 void PDBSourceLineWriter::Inline::SetCallSiteLine(DWORD call_site_line) { 284 call_site_line_ = call_site_line; 285 } 286 287 void PDBSourceLineWriter::Inline::SetCallSiteFileId(DWORD call_site_file_id) { 288 call_site_file_id_ = call_site_file_id; 289 } 290 291 void PDBSourceLineWriter::Inline::SetChildInlines( 292 vector<unique_ptr<Inline>> child_inlines) { 293 child_inlines_ = std::move(child_inlines); 294 } 295 296 void PDBSourceLineWriter::Inline::Print(FILE* output) const { 297 // Ignore INLINE record that doesn't have any range. 298 if (ranges_.empty()) 299 return; 300 fprintf(output, "INLINE %d %lu %lu %d", inline_nest_level_, call_site_line_, 301 call_site_file_id_, origin_id_); 302 for (const auto& r : ranges_) { 303 fprintf(output, " %lx %lx", r.first, r.second); 304 } 305 fprintf(output, "\n"); 306 for (const unique_ptr<Inline>& in : child_inlines_) { 307 in->Print(output); 308 } 309 } 310 311 const PDBSourceLineWriter::Line* PDBSourceLineWriter::Lines::GetLine( 312 DWORD rva) const { 313 auto iter = line_map_.find(rva); 314 if (iter == line_map_.end()) { 315 // If not found exact rva, check if it's within any range. 316 iter = line_map_.lower_bound(rva); 317 if (iter == line_map_.begin()) 318 return nullptr; 319 --iter; 320 auto l = iter->second; 321 // This happens when there is no top level lines cover this rva (e.g. 
empty 322 // lines found for the function). Then we don't know the call site line 323 // number for this inlined function. 324 if (rva >= l.rva + l.length) 325 return nullptr; 326 } 327 return &iter->second; 328 } 329 330 DWORD PDBSourceLineWriter::Lines::GetLineNum(DWORD rva) const { 331 const Line* line = GetLine(rva); 332 return line ? line->line_num : 0; 333 } 334 335 DWORD PDBSourceLineWriter::Lines::GetFileId(DWORD rva) const { 336 const Line* line = GetLine(rva); 337 return line ? line->file_id : 0; 338 } 339 340 void PDBSourceLineWriter::Lines::AddLine(const Line& line) { 341 if (line_map_.empty()) { 342 line_map_[line.rva] = line; 343 return; 344 } 345 346 // Given an existing line in line_map_, remove it from line_map_ if it 347 // overlaps with the line and add a new line for the non-overlap range. Return 348 // true if there is an overlap. 349 auto intercept = [&](Line old_line) { 350 DWORD end = old_line.rva + old_line.length; 351 // No overlap. 352 if (old_line.rva >= line.rva + line.length || line.rva >= end) 353 return false; 354 // old_line is within the line. 355 if (old_line.rva >= line.rva && end <= line.rva + line.length) { 356 line_map_.erase(old_line.rva); 357 return true; 358 } 359 // Then there is a overlap. 360 if (old_line.rva < line.rva) { 361 old_line.length -= end - line.rva; 362 if (end > line.rva + line.length) { 363 Line new_line = old_line; 364 new_line.rva = line.rva + line.length; 365 new_line.length = end - new_line.rva; 366 line_map_[new_line.rva] = new_line; 367 } 368 } else { 369 line_map_.erase(old_line.rva); 370 old_line.length -= line.rva + line.length - old_line.rva; 371 old_line.rva = line.rva + line.length; 372 } 373 line_map_[old_line.rva] = old_line; 374 return true; 375 }; 376 377 bool is_intercept; 378 // Use a loop in cases that there are multiple lines within the given line. 
379 do { 380 auto iter = line_map_.lower_bound(line.rva); 381 if (iter == line_map_.end()) { 382 if (!line_map_.empty()) { 383 --iter; 384 intercept(iter->second); 385 } 386 break; 387 } 388 is_intercept = false; 389 if (iter != line_map_.begin()) { 390 // Check if the given line overlaps a line with smaller in the map. 391 auto prev = line_map_.lower_bound(line.rva); 392 --prev; 393 is_intercept = intercept(prev->second); 394 } 395 // Check if the given line overlaps a line with greater or equal rva in the 396 // map. Using operator |= here since it's possible that there are multiple 397 // lines with greater rva in the map overlap with the given line. 398 is_intercept |= intercept(iter->second); 399 } while (is_intercept); 400 line_map_[line.rva] = line; 401 } 402 403 PDBSourceLineWriter::PDBSourceLineWriter(bool handle_inline) 404 : output_(NULL), handle_inline_(handle_inline) {} 405 406 PDBSourceLineWriter::~PDBSourceLineWriter() { 407 Close(); 408 } 409 410 bool PDBSourceLineWriter::SetCodeFile(const wstring& exe_file) { 411 if (code_file_.empty()) { 412 code_file_ = exe_file; 413 return true; 414 } 415 // Setting a different code file path is an error. It is success only if the 416 // file paths are the same. 
417 return exe_file == code_file_; 418 } 419 420 bool PDBSourceLineWriter::Open(const wstring& file, FileFormat format) { 421 Close(); 422 code_file_.clear(); 423 424 if (FAILED(CoInitialize(NULL))) { 425 fprintf(stderr, "CoInitialize failed\n"); 426 return false; 427 } 428 429 CComPtr<IDiaDataSource> data_source; 430 if (!CreateDiaDataSourceInstance(data_source)) { 431 const int kGuidSize = 64; 432 wchar_t classid[kGuidSize] = {0}; 433 StringFromGUID2(CLSID_DiaSource, classid, kGuidSize); 434 fprintf(stderr, "CoCreateInstance CLSID_DiaSource %S failed " 435 "(msdia*.dll unregistered?)\n", classid); 436 return false; 437 } 438 439 HRESULT from_pdb_result; 440 HRESULT for_exe_result; 441 const wchar_t* file_name = file.c_str(); 442 switch (format) { 443 case PDB_FILE: 444 from_pdb_result = data_source->loadDataFromPdb(file_name); 445 if (FAILED(from_pdb_result)) { 446 PrintOpenError(from_pdb_result, "loadDataFromPdb", file_name); 447 return false; 448 } 449 break; 450 case EXE_FILE: 451 for_exe_result = data_source->loadDataForExe(file_name, NULL, NULL); 452 if (FAILED(for_exe_result)) { 453 PrintOpenError(for_exe_result, "loadDataForExe", file_name); 454 return false; 455 } 456 code_file_ = file; 457 break; 458 case ANY_FILE: 459 from_pdb_result = data_source->loadDataFromPdb(file_name); 460 if (FAILED(from_pdb_result)) { 461 for_exe_result = data_source->loadDataForExe(file_name, NULL, NULL); 462 if (FAILED(for_exe_result)) { 463 PrintOpenError(from_pdb_result, "loadDataFromPdb", file_name); 464 PrintOpenError(for_exe_result, "loadDataForExe", file_name); 465 return false; 466 } 467 code_file_ = file; 468 } 469 break; 470 default: 471 fprintf(stderr, "Unknown file format\n"); 472 return false; 473 } 474 475 if (FAILED(data_source->openSession(&session_))) { 476 fprintf(stderr, "openSession failed\n"); 477 } 478 479 return true; 480 } 481 482 bool PDBSourceLineWriter::GetLine(IDiaLineNumber* dia_line, Line* line) const { 483 if 
(FAILED(dia_line->get_relativeVirtualAddress(&line->rva))) { 484 fprintf(stderr, "failed to get line rva\n"); 485 return false; 486 } 487 488 if (FAILED(dia_line->get_length(&line->length))) { 489 fprintf(stderr, "failed to get line code length\n"); 490 return false; 491 } 492 493 DWORD dia_source_id; 494 if (FAILED(dia_line->get_sourceFileId(&dia_source_id))) { 495 fprintf(stderr, "failed to get line source file id\n"); 496 return false; 497 } 498 // duplicate file names are coalesced to share one ID 499 line->file_id = GetRealFileID(dia_source_id); 500 501 if (FAILED(dia_line->get_lineNumber(&line->line_num))) { 502 fprintf(stderr, "failed to get line number\n"); 503 return false; 504 } 505 return true; 506 } 507 508 bool PDBSourceLineWriter::GetLines(IDiaEnumLineNumbers* lines, 509 Lines* line_list) const { 510 CComPtr<IDiaLineNumber> line; 511 ULONG count; 512 513 while (SUCCEEDED(lines->Next(1, &line, &count)) && count == 1) { 514 Line l; 515 if (!GetLine(line, &l)) 516 return false; 517 // Silently ignore zero-length lines. 
518 if (l.length != 0) 519 line_list->AddLine(l); 520 line.Release(); 521 } 522 return true; 523 } 524 525 void PDBSourceLineWriter::PrintLines(const Lines& lines) const { 526 // The line number format is: 527 // <rva> <line number> <source file id> 528 for (const auto& kv : lines.GetLineMap()) { 529 const Line& l = kv.second; 530 AddressRangeVector ranges; 531 MapAddressRange(image_map_, AddressRange(l.rva, l.length), &ranges); 532 for (auto& range : ranges) { 533 fprintf(output_, "%lx %lx %lu %lu\n", range.rva, range.length, l.line_num, 534 l.file_id); 535 } 536 } 537 } 538 539 bool PDBSourceLineWriter::PrintFunction(IDiaSymbol* function, 540 IDiaSymbol* block, 541 bool has_multiple_symbols) { 542 // The function format is: 543 // FUNC <address> <length> <param_stack_size> <function> 544 DWORD rva; 545 if (FAILED(block->get_relativeVirtualAddress(&rva))) { 546 fprintf(stderr, "couldn't get rva\n"); 547 return false; 548 } 549 550 ULONGLONG length; 551 if (FAILED(block->get_length(&length))) { 552 fprintf(stderr, "failed to get function length\n"); 553 return false; 554 } 555 556 if (length == 0) { 557 // Silently ignore zero-length functions, which can infrequently pop up. 558 return true; 559 } 560 561 CComBSTR name; 562 int stack_param_size; 563 if (!GetSymbolFunctionName(function, &name, &stack_param_size)) { 564 return false; 565 } 566 567 // If the decorated name didn't give the parameter size, try to 568 // calculate it. 569 if (stack_param_size < 0) { 570 stack_param_size = GetFunctionStackParamSize(function); 571 } 572 573 AddressRangeVector ranges; 574 MapAddressRange(image_map_, AddressRange(rva, static_cast<DWORD>(length)), 575 &ranges); 576 for (size_t i = 0; i < ranges.size(); ++i) { 577 const char* optional_multiple_field = has_multiple_symbols ? 
"m " : ""; 578 fprintf(output_, "FUNC %s%lx %lx %x %ws\n", optional_multiple_field, 579 ranges[i].rva, ranges[i].length, stack_param_size, name.m_str); 580 } 581 582 CComPtr<IDiaEnumLineNumbers> lines; 583 if (FAILED(session_->findLinesByRVA(rva, DWORD(length), &lines))) { 584 return false; 585 } 586 587 // Get top level lines first, which later may be split into multiple smaller 588 // lines if any inline exists in their ranges if we want to handle inline. 589 Lines line_list; 590 if (!GetLines(lines, &line_list)) { 591 return false; 592 } 593 if (handle_inline_) { 594 vector<unique_ptr<Inline>> inlines; 595 if (!GetInlines(block, &line_list, 0, &inlines)) { 596 return false; 597 } 598 PrintInlines(inlines); 599 } 600 PrintLines(line_list); 601 return true; 602 } 603 604 bool PDBSourceLineWriter::PrintSourceFiles() { 605 CComPtr<IDiaSymbol> global; 606 if (FAILED(session_->get_globalScope(&global))) { 607 fprintf(stderr, "get_globalScope failed\n"); 608 return false; 609 } 610 611 CComPtr<IDiaEnumSymbols> compilands; 612 if (FAILED(global->findChildren(SymTagCompiland, NULL, 613 nsNone, &compilands))) { 614 fprintf(stderr, "findChildren failed\n"); 615 return false; 616 } 617 618 // Print a dummy file with id equals 0 to represent unknown file, because 619 // inline records might have unknown call site. 
620 fwprintf(output_, L"FILE %d unknown file\n", 0); 621 622 CComPtr<IDiaSymbol> compiland; 623 ULONG count; 624 while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) { 625 CComPtr<IDiaEnumSourceFiles> source_files; 626 if (FAILED(session_->findFile(compiland, NULL, nsNone, &source_files))) { 627 return false; 628 } 629 CComPtr<IDiaSourceFile> file; 630 while (SUCCEEDED(source_files->Next(1, &file, &count)) && count == 1) { 631 DWORD file_id; 632 if (FAILED(file->get_uniqueId(&file_id))) { 633 return false; 634 } 635 636 CComBSTR file_name; 637 if (FAILED(file->get_fileName(&file_name))) { 638 return false; 639 } 640 641 wstring file_name_string(file_name); 642 if (!FileIDIsCached(file_name_string)) { 643 // this is a new file name, cache it and output a FILE line. 644 CacheFileID(file_name_string, file_id); 645 fwprintf(output_, L"FILE %d %ws\n", file_id, file_name_string.c_str()); 646 } else { 647 // this file name has already been seen, just save this 648 // ID for later lookup. 649 StoreDuplicateFileID(file_name_string, file_id); 650 } 651 file.Release(); 652 } 653 compiland.Release(); 654 } 655 return true; 656 } 657 658 bool PDBSourceLineWriter::PrintFunctions() { 659 ULONG count = 0; 660 DWORD rva = 0; 661 CComPtr<IDiaSymbol> global; 662 HRESULT hr; 663 664 if (FAILED(session_->get_globalScope(&global))) { 665 fprintf(stderr, "get_globalScope failed\n"); 666 return false; 667 } 668 669 CComPtr<IDiaEnumSymbols> symbols = NULL; 670 671 // Find all function symbols first. 672 SymbolMap rva_symbol; 673 hr = global->findChildren(SymTagFunction, NULL, nsNone, &symbols); 674 675 if (SUCCEEDED(hr)) { 676 CComPtr<IDiaSymbol> symbol = NULL; 677 678 while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) { 679 if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) { 680 // Potentially record this as the canonical symbol for this rva. 
681 MaybeRecordSymbol(rva, symbol, false, &rva_symbol); 682 } else { 683 fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n"); 684 return false; 685 } 686 687 symbol.Release(); 688 } 689 690 symbols.Release(); 691 } 692 693 // Find all public symbols and record public symbols that are not also private 694 // symbols. 695 hr = global->findChildren(SymTagPublicSymbol, NULL, nsNone, &symbols); 696 697 if (SUCCEEDED(hr)) { 698 CComPtr<IDiaSymbol> symbol = NULL; 699 700 while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) { 701 if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) { 702 // Potentially record this as the canonical symbol for this rva. 703 MaybeRecordSymbol(rva, symbol, true, &rva_symbol); 704 } else { 705 fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n"); 706 return false; 707 } 708 709 symbol.Release(); 710 } 711 712 symbols.Release(); 713 } 714 715 // For each rva, dump the selected symbol at the address. 716 SymbolMap::iterator it; 717 for (it = rva_symbol.begin(); it != rva_symbol.end(); ++it) { 718 CComPtr<IDiaSymbol> symbol = it->second.symbol; 719 // Only print public symbols if there is no function symbol for the address. 720 if (!it->second.is_public) { 721 if (!PrintFunction(symbol, symbol, it->second.is_multiple)) 722 return false; 723 } else { 724 if (!PrintCodePublicSymbol(symbol, it->second.is_multiple)) 725 return false; 726 } 727 } 728 729 // When building with PGO, the compiler can split functions into 730 // "hot" and "cold" blocks, and move the "cold" blocks out to separate 731 // pages, so the function can be noncontiguous. To find these blocks, 732 // we have to iterate over all the compilands, and then find blocks 733 // that are children of them. We can then find the lexical parents 734 // of those blocks and print out an extra FUNC line for blocks 735 // that are not contained in their parent functions. 
736 CComPtr<IDiaEnumSymbols> compilands; 737 if (FAILED(global->findChildren(SymTagCompiland, NULL, 738 nsNone, &compilands))) { 739 fprintf(stderr, "findChildren failed on the global\n"); 740 return false; 741 } 742 743 CComPtr<IDiaSymbol> compiland; 744 while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) { 745 CComPtr<IDiaEnumSymbols> blocks; 746 if (FAILED(compiland->findChildren(SymTagBlock, NULL, 747 nsNone, &blocks))) { 748 fprintf(stderr, "findChildren failed on a compiland\n"); 749 return false; 750 } 751 752 CComPtr<IDiaSymbol> block; 753 while (SUCCEEDED(blocks->Next(1, &block, &count)) && count == 1) { 754 // find this block's lexical parent function 755 CComPtr<IDiaSymbol> parent; 756 DWORD tag; 757 if (SUCCEEDED(block->get_lexicalParent(&parent)) && 758 SUCCEEDED(parent->get_symTag(&tag)) && 759 tag == SymTagFunction) { 760 // now get the block's offset and the function's offset and size, 761 // and determine if the block is outside of the function 762 DWORD func_rva, block_rva; 763 ULONGLONG func_length; 764 if (SUCCEEDED(block->get_relativeVirtualAddress(&block_rva)) && 765 SUCCEEDED(parent->get_relativeVirtualAddress(&func_rva)) && 766 SUCCEEDED(parent->get_length(&func_length))) { 767 if (block_rva < func_rva || block_rva > (func_rva + func_length)) { 768 if (!PrintFunction(parent, block, false)) { 769 return false; 770 } 771 } 772 } 773 } 774 parent.Release(); 775 block.Release(); 776 } 777 blocks.Release(); 778 compiland.Release(); 779 } 780 781 global.Release(); 782 return true; 783 } 784 785 void PDBSourceLineWriter::PrintInlineOrigins() const { 786 struct OriginCompare { 787 bool operator()(const InlineOrigin lhs, const InlineOrigin rhs) const { 788 return lhs.id < rhs.id; 789 } 790 }; 791 set<InlineOrigin, OriginCompare> origins; 792 // Sort by origin id. 
793 for (auto const& origin : inline_origins_) 794 origins.insert(origin.second); 795 for (auto o : origins) { 796 fprintf(output_, "INLINE_ORIGIN %d %ls\n", o.id, o.name.c_str()); 797 } 798 } 799 800 bool PDBSourceLineWriter::GetInlines(IDiaSymbol* block, 801 Lines* line_list, 802 int inline_nest_level, 803 vector<unique_ptr<Inline>>* inlines) { 804 CComPtr<IDiaEnumSymbols> inline_callsites; 805 if (FAILED(block->findChildrenEx(SymTagInlineSite, nullptr, nsNone, 806 &inline_callsites))) { 807 return false; 808 } 809 ULONG count; 810 CComPtr<IDiaSymbol> callsite; 811 while (SUCCEEDED(inline_callsites->Next(1, &callsite, &count)) && 812 count == 1) { 813 unique_ptr<Inline> new_inline(new Inline(inline_nest_level)); 814 CComPtr<IDiaEnumLineNumbers> lines; 815 // All inlinee lines have the same file id. 816 DWORD file_id = 0; 817 DWORD call_site_line = 0; 818 if (FAILED(session_->findInlineeLines(callsite, &lines))) { 819 return false; 820 } 821 CComPtr<IDiaLineNumber> dia_line; 822 while (SUCCEEDED(lines->Next(1, &dia_line, &count)) && count == 1) { 823 Line line; 824 if (!GetLine(dia_line, &line)) { 825 return false; 826 } 827 // Silently ignore zero-length lines. 828 if (line.length != 0) { 829 // Use the first line num and file id at rva as this inline's call site 830 // line number, because after adding lines it may be changed to inner 831 // line number and inner file id. 
832 if (call_site_line == 0) 833 call_site_line = line_list->GetLineNum(line.rva); 834 if (file_id == 0) 835 file_id = line_list->GetFileId(line.rva); 836 line_list->AddLine(line); 837 new_inline->ExtendRanges(line); 838 } 839 dia_line.Release(); 840 } 841 BSTR name; 842 callsite->get_name(&name); 843 if (SysStringLen(name) == 0) { 844 name = SysAllocString(L"<name omitted>"); 845 } 846 auto iter = inline_origins_.find(name); 847 if (iter == inline_origins_.end()) { 848 InlineOrigin origin; 849 origin.id = inline_origins_.size(); 850 origin.name = name; 851 inline_origins_[name] = origin; 852 } 853 new_inline->SetOriginId(inline_origins_[name].id); 854 new_inline->SetCallSiteLine(call_site_line); 855 new_inline->SetCallSiteFileId(file_id); 856 // Go to next level. 857 vector<unique_ptr<Inline>> child_inlines; 858 if (!GetInlines(callsite, line_list, inline_nest_level + 1, 859 &child_inlines)) { 860 return false; 861 } 862 new_inline->SetChildInlines(std::move(child_inlines)); 863 inlines->push_back(std::move(new_inline)); 864 callsite.Release(); 865 } 866 return true; 867 } 868 869 void PDBSourceLineWriter::PrintInlines( 870 const vector<unique_ptr<Inline>>& inlines) const { 871 for (const unique_ptr<Inline>& in : inlines) { 872 in->Print(output_); 873 } 874 } 875 876 #undef max 877 878 bool PDBSourceLineWriter::PrintFrameDataUsingPDB() { 879 // It would be nice if it were possible to output frame data alongside the 880 // associated function, as is done with line numbers, but the DIA API 881 // doesn't make it possible to get the frame data in that way. 
882 883 CComPtr<IDiaEnumFrameData> frame_data_enum; 884 if (!FindTable(session_, &frame_data_enum)) 885 return false; 886 887 DWORD last_type = std::numeric_limits<DWORD>::max(); 888 DWORD last_rva = std::numeric_limits<DWORD>::max(); 889 DWORD last_code_size = 0; 890 DWORD last_prolog_size = std::numeric_limits<DWORD>::max(); 891 892 CComPtr<IDiaFrameData> frame_data; 893 ULONG count = 0; 894 while (SUCCEEDED(frame_data_enum->Next(1, &frame_data, &count)) && 895 count == 1) { 896 DWORD type; 897 if (FAILED(frame_data->get_type(&type))) 898 return false; 899 900 DWORD rva; 901 if (FAILED(frame_data->get_relativeVirtualAddress(&rva))) 902 return false; 903 904 DWORD code_size; 905 if (FAILED(frame_data->get_lengthBlock(&code_size))) 906 return false; 907 908 DWORD prolog_size; 909 if (FAILED(frame_data->get_lengthProlog(&prolog_size))) 910 return false; 911 912 // parameter_size is the size of parameters passed on the stack. If any 913 // parameters are not passed on the stack (such as in registers), their 914 // sizes will not be included in parameter_size. 915 DWORD parameter_size; 916 if (FAILED(frame_data->get_lengthParams(¶meter_size))) 917 return false; 918 919 DWORD saved_register_size; 920 if (FAILED(frame_data->get_lengthSavedRegisters(&saved_register_size))) 921 return false; 922 923 DWORD local_size; 924 if (FAILED(frame_data->get_lengthLocals(&local_size))) 925 return false; 926 927 // get_maxStack can return S_FALSE, just use 0 in that case. 928 DWORD max_stack_size = 0; 929 if (FAILED(frame_data->get_maxStack(&max_stack_size))) 930 return false; 931 932 // get_programString can return S_FALSE, indicating that there is no 933 // program string. In that case, check whether %ebp is used. 
934 HRESULT program_string_result; 935 CComBSTR program_string; 936 if (FAILED(program_string_result = frame_data->get_program( 937 &program_string))) { 938 return false; 939 } 940 941 // get_allocatesBasePointer can return S_FALSE, treat that as though 942 // %ebp is not used. 943 BOOL allocates_base_pointer = FALSE; 944 if (program_string_result != S_OK) { 945 if (FAILED(frame_data->get_allocatesBasePointer( 946 &allocates_base_pointer))) { 947 return false; 948 } 949 } 950 951 // Only print out a line if type, rva, code_size, or prolog_size have 952 // changed from the last line. It is surprisingly common (especially in 953 // system library PDBs) for DIA to return a series of identical 954 // IDiaFrameData objects. For kernel32.pdb from Windows XP SP2 on x86, 955 // this check reduces the size of the dumped symbol file by a third. 956 if (type != last_type || rva != last_rva || code_size != last_code_size || 957 prolog_size != last_prolog_size) { 958 // The prolog and the code portions of the frame have to be treated 959 // independently as they may have independently changed in size, or may 960 // even have been split. 961 // NOTE: If epilog size is ever non-zero, we have to do something 962 // similar with it. 963 964 // Figure out where the prolog bytes have landed. 965 AddressRangeVector prolog_ranges; 966 if (prolog_size > 0) { 967 MapAddressRange(image_map_, AddressRange(rva, prolog_size), 968 &prolog_ranges); 969 } 970 971 // And figure out where the code bytes have landed. 972 AddressRangeVector code_ranges; 973 MapAddressRange(image_map_, 974 AddressRange(rva + prolog_size, 975 code_size - prolog_size), 976 &code_ranges); 977 978 struct FrameInfo { 979 DWORD rva; 980 DWORD code_size; 981 DWORD prolog_size; 982 }; 983 std::vector<FrameInfo> frame_infos; 984 985 // Special case: The prolog and the code bytes remain contiguous. 
This is 986 // only done for compactness of the symbol file, and we could actually 987 // be outputting independent frame info for the prolog and code portions. 988 if (prolog_ranges.size() == 1 && code_ranges.size() == 1 && 989 prolog_ranges[0].end() == code_ranges[0].rva) { 990 FrameInfo fi = { prolog_ranges[0].rva, 991 prolog_ranges[0].length + code_ranges[0].length, 992 prolog_ranges[0].length }; 993 frame_infos.push_back(fi); 994 } else { 995 // Otherwise we output the prolog and code frame info independently. 996 for (size_t i = 0; i < prolog_ranges.size(); ++i) { 997 FrameInfo fi = { prolog_ranges[i].rva, 998 prolog_ranges[i].length, 999 prolog_ranges[i].length }; 1000 frame_infos.push_back(fi); 1001 } 1002 for (size_t i = 0; i < code_ranges.size(); ++i) { 1003 FrameInfo fi = { code_ranges[i].rva, code_ranges[i].length, 0 }; 1004 frame_infos.push_back(fi); 1005 } 1006 } 1007 1008 for (size_t i = 0; i < frame_infos.size(); ++i) { 1009 const FrameInfo& fi(frame_infos[i]); 1010 fprintf(output_, "STACK WIN %lx %lx %lx %lx %x %lx %lx %lx %lx %d ", 1011 type, fi.rva, fi.code_size, fi.prolog_size, 1012 0 /* epilog_size */, parameter_size, saved_register_size, 1013 local_size, max_stack_size, program_string_result == S_OK); 1014 if (program_string_result == S_OK) { 1015 fprintf(output_, "%ws\n", program_string.m_str); 1016 } else { 1017 fprintf(output_, "%d\n", allocates_base_pointer); 1018 } 1019 } 1020 1021 last_type = type; 1022 last_rva = rva; 1023 last_code_size = code_size; 1024 last_prolog_size = prolog_size; 1025 } 1026 1027 frame_data.Release(); 1028 } 1029 1030 return true; 1031 } 1032 1033 bool PDBSourceLineWriter::PrintFrameDataUsingEXE() { 1034 if (code_file_.empty() && !FindPEFile()) { 1035 fprintf(stderr, "Couldn't locate EXE or DLL file.\n"); 1036 return false; 1037 } 1038 1039 return PrintPEFrameData(code_file_, output_); 1040 } 1041 1042 bool PDBSourceLineWriter::PrintFrameData() { 1043 PDBModuleInfo info; 1044 if (GetModuleInfo(&info) && info.cpu 
== L"x86_64") { 1045 return PrintFrameDataUsingEXE(); 1046 } 1047 return PrintFrameDataUsingPDB(); 1048 } 1049 1050 bool PDBSourceLineWriter::PrintCodePublicSymbol(IDiaSymbol* symbol, 1051 bool has_multiple_symbols) { 1052 BOOL is_code; 1053 if (FAILED(symbol->get_code(&is_code))) { 1054 return false; 1055 } 1056 if (!is_code) { 1057 return true; 1058 } 1059 1060 DWORD rva; 1061 if (FAILED(symbol->get_relativeVirtualAddress(&rva))) { 1062 return false; 1063 } 1064 1065 CComBSTR name; 1066 int stack_param_size; 1067 if (!GetSymbolFunctionName(symbol, &name, &stack_param_size)) { 1068 return false; 1069 } 1070 1071 AddressRangeVector ranges; 1072 MapAddressRange(image_map_, AddressRange(rva, 1), &ranges); 1073 for (size_t i = 0; i < ranges.size(); ++i) { 1074 const char* optional_multiple_field = has_multiple_symbols ? "m " : ""; 1075 fprintf(output_, "PUBLIC %s%lx %x %ws\n", optional_multiple_field, 1076 ranges[i].rva, stack_param_size > 0 ? stack_param_size : 0, 1077 name.m_str); 1078 } 1079 1080 // Now walk the function in the original untranslated space, asking DIA 1081 // what function is at that location, stepping through OMAP blocks. If 1082 // we're still in the same function, emit another entry, because the 1083 // symbol could have been split into multiple pieces. If we've gotten to 1084 // another symbol in the original address space, then we're done for 1085 // this symbol. See https://crbug.com/678874. 1086 for (;;) { 1087 // This steps to the next block in the original image. Simply doing 1088 // rva++ would also be correct, but would emit tons of unnecessary 1089 // entries. 
1090 rva = image_map_.subsequent_rva_block[rva]; 1091 if (rva == 0) 1092 break; 1093 1094 CComPtr<IDiaSymbol> next_sym = NULL; 1095 LONG displacement; 1096 if (FAILED(session_->findSymbolByRVAEx(rva, SymTagPublicSymbol, &next_sym, 1097 &displacement))) { 1098 break; 1099 } 1100 1101 if (!SymbolsMatch(symbol, next_sym)) 1102 break; 1103 1104 AddressRangeVector next_ranges; 1105 MapAddressRange(image_map_, AddressRange(rva, 1), &next_ranges); 1106 for (size_t i = 0; i < next_ranges.size(); ++i) { 1107 fprintf(output_, "PUBLIC %lx %x %ws\n", next_ranges[i].rva, 1108 stack_param_size > 0 ? stack_param_size : 0, name.m_str); 1109 } 1110 } 1111 1112 return true; 1113 } 1114 1115 bool PDBSourceLineWriter::PrintPDBInfo() { 1116 PDBModuleInfo info; 1117 if (!GetModuleInfo(&info)) { 1118 return false; 1119 } 1120 1121 // Hard-code "windows" for the OS because that's the only thing that makes 1122 // sense for PDB files. (This might not be strictly correct for Windows CE 1123 // support, but we don't care about that at the moment.) 1124 fprintf(output_, "MODULE windows %ws %ws %ws\n", 1125 info.cpu.c_str(), info.debug_identifier.c_str(), 1126 info.debug_file.c_str()); 1127 1128 return true; 1129 } 1130 1131 bool PDBSourceLineWriter::PrintPEInfo() { 1132 PEModuleInfo info; 1133 if (!GetPEInfo(&info)) { 1134 return false; 1135 } 1136 1137 fprintf(output_, "INFO CODE_ID %ws %ws\n", 1138 info.code_identifier.c_str(), 1139 info.code_file.c_str()); 1140 return true; 1141 } 1142 1143 // wcstol_positive_strict is sort of like wcstol, but much stricter. string 1144 // should be a buffer pointing to a null-terminated string containing only 1145 // decimal digits. If the entire string can be converted to an integer 1146 // without overflowing, and there are no non-digit characters before the 1147 // result is set to the value and this function returns true. Otherwise, 1148 // this function returns false. 
// wcstol_positive_strict is sort of like wcstol, but much stricter. |string|
// must point to a null-terminated string containing only decimal digits, with
// no leading zeroes unless the string is exactly "0". If the entire string
// can be converted to a non-negative int without overflowing, |*result| is
// set to the value and this function returns true. Otherwise it returns
// false and leaves |*result| untouched. An empty string converts to 0.
//
// This is an alternative to the strtol, atoi, and scanf families, which are
// not as strict about input and in some cases don't provide a good way for
// the caller to determine if a conversion was successful.
static bool wcstol_positive_strict(wchar_t* string, int* result) {
  const int kMaxValue = std::numeric_limits<int>::max();

  int value = 0;
  for (const wchar_t* c = string; *c != L'\0'; ++c) {
    if (*c < L'0' || *c > L'9') {
      return false;
    }
    const int digit = static_cast<int>(*c - L'0');

    // Reject the digit before doing the arithmetic: signed overflow is
    // undefined behavior, so it cannot be detected reliably after the fact.
    // value * 10 + digit fits exactly when value <= (kMaxValue - digit) / 10.
    if (value > (kMaxValue - digit) / 10) {
      return false;
    }
    value = value * 10 + digit;

    // Forbid leading zeroes unless the string is just "0".
    if (value == 0 && *(c + 1) != L'\0') {
      return false;
    }
  }

  *result = value;
  return true;
}
1198 if (GetFileAttributesW(file.c_str()) != INVALID_FILE_ATTRIBUTES) { 1199 code_file_ = file; 1200 return true; 1201 } 1202 } 1203 } 1204 } 1205 1206 return false; 1207 } 1208 1209 // static 1210 bool PDBSourceLineWriter::GetSymbolFunctionName(IDiaSymbol* function, 1211 BSTR* name, 1212 int* stack_param_size) { 1213 *stack_param_size = -1; 1214 1215 // Use get_undecoratedNameEx to get readable C++ names with arguments. 1216 if (function->get_undecoratedNameEx(kUndecorateOptions, name) != S_OK) { 1217 if (function->get_name(name) != S_OK) { 1218 fprintf(stderr, "failed to get function name\n"); 1219 return false; 1220 } 1221 1222 // It's possible for get_name to return an empty string, so 1223 // special-case that. 1224 if (wcscmp(*name, L"") == 0) { 1225 SysFreeString(*name); 1226 // dwarf_cu_to_module.cc uses "<name omitted>", so match that. 1227 *name = SysAllocString(L"<name omitted>"); 1228 return true; 1229 } 1230 1231 // If a name comes from get_name because no undecorated form existed, 1232 // it's already formatted properly to be used as output. Don't do any 1233 // additional processing. 1234 // 1235 // MSVC7's DIA seems to not undecorate names in as many cases as MSVC8's. 1236 // This will result in calling get_name for some C++ symbols, so 1237 // all of the parameter and return type information may not be included in 1238 // the name string. 1239 } else { 1240 StripLlvmSuffixAndUndecorate(name); 1241 1242 // C++ uses a bogus "void" argument for functions and methods that don't 1243 // take any parameters. Take it out of the undecorated name because it's 1244 // ugly and unnecessary. 
1245 const wchar_t* replace_string = L"(void)"; 1246 const size_t replace_length = wcslen(replace_string); 1247 const wchar_t* replacement_string = L"()"; 1248 size_t length = wcslen(*name); 1249 if (length >= replace_length) { 1250 wchar_t* name_end = *name + length - replace_length; 1251 if (wcscmp(name_end, replace_string) == 0) { 1252 WindowsStringUtils::safe_wcscpy(name_end, replace_length, 1253 replacement_string); 1254 length = wcslen(*name); 1255 } 1256 } 1257 1258 // Undecorate names used for stdcall and fastcall. These names prefix 1259 // the identifier with '_' (stdcall) or '@' (fastcall) and suffix it 1260 // with '@' followed by the number of bytes of parameters, in decimal. 1261 // If such a name is found, take note of the size and undecorate it. 1262 // Only do this for names that aren't C++, which is determined based on 1263 // whether the undecorated name contains any ':' or '(' characters. 1264 if (!wcschr(*name, ':') && !wcschr(*name, '(') && 1265 (*name[0] == '_' || *name[0] == '@')) { 1266 wchar_t* last_at = wcsrchr(*name + 1, '@'); 1267 if (last_at && wcstol_positive_strict(last_at + 1, stack_param_size)) { 1268 // If this function adheres to the fastcall convention, it accepts up 1269 // to the first 8 bytes of parameters in registers (%ecx and %edx). 1270 // We're only interested in the stack space used for parameters, so 1271 // so subtract 8 and don't let the size go below 0. 1272 if (*name[0] == '@') { 1273 if (*stack_param_size > 8) { 1274 *stack_param_size -= 8; 1275 } else { 1276 *stack_param_size = 0; 1277 } 1278 } 1279 1280 // Undecorate the name by moving it one character to the left in its 1281 // buffer, and terminating it where the last '@' had been. 1282 WindowsStringUtils::safe_wcsncpy(*name, length, 1283 *name + 1, last_at - *name - 1); 1284 } else if (*name[0] == '_') { 1285 // This symbol's name is encoded according to the cdecl rules. 
The 1286 // name doesn't end in a '@' character followed by a decimal positive 1287 // integer, so it's not a stdcall name. Strip off the leading 1288 // underscore. 1289 WindowsStringUtils::safe_wcsncpy(*name, length, *name + 1, length); 1290 } 1291 } 1292 } 1293 1294 return true; 1295 } 1296 1297 // static 1298 int PDBSourceLineWriter::GetFunctionStackParamSize(IDiaSymbol* function) { 1299 // This implementation is highly x86-specific. 1300 1301 // Gather the symbols corresponding to data. 1302 CComPtr<IDiaEnumSymbols> data_children; 1303 if (FAILED(function->findChildren(SymTagData, NULL, nsNone, 1304 &data_children))) { 1305 return 0; 1306 } 1307 1308 // lowest_base is the lowest %ebp-relative byte offset used for a parameter. 1309 // highest_end is one greater than the highest offset (i.e. base + length). 1310 // Stack parameters are assumed to be contiguous, because in reality, they 1311 // are. 1312 int lowest_base = INT_MAX; 1313 int highest_end = INT_MIN; 1314 1315 CComPtr<IDiaSymbol> child; 1316 DWORD count; 1317 while (SUCCEEDED(data_children->Next(1, &child, &count)) && count == 1) { 1318 // If any operation fails at this point, just proceed to the next child. 1319 // Use the next_child label instead of continue because child needs to 1320 // be released before it's reused. Declare constructable/destructable 1321 // types early to avoid gotos that cross initializations. 1322 CComPtr<IDiaSymbol> child_type; 1323 1324 // DataIsObjectPtr is only used for |this|. Because |this| can be passed 1325 // as a stack parameter, look for it in addition to traditional 1326 // parameters. 1327 DWORD child_kind; 1328 if (FAILED(child->get_dataKind(&child_kind)) || 1329 (child_kind != DataIsParam && child_kind != DataIsObjectPtr)) { 1330 goto next_child; 1331 } 1332 1333 // Only concentrate on register-relative parameters. 
Parameters may also 1334 // be enregistered (passed directly in a register), but those don't 1335 // consume any stack space, so they're not of interest. 1336 DWORD child_location_type; 1337 if (FAILED(child->get_locationType(&child_location_type)) || 1338 child_location_type != LocIsRegRel) { 1339 goto next_child; 1340 } 1341 1342 // Of register-relative parameters, the only ones that make any sense are 1343 // %ebp- or %esp-relative. Note that MSVC's debugging information always 1344 // gives parameters as %ebp-relative even when a function doesn't use a 1345 // traditional frame pointer and stack parameters are accessed relative to 1346 // %esp, so just look for %ebp-relative parameters. If you wanted to 1347 // access parameters, you'd probably want to treat these %ebp-relative 1348 // offsets as if they were relative to %esp before a function's prolog 1349 // executed. 1350 DWORD child_register; 1351 if (FAILED(child->get_registerId(&child_register)) || 1352 child_register != CV_REG_EBP) { 1353 goto next_child; 1354 } 1355 1356 LONG child_register_offset; 1357 if (FAILED(child->get_offset(&child_register_offset))) { 1358 goto next_child; 1359 } 1360 1361 // IDiaSymbol::get_type can succeed but still pass back a NULL value. 
1362 if (FAILED(child->get_type(&child_type)) || !child_type) { 1363 goto next_child; 1364 } 1365 1366 ULONGLONG child_length; 1367 if (FAILED(child_type->get_length(&child_length))) { 1368 goto next_child; 1369 } 1370 1371 // Extra scope to avoid goto jumping over variable initialization 1372 { 1373 int child_end = child_register_offset + static_cast<ULONG>(child_length); 1374 if (child_register_offset < lowest_base) { 1375 lowest_base = child_register_offset; 1376 } 1377 if (child_end > highest_end) { 1378 highest_end = child_end; 1379 } 1380 } 1381 1382 next_child: 1383 child.Release(); 1384 } 1385 1386 int param_size = 0; 1387 // Make sure lowest_base isn't less than 4, because [%esp+4] is the lowest 1388 // possible address to find a stack parameter before executing a function's 1389 // prolog (see above). Some optimizations cause parameter offsets to be 1390 // lower than 4, but we're not concerned with those because we're only 1391 // looking for parameters contained in addresses higher than where the 1392 // return address is stored. 1393 if (lowest_base < 4) { 1394 lowest_base = 4; 1395 } 1396 if (highest_end > lowest_base) { 1397 // All stack parameters are pushed as at least 4-byte quantities. If the 1398 // last type was narrower than 4 bytes, promote it. This assumes that all 1399 // parameters' offsets are 4-byte-aligned, which is always the case. Only 1400 // worry about the last type, because we're not summing the type sizes, 1401 // just looking at the lowest and highest offsets. 1402 int remainder = highest_end % 4; 1403 if (remainder) { 1404 highest_end += 4 - remainder; 1405 } 1406 1407 param_size = highest_end - lowest_base; 1408 } 1409 1410 return param_size; 1411 } 1412 1413 bool PDBSourceLineWriter::WriteSymbols(FILE* symbol_file) { 1414 output_ = symbol_file; 1415 1416 // Load the OMAP information, and disable auto-translation of addresses in 1417 // preference of doing it ourselves. 
1418 OmapData omap_data; 1419 if (!GetOmapDataAndDisableTranslation(session_, &omap_data)) 1420 return false; 1421 BuildImageMap(omap_data, &image_map_); 1422 1423 bool ret = PrintPDBInfo(); 1424 // This is not a critical piece of the symbol file. 1425 PrintPEInfo(); 1426 ret = ret && PrintSourceFiles() && PrintFunctions() && PrintFrameData(); 1427 PrintInlineOrigins(); 1428 1429 output_ = NULL; 1430 return ret; 1431 } 1432 1433 void PDBSourceLineWriter::Close() { 1434 if (session_ != nullptr) { 1435 session_.Release(); 1436 } 1437 } 1438 1439 bool PDBSourceLineWriter::GetModuleInfo(PDBModuleInfo* info) { 1440 if (!info) { 1441 return false; 1442 } 1443 1444 info->debug_file.clear(); 1445 info->debug_identifier.clear(); 1446 info->cpu.clear(); 1447 1448 CComPtr<IDiaSymbol> global; 1449 if (FAILED(session_->get_globalScope(&global))) { 1450 return false; 1451 } 1452 1453 DWORD machine_type; 1454 // get_machineType can return S_FALSE. 1455 if (global->get_machineType(&machine_type) == S_OK) { 1456 // The documentation claims that get_machineType returns a value from 1457 // the CV_CPU_TYPE_e enumeration, but that's not the case. 1458 // Instead, it returns one of the IMAGE_FILE_MACHINE values as 1459 // defined here: 1460 // http://msdn.microsoft.com/en-us/library/ms680313%28VS.85%29.aspx 1461 info->cpu = FileHeaderMachineToCpuString(static_cast<WORD>(machine_type)); 1462 } else { 1463 // Unexpected, but handle gracefully. 1464 info->cpu = L"unknown"; 1465 } 1466 1467 // DWORD* and int* are not compatible. This is clean and avoids a cast. 
1468 DWORD age; 1469 if (FAILED(global->get_age(&age))) { 1470 return false; 1471 } 1472 1473 bool uses_guid; 1474 if (!UsesGUID(&uses_guid)) { 1475 return false; 1476 } 1477 1478 if (uses_guid) { 1479 GUID guid; 1480 if (FAILED(global->get_guid(&guid))) { 1481 return false; 1482 } 1483 1484 info->debug_identifier = GenerateDebugIdentifier(age, guid); 1485 } else { 1486 DWORD signature; 1487 if (FAILED(global->get_signature(&signature))) { 1488 return false; 1489 } 1490 1491 info->debug_identifier = GenerateDebugIdentifier(age, signature); 1492 } 1493 1494 CComBSTR debug_file_string; 1495 if (FAILED(global->get_symbolsFileName(&debug_file_string))) { 1496 return false; 1497 } 1498 info->debug_file = 1499 WindowsStringUtils::GetBaseName(wstring(debug_file_string)); 1500 1501 return true; 1502 } 1503 1504 bool PDBSourceLineWriter::GetPEInfo(PEModuleInfo* info) { 1505 if (!info) { 1506 return false; 1507 } 1508 1509 if (code_file_.empty() && !FindPEFile()) { 1510 fprintf(stderr, "Couldn't locate EXE or DLL file.\n"); 1511 return false; 1512 } 1513 1514 return ReadPEInfo(code_file_, info); 1515 } 1516 1517 bool PDBSourceLineWriter::UsesGUID(bool* uses_guid) { 1518 if (!uses_guid) 1519 return false; 1520 1521 CComPtr<IDiaSymbol> global; 1522 if (FAILED(session_->get_globalScope(&global))) 1523 return false; 1524 1525 GUID guid; 1526 if (FAILED(global->get_guid(&guid))) 1527 return false; 1528 1529 DWORD signature; 1530 if (FAILED(global->get_signature(&signature))) 1531 return false; 1532 1533 // There are two possibilities for guid: either it's a real 128-bit GUID 1534 // as identified in a code module by a new-style CodeView record, or it's 1535 // a 32-bit signature (timestamp) as identified by an old-style record. 1536 // See MDCVInfoPDB70 and MDCVInfoPDB20 in minidump_format.h. 
1537 // 1538 // Because DIA doesn't provide a way to directly determine whether a module 1539 // uses a GUID or a 32-bit signature, this code checks whether the first 32 1540 // bits of guid are the same as the signature, and if the rest of guid is 1541 // zero. If so, then with a pretty high degree of certainty, there's an 1542 // old-style CodeView record in use. This method will only falsely find an 1543 // an old-style CodeView record if a real 128-bit GUID has its first 32 1544 // bits set the same as the module's signature (timestamp) and the rest of 1545 // the GUID is set to 0. This is highly unlikely. 1546 1547 GUID signature_guid = {signature}; // 0-initializes other members 1548 *uses_guid = !IsEqualGUID(guid, signature_guid); 1549 return true; 1550 } 1551 1552 } // namespace google_breakpad