/ src / common / windows / pdb_source_line_writer.cc
pdb_source_line_writer.cc
   1  // Copyright 2006 Google LLC
   2  //
   3  // Redistribution and use in source and binary forms, with or without
   4  // modification, are permitted provided that the following conditions are
   5  // met:
   6  //
   7  //     * Redistributions of source code must retain the above copyright
   8  // notice, this list of conditions and the following disclaimer.
   9  //     * Redistributions in binary form must reproduce the above
  10  // copyright notice, this list of conditions and the following disclaimer
  11  // in the documentation and/or other materials provided with the
  12  // distribution.
  13  //     * Neither the name of Google LLC nor the names of its
  14  // contributors may be used to endorse or promote products derived from
  15  // this software without specific prior written permission.
  16  //
  17  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  18  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  19  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  20  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  21  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  22  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  23  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  24  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  25  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  27  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28  
  29  #ifdef HAVE_CONFIG_H
  30  #include <config.h>  // Must come first
  31  #endif
  32  
  33  #include "common/windows/pdb_source_line_writer.h"
  34  
  35  #include <windows.h>
  36  #include <winnt.h>
  37  #include <atlbase.h>
  38  #include <dia2.h>
  39  #include <diacreate.h>
  40  #include <ImageHlp.h>
  41  #include <stdio.h>
  42  
  43  #include <algorithm>
  44  #include <limits>
  45  #include <map>
  46  #include <memory>
  47  #include <set>
  48  #include <utility>
  49  
  50  #include "common/windows/dia_util.h"
  51  #include "common/windows/guid_string.h"
  52  #include "common/windows/pe_util.h"
  53  #include "common/windows/string_utils-inl.h"
  54  
  55  // This constant may be missing from DbgHelp.h.  See the documentation for
  56  // IDiaSymbol::get_undecoratedNameEx.
  57  #ifndef UNDNAME_NO_ECSU
  58  #define UNDNAME_NO_ECSU 0x8000  // Suppresses enum/class/struct/union.
  59  #endif  // UNDNAME_NO_ECSU
  60  
  61  namespace google_breakpad {
  62  
  63  namespace {
  64  
  65  using std::set;
  66  using std::unique_ptr;
  67  using std::vector;
  68  
  69  // The symbol (among possibly many) selected to represent an rva.
  70  struct SelectedSymbol {
  71    SelectedSymbol(const CComPtr<IDiaSymbol>& symbol, bool is_public)
  72        : symbol(symbol), is_public(is_public), is_multiple(false) {}
  73  
  74    // The symbol to use for an rva.
  75    CComPtr<IDiaSymbol> symbol;
  76    // Whether this is a public or function symbol.
  77    bool is_public;
  78    // Whether the rva has multiple associated symbols. An rva will correspond to
  79    // multiple symbols in the case of linker identical symbol folding.
  80    bool is_multiple;
  81  };
  82  
  83  // Maps rva to the symbol to use for that address.
  84  typedef std::map<DWORD, SelectedSymbol> SymbolMap;
  85  
  86  // Record this in the map as the selected symbol for the rva if it satisfies the
  87  // necessary conditions.
  88  void MaybeRecordSymbol(DWORD rva,
  89                         const CComPtr<IDiaSymbol> symbol,
  90                         bool is_public,
  91                         SymbolMap* map) {
  92    SymbolMap::iterator loc = map->find(rva);
  93    if (loc == map->end()) {
  94      map->insert(std::make_pair(rva, SelectedSymbol(symbol, is_public)));
  95      return;
  96    }
  97  
  98    // Prefer function symbols to public symbols.
  99    if (is_public && !loc->second.is_public) {
 100      return;
 101    }
 102  
 103    loc->second.is_multiple = true;
 104  
 105    // Take the 'least' symbol by lexicographical order of the decorated name. We
 106    // use the decorated rather than undecorated name because computing the latter
 107    // is expensive.
 108    BSTR current_name, new_name;
 109    loc->second.symbol->get_name(&current_name);
 110    symbol->get_name(&new_name);
 111    if (wcscmp(new_name, current_name) < 0) {
 112      loc->second.symbol = symbol;
 113      loc->second.is_public = is_public;
 114    }
 115  }
 116  
 117  
 118  
 119  bool SymbolsMatch(IDiaSymbol* a, IDiaSymbol* b) {
 120    DWORD a_section, a_offset, b_section, b_offset;
 121    if (FAILED(a->get_addressSection(&a_section)) ||
 122        FAILED(a->get_addressOffset(&a_offset)) ||
 123        FAILED(b->get_addressSection(&b_section)) ||
 124        FAILED(b->get_addressOffset(&b_offset)))
 125      return false;
 126    return a_section == b_section && a_offset == b_offset;
 127  }
 128  
 129  bool CreateDiaDataSourceInstance(CComPtr<IDiaDataSource>& data_source) {
 130    if (SUCCEEDED(data_source.CoCreateInstance(CLSID_DiaSource))) {
 131      return true;
 132    }
 133  
 134    class DECLSPEC_UUID("B86AE24D-BF2F-4ac9-B5A2-34B14E4CE11D") DiaSource100;
 135    class DECLSPEC_UUID("761D3BCD-1304-41D5-94E8-EAC54E4AC172") DiaSource110;
 136    class DECLSPEC_UUID("3BFCEA48-620F-4B6B-81F7-B9AF75454C7D") DiaSource120;
 137    class DECLSPEC_UUID("E6756135-1E65-4D17-8576-610761398C3C") DiaSource140;
 138  
 139    // If the CoCreateInstance call above failed, msdia*.dll is not registered.
 140    // We can try loading the DLL corresponding to the #included DIA SDK, but
 141    // the DIA headers don't provide a version. Lets try to figure out which DIA
 142    // version we're compiling against by comparing CLSIDs.
 143    const wchar_t* msdia_dll = nullptr;
 144    if (CLSID_DiaSource == _uuidof(DiaSource100)) {
 145      msdia_dll = L"msdia100.dll";
 146    } else if (CLSID_DiaSource == _uuidof(DiaSource110)) {
 147      msdia_dll = L"msdia110.dll";
 148    } else if (CLSID_DiaSource == _uuidof(DiaSource120)) {
 149      msdia_dll = L"msdia120.dll";
 150    } else if (CLSID_DiaSource == _uuidof(DiaSource140)) {
 151      msdia_dll = L"msdia140.dll";
 152    }
 153  
 154    if (msdia_dll &&
 155        SUCCEEDED(NoRegCoCreate(msdia_dll, CLSID_DiaSource, IID_IDiaDataSource,
 156                                reinterpret_cast<void**>(&data_source)))) {
 157      return true;
 158    }
 159  
 160    return false;
 161  }
 162  
 163  const DWORD kUndecorateOptions = UNDNAME_NO_MS_KEYWORDS |
 164                                   UNDNAME_NO_FUNCTION_RETURNS |
 165                                   UNDNAME_NO_ALLOCATION_MODEL |
 166                                   UNDNAME_NO_ALLOCATION_LANGUAGE |
 167                                   UNDNAME_NO_THISTYPE |
 168                                   UNDNAME_NO_ACCESS_SPECIFIERS |
 169                                   UNDNAME_NO_THROW_SIGNATURES |
 170                                   UNDNAME_NO_MEMBER_TYPE |
 171                                   UNDNAME_NO_RETURN_UDT_MODEL |
 172                                   UNDNAME_NO_ECSU;
 173  
 174  #define arraysize(f) (sizeof(f) / sizeof(*f))
 175  
 176  void StripLlvmSuffixAndUndecorate(BSTR* name) {
 177    // LLVM sometimes puts a suffix on symbols to give them a globally unique
 178    // name. The suffix is either some string preceded by a period (like in the
 179    // Itanium ABI; also on Windows this is safe since periods are otherwise
 180    // never part of mangled names), or a dollar sign followed by a 32-char hex
 181    // string (this should go away in future LLVM versions). Strip such suffixes
 182    // and try demangling again.
 183    //
 184    //
 185    // Example symbol names with such suffixes:
 186    //
 187    //   ?foo@@YAXXZ$5520c83448162c04f2b239db4b5a2c61
 188    //   ?foo@@YAXXZ.llvm.13040715209719948753
 189  
 190    if (**name != L'?')
 191      return;  // The name is already demangled.
 192  
 193    for (size_t i = 0, len = wcslen(*name); i < len; i++) {
 194      wchar_t c = (*name)[i];
 195  
 196      if (c == L'.' || (c == L'$' && len - i == 32 + 1)) {
 197        (*name)[i] = L'\0';
 198        wchar_t undecorated[1024];
 199        DWORD res = UnDecorateSymbolNameW(*name, undecorated,
 200                                          arraysize(undecorated),
 201                                          kUndecorateOptions);
 202        if (res == 0 || undecorated[0] == L'?') {
 203          // Demangling failed; restore the symbol name and return.
 204          (*name)[i] = c;
 205          return;
 206        }
 207  
 208        SysFreeString(*name);
 209        *name = SysAllocString(undecorated);
 210        return;
 211      }
 212    }
 213  }
 214  
 215  // Prints the error message related to the error code as seen in
 216  // Microsoft's MSVS documentation for loadDataFromPdb and loadDataForExe.
 217  void PrintOpenError(HRESULT hr, const char* fn_name, const wchar_t* file) {
 218    switch (hr) {
 219      case E_PDB_NOT_FOUND:
 220        fprintf(stderr, "%s: Failed to open %ws, or the file has an "
 221                "invalid format.\n", fn_name, file);
 222        break;
 223      case E_PDB_FORMAT:
 224        fprintf(stderr, "%s: Attempted to access %ws with an obsolete "
 225                "format.\n", fn_name, file);
 226        break;
 227      case E_PDB_INVALID_SIG:
 228        fprintf(stderr, "%s: Signature does not match for %ws.\n", fn_name,
 229                file);
 230        break;
 231      case E_PDB_INVALID_AGE:
 232        fprintf(stderr, "%s: Age does not match for %ws.\n", fn_name, file);
 233        break;
 234      case E_INVALIDARG:
 235        fprintf(stderr, "%s: Invalid parameter for %ws.\n", fn_name, file);
 236        break;
 237      case E_UNEXPECTED:
 238        fprintf(stderr, "%s: Data source has already been prepared for %ws.\n",
 239                fn_name, file);
 240        break;
 241      default:
 242        fprintf(stderr, "%s: Unexpected error 0x%lx, file: %ws.\n",
 243                fn_name, hr, file);
 244        break;
 245    }
 246  }
 247  
 248  }  // namespace
 249  
 250  PDBSourceLineWriter::Inline::Inline(int inline_nest_level)
 251      : inline_nest_level_(inline_nest_level) {}
 252  
 253  void PDBSourceLineWriter::Inline::SetOriginId(int origin_id) {
 254    origin_id_ = origin_id;
 255  }
 256  
 257  void PDBSourceLineWriter::Inline::ExtendRanges(const Line& line) {
 258    if (ranges_.empty()) {
 259      ranges_[line.rva] = line.length;
 260      return;
 261    }
 262    auto iter = ranges_.lower_bound(line.rva);
 263    // There is no overlap if this function is called with inlinee lines from
 264    // the same callsite.
 265    if (iter == ranges_.begin()) {
 266      return;
 267    }
 268    if (line.rva + line.length == iter->first) {
 269      // If they are connected, merge their ranges into one.
 270      DWORD length = line.length + iter->second;
 271      ranges_.erase(iter);
 272      ranges_[line.rva] = length;
 273    } else {
 274      --iter;
 275      if (iter->first + iter->second == line.rva) {
 276        ranges_[iter->first] = iter->second + line.length;
 277      } else {
 278        ranges_[line.rva] = line.length;
 279      }
 280    }
 281  }
 282  
 283  void PDBSourceLineWriter::Inline::SetCallSiteLine(DWORD call_site_line) {
 284    call_site_line_ = call_site_line;
 285  }
 286  
 287  void PDBSourceLineWriter::Inline::SetCallSiteFileId(DWORD call_site_file_id) {
 288    call_site_file_id_ = call_site_file_id;
 289  }
 290  
 291  void PDBSourceLineWriter::Inline::SetChildInlines(
 292      vector<unique_ptr<Inline>> child_inlines) {
 293    child_inlines_ = std::move(child_inlines);
 294  }
 295  
 296  void PDBSourceLineWriter::Inline::Print(FILE* output) const {
 297    // Ignore INLINE record that doesn't have any range.
 298    if (ranges_.empty())
 299      return;
 300    fprintf(output, "INLINE %d %lu %lu %d", inline_nest_level_, call_site_line_,
 301            call_site_file_id_, origin_id_);
 302    for (const auto& r : ranges_) {
 303      fprintf(output, " %lx %lx", r.first, r.second);
 304    }
 305    fprintf(output, "\n");
 306    for (const unique_ptr<Inline>& in : child_inlines_) {
 307      in->Print(output);
 308    }
 309  }
 310  
 311  const PDBSourceLineWriter::Line* PDBSourceLineWriter::Lines::GetLine(
 312      DWORD rva) const {
 313    auto iter = line_map_.find(rva);
 314    if (iter == line_map_.end()) {
 315      // If not found exact rva, check if it's within any range.
 316      iter = line_map_.lower_bound(rva);
 317      if (iter == line_map_.begin())
 318        return nullptr;
 319      --iter;
 320      auto l = iter->second;
 321      // This happens when there is no top level lines cover this rva (e.g. empty
 322      // lines found for the function). Then we don't know the call site line
 323      // number for this inlined function.
 324      if (rva >= l.rva + l.length)
 325        return nullptr;
 326    }
 327    return &iter->second;
 328  }
 329  
 330  DWORD PDBSourceLineWriter::Lines::GetLineNum(DWORD rva) const {
 331    const Line* line = GetLine(rva);
 332    return line ? line->line_num : 0;
 333  }
 334  
 335  DWORD PDBSourceLineWriter::Lines::GetFileId(DWORD rva) const {
 336    const Line* line = GetLine(rva);
 337    return line ? line->file_id : 0;
 338  }
 339  
 340  void PDBSourceLineWriter::Lines::AddLine(const Line& line) {
 341    if (line_map_.empty()) {
 342      line_map_[line.rva] = line;
 343      return;
 344    }
 345  
 346    // Given an existing line in line_map_, remove it from line_map_ if it
 347    // overlaps with the line and add a new line for the non-overlap range. Return
 348    // true if there is an overlap.
 349    auto intercept = [&](Line old_line) {
 350      DWORD end = old_line.rva + old_line.length;
 351      // No overlap.
 352      if (old_line.rva >= line.rva + line.length || line.rva >= end)
 353        return false;
 354      // old_line is within the line.
 355      if (old_line.rva >= line.rva && end <= line.rva + line.length) {
 356        line_map_.erase(old_line.rva);
 357        return true;
 358      }
 359      // Then there is a overlap.
 360      if (old_line.rva < line.rva) {
 361        old_line.length -= end - line.rva;
 362        if (end > line.rva + line.length) {
 363          Line new_line = old_line;
 364          new_line.rva = line.rva + line.length;
 365          new_line.length = end - new_line.rva;
 366          line_map_[new_line.rva] = new_line;
 367        }
 368      } else {
 369        line_map_.erase(old_line.rva);
 370        old_line.length -= line.rva + line.length - old_line.rva;
 371        old_line.rva = line.rva + line.length;
 372      }
 373      line_map_[old_line.rva] = old_line;
 374      return true;
 375    };
 376  
 377    bool is_intercept;
 378    // Use a loop in cases that there are multiple lines within the given line.
 379    do {
 380      auto iter = line_map_.lower_bound(line.rva);
 381      if (iter == line_map_.end()) {
 382        if (!line_map_.empty()) {
 383          --iter;
 384          intercept(iter->second);
 385        }
 386        break;
 387      }
 388      is_intercept = false;
 389      if (iter != line_map_.begin()) {
 390        // Check if the given line overlaps a line with smaller in the map.
 391        auto prev = line_map_.lower_bound(line.rva);
 392        --prev;
 393        is_intercept = intercept(prev->second);
 394      }
 395      // Check if the given line overlaps a line with greater or equal rva in the
 396      // map. Using operator |= here since it's possible that there are multiple
 397      // lines with greater rva in the map overlap with the given line.
 398      is_intercept |= intercept(iter->second);
 399    } while (is_intercept);
 400    line_map_[line.rva] = line;
 401  }
 402  
 403  PDBSourceLineWriter::PDBSourceLineWriter(bool handle_inline)
 404      : output_(NULL), handle_inline_(handle_inline) {}
 405  
 406  PDBSourceLineWriter::~PDBSourceLineWriter() {
 407    Close();
 408  }
 409  
 410  bool PDBSourceLineWriter::SetCodeFile(const wstring& exe_file) {
 411    if (code_file_.empty()) {
 412      code_file_ = exe_file;
 413      return true;
 414    }
 415    // Setting a different code file path is an error.  It is success only if the
 416    // file paths are the same.
 417    return exe_file == code_file_;
 418  }
 419  
 420  bool PDBSourceLineWriter::Open(const wstring& file, FileFormat format) {
 421    Close();
 422    code_file_.clear();
 423  
 424    if (FAILED(CoInitialize(NULL))) {
 425      fprintf(stderr, "CoInitialize failed\n");
 426      return false;
 427    }
 428  
 429    CComPtr<IDiaDataSource> data_source;
 430    if (!CreateDiaDataSourceInstance(data_source)) {
 431      const int kGuidSize = 64;
 432      wchar_t classid[kGuidSize] = {0};
 433      StringFromGUID2(CLSID_DiaSource, classid, kGuidSize);
 434      fprintf(stderr, "CoCreateInstance CLSID_DiaSource %S failed "
 435              "(msdia*.dll unregistered?)\n", classid);
 436      return false;
 437    }
 438  
 439    HRESULT from_pdb_result;
 440    HRESULT for_exe_result;
 441    const wchar_t* file_name = file.c_str();
 442    switch (format) {
 443      case PDB_FILE:
 444        from_pdb_result = data_source->loadDataFromPdb(file_name);
 445        if (FAILED(from_pdb_result)) {
 446          PrintOpenError(from_pdb_result, "loadDataFromPdb", file_name);
 447          return false;
 448        }
 449        break;
 450      case EXE_FILE:
 451        for_exe_result = data_source->loadDataForExe(file_name, NULL, NULL);
 452        if (FAILED(for_exe_result)) {
 453          PrintOpenError(for_exe_result, "loadDataForExe", file_name);
 454          return false;
 455        }
 456        code_file_ = file;
 457        break;
 458      case ANY_FILE:
 459        from_pdb_result = data_source->loadDataFromPdb(file_name);
 460        if (FAILED(from_pdb_result)) {
 461          for_exe_result = data_source->loadDataForExe(file_name, NULL, NULL);
 462          if (FAILED(for_exe_result)) {
 463            PrintOpenError(from_pdb_result, "loadDataFromPdb", file_name);
 464            PrintOpenError(for_exe_result, "loadDataForExe", file_name);
 465            return false;
 466          }
 467          code_file_ = file;
 468        }
 469        break;
 470      default:
 471        fprintf(stderr, "Unknown file format\n");
 472        return false;
 473    }
 474  
 475    if (FAILED(data_source->openSession(&session_))) {
 476      fprintf(stderr, "openSession failed\n");
 477    }
 478  
 479    return true;
 480  }
 481  
 482  bool PDBSourceLineWriter::GetLine(IDiaLineNumber* dia_line, Line* line) const {
 483    if (FAILED(dia_line->get_relativeVirtualAddress(&line->rva))) {
 484      fprintf(stderr, "failed to get line rva\n");
 485      return false;
 486    }
 487  
 488    if (FAILED(dia_line->get_length(&line->length))) {
 489      fprintf(stderr, "failed to get line code length\n");
 490      return false;
 491    }
 492  
 493    DWORD dia_source_id;
 494    if (FAILED(dia_line->get_sourceFileId(&dia_source_id))) {
 495      fprintf(stderr, "failed to get line source file id\n");
 496      return false;
 497    }
 498    // duplicate file names are coalesced to share one ID
 499    line->file_id = GetRealFileID(dia_source_id);
 500  
 501    if (FAILED(dia_line->get_lineNumber(&line->line_num))) {
 502      fprintf(stderr, "failed to get line number\n");
 503      return false;
 504    }
 505    return true;
 506  }
 507  
 508  bool PDBSourceLineWriter::GetLines(IDiaEnumLineNumbers* lines,
 509                                     Lines* line_list) const {
 510    CComPtr<IDiaLineNumber> line;
 511    ULONG count;
 512  
 513    while (SUCCEEDED(lines->Next(1, &line, &count)) && count == 1) {
 514      Line l;
 515      if (!GetLine(line, &l))
 516        return false;
 517      // Silently ignore zero-length lines.
 518      if (l.length != 0)
 519        line_list->AddLine(l);
 520      line.Release();
 521    }
 522    return true;
 523  }
 524  
 525  void PDBSourceLineWriter::PrintLines(const Lines& lines) const {
 526    // The line number format is:
 527    // <rva> <line number> <source file id>
 528    for (const auto& kv : lines.GetLineMap()) {
 529      const Line& l = kv.second;
 530      AddressRangeVector ranges;
 531      MapAddressRange(image_map_, AddressRange(l.rva, l.length), &ranges);
 532      for (auto& range : ranges) {
 533        fprintf(output_, "%lx %lx %lu %lu\n", range.rva, range.length, l.line_num,
 534                l.file_id);
 535      }
 536    }
 537  }
 538  
 539  bool PDBSourceLineWriter::PrintFunction(IDiaSymbol* function,
 540                                          IDiaSymbol* block,
 541                                          bool has_multiple_symbols) {
 542    // The function format is:
 543    // FUNC <address> <length> <param_stack_size> <function>
 544    DWORD rva;
 545    if (FAILED(block->get_relativeVirtualAddress(&rva))) {
 546      fprintf(stderr, "couldn't get rva\n");
 547      return false;
 548    }
 549  
 550    ULONGLONG length;
 551    if (FAILED(block->get_length(&length))) {
 552      fprintf(stderr, "failed to get function length\n");
 553      return false;
 554    }
 555  
 556    if (length == 0) {
 557      // Silently ignore zero-length functions, which can infrequently pop up.
 558      return true;
 559    }
 560  
 561    CComBSTR name;
 562    int stack_param_size;
 563    if (!GetSymbolFunctionName(function, &name, &stack_param_size)) {
 564      return false;
 565    }
 566  
 567    // If the decorated name didn't give the parameter size, try to
 568    // calculate it.
 569    if (stack_param_size < 0) {
 570      stack_param_size = GetFunctionStackParamSize(function);
 571    }
 572  
 573    AddressRangeVector ranges;
 574    MapAddressRange(image_map_, AddressRange(rva, static_cast<DWORD>(length)),
 575                    &ranges);
 576    for (size_t i = 0; i < ranges.size(); ++i) {
 577      const char* optional_multiple_field = has_multiple_symbols ? "m " : "";
 578      fprintf(output_, "FUNC %s%lx %lx %x %ws\n", optional_multiple_field,
 579              ranges[i].rva, ranges[i].length, stack_param_size, name.m_str);
 580    }
 581  
 582    CComPtr<IDiaEnumLineNumbers> lines;
 583    if (FAILED(session_->findLinesByRVA(rva, DWORD(length), &lines))) {
 584      return false;
 585    }
 586  
 587    // Get top level lines first, which later may be split into multiple smaller
 588    // lines if any inline exists in their ranges if we want to handle inline.
 589    Lines line_list;
 590    if (!GetLines(lines, &line_list)) {
 591      return false;
 592    }
 593    if (handle_inline_) {
 594      vector<unique_ptr<Inline>> inlines;
 595      if (!GetInlines(block, &line_list, 0, &inlines)) {
 596        return false;
 597      }
 598      PrintInlines(inlines);
 599    }
 600    PrintLines(line_list);
 601    return true;
 602  }
 603  
 604  bool PDBSourceLineWriter::PrintSourceFiles() {
 605    CComPtr<IDiaSymbol> global;
 606    if (FAILED(session_->get_globalScope(&global))) {
 607      fprintf(stderr, "get_globalScope failed\n");
 608      return false;
 609    }
 610  
 611    CComPtr<IDiaEnumSymbols> compilands;
 612    if (FAILED(global->findChildren(SymTagCompiland, NULL,
 613                                    nsNone, &compilands))) {
 614      fprintf(stderr, "findChildren failed\n");
 615      return false;
 616    }
 617  
 618    // Print a dummy file with id equals 0 to represent unknown file, because
 619    // inline records might have unknown call site.
 620    fwprintf(output_, L"FILE %d unknown file\n", 0);
 621  
 622    CComPtr<IDiaSymbol> compiland;
 623    ULONG count;
 624    while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) {
 625      CComPtr<IDiaEnumSourceFiles> source_files;
 626      if (FAILED(session_->findFile(compiland, NULL, nsNone, &source_files))) {
 627        return false;
 628      }
 629      CComPtr<IDiaSourceFile> file;
 630      while (SUCCEEDED(source_files->Next(1, &file, &count)) && count == 1) {
 631        DWORD file_id;
 632        if (FAILED(file->get_uniqueId(&file_id))) {
 633          return false;
 634        }
 635  
 636        CComBSTR file_name;
 637        if (FAILED(file->get_fileName(&file_name))) {
 638          return false;
 639        }
 640  
 641        wstring file_name_string(file_name);
 642        if (!FileIDIsCached(file_name_string)) {
 643          // this is a new file name, cache it and output a FILE line.
 644          CacheFileID(file_name_string, file_id);
 645          fwprintf(output_, L"FILE %d %ws\n", file_id, file_name_string.c_str());
 646        } else {
 647          // this file name has already been seen, just save this
 648          // ID for later lookup.
 649          StoreDuplicateFileID(file_name_string, file_id);
 650        }
 651        file.Release();
 652      }
 653      compiland.Release();
 654    }
 655    return true;
 656  }
 657  
 658  bool PDBSourceLineWriter::PrintFunctions() {
 659    ULONG count = 0;
 660    DWORD rva = 0;
 661    CComPtr<IDiaSymbol> global;
 662    HRESULT hr;
 663  
 664    if (FAILED(session_->get_globalScope(&global))) {
 665      fprintf(stderr, "get_globalScope failed\n");
 666      return false;
 667    }
 668  
 669    CComPtr<IDiaEnumSymbols> symbols = NULL;
 670  
 671    // Find all function symbols first.
 672    SymbolMap rva_symbol;
 673    hr = global->findChildren(SymTagFunction, NULL, nsNone, &symbols);
 674  
 675    if (SUCCEEDED(hr)) {
 676      CComPtr<IDiaSymbol> symbol = NULL;
 677  
 678      while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) {
 679        if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) {
 680          // Potentially record this as the canonical symbol for this rva.
 681          MaybeRecordSymbol(rva, symbol, false, &rva_symbol);
 682        } else {
 683          fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n");
 684          return false;
 685        }
 686  
 687        symbol.Release();
 688      }
 689  
 690      symbols.Release();
 691    }
 692  
 693    // Find all public symbols and record public symbols that are not also private
 694    // symbols.
 695    hr = global->findChildren(SymTagPublicSymbol, NULL, nsNone, &symbols);
 696  
 697    if (SUCCEEDED(hr)) {
 698      CComPtr<IDiaSymbol> symbol = NULL;
 699  
 700      while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) {
 701        if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) {
 702          // Potentially record this as the canonical symbol for this rva.
 703          MaybeRecordSymbol(rva, symbol, true, &rva_symbol);
 704        } else {
 705          fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n");
 706          return false;
 707        }
 708  
 709        symbol.Release();
 710      }
 711  
 712      symbols.Release();
 713    }
 714  
 715    // For each rva, dump the selected symbol at the address.
 716    SymbolMap::iterator it;
 717    for (it = rva_symbol.begin(); it != rva_symbol.end(); ++it) {
 718      CComPtr<IDiaSymbol> symbol = it->second.symbol;
 719      // Only print public symbols if there is no function symbol for the address.
 720      if (!it->second.is_public) {
 721        if (!PrintFunction(symbol, symbol, it->second.is_multiple))
 722          return false;
 723      } else {
 724        if (!PrintCodePublicSymbol(symbol, it->second.is_multiple))
 725          return false;
 726      }
 727    }
 728  
 729    // When building with PGO, the compiler can split functions into
 730    // "hot" and "cold" blocks, and move the "cold" blocks out to separate
 731    // pages, so the function can be noncontiguous. To find these blocks,
 732    // we have to iterate over all the compilands, and then find blocks
 733    // that are children of them. We can then find the lexical parents
 734    // of those blocks and print out an extra FUNC line for blocks
 735    // that are not contained in their parent functions.
 736    CComPtr<IDiaEnumSymbols> compilands;
 737    if (FAILED(global->findChildren(SymTagCompiland, NULL,
 738                                    nsNone, &compilands))) {
 739      fprintf(stderr, "findChildren failed on the global\n");
 740      return false;
 741    }
 742  
 743    CComPtr<IDiaSymbol> compiland;
 744    while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) {
 745      CComPtr<IDiaEnumSymbols> blocks;
 746      if (FAILED(compiland->findChildren(SymTagBlock, NULL,
 747                                         nsNone, &blocks))) {
 748        fprintf(stderr, "findChildren failed on a compiland\n");
 749        return false;
 750      }
 751  
 752      CComPtr<IDiaSymbol> block;
 753      while (SUCCEEDED(blocks->Next(1, &block, &count)) && count == 1) {
 754        // find this block's lexical parent function
 755        CComPtr<IDiaSymbol> parent;
 756        DWORD tag;
 757        if (SUCCEEDED(block->get_lexicalParent(&parent)) &&
 758            SUCCEEDED(parent->get_symTag(&tag)) &&
 759            tag == SymTagFunction) {
 760          // now get the block's offset and the function's offset and size,
 761          // and determine if the block is outside of the function
 762          DWORD func_rva, block_rva;
 763          ULONGLONG func_length;
 764          if (SUCCEEDED(block->get_relativeVirtualAddress(&block_rva)) &&
 765              SUCCEEDED(parent->get_relativeVirtualAddress(&func_rva)) &&
 766              SUCCEEDED(parent->get_length(&func_length))) {
 767            if (block_rva < func_rva || block_rva > (func_rva + func_length)) {
 768              if (!PrintFunction(parent, block, false)) {
 769                return false;
 770              }
 771            }
 772          }
 773        }
 774        parent.Release();
 775        block.Release();
 776      }
 777      blocks.Release();
 778      compiland.Release();
 779    }
 780  
 781    global.Release();
 782    return true;
 783  }
 784  
 785  void PDBSourceLineWriter::PrintInlineOrigins() const {
 786    struct OriginCompare {
 787      bool operator()(const InlineOrigin lhs, const InlineOrigin rhs) const {
 788        return lhs.id < rhs.id;
 789      }
 790    };
 791    set<InlineOrigin, OriginCompare> origins;
 792    // Sort by origin id.
 793    for (auto const& origin : inline_origins_)
 794      origins.insert(origin.second);
 795    for (auto o : origins) {
 796      fprintf(output_, "INLINE_ORIGIN %d %ls\n", o.id, o.name.c_str());
 797    }
 798  }
 799  
 800  bool PDBSourceLineWriter::GetInlines(IDiaSymbol* block,
 801                                       Lines* line_list,
 802                                       int inline_nest_level,
 803                                       vector<unique_ptr<Inline>>* inlines) {
 804    CComPtr<IDiaEnumSymbols> inline_callsites;
 805    if (FAILED(block->findChildrenEx(SymTagInlineSite, nullptr, nsNone,
 806                                     &inline_callsites))) {
 807      return false;
 808    }
 809    ULONG count;
 810    CComPtr<IDiaSymbol> callsite;
 811    while (SUCCEEDED(inline_callsites->Next(1, &callsite, &count)) &&
 812           count == 1) {
 813      unique_ptr<Inline> new_inline(new Inline(inline_nest_level));
 814      CComPtr<IDiaEnumLineNumbers> lines;
 815      // All inlinee lines have the same file id.
 816      DWORD file_id = 0;
 817      DWORD call_site_line = 0;
 818      if (FAILED(session_->findInlineeLines(callsite, &lines))) {
 819        return false;
 820      }
 821      CComPtr<IDiaLineNumber> dia_line;
 822      while (SUCCEEDED(lines->Next(1, &dia_line, &count)) && count == 1) {
 823        Line line;
 824        if (!GetLine(dia_line, &line)) {
 825          return false;
 826        }
 827        // Silently ignore zero-length lines.
 828        if (line.length != 0) {
 829          // Use the first line num and file id at rva as this inline's call site
 830          // line number, because after adding lines it may be changed to inner
 831          // line number and inner file id.
 832          if (call_site_line == 0)
 833            call_site_line = line_list->GetLineNum(line.rva);
 834          if (file_id == 0)
 835            file_id = line_list->GetFileId(line.rva);
 836          line_list->AddLine(line);
 837          new_inline->ExtendRanges(line);
 838        }
 839        dia_line.Release();
 840      }
 841      BSTR name;
 842      callsite->get_name(&name);
 843      if (SysStringLen(name) == 0) {
 844        name = SysAllocString(L"<name omitted>");
 845      }
 846      auto iter = inline_origins_.find(name);
 847      if (iter == inline_origins_.end()) {
 848        InlineOrigin origin;
 849        origin.id = inline_origins_.size();
 850        origin.name = name;
 851        inline_origins_[name] = origin;
 852      }
 853      new_inline->SetOriginId(inline_origins_[name].id);
 854      new_inline->SetCallSiteLine(call_site_line);
 855      new_inline->SetCallSiteFileId(file_id);
 856      // Go to next level.
 857      vector<unique_ptr<Inline>> child_inlines;
 858      if (!GetInlines(callsite, line_list, inline_nest_level + 1,
 859                      &child_inlines)) {
 860        return false;
 861      }
 862      new_inline->SetChildInlines(std::move(child_inlines));
 863      inlines->push_back(std::move(new_inline));
 864      callsite.Release();
 865    }
 866    return true;
 867  }
 868  
 869  void PDBSourceLineWriter::PrintInlines(
 870      const vector<unique_ptr<Inline>>& inlines) const {
 871    for (const unique_ptr<Inline>& in : inlines) {
 872      in->Print(output_);
 873    }
 874  }
 875  
 876  #undef max
 877  
 878  bool PDBSourceLineWriter::PrintFrameDataUsingPDB() {
 879    // It would be nice if it were possible to output frame data alongside the
 880    // associated function, as is done with line numbers, but the DIA API
 881    // doesn't make it possible to get the frame data in that way.
 882  
 883    CComPtr<IDiaEnumFrameData> frame_data_enum;
 884    if (!FindTable(session_, &frame_data_enum))
 885      return false;
 886  
 887    DWORD last_type = std::numeric_limits<DWORD>::max();
 888    DWORD last_rva = std::numeric_limits<DWORD>::max();
 889    DWORD last_code_size = 0;
 890    DWORD last_prolog_size = std::numeric_limits<DWORD>::max();
 891  
 892    CComPtr<IDiaFrameData> frame_data;
 893    ULONG count = 0;
 894    while (SUCCEEDED(frame_data_enum->Next(1, &frame_data, &count)) &&
 895           count == 1) {
 896      DWORD type;
 897      if (FAILED(frame_data->get_type(&type)))
 898        return false;
 899  
 900      DWORD rva;
 901      if (FAILED(frame_data->get_relativeVirtualAddress(&rva)))
 902        return false;
 903  
 904      DWORD code_size;
 905      if (FAILED(frame_data->get_lengthBlock(&code_size)))
 906        return false;
 907  
 908      DWORD prolog_size;
 909      if (FAILED(frame_data->get_lengthProlog(&prolog_size)))
 910        return false;
 911  
 912      // parameter_size is the size of parameters passed on the stack.  If any
 913      // parameters are not passed on the stack (such as in registers), their
 914      // sizes will not be included in parameter_size.
 915      DWORD parameter_size;
 916      if (FAILED(frame_data->get_lengthParams(&parameter_size)))
 917        return false;
 918  
 919      DWORD saved_register_size;
 920      if (FAILED(frame_data->get_lengthSavedRegisters(&saved_register_size)))
 921        return false;
 922  
 923      DWORD local_size;
 924      if (FAILED(frame_data->get_lengthLocals(&local_size)))
 925        return false;
 926  
 927      // get_maxStack can return S_FALSE, just use 0 in that case.
 928      DWORD max_stack_size = 0;
 929      if (FAILED(frame_data->get_maxStack(&max_stack_size)))
 930        return false;
 931  
 932      // get_programString can return S_FALSE, indicating that there is no
 933      // program string.  In that case, check whether %ebp is used.
 934      HRESULT program_string_result;
 935      CComBSTR program_string;
 936      if (FAILED(program_string_result = frame_data->get_program(
 937          &program_string))) {
 938        return false;
 939      }
 940  
 941      // get_allocatesBasePointer can return S_FALSE, treat that as though
 942      // %ebp is not used.
 943      BOOL allocates_base_pointer = FALSE;
 944      if (program_string_result != S_OK) {
 945        if (FAILED(frame_data->get_allocatesBasePointer(
 946            &allocates_base_pointer))) {
 947          return false;
 948        }
 949      }
 950  
 951      // Only print out a line if type, rva, code_size, or prolog_size have
 952      // changed from the last line.  It is surprisingly common (especially in
 953      // system library PDBs) for DIA to return a series of identical
 954      // IDiaFrameData objects.  For kernel32.pdb from Windows XP SP2 on x86,
 955      // this check reduces the size of the dumped symbol file by a third.
 956      if (type != last_type || rva != last_rva || code_size != last_code_size ||
 957          prolog_size != last_prolog_size) {
 958        // The prolog and the code portions of the frame have to be treated
 959        // independently as they may have independently changed in size, or may
 960        // even have been split.
 961        // NOTE: If epilog size is ever non-zero, we have to do something
 962        //     similar with it.
 963  
 964        // Figure out where the prolog bytes have landed.
 965        AddressRangeVector prolog_ranges;
 966        if (prolog_size > 0) {
 967          MapAddressRange(image_map_, AddressRange(rva, prolog_size),
 968                          &prolog_ranges);
 969        }
 970  
 971        // And figure out where the code bytes have landed.
 972        AddressRangeVector code_ranges;
 973        MapAddressRange(image_map_,
 974                        AddressRange(rva + prolog_size,
 975                                     code_size - prolog_size),
 976                        &code_ranges);
 977  
 978        struct FrameInfo {
 979          DWORD rva;
 980          DWORD code_size;
 981          DWORD prolog_size;
 982        };
 983        std::vector<FrameInfo> frame_infos;
 984  
 985        // Special case: The prolog and the code bytes remain contiguous. This is
 986        // only done for compactness of the symbol file, and we could actually
 987        // be outputting independent frame info for the prolog and code portions.
 988        if (prolog_ranges.size() == 1 && code_ranges.size() == 1 &&
 989            prolog_ranges[0].end() == code_ranges[0].rva) {
 990          FrameInfo fi = { prolog_ranges[0].rva,
 991                           prolog_ranges[0].length + code_ranges[0].length,
 992                           prolog_ranges[0].length };
 993          frame_infos.push_back(fi);
 994        } else {
 995          // Otherwise we output the prolog and code frame info independently.
 996          for (size_t i = 0; i < prolog_ranges.size(); ++i) {
 997            FrameInfo fi = { prolog_ranges[i].rva,
 998                             prolog_ranges[i].length,
 999                             prolog_ranges[i].length };
1000            frame_infos.push_back(fi);
1001          }
1002          for (size_t i = 0; i < code_ranges.size(); ++i) {
1003            FrameInfo fi = { code_ranges[i].rva, code_ranges[i].length, 0 };
1004            frame_infos.push_back(fi);
1005          }
1006        }
1007  
1008        for (size_t i = 0; i < frame_infos.size(); ++i) {
1009          const FrameInfo& fi(frame_infos[i]);
1010          fprintf(output_, "STACK WIN %lx %lx %lx %lx %x %lx %lx %lx %lx %d ",
1011                  type, fi.rva, fi.code_size, fi.prolog_size,
1012                  0 /* epilog_size */, parameter_size, saved_register_size,
1013                  local_size, max_stack_size, program_string_result == S_OK);
1014          if (program_string_result == S_OK) {
1015            fprintf(output_, "%ws\n", program_string.m_str);
1016          } else {
1017            fprintf(output_, "%d\n", allocates_base_pointer);
1018          }
1019        }
1020  
1021        last_type = type;
1022        last_rva = rva;
1023        last_code_size = code_size;
1024        last_prolog_size = prolog_size;
1025      }
1026  
1027      frame_data.Release();
1028    }
1029  
1030    return true;
1031  }
1032  
1033  bool PDBSourceLineWriter::PrintFrameDataUsingEXE() {
1034    if (code_file_.empty() && !FindPEFile()) {
1035      fprintf(stderr, "Couldn't locate EXE or DLL file.\n");
1036      return false;
1037    }
1038  
1039    return PrintPEFrameData(code_file_, output_);
1040  }
1041  
1042  bool PDBSourceLineWriter::PrintFrameData() {
1043    PDBModuleInfo info;
1044    if (GetModuleInfo(&info) && info.cpu == L"x86_64") {
1045      return PrintFrameDataUsingEXE();
1046    }
1047    return PrintFrameDataUsingPDB();
1048  }
1049  
1050  bool PDBSourceLineWriter::PrintCodePublicSymbol(IDiaSymbol* symbol,
1051                                                  bool has_multiple_symbols) {
1052    BOOL is_code;
1053    if (FAILED(symbol->get_code(&is_code))) {
1054      return false;
1055    }
1056    if (!is_code) {
1057      return true;
1058    }
1059  
1060    DWORD rva;
1061    if (FAILED(symbol->get_relativeVirtualAddress(&rva))) {
1062      return false;
1063    }
1064  
1065    CComBSTR name;
1066    int stack_param_size;
1067    if (!GetSymbolFunctionName(symbol, &name, &stack_param_size)) {
1068      return false;
1069    }
1070  
1071    AddressRangeVector ranges;
1072    MapAddressRange(image_map_, AddressRange(rva, 1), &ranges);
1073    for (size_t i = 0; i < ranges.size(); ++i) {
1074      const char* optional_multiple_field = has_multiple_symbols ? "m " : "";
1075      fprintf(output_, "PUBLIC %s%lx %x %ws\n", optional_multiple_field,
1076              ranges[i].rva, stack_param_size > 0 ? stack_param_size : 0,
1077              name.m_str);
1078    }
1079  
1080    // Now walk the function in the original untranslated space, asking DIA
1081    // what function is at that location, stepping through OMAP blocks. If
1082    // we're still in the same function, emit another entry, because the
1083    // symbol could have been split into multiple pieces. If we've gotten to
1084    // another symbol in the original address space, then we're done for
1085    // this symbol. See https://crbug.com/678874.
1086    for (;;) {
1087      // This steps to the next block in the original image. Simply doing
1088      // rva++ would also be correct, but would emit tons of unnecessary
1089      // entries.
1090      rva = image_map_.subsequent_rva_block[rva];
1091      if (rva == 0)
1092        break;
1093  
1094      CComPtr<IDiaSymbol> next_sym = NULL;
1095      LONG displacement;
1096      if (FAILED(session_->findSymbolByRVAEx(rva, SymTagPublicSymbol, &next_sym,
1097                                             &displacement))) {
1098        break;
1099      }
1100  
1101      if (!SymbolsMatch(symbol, next_sym))
1102        break;
1103  
1104      AddressRangeVector next_ranges;
1105      MapAddressRange(image_map_, AddressRange(rva, 1), &next_ranges);
1106      for (size_t i = 0; i < next_ranges.size(); ++i) {
1107        fprintf(output_, "PUBLIC %lx %x %ws\n", next_ranges[i].rva,
1108                stack_param_size > 0 ? stack_param_size : 0, name.m_str);
1109      }
1110    }
1111  
1112    return true;
1113  }
1114  
1115  bool PDBSourceLineWriter::PrintPDBInfo() {
1116    PDBModuleInfo info;
1117    if (!GetModuleInfo(&info)) {
1118      return false;
1119    }
1120  
1121    // Hard-code "windows" for the OS because that's the only thing that makes
1122    // sense for PDB files.  (This might not be strictly correct for Windows CE
1123    // support, but we don't care about that at the moment.)
1124    fprintf(output_, "MODULE windows %ws %ws %ws\n",
1125            info.cpu.c_str(), info.debug_identifier.c_str(),
1126            info.debug_file.c_str());
1127  
1128    return true;
1129  }
1130  
1131  bool PDBSourceLineWriter::PrintPEInfo() {
1132    PEModuleInfo info;
1133    if (!GetPEInfo(&info)) {
1134      return false;
1135    }
1136  
1137    fprintf(output_, "INFO CODE_ID %ws %ws\n",
1138            info.code_identifier.c_str(),
1139            info.code_file.c_str());
1140    return true;
1141  }
1142  
// wcstol_positive_strict is sort of like wcstol, but much stricter.  string
// should point to a null-terminated string containing only decimal digits.
// If the entire string can be converted to an int without overflowing, and
// it contains no non-digit characters and no leading zeroes (the string "0"
// itself is fine), *result is set to the value and this function returns
// true.  Otherwise this function returns false and *result is left
// untouched.  Note that an empty string converts successfully to 0.
//
// This is an alternative to the strtol, atoi, and scanf families, which are
// not as strict about input and in some cases don't provide a good way for
// the caller to determine if a conversion was successful.
static bool wcstol_positive_strict(const wchar_t* string, int* result) {
  int value = 0;
  for (const wchar_t* c = string; *c != L'\0'; ++c) {
    if (*c < L'0' || *c > L'9') {
      return false;
    }
    const int digit = static_cast<int>(*c - L'0');

    // Check for overflow *before* doing the arithmetic: signed overflow is
    // undefined behavior, so it can't be reliably detected after the fact.
    if (value > INT_MAX / 10) {
      return false;
    }
    value *= 10;
    if (digit > INT_MAX - value) {
      return false;
    }
    value += digit;

    // Forbid leading zeroes unless the string is just "0".
    if (value == 0 && *(c + 1) != L'\0') {
      return false;
    }
  }
  *result = value;
  return true;
}
1179  
1180  bool PDBSourceLineWriter::FindPEFile() {
1181    CComPtr<IDiaSymbol> global;
1182    if (FAILED(session_->get_globalScope(&global))) {
1183      fprintf(stderr, "get_globalScope failed\n");
1184      return false;
1185    }
1186  
1187    CComBSTR symbols_file;
1188    if (SUCCEEDED(global->get_symbolsFileName(&symbols_file))) {
1189      wstring file(symbols_file);
1190  
1191      // Look for an EXE or DLL file.
1192      const wchar_t* extensions[] = { L"exe", L"dll" };
1193      for (size_t i = 0; i < sizeof(extensions) / sizeof(extensions[0]); i++) {
1194        size_t dot_pos = file.find_last_of(L".");
1195        if (dot_pos != wstring::npos) {
1196          file.replace(dot_pos + 1, wstring::npos, extensions[i]);
1197          // Check if this file exists.
1198          if (GetFileAttributesW(file.c_str()) != INVALID_FILE_ATTRIBUTES) {
1199            code_file_ = file;
1200            return true;
1201          }
1202        }
1203      }
1204    }
1205  
1206    return false;
1207  }
1208  
1209  // static
1210  bool PDBSourceLineWriter::GetSymbolFunctionName(IDiaSymbol* function,
1211                                                  BSTR* name,
1212                                                  int* stack_param_size) {
1213    *stack_param_size = -1;
1214  
1215    // Use get_undecoratedNameEx to get readable C++ names with arguments.
1216    if (function->get_undecoratedNameEx(kUndecorateOptions, name) != S_OK) {
1217      if (function->get_name(name) != S_OK) {
1218        fprintf(stderr, "failed to get function name\n");
1219        return false;
1220      }
1221  
1222      // It's possible for get_name to return an empty string, so
1223      // special-case that.
1224      if (wcscmp(*name, L"") == 0) {
1225        SysFreeString(*name);
1226        // dwarf_cu_to_module.cc uses "<name omitted>", so match that.
1227        *name = SysAllocString(L"<name omitted>");
1228        return true;
1229      }
1230  
1231      // If a name comes from get_name because no undecorated form existed,
1232      // it's already formatted properly to be used as output.  Don't do any
1233      // additional processing.
1234      //
1235      // MSVC7's DIA seems to not undecorate names in as many cases as MSVC8's.
1236      // This will result in calling get_name for some C++ symbols, so
1237      // all of the parameter and return type information may not be included in
1238      // the name string.
1239    } else {
1240      StripLlvmSuffixAndUndecorate(name);
1241  
1242      // C++ uses a bogus "void" argument for functions and methods that don't
1243      // take any parameters.  Take it out of the undecorated name because it's
1244      // ugly and unnecessary.
1245      const wchar_t* replace_string = L"(void)";
1246      const size_t replace_length = wcslen(replace_string);
1247      const wchar_t* replacement_string = L"()";
1248      size_t length = wcslen(*name);
1249      if (length >= replace_length) {
1250        wchar_t* name_end = *name + length - replace_length;
1251        if (wcscmp(name_end, replace_string) == 0) {
1252          WindowsStringUtils::safe_wcscpy(name_end, replace_length,
1253                                          replacement_string);
1254          length = wcslen(*name);
1255        }
1256      }
1257  
1258      // Undecorate names used for stdcall and fastcall.  These names prefix
1259      // the identifier with '_' (stdcall) or '@' (fastcall) and suffix it
1260      // with '@' followed by the number of bytes of parameters, in decimal.
1261      // If such a name is found, take note of the size and undecorate it.
1262      // Only do this for names that aren't C++, which is determined based on
1263      // whether the undecorated name contains any ':' or '(' characters.
1264      if (!wcschr(*name, ':') && !wcschr(*name, '(') &&
1265          (*name[0] == '_' || *name[0] == '@')) {
1266        wchar_t* last_at = wcsrchr(*name + 1, '@');
1267        if (last_at && wcstol_positive_strict(last_at + 1, stack_param_size)) {
1268          // If this function adheres to the fastcall convention, it accepts up
1269          // to the first 8 bytes of parameters in registers (%ecx and %edx).
1270          // We're only interested in the stack space used for parameters, so
1271          // so subtract 8 and don't let the size go below 0.
1272          if (*name[0] == '@') {
1273            if (*stack_param_size > 8) {
1274              *stack_param_size -= 8;
1275            } else {
1276              *stack_param_size = 0;
1277            }
1278          }
1279  
1280          // Undecorate the name by moving it one character to the left in its
1281          // buffer, and terminating it where the last '@' had been.
1282          WindowsStringUtils::safe_wcsncpy(*name, length,
1283                                           *name + 1, last_at - *name - 1);
1284       } else if (*name[0] == '_') {
1285          // This symbol's name is encoded according to the cdecl rules.  The
1286          // name doesn't end in a '@' character followed by a decimal positive
1287          // integer, so it's not a stdcall name.  Strip off the leading
1288          // underscore.
1289          WindowsStringUtils::safe_wcsncpy(*name, length, *name + 1, length);
1290        }
1291      }
1292    }
1293  
1294    return true;
1295  }
1296  
1297  // static
1298  int PDBSourceLineWriter::GetFunctionStackParamSize(IDiaSymbol* function) {
1299    // This implementation is highly x86-specific.
1300  
1301    // Gather the symbols corresponding to data.
1302    CComPtr<IDiaEnumSymbols> data_children;
1303    if (FAILED(function->findChildren(SymTagData, NULL, nsNone,
1304                                      &data_children))) {
1305      return 0;
1306    }
1307  
1308    // lowest_base is the lowest %ebp-relative byte offset used for a parameter.
1309    // highest_end is one greater than the highest offset (i.e. base + length).
1310    // Stack parameters are assumed to be contiguous, because in reality, they
1311    // are.
1312    int lowest_base = INT_MAX;
1313    int highest_end = INT_MIN;
1314  
1315    CComPtr<IDiaSymbol> child;
1316    DWORD count;
1317    while (SUCCEEDED(data_children->Next(1, &child, &count)) && count == 1) {
1318      // If any operation fails at this point, just proceed to the next child.
1319      // Use the next_child label instead of continue because child needs to
1320      // be released before it's reused.  Declare constructable/destructable
1321      // types early to avoid gotos that cross initializations.
1322      CComPtr<IDiaSymbol> child_type;
1323  
1324      // DataIsObjectPtr is only used for |this|.  Because |this| can be passed
1325      // as a stack parameter, look for it in addition to traditional
1326      // parameters.
1327      DWORD child_kind;
1328      if (FAILED(child->get_dataKind(&child_kind)) ||
1329          (child_kind != DataIsParam && child_kind != DataIsObjectPtr)) {
1330        goto next_child;
1331      }
1332  
1333      // Only concentrate on register-relative parameters.  Parameters may also
1334      // be enregistered (passed directly in a register), but those don't
1335      // consume any stack space, so they're not of interest.
1336      DWORD child_location_type;
1337      if (FAILED(child->get_locationType(&child_location_type)) ||
1338          child_location_type != LocIsRegRel) {
1339        goto next_child;
1340      }
1341  
1342      // Of register-relative parameters, the only ones that make any sense are
1343      // %ebp- or %esp-relative.  Note that MSVC's debugging information always
1344      // gives parameters as %ebp-relative even when a function doesn't use a
1345      // traditional frame pointer and stack parameters are accessed relative to
1346      // %esp, so just look for %ebp-relative parameters.  If you wanted to
1347      // access parameters, you'd probably want to treat these %ebp-relative
1348      // offsets as if they were relative to %esp before a function's prolog
1349      // executed.
1350      DWORD child_register;
1351      if (FAILED(child->get_registerId(&child_register)) ||
1352          child_register != CV_REG_EBP) {
1353        goto next_child;
1354      }
1355  
1356      LONG child_register_offset;
1357      if (FAILED(child->get_offset(&child_register_offset))) {
1358        goto next_child;
1359      }
1360  
1361      // IDiaSymbol::get_type can succeed but still pass back a NULL value.
1362      if (FAILED(child->get_type(&child_type)) || !child_type) {
1363        goto next_child;
1364      }
1365  
1366      ULONGLONG child_length;
1367      if (FAILED(child_type->get_length(&child_length))) {
1368        goto next_child;
1369      }
1370  
1371      // Extra scope to avoid goto jumping over variable initialization
1372      {
1373        int child_end = child_register_offset + static_cast<ULONG>(child_length);
1374        if (child_register_offset < lowest_base) {
1375          lowest_base = child_register_offset;
1376        }
1377        if (child_end > highest_end) {
1378          highest_end = child_end;
1379        }
1380      }
1381  
1382  next_child:
1383      child.Release();
1384    }
1385  
1386    int param_size = 0;
1387    // Make sure lowest_base isn't less than 4, because [%esp+4] is the lowest
1388    // possible address to find a stack parameter before executing a function's
1389    // prolog (see above).  Some optimizations cause parameter offsets to be
1390    // lower than 4, but we're not concerned with those because we're only
1391    // looking for parameters contained in addresses higher than where the
1392    // return address is stored.
1393    if (lowest_base < 4) {
1394      lowest_base = 4;
1395    }
1396    if (highest_end > lowest_base) {
1397      // All stack parameters are pushed as at least 4-byte quantities.  If the
1398      // last type was narrower than 4 bytes, promote it.  This assumes that all
1399      // parameters' offsets are 4-byte-aligned, which is always the case.  Only
1400      // worry about the last type, because we're not summing the type sizes,
1401      // just looking at the lowest and highest offsets.
1402      int remainder = highest_end % 4;
1403      if (remainder) {
1404        highest_end += 4 - remainder;
1405      }
1406  
1407      param_size = highest_end - lowest_base;
1408    }
1409  
1410    return param_size;
1411  }
1412  
1413  bool PDBSourceLineWriter::WriteSymbols(FILE* symbol_file) {
1414    output_ = symbol_file;
1415  
1416    // Load the OMAP information, and disable auto-translation of addresses in
1417    // preference of doing it ourselves.
1418    OmapData omap_data;
1419    if (!GetOmapDataAndDisableTranslation(session_, &omap_data))
1420      return false;
1421    BuildImageMap(omap_data, &image_map_);
1422  
1423    bool ret = PrintPDBInfo();
1424    // This is not a critical piece of the symbol file.
1425    PrintPEInfo();
1426    ret = ret && PrintSourceFiles() && PrintFunctions() && PrintFrameData();
1427    PrintInlineOrigins();
1428  
1429    output_ = NULL;
1430    return ret;
1431  }
1432  
1433  void PDBSourceLineWriter::Close() {
1434    if (session_ != nullptr) {
1435      session_.Release();
1436    }
1437  }
1438  
1439  bool PDBSourceLineWriter::GetModuleInfo(PDBModuleInfo* info) {
1440    if (!info) {
1441      return false;
1442    }
1443  
1444    info->debug_file.clear();
1445    info->debug_identifier.clear();
1446    info->cpu.clear();
1447  
1448    CComPtr<IDiaSymbol> global;
1449    if (FAILED(session_->get_globalScope(&global))) {
1450      return false;
1451    }
1452  
1453    DWORD machine_type;
1454    // get_machineType can return S_FALSE.
1455    if (global->get_machineType(&machine_type) == S_OK) {
1456      // The documentation claims that get_machineType returns a value from
1457      // the CV_CPU_TYPE_e enumeration, but that's not the case.
1458      // Instead, it returns one of the IMAGE_FILE_MACHINE values as
1459      // defined here:
1460      // http://msdn.microsoft.com/en-us/library/ms680313%28VS.85%29.aspx
1461      info->cpu = FileHeaderMachineToCpuString(static_cast<WORD>(machine_type));
1462    } else {
1463      // Unexpected, but handle gracefully.
1464      info->cpu = L"unknown";
1465    }
1466  
1467    // DWORD* and int* are not compatible.  This is clean and avoids a cast.
1468    DWORD age;
1469    if (FAILED(global->get_age(&age))) {
1470      return false;
1471    }
1472  
1473    bool uses_guid;
1474    if (!UsesGUID(&uses_guid)) {
1475      return false;
1476    }
1477  
1478    if (uses_guid) {
1479      GUID guid;
1480      if (FAILED(global->get_guid(&guid))) {
1481        return false;
1482      }
1483  
1484      info->debug_identifier = GenerateDebugIdentifier(age, guid);
1485    } else {
1486      DWORD signature;
1487      if (FAILED(global->get_signature(&signature))) {
1488        return false;
1489      }
1490  
1491      info->debug_identifier = GenerateDebugIdentifier(age, signature);
1492    }
1493  
1494    CComBSTR debug_file_string;
1495    if (FAILED(global->get_symbolsFileName(&debug_file_string))) {
1496      return false;
1497    }
1498    info->debug_file =
1499        WindowsStringUtils::GetBaseName(wstring(debug_file_string));
1500  
1501    return true;
1502  }
1503  
1504  bool PDBSourceLineWriter::GetPEInfo(PEModuleInfo* info) {
1505    if (!info) {
1506      return false;
1507    }
1508  
1509    if (code_file_.empty() && !FindPEFile()) {
1510      fprintf(stderr, "Couldn't locate EXE or DLL file.\n");
1511      return false;
1512    }
1513  
1514    return ReadPEInfo(code_file_, info);
1515  }
1516  
1517  bool PDBSourceLineWriter::UsesGUID(bool* uses_guid) {
1518    if (!uses_guid)
1519      return false;
1520  
1521    CComPtr<IDiaSymbol> global;
1522    if (FAILED(session_->get_globalScope(&global)))
1523      return false;
1524  
1525    GUID guid;
1526    if (FAILED(global->get_guid(&guid)))
1527      return false;
1528  
1529    DWORD signature;
1530    if (FAILED(global->get_signature(&signature)))
1531      return false;
1532  
1533    // There are two possibilities for guid: either it's a real 128-bit GUID
1534    // as identified in a code module by a new-style CodeView record, or it's
1535    // a 32-bit signature (timestamp) as identified by an old-style record.
1536    // See MDCVInfoPDB70 and MDCVInfoPDB20 in minidump_format.h.
1537    //
1538    // Because DIA doesn't provide a way to directly determine whether a module
1539    // uses a GUID or a 32-bit signature, this code checks whether the first 32
1540    // bits of guid are the same as the signature, and if the rest of guid is
1541    // zero.  If so, then with a pretty high degree of certainty, there's an
1542    // old-style CodeView record in use.  This method will only falsely find an
1543    // an old-style CodeView record if a real 128-bit GUID has its first 32
1544    // bits set the same as the module's signature (timestamp) and the rest of
1545    // the GUID is set to 0.  This is highly unlikely.
1546  
1547    GUID signature_guid = {signature};  // 0-initializes other members
1548    *uses_guid = !IsEqualGUID(guid, signature_guid);
1549    return true;
1550  }
1551  
1552  }  // namespace google_breakpad