/ src / common / module.cc
module.cc
  1  // Copyright 2011 Google LLC
  2  //
  3  // Redistribution and use in source and binary forms, with or without
  4  // modification, are permitted provided that the following conditions are
  5  // met:
  6  //
  7  //     * Redistributions of source code must retain the above copyright
  8  // notice, this list of conditions and the following disclaimer.
  9  //     * Redistributions in binary form must reproduce the above
 10  // copyright notice, this list of conditions and the following disclaimer
 11  // in the documentation and/or other materials provided with the
 12  // distribution.
 13  //     * Neither the name of Google LLC nor the names of its
 14  // contributors may be used to endorse or promote products derived from
 15  // this software without specific prior written permission.
 16  //
 17  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 21  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 22  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 23  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 27  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28  
 29  // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
 30  
 31  // module.cc: Implement google_breakpad::Module.  See module.h.
 32  
 33  #ifdef HAVE_CONFIG_H
 34  #include <config.h>  // Must come first
 35  #endif
 36  
 37  #include "common/module.h"
 38  #include "common/string_view.h"
 39  
 40  #include <assert.h>
 41  #include <errno.h>
 42  #include <stdio.h>
 43  #include <string.h>
 44  
 45  #include <functional>
 46  #include <iostream>
 47  #include <memory>
 48  #include <utility>
 49  
 50  namespace google_breakpad {
 51  
 52  using std::dec;
 53  using std::hex;
 54  using std::unique_ptr;
 55  
 56  Module::InlineOrigin* Module::InlineOriginMap::GetOrCreateInlineOrigin(
 57      uint64_t offset,
 58      StringView name) {
 59    uint64_t specification_offset = references_[offset];
 60    // Find the root offset.
 61    auto iter = references_.find(specification_offset);
 62    while (iter != references_.end() &&
 63           specification_offset != references_[specification_offset]) {
 64      specification_offset = references_[specification_offset];
 65      iter = references_.find(specification_offset);
 66    }
 67    if (inline_origins_.find(specification_offset) != inline_origins_.end()) {
 68      if (inline_origins_[specification_offset]->name == "<name omitted>") {
 69        inline_origins_[specification_offset]->name = name;
 70      }
 71      return inline_origins_[specification_offset];
 72    }
 73    inline_origins_[specification_offset] = new Module::InlineOrigin(name);
 74    return inline_origins_[specification_offset];
 75  }
 76  
 77  void Module::InlineOriginMap::SetReference(uint64_t offset,
 78                                             uint64_t specification_offset) {
 79    // If we haven't seen this doesn't exist in reference map, always add it.
 80    if (references_.find(offset) == references_.end()) {
 81      references_[offset] = specification_offset;
 82      return;
 83    }
 84    // If offset equals specification_offset and offset exists in
 85    // references_, there is no need to update the references_ map.
 86    // This early return is necessary because the call to erase in following if
 87    // will remove the entry of specification_offset in inline_origins_. If
 88    // specification_offset equals to references_[offset], it might be
 89    // duplicate debug info.
 90    if (offset == specification_offset ||
 91        specification_offset == references_[offset])
 92      return;
 93  
 94    // Fix up mapping in inline_origins_.
 95    auto remove = inline_origins_.find(references_[offset]);
 96    if (remove != inline_origins_.end()) {
 97      inline_origins_[specification_offset] = std::move(remove->second);
 98      inline_origins_.erase(remove);
 99    }
100    references_[offset] = specification_offset;
101  }
102  
103  Module::Module(const string& name,
104                 const string& os,
105                 const string& architecture,
106                 const string& id,
107                 const string& code_id /* = "" */,
108                 bool enable_multiple_field /* = false*/,
109                 bool prefer_extern_name /* = false*/)
110      : name_(name),
111        os_(os),
112        architecture_(architecture),
113        id_(id),
114        code_id_(code_id),
115        load_address_(0),
116        enable_multiple_field_(enable_multiple_field),
117        prefer_extern_name_(prefer_extern_name) {}
118  
119  Module::~Module() {
120    for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it)
121      delete it->second;
122    for (FunctionSet::iterator it = functions_.begin();
123         it != functions_.end(); ++it) {
124      delete *it;
125    }
126  }
127  
128  void Module::SetLoadAddress(Address address) {
129    load_address_ = address;
130  }
131  
132  void Module::SetAddressRanges(const vector<Range>& ranges) {
133    address_ranges_ = ranges;
134  }
135  
136  bool Module::AddFunction(Function* function) {
137    // FUNC lines must not hold an empty name, so catch the problem early if
138    // callers try to add one.
139    assert(!function->name.empty());
140  
141    if (!AddressIsInModule(function->address)) {
142      return false;
143    }
144  
145    // FUNCs are better than PUBLICs as they come with sizes, so remove an extern
146    // with the same address if present.
147    Extern ext(function->address);
148    ExternSet::iterator it_ext = externs_.find(&ext);
149    if (it_ext == externs_.end() &&
150        architecture_ == "arm" &&
151        (function->address & 0x1) == 0) {
152      // ARM THUMB functions have bit 0 set. ARM64 does not have THUMB.
153      Extern arm_thumb_ext(function->address | 0x1);
154      it_ext = externs_.find(&arm_thumb_ext);
155    }
156    if (it_ext != externs_.end()) {
157      Extern* found_ext = it_ext->get();
158      bool name_mismatch = found_ext->name != function->name;
159      if (enable_multiple_field_) {
160        bool is_multiple_based_on_name;
161        // In the case of a .dSYM built with -gmlt, the external name will be the
162        // fully-qualified symbol name, but the function name will be the partial
163        // name (or omitted).
164        //
165        // Don't mark multiple in this case.
166        if (name_mismatch &&
167            (function->name == "<name omitted>" ||
168             found_ext->name.find(function->name.str()) != string::npos)) {
169          is_multiple_based_on_name = false;
170        } else {
171          is_multiple_based_on_name = name_mismatch;
172        }
173        // If the PUBLIC is for the same symbol as the FUNC, don't mark multiple.
174        function->is_multiple |=
175            is_multiple_based_on_name || found_ext->is_multiple;
176      }
177      if (name_mismatch && prefer_extern_name_) {
178        function->name = AddStringToPool(it_ext->get()->name);
179      }
180      externs_.erase(it_ext);
181    }
182  #if _DEBUG
183    {
184      // There should be no other PUBLIC symbols that overlap with the function.
185      for (const Range& range : function->ranges) {
186        Extern debug_ext(range.address);
187        ExternSet::iterator it_debug = externs_.lower_bound(&ext);
188        assert(it_debug == externs_.end() ||
189               (*it_debug)->address >= range.address + range.size);
190      }
191    }
192  #endif
193    if (enable_multiple_field_ && function_addresses_.count(function->address)) {
194      FunctionSet::iterator existing_function = std::find_if(
195          functions_.begin(), functions_.end(),
196          [&](Function* other) { return other->address == function->address; });
197      assert(existing_function != functions_.end());
198      (*existing_function)->is_multiple = true;
199      // Free the duplicate that was not inserted because this Module
200      // now owns it.
201      return false;
202    }
203    function_addresses_.emplace(function->address);
204    std::pair<FunctionSet::iterator, bool> ret = functions_.insert(function);
205    if (!ret.second && (*ret.first != function)) {
206      // Free the duplicate that was not inserted because this Module
207      // now owns it.
208      return false;
209    }
210    return true;
211  }
212  
213  void Module::AddStackFrameEntry(std::unique_ptr<StackFrameEntry> stack_frame_entry) {
214    if (!AddressIsInModule(stack_frame_entry->address)) {
215      return;
216    }
217  
218    stack_frame_entries_.push_back(std::move(stack_frame_entry));
219  }
220  
221  void Module::AddExtern(std::unique_ptr<Extern> ext) {
222    if (!AddressIsInModule(ext->address)) {
223      return;
224    }
225  
226    std::pair<ExternSet::iterator,bool> ret = externs_.emplace(std::move(ext));
227    if (!ret.second && enable_multiple_field_) {
228      (*ret.first)->is_multiple = true;
229    }
230  }
231  
232  void Module::GetFunctions(vector<Function*>* vec,
233                            vector<Function*>::iterator i) {
234    vec->insert(i, functions_.begin(), functions_.end());
235  }
236  
237  void Module::GetExterns(vector<Extern*>* vec,
238                          vector<Extern*>::iterator i) {
239    auto pos = vec->insert(i, externs_.size(), nullptr);
240    for (const std::unique_ptr<Extern>& ext : externs_) {
241      *pos = ext.get();
242      ++pos;
243    }
244  }
245  
246  Module::File* Module::FindFile(const string& name) {
247    // A tricky bit here.  The key of each map entry needs to be a
248    // pointer to the entry's File's name string.  This means that we
249    // can't do the initial lookup with any operation that would create
250    // an empty entry for us if the name isn't found (like, say,
251    // operator[] or insert do), because such a created entry's key will
252    // be a pointer the string passed as our argument.  Since the key of
253    // a map's value type is const, we can't fix it up once we've
254    // created our file.  lower_bound does the lookup without doing an
255    // insertion, and returns a good hint iterator to pass to insert.
256    // Our "destiny" is where we belong, whether we're there or not now.
257    FileByNameMap::iterator destiny = files_.lower_bound(&name);
258    if (destiny == files_.end()
259        || *destiny->first != name) {  // Repeated string comparison, boo hoo.
260      File* file = new File(name);
261      file->source_id = -1;
262      destiny = files_.insert(destiny,
263                              FileByNameMap::value_type(&file->name, file));
264    }
265    return destiny->second;
266  }
267  
268  Module::File* Module::FindFile(const char* name) {
269    string name_string = name;
270    return FindFile(name_string);
271  }
272  
273  Module::File* Module::FindExistingFile(const string& name) {
274    FileByNameMap::iterator it = files_.find(&name);
275    return (it == files_.end()) ? NULL : it->second;
276  }
277  
278  void Module::GetFiles(vector<File*>* vec) {
279    vec->clear();
280    for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it)
281      vec->push_back(it->second);
282  }
283  
284  void Module::GetStackFrameEntries(vector<StackFrameEntry*>* vec) const {
285    vec->clear();
286    vec->reserve(stack_frame_entries_.size());
287    for (const auto& ent : stack_frame_entries_) {
288      vec->push_back(ent.get());
289    }
290  }
291  
292  void Module::AssignSourceIds() {
293    // First, give every source file an id of -1.
294    for (FileByNameMap::iterator file_it = files_.begin();
295         file_it != files_.end(); ++file_it) {
296      file_it->second->source_id = -1;
297    }
298  
299    // Next, mark all files actually cited by our functions' line number
300    // info, by setting each one's source id to zero.
301    for (FunctionSet::const_iterator func_it = functions_.begin();
302         func_it != functions_.end(); ++func_it) {
303      Function* func = *func_it;
304      for (vector<Line>::iterator line_it = func->lines.begin();
305           line_it != func->lines.end(); ++line_it)
306        line_it->file->source_id = 0;
307    }
308  
309    // Also mark all files cited by inline callsite by setting each one's source
310    // id to zero.
311    auto markInlineFiles = [](unique_ptr<Inline>& in) {
312      // There are some artificial inline functions which don't belong to
313      // any file. Those will have file id -1.
314      if (in->call_site_file) {
315        in->call_site_file->source_id = 0;
316      }
317    };
318    for (auto func : functions_) {
319      Inline::InlineDFS(func->inlines, markInlineFiles);
320    }
321  
322    // Finally, assign source ids to those files that have been marked.
323    // We could have just assigned source id numbers while traversing
324    // the line numbers, but doing it this way numbers the files in
325    // lexicographical order by name, which is neat.
326    int next_source_id = 0;
327    for (FileByNameMap::iterator file_it = files_.begin();
328         file_it != files_.end(); ++file_it) {
329      if (!file_it->second->source_id)
330        file_it->second->source_id = next_source_id++;
331    }
332  }
333  
334  void Module::CreateInlineOrigins(
335      set<InlineOrigin*, InlineOriginCompare>& inline_origins) {
336    // Only add origins that have file and deduplicate origins with same name and
337    // file id by doing a DFS.
338    auto addInlineOrigins = [&](unique_ptr<Inline>& in) {
339      auto it = inline_origins.find(in->origin);
340      if (it == inline_origins.end())
341        inline_origins.insert(in->origin);
342      else
343        in->origin = *it;
344    };
345    for (Function* func : functions_)
346      Module::Inline::InlineDFS(func->inlines, addInlineOrigins);
347    int next_id = 0;
348    for (InlineOrigin* origin : inline_origins) {
349      origin->id = next_id++;
350    }
351  }
352  
353  bool Module::ReportError() {
354    fprintf(stderr, "error writing symbol file: %s\n",
355            strerror(errno));
356    return false;
357  }
358  
359  bool Module::WriteRuleMap(const RuleMap& rule_map, std::ostream& stream) {
360    for (RuleMap::const_iterator it = rule_map.begin();
361         it != rule_map.end(); ++it) {
362      if (it != rule_map.begin())
363        stream << ' ';
364      stream << it->first << ": " << it->second;
365    }
366    return stream.good();
367  }
368  
369  bool Module::AddressIsInModule(Address address) const {
370    if (address_ranges_.empty()) {
371      return true;
372    }
373    for (const auto& segment : address_ranges_) {
374      if (address >= segment.address &&
375          address < segment.address + segment.size) {
376        return true;
377      }
378    }
379    return false;
380  }
381  
382  bool Module::Write(std::ostream& stream, SymbolData symbol_data) {
383    stream << "MODULE " << os_ << " " << architecture_ << " "
384           << id_ << " " << name_ << "\n";
385    if (!stream.good())
386      return ReportError();
387  
388    if (!code_id_.empty()) {
389      stream << "INFO CODE_ID " << code_id_ << "\n";
390    }
391  
392    if (symbol_data & SYMBOLS_AND_FILES) {
393      // Get all referenced inline origins.
394      set<InlineOrigin*, InlineOriginCompare> inline_origins;
395      CreateInlineOrigins(inline_origins);
396      AssignSourceIds();
397  
398      // Write out files.
399      for (FileByNameMap::iterator file_it = files_.begin();
400           file_it != files_.end(); ++file_it) {
401        File* file = file_it->second;
402        if (file->source_id >= 0) {
403          stream << "FILE " << file->source_id << " " <<  file->name << "\n";
404          if (!stream.good())
405            return ReportError();
406        }
407      }
408      // Write out inline origins.
409      for (InlineOrigin* origin : inline_origins) {
410        stream << "INLINE_ORIGIN " << origin->id << " " << origin->name << "\n";
411        if (!stream.good())
412          return ReportError();
413      }
414  
415      // Write out functions and their inlines and lines.
416      for (FunctionSet::const_iterator func_it = functions_.begin();
417           func_it != functions_.end(); ++func_it) {
418        Function* func = *func_it;
419        vector<Line>::iterator line_it = func->lines.begin();
420        for (auto range_it = func->ranges.cbegin();
421             range_it != func->ranges.cend(); ++range_it) {
422          stream << "FUNC " << (func->is_multiple ? "m " : "") << hex
423                 << (range_it->address - load_address_) << " " << range_it->size
424                 << " " << func->parameter_size << " " << func->name << dec
425                 << "\n";
426  
427          if (!stream.good())
428            return ReportError();
429  
430          // Write out inlines.
431          auto write_inline = [&](unique_ptr<Inline>& in) {
432            stream << "INLINE ";
433            stream << in->inline_nest_level << " " << in->call_site_line << " "
434                   << in->getCallSiteFileID() << " " << in->origin->id << hex;
435            for (const Range& r : in->ranges)
436              stream << " " << (r.address - load_address_) << " " << r.size;
437            stream << dec << "\n";
438          };
439          Module::Inline::InlineDFS(func->inlines, write_inline);
440          if (!stream.good())
441            return ReportError();
442  
443          while ((line_it != func->lines.end()) &&
444                 (line_it->address >= range_it->address) &&
445                 (line_it->address < (range_it->address + range_it->size))) {
446            stream << hex
447                   << (line_it->address - load_address_) << " "
448                   << line_it->size << " "
449                   << dec
450                   << line_it->number << " "
451                   << line_it->file->source_id << "\n";
452  
453            if (!stream.good())
454              return ReportError();
455  
456            ++line_it;
457          }
458        }
459      }
460  
461      // Write out 'PUBLIC' records.
462      for (ExternSet::const_iterator extern_it = externs_.begin();
463           extern_it != externs_.end(); ++extern_it) {
464        Extern* ext = extern_it->get();
465        stream << "PUBLIC " << (ext->is_multiple ? "m " : "") << hex
466               << (ext->address - load_address_) << " 0 " << ext->name << dec
467               << "\n";
468      }
469    }
470  
471    if (symbol_data & CFI) {
472      // Write out 'STACK CFI INIT' and 'STACK CFI' records.
473      for (auto frame_it = stack_frame_entries_.begin();
474           frame_it != stack_frame_entries_.end(); ++frame_it) {
475        StackFrameEntry* entry = frame_it->get();
476        stream << "STACK CFI INIT " << hex
477               << (entry->address - load_address_) << " "
478               << entry->size << " " << dec;
479        if (!stream.good()
480            || !WriteRuleMap(entry->initial_rules, stream))
481          return ReportError();
482  
483        stream << "\n";
484  
485        // Write out this entry's delta rules as 'STACK CFI' records.
486        for (RuleChangeMap::const_iterator delta_it = entry->rule_changes.begin();
487             delta_it != entry->rule_changes.end(); ++delta_it) {
488          stream << "STACK CFI " << hex
489                 << (delta_it->first - load_address_) << " " << dec;
490          if (!stream.good()
491              || !WriteRuleMap(delta_it->second, stream))
492            return ReportError();
493  
494          stream << "\n";
495        }
496      }
497    }
498  
499    return true;
500  }
501  
502  }  // namespace google_breakpad