module.cc
1 // Copyright 2011 Google LLC 2 // 3 // Redistribution and use in source and binary forms, with or without 4 // modification, are permitted provided that the following conditions are 5 // met: 6 // 7 // * Redistributions of source code must retain the above copyright 8 // notice, this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above 10 // copyright notice, this list of conditions and the following disclaimer 11 // in the documentation and/or other materials provided with the 12 // distribution. 13 // * Neither the name of Google LLC nor the names of its 14 // contributors may be used to endorse or promote products derived from 15 // this software without specific prior written permission. 16 // 17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> 30 31 // module.cc: Implement google_breakpad::Module. See module.h. 32 33 #ifdef HAVE_CONFIG_H 34 #include <config.h> // Must come first 35 #endif 36 37 #include "common/module.h" 38 #include "common/string_view.h" 39 40 #include <assert.h> 41 #include <errno.h> 42 #include <stdio.h> 43 #include <string.h> 44 45 #include <functional> 46 #include <iostream> 47 #include <memory> 48 #include <utility> 49 50 namespace google_breakpad { 51 52 using std::dec; 53 using std::hex; 54 using std::unique_ptr; 55 56 Module::InlineOrigin* Module::InlineOriginMap::GetOrCreateInlineOrigin( 57 uint64_t offset, 58 StringView name) { 59 uint64_t specification_offset = references_[offset]; 60 // Find the root offset. 61 auto iter = references_.find(specification_offset); 62 while (iter != references_.end() && 63 specification_offset != references_[specification_offset]) { 64 specification_offset = references_[specification_offset]; 65 iter = references_.find(specification_offset); 66 } 67 if (inline_origins_.find(specification_offset) != inline_origins_.end()) { 68 if (inline_origins_[specification_offset]->name == "<name omitted>") { 69 inline_origins_[specification_offset]->name = name; 70 } 71 return inline_origins_[specification_offset]; 72 } 73 inline_origins_[specification_offset] = new Module::InlineOrigin(name); 74 return inline_origins_[specification_offset]; 75 } 76 77 void Module::InlineOriginMap::SetReference(uint64_t offset, 78 uint64_t specification_offset) { 79 // If we haven't seen this doesn't exist in reference map, always add it. 80 if (references_.find(offset) == references_.end()) { 81 references_[offset] = specification_offset; 82 return; 83 } 84 // If offset equals specification_offset and offset exists in 85 // references_, there is no need to update the references_ map. 86 // This early return is necessary because the call to erase in following if 87 // will remove the entry of specification_offset in inline_origins_. If 88 // specification_offset equals to references_[offset], it might be 89 // duplicate debug info. 90 if (offset == specification_offset || 91 specification_offset == references_[offset]) 92 return; 93 94 // Fix up mapping in inline_origins_. 95 auto remove = inline_origins_.find(references_[offset]); 96 if (remove != inline_origins_.end()) { 97 inline_origins_[specification_offset] = std::move(remove->second); 98 inline_origins_.erase(remove); 99 } 100 references_[offset] = specification_offset; 101 } 102 103 Module::Module(const string& name, 104 const string& os, 105 const string& architecture, 106 const string& id, 107 const string& code_id /* = "" */, 108 bool enable_multiple_field /* = false*/, 109 bool prefer_extern_name /* = false*/) 110 : name_(name), 111 os_(os), 112 architecture_(architecture), 113 id_(id), 114 code_id_(code_id), 115 load_address_(0), 116 enable_multiple_field_(enable_multiple_field), 117 prefer_extern_name_(prefer_extern_name) {} 118 119 Module::~Module() { 120 for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it) 121 delete it->second; 122 for (FunctionSet::iterator it = functions_.begin(); 123 it != functions_.end(); ++it) { 124 delete *it; 125 } 126 } 127 128 void Module::SetLoadAddress(Address address) { 129 load_address_ = address; 130 } 131 132 void Module::SetAddressRanges(const vector<Range>& ranges) { 133 address_ranges_ = ranges; 134 } 135 136 bool Module::AddFunction(Function* function) { 137 // FUNC lines must not hold an empty name, so catch the problem early if 138 // callers try to add one. 139 assert(!function->name.empty()); 140 141 if (!AddressIsInModule(function->address)) { 142 return false; 143 } 144 145 // FUNCs are better than PUBLICs as they come with sizes, so remove an extern 146 // with the same address if present. 147 Extern ext(function->address); 148 ExternSet::iterator it_ext = externs_.find(&ext); 149 if (it_ext == externs_.end() && 150 architecture_ == "arm" && 151 (function->address & 0x1) == 0) { 152 // ARM THUMB functions have bit 0 set. ARM64 does not have THUMB. 153 Extern arm_thumb_ext(function->address | 0x1); 154 it_ext = externs_.find(&arm_thumb_ext); 155 } 156 if (it_ext != externs_.end()) { 157 Extern* found_ext = it_ext->get(); 158 bool name_mismatch = found_ext->name != function->name; 159 if (enable_multiple_field_) { 160 bool is_multiple_based_on_name; 161 // In the case of a .dSYM built with -gmlt, the external name will be the 162 // fully-qualified symbol name, but the function name will be the partial 163 // name (or omitted). 164 // 165 // Don't mark multiple in this case. 166 if (name_mismatch && 167 (function->name == "<name omitted>" || 168 found_ext->name.find(function->name.str()) != string::npos)) { 169 is_multiple_based_on_name = false; 170 } else { 171 is_multiple_based_on_name = name_mismatch; 172 } 173 // If the PUBLIC is for the same symbol as the FUNC, don't mark multiple. 174 function->is_multiple |= 175 is_multiple_based_on_name || found_ext->is_multiple; 176 } 177 if (name_mismatch && prefer_extern_name_) { 178 function->name = AddStringToPool(it_ext->get()->name); 179 } 180 externs_.erase(it_ext); 181 } 182 #if _DEBUG 183 { 184 // There should be no other PUBLIC symbols that overlap with the function. 185 for (const Range& range : function->ranges) { 186 Extern debug_ext(range.address); 187 ExternSet::iterator it_debug = externs_.lower_bound(&ext); 188 assert(it_debug == externs_.end() || 189 (*it_debug)->address >= range.address + range.size); 190 } 191 } 192 #endif 193 if (enable_multiple_field_ && function_addresses_.count(function->address)) { 194 FunctionSet::iterator existing_function = std::find_if( 195 functions_.begin(), functions_.end(), 196 [&](Function* other) { return other->address == function->address; }); 197 assert(existing_function != functions_.end()); 198 (*existing_function)->is_multiple = true; 199 // Free the duplicate that was not inserted because this Module 200 // now owns it. 201 return false; 202 } 203 function_addresses_.emplace(function->address); 204 std::pair<FunctionSet::iterator, bool> ret = functions_.insert(function); 205 if (!ret.second && (*ret.first != function)) { 206 // Free the duplicate that was not inserted because this Module 207 // now owns it. 208 return false; 209 } 210 return true; 211 } 212 213 void Module::AddStackFrameEntry(std::unique_ptr<StackFrameEntry> stack_frame_entry) { 214 if (!AddressIsInModule(stack_frame_entry->address)) { 215 return; 216 } 217 218 stack_frame_entries_.push_back(std::move(stack_frame_entry)); 219 } 220 221 void Module::AddExtern(std::unique_ptr<Extern> ext) { 222 if (!AddressIsInModule(ext->address)) { 223 return; 224 } 225 226 std::pair<ExternSet::iterator,bool> ret = externs_.emplace(std::move(ext)); 227 if (!ret.second && enable_multiple_field_) { 228 (*ret.first)->is_multiple = true; 229 } 230 } 231 232 void Module::GetFunctions(vector<Function*>* vec, 233 vector<Function*>::iterator i) { 234 vec->insert(i, functions_.begin(), functions_.end()); 235 } 236 237 void Module::GetExterns(vector<Extern*>* vec, 238 vector<Extern*>::iterator i) { 239 auto pos = vec->insert(i, externs_.size(), nullptr); 240 for (const std::unique_ptr<Extern>& ext : externs_) { 241 *pos = ext.get(); 242 ++pos; 243 } 244 } 245 246 Module::File* Module::FindFile(const string& name) { 247 // A tricky bit here. The key of each map entry needs to be a 248 // pointer to the entry's File's name string. This means that we 249 // can't do the initial lookup with any operation that would create 250 // an empty entry for us if the name isn't found (like, say, 251 // operator[] or insert do), because such a created entry's key will 252 // be a pointer the string passed as our argument. Since the key of 253 // a map's value type is const, we can't fix it up once we've 254 // created our file. lower_bound does the lookup without doing an 255 // insertion, and returns a good hint iterator to pass to insert. 256 // Our "destiny" is where we belong, whether we're there or not now. 257 FileByNameMap::iterator destiny = files_.lower_bound(&name); 258 if (destiny == files_.end() 259 || *destiny->first != name) { // Repeated string comparison, boo hoo. 260 File* file = new File(name); 261 file->source_id = -1; 262 destiny = files_.insert(destiny, 263 FileByNameMap::value_type(&file->name, file)); 264 } 265 return destiny->second; 266 } 267 268 Module::File* Module::FindFile(const char* name) { 269 string name_string = name; 270 return FindFile(name_string); 271 } 272 273 Module::File* Module::FindExistingFile(const string& name) { 274 FileByNameMap::iterator it = files_.find(&name); 275 return (it == files_.end()) ? NULL : it->second; 276 } 277 278 void Module::GetFiles(vector<File*>* vec) { 279 vec->clear(); 280 for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it) 281 vec->push_back(it->second); 282 } 283 284 void Module::GetStackFrameEntries(vector<StackFrameEntry*>* vec) const { 285 vec->clear(); 286 vec->reserve(stack_frame_entries_.size()); 287 for (const auto& ent : stack_frame_entries_) { 288 vec->push_back(ent.get()); 289 } 290 } 291 292 void Module::AssignSourceIds() { 293 // First, give every source file an id of -1. 294 for (FileByNameMap::iterator file_it = files_.begin(); 295 file_it != files_.end(); ++file_it) { 296 file_it->second->source_id = -1; 297 } 298 299 // Next, mark all files actually cited by our functions' line number 300 // info, by setting each one's source id to zero. 301 for (FunctionSet::const_iterator func_it = functions_.begin(); 302 func_it != functions_.end(); ++func_it) { 303 Function* func = *func_it; 304 for (vector<Line>::iterator line_it = func->lines.begin(); 305 line_it != func->lines.end(); ++line_it) 306 line_it->file->source_id = 0; 307 } 308 309 // Also mark all files cited by inline callsite by setting each one's source 310 // id to zero. 311 auto markInlineFiles = [](unique_ptr<Inline>& in) { 312 // There are some artificial inline functions which don't belong to 313 // any file. Those will have file id -1. 314 if (in->call_site_file) { 315 in->call_site_file->source_id = 0; 316 } 317 }; 318 for (auto func : functions_) { 319 Inline::InlineDFS(func->inlines, markInlineFiles); 320 } 321 322 // Finally, assign source ids to those files that have been marked. 323 // We could have just assigned source id numbers while traversing 324 // the line numbers, but doing it this way numbers the files in 325 // lexicographical order by name, which is neat. 326 int next_source_id = 0; 327 for (FileByNameMap::iterator file_it = files_.begin(); 328 file_it != files_.end(); ++file_it) { 329 if (!file_it->second->source_id) 330 file_it->second->source_id = next_source_id++; 331 } 332 } 333 334 void Module::CreateInlineOrigins( 335 set<InlineOrigin*, InlineOriginCompare>& inline_origins) { 336 // Only add origins that have file and deduplicate origins with same name and 337 // file id by doing a DFS. 338 auto addInlineOrigins = [&](unique_ptr<Inline>& in) { 339 auto it = inline_origins.find(in->origin); 340 if (it == inline_origins.end()) 341 inline_origins.insert(in->origin); 342 else 343 in->origin = *it; 344 }; 345 for (Function* func : functions_) 346 Module::Inline::InlineDFS(func->inlines, addInlineOrigins); 347 int next_id = 0; 348 for (InlineOrigin* origin : inline_origins) { 349 origin->id = next_id++; 350 } 351 } 352 353 bool Module::ReportError() { 354 fprintf(stderr, "error writing symbol file: %s\n", 355 strerror(errno)); 356 return false; 357 } 358 359 bool Module::WriteRuleMap(const RuleMap& rule_map, std::ostream& stream) { 360 for (RuleMap::const_iterator it = rule_map.begin(); 361 it != rule_map.end(); ++it) { 362 if (it != rule_map.begin()) 363 stream << ' '; 364 stream << it->first << ": " << it->second; 365 } 366 return stream.good(); 367 } 368 369 bool Module::AddressIsInModule(Address address) const { 370 if (address_ranges_.empty()) { 371 return true; 372 } 373 for (const auto& segment : address_ranges_) { 374 if (address >= segment.address && 375 address < segment.address + segment.size) { 376 return true; 377 } 378 } 379 return false; 380 } 381 382 bool Module::Write(std::ostream& stream, SymbolData symbol_data) { 383 stream << "MODULE " << os_ << " " << architecture_ << " " 384 << id_ << " " << name_ << "\n"; 385 if (!stream.good()) 386 return ReportError(); 387 388 if (!code_id_.empty()) { 389 stream << "INFO CODE_ID " << code_id_ << "\n"; 390 } 391 392 if (symbol_data & SYMBOLS_AND_FILES) { 393 // Get all referenced inline origins. 394 set<InlineOrigin*, InlineOriginCompare> inline_origins; 395 CreateInlineOrigins(inline_origins); 396 AssignSourceIds(); 397 398 // Write out files. 399 for (FileByNameMap::iterator file_it = files_.begin(); 400 file_it != files_.end(); ++file_it) { 401 File* file = file_it->second; 402 if (file->source_id >= 0) { 403 stream << "FILE " << file->source_id << " " << file->name << "\n"; 404 if (!stream.good()) 405 return ReportError(); 406 } 407 } 408 // Write out inline origins. 409 for (InlineOrigin* origin : inline_origins) { 410 stream << "INLINE_ORIGIN " << origin->id << " " << origin->name << "\n"; 411 if (!stream.good()) 412 return ReportError(); 413 } 414 415 // Write out functions and their inlines and lines. 416 for (FunctionSet::const_iterator func_it = functions_.begin(); 417 func_it != functions_.end(); ++func_it) { 418 Function* func = *func_it; 419 vector<Line>::iterator line_it = func->lines.begin(); 420 for (auto range_it = func->ranges.cbegin(); 421 range_it != func->ranges.cend(); ++range_it) { 422 stream << "FUNC " << (func->is_multiple ? "m " : "") << hex 423 << (range_it->address - load_address_) << " " << range_it->size 424 << " " << func->parameter_size << " " << func->name << dec 425 << "\n"; 426 427 if (!stream.good()) 428 return ReportError(); 429 430 // Write out inlines. 431 auto write_inline = [&](unique_ptr<Inline>& in) { 432 stream << "INLINE "; 433 stream << in->inline_nest_level << " " << in->call_site_line << " " 434 << in->getCallSiteFileID() << " " << in->origin->id << hex; 435 for (const Range& r : in->ranges) 436 stream << " " << (r.address - load_address_) << " " << r.size; 437 stream << dec << "\n"; 438 }; 439 Module::Inline::InlineDFS(func->inlines, write_inline); 440 if (!stream.good()) 441 return ReportError(); 442 443 while ((line_it != func->lines.end()) && 444 (line_it->address >= range_it->address) && 445 (line_it->address < (range_it->address + range_it->size))) { 446 stream << hex 447 << (line_it->address - load_address_) << " " 448 << line_it->size << " " 449 << dec 450 << line_it->number << " " 451 << line_it->file->source_id << "\n"; 452 453 if (!stream.good()) 454 return ReportError(); 455 456 ++line_it; 457 } 458 } 459 } 460 461 // Write out 'PUBLIC' records. 462 for (ExternSet::const_iterator extern_it = externs_.begin(); 463 extern_it != externs_.end(); ++extern_it) { 464 Extern* ext = extern_it->get(); 465 stream << "PUBLIC " << (ext->is_multiple ? "m " : "") << hex 466 << (ext->address - load_address_) << " 0 " << ext->name << dec 467 << "\n"; 468 } 469 } 470 471 if (symbol_data & CFI) { 472 // Write out 'STACK CFI INIT' and 'STACK CFI' records. 473 for (auto frame_it = stack_frame_entries_.begin(); 474 frame_it != stack_frame_entries_.end(); ++frame_it) { 475 StackFrameEntry* entry = frame_it->get(); 476 stream << "STACK CFI INIT " << hex 477 << (entry->address - load_address_) << " " 478 << entry->size << " " << dec; 479 if (!stream.good() 480 || !WriteRuleMap(entry->initial_rules, stream)) 481 return ReportError(); 482 483 stream << "\n"; 484 485 // Write out this entry's delta rules as 'STACK CFI' records. 486 for (RuleChangeMap::const_iterator delta_it = entry->rule_changes.begin(); 487 delta_it != entry->rule_changes.end(); ++delta_it) { 488 stream << "STACK CFI " << hex 489 << (delta_it->first - load_address_) << " " << dec; 490 if (!stream.good() 491 || !WriteRuleMap(delta_it->second, stream)) 492 return ReportError(); 493 494 stream << "\n"; 495 } 496 } 497 } 498 499 return true; 500 } 501 502 } // namespace google_breakpad