dynamic_images.cc
1 // Copyright 2007 Google LLC 2 // 3 // Redistribution and use in source and binary forms, with or without 4 // modification, are permitted provided that the following conditions are 5 // met: 6 // 7 // * Redistributions of source code must retain the above copyright 8 // notice, this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above 10 // copyright notice, this list of conditions and the following disclaimer 11 // in the documentation and/or other materials provided with the 12 // distribution. 13 // * Neither the name of Google LLC nor the names of its 14 // contributors may be used to endorse or promote products derived from 15 // this software without specific prior written permission. 16 // 17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifdef HAVE_CONFIG_H
#include <config.h>  // Must come first
#endif

#include "client/mac/handler/dynamic_images.h"

extern "C" { // needed to compile on Leopard
  #include <mach-o/nlist.h>
  #include <stdlib.h>
  #include <stdio.h>
}

#include <assert.h>
#include <AvailabilityMacros.h>
#include <dlfcn.h>
#include <mach/task_info.h>
#include <string.h>
#include <sys/sysctl.h>
#include <TargetConditionals.h>
#include <unistd.h>

#include <algorithm>
#include <string>
#include <vector>

#include "breakpad_nlist_64.h"

#if !TARGET_OS_IPHONE
#include <CoreServices/CoreServices.h>

#ifndef MAC_OS_X_VERSION_10_6
#define MAC_OS_X_VERSION_10_6 1060
#endif

#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6

// Fallback declarations for TASK_DYLD_INFO and friends, introduced in
// <mach/task_info.h> in the Mac OS X 10.6 SDK.
#define TASK_DYLD_INFO 17
struct task_dyld_info {
  mach_vm_address_t all_image_info_addr;
  mach_vm_size_t all_image_info_size;
};
typedef struct task_dyld_info task_dyld_info_data_t;
typedef struct task_dyld_info* task_dyld_info_t;
#define TASK_DYLD_INFO_COUNT (sizeof(task_dyld_info_data_t) / sizeof(natural_t))

#endif

#endif  // !TARGET_OS_IPHONE

namespace google_breakpad {

using std::string;
using std::vector;

//==============================================================================
// Returns the size of the memory region containing |address| and the
// number of bytes from |address| to the end of the region.
// We potentially, will extend the size of the original
// region by the size of the following region if it's contiguous with the
// first in order to handle cases when we're reading strings and they
// straddle two vm regions.
91 // 92 static mach_vm_size_t GetMemoryRegionSize(task_port_t target_task, 93 const uint64_t address, 94 mach_vm_size_t* size_to_end) { 95 mach_vm_address_t region_base = (mach_vm_address_t)address; 96 mach_vm_size_t region_size; 97 natural_t nesting_level = 0; 98 vm_region_submap_info_64 submap_info; 99 mach_msg_type_number_t info_count = VM_REGION_SUBMAP_INFO_COUNT_64; 100 101 // Get information about the vm region containing |address| 102 vm_region_recurse_info_t region_info; 103 region_info = reinterpret_cast<vm_region_recurse_info_t>(&submap_info); 104 105 kern_return_t result = 106 mach_vm_region_recurse(target_task, 107 ®ion_base, 108 ®ion_size, 109 &nesting_level, 110 region_info, 111 &info_count); 112 113 if (result == KERN_SUCCESS) { 114 // Get distance from |address| to the end of this region 115 *size_to_end = region_base + region_size -(mach_vm_address_t)address; 116 117 // If we want to handle strings as long as 4096 characters we may need 118 // to check if there's a vm region immediately following the first one. 119 // If so, we need to extend |*size_to_end| to go all the way to the end 120 // of the second region. 
121 if (*size_to_end < 4096) { 122 // Second region starts where the first one ends 123 mach_vm_address_t region_base2 = 124 (mach_vm_address_t)(region_base + region_size); 125 mach_vm_size_t region_size2; 126 127 // Get information about the following vm region 128 result = 129 mach_vm_region_recurse(target_task, 130 ®ion_base2, 131 ®ion_size2, 132 &nesting_level, 133 region_info, 134 &info_count); 135 136 // Extend region_size to go all the way to the end of the 2nd region 137 if (result == KERN_SUCCESS 138 && region_base2 == region_base + region_size) { 139 region_size += region_size2; 140 } 141 } 142 143 *size_to_end = region_base + region_size -(mach_vm_address_t)address; 144 } else { 145 region_size = 0; 146 *size_to_end = 0; 147 } 148 149 return region_size; 150 } 151 152 #define kMaxStringLength 8192 153 //============================================================================== 154 // Reads a NULL-terminated string from another task. 155 // 156 // Warning! This will not read any strings longer than kMaxStringLength-1 157 // 158 static string ReadTaskString(task_port_t target_task, 159 const uint64_t address) { 160 // The problem is we don't know how much to read until we know how long 161 // the string is. And we don't know how long the string is, until we've read 162 // the memory! So, we'll try to read kMaxStringLength bytes 163 // (or as many bytes as we can until we reach the end of the vm region). 164 mach_vm_size_t size_to_end; 165 GetMemoryRegionSize(target_task, address, &size_to_end); 166 167 if (size_to_end > 0) { 168 mach_vm_size_t size_to_read = 169 size_to_end > kMaxStringLength ? 
kMaxStringLength : size_to_end; 170 171 vector<uint8_t> bytes; 172 if (ReadTaskMemory(target_task, address, (size_t)size_to_read, bytes) != 173 KERN_SUCCESS) 174 return string(); 175 176 return string(reinterpret_cast<const char*>(&bytes[0])); 177 } 178 179 return string(); 180 } 181 182 //============================================================================== 183 // Reads an address range from another task. The bytes read will be returned 184 // in bytes, which will be resized as necessary. 185 kern_return_t ReadTaskMemory(task_port_t target_task, 186 const uint64_t address, 187 size_t length, 188 vector<uint8_t>& bytes) { 189 int systemPageSize = getpagesize(); 190 191 // use the negative of the page size for the mask to find the page address 192 mach_vm_address_t page_address = address & (-systemPageSize); 193 194 mach_vm_address_t last_page_address = 195 (address + length + (systemPageSize - 1)) & (-systemPageSize); 196 197 mach_vm_size_t page_size = last_page_address - page_address; 198 uint8_t* local_start; 199 uint32_t local_length; 200 201 kern_return_t r = mach_vm_read(target_task, 202 page_address, 203 page_size, 204 reinterpret_cast<vm_offset_t*>(&local_start), 205 &local_length); 206 207 if (r != KERN_SUCCESS) 208 return r; 209 210 bytes.resize(length); 211 memcpy(&bytes[0], 212 &local_start[(mach_vm_address_t)address - page_address], 213 length); 214 mach_vm_deallocate(mach_task_self(), (uintptr_t)local_start, local_length); 215 return KERN_SUCCESS; 216 } 217 218 #pragma mark - 219 220 //============================================================================== 221 // Traits structs for specializing function templates to handle 222 // 32-bit/64-bit Mach-O files. 
223 struct MachO32 { 224 typedef mach_header mach_header_type; 225 typedef segment_command mach_segment_command_type; 226 typedef dyld_image_info32 dyld_image_info; 227 typedef dyld_all_image_infos32 dyld_all_image_infos; 228 typedef struct nlist nlist_type; 229 static const uint32_t magic = MH_MAGIC; 230 static const uint32_t segment_load_command = LC_SEGMENT; 231 }; 232 233 struct MachO64 { 234 typedef mach_header_64 mach_header_type; 235 typedef segment_command_64 mach_segment_command_type; 236 typedef dyld_image_info64 dyld_image_info; 237 typedef dyld_all_image_infos64 dyld_all_image_infos; 238 typedef struct nlist_64 nlist_type; 239 static const uint32_t magic = MH_MAGIC_64; 240 static const uint32_t segment_load_command = LC_SEGMENT_64; 241 }; 242 243 template<typename MachBits> 244 bool FindTextSection(DynamicImage& image) { 245 typedef typename MachBits::mach_header_type mach_header_type; 246 typedef typename MachBits::mach_segment_command_type 247 mach_segment_command_type; 248 249 const mach_header_type* header = 250 reinterpret_cast<const mach_header_type*>(&image.header_[0]); 251 252 if(header->magic != MachBits::magic) { 253 return false; 254 } 255 256 const struct load_command* cmd = 257 reinterpret_cast<const struct load_command*>(header + 1); 258 259 bool found_text_section = false; 260 bool found_dylib_id_command = false; 261 for (unsigned int i = 0; cmd && (i < header->ncmds); ++i) { 262 if (!found_text_section) { 263 if (cmd->cmd == MachBits::segment_load_command) { 264 const mach_segment_command_type* seg = 265 reinterpret_cast<const mach_segment_command_type*>(cmd); 266 267 if (!strcmp(seg->segname, "__TEXT")) { 268 image.vmaddr_ = static_cast<mach_vm_address_t>(seg->vmaddr); 269 image.vmsize_ = static_cast<mach_vm_size_t>(seg->vmsize); 270 image.slide_ = 0; 271 272 if (seg->fileoff == 0 && seg->filesize != 0) { 273 image.slide_ = 274 (uintptr_t)image.GetLoadAddress() - (uintptr_t)seg->vmaddr; 275 } 276 found_text_section = true; 277 } 278 } 
279 } 280 281 if (!found_dylib_id_command) { 282 if (cmd->cmd == LC_ID_DYLIB) { 283 const struct dylib_command* dc = 284 reinterpret_cast<const struct dylib_command*>(cmd); 285 286 image.version_ = dc->dylib.current_version; 287 found_dylib_id_command = true; 288 } 289 } 290 291 if (found_dylib_id_command && found_text_section) { 292 return true; 293 } 294 295 cmd = reinterpret_cast<const struct load_command*> 296 (reinterpret_cast<const char*>(cmd) + cmd->cmdsize); 297 } 298 299 return false; 300 } 301 302 //============================================================================== 303 // Initializes vmaddr_, vmsize_, and slide_ 304 void DynamicImage::CalculateMemoryAndVersionInfo() { 305 // unless we can process the header, ensure that calls to 306 // IsValid() will return false 307 vmaddr_ = 0; 308 vmsize_ = 0; 309 slide_ = 0; 310 version_ = 0; 311 312 // The function template above does all the real work. 313 if (Is64Bit()) 314 FindTextSection<MachO64>(*this); 315 else 316 FindTextSection<MachO32>(*this); 317 } 318 319 //============================================================================== 320 // The helper function template abstracts the 32/64-bit differences. 321 template<typename MachBits> 322 uint32_t GetFileTypeFromHeader(DynamicImage& image) { 323 typedef typename MachBits::mach_header_type mach_header_type; 324 325 const mach_header_type* header = 326 reinterpret_cast<const mach_header_type*>(&image.header_[0]); 327 return header->filetype; 328 } 329 330 uint32_t DynamicImage::GetFileType() { 331 if (Is64Bit()) 332 return GetFileTypeFromHeader<MachO64>(*this); 333 334 return GetFileTypeFromHeader<MachO32>(*this); 335 } 336 337 #pragma mark - 338 339 //============================================================================== 340 // Loads information about dynamically loaded code in the given task. 
341 DynamicImages::DynamicImages(mach_port_t task) 342 : task_(task), 343 cpu_type_(DetermineTaskCPUType(task)), 344 image_list_() { 345 ReadImageInfoForTask(); 346 } 347 348 template<typename MachBits> 349 static uint64_t LookupSymbol(const char* symbol_name, 350 const char* filename, 351 cpu_type_t cpu_type) { 352 typedef typename MachBits::nlist_type nlist_type; 353 354 nlist_type symbol_info[8] = {}; 355 const char* symbolNames[2] = { symbol_name, "\0" }; 356 nlist_type& list = symbol_info[0]; 357 int invalidEntriesCount = breakpad_nlist(filename, 358 &list, 359 symbolNames, 360 cpu_type); 361 362 if(invalidEntriesCount != 0) { 363 return 0; 364 } 365 366 assert(list.n_value); 367 return list.n_value; 368 } 369 370 #if TARGET_OS_IPHONE || MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 371 static bool HasTaskDyldInfo() { 372 return true; 373 } 374 #else 375 static SInt32 GetOSVersionInternal() { 376 SInt32 os_version = 0; 377 Gestalt(gestaltSystemVersion, &os_version); 378 return os_version; 379 } 380 381 static SInt32 GetOSVersion() { 382 static SInt32 os_version = GetOSVersionInternal(); 383 return os_version; 384 } 385 386 static bool HasTaskDyldInfo() { 387 return GetOSVersion() >= 0x1060; 388 } 389 #endif // TARGET_OS_IPHONE || MAC_OS_X_VERSION_MIN_REQUIRED >= 10_6 390 391 uint64_t DynamicImages::GetDyldAllImageInfosPointer() { 392 if (HasTaskDyldInfo()) { 393 task_dyld_info_data_t task_dyld_info; 394 mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT; 395 if (task_info(task_, TASK_DYLD_INFO, (task_info_t)&task_dyld_info, 396 &count) != KERN_SUCCESS) { 397 return 0; 398 } 399 400 return (uint64_t)task_dyld_info.all_image_info_addr; 401 } else { 402 const char* imageSymbolName = "_dyld_all_image_infos"; 403 const char* dyldPath = "/usr/lib/dyld"; 404 405 if (Is64Bit()) 406 return LookupSymbol<MachO64>(imageSymbolName, dyldPath, cpu_type_); 407 return LookupSymbol<MachO32>(imageSymbolName, dyldPath, cpu_type_); 408 } 409 } 410 411 
//============================================================================== 412 // This code was written using dyld_debug.c (from Darwin) as a guide. 413 414 template<typename MachBits> 415 void ReadImageInfo(DynamicImages& images, 416 uint64_t image_list_address) { 417 typedef typename MachBits::dyld_image_info dyld_image_info; 418 typedef typename MachBits::dyld_all_image_infos dyld_all_image_infos; 419 typedef typename MachBits::mach_header_type mach_header_type; 420 421 // Read the structure inside of dyld that contains information about 422 // loaded images. We're reading from the desired task's address space. 423 424 // Here we make the assumption that dyld loaded at the same address in 425 // the crashed process vs. this one. This is an assumption made in 426 // "dyld_debug.c" and is said to be nearly always valid. 427 vector<uint8_t> dyld_all_info_bytes; 428 if (ReadTaskMemory(images.task_, 429 image_list_address, 430 sizeof(dyld_all_image_infos), 431 dyld_all_info_bytes) != KERN_SUCCESS) 432 return; 433 434 dyld_all_image_infos* dyldInfo = 435 reinterpret_cast<dyld_all_image_infos*>(&dyld_all_info_bytes[0]); 436 437 // number of loaded images 438 int count = dyldInfo->infoArrayCount; 439 440 // Read an array of dyld_image_info structures each containing 441 // information about a loaded image. 442 vector<uint8_t> dyld_info_array_bytes; 443 if (ReadTaskMemory(images.task_, 444 dyldInfo->infoArray, 445 count * sizeof(dyld_image_info), 446 dyld_info_array_bytes) != KERN_SUCCESS) 447 return; 448 449 dyld_image_info* infoArray = 450 reinterpret_cast<dyld_image_info*>(&dyld_info_array_bytes[0]); 451 images.image_list_.reserve(count); 452 453 for (int i = 0; i < count; ++i) { 454 dyld_image_info& info = infoArray[i]; 455 456 // First read just the mach_header from the image in the task. 
457 vector<uint8_t> mach_header_bytes; 458 if (ReadTaskMemory(images.task_, 459 info.load_address_, 460 sizeof(mach_header_type), 461 mach_header_bytes) != KERN_SUCCESS) 462 continue; // bail on this dynamic image 463 464 mach_header_type* header = 465 reinterpret_cast<mach_header_type*>(&mach_header_bytes[0]); 466 467 // Now determine the total amount necessary to read the header 468 // plus all of the load commands. 469 size_t header_size = 470 sizeof(mach_header_type) + header->sizeofcmds; 471 472 if (ReadTaskMemory(images.task_, 473 info.load_address_, 474 header_size, 475 mach_header_bytes) != KERN_SUCCESS) 476 continue; 477 478 // Read the file name from the task's memory space. 479 string file_path; 480 if (info.file_path_) { 481 // Although we're reading kMaxStringLength bytes, it's copied in the 482 // the DynamicImage constructor below with the correct string length, 483 // so it's not really wasting memory. 484 file_path = ReadTaskString(images.task_, info.file_path_); 485 } 486 487 // Create an object representing this image and add it to our list. 488 DynamicImage* new_image; 489 new_image = new DynamicImage(&mach_header_bytes[0], 490 header_size, 491 info.load_address_, 492 file_path, 493 static_cast<uintptr_t>(info.file_mod_date_), 494 images.task_, 495 images.cpu_type_); 496 497 if (new_image->IsValid()) { 498 images.image_list_.push_back(DynamicImageRef(new_image)); 499 } else { 500 delete new_image; 501 } 502 } 503 504 // sorts based on loading address 505 sort(images.image_list_.begin(), images.image_list_.end()); 506 // remove duplicates - this happens in certain strange cases 507 // You can see it in DashboardClient when Google Gadgets plugin 508 // is installed. 
Apple's crash reporter log and gdb "info shared" 509 // both show the same library multiple times at the same address 510 511 vector<DynamicImageRef>::iterator it = unique(images.image_list_.begin(), 512 images.image_list_.end()); 513 images.image_list_.erase(it, images.image_list_.end()); 514 } 515 516 void DynamicImages::ReadImageInfoForTask() { 517 uint64_t imageList = GetDyldAllImageInfosPointer(); 518 519 if (imageList) { 520 if (Is64Bit()) 521 ReadImageInfo<MachO64>(*this, imageList); 522 else 523 ReadImageInfo<MachO32>(*this, imageList); 524 } 525 } 526 527 //============================================================================== 528 DynamicImage* DynamicImages::GetExecutableImage() { 529 int executable_index = GetExecutableImageIndex(); 530 531 if (executable_index >= 0) { 532 return GetImage(executable_index); 533 } 534 535 return NULL; 536 } 537 538 //============================================================================== 539 // returns -1 if failure to find executable 540 int DynamicImages::GetExecutableImageIndex() { 541 int image_count = GetImageCount(); 542 543 for (int i = 0; i < image_count; ++i) { 544 DynamicImage* image = GetImage(i); 545 if (image->GetFileType() == MH_EXECUTE) { 546 return i; 547 } 548 } 549 550 return -1; 551 } 552 553 //============================================================================== 554 // static 555 cpu_type_t DynamicImages::DetermineTaskCPUType(task_t task) { 556 if (task == mach_task_self()) 557 return GetNativeCPUType(); 558 559 int mib[CTL_MAXNAME]; 560 size_t mibLen = CTL_MAXNAME; 561 int err = sysctlnametomib("sysctl.proc_cputype", mib, &mibLen); 562 if (err == 0) { 563 assert(mibLen < CTL_MAXNAME); 564 pid_for_task(task, &mib[mibLen]); 565 mibLen += 1; 566 567 cpu_type_t cpu_type; 568 size_t cpuTypeSize = sizeof(cpu_type); 569 sysctl(mib, static_cast<u_int>(mibLen), &cpu_type, &cpuTypeSize, 0, 0); 570 return cpu_type; 571 } 572 573 return GetNativeCPUType(); 574 } 575 576 } // namespace 
google_breakpad