/ src / client / mac / handler / dynamic_images.cc
dynamic_images.cc
  1  // Copyright 2007 Google LLC
  2  //
  3  // Redistribution and use in source and binary forms, with or without
  4  // modification, are permitted provided that the following conditions are
  5  // met:
  6  //
  7  //     * Redistributions of source code must retain the above copyright
  8  // notice, this list of conditions and the following disclaimer.
  9  //     * Redistributions in binary form must reproduce the above
 10  // copyright notice, this list of conditions and the following disclaimer
 11  // in the documentation and/or other materials provided with the
 12  // distribution.
 13  //     * Neither the name of Google LLC nor the names of its
 14  // contributors may be used to endorse or promote products derived from
 15  // this software without specific prior written permission.
 16  //
 17  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 21  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 22  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 23  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 27  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28  
 29  #ifdef HAVE_CONFIG_H
 30  #include <config.h>  // Must come first
 31  #endif
 32  
 33  #include "client/mac/handler/dynamic_images.h"
 34  
 35  extern "C" { // needed to compile on Leopard
 36    #include <mach-o/nlist.h>
 37    #include <stdlib.h>
 38    #include <stdio.h>
 39  }
 40  
 41  #include <assert.h>
 42  #include <AvailabilityMacros.h>
 43  #include <dlfcn.h>
 44  #include <mach/task_info.h>
 45  #include <sys/sysctl.h>
 46  #include <TargetConditionals.h>
 47  #include <unistd.h>
 48  
 49  #include <algorithm>
 50  #include <string>
 51  #include <vector>
 52  
 53  #include "breakpad_nlist_64.h"
 54  
 55  #if !TARGET_OS_IPHONE
 56  #include <CoreServices/CoreServices.h>
 57  
 58  #ifndef MAC_OS_X_VERSION_10_6
 59  #define MAC_OS_X_VERSION_10_6 1060
 60  #endif
 61  
 62  #if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6
 63  
 64  // Fallback declarations for TASK_DYLD_INFO and friends, introduced in
 65  // <mach/task_info.h> in the Mac OS X 10.6 SDK.
 66  #define TASK_DYLD_INFO 17
 67  struct task_dyld_info {
 68    mach_vm_address_t all_image_info_addr;
 69    mach_vm_size_t all_image_info_size;
 70  };
 71  typedef struct task_dyld_info task_dyld_info_data_t;
 72  typedef struct task_dyld_info* task_dyld_info_t;
 73  #define TASK_DYLD_INFO_COUNT (sizeof(task_dyld_info_data_t) / sizeof(natural_t))
 74  
 75  #endif
 76  
 77  #endif  // !TARGET_OS_IPHONE
 78  
 79  namespace google_breakpad {
 80  
 81  using std::string;
 82  using std::vector;
 83  
 84  //==============================================================================
 85  // Returns the size of the memory region containing |address| and the
 86  // number of bytes from |address| to the end of the region.
 87  // We potentially, will extend the size of the original
 88  // region by the size of the following region if it's contiguous with the
 89  // first in order to handle cases when we're reading strings and they
 90  // straddle two vm regions.
 91  //
 92  static mach_vm_size_t GetMemoryRegionSize(task_port_t target_task,
 93                                            const uint64_t address,
 94                                            mach_vm_size_t* size_to_end) {
 95    mach_vm_address_t region_base = (mach_vm_address_t)address;
 96    mach_vm_size_t region_size;
 97    natural_t nesting_level = 0;
 98    vm_region_submap_info_64 submap_info;
 99    mach_msg_type_number_t info_count = VM_REGION_SUBMAP_INFO_COUNT_64;
100  
101    // Get information about the vm region containing |address|
102    vm_region_recurse_info_t region_info;
103    region_info = reinterpret_cast<vm_region_recurse_info_t>(&submap_info);
104  
105    kern_return_t result =
106      mach_vm_region_recurse(target_task,
107                             &region_base,
108                             &region_size,
109                             &nesting_level,
110                             region_info,
111                             &info_count);
112  
113    if (result == KERN_SUCCESS) {
114      // Get distance from |address| to the end of this region
115      *size_to_end = region_base + region_size -(mach_vm_address_t)address;
116  
117      // If we want to handle strings as long as 4096 characters we may need
118      // to check if there's a vm region immediately following the first one.
119      // If so, we need to extend |*size_to_end| to go all the way to the end
120      // of the second region.
121      if (*size_to_end < 4096) {
122        // Second region starts where the first one ends
123        mach_vm_address_t region_base2 =
124          (mach_vm_address_t)(region_base + region_size);
125        mach_vm_size_t region_size2;
126  
127        // Get information about the following vm region
128        result =
129          mach_vm_region_recurse(target_task,
130                                 &region_base2,
131                                 &region_size2,
132                                 &nesting_level,
133                                 region_info,
134                                 &info_count);
135  
136        // Extend region_size to go all the way to the end of the 2nd region
137        if (result == KERN_SUCCESS
138            && region_base2 == region_base + region_size) {
139          region_size += region_size2;
140        }
141      }
142  
143      *size_to_end = region_base + region_size -(mach_vm_address_t)address;
144    } else {
145      region_size = 0;
146      *size_to_end = 0;
147    }
148  
149    return region_size;
150  }
151  
152  #define kMaxStringLength 8192
153  //==============================================================================
154  // Reads a NULL-terminated string from another task.
155  //
156  // Warning!  This will not read any strings longer than kMaxStringLength-1
157  //
158  static string ReadTaskString(task_port_t target_task,
159                               const uint64_t address) {
160    // The problem is we don't know how much to read until we know how long
161    // the string is. And we don't know how long the string is, until we've read
162    // the memory!  So, we'll try to read kMaxStringLength bytes
163    // (or as many bytes as we can until we reach the end of the vm region).
164    mach_vm_size_t size_to_end;
165    GetMemoryRegionSize(target_task, address, &size_to_end);
166  
167    if (size_to_end > 0) {
168      mach_vm_size_t size_to_read =
169        size_to_end > kMaxStringLength ? kMaxStringLength : size_to_end;
170  
171      vector<uint8_t> bytes;
172      if (ReadTaskMemory(target_task, address, (size_t)size_to_read, bytes) !=
173          KERN_SUCCESS)
174        return string();
175  
176      return string(reinterpret_cast<const char*>(&bytes[0]));
177    }
178  
179    return string();
180  }
181  
182  //==============================================================================
183  // Reads an address range from another task. The bytes read will be returned
184  // in bytes, which will be resized as necessary.
185  kern_return_t ReadTaskMemory(task_port_t target_task,
186                               const uint64_t address,
187                               size_t length,
188                               vector<uint8_t>& bytes) {
189    int systemPageSize = getpagesize();
190  
191    // use the negative of the page size for the mask to find the page address
192    mach_vm_address_t page_address = address & (-systemPageSize);
193  
194    mach_vm_address_t last_page_address =
195        (address + length + (systemPageSize - 1)) & (-systemPageSize);
196  
197    mach_vm_size_t page_size = last_page_address - page_address;
198    uint8_t* local_start;
199    uint32_t local_length;
200  
201    kern_return_t r = mach_vm_read(target_task,
202                                   page_address,
203                                   page_size,
204                                   reinterpret_cast<vm_offset_t*>(&local_start),
205                                   &local_length);
206  
207    if (r != KERN_SUCCESS)
208      return r;
209  
210    bytes.resize(length);
211    memcpy(&bytes[0],
212           &local_start[(mach_vm_address_t)address - page_address],
213           length);
214    mach_vm_deallocate(mach_task_self(), (uintptr_t)local_start, local_length);
215    return KERN_SUCCESS;
216  }
217  
218  #pragma mark -
219  
220  //==============================================================================
221  // Traits structs for specializing function templates to handle
222  // 32-bit/64-bit Mach-O files.
223  struct MachO32 {
224    typedef mach_header mach_header_type;
225    typedef segment_command mach_segment_command_type;
226    typedef dyld_image_info32 dyld_image_info;
227    typedef dyld_all_image_infos32 dyld_all_image_infos;
228    typedef struct nlist nlist_type;
229    static const uint32_t magic = MH_MAGIC;
230    static const uint32_t segment_load_command = LC_SEGMENT;
231  };
232  
233  struct MachO64 {
234    typedef mach_header_64 mach_header_type;
235    typedef segment_command_64 mach_segment_command_type;
236    typedef dyld_image_info64 dyld_image_info;
237    typedef dyld_all_image_infos64 dyld_all_image_infos;
238    typedef struct nlist_64 nlist_type;
239    static const uint32_t magic = MH_MAGIC_64;
240    static const uint32_t segment_load_command = LC_SEGMENT_64;
241  };
242  
243  template<typename MachBits>
244  bool FindTextSection(DynamicImage& image) {
245    typedef typename MachBits::mach_header_type mach_header_type;
246    typedef typename MachBits::mach_segment_command_type
247        mach_segment_command_type;
248    
249    const mach_header_type* header =
250        reinterpret_cast<const mach_header_type*>(&image.header_[0]);
251  
252    if(header->magic != MachBits::magic) {
253      return false;
254    }
255  
256    const struct load_command* cmd =
257        reinterpret_cast<const struct load_command*>(header + 1);
258  
259    bool found_text_section = false;
260    bool found_dylib_id_command = false;
261    for (unsigned int i = 0; cmd && (i < header->ncmds); ++i) {
262      if (!found_text_section) {
263        if (cmd->cmd == MachBits::segment_load_command) {
264          const mach_segment_command_type* seg =
265              reinterpret_cast<const mach_segment_command_type*>(cmd);
266  
267          if (!strcmp(seg->segname, "__TEXT")) {
268            image.vmaddr_ = static_cast<mach_vm_address_t>(seg->vmaddr);
269            image.vmsize_ = static_cast<mach_vm_size_t>(seg->vmsize);
270            image.slide_ = 0;
271  
272            if (seg->fileoff == 0 && seg->filesize != 0) {
273              image.slide_ =
274                  (uintptr_t)image.GetLoadAddress() - (uintptr_t)seg->vmaddr;
275            }
276            found_text_section = true;
277          }
278        }
279      }
280  
281      if (!found_dylib_id_command) {
282        if (cmd->cmd == LC_ID_DYLIB) {
283          const struct dylib_command* dc =
284              reinterpret_cast<const struct dylib_command*>(cmd);
285  
286          image.version_ = dc->dylib.current_version;
287          found_dylib_id_command = true;
288        }
289      }
290  
291      if (found_dylib_id_command && found_text_section) {
292        return true;
293      }
294  
295      cmd = reinterpret_cast<const struct load_command*>
296          (reinterpret_cast<const char*>(cmd) + cmd->cmdsize);
297    }
298  
299    return false;
300  }
301  
302  //==============================================================================
303  // Initializes vmaddr_, vmsize_, and slide_
304  void DynamicImage::CalculateMemoryAndVersionInfo() {
305    // unless we can process the header, ensure that calls to
306    // IsValid() will return false
307    vmaddr_ = 0;
308    vmsize_ = 0;
309    slide_ = 0;
310    version_ = 0;
311  
312    // The function template above does all the real work.
313    if (Is64Bit())
314      FindTextSection<MachO64>(*this);
315    else
316      FindTextSection<MachO32>(*this);
317  }
318  
319  //==============================================================================
320  // The helper function template abstracts the 32/64-bit differences.
321  template<typename MachBits>
322  uint32_t GetFileTypeFromHeader(DynamicImage& image) {
323    typedef typename MachBits::mach_header_type mach_header_type;
324  
325    const mach_header_type* header =
326        reinterpret_cast<const mach_header_type*>(&image.header_[0]);
327    return header->filetype;
328  }
329  
330  uint32_t DynamicImage::GetFileType() {
331    if (Is64Bit())
332      return GetFileTypeFromHeader<MachO64>(*this);
333  
334    return GetFileTypeFromHeader<MachO32>(*this);
335  }
336  
337  #pragma mark -
338  
339  //==============================================================================
340  // Loads information about dynamically loaded code in the given task.
341  DynamicImages::DynamicImages(mach_port_t task)
342      : task_(task),
343        cpu_type_(DetermineTaskCPUType(task)),
344        image_list_() {
345    ReadImageInfoForTask();
346  }
347  
348  template<typename MachBits>
349  static uint64_t LookupSymbol(const char* symbol_name,
350                               const char* filename,
351                               cpu_type_t cpu_type) {
352    typedef typename MachBits::nlist_type nlist_type;
353  
354    nlist_type symbol_info[8] = {};
355    const char* symbolNames[2] = { symbol_name, "\0" };
356    nlist_type& list = symbol_info[0];
357    int invalidEntriesCount = breakpad_nlist(filename,
358                                             &list,
359                                             symbolNames,
360                                             cpu_type);
361  
362    if(invalidEntriesCount != 0) {
363      return 0;
364    }
365  
366    assert(list.n_value);
367    return list.n_value;
368  }
369  
#if TARGET_OS_IPHONE || MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
// In this configuration TASK_DYLD_INFO is always treated as available.
static bool HasTaskDyldInfo() {
  return true;
}
#else
// Asks Gestalt for the system version.  The result stays 0 if Gestalt does
// not write to the output value.
static SInt32 GetOSVersionInternal() {
  SInt32 version = 0;
  Gestalt(gestaltSystemVersion, &version);
  return version;
}

// Returns the system version, querying it only on the first call.
static SInt32 GetOSVersion() {
  static SInt32 cached_version = GetOSVersionInternal();
  return cached_version;
}

// Reports whether the running system's version is at least 0x1060.
static bool HasTaskDyldInfo() {
  const SInt32 running_version = GetOSVersion();
  return running_version >= 0x1060;
}
#endif  // TARGET_OS_IPHONE || MAC_OS_X_VERSION_MIN_REQUIRED >= 10_6
390  
391  uint64_t DynamicImages::GetDyldAllImageInfosPointer() {
392    if (HasTaskDyldInfo()) {
393      task_dyld_info_data_t task_dyld_info;
394      mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
395      if (task_info(task_, TASK_DYLD_INFO, (task_info_t)&task_dyld_info,
396                    &count) != KERN_SUCCESS) {
397        return 0;
398      }
399  
400      return (uint64_t)task_dyld_info.all_image_info_addr;
401    } else {
402      const char* imageSymbolName = "_dyld_all_image_infos";
403      const char* dyldPath = "/usr/lib/dyld";
404  
405      if (Is64Bit())
406        return LookupSymbol<MachO64>(imageSymbolName, dyldPath, cpu_type_);
407      return LookupSymbol<MachO32>(imageSymbolName, dyldPath, cpu_type_);
408    }
409  }
410  
411  //==============================================================================
412  // This code was written using dyld_debug.c (from Darwin) as a guide.
413  
414  template<typename MachBits>
415  void ReadImageInfo(DynamicImages& images,
416                     uint64_t image_list_address) {
417    typedef typename MachBits::dyld_image_info dyld_image_info;
418    typedef typename MachBits::dyld_all_image_infos dyld_all_image_infos;
419    typedef typename MachBits::mach_header_type mach_header_type;
420  
421    // Read the structure inside of dyld that contains information about
422    // loaded images.  We're reading from the desired task's address space.
423  
424    // Here we make the assumption that dyld loaded at the same address in
425    // the crashed process vs. this one.  This is an assumption made in
426    // "dyld_debug.c" and is said to be nearly always valid.
427    vector<uint8_t> dyld_all_info_bytes;
428    if (ReadTaskMemory(images.task_,
429                       image_list_address,
430                       sizeof(dyld_all_image_infos),
431                       dyld_all_info_bytes) != KERN_SUCCESS)
432      return;
433  
434    dyld_all_image_infos* dyldInfo =
435      reinterpret_cast<dyld_all_image_infos*>(&dyld_all_info_bytes[0]);
436  
437    // number of loaded images
438    int count = dyldInfo->infoArrayCount;
439  
440    // Read an array of dyld_image_info structures each containing
441    // information about a loaded image.
442    vector<uint8_t> dyld_info_array_bytes;
443      if (ReadTaskMemory(images.task_,
444                         dyldInfo->infoArray,
445                         count * sizeof(dyld_image_info),
446                         dyld_info_array_bytes) != KERN_SUCCESS)
447        return;
448  
449      dyld_image_info* infoArray =
450          reinterpret_cast<dyld_image_info*>(&dyld_info_array_bytes[0]);
451      images.image_list_.reserve(count);
452  
453      for (int i = 0; i < count; ++i) {
454        dyld_image_info& info = infoArray[i];
455  
456        // First read just the mach_header from the image in the task.
457        vector<uint8_t> mach_header_bytes;
458        if (ReadTaskMemory(images.task_,
459                           info.load_address_,
460                           sizeof(mach_header_type),
461                           mach_header_bytes) != KERN_SUCCESS)
462          continue;  // bail on this dynamic image
463  
464        mach_header_type* header =
465            reinterpret_cast<mach_header_type*>(&mach_header_bytes[0]);
466  
467        // Now determine the total amount necessary to read the header
468        // plus all of the load commands.
469        size_t header_size =
470            sizeof(mach_header_type) + header->sizeofcmds;
471  
472        if (ReadTaskMemory(images.task_,
473                           info.load_address_,
474                           header_size,
475                           mach_header_bytes) != KERN_SUCCESS)
476          continue;
477  
478        // Read the file name from the task's memory space.
479        string file_path;
480        if (info.file_path_) {
481          // Although we're reading kMaxStringLength bytes, it's copied in the
482          // the DynamicImage constructor below with the correct string length,
483          // so it's not really wasting memory.
484          file_path = ReadTaskString(images.task_, info.file_path_);
485        }
486  
487        // Create an object representing this image and add it to our list.
488        DynamicImage* new_image;
489        new_image = new DynamicImage(&mach_header_bytes[0],
490                                     header_size,
491                                     info.load_address_,
492                                     file_path,
493                                     static_cast<uintptr_t>(info.file_mod_date_),
494                                     images.task_,
495                                     images.cpu_type_);
496  
497        if (new_image->IsValid()) {
498          images.image_list_.push_back(DynamicImageRef(new_image));
499        } else {
500          delete new_image;
501        }
502      }
503  
504      // sorts based on loading address
505      sort(images.image_list_.begin(), images.image_list_.end());
506      // remove duplicates - this happens in certain strange cases
507      // You can see it in DashboardClient when Google Gadgets plugin
508      // is installed.  Apple's crash reporter log and gdb "info shared"
509      // both show the same library multiple times at the same address
510  
511      vector<DynamicImageRef>::iterator it = unique(images.image_list_.begin(),
512                                                    images.image_list_.end());
513      images.image_list_.erase(it, images.image_list_.end());
514  }
515  
516  void DynamicImages::ReadImageInfoForTask() {
517    uint64_t imageList = GetDyldAllImageInfosPointer();
518  
519    if (imageList) {
520      if (Is64Bit())
521        ReadImageInfo<MachO64>(*this, imageList);
522      else
523        ReadImageInfo<MachO32>(*this, imageList);
524    }
525  }
526  
527  //==============================================================================
528  DynamicImage* DynamicImages::GetExecutableImage() {
529    int executable_index = GetExecutableImageIndex();
530  
531    if (executable_index >= 0) {
532      return GetImage(executable_index);
533    }
534  
535    return NULL;
536  }
537  
538  //==============================================================================
539  // returns -1 if failure to find executable
540  int DynamicImages::GetExecutableImageIndex() {
541    int image_count = GetImageCount();
542  
543    for (int i = 0; i < image_count; ++i) {
544      DynamicImage* image = GetImage(i);
545      if (image->GetFileType() == MH_EXECUTE) {
546        return i;
547      }
548    }
549  
550    return -1;
551  }
552  
553  //==============================================================================
554  // static
555  cpu_type_t DynamicImages::DetermineTaskCPUType(task_t task) {
556    if (task == mach_task_self())
557      return GetNativeCPUType();
558  
559    int mib[CTL_MAXNAME];
560    size_t mibLen = CTL_MAXNAME;
561    int err = sysctlnametomib("sysctl.proc_cputype", mib, &mibLen);
562    if (err == 0) {
563      assert(mibLen < CTL_MAXNAME);
564      pid_for_task(task, &mib[mibLen]);
565      mibLen += 1;
566  
567      cpu_type_t cpu_type;
568      size_t cpuTypeSize = sizeof(cpu_type);
569      sysctl(mib, static_cast<u_int>(mibLen), &cpu_type, &cpuTypeSize, 0, 0);
570      return cpu_type;
571    }
572  
573    return GetNativeCPUType();
574  }
575  
576  }  // namespace google_breakpad