/ src / common / windows / pe_util.cc
pe_util.cc
  1  // Copyright 2019 Google LLC
  2  //
  3  // Redistribution and use in source and binary forms, with or without
  4  // modification, are permitted provided that the following conditions are
  5  // met:
  6  //
  7  //     * Redistributions of source code must retain the above copyright
  8  // notice, this list of conditions and the following disclaimer.
  9  //     * Redistributions in binary form must reproduce the above
 10  // copyright notice, this list of conditions and the following disclaimer
 11  // in the documentation and/or other materials provided with the
 12  // distribution.
 13  //     * Neither the name of Google LLC nor the names of its
 14  // contributors may be used to endorse or promote products derived from
 15  // this software without specific prior written permission.
 16  //
 17  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 21  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 22  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 23  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 27  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28  
 29  #ifdef HAVE_CONFIG_H
 30  #include <config.h>  // Must come first
 31  #endif
 32  
 33  #include "pe_util.h"
 34  
 35  #include <windows.h>
 36  #include <winnt.h>
 37  #include <atlbase.h>
 38  #include <ImageHlp.h>
 39  
 40  #include <functional>
 41  #include <memory>
 42  
 43  #include "common/windows/string_utils-inl.h"
 44  #include "common/windows/guid_string.h"
 45  
 46  namespace {
 47  
 48  /*
 49   * Not defined in WinNT.h prior to SDK 10.0.20348.0 for some reason.
 50   * Definitions taken from: http://uninformed.org/index.cgi?v=4&a=1&p=13
 51   *
 52   */
 53  typedef unsigned char UBYTE;
 54  
 55  #if !defined(UNW_FLAG_EHANDLER)
 56  #define UNW_FLAG_EHANDLER  0x01
 57  #endif
 58  #if !defined(UNW_FLAG_UHANDLER)
 59  #define UNW_FLAG_UHANDLER  0x02
 60  #endif
 61  #if !defined(UNW_FLAG_CHAININFO)
 62  #define UNW_FLAG_CHAININFO 0x04
 63  #endif
 64  
// One 16-bit slot of the UnwindInfo::unwind_code array. A slot is read either
// as an operation descriptor (the anonymous struct) or, for the slots that
// follow certain operations, as a raw operand through |frame_offset|.
union UnwindCode {
  struct {
    UBYTE offset_in_prolog;
    UBYTE unwind_operation_code : 4;  // one of UnwindOperationCodes
    UBYTE operation_info : 4;         // meaning depends on the operation code
  };
  USHORT frame_offset;  // operand view of the slot (e.g. an allocation size)
};
 73  
 74  enum UnwindOperationCodes {
 75    UWOP_PUSH_NONVOL = 0, /* info == register number */
 76    UWOP_ALLOC_LARGE,     /* no info, alloc size in next 2 slots */
 77    UWOP_ALLOC_SMALL,     /* info == size of allocation / 8 - 1 */
 78    UWOP_SET_FPREG,       /* no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 */
 79    UWOP_SAVE_NONVOL,     /* info == register number, offset in next slot */
 80    UWOP_SAVE_NONVOL_FAR, /* info == register number, offset in next 2 slots */
 81    // XXX: these are missing from MSDN!
 82    // See: http://www.osronline.com/ddkx/kmarch/64bitamd_4rs7.htm
 83    UWOP_SAVE_XMM,
 84    UWOP_SAVE_XMM_FAR,
 85    UWOP_SAVE_XMM128,     /* info == XMM reg number, offset in next slot */
 86    UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */
 87    UWOP_PUSH_MACHFRAME   /* info == 0: no error-code, 1: error-code */
 88  };
 89  
 90  // See: http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
 91  // Note: some fields removed as we don't use them.
// Header of an unwind-info record as read from the mapped image by
// PrintPEFrameData().
struct UnwindInfo {
  UBYTE version : 3;
  UBYTE flags : 5;           // UNW_FLAG_* bits; UNW_FLAG_CHAININFO is tested
  UBYTE size_of_prolog;
  UBYTE count_of_codes;      // number of slots to walk in unwind_code
  UBYTE frame_register : 4;
  UBYTE frame_offset : 4;
  // Declared with a single element but indexed up to count_of_codes: the
  // record in the image is variable-length.
  UnwindCode unwind_code[1];
};
101  
// Layout of the CodeView debug record that ReadModuleInfo() looks for in the
// image's debug directory.
struct CV_INFO_PDB70 {
  ULONG cv_signature;  // must equal CV_SIGNATURE_RSDS for the record to be used
  GUID signature;      // combined with |age| by GenerateDebugIdentifier()
  ULONG age;
  CHAR pdb_filename[ANYSIZE_ARRAY];  // read as a NUL-terminated string
};

// Multi-character constant compared against CV_INFO_PDB70::cv_signature.
// NOTE(review): presumably spelled backwards so the bytes read "RSDS" in
// little-endian memory -- confirm against the compiler's multi-char encoding.
#define CV_SIGNATURE_RSDS 'SDSR'
110  
111  // A helper class to scope a PLOADED_IMAGE.
112  class AutoImage {
113  public:
114    explicit AutoImage(PLOADED_IMAGE img) : img_(img) {}
115    ~AutoImage() {
116      if (img_)
117        ImageUnload(img_);
118    }
119  
120    operator PLOADED_IMAGE() { return img_; }
121    PLOADED_IMAGE operator->() { return img_; }
122  
123  private:
124    PLOADED_IMAGE img_;
125  };
126  }  // namespace
127  
128  namespace google_breakpad {
129  
130  using std::unique_ptr;
131  using google_breakpad::GUIDString;
132  
133  bool ReadModuleInfo(const wstring & pe_file, PDBModuleInfo * info) {
134    // Convert wchar to native charset because ImageLoad only takes
135    // a PSTR as input.
136    string img_file;
137    if (!WindowsStringUtils::safe_wcstombs(pe_file, &img_file)) {
138      fprintf(stderr, "Image path '%S' contains unrecognized characters.\n",
139          pe_file.c_str());
140      return false;
141    }
142  
143    AutoImage img(ImageLoad((PSTR)img_file.c_str(), NULL));
144    if (!img) {
145      fprintf(stderr, "Failed to load %s\n", img_file.c_str());
146      return false;
147    }
148  
149    info->cpu = FileHeaderMachineToCpuString(
150        img->FileHeader->FileHeader.Machine);
151  
152    PIMAGE_OPTIONAL_HEADER64 optional_header =
153        &(reinterpret_cast<PIMAGE_NT_HEADERS64>(img->FileHeader))->OptionalHeader;
154  
155    // Search debug directories for a guid signature & age
156    DWORD debug_rva = optional_header->
157      DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG].VirtualAddress;
158    DWORD debug_size = optional_header->
159      DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG].Size;
160    PIMAGE_DEBUG_DIRECTORY debug_directories =
161      static_cast<PIMAGE_DEBUG_DIRECTORY>(
162        ImageRvaToVa(img->FileHeader,
163          img->MappedAddress,
164          debug_rva,
165          &img->LastRvaSection));
166  
167    for (DWORD i = 0; i < debug_size / sizeof(*debug_directories); i++) {
168      if (debug_directories[i].Type != IMAGE_DEBUG_TYPE_CODEVIEW ||
169          debug_directories[i].SizeOfData < sizeof(CV_INFO_PDB70)) {
170        continue;
171      }
172  
173      struct CV_INFO_PDB70* cv_info = static_cast<CV_INFO_PDB70*>(ImageRvaToVa(
174          img->FileHeader,
175          img->MappedAddress,
176          debug_directories[i].AddressOfRawData,
177          &img->LastRvaSection));
178      if (cv_info->cv_signature != CV_SIGNATURE_RSDS) {
179        continue;
180      }
181  
182      info->debug_identifier = GenerateDebugIdentifier(cv_info->age,
183          cv_info->signature);
184  
185      // This code assumes that the pdb_filename is stored as ASCII without
186      // multibyte characters, but it's not clear if that's true.
187      size_t debug_file_length = strnlen_s(cv_info->pdb_filename, MAX_PATH);
188      if (debug_file_length < 0 || debug_file_length >= MAX_PATH) {
189        fprintf(stderr, "PE debug directory is corrupt.\n");
190        return false;
191      }
192      std::string debug_file(cv_info->pdb_filename, debug_file_length);
193      if (!WindowsStringUtils::safe_mbstowcs(debug_file, &info->debug_file)) {
194        fprintf(stderr, "PDB filename '%s' contains unrecognized characters.\n",
195            debug_file.c_str());
196        return false;
197      }
198      info->debug_file = WindowsStringUtils::GetBaseName(info->debug_file);
199  
200      return true;
201    }
202  
203    fprintf(stderr, "Image is missing debug information.\n");
204    return false;
205  }
206  
207  bool ReadPEInfo(const wstring & pe_file, PEModuleInfo * info) {
208    // Convert wchar to native charset because ImageLoad only takes
209    // a PSTR as input.
210    string img_file;
211    if (!WindowsStringUtils::safe_wcstombs(pe_file, &img_file)) {
212      fprintf(stderr, "Image path '%S' contains unrecognized characters.\n",
213          pe_file.c_str());
214      return false;
215    }
216  
217    AutoImage img(ImageLoad((PSTR)img_file.c_str(), NULL));
218    if (!img) {
219      fprintf(stderr, "Failed to open PE file: %S\n", pe_file.c_str());
220      return false;
221    }
222  
223    info->code_file = WindowsStringUtils::GetBaseName(pe_file);
224  
225    // The date and time that the file was created by the linker.
226    DWORD TimeDateStamp = img->FileHeader->FileHeader.TimeDateStamp;
227    // The size of the file in bytes, including all headers.
228    DWORD SizeOfImage = 0;
229    PIMAGE_OPTIONAL_HEADER64 opt =
230      &((PIMAGE_NT_HEADERS64)img->FileHeader)->OptionalHeader;
231    if (opt->Magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC) {
232      // 64-bit PE file.
233      SizeOfImage = opt->SizeOfImage;
234    }
235    else {
236      // 32-bit PE file.
237      SizeOfImage = img->FileHeader->OptionalHeader.SizeOfImage;
238    }
239    wchar_t code_identifier[32];
240    swprintf(code_identifier,
241      sizeof(code_identifier) / sizeof(code_identifier[0]),
242      L"%08X%X", TimeDateStamp, SizeOfImage);
243    info->code_identifier = code_identifier;
244  
245    return true;
246  }
247  
248  bool PrintPEFrameData(const wstring & pe_file, FILE * out_file)
249  {
250    // Convert wchar to native charset because ImageLoad only takes
251    // a PSTR as input.
252    string img_file;
253    if (!WindowsStringUtils::safe_wcstombs(pe_file, &img_file)) {
254      fprintf(stderr, "Image path '%S' contains unrecognized characters.\n",
255          pe_file.c_str());
256      return false;
257    }
258  
259    AutoImage img(ImageLoad((PSTR)img_file.c_str(), NULL));
260    if (!img) {
261      fprintf(stderr, "Failed to load %s\n", img_file.c_str());
262      return false;
263    }
264    PIMAGE_OPTIONAL_HEADER64 optional_header =
265      &(reinterpret_cast<PIMAGE_NT_HEADERS64>(img->FileHeader))->OptionalHeader;
266    if (optional_header->Magic != IMAGE_NT_OPTIONAL_HDR64_MAGIC) {
267      fprintf(stderr, "Not a PE32+ image\n");
268      return false;
269    }
270  
271    // Read Exception Directory
272    DWORD exception_rva = optional_header->
273      DataDirectory[IMAGE_DIRECTORY_ENTRY_EXCEPTION].VirtualAddress;
274    DWORD exception_size = optional_header->
275      DataDirectory[IMAGE_DIRECTORY_ENTRY_EXCEPTION].Size;
276    PIMAGE_RUNTIME_FUNCTION_ENTRY funcs =
277      static_cast<PIMAGE_RUNTIME_FUNCTION_ENTRY>(
278        ImageRvaToVa(img->FileHeader,
279          img->MappedAddress,
280          exception_rva,
281          &img->LastRvaSection));
282    for (DWORD i = 0; i < exception_size / sizeof(*funcs); i++) {
283      DWORD unwind_rva = funcs[i].UnwindInfoAddress;
284      // handle chaining
285      while (unwind_rva & 0x1) {
286        unwind_rva ^= 0x1;
287        PIMAGE_RUNTIME_FUNCTION_ENTRY chained_func =
288          static_cast<PIMAGE_RUNTIME_FUNCTION_ENTRY>(
289            ImageRvaToVa(img->FileHeader,
290              img->MappedAddress,
291              unwind_rva,
292              &img->LastRvaSection));
293        unwind_rva = chained_func->UnwindInfoAddress;
294      }
295  
296      UnwindInfo *unwind_info = static_cast<UnwindInfo*>(
297        ImageRvaToVa(img->FileHeader,
298          img->MappedAddress,
299          unwind_rva,
300          &img->LastRvaSection));
301  
302      DWORD stack_size = 8;  // minimal stack size is 8 for RIP
303      DWORD rip_offset = 8;
304      do {
305        for (UBYTE c = 0; c < unwind_info->count_of_codes; c++) {
306          UnwindCode *unwind_code = &unwind_info->unwind_code[c];
307          switch (unwind_code->unwind_operation_code) {
308          case UWOP_PUSH_NONVOL: {
309            stack_size += 8;
310            break;
311          }
312          case UWOP_ALLOC_LARGE: {
313            if (unwind_code->operation_info == 0) {
314              c++;
315              if (c < unwind_info->count_of_codes)
316                stack_size += (unwind_code + 1)->frame_offset * 8;
317            }
318            else {
319              c += 2;
320              if (c < unwind_info->count_of_codes)
321                stack_size += (unwind_code + 1)->frame_offset |
322                ((unwind_code + 2)->frame_offset << 16);
323            }
324            break;
325          }
326          case UWOP_ALLOC_SMALL: {
327            stack_size += unwind_code->operation_info * 8 + 8;
328            break;
329          }
330          case UWOP_SET_FPREG:
331          case UWOP_SAVE_XMM:
332          case UWOP_SAVE_XMM_FAR:
333            break;
334          case UWOP_SAVE_NONVOL:
335          case UWOP_SAVE_XMM128: {
336            c++;  // skip slot with offset
337            break;
338          }
339          case UWOP_SAVE_NONVOL_FAR:
340          case UWOP_SAVE_XMM128_FAR: {
341            c += 2;  // skip 2 slots with offset
342            break;
343          }
344          case UWOP_PUSH_MACHFRAME: {
345            if (unwind_code->operation_info) {
346              stack_size += 88;
347            }
348            else {
349              stack_size += 80;
350            }
351            rip_offset += 80;
352            break;
353          }
354          }
355        }
356        if (unwind_info->flags & UNW_FLAG_CHAININFO) {
357          PIMAGE_RUNTIME_FUNCTION_ENTRY chained_func =
358            reinterpret_cast<PIMAGE_RUNTIME_FUNCTION_ENTRY>(
359            (unwind_info->unwind_code +
360              ((unwind_info->count_of_codes + 1) & ~1)));
361  
362          unwind_info = static_cast<UnwindInfo*>(
363            ImageRvaToVa(img->FileHeader,
364              img->MappedAddress,
365              chained_func->UnwindInfoAddress,
366              &img->LastRvaSection));
367        }
368        else {
369          unwind_info = NULL;
370        }
371      } while (unwind_info);
372      fprintf(out_file, "STACK CFI INIT %lx %lx .cfa: $rsp .ra: .cfa %lu - ^\n",
373        funcs[i].BeginAddress,
374        funcs[i].EndAddress - funcs[i].BeginAddress, rip_offset);
375      fprintf(out_file, "STACK CFI %lx .cfa: $rsp %lu +\n",
376        funcs[i].BeginAddress, stack_size);
377    }
378  
379    return true;
380  }
381  
382  wstring GenerateDebugIdentifier(DWORD age, GUID signature)
383  {
384    // Use the same format that the MS symbol server uses in filesystem
385    // hierarchies.
386    wchar_t age_string[9];
387    swprintf(age_string, sizeof(age_string) / sizeof(age_string[0]),
388      L"%x", age);
389  
390    // remove when VC++7.1 is no longer supported
391    age_string[sizeof(age_string) / sizeof(age_string[0]) - 1] = L'\0';
392  
393    wstring debug_identifier = GUIDString::GUIDToSymbolServerWString(&signature);
394    debug_identifier.append(age_string);
395  
396    return debug_identifier;
397  }
398  
399  wstring GenerateDebugIdentifier(DWORD age, DWORD signature)
400  {
401    // Use the same format that the MS symbol server uses in filesystem
402    // hierarchies.
403    wchar_t identifier_string[17];
404    swprintf(identifier_string,
405      sizeof(identifier_string) / sizeof(identifier_string[0]),
406      L"%08X%x", signature, age);
407  
408    // remove when VC++7.1 is no longer supported
409    identifier_string[sizeof(identifier_string) /
410      sizeof(identifier_string[0]) - 1] = L'\0';
411  
412    return wstring(identifier_string);
413  }
414  
415  }  // namespace google_breakpad