pe_util.cc
// Copyright 2019 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 29 #ifdef HAVE_CONFIG_H 30 #include <config.h> // Must come first 31 #endif 32 33 #include "pe_util.h" 34 35 #include <windows.h> 36 #include <winnt.h> 37 #include <atlbase.h> 38 #include <ImageHlp.h> 39 40 #include <functional> 41 #include <memory> 42 43 #include "common/windows/string_utils-inl.h" 44 #include "common/windows/guid_string.h" 45 46 namespace { 47 48 /* 49 * Not defined in WinNT.h prior to SDK 10.0.20348.0 for some reason. 50 * Definitions taken from: http://uninformed.org/index.cgi?v=4&a=1&p=13 51 * 52 */ 53 typedef unsigned char UBYTE; 54 55 #if !defined(UNW_FLAG_EHANDLER) 56 #define UNW_FLAG_EHANDLER 0x01 57 #endif 58 #if !defined(UNW_FLAG_UHANDLER) 59 #define UNW_FLAG_UHANDLER 0x02 60 #endif 61 #if !defined(UNW_FLAG_CHAININFO) 62 #define UNW_FLAG_CHAININFO 0x04 63 #endif 64 65 union UnwindCode { 66 struct { 67 UBYTE offset_in_prolog; 68 UBYTE unwind_operation_code : 4; 69 UBYTE operation_info : 4; 70 }; 71 USHORT frame_offset; 72 }; 73 74 enum UnwindOperationCodes { 75 UWOP_PUSH_NONVOL = 0, /* info == register number */ 76 UWOP_ALLOC_LARGE, /* no info, alloc size in next 2 slots */ 77 UWOP_ALLOC_SMALL, /* info == size of allocation / 8 - 1 */ 78 UWOP_SET_FPREG, /* no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 */ 79 UWOP_SAVE_NONVOL, /* info == register number, offset in next slot */ 80 UWOP_SAVE_NONVOL_FAR, /* info == register number, offset in next 2 slots */ 81 // XXX: these are missing from MSDN! 82 // See: http://www.osronline.com/ddkx/kmarch/64bitamd_4rs7.htm 83 UWOP_SAVE_XMM, 84 UWOP_SAVE_XMM_FAR, 85 UWOP_SAVE_XMM128, /* info == XMM reg number, offset in next slot */ 86 UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */ 87 UWOP_PUSH_MACHFRAME /* info == 0: no error-code, 1: error-code */ 88 }; 89 90 // See: http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx 91 // Note: some fields removed as we don't use them. 
92 struct UnwindInfo { 93 UBYTE version : 3; 94 UBYTE flags : 5; 95 UBYTE size_of_prolog; 96 UBYTE count_of_codes; 97 UBYTE frame_register : 4; 98 UBYTE frame_offset : 4; 99 UnwindCode unwind_code[1]; 100 }; 101 102 struct CV_INFO_PDB70 { 103 ULONG cv_signature; 104 GUID signature; 105 ULONG age; 106 CHAR pdb_filename[ANYSIZE_ARRAY]; 107 }; 108 109 #define CV_SIGNATURE_RSDS 'SDSR' 110 111 // A helper class to scope a PLOADED_IMAGE. 112 class AutoImage { 113 public: 114 explicit AutoImage(PLOADED_IMAGE img) : img_(img) {} 115 ~AutoImage() { 116 if (img_) 117 ImageUnload(img_); 118 } 119 120 operator PLOADED_IMAGE() { return img_; } 121 PLOADED_IMAGE operator->() { return img_; } 122 123 private: 124 PLOADED_IMAGE img_; 125 }; 126 } // namespace 127 128 namespace google_breakpad { 129 130 using std::unique_ptr; 131 using google_breakpad::GUIDString; 132 133 bool ReadModuleInfo(const wstring & pe_file, PDBModuleInfo * info) { 134 // Convert wchar to native charset because ImageLoad only takes 135 // a PSTR as input. 
136 string img_file; 137 if (!WindowsStringUtils::safe_wcstombs(pe_file, &img_file)) { 138 fprintf(stderr, "Image path '%S' contains unrecognized characters.\n", 139 pe_file.c_str()); 140 return false; 141 } 142 143 AutoImage img(ImageLoad((PSTR)img_file.c_str(), NULL)); 144 if (!img) { 145 fprintf(stderr, "Failed to load %s\n", img_file.c_str()); 146 return false; 147 } 148 149 info->cpu = FileHeaderMachineToCpuString( 150 img->FileHeader->FileHeader.Machine); 151 152 PIMAGE_OPTIONAL_HEADER64 optional_header = 153 &(reinterpret_cast<PIMAGE_NT_HEADERS64>(img->FileHeader))->OptionalHeader; 154 155 // Search debug directories for a guid signature & age 156 DWORD debug_rva = optional_header-> 157 DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG].VirtualAddress; 158 DWORD debug_size = optional_header-> 159 DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG].Size; 160 PIMAGE_DEBUG_DIRECTORY debug_directories = 161 static_cast<PIMAGE_DEBUG_DIRECTORY>( 162 ImageRvaToVa(img->FileHeader, 163 img->MappedAddress, 164 debug_rva, 165 &img->LastRvaSection)); 166 167 for (DWORD i = 0; i < debug_size / sizeof(*debug_directories); i++) { 168 if (debug_directories[i].Type != IMAGE_DEBUG_TYPE_CODEVIEW || 169 debug_directories[i].SizeOfData < sizeof(CV_INFO_PDB70)) { 170 continue; 171 } 172 173 struct CV_INFO_PDB70* cv_info = static_cast<CV_INFO_PDB70*>(ImageRvaToVa( 174 img->FileHeader, 175 img->MappedAddress, 176 debug_directories[i].AddressOfRawData, 177 &img->LastRvaSection)); 178 if (cv_info->cv_signature != CV_SIGNATURE_RSDS) { 179 continue; 180 } 181 182 info->debug_identifier = GenerateDebugIdentifier(cv_info->age, 183 cv_info->signature); 184 185 // This code assumes that the pdb_filename is stored as ASCII without 186 // multibyte characters, but it's not clear if that's true. 
187 size_t debug_file_length = strnlen_s(cv_info->pdb_filename, MAX_PATH); 188 if (debug_file_length < 0 || debug_file_length >= MAX_PATH) { 189 fprintf(stderr, "PE debug directory is corrupt.\n"); 190 return false; 191 } 192 std::string debug_file(cv_info->pdb_filename, debug_file_length); 193 if (!WindowsStringUtils::safe_mbstowcs(debug_file, &info->debug_file)) { 194 fprintf(stderr, "PDB filename '%s' contains unrecognized characters.\n", 195 debug_file.c_str()); 196 return false; 197 } 198 info->debug_file = WindowsStringUtils::GetBaseName(info->debug_file); 199 200 return true; 201 } 202 203 fprintf(stderr, "Image is missing debug information.\n"); 204 return false; 205 } 206 207 bool ReadPEInfo(const wstring & pe_file, PEModuleInfo * info) { 208 // Convert wchar to native charset because ImageLoad only takes 209 // a PSTR as input. 210 string img_file; 211 if (!WindowsStringUtils::safe_wcstombs(pe_file, &img_file)) { 212 fprintf(stderr, "Image path '%S' contains unrecognized characters.\n", 213 pe_file.c_str()); 214 return false; 215 } 216 217 AutoImage img(ImageLoad((PSTR)img_file.c_str(), NULL)); 218 if (!img) { 219 fprintf(stderr, "Failed to open PE file: %S\n", pe_file.c_str()); 220 return false; 221 } 222 223 info->code_file = WindowsStringUtils::GetBaseName(pe_file); 224 225 // The date and time that the file was created by the linker. 226 DWORD TimeDateStamp = img->FileHeader->FileHeader.TimeDateStamp; 227 // The size of the file in bytes, including all headers. 228 DWORD SizeOfImage = 0; 229 PIMAGE_OPTIONAL_HEADER64 opt = 230 &((PIMAGE_NT_HEADERS64)img->FileHeader)->OptionalHeader; 231 if (opt->Magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC) { 232 // 64-bit PE file. 233 SizeOfImage = opt->SizeOfImage; 234 } 235 else { 236 // 32-bit PE file. 
237 SizeOfImage = img->FileHeader->OptionalHeader.SizeOfImage; 238 } 239 wchar_t code_identifier[32]; 240 swprintf(code_identifier, 241 sizeof(code_identifier) / sizeof(code_identifier[0]), 242 L"%08X%X", TimeDateStamp, SizeOfImage); 243 info->code_identifier = code_identifier; 244 245 return true; 246 } 247 248 bool PrintPEFrameData(const wstring & pe_file, FILE * out_file) 249 { 250 // Convert wchar to native charset because ImageLoad only takes 251 // a PSTR as input. 252 string img_file; 253 if (!WindowsStringUtils::safe_wcstombs(pe_file, &img_file)) { 254 fprintf(stderr, "Image path '%S' contains unrecognized characters.\n", 255 pe_file.c_str()); 256 return false; 257 } 258 259 AutoImage img(ImageLoad((PSTR)img_file.c_str(), NULL)); 260 if (!img) { 261 fprintf(stderr, "Failed to load %s\n", img_file.c_str()); 262 return false; 263 } 264 PIMAGE_OPTIONAL_HEADER64 optional_header = 265 &(reinterpret_cast<PIMAGE_NT_HEADERS64>(img->FileHeader))->OptionalHeader; 266 if (optional_header->Magic != IMAGE_NT_OPTIONAL_HDR64_MAGIC) { 267 fprintf(stderr, "Not a PE32+ image\n"); 268 return false; 269 } 270 271 // Read Exception Directory 272 DWORD exception_rva = optional_header-> 273 DataDirectory[IMAGE_DIRECTORY_ENTRY_EXCEPTION].VirtualAddress; 274 DWORD exception_size = optional_header-> 275 DataDirectory[IMAGE_DIRECTORY_ENTRY_EXCEPTION].Size; 276 PIMAGE_RUNTIME_FUNCTION_ENTRY funcs = 277 static_cast<PIMAGE_RUNTIME_FUNCTION_ENTRY>( 278 ImageRvaToVa(img->FileHeader, 279 img->MappedAddress, 280 exception_rva, 281 &img->LastRvaSection)); 282 for (DWORD i = 0; i < exception_size / sizeof(*funcs); i++) { 283 DWORD unwind_rva = funcs[i].UnwindInfoAddress; 284 // handle chaining 285 while (unwind_rva & 0x1) { 286 unwind_rva ^= 0x1; 287 PIMAGE_RUNTIME_FUNCTION_ENTRY chained_func = 288 static_cast<PIMAGE_RUNTIME_FUNCTION_ENTRY>( 289 ImageRvaToVa(img->FileHeader, 290 img->MappedAddress, 291 unwind_rva, 292 &img->LastRvaSection)); 293 unwind_rva = chained_func->UnwindInfoAddress; 
294 } 295 296 UnwindInfo *unwind_info = static_cast<UnwindInfo*>( 297 ImageRvaToVa(img->FileHeader, 298 img->MappedAddress, 299 unwind_rva, 300 &img->LastRvaSection)); 301 302 DWORD stack_size = 8; // minimal stack size is 8 for RIP 303 DWORD rip_offset = 8; 304 do { 305 for (UBYTE c = 0; c < unwind_info->count_of_codes; c++) { 306 UnwindCode *unwind_code = &unwind_info->unwind_code[c]; 307 switch (unwind_code->unwind_operation_code) { 308 case UWOP_PUSH_NONVOL: { 309 stack_size += 8; 310 break; 311 } 312 case UWOP_ALLOC_LARGE: { 313 if (unwind_code->operation_info == 0) { 314 c++; 315 if (c < unwind_info->count_of_codes) 316 stack_size += (unwind_code + 1)->frame_offset * 8; 317 } 318 else { 319 c += 2; 320 if (c < unwind_info->count_of_codes) 321 stack_size += (unwind_code + 1)->frame_offset | 322 ((unwind_code + 2)->frame_offset << 16); 323 } 324 break; 325 } 326 case UWOP_ALLOC_SMALL: { 327 stack_size += unwind_code->operation_info * 8 + 8; 328 break; 329 } 330 case UWOP_SET_FPREG: 331 case UWOP_SAVE_XMM: 332 case UWOP_SAVE_XMM_FAR: 333 break; 334 case UWOP_SAVE_NONVOL: 335 case UWOP_SAVE_XMM128: { 336 c++; // skip slot with offset 337 break; 338 } 339 case UWOP_SAVE_NONVOL_FAR: 340 case UWOP_SAVE_XMM128_FAR: { 341 c += 2; // skip 2 slots with offset 342 break; 343 } 344 case UWOP_PUSH_MACHFRAME: { 345 if (unwind_code->operation_info) { 346 stack_size += 88; 347 } 348 else { 349 stack_size += 80; 350 } 351 rip_offset += 80; 352 break; 353 } 354 } 355 } 356 if (unwind_info->flags & UNW_FLAG_CHAININFO) { 357 PIMAGE_RUNTIME_FUNCTION_ENTRY chained_func = 358 reinterpret_cast<PIMAGE_RUNTIME_FUNCTION_ENTRY>( 359 (unwind_info->unwind_code + 360 ((unwind_info->count_of_codes + 1) & ~1))); 361 362 unwind_info = static_cast<UnwindInfo*>( 363 ImageRvaToVa(img->FileHeader, 364 img->MappedAddress, 365 chained_func->UnwindInfoAddress, 366 &img->LastRvaSection)); 367 } 368 else { 369 unwind_info = NULL; 370 } 371 } while (unwind_info); 372 fprintf(out_file, "STACK CFI INIT 
%lx %lx .cfa: $rsp .ra: .cfa %lu - ^\n", 373 funcs[i].BeginAddress, 374 funcs[i].EndAddress - funcs[i].BeginAddress, rip_offset); 375 fprintf(out_file, "STACK CFI %lx .cfa: $rsp %lu +\n", 376 funcs[i].BeginAddress, stack_size); 377 } 378 379 return true; 380 } 381 382 wstring GenerateDebugIdentifier(DWORD age, GUID signature) 383 { 384 // Use the same format that the MS symbol server uses in filesystem 385 // hierarchies. 386 wchar_t age_string[9]; 387 swprintf(age_string, sizeof(age_string) / sizeof(age_string[0]), 388 L"%x", age); 389 390 // remove when VC++7.1 is no longer supported 391 age_string[sizeof(age_string) / sizeof(age_string[0]) - 1] = L'\0'; 392 393 wstring debug_identifier = GUIDString::GUIDToSymbolServerWString(&signature); 394 debug_identifier.append(age_string); 395 396 return debug_identifier; 397 } 398 399 wstring GenerateDebugIdentifier(DWORD age, DWORD signature) 400 { 401 // Use the same format that the MS symbol server uses in filesystem 402 // hierarchies. 403 wchar_t identifier_string[17]; 404 swprintf(identifier_string, 405 sizeof(identifier_string) / sizeof(identifier_string[0]), 406 L"%08X%x", signature, age); 407 408 // remove when VC++7.1 is no longer supported 409 identifier_string[sizeof(identifier_string) / 410 sizeof(identifier_string[0]) - 1] = L'\0'; 411 412 return wstring(identifier_string); 413 } 414 415 } // namespace google_breakpad