"""bundle_fix_up.py

Fix up a macOS application bundle.

The script rewrites the install names of every ``.dylib``/``.so`` found under
``<input_directory>/Contents`` so that they are resolved relative to
``@executable_path``, adds a ``Frameworks`` rpath to the main executable and,
when ``install_name_tool`` truncated the .NET single-file bundle appended to
that executable, re-appends the bundle and patches the Mach-O load commands.
"""
import argparse
import hashlib
import os
from dataclasses import dataclass
from pathlib import Path
import platform
import shutil
import struct
import subprocess
from typing import BinaryIO, Dict, List, Optional, Sequence, Tuple, Union

parser = argparse.ArgumentParser(description="Fixup for MacOS application bundle")
parser.add_argument("input_directory", help="Input directory (Application path)")
parser.add_argument("executable_sub_path", help="Main executable sub path")


def _find_macho_tools() -> Tuple[Optional[str], Optional[str]]:
    """Locate the ``otool`` / ``install_name_tool`` pair to use.

    Apple's tools are used on Darwin, otherwise the LLVM equivalents
    (unversioned first, then the 15/14/13 suffixed variants).

    Returns:
        ``(otool, install_name_tool)``; either item is ``None`` when the
        corresponding tool could not be found.
    """
    if platform.system() == "Darwin":
        return "otool", "install_name_tool"

    otool = shutil.which("llvm-otool")
    if otool is not None:
        return otool, shutil.which("llvm-install-name-tool")

    for llvm_ver in (15, 14, 13):
        otool = shutil.which(f"llvm-otool-{llvm_ver}")
        if otool is not None:
            return otool, shutil.which(f"llvm-install-name-tool-{llvm_ver}")

    return None, None


# Use Apple LLVM on Darwin, otherwise standard LLVM.
OTOOL, INSTALL_NAME_TOOL = _find_macho_tools()

# Dependencies with one of these prefixes are left untouched by fixup_dylib().
_UNTOUCHED_DEPENDENCY_PREFIXES = ("@executable_path", "/usr/lib", "/System/Library")


def get_dylib_id(dylib_path: Path) -> str:
    """Return the second line of ``otool -D`` for *dylib_path* (its install name)."""
    res = subprocess.check_output([OTOOL, "-D", str(dylib_path.absolute())]).decode(
        "utf-8"
    )

    # The first line of the output is skipped; the id is read from the second.
    return res.split("\n")[1]


def get_dylib_dependencies(dylib_path: Path) -> List[str]:
    """Return the library paths listed by ``otool -L`` for *dylib_path*.

    Only lines containing a ``(compatibility version ...)`` suffix are kept,
    with that suffix removed.
    """
    output = subprocess.check_output(
        [OTOOL, "-L", str(dylib_path.absolute())]
    ).decode("utf-8")

    suffix_marker = " (compatibility version "
    res = []

    # The first output line is skipped, like in get_dylib_id().
    for line in output.split("\n")[1:]:
        line = line.strip()
        index = line.find(suffix_marker)
        if index != -1:
            res.append(line[:index])

    return res


def replace_dylib_id(dylib_path: Path, new_id: str):
    """Set the install name of *dylib_path* to *new_id*."""
    subprocess.check_call(
        [INSTALL_NAME_TOOL, "-id", new_id, str(dylib_path.absolute())]
    )


def change_dylib_link(dylib_path: Path, old: str, new: str):
    """Rewrite the dependency *old* of *dylib_path* so that it points to *new*."""
    subprocess.check_call(
        [INSTALL_NAME_TOOL, "-change", old, new, str(dylib_path.absolute())]
    )


def add_dylib_rpath(dylib_path: Path, rpath: str):
    """Add *rpath* to the run path search list of *dylib_path*."""
    subprocess.check_call(
        [INSTALL_NAME_TOOL, "-add_rpath", rpath, str(dylib_path.absolute())]
    )


def get_path_related_to_other_path(a: Path, b: Path) -> str:
    """Return the directory of *b* relative to *a*, joined with ``/``.

    The file name of *b* itself is not part of the result; a file located
    directly inside *a* yields an empty string.

    Raises:
        ValueError: if *b* is not located below *a*.
    """
    # relative_to() is purely lexical, like the parent-walking loop it
    # replaces, but it cannot spin forever when b is outside of a and it does
    # not confuse a sub-directory that happens to share the name of a.
    return "/".join(b.parent.relative_to(a).parts)


def get_path_related_to_target_exec(input_directory: Path, path: Path):
    """Return the ``@executable_path/../``-based directory containing *path*."""
    return "@executable_path/../" + get_path_related_to_other_path(
        input_directory, path
    )


def fixup_dylib(
    dylib_path: Path,
    replacement_path: str,
    search_path: Sequence[Union[str, Path]],
    content_directory: Path,
):
    """Make *dylib_path* and its dependencies relative to the executable.

    Args:
        dylib_path: Library to patch in place.
        replacement_path: Directory prefix used for the library's new id.
        search_path: Directories in which dependencies are looked up by name.
        content_directory: Bundle ``Contents`` directory, used to express the
            location of each dependency relative to the executable.

    Raises:
        Exception: if a dependency cannot be found in any *search_path* entry.
    """
    dylib_id = get_dylib_id(dylib_path)
    new_dylib_id = replacement_path + "/" + os.path.basename(dylib_id)
    replace_dylib_id(dylib_path, new_dylib_id)

    # The dependencies are queried after the id has been replaced.
    dylib_new_mapping: Dict[str, str] = {}

    for dylib_dependency in get_dylib_dependencies(dylib_path):
        if dylib_dependency.startswith(_UNTOUCHED_DEPENDENCY_PREFIXES):
            continue

        dylib_dependency_name = os.path.basename(dylib_dependency)
        library_found = False

        # NOTE(review): every entry is checked, so when several directories
        # contain the library the last one wins. Kept as-is; confirm whether
        # "first match wins" was intended.
        for library_base_path in search_path:
            lib_path = Path(os.path.join(library_base_path, dylib_dependency_name))

            if lib_path.exists():
                target_replacement_path = get_path_related_to_target_exec(
                    content_directory, lib_path
                )
                dylib_new_mapping[dylib_dependency] = (
                    target_replacement_path + "/" + dylib_dependency_name
                )
                library_found = True

        if not library_found:
            raise Exception(
                f"{dylib_id}: Cannot find dependency {dylib_dependency_name} for fixup"
            )

    for old_link, new_link in dylib_new_mapping.items():
        change_dylib_link(dylib_path, old_link, new_link)


FILE_TYPE_ASSEMBLY = 1

ALIGN_REQUIREMENTS = 4096

# Marker searched for in the executable; the 8 bytes preceding it hold the
# offset of the bundle header.
BUNDLE_SIGNATURE = hashlib.sha256(b".net core bundle\n").digest()

# Largest length representable by the one/two byte prefix handled below
# (two groups of 7 bits).
MAX_EMBEDDED_STRING_SIZE = 0x3FFF

# All on-disk structures are read and written as little-endian with standard
# sizes, so the result does not depend on the machine running the script.
_INT64 = struct.Struct("<q")
_BUNDLE_HEADER = struct.Struct("<iiI")  # major, minor, file count
# deps.json offset/size, runtimeconfig.json offset/size, flags
_BUNDLE_LOCATIONS = struct.Struct("<qqqqq")
_FILE_ENTRY_HEADER = struct.Struct("<QQQb")  # offset, size, compressed size, type


def parse_embedded_string(data: bytes) -> Tuple[bytes, str]:
    """Read a length-prefixed UTF-8 string from the start of *data*.

    The length is stored on one byte, or on two bytes when the high bit of
    the first byte is set (7 bits of length per byte, low bits first).

    Returns:
        ``(remaining_data, string)``.

    Raises:
        ValueError: if the length prefix would need more than two bytes.
    """
    first_byte = data[0]

    if (first_byte & 0x80) == 0:
        size = first_byte
        prefix_size = 1
    else:
        second_byte = data[1]

        if (second_byte & 0x80) != 0:
            raise ValueError("Embedded string length prefix is longer than 2 bytes")

        size = (second_byte << 7) | (first_byte & 0x7F)
        prefix_size = 2

    end = prefix_size + size

    return (data[end:], data[prefix_size:end].decode("utf-8"))


def write_embedded_string(file, string: str):
    """Write *string* to *file* in the format read by parse_embedded_string().

    Raises:
        ValueError: if the UTF-8 encoded string is longer than
            ``MAX_EMBEDDED_STRING_SIZE`` bytes.
    """
    raw_str = string.encode("utf-8")
    raw_str_len = len(raw_str)

    if raw_str_len > MAX_EMBEDDED_STRING_SIZE:
        raise ValueError(
            f"Embedded string is too long ({raw_str_len} > {MAX_EMBEDDED_STRING_SIZE} bytes)"
        )

    # The prefix bytes are unsigned: the first one has its high bit set when a
    # second byte follows, which a signed "b" struct format cannot represent.
    if raw_str_len > 0x7F:
        file.write(bytes((raw_str_len & 0x7F | 0x80, raw_str_len >> 7)))
    else:
        file.write(bytes((raw_str_len,)))

    file.write(raw_str)


# eq=False keeps the identity-based comparison of the original plain class.
@dataclass(eq=False)
class BundleFileEntry:
    """A file stored in the bundle, together with its manifest fields."""

    offset: int
    size: int
    compressed_size: int
    file_type: int
    relative_path: str
    data: bytes

    def write(self, file):
        """Write the file content at the current position of *file*.

        ``offset`` is updated to the position the content ends up at;
        assemblies are first padded to a multiple of ``ALIGN_REQUIREMENTS``.
        """
        self.offset = file.tell()

        misalignment = self.offset % ALIGN_REQUIREMENTS
        if self.file_type == FILE_TYPE_ASSEMBLY and misalignment != 0:
            padding_size = ALIGN_REQUIREMENTS - misalignment
            file.write(b"\0" * padding_size)
            self.offset += padding_size

        file.write(self.data)

    def write_header(self, file):
        """Write the manifest record describing this entry to *file*."""
        file.write(
            _FILE_ENTRY_HEADER.pack(
                self.offset, self.size, self.compressed_size, self.file_type
            )
        )
        write_embedded_string(file, self.relative_path)


# eq=False keeps the identity-based comparison of the original plain class.
@dataclass(eq=False)
class BundleManifest:
    """The bundle header plus the list of files it describes."""

    major: int
    minor: int
    bundle_id: str
    deps_json: Optional[BundleFileEntry]
    runtimeconfig_json: Optional[BundleFileEntry]
    flags: int
    files: List[BundleFileEntry]

    def write(self, file) -> int:
        """Write every file followed by the bundle header to *file*.

        Returns:
            The offset at which the bundle header was written.
        """
        # Contents first: writing them assigns the offsets stored in the header.
        for bundle_file in self.files:
            bundle_file.write(file)

        bundle_header_offset = file.tell()
        file.write(_BUNDLE_HEADER.pack(self.major, self.minor, len(self.files)))
        write_embedded_string(file, self.bundle_id)

        # A missing deps.json / runtimeconfig.json is stored as offset 0, size 0.
        locations = []
        for entry in (self.deps_json, self.runtimeconfig_json):
            if entry is not None:
                locations += [entry.offset, entry.size]
            else:
                locations += [0, 0]

        file.write(_BUNDLE_LOCATIONS.pack(*locations, self.flags))

        for bundle_file in self.files:
            bundle_file.write_header(file)

        return bundle_header_offset


def read_file_entry(
    raw_data: bytes, header_bytes: bytes
) -> Tuple[bytes, BundleFileEntry]:
    """Parse one file record from the start of *header_bytes*.

    Args:
        raw_data: Whole executable, from which the file content is sliced.
        header_bytes: Manifest bytes starting at the record to parse.

    Returns:
        ``(remaining_header_bytes, entry)``.
    """
    offset, size, compressed_size, file_type = _FILE_ENTRY_HEADER.unpack_from(
        header_bytes
    )
    (header_bytes, relative_path) = parse_embedded_string(
        header_bytes[_FILE_ENTRY_HEADER.size :]
    )

    # A compressed size of 0 means the plain size is the stored size.
    stored_size = compressed_size if compressed_size != 0 else size

    return (
        header_bytes,
        BundleFileEntry(
            offset,
            size,
            compressed_size,
            file_type,
            relative_path,
            raw_data[offset : offset + stored_size],
        ),
    )


def get_dotnet_bundle_data(data: bytes) -> Optional[Tuple[int, int, BundleManifest]]:
    """Extract the .NET bundle stored in the executable image *data*.

    Returns:
        ``None`` when the bundle marker is absent, otherwise
        ``(offset_of_first_file, bundle_header_offset, manifest)``.

    Raises:
        ValueError: if the bundle does not contain any file.
    """
    marker_offset = data.find(BUNDLE_SIGNATURE)

    if marker_offset == -1:
        return None

    # The header offset is stored in the 8 bytes right before the marker.
    (header_offset,) = _INT64.unpack(data[marker_offset - 8 : marker_offset])
    header_bytes = data[header_offset:]

    major, minor, files_count = _BUNDLE_HEADER.unpack_from(header_bytes)
    (header_bytes, bundle_id) = parse_embedded_string(
        header_bytes[_BUNDLE_HEADER.size :]
    )

    # v2 header
    (
        deps_json_location_offset,
        _deps_json_location_size,
        runtimeconfig_json_location_offset,
        _runtimeconfig_json_location_size,
        flags,
    ) = _BUNDLE_LOCATIONS.unpack_from(header_bytes)
    header_bytes = header_bytes[_BUNDLE_LOCATIONS.size :]

    files = []
    deps_json = None
    runtimeconfig_json = None

    for _ in range(files_count):
        (header_bytes, file_entry) = read_file_entry(data, header_bytes)
        files.append(file_entry)

        # The two special files are recognised by their offset.
        if file_entry.offset == deps_json_location_offset:
            deps_json = file_entry
        elif file_entry.offset == runtimeconfig_json_location_offset:
            runtimeconfig_json = file_entry

    if not files:
        raise ValueError("The .NET bundle does not contain any file")

    return (
        files[0].offset,
        header_offset,
        BundleManifest(
            major, minor, bundle_id, deps_json, runtimeconfig_json, flags, files
        ),
    )


MH_MAGIC_64 = 0xFEEDFACF

LC_SYMTAB = 0x2
LC_SEGMENT_64 = 0x19
LC_CODE_SIGNATURE = 0x1D

_MACHO_HEADER_64 = struct.Struct("<IiiIIIII")
_LOAD_COMMAND = struct.Struct("<II")  # cmd, cmdsize
_SEGMENT_COMMAND_64 = struct.Struct("<II16sQQQQiiII")
_SYMTAB_COMMAND = struct.Struct("<IIIIII")
_CODE_SIGNATURE_COMMAND = struct.Struct("<IIII")  # cmd, cmdsize, dataoff, datasize


def fixup_linkedit(file, data: bytes, new_size: int):
    """Patch the Mach-O load commands so that the image spans *new_size* bytes.

    The ``__LINKEDIT`` segment and the symbol table string area are extended
    up to *new_size*; a code signature command, if any, is blanked together
    with the data it points to.

    Args:
        file: Seekable binary file open for writing on the executable.
        data: Content of the executable before patching; every value is read
            from this snapshot, not from *file*.
        new_size: Total size of the executable once the bundle is appended.

    Raises:
        ValueError: if *data* is not a little-endian 64-bit Mach-O image or
            lacks a ``__LINKEDIT`` segment or a symbol table command.
    """
    (
        macho_magic,
        macho_cputype,
        macho_cpusubtype,
        macho_filetype,
        macho_ncmds,
        macho_sizeofcmds,
        macho_flags,
        macho_reserved,
    ) = _MACHO_HEADER_64.unpack_from(data, 0)

    # Refuse to patch anything that is not laid out the way it is parsed here.
    if macho_magic != MH_MAGIC_64:
        raise ValueError(f"Not a 64-bit Mach-O image (magic {macho_magic:#x})")

    linkedit_offset = None
    symtab_offset = None
    codesign_offset = None

    # Walk the load commands and remember where the interesting ones start.
    offset = _MACHO_HEADER_64.size
    for _ in range(macho_ncmds):
        (cmd, cmdsize) = _LOAD_COMMAND.unpack_from(data, offset)

        if cmd == LC_SEGMENT_64:
            segname_raw = _SEGMENT_COMMAND_64.unpack_from(data, offset)[2]

            if segname_raw.split(b"\0", 1)[0] == b"__LINKEDIT":
                linkedit_offset = offset
        elif cmd == LC_SYMTAB:
            symtab_offset = offset
        elif cmd == LC_CODE_SIGNATURE:
            codesign_offset = offset

        offset += cmdsize

    if linkedit_offset is None or symtab_offset is None:
        raise ValueError("Missing __LINKEDIT segment or LC_SYMTAB load command")

    # If there is a codesign section, clean it up.
    if codesign_offset is not None:
        (
            _codesign_cmd,
            codesign_cmdsize,
            codesign_dataoff,
            codesign_datasize,
        ) = _CODE_SIGNATURE_COMMAND.unpack_from(data, codesign_offset)

        # NOTE(review): the command is blanked in place and the counters are
        # decremented, which presumably relies on it being the last load
        # command - confirm.
        file.seek(codesign_offset)
        file.write(b"\0" * codesign_cmdsize)

        file.seek(0)
        file.write(
            _MACHO_HEADER_64.pack(
                macho_magic,
                macho_cputype,
                macho_cpusubtype,
                macho_filetype,
                macho_ncmds - 1,
                macho_sizeofcmds - codesign_cmdsize,
                macho_flags,
                macho_reserved,
            )
        )

        file.seek(codesign_dataoff)
        file.write(b"\0" * codesign_datasize)

    # Grow the string table so that it ends at the new end of file.
    (
        symtab_cmd,
        symtab_cmdsize,
        symtab_symoff,
        symtab_nsyms,
        symtab_stroff,
        _symtab_strsize,
    ) = _SYMTAB_COMMAND.unpack_from(data, symtab_offset)

    file.seek(symtab_offset)
    file.write(
        _SYMTAB_COMMAND.pack(
            symtab_cmd,
            symtab_cmdsize,
            symtab_symoff,
            symtab_nsyms,
            symtab_stroff,
            new_size - symtab_stroff,
        )
    )

    # Grow __LINKEDIT the same way; its vmsize is set to its file size.
    (
        linkedit_cmd,
        linkedit_cmdsize,
        linkedit_segname_raw,
        linkedit_vmaddr,
        _linkedit_vmsize,
        linkedit_fileoff,
        _linkedit_filesize,
        linkedit_maxprot,
        linkedit_initprot,
        linkedit_nsects,
        linkedit_flags,
    ) = _SEGMENT_COMMAND_64.unpack_from(data, linkedit_offset)

    linkedit_filesize = new_size - linkedit_fileoff

    file.seek(linkedit_offset)
    file.write(
        _SEGMENT_COMMAND_64.pack(
            linkedit_cmd,
            linkedit_cmdsize,
            linkedit_segname_raw,
            linkedit_vmaddr,
            linkedit_filesize,
            linkedit_fileoff,
            linkedit_filesize,
            linkedit_maxprot,
            linkedit_initprot,
            linkedit_nsects,
            linkedit_flags,
        )
    )


def write_bundle_data(
    output,
    old_bundle_base_offset: int,
    new_bundle_base_offset: int,
    bundle: BundleManifest,
    file_data: Optional[bytes] = None,
) -> int:
    """Write *bundle* at the current position of *output* and link it.

    Args:
        output: Seekable binary file open for reading and writing.
        old_bundle_base_offset: Unused, kept for compatibility.
        new_bundle_base_offset: Offset at which the bundle data starts.
        bundle: Manifest (and files) to write.
        file_data: Content of the executable in which the bundle marker is
            searched. When omitted, it is read from *output*.

    Returns:
        The number of bytes between *new_bundle_base_offset* and the end of
        the written bundle.

    Raises:
        ValueError: if the bundle marker cannot be located.
    """
    if file_data is None:
        # Read the current content without losing the write position.
        write_position = output.tell()
        output.seek(0)
        file_data = output.read()
        output.seek(write_position)

    # Locate the marker before touching the file.
    marker_offset = file_data.find(BUNDLE_SIGNATURE)
    if marker_offset < 8:
        raise ValueError("Cannot find the .NET bundle marker in the executable")

    # Write bundle data
    bundle_header_offset = bundle.write(output)
    total_size = output.tell()

    # Patch the header position
    output.seek(marker_offset - 8)
    output.write(_INT64.pack(bundle_header_offset))

    return total_size - new_bundle_base_offset


def main() -> None:
    """Run the fix-up on the bundle given on the command line."""
    args = parser.parse_args()

    if OTOOL is None or INSTALL_NAME_TOOL is None:
        raise SystemExit(
            "Cannot find otool/install_name_tool (or their LLVM equivalents)"
        )

    content_directory = Path(args.input_directory) / "Contents"
    executable_path = content_directory / args.executable_sub_path

    search_path = [
        content_directory / "Frameworks",
        content_directory / "Resources/lib",
    ]

    # Dynamic libraries first, then shared objects.
    for pattern in ("**/*.dylib", "**/*.so"):
        for path in content_directory.rglob(pattern):
            fixup_dylib(
                path,
                get_path_related_to_target_exec(content_directory, path),
                [path.parent, *search_path],
                content_directory,
            )

    # The bundle is captured before install_name_tool touches the executable.
    bundle_data = get_dotnet_bundle_data(executable_path.read_bytes())
    if bundle_data is None:
        raise SystemExit(f"{executable_path}: cannot find a .NET bundle")

    (bundle_base_offset, bundle_header_offset, bundle) = bundle_data

    add_dylib_rpath(executable_path, "@executable_path/../Frameworks/")

    # Recent "vanilla" versions of LLVM (LLVM 13 and later) seem to really
    # dislike how .NET packages its assemblies. As a result, after running
    # install_name_tool the symtab has been "fixed", resulting in a missing
    # .NET bundle... To mitigate that, we check whether the bundle offset
    # inside the binary is still valid after install_name_tool and re-add the
    # .NET bundle if it is not.
    output_file_size = os.stat(executable_path).st_size
    if output_file_size < bundle_header_offset:
        print("LLVM broke the .NET bundle, readding bundle data...")
        with open(executable_path, "r+b") as output:
            # Reading everything leaves the position at the end of the file,
            # which is where the bundle is appended.
            file_data = output.read()
            bundle_data_size = write_bundle_data(
                output, bundle_base_offset, output_file_size, bundle, file_data
            )

            # Now patch the __LINKEDIT section
            new_size = output_file_size + bundle_data_size
            fixup_linkedit(output, file_data, new_size)


if __name__ == "__main__":
    main()