/ distribution / macos / bundle_fix_up.py
bundle_fix_up.py
  1  import argparse
  2  import hashlib
  3  import os
  4  from pathlib import Path
  5  import platform
  6  import shutil
  7  import struct
  8  import subprocess
  9  from typing import List, Optional, Tuple
 10  
 11  parser = argparse.ArgumentParser(description="Fixup for MacOS application bundle")
 12  parser.add_argument("input_directory", help="Input directory (Application path)")
 13  parser.add_argument("executable_sub_path", help="Main executable sub path")
 14  
 15  # Use Apple LLVM on Darwin, otherwise standard LLVM.
 16  if platform.system() == "Darwin":
 17      OTOOL = "otool"
 18      INSTALL_NAME_TOOL = "install_name_tool"
 19  else:
 20      OTOOL = shutil.which("llvm-otool")
 21      if OTOOL is None:
 22          for llvm_ver in [15, 14, 13]:
 23              otool_path = shutil.which(f"llvm-otool-{llvm_ver}")
 24              if otool_path is not None:
 25                  OTOOL = otool_path
 26                  INSTALL_NAME_TOOL = shutil.which(f"llvm-install-name-tool-{llvm_ver}")
 27                  break
 28      else:
 29          INSTALL_NAME_TOOL = shutil.which("llvm-install-name-tool")
 30  
 31  
 32  args = parser.parse_args()
 33  
 34  
 35  def get_dylib_id(dylib_path: Path) -> str:
 36      res = subprocess.check_output([OTOOL, "-D", str(dylib_path.absolute())]).decode(
 37          "utf-8"
 38      )
 39  
 40      return res.split("\n")[1]
 41  
 42  
 43  def get_dylib_dependencies(dylib_path: Path) -> List[str]:
 44      output = (
 45          subprocess.check_output([OTOOL, "-L", str(dylib_path.absolute())])
 46          .decode("utf-8")
 47          .split("\n")[1:]
 48      )
 49  
 50      res = []
 51  
 52      for line in output:
 53          line = line.strip()
 54          index = line.find(" (compatibility version ")
 55          if index == -1:
 56              continue
 57  
 58          line = line[:index]
 59  
 60          res.append(line)
 61  
 62      return res
 63  
 64  
 65  def replace_dylib_id(dylib_path: Path, new_id: str):
 66      subprocess.check_call(
 67          [INSTALL_NAME_TOOL, "-id", new_id, str(dylib_path.absolute())]
 68      )
 69  
 70  
 71  def change_dylib_link(dylib_path: Path, old: str, new: str):
 72      subprocess.check_call(
 73          [INSTALL_NAME_TOOL, "-change", old, new, str(dylib_path.absolute())]
 74      )
 75  
 76  
 77  def add_dylib_rpath(dylib_path: Path, rpath: str):
 78      subprocess.check_call(
 79          [INSTALL_NAME_TOOL, "-add_rpath", rpath, str(dylib_path.absolute())]
 80      )
 81  
 82  
 83  def fixup_dylib(
 84      dylib_path: Path,
 85      replacement_path: str,
 86      search_path: List[str],
 87      content_directory: Path,
 88  ):
 89      dylib_id = get_dylib_id(dylib_path)
 90      new_dylib_id = replacement_path + "/" + os.path.basename(dylib_id)
 91      replace_dylib_id(dylib_path, new_dylib_id)
 92  
 93      dylib_dependencies = get_dylib_dependencies(dylib_path)
 94      dylib_new_mapping = {}
 95  
 96      for dylib_dependency in dylib_dependencies:
 97          if (
 98              not dylib_dependency.startswith("@executable_path")
 99              and not dylib_dependency.startswith("/usr/lib")
100              and not dylib_dependency.startswith("/System/Library")
101          ):
102              dylib_dependency_name = os.path.basename(dylib_dependency)
103              library_found = False
104              for library_base_path in search_path:
105                  lib_path = Path(os.path.join(library_base_path, dylib_dependency_name))
106  
107                  if lib_path.exists():
108                      target_replacement_path = get_path_related_to_target_exec(
109                          content_directory, lib_path
110                      )
111  
112                      dylib_new_mapping[dylib_dependency] = (
113                          target_replacement_path
114                          + "/"
115                          + os.path.basename(dylib_dependency)
116                      )
117                      library_found = True
118  
119              if not library_found:
120                  raise Exception(
121                      f"{dylib_id}: Cannot find dependency {dylib_dependency_name} for fixup"
122                  )
123  
124      for key in dylib_new_mapping:
125          change_dylib_link(dylib_path, key, dylib_new_mapping[key])
126  
127  
# File-type tag compared against BundleFileEntry.file_type: entries carrying
# this type are padded to an ALIGN_REQUIREMENTS boundary when written.
FILE_TYPE_ASSEMBLY = 1

# Byte boundary that assembly entries are aligned to inside the output file.
ALIGN_REQUIREMENTS = 4096
131  
132  
def parse_embedded_string(data: bytes) -> Tuple[bytes, str]:
    """Decode one length-prefixed UTF-8 string from the front of *data*.

    The length prefix is one byte when its high bit is clear, otherwise two
    bytes: the low 7 bits come from the first byte and the next 7 bits from
    the second byte, whose own high bit must be clear.

    Returns:
        ``(remaining, text)`` where *remaining* is *data* with the prefix and
        the string bytes removed.

    Raises:
        AssertionError: if the second prefix byte also has its high bit set.
        IndexError: if *data* is too short to hold the prefix.
    """
    low = data[0]

    if low & 0x80:
        high = data[1]

        assert (high & 0x80) == 0

        length = (high << 7) | (low & 0x7F)
        start = 2
    else:
        length = low
        start = 1

    end = start + length

    return (data[end:], data[start:end].decode("utf-8"))
152  
153  
def write_embedded_string(file, string: str):
    """Write *string* to *file* as a length-prefixed UTF-8 string.

    The byte length is emitted as a 7-bit encoded integer: one byte when it
    fits in 7 bits, otherwise a low byte with the high bit set followed by a
    byte holding the next 7 bits. Only the one- and two-byte forms are
    produced, so the encoded string may be at most 0x3FFF bytes long.

    Args:
        file: binary file-like object with a ``write`` method.
        string: text to encode.

    Raises:
        ValueError: if the UTF-8 encoding is longer than 0x3FFF bytes.
    """
    raw_str = string.encode("utf-8")
    raw_str_len = len(raw_str)

    # Two prefix bytes carry 14 bits of length; anything larger would need a
    # third byte, which this writer does not emit.
    if raw_str_len > 0x3FFF:
        raise ValueError(
            f"embedded string is too long ({raw_str_len} bytes, maximum is {0x3FFF})"
        )

    if raw_str_len > 0x7F:
        # Unsigned bytes: the first prefix byte always has bit 7 set (>= 128),
        # which does not fit in the signed "b" format.
        file.write(struct.pack("BB", (raw_str_len & 0x7F) | 0x80, raw_str_len >> 7))
    else:
        file.write(struct.pack("B", raw_str_len))

    file.write(raw_str)
167  
168  
class BundleFileEntry(object):
    """One file stored in the bundle: its header fields and its raw bytes."""

    # Position of the data in the file; refreshed by write().
    offset: int
    # Size fields, written to the header unchanged.
    size: int
    compressed_size: int
    # Type tag; FILE_TYPE_ASSEMBLY entries are aligned by write().
    file_type: int
    # Path string stored after the fixed header fields.
    relative_path: str
    # Raw bytes emitted by write().
    data: bytes

    def __init__(
        self,
        offset: int,
        size: int,
        compressed_size: int,
        file_type: int,
        relative_path: str,
        data: bytes,
    ) -> None:
        """Store every argument on the instance under the same name."""
        self.data = data
        self.relative_path = relative_path
        self.file_type = file_type
        self.compressed_size = compressed_size
        self.size = size
        self.offset = offset

    def write(self, file):
        """Write the entry's data at the current position of *file*.

        ``self.offset`` is updated to where the data actually starts. For
        assembly entries, zero bytes are emitted first so that the data
        starts on an ``ALIGN_REQUIREMENTS`` boundary.
        """
        self.offset = file.tell()

        # Only assemblies are aligned; every other type is written in place.
        if self.file_type == FILE_TYPE_ASSEMBLY:
            misalignment = self.offset % ALIGN_REQUIREMENTS
        else:
            misalignment = 0

        if misalignment:
            gap = ALIGN_REQUIREMENTS - misalignment
            file.write(b"\0" * gap)
            self.offset += gap

        file.write(self.data)

    def write_header(self, file):
        """Write the entry's header: fixed fields, then the relative path."""
        fixed_fields = struct.pack(
            "QQQb", self.offset, self.size, self.compressed_size, self.file_type
        )
        file.write(fixed_fields)
        write_embedded_string(file, self.relative_path)
213  
214  
class BundleManifest(object):
    """Bundle header fields together with the list of bundled file entries."""

    # Version numbers written at the start of the header.
    major: int
    minor: int
    # Identifier string written after the version and file count.
    bundle_id: str
    # Entries whose offset/size are recorded in the header; write() accepts
    # None for either and records 0/0 in that case.
    deps_json: BundleFileEntry
    runtimeconfig_json: BundleFileEntry
    # Flags value written after the two locations.
    flags: int
    # Every entry of the bundle, in write order.
    files: List[BundleFileEntry]

    def __init__(
        self,
        major: int,
        minor: int,
        bundle_id: str,
        deps_json: BundleFileEntry,
        runtimeconfig_json: BundleFileEntry,
        flags: int,
        files: List[BundleFileEntry],
    ) -> None:
        """Store every argument on the instance under the same name."""
        self.files = files
        self.flags = flags
        self.runtimeconfig_json = runtimeconfig_json
        self.deps_json = deps_json
        self.bundle_id = bundle_id
        self.minor = minor
        self.major = major

    def write(self, file) -> int:
        """Write all file data, then the bundle header, to *file*.

        The data of every entry goes first (which refreshes each entry's
        offset), followed by the header: version, file count, bundle ID, the
        deps.json and runtimeconfig.json locations, the flags, and finally
        one header per entry.

        Returns:
            The position in *file* at which the bundle header starts.
        """
        for entry in self.files:
            entry.write(file)

        header_position = file.tell()
        file.write(struct.pack("iiI", self.major, self.minor, len(self.files)))
        write_embedded_string(file, self.bundle_id)

        # Locations are read after the data pass, so they hold the new offsets.
        deps = self.deps_json
        deps_location = (0, 0) if deps is None else (deps.offset, deps.size)

        runtimeconfig = self.runtimeconfig_json
        if runtimeconfig is None:
            runtimeconfig_location = (0, 0)
        else:
            runtimeconfig_location = (runtimeconfig.offset, runtimeconfig.size)

        file.write(struct.pack("qq", *deps_location))
        file.write(struct.pack("qq", *runtimeconfig_location))
        file.write(struct.pack("q", self.flags))

        for entry in self.files:
            entry.write_header(file)

        return header_position
280  
281  
def read_file_entry(
    raw_data: bytes, header_bytes: bytes
) -> Tuple[bytes, BundleFileEntry]:
    """Parse one file-entry header and pull the entry's bytes from *raw_data*.

    Args:
        raw_data: the whole file; the entry's data is sliced from it.
        header_bytes: header bytes starting at this entry's header.

    Returns:
        ``(remaining_header, entry)`` where *remaining_header* starts right
        after this entry's header.

    Raises:
        struct.error: if fewer than 0x19 header bytes are available.
    """
    fixed_part_size = 0x19

    offset, size, compressed_size, file_type = struct.unpack(
        "QQQb", header_bytes[:fixed_part_size]
    )
    remaining_header, relative_path = parse_embedded_string(
        header_bytes[fixed_part_size:]
    )

    # A compressed size of zero means the plain size gives the stored length.
    stored_size = compressed_size or size
    payload = raw_data[offset : offset + stored_size]

    entry = BundleFileEntry(
        offset, size, compressed_size, file_type, relative_path, payload
    )

    return (remaining_header, entry)
309  
310  
def get_dotnet_bundle_data(data: bytes) -> Optional[Tuple[int, int, BundleManifest]]:
    """Locate and parse the .NET bundle stored in *data*.

    The bundle is found through the SHA-256 digest of ``".net core bundle\\n"``;
    the 8 bytes right before that digest hold the bundle header offset.

    Returns:
        ``None`` when the digest is absent, otherwise
        ``(offset of the first file entry, header offset, manifest)``.

    Raises:
        IndexError: if the header declares zero files.
        struct.error: if the header is truncated.
    """
    signature = hashlib.sha256(b".net core bundle\n").digest()
    signature_position = data.find(signature)

    if signature_position == -1:
        return None

    (header_offset,) = struct.unpack(
        "q", data[signature_position - 8 : signature_position]
    )
    remaining = data[header_offset:]

    major, minor, files_count = struct.unpack("iiI", remaining[:0xC])
    remaining, bundle_id = parse_embedded_string(remaining[0xC:])

    # v2 header: two (offset, size) locations followed by the flags.
    (
        deps_json_offset,
        _deps_json_size,
        runtimeconfig_json_offset,
        _runtimeconfig_json_size,
        flags,
    ) = struct.unpack("qqqqq", remaining[:0x28])
    remaining = remaining[0x28:]

    entries = []
    deps_json = None
    runtimeconfig_json = None

    for _ in range(files_count):
        remaining, entry = read_file_entry(data, remaining)
        entries.append(entry)

        # The special entries are recognised by their offsets.
        if entry.offset == deps_json_offset:
            deps_json = entry
        elif entry.offset == runtimeconfig_json_offset:
            runtimeconfig_json = entry

    manifest = BundleManifest(
        major, minor, bundle_id, deps_json, runtimeconfig_json, flags, entries
    )

    return (entries[0].offset, header_offset, manifest)
366  
367  
# Load-command identifiers that fixup_linkedit compares against the "cmd"
# field of each command it walks.
LC_SYMTAB = 0x2  # symbol table command
LC_SEGMENT_64 = 0x19  # 64-bit segment command (used to find __LINKEDIT)
LC_CODE_SIGNATURE = 0x1D  # code signature command
371  
372  
def fixup_linkedit(file, data: bytes, new_size: int):
    """Patch the load commands in *file* so the image spans *new_size* bytes.

    *data* is a snapshot of the file used for parsing; all changes are written
    to *file*. Three things are patched:

    * a code-signature command, if present, is zeroed together with the data
      it points at, and the header's command count and size are reduced;
    * the symbol table's string size is set so that it ends at *new_size*;
    * the ``__LINKEDIT`` segment's file size and VM size are set so that it
      ends at *new_size*.

    NOTE(review): the header is parsed as 0x20 bytes at offset 0, which
    presumably means a thin 64-bit Mach-O image — confirm for fat binaries.

    Raises:
        AssertionError: if no ``__LINKEDIT`` segment or no symtab command is found.
    """
    header_format = "IiiIIIII"
    segment_format = "II16sQQQQiiII"
    symtab_format = "IIIIII"

    # Header fields: magic, cputype, cpusubtype, filetype, ncmds, sizeofcmds,
    # flags, reserved.
    header = list(struct.unpack(header_format, data[0:0x20]))

    linkedit_offset = None
    symtab_offset = None
    codesign_offset = None

    cursor = 0x20
    for _ in range(header[4]):
        (cmd, cmdsize) = struct.unpack("II", data[cursor : cursor + 8])

        if cmd == LC_SEGMENT_64:
            segment = struct.unpack(segment_format, data[cursor : cursor + 72])
            segname = segment[2].decode("utf-8").split("\0")[0]

            if segname == "__LINKEDIT":
                linkedit_offset = cursor
        elif cmd == LC_SYMTAB:
            symtab_offset = cursor
        elif cmd == LC_CODE_SIGNATURE:
            codesign_offset = cursor

        cursor += cmdsize

    assert linkedit_offset is not None and symtab_offset is not None

    # If there is a codesign section, clean it up.
    if codesign_offset is not None:
        (
            _codesign_cmd,
            codesign_cmdsize,
            codesign_dataoff,
            codesign_datasize,
        ) = struct.unpack("IIII", data[codesign_offset : codesign_offset + 16])

        # NOTE(review): the command is blanked in place rather than removed,
        # which presumably relies on it being the last load command.
        file.seek(codesign_offset)
        file.write(b"\0" * codesign_cmdsize)

        header[4] -= 1  # ncmds
        header[5] -= codesign_cmdsize  # sizeofcmds
        file.seek(0)
        file.write(struct.pack(header_format, *header))

        file.seek(codesign_dataoff)
        file.write(b"\0" * codesign_datasize)

    # Symtab fields: cmd, cmdsize, symoff, nsyms, stroff, strsize.
    symtab = list(
        struct.unpack(symtab_format, data[symtab_offset : symtab_offset + 24])
    )
    symtab[5] = new_size - symtab[4]

    file.seek(symtab_offset)
    file.write(struct.pack(symtab_format, *symtab))

    # Segment fields: cmd, cmdsize, segname, vmaddr, vmsize, fileoff, filesize,
    # maxprot, initprot, nsects, flags.
    linkedit = list(
        struct.unpack(segment_format, data[linkedit_offset : linkedit_offset + 72])
    )
    linkedit[6] = new_size - linkedit[5]
    linkedit[4] = linkedit[6]

    file.seek(linkedit_offset)
    file.write(struct.pack(segment_format, *linkedit))
512  
513  
def write_bundle_data(
    output,
    old_bundle_base_offset: int,
    new_bundle_base_offset: int,
    bundle: BundleManifest,
) -> int:
    """Write *bundle* at the current position of *output* and patch its pointer.

    After the bundle is written, the 8 bytes right before the bundle signature
    are overwritten with the new bundle header offset.

    NOTE(review): the signature is searched in the module-level ``file_data``,
    not in an argument, so that global must hold the current file contents.
    ``old_bundle_base_offset`` is not used.

    Returns:
        The position of *output* after the bundle was written, minus
        *new_bundle_base_offset*.
    """
    # Write bundle data
    header_position = bundle.write(output)
    end_position = output.tell()

    # Patch the header position
    signature = hashlib.sha256(b".net core bundle\n").digest()
    signature_position = file_data.find(signature)
    output.seek(signature_position - 8)
    output.write(struct.pack("q", header_position))

    return end_position - new_bundle_base_offset
530  
531  
# Paths derived from the command-line arguments: the application bundle, its
# "Contents" directory, and the main executable inside it.
input_directory: Path = Path(args.input_directory)
content_directory: Path = Path(args.input_directory) / "Contents"
executable_path: Path = content_directory / args.executable_sub_path
535  
536  
def get_path_related_to_other_path(a: Path, b: Path) -> str:
    """Return the directory of *b* relative to *a*, joined with ``/``.

    Only the directory part of *b* is used; its final component is dropped.
    An empty string is returned when *b* sits directly inside *a*. The
    comparison is purely lexical; the file system is not consulted.

    Args:
        a: base directory.
        b: path located somewhere below *a*.

    Raises:
        ValueError: if the parent of *b* is not *a* or a descendant of it.
    """
    # relative_to() fails fast when b is outside a, and keeps every
    # intermediate component even if one shares its name with a's last part.
    return "/".join(b.parent.relative_to(a).parts)
550  
551  
def get_path_related_to_target_exec(input_directory: Path, path: Path):
    """Return the ``@executable_path/../``-prefixed directory of *path*.

    The part after the prefix is the result of
    ``get_path_related_to_other_path(input_directory, path)``.
    """
    relative_directory = get_path_related_to_other_path(input_directory, path)

    return f"@executable_path/../{relative_directory}"
556  
557  
# Directories searched for a dependency, after the library's own directory.
search_path = [
    content_directory / "Frameworks",
    content_directory / "Resources/lib",
]


# Fix up every .dylib first, then every .so, found below the Contents directory.
for library_pattern in ("**/*.dylib", "**/*.so"):
    for path in content_directory.rglob(library_pattern):
        # The library's own directory is put in front of the shared ones.
        current_search_path = [path.parent, *search_path]

        fixup_dylib(
            path,
            get_path_related_to_target_exec(content_directory, path),
            current_search_path,
            content_directory,
        )
585  
586  
# Snapshot the executable before install_name_tool touches it, so the bundle
# can be restored from this copy if needed.
with open(executable_path, "rb") as executable_file:
    file_data = executable_file.read()


bundle_data = get_dotnet_bundle_data(file_data)
if bundle_data is None:
    # Fail with a clear message instead of a TypeError when unpacking None.
    raise Exception(f"{executable_path}: Cannot find .NET bundle signature")

(bundle_base_offset, bundle_header_offset, bundle) = bundle_data

add_dylib_rpath(executable_path, "@executable_path/../Frameworks/")

# Recent "vanilla" versions of LLVM (LLVM 13 and newer) seem to dislike how .NET packages its assemblies.
# As a result, install_name_tool may "fix" the symtab and drop the .NET bundle from the executable.
# To mitigate that, check whether the bundle header offset still lies inside the file after
# install_name_tool has run, and re-add the .NET bundle if it does not.
output_file_size = os.stat(executable_path).st_size
if output_file_size < bundle_header_offset:
    print("LLVM broke the .NET bundle, readding bundle data...")
    with open(executable_path, "r+b") as output:
        # Re-read the modified file: write_bundle_data looks up the bundle
        # signature in the module-level file_data, and reading to the end
        # leaves the position where the bundle is appended.
        file_data = output.read()
        bundle_data_size = write_bundle_data(
            output, bundle_base_offset, output_file_size, bundle
        )

        # Now patch the __LINKEDIT section
        new_size = output_file_size + bundle_data_size
        fixup_linkedit(output, file_data, new_size)