/ libpkg / pkg_abi_macho.c
pkg_abi_macho.c
  1  /*-
  2   * Copyright (c) 2024 Keve Müller <kevemueller@users.github.com>
  3   *
  4   * SPDX-License-Identifier: BSD-2-Clause
  5   */
  6  
  7  #include <errno.h>
  8  
  9  #include "private/binfmt.h"
 10  #include "private/binfmt_macho.h"
 11  #include "private/pkg.h"
 12  #include "private/event.h"
 13  #include "private/pkg_abi.h"
 14  
 15  /**
 16   * Routines to support pkg_abi.c functions when dealing with Mach-O files.
 17   * Supports getting struct pkg_abi from the binary's load commands.
 * Supports getting shared library information (needed, provided & loader).
 19   * Picks right binary in Universal binary based on ABI.
 20   */
 21  
 22  static enum pkg_arch
 23  cputype_to_pkg_arch(const cpu_type_subtype_t cpu)
 24  {
 25  	switch (cpu.type) {
 26  	case CPU_TYPE_ARM:
 27  		if (cpu.type_is64_32) {
 28  			return (PKG_ARCH_UNKNOWN); /* aarch64-x32 */
 29  		} else if (cpu.type_is64) {
 30  			return (PKG_ARCH_AARCH64);
 31  		} else {
 32  			switch (cpu.subtype_arm) {
 33  			case CPU_SUBTYPE_ARM_V7:
 34  			case CPU_SUBTYPE_ARM_V7S:
 35  			case CPU_SUBTYPE_ARM_V7K:
 36  			case CPU_SUBTYPE_ARM_V7M:
 37  			case CPU_SUBTYPE_ARM_V7EM:
 38  				return (PKG_ARCH_ARMV7);
 39  			case CPU_SUBTYPE_ARM_V6:
 40  			case CPU_SUBTYPE_ARM_V6M:
 41  				return (PKG_ARCH_ARMV6);
 42  			case CPU_SUBTYPE_ARM_XSCALE:
 43  			case CPU_SUBTYPE_ARM_V5:
 44  			case CPU_SUBTYPE_ARM_V4T:
 45  			case CPU_SUBTYPE_ARM_ALL:
 46  			default:
 47  				return (PKG_ARCH_UNKNOWN);
 48  			}
 49  		}
 50  	case CPU_TYPE_POWERPC:
 51  		if (cpu.type_is64_32) {
 52  			return (PKG_ARCH_UNKNOWN); /* powerpc64-x32 */
 53  		} else if (cpu.type_is64) {
 54  			return (PKG_ARCH_POWERPC64);
 55  		} else {
 56  			return (PKG_ARCH_POWERPC);
 57  		}
 58  	case CPU_TYPE_X86:
 59  		if (cpu.type_is64_32) {
 60  			return (PKG_ARCH_UNKNOWN); /* amd64-x32 */
 61  		} else if (cpu.type_is64) {
 62  			return (PKG_ARCH_AMD64);
 63  		} else {
 64  			return (PKG_ARCH_I386);
 65  		}
 66  	default:
 67  		return (PKG_ARCH_UNKNOWN);
 68  	}
 69  }
 70  
 71  static cpu_type_subtype_t
 72  pkg_arch_to_cputype(enum pkg_arch arch) {
 73  	cpu_type_subtype_t cpu = { 0 };
 74  
 75  	switch (arch) {
 76  	case PKG_ARCH_AARCH64:
 77  		cpu.type = CPU_TYPE_ARM;
 78  		cpu.type_is64 = true;
 79  		break;
 80  	case PKG_ARCH_AMD64:
 81  		cpu.type = CPU_TYPE_X86;
 82  		cpu.type_is64 = true;
 83  		cpu.subtype_x86 = CPU_SUBTYPE_X86_ALL;
 84  		break;
 85  	case PKG_ARCH_ARMV6:
 86  		cpu.type = CPU_TYPE_ARM;
 87  		cpu.subtype_arm = CPU_SUBTYPE_ARM_V6;
 88  		break;
 89  	case PKG_ARCH_ARMV7:
 90  		cpu.type = CPU_TYPE_ARM;
 91  		cpu.subtype_arm = CPU_SUBTYPE_ARM_V7;
 92  		break;
 93  	case PKG_ARCH_I386:
 94  		cpu.type = CPU_TYPE_X86;
 95  		cpu.subtype_x86 = CPU_SUBTYPE_X86_ALL;
 96  		break;
 97  	case PKG_ARCH_POWERPC:
 98  		cpu.type = CPU_TYPE_POWERPC;
 99  		cpu.subtype_ppc = CPU_SUBTYPE_POWERPC_ALL;
100  		break;
101  	case PKG_ARCH_POWERPC64:
102  		cpu.type = CPU_TYPE_POWERPC;
103  		cpu.type_is64 = true;
104  		cpu.subtype_ppc = CPU_SUBTYPE_POWERPC_ALL;
105  		break;
106  	case PKG_ARCH_ANY:
107  	case PKG_ARCH_POWERPC64LE:
108  	case PKG_ARCH_RISCV32:
109  	case PKG_ARCH_RISCV64:
110  	case PKG_ARCH_UNKNOWN:
111  		cpu.type = CPU_TYPE_ANY;
112  		break;
113  	}
114  
115  	return cpu;
116  }
117  
118  
119  /**
120   * Using the passed mf descriptor, match the best entry using the provided hint.
121   * No hint or no architecture in hint -> first entry. Debug1 warning if this is not precise match (there were multiple to choose from)
122   * Hint -> always match, even if single architecture in file. Notice if match fails and return null.
123   */
124  static const fat_arch_t *
125  match_entry(macho_file_t *mf, enum pkg_arch arch_hint)
126  {
127  	const fat_arch_t *p = mf->arch;
128  	if (arch_hint != PKG_ARCH_UNKNOWN) {
129  		const cpu_type_subtype_t cpu_hint = pkg_arch_to_cputype(arch_hint);
130  		const fat_arch_t *p_end = p + mf->narch;
131  		while (p < p_end) {
132  			// do not match cpu_hint.type == CPU_TYPE_ANY which is used if the
133  			// arch_hint was not recognized
134  			if (p->cpu.type == cpu_hint.type &&
135  			    p->cpu.type_is64 == cpu_hint.type_is64) {
136  				switch (cpu_hint.type) {
137  				case CPU_TYPE_ARM:
138  					if (p->cpu.subtype_arm ==
139  						CPU_SUBTYPE_ARM_ALL ||
140  					    cpu_hint.subtype_arm ==
141  						CPU_SUBTYPE_ARM_ALL ||
142  					    p->cpu.subtype_arm ==
143  						cpu_hint.subtype_arm) {
144  							return p;
145  					}
146  					break;
147  				case CPU_TYPE_POWERPC:
148  					if (p->cpu.subtype_ppc ==
149  						CPU_SUBTYPE_POWERPC_ALL ||
150  					    cpu_hint.subtype_ppc ==
151  						CPU_SUBTYPE_POWERPC_ALL ||
152  					    p->cpu.subtype_ppc ==
153  						cpu_hint.subtype_ppc) {
154  							return p;
155  					}
156  					break;
157  				case CPU_TYPE_X86:
158  					if (p->cpu.subtype_x86 ==
159  						CPU_SUBTYPE_X86_ALL ||
160  					    cpu_hint.subtype_x86 ==
161  						CPU_SUBTYPE_X86_ALL ||
162  					    p->cpu.subtype_x86 ==
163  						cpu_hint.subtype_x86) {
164  							return p;
165  					}
166  					break;
167  				default:
168  					break;
169  				}
170  			}
171  			pkg_debug(1, "Looking for %s, did not match %s",
172  			    pkg_arch_to_string(PKG_OS_DARWIN, arch_hint),
173  			    pkg_arch_to_string(PKG_OS_DARWIN, cputype_to_pkg_arch(p->cpu)));
174  			p++;
175  		}
176  		pkg_emit_notice("Scanned %d entr%s, found none matching selector %s",
177  			mf->narch, mf->narch > 1 ? "ies" : "y",
178  			pkg_arch_to_string(PKG_OS_DARWIN, arch_hint));
179  		return 0;
180  	} else if (mf->narch > 1 ) {
181  		pkg_debug(1,"Found %"PRIu32" entries in universal binary, picking first",
182  			mf->narch);
183  	}
184  	return p;
185  }
186  
187  /**
188   * With a not-null, potentially pre-populated os_info structure, fill
189   * all members of os_info except altabi with values obtained by parsing the Mach-O
190   * file passed with file descriptor.
191   *
192   * The arch_hint is used to determine the fat entry to be parsed in a universal
193   * binary. If arch_hint is PKG_ARCH_UNKNOWN, the first entry is used.
194   *
195   * Returns EPKG_OK if all went fine, EPKG_FATAL if anything went wrong.
196   * Seeks the file descriptor to an arbitrary position.
197   */
198  int
199  pkg_macho_abi_from_fd(int fd, struct pkg_abi *abi, enum pkg_arch arch_hint)
200  {
201  	*abi = (struct pkg_abi){0};
202  
203  	ssize_t x;
204  	pkg_error_t ret = EPKG_FATAL;
205  
206  	macho_file_t *mf = 0;
207  	build_version_t *bv = 0;
208  
209  	if ((x = read_macho_file(fd, &mf)) < 0) {
210  		goto cleanup;
211  	}
212  
213  	const fat_arch_t *p = match_entry(mf, arch_hint);
214  
215  	if (!p) {
216  		goto cleanup;
217  	}
218  
219  	if (-1 == (x = lseek(fd, p->offset, SEEK_SET))) {
220  		goto cleanup;
221  	}
222  	size_t n = 0;
223  	macho_header_t mh;
224  	if ((x = read_macho_header(fd, &mh)) < 0) {
225  		goto cleanup;
226  	}
227  	const bool swap = mh.swap;
228  	n = 0;
229  	for (uint32_t ui = mh.ncmds; ui-- > 0;) {
230  		size_t n0 = n;
231  		uint32_t loadcmdtype;
232  		uint32_t loadcmdsize;
233  		READ(u32, loadcmdtype);
234  		READ(u32, loadcmdsize);
235  		enum MachOLoadCommand loadcmd = loadcmdtype & ~LC_REQ_DYLD;
236  		switch (loadcmd) {
237  		case LC_BUILD_VERSION:
238  			if (bv) { // overwrite previous LC_VERSION_MIN_X
239  				  // values
240  				free(bv);
241  				bv = 0;
242  			}
243  			READ(build_version, bv);
244  			break;
245  		case LC_VERSION_MIN_IPHONEOS:
246  		case LC_VERSION_MIN_MACOSX:
247  		case LC_VERSION_MIN_TVOS:
248  		case LC_VERSION_MIN_WATCHOS:
249  			if (!bv) {
250  				if ((x = read_min_version(fd, swap, loadcmd,
251  					 &bv)) < 0) {
252  					goto cleanup;
253  				}
254  				n += x;
255  				break;
256  			}
257  			// have seen the more precise
258  			// LC_BUILD_VERSION already
259  			// fall through and disregard this
260  		default:
261  			break;
262  		}
263  		const uint32_t fill = loadcmdsize - (n - n0);
264  		if (fill && -1 == (x = lseek(fd, fill, SEEK_CUR))) {
265  			goto cleanup;
266  		}
267  		n += fill;
268  		if (n > mh.sizeofcmds) {
269  			// we passed the frame boundary of the load commands
270  			pkg_emit_error("Mach-O structure misread.");
271  			errno = EINVAL;
272  			goto cleanup;
273  		}
274  	}
275  
276  	if (bv) {
277  		macho_version_t darwin;
278  		map_platform_to_darwin(&darwin, bv->platform, bv->minos);
279  
280  		abi->os = PKG_OS_DARWIN;
281  
282  		abi->major = darwin.major;
283  		abi->minor = darwin.minor;
284  		abi->patch = darwin.patch;
285  
286  		abi->arch = cputype_to_pkg_arch(mh.cpu);
287  
288  		if (abi->arch == PKG_ARCH_UNKNOWN) {
289  			ret = EPKG_FATAL;
290  		} else {
291  			ret = EPKG_OK;
292  		}
293  	} else {
294  		pkg_emit_notice("No OS version information found in binary.");
295  		ret = EPKG_WARN;
296  	}
297  
298  cleanup:
299  	free(bv);
300  	free(mf);
301  	return ret;
302  }
303  
304  static int
305  analyse_macho(int fd, struct pkg *pkg, char **provided,
306      enum pkg_shlib_flags *provided_flags)
307  {
308  	assert(*provided == NULL);
309  	assert(*provided_flags == PKG_SHLIB_FLAGS_NONE);
310  
311  	ssize_t x;
312  	pkg_error_t ret = EPKG_END;
313  
314  	macho_file_t *mf = 0;
315  
316  	if ((x = read_macho_file(fd, &mf)) < 0) {
317  		goto cleanup;
318  	}
319  
320  	const fat_arch_t *p = match_entry(mf, ctx.abi.arch);
321  
322  	if (!p) {
323  		goto cleanup;
324  	}
325  
326  	if (-1 == (x = lseek(fd, p->offset, SEEK_SET))) {
327  		goto cleanup;
328  	}
329  	size_t n = 0;
330  	macho_header_t mh;
331  	if ((x = read_macho_header(fd, &mh)) < 0) {
332  		goto cleanup;
333  	}
334  	const bool swap = mh.swap;
335  	n = 0;
336  	for (uint32_t ui = mh.ncmds; ui-- > 0;) {
337  		size_t n0 = n;
338  		uint32_t loadcmdtype;
339  		uint32_t loadcmdsize;
340  		READ(u32, loadcmdtype);
341  		READ(u32, loadcmdsize);
342  		enum MachOLoadCommand loadcmd = loadcmdtype & ~LC_REQ_DYLD;
343  		switch (loadcmd) {
344  		case LC_RPATH:
345  		case LC_LOAD_DYLINKER:;
346  			char *dylinker = 0;
347  			if ((x = read_path(fd, swap, loadcmdsize,
348  					&dylinker)) < 0) {
349  				goto cleanup;
350  			}
351  			n += x;
352  			pkg_debug(3, "load_dylinker %d: %s\n", loadcmd, dylinker);
353  			free(dylinker);
354  			break;
355  		case LC_ID_DYLIB:   // provides
356  		case LC_LOAD_DYLIB: // requires...
357  		case LC_LOAD_WEAK_DYLIB:
358  		case LC_REEXPORT_DYLIB:
359  		case LC_LAZY_LOAD_DYLIB:
360  		case LC_LOAD_UPWARD_DYLIB:;
361  			dylib_t *dylib = 0;
362  			if ((x = read_dylib(fd, swap, loadcmdsize,
363  					&dylib)) < 0) {
364  				goto cleanup;
365  			}
366  			n += x;
367  			// while under Darwin full path references are recommended and ubiquitous,
368  			// we align with pkg native environment and use only the basename
369  			// this also strips off any @executable_path, @loader_path, @rpath components
370  			const char * basename = strrchr(dylib->path, '/');
371  			basename = basename ? basename + 1 : dylib->path;
372  			pkg_debug(3,
373  				"Adding dynamic library path: %s ts %"PRIu32" current(%"PRIuFAST16", %"PRIuFAST16", %"PRIuFAST16") compat(%"PRIuFAST16", %"PRIuFAST16", %"PRIuFAST16")\n",
374  				dylib->path, dylib->timestamp,
375  				dylib->current_version.major,
376  				dylib->current_version.minor,
377  				dylib->current_version.patch,
378  				dylib->compatibility_version.major,
379  				dylib->compatibility_version.minor,
380  				dylib->compatibility_version.patch);
381  
382  			char *lib_with_version;
383  			if (dylib->current_version.patch) {
384  				xasprintf(&lib_with_version, "%s-%"PRIuFAST16".%"PRIuFAST16".%"PRIuFAST16, basename, dylib->current_version.major, dylib->current_version.minor, dylib->current_version.patch);
385  			} else {
386  				xasprintf(&lib_with_version, "%s-%"PRIuFAST16".%"PRIuFAST16, basename, dylib->current_version.major, dylib->current_version.minor);
387  			}
388  			if (LC_ID_DYLIB == loadcmd) {
389  				if (*provided != NULL) {
390  					pkg_emit_error("malformed Macho-O file has multiple LC_ID_DYLIB entries");
391  					goto cleanup;
392  				}
393  				*provided = xstrdup(lib_with_version);
394  			} else {
395  				pkg_addshlib_required(pkg, lib_with_version, PKG_SHLIB_FLAGS_NONE);
396  			}
397  			free(lib_with_version);
398  			free(dylib);
399  			break;
400  		default:
401  			break;
402  		}
403  		const uint32_t fill = loadcmdsize - (n - n0);
404  		if (fill && -1 == (x = lseek(fd, fill, SEEK_CUR))) {
405  			goto cleanup;
406  		}
407  		n += fill;
408  		if (n > mh.sizeofcmds) {
409  			// we passed the frame boundary of the load commands
410  			pkg_emit_error("Mach-O structure misread.");
411  			errno = EINVAL;
412  			goto cleanup;
413  		}
414  	}
415  
416  cleanup:
417  	free(mf);
418  	return ret;
419  }
420  
421  int
422  pkg_analyse_init_macho(__unused const char *stage)
423  {
424  	return EPKG_OK;
425  }
426  
427  int
428  pkg_analyse_macho(const bool developer_mode, struct pkg *pkg,
429      const char *fpath, char **provided, enum pkg_shlib_flags *provided_flags)
430  {
431  	assert(*provided == NULL);
432  	assert(*provided_flags == PKG_SHLIB_FLAGS_NONE);
433  
434  	int ret = EPKG_OK;
435  	pkg_debug(1, "Analysing Mach-O %s", fpath);
436  
437  	int fd = open(fpath, O_RDONLY);
438  	if (-1 == fd) {
439  		// pkg_emit_errno("open_pkg_analyse_macho", fpath);
440  		// ret = EPKG_FATAL;
441  		// Be consistent with analyse_elf and return no error if fpath cannot be opened
442  		return ret;
443  	} else {
444  		ret = analyse_macho(fd, pkg, provided, provided_flags);
445  		if (-1 == close(fd)) {
446  			pkg_emit_errno("close_pkg_analyse_macho", fpath);
447  			ret = EPKG_FATAL;
448  		}
449  	}
450  	if (developer_mode) {
451  		if (ret != EPKG_OK && ret != EPKG_END) {
452  			return EPKG_WARN;
453  		}
454  	}
455  	return ret;
456  }
457  
458  int
459  pkg_analyse_close_macho(void)
460  {
461  	return EPKG_OK;
462  }