/* osfmk/kern/restartable.c */
  1  /*
  2   * Copyright (c) 2019 Apple Inc. All rights reserved.
  3   *
  4   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  5   *
  6   * This file contains Original Code and/or Modifications of Original Code
  7   * as defined in and that are subject to the Apple Public Source License
  8   * Version 2.0 (the 'License'). You may not use this file except in
  9   * compliance with the License. The rights granted to you under the License
 10   * may not be used to create, or enable the creation or redistribution of,
 11   * unlawful or unlicensed copies of an Apple operating system, or to
 12   * circumvent, violate, or enable the circumvention or violation of, any
 13   * terms of an Apple operating system software license agreement.
 14   *
 15   * Please obtain a copy of the License at
 16   * http://www.opensource.apple.com/apsl/ and read it before using this file.
 17   *
 18   * The Original Code and all software distributed under the License are
 19   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 20   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 21   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 22   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 23   * Please see the License for the specific language governing rights and
 24   * limitations under the License.
 25   *
 26   * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 27   */
 28  
 29  #include <mach/mach_types.h>
 30  #include <mach/task.h>
 31  
 32  #include <kern/ast.h>
 33  #include <kern/kalloc.h>
 34  #include <kern/kern_types.h>
 35  #include <kern/mach_param.h>
 36  #include <kern/machine.h>
 37  #include <kern/misc_protos.h>
 38  #include <kern/processor.h>
 39  #include <kern/queue.h>
 40  #include <kern/restartable.h>
 41  #include <kern/task.h>
 42  #include <kern/thread.h>
 43  #include <kern/waitq.h>
 44  
 45  #include <os/hash.h>
 46  #include <os/refcnt.h>
 47  
 48  /**
 49   * @file osfmk/kern/restartable.c
 50   *
 51   * @brief
 52   * This module implements restartable userspace functions.
 53   *
 54   * @discussion
 55   * task_restartable_ranges_register() allows task to configure
 56   * the restartable ranges, only once per task,
 57   * before it has made its second thread.
 58   *
 59   * task_restartable_ranges_synchronize() can later be used to trigger
 60   * restarts for threads with a PC in a restartable region.
 61   *
 62   * It is implemented with an AST (AST_RESET_PCS) that will cause threads
 63   * as they return to userspace to reset PCs in a restartable region
 64   * to the recovery offset of this region.
 65   *
 66   * Because signal delivery would mask the proper saved PC for threads,
 67   * sigreturn also forcefully sets the AST and will go through the logic
 68   * every single time.
 69   */
 70  
/*
 * The kernel provides a qsort implementation; it is declared locally here
 * (with its comparator type) rather than pulled in from a header.
 * Used by _ranges_validate() to sort user-supplied ranges.
 */
typedef int (*cmpfunc_t)(const void *a, const void *b);
extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
 73  
/*
 * A uniqued, refcounted registration of restartable ranges.
 * Instances live in the global rr_hash table so identical registrations
 * (e.g. from a shared cache) are shared between tasks.
 */
struct restartable_ranges {
	queue_chain_t            rr_link;     /* linkage in its rr_hash bucket */
	os_refcnt_t              rr_ref;      /* one reference per task using this registration */
	uint32_t                 rr_count;    /* number of entries in rr_ranges */
	uint32_t                 rr_hash;     /* jenkins hash of rr_ranges; selects the bucket */
	task_restartable_range_t rr_ranges[]; /* sorted, validated ranges (see _ranges_validate) */
};
 81  
 82  #if DEBUG || DEVELOPMENT
 83  #define RR_HASH_SIZE   256
 84  #else
 85  // Release kernel userspace should have shared caches and a single registration
 86  #define RR_HASH_SIZE    16
 87  #endif
 88  
 89  static queue_head_t rr_hash[RR_HASH_SIZE];
 90  LCK_GRP_DECLARE(rr_lock_grp, "restartable ranges");
 91  LCK_SPIN_DECLARE(rr_spinlock, &rr_lock_grp);
 92  
 93  #define rr_lock()   lck_spin_lock_grp(&rr_spinlock, &rr_lock_grp)
 94  #define rr_unlock() lck_spin_unlock(&rr_spinlock);
 95  
 96  #pragma mark internals
 97  
 98  /**
 99   * @function _ranges_cmp
100   *
101   * @brief
102   * Compares two ranges together.
103   */
104  static int
105  _ranges_cmp(const void *_r1, const void *_r2)
106  {
107  	const task_restartable_range_t *r1 = _r1;
108  	const task_restartable_range_t *r2 = _r2;
109  
110  	if (r1->location != r2->location) {
111  		return r1->location < r2->location ? -1 : 1;
112  	}
113  	if (r1->length == r2->length) {
114  		return 0;
115  	}
116  	return r1->length < r2->length ? -1 : 1;
117  }
118  
119  /**
120   * @function _ranges_validate
121   *
122   * @brief
123   * Validates an array of PC ranges for wraps and intersections.
124   *
125   * @discussion
126   * This sorts and modifies the input.
127   *
128   * The ranges must:
129   * - not wrap around,
130   * - have a length/recovery offset within a page of the range start
131   *
132   * @returns
133   * - KERN_SUCCESS:          ranges are valid
134   * - KERN_INVALID_ARGUMENT: ranges are invalid
135   */
136  static kern_return_t
137  _ranges_validate(task_t task, task_restartable_range_t *ranges, uint32_t count)
138  {
139  	qsort(ranges, count, sizeof(task_restartable_range_t), _ranges_cmp);
140  	uint64_t limit = task_has_64Bit_data(task) ? UINT64_MAX : UINT32_MAX;
141  	uint64_t end, recovery;
142  
143  	if (count == 0) {
144  		return KERN_INVALID_ARGUMENT;
145  	}
146  
147  	for (size_t i = 0; i < count; i++) {
148  		if (ranges[i].length > TASK_RESTARTABLE_OFFSET_MAX ||
149  		    ranges[i].recovery_offs > TASK_RESTARTABLE_OFFSET_MAX) {
150  			return KERN_INVALID_ARGUMENT;
151  		}
152  		if (ranges[i].flags) {
153  			return KERN_INVALID_ARGUMENT;
154  		}
155  		if (os_add_overflow(ranges[i].location, ranges[i].length, &end)) {
156  			return KERN_INVALID_ARGUMENT;
157  		}
158  		if (os_add_overflow(ranges[i].location, ranges[i].recovery_offs, &recovery)) {
159  			return KERN_INVALID_ARGUMENT;
160  		}
161  		if (ranges[i].location > limit || end > limit || recovery > limit) {
162  			return KERN_INVALID_ARGUMENT;
163  		}
164  		if (i + 1 < count && end > ranges[i + 1].location) {
165  			return KERN_INVALID_ARGUMENT;
166  		}
167  	}
168  
169  	return KERN_SUCCESS;
170  }
171  
172  /**
173   * @function _ranges_lookup
174   *
175   * @brief
176   * Lookup the left side of a range for a given PC within a set of ranges.
177   *
178   * @returns
179   * - 0: no PC range found
180   * - the left-side of the range.
181   */
__attribute__((always_inline))
static mach_vm_address_t
_ranges_lookup(struct restartable_ranges *rr, mach_vm_address_t pc)
{
	task_restartable_range_t *ranges = rr->rr_ranges;
	uint32_t l = 0, r = rr->rr_count;

	/*
	 * Fast-path rejection: the ranges are sorted and non-overlapping
	 * (see _ranges_validate), so a PC at/before the first start or
	 * at/past the last end cannot be strictly inside any range.
	 * rr_count >= 1 is guaranteed: _ranges_validate rejects count == 0.
	 */
	if (pc <= ranges[0].location) {
		return 0;
	}
	if (pc >= ranges[r - 1].location + ranges[r - 1].length) {
		return 0;
	}

	/*
	 * Binary search for the unique range with
	 * location < pc < location + length.  Both boundaries are
	 * deliberately exclusive (see the comments below).
	 */
	while (l < r) {
		uint32_t i = (r + l) / 2;
		mach_vm_address_t location = ranges[i].location;

		if (pc <= location) {
			/* if the PC is exactly at pc_start, no reset is needed */
			r = i;
		} else if (location + ranges[i].length <= pc) {
			/* if the PC is exactly at the end, it's out of the function */
			l = i + 1;
		} else {
			/* else it's strictly in the range, return the recovery pc */
			return location + ranges[i].recovery_offs;
		}
	}

	return 0;
}
214  
215  /**
216   * @function _restartable_ranges_dispose
217   *
218   * @brief
219   * Helper to dispose of a range that has reached a 0 refcount.
220   */
221  __attribute__((noinline))
222  static void
223  _restartable_ranges_dispose(struct restartable_ranges *rr, bool hash_remove)
224  {
225  	if (hash_remove) {
226  		rr_lock();
227  		remqueue(&rr->rr_link);
228  		rr_unlock();
229  	}
230  	kfree(rr, sizeof(*rr) + rr->rr_count * sizeof(task_restartable_range_t));
231  }
232  
233  /**
234   * @function _restartable_ranges_equals
235   *
236   * @brief
237   * Helper to compare two restartable ranges.
238   */
239  static bool
240  _restartable_ranges_equals(
241  	const struct restartable_ranges *rr1,
242  	const struct restartable_ranges *rr2)
243  {
244  	size_t rr1_size = rr1->rr_count * sizeof(task_restartable_range_t);
245  	return rr1->rr_hash == rr2->rr_hash &&
246  	       rr1->rr_count == rr2->rr_count &&
247  	       memcmp(rr1->rr_ranges, rr2->rr_ranges, rr1_size) == 0;
248  }
249  
250  /**
251   * @function _restartable_ranges_create
252   *
253   * @brief
254   * Helper to create a uniqued restartable range.
255   *
256   * @returns
257   * - KERN_SUCCESS
258   * - KERN_INVALID_ARGUMENT: the validation of the new ranges failed.
259   * - KERN_RESOURCE_SHORTAGE: too many ranges, out of memory
260   */
static kern_return_t
_restartable_ranges_create(task_t task, task_restartable_range_t *ranges,
    uint32_t count, struct restartable_ranges **rr_storage)
{
	struct restartable_ranges *rr, *rr_found, *rr_base;
	queue_head_t *head;
	uint32_t base_count, total_count;
	size_t base_size, size;
	kern_return_t kr;

	/* a pre-existing registration is merged with the new ranges */
	rr_base = *rr_storage;
	base_count = rr_base ? rr_base->rr_count : 0;
	base_size = sizeof(task_restartable_range_t) * base_count;
	size = sizeof(task_restartable_range_t) * count;

	if (os_add_overflow(base_count, count, &total_count)) {
		return KERN_INVALID_ARGUMENT;
	}
	/* cap on the merged registration size */
	if (total_count > 1024) {
		return KERN_RESOURCE_SHORTAGE;
	}

	rr = kalloc(sizeof(*rr) + base_size + size);
	if (rr == NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}

	queue_chain_init(rr->rr_link);
	os_ref_init(&rr->rr_ref, NULL);
	rr->rr_count = total_count;
	/* old ranges first, then the new ones, then re-validate the union */
	if (base_size) {
		memcpy(rr->rr_ranges, rr_base->rr_ranges, base_size);
	}
	memcpy(rr->rr_ranges + base_count, ranges, size);
	kr = _ranges_validate(task, rr->rr_ranges, total_count);
	if (kr) {
		/* not hashed yet: dispose without touching the global hash */
		_restartable_ranges_dispose(rr, false);
		return kr;
	}
	rr->rr_hash = os_hash_jenkins(rr->rr_ranges,
	    rr->rr_count * sizeof(task_restartable_range_t));

	head = &rr_hash[rr->rr_hash % RR_HASH_SIZE];

	rr_lock();
	/*
	 * Unique against an existing identical registration; retain_try
	 * guards against racing a release that dropped the count to zero.
	 */
	queue_iterate(head, rr_found, struct restartable_ranges *, rr_link) {
		if (_restartable_ranges_equals(rr, rr_found) &&
		os_ref_retain_try(&rr_found->rr_ref)) {
			goto found;
		}
	}

	/* no match: publish the new registration in the hash */
	enqueue_tail(head, &rr->rr_link);
	rr_found = rr;

found:
	/* drop the reference on the registration this one replaces */
	if (rr_base && os_ref_release_relaxed(&rr_base->rr_ref) == 0) {
		remqueue(&rr_base->rr_link);
	} else {
		rr_base = NULL;
	}
	rr_unlock();

	*rr_storage = rr_found;

	/* free the unused candidate if an existing registration was reused */
	if (rr_found != rr) {
		_restartable_ranges_dispose(rr, false);
	}
	/* rr_base is non-NULL only if it was already unlinked under the lock */
	if (rr_base) {
		_restartable_ranges_dispose(rr_base, false);
	}
	return KERN_SUCCESS;
}
334  
335  #pragma mark extern interfaces
336  
337  void
338  restartable_ranges_release(struct restartable_ranges *rr)
339  {
340  	if (os_ref_release_relaxed(&rr->rr_ref) == 0) {
341  		_restartable_ranges_dispose(rr, true);
342  	}
343  }
344  
345  void
346  thread_reset_pcs_ast(thread_t thread)
347  {
348  	task_t task = thread->task;
349  	struct restartable_ranges *rr;
350  	mach_vm_address_t pc;
351  
352  	/*
353  	 * Because restartable_ranges are set while the task only has on thread
354  	 * and can't be mutated outside of this, no lock is required to read this.
355  	 */
356  	rr = task->restartable_ranges;
357  	if (rr) {
358  		/* pairs with the barrier in task_restartable_ranges_synchronize() */
359  		os_atomic_thread_fence(acquire);
360  
361  		pc = _ranges_lookup(rr, machine_thread_pc(thread));
362  
363  		if (pc) {
364  			machine_thread_reset_pc(thread, pc);
365  		}
366  	}
367  }
368  
369  void
370  restartable_init(void)
371  {
372  	for (size_t i = 0; i < RR_HASH_SIZE; i++) {
373  		queue_head_init(rr_hash[i]);
374  	}
375  }
376  
377  #pragma mark MiG interfaces
378  
379  kern_return_t
380  task_restartable_ranges_register(
381  	task_t                    task,
382  	task_restartable_range_t *ranges,
383  	mach_msg_type_number_t    count)
384  {
385  	kern_return_t kr;
386  	thread_t th;
387  
388  	if (task != current_task()) {
389  		return KERN_FAILURE;
390  	}
391  
392  
393  	kr = _ranges_validate(task, ranges, count);
394  
395  	if (kr == KERN_SUCCESS) {
396  		task_lock(task);
397  
398  		queue_iterate(&task->threads, th, thread_t, task_threads) {
399  			if (th != current_thread()) {
400  				kr = KERN_NOT_SUPPORTED;
401  				break;
402  			}
403  		}
404  #if !DEBUG && !DEVELOPMENT
405  		/*
406  		 * For security reasons, on release kernels, only allow for this to be
407  		 * configured once.
408  		 *
409  		 * But to be able to test the feature we need to relax this for
410  		 * dev kernels.
411  		 */
412  		if (task->restartable_ranges) {
413  			kr = KERN_NOT_SUPPORTED;
414  		}
415  #endif
416  		if (kr == KERN_SUCCESS) {
417  			kr = _restartable_ranges_create(task, ranges, count,
418  			    &task->restartable_ranges);
419  		}
420  		task_unlock(task);
421  	}
422  
423  	return kr;
424  }
425  
426  kern_return_t
427  task_restartable_ranges_synchronize(task_t task)
428  {
429  	thread_t thread;
430  
431  	if (task != current_task()) {
432  		return KERN_FAILURE;
433  	}
434  
435  	/* pairs with the barrier in thread_reset_pcs_ast() */
436  	os_atomic_thread_fence(release);
437  
438  	task_lock(task);
439  
440  	if (task->restartable_ranges) {
441  		queue_iterate(&task->threads, thread, thread_t, task_threads) {
442  			if (thread != current_thread()) {
443  				thread_mtx_lock(thread);
444  				act_set_ast_reset_pcs(thread);
445  				thread_mtx_unlock(thread);
446  			}
447  		}
448  	}
449  
450  	task_unlock(task);
451  
452  	return KERN_SUCCESS;
453  }