/* duct-tape / xnu / osfmk / kern / affinity.c */
  1  /*
  2   * Copyright (c) 2007 Apple Inc. All rights reserved.
  3   *
  4   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  5   *
  6   * This file contains Original Code and/or Modifications of Original Code
  7   * as defined in and that are subject to the Apple Public Source License
  8   * Version 2.0 (the 'License'). You may not use this file except in
  9   * compliance with the License. The rights granted to you under the License
 10   * may not be used to create, or enable the creation or redistribution of,
 11   * unlawful or unlicensed copies of an Apple operating system, or to
 12   * circumvent, violate, or enable the circumvention or violation of, any
 13   * terms of an Apple operating system software license agreement.
 14   *
 15   * Please obtain a copy of the License at
 16   * http://www.opensource.apple.com/apsl/ and read it before using this file.
 17   *
 18   * The Original Code and all software distributed under the License are
 19   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 20   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 21   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 22   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 23   * Please see the License for the specific language governing rights and
 24   * limitations under the License.
 25   *
 26   * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 27   */
 28  
 29  #include <kern/affinity.h>
 30  #include <kern/task.h>
 31  #include <kern/kalloc.h>
 32  #include <machine/cpu_affinity.h>
 33  
 34  /*
 35   * Affinity involves 2 objects:
 36   * - affinity namespace:
 37   *	shared by a task family, this controls affinity tag lookup and
 38   *	allocation; it anchors all affinity sets in one namespace
 39   * - affinity set:
 40   *      anchors all threads with membership of this affinity set
 41   *	and which share an affinity tag in the owning namespace.
 42   *
 43   * Locking:
 44   * - The task lock protects the creation of an affinity namespace.
 45   * - The affinity namespace mutex protects the inheritance of a namespace
 46   *   and its thread membership. This includes its destruction when the task
 47   *   reference count goes to zero.
 48   * - The thread mutex protects a thread's affinity set membership, but in
 49   *   addition, the thread_lock is taken to write thread->affinity_set since this
  50   * field (representing the active affinity set) is read by the scheduler.
 51   *
 52   * The lock ordering is: task lock, thread mutex, namespace mutex, thread lock.
 53   */
 54  
 55  #if AFFINITY_DEBUG
 56  #define DBG(x...)       kprintf("DBG: " x)
 57  #else
 58  #define DBG(x...)
 59  #endif
 60  
 61  struct affinity_space {
 62  	lck_mtx_t               aspc_lock;
 63  	uint32_t                aspc_task_count;
 64  	queue_head_t    aspc_affinities;
 65  };
 66  typedef struct affinity_space *affinity_space_t;
 67  
 68  static affinity_space_t affinity_space_alloc(void);
 69  static void affinity_space_free(affinity_space_t aspc);
 70  static affinity_set_t affinity_set_alloc(void);
 71  static void affinity_set_free(affinity_set_t aset);
 72  static affinity_set_t affinity_set_find(affinity_space_t aspc, uint32_t tag);
 73  static void affinity_set_place(affinity_space_t aspc, affinity_set_t aset);
 74  static void affinity_set_add(affinity_set_t aset, thread_t thread);
 75  static affinity_set_t affinity_set_remove(affinity_set_t aset, thread_t thread);
 76  
 77  /*
 78   * The following globals may be modified by the sysctls
 79   *   kern.affinity_sets_enabled	- disables hinting if cleared
 80   *   kern.affinity_sets_mapping	- controls cache distribution policy
 81   * See bsd/kern_sysctl.c
 82   *
 83   * Affinity sets are not used on embedded, which typically only
 84   * has a single pset, and last-processor affinity is
 85   * more important than pset affinity.
 86   */
 87  #if !defined(XNU_TARGET_OS_OSX)
 88  boolean_t       affinity_sets_enabled = FALSE;
 89  int             affinity_sets_mapping = 0;
 90  #else /* !defined(XNU_TARGET_OS_OSX) */
 91  boolean_t       affinity_sets_enabled = TRUE;
 92  int             affinity_sets_mapping = 1;
 93  #endif /* !defined(XNU_TARGET_OS_OSX) */
 94  
 95  boolean_t
 96  thread_affinity_is_supported(void)
 97  {
 98  	return ml_get_max_affinity_sets() != 0;
 99  }
100  
101  
102  /*
103   * thread_affinity_get()
104   * Return the affinity tag for a thread.
105   * Called with the thread mutex held.
106   */
107  uint32_t
108  thread_affinity_get(thread_t thread)
109  {
110  	uint32_t tag;
111  
112  	if (thread->affinity_set != NULL) {
113  		tag = thread->affinity_set->aset_tag;
114  	} else {
115  		tag = THREAD_AFFINITY_TAG_NULL;
116  	}
117  
118  	return tag;
119  }
120  
121  
/*
 * thread_affinity_set()
 * Place a thread in an affinity set identified by a tag.
 * A tag of THREAD_AFFINITY_TAG_NULL just removes any current affinity.
 * Called with thread referenced but not locked.
 *
 * Returns:
 *	KERN_SUCCESS		  affinity updated
 *	KERN_TERMINATED		  thread is no longer active
 *	KERN_RESOURCE_SHORTAGE	  namespace or set allocation failed
 */
kern_return_t
thread_affinity_set(thread_t thread, uint32_t tag)
{
	affinity_set_t          aset;
	affinity_set_t          empty_aset = NULL;	/* set emptied by removal; recycled or freed below */
	affinity_space_t        aspc;
	affinity_space_t        new_aspc = NULL;	/* speculatively allocated namespace */

	DBG("thread_affinity_set(%p,%u)\n", thread, tag);

	/*
	 * Ensure the task has an affinity namespace. The allocation is done
	 * outside the task lock, then the lock is re-taken and the field
	 * re-checked in case another thread installed one in the meantime.
	 */
	task_lock(thread->task);
	aspc = thread->task->affinity_space;
	if (aspc == NULL) {
		task_unlock(thread->task);
		new_aspc = affinity_space_alloc();
		if (new_aspc == NULL) {
			return KERN_RESOURCE_SHORTAGE;
		}
		task_lock(thread->task);
		if (thread->task->affinity_space == NULL) {
			thread->task->affinity_space = new_aspc;
			new_aspc = NULL;
		}
		aspc = thread->task->affinity_space;
	}
	task_unlock(thread->task);
	/* Lost the installation race: discard the unused namespace */
	if (new_aspc) {
		affinity_space_free(new_aspc);
	}

	/* Lock ordering: thread mutex before namespace mutex (see top of file) */
	thread_mtx_lock(thread);
	if (!thread->active) {
		/* Beaten to lock and the thread is dead */
		thread_mtx_unlock(thread);
		return KERN_TERMINATED;
	}

	lck_mtx_lock(&aspc->aspc_lock);
	aset = thread->affinity_set;
	if (aset != NULL) {
		/*
		 * Remove thread from current affinity set.
		 * If that empties the set, affinity_set_remove() hands it
		 * back so it can be reused for the new tag or freed.
		 */
		DBG("thread_affinity_set(%p,%u) removing from aset %p\n",
		    thread, tag, aset);
		empty_aset = affinity_set_remove(aset, thread);
	}

	if (tag != THREAD_AFFINITY_TAG_NULL) {
		aset = affinity_set_find(aspc, tag);
		if (aset != NULL) {
			/*
			 * Add thread to existing affinity set
			 */
			DBG("thread_affinity_set(%p,%u) found aset %p\n",
			    thread, tag, aset);
		} else {
			/*
			 * Use the new affinity set, add this thread
			 * and place it in a suitable processor set.
			 */
			if (empty_aset != NULL) {
				/* Recycle the set we just emptied */
				aset = empty_aset;
				empty_aset = NULL;
			} else {
				aset = affinity_set_alloc();
				if (aset == NULL) {
					lck_mtx_unlock(&aspc->aspc_lock);
					thread_mtx_unlock(thread);
					return KERN_RESOURCE_SHORTAGE;
				}
			}
			DBG("thread_affinity_set(%p,%u) (re-)using aset %p\n",
			    thread, tag, aset);
			aset->aset_tag = tag;
			affinity_set_place(aspc, aset);
		}
		affinity_set_add(aset, thread);
	}

	lck_mtx_unlock(&aspc->aspc_lock);
	thread_mtx_unlock(thread);

	/*
	 * If we wound up not using an empty aset we created,
	 * free it here.
	 */
	if (empty_aset != NULL) {
		affinity_set_free(empty_aset);
	}

	/* Reschedule so the new affinity takes effect immediately */
	if (thread == current_thread()) {
		thread_block(THREAD_CONTINUE_NULL);
	}

	return KERN_SUCCESS;
}
224  
225  /*
226   * task_affinity_create()
227   * Called from task create.
228   */
229  void
230  task_affinity_create(task_t parent_task, task_t child_task)
231  {
232  	affinity_space_t        aspc = parent_task->affinity_space;
233  
234  	DBG("task_affinity_create(%p,%p)\n", parent_task, child_task);
235  
236  	assert(aspc);
237  
238  	/*
239  	 * Bump the task reference count on the shared namespace and
240  	 * give it to the child.
241  	 */
242  	lck_mtx_lock(&aspc->aspc_lock);
243  	aspc->aspc_task_count++;
244  	child_task->affinity_space = aspc;
245  	lck_mtx_unlock(&aspc->aspc_lock);
246  }
247  
248  /*
249   * task_affinity_deallocate()
250   * Called from task_deallocate() when there's a namespace to dereference.
251   */
252  void
253  task_affinity_deallocate(task_t task)
254  {
255  	affinity_space_t        aspc = task->affinity_space;
256  
257  	DBG("task_affinity_deallocate(%p) aspc %p task_count %d\n",
258  	    task, aspc, aspc->aspc_task_count);
259  
260  	lck_mtx_lock(&aspc->aspc_lock);
261  	if (--(aspc->aspc_task_count) == 0) {
262  		assert(queue_empty(&aspc->aspc_affinities));
263  		lck_mtx_unlock(&aspc->aspc_lock);
264  		affinity_space_free(aspc);
265  	} else {
266  		lck_mtx_unlock(&aspc->aspc_lock);
267  	}
268  }
269  
270  /*
271   * task_affinity_info()
272   * Return affinity tag info (number, min, max) for the task.
273   *
274   * Conditions: task is locked.
275   */
276  kern_return_t
277  task_affinity_info(
278  	task_t                  task,
279  	task_info_t             task_info_out,
280  	mach_msg_type_number_t  *task_info_count)
281  {
282  	affinity_set_t                  aset;
283  	affinity_space_t                aspc;
284  	task_affinity_tag_info_t        info;
285  
286  	*task_info_count = TASK_AFFINITY_TAG_INFO_COUNT;
287  	info = (task_affinity_tag_info_t) task_info_out;
288  	info->set_count = 0;
289  	info->task_count = 0;
290  	info->min = THREAD_AFFINITY_TAG_NULL;
291  	info->max = THREAD_AFFINITY_TAG_NULL;
292  
293  	aspc = task->affinity_space;
294  	if (aspc) {
295  		lck_mtx_lock(&aspc->aspc_lock);
296  		queue_iterate(&aspc->aspc_affinities,
297  		    aset, affinity_set_t, aset_affinities) {
298  			info->set_count++;
299  			if (info->min == THREAD_AFFINITY_TAG_NULL ||
300  			    aset->aset_tag < (uint32_t) info->min) {
301  				info->min = aset->aset_tag;
302  			}
303  			if (info->max == THREAD_AFFINITY_TAG_NULL ||
304  			    aset->aset_tag > (uint32_t) info->max) {
305  				info->max = aset->aset_tag;
306  			}
307  		}
308  		info->task_count = aspc->aspc_task_count;
309  		lck_mtx_unlock(&aspc->aspc_lock);
310  	}
311  	return KERN_SUCCESS;
312  }
313  
314  /*
315   * Called from thread_dup() during fork() with child's mutex held.
316   * Set the child into the parent's affinity set.
317   * Note the affinity space is shared.
318   */
319  void
320  thread_affinity_dup(thread_t parent, thread_t child)
321  {
322  	affinity_set_t                  aset;
323  	affinity_space_t                aspc;
324  
325  	thread_mtx_lock(parent);
326  	aset = parent->affinity_set;
327  	DBG("thread_affinity_dup(%p,%p) aset %p\n", parent, child, aset);
328  	if (aset == NULL) {
329  		thread_mtx_unlock(parent);
330  		return;
331  	}
332  
333  	aspc = aset->aset_space;
334  	assert(aspc == parent->task->affinity_space);
335  	assert(aspc == child->task->affinity_space);
336  
337  	lck_mtx_lock(&aspc->aspc_lock);
338  	affinity_set_add(aset, child);
339  	lck_mtx_unlock(&aspc->aspc_lock);
340  
341  	thread_mtx_unlock(parent);
342  }
343  
344  /*
345   * thread_affinity_terminate()
346   * Remove thread from any affinity set.
347   * Called with the thread mutex locked.
348   */
349  void
350  thread_affinity_terminate(thread_t thread)
351  {
352  	affinity_set_t          aset = thread->affinity_set;
353  	affinity_space_t        aspc;
354  
355  	DBG("thread_affinity_terminate(%p)\n", thread);
356  
357  	aspc = aset->aset_space;
358  	lck_mtx_lock(&aspc->aspc_lock);
359  	if (affinity_set_remove(aset, thread)) {
360  		affinity_set_free(aset);
361  	}
362  	lck_mtx_unlock(&aspc->aspc_lock);
363  }
364  
365  /*
366   * thread_affinity_exec()
367   * Called from execve() to cancel any current affinity - a new image implies
368   * the calling thread terminates any expressed or inherited affinity.
369   */
370  void
371  thread_affinity_exec(thread_t thread)
372  {
373  	if (thread->affinity_set != AFFINITY_SET_NULL) {
374  		thread_affinity_terminate(thread);
375  	}
376  }
377  
378  /*
379   * Create an empty affinity namespace data structure.
380   */
381  static affinity_space_t
382  affinity_space_alloc(void)
383  {
384  	affinity_space_t        aspc;
385  
386  	aspc = (affinity_space_t) kalloc(sizeof(struct affinity_space));
387  	if (aspc == NULL) {
388  		return NULL;
389  	}
390  
391  	lck_mtx_init(&aspc->aspc_lock, &task_lck_grp, &task_lck_attr);
392  	queue_init(&aspc->aspc_affinities);
393  	aspc->aspc_task_count = 1;
394  
395  	DBG("affinity_space_create() returns %p\n", aspc);
396  	return aspc;
397  }
398  
399  /*
400   * Destroy the given empty affinity namespace data structure.
401   */
402  static void
403  affinity_space_free(affinity_space_t aspc)
404  {
405  	assert(queue_empty(&aspc->aspc_affinities));
406  
407  	lck_mtx_destroy(&aspc->aspc_lock, &task_lck_grp);
408  	DBG("affinity_space_free(%p)\n", aspc);
409  	kfree(aspc, sizeof(struct affinity_space));
410  }
411  
412  
413  /*
414   * Create an empty affinity set data structure
415   * entering it into a list anchored by the owning task.
416   */
417  static affinity_set_t
418  affinity_set_alloc(void)
419  {
420  	affinity_set_t  aset;
421  
422  	aset = (affinity_set_t) kalloc(sizeof(struct affinity_set));
423  	if (aset == NULL) {
424  		return NULL;
425  	}
426  
427  	aset->aset_thread_count = 0;
428  	queue_init(&aset->aset_affinities);
429  	queue_init(&aset->aset_threads);
430  	aset->aset_num = 0;
431  	aset->aset_pset = PROCESSOR_SET_NULL;
432  	aset->aset_space = NULL;
433  
434  	DBG("affinity_set_create() returns %p\n", aset);
435  	return aset;
436  }
437  
438  /*
439   * Destroy the given empty affinity set data structure
440   * after removing it from the parent task.
441   */
442  static void
443  affinity_set_free(affinity_set_t aset)
444  {
445  	assert(queue_empty(&aset->aset_threads));
446  
447  	DBG("affinity_set_free(%p)\n", aset);
448  	kfree(aset, sizeof(struct affinity_set));
449  }
450  
451  /*
452   * Add a thread to an affinity set.
453   * The caller must have the thread mutex and space locked.
454   */
455  static void
456  affinity_set_add(affinity_set_t aset, thread_t thread)
457  {
458  	spl_t   s;
459  
460  	DBG("affinity_set_add(%p,%p)\n", aset, thread);
461  	queue_enter(&aset->aset_threads,
462  	    thread, thread_t, affinity_threads);
463  	aset->aset_thread_count++;
464  	s = splsched();
465  	thread_lock(thread);
466  	thread->affinity_set = affinity_sets_enabled ? aset : NULL;
467  	thread_unlock(thread);
468  	splx(s);
469  }
470  
471  /*
472   * Remove a thread from an affinity set returning the set if now empty.
473   * The caller must have the thread mutex and space locked.
474   */
475  static affinity_set_t
476  affinity_set_remove(affinity_set_t aset, thread_t thread)
477  {
478  	spl_t   s;
479  
480  	s = splsched();
481  	thread_lock(thread);
482  	thread->affinity_set = NULL;
483  	thread_unlock(thread);
484  	splx(s);
485  
486  	aset->aset_thread_count--;
487  	queue_remove(&aset->aset_threads,
488  	    thread, thread_t, affinity_threads);
489  	if (queue_empty(&aset->aset_threads)) {
490  		queue_remove(&aset->aset_space->aspc_affinities,
491  		    aset, affinity_set_t, aset_affinities);
492  		assert(aset->aset_thread_count == 0);
493  		aset->aset_tag = THREAD_AFFINITY_TAG_NULL;
494  		aset->aset_num = 0;
495  		aset->aset_pset = PROCESSOR_SET_NULL;
496  		aset->aset_space = NULL;
497  		DBG("affinity_set_remove(%p,%p) set now empty\n", aset, thread);
498  		return aset;
499  	} else {
500  		DBG("affinity_set_remove(%p,%p)\n", aset, thread);
501  		return NULL;
502  	}
503  }
504  
505  /*
506   * Find an affinity set in the parent task with the given affinity tag.
507   * The caller must have the space locked.
508   */
509  static affinity_set_t
510  affinity_set_find(affinity_space_t space, uint32_t tag)
511  {
512  	affinity_set_t  aset;
513  
514  	queue_iterate(&space->aspc_affinities,
515  	    aset, affinity_set_t, aset_affinities) {
516  		if (aset->aset_tag == tag) {
517  			DBG("affinity_set_find(%p,%u) finds %p\n",
518  			    space, tag, aset);
519  			return aset;
520  		}
521  	}
522  	DBG("affinity_set_find(%p,%u) not found\n", space, tag);
523  	return NULL;
524  }
525  
526  /*
527   * affinity_set_place() assigns an affinity set to a suitable processor_set.
528   * The selection criteria is:
529   *  - the set currently occupied by the least number of affinities
530   *    belonging to the owning the task.
531   * The caller must have the space locked.
532   */
533  static void
534  affinity_set_place(affinity_space_t aspc, affinity_set_t new_aset)
535  {
536  	unsigned short    set_occupancy[MAX_CPUS] = { 0 };
537  	unsigned    num_cpu_asets = ml_get_max_affinity_sets();
538  	unsigned    i_least_occupied;
539  	affinity_set_t  aset;
540  
541  	if (__improbable(num_cpu_asets > MAX_CPUS)) {
542  		// If this triggers then the array needs to be made bigger.
543  		panic("num_cpu_asets = %d > %d too big in %s\n", num_cpu_asets, MAX_CPUS, __FUNCTION__);
544  	}
545  
546  	/*
547  	 * Scan the affinity sets calculating the number of sets
548  	 * occupy the available physical affinities.
549  	 */
550  	queue_iterate(&aspc->aspc_affinities,
551  	    aset, affinity_set_t, aset_affinities) {
552  		if (aset->aset_num < num_cpu_asets) {
553  			set_occupancy[aset->aset_num]++;
554  		} else {
555  			panic("aset_num = %d in %s\n", aset->aset_num, __FUNCTION__);
556  		}
557  	}
558  
559  	/*
560  	 * Find the least occupied set (or the first empty set).
561  	 * To distribute placements somewhat, start searching from
562  	 * a cpu affinity chosen randomly per namespace:
563  	 *   [(unsigned int)aspc % 127] % num_cpu_asets
564  	 * unless this mapping policy is overridden.
565  	 */
566  	if (affinity_sets_mapping == 0) {
567  		i_least_occupied = 0;
568  	} else {
569  		i_least_occupied = (unsigned int)(((uintptr_t)aspc % 127) % num_cpu_asets);
570  	}
571  	for (unsigned i = 0; i < num_cpu_asets; i++) {
572  		unsigned int    j = (i_least_occupied + i) % num_cpu_asets;
573  		if (set_occupancy[j] == 0) {
574  			i_least_occupied = j;
575  			break;
576  		}
577  		if (set_occupancy[j] < set_occupancy[i_least_occupied]) {
578  			i_least_occupied = j;
579  		}
580  	}
581  	new_aset->aset_num = i_least_occupied;
582  	new_aset->aset_pset = ml_affinity_to_pset(i_least_occupied);
583  
584  	/* Add the new affinity set to the group */
585  	new_aset->aset_space = aspc;
586  	queue_enter(&aspc->aspc_affinities,
587  	    new_aset, affinity_set_t, aset_affinities);
588  
589  	DBG("affinity_set_place(%p,%p) selected affinity %u pset %p\n",
590  	    aspc, new_aset, new_aset->aset_num, new_aset->aset_pset);
591  }