  1  /*
  2   * Copyright (c) 2016-2020 Apple Inc. All rights reserved.
  3   *
  4   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  5   *
  6   * This file contains Original Code and/or Modifications of Original Code
  7   * as defined in and that are subject to the Apple Public Source License
  8   * Version 2.0 (the 'License'). You may not use this file except in
  9   * compliance with the License. The rights granted to you under the License
 10   * may not be used to create, or enable the creation or redistribution of,
 11   * unlawful or unlicensed copies of an Apple operating system, or to
 12   * circumvent, violate, or enable the circumvention or violation of, any
 13   * terms of an Apple operating system software license agreement.
 14   *
 15   * Please obtain a copy of the License at
 16   * http://www.opensource.apple.com/apsl/ and read it before using this file.
 17   *
 18   * The Original Code and all software distributed under the License are
 19   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 20   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 21   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 22   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 23   * Please see the License for the specific language governing rights and
 24   * limitations under the License.
 25   *
 26   * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 27   */
 28  
 29  #include <mach/mach_types.h>
 30  #include <kern/kern_types.h>
 31  #include <kern/processor.h>
 32  #include <kern/thread.h>
 33  #include <kern/thread_group.h>
 34  #include <kern/zalloc.h>
 35  #include <kern/task.h>
 36  #include <kern/machine.h>
 37  #include <kern/coalition.h>
 38  #include <sys/errno.h>
 39  #include <kern/queue.h>
 40  #include <kern/locks.h>
 41  #include <kern/thread_group.h>
 42  #include <kern/sched_clutch.h>
 43  
 44  #if CONFIG_THREAD_GROUPS
 45  
 46  #define CACHELINE_SIZE (1 << MMU_CLINE)
 47  
 48  struct thread_group {
 49  	uint64_t                tg_id;
 50  	char                    tg_name[THREAD_GROUP_MAXNAME];
 51  	struct os_refcnt        tg_refcount;
 52  	uint32_t                tg_flags;
 53  	cluster_type_t          tg_recommendation;
 54  	queue_chain_t           tg_queue_chain;
 55  #if CONFIG_SCHED_CLUTCH
 56  	struct sched_clutch     tg_sched_clutch;
 57  #endif /* CONFIG_SCHED_CLUTCH */
 58  	// 16 bytes of padding here
 59  	uint8_t                 tg_machine_data[] __attribute__((aligned(CACHELINE_SIZE)));
 60  } __attribute__((aligned(8)));
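/*
 * Illustrative sketch only (stand-alone analogue with hypothetical names, not
 * part of the kernel build): tg_machine_data[] starts on its own cache line,
 * and platform code handed a machine-data pointer can recover the owning
 * thread_group with the offsetof() subtraction used by the
 * sched_perfcontrol_* entry points later in this file.
 */
#if 0
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

struct example_group {
	uint64_t id;
	/* per-platform scratch space, cache-line aligned like tg_machine_data */
	uint8_t  machine_data[] __attribute__((aligned(128)));
};

/* Recover the containing struct from a pointer to its machine_data. */
static struct example_group *
group_for_machine_data(void *machine_data)
{
	return (struct example_group *)((uintptr_t)machine_data -
	    offsetof(struct example_group, machine_data));
}

int
main(void)
{
	struct example_group *g = aligned_alloc(128, 2 * 128);
	g->id = 42;
	assert(group_for_machine_data(g->machine_data) == g);
	free(g);
	return 0;
}
#endif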
 61  
 62  static SECURITY_READ_ONLY_LATE(zone_t) tg_zone;
 63  static uint32_t tg_count;
 64  static queue_head_t tg_queue;
 65  static LCK_GRP_DECLARE(tg_lck_grp, "thread_group");
 66  static LCK_MTX_DECLARE(tg_lock, &tg_lck_grp);
 67  static LCK_SPIN_DECLARE(tg_flags_update_lock, &tg_lck_grp);
 68  
 69  static uint64_t tg_next_id = 0;
 70  static uint32_t tg_size;
 71  static uint32_t tg_machine_data_size;
 72  static struct thread_group *tg_system;
 73  static struct thread_group *tg_background;
 74  static struct thread_group *tg_adaptive;
 75  static struct thread_group *tg_vm;
 76  static struct thread_group *tg_io_storage;
 77  static struct thread_group *tg_perf_controller;
 78  int tg_set_by_bankvoucher;
 79  
 80  static bool thread_group_retain_try(struct thread_group *tg);
 81  
 82  /*
 83   * Initialize thread groups at boot
 84   */
 85  void
 86  thread_group_init(void)
 87  {
 88  	// Get thread group structure extension from EDT or boot-args (which can override EDT)
 89  	if (!PE_parse_boot_argn("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
 90  		if (!PE_get_default("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
 91  			tg_machine_data_size = 8;
 92  		}
 93  	}
 94  
 95  	// Check if thread group can be set by voucher adoption from EDT or boot-args (which can override EDT)
 96  	if (!PE_parse_boot_argn("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
 97  		if (!PE_get_default("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
 98  			tg_set_by_bankvoucher = 1;
 99  		}
100  	}
101  
102  	tg_size = sizeof(struct thread_group) + tg_machine_data_size;
103  	if (tg_size % CACHELINE_SIZE) {
104  		tg_size += CACHELINE_SIZE - (tg_size % CACHELINE_SIZE);
105  	}
106  	tg_machine_data_size = tg_size - sizeof(struct thread_group);
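	/*
	 * For illustration only (hypothetical sizes): if sizeof(struct
	 * thread_group) were 200, tg_machine_data_size 8 and CACHELINE_SIZE
	 * 128, the two statements above would round tg_size from 208 up to
	 * 256 and then grow tg_machine_data_size to 256 - 200 = 56, so the
	 * rounding padding is absorbed by the machine-data area.
	 */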
107  	// printf("tg_size=%d(%lu+%d)\n", tg_size, sizeof(struct thread_group), tg_machine_data_size);
108  	assert(offsetof(struct thread_group, tg_machine_data) % CACHELINE_SIZE == 0);
109  	tg_zone = zone_create("thread_groups", tg_size, ZC_NOENCRYPT | ZC_ALIGNMENT_REQUIRED);
110  
111  	queue_head_init(tg_queue);
112  	tg_system = thread_group_create_and_retain();
113  	thread_group_set_name(tg_system, "system");
114  	tg_background = thread_group_create_and_retain();
115  	thread_group_set_name(tg_background, "background");
116  	tg_adaptive = thread_group_create_and_retain();
117  	thread_group_set_name(tg_adaptive, "adaptive");
118  	tg_vm = thread_group_create_and_retain();
119  	thread_group_set_name(tg_vm, "VM");
120  	tg_io_storage = thread_group_create_and_retain();
121  	thread_group_set_name(tg_io_storage, "io storage");
122  	tg_perf_controller = thread_group_create_and_retain();
123  	thread_group_set_name(tg_perf_controller, "perf_controller");
124  
125  	/*
126  	 * If CLPC is disabled, it would recommend SMP for all thread groups.
127  	 * In that mode, the scheduler would like to restrict the kernel thread
128  	 * groups to the E-cluster while all other thread groups are run on the
129  	 * P-cluster. To identify the kernel thread groups, mark them with a
130  	 * special flag THREAD_GROUP_FLAGS_SMP_RESTRICT which is looked at by
131  	 * recommended_pset_type().
132  	 */
133  	tg_system->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
134  	tg_vm->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
135  	tg_io_storage->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
136  	tg_perf_controller->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
137  }
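/*
 * Illustrative sketch only (hypothetical helper, not the actual
 * recommended_pset_type() implementation): when CLPC is disabled and every
 * group is recommended CLUSTER_TYPE_SMP, a pset chooser along these lines
 * would keep the SMP-restricted kernel groups marked above on the E-cluster
 * and let everything else run on the P-cluster. The pset_cluster_type_t
 * values are assumed from the AMP processor-set definitions.
 */
#if 0
static pset_cluster_type_t
example_pset_type_for_group(struct thread_group *tg)
{
	if (thread_group_smp_restricted(tg)) {
		return PSET_AMP_E;      /* kernel groups stay on efficiency cores */
	}
	return PSET_AMP_P;              /* everything else prefers performance cores */
}
#endif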
138  
139  #if CONFIG_SCHED_CLUTCH
140  /*
141   * sched_clutch_for_thread
142   *
143   * The routine provides a back linkage from the thread to the
144   * sched_clutch it belongs to. This relationship is based on the
145   * thread group membership of the thread. Since that membership is
146   * changed from the thread context with the thread lock held, this
147   * linkage should be looked at only with the thread lock held or
 148   * when the thread cannot be running (e.g. the thread is in the
 149   * runq and is being removed as part of thread_select()).
150   */
151  sched_clutch_t
152  sched_clutch_for_thread(thread_t thread)
153  {
154  	assert(thread->thread_group != NULL);
155  	return &(thread->thread_group->tg_sched_clutch);
156  }
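/*
 * Illustrative sketch only (hypothetical helper, for exposition): per the
 * comment above, the thread-to-clutch linkage is only stable with the thread
 * locked, so a caller examining another thread's clutch would bracket the
 * access roughly like this.
 */
#if 0
static sched_clutch_tg_priority_t
example_read_tg_priority(thread_t thread)
{
	spl_t s = splsched();
	thread_lock(thread);
	sched_clutch_t clutch = sched_clutch_for_thread(thread);
	sched_clutch_tg_priority_t pri = os_atomic_load(&clutch->sc_tg_priority, relaxed);
	thread_unlock(thread);
	splx(s);
	return pri;
}
#endif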
157  
158  sched_clutch_t
159  sched_clutch_for_thread_group(struct thread_group *thread_group)
160  {
161  	return &(thread_group->tg_sched_clutch);
162  }
163  
164  /*
165   * Translate the TG flags to a priority boost for the sched_clutch.
166   * This priority boost will apply to the entire clutch represented
167   * by the thread group.
168   */
169  static void
170  sched_clutch_update_tg_flags(sched_clutch_t clutch, uint8_t flags)
171  {
172  	sched_clutch_tg_priority_t sc_tg_pri = 0;
173  	if (flags & THREAD_GROUP_FLAGS_UI_APP) {
174  		sc_tg_pri = SCHED_CLUTCH_TG_PRI_HIGH;
175  	} else if (flags & THREAD_GROUP_FLAGS_EFFICIENT) {
176  		sc_tg_pri = SCHED_CLUTCH_TG_PRI_LOW;
177  	} else {
178  		sc_tg_pri = SCHED_CLUTCH_TG_PRI_MED;
179  	}
180  	os_atomic_store(&clutch->sc_tg_priority, sc_tg_pri, relaxed);
181  }
182  
183  #endif /* CONFIG_SCHED_CLUTCH */
184  
185  /*
186   * Use a spinlock to protect all thread group flag updates.
187   * The lock should not have heavy contention since these flag updates should
188   * be infrequent. If this lock has contention issues, it should be changed to
189   * a per thread-group lock.
190   *
191   * The lock protects the flags field in the thread_group structure. It is also
192   * held while doing callouts to CLPC to reflect these flag changes.
193   */
194  
195  void
196  thread_group_flags_update_lock(void)
197  {
198  	lck_spin_lock_grp(&tg_flags_update_lock, &tg_lck_grp);
199  }
200  
201  void
202  thread_group_flags_update_unlock(void)
203  {
204  	lck_spin_unlock(&tg_flags_update_lock);
205  }
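/*
 * Illustrative sketch only (hypothetical helper, not part of the original
 * interface): a caller that wants several flag changes to appear as one
 * atomic event with respect to the CLPC callouts takes the lock once and
 * uses the *_locked variants defined further below.
 */
#if 0
static void
example_promote_to_ui_app(struct thread_group *tg)
{
	thread_group_flags_update_lock();
	thread_group_clear_flags_locked(tg, THREAD_GROUP_FLAGS_EFFICIENT);
	thread_group_set_flags_locked(tg, THREAD_GROUP_FLAGS_UI_APP);
	thread_group_flags_update_unlock();
}
#endif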
206  
207  /*
208   * Inform platform code about already existing thread groups
209   * or ask it to free state for all thread groups
210   */
211  void
212  thread_group_resync(boolean_t create)
213  {
214  	struct thread_group *tg;
215  
216  	lck_mtx_lock(&tg_lock);
217  	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
218  		if (create) {
219  			machine_thread_group_init(tg);
220  		} else {
221  			machine_thread_group_deinit(tg);
222  		}
223  	}
224  	lck_mtx_unlock(&tg_lock);
225  }
226  
227  /*
228   * Create new thread group and add new reference to it.
229   */
230  struct thread_group *
231  thread_group_create_and_retain(void)
232  {
233  	struct thread_group *tg;
234  
235  	tg = (struct thread_group *)zalloc(tg_zone);
236  	if (tg == NULL) {
237  		panic("thread group zone over commit");
238  	}
239  	assert((uintptr_t)tg % CACHELINE_SIZE == 0);
240  	bzero(tg, sizeof(struct thread_group));
241  
242  #if CONFIG_SCHED_CLUTCH
243  	/*
244  	 * The clutch scheduler maintains a bunch of runqs per thread group. For
245  	 * each thread group it maintains a sched_clutch structure. The lifetime
246  	 * of that structure is tied directly to the lifetime of the thread group.
247  	 */
248  	sched_clutch_init_with_thread_group(&(tg->tg_sched_clutch), tg);
249  
250  	/*
251  	 * Since the thread group flags are used to determine any priority promotions
252  	 * for the threads in the thread group, initialize them to 0.
253  	 */
254  	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), 0);
255  
256  #endif /* CONFIG_SCHED_CLUTCH */
257  
258  	lck_mtx_lock(&tg_lock);
259  	tg->tg_id = tg_next_id++;
260  	tg->tg_recommendation = CLUSTER_TYPE_SMP; // no recommendation yet
261  	os_ref_init(&tg->tg_refcount, NULL);
262  	tg_count++;
263  	enqueue_tail(&tg_queue, &tg->tg_queue_chain);
264  	lck_mtx_unlock(&tg_lock);
265  
266  	// call machine layer init before this thread group becomes visible
267  	machine_thread_group_init(tg);
268  
269  	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NEW), tg->tg_id);
270  
271  	return tg;
272  }
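/*
 * Illustrative sketch only: the create/retain/release lifecycle using the
 * functions in this file (coalition code is the real caller; the function
 * and group names below are for exposition).
 */
#if 0
static void
example_tg_lifecycle(void)
{
	struct thread_group *tg = thread_group_create_and_retain();
	thread_group_set_name(tg, "example");
	/* ... additional holders call thread_group_retain(tg) ... */
	thread_group_release(tg);       /* the last release tears the group down */
}
#endif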
273  
274  /*
275   * Point newly created thread to its home thread group
276   */
277  void
278  thread_group_init_thread(thread_t t, task_t task)
279  {
280  	struct thread_group *tg = task_coalition_get_thread_group(task);
281  	t->thread_group = tg;
282  	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
283  	    THREAD_GROUP_INVALID, tg->tg_id, (uintptr_t)thread_tid(t));
284  }
285  
286  /*
287   * Set thread group name
288   */
289  void
290  thread_group_set_name(__unused struct thread_group *tg, __unused const char *name)
291  {
292  	if (name == NULL) {
293  		return;
294  	}
295  	if (!thread_group_retain_try(tg)) {
296  		return;
297  	}
298  	if (tg->tg_name[0] == '\0') {
299  		strncpy(&tg->tg_name[0], name, THREAD_GROUP_MAXNAME);
300  #if defined(__LP64__)
301  		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
302  		    tg->tg_id,
303  		    *(uint64_t*)(void*)&tg->tg_name[0],
304  		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)]
305  		    );
306  #else /* defined(__LP64__) */
307  		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
308  		    tg->tg_id,
309  		    *(uint32_t*)(void*)&tg->tg_name[0],
310  		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)]
311  		    );
312  #endif /* defined(__LP64__) */
313  	}
314  	thread_group_release(tg);
315  }
316  
317  void
318  thread_group_set_flags(struct thread_group *tg, uint64_t flags)
319  {
320  	thread_group_flags_update_lock();
321  	thread_group_set_flags_locked(tg, flags);
322  	thread_group_flags_update_unlock();
323  }
324  
325  void
326  thread_group_clear_flags(struct thread_group *tg, uint64_t flags)
327  {
328  	thread_group_flags_update_lock();
329  	thread_group_clear_flags_locked(tg, flags);
330  	thread_group_flags_update_unlock();
331  }
332  
333  /*
334   * Set thread group flags and perform related actions.
335   * The tg_flags_update_lock should be held.
336   * Currently supported flags are:
337   * - THREAD_GROUP_FLAGS_EFFICIENT
338   * - THREAD_GROUP_FLAGS_UI_APP
339   */
340  
341  void
342  thread_group_set_flags_locked(struct thread_group *tg, uint64_t flags)
343  {
344  	if ((flags & THREAD_GROUP_FLAGS_VALID) != flags) {
345  		panic("thread_group_set_flags: Invalid flags %llu", flags);
346  	}
347  
348  	if ((tg->tg_flags & flags) == flags) {
349  		return;
350  	}
351  
352  	__kdebug_only uint64_t old_flags = tg->tg_flags;
353  	tg->tg_flags |= flags;
354  	machine_thread_group_flags_update(tg, tg->tg_flags);
355  #if CONFIG_SCHED_CLUTCH
356  	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), tg->tg_flags);
357  #endif /* CONFIG_SCHED_CLUTCH */
358  	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
359  	    tg->tg_id, tg->tg_flags, old_flags);
360  }
361  
362  /*
363   * Clear thread group flags and perform related actions
364   * The tg_flags_update_lock should be held.
365   * Currently supported flags are:
366   * - THREAD_GROUP_FLAGS_EFFICIENT
367   * - THREAD_GROUP_FLAGS_UI_APP
368   */
369  
370  void
371  thread_group_clear_flags_locked(struct thread_group *tg, uint64_t flags)
372  {
373  	if ((flags & THREAD_GROUP_FLAGS_VALID) != flags) {
374  		panic("thread_group_clear_flags: Invalid flags %llu", flags);
375  	}
376  
377  	if ((tg->tg_flags & flags) == 0) {
378  		return;
379  	}
380  
381  	__kdebug_only uint64_t old_flags = tg->tg_flags;
382  	tg->tg_flags &= ~flags;
383  #if CONFIG_SCHED_CLUTCH
384  	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), tg->tg_flags);
385  #endif /* CONFIG_SCHED_CLUTCH */
386  	machine_thread_group_flags_update(tg, tg->tg_flags);
387  	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
388  	    tg->tg_id, tg->tg_flags, old_flags);
389  }
390  
391  
392  
393  /*
 394   * Find thread group with specified name and take a new reference on it.
395   */
396  struct thread_group *
397  thread_group_find_by_name_and_retain(char *name)
398  {
399  	struct thread_group *result = NULL;
400  
401  	if (name == NULL) {
402  		return NULL;
403  	}
404  
405  	if (strncmp("system", name, THREAD_GROUP_MAXNAME) == 0) {
406  		return thread_group_retain(tg_system);
407  	} else if (strncmp("background", name, THREAD_GROUP_MAXNAME) == 0) {
408  		return thread_group_retain(tg_background);
409  	} else if (strncmp("adaptive", name, THREAD_GROUP_MAXNAME) == 0) {
410  		return thread_group_retain(tg_adaptive);
411  	} else if (strncmp("perf_controller", name, THREAD_GROUP_MAXNAME) == 0) {
412  		return thread_group_retain(tg_perf_controller);
413  	}
414  
415  	struct thread_group *tg;
416  	lck_mtx_lock(&tg_lock);
417  	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
418  		if (strncmp(tg->tg_name, name, THREAD_GROUP_MAXNAME) == 0 &&
419  		    thread_group_retain_try(tg)) {
420  			result = tg;
421  			break;
422  		}
423  	}
424  	lck_mtx_unlock(&tg_lock);
425  	return result;
426  }
427  
428  /*
429   * Find thread group with specified ID and add new reference to it.
430   */
431  struct thread_group *
432  thread_group_find_by_id_and_retain(uint64_t id)
433  {
434  	struct thread_group *tg = NULL;
435  	struct thread_group *result = NULL;
436  
437  	switch (id) {
438  	case THREAD_GROUP_SYSTEM:
439  		result = tg_system;
440  		thread_group_retain(tg_system);
441  		break;
442  	case THREAD_GROUP_BACKGROUND:
443  		result = tg_background;
444  		thread_group_retain(tg_background);
445  		break;
446  	case THREAD_GROUP_ADAPTIVE:
447  		result = tg_adaptive;
448  		thread_group_retain(tg_adaptive);
449  		break;
450  	case THREAD_GROUP_VM:
451  		result = tg_vm;
452  		thread_group_retain(tg_vm);
453  		break;
454  	case THREAD_GROUP_IO_STORAGE:
455  		result = tg_io_storage;
456  		thread_group_retain(tg_io_storage);
457  		break;
458  	case THREAD_GROUP_PERF_CONTROLLER:
459  		result = tg_perf_controller;
460  		thread_group_retain(tg_perf_controller);
461  		break;
462  	default:
463  		lck_mtx_lock(&tg_lock);
464  		qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
465  			if (tg->tg_id == id && thread_group_retain_try(tg)) {
466  				result = tg;
467  				break;
468  			}
469  		}
470  		lck_mtx_unlock(&tg_lock);
471  	}
472  	return result;
473  }
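/*
 * Illustrative sketch only (hypothetical function name): a successful
 * *_and_retain() lookup is balanced with thread_group_release() once the
 * group is no longer needed; callers that permanently adopt a group, such as
 * the join helpers near the end of this file, intentionally keep the
 * reference.
 */
#if 0
static void
example_tg_lookup(void)
{
	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_SYSTEM);
	if (tg != NULL) {
		/* ... inspect or adopt the group ... */
		thread_group_release(tg);
	}
}
#endif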
474  
475  /*
476   * Add new reference to specified thread group
477   */
478  struct thread_group *
479  thread_group_retain(struct thread_group *tg)
480  {
481  	os_ref_retain(&tg->tg_refcount);
482  	return tg;
483  }
484  
485  /*
486   * Similar to thread_group_retain, but fails for thread groups with a
487   * zero reference count. Returns true if retained successfully.
488   */
489  static bool
490  thread_group_retain_try(struct thread_group *tg)
491  {
492  	return os_ref_retain_try(&tg->tg_refcount);
493  }
494  
495  /*
496   * Drop a reference to specified thread group
497   */
498  void
499  thread_group_release(struct thread_group *tg)
500  {
501  	if (os_ref_release(&tg->tg_refcount) == 0) {
502  		lck_mtx_lock(&tg_lock);
503  		tg_count--;
504  		remqueue(&tg->tg_queue_chain);
505  		lck_mtx_unlock(&tg_lock);
506  		static_assert(THREAD_GROUP_MAXNAME >= (sizeof(uint64_t) * 2), "thread group name is too short");
507  		static_assert(__alignof(struct thread_group) >= __alignof(uint64_t), "thread group name is not 8 bytes aligned");
508  #if defined(__LP64__)
509  		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
510  		    tg->tg_id,
511  		    *(uint64_t*)(void*)&tg->tg_name[0],
512  		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)]
513  		    );
514  #else /* defined(__LP64__) */
515  		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
516  		    tg->tg_id,
517  		    *(uint32_t*)(void*)&tg->tg_name[0],
518  		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)]
519  		    );
520  #endif /* defined(__LP64__) */
521  		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FREE), tg->tg_id);
522  #if CONFIG_SCHED_CLUTCH
523  		sched_clutch_destroy(&(tg->tg_sched_clutch));
524  #endif /* CONFIG_SCHED_CLUTCH */
525  		machine_thread_group_deinit(tg);
526  		zfree(tg_zone, tg);
527  	}
528  }
529  
530  /*
531   * Get thread's current thread group
532   */
533  inline struct thread_group *
534  thread_group_get(thread_t t)
535  {
536  	return t->thread_group;
537  }
538  
539  struct thread_group *
540  thread_group_get_home_group(thread_t t)
541  {
542  	return task_coalition_get_thread_group(t->task);
543  }
544  
545  #if CONFIG_SCHED_AUTO_JOIN
546  
547  /*
548   * thread_set_thread_group_auto_join()
549   *
550   * Sets the thread group of a thread based on auto-join rules.
551   *
552   * Preconditions:
553   * - Thread must not be part of a runq (freshly made runnable threads or terminating only)
554   * - Thread must be locked by the caller already
555   */
556  static void
557  thread_set_thread_group_auto_join(thread_t t, struct thread_group *tg, __unused struct thread_group *old_tg)
558  {
559  	assert(t->runq == PROCESSOR_NULL);
560  	t->thread_group = tg;
561  
562  	/*
563  	 * If the thread group is being changed for the current thread, callout to
564  	 * CLPC to update the thread's information at that layer. This makes sure CLPC
565  	 * has consistent state when the current thread is going off-core.
566  	 */
567  	if (t == current_thread()) {
568  		uint64_t ctime = mach_approximate_time();
569  		uint64_t arg1, arg2;
570  		machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
571  		machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, PERFCONTROL_CALLOUT_WAKE_UNSAFE, t);
572  	}
573  }
574  
575  #endif /* CONFIG_SCHED_AUTO_JOIN */
576  
577  /*
578   * thread_set_thread_group_explicit()
579   *
 580   * Sets the thread group of a thread based on the default, non-auto-join rules.
581   *
582   * Preconditions:
583   * - Thread must be the current thread
584   * - Caller must not have the thread locked
585   * - Interrupts must be disabled
586   */
587  static void
588  thread_set_thread_group_explicit(thread_t t, struct thread_group *tg, __unused struct thread_group *old_tg)
589  {
590  	assert(t == current_thread());
591  	/*
592  	 * In the clutch scheduler world, the runq membership of the thread
593  	 * is based on its thread group membership and its scheduling bucket.
594  	 * In order to synchronize with the priority (and therefore bucket)
595  	 * getting updated concurrently, it is important to perform the
596  	 * thread group change also under the thread lock.
597  	 */
598  	thread_lock(t);
599  	t->thread_group = tg;
600  
601  #if CONFIG_SCHED_CLUTCH
602  	sched_clutch_t old_clutch = (old_tg) ? &(old_tg->tg_sched_clutch) : NULL;
603  	sched_clutch_t new_clutch = (tg) ? &(tg->tg_sched_clutch) : NULL;
604  	if (SCHED_CLUTCH_THREAD_ELIGIBLE(t)) {
605  		sched_clutch_thread_clutch_update(t, old_clutch, new_clutch);
606  	}
607  #endif /* CONFIG_SCHED_CLUTCH */
608  
609  	thread_unlock(t);
610  
611  	uint64_t ctime = mach_approximate_time();
612  	uint64_t arg1, arg2;
613  	machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
614  	machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, 0, t);
615  }
616  
617  /*
618   * thread_set_thread_group()
619   *
620   * Overrides the current home thread group with an override group. However,
621   * an adopted work interval overrides the override. Does not take a reference
622   * on the group, so caller must guarantee group lifetime lasts as long as the
623   * group is set.
624   *
625   * The thread group is set according to a hierarchy:
626   *
627   * 1) work interval specified group (explicit API)
628   * 2) Auto-join thread group (wakeup tracking for special work intervals)
629   * 3) bank voucher carried group (implicitly set)
630   * 4) coalition default thread group (ambient)
631   */
632  static void
633  thread_set_thread_group(thread_t t, struct thread_group *tg, bool auto_join)
634  {
635  	struct thread_group *home_tg = thread_group_get_home_group(t);
636  	struct thread_group *old_tg = NULL;
637  
638  	if (tg == NULL) {
639  		/* when removing an override, revert to home group */
640  		tg = home_tg;
641  	}
642  
643  	spl_t s = splsched();
644  
645  	old_tg = t->thread_group;
646  
647  	if (old_tg != tg) {
648  		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
649  		    t->thread_group ? t->thread_group->tg_id : 0,
650  		    tg->tg_id, (uintptr_t)thread_tid(t), home_tg->tg_id);
651  
652  		/*
653  		 * Based on whether this is a change due to auto-join, the join does
654  		 * different things and has different expectations.
655  		 */
656  		if (auto_join) {
657  #if CONFIG_SCHED_AUTO_JOIN
658  			/*
659  			 * set thread group with auto-join rules. This has the
660  			 * implicit assumption that the thread lock is already held.
661  			 * Also this could happen to any thread (current or thread
662  			 * being context switched).
663  			 */
664  			thread_set_thread_group_auto_join(t, tg, old_tg);
665  #else /* CONFIG_SCHED_AUTO_JOIN */
666  			panic("Auto-Join unsupported on this platform");
667  #endif /* CONFIG_SCHED_AUTO_JOIN */
668  		} else {
669  			/*
670  			 * set thread group with the explicit join rules. This has
671  			 * the implicit assumption that the thread is not locked. Also
672  			 * this would be done only to the current thread.
673  			 */
674  			thread_set_thread_group_explicit(t, tg, old_tg);
675  		}
676  	}
677  
678  	splx(s);
679  }
680  
681  void
682  thread_group_set_bank(thread_t t, struct thread_group *tg)
683  {
684  	/* work interval group overrides any bank override group */
685  	if (t->th_work_interval) {
686  		return;
687  	}
688  
689  	/* boot arg disables groups in bank */
690  	if (tg_set_by_bankvoucher == FALSE) {
691  		return;
692  	}
693  
694  	thread_set_thread_group(t, tg, false);
695  }
696  
697  /*
698   * thread_set_work_interval_thread_group()
699   *
700   * Sets the thread's group to the work interval thread group.
 701   * If auto_join == true, the thread group is being overridden through scheduler
702   * auto-join policies.
703   *
704   * Preconditions for auto-join case:
705   * - t is not current_thread and t should be locked.
706   * - t should not be running on a remote core; thread context switching is a valid state for this.
707   */
708  void
709  thread_set_work_interval_thread_group(thread_t t, struct thread_group *tg, bool auto_join)
710  {
711  	if (tg == NULL) {
712  		/*
713  		 * when removing a work interval override, fall back
714  		 * to the current voucher override.
715  		 *
716  		 * In the auto_join case, the thread is already locked by the caller so
 717   * it's unsafe to get the thread group from the current voucher (since
718  		 * that might require taking task lock and ivac lock). However, the
719  		 * auto-join policy does not allow threads to switch thread groups based
720  		 * on voucher overrides.
721  		 *
722  		 * For the normal case, lookup the thread group from the currently adopted
723  		 * voucher and use that as the fallback tg.
724  		 */
725  
726  		if (auto_join == false) {
727  			tg = thread_get_current_voucher_thread_group(t);
728  		}
729  	}
730  
731  	thread_set_thread_group(t, tg, auto_join);
732  }
733  
734  inline cluster_type_t
735  thread_group_recommendation(struct thread_group *tg)
736  {
737  	if (tg == NULL) {
738  		return CLUSTER_TYPE_SMP;
739  	} else {
740  		return tg->tg_recommendation;
741  	}
742  }
743  
744  inline uint64_t
745  thread_group_get_id(struct thread_group *tg)
746  {
747  	return tg->tg_id;
748  }
749  
750  uint32_t
751  thread_group_count(void)
752  {
753  	return tg_count;
754  }
755  
756  /*
 757   * Can only be called while the tg is guaranteed not to be destroyed (e.g. the caller holds a reference)
758   */
759  inline const char*
760  thread_group_get_name(struct thread_group *tg)
761  {
762  	return tg->tg_name;
763  }
764  
765  inline void *
766  thread_group_get_machine_data(struct thread_group *tg)
767  {
768  	return &tg->tg_machine_data;
769  }
770  
771  inline uint32_t
772  thread_group_machine_data_size(void)
773  {
774  	return tg_machine_data_size;
775  }
776  
777  kern_return_t
778  thread_group_iterate_stackshot(thread_group_iterate_fn_t callout, void *arg)
779  {
780  	struct thread_group *tg;
781  	int i = 0;
782  	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
783  		if (tg == NULL || !ml_validate_nofault((vm_offset_t)tg, sizeof(struct thread_group))) {
784  			return KERN_FAILURE;
785  		}
786  		callout(arg, i, tg);
787  		i++;
788  	}
789  	return KERN_SUCCESS;
790  }
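/*
 * Illustrative sketch only: a callout compatible with the call site above
 * (argument order and void return are inferred from callout(arg, i, tg));
 * the function name is hypothetical and stackshot is the intended caller.
 */
#if 0
static void
example_stackshot_callout(void *arg, int grp_idx, struct thread_group *tg)
{
	(void)arg;
	printf("thread group %d: id %llu flags 0x%x name %s\n", grp_idx,
	    thread_group_get_id(tg), thread_group_get_flags(tg),
	    thread_group_get_name(tg));
}
/* invoked as: thread_group_iterate_stackshot(example_stackshot_callout, NULL); */
#endif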
791  
792  void
793  thread_group_join_io_storage(void)
794  {
795  	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_IO_STORAGE);
796  	assert(tg != NULL);
797  	thread_set_thread_group(current_thread(), tg, false);
798  }
799  
800  void
801  thread_group_join_perf_controller(void)
802  {
803  	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_PERF_CONTROLLER);
804  	assert(tg != NULL);
805  	thread_set_thread_group(current_thread(), tg, false);
806  }
807  
808  void
809  thread_group_vm_add(void)
810  {
811  	assert(tg_vm != NULL);
812  	thread_set_thread_group(current_thread(), thread_group_find_by_id_and_retain(THREAD_GROUP_VM), false);
813  }
814  
815  uint32_t
816  thread_group_get_flags(struct thread_group *tg)
817  {
818  	return tg->tg_flags;
819  }
820  
821  /*
822   * Returns whether the thread group is restricted to the E-cluster when CLPC is
823   * turned off.
824   */
825  boolean_t
826  thread_group_smp_restricted(struct thread_group *tg)
827  {
828  	if (tg->tg_flags & THREAD_GROUP_FLAGS_SMP_RESTRICT) {
829  		return true;
830  	} else {
831  		return false;
832  	}
833  }
834  
835  void
836  thread_group_update_recommendation(struct thread_group *tg, cluster_type_t new_recommendation)
837  {
838  	/*
839  	 * Since the tg->tg_recommendation field is read by CPUs trying to determine
840  	 * where a thread/thread group needs to be placed, it is important to use
841  	 * atomic operations to update the recommendation.
842  	 */
843  	os_atomic_store(&tg->tg_recommendation, new_recommendation, relaxed);
844  }
845  
846  #if CONFIG_SCHED_EDGE
847  
848  int sched_edge_restrict_ut = 1;
849  int sched_edge_restrict_bg = 1;
850  
851  void
852  sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
853  {
854  	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
855  	/*
856  	 * CLUSTER_TYPE_SMP was used for some debugging support when CLPC dynamic control was turned off.
857  	 * In more recent implementations, CLPC simply recommends "P-spill" when dynamic control is turned off. So it should
858  	 * never be recommending CLUSTER_TYPE_SMP for thread groups.
859  	 */
860  	assert(new_recommendation != CLUSTER_TYPE_SMP);
861  	/*
 862  	 * The Edge scheduler expects preferred cluster recommendations for each QoS level within a TG. Until CLPC adopts
 863  	 * the new per-QoS routine, synthesize those recommendations from this older single-recommendation interface.
864  	 */
865  	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {0};
866  	/*
867  	 * For all buckets higher than UT, apply the recommendation to the thread group bucket
868  	 */
869  	for (sched_bucket_t bucket = TH_BUCKET_FIXPRI; bucket < TH_BUCKET_SHARE_UT; bucket++) {
870  		tg_bucket_preferred_cluster[bucket] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
871  	}
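	/*
	 * For illustration: the ternary above assumes a two-cluster system; a
	 * bucket is steered to cluster 0 when the recommendation matches
	 * cluster 0's type (pset_type_for_id(0)) and to cluster 1 otherwise.
	 * The same mapping is reused for UT and BG below.
	 */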
 872  	/* For UT & BG QoS, set the recommendation only if they haven't been restricted via sysctls */
873  	if (!sched_edge_restrict_ut) {
874  		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_UT] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
875  	}
876  	if (!sched_edge_restrict_bg) {
877  		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_BG] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
878  	}
879  	sched_perfcontrol_preferred_cluster_options_t options = 0;
880  	if (new_recommendation == CLUSTER_TYPE_P) {
881  		options |= SCHED_PERFCONTROL_PREFERRED_CLUSTER_MIGRATE_RUNNING;
882  	}
883  	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
884  }
885  
886  void
887  sched_perfcontrol_edge_matrix_get(sched_clutch_edge *edge_matrix, bool *edge_request_bitmap, uint64_t flags, uint64_t matrix_order)
888  {
889  	sched_edge_matrix_get(edge_matrix, edge_request_bitmap, flags, matrix_order);
890  }
891  
892  void
893  sched_perfcontrol_edge_matrix_set(sched_clutch_edge *edge_matrix, bool *edge_changes_bitmap, uint64_t flags, uint64_t matrix_order)
894  {
895  	sched_edge_matrix_set(edge_matrix, edge_changes_bitmap, flags, matrix_order);
896  }
897  
898  void
899  sched_perfcontrol_thread_group_preferred_clusters_set(void *machine_data, uint32_t tg_preferred_cluster,
900      uint32_t overrides[PERFCONTROL_CLASS_MAX], sched_perfcontrol_preferred_cluster_options_t options)
901  {
902  	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
903  	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {
904  		[TH_BUCKET_FIXPRI]   = (overrides[PERFCONTROL_CLASS_ABOVEUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_ABOVEUI] : tg_preferred_cluster,
905  		[TH_BUCKET_SHARE_FG] = (overrides[PERFCONTROL_CLASS_UI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UI] : tg_preferred_cluster,
906  		[TH_BUCKET_SHARE_IN] = (overrides[PERFCONTROL_CLASS_UI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UI] : tg_preferred_cluster,
907  		[TH_BUCKET_SHARE_DF] = (overrides[PERFCONTROL_CLASS_NONUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_NONUI] : tg_preferred_cluster,
908  		[TH_BUCKET_SHARE_UT] = (overrides[PERFCONTROL_CLASS_UTILITY] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UTILITY] : tg_preferred_cluster,
909  		[TH_BUCKET_SHARE_BG] = (overrides[PERFCONTROL_CLASS_BACKGROUND] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_BACKGROUND] : tg_preferred_cluster,
910  	};
911  	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
912  }
913  
914  #else /* CONFIG_SCHED_EDGE */
915  
916  void
917  sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
918  {
919  	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
920  	SCHED(thread_group_recommendation_change)(tg, new_recommendation);
921  }
922  
923  void
924  sched_perfcontrol_edge_matrix_get(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_request_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
925  {
926  }
927  
928  void
929  sched_perfcontrol_edge_matrix_set(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_changes_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
930  {
931  }
932  
933  void
934  sched_perfcontrol_thread_group_preferred_clusters_set(__unused void *machine_data, __unused uint32_t tg_preferred_cluster,
935      __unused uint32_t overrides[PERFCONTROL_CLASS_MAX], __unused sched_perfcontrol_preferred_cluster_options_t options)
936  {
937  }
938  
939  #endif /* CONFIG_SCHED_EDGE */
940  
941  #endif /* CONFIG_THREAD_GROUPS */