   1  /*
   2   * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
   3   *
   4   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5   *
   6   * This file contains Original Code and/or Modifications of Original Code
   7   * as defined in and that are subject to the Apple Public Source License
   8   * Version 2.0 (the 'License'). You may not use this file except in
   9   * compliance with the License. The rights granted to you under the License
  10   * may not be used to create, or enable the creation or redistribution of,
  11   * unlawful or unlicensed copies of an Apple operating system, or to
  12   * circumvent, violate, or enable the circumvention or violation of, any
  13   * terms of an Apple operating system software license agreement.
  14   *
  15   * Please obtain a copy of the License at
  16   * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17   *
  18   * The Original Code and all software distributed under the License are
  19   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23   * Please see the License for the specific language governing rights and
  24   * limitations under the License.
  25   *
  26   * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27   */
  28  
  29  #include <mach/mach_types.h>
  30  #include <mach/thread_act_server.h>
  31  
  32  #include <kern/kern_types.h>
  33  #include <kern/processor.h>
  34  #include <kern/thread.h>
  35  #include <kern/affinity.h>
  36  #include <mach/task_policy.h>
  37  #include <kern/sfi.h>
  38  #include <kern/policy_internal.h>
  39  #include <sys/errno.h>
  40  #include <sys/ulock.h>
  41  
  42  #include <mach/machine/sdt.h>
  43  
  44  #ifdef MACH_BSD
  45  extern int      proc_selfpid(void);
  46  extern char *   proc_name_address(void *p);
  47  extern void     rethrottle_thread(void * uthread);
  48  #endif /* MACH_BSD */
  49  
  50  #define QOS_EXTRACT(q)        ((q) & 0xff)
  51  
  52  uint32_t qos_override_mode;
  53  #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
  54  #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
  55  #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
  56  #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
  57  
  58  extern zone_t thread_qos_override_zone;
  59  
  60  static void
  61  proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
  62  
  63  /*
  64   * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
  65   * to threads that don't have a QoS class set.
  66   */
  67  const qos_policy_params_t thread_qos_policy_params = {
  68  	/*
  69  	 * This table defines the starting base priority of the thread,
  70  	 * which will be modified by the thread importance and the task max priority
  71  	 * before being applied.
  72  	 */
  73  	.qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
  74  	.qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
  75  	.qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
  76  	.qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
  77  	.qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
  78  	.qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
  79  	.qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,
  80  
  81  	/*
  82  	 * This table defines the highest IO priority that a thread marked with this
  83  	 * QoS class can have.
  84  	 */
  85  	.qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
  86  	.qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
  87  	.qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
  88  	.qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
  89  	.qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
  90  	.qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
  91  	.qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,
  92  
  93  	/*
  94  	 * This table defines the highest QoS level that
  95  	 * a thread marked with this QoS class can have.
  96  	 */
  97  
  98  	.qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
  99  	.qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
 100  	.qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
 101  	.qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
 102  	.qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
 103  	.qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
 104  	.qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
 105  
 106  	.qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
 107  	.qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
 108  	.qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
 109  	.qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
 110  	.qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
 111  	.qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
 112  	.qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
 113  };
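
       /*
        * Example of reading these tables (priority values per osfmk/kern/sched.h):
        * a THREAD_QOS_UTILITY thread starts at base priority 20 (BASEPRI_UTILITY),
        * may issue IO no more aggressively than THROTTLE_LEVEL_TIER1, and is
        * limited to throughput tier 2 and latency tier 3, unless the task imposes
        * something stricter.
        */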
 114  
 115  static void
 116  thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
 117  
 118  static int
 119  thread_qos_scaled_relative_priority(int qos, int qos_relprio);
 120  
 121  static void
 122  proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
 123  
 124  static void
 125  proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
 126  
 127  static void
 128  proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
 129  
 130  static void
 131  thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
 132  
 133  static int
 134  thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
 135  
 136  static int
 137  proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
 138  
 139  static void
 140  thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
 141  
 142  static void
 143  thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
 144  
 145  void
 146  thread_policy_init(void)
 147  {
 148  	if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
 149  		printf("QOS override mode: 0x%08x\n", qos_override_mode);
 150  	} else {
 151  		qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
 152  	}
 153  }
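
       /*
        * Sketch: the override mode can be selected at boot for experimentation,
        * assuming the machine permits setting boot-args (e.g. SIP disabled):
        *
        *     sudo nvram boot-args="qos_override_mode=0x2"
        *
        * which selects QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE.
        */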
 154  
 155  boolean_t
 156  thread_has_qos_policy(thread_t thread)
 157  {
 158  	return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
 159  }
 160  
 161  
 162  static void
 163  thread_remove_qos_policy_locked(thread_t thread,
 164      task_pend_token_t pend_token)
 165  {
 166  	__unused int prev_qos = thread->requested_policy.thrp_qos;
 167  
 168  	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
 169  
 170  	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
 171  	    THREAD_QOS_UNSPECIFIED, 0, pend_token);
 172  }
 173  
 174  kern_return_t
 175  thread_remove_qos_policy(thread_t thread)
 176  {
 177  	struct task_pend_token pend_token = {};
 178  
 179  	thread_mtx_lock(thread);
 180  	if (!thread->active) {
 181  		thread_mtx_unlock(thread);
 182  		return KERN_TERMINATED;
 183  	}
 184  
 185  	thread_remove_qos_policy_locked(thread, &pend_token);
 186  
 187  	thread_mtx_unlock(thread);
 188  
 189  	thread_policy_update_complete_unlocked(thread, &pend_token);
 190  
 191  	return KERN_SUCCESS;
 192  }
 193  
 194  
 195  boolean_t
 196  thread_is_static_param(thread_t thread)
 197  {
 198  	if (thread->static_param) {
 199  		DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
 200  		return TRUE;
 201  	}
 202  	return FALSE;
 203  }
 204  
 205  /*
 206   * Relative priorities can range between 0REL and -15REL. These
 207   * map to QoS-specific ranges, to create non-overlapping priority
 208   * ranges.
 209   */
 210  static int
 211  thread_qos_scaled_relative_priority(int qos, int qos_relprio)
 212  {
 213  	int next_lower_qos;
 214  
 215  	/* Fast path, since no validation or scaling is needed */
 216  	if (qos_relprio == 0) {
 217  		return 0;
 218  	}
 219  
 220  	switch (qos) {
 221  	case THREAD_QOS_USER_INTERACTIVE:
 222  		next_lower_qos = THREAD_QOS_USER_INITIATED;
 223  		break;
 224  	case THREAD_QOS_USER_INITIATED:
 225  		next_lower_qos = THREAD_QOS_LEGACY;
 226  		break;
 227  	case THREAD_QOS_LEGACY:
 228  		next_lower_qos = THREAD_QOS_UTILITY;
 229  		break;
 230  	case THREAD_QOS_UTILITY:
 231  		next_lower_qos = THREAD_QOS_BACKGROUND;
 232  		break;
 233  	case THREAD_QOS_MAINTENANCE:
 234  	case THREAD_QOS_BACKGROUND:
 235  		next_lower_qos = 0;
 236  		break;
 237  	default:
 238  		panic("Unrecognized QoS %d", qos);
 239  		return 0;
 240  	}
 241  
 242  	int prio_range_max = thread_qos_policy_params.qos_pri[qos];
 243  	int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
 244  
 245  	/*
 246  	 * We now have the valid range that the scaled relative priority can map to. Note
 247  	 * that the lower bound is exclusive, but the upper bound is inclusive. If the
 248  	 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
 249  	 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
 250  	 * remainder.
 251  	 */
 252  	int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
 253  
 254  	return scaled_relprio;
 255  }
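
       /*
        * Worked example: for qos = THREAD_QOS_LEGACY (qos_pri 31) the next lower
        * band is THREAD_QOS_UTILITY (qos_pri 20), giving the range (20, 31].
        * A request of -15REL scales as:
        *
        *     scaled_relprio = -(((31 - 20) * 15) >> 4) = -(165 >> 4) = -10
        *
        * so the thread lands at 31 - 10 = 21, just above the UTILITY band.
        */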
 256  
 257  /*
 258   * flag set by -qos-policy-allow boot-arg to allow
 259   * testing thread qos policy from userspace
 260   */
 261  static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
 262  
 263  kern_return_t
 264  thread_policy_set(
 265  	thread_t                                thread,
 266  	thread_policy_flavor_t  flavor,
 267  	thread_policy_t                 policy_info,
 268  	mach_msg_type_number_t  count)
 269  {
 270  	thread_qos_policy_data_t req_qos;
 271  	kern_return_t kr;
 272  
 273  	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
 274  
 275  	if (thread == THREAD_NULL) {
 276  		return KERN_INVALID_ARGUMENT;
 277  	}
 278  
 279  	if (!allow_qos_policy_set) {
 280  		if (thread_is_static_param(thread)) {
 281  			return KERN_POLICY_STATIC;
 282  		}
 283  
 284  		if (flavor == THREAD_QOS_POLICY) {
 285  			return KERN_INVALID_ARGUMENT;
 286  		}
 287  	}
 288  
 289  	/* Threads without static_param set reset their QoS when other policies are applied. */
 290  	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
 291  		/* Store the existing tier, if we fail this call it is used to reset back. */
 292  		req_qos.qos_tier = thread->requested_policy.thrp_qos;
 293  		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;
 294  
 295  		kr = thread_remove_qos_policy(thread);
 296  		if (kr != KERN_SUCCESS) {
 297  			return kr;
 298  		}
 299  	}
 300  
 301  	kr = thread_policy_set_internal(thread, flavor, policy_info, count);
 302  
  303  	/* If we stripped a QoS policy above, the block below restores the original tier when the set fails. */
 304  	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
 305  		if (kr != KERN_SUCCESS) {
 306  			/* Reset back to our original tier as the set failed. */
 307  			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
 308  		}
 309  	}
 310  
 311  	return kr;
 312  }
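
       /*
        * Note for callers: applying any non-QoS flavor (e.g. THREAD_PRECEDENCE_POLICY)
        * to a thread that currently has a QoS class strips that QoS first, and the
        * saved tier is restored only if the new set fails. This is one reason that
        * mixing pthread QoS and the raw Mach thread policy calls on the same thread
        * is discouraged.
        */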
 313  
 314  kern_return_t
 315  thread_policy_set_internal(
 316  	thread_t                     thread,
 317  	thread_policy_flavor_t       flavor,
 318  	thread_policy_t              policy_info,
 319  	mach_msg_type_number_t       count)
 320  {
 321  	kern_return_t result = KERN_SUCCESS;
 322  	struct task_pend_token pend_token = {};
 323  
 324  	thread_mtx_lock(thread);
 325  	if (!thread->active) {
 326  		thread_mtx_unlock(thread);
 327  
 328  		return KERN_TERMINATED;
 329  	}
 330  
 331  	switch (flavor) {
 332  	case THREAD_EXTENDED_POLICY:
 333  	{
 334  		boolean_t timeshare = TRUE;
 335  
 336  		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
 337  			thread_extended_policy_t info;
 338  
 339  			info = (thread_extended_policy_t)policy_info;
 340  			timeshare = info->timeshare;
 341  		}
 342  
 343  		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;
 344  
 345  		spl_t s = splsched();
 346  		thread_lock(thread);
 347  
 348  		thread_set_user_sched_mode_and_recompute_pri(thread, mode);
 349  
 350  		thread_unlock(thread);
 351  		splx(s);
 352  
 353  		pend_token.tpt_update_thread_sfi = 1;
 354  
 355  		break;
 356  	}
 357  
 358  	case THREAD_TIME_CONSTRAINT_POLICY:
 359  	{
 360  		thread_time_constraint_policy_t info;
 361  
 362  		if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
 363  			result = KERN_INVALID_ARGUMENT;
 364  			break;
 365  		}
 366  
 367  		info = (thread_time_constraint_policy_t)policy_info;
 368  
 369  
 370  		if (info->constraint < info->computation ||
 371  		    info->computation > max_rt_quantum ||
 372  		    info->computation < min_rt_quantum) {
 373  			result = KERN_INVALID_ARGUMENT;
 374  			break;
 375  		}
 376  
 377  		spl_t s = splsched();
 378  		thread_lock(thread);
 379  
 380  		thread->realtime.period         = info->period;
 381  		thread->realtime.computation    = info->computation;
 382  		thread->realtime.constraint     = info->constraint;
 383  		thread->realtime.preemptible    = info->preemptible;
 384  
 385  		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);
 386  
 387  		thread_unlock(thread);
 388  		splx(s);
 389  
 390  		pend_token.tpt_update_thread_sfi = 1;
 391  
 392  		break;
 393  	}
 394  
 395  	case THREAD_PRECEDENCE_POLICY:
 396  	{
 397  		thread_precedence_policy_t info;
 398  
 399  		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
 400  			result = KERN_INVALID_ARGUMENT;
 401  			break;
 402  		}
 403  		info = (thread_precedence_policy_t)policy_info;
 404  
 405  		spl_t s = splsched();
 406  		thread_lock(thread);
 407  
 408  		thread->importance = info->importance;
 409  
 410  		thread_recompute_priority(thread);
 411  
 412  		thread_unlock(thread);
 413  		splx(s);
 414  
 415  		break;
 416  	}
 417  
 418  	case THREAD_AFFINITY_POLICY:
 419  	{
 420  		thread_affinity_policy_t info;
 421  
 422  		if (!thread_affinity_is_supported()) {
 423  			result = KERN_NOT_SUPPORTED;
 424  			break;
 425  		}
 426  		if (count < THREAD_AFFINITY_POLICY_COUNT) {
 427  			result = KERN_INVALID_ARGUMENT;
 428  			break;
 429  		}
 430  
 431  		info = (thread_affinity_policy_t) policy_info;
 432  		/*
 433  		 * Unlock the thread mutex here and
 434  		 * return directly after calling thread_affinity_set().
 435  		 * This is necessary for correct lock ordering because
 436  		 * thread_affinity_set() takes the task lock.
 437  		 */
 438  		thread_mtx_unlock(thread);
 439  		return thread_affinity_set(thread, info->affinity_tag);
 440  	}
 441  
 442  #if !defined(XNU_TARGET_OS_OSX)
 443  	case THREAD_BACKGROUND_POLICY:
 444  	{
 445  		thread_background_policy_t info;
 446  
 447  		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
 448  			result = KERN_INVALID_ARGUMENT;
 449  			break;
 450  		}
 451  
 452  		if (thread->task != current_task()) {
 453  			result = KERN_PROTECTION_FAILURE;
 454  			break;
 455  		}
 456  
 457  		info = (thread_background_policy_t) policy_info;
 458  
 459  		int enable;
 460  
 461  		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
 462  			enable = TASK_POLICY_ENABLE;
 463  		} else {
 464  			enable = TASK_POLICY_DISABLE;
 465  		}
 466  
 467  		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;
 468  
 469  		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);
 470  
 471  		break;
 472  	}
 473  #endif /* !defined(XNU_TARGET_OS_OSX) */
 474  
 475  	case THREAD_THROUGHPUT_QOS_POLICY:
 476  	{
 477  		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
 478  		thread_throughput_qos_t tqos;
 479  
 480  		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
 481  			result = KERN_INVALID_ARGUMENT;
 482  			break;
 483  		}
 484  
 485  		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
 486  			break;
 487  		}
 488  
 489  		tqos = qos_extract(info->thread_throughput_qos_tier);
 490  
 491  		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
 492  		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);
 493  
 494  		break;
 495  	}
 496  
 497  	case THREAD_LATENCY_QOS_POLICY:
 498  	{
 499  		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
 500  		thread_latency_qos_t lqos;
 501  
 502  		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
 503  			result = KERN_INVALID_ARGUMENT;
 504  			break;
 505  		}
 506  
 507  		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
 508  			break;
 509  		}
 510  
 511  		lqos = qos_extract(info->thread_latency_qos_tier);
 512  
 513  		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
 514  		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);
 515  
 516  		break;
 517  	}
 518  
 519  	case THREAD_QOS_POLICY:
 520  	{
 521  		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
 522  
 523  		if (count < THREAD_QOS_POLICY_COUNT) {
 524  			result = KERN_INVALID_ARGUMENT;
 525  			break;
 526  		}
 527  
 528  		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
 529  			result = KERN_INVALID_ARGUMENT;
 530  			break;
 531  		}
 532  
 533  		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
 534  			result = KERN_INVALID_ARGUMENT;
 535  			break;
 536  		}
 537  
 538  		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
 539  			result = KERN_INVALID_ARGUMENT;
 540  			break;
 541  		}
 542  
 543  		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
 544  		    info->qos_tier, -info->tier_importance, &pend_token);
 545  
 546  		break;
 547  	}
 548  
 549  	default:
 550  		result = KERN_INVALID_ARGUMENT;
 551  		break;
 552  	}
 553  
 554  	thread_mtx_unlock(thread);
 555  
 556  	thread_policy_update_complete_unlocked(thread, &pend_token);
 557  
 558  	return result;
 559  }
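
       #if 0
       /*
        * Illustrative userspace sketch (not compiled here; a hypothetical helper
        * using only <mach/thread_policy.h> names): a real-time audio-style thread
        * requesting up to 5ms of computation within every 10ms period, finished
        * within a 7ms constraint. The fields are in mach_absolute_time() units,
        * hence the timebase conversion.
        */
       #include <mach/mach.h>
       #include <mach/mach_time.h>
       #include <pthread.h>

       static kern_return_t
       make_self_realtime(void)
       {
       	mach_timebase_info_data_t timebase;
       	mach_timebase_info(&timebase);

       	/* mach ticks per millisecond: ticks = ns * denom / numer */
       	uint64_t ms = (1000000ULL * timebase.denom) / timebase.numer;

       	thread_time_constraint_policy_data_t policy = {
       		.period      = (uint32_t)(10 * ms),
       		.computation = (uint32_t)(5 * ms),
       		.constraint  = (uint32_t)(7 * ms),
       		.preemptible = TRUE,
       	};

       	return thread_policy_set(pthread_mach_thread_np(pthread_self()),
       	    THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t)&policy,
       	    THREAD_TIME_CONSTRAINT_POLICY_COUNT);
       }
       #endif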
 560  
 561  /*
 562   * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
 563   * Both result in FIXED mode scheduling.
 564   */
 565  static sched_mode_t
 566  convert_policy_to_sched_mode(integer_t policy)
 567  {
 568  	switch (policy) {
 569  	case POLICY_TIMESHARE:
 570  		return TH_MODE_TIMESHARE;
 571  	case POLICY_RR:
 572  	case POLICY_FIFO:
 573  		return TH_MODE_FIXED;
 574  	default:
 575  		panic("unexpected sched policy: %d", policy);
 576  		return TH_MODE_NONE;
 577  	}
 578  }
 579  
 580  /*
 581   * Called either with the thread mutex locked
 582   * or from the pthread kext in a 'safe place'.
 583   */
 584  static kern_return_t
 585  thread_set_mode_and_absolute_pri_internal(thread_t              thread,
 586      sched_mode_t          mode,
 587      integer_t             priority,
 588      task_pend_token_t     pend_token)
 589  {
 590  	kern_return_t kr = KERN_SUCCESS;
 591  
 592  	spl_t s = splsched();
 593  	thread_lock(thread);
 594  
 595  	/* This path isn't allowed to change a thread out of realtime. */
 596  	if ((thread->sched_mode == TH_MODE_REALTIME) ||
 597  	    (thread->saved_mode == TH_MODE_REALTIME)) {
 598  		kr = KERN_FAILURE;
 599  		goto unlock;
 600  	}
 601  
 602  	if (thread->policy_reset) {
 603  		kr = KERN_SUCCESS;
 604  		goto unlock;
 605  	}
 606  
 607  	sched_mode_t old_mode = thread->sched_mode;
 608  
 609  	/*
 610  	 * Reverse engineer and apply the correct importance value
 611  	 * from the requested absolute priority value.
 612  	 *
 613  	 * TODO: Store the absolute priority value instead
 614  	 */
 615  
 616  	if (priority >= thread->max_priority) {
 617  		priority = thread->max_priority - thread->task_priority;
 618  	} else if (priority >= MINPRI_KERNEL) {
 619  		priority -=  MINPRI_KERNEL;
 620  	} else if (priority >= MINPRI_RESERVED) {
 621  		priority -=  MINPRI_RESERVED;
 622  	} else {
 623  		priority -= BASEPRI_DEFAULT;
 624  	}
 625  
 626  	priority += thread->task_priority;
 627  
 628  	if (priority > thread->max_priority) {
 629  		priority = thread->max_priority;
 630  	} else if (priority < MINPRI) {
 631  		priority = MINPRI;
 632  	}
 633  
 634  	thread->importance = priority - thread->task_priority;
 635  
 636  	thread_set_user_sched_mode_and_recompute_pri(thread, mode);
 637  
 638  	if (mode != old_mode) {
 639  		pend_token->tpt_update_thread_sfi = 1;
 640  	}
 641  
 642  unlock:
 643  	thread_unlock(thread);
 644  	splx(s);
 645  
 646  	return kr;
 647  }
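
       /*
        * Worked example of the importance reconstruction above, for a default user
        * task (task_priority 31, max_priority 63): a request for absolute priority
        * 60 is below max_priority, MINPRI_KERNEL (80) and MINPRI_RESERVED (64), so
        * the final case applies: 60 - BASEPRI_DEFAULT (31) = 29 becomes the
        * importance, and the recomputed base priority is 31 + 29 = 60 again.
        */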
 648  
 649  void
 650  thread_freeze_base_pri(thread_t thread)
 651  {
 652  	assert(thread == current_thread());
 653  
 654  	spl_t s = splsched();
 655  	thread_lock(thread);
 656  
 657  	assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
 658  	thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
 659  
 660  	thread_unlock(thread);
 661  	splx(s);
 662  }
 663  
 664  bool
 665  thread_unfreeze_base_pri(thread_t thread)
 666  {
 667  	assert(thread == current_thread());
 668  	integer_t base_pri;
 669  	ast_t ast = 0;
 670  
 671  	spl_t s = splsched();
 672  	thread_lock(thread);
 673  
 674  	assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
 675  	thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
 676  
 677  	base_pri = thread->req_base_pri;
 678  	if (base_pri != thread->base_pri) {
 679  		/*
 680  		 * This function returns "true" if the base pri change
 681  		 * is the most likely cause for the preemption.
 682  		 */
 683  		sched_set_thread_base_priority(thread, base_pri);
 684  		ast = ast_peek(AST_PREEMPT);
 685  	}
 686  
 687  	thread_unlock(thread);
 688  	splx(s);
 689  
 690  	return ast != 0;
 691  }
 692  
 693  uint8_t
 694  thread_workq_pri_for_qos(thread_qos_t qos)
 695  {
 696  	assert(qos < THREAD_QOS_LAST);
 697  	return (uint8_t)thread_qos_policy_params.qos_pri[qos];
 698  }
 699  
 700  thread_qos_t
 701  thread_workq_qos_for_pri(int priority)
 702  {
 703  	thread_qos_t qos;
 704  	if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
 705  		// indicate that workq should map >UI threads to workq's
 706  		// internal notation for above-UI work.
 707  		return THREAD_QOS_UNSPECIFIED;
 708  	}
 709  	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
 710  		// map a given priority up to the next nearest qos band.
 711  		if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
 712  			return qos;
 713  		}
 714  	}
 715  	return THREAD_QOS_MAINTENANCE;
 716  }
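
       /*
        * Worked example: thread_workq_qos_for_pri(25). 25 is not above the UI band
        * (qos_pri 46); walking down, qos_pri[THREAD_QOS_UTILITY] (20) is the first
        * band priority strictly below 25, so the priority rounds up to
        * THREAD_QOS_LEGACY (qos_pri 31).
        */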
 717  
 718  /*
 719   * private interface for pthread workqueues
 720   *
 721   * Set scheduling policy & absolute priority for thread
 722   * May be called with spinlocks held
 723   * Thread mutex lock is not held
 724   */
 725  void
 726  thread_reset_workq_qos(thread_t thread, uint32_t qos)
 727  {
 728  	struct task_pend_token pend_token = {};
 729  
 730  	assert(qos < THREAD_QOS_LAST);
 731  
 732  	spl_t s = splsched();
 733  	thread_lock(thread);
 734  
 735  	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
 736  	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
 737  	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
 738  	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
 739  	    &pend_token);
 740  
 741  	assert(pend_token.tpt_update_sockets == 0);
 742  
 743  	thread_unlock(thread);
 744  	splx(s);
 745  
 746  	thread_policy_update_complete_unlocked(thread, &pend_token);
 747  }
 748  
 749  /*
 750   * private interface for pthread workqueues
 751   *
 752   * Set scheduling policy & absolute priority for thread
 753   * May be called with spinlocks held
 754   * Thread mutex lock is held
 755   */
 756  void
 757  thread_set_workq_override(thread_t thread, uint32_t qos)
 758  {
 759  	struct task_pend_token pend_token = {};
 760  
 761  	assert(qos < THREAD_QOS_LAST);
 762  
 763  	spl_t s = splsched();
 764  	thread_lock(thread);
 765  
 766  	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
 767  	    TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
 768  
 769  	assert(pend_token.tpt_update_sockets == 0);
 770  
 771  	thread_unlock(thread);
 772  	splx(s);
 773  
 774  	thread_policy_update_complete_unlocked(thread, &pend_token);
 775  }
 776  
 777  /*
 778   * private interface for pthread workqueues
 779   *
 780   * Set scheduling policy & absolute priority for thread
 781   * May be called with spinlocks held
 782   * Thread mutex lock is not held
 783   */
 784  void
 785  thread_set_workq_pri(thread_t  thread,
 786      thread_qos_t qos,
 787      integer_t priority,
 788      integer_t policy)
 789  {
 790  	struct task_pend_token pend_token = {};
 791  	sched_mode_t mode = convert_policy_to_sched_mode(policy);
 792  
 793  	assert(qos < THREAD_QOS_LAST);
 794  	assert(thread->static_param);
 795  
 796  	if (!thread->static_param || !thread->active) {
 797  		return;
 798  	}
 799  
 800  	spl_t s = splsched();
 801  	thread_lock(thread);
 802  
 803  	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
 804  	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
 805  	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
 806  	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
 807  	    0, &pend_token);
 808  
 809  	thread_unlock(thread);
 810  	splx(s);
 811  
 812  	/* Concern: this doesn't hold the mutex... */
 813  
 814  	__assert_only kern_return_t kr;
 815  	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
 816  	    &pend_token);
 817  	assert(kr == KERN_SUCCESS);
 818  
 819  	if (pend_token.tpt_update_thread_sfi) {
 820  		sfi_reevaluate(thread);
 821  	}
 822  }
 823  
 824  /*
 825   * thread_set_mode_and_absolute_pri:
 826   *
 827   * Set scheduling policy & absolute priority for thread, for deprecated
 828   * thread_set_policy and thread_policy interfaces.
 829   *
 830   * Called with nothing locked.
 831   */
 832  kern_return_t
 833  thread_set_mode_and_absolute_pri(thread_t   thread,
 834      integer_t  policy,
 835      integer_t  priority)
 836  {
 837  	kern_return_t kr = KERN_SUCCESS;
 838  	struct task_pend_token pend_token = {};
 839  
 840  	sched_mode_t mode = convert_policy_to_sched_mode(policy);
 841  
 842  	thread_mtx_lock(thread);
 843  
 844  	if (!thread->active) {
 845  		kr = KERN_TERMINATED;
 846  		goto unlock;
 847  	}
 848  
 849  	if (thread_is_static_param(thread)) {
 850  		kr = KERN_POLICY_STATIC;
 851  		goto unlock;
 852  	}
 853  
 854  	/* Setting legacy policies on threads kills the current QoS */
 855  	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
 856  		thread_remove_qos_policy_locked(thread, &pend_token);
 857  	}
 858  
 859  	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
 860  
 861  unlock:
 862  	thread_mtx_unlock(thread);
 863  
 864  	thread_policy_update_complete_unlocked(thread, &pend_token);
 865  
 866  	return kr;
 867  }
 868  
 869  /*
 870   * Set the thread's requested mode and recompute priority
 871   * Called with thread mutex and thread locked
 872   *
 873   * TODO: Mitigate potential problems caused by moving thread to end of runq
 874   * whenever its priority is recomputed
 875   *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
 876   */
 877  static void
 878  thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
 879  {
 880  	if (thread->policy_reset) {
 881  		return;
 882  	}
 883  
 884  	boolean_t removed = thread_run_queue_remove(thread);
 885  
 886  	/*
 887  	 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
 888  	 * That way there's zero confusion over which the user wants
 889  	 * and which the kernel wants.
 890  	 */
 891  	if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
 892  		thread->saved_mode = mode;
 893  	} else {
 894  		sched_set_thread_mode(thread, mode);
 895  	}
 896  
 897  	thread_recompute_priority(thread);
 898  
 899  	if (removed) {
 900  		thread_run_queue_reinsert(thread, SCHED_TAILQ);
 901  	}
 902  }
 903  
  904  /* Called at splsched with the thread lock held */
 905  static void
 906  thread_update_qos_cpu_time_locked(thread_t thread)
 907  {
 908  	task_t task = thread->task;
 909  	uint64_t timer_sum, timer_delta;
 910  
 911  	/*
 912  	 * This is only as accurate as the distance between
 913  	 * last context switch (embedded) or last user/kernel boundary transition (desktop)
 914  	 * because user_timer and system_timer are only updated then.
 915  	 *
 916  	 * TODO: Consider running a timer_update operation here to update it first.
 917  	 *       Maybe doable with interrupts disabled from current thread.
 918  	 *       If the thread is on a different core, may not be easy to get right.
 919  	 *
 920  	 * TODO: There should be a function for this in timer.c
 921  	 */
 922  
 923  	timer_sum = timer_grab(&thread->user_timer);
 924  	timer_sum += timer_grab(&thread->system_timer);
 925  	timer_delta = timer_sum - thread->vtimer_qos_save;
 926  
 927  	thread->vtimer_qos_save = timer_sum;
 928  
 929  	uint64_t* task_counter = NULL;
 930  
 931  	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
 932  	switch (thread->effective_policy.thep_qos) {
 933  	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
 934  	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
 935  	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
 936  	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
 937  	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
 938  	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
 939  	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
 940  	default:
 941  		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
 942  	}
 943  
 944  	OSAddAtomic64(timer_delta, task_counter);
 945  
 946  	/* Update the task-level qos stats atomically, because we don't have the task lock. */
 947  	switch (thread->requested_policy.thrp_qos) {
 948  	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
 949  	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
 950  	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
 951  	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
 952  	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
 953  	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
 954  	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
 955  	default:
 956  		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
 957  	}
 958  
 959  	OSAddAtomic64(timer_delta, task_counter);
 960  }
 961  
 962  /*
 963   * called with no thread locks held
 964   * may hold task lock
 965   */
 966  void
 967  thread_update_qos_cpu_time(thread_t thread)
 968  {
 969  	thread_mtx_lock(thread);
 970  
 971  	spl_t s = splsched();
 972  	thread_lock(thread);
 973  
 974  	thread_update_qos_cpu_time_locked(thread);
 975  
 976  	thread_unlock(thread);
 977  	splx(s);
 978  
 979  	thread_mtx_unlock(thread);
 980  }
 981  
 982  /*
 983   * Calculate base priority from thread attributes, and set it on the thread
 984   *
 985   * Called with thread_lock and thread mutex held.
 986   */
 987  extern thread_t vm_pageout_scan_thread;
 988  extern boolean_t vps_dynamic_priority_enabled;
 989  
 990  void
 991  thread_recompute_priority(
 992  	thread_t                thread)
 993  {
 994  	integer_t               priority;
 995  
 996  	if (thread->policy_reset) {
 997  		return;
 998  	}
 999  
1000  	if (thread->sched_mode == TH_MODE_REALTIME) {
1001  		sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
1002  		return;
1003  	} else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1004  		int qos = thread->effective_policy.thep_qos;
1005  		int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1006  		int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1007  		int qos_scaled_relprio;
1008  
1009  		assert(qos >= 0 && qos < THREAD_QOS_LAST);
1010  		assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1011  
1012  		priority = thread_qos_policy_params.qos_pri[qos];
1013  		qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1014  
1015  		if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1016  			/* Bump priority 46 to 47 when in a frontmost app */
1017  			qos_scaled_relprio += 1;
1018  		}
1019  
1020  		/* TODO: factor in renice priority here? */
1021  
1022  		priority += qos_scaled_relprio;
1023  	} else {
1024  		if (thread->importance > MAXPRI) {
1025  			priority = MAXPRI;
1026  		} else if (thread->importance < -MAXPRI) {
1027  			priority = -MAXPRI;
1028  		} else {
1029  			priority = thread->importance;
1030  		}
1031  
1032  		priority += thread->task_priority;
1033  	}
1034  
1035  	priority = MAX(priority, thread->user_promotion_basepri);
1036  
1037  	/*
1038  	 * Clamp priority back into the allowed range for this task.
1039  	 *  The initial priority value could be out of this range due to:
1040  	 *      Task clamped to BG or Utility (max-pri is 4, or 20)
1041  	 *      Task is user task (max-pri is 63)
1042  	 *      Task is kernel task (max-pri is 95)
1043  	 * Note that thread->importance is user-settable to any integer
1044  	 * via THREAD_PRECEDENCE_POLICY.
1045  	 */
1046  	if (priority > thread->max_priority) {
1047  		if (thread->effective_policy.thep_promote_above_task) {
1048  			priority = MAX(thread->max_priority, thread->user_promotion_basepri);
1049  		} else {
1050  			priority = thread->max_priority;
1051  		}
1052  	} else if (priority < MINPRI) {
1053  		priority = MINPRI;
1054  	}
1055  
1056  	if (thread->saved_mode == TH_MODE_REALTIME &&
1057  	    thread->sched_flags & TH_SFLAG_FAILSAFE) {
1058  		priority = DEPRESSPRI;
1059  	}
1060  
1061  	if (thread->effective_policy.thep_terminated == TRUE) {
1062  		/*
1063  		 * We temporarily want to override the expected priority to
1064  		 * ensure that the thread exits in a timely manner.
1065  		 * Note that this is allowed to exceed thread->max_priority
1066  		 * so that the thread is no longer clamped to background
1067  		 * during the final exit phase.
1068  		 */
1069  		if (priority < thread->task_priority) {
1070  			priority = thread->task_priority;
1071  		}
1072  		if (priority < BASEPRI_DEFAULT) {
1073  			priority = BASEPRI_DEFAULT;
1074  		}
1075  	}
1076  
1077  #if !defined(XNU_TARGET_OS_OSX)
1078  	/* No one can have a base priority less than MAXPRI_THROTTLE */
1079  	if (priority < MAXPRI_THROTTLE) {
1080  		priority = MAXPRI_THROTTLE;
1081  	}
1082  #endif /* !defined(XNU_TARGET_OS_OSX) */
1083  
1084  	sched_set_thread_base_priority(thread, priority);
1085  }
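
       /*
        * Worked example: an effective THREAD_QOS_UTILITY thread with relpri -2 in
        * an unrestricted task starts from qos_pri[UTILITY] = 20; the band below is
        * BACKGROUND (qos_pri 4), so the scaled offset is -(((20 - 4) * 2) >> 4) = -2,
        * yielding base priority 18 before the max_priority/MINPRI clamps.
        */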
1086  
1087  /* Called with the task lock held, but not the thread mutex or spinlock */
1088  void
1089  thread_policy_update_tasklocked(
1090  	thread_t           thread,
1091  	integer_t          priority,
1092  	integer_t          max_priority,
1093  	task_pend_token_t  pend_token)
1094  {
1095  	thread_mtx_lock(thread);
1096  
1097  	if (!thread->active || thread->policy_reset) {
1098  		thread_mtx_unlock(thread);
1099  		return;
1100  	}
1101  
1102  	spl_t s = splsched();
1103  	thread_lock(thread);
1104  
1105  	__unused
1106  	integer_t old_max_priority = thread->max_priority;
1107  
1108  	assert(priority >= INT16_MIN && priority <= INT16_MAX);
1109  	thread->task_priority = (int16_t)priority;
1110  
1111  	assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
1112  	thread->max_priority = (int16_t)max_priority;
1113  
       #if !defined(XNU_TARGET_OS_OSX)
 1114  	/*
 1115  	 * When backgrounding a thread, realtime and fixed priority threads
 1116  	 * should be demoted to timeshare background threads.
 1117  	 *
 1118  	 * TODO: Do this inside the thread policy update routine in order to avoid double
 1119  	 * remove/reinsert for a runnable thread
 1120  	 */
 1121  	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
 1122  		sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
 1123  	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
 1124  		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
 1125  	}
       #endif /* !defined(XNU_TARGET_OS_OSX) */
1126  
1127  	thread_policy_update_spinlocked(thread, true, pend_token);
1128  
1129  	thread_unlock(thread);
1130  	splx(s);
1131  
1132  	thread_mtx_unlock(thread);
1133  }
1134  
1135  /*
1136   * Reset thread to default state in preparation for termination
1137   * Called with thread mutex locked
1138   *
1139   * Always called on current thread, so we don't need a run queue remove
1140   */
1141  void
1142  thread_policy_reset(
1143  	thread_t                thread)
1144  {
1145  	spl_t           s;
1146  
1147  	assert(thread == current_thread());
1148  
1149  	s = splsched();
1150  	thread_lock(thread);
1151  
1152  	if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1153  		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1154  	}
1155  
1156  	if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1157  		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1158  	}
1159  
1160  	/* At this point, the various demotions should be inactive */
1161  	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1162  	assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
1163  	assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1164  
1165  	/* Reset thread back to task-default basepri and mode  */
1166  	sched_mode_t newmode = SCHED(initial_thread_sched_mode)(thread->task);
1167  
1168  	sched_set_thread_mode(thread, newmode);
1169  
1170  	thread->importance = 0;
1171  
1172  	/* Prevent further changes to thread base priority or mode */
1173  	thread->policy_reset = 1;
1174  
1175  	sched_set_thread_base_priority(thread, thread->task_priority);
1176  
1177  	thread_unlock(thread);
1178  	splx(s);
1179  }
1180  
1181  kern_return_t
1182  thread_policy_get(
1183  	thread_t                                thread,
1184  	thread_policy_flavor_t  flavor,
1185  	thread_policy_t                 policy_info,
1186  	mach_msg_type_number_t  *count,
1187  	boolean_t                               *get_default)
1188  {
1189  	kern_return_t                   result = KERN_SUCCESS;
1190  
1191  	if (thread == THREAD_NULL) {
1192  		return KERN_INVALID_ARGUMENT;
1193  	}
1194  
1195  	thread_mtx_lock(thread);
1196  	if (!thread->active) {
1197  		thread_mtx_unlock(thread);
1198  
1199  		return KERN_TERMINATED;
1200  	}
1201  
1202  	switch (flavor) {
1203  	case THREAD_EXTENDED_POLICY:
1204  	{
1205  		boolean_t               timeshare = TRUE;
1206  
1207  		if (!(*get_default)) {
1208  			spl_t s = splsched();
1209  			thread_lock(thread);
1210  
1211  			if ((thread->sched_mode != TH_MODE_REALTIME) &&
1212  			    (thread->saved_mode != TH_MODE_REALTIME)) {
1213  				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
1214  					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
1215  				} else {
1216  					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
1217  				}
1218  			} else {
1219  				*get_default = TRUE;
1220  			}
1221  
1222  			thread_unlock(thread);
1223  			splx(s);
1224  		}
1225  
1226  		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
1227  			thread_extended_policy_t        info;
1228  
1229  			info = (thread_extended_policy_t)policy_info;
1230  			info->timeshare = timeshare;
1231  		}
1232  
1233  		break;
1234  	}
1235  
1236  	case THREAD_TIME_CONSTRAINT_POLICY:
1237  	{
1238  		thread_time_constraint_policy_t         info;
1239  
1240  		if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
1241  			result = KERN_INVALID_ARGUMENT;
1242  			break;
1243  		}
1244  
1245  		info = (thread_time_constraint_policy_t)policy_info;
1246  
1247  		if (!(*get_default)) {
1248  			spl_t s = splsched();
1249  			thread_lock(thread);
1250  
1251  			if ((thread->sched_mode == TH_MODE_REALTIME) ||
1252  			    (thread->saved_mode == TH_MODE_REALTIME)) {
1253  				info->period = thread->realtime.period;
1254  				info->computation = thread->realtime.computation;
1255  				info->constraint = thread->realtime.constraint;
1256  				info->preemptible = thread->realtime.preemptible;
1257  			} else {
1258  				*get_default = TRUE;
1259  			}
1260  
1261  			thread_unlock(thread);
1262  			splx(s);
1263  		}
1264  
1265  		if (*get_default) {
1266  			info->period = 0;
1267  			info->computation = default_timeshare_computation;
1268  			info->constraint = default_timeshare_constraint;
1269  			info->preemptible = TRUE;
1270  		}
1271  
1272  
1273  		break;
1274  	}
1275  
1276  	case THREAD_PRECEDENCE_POLICY:
1277  	{
1278  		thread_precedence_policy_t              info;
1279  
1280  		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
1281  			result = KERN_INVALID_ARGUMENT;
1282  			break;
1283  		}
1284  
1285  		info = (thread_precedence_policy_t)policy_info;
1286  
1287  		if (!(*get_default)) {
1288  			spl_t s = splsched();
1289  			thread_lock(thread);
1290  
1291  			info->importance = thread->importance;
1292  
1293  			thread_unlock(thread);
1294  			splx(s);
1295  		} else {
1296  			info->importance = 0;
1297  		}
1298  
1299  		break;
1300  	}
1301  
1302  	case THREAD_AFFINITY_POLICY:
1303  	{
1304  		thread_affinity_policy_t                info;
1305  
1306  		if (!thread_affinity_is_supported()) {
1307  			result = KERN_NOT_SUPPORTED;
1308  			break;
1309  		}
1310  		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
1311  			result = KERN_INVALID_ARGUMENT;
1312  			break;
1313  		}
1314  
1315  		info = (thread_affinity_policy_t)policy_info;
1316  
1317  		if (!(*get_default)) {
1318  			info->affinity_tag = thread_affinity_get(thread);
1319  		} else {
1320  			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
1321  		}
1322  
1323  		break;
1324  	}
1325  
1326  	case THREAD_POLICY_STATE:
1327  	{
1328  		thread_policy_state_t           info;
1329  
1330  		if (*count < THREAD_POLICY_STATE_COUNT) {
1331  			result = KERN_INVALID_ARGUMENT;
1332  			break;
1333  		}
1334  
1335  		/* Only root can get this info */
1336  		if (current_task()->sec_token.val[0] != 0) {
1337  			result = KERN_PROTECTION_FAILURE;
1338  			break;
1339  		}
1340  
1341  		info = (thread_policy_state_t)(void*)policy_info;
1342  
1343  		if (!(*get_default)) {
1344  			info->flags = 0;
1345  
1346  			spl_t s = splsched();
1347  			thread_lock(thread);
1348  
1349  			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);
1350  
1351  			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
1352  			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);
1353  
1354  			info->thps_user_promotions          = 0;
1355  			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
1356  			info->thps_ipc_overrides            = thread->kevent_overrides;
1357  
1358  			proc_get_thread_policy_bitfield(thread, info);
1359  
1360  			thread_unlock(thread);
1361  			splx(s);
1362  		} else {
1363  			info->requested = 0;
1364  			info->effective = 0;
1365  			info->pending = 0;
1366  		}
1367  
1368  		break;
1369  	}
1370  
1371  	case THREAD_LATENCY_QOS_POLICY:
1372  	{
1373  		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
1374  		thread_latency_qos_t plqos;
1375  
1376  		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
1377  			result = KERN_INVALID_ARGUMENT;
1378  			break;
1379  		}
1380  
1381  		if (*get_default) {
1382  			plqos = 0;
1383  		} else {
1384  			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
1385  		}
1386  
1387  		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
1388  	}
1389  	break;
1390  
1391  	case THREAD_THROUGHPUT_QOS_POLICY:
1392  	{
1393  		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
1394  		thread_throughput_qos_t ptqos;
1395  
1396  		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
1397  			result = KERN_INVALID_ARGUMENT;
1398  			break;
1399  		}
1400  
1401  		if (*get_default) {
1402  			ptqos = 0;
1403  		} else {
1404  			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
1405  		}
1406  
1407  		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
1408  	}
1409  	break;
1410  
1411  	case THREAD_QOS_POLICY:
1412  	{
1413  		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
1414  
1415  		if (*count < THREAD_QOS_POLICY_COUNT) {
1416  			result = KERN_INVALID_ARGUMENT;
1417  			break;
1418  		}
1419  
1420  		if (!(*get_default)) {
1421  			int relprio_value = 0;
1422  			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
1423  			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
1424  
1425  			info->tier_importance = -relprio_value;
1426  		} else {
1427  			info->qos_tier = THREAD_QOS_UNSPECIFIED;
1428  			info->tier_importance = 0;
1429  		}
1430  
1431  		break;
1432  	}
1433  
1434  	default:
1435  		result = KERN_INVALID_ARGUMENT;
1436  		break;
1437  	}
1438  
1439  	thread_mtx_unlock(thread);
1440  
1441  	return result;
1442  }
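
       #if 0
       /*
        * Illustrative userspace sketch (not compiled here; a hypothetical helper):
        * querying whether the current thread is timeshare. get_default comes back
        * TRUE when the thread is realtime, since THREAD_EXTENDED_POLICY cannot
        * describe that mode.
        */
       #include <mach/mach.h>
       #include <pthread.h>

       static boolean_t
       self_is_timeshare(void)
       {
       	thread_extended_policy_data_t ext = { .timeshare = TRUE };
       	mach_msg_type_number_t count = THREAD_EXTENDED_POLICY_COUNT;
       	boolean_t get_default = FALSE;

       	(void)thread_policy_get(pthread_mach_thread_np(pthread_self()),
       	    THREAD_EXTENDED_POLICY, (thread_policy_t)&ext, &count, &get_default);

       	return ext.timeshare;
       }
       #endif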
1443  
1444  void
1445  thread_policy_create(thread_t thread)
1446  {
1447  	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1448  	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1449  	    thread_tid(thread), theffective_0(thread),
1450  	    theffective_1(thread), thread->base_pri, 0);
1451  
1452  	/* We pass a pend token but ignore it */
1453  	struct task_pend_token pend_token = {};
1454  
1455  	thread_policy_update_internal_spinlocked(thread, true, &pend_token);
1456  
1457  	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1458  	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1459  	    thread_tid(thread), theffective_0(thread),
1460  	    theffective_1(thread), thread->base_pri, 0);
1461  }
1462  
1463  static void
1464  thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
1465  {
1466  	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1467  	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1468  	    thread_tid(thread), theffective_0(thread),
1469  	    theffective_1(thread), thread->base_pri, 0);
1470  
1471  	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1472  
1473  	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1474  	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1475  	    thread_tid(thread), theffective_0(thread),
1476  	    theffective_1(thread), thread->base_pri, 0);
1477  }
1478  
1479  
1480  
1481  /*
1482   * One thread state update function TO RULE THEM ALL
1483   *
1484   * This function updates the thread effective policy fields
1485   * and pushes the results to the relevant subsystems.
1486   *
 1487   * Actions that cannot be performed under the locks held here are pended via *pend_token.
1488   *
1489   * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1490   */
1491  static void
1492  thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
1493      task_pend_token_t pend_token)
1494  {
1495  	/*
1496  	 * Step 1:
1497  	 *  Gather requested policy and effective task state
1498  	 */
1499  
1500  	struct thread_requested_policy requested = thread->requested_policy;
1501  	struct task_effective_policy task_effective = thread->task->effective_policy;
1502  
1503  	/*
1504  	 * Step 2:
1505  	 *  Calculate new effective policies from requested policy, task and thread state
1506  	 *  Rules:
1507  	 *      Don't change requested, it won't take effect
1508  	 */
1509  
1510  	struct thread_effective_policy next = {};
1511  
1512  	next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1513  
1514  	uint32_t next_qos = requested.thrp_qos;
1515  
1516  	if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1517  		next_qos = MAX(requested.thrp_qos_override, next_qos);
1518  		next_qos = MAX(requested.thrp_qos_promote, next_qos);
1519  		next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
1520  		next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
1521  		next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1522  	}
1523  
1524  	if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
1525  	    requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
1526  		/*
1527  		 * This thread is turnstile-boosted higher than the adaptive clamp
1528  		 * by a synchronous waiter. Allow that to override the adaptive
1529  		 * clamp temporarily for this thread only.
1530  		 */
1531  		next.thep_promote_above_task = true;
1532  		next_qos = requested.thrp_qos_promote;
1533  	}
1534  
1535  	next.thep_qos = next_qos;
1536  
1537  	/* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1538  	if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1539  		if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1540  			next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1541  		} else {
1542  			next.thep_qos = task_effective.tep_qos_clamp;
1543  		}
1544  	}
1545  
1546  	/*
1547  	 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1548  	 * This allows QoS promotions to work properly even after the process is unclamped.
1549  	 */
1550  	next.thep_qos_promote = next.thep_qos;
1551  
1552  	/* The ceiling only applies to threads that are in the QoS world */
1553  	/* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
1554  	if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1555  	    next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1556  		next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1557  	}
1558  
1559  	/*
1560  	 * The QoS relative priority is only applicable when the original programmer's
1561  	 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1562  	 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1563  	 * since otherwise it would be lower than unclamped threads. Similarly, in the
1564  	 * presence of boosting, the programmer doesn't know what other actors
1565  	 * are boosting the thread.
1566  	 */
1567  	if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1568  	    (requested.thrp_qos == next.thep_qos) &&
1569  	    (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1570  		next.thep_qos_relprio = requested.thrp_qos_relprio;
1571  	} else {
1572  		next.thep_qos_relprio = 0;
1573  	}
1574  
1575  	/* Calculate DARWIN_BG */
1576  	bool wants_darwinbg        = false;
1577  	bool wants_all_sockets_bg  = false; /* Do I want my existing sockets to be bg */
1578  
1579  	if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
1580  		wants_darwinbg = true;
1581  	}
1582  
1583  	/*
1584  	 * If DARWIN_BG has been requested at either level, it's engaged.
1585  	 * darwinbg threads always create bg sockets,
1586  	 * but only some types of darwinbg change the sockets
1587  	 * after they're created
1588  	 */
1589  	if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
1590  		wants_all_sockets_bg = wants_darwinbg = true;
1591  	}
1592  
1593  	if (requested.thrp_pidbind_bg) {
1594  		wants_all_sockets_bg = wants_darwinbg = true;
1595  	}
1596  
1597  	if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1598  	    next.thep_qos == THREAD_QOS_MAINTENANCE) {
1599  		wants_darwinbg = true;
1600  	}
1601  
1602  	/* Calculate side effects of DARWIN_BG */
1603  
1604  	if (wants_darwinbg) {
1605  		next.thep_darwinbg = 1;
1606  	}
1607  
1608  	if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
1609  		next.thep_new_sockets_bg = 1;
1610  	}
1611  
1612  	/* Don't use task_effective.tep_all_sockets_bg here */
1613  	if (wants_all_sockets_bg) {
1614  		next.thep_all_sockets_bg = 1;
1615  	}
1616  
1617  	/* darwinbg implies background QOS (or lower) */
1618  	if (next.thep_darwinbg &&
1619  	    (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1620  		next.thep_qos = THREAD_QOS_BACKGROUND;
1621  		next.thep_qos_relprio = 0;
1622  	}
1623  
1624  	/* Calculate IO policy */
1625  
1626  	int iopol = THROTTLE_LEVEL_TIER0;
1627  
1628  	/* Factor in the task's IO policy */
1629  	if (next.thep_darwinbg) {
1630  		iopol = MAX(iopol, task_effective.tep_bg_iotier);
1631  	}
1632  
1633  	if (!next.thep_promote_above_task) {
1634  		iopol = MAX(iopol, task_effective.tep_io_tier);
1635  	}
1636  
1637  	/* Look up the associated IO tier value for the QoS class */
1638  	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1639  
1640  	iopol = MAX(iopol, requested.thrp_int_iotier);
1641  	iopol = MAX(iopol, requested.thrp_ext_iotier);
1642  
1643  	next.thep_io_tier = iopol;
1644  
1645  	/*
1646  	 * If a QoS override is causing IO to go into a lower tier, we also set
1647  	 * the passive bit so that a thread doesn't end up stuck in its own throttle
1648  	 * window when the override goes away.
1649  	 */
1650  
1651  	int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
1652  	int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
1653  	bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);
1654  
1655  	/* Calculate Passive IO policy */
1656  	if (requested.thrp_ext_iopassive ||
1657  	    requested.thrp_int_iopassive ||
1658  	    qos_io_override_active ||
1659  	    task_effective.tep_io_passive) {
1660  		next.thep_io_passive = 1;
1661  	}
1662  
1663  	/* Calculate timer QOS */
1664  	uint32_t latency_qos = requested.thrp_latency_qos;
1665  
1666  	if (!next.thep_promote_above_task) {
1667  		latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1668  	}
1669  
1670  	latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1671  
1672  	next.thep_latency_qos = latency_qos;
1673  
1674  	/* Calculate throughput QOS */
1675  	uint32_t through_qos = requested.thrp_through_qos;
1676  
1677  	if (!next.thep_promote_above_task) {
1678  		through_qos = MAX(through_qos, task_effective.tep_through_qos);
1679  	}
1680  
1681  	through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1682  
1683  	next.thep_through_qos = through_qos;
1684  
1685  	if (task_effective.tep_terminated || requested.thrp_terminated) {
1686  		/* Shoot down the throttles that slow down exit or response to SIGTERM */
1687  		next.thep_terminated    = 1;
1688  		next.thep_darwinbg      = 0;
1689  		next.thep_io_tier       = THROTTLE_LEVEL_TIER0;
1690  		next.thep_qos           = THREAD_QOS_UNSPECIFIED;
1691  		next.thep_latency_qos   = LATENCY_QOS_TIER_UNSPECIFIED;
1692  		next.thep_through_qos   = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1693  	}
1694  
1695  	/*
1696  	 * Step 3:
1697  	 *  Swap out old policy for new policy
1698  	 */
1699  
1700  	struct thread_effective_policy prev = thread->effective_policy;
1701  
1702  	thread_update_qos_cpu_time_locked(thread);
1703  
1704  	/* This is the point where the new values become visible to other threads */
1705  	thread->effective_policy = next;
1706  
1707  	/*
1708  	 * Step 4:
1709  	 *  Pend updates that can't be done while holding the thread lock
1710  	 */
1711  
1712  	if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
1713  		pend_token->tpt_update_sockets = 1;
1714  	}
1715  
1716  	/* TODO: Doesn't this only need to be done if the throttle went up? */
1717  	if (prev.thep_io_tier != next.thep_io_tier) {
1718  		pend_token->tpt_update_throttle = 1;
1719  	}
1720  
1721  	/*
1722  	 * Check for the attributes that sfi_thread_classify() consults,
1723  	 *  and trigger SFI re-evaluation.
1724  	 */
1725  	if (prev.thep_qos != next.thep_qos ||
1726  	    prev.thep_darwinbg != next.thep_darwinbg) {
1727  		pend_token->tpt_update_thread_sfi = 1;
1728  	}
1729  
1730  	integer_t old_base_pri = thread->base_pri;
1731  
1732  	/*
1733  	 * Step 5:
1734  	 *  Update other subsystems as necessary if something has changed
1735  	 */
1736  
1737  	/* Check for the attributes that thread_recompute_priority() consults */
1738  	if (prev.thep_qos != next.thep_qos ||
1739  	    prev.thep_qos_relprio != next.thep_qos_relprio ||
1740  	    prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1741  	    prev.thep_promote_above_task != next.thep_promote_above_task ||
1742  	    prev.thep_terminated != next.thep_terminated ||
1743  	    pend_token->tpt_force_recompute_pri == 1 ||
1744  	    recompute_priority) {
1745  		thread_recompute_priority(thread);
1746  	}
1747  
1748  	/*
1749  	 * Check if the thread is waiting on a turnstile and needs priority propagation.
1750  	 */
1751  	if (pend_token->tpt_update_turnstile &&
1752  	    ((old_base_pri == thread->base_pri) ||
1753  	    !thread_get_waiting_turnstile(thread))) {
1754  		/*
1755  		 * Reset the update-turnstile pend token, since either
1756  		 * the thread priority did not change or the thread is
1757  		 * not blocked on a turnstile.
1758  		 */
1759  		pend_token->tpt_update_turnstile = 0;
1760  	}
1761  }
1762  
1763  
1764  /*
1765   * Initiate a thread policy state transition on a thread, looked up by its TID.
1766   * Useful if you cannot guarantee that the thread won't be terminated out from under you.
1767   * Precondition: No locks are held
1768   * Will take the task lock - the non-tid variant is faster
1769   * if you already hold a thread ref.
1770   */
1771  void
1772  proc_set_thread_policy_with_tid(task_t     task,
1773      uint64_t   tid,
1774      int        category,
1775      int        flavor,
1776      int        value)
1777  {
1778  	/* takes task lock, returns ref'ed thread or NULL */
1779  	thread_t thread = task_findtid(task, tid);
1780  
1781  	if (thread == THREAD_NULL) {
1782  		return;
1783  	}
1784  
1785  	proc_set_thread_policy(thread, category, flavor, value);
1786  
1787  	thread_deallocate(thread);
1788  }
1789  
1790  /*
1791   * Initiate a thread policy transition on a thread
1792   * This path supports networking transitions (i.e. darwinbg transitions)
1793   * Precondition: No locks are held
1794   */
1795  void
1796  proc_set_thread_policy(thread_t   thread,
1797      int        category,
1798      int        flavor,
1799      int        value)
1800  {
1801  	struct task_pend_token pend_token = {};
1802  
1803  	thread_mtx_lock(thread);
1804  
1805  	proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);
1806  
1807  	thread_mtx_unlock(thread);
1808  
1809  	thread_policy_update_complete_unlocked(thread, &pend_token);
1810  }
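
      /*
       * Usage sketch (illustrative, not called from here): a caller holding
       * only a thread reference, and no locks, can move a thread's I/O and
       * sockets to background from the kernel-internal category:
       *
       *	proc_set_thread_policy(thread, TASK_POLICY_INTERNAL,
       *	    TASK_POLICY_DARWIN_BG, 1);
       *
       * The requested bit is set under the thread locks, and the pend token
       * then drives the unlocked side effects (socket re-marking, rethrottle,
       * SFI re-evaluation) in thread_policy_update_complete_unlocked().
       */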
1811  
1812  /*
1813   * Do the things that can't be done while holding a thread mutex.
1814   * These are set up to call back into thread policy to get the latest value,
1815   * so they don't have to be synchronized with the update.
1816   * The only required semantic is 'call this sometime after updating effective policy'
1817   *
1818   * Precondition: Thread mutex is not held
1819   *
1820   * This may be called with the task lock held, but in that case it won't be
1821   * called with tpt_update_sockets set.
1822   */
1823  void
1824  thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1825  {
1826  #ifdef MACH_BSD
1827  	if (pend_token->tpt_update_sockets) {
1828  		proc_apply_task_networkbg(thread->task->bsd_info, thread);
1829  	}
1830  #endif /* MACH_BSD */
1831  
1832  	if (pend_token->tpt_update_throttle) {
1833  		rethrottle_thread(thread->uthread);
1834  	}
1835  
1836  	if (pend_token->tpt_update_thread_sfi) {
1837  		sfi_reevaluate(thread);
1838  	}
1839  
1840  	if (pend_token->tpt_update_turnstile) {
1841  		turnstile_update_thread_priority_chain(thread);
1842  	}
1843  }
1844  
1845  /*
1846   * Set and update thread policy
1847   * Thread mutex might be held
1848   */
1849  static void
1850  proc_set_thread_policy_locked(thread_t          thread,
1851      int               category,
1852      int               flavor,
1853      int               value,
1854      int               value2,
1855      task_pend_token_t pend_token)
1856  {
1857  	spl_t s = splsched();
1858  	thread_lock(thread);
1859  
1860  	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1861  
1862  	thread_unlock(thread);
1863  	splx(s);
1864  }
1865  
1866  /*
1867   * Set and update thread policy
1868   * Thread spinlock is held
1869   */
1870  static void
1871  proc_set_thread_policy_spinlocked(thread_t          thread,
1872      int               category,
1873      int               flavor,
1874      int               value,
1875      int               value2,
1876      task_pend_token_t pend_token)
1877  {
1878  	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1879  	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1880  	    thread_tid(thread), threquested_0(thread),
1881  	    threquested_1(thread), value, 0);
1882  
1883  	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1884  
1885  	thread_policy_update_spinlocked(thread, false, pend_token);
1886  
1887  	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1888  	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1889  	    thread_tid(thread), threquested_0(thread),
1890  	    threquested_1(thread), tpending(pend_token), 0);
1891  }
1892  
1893  /*
1894   * Set the requested state for a specific flavor to a specific value.
1895   */
1896  static void
1897  thread_set_requested_policy_spinlocked(thread_t     thread,
1898      int               category,
1899      int               flavor,
1900      int               value,
1901      int               value2,
1902      task_pend_token_t pend_token)
1903  {
1904  	int tier, passive;
1905  
1906  	struct thread_requested_policy requested = thread->requested_policy;
1907  
1908  	switch (flavor) {
1909  	/* Category: EXTERNAL and INTERNAL, thread and task */
1910  
1911  	case TASK_POLICY_DARWIN_BG:
1912  		if (category == TASK_POLICY_EXTERNAL) {
1913  			requested.thrp_ext_darwinbg = value;
1914  		} else {
1915  			requested.thrp_int_darwinbg = value;
1916  		}
1917  		break;
1918  
1919  	case TASK_POLICY_IOPOL:
1920  		proc_iopol_to_tier(value, &tier, &passive);
1921  		if (category == TASK_POLICY_EXTERNAL) {
1922  			requested.thrp_ext_iotier  = tier;
1923  			requested.thrp_ext_iopassive = passive;
1924  		} else {
1925  			requested.thrp_int_iotier  = tier;
1926  			requested.thrp_int_iopassive = passive;
1927  		}
1928  		break;
1929  
1930  	case TASK_POLICY_IO:
1931  		if (category == TASK_POLICY_EXTERNAL) {
1932  			requested.thrp_ext_iotier = value;
1933  		} else {
1934  			requested.thrp_int_iotier = value;
1935  		}
1936  		break;
1937  
1938  	case TASK_POLICY_PASSIVE_IO:
1939  		if (category == TASK_POLICY_EXTERNAL) {
1940  			requested.thrp_ext_iopassive = value;
1941  		} else {
1942  			requested.thrp_int_iopassive = value;
1943  		}
1944  		break;
1945  
1946  	/* Category: ATTRIBUTE, thread only */
1947  
1948  	case TASK_POLICY_PIDBIND_BG:
1949  		assert(category == TASK_POLICY_ATTRIBUTE);
1950  		requested.thrp_pidbind_bg = value;
1951  		break;
1952  
1953  	case TASK_POLICY_LATENCY_QOS:
1954  		assert(category == TASK_POLICY_ATTRIBUTE);
1955  		requested.thrp_latency_qos = value;
1956  		break;
1957  
1958  	case TASK_POLICY_THROUGH_QOS:
1959  		assert(category == TASK_POLICY_ATTRIBUTE);
1960  		requested.thrp_through_qos = value;
1961  		break;
1962  
1963  	case TASK_POLICY_QOS_OVERRIDE:
1964  		assert(category == TASK_POLICY_ATTRIBUTE);
1965  		requested.thrp_qos_override = value;
1966  		pend_token->tpt_update_turnstile = 1;
1967  		break;
1968  
1969  	case TASK_POLICY_QOS_AND_RELPRIO:
1970  		assert(category == TASK_POLICY_ATTRIBUTE);
1971  		requested.thrp_qos = value;
1972  		requested.thrp_qos_relprio = value2;
1973  		pend_token->tpt_update_turnstile = 1;
1974  		DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
1975  		break;
1976  
1977  	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
1978  		assert(category == TASK_POLICY_ATTRIBUTE);
1979  		requested.thrp_qos_workq_override = value;
1980  		pend_token->tpt_update_turnstile = 1;
1981  		break;
1982  
1983  	case TASK_POLICY_QOS_PROMOTE:
1984  		assert(category == TASK_POLICY_ATTRIBUTE);
1985  		requested.thrp_qos_promote = value;
1986  		break;
1987  
1988  	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
1989  		assert(category == TASK_POLICY_ATTRIBUTE);
1990  		requested.thrp_qos_kevent_override = value;
1991  		pend_token->tpt_update_turnstile = 1;
1992  		break;
1993  
1994  	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
1995  		assert(category == TASK_POLICY_ATTRIBUTE);
1996  		requested.thrp_qos_wlsvc_override = value;
1997  		pend_token->tpt_update_turnstile = 1;
1998  		break;
1999  
2000  	case TASK_POLICY_TERMINATED:
2001  		assert(category == TASK_POLICY_ATTRIBUTE);
2002  		requested.thrp_terminated = value;
2003  		break;
2004  
2005  	default:
2006  		panic("unknown task policy: %d %d %d", category, flavor, value);
2007  		break;
2008  	}
2009  
2010  	thread->requested_policy = requested;
2011  }
2012  
2013  /*
2014   * Gets what you set. Effective values may be different.
2015   * Precondition: No locks are held
2016   */
2017  int
2018  proc_get_thread_policy(thread_t   thread,
2019      int        category,
2020      int        flavor)
2021  {
2022  	int value = 0;
2023  	thread_mtx_lock(thread);
2024  	value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2025  	thread_mtx_unlock(thread);
2026  	return value;
2027  }
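
      /*
       * Usage sketch (illustrative): this returns the requested value, which
       * may differ from what is in effect. For example, after
       *
       *	proc_set_thread_policy(thread, TASK_POLICY_EXTERNAL,
       *	    TASK_POLICY_IO, THROTTLE_LEVEL_TIER3);
       *
       * proc_get_thread_policy(thread, TASK_POLICY_EXTERNAL, TASK_POLICY_IO)
       * returns THROTTLE_LEVEL_TIER3, while the effective tier additionally
       * folds in the task policy and QoS class (see
       * proc_get_effective_thread_policy below).
       */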
2028  
2029  static int
2030  proc_get_thread_policy_locked(thread_t   thread,
2031      int        category,
2032      int        flavor,
2033      int*       value2)
2034  {
2035  	int value = 0;
2036  
2037  	spl_t s = splsched();
2038  	thread_lock(thread);
2039  
2040  	value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2041  
2042  	thread_unlock(thread);
2043  	splx(s);
2044  
2045  	return value;
2046  }
2047  
2048  /*
2049   * Gets what you set. Effective values may be different.
2050   */
2051  static int
2052  thread_get_requested_policy_spinlocked(thread_t thread,
2053      int      category,
2054      int      flavor,
2055      int*     value2)
2056  {
2057  	int value = 0;
2058  
2059  	struct thread_requested_policy requested = thread->requested_policy;
2060  
2061  	switch (flavor) {
2062  	case TASK_POLICY_DARWIN_BG:
2063  		if (category == TASK_POLICY_EXTERNAL) {
2064  			value = requested.thrp_ext_darwinbg;
2065  		} else {
2066  			value = requested.thrp_int_darwinbg;
2067  		}
2068  		break;
2069  	case TASK_POLICY_IOPOL:
2070  		if (category == TASK_POLICY_EXTERNAL) {
2071  			value = proc_tier_to_iopol(requested.thrp_ext_iotier,
2072  			    requested.thrp_ext_iopassive);
2073  		} else {
2074  			value = proc_tier_to_iopol(requested.thrp_int_iotier,
2075  			    requested.thrp_int_iopassive);
2076  		}
2077  		break;
2078  	case TASK_POLICY_IO:
2079  		if (category == TASK_POLICY_EXTERNAL) {
2080  			value = requested.thrp_ext_iotier;
2081  		} else {
2082  			value = requested.thrp_int_iotier;
2083  		}
2084  		break;
2085  	case TASK_POLICY_PASSIVE_IO:
2086  		if (category == TASK_POLICY_EXTERNAL) {
2087  			value = requested.thrp_ext_iopassive;
2088  		} else {
2089  			value = requested.thrp_int_iopassive;
2090  		}
2091  		break;
2092  	case TASK_POLICY_QOS:
2093  		assert(category == TASK_POLICY_ATTRIBUTE);
2094  		value = requested.thrp_qos;
2095  		break;
2096  	case TASK_POLICY_QOS_OVERRIDE:
2097  		assert(category == TASK_POLICY_ATTRIBUTE);
2098  		value = requested.thrp_qos_override;
2099  		break;
2100  	case TASK_POLICY_LATENCY_QOS:
2101  		assert(category == TASK_POLICY_ATTRIBUTE);
2102  		value = requested.thrp_latency_qos;
2103  		break;
2104  	case TASK_POLICY_THROUGH_QOS:
2105  		assert(category == TASK_POLICY_ATTRIBUTE);
2106  		value = requested.thrp_through_qos;
2107  		break;
2108  	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2109  		assert(category == TASK_POLICY_ATTRIBUTE);
2110  		value = requested.thrp_qos_workq_override;
2111  		break;
2112  	case TASK_POLICY_QOS_AND_RELPRIO:
2113  		assert(category == TASK_POLICY_ATTRIBUTE);
2114  		assert(value2 != NULL);
2115  		value = requested.thrp_qos;
2116  		*value2 = requested.thrp_qos_relprio;
2117  		break;
2118  	case TASK_POLICY_QOS_PROMOTE:
2119  		assert(category == TASK_POLICY_ATTRIBUTE);
2120  		value = requested.thrp_qos_promote;
2121  		break;
2122  	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2123  		assert(category == TASK_POLICY_ATTRIBUTE);
2124  		value = requested.thrp_qos_kevent_override;
2125  		break;
2126  	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2127  		assert(category == TASK_POLICY_ATTRIBUTE);
2128  		value = requested.thrp_qos_wlsvc_override;
2129  		break;
2130  	case TASK_POLICY_TERMINATED:
2131  		assert(category == TASK_POLICY_ATTRIBUTE);
2132  		value = requested.thrp_terminated;
2133  		break;
2134  
2135  	default:
2136  		panic("unknown policy_flavor %d", flavor);
2137  		break;
2138  	}
2139  
2140  	return value;
2141  }
2142  
2143  /*
2144   * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2145   *
2146   * NOTE: This accessor does not take the task or thread lock.
2147   * Notifications of state updates need to be externally synchronized with state queries.
2148   * This routine *MUST* remain interrupt safe, as it is potentially invoked
2149   * within the context of a timer interrupt.
2150   *
2151   * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2152   *      Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2153   *      I don't think that cost is worth not having the right answer.
2154   */
2155  int
2156  proc_get_effective_thread_policy(thread_t thread,
2157      int      flavor)
2158  {
2159  	int value = 0;
2160  
2161  	switch (flavor) {
2162  	case TASK_POLICY_DARWIN_BG:
2163  		/*
2164  		 * This call is used within the timer layer, as well as
2165  		 * prioritizing requests to the graphics system.
2166  		 * It also informs SFI and originator-bg-state.
2167  		 * Returns 1 for background mode, 0 for normal mode
2168  		 */
2169  
2170  		value = thread->effective_policy.thep_darwinbg ? 1 : 0;
2171  		break;
2172  	case TASK_POLICY_IO:
2173  		/*
2174  		 * The I/O system calls here to find out what throttling tier to apply to an operation.
2175  		 * Returns THROTTLE_LEVEL_* values
2176  		 */
2177  		value = thread->effective_policy.thep_io_tier;
2178  		if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
2179  			value = MIN(value, thread->iotier_override);
2180  		}
2181  		break;
2182  	case TASK_POLICY_PASSIVE_IO:
2183  		/*
2184  		 * The I/O system calls here to find out whether an operation should be passive.
2185  		 * (i.e. not cause operations with lower throttle tiers to be throttled)
2186  		 * Returns 1 for passive mode, 0 for normal mode
2187  		 *
2188  		 * If an override is causing IO to go into a lower tier, we also set
2189  		 * the passive bit so that a thread doesn't end up stuck in its own throttle
2190  		 * window when the override goes away.
2191  		 */
2192  		value = thread->effective_policy.thep_io_passive ? 1 : 0;
2193  		if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2194  		    thread->iotier_override < thread->effective_policy.thep_io_tier) {
2195  			value = 1;
2196  		}
2197  		break;
2198  	case TASK_POLICY_ALL_SOCKETS_BG:
2199  		/*
2200  		 * do_background_socket() calls this to determine whether
2201  		 * it should change the thread's sockets
2202  		 * Returns 1 for background mode, 0 for normal mode
2203  		 * This consults both thread and task so un-DBGing a thread while the task is BG
2204  		 * doesn't get you out of the network throttle.
2205  		 */
2206  		value = (thread->effective_policy.thep_all_sockets_bg ||
2207  		    thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
2208  		break;
2209  	case TASK_POLICY_NEW_SOCKETS_BG:
2210  		/*
2211  		 * socreate() calls this to determine if it should mark a new socket as background
2212  		 * Returns 1 for background mode, 0 for normal mode
2213  		 */
2214  		value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
2215  		break;
2216  	case TASK_POLICY_LATENCY_QOS:
2217  		/*
2218  		 * timer arming calls into here to find out the timer coalescing level
2219  		 * Returns a latency QoS tier (0-6)
2220  		 */
2221  		value = thread->effective_policy.thep_latency_qos;
2222  		break;
2223  	case TASK_POLICY_THROUGH_QOS:
2224  		/*
2225  		 * This value is passed into the urgency callout from the scheduler
2226  		 * to the performance management subsystem.
2227  		 *
2228  		 * Returns a throughput QoS tier (0-6)
2229  		 */
2230  		value = thread->effective_policy.thep_through_qos;
2231  		break;
2232  	case TASK_POLICY_QOS:
2233  		/*
2234  		 * This is communicated to the performance management layer and SFI.
2235  		 *
2236  		 * Returns a QoS policy tier
2237  		 */
2238  		value = thread->effective_policy.thep_qos;
2239  		break;
2240  	default:
2241  		panic("unknown thread policy flavor %d", flavor);
2242  		break;
2243  	}
2244  
2245  	return value;
2246  }
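
      /*
       * Usage sketch (illustrative): the I/O layer pulls the effective tier
       * at issue time rather than subscribing to updates:
       *
       *	int tier = proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
       *
       * yields a THROTTLE_LEVEL_* value that already folds in the thread's
       * iotier_override, which is why this accessor must stay lock-free and
       * interrupt safe.
       */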
2247  
2248  
2249  /*
2250   * (integer_t) casts limit the number of bits we can fit here;
2251   * this interface is deprecated, replaced by the _EXT struct.
2252   */
2253  static void
2254  proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2255  {
2256  	uint64_t bits = 0;
2257  	struct thread_requested_policy requested = thread->requested_policy;
2258  
2259  	bits |= (requested.thrp_int_darwinbg    ? POLICY_REQ_INT_DARWIN_BG  : 0);
2260  	bits |= (requested.thrp_ext_darwinbg    ? POLICY_REQ_EXT_DARWIN_BG  : 0);
2261  	bits |= (requested.thrp_int_iotier      ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2262  	bits |= (requested.thrp_ext_iotier      ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2263  	bits |= (requested.thrp_int_iopassive   ? POLICY_REQ_INT_PASSIVE_IO : 0);
2264  	bits |= (requested.thrp_ext_iopassive   ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2265  
2266  	bits |= (requested.thrp_qos             ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2267  	bits |= (requested.thrp_qos_override    ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT)   : 0);
2268  
2269  	bits |= (requested.thrp_pidbind_bg      ? POLICY_REQ_PIDBIND_BG     : 0);
2270  
2271  	bits |= (requested.thrp_latency_qos     ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2272  	bits |= (requested.thrp_through_qos     ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2273  
2274  	info->requested = (integer_t) bits;
2275  	bits = 0;
2276  
2277  	struct thread_effective_policy effective = thread->effective_policy;
2278  
2279  	bits |= (effective.thep_darwinbg        ? POLICY_EFF_DARWIN_BG      : 0);
2280  
2281  	bits |= (effective.thep_io_tier         ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2282  	bits |= (effective.thep_io_passive      ? POLICY_EFF_IO_PASSIVE     : 0);
2283  	bits |= (effective.thep_all_sockets_bg  ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2284  	bits |= (effective.thep_new_sockets_bg  ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2285  
2286  	bits |= (effective.thep_qos             ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2287  
2288  	bits |= (effective.thep_latency_qos     ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2289  	bits |= (effective.thep_through_qos     ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2290  
2291  	info->effective = (integer_t)bits;
2292  	bits = 0;
2293  
2294  	info->pending = 0;
2295  }
2296  
2297  /*
2298   * Sneakily trace either the task and thread requested
2299   * or just the thread requested, depending on whether we have enough room.
2300   * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2301   *
2302   *                                LP32            LP64
2303   * threquested_0(thread)          thread[0]       task[0]
2304   * threquested_1(thread)          thread[1]       thread[0]
2305   *
2306   */
2307  
2308  uintptr_t
2309  threquested_0(thread_t thread)
2310  {
2311  	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2312  
2313  	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2314  
2315  	return raw[0];
2316  }
2317  
2318  uintptr_t
2319  threquested_1(thread_t thread)
2320  {
2321  #if defined __LP64__
2322  	return *(uintptr_t*)&thread->task->requested_policy;
2323  #else
2324  	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2325  	return raw[1];
2326  #endif
2327  }
2328  
2329  uintptr_t
2330  theffective_0(thread_t thread)
2331  {
2332  	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2333  
2334  	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2335  	return raw[0];
2336  }
2337  
2338  uintptr_t
2339  theffective_1(thread_t thread)
2340  {
2341  #if defined __LP64__
2342  	return *(uintptr_t*)&thread->task->effective_policy;
2343  #else
2344  	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2345  	return raw[1];
2346  #endif
2347  }
2348  
2349  
2350  /*
2351   * Set an override on the thread which is consulted with a
2352   * higher priority than the task/thread policy. This should
2353   * only be set for temporary grants until the thread
2354   * returns to the userspace boundary
2355   *
2356   * We use atomic operations to swap in the override, with
2357   * the assumption that the thread itself can
2358   * read the override and clear it on return to userspace.
2359   *
2360   * No locking is performed, since it is acceptable to see
2361   * a stale override for one loop through throttle_lowpri_io().
2362   * However a thread reference must be held on the thread.
2363   */
2364  
2365  void
2366  set_thread_iotier_override(thread_t thread, int policy)
2367  {
2368  	int current_override;
2369  
2370  	/* Let most aggressive I/O policy win until user boundary */
2371  	do {
2372  		current_override = thread->iotier_override;
2373  
2374  		if (current_override != THROTTLE_LEVEL_NONE) {
2375  			policy = MIN(current_override, policy);
2376  		}
2377  
2378  		if (current_override == policy) {
2379  			/* no effective change */
2380  			return;
2381  		}
2382  	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2383  
2384  	/*
2385  	 * Since the thread may be currently throttled,
2386  	 * re-evaluate tiers and potentially break out
2387  	 * of an msleep
2388  	 */
2389  	rethrottle_thread(thread->uthread);
2390  }
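
      /*
       * Illustrative race: if one CPU is storing THROTTLE_LEVEL_TIER2 while
       * another stores THROTTLE_LEVEL_TIER0 for the same thread, the
       * OSCompareAndSwap() loop above retries until the most aggressive
       * (numerically lowest) tier is left in iotier_override, so the TIER0
       * override wins until the thread returns to userspace.
       */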
2391  
2392  /*
2393   * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2394   * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2395   * semaphores, dispatch_sync) may result in priority inversions, where a higher-priority
2396   * thread (by scheduler priority, I/O tier, or QoS tier) is waiting on a resource owned by a
2397   * lower-priority thread. In these cases, we attempt to propagate the priority token, as long
2398   * synchronization subsystem should maintain the information of owner->resource and
2399   * resource->waiters itself.
2400   */
2401  
2402  /*
2403   * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2404   * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2405   * to be handled specially in the future, but for now it's fine to slam
2406   * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2407   */
2408  static void
2409  canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2410  {
2411  	if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2412  		/* Map all input resource/type to a single one */
2413  		*resource = USER_ADDR_NULL;
2414  		*resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2415  	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2416  		/* no transform */
2417  	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2418  		/* Map all mutex overrides to a single one, to avoid memory overhead */
2419  		if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2420  			*resource = USER_ADDR_NULL;
2421  		}
2422  	}
2423  }
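
      /*
       * Example (illustrative): under
       * QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE,
       * two contended pthread mutexes at different addresses both canonicalize
       * to (USER_ADDR_NULL, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) and share
       * one override object, while other resource types keep one override
       * object per resource address.
       */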
2424  
2425  /* This helper routine finds an existing override if known. Locking should be done by caller */
2426  static struct thread_qos_override *
2427  find_qos_override(thread_t thread,
2428      user_addr_t resource,
2429      int resource_type)
2430  {
2431  	struct thread_qos_override *override;
2432  
2433  	override = thread->overrides;
2434  	while (override) {
2435  		if (override->override_resource == resource &&
2436  		    override->override_resource_type == resource_type) {
2437  			return override;
2438  		}
2439  
2440  		override = override->override_next;
2441  	}
2442  
2443  	return NULL;
2444  }
2445  
2446  static void
2447  find_and_decrement_qos_override(thread_t       thread,
2448      user_addr_t    resource,
2449      int            resource_type,
2450      boolean_t      reset,
2451      struct thread_qos_override **free_override_list)
2452  {
2453  	struct thread_qos_override *override, *override_prev;
2454  
2455  	override_prev = NULL;
2456  	override = thread->overrides;
2457  	while (override) {
2458  		struct thread_qos_override *override_next = override->override_next;
2459  
2460  		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
2461  		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
2462  			if (reset) {
2463  				override->override_contended_resource_count = 0;
2464  			} else {
2465  				override->override_contended_resource_count--;
2466  			}
2467  
2468  			if (override->override_contended_resource_count == 0) {
2469  				if (override_prev == NULL) {
2470  					thread->overrides = override_next;
2471  				} else {
2472  					override_prev->override_next = override_next;
2473  				}
2474  
2475  				/* Add to out-param for later zfree */
2476  				override->override_next = *free_override_list;
2477  				*free_override_list = override;
2478  			} else {
2479  				override_prev = override;
2480  			}
2481  
2482  			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2483  				return;
2484  			}
2485  		} else {
2486  			override_prev = override;
2487  		}
2488  
2489  		override = override_next;
2490  	}
2491  }
2492  
2493  /* This helper recalculates the current requested override using the policy selected at boot */
2494  static int
2495  calculate_requested_qos_override(thread_t thread)
2496  {
2497  	if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2498  		return THREAD_QOS_UNSPECIFIED;
2499  	}
2500  
2501  	/* iterate over all overrides and calculate MAX */
2502  	struct thread_qos_override *override;
2503  	int qos_override = THREAD_QOS_UNSPECIFIED;
2504  
2505  	override = thread->overrides;
2506  	while (override) {
2507  		qos_override = MAX(qos_override, override->override_qos);
2508  		override = override->override_next;
2509  	}
2510  
2511  	return qos_override;
2512  }
2513  
2514  /*
2515   * Returns:
2516   * - 0 on success
2517   * - EINVAL if some invalid input was passed
2518   */
2519  static int
2520  proc_thread_qos_add_override_internal(thread_t         thread,
2521      int              override_qos,
2522      boolean_t        first_override_for_resource,
2523      user_addr_t      resource,
2524      int              resource_type)
2525  {
2526  	struct task_pend_token pend_token = {};
2527  	int rc = 0;
2528  
2529  	thread_mtx_lock(thread);
2530  
2531  	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2532  	    thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2533  
2534  	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
2535  	    uint64_t, thread->requested_policy.thrp_qos,
2536  	    uint64_t, thread->effective_policy.thep_qos,
2537  	    int, override_qos, boolean_t, first_override_for_resource);
2538  
2539  	struct thread_qos_override *override;
2540  	struct thread_qos_override *override_new = NULL;
2541  	int new_qos_override, prev_qos_override;
2542  	int new_effective_qos;
2543  
2544  	canonicalize_resource_and_type(&resource, &resource_type);
2545  
2546  	override = find_qos_override(thread, resource, resource_type);
2547  	if (first_override_for_resource && !override) {
2548  		/* We need to allocate a new object. Drop the thread mutex and
2549  		 * recheck afterwards in case someone else added the override while it was dropped.
2550  		 */
2551  		thread_mtx_unlock(thread);
2552  		override_new = zalloc(thread_qos_override_zone);
2553  		thread_mtx_lock(thread);
2554  		override = find_qos_override(thread, resource, resource_type);
2555  	}
2556  	if (first_override_for_resource && override) {
2557  		/* Someone else already allocated while the thread mutex was dropped */
2558  		override->override_contended_resource_count++;
2559  	} else if (!override && override_new) {
2560  		override = override_new;
2561  		override_new = NULL;
2562  		override->override_next = thread->overrides;
2563  		/* since first_override_for_resource was TRUE */
2564  		override->override_contended_resource_count = 1;
2565  		override->override_resource = resource;
2566  		override->override_resource_type = (int16_t)resource_type;
2567  		override->override_qos = THREAD_QOS_UNSPECIFIED;
2568  		thread->overrides = override;
2569  	}
2570  
2571  	if (override) {
2572  		if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
2573  			override->override_qos = (int16_t)override_qos;
2574  		} else {
2575  			override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
2576  		}
2577  	}
2578  
2579  	/* Determine how to combine the various overrides into a single current
2580  	 * requested override
2581  	 */
2582  	new_qos_override = calculate_requested_qos_override(thread);
2583  
2584  	prev_qos_override = proc_get_thread_policy_locked(thread,
2585  	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2586  
2587  	if (new_qos_override != prev_qos_override) {
2588  		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2589  		    TASK_POLICY_QOS_OVERRIDE,
2590  		    new_qos_override, 0, &pend_token);
2591  	}
2592  
2593  	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2594  
2595  	thread_mtx_unlock(thread);
2596  
2597  	thread_policy_update_complete_unlocked(thread, &pend_token);
2598  
2599  	if (override_new) {
2600  		zfree(thread_qos_override_zone, override_new);
2601  	}
2602  
2603  	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
2604  	    int, new_qos_override, int, new_effective_qos, int, rc);
2605  
2606  	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2607  	    new_qos_override, resource, resource_type, 0, 0);
2608  
2609  	return rc;
2610  }
2611  
2612  int
2613  proc_thread_qos_add_override(task_t           task,
2614      thread_t         thread,
2615      uint64_t         tid,
2616      int              override_qos,
2617      boolean_t        first_override_for_resource,
2618      user_addr_t      resource,
2619      int              resource_type)
2620  {
2621  	boolean_t has_thread_reference = FALSE;
2622  	int rc = 0;
2623  
2624  	if (thread == THREAD_NULL) {
2625  		thread = task_findtid(task, tid);
2626  		/* returns referenced thread */
2627  
2628  		if (thread == THREAD_NULL) {
2629  			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2630  			    tid, 0, 0xdead, 0, 0);
2631  			return ESRCH;
2632  		}
2633  		has_thread_reference = TRUE;
2634  	} else {
2635  		assert(thread->task == task);
2636  	}
2637  	rc = proc_thread_qos_add_override_internal(thread, override_qos,
2638  	    first_override_for_resource, resource, resource_type);
2639  	if (has_thread_reference) {
2640  		thread_deallocate(thread);
2641  	}
2642  
2643  	return rc;
2644  }
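
      /*
       * Usage sketch (illustrative): a user-synchronization subsystem that
       * learns a background servicer owns a mutex needed at UTILITY might
       * issue, with tid identifying the owner and resource the user-space
       * mutex address:
       *
       *	proc_thread_qos_add_override(task, THREAD_NULL, tid,
       *	    THREAD_QOS_UTILITY, TRUE, resource,
       *	    THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
       *
       * and balance it with proc_thread_qos_remove_override(task, THREAD_NULL,
       * tid, resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) once the
       * resource is released.
       */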
2645  
2646  static void
2647  proc_thread_qos_remove_override_internal(thread_t       thread,
2648      user_addr_t    resource,
2649      int            resource_type,
2650      boolean_t      reset)
2651  {
2652  	struct task_pend_token pend_token = {};
2653  
2654  	struct thread_qos_override *deferred_free_override_list = NULL;
2655  	int new_qos_override, prev_qos_override, new_effective_qos;
2656  
2657  	thread_mtx_lock(thread);
2658  
2659  	canonicalize_resource_and_type(&resource, &resource_type);
2660  
2661  	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
2662  
2663  	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
2664  	    thread_tid(thread), resource, reset, 0, 0);
2665  
2666  	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
2667  	    uint64_t, thread->requested_policy.thrp_qos,
2668  	    uint64_t, thread->effective_policy.thep_qos);
2669  
2670  	/* Determine how to combine the various overrides into a single current requested override */
2671  	new_qos_override = calculate_requested_qos_override(thread);
2672  
2673  	spl_t s = splsched();
2674  	thread_lock(thread);
2675  
2676  	/*
2677  	 * The override chain and therefore the value of the current override is locked with thread mutex,
2678  	 * so we can do a get/set without races.  However, the rest of thread policy is locked under the spinlock.
2679  	 * This means you can't change the current override from a spinlock-only setter.
2680  	 */
2681  	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2682  
2683  	if (new_qos_override != prev_qos_override) {
2684  		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
2685  	}
2686  
2687  	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2688  
2689  	thread_unlock(thread);
2690  	splx(s);
2691  
2692  	thread_mtx_unlock(thread);
2693  
2694  	thread_policy_update_complete_unlocked(thread, &pend_token);
2695  
2696  	while (deferred_free_override_list) {
2697  		struct thread_qos_override *override_next = deferred_free_override_list->override_next;
2698  
2699  		zfree(thread_qos_override_zone, deferred_free_override_list);
2700  		deferred_free_override_list = override_next;
2701  	}
2702  
2703  	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
2704  	    int, new_qos_override, int, new_effective_qos);
2705  
2706  	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
2707  	    thread_tid(thread), 0, 0, 0, 0);
2708  }
2709  
2710  int
2711  proc_thread_qos_remove_override(task_t      task,
2712      thread_t    thread,
2713      uint64_t    tid,
2714      user_addr_t resource,
2715      int         resource_type)
2716  {
2717  	boolean_t has_thread_reference = FALSE;
2718  
2719  	if (thread == THREAD_NULL) {
2720  		thread = task_findtid(task, tid);
2721  		/* returns referenced thread */
2722  
2723  		if (thread == THREAD_NULL) {
2724  			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2725  			    tid, 0, 0xdead, 0, 0);
2726  			return ESRCH;
2727  		}
2728  		has_thread_reference = TRUE;
2729  	} else {
2730  		assert(task == thread->task);
2731  	}
2732  
2733  	proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2734  
2735  	if (has_thread_reference) {
2736  		thread_deallocate(thread);
2737  	}
2738  
2739  	return 0;
2740  }
2741  
2742  /* Deallocate before thread termination */
2743  void
2744  proc_thread_qos_deallocate(thread_t thread)
2745  {
2746  	/* This thread must have no more IPC overrides. */
2747  	assert(thread->kevent_overrides == 0);
2748  	assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2749  	assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2750  
2751  	/*
2752  	 * Clear out any lingering override objects.
2753  	 */
2754  	struct thread_qos_override *override;
2755  
2756  	thread_mtx_lock(thread);
2757  	override = thread->overrides;
2758  	thread->overrides = NULL;
2759  	thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2760  	/* We don't need to re-evaluate thread policy here because the thread has already exited */
2761  	thread_mtx_unlock(thread);
2762  
2763  	while (override) {
2764  		struct thread_qos_override *override_next = override->override_next;
2765  
2766  		zfree(thread_qos_override_zone, override);
2767  		override = override_next;
2768  	}
2769  }
2770  
2771  /*
2772   * Set up the primordial thread's QoS
2773   */
2774  void
2775  task_set_main_thread_qos(task_t task, thread_t thread)
2776  {
2777  	struct task_pend_token pend_token = {};
2778  
2779  	assert(thread->task == task);
2780  
2781  	thread_mtx_lock(thread);
2782  
2783  	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2784  	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2785  	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
2786  	    thread->requested_policy.thrp_qos, 0);
2787  
2788  	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2789  
2790  	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2791  	    primordial_qos, 0, &pend_token);
2792  
2793  	thread_mtx_unlock(thread);
2794  
2795  	thread_policy_update_complete_unlocked(thread, &pend_token);
2796  
2797  	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2798  	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2799  	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
2800  	    primordial_qos, 0);
2801  }
2802  
2803  /*
2804   * KPI for pthread kext
2805   *
2806   * Return a good guess at what the initial manager QoS will be
2807   * Dispatch can override this in userspace if it so chooses
2808   */
2809  thread_qos_t
2810  task_get_default_manager_qos(task_t task)
2811  {
2812  	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2813  
2814  	if (primordial_qos == THREAD_QOS_LEGACY) {
2815  		primordial_qos = THREAD_QOS_USER_INITIATED;
2816  	}
2817  
2818  	return primordial_qos;
2819  }
2820  
2821  /*
2822   * Check if the kernel promotion on thread has changed
2823   * and apply it.
2824   *
2825   * thread locked on entry and exit
2826   */
2827  boolean_t
2828  thread_recompute_kernel_promotion_locked(thread_t thread)
2829  {
2830  	boolean_t needs_update = FALSE;
2831  	uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);
2832  
2833  	/*
2834  	 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
2835  	 * TURNSTILE_KERNEL_PROMOTE adds threads to the waitq already capped to MAXPRI_PROMOTE
2836  	 * and propagates the priority through the chain with the same cap, because as of now it does
2837  	 * not differentiate on the kernel primitive.
2838  	 *
2839  	 * If this assumption changes with the adoption of a kernel primitive that does not
2840  	 * cap the priority when adding/propagating,
2841  	 * then here is the place to put the generic cap for all kernel primitives
2842  	 * (convert the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
2843  	 */
2844  	assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
2845  
2846  	if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
2847  		KDBG(MACHDBG_CODE(
2848  			    DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
2849  		    thread_tid(thread),
2850  		    kern_promotion_schedpri,
2851  		    thread->kern_promotion_schedpri);
2852  
2853  		needs_update = TRUE;
2854  		thread->kern_promotion_schedpri = kern_promotion_schedpri;
2855  		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
2856  	}
2857  
2858  	return needs_update;
2859  }
2860  
2861  /*
2862   * Check if the user promotion on thread has changed
2863   * and apply it.
2864   *
2865   * thread locked on entry, might drop the thread lock
2866   * and reacquire it.
2867   */
2868  boolean_t
2869  thread_recompute_user_promotion_locked(thread_t thread)
2870  {
2871  	boolean_t needs_update = FALSE;
2872  	struct task_pend_token pend_token = {};
2873  	uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
2874  	int old_base_pri = thread->base_pri;
2875  	thread_qos_t qos_promotion;
2876  
2877  	/* Check if user promotion has changed */
2878  	if (thread->user_promotion_basepri == user_promotion_basepri) {
2879  		return needs_update;
2880  	} else {
2881  		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2882  		    (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
2883  		    thread_tid(thread),
2884  		    user_promotion_basepri,
2885  		    thread->user_promotion_basepri,
2886  		    0, 0);
2887  		KDBG(MACHDBG_CODE(
2888  			    DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
2889  		    thread_tid(thread),
2890  		    user_promotion_basepri,
2891  		    thread->user_promotion_basepri);
2892  	}
2893  
2894  	/* Update the user promotion base pri */
2895  	thread->user_promotion_basepri = user_promotion_basepri;
2896  	pend_token.tpt_force_recompute_pri = 1;
2897  
2898  	if (user_promotion_basepri <= MAXPRI_THROTTLE) {
2899  		qos_promotion = THREAD_QOS_UNSPECIFIED;
2900  	} else {
2901  		qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
2902  	}
2903  
2904  	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2905  	    TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
2906  
2907  	if (thread_get_waiting_turnstile(thread) &&
2908  	    thread->base_pri != old_base_pri) {
2909  		needs_update = TRUE;
2910  	}
2911  
2912  	thread_unlock(thread);
2913  
2914  	thread_policy_update_complete_unlocked(thread, &pend_token);
2915  
2916  	thread_lock(thread);
2917  
2918  	return needs_update;
2919  }
2920  
2921  /*
2922   * Convert a thread's user promotion base priority to a QoS class, for threads in the QoS world.
2923   * Priorities above the UI QoS base priority are mapped to UI.
2924   */
2925  thread_qos_t
2926  thread_user_promotion_qos_for_pri(int priority)
2927  {
2928  	thread_qos_t qos;
2929  	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
2930  		if (thread_qos_policy_params.qos_pri[qos] <= priority) {
2931  			return qos;
2932  		}
2933  	}
2934  	return THREAD_QOS_MAINTENANCE;
2935  }
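
      /*
       * Worked example (illustrative, using the qos_pri table at the top of
       * this file): a promotion base priority equal to BASEPRI_DEFAULT maps to
       * THREAD_QOS_LEGACY, anything at or above the USER_INTERACTIVE entry maps
       * to UI, and priorities below every entry fall through to MAINTENANCE.
       */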
2936  
2937  /*
2938   * Set the thread's QoS Kevent override
2939   * Owned by the Kevent subsystem
2940   *
2941   * May be called with spinlocks held, but not spinlocks
2942   * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
2943   *
2944   * One 'add' must be balanced by one 'drop'.
2945   * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
2946   * Before the thread is deallocated, there must be 0 remaining overrides.
2947   */
2948  static void
2949  thread_kevent_override(thread_t    thread,
2950      uint32_t    qos_override,
2951      boolean_t   is_new_override)
2952  {
2953  	struct task_pend_token pend_token = {};
2954  	boolean_t needs_update;
2955  
2956  	spl_t s = splsched();
2957  	thread_lock(thread);
2958  
2959  	uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
2960  
2961  	assert(qos_override > THREAD_QOS_UNSPECIFIED);
2962  	assert(qos_override < THREAD_QOS_LAST);
2963  
2964  	if (is_new_override) {
2965  		if (thread->kevent_overrides++ == 0) {
2966  			/* This add is the first override for this thread */
2967  			assert(old_override == THREAD_QOS_UNSPECIFIED);
2968  		} else {
2969  			/* There are already other overrides in effect for this thread */
2970  			assert(old_override > THREAD_QOS_UNSPECIFIED);
2971  		}
2972  	} else {
2973  		/* There must be at least one override (the previous add call) in effect */
2974  		assert(thread->kevent_overrides > 0);
2975  		assert(old_override > THREAD_QOS_UNSPECIFIED);
2976  	}
2977  
2978  	/*
2979  	 * We can't allow lowering if there are several IPC overrides because
2980  	 * the caller can't possibly know the whole truth
2981  	 */
2982  	if (thread->kevent_overrides == 1) {
2983  		needs_update = qos_override != old_override;
2984  	} else {
2985  		needs_update = qos_override > old_override;
2986  	}
2987  
2988  	if (needs_update) {
2989  		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2990  		    TASK_POLICY_QOS_KEVENT_OVERRIDE,
2991  		    qos_override, 0, &pend_token);
2992  		assert(pend_token.tpt_update_sockets == 0);
2993  	}
2994  
2995  	thread_unlock(thread);
2996  	splx(s);
2997  
2998  	thread_policy_update_complete_unlocked(thread, &pend_token);
2999  }
3000  
3001  void
3002  thread_add_kevent_override(thread_t thread, uint32_t qos_override)
3003  {
3004  	thread_kevent_override(thread, qos_override, TRUE);
3005  }
3006  
3007  void
3008  thread_update_kevent_override(thread_t thread, uint32_t qos_override)
3009  {
3010  	thread_kevent_override(thread, qos_override, FALSE);
3011  }
3012  
3013  void
3014  thread_drop_kevent_override(thread_t thread)
3015  {
3016  	struct task_pend_token pend_token = {};
3017  
3018  	spl_t s = splsched();
3019  	thread_lock(thread);
3020  
3021  	assert(thread->kevent_overrides > 0);
3022  
3023  	if (--thread->kevent_overrides == 0) {
3024  		/*
3025  		 * There are no more overrides for this thread, so we should
3026  		 * clear out the saturated override value
3027  		 */
3028  
3029  		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3030  		    TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3031  		    0, &pend_token);
3032  	}
3033  
3034  	thread_unlock(thread);
3035  	splx(s);
3036  
3037  	thread_policy_update_complete_unlocked(thread, &pend_token);
3038  }
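
      /*
       * Lifecycle sketch (illustrative): the kevent subsystem might pair these as
       *
       *	thread_add_kevent_override(thread, THREAD_QOS_USER_INITIATED);
       *	...
       *	thread_update_kevent_override(thread, THREAD_QOS_USER_INTERACTIVE);
       *	...
       *	thread_drop_kevent_override(thread);
       *
       * Each add is balanced by one drop; while several adds are outstanding,
       * an update can only raise the saturated override value.
       */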
3039  
3040  /*
3041   * Set the thread's QoS Workloop Servicer override
3042   * Owned by the Kevent subsystem
3043   *
3044   * May be called with spinlocks held, but not spinlocks
3045   * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3046   *
3047   * One 'add' must be balanced by one 'drop'.
3048   * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
3049   * Before the thread is deallocated, there must be 0 remaining overrides.
3050   */
3051  static void
3052  thread_servicer_override(thread_t    thread,
3053      uint32_t    qos_override,
3054      boolean_t   is_new_override)
3055  {
3056  	struct task_pend_token pend_token = {};
3057  
3058  	spl_t s = splsched();
3059  	thread_lock(thread);
3060  
3061  	if (is_new_override) {
3062  		assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3063  	} else {
3064  		assert(thread->requested_policy.thrp_qos_wlsvc_override);
3065  	}
3066  
3067  	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3068  	    TASK_POLICY_QOS_SERVICER_OVERRIDE,
3069  	    qos_override, 0, &pend_token);
3070  
3071  	thread_unlock(thread);
3072  	splx(s);
3073  
3074  	assert(pend_token.tpt_update_sockets == 0);
3075  	thread_policy_update_complete_unlocked(thread, &pend_token);
3076  }
3077  
3078  void
3079  thread_add_servicer_override(thread_t thread, uint32_t qos_override)
3080  {
3081  	assert(qos_override > THREAD_QOS_UNSPECIFIED);
3082  	assert(qos_override < THREAD_QOS_LAST);
3083  
3084  	thread_servicer_override(thread, qos_override, TRUE);
3085  }
3086  
3087  void
3088  thread_update_servicer_override(thread_t thread, uint32_t qos_override)
3089  {
3090  	assert(qos_override > THREAD_QOS_UNSPECIFIED);
3091  	assert(qos_override < THREAD_QOS_LAST);
3092  
3093  	thread_servicer_override(thread, qos_override, FALSE);
3094  }
3095  
3096  void
3097  thread_drop_servicer_override(thread_t thread)
3098  {
3099  	thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
3100  }
3101  
3102  
3103  /* Get current requested qos / relpri, may be called from spinlock context */
3104  thread_qos_t
3105  thread_get_requested_qos(thread_t thread, int *relpri)
3106  {
3107  	int relprio_value = 0;
3108  	thread_qos_t qos;
3109  
3110  	qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3111  	    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3112  	if (relpri) {
3113  		*relpri = -relprio_value;
3114  	}
3115  	return qos;
3116  }
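
      /*
       * Illustrative: the relative priority is stored negated, so a thread set
       * to THREAD_QOS_UTILITY with a user-visible tier_importance of -4 has
       * thrp_qos_relprio == 4, and a caller here gets back
       * qos == THREAD_QOS_UTILITY with *relpri == -4.
       */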
3117  
3118  /*
3119   * This function promotes the thread's priority,
3120   * since exec could block other threads calling
3121   * proc_find on the proc. The boost must be removed
3122   * via a call to thread_clear_exec_promotion.
3123   *
3124   * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3125   */
3126  void
3127  thread_set_exec_promotion(thread_t thread)
3128  {
3129  	spl_t s = splsched();
3130  	thread_lock(thread);
3131  
3132  	sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3133  
3134  	thread_unlock(thread);
3135  	splx(s);
3136  }
3137  
3138  /*
3139   * This function will clear the exec thread
3140   * promotion set on the thread by thread_set_exec_promotion.
3141   */
3142  void
3143  thread_clear_exec_promotion(thread_t thread)
3144  {
3145  	spl_t s = splsched();
3146  	thread_lock(thread);
3147  
3148  	sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3149  
3150  	thread_unlock(thread);
3151  	splx(s);
3152  }