thread_policy.c
/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/thread_act_server.h>

#include <kern/kern_types.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/affinity.h>
#include <mach/task_policy.h>
#include <kern/sfi.h>
#include <kern/policy_internal.h>
#include <sys/errno.h>
#include <sys/ulock.h>

#include <mach/machine/sdt.h>

#ifdef MACH_BSD
extern int      proc_selfpid(void);
extern char *   proc_name_address(void *p);
extern void     rethrottle_thread(void * uthread);
#endif /* MACH_BSD */

#define QOS_EXTRACT(q)        ((q) & 0xff)

uint32_t qos_override_mode;
#define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
#define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3

extern zone_t thread_qos_override_zone;

static void
proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);

/*
 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
 * to threads that don't have a QoS class set.
 */
const qos_policy_params_t thread_qos_policy_params = {
	/*
	 * This table defines the starting base priority of the thread,
	 * which will be modified by the thread importance and the task max priority
	 * before being applied.
	 */
	.qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
	.qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
	.qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
	.qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
	.qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
	.qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
	.qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,

	/*
	 * This table defines the highest IO priority that a thread marked with this
	 * QoS class can have.
	 */
	.qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
	.qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
	.qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,

	/*
	 * This table defines the highest QoS level that
	 * a thread marked with this QoS class can have.
	 */

	.qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
	.qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
	.qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
	.qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
	.qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),

	.qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
	.qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
	.qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
};
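/*
 * Illustrative reading of the tables above (editor's sketch, values taken
 * from the initializers): a THREAD_QOS_UTILITY thread starts from base
 * priority BASEPRI_UTILITY, may issue IO no higher than THROTTLE_LEVEL_TIER1,
 * and is limited to THROUGHPUT_QOS_TIER_2 and LATENCY_QOS_TIER_3.
 * QOS_EXTRACT() keeps only the low byte of the tier constants, whose upper
 * bits carry a magic/validation pattern in the Mach headers (an assumption
 * worth checking against mach/thread_policy.h), so the stored values are
 * plain tier indices suitable for array lookups.
 */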
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);

static int
thread_qos_scaled_relative_priority(int qos, int qos_relprio);

static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);

static void
proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static void
proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static void
thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static int
thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);

static int
proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);

static void
thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);

static void
thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);

void
thread_policy_init(void)
{
	if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
		printf("QOS override mode: 0x%08x\n", qos_override_mode);
	} else {
		qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
	}
}

boolean_t
thread_has_qos_policy(thread_t thread)
{
	return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
}


static void
thread_remove_qos_policy_locked(thread_t thread,
    task_pend_token_t pend_token)
{
	__unused int prev_qos = thread->requested_policy.thrp_qos;

	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);

	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
	    THREAD_QOS_UNSPECIFIED, 0, pend_token);
}

kern_return_t
thread_remove_qos_policy(thread_t thread)
{
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);
		return KERN_TERMINATED;
	}

	thread_remove_qos_policy_locked(thread, &pend_token);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	return KERN_SUCCESS;
}


boolean_t
thread_is_static_param(thread_t thread)
{
	if (thread->static_param) {
		DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
		return TRUE;
	}
	return FALSE;
}
/*
 * Relative priorities can range between 0REL and -15REL. These
 * map to QoS-specific ranges, to create non-overlapping priority
 * ranges.
 */
static int
thread_qos_scaled_relative_priority(int qos, int qos_relprio)
{
	int next_lower_qos;

	/* Fast path, since no validation or scaling is needed */
	if (qos_relprio == 0) {
		return 0;
	}

	switch (qos) {
	case THREAD_QOS_USER_INTERACTIVE:
		next_lower_qos = THREAD_QOS_USER_INITIATED;
		break;
	case THREAD_QOS_USER_INITIATED:
		next_lower_qos = THREAD_QOS_LEGACY;
		break;
	case THREAD_QOS_LEGACY:
		next_lower_qos = THREAD_QOS_UTILITY;
		break;
	case THREAD_QOS_UTILITY:
		next_lower_qos = THREAD_QOS_BACKGROUND;
		break;
	case THREAD_QOS_MAINTENANCE:
	case THREAD_QOS_BACKGROUND:
		next_lower_qos = 0;
		break;
	default:
		panic("Unrecognized QoS %d", qos);
		return 0;
	}

	int prio_range_max = thread_qos_policy_params.qos_pri[qos];
	int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;

	/*
	 * We now have the valid range that the scaled relative priority can map to. Note
	 * that the lower bound is exclusive, but the upper bound is inclusive. If the
	 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
	 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
	 * remainder.
	 */
	int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);

	return scaled_relprio;
}
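/*
 * Worked example (editor's sketch, assuming the usual BASEPRI_DEFAULT == 31
 * and BASEPRI_UTILITY == 20): a THREAD_QOS_LEGACY thread has the range
 * (20, 31]. A relative priority of -8REL then scales to
 *
 *     -(((31 - 20) * 8) >> 4) == -(88 >> 4) == -5
 *
 * so its starting base priority becomes 31 - 5 == 26, which stays above the
 * top of the UTILITY band (20), preserving the non-overlapping ranges.
 */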
/*
 * flag set by -qos-policy-allow boot-arg to allow
 * testing thread qos policy from userspace
 */
static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);

kern_return_t
thread_policy_set(
	thread_t                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t         policy_info,
	mach_msg_type_number_t  count)
{
	thread_qos_policy_data_t req_qos;
	kern_return_t kr;

	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!allow_qos_policy_set) {
		if (thread_is_static_param(thread)) {
			return KERN_POLICY_STATIC;
		}

		if (flavor == THREAD_QOS_POLICY) {
			return KERN_INVALID_ARGUMENT;
		}
	}

	/* Threads without static_param set reset their QoS when other policies are applied. */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		/* Store the existing tier, if we fail this call it is used to reset back. */
		req_qos.qos_tier = thread->requested_policy.thrp_qos;
		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

		kr = thread_remove_qos_policy(thread);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	kr = thread_policy_set_internal(thread, flavor, policy_info, count);

	/* Return KERN_QOS_REMOVED instead of KERN_SUCCESS if we succeeded. */
	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
		if (kr != KERN_SUCCESS) {
			/* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
		}
	}

	return kr;
}
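/*
 * Usage sketch (editor's note, not part of the kernel build): a userspace
 * caller typically reaches this path through the Mach thread_policy_set()
 * call, e.g. to request realtime scheduling. The tick values below are
 * placeholders; real callers derive them via mach_timebase_info().
 *
 *     thread_time_constraint_policy_data_t pol = {
 *         .period      = 0,
 *         .computation = 50000,   // illustrative only
 *         .constraint  = 100000,  // must be >= computation
 *         .preemptible = TRUE,
 *     };
 *     kern_return_t kr = thread_policy_set(mach_thread_self(),
 *         THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t)&pol,
 *         THREAD_TIME_CONSTRAINT_POLICY_COUNT);
 *
 * The kernel-side validation below additionally requires computation to
 * lie within [min_rt_quantum, max_rt_quantum].
 */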
kern_return_t
thread_policy_set_internal(
	thread_t                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t         policy_info,
	mach_msg_type_number_t  count)
{
	kern_return_t result = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		boolean_t timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		spl_t s = splsched();
		thread_lock(thread);

		thread_set_user_sched_mode_and_recompute_pri(thread, mode);

		thread_unlock(thread);
		splx(s);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	{
		thread_time_constraint_policy_t info;

		if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_policy_t)policy_info;

		if (info->constraint < info->computation ||
		    info->computation > max_rt_quantum ||
		    info->computation < min_rt_quantum) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		spl_t s = splsched();
		thread_lock(thread);

		thread->realtime.period = info->period;
		thread->realtime.computation = info->computation;
		thread->realtime.constraint = info->constraint;
		thread->realtime.preemptible = info->preemptible;

		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

		thread_unlock(thread);
		splx(s);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		spl_t s = splsched();
		thread_lock(thread);

		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}

#if !defined(XNU_TARGET_OS_OSX)
	case THREAD_BACKGROUND_POLICY:
	{
		thread_background_policy_t info;

		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (thread->task != current_task()) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_background_policy_t) policy_info;

		int enable;

		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
			enable = TASK_POLICY_ENABLE;
		} else {
			enable = TASK_POLICY_DISABLE;
		}

		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

		break;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t tqos;

		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t lqos;

		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

		break;
	}

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
		    info->qos_tier, -info->tier_importance, &pend_token);

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	return result;
}
/*
 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
 * Both result in FIXED mode scheduling.
 */
static sched_mode_t
convert_policy_to_sched_mode(integer_t policy)
{
	switch (policy) {
	case POLICY_TIMESHARE:
		return TH_MODE_TIMESHARE;
	case POLICY_RR:
	case POLICY_FIFO:
		return TH_MODE_FIXED;
	default:
		panic("unexpected sched policy: %d", policy);
		return TH_MODE_NONE;
	}
}

/*
 * Called either with the thread mutex locked
 * or from the pthread kext in a 'safe place'.
 */
static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t thread,
    sched_mode_t mode,
    integer_t priority,
    task_pend_token_t pend_token)
{
	kern_return_t kr = KERN_SUCCESS;

	spl_t s = splsched();
	thread_lock(thread);

	/* This path isn't allowed to change a thread out of realtime. */
	if ((thread->sched_mode == TH_MODE_REALTIME) ||
	    (thread->saved_mode == TH_MODE_REALTIME)) {
		kr = KERN_FAILURE;
		goto unlock;
	}

	if (thread->policy_reset) {
		kr = KERN_SUCCESS;
		goto unlock;
	}

	sched_mode_t old_mode = thread->sched_mode;

	/*
	 * Reverse engineer and apply the correct importance value
	 * from the requested absolute priority value.
	 *
	 * TODO: Store the absolute priority value instead
	 */

	if (priority >= thread->max_priority) {
		priority = thread->max_priority - thread->task_priority;
	} else if (priority >= MINPRI_KERNEL) {
		priority -= MINPRI_KERNEL;
	} else if (priority >= MINPRI_RESERVED) {
		priority -= MINPRI_RESERVED;
	} else {
		priority -= BASEPRI_DEFAULT;
	}

	priority += thread->task_priority;

	if (priority > thread->max_priority) {
		priority = thread->max_priority;
	} else if (priority < MINPRI) {
		priority = MINPRI;
	}

	thread->importance = priority - thread->task_priority;

	thread_set_user_sched_mode_and_recompute_pri(thread, mode);

	if (mode != old_mode) {
		pend_token->tpt_update_thread_sfi = 1;
	}

unlock:
	thread_unlock(thread);
	splx(s);

	return kr;
}
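/*
 * Worked example for the importance reverse-engineering above (editor's
 * sketch, assuming the usual constants BASEPRI_DEFAULT == 31,
 * MINPRI_RESERVED == 64, MINPRI_KERNEL == 80): a user thread with
 * task_priority == 31 and max_priority == 63 requesting absolute priority
 * 40 falls below MINPRI_RESERVED, so the request is rebased to
 * 40 - 31 == 9, reapplied as 9 + 31 == 40, and recorded as
 * thread->importance == 9. In the user band the requested absolute
 * priority therefore passes through unchanged as long as it stays within
 * [MINPRI, max_priority).
 */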
void
thread_freeze_base_pri(thread_t thread)
{
	assert(thread == current_thread());

	spl_t s = splsched();
	thread_lock(thread);

	assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
	thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;

	thread_unlock(thread);
	splx(s);
}

bool
thread_unfreeze_base_pri(thread_t thread)
{
	assert(thread == current_thread());
	integer_t base_pri;
	ast_t ast = 0;

	spl_t s = splsched();
	thread_lock(thread);

	assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
	thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;

	base_pri = thread->req_base_pri;
	if (base_pri != thread->base_pri) {
		/*
		 * This function returns "true" if the base pri change
		 * is the most likely cause for the preemption.
		 */
		sched_set_thread_base_priority(thread, base_pri);
		ast = ast_peek(AST_PREEMPT);
	}

	thread_unlock(thread);
	splx(s);

	return ast != 0;
}
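/*
 * Usage sketch (editor's note, hypothetical caller shape): the pair above
 * lets the current thread defer base-pri recomputation across a window and
 * learn afterwards whether the deferred change is the likely cause of a
 * pending preemption:
 *
 *     thread_freeze_base_pri(current_thread());
 *     // ... updates that would otherwise re-set base_pri immediately ...
 *     bool likely_preempt = thread_unfreeze_base_pri(current_thread());
 */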
uint8_t
thread_workq_pri_for_qos(thread_qos_t qos)
{
	assert(qos < THREAD_QOS_LAST);
	return (uint8_t)thread_qos_policy_params.qos_pri[qos];
}

thread_qos_t
thread_workq_qos_for_pri(int priority)
{
	thread_qos_t qos;
	if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
		// indicate that workq should map >UI threads to workq's
		// internal notation for above-UI work.
		return THREAD_QOS_UNSPECIFIED;
	}
	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
		// map a given priority up to the next nearest qos band.
		if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
			return qos;
		}
	}
	return THREAD_QOS_MAINTENANCE;
}
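/*
 * Worked example (editor's sketch, using the qos_pri table above): for
 * priority 25, the loop walks down from USER_INTERACTIVE until the next
 * lower band's base priority falls below the argument;
 * qos_pri[THREAD_QOS_UTILITY] == BASEPRI_UTILITY (20) < 25, so the result
 * is THREAD_QOS_LEGACY: a priority is rounded *up* to the nearest QoS band.
 * Priorities above qos_pri[THREAD_QOS_USER_INTERACTIVE] come back as
 * THREAD_QOS_UNSPECIFIED so the workqueue subsystem can treat them as
 * above-UI work.
 */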
/*
 * private interface for pthread workqueues
 *
 * Set scheduling policy & absolute priority for thread
 * May be called with spinlocks held
 * Thread mutex lock is not held
 */
void
thread_reset_workq_qos(thread_t thread, uint32_t qos)
{
	struct task_pend_token pend_token = {};

	assert(qos < THREAD_QOS_LAST);

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
	    &pend_token);

	assert(pend_token.tpt_update_sockets == 0);

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}

/*
 * private interface for pthread workqueues
 *
 * Set scheduling policy & absolute priority for thread
 * May be called with spinlocks held
 * Thread mutex lock is held
 */
void
thread_set_workq_override(thread_t thread, uint32_t qos)
{
	struct task_pend_token pend_token = {};

	assert(qos < THREAD_QOS_LAST);

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);

	assert(pend_token.tpt_update_sockets == 0);

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}

/*
 * private interface for pthread workqueues
 *
 * Set scheduling policy & absolute priority for thread
 * May be called with spinlocks held
 * Thread mutex lock is not held
 */
void
thread_set_workq_pri(thread_t thread,
    thread_qos_t qos,
    integer_t priority,
    integer_t policy)
{
	struct task_pend_token pend_token = {};
	sched_mode_t mode = convert_policy_to_sched_mode(policy);

	assert(qos < THREAD_QOS_LAST);
	assert(thread->static_param);

	if (!thread->static_param || !thread->active) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
	    0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* Concern: this doesn't hold the mutex... */

	__assert_only kern_return_t kr;
	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
	    &pend_token);
	assert(kr == KERN_SUCCESS);

	if (pend_token.tpt_update_thread_sfi) {
		sfi_reevaluate(thread);
	}
}

/*
 * thread_set_mode_and_absolute_pri:
 *
 * Set scheduling policy & absolute priority for thread, for deprecated
 * thread_set_policy and thread_policy interfaces.
 *
 * Called with nothing locked.
 */
kern_return_t
thread_set_mode_and_absolute_pri(thread_t thread,
    integer_t policy,
    integer_t priority)
{
	kern_return_t kr = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	sched_mode_t mode = convert_policy_to_sched_mode(policy);

	thread_mtx_lock(thread);

	if (!thread->active) {
		kr = KERN_TERMINATED;
		goto unlock;
	}

	if (thread_is_static_param(thread)) {
		kr = KERN_POLICY_STATIC;
		goto unlock;
	}

	/* Setting legacy policies on threads kills the current QoS */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		thread_remove_qos_policy_locked(thread, &pend_token);
	}

	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);

unlock:
	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	return kr;
}
/*
 * Set the thread's requested mode and recompute priority
 * Called with thread mutex and thread locked
 *
 * TODO: Mitigate potential problems caused by moving thread to end of runq
 * whenever its priority is recomputed
 *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
 */
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
{
	if (thread->policy_reset) {
		return;
	}

	boolean_t removed = thread_run_queue_remove(thread);

	/*
	 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
	 * That way there's zero confusion over which the user wants
	 * and which the kernel wants.
	 */
	if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
		thread->saved_mode = mode;
	} else {
		sched_set_thread_mode(thread, mode);
	}

	thread_recompute_priority(thread);

	if (removed) {
		thread_run_queue_reinsert(thread, SCHED_TAILQ);
	}
}

/* called at splsched with thread lock locked */
static void
thread_update_qos_cpu_time_locked(thread_t thread)
{
	task_t task = thread->task;
	uint64_t timer_sum, timer_delta;

	/*
	 * This is only as accurate as the distance between
	 * last context switch (embedded) or last user/kernel boundary transition (desktop)
	 * because user_timer and system_timer are only updated then.
	 *
	 * TODO: Consider running a timer_update operation here to update it first.
	 *       Maybe doable with interrupts disabled from current thread.
	 *       If the thread is on a different core, may not be easy to get right.
	 *
	 * TODO: There should be a function for this in timer.c
	 */

	timer_sum = timer_grab(&thread->user_timer);
	timer_sum += timer_grab(&thread->system_timer);
	timer_delta = timer_sum - thread->vtimer_qos_save;

	thread->vtimer_qos_save = timer_sum;

	uint64_t* task_counter = NULL;

	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
	switch (thread->effective_policy.thep_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);

	/* Update the task-level qos stats atomically, because we don't have the task lock. */
	switch (thread->requested_policy.thrp_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);
}

/*
 * called with no thread locks held
 * may hold task lock
 */
void
thread_update_qos_cpu_time(thread_t thread)
{
	thread_mtx_lock(thread);

	spl_t s = splsched();
	thread_lock(thread);

	thread_update_qos_cpu_time_locked(thread);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
/*
 * Calculate base priority from thread attributes, and set it on the thread
 *
 * Called with thread_lock and thread mutex held.
 */
extern thread_t vm_pageout_scan_thread;
extern boolean_t vps_dynamic_priority_enabled;

void
thread_recompute_priority(
	thread_t thread)
{
	integer_t priority;

	if (thread->policy_reset) {
		return;
	}

	if (thread->sched_mode == TH_MODE_REALTIME) {
		sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
		return;
	} else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
		int qos = thread->effective_policy.thep_qos;
		int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
		int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
		int qos_scaled_relprio;

		assert(qos >= 0 && qos < THREAD_QOS_LAST);
		assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);

		priority = thread_qos_policy_params.qos_pri[qos];
		qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);

		if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
			/* Bump priority 46 to 47 when in a frontmost app */
			qos_scaled_relprio += 1;
		}

		/* TODO: factor in renice priority here? */

		priority += qos_scaled_relprio;
	} else {
		if (thread->importance > MAXPRI) {
			priority = MAXPRI;
		} else if (thread->importance < -MAXPRI) {
			priority = -MAXPRI;
		} else {
			priority = thread->importance;
		}

		priority += thread->task_priority;
	}

	priority = MAX(priority, thread->user_promotion_basepri);

	/*
	 * Clamp priority back into the allowed range for this task.
	 * The initial priority value could be out of this range due to:
	 *      Task clamped to BG or Utility (max-pri is 4, or 20)
	 *      Task is user task (max-pri is 63)
	 *      Task is kernel task (max-pri is 95)
	 * Note that thread->importance is user-settable to any integer
	 * via THREAD_PRECEDENCE_POLICY.
	 */
	if (priority > thread->max_priority) {
		if (thread->effective_policy.thep_promote_above_task) {
			priority = MAX(thread->max_priority, thread->user_promotion_basepri);
		} else {
			priority = thread->max_priority;
		}
	} else if (priority < MINPRI) {
		priority = MINPRI;
	}

	if (thread->saved_mode == TH_MODE_REALTIME &&
	    thread->sched_flags & TH_SFLAG_FAILSAFE) {
		priority = DEPRESSPRI;
	}

	if (thread->effective_policy.thep_terminated == TRUE) {
		/*
		 * We temporarily want to override the expected priority to
		 * ensure that the thread exits in a timely manner.
		 * Note that this is allowed to exceed thread->max_priority
		 * so that the thread is no longer clamped to background
		 * during the final exit phase.
		 */
		if (priority < thread->task_priority) {
			priority = thread->task_priority;
		}
		if (priority < BASEPRI_DEFAULT) {
			priority = BASEPRI_DEFAULT;
		}
	}

#if !defined(XNU_TARGET_OS_OSX)
	/* No one can have a base priority less than MAXPRI_THROTTLE */
	if (priority < MAXPRI_THROTTLE) {
		priority = MAXPRI_THROTTLE;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	sched_set_thread_base_priority(thread, priority);
}
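/*
 * Worked example for the clamp above (editor's sketch): in a task clamped
 * to background (max_priority == MAXPRI_THROTTLE, i.e. 4), a thread with
 * user_promotion_basepri == 37 and thep_promote_above_task set keeps base
 * priority 37 rather than being squashed to 4; without the promote bit the
 * same thread would be limited to its task's max_priority.
 */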
/* Called with the task lock held, but not the thread mutex or spinlock */
void
thread_policy_update_tasklocked(
	thread_t           thread,
	integer_t          priority,
	integer_t          max_priority,
	task_pend_token_t  pend_token)
{
	thread_mtx_lock(thread);

	if (!thread->active || thread->policy_reset) {
		thread_mtx_unlock(thread);
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	__unused
	integer_t old_max_priority = thread->max_priority;

	assert(priority >= INT16_MIN && priority <= INT16_MAX);
	thread->task_priority = (int16_t)priority;

	assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
	thread->max_priority = (int16_t)max_priority;

	/*
	 * When backgrounding a thread, realtime and fixed priority threads
	 * should be demoted to timeshare background threads.
	 *
	 * TODO: Do this inside the thread policy update routine in order to avoid double
	 * remove/reinsert for a runnable thread
	 */
	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
		sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	thread_policy_update_spinlocked(thread, true, pend_token);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}

/*
 * Reset thread to default state in preparation for termination
 * Called with thread mutex locked
 *
 * Always called on current thread, so we don't need a run queue remove
 */
void
thread_policy_reset(
	thread_t thread)
{
	spl_t s;

	assert(thread == current_thread());

	s = splsched();
	thread_lock(thread);

	if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
	}

	if (thread->sched_flags & TH_SFLAG_THROTTLED) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	/* At this point, the various demotions should be inactive */
	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
	assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
	assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));

	/* Reset thread back to task-default basepri and mode */
	sched_mode_t newmode = SCHED(initial_thread_sched_mode)(thread->task);

	sched_set_thread_mode(thread, newmode);

	thread->importance = 0;

	/* Prevent further changes to thread base priority or mode */
	thread->policy_reset = 1;

	sched_set_thread_base_priority(thread, thread->task_priority);

	thread_unlock(thread);
	splx(s);
}
kern_return_t
thread_policy_get(
	thread_t                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t         policy_info,
	mach_msg_type_number_t  *count,
	boolean_t               *get_default)
{
	kern_return_t result = KERN_SUCCESS;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		boolean_t timeshare = TRUE;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			if ((thread->sched_mode != TH_MODE_REALTIME) &&
			    (thread->saved_mode != TH_MODE_REALTIME)) {
				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
				} else {
					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			info->timeshare = timeshare;
		}

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	{
		thread_time_constraint_policy_t info;

		if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			if ((thread->sched_mode == TH_MODE_REALTIME) ||
			    (thread->saved_mode == TH_MODE_REALTIME)) {
				info->period = thread->realtime.period;
				info->computation = thread->realtime.computation;
				info->constraint = thread->realtime.constraint;
				info->preemptible = thread->realtime.preemptible;
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*get_default) {
			info->period = 0;
			info->computation = default_timeshare_computation;
			info->constraint = default_timeshare_constraint;
			info->preemptible = TRUE;
		}

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			info->importance = thread->importance;

			thread_unlock(thread);
			splx(s);
		} else {
			info->importance = 0;
		}

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t)policy_info;

		if (!(*get_default)) {
			info->affinity_tag = thread_affinity_get(thread);
		} else {
			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
		}

		break;
	}

	case THREAD_POLICY_STATE:
	{
		thread_policy_state_t info;

		if (*count < THREAD_POLICY_STATE_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only root can get this info */
		if (current_task()->sec_token.val[0] != 0) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_policy_state_t)(void*)policy_info;

		if (!(*get_default)) {
			info->flags = 0;

			spl_t s = splsched();
			thread_lock(thread);

			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

			info->thps_user_promotions          = 0;
			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
			info->thps_ipc_overrides            = thread->kevent_overrides;

			proc_get_thread_policy_bitfield(thread, info);

			thread_unlock(thread);
			splx(s);
		} else {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
		}

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t plqos;

		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			plqos = 0;
		} else {
			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
		}

		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
	}
	break;

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t ptqos;

		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			ptqos = 0;
		} else {
			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
		}

		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
	}
	break;

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (*count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (!(*get_default)) {
			int relprio_value = 0;
			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

			info->tier_importance = -relprio_value;
		} else {
			info->qos_tier = THREAD_QOS_UNSPECIFIED;
			info->tier_importance = 0;
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return result;
}
void
thread_policy_create(thread_t thread)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	/* We pass a pend token but ignore it */
	struct task_pend_token pend_token = {};

	thread_policy_update_internal_spinlocked(thread, true, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}

static void
thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
/*
 * One thread state update function TO RULE THEM ALL
 *
 * This function updates the thread effective policy fields
 * and pushes the results to the relevant subsystems.
 *
 * Returns TRUE if a pended action needs to be run.
 *
 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
 */
static void
thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
    task_pend_token_t pend_token)
{
	/*
	 * Step 1:
	 *  Gather requested policy and effective task state
	 */

	struct thread_requested_policy requested = thread->requested_policy;
	struct task_effective_policy task_effective = thread->task->effective_policy;

	/*
	 * Step 2:
	 *  Calculate new effective policies from requested policy, task and thread state
	 *  Rules:
	 *      Don't change requested, it won't take effect
	 */

	struct thread_effective_policy next = {};

	next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;

	uint32_t next_qos = requested.thrp_qos;

	if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		next_qos = MAX(requested.thrp_qos_override, next_qos);
		next_qos = MAX(requested.thrp_qos_promote, next_qos);
		next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
		next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
		next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
	}

	if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
	    requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
		/*
		 * This thread is turnstile-boosted higher than the adaptive clamp
		 * by a synchronous waiter. Allow that to override the adaptive
		 * clamp temporarily for this thread only.
		 */
		next.thep_promote_above_task = true;
		next_qos = requested.thrp_qos_promote;
	}

	next.thep_qos = next_qos;

	/* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
	if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
		if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
			next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
		} else {
			next.thep_qos = task_effective.tep_qos_clamp;
		}
	}

	/*
	 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
	 * This allows QoS promotions to work properly even after the process is unclamped.
	 */
	next.thep_qos_promote = next.thep_qos;

	/* The ceiling only applies to threads that are in the QoS world */
	/* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
	if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
	    next.thep_qos != THREAD_QOS_UNSPECIFIED) {
		next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
	}

	/*
	 * The QoS relative priority is only applicable when the original programmer's
	 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
	 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
	 * since otherwise it would be lower than unclamped threads. Similarly, in the
	 * presence of boosting, the programmer doesn't know what other actors
	 * are boosting the thread.
	 */
	if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
	    (requested.thrp_qos == next.thep_qos) &&
	    (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos_relprio = requested.thrp_qos_relprio;
	} else {
		next.thep_qos_relprio = 0;
	}

	/* Calculate DARWIN_BG */
	bool wants_darwinbg = false;
	bool wants_all_sockets_bg = false; /* Do I want my existing sockets to be bg */

	if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
		wants_darwinbg = true;
	}

	/*
	 * If DARWIN_BG has been requested at either level, it's engaged.
	 * darwinbg threads always create bg sockets,
	 * but only some types of darwinbg change the sockets
	 * after they're created
	 */
	if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	if (requested.thrp_pidbind_bg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	if (next.thep_qos == THREAD_QOS_BACKGROUND ||
	    next.thep_qos == THREAD_QOS_MAINTENANCE) {
		wants_darwinbg = true;
	}

	/* Calculate side effects of DARWIN_BG */

	if (wants_darwinbg) {
		next.thep_darwinbg = 1;
	}

	if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
		next.thep_new_sockets_bg = 1;
	}

	/* Don't use task_effective.tep_all_sockets_bg here */
	if (wants_all_sockets_bg) {
		next.thep_all_sockets_bg = 1;
	}

	/* darwinbg implies background QOS (or lower) */
	if (next.thep_darwinbg &&
	    (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos = THREAD_QOS_BACKGROUND;
		next.thep_qos_relprio = 0;
	}

	/* Calculate IO policy */

	int iopol = THROTTLE_LEVEL_TIER0;

	/* Factor in the task's IO policy */
	if (next.thep_darwinbg) {
		iopol = MAX(iopol, task_effective.tep_bg_iotier);
	}

	if (!next.thep_promote_above_task) {
		iopol = MAX(iopol, task_effective.tep_io_tier);
	}

	/* Look up the associated IO tier value for the QoS class */
	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);

	iopol = MAX(iopol, requested.thrp_int_iotier);
	iopol = MAX(iopol, requested.thrp_ext_iotier);

	next.thep_io_tier = iopol;

	/*
	 * If a QoS override is causing IO to go into a lower tier, we also set
	 * the passive bit so that a thread doesn't end up stuck in its own throttle
	 * window when the override goes away.
	 */

	int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
	int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
	bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);

	/* Calculate Passive IO policy */
	if (requested.thrp_ext_iopassive ||
	    requested.thrp_int_iopassive ||
	    qos_io_override_active ||
	    task_effective.tep_io_passive) {
		next.thep_io_passive = 1;
	}

	/* Calculate timer QOS */
	uint32_t latency_qos = requested.thrp_latency_qos;

	if (!next.thep_promote_above_task) {
		latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
	}

	latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);

	next.thep_latency_qos = latency_qos;

	/* Calculate throughput QOS */
	uint32_t through_qos = requested.thrp_through_qos;

	if (!next.thep_promote_above_task) {
		through_qos = MAX(through_qos, task_effective.tep_through_qos);
	}

	through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);

	next.thep_through_qos = through_qos;

	if (task_effective.tep_terminated || requested.thrp_terminated) {
		/* Shoot down the throttles that slow down exit or response to SIGTERM */
		next.thep_terminated = 1;
		next.thep_darwinbg = 0;
		next.thep_io_tier = THROTTLE_LEVEL_TIER0;
		next.thep_qos = THREAD_QOS_UNSPECIFIED;
		next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
		next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
	}

	/*
	 * Step 3:
	 *  Swap out old policy for new policy
	 */

	struct thread_effective_policy prev = thread->effective_policy;

	thread_update_qos_cpu_time_locked(thread);

	/* This is the point where the new values become visible to other threads */
	thread->effective_policy = next;

	/*
	 * Step 4:
	 *  Pend updates that can't be done while holding the thread lock
	 */

	if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
		pend_token->tpt_update_sockets = 1;
	}

	/* TODO: Doesn't this only need to be done if the throttle went up? */
	if (prev.thep_io_tier != next.thep_io_tier) {
		pend_token->tpt_update_throttle = 1;
	}

	/*
	 * Check for the attributes that sfi_thread_classify() consults,
	 * and trigger SFI re-evaluation.
	 */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_darwinbg != next.thep_darwinbg) {
		pend_token->tpt_update_thread_sfi = 1;
	}

	integer_t old_base_pri = thread->base_pri;

	/*
	 * Step 5:
	 *  Update other subsystems as necessary if something has changed
	 */

	/* Check for the attributes that thread_recompute_priority() consults */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_qos_relprio != next.thep_qos_relprio ||
	    prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
	    prev.thep_promote_above_task != next.thep_promote_above_task ||
	    prev.thep_terminated != next.thep_terminated ||
	    pend_token->tpt_force_recompute_pri == 1 ||
	    recompute_priority) {
		thread_recompute_priority(thread);
	}

	/*
	 * Check if the thread is waiting on a turnstile and needs priority propagation.
	 */
	if (pend_token->tpt_update_turnstile &&
	    ((old_base_pri == thread->base_pri) ||
	    !thread_get_waiting_turnstile(thread))) {
		/*
		 * Reset update turnstile pend token since either
		 * the thread priority did not change or thread is
		 * not blocked on a turnstile.
		 */
		pend_token->tpt_update_turnstile = 0;
	}
}
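/*
 * End-to-end example of the update above (editor's sketch): a thread
 * requesting THREAD_QOS_BACKGROUND picks up wants_darwinbg from its QoS
 * alone, so thep_darwinbg and thep_new_sockets_bg are set and the IO tier
 * is raised to at least qos_iotier[THREAD_QOS_BACKGROUND]
 * (THROTTLE_LEVEL_TIER2). If a kevent override later requests
 * USER_INITIATED, next_qos rises, darwinbg falls away, and the changed
 * QoS and IO tier set tpt_update_thread_sfi and tpt_update_throttle in the
 * pend token so SFI and throttling are re-evaluated once the spinlock is
 * dropped.
 */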
/*
 * Initiate a thread policy state transition on a thread with its TID
 * Useful if you cannot guarantee the thread won't get terminated
 * Precondition: No locks are held
 * Will take task lock - using the non-tid variant is faster
 * if you already have a thread ref.
 */
void
proc_set_thread_policy_with_tid(task_t task,
    uint64_t tid,
    int      category,
    int      flavor,
    int      value)
{
	/* takes task lock, returns ref'ed thread or NULL */
	thread_t thread = task_findtid(task, tid);

	if (thread == THREAD_NULL) {
		return;
	}

	proc_set_thread_policy(thread, category, flavor, value);

	thread_deallocate(thread);
}

/*
 * Initiate a thread policy transition on a thread
 * This path supports networking transitions (i.e. darwinbg transitions)
 * Precondition: No locks are held
 */
void
proc_set_thread_policy(thread_t thread,
    int category,
    int flavor,
    int value)
{
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);

	proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}

/*
 * Do the things that can't be done while holding a thread mutex.
 * These are set up to call back into thread policy to get the latest value,
 * so they don't have to be synchronized with the update.
 * The only required semantic is 'call this sometime after updating effective policy'
 *
 * Precondition: Thread mutex is not held
 *
 * This may be called with the task lock held, but in that case it won't be
 * called with tpt_update_sockets set.
 */
void
thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
{
#ifdef MACH_BSD
	if (pend_token->tpt_update_sockets) {
		proc_apply_task_networkbg(thread->task->bsd_info, thread);
	}
#endif /* MACH_BSD */

	if (pend_token->tpt_update_throttle) {
		rethrottle_thread(thread->uthread);
	}

	if (pend_token->tpt_update_thread_sfi) {
		sfi_reevaluate(thread);
	}

	if (pend_token->tpt_update_turnstile) {
		turnstile_update_thread_priority_chain(thread);
	}
}

/*
 * Set and update thread policy
 * Thread mutex might be held
 */
static void
proc_set_thread_policy_locked(thread_t thread,
    int               category,
    int               flavor,
    int               value,
    int               value2,
    task_pend_token_t pend_token)
{
	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	thread_unlock(thread);
	splx(s);
}

/*
 * Set and update thread policy
 * Thread spinlock is held
 */
static void
proc_set_thread_policy_spinlocked(thread_t thread,
    int               category,
    int               flavor,
    int               value,
    int               value2,
    task_pend_token_t pend_token)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), value, 0);

	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	thread_policy_update_spinlocked(thread, false, pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), tpending(pend_token), 0);
}
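/*
 * Usage sketch (editor's note, hypothetical caller): kernel-internal
 * callers adjust one flavor at a time through the layered set path above
 * (proc_set_thread_policy -> _locked -> _spinlocked), e.g. marking a
 * thread externally backgrounded:
 *
 *     proc_set_thread_policy(thread, TASK_POLICY_EXTERNAL,
 *         TASK_POLICY_DARWIN_BG, TASK_POLICY_ENABLE);
 *
 * The requested-policy write and the effective-policy recomputation happen
 * under the thread spinlock, while socket/SFI/turnstile side effects run
 * later from the pend token.
 */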
1895 */ 1896 static void 1897 thread_set_requested_policy_spinlocked(thread_t thread, 1898 int category, 1899 int flavor, 1900 int value, 1901 int value2, 1902 task_pend_token_t pend_token) 1903 { 1904 int tier, passive; 1905 1906 struct thread_requested_policy requested = thread->requested_policy; 1907 1908 switch (flavor) { 1909 /* Category: EXTERNAL and INTERNAL, thread and task */ 1910 1911 case TASK_POLICY_DARWIN_BG: 1912 if (category == TASK_POLICY_EXTERNAL) { 1913 requested.thrp_ext_darwinbg = value; 1914 } else { 1915 requested.thrp_int_darwinbg = value; 1916 } 1917 break; 1918 1919 case TASK_POLICY_IOPOL: 1920 proc_iopol_to_tier(value, &tier, &passive); 1921 if (category == TASK_POLICY_EXTERNAL) { 1922 requested.thrp_ext_iotier = tier; 1923 requested.thrp_ext_iopassive = passive; 1924 } else { 1925 requested.thrp_int_iotier = tier; 1926 requested.thrp_int_iopassive = passive; 1927 } 1928 break; 1929 1930 case TASK_POLICY_IO: 1931 if (category == TASK_POLICY_EXTERNAL) { 1932 requested.thrp_ext_iotier = value; 1933 } else { 1934 requested.thrp_int_iotier = value; 1935 } 1936 break; 1937 1938 case TASK_POLICY_PASSIVE_IO: 1939 if (category == TASK_POLICY_EXTERNAL) { 1940 requested.thrp_ext_iopassive = value; 1941 } else { 1942 requested.thrp_int_iopassive = value; 1943 } 1944 break; 1945 1946 /* Category: ATTRIBUTE, thread only */ 1947 1948 case TASK_POLICY_PIDBIND_BG: 1949 assert(category == TASK_POLICY_ATTRIBUTE); 1950 requested.thrp_pidbind_bg = value; 1951 break; 1952 1953 case TASK_POLICY_LATENCY_QOS: 1954 assert(category == TASK_POLICY_ATTRIBUTE); 1955 requested.thrp_latency_qos = value; 1956 break; 1957 1958 case TASK_POLICY_THROUGH_QOS: 1959 assert(category == TASK_POLICY_ATTRIBUTE); 1960 requested.thrp_through_qos = value; 1961 break; 1962 1963 case TASK_POLICY_QOS_OVERRIDE: 1964 assert(category == TASK_POLICY_ATTRIBUTE); 1965 requested.thrp_qos_override = value; 1966 pend_token->tpt_update_turnstile = 1; 1967 break; 1968 1969 case TASK_POLICY_QOS_AND_RELPRIO: 1970 assert(category == TASK_POLICY_ATTRIBUTE); 1971 requested.thrp_qos = value; 1972 requested.thrp_qos_relprio = value2; 1973 pend_token->tpt_update_turnstile = 1; 1974 DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio); 1975 break; 1976 1977 case TASK_POLICY_QOS_WORKQ_OVERRIDE: 1978 assert(category == TASK_POLICY_ATTRIBUTE); 1979 requested.thrp_qos_workq_override = value; 1980 pend_token->tpt_update_turnstile = 1; 1981 break; 1982 1983 case TASK_POLICY_QOS_PROMOTE: 1984 assert(category == TASK_POLICY_ATTRIBUTE); 1985 requested.thrp_qos_promote = value; 1986 break; 1987 1988 case TASK_POLICY_QOS_KEVENT_OVERRIDE: 1989 assert(category == TASK_POLICY_ATTRIBUTE); 1990 requested.thrp_qos_kevent_override = value; 1991 pend_token->tpt_update_turnstile = 1; 1992 break; 1993 1994 case TASK_POLICY_QOS_SERVICER_OVERRIDE: 1995 assert(category == TASK_POLICY_ATTRIBUTE); 1996 requested.thrp_qos_wlsvc_override = value; 1997 pend_token->tpt_update_turnstile = 1; 1998 break; 1999 2000 case TASK_POLICY_TERMINATED: 2001 assert(category == TASK_POLICY_ATTRIBUTE); 2002 requested.thrp_terminated = value; 2003 break; 2004 2005 default: 2006 panic("unknown task policy: %d %d %d", category, flavor, value); 2007 break; 2008 } 2009 2010 thread->requested_policy = requested; 2011 } 2012 2013 /* 2014 * Gets what you set. Effective values may be different. 
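 *
 * e.g. (illustrative): after setting TASK_POLICY_QOS_AND_RELPRIO to
 * THREAD_QOS_UTILITY on a thread, querying TASK_POLICY_QOS here returns
 * THREAD_QOS_UTILITY even if the effective QoS was clamped by task policy.
 *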
2015 * Precondition: No locks are held 2016 */ 2017 int 2018 proc_get_thread_policy(thread_t thread, 2019 int category, 2020 int flavor) 2021 { 2022 int value = 0; 2023 thread_mtx_lock(thread); 2024 value = proc_get_thread_policy_locked(thread, category, flavor, NULL); 2025 thread_mtx_unlock(thread); 2026 return value; 2027 } 2028 2029 static int 2030 proc_get_thread_policy_locked(thread_t thread, 2031 int category, 2032 int flavor, 2033 int* value2) 2034 { 2035 int value = 0; 2036 2037 spl_t s = splsched(); 2038 thread_lock(thread); 2039 2040 value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2); 2041 2042 thread_unlock(thread); 2043 splx(s); 2044 2045 return value; 2046 } 2047 2048 /* 2049 * Gets what you set. Effective values may be different. 2050 */ 2051 static int 2052 thread_get_requested_policy_spinlocked(thread_t thread, 2053 int category, 2054 int flavor, 2055 int* value2) 2056 { 2057 int value = 0; 2058 2059 struct thread_requested_policy requested = thread->requested_policy; 2060 2061 switch (flavor) { 2062 case TASK_POLICY_DARWIN_BG: 2063 if (category == TASK_POLICY_EXTERNAL) { 2064 value = requested.thrp_ext_darwinbg; 2065 } else { 2066 value = requested.thrp_int_darwinbg; 2067 } 2068 break; 2069 case TASK_POLICY_IOPOL: 2070 if (category == TASK_POLICY_EXTERNAL) { 2071 value = proc_tier_to_iopol(requested.thrp_ext_iotier, 2072 requested.thrp_ext_iopassive); 2073 } else { 2074 value = proc_tier_to_iopol(requested.thrp_int_iotier, 2075 requested.thrp_int_iopassive); 2076 } 2077 break; 2078 case TASK_POLICY_IO: 2079 if (category == TASK_POLICY_EXTERNAL) { 2080 value = requested.thrp_ext_iotier; 2081 } else { 2082 value = requested.thrp_int_iotier; 2083 } 2084 break; 2085 case TASK_POLICY_PASSIVE_IO: 2086 if (category == TASK_POLICY_EXTERNAL) { 2087 value = requested.thrp_ext_iopassive; 2088 } else { 2089 value = requested.thrp_int_iopassive; 2090 } 2091 break; 2092 case TASK_POLICY_QOS: 2093 assert(category == TASK_POLICY_ATTRIBUTE); 2094 value = requested.thrp_qos; 2095 break; 2096 case TASK_POLICY_QOS_OVERRIDE: 2097 assert(category == TASK_POLICY_ATTRIBUTE); 2098 value = requested.thrp_qos_override; 2099 break; 2100 case TASK_POLICY_LATENCY_QOS: 2101 assert(category == TASK_POLICY_ATTRIBUTE); 2102 value = requested.thrp_latency_qos; 2103 break; 2104 case TASK_POLICY_THROUGH_QOS: 2105 assert(category == TASK_POLICY_ATTRIBUTE); 2106 value = requested.thrp_through_qos; 2107 break; 2108 case TASK_POLICY_QOS_WORKQ_OVERRIDE: 2109 assert(category == TASK_POLICY_ATTRIBUTE); 2110 value = requested.thrp_qos_workq_override; 2111 break; 2112 case TASK_POLICY_QOS_AND_RELPRIO: 2113 assert(category == TASK_POLICY_ATTRIBUTE); 2114 assert(value2 != NULL); 2115 value = requested.thrp_qos; 2116 *value2 = requested.thrp_qos_relprio; 2117 break; 2118 case TASK_POLICY_QOS_PROMOTE: 2119 assert(category == TASK_POLICY_ATTRIBUTE); 2120 value = requested.thrp_qos_promote; 2121 break; 2122 case TASK_POLICY_QOS_KEVENT_OVERRIDE: 2123 assert(category == TASK_POLICY_ATTRIBUTE); 2124 value = requested.thrp_qos_kevent_override; 2125 break; 2126 case TASK_POLICY_QOS_SERVICER_OVERRIDE: 2127 assert(category == TASK_POLICY_ATTRIBUTE); 2128 value = requested.thrp_qos_wlsvc_override; 2129 break; 2130 case TASK_POLICY_TERMINATED: 2131 assert(category == TASK_POLICY_ATTRIBUTE); 2132 value = requested.thrp_terminated; 2133 break; 2134 2135 default: 2136 panic("unknown policy_flavor %d", flavor); 2137 break; 2138 } 2139 2140 return value; 2141 } 2142 2143 /* 2144 * Gets what is actually in 
effect, for subsystems which pull policy instead of receiving updates.
2145 *
2146 * NOTE: This accessor does not take the task or thread lock.
2147 * Notifications of state updates need to be externally synchronized with state queries.
2148 * This routine *MUST* remain interrupt safe, as it is potentially invoked
2149 * within the context of a timer interrupt.
2150 *
2151 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2152 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2153 * I don't think that cost is worth not having the right answer.
2154 */
2155 int
2156 proc_get_effective_thread_policy(thread_t thread,
2157 int flavor)
2158 {
2159 int value = 0;
2160
2161 switch (flavor) {
2162 case TASK_POLICY_DARWIN_BG:
2163 /*
2164 * This call is used within the timer layer, as well as
2165 * prioritizing requests to the graphics system.
2166 * It also informs SFI and originator-bg-state.
2167 * Returns 1 for background mode, 0 for normal mode
2168 */
2169
2170 value = thread->effective_policy.thep_darwinbg ? 1 : 0;
2171 break;
2172 case TASK_POLICY_IO:
2173 /*
2174 * The I/O system calls here to find out what throttling tier to apply to an operation.
2175 * Returns THROTTLE_LEVEL_* values
2176 */
2177 value = thread->effective_policy.thep_io_tier;
2178 if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
2179 value = MIN(value, thread->iotier_override);
2180 }
2181 break;
2182 case TASK_POLICY_PASSIVE_IO:
2183 /*
2184 * The I/O system calls here to find out whether an operation should be passive.
2185 * (i.e. not cause operations with lower throttle tiers to be throttled)
2186 * Returns 1 for passive mode, 0 for normal mode
2187 *
2188 * If an override is causing IO to go into a lower tier, we also set
2189 * the passive bit so that a thread doesn't end up stuck in its own throttle
2190 * window when the override goes away.
2191 */
2192 value = thread->effective_policy.thep_io_passive ? 1 : 0;
2193 if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2194 thread->iotier_override < thread->effective_policy.thep_io_tier) {
2195 value = 1;
2196 }
2197 break;
2198 case TASK_POLICY_ALL_SOCKETS_BG:
2199 /*
2200 * do_background_socket() calls this to determine whether
2201 * it should change the thread's sockets
2202 * Returns 1 for background mode, 0 for normal mode
2203 * This consults both thread and task so un-DBGing a thread while the task is BG
2204 * doesn't get you out of the network throttle.
2205 */
2206 value = (thread->effective_policy.thep_all_sockets_bg ||
2207 thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
2208 break;
2209 case TASK_POLICY_NEW_SOCKETS_BG:
2210 /*
2211 * socreate() calls this to determine if it should mark a new socket as background
2212 * Returns 1 for background mode, 0 for normal mode
2213 */
2214 value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
2215 break;
2216 case TASK_POLICY_LATENCY_QOS:
2217 /*
2218 * timer arming calls into here to find out the timer coalescing level
2219 * Returns a latency QoS tier (0-6)
2220 */
2221 value = thread->effective_policy.thep_latency_qos;
2222 break;
2223 case TASK_POLICY_THROUGH_QOS:
2224 /*
2225 * This value is passed into the urgency callout from the scheduler
2226 * to the performance management subsystem.
2227 *
2228 * Returns a throughput QoS tier (0-6)
2229 */
2230 value = thread->effective_policy.thep_through_qos;
2231 break;
2232 case TASK_POLICY_QOS:
2233 /*
2234 * This is communicated to the performance management layer and SFI.
2235 *
2236 * Returns a QoS policy tier
2237 */
2238 value = thread->effective_policy.thep_qos;
2239 break;
2240 default:
2241 panic("unknown thread policy flavor %d", flavor);
2242 break;
2243 }
2244
2245 return value;
2246 }
2247
2248
2249 /*
2250 * (integer_t) casts limit the number of bits we can fit here;
2251 * this interface is deprecated and replaced by the _EXT struct?
2252 */
2253 static void
2254 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2255 {
2256 uint64_t bits = 0;
2257 struct thread_requested_policy requested = thread->requested_policy;
2258
2259 bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
2260 bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
2261 bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2262 bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2263 bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
2264 bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2265
2266 bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2267 bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);
2268
2269 bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);
2270
2271 bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2272 bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2273
2274 info->requested = (integer_t)bits;
2275 bits = 0;
2276
2277 struct thread_effective_policy effective = thread->effective_policy;
2278
2279 bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);
2280
2281 bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2282 bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
2283 bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2284 bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2285
2286 bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2287
2288 bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2289 bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2290
2291 info->effective = (integer_t)bits;
2292 bits = 0;
2293
2294 info->pending = 0;
2295 }
2296
2297 /*
2298 * Sneakily trace either the task and thread requested
2299 * or just the thread requested, depending on whether we have enough room.
2300 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2301 * 2302 * LP32 LP64 2303 * threquested_0(thread) thread[0] task[0] 2304 * threquested_1(thread) thread[1] thread[0] 2305 * 2306 */ 2307 2308 uintptr_t 2309 threquested_0(thread_t thread) 2310 { 2311 static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated"); 2312 2313 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy; 2314 2315 return raw[0]; 2316 } 2317 2318 uintptr_t 2319 threquested_1(thread_t thread) 2320 { 2321 #if defined __LP64__ 2322 return *(uintptr_t*)&thread->task->requested_policy; 2323 #else 2324 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy; 2325 return raw[1]; 2326 #endif 2327 } 2328 2329 uintptr_t 2330 theffective_0(thread_t thread) 2331 { 2332 static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated"); 2333 2334 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy; 2335 return raw[0]; 2336 } 2337 2338 uintptr_t 2339 theffective_1(thread_t thread) 2340 { 2341 #if defined __LP64__ 2342 return *(uintptr_t*)&thread->task->effective_policy; 2343 #else 2344 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy; 2345 return raw[1]; 2346 #endif 2347 } 2348 2349 2350 /* 2351 * Set an override on the thread which is consulted with a 2352 * higher priority than the task/thread policy. This should 2353 * only be set for temporary grants until the thread 2354 * returns to the userspace boundary 2355 * 2356 * We use atomic operations to swap in the override, with 2357 * the assumption that the thread itself can 2358 * read the override and clear it on return to userspace. 2359 * 2360 * No locking is performed, since it is acceptable to see 2361 * a stale override for one loop through throttle_lowpri_io(). 2362 * However a thread reference must be held on the thread. 2363 */ 2364 2365 void 2366 set_thread_iotier_override(thread_t thread, int policy) 2367 { 2368 int current_override; 2369 2370 /* Let most aggressive I/O policy win until user boundary */ 2371 do { 2372 current_override = thread->iotier_override; 2373 2374 if (current_override != THROTTLE_LEVEL_NONE) { 2375 policy = MIN(current_override, policy); 2376 } 2377 2378 if (current_override == policy) { 2379 /* no effective change */ 2380 return; 2381 } 2382 } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override)); 2383 2384 /* 2385 * Since the thread may be currently throttled, 2386 * re-evaluate tiers and potentially break out 2387 * of an msleep 2388 */ 2389 rethrottle_thread(thread->uthread); 2390 } 2391 2392 /* 2393 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks, 2394 * semaphores, dispatch_sync) may result in priority inversions where a higher priority 2395 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower 2396 * priority thread. In these cases, we attempt to propagate the priority token, as long 2397 * as the subsystem informs us of the relationships between the threads. The userspace 2398 * synchronization subsystem should maintain the information of owner->resource and 2399 * resource->waiters itself. 2400 */ 2401 2402 /* 2403 * This helper canonicalizes the resource/resource_type given the current qos_override_mode 2404 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need 2405 * to be handled specially in the future, but for now it's fine to slam 2406 * *resource to USER_ADDR_NULL even if it was previously a wildcard. 
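 *
 * e.g. (illustrative): under the default
 * QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE mode,
 * any (resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) pair canonicalizes
 * to (USER_ADDR_NULL, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX), collapsing
 * all pthread mutex overrides onto a single record, while other resource
 * types keep their distinct resource addresses.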
2407 */ 2408 static void 2409 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type) 2410 { 2411 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) { 2412 /* Map all input resource/type to a single one */ 2413 *resource = USER_ADDR_NULL; 2414 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN; 2415 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) { 2416 /* no transform */ 2417 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) { 2418 /* Map all mutex overrides to a single one, to avoid memory overhead */ 2419 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) { 2420 *resource = USER_ADDR_NULL; 2421 } 2422 } 2423 } 2424 2425 /* This helper routine finds an existing override if known. Locking should be done by caller */ 2426 static struct thread_qos_override * 2427 find_qos_override(thread_t thread, 2428 user_addr_t resource, 2429 int resource_type) 2430 { 2431 struct thread_qos_override *override; 2432 2433 override = thread->overrides; 2434 while (override) { 2435 if (override->override_resource == resource && 2436 override->override_resource_type == resource_type) { 2437 return override; 2438 } 2439 2440 override = override->override_next; 2441 } 2442 2443 return NULL; 2444 } 2445 2446 static void 2447 find_and_decrement_qos_override(thread_t thread, 2448 user_addr_t resource, 2449 int resource_type, 2450 boolean_t reset, 2451 struct thread_qos_override **free_override_list) 2452 { 2453 struct thread_qos_override *override, *override_prev; 2454 2455 override_prev = NULL; 2456 override = thread->overrides; 2457 while (override) { 2458 struct thread_qos_override *override_next = override->override_next; 2459 2460 if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) && 2461 (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) { 2462 if (reset) { 2463 override->override_contended_resource_count = 0; 2464 } else { 2465 override->override_contended_resource_count--; 2466 } 2467 2468 if (override->override_contended_resource_count == 0) { 2469 if (override_prev == NULL) { 2470 thread->overrides = override_next; 2471 } else { 2472 override_prev->override_next = override_next; 2473 } 2474 2475 /* Add to out-param for later zfree */ 2476 override->override_next = *free_override_list; 2477 *free_override_list = override; 2478 } else { 2479 override_prev = override; 2480 } 2481 2482 if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) { 2483 return; 2484 } 2485 } else { 2486 override_prev = override; 2487 } 2488 2489 override = override_next; 2490 } 2491 } 2492 2493 /* This helper recalculates the current requested override using the policy selected at boot */ 2494 static int 2495 calculate_requested_qos_override(thread_t thread) 2496 { 2497 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) { 2498 return THREAD_QOS_UNSPECIFIED; 2499 } 2500 2501 /* iterate over all overrides and calculate MAX */ 2502 struct thread_qos_override *override; 2503 int qos_override = THREAD_QOS_UNSPECIFIED; 2504 2505 override = thread->overrides; 2506 while (override) { 2507 qos_override = MAX(qos_override, override->override_qos); 2508 override = override->override_next; 2509 } 2510 2511 return qos_override; 2512 } 2513 2514 /* 2515 * Returns: 2516 * - 0 on success 2517 * - EINVAL if some invalid input was passed 2518 */ 2519 static int 2520 
proc_thread_qos_add_override_internal(thread_t thread, 2521 int override_qos, 2522 boolean_t first_override_for_resource, 2523 user_addr_t resource, 2524 int resource_type) 2525 { 2526 struct task_pend_token pend_token = {}; 2527 int rc = 0; 2528 2529 thread_mtx_lock(thread); 2530 2531 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START, 2532 thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0); 2533 2534 DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread), 2535 uint64_t, thread->requested_policy.thrp_qos, 2536 uint64_t, thread->effective_policy.thep_qos, 2537 int, override_qos, boolean_t, first_override_for_resource); 2538 2539 struct thread_qos_override *override; 2540 struct thread_qos_override *override_new = NULL; 2541 int new_qos_override, prev_qos_override; 2542 int new_effective_qos; 2543 2544 canonicalize_resource_and_type(&resource, &resource_type); 2545 2546 override = find_qos_override(thread, resource, resource_type); 2547 if (first_override_for_resource && !override) { 2548 /* We need to allocate a new object. Drop the thread lock and 2549 * recheck afterwards in case someone else added the override 2550 */ 2551 thread_mtx_unlock(thread); 2552 override_new = zalloc(thread_qos_override_zone); 2553 thread_mtx_lock(thread); 2554 override = find_qos_override(thread, resource, resource_type); 2555 } 2556 if (first_override_for_resource && override) { 2557 /* Someone else already allocated while the thread lock was dropped */ 2558 override->override_contended_resource_count++; 2559 } else if (!override && override_new) { 2560 override = override_new; 2561 override_new = NULL; 2562 override->override_next = thread->overrides; 2563 /* since first_override_for_resource was TRUE */ 2564 override->override_contended_resource_count = 1; 2565 override->override_resource = resource; 2566 override->override_resource_type = (int16_t)resource_type; 2567 override->override_qos = THREAD_QOS_UNSPECIFIED; 2568 thread->overrides = override; 2569 } 2570 2571 if (override) { 2572 if (override->override_qos == THREAD_QOS_UNSPECIFIED) { 2573 override->override_qos = (int16_t)override_qos; 2574 } else { 2575 override->override_qos = MAX(override->override_qos, (int16_t)override_qos); 2576 } 2577 } 2578 2579 /* Determine how to combine the various overrides into a single current 2580 * requested override 2581 */ 2582 new_qos_override = calculate_requested_qos_override(thread); 2583 2584 prev_qos_override = proc_get_thread_policy_locked(thread, 2585 TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL); 2586 2587 if (new_qos_override != prev_qos_override) { 2588 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, 2589 TASK_POLICY_QOS_OVERRIDE, 2590 new_qos_override, 0, &pend_token); 2591 } 2592 2593 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS); 2594 2595 thread_mtx_unlock(thread); 2596 2597 thread_policy_update_complete_unlocked(thread, &pend_token); 2598 2599 if (override_new) { 2600 zfree(thread_qos_override_zone, override_new); 2601 } 2602 2603 DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override, 2604 int, new_qos_override, int, new_effective_qos, int, rc); 2605 2606 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END, 2607 new_qos_override, resource, resource_type, 0, 0); 2608 2609 return rc; 2610 } 2611 2612 int 2613 proc_thread_qos_add_override(task_t task, 2614 thread_t thread, 2615 uint64_t tid, 2616 int 
override_qos, 2617 boolean_t first_override_for_resource, 2618 user_addr_t resource, 2619 int resource_type) 2620 { 2621 boolean_t has_thread_reference = FALSE; 2622 int rc = 0; 2623 2624 if (thread == THREAD_NULL) { 2625 thread = task_findtid(task, tid); 2626 /* returns referenced thread */ 2627 2628 if (thread == THREAD_NULL) { 2629 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE, 2630 tid, 0, 0xdead, 0, 0); 2631 return ESRCH; 2632 } 2633 has_thread_reference = TRUE; 2634 } else { 2635 assert(thread->task == task); 2636 } 2637 rc = proc_thread_qos_add_override_internal(thread, override_qos, 2638 first_override_for_resource, resource, resource_type); 2639 if (has_thread_reference) { 2640 thread_deallocate(thread); 2641 } 2642 2643 return rc; 2644 } 2645 2646 static void 2647 proc_thread_qos_remove_override_internal(thread_t thread, 2648 user_addr_t resource, 2649 int resource_type, 2650 boolean_t reset) 2651 { 2652 struct task_pend_token pend_token = {}; 2653 2654 struct thread_qos_override *deferred_free_override_list = NULL; 2655 int new_qos_override, prev_qos_override, new_effective_qos; 2656 2657 thread_mtx_lock(thread); 2658 2659 canonicalize_resource_and_type(&resource, &resource_type); 2660 2661 find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list); 2662 2663 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START, 2664 thread_tid(thread), resource, reset, 0, 0); 2665 2666 DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread), 2667 uint64_t, thread->requested_policy.thrp_qos, 2668 uint64_t, thread->effective_policy.thep_qos); 2669 2670 /* Determine how to combine the various overrides into a single current requested override */ 2671 new_qos_override = calculate_requested_qos_override(thread); 2672 2673 spl_t s = splsched(); 2674 thread_lock(thread); 2675 2676 /* 2677 * The override chain and therefore the value of the current override is locked with thread mutex, 2678 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock. 2679 * This means you can't change the current override from a spinlock-only setter. 
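 *
 * The resulting lock ordering in this function is therefore (sketch):
 *
 *	thread_mtx_lock(thread);	<- protects the override chain
 *	s = splsched();
 *	thread_lock(thread);		<- protects the rest of thread policy
 *	... get/set the current override ...
 *	thread_unlock(thread);
 *	splx(s);
 *	thread_mtx_unlock(thread);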
2680 */ 2681 prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL); 2682 2683 if (new_qos_override != prev_qos_override) { 2684 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token); 2685 } 2686 2687 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS); 2688 2689 thread_unlock(thread); 2690 splx(s); 2691 2692 thread_mtx_unlock(thread); 2693 2694 thread_policy_update_complete_unlocked(thread, &pend_token); 2695 2696 while (deferred_free_override_list) { 2697 struct thread_qos_override *override_next = deferred_free_override_list->override_next; 2698 2699 zfree(thread_qos_override_zone, deferred_free_override_list); 2700 deferred_free_override_list = override_next; 2701 } 2702 2703 DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override, 2704 int, new_qos_override, int, new_effective_qos); 2705 2706 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END, 2707 thread_tid(thread), 0, 0, 0, 0); 2708 } 2709 2710 int 2711 proc_thread_qos_remove_override(task_t task, 2712 thread_t thread, 2713 uint64_t tid, 2714 user_addr_t resource, 2715 int resource_type) 2716 { 2717 boolean_t has_thread_reference = FALSE; 2718 2719 if (thread == THREAD_NULL) { 2720 thread = task_findtid(task, tid); 2721 /* returns referenced thread */ 2722 2723 if (thread == THREAD_NULL) { 2724 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE, 2725 tid, 0, 0xdead, 0, 0); 2726 return ESRCH; 2727 } 2728 has_thread_reference = TRUE; 2729 } else { 2730 assert(task == thread->task); 2731 } 2732 2733 proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE); 2734 2735 if (has_thread_reference) { 2736 thread_deallocate(thread); 2737 } 2738 2739 return 0; 2740 } 2741 2742 /* Deallocate before thread termination */ 2743 void 2744 proc_thread_qos_deallocate(thread_t thread) 2745 { 2746 /* This thread must have no more IPC overrides. */ 2747 assert(thread->kevent_overrides == 0); 2748 assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED); 2749 assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED); 2750 2751 /* 2752 * Clear out any lingering override objects. 
2753 */
2754 struct thread_qos_override *override;
2755
2756 thread_mtx_lock(thread);
2757 override = thread->overrides;
2758 thread->overrides = NULL;
2759 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2760 /* We don't need to re-evaluate thread policy here because the thread has already exited */
2761 thread_mtx_unlock(thread);
2762
2763 while (override) {
2764 struct thread_qos_override *override_next = override->override_next;
2765
2766 zfree(thread_qos_override_zone, override);
2767 override = override_next;
2768 }
2769 }
2770
2771 /*
2772 * Set up the primordial thread's QoS
2773 */
2774 void
2775 task_set_main_thread_qos(task_t task, thread_t thread)
2776 {
2777 struct task_pend_token pend_token = {};
2778
2779 assert(thread->task == task);
2780
2781 thread_mtx_lock(thread);
2782
2783 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2784 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2785 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2786 thread->requested_policy.thrp_qos, 0);
2787
2788 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2789
2790 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2791 primordial_qos, 0, &pend_token);
2792
2793 thread_mtx_unlock(thread);
2794
2795 thread_policy_update_complete_unlocked(thread, &pend_token);
2796
2797 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2798 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2799 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2800 primordial_qos, 0);
2801 }
2802
2803 /*
2804 * KPI for pthread kext
2805 *
2806 * Return a good guess at what the initial manager QoS will be
2807 * Dispatch can override this in userspace if it so chooses
2808 */
2809 thread_qos_t
2810 task_get_default_manager_qos(task_t task)
2811 {
2812 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2813
2814 if (primordial_qos == THREAD_QOS_LEGACY) {
2815 primordial_qos = THREAD_QOS_USER_INITIATED;
2816 }
2817
2818 return primordial_qos;
2819 }
2820
2821 /*
2822 * Check if the kernel promotion on the thread has changed
2823 * and apply it.
2824 *
2825 * thread locked on entry and exit
2826 */
2827 boolean_t
2828 thread_recompute_kernel_promotion_locked(thread_t thread)
2829 {
2830 boolean_t needs_update = FALSE;
2831 uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);
2832
2833 /*
2834 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
2835 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
2836 * and propagates the priority through the chain with the same cap, because as of now it does
2837 * not differentiate on the kernel primitive.
2838 *
2839 * If this assumption changes with the adoption of a kernel primitive that does not
2840 * cap the priority when adding/propagating,
2841 * then here is the place to put the generic cap for all kernel primitives
2842 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
2843 */
2844 assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
2845
2846 if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
2847 KDBG(MACHDBG_CODE(
2848 DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
2849 thread_tid(thread),
2850 kern_promotion_schedpri,
2851 thread->kern_promotion_schedpri);
2852
2853 needs_update = TRUE;
2854 thread->kern_promotion_schedpri = kern_promotion_schedpri;
2855 thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
2856 }
2857
2858 return needs_update;
2859 }
2860
2861 /*
2862 * Check if the user promotion on the thread has changed
2863 * and apply it.
2864 *
2865 * thread locked on entry, might drop the thread lock
2866 * and reacquire it.
2867 */
2868 boolean_t
2869 thread_recompute_user_promotion_locked(thread_t thread)
2870 {
2871 boolean_t needs_update = FALSE;
2872 struct task_pend_token pend_token = {};
2873 uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
2874 int old_base_pri = thread->base_pri;
2875 thread_qos_t qos_promotion;
2876
2877 /* Check if user promotion has changed */
2878 if (thread->user_promotion_basepri == user_promotion_basepri) {
2879 return needs_update;
2880 } else {
2881 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2882 (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
2883 thread_tid(thread),
2884 user_promotion_basepri,
2885 thread->user_promotion_basepri,
2886 0, 0);
2887 KDBG(MACHDBG_CODE(
2888 DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
2889 thread_tid(thread),
2890 user_promotion_basepri,
2891 thread->user_promotion_basepri);
2892 }
2893
2894 /* Update the user promotion base pri */
2895 thread->user_promotion_basepri = user_promotion_basepri;
2896 pend_token.tpt_force_recompute_pri = 1;
2897
2898 if (user_promotion_basepri <= MAXPRI_THROTTLE) {
2899 qos_promotion = THREAD_QOS_UNSPECIFIED;
2900 } else {
2901 qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
2902 }
2903
2904 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2905 TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
2906
2907 if (thread_get_waiting_turnstile(thread) &&
2908 thread->base_pri != old_base_pri) {
2909 needs_update = TRUE;
2910 }
2911
2912 thread_unlock(thread);
2913
2914 thread_policy_update_complete_unlocked(thread, &pend_token);
2915
2916 thread_lock(thread);
2917
2918 return needs_update;
2919 }
2920
2921 /*
2922 * Convert the thread's user promotion base pri to a QoS, for threads in the QoS world.
2923 * For priorities above the UI QoS, the QoS is set to UI.
2924 */
2925 thread_qos_t
2926 thread_user_promotion_qos_for_pri(int priority)
2927 {
2928 thread_qos_t qos;
2929 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
2930 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
2931 return qos;
2932 }
2933 }
2934 return THREAD_QOS_MAINTENANCE;
2935 }
2936
2937 /*
2938 * Set the thread's QoS Kevent override
2939 * Owned by the Kevent subsystem
2940 *
2941 * May be called with spinlocks held, but not spinlocks
2942 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
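 *
 * Illustrative lifecycle (an editor's sketch of the add/update/drop
 * contract described below, using the wrappers defined after this function):
 *
 *	thread_add_kevent_override(thread, THREAD_QOS_USER_INITIATED);
 *	...
 *	thread_update_kevent_override(thread, THREAD_QOS_USER_INTERACTIVE);
 *	...
 *	thread_drop_kevent_override(thread);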
2943 *
2944 * One 'add' must be balanced by one 'drop'.
2945 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
2946 * Before the thread is deallocated, there must be 0 remaining overrides.
2947 */
2948 static void
2949 thread_kevent_override(thread_t thread,
2950 uint32_t qos_override,
2951 boolean_t is_new_override)
2952 {
2953 struct task_pend_token pend_token = {};
2954 boolean_t needs_update;
2955
2956 spl_t s = splsched();
2957 thread_lock(thread);
2958
2959 uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
2960
2961 assert(qos_override > THREAD_QOS_UNSPECIFIED);
2962 assert(qos_override < THREAD_QOS_LAST);
2963
2964 if (is_new_override) {
2965 if (thread->kevent_overrides++ == 0) {
2966 /* This add is the first override for this thread */
2967 assert(old_override == THREAD_QOS_UNSPECIFIED);
2968 } else {
2969 /* There are already other overrides in effect for this thread */
2970 assert(old_override > THREAD_QOS_UNSPECIFIED);
2971 }
2972 } else {
2973 /* There must be at least one override (the previous add call) in effect */
2974 assert(thread->kevent_overrides > 0);
2975 assert(old_override > THREAD_QOS_UNSPECIFIED);
2976 }
2977
2978 /*
2979 * We can't allow lowering if there are several IPC overrides because
2980 * the caller can't possibly know the whole truth
2981 */
2982 if (thread->kevent_overrides == 1) {
2983 needs_update = qos_override != old_override;
2984 } else {
2985 needs_update = qos_override > old_override;
2986 }
2987
2988 if (needs_update) {
2989 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2990 TASK_POLICY_QOS_KEVENT_OVERRIDE,
2991 qos_override, 0, &pend_token);
2992 assert(pend_token.tpt_update_sockets == 0);
2993 }
2994
2995 thread_unlock(thread);
2996 splx(s);
2997
2998 thread_policy_update_complete_unlocked(thread, &pend_token);
2999 }
3000
3001 void
3002 thread_add_kevent_override(thread_t thread, uint32_t qos_override)
3003 {
3004 thread_kevent_override(thread, qos_override, TRUE);
3005 }
3006
3007 void
3008 thread_update_kevent_override(thread_t thread, uint32_t qos_override)
3009 {
3010 thread_kevent_override(thread, qos_override, FALSE);
3011 }
3012
3013 void
3014 thread_drop_kevent_override(thread_t thread)
3015 {
3016 struct task_pend_token pend_token = {};
3017
3018 spl_t s = splsched();
3019 thread_lock(thread);
3020
3021 assert(thread->kevent_overrides > 0);
3022
3023 if (--thread->kevent_overrides == 0) {
3024 /*
3025 * There are no more overrides for this thread, so we should
3026 * clear out the saturated override value
3027 */
3028
3029 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3030 TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3031 0, &pend_token);
3032 }
3033
3034 thread_unlock(thread);
3035 splx(s);
3036
3037 thread_policy_update_complete_unlocked(thread, &pend_token);
3038 }
3039
3040 /*
3041 * Set the thread's QoS Workloop Servicer override
3042 * Owned by the Kevent subsystem
3043 *
3044 * May be called with spinlocks held, but not spinlocks
3045 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3046 *
3047 * One 'add' must be balanced by one 'drop'.
3048 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
3049 * Before the thread is deallocated, there must be 0 remaining overrides.
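 *
 * Illustrative lifecycle (an editor's sketch using the functions below):
 *
 *	thread_add_servicer_override(thread, THREAD_QOS_USER_INITIATED);
 *	...
 *	thread_update_servicer_override(thread, THREAD_QOS_UTILITY);
 *	...
 *	thread_drop_servicer_override(thread);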
3050 */ 3051 static void 3052 thread_servicer_override(thread_t thread, 3053 uint32_t qos_override, 3054 boolean_t is_new_override) 3055 { 3056 struct task_pend_token pend_token = {}; 3057 3058 spl_t s = splsched(); 3059 thread_lock(thread); 3060 3061 if (is_new_override) { 3062 assert(!thread->requested_policy.thrp_qos_wlsvc_override); 3063 } else { 3064 assert(thread->requested_policy.thrp_qos_wlsvc_override); 3065 } 3066 3067 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, 3068 TASK_POLICY_QOS_SERVICER_OVERRIDE, 3069 qos_override, 0, &pend_token); 3070 3071 thread_unlock(thread); 3072 splx(s); 3073 3074 assert(pend_token.tpt_update_sockets == 0); 3075 thread_policy_update_complete_unlocked(thread, &pend_token); 3076 } 3077 3078 void 3079 thread_add_servicer_override(thread_t thread, uint32_t qos_override) 3080 { 3081 assert(qos_override > THREAD_QOS_UNSPECIFIED); 3082 assert(qos_override < THREAD_QOS_LAST); 3083 3084 thread_servicer_override(thread, qos_override, TRUE); 3085 } 3086 3087 void 3088 thread_update_servicer_override(thread_t thread, uint32_t qos_override) 3089 { 3090 assert(qos_override > THREAD_QOS_UNSPECIFIED); 3091 assert(qos_override < THREAD_QOS_LAST); 3092 3093 thread_servicer_override(thread, qos_override, FALSE); 3094 } 3095 3096 void 3097 thread_drop_servicer_override(thread_t thread) 3098 { 3099 thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE); 3100 } 3101 3102 3103 /* Get current requested qos / relpri, may be called from spinlock context */ 3104 thread_qos_t 3105 thread_get_requested_qos(thread_t thread, int *relpri) 3106 { 3107 int relprio_value = 0; 3108 thread_qos_t qos; 3109 3110 qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, 3111 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value); 3112 if (relpri) { 3113 *relpri = -relprio_value; 3114 } 3115 return qos; 3116 } 3117 3118 /* 3119 * This function will promote the thread priority 3120 * since exec could block other threads calling 3121 * proc_find on the proc. This boost must be removed 3122 * via call to thread_clear_exec_promotion. 3123 * 3124 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397) 3125 */ 3126 void 3127 thread_set_exec_promotion(thread_t thread) 3128 { 3129 spl_t s = splsched(); 3130 thread_lock(thread); 3131 3132 sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0); 3133 3134 thread_unlock(thread); 3135 splx(s); 3136 } 3137 3138 /* 3139 * This function will clear the exec thread 3140 * promotion set on the thread by thread_set_exec_promotion. 3141 */ 3142 void 3143 thread_clear_exec_promotion(thread_t thread) 3144 { 3145 spl_t s = splsched(); 3146 thread_lock(thread); 3147 3148 sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0); 3149 3150 thread_unlock(thread); 3151 splx(s); 3152 }
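
/*
 * Illustrative pairing (an editor's sketch; per the comments above, the
 * exec promotion must be bracketed around the exec work that other threads
 * may block on via proc_find()):
 *
 *	thread_set_exec_promotion(thread);
 *	... exec ...
 *	thread_clear_exec_promotion(thread);
 */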