timer_call.c
1 /* 2 * Copyright (c) 1993-2008 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 /* 29 * Timer interrupt callout module. 30 */ 31 32 #include <mach/mach_types.h> 33 34 #include <kern/clock.h> 35 #include <kern/smp.h> 36 #include <kern/processor.h> 37 #include <kern/timer_call.h> 38 #include <kern/timer_queue.h> 39 #include <kern/thread.h> 40 #include <kern/policy_internal.h> 41 42 #include <sys/kdebug.h> 43 44 #if CONFIG_DTRACE 45 #include <mach/sdt.h> 46 #endif 47 48 49 #if DEBUG 50 #define TIMER_ASSERT 1 51 #endif 52 53 //#define TIMER_ASSERT 1 54 //#define TIMER_DBG 1 55 56 #if TIMER_DBG 57 #define DBG(x...) kprintf("DBG: " x); 58 #else 59 #define DBG(x...) 60 #endif 61 62 #if TIMER_TRACE 63 #define TIMER_KDEBUG_TRACE KERNEL_DEBUG_CONSTANT_IST 64 #else 65 #define TIMER_KDEBUG_TRACE(x...) 66 #endif 67 68 LCK_GRP_DECLARE(timer_call_lck_grp, "timer_call"); 69 LCK_GRP_DECLARE(timer_longterm_lck_grp, "timer_longterm"); 70 71 /* Timer queue lock must be acquired with interrupts disabled (under splclock()) */ 72 #define timer_queue_lock_spin(queue) \ 73 lck_mtx_lock_spin_always(&queue->lock_data) 74 75 #define timer_queue_unlock(queue) \ 76 lck_mtx_unlock_always(&queue->lock_data) 77 78 /* 79 * The longterm timer object is a global structure holding all timers 80 * beyond the short-term, local timer queue threshold. The boot processor 81 * is responsible for moving each timer to its local timer queue 82 * if and when that timer becomes due within the threshold. 83 */ 84 85 /* Sentinel for "no time set": */ 86 #define TIMER_LONGTERM_NONE EndOfAllTime 87 /* The default threshold is the delta above which a timer is "long-term" */ 88 #if defined(__x86_64__) 89 #define TIMER_LONGTERM_THRESHOLD (1ULL * NSEC_PER_SEC) /* 1 sec */ 90 #else 91 #define TIMER_LONGTERM_THRESHOLD TIMER_LONGTERM_NONE /* disabled */ 92 #endif 93 94 /* 95 * The scan_limit throttles processing of the longterm queue. 96 * If the scan time exceeds this limit, we terminate, unlock 97 * and defer for scan_interval. This prevents unbounded holding of 98 * timer queue locks with interrupts masked.
99 */ 100 #define TIMER_LONGTERM_SCAN_LIMIT (100ULL * NSEC_PER_USEC) /* 100 us */ 101 #define TIMER_LONGTERM_SCAN_INTERVAL (100ULL * NSEC_PER_USEC) /* 100 us */ 102 /* Sentinel for "scan limit exceeded": */ 103 #define TIMER_LONGTERM_SCAN_AGAIN 0 104 105 typedef struct { 106 uint64_t interval; /* longterm timer interval */ 107 uint64_t margin; /* fudge factor (10% of interval */ 108 uint64_t deadline; /* first/soonest longterm deadline */ 109 uint64_t preempted; /* sooner timer has pre-empted */ 110 timer_call_t call; /* first/soonest longterm timer call */ 111 uint64_t deadline_set; /* next timer set */ 112 timer_call_data_t timer; /* timer used by threshold management */ 113 /* Stats: */ 114 uint64_t scans; /* num threshold timer scans */ 115 uint64_t preempts; /* num threshold reductions */ 116 uint64_t latency; /* average threshold latency */ 117 uint64_t latency_min; /* minimum threshold latency */ 118 uint64_t latency_max; /* maximum threshold latency */ 119 } threshold_t; 120 121 typedef struct { 122 mpqueue_head_t queue; /* longterm timer list */ 123 uint64_t enqueues; /* num timers queued */ 124 uint64_t dequeues; /* num timers dequeued */ 125 uint64_t escalates; /* num timers becoming shortterm */ 126 uint64_t scan_time; /* last time the list was scanned */ 127 threshold_t threshold; /* longterm timer threshold */ 128 uint64_t scan_limit; /* maximum scan time */ 129 uint64_t scan_interval; /* interval between LT "escalation" scans */ 130 uint64_t scan_pauses; /* num scans exceeding time limit */ 131 } timer_longterm_t; 132 133 timer_longterm_t timer_longterm = { 134 .scan_limit = TIMER_LONGTERM_SCAN_LIMIT, 135 .scan_interval = TIMER_LONGTERM_SCAN_INTERVAL, 136 }; 137 138 static mpqueue_head_t *timer_longterm_queue = NULL; 139 140 static void timer_longterm_init(void); 141 static void timer_longterm_callout( 142 timer_call_param_t p0, 143 timer_call_param_t p1); 144 extern void timer_longterm_scan( 145 timer_longterm_t *tlp, 146 uint64_t now); 147 static void timer_longterm_update( 148 timer_longterm_t *tlp); 149 static void timer_longterm_update_locked( 150 timer_longterm_t *tlp); 151 static mpqueue_head_t * timer_longterm_enqueue_unlocked( 152 timer_call_t call, 153 uint64_t now, 154 uint64_t deadline, 155 mpqueue_head_t ** old_queue, 156 uint64_t soft_deadline, 157 uint64_t ttd, 158 timer_call_param_t param1, 159 uint32_t callout_flags); 160 static void timer_longterm_dequeued_locked( 161 timer_call_t call); 162 163 uint64_t past_deadline_timers; 164 uint64_t past_deadline_deltas; 165 uint64_t past_deadline_longest; 166 uint64_t past_deadline_shortest = ~0ULL; 167 enum {PAST_DEADLINE_TIMER_ADJUSTMENT_NS = 10 * 1000}; 168 169 uint64_t past_deadline_timer_adjustment; 170 171 static boolean_t timer_call_enter_internal(timer_call_t call, timer_call_param_t param1, uint64_t deadline, uint64_t leeway, uint32_t flags, boolean_t ratelimited); 172 boolean_t mach_timer_coalescing_enabled = TRUE; 173 174 mpqueue_head_t *timer_call_enqueue_deadline_unlocked( 175 timer_call_t call, 176 mpqueue_head_t *queue, 177 uint64_t deadline, 178 uint64_t soft_deadline, 179 uint64_t ttd, 180 timer_call_param_t param1, 181 uint32_t flags); 182 183 mpqueue_head_t *timer_call_dequeue_unlocked( 184 timer_call_t call); 185 186 timer_coalescing_priority_params_t tcoal_prio_params; 187 188 #if TCOAL_PRIO_STATS 189 int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl; 190 #define TCOAL_PRIO_STAT(x) (x++) 191 #else 192 #define TCOAL_PRIO_STAT(x) 193 #endif 194 195 static void 196 
timer_call_init_abstime(void) 197 { 198 int i; 199 uint64_t result; 200 timer_coalescing_priority_params_ns_t * tcoal_prio_params_init = timer_call_get_priority_params(); 201 nanoseconds_to_absolutetime(PAST_DEADLINE_TIMER_ADJUSTMENT_NS, &past_deadline_timer_adjustment); 202 nanoseconds_to_absolutetime(tcoal_prio_params_init->idle_entry_timer_processing_hdeadline_threshold_ns, &result); 203 tcoal_prio_params.idle_entry_timer_processing_hdeadline_threshold_abstime = (uint32_t)result; 204 nanoseconds_to_absolutetime(tcoal_prio_params_init->interrupt_timer_coalescing_ilat_threshold_ns, &result); 205 tcoal_prio_params.interrupt_timer_coalescing_ilat_threshold_abstime = (uint32_t)result; 206 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_resort_threshold_ns, &result); 207 tcoal_prio_params.timer_resort_threshold_abstime = (uint32_t)result; 208 tcoal_prio_params.timer_coalesce_rt_shift = tcoal_prio_params_init->timer_coalesce_rt_shift; 209 tcoal_prio_params.timer_coalesce_bg_shift = tcoal_prio_params_init->timer_coalesce_bg_shift; 210 tcoal_prio_params.timer_coalesce_kt_shift = tcoal_prio_params_init->timer_coalesce_kt_shift; 211 tcoal_prio_params.timer_coalesce_fp_shift = tcoal_prio_params_init->timer_coalesce_fp_shift; 212 tcoal_prio_params.timer_coalesce_ts_shift = tcoal_prio_params_init->timer_coalesce_ts_shift; 213 214 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_rt_ns_max, 215 &tcoal_prio_params.timer_coalesce_rt_abstime_max); 216 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_bg_ns_max, 217 &tcoal_prio_params.timer_coalesce_bg_abstime_max); 218 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_kt_ns_max, 219 &tcoal_prio_params.timer_coalesce_kt_abstime_max); 220 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_fp_ns_max, 221 &tcoal_prio_params.timer_coalesce_fp_abstime_max); 222 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_ts_ns_max, 223 &tcoal_prio_params.timer_coalesce_ts_abstime_max); 224 225 for (i = 0; i < NUM_LATENCY_QOS_TIERS; i++) { 226 tcoal_prio_params.latency_qos_scale[i] = tcoal_prio_params_init->latency_qos_scale[i]; 227 nanoseconds_to_absolutetime(tcoal_prio_params_init->latency_qos_ns_max[i], 228 &tcoal_prio_params.latency_qos_abstime_max[i]); 229 tcoal_prio_params.latency_tier_rate_limited[i] = tcoal_prio_params_init->latency_tier_rate_limited[i]; 230 } 231 } 232 233 234 void 235 timer_call_init(void) 236 { 237 timer_longterm_init(); 238 timer_call_init_abstime(); 239 } 240 241 242 void 243 timer_call_queue_init(mpqueue_head_t *queue) 244 { 245 DBG("timer_call_queue_init(%p)\n", queue); 246 mpqueue_init(queue, &timer_call_lck_grp, LCK_ATTR_NULL); 247 } 248 249 250 void 251 timer_call_setup( 252 timer_call_t call, 253 timer_call_func_t func, 254 timer_call_param_t param0) 255 { 256 DBG("timer_call_setup(%p,%p,%p)\n", call, func, param0); 257 258 *call = (struct timer_call) { 259 .tc_func = func, 260 .tc_param0 = param0, 261 .tc_async_dequeue = false, 262 }; 263 264 simple_lock_init(&(call)->tc_lock, 0); 265 } 266 267 static mpqueue_head_t* 268 mpqueue_for_timer_call(timer_call_t entry) 269 { 270 queue_t queue_entry_is_on = entry->tc_queue; 271 /* 'cast' the queue back to the orignal mpqueue */ 272 return __container_of(queue_entry_is_on, struct mpqueue_head, head); 273 } 274 275 276 static __inline__ mpqueue_head_t * 277 timer_call_entry_dequeue( 278 timer_call_t entry) 279 { 280 mpqueue_head_t *old_mpqueue = mpqueue_for_timer_call(entry); 281 282 /* The entry was 
always on a queue */ 283 assert(old_mpqueue != NULL); 284 285 #if TIMER_ASSERT 286 if (!hw_lock_held((hw_lock_t)&entry->tc_lock)) { 287 panic("_call_entry_dequeue() " 288 "entry %p is not locked\n", entry); 289 } 290 291 /* 292 * XXX The queue lock is actually a mutex in spin mode 293 * but there's no way to test for it being held 294 * so we pretend it's a spinlock! 295 */ 296 if (!hw_lock_held((hw_lock_t)&old_mpqueue->lock_data)) { 297 panic("_call_entry_dequeue() " 298 "queue %p is not locked\n", old_mpqueue); 299 } 300 #endif /* TIMER_ASSERT */ 301 302 if (old_mpqueue != timer_longterm_queue) { 303 priority_queue_remove(&old_mpqueue->mpq_pqhead, 304 &entry->tc_pqlink); 305 } 306 307 remqueue(&entry->tc_qlink); 308 309 entry->tc_queue = NULL; 310 311 old_mpqueue->count--; 312 313 return old_mpqueue; 314 } 315 316 static __inline__ mpqueue_head_t * 317 timer_call_entry_enqueue_deadline( 318 timer_call_t entry, 319 mpqueue_head_t *new_mpqueue, 320 uint64_t deadline) 321 { 322 mpqueue_head_t *old_mpqueue = mpqueue_for_timer_call(entry); 323 324 #if TIMER_ASSERT 325 if (!hw_lock_held((hw_lock_t)&entry->tc_lock)) { 326 panic("_call_entry_enqueue_deadline() " 327 "entry %p is not locked\n", entry); 328 } 329 330 /* XXX More lock pretense: */ 331 if (!hw_lock_held((hw_lock_t)&new_mpqueue->lock_data)) { 332 panic("_call_entry_enqueue_deadline() " 333 "queue %p is not locked\n", new_mpqueue); 334 } 335 336 if (old_mpqueue != NULL && old_mpqueue != new_mpqueue) { 337 panic("_call_entry_enqueue_deadline() " 338 "old_mpqueue %p != new_mpqueue", old_mpqueue); 339 } 340 #endif /* TIMER_ASSERT */ 341 342 /* no longterm queue involved */ 343 assert(new_mpqueue != timer_longterm_queue); 344 assert(old_mpqueue != timer_longterm_queue); 345 346 if (old_mpqueue == new_mpqueue) { 347 /* optimize the same-queue case to avoid a full re-insert */ 348 uint64_t old_deadline = entry->tc_pqlink.deadline; 349 entry->tc_pqlink.deadline = deadline; 350 351 if (old_deadline < deadline) { 352 priority_queue_entry_increased(&new_mpqueue->mpq_pqhead, 353 &entry->tc_pqlink); 354 } else { 355 priority_queue_entry_decreased(&new_mpqueue->mpq_pqhead, 356 &entry->tc_pqlink); 357 } 358 } else { 359 if (old_mpqueue != NULL) { 360 priority_queue_remove(&old_mpqueue->mpq_pqhead, 361 &entry->tc_pqlink); 362 363 re_queue_tail(&new_mpqueue->head, &entry->tc_qlink); 364 } else { 365 enqueue_tail(&new_mpqueue->head, &entry->tc_qlink); 366 } 367 368 entry->tc_queue = &new_mpqueue->head; 369 entry->tc_pqlink.deadline = deadline; 370 371 priority_queue_insert(&new_mpqueue->mpq_pqhead, &entry->tc_pqlink); 372 } 373 374 375 /* For efficiency, track the earliest soft deadline on the queue, 376 * so that fuzzy decisions can be made without lock acquisitions. 377 */ 378 379 timer_call_t thead = priority_queue_min(&new_mpqueue->mpq_pqhead, struct timer_call, tc_pqlink); 380 381 new_mpqueue->earliest_soft_deadline = thead->tc_flags & TIMER_CALL_RATELIMITED ? 
thead->tc_pqlink.deadline : thead->tc_soft_deadline; 382 383 if (old_mpqueue) { 384 old_mpqueue->count--; 385 } 386 new_mpqueue->count++; 387 388 return old_mpqueue; 389 } 390 391 static __inline__ void 392 timer_call_entry_enqueue_tail( 393 timer_call_t entry, 394 mpqueue_head_t *queue) 395 { 396 /* entry is always dequeued before this call */ 397 assert(entry->tc_queue == NULL); 398 399 /* 400 * this is only used for timer_longterm_queue, which is unordered 401 * and thus needs no priority queueing 402 */ 403 assert(queue == timer_longterm_queue); 404 405 enqueue_tail(&queue->head, &entry->tc_qlink); 406 407 entry->tc_queue = &queue->head; 408 409 queue->count++; 410 return; 411 } 412 413 /* 414 * Remove timer entry from its queue but don't change the queue pointer 415 * and set the async_dequeue flag. This is locking case 2b. 416 */ 417 static __inline__ void 418 timer_call_entry_dequeue_async( 419 timer_call_t entry) 420 { 421 mpqueue_head_t *old_mpqueue = mpqueue_for_timer_call(entry); 422 if (old_mpqueue) { 423 old_mpqueue->count--; 424 425 if (old_mpqueue != timer_longterm_queue) { 426 priority_queue_remove(&old_mpqueue->mpq_pqhead, 427 &entry->tc_pqlink); 428 } 429 430 remqueue(&entry->tc_qlink); 431 entry->tc_async_dequeue = true; 432 } 433 return; 434 } 435 436 #if TIMER_ASSERT 437 unsigned timer_call_enqueue_deadline_unlocked_async1; 438 unsigned timer_call_enqueue_deadline_unlocked_async2; 439 #endif 440 /* 441 * Assumes call_entry and queues unlocked, interrupts disabled. 442 */ 443 __inline__ mpqueue_head_t * 444 timer_call_enqueue_deadline_unlocked( 445 timer_call_t call, 446 mpqueue_head_t *queue, 447 uint64_t deadline, 448 uint64_t soft_deadline, 449 uint64_t ttd, 450 timer_call_param_t param1, 451 uint32_t callout_flags) 452 { 453 DBG("timer_call_enqueue_deadline_unlocked(%p,%p,)\n", call, queue); 454 455 simple_lock(&call->tc_lock, LCK_GRP_NULL); 456 457 mpqueue_head_t *old_queue = mpqueue_for_timer_call(call); 458 459 if (old_queue != NULL) { 460 timer_queue_lock_spin(old_queue); 461 if (call->tc_async_dequeue) { 462 /* collision (1c): timer already dequeued, clear flag */ 463 #if TIMER_ASSERT 464 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 465 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, 466 VM_KERNEL_UNSLIDE_OR_PERM(call), 467 call->tc_async_dequeue, 468 VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue), 469 0x1c, 0); 470 timer_call_enqueue_deadline_unlocked_async1++; 471 #endif 472 call->tc_async_dequeue = false; 473 call->tc_queue = NULL; 474 } else if (old_queue != queue) { 475 timer_call_entry_dequeue(call); 476 #if TIMER_ASSERT 477 timer_call_enqueue_deadline_unlocked_async2++; 478 #endif 479 } 480 if (old_queue == timer_longterm_queue) { 481 timer_longterm_dequeued_locked(call); 482 } 483 if (old_queue != queue) { 484 timer_queue_unlock(old_queue); 485 timer_queue_lock_spin(queue); 486 } 487 } else { 488 timer_queue_lock_spin(queue); 489 } 490 491 call->tc_soft_deadline = soft_deadline; 492 call->tc_flags = callout_flags; 493 call->tc_param1 = param1; 494 call->tc_ttd = ttd; 495 496 timer_call_entry_enqueue_deadline(call, queue, deadline); 497 timer_queue_unlock(queue); 498 simple_unlock(&call->tc_lock); 499 500 return old_queue; 501 } 502 503 #if TIMER_ASSERT 504 unsigned timer_call_dequeue_unlocked_async1; 505 unsigned timer_call_dequeue_unlocked_async2; 506 #endif 507 mpqueue_head_t * 508 timer_call_dequeue_unlocked( 509 timer_call_t call) 510 { 511 DBG("timer_call_dequeue_unlocked(%p)\n", call); 512 513 simple_lock(&call->tc_lock, LCK_GRP_NULL); 514 515 mpqueue_head_t *old_queue = 
mpqueue_for_timer_call(call); 516 517 #if TIMER_ASSERT 518 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 519 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, 520 VM_KERNEL_UNSLIDE_OR_PERM(call), 521 call->tc_async_dequeue, 522 VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue), 523 0, 0); 524 #endif 525 if (old_queue != NULL) { 526 timer_queue_lock_spin(old_queue); 527 if (call->tc_async_dequeue) { 528 /* collision (1c): timer already dequeued, clear flag */ 529 #if TIMER_ASSERT 530 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 531 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, 532 VM_KERNEL_UNSLIDE_OR_PERM(call), 533 call->tc_async_dequeue, 534 VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue), 535 0x1c, 0); 536 timer_call_dequeue_unlocked_async1++; 537 #endif 538 call->tc_async_dequeue = false; 539 call->tc_queue = NULL; 540 } else { 541 timer_call_entry_dequeue(call); 542 } 543 if (old_queue == timer_longterm_queue) { 544 timer_longterm_dequeued_locked(call); 545 } 546 timer_queue_unlock(old_queue); 547 } 548 simple_unlock(&call->tc_lock); 549 return old_queue; 550 } 551 552 uint64_t 553 timer_call_past_deadline_timer_handle(uint64_t deadline, uint64_t ctime) 554 { 555 uint64_t delta = (ctime - deadline); 556 557 past_deadline_timers++; 558 past_deadline_deltas += delta; 559 if (delta > past_deadline_longest) { 560 past_deadline_longest = deadline; 561 } 562 if (delta < past_deadline_shortest) { 563 past_deadline_shortest = delta; 564 } 565 566 return ctime + past_deadline_timer_adjustment; 567 } 568 569 /* 570 * Timer call entry locking model 571 * ============================== 572 * 573 * Timer call entries are linked on per-cpu timer queues which are protected 574 * by the queue lock and the call entry lock. The locking protocol is: 575 * 576 * 0) The canonical locking order is timer call entry followed by queue. 577 * 578 * 1) With only the entry lock held, entry.queue is valid: 579 * 1a) NULL: the entry is not queued, or 580 * 1b) non-NULL: this queue must be locked before the entry is modified. 581 * After locking the queue, the call.async_dequeue flag must be checked: 582 * 1c) TRUE: the entry was removed from the queue by another thread 583 * and we must NULL the entry.queue and reset this flag, or 584 * 1d) FALSE: (ie. queued), the entry can be manipulated. 585 * 586 * 2) If a queue lock is obtained first, the queue is stable: 587 * 2a) If a try-lock of a queued entry succeeds, the call can be operated on 588 * and dequeued. 589 * 2b) If a try-lock fails, it indicates that another thread is attempting 590 * to change the entry and move it to a different position in this queue 591 * or to different queue. The entry can be dequeued but it should not be 592 * operated upon since it is being changed. Furthermore, we don't null 593 * the entry.queue pointer (protected by the entry lock we don't own). 594 * Instead, we set the async_dequeue flag -- see (1c). 595 * 2c) Same as 2b but occurring when a longterm timer is matured. 596 * 3) A callout's parameters (deadline, flags, parameters, soft deadline &c.) 597 * should be manipulated with the appropriate timer queue lock held, 598 * to prevent queue traversal observations from observing inconsistent 599 * updates to an in-flight callout. 600 */ 601 602 /* 603 * In the debug case, we assert that the timer call locking protocol 604 * is being obeyed. 
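 *
 * A minimal illustrative sketch (it mirrors timer_call_dequeue_unlocked()
 * above) of the canonical case (0)/(1b)/(1c) sequence:
 *
 *	simple_lock(&call->tc_lock, LCK_GRP_NULL);	-- (0) entry lock first
 *	mpqueue_head_t *old = mpqueue_for_timer_call(call);
 *	if (old != NULL) {				-- (1b) queue pointer valid
 *		timer_queue_lock_spin(old);
 *		if (call->tc_async_dequeue) {		-- (1c) already removed elsewhere
 *			call->tc_async_dequeue = false;
 *			call->tc_queue = NULL;
 *		} else {
 *			timer_call_entry_dequeue(call);	-- (1d) safe to manipulate
 *		}
 *		timer_queue_unlock(old);
 *	}
 *	simple_unlock(&call->tc_lock);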
605 */ 606 607 static boolean_t 608 timer_call_enter_internal( 609 timer_call_t call, 610 timer_call_param_t param1, 611 uint64_t deadline, 612 uint64_t leeway, 613 uint32_t flags, 614 boolean_t ratelimited) 615 { 616 mpqueue_head_t *queue = NULL; 617 mpqueue_head_t *old_queue; 618 spl_t s; 619 uint64_t slop; 620 uint32_t urgency; 621 uint64_t sdeadline, ttd; 622 623 assert(call->tc_func != NULL); 624 s = splclock(); 625 626 sdeadline = deadline; 627 uint64_t ctime = mach_absolute_time(); 628 629 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 630 DECR_TIMER_ENTER | DBG_FUNC_START, 631 VM_KERNEL_UNSLIDE_OR_PERM(call), 632 VM_KERNEL_ADDRHIDE(param1), deadline, flags, 0); 633 634 urgency = (flags & TIMER_CALL_URGENCY_MASK); 635 636 boolean_t slop_ratelimited = FALSE; 637 slop = timer_call_slop(deadline, ctime, urgency, current_thread(), &slop_ratelimited); 638 639 if ((flags & TIMER_CALL_LEEWAY) != 0 && leeway > slop) { 640 slop = leeway; 641 } 642 643 if (UINT64_MAX - deadline <= slop) { 644 deadline = UINT64_MAX; 645 } else { 646 deadline += slop; 647 } 648 649 if (__improbable(deadline < ctime)) { 650 deadline = timer_call_past_deadline_timer_handle(deadline, ctime); 651 sdeadline = deadline; 652 } 653 654 if (ratelimited || slop_ratelimited) { 655 flags |= TIMER_CALL_RATELIMITED; 656 } else { 657 flags &= ~TIMER_CALL_RATELIMITED; 658 } 659 660 ttd = sdeadline - ctime; 661 #if CONFIG_DTRACE 662 DTRACE_TMR7(callout__create, timer_call_func_t, call->tc_func, 663 timer_call_param_t, call->tc_param0, uint32_t, flags, 664 (deadline - sdeadline), 665 (ttd >> 32), (unsigned) (ttd & 0xFFFFFFFF), call); 666 #endif 667 668 /* Program timer callout parameters under the appropriate per-CPU or 669 * longterm queue lock. The callout may have been previously enqueued 670 * and in-flight on this or another timer queue. 671 */ 672 if (!ratelimited && !slop_ratelimited) { 673 queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue, sdeadline, ttd, param1, flags); 674 } 675 676 if (queue == NULL) { 677 queue = timer_queue_assign(deadline); 678 old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline, sdeadline, ttd, param1, flags); 679 } 680 681 #if TIMER_TRACE 682 call->tc_entry_time = ctime; 683 #endif 684 685 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 686 DECR_TIMER_ENTER | DBG_FUNC_END, 687 VM_KERNEL_UNSLIDE_OR_PERM(call), 688 (old_queue != NULL), deadline, queue->count, 0); 689 690 splx(s); 691 692 return old_queue != NULL; 693 } 694 695 /* 696 * timer_call_*() 697 * return boolean indicating whether the call was previously queued. 
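 *
 * Illustrative caller-side sketch (not part of this file; the timer object,
 * callout function and parameters here are hypothetical):
 *
 *	static timer_call_data_t my_timer;
 *
 *	timer_call_setup(&my_timer, my_timeout_func, my_param0);
 *	...
 *	uint64_t deadline;
 *	clock_interval_to_deadline(10, NSEC_PER_MSEC, &deadline);
 *	timer_call_enter1(&my_timer, my_param1, deadline, TIMER_CALL_SYS_NORMAL);
 *	...
 *	boolean_t was_pending = timer_call_cancel(&my_timer);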
698 */ 699 boolean_t 700 timer_call_enter( 701 timer_call_t call, 702 uint64_t deadline, 703 uint32_t flags) 704 { 705 return timer_call_enter_internal(call, NULL, deadline, 0, flags, FALSE); 706 } 707 708 boolean_t 709 timer_call_enter1( 710 timer_call_t call, 711 timer_call_param_t param1, 712 uint64_t deadline, 713 uint32_t flags) 714 { 715 return timer_call_enter_internal(call, param1, deadline, 0, flags, FALSE); 716 } 717 718 boolean_t 719 timer_call_enter_with_leeway( 720 timer_call_t call, 721 timer_call_param_t param1, 722 uint64_t deadline, 723 uint64_t leeway, 724 uint32_t flags, 725 boolean_t ratelimited) 726 { 727 return timer_call_enter_internal(call, param1, deadline, leeway, flags, ratelimited); 728 } 729 730 boolean_t 731 timer_call_cancel( 732 timer_call_t call) 733 { 734 mpqueue_head_t *old_queue; 735 spl_t s; 736 737 s = splclock(); 738 739 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 740 DECR_TIMER_CANCEL | DBG_FUNC_START, 741 VM_KERNEL_UNSLIDE_OR_PERM(call), 742 call->tc_pqlink.deadline, call->tc_soft_deadline, call->tc_flags, 0); 743 744 old_queue = timer_call_dequeue_unlocked(call); 745 746 if (old_queue != NULL) { 747 timer_queue_lock_spin(old_queue); 748 749 timer_call_t new_head = priority_queue_min(&old_queue->mpq_pqhead, struct timer_call, tc_pqlink); 750 751 if (new_head) { 752 timer_queue_cancel(old_queue, call->tc_pqlink.deadline, new_head->tc_pqlink.deadline); 753 old_queue->earliest_soft_deadline = new_head->tc_flags & TIMER_CALL_RATELIMITED ? new_head->tc_pqlink.deadline : new_head->tc_soft_deadline; 754 } else { 755 timer_queue_cancel(old_queue, call->tc_pqlink.deadline, UINT64_MAX); 756 old_queue->earliest_soft_deadline = UINT64_MAX; 757 } 758 759 timer_queue_unlock(old_queue); 760 } 761 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 762 DECR_TIMER_CANCEL | DBG_FUNC_END, 763 VM_KERNEL_UNSLIDE_OR_PERM(call), 764 VM_KERNEL_UNSLIDE_OR_PERM(old_queue), 765 call->tc_pqlink.deadline - mach_absolute_time(), 766 call->tc_pqlink.deadline - call->tc_entry_time, 0); 767 splx(s); 768 769 #if CONFIG_DTRACE 770 DTRACE_TMR6(callout__cancel, timer_call_func_t, call->tc_func, 771 timer_call_param_t, call->tc_param0, uint32_t, call->tc_flags, 0, 772 (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF)); 773 #endif /* CONFIG_DTRACE */ 774 775 return old_queue != NULL; 776 } 777 778 static uint32_t timer_queue_shutdown_lock_skips; 779 static uint32_t timer_queue_shutdown_discarded; 780 781 void 782 timer_queue_shutdown( 783 mpqueue_head_t *queue) 784 { 785 timer_call_t call; 786 mpqueue_head_t *new_queue; 787 spl_t s; 788 789 790 DBG("timer_queue_shutdown(%p)\n", queue); 791 792 s = splclock(); 793 794 while (TRUE) { 795 timer_queue_lock_spin(queue); 796 797 call = qe_queue_first(&queue->head, struct timer_call, tc_qlink); 798 799 if (call == NULL) { 800 break; 801 } 802 803 if (!simple_lock_try(&call->tc_lock, LCK_GRP_NULL)) { 804 /* 805 * case (2b) lock order inversion, dequeue and skip 806 * Don't change the call_entry queue back-pointer 807 * but set the async_dequeue field. 
808 */ 809 timer_queue_shutdown_lock_skips++; 810 timer_call_entry_dequeue_async(call); 811 #if TIMER_ASSERT 812 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 813 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, 814 VM_KERNEL_UNSLIDE_OR_PERM(call), 815 call->tc_async_dequeue, 816 VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue), 817 0x2b, 0); 818 #endif 819 timer_queue_unlock(queue); 820 continue; 821 } 822 823 boolean_t call_local = ((call->tc_flags & TIMER_CALL_LOCAL) != 0); 824 825 /* remove entry from old queue */ 826 timer_call_entry_dequeue(call); 827 timer_queue_unlock(queue); 828 829 if (call_local == FALSE) { 830 /* and queue it on new, discarding LOCAL timers */ 831 new_queue = timer_queue_assign(call->tc_pqlink.deadline); 832 timer_queue_lock_spin(new_queue); 833 timer_call_entry_enqueue_deadline( 834 call, new_queue, call->tc_pqlink.deadline); 835 timer_queue_unlock(new_queue); 836 } else { 837 timer_queue_shutdown_discarded++; 838 } 839 840 assert(call_local == FALSE); 841 simple_unlock(&call->tc_lock); 842 } 843 844 timer_queue_unlock(queue); 845 splx(s); 846 } 847 848 849 static uint32_t timer_queue_expire_lock_skips; 850 uint64_t 851 timer_queue_expire_with_options( 852 mpqueue_head_t *queue, 853 uint64_t deadline, 854 boolean_t rescan) 855 { 856 timer_call_t call = NULL; 857 uint32_t tc_iterations = 0; 858 DBG("timer_queue_expire(%p,)\n", queue); 859 860 /* 'rescan' means look at every timer in the list, instead of 861 * early-exiting when the head of the list expires in the future. 862 * when 'rescan' is true, iterate by linked list instead of priority queue. 863 * 864 * TODO: if we keep a deadline ordered and soft-deadline ordered 865 * priority queue, then it's no longer necessary to do that 866 */ 867 868 uint64_t cur_deadline = deadline; 869 timer_queue_lock_spin(queue); 870 871 while (!queue_empty(&queue->head)) { 872 /* Upon processing one or more timer calls, refresh the 873 * deadline to account for time elapsed in the callout 874 */ 875 if (++tc_iterations > 1) { 876 cur_deadline = mach_absolute_time(); 877 } 878 879 if (call == NULL) { 880 if (rescan == FALSE) { 881 call = priority_queue_min(&queue->mpq_pqhead, struct timer_call, tc_pqlink); 882 } else { 883 call = qe_queue_first(&queue->head, struct timer_call, tc_qlink); 884 } 885 } 886 887 if (call->tc_soft_deadline <= cur_deadline) { 888 timer_call_func_t func; 889 timer_call_param_t param0, param1; 890 891 TCOAL_DEBUG(0xDDDD0000, queue->earliest_soft_deadline, call->tc_soft_deadline, 0, 0, 0); 892 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 893 DECR_TIMER_EXPIRE | DBG_FUNC_NONE, 894 VM_KERNEL_UNSLIDE_OR_PERM(call), 895 call->tc_soft_deadline, 896 call->tc_pqlink.deadline, 897 call->tc_entry_time, 0); 898 899 if ((call->tc_flags & TIMER_CALL_RATELIMITED) && 900 (call->tc_pqlink.deadline > cur_deadline)) { 901 if (rescan == FALSE) { 902 break; 903 } 904 } 905 906 if (!simple_lock_try(&call->tc_lock, LCK_GRP_NULL)) { 907 /* case (2b) lock inversion, dequeue and skip */ 908 timer_queue_expire_lock_skips++; 909 timer_call_entry_dequeue_async(call); 910 call = NULL; 911 continue; 912 } 913 914 timer_call_entry_dequeue(call); 915 916 func = call->tc_func; 917 param0 = call->tc_param0; 918 param1 = call->tc_param1; 919 920 simple_unlock(&call->tc_lock); 921 timer_queue_unlock(queue); 922 923 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 924 DECR_TIMER_CALLOUT | DBG_FUNC_START, 925 VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func), 926 VM_KERNEL_ADDRHIDE(param0), 927 VM_KERNEL_ADDRHIDE(param1), 928 0); 929 930 #if CONFIG_DTRACE 931 DTRACE_TMR7(callout__start, 
timer_call_func_t, func, 932 timer_call_param_t, param0, unsigned, call->tc_flags, 933 0, (call->tc_ttd >> 32), 934 (unsigned) (call->tc_ttd & 0xFFFFFFFF), call); 935 #endif 936 #ifndef __DARLING__ 937 /* Maintain time-to-deadline in per-processor data 938 * structure for thread wakeup deadline statistics. 939 */ 940 uint64_t *ttdp = &current_processor()->timer_call_ttd; 941 *ttdp = call->tc_ttd; 942 #endif // __DARLING__ 943 (*func)(param0, param1); 944 #ifndef __DARLING__ 945 *ttdp = 0; 946 #endif // __DARLING__ 947 #if CONFIG_DTRACE 948 DTRACE_TMR4(callout__end, timer_call_func_t, func, 949 param0, param1, call); 950 #endif 951 952 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 953 DECR_TIMER_CALLOUT | DBG_FUNC_END, 954 VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func), 955 VM_KERNEL_ADDRHIDE(param0), 956 VM_KERNEL_ADDRHIDE(param1), 957 0); 958 call = NULL; 959 timer_queue_lock_spin(queue); 960 } else { 961 if (__probable(rescan == FALSE)) { 962 break; 963 } else { 964 int64_t skew = call->tc_pqlink.deadline - call->tc_soft_deadline; 965 assert(call->tc_pqlink.deadline >= call->tc_soft_deadline); 966 967 /* DRK: On a latency quality-of-service level change, 968 * re-sort potentially rate-limited timers. The platform 969 * layer determines which timers require 970 * this. In the absence of the per-callout 971 * synchronization requirement, a global resort could 972 * be more efficient. The re-sort effectively 973 * annuls all timer adjustments, i.e. the "soft 974 * deadline" is the sort key. 975 */ 976 977 if (timer_resort_threshold(skew)) { 978 if (__probable(simple_lock_try(&call->tc_lock, LCK_GRP_NULL))) { 979 /* TODO: don't need to dequeue before enqueue */ 980 timer_call_entry_dequeue(call); 981 timer_call_entry_enqueue_deadline(call, queue, call->tc_soft_deadline); 982 simple_unlock(&call->tc_lock); 983 call = NULL; 984 } 985 } 986 if (call) { 987 call = qe_queue_next(&queue->head, call, struct timer_call, tc_qlink); 988 989 if (call == NULL) { 990 break; 991 } 992 } 993 } 994 } 995 } 996 997 call = priority_queue_min(&queue->mpq_pqhead, struct timer_call, tc_pqlink); 998 999 if (call) { 1000 cur_deadline = call->tc_pqlink.deadline; 1001 queue->earliest_soft_deadline = (call->tc_flags & TIMER_CALL_RATELIMITED) ? call->tc_pqlink.deadline: call->tc_soft_deadline; 1002 } else { 1003 queue->earliest_soft_deadline = cur_deadline = UINT64_MAX; 1004 } 1005 1006 timer_queue_unlock(queue); 1007 1008 return cur_deadline; 1009 } 1010 1011 uint64_t 1012 timer_queue_expire( 1013 mpqueue_head_t *queue, 1014 uint64_t deadline) 1015 { 1016 return timer_queue_expire_with_options(queue, deadline, FALSE); 1017 } 1018 1019 extern int serverperfmode; 1020 static uint32_t timer_queue_migrate_lock_skips; 1021 /* 1022 * timer_queue_migrate() is called by timer_queue_migrate_cpu() 1023 * to move timer requests from the local processor (queue_from) 1024 * to a target processor's (queue_to). 1025 */ 1026 int 1027 timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to) 1028 { 1029 timer_call_t call; 1030 timer_call_t head_to; 1031 int timers_migrated = 0; 1032 1033 DBG("timer_queue_migrate(%p,%p)\n", queue_from, queue_to); 1034 1035 assert(!ml_get_interrupts_enabled()); 1036 assert(queue_from != queue_to); 1037 1038 if (serverperfmode) { 1039 /* 1040 * if we're running a high end server 1041 * avoid migrations...
they add latency 1042 * and don't save us power under typical 1043 * server workloads 1044 */ 1045 return -4; 1046 } 1047 1048 /* 1049 * Take both local (from) and target (to) timer queue locks while 1050 * moving the timers from the local queue to the target processor. 1051 * We assume that the target is always the boot processor. 1052 * But only move if all of the following is true: 1053 * - the target queue is non-empty 1054 * - the local queue is non-empty 1055 * - the local queue's first deadline is later than the target's 1056 * - the local queue contains no non-migrateable "local" call 1057 * so that we need not have the target resync. 1058 */ 1059 1060 timer_queue_lock_spin(queue_to); 1061 1062 head_to = priority_queue_min(&queue_to->mpq_pqhead, struct timer_call, tc_pqlink); 1063 1064 if (head_to == NULL) { 1065 timers_migrated = -1; 1066 goto abort1; 1067 } 1068 1069 timer_queue_lock_spin(queue_from); 1070 1071 call = priority_queue_min(&queue_from->mpq_pqhead, struct timer_call, tc_pqlink); 1072 1073 if (call == NULL) { 1074 timers_migrated = -2; 1075 goto abort2; 1076 } 1077 1078 if (call->tc_pqlink.deadline < head_to->tc_pqlink.deadline) { 1079 timers_migrated = 0; 1080 goto abort2; 1081 } 1082 1083 /* perform scan for non-migratable timers */ 1084 qe_foreach_element(call, &queue_from->head, tc_qlink) { 1085 if (call->tc_flags & TIMER_CALL_LOCAL) { 1086 timers_migrated = -3; 1087 goto abort2; 1088 } 1089 } 1090 1091 /* migration loop itself -- both queues are locked */ 1092 qe_foreach_element_safe(call, &queue_from->head, tc_qlink) { 1093 if (!simple_lock_try(&call->tc_lock, LCK_GRP_NULL)) { 1094 /* case (2b) lock order inversion, dequeue only */ 1095 #ifdef TIMER_ASSERT 1096 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 1097 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, 1098 VM_KERNEL_UNSLIDE_OR_PERM(call), 1099 VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue), 1100 0, 1101 0x2b, 0); 1102 #endif 1103 timer_queue_migrate_lock_skips++; 1104 timer_call_entry_dequeue_async(call); 1105 continue; 1106 } 1107 timer_call_entry_dequeue(call); 1108 timer_call_entry_enqueue_deadline( 1109 call, queue_to, call->tc_pqlink.deadline); 1110 timers_migrated++; 1111 simple_unlock(&call->tc_lock); 1112 } 1113 queue_from->earliest_soft_deadline = UINT64_MAX; 1114 abort2: 1115 timer_queue_unlock(queue_from); 1116 abort1: 1117 timer_queue_unlock(queue_to); 1118 1119 return timers_migrated; 1120 } 1121 1122 void 1123 timer_queue_trace_cpu(int ncpu) 1124 { 1125 timer_call_nosync_cpu( 1126 ncpu, 1127 (void (*)(void *))timer_queue_trace, 1128 (void*) timer_queue_cpu(ncpu)); 1129 } 1130 1131 void 1132 timer_queue_trace( 1133 mpqueue_head_t *queue) 1134 { 1135 timer_call_t call; 1136 spl_t s; 1137 1138 if (!kdebug_enable) { 1139 return; 1140 } 1141 1142 s = splclock(); 1143 timer_queue_lock_spin(queue); 1144 1145 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 1146 DECR_TIMER_QUEUE | DBG_FUNC_START, 1147 queue->count, mach_absolute_time(), 0, 0, 0); 1148 1149 qe_foreach_element(call, &queue->head, tc_qlink) { 1150 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 1151 DECR_TIMER_QUEUE | DBG_FUNC_NONE, 1152 call->tc_soft_deadline, 1153 call->tc_pqlink.deadline, 1154 call->tc_entry_time, 1155 VM_KERNEL_UNSLIDE(call->tc_func), 1156 0); 1157 } 1158 1159 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 1160 DECR_TIMER_QUEUE | DBG_FUNC_END, 1161 queue->count, mach_absolute_time(), 0, 0, 0); 1162 1163 timer_queue_unlock(queue); 1164 splx(s); 1165 } 1166 1167 void 1168 timer_longterm_dequeued_locked(timer_call_t call) 1169 { 1170 timer_longterm_t *tlp = &timer_longterm; 1171 1172 
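	/*
	 * Called with the longterm queue locked: account the dequeue and,
	 * if this call was cached as the soonest threshold candidate, drop
	 * that reference so the next scan recomputes the threshold deadline.
	 */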
tlp->dequeues++; 1173 if (call == tlp->threshold.call) { 1174 tlp->threshold.call = NULL; 1175 } 1176 } 1177 1178 /* 1179 * Place a timer call in the longterm list 1180 * and adjust the next timer callout deadline if the new timer is first. 1181 */ 1182 mpqueue_head_t * 1183 timer_longterm_enqueue_unlocked(timer_call_t call, 1184 uint64_t now, 1185 uint64_t deadline, 1186 mpqueue_head_t **old_queue, 1187 uint64_t soft_deadline, 1188 uint64_t ttd, 1189 timer_call_param_t param1, 1190 uint32_t callout_flags) 1191 { 1192 timer_longterm_t *tlp = &timer_longterm; 1193 boolean_t update_required = FALSE; 1194 uint64_t longterm_threshold; 1195 1196 longterm_threshold = now + tlp->threshold.interval; 1197 1198 /* 1199 * Return NULL without doing anything if: 1200 * - this timer is local, or 1201 * - the longterm mechanism is disabled, or 1202 * - this deadline is too short. 1203 */ 1204 if ((callout_flags & TIMER_CALL_LOCAL) != 0 || 1205 (tlp->threshold.interval == TIMER_LONGTERM_NONE) || 1206 (deadline <= longterm_threshold)) { 1207 return NULL; 1208 } 1209 1210 /* 1211 * Remove timer from its current queue, if any. 1212 */ 1213 *old_queue = timer_call_dequeue_unlocked(call); 1214 1215 /* 1216 * Lock the longterm queue, queue timer and determine 1217 * whether an update is necessary. 1218 */ 1219 assert(!ml_get_interrupts_enabled()); 1220 simple_lock(&call->tc_lock, LCK_GRP_NULL); 1221 timer_queue_lock_spin(timer_longterm_queue); 1222 call->tc_pqlink.deadline = deadline; 1223 call->tc_param1 = param1; 1224 call->tc_ttd = ttd; 1225 call->tc_soft_deadline = soft_deadline; 1226 call->tc_flags = callout_flags; 1227 timer_call_entry_enqueue_tail(call, timer_longterm_queue); 1228 1229 tlp->enqueues++; 1230 1231 /* 1232 * We'll need to update the currently set threshold timer 1233 * if the new deadline is sooner and no sooner update is in flight. 1234 */ 1235 if (deadline < tlp->threshold.deadline && 1236 deadline < tlp->threshold.preempted) { 1237 tlp->threshold.preempted = deadline; 1238 tlp->threshold.call = call; 1239 update_required = TRUE; 1240 } 1241 timer_queue_unlock(timer_longterm_queue); 1242 simple_unlock(&call->tc_lock); 1243 1244 if (update_required) { 1245 /* 1246 * Note: this call expects that calling the master cpu 1247 * alone does not involve locking the topo lock. 1248 */ 1249 timer_call_nosync_cpu( 1250 master_cpu, 1251 (void (*)(void *))timer_longterm_update, 1252 (void *)tlp); 1253 } 1254 1255 return timer_longterm_queue; 1256 } 1257 1258 /* 1259 * Scan for timers below the longterm threshold. 1260 * Move these to the local timer queue (of the boot processor on which the 1261 * calling thread is running). 1262 * Both the local (boot) queue and the longterm queue are locked. 1263 * The scan is similar to the timer migrate sequence but is performed by 1264 * successively examining each timer on the longterm queue: 1265 * - if within the short-term threshold 1266 * - enter on the local queue (unless being deleted), 1267 * - otherwise: 1268 * - if sooner, deadline becomes the next threshold deadline. 1269 * The total scan time is limited to TIMER_LONGTERM_SCAN_LIMIT. Should this be 1270 * exceeded, we abort and reschedule again so that we don't shut others from 1271 * the timer queues. Longterm timers firing late is not critical. 
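 *
 * For example, with the default scan_limit and scan_interval (both 100 us),
 * a scan that overruns its budget records TIMER_LONGTERM_SCAN_AGAIN in
 * threshold.deadline and bumps scan_pauses; timer_longterm_update_locked()
 * then clamps the next threshold timer to roughly now + scan_interval, so
 * the remainder of the queue is revisited about 100 us later.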
1272 */ 1273 void 1274 timer_longterm_scan(timer_longterm_t *tlp, 1275 uint64_t time_start) 1276 { 1277 timer_call_t call; 1278 uint64_t threshold; 1279 uint64_t deadline; 1280 uint64_t time_limit = time_start + tlp->scan_limit; 1281 mpqueue_head_t *timer_master_queue; 1282 1283 assert(!ml_get_interrupts_enabled()); 1284 assert(cpu_number() == master_cpu); 1285 1286 if (tlp->threshold.interval != TIMER_LONGTERM_NONE) { 1287 threshold = time_start + tlp->threshold.interval; 1288 } 1289 1290 tlp->threshold.deadline = TIMER_LONGTERM_NONE; 1291 tlp->threshold.call = NULL; 1292 1293 if (queue_empty(&timer_longterm_queue->head)) { 1294 return; 1295 } 1296 1297 timer_master_queue = timer_queue_cpu(master_cpu); 1298 timer_queue_lock_spin(timer_master_queue); 1299 1300 qe_foreach_element_safe(call, &timer_longterm_queue->head, tc_qlink) { 1301 deadline = call->tc_soft_deadline; 1302 if (!simple_lock_try(&call->tc_lock, LCK_GRP_NULL)) { 1303 /* case (2c) lock order inversion, dequeue only */ 1304 #ifdef TIMER_ASSERT 1305 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 1306 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, 1307 VM_KERNEL_UNSLIDE_OR_PERM(call), 1308 VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue), 1309 0, 1310 0x2c, 0); 1311 #endif 1312 timer_call_entry_dequeue_async(call); 1313 continue; 1314 } 1315 if (deadline < threshold) { 1316 /* 1317 * This timer needs moving (escalating) 1318 * to the local (boot) processor's queue. 1319 */ 1320 #ifdef TIMER_ASSERT 1321 if (deadline < time_start) { 1322 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 1323 DECR_TIMER_OVERDUE | DBG_FUNC_NONE, 1324 VM_KERNEL_UNSLIDE_OR_PERM(call), 1325 deadline, 1326 time_start, 1327 threshold, 1328 0); 1329 } 1330 #endif 1331 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 1332 DECR_TIMER_ESCALATE | DBG_FUNC_NONE, 1333 VM_KERNEL_UNSLIDE_OR_PERM(call), 1334 call->tc_pqlink.deadline, 1335 call->tc_entry_time, 1336 VM_KERNEL_UNSLIDE(call->tc_func), 1337 0); 1338 tlp->escalates++; 1339 timer_call_entry_dequeue(call); 1340 timer_call_entry_enqueue_deadline( 1341 call, timer_master_queue, call->tc_pqlink.deadline); 1342 /* 1343 * A side-effect of the following call is to update 1344 * the actual hardware deadline if required. 1345 */ 1346 (void) timer_queue_assign(deadline); 1347 } else { 1348 if (deadline < tlp->threshold.deadline) { 1349 tlp->threshold.deadline = deadline; 1350 tlp->threshold.call = call; 1351 } 1352 } 1353 simple_unlock(&call->tc_lock); 1354 1355 /* Abort scan if we're taking too long. 
*/ 1356 if (mach_absolute_time() > time_limit) { 1357 tlp->threshold.deadline = TIMER_LONGTERM_SCAN_AGAIN; 1358 tlp->scan_pauses++; 1359 DBG("timer_longterm_scan() paused %llu, qlen: %llu\n", 1360 time_limit, tlp->queue.count); 1361 break; 1362 } 1363 } 1364 1365 timer_queue_unlock(timer_master_queue); 1366 } 1367 1368 void 1369 timer_longterm_callout(timer_call_param_t p0, __unused timer_call_param_t p1) 1370 { 1371 timer_longterm_t *tlp = (timer_longterm_t *) p0; 1372 1373 timer_longterm_update(tlp); 1374 } 1375 1376 void 1377 timer_longterm_update_locked(timer_longterm_t *tlp) 1378 { 1379 uint64_t latency; 1380 1381 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 1382 DECR_TIMER_UPDATE | DBG_FUNC_START, 1383 VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue), 1384 tlp->threshold.deadline, 1385 tlp->threshold.preempted, 1386 tlp->queue.count, 0); 1387 1388 tlp->scan_time = mach_absolute_time(); 1389 if (tlp->threshold.preempted != TIMER_LONGTERM_NONE) { 1390 tlp->threshold.preempts++; 1391 tlp->threshold.deadline = tlp->threshold.preempted; 1392 tlp->threshold.preempted = TIMER_LONGTERM_NONE; 1393 /* 1394 * Note: in the unlikely event that a pre-empted timer has 1395 * itself been cancelled, we'll simply re-scan later at the 1396 * time of the preempted/cancelled timer. 1397 */ 1398 } else { 1399 tlp->threshold.scans++; 1400 1401 /* 1402 * Maintain a moving average of our wakeup latency. 1403 * Clamp latency to 0 and ignore above threshold interval. 1404 */ 1405 if (tlp->scan_time > tlp->threshold.deadline_set) { 1406 latency = tlp->scan_time - tlp->threshold.deadline_set; 1407 } else { 1408 latency = 0; 1409 } 1410 if (latency < tlp->threshold.interval) { 1411 tlp->threshold.latency_min = 1412 MIN(tlp->threshold.latency_min, latency); 1413 tlp->threshold.latency_max = 1414 MAX(tlp->threshold.latency_max, latency); 1415 tlp->threshold.latency = 1416 (tlp->threshold.latency * 99 + latency) / 100; 1417 } 1418 1419 timer_longterm_scan(tlp, tlp->scan_time); 1420 } 1421 1422 tlp->threshold.deadline_set = tlp->threshold.deadline; 1423 /* The next deadline timer to be set is adjusted */ 1424 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE && 1425 tlp->threshold.deadline != TIMER_LONGTERM_SCAN_AGAIN) { 1426 tlp->threshold.deadline_set -= tlp->threshold.margin; 1427 tlp->threshold.deadline_set -= tlp->threshold.latency; 1428 } 1429 1430 /* Throttle next scan time */ 1431 uint64_t scan_clamp = mach_absolute_time() + tlp->scan_interval; 1432 if (tlp->threshold.deadline_set < scan_clamp) { 1433 tlp->threshold.deadline_set = scan_clamp; 1434 } 1435 1436 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 1437 DECR_TIMER_UPDATE | DBG_FUNC_END, 1438 VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue), 1439 tlp->threshold.deadline, 1440 tlp->threshold.scans, 1441 tlp->queue.count, 0); 1442 } 1443 1444 void 1445 timer_longterm_update(timer_longterm_t *tlp) 1446 { 1447 spl_t s = splclock(); 1448 1449 timer_queue_lock_spin(timer_longterm_queue); 1450 1451 if (cpu_number() != master_cpu) { 1452 panic("timer_longterm_update_master() on non-boot cpu"); 1453 } 1454 1455 timer_longterm_update_locked(tlp); 1456 1457 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) { 1458 timer_call_enter( 1459 &tlp->threshold.timer, 1460 tlp->threshold.deadline_set, 1461 TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL); 1462 } 1463 1464 timer_queue_unlock(timer_longterm_queue); 1465 splx(s); 1466 } 1467 1468 void 1469 timer_longterm_init(void) 1470 { 1471 uint32_t longterm; 1472 timer_longterm_t *tlp = &timer_longterm; 1473 1474 DBG("timer_longterm_init() tlp: %p, queue: %p\n", tlp, 
&tlp->queue); 1475 1476 /* 1477 * Set the longterm timer threshold. Defaults to TIMER_LONGTERM_THRESHOLD 1478 * or TIMER_LONGTERM_NONE (disabled) for server; 1479 * overridden longterm boot-arg 1480 */ 1481 tlp->threshold.interval = serverperfmode ? TIMER_LONGTERM_NONE 1482 : TIMER_LONGTERM_THRESHOLD; 1483 if (PE_parse_boot_argn("longterm", &longterm, sizeof(longterm))) { 1484 tlp->threshold.interval = (longterm == 0) ? 1485 TIMER_LONGTERM_NONE : 1486 longterm * NSEC_PER_MSEC; 1487 } 1488 if (tlp->threshold.interval != TIMER_LONGTERM_NONE) { 1489 printf("Longterm timer threshold: %llu ms\n", 1490 tlp->threshold.interval / NSEC_PER_MSEC); 1491 kprintf("Longterm timer threshold: %llu ms\n", 1492 tlp->threshold.interval / NSEC_PER_MSEC); 1493 nanoseconds_to_absolutetime(tlp->threshold.interval, 1494 &tlp->threshold.interval); 1495 tlp->threshold.margin = tlp->threshold.interval / 10; 1496 tlp->threshold.latency_min = EndOfAllTime; 1497 tlp->threshold.latency_max = 0; 1498 } 1499 1500 tlp->threshold.preempted = TIMER_LONGTERM_NONE; 1501 tlp->threshold.deadline = TIMER_LONGTERM_NONE; 1502 1503 mpqueue_init(&tlp->queue, &timer_longterm_lck_grp, LCK_ATTR_NULL); 1504 1505 timer_call_setup(&tlp->threshold.timer, 1506 timer_longterm_callout, (timer_call_param_t) tlp); 1507 1508 timer_longterm_queue = &tlp->queue; 1509 } 1510 1511 enum { 1512 THRESHOLD, QCOUNT, 1513 ENQUEUES, DEQUEUES, ESCALATES, SCANS, PREEMPTS, 1514 LATENCY, LATENCY_MIN, LATENCY_MAX, SCAN_LIMIT, SCAN_INTERVAL, PAUSES 1515 }; 1516 uint64_t 1517 timer_sysctl_get(int oid) 1518 { 1519 timer_longterm_t *tlp = &timer_longterm; 1520 1521 switch (oid) { 1522 case THRESHOLD: 1523 return (tlp->threshold.interval == TIMER_LONGTERM_NONE) ? 1524 0 : tlp->threshold.interval / NSEC_PER_MSEC; 1525 case QCOUNT: 1526 return tlp->queue.count; 1527 case ENQUEUES: 1528 return tlp->enqueues; 1529 case DEQUEUES: 1530 return tlp->dequeues; 1531 case ESCALATES: 1532 return tlp->escalates; 1533 case SCANS: 1534 return tlp->threshold.scans; 1535 case PREEMPTS: 1536 return tlp->threshold.preempts; 1537 case LATENCY: 1538 return tlp->threshold.latency; 1539 case LATENCY_MIN: 1540 return tlp->threshold.latency_min; 1541 case LATENCY_MAX: 1542 return tlp->threshold.latency_max; 1543 case SCAN_LIMIT: 1544 return tlp->scan_limit; 1545 case SCAN_INTERVAL: 1546 return tlp->scan_interval; 1547 case PAUSES: 1548 return tlp->scan_pauses; 1549 default: 1550 return 0; 1551 } 1552 } 1553 1554 /* 1555 * timer_master_scan() is the inverse of timer_longterm_scan() 1556 * since it un-escalates timers to the longterm queue. 
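 * It runs on the master CPU from timer_sysctl_set_threshold() when the
 * threshold is reduced or newly enabled, so that timers on the master
 * queue now falling beyond the threshold move back to the longterm queue.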
1557 */ 1558 static void 1559 timer_master_scan(timer_longterm_t *tlp, 1560 uint64_t now) 1561 { 1562 timer_call_t call; 1563 uint64_t threshold; 1564 uint64_t deadline; 1565 mpqueue_head_t *timer_master_queue; 1566 1567 if (tlp->threshold.interval != TIMER_LONGTERM_NONE) { 1568 threshold = now + tlp->threshold.interval; 1569 } else { 1570 threshold = TIMER_LONGTERM_NONE; 1571 } 1572 1573 timer_master_queue = timer_queue_cpu(master_cpu); 1574 timer_queue_lock_spin(timer_master_queue); 1575 1576 qe_foreach_element_safe(call, &timer_master_queue->head, tc_qlink) { 1577 deadline = call->tc_pqlink.deadline; 1578 if ((call->tc_flags & TIMER_CALL_LOCAL) != 0) { 1579 continue; 1580 } 1581 if (!simple_lock_try(&call->tc_lock, LCK_GRP_NULL)) { 1582 /* case (2c) lock order inversion, dequeue only */ 1583 timer_call_entry_dequeue_async(call); 1584 continue; 1585 } 1586 if (deadline > threshold) { 1587 /* move from master to longterm */ 1588 timer_call_entry_dequeue(call); 1589 timer_call_entry_enqueue_tail(call, timer_longterm_queue); 1590 if (deadline < tlp->threshold.deadline) { 1591 tlp->threshold.deadline = deadline; 1592 tlp->threshold.call = call; 1593 } 1594 } 1595 simple_unlock(&call->tc_lock); 1596 } 1597 timer_queue_unlock(timer_master_queue); 1598 } 1599 1600 static void 1601 timer_sysctl_set_threshold(uint64_t value) 1602 { 1603 timer_longterm_t *tlp = &timer_longterm; 1604 spl_t s = splclock(); 1605 boolean_t threshold_increase; 1606 1607 timer_queue_lock_spin(timer_longterm_queue); 1608 1609 timer_call_cancel(&tlp->threshold.timer); 1610 1611 /* 1612 * Set the new threshold and note whther it's increasing. 1613 */ 1614 if (value == 0) { 1615 tlp->threshold.interval = TIMER_LONGTERM_NONE; 1616 threshold_increase = TRUE; 1617 timer_call_cancel(&tlp->threshold.timer); 1618 } else { 1619 uint64_t old_interval = tlp->threshold.interval; 1620 tlp->threshold.interval = value * NSEC_PER_MSEC; 1621 nanoseconds_to_absolutetime(tlp->threshold.interval, 1622 &tlp->threshold.interval); 1623 tlp->threshold.margin = tlp->threshold.interval / 10; 1624 if (old_interval == TIMER_LONGTERM_NONE) { 1625 threshold_increase = FALSE; 1626 } else { 1627 threshold_increase = (tlp->threshold.interval > old_interval); 1628 } 1629 } 1630 1631 if (threshold_increase /* or removal */) { 1632 /* Escalate timers from the longterm queue */ 1633 timer_longterm_scan(tlp, mach_absolute_time()); 1634 } else { /* decrease or addition */ 1635 /* 1636 * We scan the local/master queue for timers now longterm. 1637 * To be strictly correct, we should scan all processor queues 1638 * but timer migration results in most timers gravitating to the 1639 * master processor in any case. 
1640 */ 1641 timer_master_scan(tlp, mach_absolute_time()); 1642 } 1643 1644 /* Set new timer accordingly */ 1645 tlp->threshold.deadline_set = tlp->threshold.deadline; 1646 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) { 1647 tlp->threshold.deadline_set -= tlp->threshold.margin; 1648 tlp->threshold.deadline_set -= tlp->threshold.latency; 1649 timer_call_enter( 1650 &tlp->threshold.timer, 1651 tlp->threshold.deadline_set, 1652 TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL); 1653 } 1654 1655 /* Reset stats */ 1656 tlp->enqueues = 0; 1657 tlp->dequeues = 0; 1658 tlp->escalates = 0; 1659 tlp->scan_pauses = 0; 1660 tlp->threshold.scans = 0; 1661 tlp->threshold.preempts = 0; 1662 tlp->threshold.latency = 0; 1663 tlp->threshold.latency_min = EndOfAllTime; 1664 tlp->threshold.latency_max = 0; 1665 1666 timer_queue_unlock(timer_longterm_queue); 1667 splx(s); 1668 } 1669 1670 int 1671 timer_sysctl_set(int oid, uint64_t value) 1672 { 1673 switch (oid) { 1674 case THRESHOLD: 1675 timer_call_cpu( 1676 master_cpu, 1677 (void (*)(void *))timer_sysctl_set_threshold, 1678 (void *) value); 1679 return KERN_SUCCESS; 1680 case SCAN_LIMIT: 1681 timer_longterm.scan_limit = value; 1682 return KERN_SUCCESS; 1683 case SCAN_INTERVAL: 1684 timer_longterm.scan_interval = value; 1685 return KERN_SUCCESS; 1686 default: 1687 return KERN_INVALID_ARGUMENT; 1688 } 1689 } 1690 1691 1692 /* Select timer coalescing window based on per-task quality-of-service hints */ 1693 static boolean_t 1694 tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) 1695 { 1696 uint32_t latency_qos; 1697 boolean_t adjusted = FALSE; 1698 task_t ctask = t->task; 1699 1700 if (ctask) { 1701 latency_qos = proc_get_effective_thread_policy(t, TASK_POLICY_LATENCY_QOS); 1702 1703 assert(latency_qos <= NUM_LATENCY_QOS_TIERS); 1704 1705 if (latency_qos) { 1706 *tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1]; 1707 *tmax_abstime = tcoal_prio_params.latency_qos_abstime_max[latency_qos - 1]; 1708 *pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1]; 1709 adjusted = TRUE; 1710 } 1711 } 1712 return adjusted; 1713 } 1714 1715 1716 /* Adjust timer deadlines based on priority of the thread and the 1717 * urgency value provided at timeout establishment. With this mechanism, 1718 * timers are no longer necessarily sorted in order of soft deadline 1719 * on a given timer queue, i.e. they may be differentially skewed. 1720 * In the current scheme, this could lead to fewer pending timers 1721 * processed than is technically possible when the HW deadline arrives. 
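 *
 * Worked example of the resulting slop (see timer_call_slop() below),
 * assuming the per-class coalescing maximum is not the limiting term:
 * for a timeshare thread with a deadline 8 ms out and a shift of 3, the
 * base adjustment is (deadline - now) >> 3 = 1 ms; with
 * timer_user_idle_level at 64, the idle heuristic adds
 * (1 ms * 64) >> 7 = 0.5 ms, for 1.5 ms of total leeway.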
1722 */ 1723 static void 1724 timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) 1725 { 1726 int16_t tpri = cthread->sched_pri; 1727 if ((urgency & TIMER_CALL_USER_MASK) != 0) { 1728 if (tpri >= BASEPRI_RTQUEUES || 1729 urgency == TIMER_CALL_USER_CRITICAL) { 1730 *tshift = tcoal_prio_params.timer_coalesce_rt_shift; 1731 *tmax_abstime = tcoal_prio_params.timer_coalesce_rt_abstime_max; 1732 TCOAL_PRIO_STAT(rt_tcl); 1733 } else if (proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG) || 1734 (urgency == TIMER_CALL_USER_BACKGROUND)) { 1735 /* Determine if timer should be subjected to a lower QoS */ 1736 if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) { 1737 if (*tmax_abstime > tcoal_prio_params.timer_coalesce_bg_abstime_max) { 1738 return; 1739 } else { 1740 *pratelimited = FALSE; 1741 } 1742 } 1743 *tshift = tcoal_prio_params.timer_coalesce_bg_shift; 1744 *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max; 1745 TCOAL_PRIO_STAT(bg_tcl); 1746 } else if (tpri >= MINPRI_KERNEL) { 1747 *tshift = tcoal_prio_params.timer_coalesce_kt_shift; 1748 *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max; 1749 TCOAL_PRIO_STAT(kt_tcl); 1750 } else if (cthread->sched_mode == TH_MODE_FIXED) { 1751 *tshift = tcoal_prio_params.timer_coalesce_fp_shift; 1752 *tmax_abstime = tcoal_prio_params.timer_coalesce_fp_abstime_max; 1753 TCOAL_PRIO_STAT(fp_tcl); 1754 } else if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) { 1755 TCOAL_PRIO_STAT(qos_tcl); 1756 } else if (cthread->sched_mode == TH_MODE_TIMESHARE) { 1757 *tshift = tcoal_prio_params.timer_coalesce_ts_shift; 1758 *tmax_abstime = tcoal_prio_params.timer_coalesce_ts_abstime_max; 1759 TCOAL_PRIO_STAT(ts_tcl); 1760 } else { 1761 TCOAL_PRIO_STAT(nc_tcl); 1762 } 1763 } else if (urgency == TIMER_CALL_SYS_BACKGROUND) { 1764 *tshift = tcoal_prio_params.timer_coalesce_bg_shift; 1765 *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max; 1766 TCOAL_PRIO_STAT(bg_tcl); 1767 } else { 1768 *tshift = tcoal_prio_params.timer_coalesce_kt_shift; 1769 *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max; 1770 TCOAL_PRIO_STAT(kt_tcl); 1771 } 1772 } 1773 1774 1775 int timer_user_idle_level; 1776 1777 uint64_t 1778 timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited) 1779 { 1780 int32_t tcs_shift = 0; 1781 uint64_t tcs_max_abstime = 0; 1782 uint64_t adjval; 1783 uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK); 1784 1785 if (mach_timer_coalescing_enabled && 1786 (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) { 1787 timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_max_abstime, pratelimited); 1788 1789 if (tcs_shift >= 0) { 1790 adjval = MIN((deadline - now) >> tcs_shift, tcs_max_abstime); 1791 } else { 1792 adjval = MIN((deadline - now) << (-tcs_shift), tcs_max_abstime); 1793 } 1794 /* Apply adjustments derived from "user idle level" heuristic */ 1795 adjval += (adjval * timer_user_idle_level) >> 7; 1796 return adjval; 1797 } else { 1798 return 0; 1799 } 1800 } 1801 1802 int 1803 timer_get_user_idle_level(void) 1804 { 1805 return timer_user_idle_level; 1806 } 1807 1808 kern_return_t 1809 timer_set_user_idle_level(int ilevel) 1810 { 1811 boolean_t do_reeval = FALSE; 1812 1813 if ((ilevel < 0) || (ilevel > 128)) { 1814 return KERN_INVALID_ARGUMENT; 1815 } 1816 1817 if (ilevel < timer_user_idle_level) { 1818 do_reeval = TRUE; 1819 } 1820 1821 
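	/*
	 * A lower idle level shrinks the slop added by timer_call_slop(),
	 * so deadlines coalesced under the previous, higher level may now
	 * be too lax; in that case re-evaluate the hardware deadline below.
	 */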
int
timer_get_user_idle_level(void)
{
	return timer_user_idle_level;
}

kern_return_t
timer_set_user_idle_level(int ilevel)
{
	boolean_t do_reeval = FALSE;

	if ((ilevel < 0) || (ilevel > 128)) {
		return KERN_INVALID_ARGUMENT;
	}

	if (ilevel < timer_user_idle_level) {
		do_reeval = TRUE;
	}

	timer_user_idle_level = ilevel;

	if (do_reeval) {
		ml_timer_evaluate();
	}

	return KERN_SUCCESS;
}

#pragma mark - running timers

#define RUNNING_TIMER_FAKE_FLAGS (TIMER_CALL_SYS_CRITICAL | \
    TIMER_CALL_LOCAL)

/*
 * timer_call_trace_* functions mimic the tracing behavior from the normal
 * timer_call subsystem, so tools continue to function.
 */

static void
timer_call_trace_enter_before(struct timer_call *call, uint64_t deadline,
    uint32_t flags, uint64_t now)
{
#pragma unused(call, deadline, flags, now)
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ENTER | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_ADDRHIDE(call->tc_param1),
	    deadline, flags, 0);
#if CONFIG_DTRACE
	uint64_t ttd = deadline - now;
	DTRACE_TMR7(callout__create, timer_call_func_t, call->tc_func,
	    timer_call_param_t, call->tc_param0, uint32_t, flags, 0,
	    (ttd >> 32), (unsigned int)(ttd & 0xFFFFFFFF), NULL);
#endif /* CONFIG_DTRACE */
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ENTER | DBG_FUNC_END,
	    VM_KERNEL_UNSLIDE_OR_PERM(call), 0, deadline, 0, 0);
}

static void
timer_call_trace_enter_after(struct timer_call *call, uint64_t deadline)
{
#pragma unused(call, deadline)
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ENTER | DBG_FUNC_END,
	    VM_KERNEL_UNSLIDE_OR_PERM(call), 0, deadline, 0, 0);
}

static void
timer_call_trace_cancel(struct timer_call *call)
{
#pragma unused(call)
	__unused uint64_t deadline = call->tc_pqlink.deadline;
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_CANCEL | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(call), deadline, 0,
	    call->tc_flags, 0);
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_CANCEL | DBG_FUNC_END,
	    VM_KERNEL_UNSLIDE_OR_PERM(call), 0, deadline - mach_absolute_time(),
	    deadline - call->tc_entry_time, 0);
#if CONFIG_DTRACE
#if TIMER_TRACE
	uint64_t ttd = deadline - call->tc_entry_time;
#else
	uint64_t ttd = UINT64_MAX;
#endif /* TIMER_TRACE */
	DTRACE_TMR6(callout__cancel, timer_call_func_t, call->tc_func,
	    timer_call_param_t, call->tc_param0, uint32_t, call->tc_flags, 0,
	    (ttd >> 32), (unsigned int)(ttd & 0xFFFFFFFF));
#endif /* CONFIG_DTRACE */
}

static void
timer_call_trace_expire_entry(struct timer_call *call)
{
#pragma unused(call)
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_CALLOUT | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(call->tc_func),
	    VM_KERNEL_ADDRHIDE(call->tc_param0),
	    VM_KERNEL_ADDRHIDE(call->tc_param1),
	    0);
#if CONFIG_DTRACE
#if TIMER_TRACE
	uint64_t ttd = call->tc_pqlink.deadline - call->tc_entry_time;
#else /* TIMER_TRACE */
	uint64_t ttd = UINT64_MAX;
#endif /* TIMER_TRACE */
	DTRACE_TMR7(callout__start, timer_call_func_t, call->tc_func,
	    timer_call_param_t, call->tc_param0, unsigned, call->tc_flags,
	    0, (ttd >> 32), (unsigned int)(ttd & 0xFFFFFFFF), NULL);
#endif /* CONFIG_DTRACE */
}

static void
timer_call_trace_expire_return(struct timer_call *call)
{
#pragma unused(call)
#if CONFIG_DTRACE
	DTRACE_TMR4(callout__end, timer_call_func_t, call->tc_func,
	    call->tc_param0, call->tc_param1, NULL);
#endif /* CONFIG_DTRACE */
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_CALLOUT | DBG_FUNC_END,
	    VM_KERNEL_UNSLIDE_OR_PERM(call),
	    VM_KERNEL_UNSLIDE(call->tc_func),
	    VM_KERNEL_ADDRHIDE(call->tc_param0),
	    VM_KERNEL_ADDRHIDE(call->tc_param1),
	    0);
}
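/*
 * Illustrative only: the DTrace probes above split the time-to-deadline
 * (ttd) across two 32-bit probe arguments; a consumer script or tool would
 * recombine them as below. The helper is hypothetical and not compiled.
 */
#if 0
static uint64_t
ttd_from_probe_args_example(uint32_t ttd_hi, uint32_t ttd_lo)
{
	/* Reverses the (ttd >> 32), (ttd & 0xFFFFFFFF) split used above. */
	return ((uint64_t)ttd_hi << 32) | ttd_lo;
}
#endif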
/*
 * Set a new deadline for a running timer on this processor.
 */
void
running_timer_setup(processor_t processor, enum running_timer timer,
    void *param, uint64_t deadline, uint64_t now)
{
	assert(timer < RUNNING_TIMER_MAX);
	assert(ml_get_interrupts_enabled() == FALSE);

	struct timer_call *call = &processor->running_timers[timer];

	timer_call_trace_enter_before(call, deadline, RUNNING_TIMER_FAKE_FLAGS,
	    now);

	if (__improbable(deadline < now)) {
		deadline = timer_call_past_deadline_timer_handle(deadline, now);
	}

	call->tc_pqlink.deadline = deadline;
#if TIMER_TRACE
	call->tc_entry_time = now;
#endif /* TIMER_TRACE */
	call->tc_param1 = param;

	timer_call_trace_enter_after(call, deadline);
}

void
running_timers_sync(void)
{
	timer_resync_deadlines();
}

void
running_timer_enter(processor_t processor, unsigned int timer,
    void *param, uint64_t deadline, uint64_t now)
{
	running_timer_setup(processor, timer, param, deadline, now);
	running_timers_sync();
}

/*
 * Call the callback for any running timers that fired for this processor.
 * Returns true if any timers were past their deadline.
 */
bool
running_timers_expire(processor_t processor, uint64_t now)
{
	bool expired = false;

	if (!processor->running_timers_active) {
		return expired;
	}

	for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
		struct timer_call *call = &processor->running_timers[i];

		uint64_t deadline = call->tc_pqlink.deadline;
		if (deadline > now) {
			continue;
		}

		expired = true;
		timer_call_trace_expire_entry(call);
		call->tc_func(call->tc_param0, call->tc_param1);
		timer_call_trace_expire_return(call);
	}

	return expired;
}

void
running_timer_clear(processor_t processor, enum running_timer timer)
{
	struct timer_call *call = &processor->running_timers[timer];
	uint64_t deadline = call->tc_pqlink.deadline;
	if (deadline == EndOfAllTime) {
		return;
	}

	call->tc_pqlink.deadline = EndOfAllTime;
#if TIMER_TRACE
	call->tc_entry_time = 0;
#endif /* TIMER_TRACE */
	timer_call_trace_cancel(call);
}

void
running_timer_cancel(processor_t processor, unsigned int timer)
{
	running_timer_clear(processor, timer);
	running_timers_sync();
}

uint64_t
running_timers_deadline(processor_t processor)
{
	if (!processor->running_timers_active) {
		return EndOfAllTime;
	}

	uint64_t deadline = EndOfAllTime;
	for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
		uint64_t candidate =
		    processor->running_timers[i].tc_pqlink.deadline;
		if (candidate != 0 && candidate < deadline) {
			deadline = candidate;
		}
	}

	return deadline;
}
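/*
 * Illustrative only: conceptually, running_timers_sync() causes the
 * per-processor decrementer to be reprogrammed with the earlier of the
 * regular timer-queue deadline and running_timers_deadline(). A simplified
 * picture, assuming a hypothetical timer_queue_deadline_example value; the
 * helper below is not part of the kernel build.
 */
#if 0
static uint64_t
next_decrementer_deadline_example(processor_t processor,
    uint64_t timer_queue_deadline_example)
{
	/*
	 * When the running-timer set is inactive, running_timers_deadline()
	 * returns EndOfAllTime, so the queue deadline wins the MIN.
	 */
	return MIN(timer_queue_deadline_example,
	    running_timers_deadline(processor));
}
#endif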
void
running_timers_activate(processor_t processor)
{
	processor->running_timers_active = true;
	running_timers_sync();
}

void
running_timers_deactivate(processor_t processor)
{
	assert(processor->running_timers_active == true);
	processor->running_timers_active = false;

	running_timers_sync();
}
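/*
 * Illustrative only: a minimal sketch of the running-timer lifecycle as the
 * functions above suggest it: activate the processor's set once, arm a slot
 * with running_timer_enter(), let the timer-interrupt path drain it with
 * running_timers_expire(), then cancel and deactivate. RUNNING_TIMER_QUANTUM
 * is assumed to be one of the enum running_timer slots; the wiring of
 * tc_func/tc_param0 for each slot happens elsewhere and is not shown, and
 * interrupts are assumed to be disabled here, as running_timer_setup()
 * asserts. Not part of the kernel build.
 */
#if 0
static void
running_timer_lifecycle_example(processor_t processor, uint64_t quantum_abs)
{
	uint64_t now = mach_absolute_time();

	running_timers_activate(processor);

	/* Arm the quantum slot and reprogram the decrementer. */
	running_timer_enter(processor, RUNNING_TIMER_QUANTUM, NULL,
	    now + quantum_abs, now);

	/* ...later, from the timer interrupt path... */
	(void)running_timers_expire(processor, mach_absolute_time());

	/* Tear down when the processor goes idle or offline. */
	running_timer_cancel(processor, RUNNING_TIMER_QUANTUM);
	running_timers_deactivate(processor);
}
#endif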