waitq.c
/*
 * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_FREE_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * un-comment the following lines to debug the link/prepost tables
 * NOTE: this expands each element by ~40 bytes
 */
//#define KEEP_WAITQ_LINK_STATS
//#define KEEP_WAITQ_PREPOST_STATS

#include <kern/ast.h>
#include <kern/backtrace.h>
#include <kern/kern_types.h>
#include <kern/ltable.h>
#include <kern/mach_param.h>
#include <kern/percpu.h>
#include <kern/queue.h>
#include <kern/sched_prim.h>
#include <kern/simple_lock.h>
#include <kern/spl.h>
#include <kern/waitq.h>
#include <kern/zalloc.h>
#include <kern/policy_internal.h>
#include <kern/turnstile.h>

#include <os/hash.h>
#include <libkern/OSAtomic.h>
#include <mach/sync_policy.h>
#include <vm/vm_kern.h>

#include <sys/kdebug.h>

#if defined(KEEP_WAITQ_LINK_STATS) || defined(KEEP_WAITQ_PREPOST_STATS)
# if !CONFIG_LTABLE_STATS
#  error "You must configure LTABLE_STATS to use WAITQ_[LINK|PREPOST]_STATS"
# endif
# if !CONFIG_WAITQ_STATS
#  error "You must configure WAITQ_STATS to use WAITQ_[LINK|PREPOST]_STATS"
# endif
#endif

#ifdef __DARLING__
#include <darlingserver/duct-tape/log.h>
#define wqdbg(fmt, ...)   dtape_log_debug("WQ[%s]: " fmt, __func__, ## __VA_ARGS__)
#define wqdbg_v(fmt, ...) wqdbg(fmt, ## __VA_ARGS__)
#define wqinfo(fmt, ...)  dtape_log_info("WQ[%s]: " fmt, __func__, ## __VA_ARGS__)
#define wqerr(fmt, ...)   dtape_log_error("WQ[%s]: " fmt, __func__, ## __VA_ARGS__)
#else
#if CONFIG_WAITQ_DEBUG
#define wqdbg(fmt, ...) \
    printf("WQ[%s]: " fmt "\n", __func__, ## __VA_ARGS__)
#else
#define wqdbg(fmt, ...) do { } while (0)
#endif

#ifdef WAITQ_VERBOSE_DEBUG
#define wqdbg_v(fmt, ...) \
    printf("WQ[v:%s]: " fmt "\n", __func__, ## __VA_ARGS__)
#else
#define wqdbg_v(fmt, ...) do { } while (0)
#endif

#define wqinfo(fmt, ...) \
    printf("WQ[%s]: " fmt "\n", __func__, ## __VA_ARGS__)

#define wqerr(fmt, ...) \
    printf("WQ[%s] ERROR: " fmt "\n", __func__, ## __VA_ARGS__)
#endif // __DARLING__

/*
 * file-static functions / data
 */
static thread_t waitq_select_one_locked(struct waitq *waitq, event64_t event,
    uint64_t *reserved_preposts,
    int priority, spl_t *spl);

static kern_return_t waitq_select_thread_locked(struct waitq *waitq,
    event64_t event,
    thread_t thread, spl_t *spl);

ZONE_DECLARE(waitq_set_zone, "waitq sets",
    sizeof(struct waitq_set), ZC_NOENCRYPT);

/* waitq prepost cache */
#define WQP_CACHE_MAX 50
struct wqp_cache {
    uint64_t     head;
    unsigned int avail;
};
static struct wqp_cache PERCPU_DATA(wqp_cache);

#define P2ROUNDUP(x, align) (-(-((uint32_t)(x)) & -(align)))
#define ROUNDDOWN(x, y)     (((x)/(y))*(y))


#if CONFIG_LTABLE_STATS || CONFIG_WAITQ_STATS
static __inline__ void waitq_grab_backtrace(uintptr_t bt[NWAITQ_BTFRAMES], int skip);
#endif

LCK_GRP_DECLARE(waitq_lck_grp, "waitq");

#ifdef __DARLING__
extern void waitq_lock_init(struct waitq* wq);
#else
#if __arm64__

#define waitq_lock_to(wq, to) \
    (hw_lock_bit_to(&(wq)->waitq_interlock, LCK_ILOCK, to, &waitq_lck_grp))

#define waitq_lock_unlock(wq) \
    (hw_unlock_bit(&(wq)->waitq_interlock, LCK_ILOCK))

#define waitq_lock_init(wq) \
    (wq->waitq_interlock = 0)

#else

#define waitq_lock_to(wq, to) \
    (hw_lock_to(&(wq)->waitq_interlock, to, &waitq_lck_grp))

#define waitq_lock_unlock(wq) \
    (hw_lock_unlock(&(wq)->waitq_interlock))

#define waitq_lock_init(wq) \
    (hw_lock_init(&(wq)->waitq_interlock))

#endif /* __arm64__ */
#endif // __DARLING__

/*
 * Prepost callback function for specially marked waitq sets
 * (prepost alternative)
 */
extern void waitq_set__CALLING_PREPOST_HOOK__(waitq_set_prepost_hook_t *ctx);

#define DEFAULT_MIN_FREE_TABLE_ELEM  100
static uint32_t g_min_free_table_elem;
static uint32_t g_min_free_cache;


/* ----------------------------------------------------------------------
 *
 * SetID Link Table Implementation
 *
 * ---------------------------------------------------------------------- */
static struct link_table g_wqlinktable;

enum wq_link_type {
    WQL_ALL  = -1,
    WQL_FREE = LT_FREE,
    WQL_WQS  = LT_ELEM,
    WQL_LINK = LT_LINK,
};

struct waitq_link {
    struct lt_elem wqte;

    union {
        /* wqt_type == WQL_WQS (LT_ELEM) */
        struct {
            struct waitq_set *wql_set;
            /* uint64_t sl_prepost_id; */
        } wql_wqs;

        /* wqt_type == WQL_LINK (LT_LINK) */
        struct {
            uint64_t left_setid;
            uint64_t right_setid;
        } wql_link;
    };
#ifdef KEEP_WAITQ_LINK_STATS
    thread_t  sl_alloc_th;
    task_t    sl_alloc_task;
    uintptr_t sl_alloc_bt[NWAITQ_BTFRAMES];
    uint64_t  sl_alloc_ts;
    uintptr_t sl_invalidate_bt[NWAITQ_BTFRAMES];
    uint64_t  sl_invalidate_ts;
    uintptr_t sl_mkvalid_bt[NWAITQ_BTFRAMES];
    uint64_t  sl_mkvalid_ts;
    uint64_t  sl_free_ts;
#endif
};
#if !defined(KEEP_WAITQ_LINK_STATS)
static_assert((sizeof(struct waitq_link) & (sizeof(struct waitq_link) - 1)) == 0,
    "waitq_link struct must be a power of two!");
#endif

#define wql_refcnt(link) \
    (lt_bits_refcnt((link)->wqte.lt_bits))

#define wql_type(link) \
    (lt_bits_type((link)->wqte.lt_bits))

#define wql_mkvalid(link) \
    do { \
        lt_elem_mkvalid(&(link)->wqte); \
        wql_do_mkvalid_stats(&(link)->wqte); \
    } while (0)

#define wql_is_valid(link) \
    lt_bits_valid((link)->wqte.lt_bits)

#define wql_setid wqte.lt_id

#define WQL_WQS_POISON   ((void *)(0xf00df00d))
#define WQL_LINK_POISON  (0x0bad0badffffffffull)

static void
wql_poison(struct link_table *table, struct lt_elem *elem)
{
    struct waitq_link *link = (struct waitq_link *)elem;
    (void)table;

    switch (wql_type(link)) {
    case WQL_WQS:
        link->wql_wqs.wql_set = WQL_WQS_POISON;
        break;
    case WQL_LINK:
        link->wql_link.left_setid = WQL_LINK_POISON;
        link->wql_link.right_setid = WQL_LINK_POISON;
        break;
    default:
        break;
    }
#ifdef KEEP_WAITQ_LINK_STATS
    memset(link->sl_alloc_bt, 0, sizeof(link->sl_alloc_bt));
    link->sl_alloc_ts = 0;
    memset(link->sl_mkvalid_bt, 0, sizeof(link->sl_mkvalid_bt));
    link->sl_mkvalid_ts = 0;

    link->sl_alloc_th = THREAD_NULL;
    /* leave the sl_alloc_task in place for debugging */

    link->sl_free_ts = mach_absolute_time();
#endif
}

#ifdef KEEP_WAITQ_LINK_STATS
static __inline__ void
wql_do_alloc_stats(struct lt_elem *elem)
{
    if (elem) {
        struct waitq_link *link = (struct waitq_link *)elem;
        memset(link->sl_alloc_bt, 0, sizeof(link->sl_alloc_bt));
        waitq_grab_backtrace(link->sl_alloc_bt, 0);
        link->sl_alloc_th = current_thread();
        link->sl_alloc_task = current_task();

        assert(link->sl_alloc_ts == 0);
        link->sl_alloc_ts = mach_absolute_time();

        memset(link->sl_invalidate_bt, 0, sizeof(link->sl_invalidate_bt));
        link->sl_invalidate_ts = 0;
    }
}

static __inline__ void
wql_do_invalidate_stats(struct lt_elem *elem)
{
    struct waitq_link *link = (struct waitq_link *)elem;

    if (!elem) {
        return;
    }

    assert(link->sl_mkvalid_ts > 0);

    memset(link->sl_invalidate_bt, 0, sizeof(link->sl_invalidate_bt));
    link->sl_invalidate_ts = mach_absolute_time();
    waitq_grab_backtrace(link->sl_invalidate_bt, 0);
}

static __inline__ void
wql_do_mkvalid_stats(struct lt_elem *elem)
{
    struct waitq_link *link = (struct waitq_link *)elem;

    if (!elem) {
        return;
    }

    memset(link->sl_mkvalid_bt, 0, sizeof(link->sl_mkvalid_bt));
    link->sl_mkvalid_ts = mach_absolute_time();
    waitq_grab_backtrace(link->sl_mkvalid_bt, 0);
}
#else
#define wql_do_alloc_stats(e)
#define wql_do_invalidate_stats(e)
#define wql_do_mkvalid_stats(e)
#endif /* KEEP_WAITQ_LINK_STATS */

static void
wql_init(void)
{
    uint32_t tablesz = 0, max_links = 0;

    if (PE_parse_boot_argn("wql_tsize", &tablesz, sizeof(tablesz)) != TRUE) {
        tablesz = (uint32_t)g_lt_max_tbl_size;
    }

    tablesz = P2ROUNDUP(tablesz, PAGE_SIZE);
    max_links = tablesz / sizeof(struct waitq_link);
    assert(max_links > 0 && tablesz > 0);

    /* we have a restricted index range */
    if (max_links > (LT_IDX_MAX + 1)) {
        max_links = LT_IDX_MAX + 1;
    }

    wqinfo("init linktable with max:%d elements (%d bytes)",
        max_links, tablesz);
    ltable_init(&g_wqlinktable, "wqslab.wql", max_links,
        sizeof(struct waitq_link), wql_poison);
}
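
/*
 * Illustrative sizing example for wql_init() above. The concrete numbers
 * here are assumptions, not values taken from any particular platform:
 * with PAGE_SIZE == 16 KiB and a 32-byte struct waitq_link, a "wql_tsize"
 * boot-arg of 1 MiB rounds up to 1 MiB and yields 1048576 / 32 = 32768
 * link elements, which would then be capped at LT_IDX_MAX + 1 if it
 * exceeded the table's index range.
 */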

static void
wql_ensure_free_space(void)
{
    if (g_wqlinktable.nelem - g_wqlinktable.used_elem < g_min_free_table_elem) {
        /*
         * we don't hold locks on these values, so check for underflow
         */
        if (g_wqlinktable.used_elem <= g_wqlinktable.nelem) {
            wqdbg_v("Forcing table growth: nelem=%d, used=%d, min_free=%d",
                g_wqlinktable.nelem, g_wqlinktable.used_elem,
                g_min_free_table_elem);
            ltable_grow(&g_wqlinktable, g_min_free_table_elem);
        }
    }
}

static struct waitq_link *
wql_alloc_link(int type)
{
    struct lt_elem *elem;

    elem = ltable_alloc_elem(&g_wqlinktable, type, 1, 0);
    wql_do_alloc_stats(elem);
    return (struct waitq_link *)elem;
}

static void
wql_realloc_link(struct waitq_link *link, int type)
{
    ltable_realloc_elem(&g_wqlinktable, &link->wqte, type);
#ifdef KEEP_WAITQ_LINK_STATS
    memset(link->sl_alloc_bt, 0, sizeof(link->sl_alloc_bt));
    link->sl_alloc_ts = 0;
    wql_do_alloc_stats(&link->wqte);

    memset(link->sl_invalidate_bt, 0, sizeof(link->sl_invalidate_bt));
    link->sl_invalidate_ts = 0;
#endif
}

static void
wql_invalidate(struct waitq_link *link)
{
    lt_elem_invalidate(&link->wqte);
    wql_do_invalidate_stats(&link->wqte);
}

static struct waitq_link *
wql_get_link(uint64_t setid)
{
    struct lt_elem *elem;

    elem = ltable_get_elem(&g_wqlinktable, setid);
    return (struct waitq_link *)elem;
}

static void
wql_put_link(struct waitq_link *link)
{
    if (!link) {
        return;
    }
    ltable_put_elem(&g_wqlinktable, (struct lt_elem *)link);
}

static struct waitq_link *
wql_get_reserved(uint64_t setid, int type)
{
    struct lt_elem *elem;

    elem = lt_elem_list_first(&g_wqlinktable, setid);
    if (!elem) {
        return NULL;
    }
    ltable_realloc_elem(&g_wqlinktable, elem, type);
    return (struct waitq_link *)elem;
}


static inline int waitq_maybe_remove_link(struct waitq *waitq,
    uint64_t setid,
    struct waitq_link *parent,
    struct waitq_link *left,
    struct waitq_link *right);

enum {
    LINK_WALK_ONE_LEVEL = 0,
    LINK_WALK_FULL_DAG  = 1,
    LINK_WALK_FULL_DAG_UNLOCKED = 2,
};

typedef int (*wql_callback_func)(struct waitq *waitq, void *ctx,
    struct waitq_link *link);

/**
 * walk_waitq_links: walk all table elements (of type 'link_type') pointed to by 'setid'
 *
 * Conditions:
 *	waitq is locked (or NULL)
 *	'setid' is managed by 'waitq'
 *		this could be direct (waitq->waitq_set_id == setid)
 *		OR indirect (setid is the left/right ID in a LINK chain,
 *		             whose root is waitq->waitq_set_id)
 *
 * Notes:
 *	This function uses recursion to walk the set of table elements
 *	pointed to by 'setid'. For each element encountered, 'cb' will be
 *	called. If non-zero, the return value of this callback function can
 *	early-out of the table walk.
 *
 *	For each link element encountered, the function takes a reference to
 *	it. The reference is dropped only after the callback and any recursion
 *	has completed.
 *
 *	The assumed table/link/tree structure:
 *	                   'setid'
 *	                   /    \
 *	                  /      \
 *	              L(LINK)     R(LINK)
 *	               /\             /\
 *	              /  \           /  \
 *	             /    \       Rl(*)  Rr(*)
 *	         Ll(*)   Lr(*)     /\     /\
 *	          /\       /\    ...    ...
 *	        ...      ...
 *	                   \
 *	                    WQS(wqset_q.waitq_setid == Sx)
 *	                    [waitq set is a member of setid, 'Sx']
 *
 *	                   'Sx'
 *	                   /    \
 *	                  /      \
 *	              L(LINK)     R(LINK)
 *	               /\             /\
 *	             ...            ...
 *
 * The basic algorithm is as follows:
 *	*) take a reference to the table object pointed to by 'setid'
 *	*) if appropriate, call 'cb' (potentially early-out on non-zero return)
 *	*) if the link object points to a waitq set, and the walk type
 *	   is 'FULL_DAG' (full directed-acyclic-graph), then try to lock
 *	   the associated waitq set object and recursively walk all sets to
 *	   which that set belongs. This is a DFS of the tree structure.
 *	*) recurse down the left side of the tree (following the
 *	   'left_setid' pointer in the link object)
 *	*) recurse down the right side of the tree (following the
 *	   'right_setid' pointer in the link object)
 */
static __attribute__((noinline))
int
walk_waitq_links(int walk_type, struct waitq *waitq,
    uint64_t setid, int link_type,
    void *ctx, wql_callback_func cb)
{
    struct waitq_link *link;
    uint64_t nextid;
    int wqltype;

    link = wql_get_link(setid);

    /* invalid link */
    if (!link) {
        return WQ_ITERATE_CONTINUE;
    }

    setid = nextid = 0;
    wqltype = wql_type(link);
    if (wqltype == WQL_LINK) {
        setid  = link->wql_link.left_setid;
        nextid = link->wql_link.right_setid;
    }

    /*
     * Make the callback only on specified link_type (or all links)
     * Note that after the callback, the link object may be
     * invalid. The only valid thing we can do is put our
     * reference to it (which may put it back on the free list)
     */
    if (link_type == WQL_ALL || link_type == wqltype) {
        /* allow the callback to early-out */
        int ret = cb(waitq, ctx, link);
        if (ret != WQ_ITERATE_CONTINUE) {
            wql_put_link(link);
            return ret;
        }
    }

    if (wqltype == WQL_WQS &&
        (walk_type == LINK_WALK_FULL_DAG ||
        walk_type == LINK_WALK_FULL_DAG_UNLOCKED)) {
        /*
         * Recurse down any sets to which this wait queue set was
         * added. We do this just before we put our reference to
         * the link object (which may free it).
         */
        struct waitq_set *wqset = link->wql_wqs.wql_set;
        int ret = WQ_ITERATE_CONTINUE;
        int should_unlock = 0;
        uint64_t wqset_setid = 0;

        if (waitq_set_is_valid(wqset) && walk_type == LINK_WALK_FULL_DAG) {
            assert(!waitq_irq_safe(&wqset->wqset_q));
            waitq_set_lock(wqset);
            should_unlock = 1;
        }

        /*
         * verify the linked waitq set as it could have been
         * invalidated before we grabbed the lock!
         */
        if (wqset->wqset_id != link->wql_setid.id) {
            /* This is the bottom of the tree: just get out */
            if (should_unlock) {
                waitq_set_unlock(wqset);
            }
            wql_put_link(link);
            return WQ_ITERATE_CONTINUE;
        }

        wqset_setid = wqset->wqset_q.waitq_set_id;

        if (wqset_setid > 0) {
            ret = walk_waitq_links(walk_type, &wqset->wqset_q,
                wqset_setid, link_type, ctx, cb);
        }
        if (should_unlock) {
            waitq_set_unlock(wqset);
        }
        if (ret != WQ_ITERATE_CONTINUE) {
            wql_put_link(link);
            return ret;
        }
    }

    wql_put_link(link);

    /* recurse down left side of the tree */
    if (setid) {
        int ret = walk_waitq_links(walk_type, waitq, setid, link_type, ctx, cb);
        if (ret != WQ_ITERATE_CONTINUE) {
            return ret;
        }
    }

    /* recurse down right side of the tree */
    if (nextid) {
        return walk_waitq_links(walk_type, waitq, nextid, link_type, ctx, cb);
    }

    return WQ_ITERATE_CONTINUE;
}

/* ----------------------------------------------------------------------
 *
 * Prepost Link Table Implementation
 *
 * ---------------------------------------------------------------------- */
static struct link_table g_prepost_table;

enum wq_prepost_type {
    WQP_FREE = LT_FREE,
    WQP_WQ   = LT_ELEM,
    WQP_POST = LT_LINK,
};

struct wq_prepost {
    struct lt_elem wqte;

    union {
        /* wqt_type == WQP_WQ (LT_ELEM) */
        struct {
            struct waitq *wqp_wq_ptr;
        } wqp_wq;
        /* wqt_type == WQP_POST (LT_LINK) */
        struct {
            uint64_t wqp_next_id;
            uint64_t wqp_wq_id;
        } wqp_post;
    };
#ifdef KEEP_WAITQ_PREPOST_STATS
    thread_t  wqp_alloc_th;
    task_t    wqp_alloc_task;
    uintptr_t wqp_alloc_bt[NWAITQ_BTFRAMES];
#endif
};
#if !defined(KEEP_WAITQ_PREPOST_STATS)
static_assert((sizeof(struct wq_prepost) & (sizeof(struct wq_prepost) - 1)) == 0,
    "wq_prepost struct must be a power of two!");
#endif

#define wqp_refcnt(wqp) \
    (lt_bits_refcnt((wqp)->wqte.lt_bits))

#define wqp_type(wqp) \
    (lt_bits_type((wqp)->wqte.lt_bits))

#define wqp_set_valid(wqp) \
    lt_elem_mkvalid(&(wqp)->wqte)

#define wqp_is_valid(wqp) \
    lt_bits_valid((wqp)->wqte.lt_bits)

#define wqp_prepostid wqte.lt_id

#define WQP_WQ_POISON    (0x0bad0badffffffffull)
#define WQP_POST_POISON  (0xf00df00df00df00d)

static void
wqp_poison(struct link_table *table, struct lt_elem *elem)
{
    struct wq_prepost *wqp = (struct wq_prepost *)elem;
    (void)table;

    switch (wqp_type(wqp)) {
    case WQP_WQ:
        break;
    case WQP_POST:
        wqp->wqp_post.wqp_next_id = WQP_POST_POISON;
        wqp->wqp_post.wqp_wq_id = WQP_POST_POISON;
        break;
    default:
        break;
    }
}

#ifdef KEEP_WAITQ_PREPOST_STATS
static __inline__ void
wqp_do_alloc_stats(struct lt_elem *elem)
{
    if (!elem) {
        return;
    }

    struct wq_prepost *wqp = (struct wq_prepost *)elem;
    uintptr_t alloc_bt[sizeof(wqp->wqp_alloc_bt)];

    waitq_grab_backtrace(alloc_bt, NWAITQ_BTFRAMES);

    /* be sure to take stats for _all_ allocated objects */
    for (;;) {
        memcpy(wqp->wqp_alloc_bt, alloc_bt, sizeof(alloc_bt));
        wqp->wqp_alloc_th = current_thread();
        wqp->wqp_alloc_task = current_task();
        wqp = (struct wq_prepost *)lt_elem_list_next(&g_prepost_table, &wqp->wqte);
        if (!wqp) {
            break;
        }
    }
}
#else
#define wqp_do_alloc_stats(e)
#endif /* KEEP_WAITQ_PREPOST_STATS */

static void
wqp_init(void)
{
    uint32_t tablesz = 0, max_wqp = 0;

    if (PE_parse_boot_argn("wqp_tsize", &tablesz, sizeof(tablesz)) != TRUE) {
        tablesz = (uint32_t)g_lt_max_tbl_size;
    }

    tablesz = P2ROUNDUP(tablesz, PAGE_SIZE);
    max_wqp = tablesz / sizeof(struct wq_prepost);
    assert(max_wqp > 0 && tablesz > 0);

    /* we have a restricted index range */
    if (max_wqp > (LT_IDX_MAX + 1)) {
        max_wqp = LT_IDX_MAX + 1;
    }

    wqinfo("init prepost table with max:%d elements (%d bytes)",
        max_wqp, tablesz);
    ltable_init(&g_prepost_table, "wqslab.prepost", max_wqp,
        sizeof(struct wq_prepost), wqp_poison);
}

/*
 * Refill the per-CPU cache.
 */
static void
wq_prepost_refill_cpu_cache(uint32_t nalloc)
{
#ifndef __DARLING__
    struct lt_elem *new_head, *old_head;
    struct wqp_cache *cache;

    /* require preemption enabled to allocate elements */
    if (get_preemption_level() != 0) {
        return;
    }

    new_head = ltable_alloc_elem(&g_prepost_table,
        LT_RESERVED, nalloc, 1);
    if (new_head == NULL) {
        return;
    }

    disable_preemption();
    cache = PERCPU_GET(wqp_cache);

    /* check once more before putting these elements on the list */
    if (cache->avail >= WQP_CACHE_MAX) {
        lt_elem_list_release(&g_prepost_table, new_head, LT_RESERVED);
        enable_preemption();
        return;
    }

    cache->avail += nalloc;
    if (cache->head == 0 || cache->head == LT_IDX_MAX) {
        cache->head = new_head->lt_id.id;
        goto out;
    }

    old_head = lt_elem_list_first(&g_prepost_table, cache->head);
    (void)lt_elem_list_link(&g_prepost_table, new_head, old_head);
    cache->head = new_head->lt_id.id;

out:
    enable_preemption();
    return;
#endif // __DARLING__
}

static void
wq_prepost_ensure_free_space(void)
{
    uint32_t free_elem;
    uint32_t min_free;
    struct wqp_cache *cache;

    if (g_min_free_cache == 0) {
        g_min_free_cache = (WQP_CACHE_MAX * ml_wait_max_cpus());
    }

#ifndef __DARLING__
    /*
     * Ensure that we always have a pool of per-CPU prepost elements
     */
    disable_preemption();
    cache = PERCPU_GET(wqp_cache);
    free_elem = cache->avail;
    enable_preemption();

    if (free_elem < (WQP_CACHE_MAX / 3)) {
        wq_prepost_refill_cpu_cache(WQP_CACHE_MAX - free_elem);
    }
#endif // __DARLING__

    /*
     * Now ensure that we have a sufficient amount of free table space
     */
    free_elem = g_prepost_table.nelem - g_prepost_table.used_elem;
    min_free = g_min_free_table_elem + g_min_free_cache;
    if (free_elem < min_free) {
        /*
         * we don't hold locks on these values, so check for underflow
         */
        if (g_prepost_table.used_elem <= g_prepost_table.nelem) {
            wqdbg_v("Forcing table growth: nelem=%d, used=%d, min_free=%d+%d",
                g_prepost_table.nelem, g_prepost_table.used_elem,
                g_min_free_table_elem, g_min_free_cache);
            ltable_grow(&g_prepost_table, min_free);
        }
    }
}

static struct wq_prepost *
wq_prepost_alloc(int type, int nelem)
{
    struct lt_elem *elem;
    struct wq_prepost *wqp;
    struct wqp_cache *cache;

    if (type != LT_RESERVED) {
        goto do_alloc;
    }
    if (nelem == 0) {
        return NULL;
    }

#ifndef __DARLING__
    /*
     * First try to grab the elements from the per-CPU cache if we are
     * allocating RESERVED elements
     */
    disable_preemption();
    cache = PERCPU_GET(wqp_cache);
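    /*
     * The per-CPU cache holds reserved elements on a singly-linked list
     * threaded through each element's lt_next_idx; the branch below carves
     * the first 'nelem' entries off that list, terminates the carved-off
     * sublist, and re-points cache->head at the remainder.
     */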
    if (nelem <= (int)cache->avail) {
        struct lt_elem *first, *next = NULL;
        int nalloc = nelem;

        cache->avail -= nelem;

        /* grab the first element */
        first = lt_elem_list_first(&g_prepost_table, cache->head);

        /* find the last element and re-adjust the cache head */
        for (elem = first; elem != NULL && nalloc > 0; elem = next) {
            next = lt_elem_list_next(&g_prepost_table, elem);
            if (--nalloc == 0) {
                /* terminate the allocated list */
                elem->lt_next_idx = LT_IDX_MAX;
                break;
            }
        }
        assert(nalloc == 0);
        if (!next) {
            cache->head = LT_IDX_MAX;
        } else {
            cache->head = next->lt_id.id;
        }
        /* assert that we don't have mismatched bookkeeping */
        assert(!(cache->head == LT_IDX_MAX && cache->avail > 0));
        enable_preemption();
        elem = first;
        goto out;
    }
    enable_preemption();
#endif // __DARLING__

do_alloc:
    /* fall-back to standard table allocation */
    elem = ltable_alloc_elem(&g_prepost_table, type, nelem, 0);
    if (!elem) {
        return NULL;
    }

out:
    wqp = (struct wq_prepost *)elem;
    wqp_do_alloc_stats(elem);
    return wqp;
}

static void
wq_prepost_invalidate(struct wq_prepost *wqp)
{
    lt_elem_invalidate(&wqp->wqte);
}

static struct wq_prepost *
wq_prepost_get(uint64_t wqp_id)
{
    struct lt_elem *elem;

    elem = ltable_get_elem(&g_prepost_table, wqp_id);
    return (struct wq_prepost *)elem;
}

static void
wq_prepost_put(struct wq_prepost *wqp)
{
    ltable_put_elem(&g_prepost_table, (struct lt_elem *)wqp);
}

static int
wq_prepost_rlink(struct wq_prepost *parent, struct wq_prepost *child)
{
    return lt_elem_list_link(&g_prepost_table, &parent->wqte, &child->wqte);
}

static struct wq_prepost *
wq_prepost_get_rnext(struct wq_prepost *head)
{
    struct lt_elem *elem;
    struct wq_prepost *wqp;
    uint64_t id;

    elem = lt_elem_list_next(&g_prepost_table, &head->wqte);
    if (!elem) {
        return NULL;
    }
    id = elem->lt_id.id;
    elem = ltable_get_elem(&g_prepost_table, id);

    if (!elem) {
        return NULL;
    }
    wqp = (struct wq_prepost *)elem;
    if (elem->lt_id.id != id ||
        wqp_type(wqp) != WQP_POST ||
        wqp->wqp_post.wqp_next_id != head->wqp_prepostid.id) {
        ltable_put_elem(&g_prepost_table, elem);
        return NULL;
    }

    return wqp;
}

static void
wq_prepost_reset_rnext(struct wq_prepost *wqp)
{
    (void)lt_elem_list_break(&g_prepost_table, &wqp->wqte);
}


/**
 * remove 'wqp' from the prepost list on 'wqset'
 *
 * Conditions:
 *	wqset is locked
 *	caller holds a reference on wqp (and is responsible to release it)
 *
 * Result:
 *	wqp is invalidated, wqset is potentially updated with a new
 *	prepost ID, and the next element of the prepost list may be
 *	consumed as well (if the list contained only 2 objects)
 */
static int
wq_prepost_remove(struct waitq_set *wqset,
    struct wq_prepost *wqp)
{
    int more_posts = 1;
    uint64_t next_id = wqp->wqp_post.wqp_next_id;
    uint64_t wqp_id = wqp->wqp_prepostid.id;
    struct wq_prepost *prev_wqp, *next_wqp;

    assert(wqp_type(wqp) == WQP_POST);
    assert(wqset->wqset_q.waitq_prepost == 1);

    if (next_id == wqp_id) {
        /* the list is singular and becoming empty */
        wqset->wqset_prepost_id = 0;
        more_posts = 0;
        goto out;
    }

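    /*
     * More than one element remains: WQP_POST entries form a circular list
     * linked forward through wqp_next_id and backward through the reverse
     * (rnext) table links, so unlink 'wqp' by patching up both of its
     * neighbors below.
     */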
    prev_wqp = wq_prepost_get_rnext(wqp);
    assert(prev_wqp != NULL);
    assert(prev_wqp->wqp_post.wqp_next_id == wqp_id);
    assert(prev_wqp->wqp_prepostid.id != wqp_id);
    assert(wqp_type(prev_wqp) == WQP_POST);

    if (prev_wqp->wqp_prepostid.id == next_id) {
        /*
         * There are two items in the list, and we're removing one. We
         * only need to keep the WQP_WQ pointer from 'prev_wqp'
         */
        wqset->wqset_prepost_id = prev_wqp->wqp_post.wqp_wq_id;
        wq_prepost_invalidate(prev_wqp);
        wq_prepost_put(prev_wqp);
        more_posts = 0;
        goto out;
    }

    /* prev->next = next */
    prev_wqp->wqp_post.wqp_next_id = next_id;

    /* next->prev = prev */
    next_wqp = wq_prepost_get(next_id);
    assert(next_wqp != NULL);
    assert(next_wqp != wqp);
    assert(next_wqp != prev_wqp);
    assert(wqp_type(next_wqp) == WQP_POST);

    wq_prepost_reset_rnext(next_wqp);
    wq_prepost_rlink(next_wqp, prev_wqp);

    /* If we remove the head of the list, update the wqset */
    if (wqp_id == wqset->wqset_prepost_id) {
        wqset->wqset_prepost_id = next_id;
    }

    wq_prepost_put(prev_wqp);
    wq_prepost_put(next_wqp);

out:
    wq_prepost_reset_rnext(wqp);
    wq_prepost_invalidate(wqp);
    return more_posts;
}

static struct wq_prepost *
wq_prepost_rfirst(uint64_t id)
{
    struct lt_elem *elem;
    elem = lt_elem_list_first(&g_prepost_table, id);
    wqp_do_alloc_stats(elem);
    return (struct wq_prepost *)(void *)elem;
}

static struct wq_prepost *
wq_prepost_rpop(uint64_t *id, int type)
{
    struct lt_elem *elem;
    elem = lt_elem_list_pop(&g_prepost_table, id, type);
    wqp_do_alloc_stats(elem);
    return (struct wq_prepost *)(void *)elem;
}

static void
wq_prepost_release_rlist(struct wq_prepost *wqp)
{
    int nelem = 0;
    struct wqp_cache *cache;
    struct lt_elem *elem;

    if (!wqp) {
        return;
    }

    elem = &wqp->wqte;

#ifndef __DARLING__
    /*
     * These are reserved elements: release them back to the per-cpu pool
     * if our cache is running low.
     */
    disable_preemption();
    cache = PERCPU_GET(wqp_cache);
    if (cache->avail < WQP_CACHE_MAX) {
        struct lt_elem *tmp = NULL;
        if (cache->head != LT_IDX_MAX) {
            tmp = lt_elem_list_first(&g_prepost_table, cache->head);
        }
        nelem = lt_elem_list_link(&g_prepost_table, elem, tmp);
        cache->head = elem->lt_id.id;
        cache->avail += nelem;
        enable_preemption();
        return;
    }
    enable_preemption();
#endif // __DARLING__

    /* release these elements back to the main table */
    nelem = lt_elem_list_release(&g_prepost_table, elem, LT_RESERVED);

#if CONFIG_WAITQ_STATS
    g_prepost_table.nreserved_releases += 1;
    OSDecrementAtomic64(&g_prepost_table.nreservations);
#endif
}

typedef int (*wqp_callback_func)(struct waitq_set *wqset,
    void *ctx,
    struct wq_prepost *wqp,
    struct waitq *waitq);

/**
 * iterate over a chain of preposts associated with a waitq set.
 *
 * Conditions:
 *	wqset is locked
 *
 * Notes:
 *	This loop performs automatic prepost chain management / culling, and
 *	may reset or adjust the waitq set's prepost ID pointer. If you don't
 *	want this extra processing, you can use wq_prepost_iterate().
 */
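/*
 * Illustrative callback sketch (hypothetical, not part of the build):
 * a wqp_callback_func drives the loop below through its WQ_ITERATE_*
 * return value, e.g.
 *
 *	static int my_cb(struct waitq_set *wqset, void *ctx,
 *	    struct wq_prepost *wqp, struct waitq *waitq)
 *	{
 *		if (waitq == (struct waitq *)ctx) {
 *			// cull this prepost and keep walking
 *			return WQ_ITERATE_INVALIDATE_CONTINUE;
 *		}
 *		return WQ_ITERATE_CONTINUE;
 *	}
 *
 * Returning WQ_ITERATE_DROPPED tells the iterator that the callback has
 * already released its reference on 'wqp', so the walk restarts from the
 * list head; any other return value breaks out of the iteration.
 */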
static int
wq_prepost_foreach_locked(struct waitq_set *wqset,
    void *ctx, wqp_callback_func cb)
{
    int ret = WQ_ITERATE_SUCCESS;
    struct wq_prepost *wqp, *tmp_wqp;

    assert(cb != NULL);

    if (!wqset || !waitq_set_maybe_preposted(wqset)) {
        return WQ_ITERATE_SUCCESS;
    }

restart:
    wqp = wq_prepost_get(wqset->wqset_prepost_id);
    if (!wqp) {
        /*
         * The prepost object is no longer valid, reset the waitq
         * set's prepost id.
         */
        wqset->wqset_prepost_id = 0;
        return WQ_ITERATE_SUCCESS;
    }

    if (wqp_type(wqp) == WQP_WQ) {
        uint64_t __assert_only wqp_id = wqp->wqp_prepostid.id;

        ret = cb(wqset, ctx, wqp, wqp->wqp_wq.wqp_wq_ptr);

        switch (ret) {
        case WQ_ITERATE_INVALIDATE_CONTINUE:
            /* the caller wants to remove the only prepost here */
            assert(wqp_id == wqset->wqset_prepost_id);
            wqset->wqset_prepost_id = 0;
            OS_FALLTHROUGH;
        case WQ_ITERATE_CONTINUE:
            wq_prepost_put(wqp);
            ret = WQ_ITERATE_SUCCESS;
            break;
        case WQ_ITERATE_RESTART:
            wq_prepost_put(wqp);
            OS_FALLTHROUGH;
        case WQ_ITERATE_DROPPED:
            goto restart;
        default:
            wq_prepost_put(wqp);
            break;
        }
        return ret;
    }

    assert(wqp->wqp_prepostid.id == wqset->wqset_prepost_id);
    assert(wqp_type(wqp) == WQP_POST);

    /*
     * At this point we know we have a list of POST objects.
     * Grab a handle to the last element in the list and start
     * the iteration.
     */
    tmp_wqp = wq_prepost_get_rnext(wqp);
    assert(tmp_wqp != NULL && wqp_type(tmp_wqp) == WQP_POST);

    uint64_t last_id = tmp_wqp->wqp_prepostid.id;
    wq_prepost_put(tmp_wqp);

    ret = WQ_ITERATE_SUCCESS;
    for (;;) {
        uint64_t wqp_id, first_id, next_id;

        wqp_id = wqp->wqp_prepostid.id;
        first_id = wqset->wqset_prepost_id;
        next_id = wqp->wqp_post.wqp_next_id;

        /* grab the WQP_WQ object this _POST points to */
        tmp_wqp = wq_prepost_get(wqp->wqp_post.wqp_wq_id);
        if (!tmp_wqp) {
            /*
             * This WQP_POST object points to an invalid
             * WQP_WQ object - remove the POST object from
             * the list.
             */
            if (wq_prepost_remove(wqset, wqp) == 0) {
                wq_prepost_put(wqp);
                goto restart;
            }
            goto next_prepost;
        }
        assert(wqp_type(tmp_wqp) == WQP_WQ);
        /*
         * make the callback: note that this could remove 'wqp' or
         * drop the lock on our waitq set. We need to re-validate
         * our state when this function returns.
         */
        ret = cb(wqset, ctx, wqp, tmp_wqp->wqp_wq.wqp_wq_ptr);
        wq_prepost_put(tmp_wqp);

        switch (ret) {
        case WQ_ITERATE_CONTINUE:
            /* continue iteration */
            break;
        case WQ_ITERATE_INVALIDATE_CONTINUE:
            assert(next_id == wqp->wqp_post.wqp_next_id);
            if (wq_prepost_remove(wqset, wqp) == 0) {
                wq_prepost_put(wqp);
                goto restart;
            }
            goto next_prepost;
        case WQ_ITERATE_RESTART:
            wq_prepost_put(wqp);
            OS_FALLTHROUGH;
        case WQ_ITERATE_DROPPED:
            /* the callback dropped the ref to wqp: just restart */
            goto restart;
        default:
            /* break out of the iteration for some other reason */
            goto finish_prepost_foreach;
        }

        /*
         * the set lock may have been dropped during callback,
         * if something looks different, restart the prepost iteration
         */
        if (!wqp_is_valid(wqp) ||
            (wqp->wqp_post.wqp_next_id != next_id) ||
            wqset->wqset_prepost_id != first_id) {
            wq_prepost_put(wqp);
            goto restart;
        }

next_prepost:
        /* this was the last object in the list */
        if (wqp_id == last_id) {
            break;
        }

        /* get the next object */
        tmp_wqp = wq_prepost_get(next_id);
        if (!tmp_wqp) {
            /*
             * At this point we've already checked our state
             * after the callback (which may have dropped the set
             * lock). If we find an invalid member of the list
             * then something is wrong.
             */
            panic("Invalid WQP_POST member 0x%llx in waitq set "
                "0x%llx prepost list (first:%llx, "
                "wqp:%p)",
                next_id, wqset->wqset_id, first_id, wqp);
        }
        wq_prepost_put(wqp);
        wqp = tmp_wqp;

        assert(wqp_type(wqp) == WQP_POST);
    }

finish_prepost_foreach:
    wq_prepost_put(wqp);
    if (ret == WQ_ITERATE_CONTINUE) {
        ret = WQ_ITERATE_SUCCESS;
    }

    return ret;
}

/**
 * Perform a simple loop over a chain of prepost objects
 *
 * Conditions:
 *	If 'prepost_id' is associated with a waitq (set) then that object must
 *	be locked before calling this function.
 *	Callback function, 'cb', must be able to handle a NULL wqset pointer
 *	and a NULL waitq pointer!
 *
 * Notes:
 *	This prepost chain iteration will _not_ automatically adjust any chain
 *	element or linkage. This is the responsibility of the caller! If you
 *	want automatic prepost chain management (at a cost of extra CPU time),
 *	you can use: wq_prepost_foreach_locked().
 */
static int
wq_prepost_iterate(uint64_t prepost_id,
    void *ctx, wqp_callback_func cb)
{
    int ret;
    struct wq_prepost *wqp;

    if (!prepost_id) {
        return WQ_ITERATE_SUCCESS;
    }

    wqp = wq_prepost_get(prepost_id);
    if (!wqp) {
        return WQ_ITERATE_SUCCESS;
    }

    if (wqp_type(wqp) == WQP_WQ) {
        ret = WQ_ITERATE_SUCCESS;
        if (cb) {
            ret = cb(NULL, ctx, wqp, wqp->wqp_wq.wqp_wq_ptr);
        }

        if (ret != WQ_ITERATE_DROPPED) {
            wq_prepost_put(wqp);
        }
        return ret;
    }

    assert(wqp->wqp_prepostid.id == prepost_id);
    assert(wqp_type(wqp) == WQP_POST);

    /* at this point we know we have a list of POST objects */
    uint64_t next_id;

    ret = WQ_ITERATE_CONTINUE;
    do {
        struct wq_prepost *tmp_wqp;
        struct waitq *wq = NULL;

        next_id = wqp->wqp_post.wqp_next_id;

        /* grab the WQP_WQ object this _POST points to */
        tmp_wqp = wq_prepost_get(wqp->wqp_post.wqp_wq_id);
        if (tmp_wqp) {
            assert(wqp_type(tmp_wqp) == WQP_WQ);
            wq = tmp_wqp->wqp_wq.wqp_wq_ptr;
        }

        if (cb) {
            ret = cb(NULL, ctx, wqp, wq);
        }
        if (tmp_wqp) {
            wq_prepost_put(tmp_wqp);
        }

        if (ret != WQ_ITERATE_CONTINUE) {
            break;
        }

        tmp_wqp = wq_prepost_get(next_id);
        if (!tmp_wqp) {
            /*
             * the chain is broken: nothing we can do here besides
             * bail from the iteration.
             */
            ret = WQ_ITERATE_ABORTED;
            break;
        }

        wq_prepost_put(wqp);
        wqp = tmp_wqp;

        assert(wqp_type(wqp) == WQP_POST);
    } while (next_id != prepost_id);

    if (ret != WQ_ITERATE_DROPPED) {
        wq_prepost_put(wqp);
    }

    if (ret == WQ_ITERATE_CONTINUE) {
        ret = WQ_ITERATE_SUCCESS;
    }
    return ret;
}


struct _is_posted_ctx {
    struct waitq *posting_wq;
    int did_prepost;
};

static int
wq_is_preposted_on_set_cb(struct waitq_set *wqset, void *ctx,
    struct wq_prepost *wqp, struct waitq *waitq)
{
    struct _is_posted_ctx *pctx = (struct _is_posted_ctx *)ctx;

    (void)wqset;
    (void)wqp;

    /*
     * Don't early-out, run through the _entire_ list:
     * This ensures that we retain a minimum number of invalid elements.
     */
    if (pctx->posting_wq == waitq) {
        pctx->did_prepost = 1;
    }

    return WQ_ITERATE_CONTINUE;
}


/**
 * checks if 'waitq' has already preposted on 'wqset'
 *
 * Parameters:
 *	waitq    The waitq that's preposting
 *	wqset    The set onto which waitq may be preposted
 *
 * Conditions:
 *	both waitq and wqset are locked
 *
 * Returns non-zero if 'waitq' has already preposted to 'wqset'
 */
static int
wq_is_preposted_on_set(struct waitq *waitq, struct waitq_set *wqset)
{
    int ret;
    struct _is_posted_ctx pctx;

    /*
     * If the set's only prepost matches the waitq's prepost ID,
     * then it obviously already preposted to the set.
     */
    if (waitq->waitq_prepost_id != 0 &&
        wqset->wqset_prepost_id == waitq->waitq_prepost_id) {
        return 1;
    }

    /* use full prepost iteration: always trim the list */
    pctx.posting_wq = waitq;
    pctx.did_prepost = 0;
    ret = wq_prepost_foreach_locked(wqset, (void *)&pctx,
        wq_is_preposted_on_set_cb);
    return pctx.did_prepost;
}

static struct wq_prepost *
wq_get_prepost_obj(uint64_t *reserved, int type)
{
    struct wq_prepost *wqp = NULL;
    /*
     * don't fail just because the caller doesn't have enough
     * reservations, we've kept a low-water mark on the prepost table,
     * so there should be some available for us.
     */
    if (reserved && *reserved) {
        wqp = wq_prepost_rpop(reserved, type);
        assert(wqp->wqte.lt_id.idx < g_prepost_table.nelem);
    } else {
        /*
         * TODO: if in interrupt context, grab from a special
         * region / reserved list!
         */
        wqp = wq_prepost_alloc(type, 1);
    }

    if (wqp == NULL) {
        panic("Couldn't allocate prepost object!");
    }
    return wqp;
}


/**
 * prepost a waitq onto a waitq set
 *
 * Parameters:
 *	wqset    The set onto which waitq will be preposted
 *	waitq    The waitq that's preposting
 *	reserved List (lt_elem_list_ style) of pre-allocated prepost elements
 *	         Could be NULL
 *
 * Conditions:
 *	both wqset and waitq are locked
 *
 * Notes:
 *	If reserved is NULL, this may block on prepost table growth.
 */
static void
wq_prepost_do_post_locked(struct waitq_set *wqset,
    struct waitq *waitq,
    uint64_t *reserved)
{
    struct wq_prepost *wqp_post, *wqp_head, *wqp_tail;

    assert(waitq_held(waitq) && waitq_held(&wqset->wqset_q));

    /*
     * nothing to do if it's already preposted:
     * note that this also culls any invalid prepost objects
     */
    if (wq_is_preposted_on_set(waitq, wqset)) {
        return;
    }

    assert(waitqs_is_linked(wqset));

    /*
     * This function is called because an event is being posted to 'waitq'.
     * We need a prepost object associated with this queue. Allocate one
     * now if the waitq isn't already associated with one.
     */
    if (waitq->waitq_prepost_id == 0) {
        struct wq_prepost *wqp;
        wqp = wq_get_prepost_obj(reserved, WQP_WQ);
        wqp->wqp_wq.wqp_wq_ptr = waitq;
        wqp_set_valid(wqp);
        waitq->waitq_prepost_id = wqp->wqp_prepostid.id;
        wq_prepost_put(wqp);
    }

#if CONFIG_LTABLE_STATS
    g_prepost_table.npreposts += 1;
#endif

    wqdbg_v("preposting waitq %p (0x%llx) to set 0x%llx",
        (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq),
        waitq->waitq_prepost_id, wqset->wqset_id);

    if (wqset->wqset_prepost_id == 0) {
        /* the set has no previous preposts */
        wqset->wqset_prepost_id = waitq->waitq_prepost_id;
        return;
    }

    wqp_head = wq_prepost_get(wqset->wqset_prepost_id);
    if (!wqp_head) {
        /* the previous prepost has become invalid */
        wqset->wqset_prepost_id = waitq->waitq_prepost_id;
        return;
    }

    assert(wqp_head->wqp_prepostid.id == wqset->wqset_prepost_id);

    /*
     * If we get here, we're going to need at least one new wq_prepost
     * object. If the previous wqset_prepost_id points to a WQP_WQ, we
     * actually need to allocate 2 wq_prepost objects because the WQP_WQ
     * is tied to the waitq and shared across all sets.
     */
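    /*
     * Sketch of the resulting layout (illustrative): the set's
     * wqset_prepost_id heads a circular list of WQP_POST entries, each of
     * which references a per-waitq WQP_WQ element by id:
     *
     *   wqset_prepost_id
     *         |
     *         v
     *     [WQP_POST] --wqp_next_id--> [WQP_POST] --wqp_next_id--> (back to head)
     *         |                            |
     *     wqp_wq_id                    wqp_wq_id
     *         v                            v
     *     [WQP_WQ -> waitq A]          [WQP_WQ -> waitq B]
     */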
    wqp_post = wq_get_prepost_obj(reserved, WQP_POST);

    wqp_post->wqp_post.wqp_wq_id = waitq->waitq_prepost_id;
    wqdbg_v("POST 0x%llx :: WQ 0x%llx", wqp_post->wqp_prepostid.id,
        waitq->waitq_prepost_id);

    if (wqp_type(wqp_head) == WQP_WQ) {
        /*
         * We must replace the wqset_prepost_id with a pointer
         * to two new WQP_POST objects
         */
        uint64_t wqp_id = wqp_head->wqp_prepostid.id;
        wqdbg_v("set 0x%llx previous had 1 WQ prepost (0x%llx): "
            "replacing with two POST preposts",
            wqset->wqset_id, wqp_id);

        /* drop the old reference */
        wq_prepost_put(wqp_head);

        /* grab another new object (the 2nd of two) */
        wqp_head = wq_get_prepost_obj(reserved, WQP_POST);

        /* point this one to the original WQP_WQ object */
        wqp_head->wqp_post.wqp_wq_id = wqp_id;
        wqdbg_v("POST 0x%llx :: WQ 0x%llx",
            wqp_head->wqp_prepostid.id, wqp_id);

        /* link it to the new wqp_post object allocated earlier */
        wqp_head->wqp_post.wqp_next_id = wqp_post->wqp_prepostid.id;
        /* make the list doubly-linked and circular */
        wq_prepost_rlink(wqp_head, wqp_post);

        /*
         * Finish setting up the new prepost: point it back to the
         * POST object we allocated to replace the original wqset
         * WQ prepost object
         */
        wqp_post->wqp_post.wqp_next_id = wqp_head->wqp_prepostid.id;
        wq_prepost_rlink(wqp_post, wqp_head);

        /* mark objects valid, and reset the wqset prepost list head */
        wqp_set_valid(wqp_head);
        wqp_set_valid(wqp_post);
        wqset->wqset_prepost_id = wqp_head->wqp_prepostid.id;

        /* release both references */
        wq_prepost_put(wqp_head);
        wq_prepost_put(wqp_post);

        wqdbg_v("set 0x%llx: 0x%llx/0x%llx -> 0x%llx/0x%llx -> 0x%llx",
            wqset->wqset_id, wqset->wqset_prepost_id,
            wqp_head->wqp_prepostid.id, wqp_head->wqp_post.wqp_next_id,
            wqp_post->wqp_prepostid.id,
            wqp_post->wqp_post.wqp_next_id);
        return;
    }

    assert(wqp_type(wqp_head) == WQP_POST);

    /*
     * Add the new prepost to the end of the prepost list
     */
    wqp_tail = wq_prepost_get_rnext(wqp_head);
    assert(wqp_tail != NULL);
    assert(wqp_tail->wqp_post.wqp_next_id == wqset->wqset_prepost_id);

    /*
     * link the head to the new tail
     * NOTE: this needs to happen first in case wqp_tail == wqp_head
     */
    wq_prepost_reset_rnext(wqp_head);
    wq_prepost_rlink(wqp_head, wqp_post);

    /* point the new object to the list head, and list tail */
    wqp_post->wqp_post.wqp_next_id = wqp_head->wqp_prepostid.id;
    wq_prepost_rlink(wqp_post, wqp_tail);

    /* point the last item in the waitq set's list to the new object */
    wqp_tail->wqp_post.wqp_next_id = wqp_post->wqp_prepostid.id;

    wqp_set_valid(wqp_post);

    wq_prepost_put(wqp_head);
    wq_prepost_put(wqp_tail);
    wq_prepost_put(wqp_post);

    wqdbg_v("set 0x%llx (wqp:0x%llx) last_prepost:0x%llx, "
        "new_prepost:0x%llx->0x%llx", wqset->wqset_id,
        wqset->wqset_prepost_id, wqp_head->wqp_prepostid.id,
        wqp_post->wqp_prepostid.id, wqp_post->wqp_post.wqp_next_id);

    return;
}


/* ----------------------------------------------------------------------
 *
 * Stats collection / reporting
 *
 * ---------------------------------------------------------------------- */
#if CONFIG_LTABLE_STATS && CONFIG_WAITQ_STATS
static void
wq_table_stats(struct link_table *table, struct wq_table_stats *stats)
{
    stats->version = WAITQ_STATS_VERSION;
    stats->table_elements = table->nelem;
    stats->table_used_elems = table->used_elem;
    stats->table_elem_sz = table->elem_sz;
    stats->table_slabs = table->nslabs;
    stats->table_slab_sz = table->slab_sz;

    stats->table_num_allocs = table->nallocs;
    stats->table_num_preposts = table->npreposts;
    stats->table_num_reservations = table->nreservations;

    stats->table_max_used = table->max_used;
    stats->table_avg_used = table->avg_used;
    stats->table_max_reservations = table->max_reservations;
    stats->table_avg_reservations = table->avg_reservations;
}

void
waitq_link_stats(struct wq_table_stats *stats)
{
    if (!stats) {
        return;
    }
    wq_table_stats(&g_wqlinktable, stats);
}

void
waitq_prepost_stats(struct wq_table_stats *stats)
{
    wq_table_stats(&g_prepost_table, stats);
}
#endif


/* ----------------------------------------------------------------------
 *
 * Global Wait Queues
 *
 * ---------------------------------------------------------------------- */

static struct waitq g_boot_waitq;
static struct waitq *global_waitqs = &g_boot_waitq;
static uint32_t g_num_waitqs = 1;

/*
 * Zero out the used MSBs of the event.
 */
#define _CAST_TO_EVENT_MASK(event) ((uintptr_t)(event) & ((1ul << _EVENT_MASK_BITS) - 1ul))

static __inline__ uint32_t
waitq_hash(char *key, size_t length)
{
    uint32_t hash = os_hash_jenkins(key, length);

    hash &= (g_num_waitqs - 1);
    return hash;
}

/* return a global waitq pointer corresponding to the given event */
struct waitq *
_global_eventq(char *event, size_t event_length)
{
    return &global_waitqs[waitq_hash(event, event_length)];
}

/* return an indexed global waitq pointer */
struct waitq *
global_waitq(int index)
{
    return &global_waitqs[index % g_num_waitqs];
}


#if CONFIG_LTABLE_STATS || CONFIG_WAITQ_STATS
/* this global is for lldb */
const uint32_t g_nwaitq_btframes = NWAITQ_BTFRAMES;

static __inline__ void
waitq_grab_backtrace(uintptr_t bt[NWAITQ_BTFRAMES], int skip)
{
    uintptr_t buf[NWAITQ_BTFRAMES + skip];
    if (skip < 0) {
        skip = 0;
    }
    memset(buf, 0, (NWAITQ_BTFRAMES + skip) * sizeof(uintptr_t));
    backtrace(buf, g_nwaitq_btframes + skip, NULL);
    memcpy(&bt[0], &buf[skip], NWAITQ_BTFRAMES * sizeof(uintptr_t));
}
#else /* no stats */
#define waitq_grab_backtrace(...)
#endif

#if CONFIG_WAITQ_STATS

struct wq_stats g_boot_stats;
struct wq_stats *g_waitq_stats = &g_boot_stats;

static __inline__ struct wq_stats *
waitq_global_stats(struct waitq *waitq)
{
    struct wq_stats *wqs;
    uint32_t idx;

    if (!waitq_is_global(waitq)) {
        return NULL;
    }

    idx = (uint32_t)(((uintptr_t)waitq - (uintptr_t)global_waitqs) / sizeof(*waitq));
    assert(idx < g_num_waitqs);
    wqs = &g_waitq_stats[idx];
    return wqs;
}

static __inline__ void
waitq_stats_count_wait(struct waitq *waitq)
{
    struct wq_stats *wqs = waitq_global_stats(waitq);
    if (wqs != NULL) {
        wqs->waits++;
        waitq_grab_backtrace(wqs->last_wait, 2);
    }
}

static __inline__ void
waitq_stats_count_wakeup(struct waitq *waitq)
{
    struct wq_stats *wqs = waitq_global_stats(waitq);
    if (wqs != NULL) {
        wqs->wakeups++;
        waitq_grab_backtrace(wqs->last_wakeup, 2);
    }
}

static __inline__ void
waitq_stats_count_clear_wakeup(struct waitq *waitq)
{
    struct wq_stats *wqs = waitq_global_stats(waitq);
    if (wqs != NULL) {
        wqs->wakeups++;
        wqs->clears++;
        waitq_grab_backtrace(wqs->last_wakeup, 2);
    }
}

static __inline__ void
waitq_stats_count_fail(struct waitq *waitq)
{
    struct wq_stats *wqs = waitq_global_stats(waitq);
    if (wqs != NULL) {
        wqs->failed_wakeups++;
        waitq_grab_backtrace(wqs->last_failed_wakeup, 2);
    }
}
#else /* !CONFIG_WAITQ_STATS */
#define waitq_stats_count_wait(q)         do { } while (0)
#define waitq_stats_count_wakeup(q)       do { } while (0)
#define waitq_stats_count_clear_wakeup(q) do { } while (0)
#define waitq_stats_count_fail(q)         do { } while (0)
#endif

int
waitq_is_valid(struct waitq *waitq)
{
    return (waitq != NULL) && waitq->waitq_isvalid;
}

int
waitq_set_is_valid(struct waitq_set *wqset)
{
    return (wqset != NULL) && wqset->wqset_q.waitq_isvalid && waitqs_is_set(wqset);
}

int
waitq_is_global(struct waitq *waitq)
{
    if (waitq >= global_waitqs && waitq < global_waitqs + g_num_waitqs) {
        return 1;
    }
    return 0;
}

int
waitq_irq_safe(struct waitq *waitq)
{
    /* global wait queues have this bit set on initialization */
    return waitq->waitq_irq;
}

static inline bool
waitq_empty(struct waitq *wq)
{
    if (waitq_is_turnstile_queue(wq)) {
        return priority_queue_empty(&wq->waitq_prio_queue);
    } else if (waitq_is_turnstile_proxy(wq)) {
        struct turnstile *ts = wq->waitq_ts;
        return ts == TURNSTILE_NULL ||
               priority_queue_empty(&ts->ts_waitq.waitq_prio_queue);
    } else {
        return queue_empty(&wq->waitq_queue);
    }
}

static struct waitq *
waitq_get_safeq(struct waitq *waitq)
{
    /* Check if it's a port waitq */
    if (waitq_is_turnstile_proxy(waitq)) {
        struct turnstile *ts = waitq->waitq_ts;
        return ts ? &ts->ts_waitq : NULL;
    }
    return global_eventq(waitq);
}

static uint32_t
waitq_hash_size(void)
{
    uint32_t hsize, queues;

    if (PE_parse_boot_argn("wqsize", &hsize, sizeof(hsize))) {
        return hsize;
    }

    queues = thread_max / 5;
    hsize = P2ROUNDUP(queues * sizeof(struct waitq), PAGE_SIZE);

    return hsize;
}

/*
 * Since the priority ordered waitq uses basepri as the
 * ordering key assert that this value fits in a uint8_t.
 */
static_assert(MAXPRI <= UINT8_MAX);

static inline void
waitq_thread_insert(struct waitq *wq,
    thread_t thread, boolean_t fifo)
{
    if (waitq_is_turnstile_queue(wq)) {
        turnstile_stats_update(0, TSU_TURNSTILE_BLOCK_COUNT, NULL);
        turnstile_waitq_add_thread_priority_queue(wq, thread);
    } else {
        turnstile_stats_update(0, TSU_REGULAR_WAITQ_BLOCK_COUNT, NULL);
        if (fifo) {
            enqueue_tail(&wq->waitq_queue, &thread->wait_links);
        } else {
            enqueue_head(&wq->waitq_queue, &thread->wait_links);
        }
    }
}

static inline void
waitq_thread_remove(struct waitq *wq,
    thread_t thread)
{
    if (waitq_is_turnstile_queue(wq)) {
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            (TURNSTILE_CODE(TURNSTILE_HEAP_OPERATIONS, (THREAD_REMOVED_FROM_TURNSTILE_WAITQ))) | DBG_FUNC_NONE,
            VM_KERNEL_UNSLIDE_OR_PERM(waitq_to_turnstile(wq)),
            thread_tid(thread),
            0, 0, 0);
        priority_queue_remove(&wq->waitq_prio_queue, &thread->wait_prioq_links);
    } else {
        remqueue(&(thread->wait_links));
    }
}

void
waitq_bootstrap(void)
{
    kern_return_t kret;
    uint32_t whsize, qsz, tmp32;

    g_min_free_table_elem = DEFAULT_MIN_FREE_TABLE_ELEM;
    if (PE_parse_boot_argn("wqt_min_free", &tmp32, sizeof(tmp32)) == TRUE) {
        g_min_free_table_elem = tmp32;
    }
    wqdbg("Minimum free table elements: %d", tmp32);

    /*
     * Determine the amount of memory we're willing to reserve for
     * the waitqueue hash table
     */
    whsize = waitq_hash_size();

    /* Determine the number of waitqueues we can fit. */
    qsz = sizeof(struct waitq);
    whsize = ROUNDDOWN(whsize, qsz);
    g_num_waitqs = whsize / qsz;

    /*
     * The hash algorithm requires that this be a power of 2, so we
     * just mask off all the low-order bits.
     */
    for (uint32_t i = 0; i < 31; i++) {
        uint32_t bit = (1 << i);
        if ((g_num_waitqs & bit) == g_num_waitqs) {
            break;
        }
        g_num_waitqs &= ~bit;
    }
    assert(g_num_waitqs > 0);

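    /*
     * Example (illustrative numbers): if the sizing above produced
     * g_num_waitqs == 5000, the loop clears low-order bits until only a
     * single bit remains, leaving g_num_waitqs == 4096 so that
     * waitq_hash()'s mask of (g_num_waitqs - 1) stays correct.
     */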
    /* Now determine how much memory we really need. */
    whsize = P2ROUNDUP(g_num_waitqs * qsz, PAGE_SIZE);

    wqdbg("allocating %d global queues (%d bytes)", g_num_waitqs, whsize);
    kret = kernel_memory_allocate(kernel_map, (vm_offset_t *)&global_waitqs,
        whsize, 0, KMA_KOBJECT | KMA_NOPAGEWAIT, VM_KERN_MEMORY_WAITQ);
    if (kret != KERN_SUCCESS || global_waitqs == NULL) {
        panic("kernel_memory_allocate() failed to alloc global_waitqs"
            ", error: %d, whsize: 0x%x", kret, whsize);
    }

#if CONFIG_WAITQ_STATS
    whsize = P2ROUNDUP(g_num_waitqs * sizeof(struct wq_stats), PAGE_SIZE);
    kret = kernel_memory_allocate(kernel_map, (vm_offset_t *)&g_waitq_stats,
        whsize, 0, KMA_KOBJECT | KMA_NOPAGEWAIT, VM_KERN_MEMORY_WAITQ);
    if (kret != KERN_SUCCESS || global_waitqs == NULL) {
        panic("kernel_memory_allocate() failed to alloc g_waitq_stats"
            ", error: %d, whsize: 0x%x", kret, whsize);
    }
    memset(g_waitq_stats, 0, whsize);
#endif

    for (uint32_t i = 0; i < g_num_waitqs; i++) {
        waitq_init(&global_waitqs[i], SYNC_POLICY_FIFO | SYNC_POLICY_DISABLE_IRQ);
    }

    /* initialize the global waitq link table */
    wql_init();

    /* initialize the global waitq prepost table */
    wqp_init();
}


/* ----------------------------------------------------------------------
 *
 * Wait Queue Implementation
 *
 * ---------------------------------------------------------------------- */

#ifndef __DARLING__
/*
 * Double the standard lock timeout, because wait queues tend
 * to iterate over a number of threads - locking each. If there is
 * a problem with a thread lock, it normally times out at the wait
 * queue level first, hiding the real problem.
 */
/* For x86, the hardware timeout is in TSC units. */
*/ 2003 #if defined(__i386__) || defined(__x86_64__) 2004 #define hwLockTimeOut LockTimeOutTSC 2005 #else 2006 #define hwLockTimeOut LockTimeOut 2007 #endif 2008 2009 void 2010 waitq_lock(struct waitq *wq) 2011 { 2012 if (__improbable(waitq_lock_to(wq, 2013 hwLockTimeOut * 2) == 0)) { 2014 boolean_t wql_acquired = FALSE; 2015 2016 while (machine_timeout_suspended()) { 2017 mp_enable_preemption(); 2018 wql_acquired = waitq_lock_to(wq, 2019 hwLockTimeOut * 2); 2020 if (wql_acquired) { 2021 break; 2022 } 2023 } 2024 if (wql_acquired == FALSE) { 2025 panic("waitq deadlock - waitq=%p, cpu=%d\n", 2026 wq, cpu_number()); 2027 } 2028 } 2029 #if defined(__x86_64__) 2030 pltrace(FALSE); 2031 #endif 2032 assert(waitq_held(wq)); 2033 } 2034 2035 void 2036 waitq_unlock(struct waitq *wq) 2037 { 2038 assert(waitq_held(wq)); 2039 #if defined(__x86_64__) 2040 pltrace(TRUE); 2041 #endif 2042 waitq_lock_unlock(wq); 2043 } 2044 #endif // __DARLING__ 2045 2046 2047 /** 2048 * clear the thread-related waitq state 2049 * 2050 * Conditions: 2051 * 'thread' is locked 2052 */ 2053 static inline void 2054 thread_clear_waitq_state(thread_t thread) 2055 { 2056 thread->waitq = NULL; 2057 thread->wait_event = NO_EVENT64; 2058 thread->at_safe_point = FALSE; 2059 } 2060 2061 2062 typedef thread_t (*waitq_select_cb)(void *ctx, struct waitq *waitq, 2063 int is_global, thread_t thread); 2064 2065 struct waitq_select_args { 2066 /* input parameters */ 2067 struct waitq *posted_waitq; 2068 struct waitq *waitq; 2069 event64_t event; 2070 waitq_select_cb select_cb; 2071 void *select_ctx; 2072 int priority; 2073 2074 uint64_t *reserved_preposts; 2075 2076 /* output parameters */ 2077 queue_t threadq; 2078 int max_threads; 2079 int *nthreads; 2080 spl_t *spl; 2081 }; 2082 2083 static void do_waitq_select_n_locked(struct waitq_select_args *args); 2084 2085 /** 2086 * callback invoked once for every waitq set to which a waitq belongs 2087 * 2088 * Conditions: 2089 * ctx->posted_waitq is locked 2090 * 'link' points to a valid waitq set 2091 * 2092 * Notes: 2093 * Takes the waitq set lock on the set pointed to by 'link' 2094 * Calls do_waitq_select_n_locked() which could recurse back into 2095 * this function if the waitq set is a member of other sets. 2096 * If no threads were selected, it preposts the input waitq 2097 * onto the waitq set pointed to by 'link'. 2098 */ 2099 static int 2100 waitq_select_walk_cb(struct waitq *waitq, void *ctx, 2101 struct waitq_link *link) 2102 { 2103 int ret = WQ_ITERATE_CONTINUE; 2104 struct waitq_select_args args = *((struct waitq_select_args *)ctx); 2105 struct waitq_set *wqset; 2106 2107 (void)waitq; 2108 assert(wql_type(link) == WQL_WQS); 2109 2110 wqset = link->wql_wqs.wql_set; 2111 args.waitq = &wqset->wqset_q; 2112 2113 assert(!waitq_irq_safe(waitq)); 2114 assert(!waitq_irq_safe(&wqset->wqset_q)); 2115 2116 waitq_set_lock(wqset); 2117 /* 2118 * verify that the link wasn't invalidated just before 2119 * we were able to take the lock. 2120 */ 2121 if (wqset->wqset_id != link->wql_setid.id) { 2122 goto out_unlock; 2123 } 2124 2125 assert(waitqs_is_linked(wqset)); 2126 2127 /* 2128 * Find any threads waiting on this wait queue set, 2129 * and recurse into any waitq set to which this set belongs. 
2130 */ 2131 do_waitq_select_n_locked(&args); 2132 2133 if (*args.nthreads > 0 || (args.threadq && !queue_empty(args.threadq))) { 2134 /* at least 1 thread was selected and returned: don't prepost */ 2135 if (args.max_threads > 0 && *args.nthreads >= args.max_threads) { 2136 /* break out of the setid walk */ 2137 ret = WQ_ITERATE_FOUND; 2138 } 2139 } else if (args.event == NO_EVENT64) { 2140 /* 2141 * No thread selected: prepost 'waitq' to 'wqset' 2142 * if wqset can handle preposts and the event is set to 0. 2143 * We also make sure to not post waitq sets to other sets. 2144 * 2145 * If the set doesn't support preposts, but does support 2146 * prepost callout/hook interaction, invoke the predefined 2147 * callout function and pass the set's 'prepost_hook.' This 2148 * could potentially release another thread to handle events. 2149 */ 2150 if (waitq_set_can_prepost(wqset)) { 2151 wq_prepost_do_post_locked( 2152 wqset, waitq, args.reserved_preposts); 2153 } else if (waitq_set_has_prepost_hook(wqset)) { 2154 waitq_set_prepost_hook_t *hook = wqset->wqset_prepost_hook; 2155 2156 /* 2157 * When calling out to the prepost hook, 2158 * we drop the waitq lock, to allow for the kevent 2159 * subsytem to call into the waitq subsystem again, 2160 * without risking a deadlock. 2161 * 2162 * However, we need to guard against wqset going away, 2163 * so we increment the prepost hook use count 2164 * while the lock is dropped. 2165 * 2166 * This lets waitq_set_deinit() know to wait for the 2167 * prepost hook call to be done before it can proceed. 2168 * 2169 * Note: we need to keep preemption disabled the whole 2170 * time as waitq_set_deinit will spin on this. 2171 */ 2172 2173 disable_preemption(); 2174 os_atomic_add(hook, (uint16_t)1, relaxed); 2175 waitq_set_unlock(wqset); 2176 2177 waitq_set__CALLING_PREPOST_HOOK__(hook); 2178 2179 /* Note: after this decrement, the wqset may be deallocated */ 2180 os_atomic_add(hook, (uint16_t)-1, relaxed); 2181 enable_preemption(); 2182 return ret; 2183 } 2184 } 2185 2186 out_unlock: 2187 waitq_set_unlock(wqset); 2188 return ret; 2189 } 2190 2191 /** 2192 * Routine to iterate over the waitq for non-priority ordered waitqs 2193 * 2194 * Conditions: 2195 * args->waitq (and args->posted_waitq) is locked 2196 * 2197 * Notes: 2198 * Uses the optional select callback function to refine the selection 2199 * of one or more threads from a waitq. The select callback is invoked 2200 * once for every thread that is found to be waiting on the input args->waitq. 2201 * 2202 * If one or more threads are selected, this may disable interrupts. 2203 * The previous interrupt state is returned in args->spl and should 2204 * be used in a call to splx() if threads are returned to the caller. 2205 */ 2206 static thread_t 2207 waitq_queue_iterate_locked(struct waitq *safeq, struct waitq *waitq, 2208 spl_t spl, struct waitq_select_args *args, 2209 uint32_t *remaining_eventmask) 2210 { 2211 int max_threads = args->max_threads; 2212 int *nthreads = args->nthreads; 2213 thread_t thread = THREAD_NULL; 2214 thread_t first_thread = THREAD_NULL; 2215 2216 qe_foreach_element_safe(thread, &safeq->waitq_queue, wait_links) { 2217 thread_t t = THREAD_NULL; 2218 assert_thread_magic(thread); 2219 2220 /* 2221 * For non-priority ordered waitqs, we allow multiple events to be 2222 * mux'ed into the same waitq. Also safeqs may contain threads from 2223 * multiple waitqs. Only pick threads that match the 2224 * requested wait event. 
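 * (A global safeq can hold threads blocked on many different
 * <waitq, event> pairs at once; the thread->waitq and thread->wait_event
 * fields recorded at assert_wait time are what let us tell them apart
 * here.)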
*/ 2226 if (thread->waitq == waitq && thread->wait_event == args->event) { 2227 t = thread; 2228 if (first_thread == THREAD_NULL) { 2229 first_thread = thread; 2230 } 2231 2232 /* allow the caller to further refine the selection */ 2233 if (args->select_cb) { 2234 t = args->select_cb(args->select_ctx, waitq, 2235 waitq_is_global(waitq), thread); 2236 } 2237 if (t != THREAD_NULL) { 2238 *nthreads += 1; 2239 if (args->threadq) { 2240 /* if output queue, add locked thread to it */ 2241 if (*nthreads == 1) { 2242 *(args->spl) = (safeq != waitq) ? spl : splsched(); 2243 } 2244 thread_lock(t); 2245 thread_clear_waitq_state(t); 2246 re_queue_tail(args->threadq, &t->wait_links); 2247 } 2248 /* only enqueue up to 'max' threads */ 2249 if (*nthreads >= max_threads && max_threads > 0) { 2250 break; 2251 } 2252 } 2253 } 2254 /* thread wasn't selected so track its event */ 2255 if (t == THREAD_NULL) { 2256 *remaining_eventmask |= (thread->waitq != safeq) ? 2257 _CAST_TO_EVENT_MASK(thread->waitq) : _CAST_TO_EVENT_MASK(thread->wait_event); 2258 } 2259 } 2260 2261 return first_thread; 2262 } 2263 2264 /** 2265 * Routine to iterate and remove threads from priority ordered waitqs 2266 * 2267 * Conditions: 2268 * args->waitq (and args->posted_waitq) is locked 2269 * 2270 * Notes: 2271 * The priority ordered waitqs only support maximum priority element removal. 2272 * 2273 * Also, the implementation makes sure that all threads in a priority ordered 2274 * waitq are waiting on the same wait event. This is not necessarily true for 2275 * non-priority ordered waitqs. If one or more threads are selected, this may 2276 * disable interrupts. The previous interrupt state is returned in args->spl 2277 * and should be used in a call to splx() if threads are returned to the caller. 2278 * 2279 * In the future, we could support priority ordered waitqs with multiple wait 2280 * events in the same queue. The way to implement that would be to keep removing 2281 * elements from the waitq and if the event does not match the requested one, 2282 * add it to a local list. This local list of elements needs to be re-inserted 2283 * into the priority queue at the end and the select_cb return value & 2284 * remaining_eventmask would need to be handled appropriately. The implementation 2285 * is not very efficient but would work functionally. 2286 */ 2287 static thread_t 2288 waitq_prioq_iterate_locked(struct waitq *safeq, struct waitq *waitq, 2289 spl_t spl, struct waitq_select_args *args, 2290 uint32_t *remaining_eventmask) 2291 { 2292 int max_threads = args->max_threads; 2293 int *nthreads = args->nthreads; 2294 thread_t first_thread = THREAD_NULL; 2295 thread_t thread = THREAD_NULL; 2296 2297 /* 2298 * The waitq select routines need to handle two cases: 2299 * Case 1: Peek at maximum priority thread in the waitq (remove_op = 0) 2300 * Get the maximum priority thread from the waitq without removing it. 2301 * In that case args->threadq == NULL and max_threads == 1. 2302 * Case 2: Remove 'n' highest priority threads from waitq (remove_op = 1) 2303 * Get max_threads (if available) while removing them from the waitq. 2304 * In that case args->threadq != NULL and max_threads is one of {-1, 1}. 2305 * 2306 * The only possible values for remaining_eventmask for the priority queue 2307 * waitq are either 0 (for the remove all threads case) or the original 2308 * safeq->waitq_eventmask (for the lookup/remove one thread cases).
2309 */ 2310 *remaining_eventmask = safeq->waitq_eventmask; 2311 boolean_t remove_op = !!(args->threadq); 2312 2313 while ((max_threads <= 0) || (*nthreads < max_threads)) { 2314 if (priority_queue_empty(&(safeq->waitq_prio_queue))) { 2315 *remaining_eventmask = 0; 2316 break; 2317 } 2318 2319 if (remove_op) { 2320 thread = priority_queue_remove_max(&safeq->waitq_prio_queue, 2321 struct thread, wait_prioq_links); 2322 } else { 2323 /* For the peek operation, the only valid value for max_threads is 1 */ 2324 assert(max_threads == 1); 2325 thread = priority_queue_max(&safeq->waitq_prio_queue, 2326 struct thread, wait_prioq_links); 2327 } 2328 /* 2329 * Ensure the wait event matches since priority ordered waitqs do not 2330 * support multiple events in the same waitq. 2331 */ 2332 assert((thread->waitq == waitq) && (thread->wait_event == args->event)); 2333 2334 if (args->select_cb) { 2335 /* 2336 * Call the select_cb passed into the waitq_select args. The callback 2337 * updates the select_ctx with information about the highest priority 2338 * thread which is eventually used by the caller. 2339 */ 2340 thread_t __assert_only ret_thread = args->select_cb(args->select_ctx, waitq, 2341 waitq_is_global(waitq), thread); 2342 if (!remove_op) { 2343 /* For the peek operation, the thread should not be selected for addition */ 2344 assert(ret_thread == THREAD_NULL); 2345 } else { 2346 /* 2347 * For the remove operation, the select routine should always return a valid 2348 * thread for priority waitqs. Since all threads in a prioq are equally 2349 * eligible, it should match the thread removed from the prioq. If this 2350 * invariant changes, the implementation would need to handle the 2351 * remaining_eventmask here correctly. 2352 */ 2353 assert(ret_thread == thread); 2354 } 2355 } 2356 2357 if (first_thread == THREAD_NULL) { 2358 first_thread = thread; 2359 /* 2360 * turnstile_kernel_update_inheritor_on_wake_locked will lock 2361 * first_thread, so call it before locking it. 2362 */ 2363 if (args->priority == WAITQ_PROMOTE_ON_WAKE && first_thread != THREAD_NULL && waitq_is_turnstile_queue(safeq)) { 2364 turnstile_kernel_update_inheritor_on_wake_locked(waitq_to_turnstile(safeq), (turnstile_inheritor_t)first_thread, TURNSTILE_INHERITOR_THREAD); 2365 } 2366 } 2367 2368 /* For the peek operation, break out early */ 2369 if (!remove_op) { 2370 break; 2371 } 2372 2373 /* Add the thread to the result thread list */ 2374 *nthreads += 1; 2375 if (*nthreads == 1) { 2376 *(args->spl) = (safeq != waitq) ? spl : splsched(); 2377 } 2378 thread_lock(thread); 2379 thread_clear_waitq_state(thread); 2380 enqueue_tail(args->threadq, &(thread->wait_links)); 2381 } 2382 2383 return first_thread; 2384 } 2385 2386 /** 2387 * generic thread selection from a waitq (and sets to which the waitq belongs) 2388 * 2389 * Conditions: 2390 * args->waitq (and args->posted_waitq) is locked 2391 * 2392 * Notes: 2393 * Uses the optional select callback function to refine the selection 2394 * of one or more threads from a waitq and any set to which the waitq 2395 * belongs. The select callback is invoked once for every thread that 2396 * is found to be waiting on the input args->waitq. 2397 * 2398 * If one or more threads are selected, this may disable interrupts. 2399 * The previous interrupt state is returned in args->spl and should 2400 * be used in a call to splx() if threads are returned to the caller. 
2401 */ 2402 static void 2403 do_waitq_select_n_locked(struct waitq_select_args *args) 2404 { 2405 struct waitq *waitq = args->waitq; 2406 int max_threads = args->max_threads; 2407 thread_t first_thread = THREAD_NULL; 2408 struct waitq *safeq; 2409 uint32_t remaining_eventmask = 0; 2410 uint32_t eventmask; 2411 int *nthreads = args->nthreads; 2412 spl_t spl = 0; 2413 2414 assert(max_threads != 0); 2415 2416 if (!waitq_irq_safe(waitq)) { 2417 /* JMM - add flag to waitq to avoid global lookup if no waiters */ 2418 eventmask = _CAST_TO_EVENT_MASK(waitq); 2419 safeq = waitq_get_safeq(waitq); 2420 if (safeq == NULL) { 2421 /* 2422 * in the WQT_TSPROXY case, if there's no turnstile, 2423 * there's no queue and no waiters, so we can move straight 2424 * to the waitq set recursion 2425 */ 2426 goto handle_waitq_set; 2427 } 2428 2429 if (*nthreads == 0) { 2430 spl = splsched(); 2431 } 2432 waitq_lock(safeq); 2433 } else { 2434 eventmask = _CAST_TO_EVENT_MASK(args->event); 2435 safeq = waitq; 2436 } 2437 2438 /* 2439 * If the safeq doesn't have an eventmask (not global) or the event 2440 * we're looking for IS set in its eventmask, then scan the threads 2441 * in that queue for ones that match the original <waitq,event> pair. 2442 */ 2443 if (!waitq_is_global(safeq) || 2444 (safeq->waitq_eventmask & eventmask) == eventmask) { 2445 if (waitq_is_turnstile_queue(safeq)) { 2446 first_thread = waitq_prioq_iterate_locked(safeq, waitq, 2447 spl, args, 2448 &remaining_eventmask); 2449 } else { 2450 first_thread = waitq_queue_iterate_locked(safeq, waitq, 2451 spl, args, 2452 &remaining_eventmask); 2453 } 2454 2455 /* 2456 * Update the eventmask of global queues we just scanned: 2457 * - If we selected all the threads in the queue, we can clear its 2458 * eventmask. 2459 * 2460 * - If we didn't find enough threads to fill our needs, then we can 2461 * assume we looked at every thread in the queue and the mask we 2462 * computed is complete - so reset it. 2463 */ 2464 if (waitq_is_global(safeq)) { 2465 if (waitq_empty(safeq)) { 2466 safeq->waitq_eventmask = 0; 2467 } else if (max_threads < 0 || *nthreads < max_threads) { 2468 safeq->waitq_eventmask = remaining_eventmask; 2469 } 2470 } 2471 } 2472 2473 /* 2474 * Grab the first thread in the queue if no other thread was selected. 2475 * We can guarantee that no one has manipulated this thread because 2476 * it's waiting on the given waitq, and we have that waitq locked. 2477 */ 2478 if (*nthreads == 0 && first_thread != THREAD_NULL && args->threadq) { 2479 /* we know this is the first (and only) thread */ 2480 ++(*nthreads); 2481 *(args->spl) = (safeq != waitq) ? 
spl : splsched(); 2482 2483 thread_lock(first_thread); 2484 thread_clear_waitq_state(first_thread); 2485 waitq_thread_remove(safeq, first_thread); 2486 enqueue_tail(args->threadq, &(first_thread->wait_links)); 2487 2488 /* update the eventmask on [now] empty global queues */ 2489 if (waitq_is_global(safeq) && waitq_empty(safeq)) { 2490 safeq->waitq_eventmask = 0; 2491 } 2492 } 2493 2494 /* unlock the safe queue if we locked one above */ 2495 if (safeq != waitq) { 2496 waitq_unlock(safeq); 2497 if (*nthreads == 0) { 2498 splx(spl); 2499 } 2500 } 2501 2502 if (max_threads > 0 && *nthreads >= max_threads) { 2503 return; 2504 } 2505 2506 handle_waitq_set: 2507 /* 2508 * wait queues that are not in any sets 2509 * are the bottom of the recursion 2510 */ 2511 if (!waitq->waitq_set_id) { 2512 return; 2513 } 2514 2515 /* check to see if the set ID for this wait queue is valid */ 2516 struct waitq_link *link = wql_get_link(waitq->waitq_set_id); 2517 if (!link) { 2518 /* the waitq set to which this waitq belonged, has been invalidated */ 2519 waitq->waitq_set_id = 0; 2520 return; 2521 } 2522 2523 wql_put_link(link); 2524 2525 /* 2526 * If this waitq is a member of any wait queue sets, we need to look 2527 * for waiting thread(s) in any of those sets, and prepost all sets that 2528 * don't have active waiters. 2529 * 2530 * Note that we do a local walk of this waitq's links - we manually 2531 * recurse down wait queue set's with non-zero wqset_q.waitq_set_id 2532 */ 2533 (void)walk_waitq_links(LINK_WALK_ONE_LEVEL, waitq, waitq->waitq_set_id, 2534 WQL_WQS, (void *)args, waitq_select_walk_cb); 2535 } 2536 2537 /** 2538 * main entry point for thread selection from a waitq 2539 * 2540 * Conditions: 2541 * waitq is locked 2542 * 2543 * Returns: 2544 * The number of threads waiting on 'waitq' for 'event' which have 2545 * been placed onto the input 'threadq' 2546 * 2547 * Notes: 2548 * The 'select_cb' function is invoked for every thread found waiting on 2549 * 'waitq' for 'event'. The thread is _not_ locked upon callback 2550 * invocation. This parameter may be NULL. 2551 * 2552 * If one or more threads are returned in 'threadq' then the caller is 2553 * responsible to call splx() using the returned 'spl' value. Each 2554 * returned thread is locked. 2555 */ 2556 static __inline__ int 2557 waitq_select_n_locked(struct waitq *waitq, 2558 event64_t event, 2559 waitq_select_cb select_cb, 2560 void *select_ctx, 2561 uint64_t *reserved_preposts, 2562 queue_t threadq, 2563 int max_threads, spl_t *spl, 2564 int priority) 2565 { 2566 int nthreads = 0; 2567 2568 struct waitq_select_args args = { 2569 .posted_waitq = waitq, 2570 .waitq = waitq, 2571 .event = event, 2572 .select_cb = select_cb, 2573 .select_ctx = select_ctx, 2574 .priority = priority, 2575 .reserved_preposts = reserved_preposts, 2576 .threadq = threadq, 2577 .max_threads = max_threads, 2578 .nthreads = &nthreads, 2579 .spl = spl, 2580 }; 2581 2582 do_waitq_select_n_locked(&args); 2583 return nthreads; 2584 } 2585 2586 /** 2587 * select from a waitq a single thread waiting for a given event 2588 * 2589 * Conditions: 2590 * 'waitq' is locked 2591 * 2592 * Returns: 2593 * A locked thread that's been removed from the waitq, but has not 2594 * yet been put on a run queue. Caller is responsible to call splx 2595 * with the '*spl' value. 
2596 */ 2597 static thread_t 2598 waitq_select_one_locked(struct waitq *waitq, event64_t event, 2599 uint64_t *reserved_preposts, 2600 int priority, spl_t *spl) 2601 { 2602 int nthreads; 2603 queue_head_t threadq; 2604 2605 queue_init(&threadq); 2606 2607 nthreads = waitq_select_n_locked(waitq, event, NULL, NULL, 2608 reserved_preposts, &threadq, 1, spl, priority); 2609 2610 /* if we selected a thread, return it (still locked) */ 2611 if (!queue_empty(&threadq)) { 2612 thread_t t; 2613 queue_entry_t qe = dequeue_head(&threadq); 2614 t = qe_element(qe, struct thread, wait_links); 2615 assert(queue_empty(&threadq)); /* there should be 1 entry */ 2616 /* t has been locked and removed from all queues */ 2617 return t; 2618 } 2619 2620 return THREAD_NULL; 2621 } 2622 2623 struct select_thread_ctx { 2624 thread_t thread; 2625 event64_t event; 2626 spl_t *spl; 2627 }; 2628 2629 /** 2630 * link walk callback invoked once for each set to which a waitq belongs 2631 * 2632 * Conditions: 2633 * initial waitq is locked 2634 * ctx->thread is unlocked 2635 * 2636 * Notes: 2637 * This may disable interrupts and early-out of the full DAG link walk by 2638 * returning KERN_ALREADY_IN_SET. In this case, the returned thread has 2639 * been removed from the waitq, it's waitq state has been reset, and the 2640 * caller is responsible to call splx() with the returned interrupt state 2641 * in ctx->spl. 2642 */ 2643 static int 2644 waitq_select_thread_cb(struct waitq *waitq, void *ctx, 2645 struct waitq_link *link) 2646 { 2647 struct select_thread_ctx *stctx = (struct select_thread_ctx *)ctx; 2648 struct waitq_set *wqset; 2649 struct waitq *wqsetq; 2650 struct waitq *safeq; 2651 spl_t s; 2652 2653 (void)waitq; 2654 2655 thread_t thread = stctx->thread; 2656 event64_t event = stctx->event; 2657 2658 if (wql_type(link) != WQL_WQS) { 2659 return WQ_ITERATE_CONTINUE; 2660 } 2661 2662 wqset = link->wql_wqs.wql_set; 2663 wqsetq = &wqset->wqset_q; 2664 2665 assert(!waitq_irq_safe(waitq)); 2666 assert(!waitq_irq_safe(wqsetq)); 2667 2668 waitq_set_lock(wqset); 2669 2670 s = splsched(); 2671 2672 /* find and lock the interrupt-safe waitq the thread is thought to be on */ 2673 safeq = waitq_get_safeq(wqsetq); 2674 waitq_lock(safeq); 2675 2676 thread_lock(thread); 2677 2678 if ((thread->waitq == wqsetq) && (thread->wait_event == event)) { 2679 waitq_thread_remove(wqsetq, thread); 2680 if (waitq_empty(safeq)) { 2681 safeq->waitq_eventmask = 0; 2682 } 2683 thread_clear_waitq_state(thread); 2684 waitq_unlock(safeq); 2685 waitq_set_unlock(wqset); 2686 /* 2687 * thread still locked, 2688 * return non-zero to break out of WQS walk 2689 */ 2690 *(stctx->spl) = s; 2691 return WQ_ITERATE_FOUND; 2692 } 2693 2694 thread_unlock(thread); 2695 waitq_set_unlock(wqset); 2696 waitq_unlock(safeq); 2697 splx(s); 2698 2699 return WQ_ITERATE_CONTINUE; 2700 } 2701 2702 /** 2703 * returns KERN_SUCCESS and locks 'thread' if-and-only-if 'thread' is waiting 2704 * on 'waitq' (or any set to which waitq belongs) for 'event' 2705 * 2706 * Conditions: 2707 * 'waitq' is locked 2708 * 'thread' is unlocked 2709 */ 2710 static kern_return_t 2711 waitq_select_thread_locked(struct waitq *waitq, 2712 event64_t event, 2713 thread_t thread, spl_t *spl) 2714 { 2715 struct waitq *safeq; 2716 struct waitq_link *link; 2717 struct select_thread_ctx ctx; 2718 kern_return_t kr; 2719 spl_t s; 2720 2721 /* Find and lock the interrupts disabled queue the thread is actually on */ 2722 if (!waitq_irq_safe(waitq)) { 2723 safeq = waitq_get_safeq(waitq); 2724 if (safeq == NULL) 
{ 2725 /* 2726 * in the WQT_TSPROXY case, if there's no turnstile, 2727 * there's no queue and no waiters, so we can move straight 2728 * to the waitq set recursion 2729 */ 2730 goto handle_waitq_set; 2731 } 2732 2733 s = splsched(); 2734 waitq_lock(safeq); 2735 } else { 2736 s = splsched(); 2737 safeq = waitq; 2738 } 2739 2740 thread_lock(thread); 2741 2742 if ((thread->waitq == waitq) && (thread->wait_event == event)) { 2743 waitq_thread_remove(safeq, thread); 2744 if (waitq_empty(safeq)) { 2745 safeq->waitq_eventmask = 0; 2746 } 2747 thread_clear_waitq_state(thread); 2748 *spl = s; 2749 /* thread still locked */ 2750 return KERN_SUCCESS; 2751 } 2752 2753 thread_unlock(thread); 2754 2755 if (safeq != waitq) { 2756 waitq_unlock(safeq); 2757 } 2758 2759 splx(s); 2760 2761 handle_waitq_set: 2762 if (!waitq->waitq_set_id) { 2763 return KERN_NOT_WAITING; 2764 } 2765 2766 /* check to see if the set ID for this wait queue is valid */ 2767 link = wql_get_link(waitq->waitq_set_id); 2768 if (!link) { 2769 /* the waitq to which this set belonged, has been invalidated */ 2770 waitq->waitq_set_id = 0; 2771 return KERN_NOT_WAITING; 2772 } 2773 2774 /* 2775 * The thread may be waiting on a wait queue set to which 2776 * the input 'waitq' belongs. Go look for the thread in 2777 * all wait queue sets. If it's there, we'll remove it 2778 * because it's equivalent to waiting directly on the input waitq. 2779 */ 2780 ctx.thread = thread; 2781 ctx.event = event; 2782 ctx.spl = spl; 2783 kr = walk_waitq_links(LINK_WALK_FULL_DAG, waitq, waitq->waitq_set_id, 2784 WQL_WQS, (void *)&ctx, waitq_select_thread_cb); 2785 2786 wql_put_link(link); 2787 2788 /* we found a thread, return success */ 2789 if (kr == WQ_ITERATE_FOUND) { 2790 return KERN_SUCCESS; 2791 } 2792 2793 return KERN_NOT_WAITING; 2794 } 2795 2796 static int 2797 prepost_exists_cb(struct waitq_set __unused *wqset, 2798 void __unused *ctx, 2799 struct wq_prepost __unused *wqp, 2800 struct waitq __unused *waitq) 2801 { 2802 /* if we get here, then we know that there is a valid prepost object! */ 2803 return WQ_ITERATE_FOUND; 2804 } 2805 2806 /** 2807 * declare a thread's intent to wait on 'waitq' for 'wait_event' 2808 * 2809 * Conditions: 2810 * 'waitq' is locked 2811 */ 2812 wait_result_t 2813 waitq_assert_wait64_locked(struct waitq *waitq, 2814 event64_t wait_event, 2815 wait_interrupt_t interruptible, 2816 wait_timeout_urgency_t urgency, 2817 uint64_t deadline, 2818 uint64_t leeway, 2819 thread_t thread) 2820 { 2821 wait_result_t wait_result; 2822 int realtime = 0; 2823 struct waitq *safeq; 2824 uintptr_t eventmask; 2825 spl_t s; 2826 2827 2828 /* 2829 * Warning: Do _not_ place debugging print statements here. 2830 * The waitq is locked! 2831 */ 2832 assert(!thread->started || thread == current_thread()); 2833 2834 if (thread->waitq != NULL) { 2835 panic("thread already waiting on %p", thread->waitq); 2836 } 2837 2838 if (waitq_is_set(waitq)) { 2839 struct waitq_set *wqset = (struct waitq_set *)waitq; 2840 /* 2841 * early-out if the thread is waiting on a wait queue set 2842 * that has already been pre-posted. 2843 */ 2844 if (wait_event == NO_EVENT64 && waitq_set_maybe_preposted(wqset)) { 2845 int ret; 2846 /* 2847 * Run through the list of potential preposts. Because 2848 * this is a hot path, we short-circuit the iteration 2849 * if we find just one prepost object. 
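 * (prepost_exists_cb above returns WQ_ITERATE_FOUND on the first
 * valid prepost object it sees, which is what terminates the walk
 * early.)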
2850 */ 2851 ret = wq_prepost_foreach_locked(wqset, NULL, 2852 prepost_exists_cb); 2853 if (ret == WQ_ITERATE_FOUND) { 2854 s = splsched(); 2855 thread_lock(thread); 2856 thread->wait_result = THREAD_AWAKENED; 2857 thread_unlock(thread); 2858 splx(s); 2859 return THREAD_AWAKENED; 2860 } 2861 } 2862 } 2863 2864 s = splsched(); 2865 2866 /* 2867 * If already dealing with an irq safe wait queue, we are all set. 2868 * Otherwise, determine a global queue to use and lock it. 2869 */ 2870 if (!waitq_irq_safe(waitq)) { 2871 safeq = waitq_get_safeq(waitq); 2872 if (__improbable(safeq == NULL)) { 2873 panic("Trying to assert_wait on a turnstile proxy " 2874 "that hasn't been donated one (waitq: %p)", waitq); 2875 } 2876 eventmask = _CAST_TO_EVENT_MASK(waitq); 2877 waitq_lock(safeq); 2878 } else { 2879 safeq = waitq; 2880 eventmask = _CAST_TO_EVENT_MASK(wait_event); 2881 } 2882 2883 /* lock the thread now that we have the irq-safe waitq locked */ 2884 thread_lock(thread); 2885 2886 /* 2887 * Realtime threads get priority for wait queue placements. 2888 * This allows wait_queue_wakeup_one to prefer a waiting 2889 * realtime thread, similar in principle to performing 2890 * a wait_queue_wakeup_all and allowing scheduler prioritization 2891 * to run the realtime thread, but without causing the 2892 * lock contention of that scenario. 2893 */ 2894 if (thread->sched_pri >= BASEPRI_REALTIME) { 2895 realtime = 1; 2896 } 2897 2898 /* 2899 * This is the extent to which we currently take scheduling attributes 2900 * into account. If the thread is vm priviledged, we stick it at 2901 * the front of the queue. Later, these queues will honor the policy 2902 * value set at waitq_init time. 2903 */ 2904 wait_result = thread_mark_wait_locked(thread, interruptible); 2905 /* thread->wait_result has been set */ 2906 if (wait_result == THREAD_WAITING) { 2907 if (!safeq->waitq_fifo 2908 || (thread->options & TH_OPT_VMPRIV) || realtime) { 2909 waitq_thread_insert(safeq, thread, false); 2910 } else { 2911 waitq_thread_insert(safeq, thread, true); 2912 } 2913 2914 /* mark the event and real waitq, even if enqueued on a global safeq */ 2915 thread->wait_event = wait_event; 2916 thread->waitq = waitq; 2917 2918 if (deadline != 0) { 2919 boolean_t act; 2920 2921 act = timer_call_enter_with_leeway(&thread->wait_timer, 2922 NULL, 2923 deadline, leeway, 2924 urgency, FALSE); 2925 if (!act) { 2926 thread->wait_timer_active++; 2927 } 2928 thread->wait_timer_is_set = TRUE; 2929 } 2930 2931 if (waitq_is_global(safeq)) { 2932 safeq->waitq_eventmask |= eventmask; 2933 } 2934 2935 waitq_stats_count_wait(waitq); 2936 } 2937 2938 /* unlock the thread */ 2939 thread_unlock(thread); 2940 2941 /* update the inheritor's thread priority if the waitq is embedded in turnstile */ 2942 if (waitq_is_turnstile_queue(safeq) && wait_result == THREAD_WAITING) { 2943 turnstile_recompute_priority_locked(waitq_to_turnstile(safeq)); 2944 turnstile_update_inheritor_locked(waitq_to_turnstile(safeq)); 2945 } 2946 2947 /* unlock the safeq if we locked it here */ 2948 if (safeq != waitq) { 2949 waitq_unlock(safeq); 2950 } 2951 2952 splx(s); 2953 2954 return wait_result; 2955 } 2956 2957 /** 2958 * remove 'thread' from its current blocking state on 'waitq' 2959 * 2960 * Conditions: 2961 * 'thread' is locked 2962 * 2963 * Notes: 2964 * This function is primarily used by clear_wait_internal in 2965 * sched_prim.c from the thread timer wakeup path 2966 * (i.e. 
the thread was waiting on 'waitq' with a timeout that expired) 2967 */ 2968 int 2969 waitq_pull_thread_locked(struct waitq *waitq, thread_t thread) 2970 { 2971 struct waitq *safeq; 2972 2973 assert_thread_magic(thread); 2974 assert(thread->waitq == waitq); 2975 2976 /* Find the interrupts disabled queue thread is waiting on */ 2977 if (!waitq_irq_safe(waitq)) { 2978 safeq = waitq_get_safeq(waitq); 2979 if (__improbable(safeq == NULL)) { 2980 panic("Trying to clear_wait on a turnstile proxy " 2981 "that hasn't been donated one (waitq: %p)", waitq); 2982 } 2983 } else { 2984 safeq = waitq; 2985 } 2986 2987 /* thread is already locked so have to try for the waitq lock */ 2988 if (!waitq_lock_try(safeq)) { 2989 return 0; 2990 } 2991 2992 waitq_thread_remove(safeq, thread); 2993 thread_clear_waitq_state(thread); 2994 waitq_stats_count_clear_wakeup(waitq); 2995 2996 /* clear the global event mask if this was the last thread there! */ 2997 if (waitq_is_global(safeq) && waitq_empty(safeq)) { 2998 safeq->waitq_eventmask = 0; 2999 /* JMM - also mark no-waiters on waitq (if not the same as the safeq) */ 3000 } 3001 3002 waitq_unlock(safeq); 3003 3004 return 1; 3005 } 3006 3007 3008 static __inline__ 3009 void 3010 maybe_adjust_thread_pri(thread_t thread, 3011 int priority, 3012 __kdebug_only struct waitq *waitq) 3013 { 3014 /* 3015 * If the caller is requesting the waitq subsystem to promote the 3016 * priority of the awoken thread, then boost the thread's priority to 3017 * the default WAITQ_BOOST_PRIORITY (if it's not already equal or 3018 * higher priority). This boost must be removed via a call to 3019 * waitq_clear_promotion_locked before the thread waits again. 3020 * 3021 * WAITQ_PROMOTE_PRIORITY is -2. 3022 * Anything above 0 represents a mutex promotion. 3023 * The default 'no action' value is -1. 3024 * TODO: define this in a header 3025 */ 3026 if (priority == WAITQ_PROMOTE_PRIORITY) { 3027 uintptr_t trace_waitq = 0; 3028 if (__improbable(kdebug_enable)) { 3029 trace_waitq = VM_KERNEL_UNSLIDE_OR_PERM(waitq); 3030 } 3031 3032 sched_thread_promote_reason(thread, TH_SFLAG_WAITQ_PROMOTED, trace_waitq); 3033 } 3034 } 3035 3036 /* 3037 * Clear a potential thread priority promotion from a waitq wakeup 3038 * with WAITQ_PROMOTE_PRIORITY. 3039 * 3040 * This must be called on the thread which was woken up with TH_SFLAG_WAITQ_PROMOTED. 3041 */ 3042 void 3043 waitq_clear_promotion_locked(struct waitq *waitq, thread_t thread) 3044 { 3045 spl_t s; 3046 3047 assert(waitq_held(waitq)); 3048 assert(thread != THREAD_NULL); 3049 assert(thread == current_thread()); 3050 3051 /* This flag is only cleared by the thread itself, so safe to check outside lock */ 3052 if ((thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED) != TH_SFLAG_WAITQ_PROMOTED) { 3053 return; 3054 } 3055 3056 if (!waitq_irq_safe(waitq)) { 3057 s = splsched(); 3058 } 3059 thread_lock(thread); 3060 3061 sched_thread_unpromote_reason(thread, TH_SFLAG_WAITQ_PROMOTED, 0); 3062 3063 thread_unlock(thread); 3064 if (!waitq_irq_safe(waitq)) { 3065 splx(s); 3066 } 3067 } 3068 3069 /** 3070 * wakeup all threads waiting on 'waitq' for 'wake_event' 3071 * 3072 * Conditions: 3073 * 'waitq' is locked 3074 * 3075 * Notes: 3076 * May temporarily disable and re-enable interrupts 3077 * and re-adjust thread priority of each awoken thread. 3078 * 3079 * If the input 'lock_state' == WAITQ_UNLOCK then the waitq will have 3080 * been unlocked before calling thread_go() on any returned threads, and 3081 * is guaranteed to be unlocked upon function return. 
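 *
 * A minimal caller sketch (hypothetical 'wq' and 'event' locals; assumes
 * the CAST_EVENT64_T and WAITQ_ALL_PRIORITIES definitions from waitq.h,
 * and passes NULL for the prepost reservation list purely for brevity):
 *
 *	waitq_lock(wq);
 *	kr = waitq_wakeup64_all_locked(wq, CAST_EVENT64_T(event),
 *	    THREAD_AWAKENED, NULL, WAITQ_ALL_PRIORITIES, WAITQ_UNLOCK);
 *
 * On return 'wq' is already unlocked (WAITQ_UNLOCK was passed), and
 * kr == KERN_NOT_WAITING means no thread was waiting for 'event'.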
3082 */ 3083 kern_return_t 3084 waitq_wakeup64_all_locked(struct waitq *waitq, 3085 event64_t wake_event, 3086 wait_result_t result, 3087 uint64_t *reserved_preposts, 3088 int priority, 3089 waitq_lock_state_t lock_state) 3090 { 3091 kern_return_t ret; 3092 thread_t thread; 3093 spl_t th_spl; 3094 int nthreads; 3095 queue_head_t wakeup_queue; 3096 3097 assert(waitq_held(waitq)); 3098 queue_init(&wakeup_queue); 3099 3100 nthreads = waitq_select_n_locked(waitq, wake_event, NULL, NULL, 3101 reserved_preposts, 3102 &wakeup_queue, -1, &th_spl, priority); 3103 3104 /* set each thread running */ 3105 ret = KERN_NOT_WAITING; 3106 3107 #if CONFIG_WAITQ_STATS 3108 qe_foreach_element(thread, &wakeup_queue, wait_links) 3109 waitq_stats_count_wakeup(waitq); 3110 #endif 3111 if (lock_state == WAITQ_UNLOCK) { 3112 waitq_unlock(waitq); 3113 } 3114 3115 qe_foreach_element_safe(thread, &wakeup_queue, wait_links) { 3116 assert_thread_magic(thread); 3117 remqueue(&thread->wait_links); 3118 maybe_adjust_thread_pri(thread, priority, waitq); 3119 ret = thread_go(thread, result, WQ_OPTION_NONE); 3120 assert(ret == KERN_SUCCESS); 3121 thread_unlock(thread); 3122 } 3123 if (nthreads > 0) { 3124 splx(th_spl); 3125 } else { 3126 waitq_stats_count_fail(waitq); 3127 } 3128 3129 return ret; 3130 } 3131 3132 /** 3133 * wakeup one thread waiting on 'waitq' for 'wake_event' 3134 * 3135 * Conditions: 3136 * 'waitq' is locked 3137 * 3138 * Notes: 3139 * May temporarily disable and re-enable interrupts. 3140 */ 3141 kern_return_t 3142 waitq_wakeup64_one_locked(struct waitq *waitq, 3143 event64_t wake_event, 3144 wait_result_t result, 3145 uint64_t *reserved_preposts, 3146 int priority, 3147 waitq_lock_state_t lock_state, 3148 waitq_options_t option) 3149 { 3150 thread_t thread; 3151 spl_t th_spl; 3152 3153 assert(waitq_held(waitq)); 3154 3155 thread = waitq_select_one_locked(waitq, wake_event, 3156 reserved_preposts, 3157 priority, &th_spl); 3158 3159 if (thread != THREAD_NULL) { 3160 waitq_stats_count_wakeup(waitq); 3161 } else { 3162 waitq_stats_count_fail(waitq); 3163 } 3164 3165 if (lock_state == WAITQ_UNLOCK) { 3166 waitq_unlock(waitq); 3167 } 3168 3169 if (thread != THREAD_NULL) { 3170 maybe_adjust_thread_pri(thread, priority, waitq); 3171 kern_return_t ret = thread_go(thread, result, option); 3172 assert(ret == KERN_SUCCESS); 3173 thread_unlock(thread); 3174 splx(th_spl); 3175 return ret; 3176 } 3177 3178 return KERN_NOT_WAITING; 3179 } 3180 3181 /** 3182 * wakeup one thread waiting on 'waitq' for 'wake_event' 3183 * 3184 * Conditions: 3185 * 'waitq' is locked 3186 * 3187 * Returns: 3188 * A locked, runnable thread. 3189 * If return value is non-NULL, interrupts have also 3190 * been disabled, and the caller is responsible to call 3191 * splx() with the returned '*spl' value. 
3192 */ 3193 thread_t 3194 waitq_wakeup64_identify_locked(struct waitq *waitq, 3195 event64_t wake_event, 3196 wait_result_t result, 3197 spl_t *spl, 3198 uint64_t *reserved_preposts, 3199 int priority, 3200 waitq_lock_state_t lock_state) 3201 { 3202 thread_t thread; 3203 3204 assert(waitq_held(waitq)); 3205 3206 thread = waitq_select_one_locked(waitq, wake_event, 3207 reserved_preposts, 3208 priority, spl); 3209 3210 if (thread != THREAD_NULL) { 3211 waitq_stats_count_wakeup(waitq); 3212 } else { 3213 waitq_stats_count_fail(waitq); 3214 } 3215 3216 if (lock_state == WAITQ_UNLOCK) { 3217 waitq_unlock(waitq); 3218 } 3219 3220 if (thread != THREAD_NULL) { 3221 kern_return_t __assert_only ret; 3222 ret = thread_go(thread, result, WQ_OPTION_NONE); 3223 assert(ret == KERN_SUCCESS); 3224 } 3225 3226 return thread; /* locked if not NULL (caller responsible for spl) */ 3227 } 3228 3229 /** 3230 * wakeup a specific thread iff it's waiting on 'waitq' for 'wake_event' 3231 * 3232 * Conditions: 3233 * 'waitq' is locked 3234 * 'thread' is unlocked 3235 * 3236 * Notes: 3237 * May temporarily disable and re-enable interrupts 3238 * 3239 * If the input lock_state == WAITQ_UNLOCK then the waitq will have been 3240 * unlocked before calling thread_go() if 'thread' is to be awoken, and 3241 * is guaranteed to be unlocked upon function return. 3242 */ 3243 kern_return_t 3244 waitq_wakeup64_thread_locked(struct waitq *waitq, 3245 event64_t wake_event, 3246 thread_t thread, 3247 wait_result_t result, 3248 waitq_lock_state_t lock_state) 3249 { 3250 kern_return_t ret; 3251 spl_t th_spl; 3252 3253 assert(waitq_held(waitq)); 3254 assert_thread_magic(thread); 3255 3256 /* 3257 * See if the thread was still waiting there. If so, it got 3258 * dequeued and returned locked. 3259 */ 3260 ret = waitq_select_thread_locked(waitq, wake_event, thread, &th_spl); 3261 3262 if (ret == KERN_SUCCESS) { 3263 waitq_stats_count_wakeup(waitq); 3264 } else { 3265 waitq_stats_count_fail(waitq); 3266 } 3267 3268 if (lock_state == WAITQ_UNLOCK) { 3269 waitq_unlock(waitq); 3270 } 3271 3272 if (ret != KERN_SUCCESS) { 3273 return KERN_NOT_WAITING; 3274 } 3275 3276 ret = thread_go(thread, result, WQ_OPTION_NONE); 3277 assert(ret == KERN_SUCCESS); 3278 thread_unlock(thread); 3279 splx(th_spl); 3280 3281 return ret; 3282 } 3283 3284 3285 3286 /* ---------------------------------------------------------------------- 3287 * 3288 * In-Kernel API 3289 * 3290 * ---------------------------------------------------------------------- */ 3291 3292 /** 3293 * initialize a waitq object 3294 */ 3295 kern_return_t 3296 waitq_init(struct waitq *waitq, int policy) 3297 { 3298 assert(waitq != NULL); 3299 3300 /* only FIFO and LIFO for now */ 3301 if ((policy & SYNC_POLICY_FIXED_PRIORITY) != 0) { 3302 return KERN_INVALID_ARGUMENT; 3303 } 3304 3305 waitq->waitq_fifo = ((policy & SYNC_POLICY_REVERSED) == 0); 3306 waitq->waitq_irq = !!(policy & SYNC_POLICY_DISABLE_IRQ); 3307 waitq->waitq_prepost = 0; 3308 if (policy & SYNC_POLICY_TURNSTILE_PROXY) { 3309 waitq->waitq_type = WQT_TSPROXY; 3310 } else { 3311 waitq->waitq_type = WQT_QUEUE; 3312 } 3313 waitq->waitq_turnstile = !!(policy & SYNC_POLICY_TURNSTILE); 3314 waitq->waitq_eventmask = 0; 3315 3316 waitq->waitq_set_id = 0; 3317 waitq->waitq_prepost_id = 0; 3318 3319 waitq_lock_init(waitq); 3320 if (waitq_is_turnstile_queue(waitq)) { 3321 /* For turnstile, initialize it as a priority queue */ 3322 priority_queue_init(&waitq->waitq_prio_queue); 3323 assert(waitq->waitq_fifo == 0); 3324 } else if (policy & 
SYNC_POLICY_TURNSTILE_PROXY) { 3325 waitq->waitq_ts = TURNSTILE_NULL; 3326 waitq->waitq_tspriv = NULL; 3327 } else { 3328 queue_init(&waitq->waitq_queue); 3329 } 3330 3331 waitq->waitq_isvalid = 1; 3332 return KERN_SUCCESS; 3333 } 3334 3335 struct wq_unlink_ctx { 3336 struct waitq *unlink_wq; 3337 struct waitq_set *unlink_wqset; 3338 }; 3339 3340 static int waitq_unlink_prepost_cb(struct waitq_set __unused *wqset, void *ctx, 3341 struct wq_prepost *wqp, struct waitq *waitq); 3342 3343 /** 3344 * walk_waitq_links callback to invalidate 'link' parameter 3345 * 3346 * Conditions: 3347 * Called from walk_waitq_links. 3348 * Note that unlike other callbacks, this one makes no assumptions about 3349 * the 'waitq' parameter, specifically it does not have to be locked or 3350 * even valid. 3351 */ 3352 static int 3353 waitq_unlink_all_cb(struct waitq *waitq, void *ctx, 3354 struct waitq_link *link) 3355 { 3356 (void)waitq; 3357 (void)ctx; 3358 if (wql_type(link) == WQL_LINK && wql_is_valid(link)) { 3359 wql_invalidate(link); 3360 } 3361 3362 if (wql_type(link) == WQL_WQS) { 3363 struct waitq_set *wqset; 3364 struct wq_unlink_ctx ulctx; 3365 3366 /* 3367 * When destroying the waitq, take the time to clear out any 3368 * preposts it may have made. This could potentially save time 3369 * on the IPC send path which would otherwise have to iterate 3370 * over lots of dead port preposts. 3371 */ 3372 if (waitq->waitq_prepost_id == 0) { 3373 goto out; 3374 } 3375 3376 wqset = link->wql_wqs.wql_set; 3377 assert(wqset != NULL); 3378 assert(!waitq_irq_safe(&wqset->wqset_q)); 3379 3380 waitq_set_lock(wqset); 3381 3382 if (!waitq_set_is_valid(wqset)) { 3383 /* someone raced us to teardown */ 3384 goto out_unlock; 3385 } 3386 if (!waitq_set_maybe_preposted(wqset)) { 3387 goto out_unlock; 3388 } 3389 3390 ulctx.unlink_wq = waitq; 3391 ulctx.unlink_wqset = wqset; 3392 (void)wq_prepost_iterate(wqset->wqset_prepost_id, &ulctx, 3393 waitq_unlink_prepost_cb); 3394 out_unlock: 3395 waitq_set_unlock(wqset); 3396 } 3397 3398 out: 3399 return WQ_ITERATE_CONTINUE; 3400 } 3401 3402 3403 /** 3404 * cleanup any link/prepost table resources associated with a waitq 3405 */ 3406 void 3407 waitq_deinit(struct waitq *waitq) 3408 { 3409 spl_t s; 3410 3411 assert(waitq); 3412 if (!waitq_is_valid(waitq)) { 3413 return; 3414 } 3415 3416 if (!waitq_is_queue(waitq) && !waitq_is_turnstile_proxy(waitq)) { 3417 return; 3418 } 3419 3420 if (waitq_irq_safe(waitq)) { 3421 s = splsched(); 3422 } 3423 waitq_lock(waitq); 3424 3425 if (waitq_valid(waitq)) { 3426 waitq->waitq_isvalid = 0; 3427 if (!waitq_irq_safe(waitq)) { 3428 waitq_unlink_all_unlock(waitq); 3429 /* waitq unlocked and set links deallocated */ 3430 goto out; 3431 } 3432 } 3433 3434 waitq_unlock(waitq); 3435 if (waitq_irq_safe(waitq)) { 3436 splx(s); 3437 } 3438 3439 out: 3440 #if MACH_ASSERT 3441 if (waitq_is_turnstile_queue(waitq)) { 3442 assert(priority_queue_empty(&waitq->waitq_prio_queue)); 3443 } else if (waitq_is_turnstile_proxy(waitq)) { 3444 assert(waitq->waitq_ts == TURNSTILE_NULL); 3445 } else { 3446 assert(queue_empty(&waitq->waitq_queue)); 3447 } 3448 #else 3449 (void)0; 3450 #endif // MACH_ASSERT 3451 } 3452 3453 void 3454 waitq_invalidate_locked(struct waitq *waitq) 3455 { 3456 assert(waitq_held(waitq)); 3457 assert(waitq_is_valid(waitq)); 3458 waitq->waitq_isvalid = 0; 3459 } 3460 3461 /** 3462 * invalidate the given wq_prepost object 3463 * 3464 * Conditions: 3465 * Called from wq_prepost_iterate (_not_ from wq_prepost_foreach_locked!)
3466 */ 3467 static int 3468 wqset_clear_prepost_chain_cb(struct waitq_set __unused *wqset, 3469 void __unused *ctx, 3470 struct wq_prepost *wqp, 3471 struct waitq __unused *waitq) 3472 { 3473 if (wqp_type(wqp) == WQP_POST) { 3474 wq_prepost_invalidate(wqp); 3475 } 3476 return WQ_ITERATE_CONTINUE; 3477 } 3478 3479 3480 /** 3481 * allocate and initialize a waitq set object 3482 * 3483 * Conditions: 3484 * may block 3485 * 3486 * Returns: 3487 * allocated / initialized waitq_set object. 3488 * the waits_set object returned does not have 3489 * a waitq_link associated. 3490 * 3491 * NULL on failure 3492 */ 3493 struct waitq_set * 3494 waitq_set_alloc(int policy, waitq_set_prepost_hook_t *prepost_hook) 3495 { 3496 struct waitq_set *wqset; 3497 3498 wqset = (struct waitq_set *)zalloc(waitq_set_zone); 3499 if (!wqset) { 3500 panic("Can't allocate a new waitq set from zone %p", waitq_set_zone); 3501 } 3502 3503 kern_return_t ret; 3504 ret = waitq_set_init(wqset, policy, NULL, prepost_hook); 3505 if (ret != KERN_SUCCESS) { 3506 zfree(waitq_set_zone, wqset); 3507 wqset = NULL; 3508 } 3509 3510 return wqset; 3511 } 3512 3513 /** 3514 * initialize a waitq set object 3515 * 3516 * if no 'reserved_link' object is passed 3517 * the waitq_link will be lazily allocated 3518 * on demand through waitq_set_lazy_init_link. 3519 */ 3520 kern_return_t 3521 waitq_set_init(struct waitq_set *wqset, 3522 int policy, uint64_t *reserved_link, 3523 waitq_set_prepost_hook_t *prepost_hook) 3524 { 3525 struct waitq_link *link; 3526 kern_return_t ret; 3527 3528 memset(wqset, 0, sizeof(*wqset)); 3529 3530 ret = waitq_init(&wqset->wqset_q, policy); 3531 if (ret != KERN_SUCCESS) { 3532 return ret; 3533 } 3534 3535 wqset->wqset_q.waitq_type = WQT_SET; 3536 if (policy & SYNC_POLICY_PREPOST) { 3537 wqset->wqset_q.waitq_prepost = 1; 3538 wqset->wqset_prepost_id = 0; 3539 assert(prepost_hook == NULL); 3540 } else { 3541 wqset->wqset_q.waitq_prepost = 0; 3542 wqset->wqset_prepost_hook = prepost_hook; 3543 } 3544 3545 if (reserved_link && *reserved_link != 0) { 3546 link = wql_get_reserved(*reserved_link, WQL_WQS); 3547 3548 if (!link) { 3549 panic("Can't allocate link object for waitq set: %p", wqset); 3550 } 3551 3552 /* always consume the caller's reference */ 3553 *reserved_link = 0; 3554 3555 link->wql_wqs.wql_set = wqset; 3556 wql_mkvalid(link); 3557 3558 wqset->wqset_id = link->wql_setid.id; 3559 wql_put_link(link); 3560 } else { 3561 /* 3562 * Lazy allocate the link only when an actual id is needed. 3563 */ 3564 wqset->wqset_id = WQSET_NOT_LINKED; 3565 } 3566 3567 return KERN_SUCCESS; 3568 } 3569 3570 #if DEVELOPMENT || DEBUG 3571 3572 int 3573 sysctl_helper_waitq_set_nelem(void) 3574 { 3575 return ltable_nelem(&g_wqlinktable); 3576 } 3577 3578 #endif 3579 3580 /** 3581 * initialize a waitq set link. 
3582 * 3583 * Conditions: 3584 * may block 3585 * locks and unlocks the waiq set lock 3586 * 3587 */ 3588 void 3589 waitq_set_lazy_init_link(struct waitq_set *wqset) 3590 { 3591 struct waitq_link *link; 3592 3593 assert(get_preemption_level() == 0 && waitq_wait_possible(current_thread())); 3594 3595 waitq_set_lock(wqset); 3596 if (!waitq_set_should_lazy_init_link(wqset)) { 3597 waitq_set_unlock(wqset); 3598 return; 3599 } 3600 3601 assert(wqset->wqset_id == WQSET_NOT_LINKED); 3602 waitq_set_unlock(wqset); 3603 3604 link = wql_alloc_link(WQL_WQS); 3605 if (!link) { 3606 panic("Can't allocate link object for waitq set: %p", wqset); 3607 } 3608 3609 link->wql_wqs.wql_set = wqset; 3610 3611 waitq_set_lock(wqset); 3612 if (waitq_set_should_lazy_init_link(wqset)) { 3613 wql_mkvalid(link); 3614 wqset->wqset_id = link->wql_setid.id; 3615 } 3616 3617 assert(wqset->wqset_id != 0); 3618 assert(wqset->wqset_id != WQSET_NOT_LINKED); 3619 3620 waitq_set_unlock(wqset); 3621 3622 wql_put_link(link); 3623 3624 return; 3625 } 3626 3627 /** 3628 * checks if a waitq set needs to be linked. 3629 * 3630 */ 3631 boolean_t 3632 waitq_set_should_lazy_init_link(struct waitq_set *wqset) 3633 { 3634 if (waitqs_is_linked(wqset) || wqset->wqset_id == 0) { 3635 return FALSE; 3636 } 3637 return TRUE; 3638 } 3639 3640 /** 3641 * clear out / release any resources associated with a waitq set 3642 * 3643 * Conditions: 3644 * may block 3645 * Note: 3646 * This will render the waitq set invalid, and it must 3647 * be re-initialized with waitq_set_init before it can be used again 3648 */ 3649 void 3650 waitq_set_deinit(struct waitq_set *wqset) 3651 { 3652 struct waitq_link *link = NULL; 3653 uint64_t set_id, prepost_id; 3654 3655 if (!waitqs_is_set(wqset)) { 3656 panic("trying to de-initialize an invalid wqset @%p", wqset); 3657 } 3658 3659 assert(!waitq_irq_safe(&wqset->wqset_q)); 3660 3661 waitq_set_lock(wqset); 3662 3663 if (waitq_set_has_prepost_hook(wqset)) { 3664 waitq_set_prepost_hook_t *hook = wqset->wqset_prepost_hook; 3665 /* 3666 * If the wqset_prepost_hook value is non 0, 3667 * then another core is currently posting to this waitq set 3668 * and we need for it to finish what it's doing. 3669 */ 3670 while (os_atomic_load(hook, relaxed) != 0) { 3671 waitq_set_unlock(wqset); 3672 delay(1); 3673 waitq_set_lock(wqset); 3674 } 3675 } 3676 3677 set_id = wqset->wqset_id; 3678 3679 if (waitqs_is_linked(wqset) || set_id == 0) { 3680 /* grab the set's link object */ 3681 link = wql_get_link(set_id); 3682 if (link) { 3683 wql_invalidate(link); 3684 } 3685 /* someone raced us to deinit */ 3686 if (!link || wqset->wqset_id != set_id || set_id != link->wql_setid.id) { 3687 if (link) { 3688 wql_put_link(link); 3689 } 3690 waitq_set_unlock(wqset); 3691 return; 3692 } 3693 3694 /* the link should be a valid link object at this point */ 3695 assert(link != NULL && wql_type(link) == WQL_WQS); 3696 3697 wqset->wqset_id = 0; 3698 } 3699 3700 /* 3701 * This set may have a lot of preposts, or may have been a member of 3702 * many other sets. To minimize spinlock hold times, we clear out the 3703 * waitq set data structure under the lock-hold, but don't clear any 3704 * table objects. We keep handles to the prepost and set linkage 3705 * objects and free those outside the critical section. 3706 */ 3707 prepost_id = 0; 3708 if (wqset->wqset_q.waitq_prepost && wqset->wqset_prepost_id) { 3709 assert(link != NULL); 3710 prepost_id = wqset->wqset_prepost_id; 3711 } 3712 /* else { TODO: notify kqueue subsystem? 
} */ 3713 wqset->wqset_prepost_id = 0; 3714 3715 wqset->wqset_q.waitq_fifo = 0; 3716 wqset->wqset_q.waitq_prepost = 0; 3717 wqset->wqset_q.waitq_isvalid = 0; 3718 3719 /* don't clear the 'waitq_irq' bit: it's used in locking! */ 3720 wqset->wqset_q.waitq_eventmask = 0; 3721 3722 waitq_unlink_all_unlock(&wqset->wqset_q); 3723 /* wqset->wqset_q unlocked and set links deallocated */ 3724 3725 3726 if (link) { 3727 /* 3728 * walk_waitq_links may race with us for access to the waitq set. 3729 * If walk_waitq_links has a reference to the set, then we should wait 3730 * until the link's refcount goes to 1 (our reference) before we exit 3731 * this function. That way we ensure that the waitq set memory will 3732 * remain valid even though it's been cleared out. 3733 */ 3734 while (wql_refcnt(link) > 1) { 3735 delay(1); 3736 } 3737 wql_put_link(link); 3738 } 3739 3740 /* drop / unlink all the prepost table objects */ 3741 /* JMM - can this happen before the delay? */ 3742 if (prepost_id) { 3743 (void)wq_prepost_iterate(prepost_id, NULL, 3744 wqset_clear_prepost_chain_cb); 3745 } 3746 } 3747 3748 /** 3749 * de-initialize and free an allocated waitq set object 3750 * 3751 * Conditions: 3752 * may block 3753 */ 3754 kern_return_t 3755 waitq_set_free(struct waitq_set *wqset) 3756 { 3757 waitq_set_deinit(wqset); 3758 3759 memset(wqset, 0, sizeof(*wqset)); 3760 zfree(waitq_set_zone, wqset); 3761 3762 return KERN_SUCCESS; 3763 } 3764 3765 #if DEVELOPMENT || DEBUG 3766 #if CONFIG_WAITQ_DEBUG 3767 /** 3768 * return the set ID of 'wqset' 3769 */ 3770 uint64_t 3771 wqset_id(struct waitq_set *wqset) 3772 { 3773 if (!wqset) { 3774 return 0; 3775 } 3776 3777 assert(waitqs_is_set(wqset)); 3778 3779 if (!waitqs_is_linked(wqset)) { 3780 waitq_set_lazy_init_link(wqset); 3781 } 3782 3783 return wqset->wqset_id; 3784 } 3785 3786 /** 3787 * returns a pointer to the waitq object embedded in 'wqset' 3788 */ 3789 struct waitq * 3790 wqset_waitq(struct waitq_set *wqset) 3791 { 3792 if (!wqset) { 3793 return NULL; 3794 } 3795 3796 assert(waitqs_is_set(wqset)); 3797 3798 return &wqset->wqset_q; 3799 } 3800 #endif /* CONFIG_WAITQ_DEBUG */ 3801 #endif /* DEVELOPMENT || DEBUG */ 3802 3803 3804 /** 3805 * clear all preposts originating from 'waitq' 3806 * 3807 * Conditions: 3808 * 'waitq' locked 3809 * may (rarely) spin waiting for another on-core thread to 3810 * release the last reference to the waitq's prepost link object 3811 * 3812 * NOTE: 3813 * If this function needs to spin, it will drop the waitq lock! 3814 * The return value of the function indicates whether or not this 3815 * happened: 1 == lock was dropped, 0 == lock held 3816 */ 3817 int 3818 waitq_clear_prepost_locked(struct waitq *waitq) 3819 { 3820 struct wq_prepost *wqp; 3821 int dropped_lock = 0; 3822 3823 assert(!waitq_irq_safe(waitq)); 3824 3825 if (waitq->waitq_prepost_id == 0) { 3826 return 0; 3827 } 3828 3829 wqp = wq_prepost_get(waitq->waitq_prepost_id); 3830 waitq->waitq_prepost_id = 0; 3831 if (wqp) { 3832 uint64_t wqp_id = wqp->wqp_prepostid.id; 3833 wqdbg_v("invalidate prepost 0x%llx (refcnt:%d)", 3834 wqp->wqp_prepostid.id, wqp_refcnt(wqp)); 3835 wq_prepost_invalidate(wqp); 3836 while (wqp_refcnt(wqp) > 1) { 3837 /* 3838 * Some other thread must have raced us to grab a link 3839 * object reference before we invalidated it. This 3840 * means that they are probably trying to access the 3841 * waitq to which the prepost object points. We need 3842 * to wait here until the other thread drops their 3843 * reference. 
We know that no one else can get a 3844 * reference (the object has been invalidated), and 3845 * that prepost references are short-lived (dropped on 3846 * a call to wq_prepost_put). We also know that no one 3847 * blocks while holding a reference therefore the 3848 * other reference holder must be on-core. We'll just 3849 * sit and wait for the other reference to be dropped. 3850 */ 3851 disable_preemption(); 3852 3853 waitq_unlock(waitq); 3854 dropped_lock = 1; 3855 /* 3856 * don't yield here, just spin and assume the other 3857 * consumer is already on core... 3858 */ 3859 delay(1); 3860 3861 waitq_lock(waitq); 3862 3863 enable_preemption(); 3864 } 3865 if (wqp_refcnt(wqp) > 0 && wqp->wqp_prepostid.id == wqp_id) { 3866 wq_prepost_put(wqp); 3867 } 3868 } 3869 3870 return dropped_lock; 3871 } 3872 3873 /** 3874 * clear all preposts originating from 'waitq' 3875 * 3876 * Conditions: 3877 * 'waitq' is not locked 3878 * may disable and re-enable interrupts 3879 */ 3880 void 3881 waitq_clear_prepost(struct waitq *waitq) 3882 { 3883 assert(waitq_valid(waitq)); 3884 assert(!waitq_irq_safe(waitq)); 3885 3886 waitq_lock(waitq); 3887 /* it doesn't matter to us if the lock is dropped here */ 3888 (void)waitq_clear_prepost_locked(waitq); 3889 waitq_unlock(waitq); 3890 } 3891 3892 /** 3893 * return a the waitq's prepost object ID (allocate if necessary) 3894 * 3895 * Conditions: 3896 * 'waitq' is unlocked 3897 */ 3898 uint64_t 3899 waitq_get_prepost_id(struct waitq *waitq) 3900 { 3901 struct wq_prepost *wqp; 3902 uint64_t wqp_id = 0; 3903 3904 if (!waitq_valid(waitq)) { 3905 return 0; 3906 } 3907 3908 assert(!waitq_irq_safe(waitq)); 3909 3910 waitq_lock(waitq); 3911 3912 if (!waitq_valid(waitq)) { 3913 goto out_unlock; 3914 } 3915 3916 if (waitq->waitq_prepost_id) { 3917 wqp_id = waitq->waitq_prepost_id; 3918 goto out_unlock; 3919 } 3920 3921 /* don't hold a spinlock while allocating a prepost object */ 3922 waitq_unlock(waitq); 3923 3924 wqp = wq_prepost_alloc(WQP_WQ, 1); 3925 if (!wqp) { 3926 return 0; 3927 } 3928 3929 /* re-acquire the waitq lock */ 3930 waitq_lock(waitq); 3931 3932 if (!waitq_valid(waitq)) { 3933 wq_prepost_put(wqp); 3934 wqp_id = 0; 3935 goto out_unlock; 3936 } 3937 3938 if (waitq->waitq_prepost_id) { 3939 /* we were beat by someone else */ 3940 wq_prepost_put(wqp); 3941 wqp_id = waitq->waitq_prepost_id; 3942 goto out_unlock; 3943 } 3944 3945 wqp->wqp_wq.wqp_wq_ptr = waitq; 3946 3947 wqp_set_valid(wqp); 3948 wqp_id = wqp->wqp_prepostid.id; 3949 waitq->waitq_prepost_id = wqp_id; 3950 3951 wq_prepost_put(wqp); 3952 3953 out_unlock: 3954 waitq_unlock(waitq); 3955 3956 return wqp_id; 3957 } 3958 3959 3960 static int 3961 waitq_inset_cb(struct waitq *waitq, void *ctx, struct waitq_link *link) 3962 { 3963 uint64_t setid = *(uint64_t *)ctx; 3964 int wqltype = wql_type(link); 3965 (void)waitq; 3966 if (wqltype == WQL_WQS && link->wql_setid.id == setid) { 3967 wqdbg_v(" waitq already in set 0x%llx", setid); 3968 return WQ_ITERATE_FOUND; 3969 } else if (wqltype == WQL_LINK) { 3970 /* 3971 * break out early if we see a link that points to the setid 3972 * in question. 
This saves us a step in the 3973 * iteration/recursion 3974 */ 3975 wqdbg_v(" waitq already in set 0x%llx (WQL_LINK)", setid); 3976 if (link->wql_link.left_setid == setid || 3977 link->wql_link.right_setid == setid) { 3978 return WQ_ITERATE_FOUND; 3979 } 3980 } 3981 3982 return WQ_ITERATE_CONTINUE; 3983 } 3984 3985 /** 3986 * determine if 'waitq' is a member of 'wqset' 3987 * 3988 * Conditions: 3989 * neither 'waitq' nor 'wqset' is locked 3990 * may disable and re-enable interrupts while locking 'waitq' 3991 */ 3992 boolean_t 3993 waitq_member(struct waitq *waitq, struct waitq_set *wqset) 3994 { 3995 kern_return_t kr = WQ_ITERATE_SUCCESS; 3996 uint64_t setid; 3997 3998 if (!waitq_valid(waitq)) { 3999 panic("Invalid waitq: %p", waitq); 4000 } 4001 assert(!waitq_irq_safe(waitq)); 4002 4003 if (!waitqs_is_set(wqset)) { 4004 return FALSE; 4005 } 4006 4007 waitq_lock(waitq); 4008 4009 if (!waitqs_is_linked(wqset)) { 4010 goto out_unlock; 4011 } 4012 4013 setid = wqset->wqset_id; 4014 4015 /* fast path: most waitqs are members of only 1 set */ 4016 if (waitq->waitq_set_id == setid) { 4017 waitq_unlock(waitq); 4018 return TRUE; 4019 } 4020 4021 /* walk the link table and look for the Set ID of wqset */ 4022 kr = walk_waitq_links(LINK_WALK_ONE_LEVEL, waitq, waitq->waitq_set_id, 4023 WQL_ALL, (void *)&setid, waitq_inset_cb); 4024 4025 out_unlock: 4026 waitq_unlock(waitq); 4027 return kr == WQ_ITERATE_FOUND; 4028 } 4029 4030 /** 4031 * Returns true if the given waitq is a member of at least 1 set 4032 */ 4033 boolean_t 4034 waitq_in_set(struct waitq *waitq) 4035 { 4036 struct waitq_link *link; 4037 boolean_t inset = FALSE; 4038 4039 if (waitq_irq_safe(waitq)) { 4040 return FALSE; 4041 } 4042 4043 waitq_lock(waitq); 4044 4045 if (!waitq->waitq_set_id) { 4046 goto out_unlock; 4047 } 4048 4049 link = wql_get_link(waitq->waitq_set_id); 4050 if (link) { 4051 /* if we get here, the waitq is in _at_least_one_ set */ 4052 inset = TRUE; 4053 wql_put_link(link); 4054 } else { 4055 /* we can just optimize this for next time */ 4056 waitq->waitq_set_id = 0; 4057 } 4058 4059 out_unlock: 4060 waitq_unlock(waitq); 4061 return inset; 4062 } 4063 4064 4065 /** 4066 * pre-allocate a waitq link structure from the link table 4067 * 4068 * Conditions: 4069 * 'waitq' is not locked 4070 * may (rarely) block if link table needs to grow 4071 */ 4072 uint64_t 4073 waitq_link_reserve(struct waitq *waitq) 4074 { 4075 struct waitq_link *link; 4076 uint64_t reserved_id = 0; 4077 4078 assert(get_preemption_level() == 0 && waitq_wait_possible(current_thread())); 4079 4080 /* 4081 * We've asserted that the caller can block, so we enforce a 4082 * minimum-free table element policy here. 4083 */ 4084 wql_ensure_free_space(); 4085 4086 (void)waitq; 4087 link = wql_alloc_link(LT_RESERVED); 4088 if (!link) { 4089 return 0; 4090 } 4091 4092 reserved_id = link->wql_setid.id; 4093 4094 return reserved_id; 4095 } 4096 4097 /** 4098 * release a pre-allocated waitq link structure 4099 */ 4100 void 4101 waitq_link_release(uint64_t id) 4102 { 4103 struct waitq_link *link; 4104 4105 if (id == 0) { 4106 return; 4107 } 4108 4109 link = wql_get_reserved(id, WQL_LINK); 4110 if (!link) { 4111 return; 4112 } 4113 4114 /* 4115 * if we successfully got a link object, then we know 4116 * it's not been marked valid, and can be released with 4117 * a standard wql_put_link() which should free the element.
4118 */ 4119 wql_put_link(link); 4120 #if CONFIG_LTABLE_STATS 4121 g_wqlinktable.nreserved_releases += 1; 4122 #endif 4123 } 4124 4125 /** 4126 * link 'waitq' to the set identified by 'setid' using the 'link' structure 4127 * 4128 * Conditions: 4129 * 'waitq' is locked 4130 * caller should have a reference to the 'link' object 4131 */ 4132 static kern_return_t 4133 waitq_link_internal(struct waitq *waitq, 4134 uint64_t setid, struct waitq_link *link) 4135 { 4136 struct waitq_link *qlink; 4137 kern_return_t kr; 4138 4139 assert(waitq_held(waitq)); 4140 assert(setid != 0); 4141 assert(setid != WQSET_NOT_LINKED); 4142 4143 /* 4144 * If the waitq_set_id field is empty, then this waitq is not 4145 * a member of any other set. All we have to do is update the 4146 * field. 4147 */ 4148 if (!waitq->waitq_set_id) { 4149 waitq->waitq_set_id = setid; 4150 return KERN_SUCCESS; 4151 } 4152 4153 qlink = wql_get_link(waitq->waitq_set_id); 4154 if (!qlink) { 4155 /* 4156 * The set to which this wait queue belonged has been 4157 * destroyed / invalidated. We can re-use the waitq field. 4158 */ 4159 waitq->waitq_set_id = setid; 4160 return KERN_SUCCESS; 4161 } 4162 wql_put_link(qlink); 4163 4164 /* 4165 * Check to see if it's already a member of the set. 4166 * 4167 * TODO: check for cycles! 4168 */ 4169 kr = walk_waitq_links(LINK_WALK_ONE_LEVEL, waitq, waitq->waitq_set_id, 4170 WQL_ALL, (void *)&setid, waitq_inset_cb); 4171 if (kr == WQ_ITERATE_FOUND) { 4172 return KERN_ALREADY_IN_SET; 4173 } 4174 4175 /* 4176 * This wait queue is a member of at least one set already, 4177 * and _not_ a member of the given set. Use our previously 4178 * allocated link object, and hook it up to the wait queue. 4179 * Note that it's possible that one or more of the wait queue sets to 4180 * which the wait queue belongs was invalidated before we allocated 4181 * this link object. That's OK because the next time we use that 4182 * object we'll just ignore it. 4183 */ 4184 link->wql_link.left_setid = setid; 4185 link->wql_link.right_setid = waitq->waitq_set_id; 4186 wql_mkvalid(link); 4187 4188 waitq->waitq_set_id = link->wql_setid.id; 4189 4190 return KERN_SUCCESS; 4191 } 4192 4193 /** 4194 * link 'waitq' to 'wqset' 4195 * 4196 * Conditions: 4197 * if 'lock_state' contains WAITQ_SHOULD_LOCK, 'waitq' must be unlocked. 4198 * Otherwise, 'waitq' must be locked. 4199 * 4200 * may (rarely) block on link table allocation if the table has to grow, 4201 * and no 'reserved_link' object is passed. 4202 * 4203 * may block and acquire wqset lock if the wqset passed has no link. 4204 * 4205 * Notes: 4206 * The caller can guarantee that this function will never block by 4207 * - pre-allocating a link table object and passing its ID in 'reserved_link' 4208 * - and pre-allocating the waitq set link calling waitq_set_lazy_init_link. 4209 * It is not possible to provide a reserved_link without having also linked 4210 * the wqset. 
4211 */ 4212 kern_return_t 4213 waitq_link(struct waitq *waitq, struct waitq_set *wqset, 4214 waitq_lock_state_t lock_state, uint64_t *reserved_link) 4215 { 4216 kern_return_t kr; 4217 struct waitq_link *link; 4218 int should_lock = (lock_state == WAITQ_SHOULD_LOCK); 4219 4220 if (!waitq_valid(waitq) || waitq_irq_safe(waitq)) { 4221 panic("Invalid waitq: %p", waitq); 4222 } 4223 4224 if (!waitqs_is_set(wqset)) { 4225 return KERN_INVALID_ARGUMENT; 4226 } 4227 4228 if (!reserved_link || *reserved_link == 0) { 4229 if (!waitqs_is_linked(wqset)) { 4230 waitq_set_lazy_init_link(wqset); 4231 } 4232 } 4233 4234 wqdbg_v("Link waitq %p to wqset 0x%llx", 4235 (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq), wqset->wqset_id); 4236 4237 /* 4238 * We _might_ need a new link object here, so we'll grab outside 4239 * the lock because the alloc call _might_ block. 4240 * 4241 * If the caller reserved a link beforehand, then wql_get_link 4242 * is guaranteed not to block because the caller holds an extra 4243 * reference to the link which, in turn, hold a reference to the 4244 * link table. 4245 */ 4246 if (reserved_link && *reserved_link != 0) { 4247 link = wql_get_reserved(*reserved_link, WQL_LINK); 4248 /* always consume the caller's reference */ 4249 *reserved_link = 0; 4250 } else { 4251 link = wql_alloc_link(WQL_LINK); 4252 } 4253 if (!link) { 4254 return KERN_NO_SPACE; 4255 } 4256 4257 if (should_lock) { 4258 waitq_lock(waitq); 4259 } 4260 4261 kr = waitq_link_internal(waitq, wqset->wqset_id, link); 4262 4263 if (should_lock) { 4264 waitq_unlock(waitq); 4265 } 4266 4267 wql_put_link(link); 4268 4269 return kr; 4270 } 4271 4272 /** 4273 * helper: unlink 'waitq' from waitq set identified by 'setid' 4274 * this function also prunes invalid objects from the tree 4275 * 4276 * Conditions: 4277 * MUST be called from walk_waitq_links link table walk 4278 * 'waitq' is locked 4279 * 4280 * Notes: 4281 * This is a helper function which compresses the link table by culling 4282 * unused or unnecessary links. See comments below for different 4283 * scenarios. 4284 */ 4285 static inline int 4286 waitq_maybe_remove_link(struct waitq *waitq, 4287 uint64_t setid, 4288 struct waitq_link *parent, 4289 struct waitq_link *left, 4290 struct waitq_link *right) 4291 { 4292 uint64_t *wq_setid = &waitq->waitq_set_id; 4293 4294 /* 4295 * There are two scenarios: 4296 * 4297 * Scenario 1: 4298 * -------------------------------------------------------------------- 4299 * waitq->waitq_set_id == parent 4300 * 4301 * parent(LINK) 4302 * / \ 4303 * / \ 4304 * / \ 4305 * L(LINK/WQS_l) R(LINK/WQS_r) 4306 * 4307 * In this scenario, we assert that the original waitq points to the 4308 * parent link we were passed in. If WQS_l (or WQS_r) is the waitq 4309 * set we're looking for, we can set the corresponding parent 4310 * link id (left or right) to 0. To compress the tree, we can reset the 4311 * waitq_set_id of the original waitq to point to the side of the 4312 * parent that is still valid. We then discard the parent link object. 4313 */ 4314 if (*wq_setid == parent->wql_setid.id) { 4315 if (!left && !right) { 4316 /* completely invalid children */ 4317 wql_invalidate(parent); 4318 wqdbg_v("S1, L+R"); 4319 *wq_setid = 0; 4320 return WQ_ITERATE_INVALID; 4321 } else if (!left || left->wql_setid.id == setid) { 4322 /* 4323 * left side matches we know it points either to the 4324 * WQS we're unlinking, or to an invalid object: 4325 * no need to invalidate it 4326 */ 4327 *wq_setid = right ? 
right->wql_setid.id : 0; 4328 wql_invalidate(parent); 4329 wqdbg_v("S1, L"); 4330 return left ? WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID; 4331 } else if (!right || right->wql_setid.id == setid) { 4332 /* 4333 * if right side matches we know it points either to the 4334 * WQS we're unlinking, or to an invalid object: 4335 * no need to invalidate it 4336 */ 4337 *wq_setid = left ? left->wql_setid.id : 0; 4338 wql_invalidate(parent); 4339 wqdbg_v("S1, R"); 4340 return right ? WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID; 4341 } 4342 } 4343 4344 /* 4345 * the tree walk starts at the top-of-tree and moves down, 4346 * so these are safe asserts. 4347 */ 4348 assert(left || right); /* one of them has to be valid at this point */ 4349 4350 /* 4351 * Scenario 2: 4352 * -------------------------------------------------------------------- 4353 * waitq->waitq_set_id == ... (OR parent) 4354 * 4355 * ... 4356 * | 4357 * parent 4358 * / \ 4359 * / \ 4360 * L(LINK) R(LINK) 4361 * /\ /\ 4362 * / \ / \ 4363 * / \ Rl(*) Rr(*) 4364 * Ll(WQS) Lr(WQS) 4365 * 4366 * In this scenario, a leaf node of either the left or right side 4367 * could be the wait queue set we're looking to unlink. We also handle 4368 * the case where one of these links is invalid. If a leaf node is 4369 * invalid or it's the set we're looking for, we can safely remove the 4370 * middle link (left or right) and point the parent link directly to 4371 * the remaining leaf node. 4372 */ 4373 if (left && wql_type(left) == WQL_LINK) { 4374 uint64_t Ll, Lr; 4375 struct waitq_link *linkLl, *linkLr; 4376 assert(left->wql_setid.id != setid); 4377 Ll = left->wql_link.left_setid; 4378 Lr = left->wql_link.right_setid; 4379 linkLl = wql_get_link(Ll); 4380 linkLr = wql_get_link(Lr); 4381 if (!linkLl && !linkLr) { 4382 /* 4383 * The left object points to two invalid objects! 4384 * We can invalidate the left w/o touching the parent. 4385 */ 4386 wql_invalidate(left); 4387 wqdbg_v("S2, Ll+Lr"); 4388 return WQ_ITERATE_INVALID; 4389 } else if (!linkLl || Ll == setid) { 4390 /* Ll is invalid and/or the wait queue set we're looking for */ 4391 parent->wql_link.left_setid = Lr; 4392 wql_invalidate(left); 4393 wql_put_link(linkLl); 4394 wql_put_link(linkLr); 4395 wqdbg_v("S2, Ll"); 4396 return linkLl ? WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID; 4397 } else if (!linkLr || Lr == setid) { 4398 /* Lr is invalid and/or the wait queue set we're looking for */ 4399 parent->wql_link.left_setid = Ll; 4400 wql_invalidate(left); 4401 wql_put_link(linkLr); 4402 wql_put_link(linkLl); 4403 wqdbg_v("S2, Lr"); 4404 return linkLr ? WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID; 4405 } 4406 wql_put_link(linkLl); 4407 wql_put_link(linkLr); 4408 } 4409 4410 if (right && wql_type(right) == WQL_LINK) { 4411 uint64_t Rl, Rr; 4412 struct waitq_link *linkRl, *linkRr; 4413 assert(right->wql_setid.id != setid); 4414 Rl = right->wql_link.left_setid; 4415 Rr = right->wql_link.right_setid; 4416 linkRl = wql_get_link(Rl); 4417 linkRr = wql_get_link(Rr); 4418 if (!linkRl && !linkRr) { 4419 /* 4420 * The right object points to two invalid objects! 4421 * We can invalidate the right w/o touching the parent. 4422 */ 4423 wql_invalidate(right); 4424 wqdbg_v("S2, Rl+Rr"); 4425 return WQ_ITERATE_INVALID; 4426 } else if (!linkRl || Rl == setid) { 4427 /* Rl is invalid and/or the wait queue set we're looking for */ 4428 parent->wql_link.right_setid = Rr; 4429 wql_invalidate(right); 4430 wql_put_link(linkRl); 4431 wql_put_link(linkRr); 4432 wqdbg_v("S2, Rl"); 4433 return linkRl ? 
WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID; 4434 } else if (!linkRr || Rr == setid) { 4435 /* Rr is invalid and/or the wait queue set we're looking for */ 4436 parent->wql_link.right_setid = Rl; 4437 wql_invalidate(right); 4438 wql_put_link(linkRl); 4439 wql_put_link(linkRr); 4440 wqdbg_v("S2, Rr"); 4441 return linkRr ? WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID; 4442 } 4443 wql_put_link(linkRl); 4444 wql_put_link(linkRr); 4445 } 4446 4447 return WQ_ITERATE_CONTINUE; 4448 } 4449 4450 /** 4451 * link table walk callback that unlinks 'waitq' from 'ctx->setid' 4452 * 4453 * Conditions: 4454 * called from walk_waitq_links 4455 * 'waitq' is locked 4456 * 4457 * Notes: 4458 * uses waitq_maybe_remove_link() to compress the linktable and 4459 * perform the actual unlinking 4460 */ 4461 static int 4462 waitq_unlink_cb(struct waitq *waitq, void *ctx, 4463 struct waitq_link *link) 4464 { 4465 uint64_t setid = *((uint64_t *)ctx); 4466 struct waitq_link *right, *left; 4467 int ret = 0; 4468 4469 if (wql_type(link) != WQL_LINK) { 4470 return WQ_ITERATE_CONTINUE; 4471 } 4472 4473 do { 4474 left = wql_get_link(link->wql_link.left_setid); 4475 right = wql_get_link(link->wql_link.right_setid); 4476 4477 ret = waitq_maybe_remove_link(waitq, setid, link, left, right); 4478 4479 wql_put_link(left); 4480 wql_put_link(right); 4481 4482 if (!wql_is_valid(link)) { 4483 return WQ_ITERATE_INVALID; 4484 } 4485 /* A ret value of UNLINKED will break us out of table walk */ 4486 } while (ret == WQ_ITERATE_INVALID); 4487 4488 return ret; 4489 } 4490 4491 4492 /** 4493 * undo/remove a prepost from 'ctx' (waitq) to 'wqset' 4494 * 4495 * Conditions: 4496 * Called from wq_prepost_foreach_locked OR wq_prepost_iterate 4497 * 'wqset' may be NULL 4498 * (ctx)->unlink_wqset is locked 4499 */ 4500 static int 4501 waitq_unlink_prepost_cb(struct waitq_set __unused *wqset, void *ctx, 4502 struct wq_prepost *wqp, struct waitq *waitq) 4503 { 4504 struct wq_unlink_ctx *ulctx = (struct wq_unlink_ctx *)ctx; 4505 4506 if (waitq != ulctx->unlink_wq) { 4507 return WQ_ITERATE_CONTINUE; 4508 } 4509 4510 if (wqp_type(wqp) == WQP_WQ && 4511 wqp->wqp_prepostid.id == ulctx->unlink_wqset->wqset_prepost_id) { 4512 /* this is the only prepost on this wait queue set */ 4513 wqdbg_v("unlink wqp (WQ) 0x%llx", wqp->wqp_prepostid.id); 4514 ulctx->unlink_wqset->wqset_prepost_id = 0; 4515 return WQ_ITERATE_BREAK; 4516 } 4517 4518 assert(wqp_type(wqp) == WQP_POST); 4519 4520 /* 4521 * The prepost object 'wqp' points to a waitq which should no longer 4522 * be preposted to 'ulctx->unlink_wqset'. We can remove the prepost 4523 * object from the list and break out of the iteration. Using the 4524 * context object in this way allows this same callback function to be 4525 * used from both wq_prepost_foreach_locked and wq_prepost_iterate. 4526 */ 4527 wq_prepost_remove(ulctx->unlink_wqset, wqp); 4528 return WQ_ITERATE_BREAK; 4529 } 4530 4531 /** 4532 * unlink 'waitq' from 'wqset' 4533 * 4534 * Conditions: 4535 * 'waitq' is locked 4536 * 'wqset' is _not_ locked 4537 * may (rarely) spin in prepost clear and drop/re-acquire 'waitq' lock 4538 * (see waitq_clear_prepost_locked) 4539 */ 4540 static kern_return_t 4541 waitq_unlink_locked(struct waitq *waitq, 4542 struct waitq_set *wqset) 4543 { 4544 uint64_t setid; 4545 kern_return_t kr; 4546 4547 assert(!waitq_irq_safe(waitq)); 4548 4549 if (waitq->waitq_set_id == 0) { 4550 /* 4551 * TODO: 4552 * it doesn't belong to anyone, and it has a prepost object? 
This is an artifact of not cleaning up after kqueues when 4554 * they prepost into select sets... 4555 */ 4556 if (waitq->waitq_prepost_id != 0) { 4557 (void)waitq_clear_prepost_locked(waitq); 4558 } 4559 return KERN_NOT_IN_SET; 4560 } 4561 4562 if (!waitqs_is_linked(wqset)) { 4563 /* 4564 * No link has been allocated for the wqset, 4565 * so no waitq could have been linked to it. 4566 */ 4567 return KERN_NOT_IN_SET; 4568 } 4569 4570 setid = wqset->wqset_id; 4571 4572 if (waitq->waitq_set_id == setid) { 4573 waitq->waitq_set_id = 0; 4574 /* 4575 * This was the only set to which the waitq belonged: we can 4576 * safely release the waitq's prepost object. It doesn't 4577 * matter if this function drops and re-acquires the lock 4578 * because we're not manipulating waitq state any more. 4579 */ 4580 (void)waitq_clear_prepost_locked(waitq); 4581 return KERN_SUCCESS; 4582 } 4583 4584 /* 4585 * The waitq was a member of more than 1 set, so we need to 4586 * handle potentially compressing the link table, and 4587 * adjusting the waitq->waitq_set_id value. 4588 * 4589 * Note: we can't free the waitq's associated prepost object (if any) 4590 * because it may be in use by one or more _other_ sets to 4591 * which this queue belongs. 4592 * 4593 * Note: This function only handles a single level of the queue linkage. 4594 * Removing a waitq from a set to which it does not directly 4595 * belong is undefined. For example, if a waitq belonged to set 4596 * A, and set A belonged to set B, you can't remove the waitq 4597 * from set B. 4598 */ 4599 kr = walk_waitq_links(LINK_WALK_ONE_LEVEL, waitq, waitq->waitq_set_id, 4600 WQL_LINK, (void *)&setid, waitq_unlink_cb); 4601 4602 if (kr == WQ_ITERATE_UNLINKED) { 4603 struct wq_unlink_ctx ulctx; 4604 4605 kr = KERN_SUCCESS; /* found it and dis-associated it */ 4606 4607 /* don't look for preposts if it's not prepost-enabled */ 4608 if (!wqset->wqset_q.waitq_prepost) { 4609 goto out; 4610 } 4611 4612 assert(!waitq_irq_safe(&wqset->wqset_q)); 4613 4614 waitq_set_lock(wqset); 4615 /* 4616 * clear out any prepost from waitq into wqset 4617 * TODO: this could be more efficient than a linear search of 4618 * the waitq set's prepost list.
4619 */ 4620 ulctx.unlink_wq = waitq; 4621 ulctx.unlink_wqset = wqset; 4622 (void)wq_prepost_iterate(wqset->wqset_prepost_id, (void *)&ulctx, 4623 waitq_unlink_prepost_cb); 4624 waitq_set_unlock(wqset); 4625 } else { 4626 kr = KERN_NOT_IN_SET; /* waitq is _not_ associated with wqset */ 4627 } 4628 4629 out: 4630 return kr; 4631 } 4632 4633 /** 4634 * unlink 'waitq' from 'wqset' 4635 * 4636 * Conditions: 4637 * neither 'waitq' nor 'wqset' is locked 4638 * may disable and re-enable interrupts 4639 * may (rarely) spin in prepost clear 4640 * (see waitq_clear_prepost_locked) 4641 */ 4642 kern_return_t 4643 waitq_unlink(struct waitq *waitq, struct waitq_set *wqset) 4644 { 4645 kern_return_t kr = KERN_SUCCESS; 4646 4647 assert(waitqs_is_set(wqset)); 4648 4649 /* 4650 * we allow the waitq to be invalid because the caller may be trying 4651 * to clear out old/dirty state 4652 */ 4653 if (!waitq_valid(waitq)) { 4654 return KERN_INVALID_ARGUMENT; 4655 } 4656 4657 wqdbg_v("unlink waitq %p from set 0x%llx", 4658 (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq), wqset->wqset_id); 4659 4660 assert(!waitq_irq_safe(waitq)); 4661 4662 waitq_lock(waitq); 4663 4664 kr = waitq_unlink_locked(waitq, wqset); 4665 4666 waitq_unlock(waitq); 4667 return kr; 4668 } 4669 4670 /** 4671 * unlink a waitq from a waitq set, but reference the waitq by its prepost ID 4672 * 4673 * Conditions: 4674 * 'wqset' is unlocked 4675 * wqp_id may be valid or invalid 4676 */ 4677 void 4678 waitq_unlink_by_prepost_id(uint64_t wqp_id, struct waitq_set *wqset) 4679 { 4680 struct wq_prepost *wqp; 4681 4682 disable_preemption(); 4683 wqp = wq_prepost_get(wqp_id); 4684 if (wqp) { 4685 struct waitq *wq; 4686 4687 wq = wqp->wqp_wq.wqp_wq_ptr; 4688 4689 /* 4690 * lock the waitq, then release our prepost ID reference, then 4691 * unlink the waitq from the wqset: this ensures that we don't 4692 * hold a prepost ID reference during the unlink, but we also 4693 * complete the unlink operation atomically to avoid a race 4694 * with waitq_unlink[_all]. 4695 */ 4696 assert(!waitq_irq_safe(wq)); 4697 4698 waitq_lock(wq); 4699 wq_prepost_put(wqp); 4700 4701 if (!waitq_valid(wq)) { 4702 /* someone already tore down this waitq! */ 4703 waitq_unlock(wq); 4704 enable_preemption(); 4705 return; 4706 } 4707 4708 /* this _may_ drop the wq lock, but that's OK */ 4709 waitq_unlink_locked(wq, wqset); 4710 4711 waitq_unlock(wq); 4712 } 4713 enable_preemption(); 4714 return; 4715 } 4716 4717 4718 /** 4719 * reference and lock a waitq by its prepost ID 4720 * 4721 * Conditions: 4722 * wqp_id may be valid or invalid 4723 * 4724 * Returns: 4725 * a locked waitq if wqp_id was valid 4726 * NULL on failure 4727 */ 4728 struct waitq * 4729 waitq_lock_by_prepost_id(uint64_t wqp_id) 4730 { 4731 struct waitq *wq = NULL; 4732 struct wq_prepost *wqp; 4733 4734 disable_preemption(); 4735 wqp = wq_prepost_get(wqp_id); 4736 if (wqp) { 4737 wq = wqp->wqp_wq.wqp_wq_ptr; 4738 4739 assert(!waitq_irq_safe(wq)); 4740 4741 waitq_lock(wq); 4742 wq_prepost_put(wqp); 4743 4744 if (!waitq_valid(wq)) { 4745 /* someone already tore down this waitq! 
*/ 4746 waitq_unlock(wq); 4747 enable_preemption(); 4748 return NULL; 4749 } 4750 } 4751 enable_preemption(); 4752 return wq; 4753 } 4754 4755 4756 /** 4757 * unlink 'waitq' from all sets to which it belongs 4758 * 4759 * Conditions: 4760 * 'waitq' is locked on entry 4761 * returns with waitq lock dropped 4762 * 4763 * Notes: 4764 * may (rarely) spin (see waitq_clear_prepost_locked) 4765 */ 4766 kern_return_t 4767 waitq_unlink_all_unlock(struct waitq *waitq) 4768 { 4769 uint64_t old_set_id = 0; 4770 wqdbg_v("unlink waitq %p from all sets", 4771 (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq)); 4772 assert(!waitq_irq_safe(waitq)); 4773 4774 /* it's not a member of any sets */ 4775 if (waitq->waitq_set_id == 0) { 4776 waitq_unlock(waitq); 4777 return KERN_SUCCESS; 4778 } 4779 4780 old_set_id = waitq->waitq_set_id; 4781 waitq->waitq_set_id = 0; 4782 4783 /* 4784 * invalidate the prepost entry for this waitq. 4785 * This may drop and re-acquire the waitq lock, but that's OK because 4786 * if it was added to another set and preposted to that set in the 4787 * time we drop the lock, the state will remain consistent. 4788 */ 4789 (void)waitq_clear_prepost_locked(waitq); 4790 4791 waitq_unlock(waitq); 4792 4793 if (old_set_id) { 4794 /* 4795 * Walk the link table and invalidate each LINK object that 4796 * used to connect this waitq to one or more sets: this works 4797 * because WQL_LINK objects are private to each wait queue 4798 */ 4799 (void)walk_waitq_links(LINK_WALK_ONE_LEVEL, waitq, old_set_id, 4800 WQL_LINK, NULL, waitq_unlink_all_cb); 4801 } 4802 4803 return KERN_SUCCESS; 4804 } 4805 4806 /** 4807 * unlink 'waitq' from all sets to which it belongs 4808 * 4809 * Conditions: 4810 * 'waitq' is not locked 4811 * may disable and re-enable interrupts 4812 * may (rarely) spin 4813 * (see waitq_unlink_all_locked, waitq_clear_prepost_locked) 4814 */ 4815 kern_return_t 4816 waitq_unlink_all(struct waitq *waitq) 4817 { 4818 kern_return_t kr = KERN_SUCCESS; 4819 4820 if (!waitq_valid(waitq)) { 4821 panic("Invalid waitq: %p", waitq); 4822 } 4823 4824 assert(!waitq_irq_safe(waitq)); 4825 waitq_lock(waitq); 4826 if (!waitq_valid(waitq)) { 4827 waitq_unlock(waitq); 4828 return KERN_SUCCESS; 4829 } 4830 4831 kr = waitq_unlink_all_unlock(waitq); 4832 /* waitq unlocked and set links deallocated */ 4833 4834 return kr; 4835 } 4836 4837 4838 /** 4839 * unlink all waitqs from 'wqset' 4840 * 4841 * Conditions: 4842 * 'wqset' is locked on entry 4843 * 'wqset' is unlocked on exit and spl is restored 4844 * 4845 * Note: 4846 * may (rarely) spin/block (see waitq_clear_prepost_locked) 4847 */ 4848 kern_return_t 4849 waitq_set_unlink_all_unlock(struct waitq_set *wqset) 4850 { 4851 struct waitq_link *link; 4852 uint64_t prepost_id; 4853 4854 wqdbg_v("unlink all queues from set 0x%llx", wqset->wqset_id); 4855 4856 /* 4857 * This operation does not require interaction with any of the set's 4858 * constituent wait queues. 
All we have to do is invalidate the SetID 4859 */ 4860 4861 if (waitqs_is_linked(wqset)) { 4862 /* invalidate and re-alloc the link object first */ 4863 link = wql_get_link(wqset->wqset_id); 4864 4865 /* we may have raced with a waitq_set_deinit: handle this */ 4866 if (!link) { 4867 waitq_set_unlock(wqset); 4868 return KERN_SUCCESS; 4869 } 4870 4871 wql_invalidate(link); 4872 4873 /* re-alloc the object to get a new generation ID */ 4874 wql_realloc_link(link, WQL_WQS); 4875 link->wql_wqs.wql_set = wqset; 4876 4877 wqset->wqset_id = link->wql_setid.id; 4878 wql_mkvalid(link); 4879 wql_put_link(link); 4880 } 4881 4882 /* clear any preposts attached to this set */ 4883 prepost_id = 0; 4884 if (wqset->wqset_q.waitq_prepost && wqset->wqset_prepost_id) { 4885 prepost_id = wqset->wqset_prepost_id; 4886 } 4887 /* else { TODO: notify kqueue subsystem? } */ 4888 wqset->wqset_prepost_id = 0; 4889 4890 /* 4891 * clear set linkage and prepost object associated with this set: 4892 * waitq sets may prepost to other sets if, for example, they are 4893 * associated with a kqueue which is in a select set. 4894 * 4895 * This releases all the set link objects 4896 * (links to other sets to which this set was previously added) 4897 */ 4898 waitq_unlink_all_unlock(&wqset->wqset_q); 4899 /* wqset->wqset_q unlocked */ 4900 4901 /* drop / unlink all the prepost table objects */ 4902 if (prepost_id) { 4903 (void)wq_prepost_iterate(prepost_id, NULL, 4904 wqset_clear_prepost_chain_cb); 4905 } 4906 4907 return KERN_SUCCESS; 4908 } 4909 4910 /** 4911 * unlink all waitqs from 'wqset' 4912 * 4913 * Conditions: 4914 * 'wqset' is not locked 4915 * may (rarely) spin/block (see waitq_clear_prepost_locked) 4916 */ 4917 kern_return_t 4918 waitq_set_unlink_all(struct waitq_set *wqset) 4919 { 4920 assert(waitqs_is_set(wqset)); 4921 assert(!waitq_irq_safe(&wqset->wqset_q)); 4922 4923 waitq_set_lock(wqset); 4924 return waitq_set_unlink_all_unlock(wqset); 4925 /* wqset unlocked and set links and preposts deallocated */ 4926 } 4927 4928 static int 4929 waitq_prepost_reserve_cb(struct waitq *waitq, void *ctx, 4930 struct waitq_link *link) 4931 { 4932 uint32_t *num = (uint32_t *)ctx; 4933 (void)waitq; 4934 4935 /* 4936 * In the worst case, we'll have to allocate 2 prepost objects 4937 * per waitq set (if the set was already preposted by another 4938 * waitq). 4939 */ 4940 if (wql_type(link) == WQL_WQS) { 4941 /* 4942 * check to see if the associated waitq actually supports 4943 * preposting 4944 */ 4945 if (waitq_set_can_prepost(link->wql_wqs.wql_set)) { 4946 *num += 2; 4947 } 4948 } 4949 return WQ_ITERATE_CONTINUE; 4950 } 4951 4952 static int 4953 waitq_alloc_prepost_reservation(int nalloc, struct waitq *waitq, 4954 int *did_unlock, struct wq_prepost **wqp) 4955 { 4956 struct wq_prepost *tmp; 4957 struct wqp_cache *cache; 4958 4959 *did_unlock = 0; 4960 4961 /* 4962 * Before we unlock the waitq, check the per-processor prepost object 4963 * cache to see if there's enough there for us. If so, do the 4964 * allocation, keep the lock and save an entire iteration over the set 4965 * linkage! 
4966 */ 4967 if (waitq) { 4968 #ifndef __DARLING__ 4969 disable_preemption(); 4970 cache = PERCPU_GET(wqp_cache); 4971 if (nalloc <= (int)cache->avail) { 4972 goto do_alloc; 4973 } 4974 enable_preemption(); 4975 #endif // __DARLING__ 4976 4977 /* unlock the waitq to perform the allocation */ 4978 *did_unlock = 1; 4979 waitq_unlock(waitq); 4980 } 4981 4982 do_alloc: 4983 tmp = wq_prepost_alloc(LT_RESERVED, nalloc); 4984 if (!tmp) { 4985 panic("Couldn't reserve %d preposts for waitq @%p (wqp@%p)", 4986 nalloc, waitq, *wqp); 4987 } 4988 if (*wqp) { 4989 /* link the two lists */ 4990 int __assert_only rc; 4991 rc = wq_prepost_rlink(tmp, *wqp); 4992 assert(rc == nalloc); 4993 } 4994 *wqp = tmp; 4995 4996 /* 4997 * If the caller can block, then enforce a minimum-free table element 4998 * policy here. This helps ensure that we will have enough prepost 4999 * objects for callers such as selwakeup() that can be called with 5000 * spin locks held. 5001 */ 5002 if (get_preemption_level() == 0) { 5003 wq_prepost_ensure_free_space(); 5004 } 5005 5006 if (waitq) { 5007 if (*did_unlock == 0) { 5008 /* decrement the preemption count if alloc from cache */ 5009 enable_preemption(); 5010 } else { 5011 /* otherwise: re-lock the waitq */ 5012 waitq_lock(waitq); 5013 } 5014 } 5015 5016 return nalloc; 5017 } 5018 5019 static int 5020 waitq_count_prepost_reservation(struct waitq *waitq, int extra, int keep_locked) 5021 { 5022 int npreposts = 0; 5023 5024 /* 5025 * If the waitq is not currently part of a set, and we're not asked to 5026 * keep the waitq locked then we'll want to have 3 in reserve 5027 * just-in-case it becomes part of a set while we unlock and reserve. 5028 * We may need up to 1 object for the waitq, and 2 for the set. 5029 */ 5030 if (waitq->waitq_set_id == 0) { 5031 npreposts = 3; 5032 } else { 5033 /* this queue has never been preposted before */ 5034 if (waitq->waitq_prepost_id == 0) { 5035 npreposts = 3; 5036 } 5037 5038 /* 5039 * Walk the set of table linkages associated with this waitq 5040 * and count the worst-case number of prepost objects that 5041 * may be needed during a wakeup_all. We can walk this without 5042 * locking each set along the way because the table-based IDs 5043 * disconnect us from the set pointers themselves, and the 5044 * table walking is careful to read the setid values only once. 5045 * Locking each set up the chain also doesn't guarantee that 5046 * their membership won't change between the time we unlock 5047 * that set and when we actually go to prepost, so our 5048 * situation is no worse than before and we've alleviated lock 5049 * contention on any sets to which this waitq belongs. 5050 */ 5051 (void)walk_waitq_links(LINK_WALK_FULL_DAG_UNLOCKED, 5052 waitq, waitq->waitq_set_id, 5053 WQL_WQS, (void *)&npreposts, 5054 waitq_prepost_reserve_cb); 5055 } 5056 5057 if (extra > 0) { 5058 npreposts += extra; 5059 } 5060 5061 if (npreposts == 0 && !keep_locked) { 5062 /* 5063 * If we get here, we were asked to reserve some prepost 5064 * objects for a waitq that's previously preposted, and is not 5065 * currently a member of any sets. We have also been 5066 * instructed to unlock the waitq when we're done. In this 5067 * case, we pre-allocated enough reserved objects to handle 5068 * the case where the waitq gets added to a single set when 5069 * the lock is released. 
5070 */ 5071 npreposts = 3; 5072 } 5073 5074 return npreposts; 5075 } 5076 5077 5078 /** 5079 * pre-allocate prepost objects for 'waitq' 5080 * 5081 * Conditions: 5082 * 'waitq' is not locked 5083 * 5084 * Returns: 5085 * panic on error 5086 * 5087 * 0 on success, '*reserved' is set to the head of a singly-linked 5088 * list of pre-allocated prepost objects. 5089 * 5090 * Notes: 5091 * If 'lock_state' is WAITQ_KEEP_LOCKED, this function performs the pre-allocation 5092 * atomically and returns 'waitq' locked. 5093 * 5094 * This function attempts to pre-allocate precisely enough prepost 5095 * objects based on the current set membership of 'waitq'. If the 5096 * operation is performed atomically, then the caller 5097 * is guaranteed to have enough pre-allocated prepost object to avoid 5098 * any (rare) blocking in the wakeup path. 5099 */ 5100 uint64_t 5101 waitq_prepost_reserve(struct waitq *waitq, int extra, 5102 waitq_lock_state_t lock_state) 5103 { 5104 uint64_t reserved = 0; 5105 uint64_t prev_setid = 0, prev_prepostid = 0; 5106 struct wq_prepost *wqp = NULL; 5107 int nalloc = 0, npreposts = 0; 5108 int keep_locked = (lock_state == WAITQ_KEEP_LOCKED); 5109 int unlocked = 0; 5110 5111 wqdbg_v("Attempting to reserve prepost linkages for waitq %p (extra:%d)", 5112 (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq), extra); 5113 5114 if (waitq == NULL && extra > 0) { 5115 /* 5116 * Simple prepost object allocation: 5117 * we'll add 2 more because the waitq might need an object, 5118 * and the set itself may need a new POST object in addition 5119 * to the number of preposts requested by the caller 5120 */ 5121 nalloc = waitq_alloc_prepost_reservation(extra + 2, NULL, 5122 &unlocked, &wqp); 5123 assert(nalloc == extra + 2); 5124 return wqp->wqp_prepostid.id; 5125 } 5126 5127 assert(lock_state == WAITQ_KEEP_LOCKED || lock_state == WAITQ_UNLOCK); 5128 5129 assert(!waitq_irq_safe(waitq)); 5130 5131 waitq_lock(waitq); 5132 5133 /* remember the set ID that we started with */ 5134 prev_setid = waitq->waitq_set_id; 5135 prev_prepostid = waitq->waitq_prepost_id; 5136 5137 /* 5138 * If the waitq is not part of a set, and we're asked to 5139 * keep the set locked, then we don't have to reserve 5140 * anything! 5141 */ 5142 if (prev_setid == 0 && keep_locked) { 5143 goto out; 5144 } 5145 5146 npreposts = waitq_count_prepost_reservation(waitq, extra, keep_locked); 5147 5148 /* nothing for us to do! */ 5149 if (npreposts == 0) { 5150 if (keep_locked) { 5151 goto out; 5152 } 5153 goto out_unlock; 5154 } 5155 5156 try_alloc: 5157 /* this _may_ unlock and relock the waitq! */ 5158 nalloc = waitq_alloc_prepost_reservation(npreposts, waitq, 5159 &unlocked, &wqp); 5160 5161 if (!unlocked) { 5162 /* allocation held the waitq lock: we'd done! */ 5163 if (keep_locked) { 5164 goto out; 5165 } 5166 goto out_unlock; 5167 } 5168 5169 /* 5170 * Before we return, if the allocation had to unlock the waitq, we 5171 * must check one more time to see if we have enough. If not, we'll 5172 * try to allocate the difference. If the caller requests it, we'll 5173 * also leave the waitq locked so that the use of the pre-allocated 5174 * prepost objects can be guaranteed to be enough if a wakeup_all is 5175 * performed before unlocking the waitq. 5176 */ 5177 5178 /* 5179 * If the waitq is no longer associated with a set, or if the waitq's 5180 * set/prepostid has not changed since we first walked its linkage, 5181 * we're done. 
5182 */ 5183 if ((waitq->waitq_set_id == 0) || 5184 (waitq->waitq_set_id == prev_setid && 5185 waitq->waitq_prepost_id == prev_prepostid)) { 5186 if (keep_locked) { 5187 goto out; 5188 } 5189 goto out_unlock; 5190 } 5191 5192 npreposts = waitq_count_prepost_reservation(waitq, extra, keep_locked); 5193 5194 if (npreposts > nalloc) { 5195 prev_setid = waitq->waitq_set_id; 5196 prev_prepostid = waitq->waitq_prepost_id; 5197 npreposts = npreposts - nalloc; /* only allocate the diff */ 5198 goto try_alloc; 5199 } 5200 5201 if (keep_locked) { 5202 goto out; 5203 } 5204 5205 out_unlock: 5206 waitq_unlock(waitq); 5207 out: 5208 if (wqp) { 5209 reserved = wqp->wqp_prepostid.id; 5210 } 5211 5212 return reserved; 5213 } 5214 5215 /** 5216 * release a linked list of prepost objects allocated via _prepost_reserve 5217 * 5218 * Conditions: 5219 * may (rarely) spin waiting for prepost table growth memcpy 5220 */ 5221 void 5222 waitq_prepost_release_reserve(uint64_t id) 5223 { 5224 struct wq_prepost *wqp; 5225 5226 wqdbg_v("releasing reserved preposts starting at: 0x%llx", id); 5227 5228 wqp = wq_prepost_rfirst(id); 5229 if (!wqp) { 5230 return; 5231 } 5232 5233 wq_prepost_release_rlist(wqp); 5234 } 5235 5236 5237 /** 5238 * clear all preposts from 'wqset' 5239 * 5240 * Conditions: 5241 * 'wqset' is not locked 5242 */ 5243 void 5244 waitq_set_clear_preposts(struct waitq_set *wqset) 5245 { 5246 uint64_t prepost_id; 5247 spl_t spl; 5248 5249 assert(waitqs_is_set(wqset)); 5250 5251 if (!wqset->wqset_q.waitq_prepost || !wqset->wqset_prepost_id) { 5252 return; 5253 } 5254 5255 wqdbg_v("Clearing all preposted queues on waitq_set: 0x%llx", 5256 wqset->wqset_id); 5257 5258 if (waitq_irq_safe(&wqset->wqset_q)) { 5259 spl = splsched(); 5260 } 5261 waitq_set_lock(wqset); 5262 prepost_id = wqset->wqset_prepost_id; 5263 wqset->wqset_prepost_id = 0; 5264 waitq_set_unlock(wqset); 5265 if (waitq_irq_safe(&wqset->wqset_q)) { 5266 splx(spl); 5267 } 5268 5269 /* drop / unlink all the prepost table objects */ 5270 if (prepost_id) { 5271 (void)wq_prepost_iterate(prepost_id, NULL, 5272 wqset_clear_prepost_chain_cb); 5273 } 5274 } 5275 5276 5277 /* ---------------------------------------------------------------------- 5278 * 5279 * Iteration: waitq -> sets / waitq_set -> preposts 5280 * 5281 * ---------------------------------------------------------------------- */ 5282 5283 struct wq_it_ctx { 5284 void *input; 5285 void *ctx; 5286 waitq_iterator_t it; 5287 }; 5288 5289 static int 5290 waitq_iterate_sets_cb(struct waitq *waitq, void *ctx, 5291 struct waitq_link *link) 5292 { 5293 struct wq_it_ctx *wctx = (struct wq_it_ctx *)(ctx); 5294 struct waitq_set *wqset; 5295 int ret; 5296 5297 (void)waitq; 5298 assert(!waitq_irq_safe(waitq)); 5299 assert(wql_type(link) == WQL_WQS); 5300 5301 /* 5302 * the waitq is locked, so we can just take the set lock 5303 * and call the iterator function 5304 */ 5305 wqset = link->wql_wqs.wql_set; 5306 assert(wqset != NULL); 5307 assert(!waitq_irq_safe(&wqset->wqset_q)); 5308 waitq_set_lock(wqset); 5309 5310 ret = wctx->it(wctx->ctx, (struct waitq *)wctx->input, wqset); 5311 5312 waitq_set_unlock(wqset); 5313 return ret; 5314 } 5315 5316 /** 5317 * call external iterator function for each prepost object in wqset 5318 * 5319 * Conditions: 5320 * Called from wq_prepost_foreach_locked 5321 * (wqset locked, waitq _not_ locked) 5322 */ 5323 static int 5324 wqset_iterate_prepost_cb(struct waitq_set *wqset, void *ctx, 5325 struct wq_prepost *wqp, struct waitq *waitq) 5326 { 5327 struct wq_it_ctx *wctx = 
(struct wq_it_ctx *)(ctx); 5328 uint64_t wqp_id; 5329 int ret; 5330 5331 (void)wqp; 5332 5333 /* 5334 * This is a bit tricky. The 'wqset' is locked, but the 'waitq' is not. 5335 * Taking the 'waitq' lock is a lock order violation, so we need to be 5336 * careful. We also must realize that we may have taken a reference to 5337 * the 'wqp' just as the associated waitq was being torn down (or 5338 * clearing all its preposts) - see waitq_clear_prepost_locked(). If 5339 * the 'wqp' is valid and we can get the waitq lock, then we are good 5340 * to go. If not, we need to back off, check that the 'wqp' hasn't 5341 * been invalidated, and try to re-take the locks. 5342 */ 5343 assert(!waitq_irq_safe(waitq)); 5344 5345 if (waitq_lock_try(waitq)) { 5346 goto call_iterator; 5347 } 5348 5349 if (!wqp_is_valid(wqp)) { 5350 return WQ_ITERATE_RESTART; 5351 } 5352 5353 /* We are passed a prepost object with a reference on it. If neither 5354 * the waitq set nor the waitq require interrupts disabled, then we 5355 * may block on the delay(1) call below. We can't hold a prepost 5356 * object reference while blocking, so we have to give that up as well 5357 * and re-acquire it when we come back. 5358 */ 5359 wqp_id = wqp->wqp_prepostid.id; 5360 wq_prepost_put(wqp); 5361 waitq_set_unlock(wqset); 5362 wqdbg_v("dropped set:%p lock waiting for wqp:%p (0x%llx -> wq:%p)", 5363 wqset, wqp, wqp->wqp_prepostid.id, waitq); 5364 delay(1); 5365 waitq_set_lock(wqset); 5366 wqp = wq_prepost_get(wqp_id); 5367 if (!wqp) { 5368 /* someone cleared preposts while we slept! */ 5369 return WQ_ITERATE_DROPPED; 5370 } 5371 5372 /* 5373 * TODO: 5374 * This differs slightly from the logic in ipc_mqueue.c: 5375 * ipc_mqueue_receive_on_thread(). There, if the waitq lock 5376 * can't be obtained, the prepost link is placed on the back of 5377 * the chain, and the iteration starts from the beginning. Here, 5378 * we just restart from the beginning. 
5379 */ 5380 return WQ_ITERATE_RESTART; 5381 5382 call_iterator: 5383 if (!wqp_is_valid(wqp)) { 5384 ret = WQ_ITERATE_RESTART; 5385 goto out_unlock; 5386 } 5387 5388 /* call the external callback */ 5389 ret = wctx->it(wctx->ctx, waitq, wqset); 5390 5391 if (ret == WQ_ITERATE_BREAK_KEEP_LOCKED) { 5392 ret = WQ_ITERATE_BREAK; 5393 goto out; 5394 } 5395 5396 out_unlock: 5397 waitq_unlock(waitq); 5398 out: 5399 return ret; 5400 } 5401 5402 /** 5403 * iterator over all sets to which the given waitq has been linked 5404 * 5405 * Conditions: 5406 * 'waitq' is locked 5407 */ 5408 int 5409 waitq_iterate_sets(struct waitq *waitq, void *ctx, waitq_iterator_t it) 5410 { 5411 int ret; 5412 struct wq_it_ctx wctx = { 5413 .input = (void *)waitq, 5414 .ctx = ctx, 5415 .it = it, 5416 }; 5417 if (!it || !waitq) { 5418 return KERN_INVALID_ARGUMENT; 5419 } 5420 5421 ret = walk_waitq_links(LINK_WALK_ONE_LEVEL, waitq, waitq->waitq_set_id, 5422 WQL_WQS, (void *)&wctx, waitq_iterate_sets_cb); 5423 if (ret == WQ_ITERATE_CONTINUE) { 5424 ret = WQ_ITERATE_SUCCESS; 5425 } 5426 return ret; 5427 } 5428 5429 /** 5430 * iterator over all preposts in the given wqset 5431 * 5432 * Conditions: 5433 * 'wqset' is locked 5434 */ 5435 int 5436 waitq_set_iterate_preposts(struct waitq_set *wqset, 5437 void *ctx, waitq_iterator_t it) 5438 { 5439 struct wq_it_ctx wctx = { 5440 .input = (void *)wqset, 5441 .ctx = ctx, 5442 .it = it, 5443 }; 5444 if (!it || !wqset) { 5445 return WQ_ITERATE_INVALID; 5446 } 5447 5448 assert(waitq_held(&wqset->wqset_q)); 5449 5450 return wq_prepost_foreach_locked(wqset, (void *)&wctx, 5451 wqset_iterate_prepost_cb); 5452 } 5453 5454 5455 /* ---------------------------------------------------------------------- 5456 * 5457 * Higher-level APIs 5458 * 5459 * ---------------------------------------------------------------------- */ 5460 5461 5462 /** 5463 * declare a thread's intent to wait on 'waitq' for 'wait_event' 5464 * 5465 * Conditions: 5466 * 'waitq' is not locked 5467 */ 5468 wait_result_t 5469 waitq_assert_wait64(struct waitq *waitq, 5470 event64_t wait_event, 5471 wait_interrupt_t interruptible, 5472 uint64_t deadline) 5473 { 5474 thread_t thread = current_thread(); 5475 wait_result_t ret; 5476 spl_t s; 5477 5478 if (!waitq_valid(waitq)) { 5479 panic("Invalid waitq: %p", waitq); 5480 } 5481 5482 if (waitq_irq_safe(waitq)) { 5483 s = splsched(); 5484 } 5485 5486 waitq_lock(waitq); 5487 ret = waitq_assert_wait64_locked(waitq, wait_event, interruptible, 5488 TIMEOUT_URGENCY_SYS_NORMAL, 5489 deadline, TIMEOUT_NO_LEEWAY, thread); 5490 waitq_unlock(waitq); 5491 5492 if (waitq_irq_safe(waitq)) { 5493 splx(s); 5494 } 5495 5496 return ret; 5497 } 5498 5499 /** 5500 * declare a thread's intent to wait on 'waitq' for 'wait_event' 5501 * 5502 * Conditions: 5503 * 'waitq' is not locked 5504 * will disable and re-enable interrupts while locking current_thread() 5505 */ 5506 wait_result_t 5507 waitq_assert_wait64_leeway(struct waitq *waitq, 5508 event64_t wait_event, 5509 wait_interrupt_t interruptible, 5510 wait_timeout_urgency_t urgency, 5511 uint64_t deadline, 5512 uint64_t leeway) 5513 { 5514 wait_result_t ret; 5515 thread_t thread = current_thread(); 5516 spl_t s; 5517 5518 if (!waitq_valid(waitq)) { 5519 panic("Invalid waitq: %p", waitq); 5520 } 5521 5522 if (waitq_irq_safe(waitq)) { 5523 s = splsched(); 5524 } 5525 5526 waitq_lock(waitq); 5527 ret = waitq_assert_wait64_locked(waitq, wait_event, interruptible, 5528 urgency, deadline, leeway, thread); 5529 waitq_unlock(waitq); 5530 5531 if 
(waitq_irq_safe(waitq)) { 5532 splx(s); 5533 } 5534 5535 return ret; 5536 } 5537 5538 /** 5539 * wakeup a single thread from a waitq that's waiting for a given event 5540 * 5541 * Conditions: 5542 * 'waitq' is not locked 5543 * may (rarely) block if 'waitq' is non-global and a member of 1 or more sets 5544 * may disable and re-enable interrupts 5545 * 5546 * Notes: 5547 * will _not_ block if waitq is global (or not a member of any set) 5548 */ 5549 kern_return_t 5550 waitq_wakeup64_one(struct waitq *waitq, event64_t wake_event, 5551 wait_result_t result, int priority) 5552 { 5553 kern_return_t kr; 5554 uint64_t reserved_preposts = 0; 5555 spl_t spl; 5556 5557 if (!waitq_valid(waitq)) { 5558 panic("Invalid waitq: %p", waitq); 5559 } 5560 5561 if (!waitq_irq_safe(waitq)) { 5562 /* reserve preposts in addition to locking the waitq */ 5563 reserved_preposts = waitq_prepost_reserve(waitq, 0, WAITQ_KEEP_LOCKED); 5564 } else { 5565 spl = splsched(); 5566 waitq_lock(waitq); 5567 } 5568 5569 /* waitq is locked upon return */ 5570 kr = waitq_wakeup64_one_locked(waitq, wake_event, result, 5571 &reserved_preposts, priority, WAITQ_UNLOCK, WQ_OPTION_NONE); 5572 5573 if (waitq_irq_safe(waitq)) { 5574 splx(spl); 5575 } 5576 5577 /* release any left-over prepost object (won't block/lock anything) */ 5578 waitq_prepost_release_reserve(reserved_preposts); 5579 5580 return kr; 5581 } 5582 5583 /** 5584 * wakeup all threads from a waitq that are waiting for a given event 5585 * 5586 * Conditions: 5587 * 'waitq' is not locked 5588 * may (rarely) block if 'waitq' is non-global and a member of 1 or more sets 5589 * may disable and re-enable interrupts 5590 * 5591 * Notes: 5592 * will _not_ block if waitq is global (or not a member of any set) 5593 */ 5594 kern_return_t 5595 waitq_wakeup64_all(struct waitq *waitq, 5596 event64_t wake_event, 5597 wait_result_t result, 5598 int priority) 5599 { 5600 kern_return_t ret; 5601 uint64_t reserved_preposts = 0; 5602 spl_t s; 5603 5604 if (!waitq_valid(waitq)) { 5605 panic("Invalid waitq: %p", waitq); 5606 } 5607 5608 if (!waitq_irq_safe(waitq)) { 5609 /* reserve preposts in addition to locking waitq */ 5610 reserved_preposts = waitq_prepost_reserve(waitq, 0, 5611 WAITQ_KEEP_LOCKED); 5612 } else { 5613 s = splsched(); 5614 waitq_lock(waitq); 5615 } 5616 5617 ret = waitq_wakeup64_all_locked(waitq, wake_event, result, 5618 &reserved_preposts, priority, 5619 WAITQ_UNLOCK); 5620 5621 if (waitq_irq_safe(waitq)) { 5622 splx(s); 5623 } 5624 5625 waitq_prepost_release_reserve(reserved_preposts); 5626 5627 return ret; 5628 } 5629 5630 /** 5631 * wakeup a specific thread iff it's waiting on 'waitq' for 'wake_event' 5632 * 5633 * Conditions: 5634 * 'waitq' is not locked 5635 * 5636 * Notes: 5637 * May temporarily disable and re-enable interrupts 5638 */ 5639 kern_return_t 5640 waitq_wakeup64_thread(struct waitq *waitq, 5641 event64_t wake_event, 5642 thread_t thread, 5643 wait_result_t result) 5644 { 5645 kern_return_t ret; 5646 spl_t s, th_spl; 5647 5648 if (!waitq_valid(waitq)) { 5649 panic("Invalid waitq: %p", waitq); 5650 } 5651 5652 if (waitq_irq_safe(waitq)) { 5653 s = splsched(); 5654 } 5655 waitq_lock(waitq); 5656 5657 ret = waitq_select_thread_locked(waitq, wake_event, thread, &th_spl); 5658 /* on success, returns 'thread' locked */ 5659 5660 waitq_unlock(waitq); 5661 5662 if (ret == KERN_SUCCESS) { 5663 ret = thread_go(thread, result, WQ_OPTION_NONE); 5664 assert(ret == KERN_SUCCESS); 5665 thread_unlock(thread); 5666 splx(th_spl); 5667 waitq_stats_count_wakeup(waitq); 5668 } 
else { 5669 ret = KERN_NOT_WAITING; 5670 waitq_stats_count_fail(waitq); 5671 } 5672 5673 if (waitq_irq_safe(waitq)) { 5674 splx(s); 5675 } 5676 5677 return ret; 5678 } 5679 5680 /** 5681 * wakeup a single thread from a waitq that's waiting for a given event 5682 * and return a reference to that thread 5683 * returns THREAD_NULL if no thread was waiting 5684 * 5685 * Conditions: 5686 * 'waitq' is not locked 5687 * may (rarely) block if 'waitq' is non-global and a member of 1 or more sets 5688 * may disable and re-enable interrupts 5689 * 5690 * Notes: 5691 * will _not_ block if waitq is global (or not a member of any set) 5692 */ 5693 thread_t 5694 waitq_wakeup64_identify(struct waitq *waitq, 5695 event64_t wake_event, 5696 wait_result_t result, 5697 int priority) 5698 { 5699 uint64_t reserved_preposts = 0; 5700 spl_t thread_spl = 0; 5701 thread_t thread; 5702 spl_t spl; 5703 5704 if (!waitq_valid(waitq)) { 5705 panic("Invalid waitq: %p", waitq); 5706 } 5707 5708 if (!waitq_irq_safe(waitq)) { 5709 /* reserve preposts in addition to locking waitq */ 5710 reserved_preposts = waitq_prepost_reserve(waitq, 0, WAITQ_KEEP_LOCKED); 5711 } else { 5712 spl = splsched(); 5713 waitq_lock(waitq); 5714 } 5715 5716 thread = waitq_wakeup64_identify_locked(waitq, wake_event, result, 5717 &thread_spl, &reserved_preposts, 5718 priority, WAITQ_UNLOCK); 5719 /* waitq is unlocked, thread is locked */ 5720 5721 if (thread != THREAD_NULL) { 5722 thread_reference(thread); 5723 thread_unlock(thread); 5724 splx(thread_spl); 5725 } 5726 5727 if (waitq_irq_safe(waitq)) { 5728 splx(spl); 5729 } 5730 5731 /* release any left-over prepost object (won't block/lock anything) */ 5732 waitq_prepost_release_reserve(reserved_preposts); 5733 5734 /* returns +1 ref to running thread or THREAD_NULL */ 5735 return thread; 5736 }
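/*
 * For illustration only: a hedged sketch of how the higher-level wait/wake
 * APIs above typically pair up in a caller. 'wq' and 'my_event' are
 * placeholder names, and the event encoding and thread_block() step are
 * assumptions based on common Mach usage, not taken from this file.
 *
 *	Waiter:
 *		wait_result_t wr;
 *		wr = waitq_assert_wait64(wq, CAST_EVENT64_T(&my_event),
 *		    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
 *		if (wr == THREAD_WAITING)
 *			wr = thread_block(THREAD_CONTINUE_NULL);
 *
 *	Waker:
 *		(void)waitq_wakeup64_one(wq, CAST_EVENT64_T(&my_event),
 *		    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
 */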