/* mpsc_queue.h */
/*
 * Copyright (c) 2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _KERN_MPSC_QUEUE_H_
#define _KERN_MPSC_QUEUE_H_

#ifdef XNU_KERNEL_PRIVATE

#include <machine/atomic.h>
#include <kern/macro_help.h>
#include <kern/thread_call.h>

#endif // XNU_KERNEL_PRIVATE

#include <sys/cdefs.h>

__BEGIN_DECLS

/*!
 * @typedef struct mpsc_queue_chain
 *
 * @brief
 * Type for the intrusive linkage used by MPSC queues.
 */
typedef struct mpsc_queue_chain {
	struct mpsc_queue_chain *_Atomic mpqc_next;
} *mpsc_queue_chain_t;

/*!
 * @typedef struct mpsc_queue_head
 *
 * @brief
 * The type for a multi-producer single-consumer queue.
 *
 * @discussion
 * MPSC queues allow for producers to not be affected by other producers or the
 * consumer. Which means in turn that having producers in interrupt context
 * does not require that other producers disable interrupts like a traditional
 * spinlock based approach would require.
 *
 * These queues shine when data is produced from the entire system and is
 * consumed from a single serial context (logging, tracing, ...).
 * mpsc_daemon_queue_t is provided as a fully ready/easy-to-use pre-packaged
 * solution for these common use cases.
 *
 * - mpsc_queue_append() can be used to append a single item
 * - mpsc_queue_append_list() can be used to append a batch of items at once.
 *
 * Functions for the consumer side assume proper serialization that is not
 * provided by the MPSC queue itself. Dequeuing doesn't require preemption
 * to be disabled.
 *
 * <h2>Algorithm</h2>
 *
 * The base of the enqueue algorithm is a single atomic exchange (first half,
 * called __mpsc_queue_append_update_tail) and a list fixup (2nd half, called
 * __mpsc_queue_append_update_prev).
 *
 * Graphically, enqueuing `X` looks like this, with each step being done
 * atomically (for the empty queue case, `tail` points to `head`):
 *
 *     | orig state          | update_tail         | update_prev         |
 *     +---------------------+---------------------+---------------------+
 *     |                     |                     |                     |
 *     | head -> e1 -> e2 -. | head -> e1 -> e2 -. | head -> e1 -> e2 -. |
 *     |                   | |                   | |                   | |
 *     | ,- ... <--'         | ,- ... <--'         | ,- ... <--'         |
 *     | |                   | |                   | |                   |
 *     | v                   | v                   | v                   |
 *     | tail -> eN -> NULL  | tail    eN -> NULL  | tail    eN          |
 *     |                     |     |               |     |    |          |
 *     |                     |     |               |     |    v          |
 *     | X -> NULL           |     `---> X -> NULL |     '---> X -> NULL |
 *     |                     |                     |                     |
 *     +---------------------+---------------------+---------------------+
 *
 *
 * There is a small 1-instruction gap of inconsistency which makes the chosen
 * algorithm non linearizable, and requires enqueuers to disable preemption
 * during the enqueue so as not to starve the consumer forever.
 *
 * As far as memory visibility is concerned, enqueuing uses a release fence in
 * update_tail which pairs with memory fences in mpsc_queue_dequeue_batch().
 *
 * Note: as far as the data structure in memory, its layout is equivalent to
 *       a BSD <sys/queue.h> STAILQ. However because of this inconsistency
 *       window and memory ordering concerns, it is incorrect to use STAILQ
 *       macros on an MPSC queue.
 */
typedef struct mpsc_queue_head {
	struct mpsc_queue_chain          mpqh_head;
	struct mpsc_queue_chain *_Atomic mpqh_tail;
} *mpsc_queue_head_t;

/*!
 * @macro MPSC_QUEUE_INITIALIZER
 *
 * @brief
 * Macro to use in static initializers for mpsc queues.
 *
 * @param head
 * The name of the variable to initialize.
 */
#define MPSC_QUEUE_INITIALIZER(head)   { .mpqh_tail = &(head).mpqh_head }

#ifdef XNU_KERNEL_PRIVATE

/*!
 * @function mpsc_queue_init
 *
 * @brief
 * Dynamically initialize an mpsc queue.
 *
 * @discussion
 * This initialization assumes that the object holding the queue head
 * is initialized before it can be made visible to other threads/cores.
 *
 * @param q
 * The queue to initialize.
 */
static inline void
mpsc_queue_init(mpsc_queue_head_t q)
{
	os_atomic_init(&q->mpqh_head.mpqc_next, NULL);
	os_atomic_init(&q->mpqh_tail, &q->mpqh_head);
}

/*!
 * @typedef enum mpsc_queue_options
 *
 * @brief
 * Options applicable to MPSC queue enqueue operations
 * (see mpsc_daemon_enqueue()).
 *
 * @const MPSC_QUEUE_NONE
 * No special behavior is requested.
 *
 * @const MPSC_QUEUE_DISABLE_PREEMPTION
 * Disable preemption around the enqueue operation.
 */
typedef enum mpsc_queue_options {
	MPSC_QUEUE_NONE               = 0,
	MPSC_QUEUE_DISABLE_PREEMPTION = 1 << 0,
} mpsc_queue_options_t;

/*!
 * @const MPSC_QUEUE_NOTQUEUED_MARKER
 *
 * @brief
 * Magical marker that implementations can use to poison the chain pointer of
 * elements not on any MPSC queue.
 */
#define MPSC_QUEUE_NOTQUEUED_MARKER ((mpsc_queue_chain_t)~0ul)

/*!
 * @macro mpsc_queue_element
 *
 * @brief
 * Macro to find the pointer of an element back from its MPSC chain linkage.
 */
#define mpsc_queue_element(ptr, type, field) __container_of(ptr, type, field)


#pragma mark Advanced Multi Producer calls

/**
 * @function __mpsc_queue_append_update_tail
 *
 * @brief
 * First half of the enqueue operation onto a multi-producer single-consumer
 * queue.
 *
 * @discussion
 * This function is available for algorithms that need to do things (such as
 * taking a refcount) before calling __mpsc_queue_append_update_prev().
 *
 * Preemption should be disabled before calling
 * __mpsc_queue_append_update_tail(), and until
 * __mpsc_queue_append_update_prev() has returned.
 *
 * @param q
 * The queue to update.
 *
 * @param elm
 * The element to append to `q`.
 *
 * @returns
 * A token to later pass to __mpsc_queue_append_update_prev()
 * to complete the enqueue.
 */
static inline mpsc_queue_chain_t
__mpsc_queue_append_update_tail(mpsc_queue_head_t q, mpsc_queue_chain_t elm)
{
	os_atomic_store(&elm->mpqc_next, NULL, relaxed);
	return os_atomic_xchg(&q->mpqh_tail, elm, release);
}

/**
 * @function __mpsc_queue_append_was_empty
 *
 * @brief
 * Tests whether the queue was empty at the time
 * __mpsc_queue_append_update_tail() was called.
 *
 * @param q
 * The queue to test emptiness for.
 *
 * @param prev
 * The token returned by __mpsc_queue_append_update_tail().
 *
 * @returns
 * Whether the queue was empty (true) or not (false).
 */
static inline bool
__mpsc_queue_append_was_empty(mpsc_queue_head_t q, mpsc_queue_chain_t prev)
{
	return &q->mpqh_head == prev;
}

/**
 * @function __mpsc_queue_append_update_prev
 *
 * @brief
 * Second half of the enqueue operation onto a multi-producer single-consumer
 * queue.
 *
 * @discussion
 * This function is available for algorithms that need to do things (such as
 * taking a refcount) before calling __mpsc_queue_append_update_prev().
 *
 * Preemption should be disabled before calling
 * __mpsc_queue_append_update_tail(), and until
 * __mpsc_queue_append_update_prev() has returned.
 *
 * @param prev
 * The token returned by __mpsc_queue_append_update_tail().
 *
 * @param elm
 * The element to append to the queue.
 */
static inline void
__mpsc_queue_append_update_prev(mpsc_queue_chain_t prev, mpsc_queue_chain_t elm)
{
	os_atomic_store(&prev->mpqc_next, elm, relaxed);
}


#pragma mark Multi Producer calls

/**
 * @function mpsc_queue_append_list
 *
 * @brief
 * Enqueues a list of elements onto a queue.
 *
 * @discussion
 * This enqueues a list that has to be fully formed from `first` to `last`
 * at the end of `q`.
 *
 * Preemption should be disabled when calling mpsc_queue_append_list().
 *
 * @param q
 * The queue to update.
 *
 * @param first
 * The first of the list elements being appended.
 *
 * @param last
 * The last of the list elements being appended.
 *
 * @returns
 * Whether the queue was empty before the list was appended.
 */
static inline bool
mpsc_queue_append_list(mpsc_queue_head_t q, mpsc_queue_chain_t first,
    mpsc_queue_chain_t last)
{
	mpsc_queue_chain_t prev = __mpsc_queue_append_update_tail(q, last);
	__mpsc_queue_append_update_prev(prev, first);
	return __mpsc_queue_append_was_empty(q, prev);
}

/**
 * @function mpsc_queue_append
 *
 * @brief
 * Enqueues an element onto a queue.
 *
 * @discussion
 * Preemption should be disabled when calling mpsc_queue_append().
 *
 * @param q the queue to update
 * @param elm the element to append
 *
 * @returns whether the queue was empty before the element was appended
 */
static inline bool
mpsc_queue_append(mpsc_queue_head_t q, mpsc_queue_chain_t elm)
{
	return mpsc_queue_append_list(q, elm, elm);
}


#pragma mark Single Consumer calls

/**
 * @function mpsc_queue_dequeue_batch()
 *
 * @brief
 * Atomically empty a queue at once and return the batch head and tail.
 *
 * @discussion
 * Consumer function, must be called in a serialized way with respect to any
 * other consumer function.
 *
 * @param q
 * The queue
 *
 * @param tail
 * An out pointer filled with the last element captured.
 *
 * @param dependency
 * A dependency token (to rely on consume / hardware dependencies)
 * When not trying to take advantage of hardware dependencies, just pass NULL.
 *
 * @returns
 * The first element of the batch if any, or NULL if the queue was empty.
 */
mpsc_queue_chain_t
mpsc_queue_dequeue_batch(mpsc_queue_head_t q, mpsc_queue_chain_t *tail,
    os_atomic_dependency_t dependency);

/**
 * @function mpsc_queue_batch_next()
 *
 * @brief
 * Function used to consume an element from a batch dequeued with
 * mpsc_queue_dequeue_batch().
 *
 * @discussion
 * Once a batch has been dequeued, there is no need to hold the consumer lock
 * anymore to consume it.
 *
 * mpsc_queue_batch_foreach_safe() is the preferred interface to consume
 * the whole batch.
 *
 * @param cur
 * The current inspected element of the batch (must be the batch head or
 * a value returned by mpsc_queue_batch_next()).
 *
 * @param tail
 * The last element of the batch.
 *
 * @returns
 * The next element if any.
 */
mpsc_queue_chain_t
mpsc_queue_batch_next(mpsc_queue_chain_t cur, mpsc_queue_chain_t tail);

/**
 * @macro mpsc_queue_batch_foreach_safe
 *
 * @brief
 * Macro used to enumerate a batch dequeued with mpsc_queue_dequeue_batch().
 *
 * @param item
 * The item being currently visited.
 *
 * @param head
 * The first element of the batch.
 *
 * @param tail
 * The last element of the batch.
 */
#define mpsc_queue_batch_foreach_safe(item, head, tail) \
	for (mpsc_queue_chain_t __tmp, __item = (head), __tail = (tail); \
	    __tmp = mpsc_queue_batch_next(__item, __tail), (item) = __item; \
	    __item = __tmp)

/**
 * @function mpsc_queue_restore_batch()
 *
 * @brief
 * Restores a batch at the head of the queue.
 *
 * @discussion
 * Consumer function, must be called in a serialized way with respect to any
 * other consumer function.
 *
 * @param q
 * The queue
 *
 * @param first
 * The first element to put back.
 *
 * @param last
 * The last element to put back.
 * It is the responsibility of the caller to ensure the linkages from first to
 * last are properly set up before calling this function.
 */
void
mpsc_queue_restore_batch(mpsc_queue_head_t q, mpsc_queue_chain_t first,
    mpsc_queue_chain_t last);


#pragma mark "GCD"-like facilities

/*!
 * @typedef struct mpsc_daemon_queue
 *
 * @brief
 * Daemon queues are a ready-to use packaging of the low level MPSC queue
 * primitive.
 *
 * @discussion
 * mpsc_queue_t requires handling of state transitions of the queue and
 * dequeuing yourself, which is a non trivial task.
 *
 * Daemon queues are a simple packaged solution that allows for mpsc_queue_t to
 * form hierarchies (mostly for layering purposes), and be serviced at the
 * bottom of such a hierarchy by a thread or a thread call.
 *
 * Daemon queues assume homogeneous items, and are setup with an `invoke`
 * callback that is called in the dequeuer on every item as they are dequeued.
 */
typedef struct mpsc_daemon_queue *mpsc_daemon_queue_t;

/*!
 * @typedef mpsc_daemon_invoke_fn_t
 *
 * @brief
 * The type for MPSC Daemon Queues invoke callbacks.
 */
typedef void (*mpsc_daemon_invoke_fn_t)(mpsc_queue_chain_t elm,
    mpsc_daemon_queue_t dq);

/*!
 * @enum mpsc_daemon_queue_kind
 *
 * @brief
 * Internal type, not to be used by clients.
 */
typedef enum mpsc_daemon_queue_kind {
	MPSC_QUEUE_KIND_UNKNOWN,
	MPSC_QUEUE_KIND_NESTED,
	MPSC_QUEUE_KIND_THREAD,
	MPSC_QUEUE_KIND_THREAD_CRITICAL,
	MPSC_QUEUE_KIND_THREAD_CALL,
} mpsc_daemon_queue_kind_t;

/*!
 * @enum mpsc_daemon_queue_state
 *
 * @brief
 * Internal type, not to be used by clients.
 */
__options_decl(mpsc_daemon_queue_state_t, uint32_t, {
	MPSC_QUEUE_STATE_DRAINING = 0x0001,
	MPSC_QUEUE_STATE_WAKEUP   = 0x0002,
	MPSC_QUEUE_STATE_CANCELED = 0x0004,
});

struct mpsc_daemon_queue {
	mpsc_daemon_queue_kind_t          mpd_kind;
	mpsc_daemon_queue_state_t _Atomic mpd_state;
	mpsc_daemon_invoke_fn_t           mpd_invoke;
	union {
		mpsc_daemon_queue_t  mpd_target;
		struct thread       *mpd_thread;
		struct thread_call  *mpd_call;
	};
	struct mpsc_queue_head            mpd_queue;
	struct mpsc_queue_chain           mpd_chain;
};

/*!
 * @function mpsc_daemon_queue_init_with_thread
 *
 * @brief
 * Sets up a daemon queue to be a base queue drained by a kernel thread.
 *
 * @discussion
 * The function will allocate the thread and start it in assert_wait.
 *
 * @param dq
 * The queue to initialize
 *
 * @param invoke
 * The invoke function called on individual items on the queue during drain.
 *
 * @param pri
 * The scheduler priority for the created thread.
 *
 * @param name
 * The name to give to the created thread.
 *
 * @returns
 * Whether creating the thread was successful.
 */
kern_return_t
mpsc_daemon_queue_init_with_thread(mpsc_daemon_queue_t dq,
    mpsc_daemon_invoke_fn_t invoke, int pri, const char *name);


/*!
 * @function mpsc_daemon_queue_init_with_thread_call
 *
 * @brief
 * Sets up a daemon queue to be a base queue drained by a thread call.
 *
 * @param dq
 * The queue to initialize
 *
 * @param invoke
 * The invoke function called on individual items on the queue during drain.
 *
 * @param pri
 * The priority the thread call will run at.
 */
void
mpsc_daemon_queue_init_with_thread_call(mpsc_daemon_queue_t dq,
    mpsc_daemon_invoke_fn_t invoke, thread_call_priority_t pri);

/*!
 * @function mpsc_daemon_queue_init_with_target
 *
 * @brief
 * Sets up a daemon queue to target another daemon queue.
 *
 * @discussion
 * The targeting relationship is useful for subsystem layering purposes only.
 * Because draining a given queue is atomic with respect to its target, target
 * queue hierarchies are prone to starvation.
 *
 * @param dq
 * The queue to initialize
 *
 * @param invoke
 * The invoke function called on individual items on the queue during drain.
 *
 * @param target
 * The target queue of the initialized queue, which has to be initialized with
 * the mpsc_daemon_queue_nested_invoke invoke handler.
 */
void
mpsc_daemon_queue_init_with_target(mpsc_daemon_queue_t dq,
    mpsc_daemon_invoke_fn_t invoke, mpsc_daemon_queue_t target);

/*!
 * @function mpsc_daemon_queue_nested_invoke
 *
 * @brief
 * The invoke function to pass to mpsc_daemon_queue_init_* when a queue is meant
 * to be targeted by other queues.
 */
void
mpsc_daemon_queue_nested_invoke(mpsc_queue_chain_t elm,
    mpsc_daemon_queue_t dq);

/*!
 * @function mpsc_daemon_queue_cancel_and_wait
 *
 * @brief
 * Cancels the queue so that the object owning it can be destroyed.
 *
 * @discussion
 * This interface will cancel the queue and wait synchronously for the
 * cancelation to have taken effect, possibly waiting on elements currently
 * draining.
 *
 * Sending objects to the daemon queue after cancelation is undefined.
 *
 * Calling this function multiple times is undefined.
 *
 * Tearing down daemon queue hierarchies is the responsibility of the adopter.
 */
void
mpsc_daemon_queue_cancel_and_wait(mpsc_daemon_queue_t dq);

/*!
 * @function mpsc_daemon_enqueue
 *
 * @brief
 * Send ("async") an item to a given daemon on a given queue.
 *
 * @discussion
 * It is the responsibility of the caller to ensure preemption is disabled when
 * this call is made.
 *
 * @param dq
 * The daemon queue to enqueue the element onto.
 *
 * @param elm
 * The item to enqueue.
 *
 * @param options
 * Options applicable to the enqueue. In particular passing
 * MPSC_QUEUE_DISABLE_PREEMPTION makes sure preemption is properly disabled
 * during the enqueue.
 */
void
mpsc_daemon_enqueue(mpsc_daemon_queue_t dq, mpsc_queue_chain_t elm,
    mpsc_queue_options_t options);


#pragma mark Deferred deallocation daemon

/*!
 * @function thread_deallocate_daemon_init
 *
 * @brief
 * Initializes the deferred deallocation daemon, called by thread_daemon_init().
 *
 * @discussion
 * The deferred deallocation daemon is a kernel thread based daemon queue that
 * is targeted by nested daemon queues.
 *
 * It is used to perform deferred deallocation for objects that can't safely be
 * deallocated from the context where the deallocation should normally occur.
 *
 * Subsystems using it are for example: turnstiles, workqueues, threads.
 *
 * @warning
 * New queues should be added to this daemon with great care,
 * as abusing it can lead to unbounded amount of kernel work.
 */
void
thread_deallocate_daemon_init(void);

/*!
 * @function thread_deallocate_daemon_register_queue
 *
 * @brief
 * Dynamically register a queue for deferred deletion with the deferred
 * deallocation daemon.
 *
 * @param dq
 * The daemon queue to register with the deferred deallocation daemon.
 *
 * @param invoke
 * The callback called on every element of this queue by the deallocation
 * daemon.
 */
void
thread_deallocate_daemon_register_queue(mpsc_daemon_queue_t dq,
    mpsc_daemon_invoke_fn_t invoke);


#pragma mark tests
#if DEBUG || DEVELOPMENT

int
mpsc_test_pingpong(uint64_t count, uint64_t *out);

#endif /* DEBUG || DEVELOPMENT */

#endif /* XNU_KERNEL_PRIVATE */

__END_DECLS

#endif /* _KERN_MPSC_QUEUE_H_ */