/ duct-tape / xnu / osfmk / kern / mpsc_queue.h
mpsc_queue.h
  1  /*
  2   * Copyright (c) 2018 Apple Inc. All rights reserved.
  3   *
  4   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  5   *
  6   * This file contains Original Code and/or Modifications of Original Code
  7   * as defined in and that are subject to the Apple Public Source License
  8   * Version 2.0 (the 'License'). You may not use this file except in
  9   * compliance with the License. The rights granted to you under the License
 10   * may not be used to create, or enable the creation or redistribution of,
 11   * unlawful or unlicensed copies of an Apple operating system, or to
 12   * circumvent, violate, or enable the circumvention or violation of, any
 13   * terms of an Apple operating system software license agreement.
 14   *
 15   * Please obtain a copy of the License at
 16   * http://www.opensource.apple.com/apsl/ and read it before using this file.
 17   *
 18   * The Original Code and all software distributed under the License are
 19   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 20   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 21   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 22   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 23   * Please see the License for the specific language governing rights and
 24   * limitations under the License.
 25   *
 26   * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 27   */
 28  
 29  #ifndef _KERN_MPSC_QUEUE_H_
 30  #define _KERN_MPSC_QUEUE_H_
 31  
 32  #ifdef XNU_KERNEL_PRIVATE
 33  
 34  #include <machine/atomic.h>
 35  #include <kern/macro_help.h>
 36  #include <kern/thread_call.h>
 37  
 38  #endif // XNU_KERNEL_PRIVATE
 39  
 40  #include <sys/cdefs.h>
 41  
 42  __BEGIN_DECLS
 43  
 44  /*!
 45   * @typedef struct mpsc_queue_chain
 46   *
 47   * @brief
 48   * Type for the intrusive linkage used by MPSC queues.
 49   */
typedef struct mpsc_queue_chain {
	/*
	 * Next element in the queue; NULL for the last (tail) element.
	 * Implementations may poison this with MPSC_QUEUE_NOTQUEUED_MARKER
	 * while the element is not on any queue.
	 */
	struct mpsc_queue_chain *_Atomic mpqc_next;
} *mpsc_queue_chain_t;
 53  
 54  /*!
 55   * @typedef struct mpsc_queue_head
 56   *
 57   * @brief
 58   * The type for a multi-producer single-consumer queue.
 59   *
 60   * @discussion
 61   * MPSC queues allow for producers to not be affected by other producers or the
 62   * consumer. Which means in turn that having producers in interrupt context
 63   * does not require that other producers disable interrupts like a traditional
 64   * spinlock based approach would require.
 65   *
 66   * These queues shine when data is produced from the entire system and is
 67   * consumed from a single serial context (logging, tracing, ...).
 68   * mpsc_daemon_queue_t is provided as a fully ready/easy-to-use pre-packaged
 69   * solution for these common use cases.
 70   *
 71   * - mpsc_queue_append() can be used to append a single item
 72   * - mpsc_queue_append_list() can be used to append a batch of items at once.
 73   *
 74   * Functions for the consumer side assume proper serialization that is not
 75   * provided by the MPSC queue itself. Dequeuing doesn't require preemption
 76   * to be disabled.
 77   *
 78   * <h2>Algorithm</h2>
 79   *
 80   * The base of the enqueue algorithm is a single atomic exchange (first half,
 81   * called __mpsc_queue_append_update_tail) and a list fixup (2nd half, called
 82   * __mpsc_queue_append_update_prev).
 83   *
 84   * Graphically, enqueuing `X` looks like this, with each step being done
 85   * atomically (for the empty queue case, `tail` points to `head`):
 86   *
 87   *     | orig state          | update_tail         | update_prev         |
 88   *     +---------------------+---------------------+---------------------+
 89   *     |                     |                     |                     |
 90   *     | head -> e1 -> e2 -. | head -> e1 -> e2 -. | head -> e1 -> e2 -. |
 91   *     |                   | |                   | |                   | |
 92   *     |         ,- ... <--' |         ,- ... <--' |         ,- ... <--' |
 93   *     |         |           |         |           |         |           |
 94   *     |         v           |         v           |         v           |
 95   *     | tail -> eN -> NULL  | tail    eN -> NULL  | tail    eN          |
 96   *     |                     |   |                 |   |     |           |
 97   *     |                     |   |                 |   |     v           |
 98   *     |         X -> NULL   |   `---> X -> NULL   |   '---> X -> NULL   |
 99   *     |                     |                     |                     |
100   *     +---------------------+---------------------+---------------------+
101   *
102   *
103   * There is a small 1-instruction gap of inconsistency which makes the chosen
104   * algorithm non linearizable, and requires enqueuers to disable preemption
105   * during the enqueue so as not to starve the consumer forever.
106   *
107   * As far as memory visibility is concerned, enqueuing uses a release fence in
108   * update_tail which pairs with memory fences in mpsc_queue_dequeue_batch().
109   *
110   * Note: as far as the data structure in memory, its layout is equivalent to
111   *       a BSD <sys/queue.h> STAILQ. However because of this inconsistency
112   *       window and memory ordering concerns, it is incorrect to use STAILQ
113   *       macros on an MPSC queue.
114   */
typedef struct mpsc_queue_head {
	/* sentinel node: mpqh_head.mpqc_next is the first element (or NULL) */
	struct mpsc_queue_chain mpqh_head;
	/* last element; points back at &mpqh_head when the queue is empty */
	struct mpsc_queue_chain *_Atomic mpqh_tail;
} *mpsc_queue_head_t;
119  
120  /*!
121   * @macro MPSC_QUEUE_INITIALIZER
122   *
123   * @brief
124   * Macro to use in static initializers for mpsc queues.
125   *
126   * @param head
127   * The name of the variable to initialize.
128   */
129  #define MPSC_QUEUE_INITIALIZER(head)   { .mpqh_tail = &(head).mpqh_head }
130  
131  #ifdef XNU_KERNEL_PRIVATE
132  
133  /*!
134   * @function mpsc_queue_init
135   *
136   * @brief
137   * Dynamically initialize an mpsc queue.
138   *
139   * @discussion
140   * This initialization assumes that the object holding the queue head
141   * is initialized before it can be made visible to other threads/cores.
142   *
143   * @param q
144   * The queue to initialize.
145   */
static inline void
mpsc_queue_init(mpsc_queue_head_t q)
{
	/* empty queue: the head sentinel has no successor ... */
	os_atomic_init(&q->mpqh_head.mpqc_next, NULL);
	/*
	 * ... and the tail points back at the sentinel itself,
	 * mirroring the static MPSC_QUEUE_INITIALIZER layout.
	 */
	os_atomic_init(&q->mpqh_tail, &q->mpqh_head);
}
152  
153  /*!
154   * @typedef enum mpsc_queue_options
155   */
typedef enum mpsc_queue_options {
	MPSC_QUEUE_NONE                = 0,
	/* have mpsc_daemon_enqueue() disable preemption around the append */
	MPSC_QUEUE_DISABLE_PREEMPTION  = 1 << 0,
} mpsc_queue_options_t;
160  
161  /*!
162   * @const MPSC_QUEUE_NOTQUEUED_MARKER
163   *
164   * @brief
165   * Magical marker that implementations can use to poison the chain pointer of
166   * elements not on any MPSC queue.
167   */
168  #define MPSC_QUEUE_NOTQUEUED_MARKER ((mpsc_queue_chain_t)~0ul)
169  
170  /*!
171   * @macro mpsc_queue_element
172   *
173   * @brief
174   * Macro to find the pointer of an element back from its MPSC chain linkage.
175   */
176  #define mpsc_queue_element(ptr, type, field) __container_of(ptr, type, field)
177  
178  
179  #pragma mark Advanced Multi Producer calls
180  
181  /**
182   * @function __mpsc_queue_append_update_tail
183   *
184   * @brief
185   * First half of the enqueue operation onto a multi-producer single-consumer
186   * queue.
187   *
188   * @discussion
189   * This function is available for algorithms that need to do things (such as
190   * taking a refcount) before calling __mpsc_queue_append_update_prev().
191   *
192   * Preemption should be disabled before calling
193   * __mpsc_queue_append_update_tail(), and until
194   * __mpsc_queue_append_update_prev() has returned.
195   *
196   * @param q
197   * The queue to update.
198   *
199   * @param elm
200   * The element to append to `q`.
201   *
202   * @returns
203   * A token to later pass to __mpsc_queue_append_update_prev()
204   * to complete the enqueue.
205   */
static inline mpsc_queue_chain_t
__mpsc_queue_append_update_tail(mpsc_queue_head_t q, mpsc_queue_chain_t elm)
{
	/*
	 * Terminate the new element *before* publishing it: once the xchg
	 * below makes `elm` the tail, a racing producer may immediately
	 * chain onto it, so its mpqc_next must already be NULL.
	 */
	os_atomic_store(&elm->mpqc_next, NULL, relaxed);
	/*
	 * Atomically swing the tail to `elm`, returning the previous tail
	 * (the "token" passed to __mpsc_queue_append_update_prev()).
	 * The release ordering pairs with the memory fences in
	 * mpsc_queue_dequeue_batch() (see the algorithm discussion in the
	 * file header).
	 */
	return os_atomic_xchg(&q->mpqh_tail, elm, release);
}
212  
213  /**
214   * @function __mpsc_queue_append_was_empty
215   *
216   * @brief
217   * Tests whether the queue was empty at the time
218   * __mpsc_queue_append_update_tail() was called.
219   *
220   * @param q
221   * The queue to test emptiness for.
222   *
223   * @param prev
224   * The token returned by __mpsc_queue_append_update_tail().
225   *
226   * @returns
227   * Whether the queue was empty (true) or not (false).
228   */
229  static inline bool
230  __mpsc_queue_append_was_empty(mpsc_queue_head_t q, mpsc_queue_chain_t prev)
231  {
232  	return &q->mpqh_head == prev;
233  }
234  
235  /**
236   * @function __mpsc_queue_append_update_prev
237   *
238   * @brief
239   * Second half of the enqueue operation onto a multi-producer single-consumer
240   * queue.
241   *
242   * @discussion
243   * This function is available for algorithms that need to do things (such as
244   * taking a refcount) before calling __mpsc_queue_append_update_prev().
245   *
246   * Preemption should be disabled before calling
247   * __mpsc_queue_append_update_tail(), and until
248   * __mpsc_queue_append_update_prev() has returned.
249   *
250   * @param prev
251   * The token returned by __mpsc_queue_append_update_tail().
252   *
253   * @param elm
254   * The element to append to the queue.
255   */
static inline void
__mpsc_queue_append_update_prev(mpsc_queue_chain_t prev, mpsc_queue_chain_t elm)
{
	/*
	 * Close the inconsistency window opened by
	 * __mpsc_queue_append_update_tail(): link the previous tail to the
	 * newly published element so the chain is walkable again.
	 * Relaxed suffices here; visibility of `elm` was already granted
	 * by the release xchg that made it the tail.
	 */
	os_atomic_store(&prev->mpqc_next, elm, relaxed);
}
261  
262  
263  #pragma mark Multi Producer calls
264  
265  /**
266   * @function mpsc_queue_append_list
267   *
268   * @brief
269   * Enqueues a list of elements onto a queue.
270   *
271   * @discussion
272   * This enqueues a list that has to be fully formed from `first` to `last`
273   * at the end of `q`.
274   *
275   * Preemption should be disabled when calling mpsc_queue_append_list().
276   *
277   * @param q
278   * The queue to update.
279   *
280   * @param first
281   * The first of the list elements being appended.
282   *
283   * @param last
284   * The last of the list elements being appended.
285   */
286  static inline bool
287  mpsc_queue_append_list(mpsc_queue_head_t q, mpsc_queue_chain_t first,
288      mpsc_queue_chain_t last)
289  {
290  	mpsc_queue_chain_t prev = __mpsc_queue_append_update_tail(q, last);
291  	__mpsc_queue_append_update_prev(prev, first);
292  	return __mpsc_queue_append_was_empty(q, prev);
293  }
294  
295  /**
 296   * @function mpsc_queue_append
297   *
298   * @brief
299   * Enqueues an element onto a queue.
300   *
301   * @discussion
302   * Preemption should be disabled when calling mpsc_queue_append().
303   *
304   * @param q    the queue to update
305   * @param elm  the element to append
306   */
307  static inline bool
308  mpsc_queue_append(mpsc_queue_head_t q, mpsc_queue_chain_t elm)
309  {
310  	return mpsc_queue_append_list(q, elm, elm);
311  }
312  
313  
314  #pragma mark Single Consumer calls
315  
316  /**
317   * @function mpsc_queue_dequeue_batch()
318   *
319   * @brief
320   * Atomically empty a queue at once and return the batch head and tail.
321   *
322   * @discussion
323   * Consumer function, must be called in a serialized way with respect to any
324   * other consumer function.
325   *
326   * @param q
327   * The queue
328   *
329   * @param tail
330   * An out pointer filled with the last element captured.
331   *
332   * @param dependency
333   * A dependency token (to rely on consume / hardware dependencies)
334   * When not trying to take advantage of hardware dependencies, just pass NULL.
335   *
336   * @returns
 337   * The first element of the batch if any, or NULL if the queue was empty.
338   */
339  mpsc_queue_chain_t
340  mpsc_queue_dequeue_batch(mpsc_queue_head_t q, mpsc_queue_chain_t *tail,
341      os_atomic_dependency_t dependency);
342  
343  /**
344   * @function mpsc_queue_batch_next()
345   *
346   * @brief
347   * Function used to consume an element from a batch dequeued with
348   * mpsc_queue_dequeue_batch().
349   *
350   * @discussion
351   * Once a batch has been dequeued, there is no need to hold the consumer lock
352   * anymore to consume it.
353   *
354   * mpsc_queue_batch_foreach_safe() is the preferred interface to consume
355   * the whole batch.
356   *
357   * @param cur
358   * The current inspected element of the batch (must be the batch head or
359   * a value returned by mpsc_queue_batch_next()).
360   *
361   * @param tail
362   * The last element of the batch.
363   *
364   * @returns
365   * The next element if any.
366   */
367  mpsc_queue_chain_t
368  mpsc_queue_batch_next(mpsc_queue_chain_t cur, mpsc_queue_chain_t tail);
369  
370  /**
371   * @macro mpsc_queue_batch_foreach_safe
372   *
373   * @brief
374   * Macro used to enumerate a batch dequeued with mpsc_queue_dequeue_batch().
375   *
376   * @param item
377   * The item being currently visited.
378   *
379   * @param head
380   * The first element of the batch.
381   *
382   * @param tail
383   * The last element of the batch.
384   */
/*
 * Implementation note: "safe" because the successor (__tmp) is computed
 * via mpsc_queue_batch_next() *before* (item) is handed to the loop
 * body, so the body may free or reuse the current element. The comma
 * expression's value, `(item) = __item`, doubles as the loop condition:
 * iteration stops once __item is NULL.
 */
#define mpsc_queue_batch_foreach_safe(item, head, tail) \
	        for (mpsc_queue_chain_t __tmp, __item = (head), __tail = (tail); \
	                        __tmp = mpsc_queue_batch_next(__item, __tail), (item) = __item; \
	                        __item = __tmp)
389  
390  /**
391   * @function mpsc_queue_restore_batch()
392   *
393   * @brief
394   * "Restore"s a batch at the head of the queue.
395   *
396   * @discussion
397   * Consumer function, must be called in a serialized way with respect to any
398   * other consumer function.
399   *
400   * @param q
401   * The queue
402   *
403   * @param first
404   * The first element to put back.
405   *
406   * @param last
407   * The last element to put back.
408   * It is the responsibility of the caller to ensure the linkages from first to
409   * last are properly set up before calling this function.
410   */
411  void
412  mpsc_queue_restore_batch(mpsc_queue_head_t q, mpsc_queue_chain_t first,
413      mpsc_queue_chain_t last);
414  
415  
416  #pragma mark "GCD"-like facilities
417  
418  /*!
419   * @typedef struct mpsc_daemon_queue
420   *
421   * @brief
422   * Daemon queues are a ready-to use packaging of the low level MPSC queue
423   * primitive.
424   *
425   * @discussion
426   * mpsc_queue_t requires handling of state transitions of the queue and
427   * dequeuing yourself, which is a non trivial task.
428   *
429   * Daemon queues are a simple packaged solution that allows for mpsc_queue_t to
430   * form hierarchies (mostly for layering purposes), and be serviced at the
431   * bottom of such a hierarchy by a thread or a thread call.
432   *
 433   * Daemon queues assume homogeneous items, and are setup with an `invoke`
434   * callback that is called in the dequeuer on every item as they are dequeued.
435   */
436  typedef struct mpsc_daemon_queue *mpsc_daemon_queue_t;
437  
438  /*!
 439   * @typedef mpsc_daemon_invoke_fn_t
440   *
441   * @brief
442   * The type for MPSC Daemon Queues invoke callbacks.
443   */
444  typedef void (*mpsc_daemon_invoke_fn_t)(mpsc_queue_chain_t elm,
445      mpsc_daemon_queue_t dq);
446  
447  /*!
448   * @enum mpsc_daemon_queue_kind
449   *
450   * @brief
451   * Internal type, not to be used by clients.
452   */
typedef enum mpsc_daemon_queue_kind {
	MPSC_QUEUE_KIND_UNKNOWN,         /* queue not yet initialized */
	MPSC_QUEUE_KIND_NESTED,          /* targets another daemon queue (init_with_target) */
	MPSC_QUEUE_KIND_THREAD,          /* drained by a dedicated kernel thread (init_with_thread) */
	MPSC_QUEUE_KIND_THREAD_CRITICAL, /* thread-based variant; exact semantics live in mpsc_queue.c — confirm there */
	MPSC_QUEUE_KIND_THREAD_CALL,     /* drained by a thread call (init_with_thread_call) */
} mpsc_daemon_queue_kind_t;
460  
461  /*!
462   * @enum mpsc_daemon_queue_state
463   *
464   * @brief
465   * Internal type, not to be used by clients.
466   */
__options_decl(mpsc_daemon_queue_state_t, uint32_t, {
	MPSC_QUEUE_STATE_DRAINING = 0x0001, /* a drain of this queue is in progress */
	MPSC_QUEUE_STATE_WAKEUP   = 0x0002, /* presumably: work arrived while draining — confirm in mpsc_queue.c */
	MPSC_QUEUE_STATE_CANCELED = 0x0004, /* mpsc_daemon_queue_cancel_and_wait() was called */
});
472  
struct mpsc_daemon_queue {
	mpsc_daemon_queue_kind_t    mpd_kind;    /* how this queue is drained; selects the union arm below */
	mpsc_daemon_queue_state_t _Atomic mpd_state; /* DRAINING / WAKEUP / CANCELED flags */
	mpsc_daemon_invoke_fn_t     mpd_invoke;  /* callback applied to each element as it is dequeued */
	union { /* drain context, discriminated by mpd_kind */
		mpsc_daemon_queue_t     mpd_target;  /* KIND_NESTED: the daemon queue this one targets */
		struct thread          *mpd_thread;  /* KIND_THREAD*: the draining kernel thread */
		struct thread_call     *mpd_call;    /* KIND_THREAD_CALL: the draining thread call */
	};
	struct mpsc_queue_head      mpd_queue;   /* the underlying MPSC queue of pending items */
	struct mpsc_queue_chain     mpd_chain;   /* linkage used to enqueue this queue onto its target (nested case) */
};
485  
486  /*!
487   * @function mpsc_daemon_queue_init_with_thread
488   *
489   * @brief
490   * Sets up a daemon queue to be a base queue drained by a kernel thread.
491   *
492   * @discussion
493   * The function will allocate the thread and start it in assert_wait.
494   *
495   * @param dq
496   * The queue to initialize
497   *
498   * @param invoke
499   * The invoke function called on individual items on the queue during drain.
500   *
501   * @param pri
502   * The scheduler priority for the created thread.
503   *
504   * @param name
505   * The name to give to the created thread.
506   *
507   * @returns
508   * Whether creating the thread was successful.
509   */
510  kern_return_t
511  mpsc_daemon_queue_init_with_thread(mpsc_daemon_queue_t dq,
512      mpsc_daemon_invoke_fn_t invoke, int pri, const char *name);
513  
514  
515  /*!
516   * @function mpsc_daemon_queue_init_with_thread_call
517   *
518   * @brief
519   * Sets up a daemon queue to be a base queue drained by a thread call.
520   *
521   * @param dq
522   * The queue to initialize
523   *
524   * @param invoke
525   * The invoke function called on individual items on the queue during drain.
526   *
527   * @param pri
528   * The priority the thread call will run at.
529   */
530  void
531  mpsc_daemon_queue_init_with_thread_call(mpsc_daemon_queue_t dq,
532      mpsc_daemon_invoke_fn_t invoke, thread_call_priority_t pri);
533  
534  /*!
535   * @function mpsc_daemon_queue_init_with_target
536   *
537   * @brief
538   * Sets up a daemon queue to target another daemon queue.
539   *
540   * @discussion
 541   * The targeting relationship is useful for subsystem layering purposes only.
542   * Because draining a given queue is atomic with respect to its target, target
543   * queue hierarchies are prone to starvation.
544   *
545   * @param dq
546   * The queue to initialize
547   *
548   * @param invoke
549   * The invoke function called on individual items on the queue during drain.
550   *
551   * @param target
552   * The target queue of the initialized queue, which has to be initialized with
553   * the mpsc_daemon_queue_nested_invoke invoke handler.
554   */
555  void
556  mpsc_daemon_queue_init_with_target(mpsc_daemon_queue_t dq,
557      mpsc_daemon_invoke_fn_t invoke, mpsc_daemon_queue_t target);
558  
559  /*!
560   * @function mpsc_daemon_queue_nested_invoke
561   *
562   * @brief
563   * The invoke function to pass to mpsc_daemon_queue_init_* when a queue is meant
564   * to be targeted by other queues.
565   */
566  void
567  mpsc_daemon_queue_nested_invoke(mpsc_queue_chain_t elm,
568      mpsc_daemon_queue_t dq);
569  
570  /*!
571   * @function mpsc_daemon_queue_cancel_and_wait
572   *
573   * @brief
574   * Cancels the queue so that the object owning it can be destroyed.
575   *
576   * @discussion
577   * This interface will cancel the queue and wait synchronously for the
578   * cancelation to have taken effect, possibly waiting on elements currently
579   * draining.
580   *
581   * Sending objects to the daemon queue after cancelation is undefined.
582   *
583   * Calling this function multiple times is undefined.
584   *
585   * Tearing down daemon queue hierarchies is the responsibility of the adopter.
586   */
587  void
588  mpsc_daemon_queue_cancel_and_wait(mpsc_daemon_queue_t dq);
589  
590  /*!
591   * @function mpsc_daemon_enqueue
592   *
593   * @brief
594   * Send ("async") an item to a given daemon on a given queue.
595   *
596   * @discussion
597   * It is the responsibility of the caller to ensure preemption is disabled when
598   * this call is made.
599   *
600   * @param dq
601   * The daemon queue to enqueue the element onto.
602   *
603   * @param elm
604   * The item to enqueue.
605   *
606   * @param options
 607   * Options applicable to the enqueue. In particular passing
608   * MPSC_QUEUE_DISABLE_PREEMPTION makes sure preemption is properly disabled
609   * during the enqueue.
610   */
611  void
612  mpsc_daemon_enqueue(mpsc_daemon_queue_t dq, mpsc_queue_chain_t elm,
613      mpsc_queue_options_t options);
614  
615  
616  #pragma mark Deferred deallocation daemon
617  
618  /*!
619   * @function thread_deallocate_daemon_init
620   *
621   * @brief
622   * Initializes the deferred deallocation daemon, called by thread_daemon_init().
623   *
624   * @discussion
625   * The deferred deallocation daemon is a kernel thread based daemon queue that
626   * is targeted by nested daemon queues.
627   *
628   * It is used to perform deferred deallocation for objects that can't safely be
629   * deallocated from the context where the deallocation should normally occur.
630   *
631   * Subsystems using it are for example: turnstiles, workqueues, threads.
632   *
633   * @warning
634   * New queues should be added to this daemon with great care,
635   * as abusing it can lead to unbounded amount of kernel work.
636   */
637  void
638  thread_deallocate_daemon_init(void);
639  
640  /*!
641   * @function thread_deallocate_daemon_register_queue
642   *
643   * @brief
644   * Dynamically register a queue for deferred deletion with the deferred
645   * deallocation daemon.
646   *
647   * @param dq
648   * The daemon queue to register with the deferred deallocation daemon.
649   *
650   * @param invoke
651   * The callback called on every element of this queue by the deallocation
652   * daemon.
653   */
654  void
655  thread_deallocate_daemon_register_queue(mpsc_daemon_queue_t dq,
656      mpsc_daemon_invoke_fn_t invoke);
657  
658  
659  #pragma mark tests
660  #if DEBUG || DEVELOPMENT
661  
662  int
663  mpsc_test_pingpong(uint64_t count, uint64_t *out);
664  
665  #endif /* DEBUG || DEVELOPMENT */
666  
667  #endif /* XNU_KERNEL_PRIVATE */
668  
669  __END_DECLS
670  
671  #endif /* _KERN_MPSC_QUEUE_H_ */