   1  /*
   2   * Copyright (c) 1993-1995, 1999-2020 Apple Inc. All rights reserved.
   3   *
   4   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5   *
   6   * This file contains Original Code and/or Modifications of Original Code
   7   * as defined in and that are subject to the Apple Public Source License
   8   * Version 2.0 (the 'License'). You may not use this file except in
   9   * compliance with the License. The rights granted to you under the License
  10   * may not be used to create, or enable the creation or redistribution of,
  11   * unlawful or unlicensed copies of an Apple operating system, or to
  12   * circumvent, violate, or enable the circumvention or violation of, any
  13   * terms of an Apple operating system software license agreement.
  14   *
  15   * Please obtain a copy of the License at
  16   * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17   *
  18   * The Original Code and all software distributed under the License are
  19   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23   * Please see the License for the specific language governing rights and
  24   * limitations under the License.
  25   *
  26   * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27   */
  28  
  29  #include <mach/mach_types.h>
  30  #include <mach/thread_act.h>
  31  
  32  #include <kern/kern_types.h>
  33  #include <kern/zalloc.h>
  34  #include <kern/sched_prim.h>
  35  #include <kern/clock.h>
  36  #include <kern/task.h>
  37  #include <kern/thread.h>
  38  #include <kern/waitq.h>
  39  #include <kern/ledger.h>
  40  #include <kern/policy_internal.h>
  41  
  42  #include <vm/vm_pageout.h>
  43  
  44  #include <kern/thread_call.h>
  45  #include <kern/timer_call.h>
  46  
  47  #include <libkern/OSAtomic.h>
  48  #include <kern/timer_queue.h>
  49  
  50  #include <sys/kdebug.h>
  51  #if CONFIG_DTRACE
  52  #include <mach/sdt.h>
  53  #endif
  54  #include <machine/machine_routines.h>
  55  
  56  static ZONE_DECLARE(thread_call_zone, "thread_call",
  57      sizeof(thread_call_data_t), ZC_NOENCRYPT);
  58  
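      /*
       * Delayed thread calls are armed against one of two timebases:
       * TCF_ABSOLUTE deadlines are in mach_absolute_time units, while
       * TCF_CONTINUOUS deadlines are in mach_continuous_time units (which
       * keep advancing across sleep).  Each group maintains a separate
       * delayed queue and timer per flavor.
       */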
  59  typedef enum {
  60  	TCF_ABSOLUTE    = 0,
  61  	TCF_CONTINUOUS  = 1,
  62  	TCF_COUNT       = 2,
  63  } thread_call_flavor_t;
  64  
  65  __options_decl(thread_call_group_flags_t, uint32_t, {
  66  	TCG_NONE                = 0x0,
  67  	TCG_PARALLEL            = 0x1,
  68  	TCG_DEALLOC_ACTIVE      = 0x2,
  69  });
  70  
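      /*
       * One statically-allocated group per THREAD_CALL_INDEX_* priority tier.
       * Each group has its own lock, pending queue, per-flavor delayed queues
       * and timers, and a pool of worker threads sized around
       * target_thread_count; TCG_PARALLEL groups may grow past that target
       * under load (see thread_call_group_should_add_thread()).
       */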
  71  static struct thread_call_group {
  72  	__attribute__((aligned(128))) lck_ticket_t tcg_lock;
  73  
  74  	const char *            tcg_name;
  75  
  76  	queue_head_t            pending_queue;
  77  	uint32_t                pending_count;
  78  
  79  	queue_head_t            delayed_queues[TCF_COUNT];
  80  	struct priority_queue_deadline_min delayed_pqueues[TCF_COUNT];
  81  	timer_call_data_t       delayed_timers[TCF_COUNT];
  82  
  83  	timer_call_data_t       dealloc_timer;
  84  
  85  	struct waitq            idle_waitq;
  86  	uint64_t                idle_timestamp;
  87  	uint32_t                idle_count, active_count, blocked_count;
  88  
  89  	uint32_t                tcg_thread_pri;
  90  	uint32_t                target_thread_count;
  91  
  92  	thread_call_group_flags_t tcg_flags;
  93  
  94  	struct waitq            waiters_waitq;
  95  } thread_call_groups[THREAD_CALL_INDEX_MAX] = {
  96  	[THREAD_CALL_INDEX_HIGH] = {
  97  		.tcg_name               = "high",
  98  		.tcg_thread_pri         = BASEPRI_PREEMPT_HIGH,
  99  		.target_thread_count    = 4,
 100  		.tcg_flags              = TCG_NONE,
 101  	},
 102  	[THREAD_CALL_INDEX_KERNEL] = {
 103  		.tcg_name               = "kernel",
 104  		.tcg_thread_pri         = BASEPRI_KERNEL,
 105  		.target_thread_count    = 1,
 106  		.tcg_flags              = TCG_PARALLEL,
 107  	},
 108  	[THREAD_CALL_INDEX_USER] = {
 109  		.tcg_name               = "user",
 110  		.tcg_thread_pri         = BASEPRI_DEFAULT,
 111  		.target_thread_count    = 1,
 112  		.tcg_flags              = TCG_PARALLEL,
 113  	},
 114  	[THREAD_CALL_INDEX_LOW] = {
 115  		.tcg_name               = "low",
 116  		.tcg_thread_pri         = MAXPRI_THROTTLE,
 117  		.target_thread_count    = 1,
 118  		.tcg_flags              = TCG_PARALLEL,
 119  	},
 120  	[THREAD_CALL_INDEX_KERNEL_HIGH] = {
 121  		.tcg_name               = "kernel-high",
 122  		.tcg_thread_pri         = BASEPRI_PREEMPT,
 123  		.target_thread_count    = 2,
 124  		.tcg_flags              = TCG_NONE,
 125  	},
 126  	[THREAD_CALL_INDEX_QOS_UI] = {
 127  		.tcg_name               = "qos-ui",
 128  		.tcg_thread_pri         = BASEPRI_FOREGROUND,
 129  		.target_thread_count    = 1,
 130  		.tcg_flags              = TCG_NONE,
 131  	},
 132  	[THREAD_CALL_INDEX_QOS_IN] = {
 133  		.tcg_name               = "qos-in",
 134  		.tcg_thread_pri         = BASEPRI_USER_INITIATED,
 135  		.target_thread_count    = 1,
 136  		.tcg_flags              = TCG_NONE,
 137  	},
 138  	[THREAD_CALL_INDEX_QOS_UT] = {
 139  		.tcg_name               = "qos-ut",
 140  		.tcg_thread_pri         = BASEPRI_UTILITY,
 141  		.target_thread_count    = 1,
 142  		.tcg_flags              = TCG_NONE,
 143  	},
 144  };
 145  
 146  typedef struct thread_call_group        *thread_call_group_t;
 147  
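      /*
       * Subsystem tunables: INTERNAL_CALL_COUNT sizes the static pool backing
       * function-only thread calls (thread_call_func_delayed() and friends);
       * THREAD_CALL_DEALLOC_INTERVAL_NS is roughly how long a surplus idle
       * worker may linger before the dealloc timer retires it;
       * THREAD_CALL_ADD_RATIO and THREAD_CALL_MACH_FACTOR_CAP gate growth of
       * parallel groups beyond their target thread count; and
       * THREAD_CALL_GROUP_MAX_THREADS is a sanity cap enforced by panic in
       * thread_call_group_should_add_thread().
       */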
 148  #define INTERNAL_CALL_COUNT             768
 149  #define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * NSEC_PER_MSEC) /* 5 ms */
 150  #define THREAD_CALL_ADD_RATIO           4
 151  #define THREAD_CALL_MACH_FACTOR_CAP     3
 152  #define THREAD_CALL_GROUP_MAX_THREADS   500
 153  
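      /*
       * Per worker-thread record of the callout currently being invoked.
       * Each callout thread points its thc_state at an instance on its own
       * stack while it runs calls, so panic logs and debuggers can attribute
       * an in-flight invocation to its function and parameters even though
       * the thread_call structure itself may already have been freed.
       */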
 154  struct thread_call_thread_state {
 155  	struct thread_call_group * thc_group;
 156  	struct thread_call *       thc_call;    /* debug only, may be deallocated */
 157  	uint64_t thc_call_start;
 158  	uint64_t thc_call_soft_deadline;
 159  	uint64_t thc_call_hard_deadline;
 160  	uint64_t thc_call_pending_timestamp;
 161  	uint64_t thc_IOTES_invocation_timestamp;
 162  	thread_call_func_t  thc_func;
 163  	thread_call_param_t thc_param0;
 164  	thread_call_param_t thc_param1;
 165  };
 166  
 167  static bool                     thread_call_daemon_awake = true;
 168  /*
 169   * This special waitq exists because the daemon thread
 170   * might need to be woken while already holding a global waitq locked.
 171   */
 172  static struct waitq             daemon_waitq;
 173  
 174  static thread_call_data_t       internal_call_storage[INTERNAL_CALL_COUNT];
 175  static queue_head_t             thread_call_internal_queue;
 176  int                             thread_call_internal_queue_count = 0;
 177  static uint64_t                 thread_call_dealloc_interval_abs;
 178  
 179  static void                     _internal_call_init(void);
 180  
 181  static thread_call_t            _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
 182  static bool                     _is_internal_call(thread_call_t call);
 183  static void                     _internal_call_release(thread_call_t call);
 184  static bool                     _pending_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t now);
 185  static bool                     _delayed_call_enqueue(thread_call_t call, thread_call_group_t group,
 186      uint64_t deadline, thread_call_flavor_t flavor);
 187  static bool                     _call_dequeue(thread_call_t call, thread_call_group_t group);
 188  static void                     thread_call_wake(thread_call_group_t group);
 189  static void                     thread_call_daemon(void *arg);
 190  static void                     thread_call_thread(thread_call_group_t group, wait_result_t wres);
 191  static void                     thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
 192  static void                     thread_call_group_setup(thread_call_group_t group);
 193  static void                     sched_call_thread(int type, thread_t thread);
 194  static void                     thread_call_start_deallocate_timer(thread_call_group_t group);
 195  static void                     thread_call_wait_locked(thread_call_t call, spl_t s);
 196  static bool                     thread_call_wait_once_locked(thread_call_t call, spl_t s);
 197  
 198  static boolean_t                thread_call_enter_delayed_internal(thread_call_t call,
 199      thread_call_func_t alt_func, thread_call_param_t alt_param0,
 200      thread_call_param_t param1, uint64_t deadline,
 201      uint64_t leeway, unsigned int flags);
 202  
 203  /* non-static so dtrace can find it rdar://problem/31156135&31379348 */
 204  extern void thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);
 205  
 206  LCK_GRP_DECLARE(thread_call_lck_grp, "thread_call");
 207  
 208  
 209  static void
 210  thread_call_lock_spin(thread_call_group_t group)
 211  {
 212  	lck_ticket_lock(&group->tcg_lock, &thread_call_lck_grp);
 213  }
 214  
 215  static void
 216  thread_call_unlock(thread_call_group_t group)
 217  {
 218  	lck_ticket_unlock(&group->tcg_lock);
 219  }
 220  
 221  static void __assert_only
 222  thread_call_assert_locked(thread_call_group_t group)
 223  {
 224  	lck_ticket_assert_owned(&group->tcg_lock);
 225  }
 226  
 227  
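      /*
       * Every group is protected by its own ticket lock, always taken at
       * splsched() so it can also be acquired from interrupt-disabled
       * contexts such as the scheduler callout in this file.
       * disable_ints_and_lock()/enable_ints_and_unlock() bundle the two
       * steps for the common case.
       */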
 228  static spl_t
 229  disable_ints_and_lock(thread_call_group_t group)
 230  {
 231  	spl_t s = splsched();
 232  	thread_call_lock_spin(group);
 233  
 234  	return s;
 235  }
 236  
 237  static void
 238  enable_ints_and_unlock(thread_call_group_t group, spl_t s)
 239  {
 240  	thread_call_unlock(group);
 241  	splx(s);
 242  }
 243  
 244  /* Lock held */
 245  static thread_call_group_t
 246  thread_call_get_group(thread_call_t call)
 247  {
 248  	thread_call_index_t index = call->tc_index;
 249  
 250  	assert(index >= 0 && index < THREAD_CALL_INDEX_MAX);
 251  
 252  	return &thread_call_groups[index];
 253  }
 254  
 255  /* Lock held */
 256  static thread_call_flavor_t
 257  thread_call_get_flavor(thread_call_t call)
 258  {
 259  	return (call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
 260  }
 261  
 262  /* Lock held */
 263  static thread_call_flavor_t
 264  thread_call_set_flavor(thread_call_t call, thread_call_flavor_t flavor)
 265  {
 266  	assert(flavor == TCF_CONTINUOUS || flavor == TCF_ABSOLUTE);
 267  	thread_call_flavor_t old_flavor = thread_call_get_flavor(call);
 268  
 269  	if (old_flavor != flavor) {
 270  		if (flavor == TCF_CONTINUOUS) {
 271  			call->tc_flags |= THREAD_CALL_FLAG_CONTINUOUS;
 272  		} else {
 273  			call->tc_flags &= ~THREAD_CALL_FLAG_CONTINUOUS;
 274  		}
 275  	}
 276  
 277  	return old_flavor;
 278  }
 279  
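      /*
       * Queueing invariants: a thread call is linked on at most one queue at
       * a time -- either the group's pending queue or one of its per-flavor
       * delayed queues.  Calls on a delayed queue are additionally tracked in
       * the matching deadline-ordered priority queue so the earliest deadline
       * can be found without scanning the whole queue.
       */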
 280  /* returns true if it was on a queue */
 281  static bool
 282  thread_call_enqueue_tail(
 283  	thread_call_t           call,
 284  	queue_t                 new_queue)
 285  {
 286  	queue_t                 old_queue = call->tc_queue;
 287  
 288  	thread_call_group_t     group = thread_call_get_group(call);
 289  	thread_call_flavor_t    flavor = thread_call_get_flavor(call);
 290  
 291  	if (old_queue != NULL &&
 292  	    old_queue != &group->delayed_queues[flavor]) {
 293  		panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
 294  	}
 295  
 296  	if (old_queue == &group->delayed_queues[flavor]) {
 297  		priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
 298  	}
 299  
 300  	if (old_queue == NULL) {
 301  		enqueue_tail(new_queue, &call->tc_qlink);
 302  	} else {
 303  		re_queue_tail(new_queue, &call->tc_qlink);
 304  	}
 305  
 306  	call->tc_queue = new_queue;
 307  
 308  	return old_queue != NULL;
 309  }
 310  
 311  static queue_head_t *
 312  thread_call_dequeue(
 313  	thread_call_t            call)
 314  {
 315  	queue_t                 old_queue = call->tc_queue;
 316  
 317  	thread_call_group_t     group = thread_call_get_group(call);
 318  	thread_call_flavor_t    flavor = thread_call_get_flavor(call);
 319  
 320  	if (old_queue != NULL &&
 321  	    old_queue != &group->pending_queue &&
 322  	    old_queue != &group->delayed_queues[flavor]) {
 323  		panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
 324  	}
 325  
 326  	if (old_queue == &group->delayed_queues[flavor]) {
 327  		priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
 328  	}
 329  
 330  	if (old_queue != NULL) {
 331  		remqueue(&call->tc_qlink);
 332  
 333  		call->tc_queue = NULL;
 334  	}
 335  	return old_queue;
 336  }
 337  
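      /*
       * Move a call onto the delayed queue for `flavor', inserting it into
       * (or repositioning it within) that flavor's deadline priority queue.
       * Handles calls that were idle, pending, or already delayed under a
       * different flavor.  Returns the queue the call was previously on, or
       * NULL if it was not queued.
       */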
 338  static queue_head_t *
 339  thread_call_enqueue_deadline(
 340  	thread_call_t           call,
 341  	thread_call_group_t     group,
 342  	thread_call_flavor_t    flavor,
 343  	uint64_t                deadline)
 344  {
 345  	queue_t old_queue = call->tc_queue;
 346  	queue_t new_queue = &group->delayed_queues[flavor];
 347  
 348  	thread_call_flavor_t old_flavor = thread_call_set_flavor(call, flavor);
 349  
 350  	if (old_queue != NULL &&
 351  	    old_queue != &group->pending_queue &&
 352  	    old_queue != &group->delayed_queues[old_flavor]) {
 353  		panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
 354  	}
 355  
 356  	if (old_queue == new_queue) {
 357  		/* optimize the same-queue case to avoid a full re-insert */
 358  		uint64_t old_deadline = call->tc_pqlink.deadline;
 359  		call->tc_pqlink.deadline = deadline;
 360  
 361  		if (old_deadline < deadline) {
 362  			priority_queue_entry_increased(&group->delayed_pqueues[flavor],
 363  			    &call->tc_pqlink);
 364  		} else {
 365  			priority_queue_entry_decreased(&group->delayed_pqueues[flavor],
 366  			    &call->tc_pqlink);
 367  		}
 368  	} else {
 369  		if (old_queue == &group->delayed_queues[old_flavor]) {
 370  			priority_queue_remove(&group->delayed_pqueues[old_flavor],
 371  			    &call->tc_pqlink);
 372  		}
 373  
 374  		call->tc_pqlink.deadline = deadline;
 375  
 376  		priority_queue_insert(&group->delayed_pqueues[flavor], &call->tc_pqlink);
 377  	}
 378  
 379  	if (old_queue == NULL) {
 380  		enqueue_tail(new_queue, &call->tc_qlink);
 381  	} else if (old_queue != new_queue) {
 382  		re_queue_tail(new_queue, &call->tc_qlink);
 383  	}
 384  
 385  	call->tc_queue = new_queue;
 386  
 387  	return old_queue;
 388  }
 389  
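      /*
       * Report the deadline this call is currently armed with.  For delayed
       * calls this is the hard deadline, i.e. the requested deadline plus any
       * timer-coalescing leeway added by thread_call_enter_delayed_internal().
       */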
 390  uint64_t
 391  thread_call_get_armed_deadline(thread_call_t call)
 392  {
 393  	return call->tc_pqlink.deadline;
 394  }
 395  
 396  
 397  static bool
 398  group_isparallel(thread_call_group_t group)
 399  {
 400  	return (group->tcg_flags & TCG_PARALLEL) != 0;
 401  }
 402  
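      /*
       * Decide whether the daemon should spawn another worker thread for this
       * group.  Serial groups only ever want one active thread.  Parallel
       * groups add a thread when work is pending and no idle thread is
       * available, growing freely up to target_thread_count; past that,
       * growth requires the backlog to exceed THREAD_CALL_ADD_RATIO pending
       * calls per active thread while sched_mach_factor stays below
       * THREAD_CALL_MACH_FACTOR_CAP (i.e. the system is not already loaded).
       * For example, a parallel group with target_thread_count == 1 and three
       * active threads only adds a fourth if more than 12 calls are pending
       * and the mach-factor check passes.
       */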
 403  static bool
 404  thread_call_group_should_add_thread(thread_call_group_t group)
 405  {
 406  	if ((group->active_count + group->blocked_count + group->idle_count) >= THREAD_CALL_GROUP_MAX_THREADS) {
 407  		panic("thread_call group '%s' reached max thread cap (%d): active: %d, blocked: %d, idle: %d",
 408  		    group->tcg_name, THREAD_CALL_GROUP_MAX_THREADS,
 409  		    group->active_count, group->blocked_count, group->idle_count);
 410  	}
 411  
 412  	if (group_isparallel(group) == false) {
 413  		if (group->pending_count > 0 && group->active_count == 0) {
 414  			return true;
 415  		}
 416  
 417  		return false;
 418  	}
 419  
 420  	if (group->pending_count > 0) {
 421  		if (group->idle_count > 0) {
 422  			return false;
 423  		}
 424  
 425  		uint32_t thread_count = group->active_count;
 426  
 427  		/*
 428  		 * Add a thread if either there are no threads,
 429  		 * the group has fewer than its target number of
 430  		 * threads, or the amount of work is large relative
 431  		 * to the number of threads.  In the last case, pay attention
 432  		 * to the total load on the system, and back off if
 433  		 * it's high.
 434  		 */
 435  		if ((thread_count == 0) ||
 436  		    (thread_count < group->target_thread_count) ||
 437  		    ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
 438  		    (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
 439  			return true;
 440  		}
 441  	}
 442  
 443  	return false;
 444  }
 445  
 446  static void
 447  thread_call_group_setup(thread_call_group_t group)
 448  {
 449  	lck_ticket_init(&group->tcg_lock, &thread_call_lck_grp);
 450  
 451  	queue_init(&group->pending_queue);
 452  
 453  	for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
 454  		queue_init(&group->delayed_queues[flavor]);
 455  		priority_queue_init(&group->delayed_pqueues[flavor]);
 456  		timer_call_setup(&group->delayed_timers[flavor], thread_call_delayed_timer, group);
 457  	}
 458  
 459  	timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);
 460  
 461  	waitq_init(&group->waiters_waitq, SYNC_POLICY_DISABLE_IRQ);
 462  
 463  	/* Reverse the wait order so we re-use the most recently parked thread from the pool */
 464  	waitq_init(&group->idle_waitq, SYNC_POLICY_REVERSED | SYNC_POLICY_DISABLE_IRQ);
 465  }
 466  
 467  /*
 468   * Simple wrapper for creating threads bound to
 469   * thread call groups.
 470   */
 471  static void
 472  thread_call_thread_create(
 473  	thread_call_group_t             group)
 474  {
 475  	thread_t thread;
 476  	kern_return_t result;
 477  
 478  	int thread_pri = group->tcg_thread_pri;
 479  
 480  	result = kernel_thread_start_priority((thread_continue_t)thread_call_thread,
 481  	    group, thread_pri, &thread);
 482  	if (result != KERN_SUCCESS) {
 483  		panic("cannot create new thread call thread %d", result);
 484  	}
 485  
 486  	if (thread_pri <= BASEPRI_KERNEL) {
 487  		/*
 488  		 * THREAD_CALL_PRIORITY_KERNEL and lower don't get to run to completion
 489  		 * in kernel if there are higher priority threads available.
 490  		 */
 491  		thread_set_eager_preempt(thread);
 492  	}
 493  
 494  	char name[MAXTHREADNAMESIZE] = "";
 495  
 496  	int group_thread_count = group->idle_count + group->active_count + group->blocked_count;
 497  
 498  	snprintf(name, sizeof(name), "thread call %s #%d", group->tcg_name, group_thread_count);
 499  	thread_set_thread_name(thread, name);
 500  
 501  	thread_deallocate(thread);
 502  }
 503  
 504  /*
 505   *	thread_call_initialize:
 506   *
 507   *	Initialize this module, called
 508   *	early during system initialization.
 509   */
 510  void
 511  thread_call_initialize(void)
 512  {
 513  #ifdef __DARLING__
 514  	thread_call_zone = zone_create("thread_call", sizeof(thread_call_data_t), ZC_NOENCRYPT);
 515  #endif // __DARLING__
 516  
 517  	nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
 518  	waitq_init(&daemon_waitq, SYNC_POLICY_DISABLE_IRQ | SYNC_POLICY_FIFO);
 519  
 520  	for (uint32_t i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
 521  		thread_call_group_setup(&thread_call_groups[i]);
 522  	}
 523  
 524  	_internal_call_init();
 525  
 526  	thread_t thread;
 527  	kern_return_t result;
 528  
 529  	result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon,
 530  	    NULL, BASEPRI_PREEMPT_HIGH + 1, &thread);
 531  	if (result != KERN_SUCCESS) {
 532  		panic("thread_call_initialize");
 533  	}
 534  
 535  	thread_deallocate(thread);
 536  }
 537  
 538  void
 539  thread_call_setup_with_options(
 540  	thread_call_t                   call,
 541  	thread_call_func_t              func,
 542  	thread_call_param_t             param0,
 543  	thread_call_priority_t          pri,
 544  	thread_call_options_t           options)
 545  {
 546  	bzero(call, sizeof(*call));
 547  
 548  	*call = (struct thread_call) {
 549  		.tc_func = func,
 550  		.tc_param0 = param0,
 551  	};
 552  
 553  	switch (pri) {
 554  	case THREAD_CALL_PRIORITY_HIGH:
 555  		call->tc_index = THREAD_CALL_INDEX_HIGH;
 556  		break;
 557  	case THREAD_CALL_PRIORITY_KERNEL:
 558  		call->tc_index = THREAD_CALL_INDEX_KERNEL;
 559  		break;
 560  	case THREAD_CALL_PRIORITY_USER:
 561  		call->tc_index = THREAD_CALL_INDEX_USER;
 562  		break;
 563  	case THREAD_CALL_PRIORITY_LOW:
 564  		call->tc_index = THREAD_CALL_INDEX_LOW;
 565  		break;
 566  	case THREAD_CALL_PRIORITY_KERNEL_HIGH:
 567  		call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
 568  		break;
 569  	default:
 570  		panic("Invalid thread call pri value: %d", pri);
 571  		break;
 572  	}
 573  
 574  	if (options & THREAD_CALL_OPTIONS_ONCE) {
 575  		call->tc_flags |= THREAD_CALL_ONCE;
 576  	}
 577  	if (options & THREAD_CALL_OPTIONS_SIGNAL) {
 578  		call->tc_flags |= THREAD_CALL_SIGNAL | THREAD_CALL_ONCE;
 579  	}
 580  }
 581  
 582  void
 583  thread_call_setup(
 584  	thread_call_t                   call,
 585  	thread_call_func_t              func,
 586  	thread_call_param_t             param0)
 587  {
 588  	thread_call_setup_with_options(call, func, param0,
 589  	    THREAD_CALL_PRIORITY_HIGH, 0);
 590  }
 591  
 592  static void
 593  _internal_call_init(void)
 594  {
 595  	/* Function-only thread calls are only kept in the default HIGH group */
 596  	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];
 597  
 598  	spl_t s = disable_ints_and_lock(group);
 599  
 600  	queue_init(&thread_call_internal_queue);
 601  
 602  	for (unsigned i = 0; i < INTERNAL_CALL_COUNT; i++) {
 603  		enqueue_tail(&thread_call_internal_queue, &internal_call_storage[i].tc_qlink);
 604  		thread_call_internal_queue_count++;
 605  	}
 606  
 607  	enable_ints_and_unlock(group, s);
 608  }
 609  
 610  /*
 611   *	_internal_call_allocate:
 612   *
 613   *	Allocate an internal callout entry.
 614   *
 615   *	Called with thread_call_lock held.
 616   */
 617  static thread_call_t
 618  _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
 619  {
 620  	/* Function-only thread calls are only kept in the default HIGH group */
 621  	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];
 622  
 623  	spl_t s = disable_ints_and_lock(group);
 624  
 625  	thread_call_t call = qe_dequeue_head(&thread_call_internal_queue,
 626  	    struct thread_call, tc_qlink);
 627  
 628  	if (call == NULL) {
 629  		panic("_internal_call_allocate: thread_call_internal_queue empty");
 630  	}
 631  
 632  	thread_call_internal_queue_count--;
 633  
 634  	thread_call_setup(call, func, param0);
 635  	/* THREAD_CALL_ALLOC not set, do not free back to zone */
 636  	assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
 637  	enable_ints_and_unlock(group, s);
 638  
 639  	return call;
 640  }
 641  
 642  /* Check if a call is internal and needs to be returned to the internal pool. */
 643  static bool
 644  _is_internal_call(thread_call_t call)
 645  {
 646  	if (call >= internal_call_storage &&
 647  	    call < &internal_call_storage[INTERNAL_CALL_COUNT]) {
 648  		assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
 649  		return true;
 650  	}
 651  	return false;
 652  }
 653  
 654  /*
 655   *	_internal_call_release:
 656   *
 657   *	Release an internal callout entry which
 658   *	is no longer pending (or delayed).
 659   *
 660   *      Called with thread_call_lock held.
 661   */
 662  static void
 663  _internal_call_release(thread_call_t call)
 664  {
 665  	assert(_is_internal_call(call));
 666  
 667  	thread_call_group_t group = thread_call_get_group(call);
 668  
 669  	assert(group == &thread_call_groups[THREAD_CALL_INDEX_HIGH]);
 670  	thread_call_assert_locked(group);
 671  
 672  	enqueue_head(&thread_call_internal_queue, &call->tc_qlink);
 673  	thread_call_internal_queue_count++;
 674  }
 675  
 676  /*
 677   *	_pending_call_enqueue:
 678   *
 679   *	Place an entry at the end of the
 680   *	pending queue, to be executed soon.
 681   *
 682   *	Returns TRUE if the entry was already
 683   *	on a queue.
 684   *
 685   *	Called with thread_call_lock held.
 686   */
 687  static bool
 688  _pending_call_enqueue(thread_call_t call,
 689      thread_call_group_t group,
 690      uint64_t now)
 691  {
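      	/*
      	 * A THREAD_CALL_ONCE call that is still running is not re-queued
      	 * here.  It is instead marked THREAD_CALL_RESCHEDULE, and
      	 * thread_call_finish() re-enqueues it when the current invocation
      	 * completes.  _delayed_call_enqueue() follows the same convention.
      	 */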
 692  	if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
 693  	    == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
 694  		call->tc_pqlink.deadline = 0;
 695  
 696  		thread_call_flags_t flags = call->tc_flags;
 697  		call->tc_flags |= THREAD_CALL_RESCHEDULE;
 698  
 699  		assert(call->tc_queue == NULL);
 700  
 701  		return flags & THREAD_CALL_RESCHEDULE;
 702  	}
 703  
 704  	call->tc_pending_timestamp = now;
 705  
 706  	bool was_on_queue = thread_call_enqueue_tail(call, &group->pending_queue);
 707  
 708  	if (!was_on_queue) {
 709  		call->tc_submit_count++;
 710  	}
 711  
 712  	group->pending_count++;
 713  
 714  	thread_call_wake(group);
 715  
 716  	return was_on_queue;
 717  }
 718  
 719  /*
 720   *	_delayed_call_enqueue:
 721   *
 722   *	Place an entry on the delayed queue,
 723   *	after existing entries with an earlier
 724   *      (or identical) deadline.
 725   *
 726   *	Returns TRUE if the entry was already
 727   *	on a queue.
 728   *
 729   *	Called with thread_call_lock held.
 730   */
 731  static bool
 732  _delayed_call_enqueue(
 733  	thread_call_t           call,
 734  	thread_call_group_t     group,
 735  	uint64_t                deadline,
 736  	thread_call_flavor_t    flavor)
 737  {
 738  	if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
 739  	    == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
 740  		call->tc_pqlink.deadline = deadline;
 741  
 742  		thread_call_flags_t flags = call->tc_flags;
 743  		call->tc_flags |= THREAD_CALL_RESCHEDULE;
 744  
 745  		assert(call->tc_queue == NULL);
 746  		thread_call_set_flavor(call, flavor);
 747  
 748  		return flags & THREAD_CALL_RESCHEDULE;
 749  	}
 750  
 751  	queue_head_t *old_queue = thread_call_enqueue_deadline(call, group, flavor, deadline);
 752  
 753  	if (old_queue == &group->pending_queue) {
 754  		group->pending_count--;
 755  	} else if (old_queue == NULL) {
 756  		call->tc_submit_count++;
 757  	}
 758  
 759  	return old_queue != NULL;
 760  }
 761  
 762  /*
 763   *	_call_dequeue:
 764   *
 765   *	Remove an entry from a queue.
 766   *
 767   *	Returns TRUE if the entry was on a queue.
 768   *
 769   *	Called with thread_call_lock held.
 770   */
 771  static bool
 772  _call_dequeue(
 773  	thread_call_t           call,
 774  	thread_call_group_t     group)
 775  {
 776  	queue_head_t *old_queue = thread_call_dequeue(call);
 777  
 778  	if (old_queue == NULL) {
 779  		return false;
 780  	}
 781  
 782  	call->tc_finish_count++;
 783  
 784  	if (old_queue == &group->pending_queue) {
 785  		group->pending_count--;
 786  	}
 787  
 788  	return true;
 789  }
 790  
 791  /*
 792   * _arm_delayed_call_timer:
 793   *
 794   * Check if the timer needs to be armed for this flavor,
 795   * and if so, arm it.
 796   *
 797   * If call is non-NULL, only re-arm the timer if the specified call
 798   * is the first in the queue.
 799   *
 800   * Returns true if the timer was armed or re-armed, and false if it was left unset;
 801   * in that case the caller should cancel the timer if need be.
 802   *
 803   * Called with thread_call_lock held.
 804   */
 805  static bool
 806  _arm_delayed_call_timer(thread_call_t           new_call,
 807      thread_call_group_t     group,
 808      thread_call_flavor_t    flavor)
 809  {
 810  	/* No calls implies no timer needed */
 811  	if (queue_empty(&group->delayed_queues[flavor])) {
 812  		return false;
 813  	}
 814  
 815  	thread_call_t call = priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink);
 816  
 817  	/* We only need to change the hard timer if this new call is the first in the list */
 818  	if (new_call != NULL && new_call != call) {
 819  		return false;
 820  	}
 821  
 822  	assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_pqlink.deadline)));
 823  
 824  	uint64_t fire_at = call->tc_soft_deadline;
 825  
 826  	if (flavor == TCF_CONTINUOUS) {
 827  		assert(call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS);
 828  		fire_at = continuoustime_to_absolutetime(fire_at);
 829  	} else {
 830  		assert((call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) == 0);
 831  	}
 832  
 833  	/*
 834  	 * Note: This picks the soonest-deadline call's leeway as the hard timer's leeway,
 835  	 * which does not take into account later-deadline timers with a larger leeway.
 836  	 * This is a valid coalescing behavior, but masks a possible window to
 837  	 * fire a timer instead of going idle.
 838  	 */
 839  	uint64_t leeway = call->tc_pqlink.deadline - call->tc_soft_deadline;
 840  
 841  	timer_call_enter_with_leeway(&group->delayed_timers[flavor], (timer_call_param_t)flavor,
 842  	    fire_at, leeway,
 843  	    TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LEEWAY,
 844  	    ((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED));
 845  
 846  	return true;
 847  }
 848  
 849  /*
 850   *	_cancel_func_from_queue:
 851   *
 852   *	Remove the first (or all) matching
 853   *	entries from the specified queue.
 854   *
 855   *	Returns TRUE if any matching entries
 856   *	were found.
 857   *
 858   *	Called with thread_call_lock held.
 859   */
 860  static boolean_t
 861  _cancel_func_from_queue(thread_call_func_t      func,
 862      thread_call_param_t     param0,
 863      thread_call_group_t     group,
 864      boolean_t               remove_all,
 865      queue_head_t            *queue)
 866  {
 867  	boolean_t call_removed = FALSE;
 868  	thread_call_t call;
 869  
 870  	qe_foreach_element_safe(call, queue, tc_qlink) {
 871  		if (call->tc_func != func ||
 872  		    call->tc_param0 != param0) {
 873  			continue;
 874  		}
 875  
 876  		_call_dequeue(call, group);
 877  
 878  		if (_is_internal_call(call)) {
 879  			_internal_call_release(call);
 880  		}
 881  
 882  		call_removed = TRUE;
 883  		if (!remove_all) {
 884  			break;
 885  		}
 886  	}
 887  
 888  	return call_removed;
 889  }
 890  
 891  /*
 892   *	thread_call_func_delayed:
 893   *
 894   *	Enqueue a function callout to
 895   *	occur at the stated time.
 896   */
 897  void
 898  thread_call_func_delayed(
 899  	thread_call_func_t              func,
 900  	thread_call_param_t             param,
 901  	uint64_t                        deadline)
 902  {
 903  	(void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0);
 904  }
 905  
 906  /*
 907   * thread_call_func_delayed_with_leeway:
 908   *
 909   * Same as thread_call_func_delayed(), but with
 910   * leeway/flags threaded through.
 911   */
 912  
 913  void
 914  thread_call_func_delayed_with_leeway(
 915  	thread_call_func_t              func,
 916  	thread_call_param_t             param,
 917  	uint64_t                deadline,
 918  	uint64_t                leeway,
 919  	uint32_t                flags)
 920  {
 921  	(void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags);
 922  }
 923  
 924  /*
 925   *	thread_call_func_cancel:
 926   *
 927   *	Dequeue a function callout.
 928   *
 929   *	Removes one (or all) { function, argument }
 930   *	instance(s) from either (or both)
 931   *	the pending and	the delayed queue,
 932   *	in that order.
 933   *
 934   *	Returns TRUE if any calls were cancelled.
 935   *
 936   *	This iterates all of the pending or delayed thread calls in the group,
 937   *	which is really inefficient.  Switch to an allocated thread call instead.
 938   *
 939   *	TODO: Give 'func' thread calls their own group, so this silliness doesn't
 940   *	affect the main 'high' group.
 941   */
 942  boolean_t
 943  thread_call_func_cancel(
 944  	thread_call_func_t              func,
 945  	thread_call_param_t             param,
 946  	boolean_t                       cancel_all)
 947  {
 948  	boolean_t       result;
 949  
 950  	assert(func != NULL);
 951  
 952  	/* Function-only thread calls are only kept in the default HIGH group */
 953  	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];
 954  
 955  	spl_t s = disable_ints_and_lock(group);
 956  
 957  	if (cancel_all) {
 958  		/* exhaustively search every queue, and return true if any search found something */
 959  		result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) |
 960  		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE])  |
 961  		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
 962  	} else {
 963  		/* early-exit as soon as we find something, don't search other queues */
 964  		result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) ||
 965  		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) ||
 966  		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
 967  	}
 968  
 969  	enable_ints_and_unlock(group, s);
 970  
 971  	return result;
 972  }
 973  
 974  /*
 975   * Allocate a thread call with a given priority.  Importances other than
 976   * THREAD_CALL_PRIORITY_HIGH or THREAD_CALL_PRIORITY_KERNEL_HIGH will be run in threads
 977   * with eager preemption enabled (i.e. may be aggressively preempted by higher-priority
 978   * threads which are not in the normal "urgent" bands).
 979   */
 980  thread_call_t
 981  thread_call_allocate_with_priority(
 982  	thread_call_func_t              func,
 983  	thread_call_param_t             param0,
 984  	thread_call_priority_t          pri)
 985  {
 986  	return thread_call_allocate_with_options(func, param0, pri, 0);
 987  }
 988  
 989  thread_call_t
 990  thread_call_allocate_with_options(
 991  	thread_call_func_t              func,
 992  	thread_call_param_t             param0,
 993  	thread_call_priority_t          pri,
 994  	thread_call_options_t           options)
 995  {
 996  	thread_call_t call = zalloc(thread_call_zone);
 997  
 998  	thread_call_setup_with_options(call, func, param0, pri, options);
 999  	call->tc_refs = 1;
1000  	call->tc_flags |= THREAD_CALL_ALLOC;
1001  
1002  	return call;
1003  }
1004  
1005  thread_call_t
1006  thread_call_allocate_with_qos(thread_call_func_t        func,
1007      thread_call_param_t       param0,
1008      int                       qos_tier,
1009      thread_call_options_t     options)
1010  {
1011  	thread_call_t call = thread_call_allocate(func, param0);
1012  
1013  	switch (qos_tier) {
1014  	case THREAD_QOS_UNSPECIFIED:
1015  		call->tc_index = THREAD_CALL_INDEX_HIGH;
1016  		break;
1017  	case THREAD_QOS_LEGACY:
1018  		call->tc_index = THREAD_CALL_INDEX_USER;
1019  		break;
1020  	case THREAD_QOS_MAINTENANCE:
1021  	case THREAD_QOS_BACKGROUND:
1022  		call->tc_index = THREAD_CALL_INDEX_LOW;
1023  		break;
1024  	case THREAD_QOS_UTILITY:
1025  		call->tc_index = THREAD_CALL_INDEX_QOS_UT;
1026  		break;
1027  	case THREAD_QOS_USER_INITIATED:
1028  		call->tc_index = THREAD_CALL_INDEX_QOS_IN;
1029  		break;
1030  	case THREAD_QOS_USER_INTERACTIVE:
1031  		call->tc_index = THREAD_CALL_INDEX_QOS_UI;
1032  		break;
1033  	default:
1034  		panic("Invalid thread call qos value: %d", qos_tier);
1035  		break;
1036  	}
1037  
1038  	if (options & THREAD_CALL_OPTIONS_ONCE) {
1039  		call->tc_flags |= THREAD_CALL_ONCE;
1040  	}
1041  
1042  	/* does not support THREAD_CALL_OPTIONS_SIGNAL */
1043  
1044  	return call;
1045  }
1046  
1047  
1048  /*
1049   *	thread_call_allocate:
1050   *
1051   *	Allocate a callout entry.
1052   */
1053  thread_call_t
1054  thread_call_allocate(
1055  	thread_call_func_t              func,
1056  	thread_call_param_t             param0)
1057  {
1058  	return thread_call_allocate_with_options(func, param0,
1059  	           THREAD_CALL_PRIORITY_HIGH, 0);
1060  }
1061  
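      /*
       * Illustrative lifecycle for an allocated thread call, assuming a
       * caller-supplied callback my_func and context my_ctx:
       *
       *	thread_call_t tc = thread_call_allocate(my_func, my_ctx);
       *	uint64_t deadline;
       *	clock_interval_to_deadline(10, NSEC_PER_MSEC, &deadline);
       *	thread_call_enter_delayed(tc, deadline);
       *	...
       *	thread_call_cancel_wait(tc);	(ensure nothing is in flight)
       *	thread_call_free(tc);		(now safe to release the storage)
       */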
1062  /*
1063   *	thread_call_free:
1064   *
1065   *	Release a callout.  If the callout is currently
1066   *	executing, it will be freed when all invocations
1067   *	finish.
1068   *
1069   *	If the callout is currently armed to fire again, then
1070   *	freeing is not allowed and returns FALSE.  The
1071   *	client must have canceled the pending invocation before freeing.
1072   */
1073  boolean_t
1074  thread_call_free(
1075  	thread_call_t           call)
1076  {
1077  	thread_call_group_t group = thread_call_get_group(call);
1078  
1079  	spl_t s = disable_ints_and_lock(group);
1080  
1081  	if (call->tc_queue != NULL ||
1082  	    ((call->tc_flags & THREAD_CALL_RESCHEDULE) != 0)) {
1083  		thread_call_unlock(group);
1084  		splx(s);
1085  
1086  		return FALSE;
1087  	}
1088  
1089  	int32_t refs = --call->tc_refs;
1090  	if (refs < 0) {
1091  		panic("Refcount negative: %d\n", refs);
1092  	}
1093  
1094  	if ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING)
1095  	    == ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING) & call->tc_flags)) {
1096  		thread_call_wait_once_locked(call, s);
1097  		/* thread call lock has been unlocked */
1098  	} else {
1099  		enable_ints_and_unlock(group, s);
1100  	}
1101  
1102  	if (refs == 0) {
1103  		assert(call->tc_finish_count == call->tc_submit_count);
1104  		zfree(thread_call_zone, call);
1105  	}
1106  
1107  	return TRUE;
1108  }
1109  
1110  /*
1111   *	thread_call_enter:
1112   *
1113   *	Enqueue a callout entry to occur "soon".
1114   *
1115   *	Returns TRUE if the call was
1116   *	already on a queue.
1117   */
1118  boolean_t
1119  thread_call_enter(
1120  	thread_call_t           call)
1121  {
1122  	return thread_call_enter1(call, 0);
1123  }
1124  
1125  boolean_t
1126  thread_call_enter1(
1127  	thread_call_t                   call,
1128  	thread_call_param_t             param1)
1129  {
1130  	assert(call->tc_func != NULL);
1131  	assert((call->tc_flags & THREAD_CALL_SIGNAL) == 0);
1132  
1133  	thread_call_group_t group = thread_call_get_group(call);
1134  	bool result = true;
1135  
1136  	spl_t s = disable_ints_and_lock(group);
1137  
1138  	if (call->tc_queue != &group->pending_queue) {
1139  		result = _pending_call_enqueue(call, group, mach_absolute_time());
1140  	}
1141  
1142  	call->tc_param1 = param1;
1143  
1144  	enable_ints_and_unlock(group, s);
1145  
1146  	return result;
1147  }
1148  
1149  /*
1150   *	thread_call_enter_delayed:
1151   *
1152   *	Enqueue a callout entry to occur
1153   *	at the stated time.
1154   *
1155   *	Returns TRUE if the call was
1156   *	already on a queue.
1157   */
1158  boolean_t
1159  thread_call_enter_delayed(
1160  	thread_call_t           call,
1161  	uint64_t                deadline)
1162  {
1163  	assert(call != NULL);
1164  	return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0);
1165  }
1166  
1167  boolean_t
1168  thread_call_enter1_delayed(
1169  	thread_call_t                   call,
1170  	thread_call_param_t             param1,
1171  	uint64_t                        deadline)
1172  {
1173  	assert(call != NULL);
1174  	return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0);
1175  }
1176  
1177  boolean_t
1178  thread_call_enter_delayed_with_leeway(
1179  	thread_call_t           call,
1180  	thread_call_param_t     param1,
1181  	uint64_t                deadline,
1182  	uint64_t                leeway,
1183  	unsigned int            flags)
1184  {
1185  	assert(call != NULL);
1186  	return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags);
1187  }
1188  
1189  
1190  /*
1191   * thread_call_enter_delayed_internal:
1192   * enqueue a callout entry to occur at the stated time
1193   *
1194   * Returns True if the call was already on a queue
1195   * params:
1196   * call     - structure encapsulating state of the callout
1197   * alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters
1198   * deadline - time deadline in nanoseconds
1199   * leeway   - timer slack represented as delta of deadline.
1200   * flags    - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
1201   *            THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
1202   *            THREAD_CALL_CONTINUOUS: thread call will be called according to mach_continuous_time
1203   *                                     rather than mach_absolute_time
1204   */
1205  boolean_t
1206  thread_call_enter_delayed_internal(
1207  	thread_call_t           call,
1208  	thread_call_func_t      alt_func,
1209  	thread_call_param_t     alt_param0,
1210  	thread_call_param_t     param1,
1211  	uint64_t                deadline,
1212  	uint64_t                leeway,
1213  	unsigned int            flags)
1214  {
1215  	uint64_t                now, sdeadline;
1216  
1217  	thread_call_flavor_t flavor = (flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
1218  
1219  	/* direct mapping between thread_call, timer_call, and timeout_urgency values */
1220  	uint32_t urgency = (flags & TIMEOUT_URGENCY_MASK);
1221  
1222  	if (call == NULL) {
1223  		/* allocate a structure out of internal storage, as a convenience for BSD callers */
1224  		call = _internal_call_allocate(alt_func, alt_param0);
1225  	}
1226  
1227  	assert(call->tc_func != NULL);
1228  	thread_call_group_t group = thread_call_get_group(call);
1229  
1230  	spl_t s = disable_ints_and_lock(group);
1231  
1232  	/*
1233  	 * kevent and IOTES let you change flavor for an existing timer, so we have to
1234  	 * support flipping flavors for enqueued thread calls.
1235  	 */
1236  	if (flavor == TCF_CONTINUOUS) {
1237  		now = mach_continuous_time();
1238  	} else {
1239  		now = mach_absolute_time();
1240  	}
1241  
1242  	call->tc_flags |= THREAD_CALL_DELAYED;
1243  
1244  	call->tc_soft_deadline = sdeadline = deadline;
1245  
1246  	boolean_t ratelimited = FALSE;
1247  	uint64_t slop = timer_call_slop(deadline, now, urgency, current_thread(), &ratelimited);
1248  
1249  	if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop) {
1250  		slop = leeway;
1251  	}
1252  
1253  	if (UINT64_MAX - deadline <= slop) {
1254  		deadline = UINT64_MAX;
1255  	} else {
1256  		deadline += slop;
1257  	}
1258  
1259  	if (ratelimited) {
1260  		call->tc_flags |= THREAD_CALL_RATELIMITED;
1261  	} else {
1262  		call->tc_flags &= ~THREAD_CALL_RATELIMITED;
1263  	}
1264  
1265  	call->tc_param1 = param1;
1266  
1267  	call->tc_ttd = (sdeadline > now) ? (sdeadline - now) : 0;
1268  
1269  	bool result = _delayed_call_enqueue(call, group, deadline, flavor);
1270  
1271  	_arm_delayed_call_timer(call, group, flavor);
1272  
1273  #if CONFIG_DTRACE
1274  	DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_func,
1275  	    uint64_t, (deadline - sdeadline), uint64_t, (call->tc_ttd >> 32),
1276  	    (unsigned) (call->tc_ttd & 0xFFFFFFFF), call);
1277  #endif
1278  
1279  	enable_ints_and_unlock(group, s);
1280  
1281  	return result;
1282  }
1283  
1284  /*
1285   * Remove a callout entry from the queue
1286   * Called with thread_call_lock held
1287   */
1288  static bool
1289  thread_call_cancel_locked(thread_call_t call)
1290  {
1291  	bool canceled;
1292  
1293  	if (call->tc_flags & THREAD_CALL_RESCHEDULE) {
1294  		call->tc_flags &= ~THREAD_CALL_RESCHEDULE;
1295  		canceled = true;
1296  
1297  		/* if reschedule was set, it must not have been queued */
1298  		assert(call->tc_queue == NULL);
1299  	} else {
1300  		bool queue_head_changed = false;
1301  
1302  		thread_call_flavor_t flavor = thread_call_get_flavor(call);
1303  		thread_call_group_t  group  = thread_call_get_group(call);
1304  
1305  		if (call->tc_pqlink.deadline != 0 &&
1306  		    call == priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink)) {
1307  			assert(call->tc_queue == &group->delayed_queues[flavor]);
1308  			queue_head_changed = true;
1309  		}
1310  
1311  		canceled = _call_dequeue(call, group);
1312  
1313  		if (queue_head_changed) {
1314  			if (_arm_delayed_call_timer(NULL, group, flavor) == false) {
1315  				timer_call_cancel(&group->delayed_timers[flavor]);
1316  			}
1317  		}
1318  	}
1319  
1320  #if CONFIG_DTRACE
1321  	DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_func,
1322  	    0, (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF));
1323  #endif
1324  
1325  	return canceled;
1326  }
1327  
1328  /*
1329   *	thread_call_cancel:
1330   *
1331   *	Dequeue a callout entry.
1332   *
1333   *	Returns TRUE if the call was
1334   *	on a queue.
1335   */
1336  boolean_t
1337  thread_call_cancel(thread_call_t call)
1338  {
1339  	thread_call_group_t group = thread_call_get_group(call);
1340  
1341  	spl_t s = disable_ints_and_lock(group);
1342  
1343  	boolean_t result = thread_call_cancel_locked(call);
1344  
1345  	enable_ints_and_unlock(group, s);
1346  
1347  	return result;
1348  }
1349  
1350  /*
1351   * Cancel a thread call.  If it cannot be cancelled (i.e.
1352   * is already in flight), waits for the most recent invocation
1353   * to finish.  Note that if clients re-submit this thread call,
1354   * it may still be pending or in flight when thread_call_cancel_wait
1355   * returns, but all requests to execute this work item prior
1356   * to the call to thread_call_cancel_wait will have finished.
1357   */
1358  boolean_t
1359  thread_call_cancel_wait(thread_call_t call)
1360  {
1361  	thread_call_group_t group = thread_call_get_group(call);
1362  
1363  	if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
1364  		panic("thread_call_cancel_wait: can't wait on thread call whose storage I don't own");
1365  	}
1366  
1367  	if (!ml_get_interrupts_enabled()) {
1368  		panic("unsafe thread_call_cancel_wait");
1369  	}
1370  
1371  	thread_t self = current_thread();
1372  
1373  	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
1374  	    self->thc_state && self->thc_state->thc_call == call) {
1375  		panic("thread_call_cancel_wait: deadlock waiting on self from inside call: %p to function %p",
1376  		    call, call->tc_func);
1377  	}
1378  
1379  	spl_t s = disable_ints_and_lock(group);
1380  
1381  	boolean_t canceled = thread_call_cancel_locked(call);
1382  
1383  	if ((call->tc_flags & THREAD_CALL_ONCE) == THREAD_CALL_ONCE) {
1384  		/*
1385  		 * A cancel-wait on a 'once' call will both cancel
1386  		 * the pending call and wait for the in-flight call
1387  		 */
1388  
1389  		thread_call_wait_once_locked(call, s);
1390  		/* thread call lock unlocked */
1391  	} else {
1392  		/*
1393  		 * A cancel-wait on a normal call will only wait for the in-flight calls
1394  		 * if it did not cancel the pending call.
1395  		 *
1396  		 * TODO: This seems less than useful - shouldn't it do the wait as well?
1397  		 */
1398  
1399  		if (canceled == FALSE) {
1400  			thread_call_wait_locked(call, s);
1401  			/* thread call lock unlocked */
1402  		} else {
1403  			enable_ints_and_unlock(group, s);
1404  		}
1405  	}
1406  
1407  	return canceled;
1408  }
1409  
1410  
1411  /*
1412   *	thread_call_wake:
1413   *
1414   *	Wake a call thread to service
1415   *	pending call entries.  May wake
1416   *	the daemon thread in order to
1417   *	create additional call threads.
1418   *
1419   *	Called with thread_call_lock held.
1420   *
1421   *	For high-priority group, only does wakeup/creation if there are no threads
1422   *	running.
1423   */
1424  static void
1425  thread_call_wake(
1426  	thread_call_group_t             group)
1427  {
1428  	/*
1429  	 * New behavior: use threads if you've got 'em.
1430  	 * Traditional behavior: wake only if no threads running.
1431  	 */
1432  	if (group_isparallel(group) || group->active_count == 0) {
1433  		if (group->idle_count) {
1434  			__assert_only kern_return_t kr;
1435  
1436  			kr = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
1437  			    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
1438  			assert(kr == KERN_SUCCESS);
1439  
1440  			group->idle_count--;
1441  			group->active_count++;
1442  
1443  			if (group->idle_count == 0 && (group->tcg_flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE) {
1444  				if (timer_call_cancel(&group->dealloc_timer) == TRUE) {
1445  					group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;
1446  				}
1447  			}
1448  		} else {
1449  			if (thread_call_group_should_add_thread(group) &&
1450  			    os_atomic_cmpxchg(&thread_call_daemon_awake,
1451  			    false, true, relaxed)) {
1452  				waitq_wakeup64_all(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake),
1453  				    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
1454  			}
1455  		}
1456  	}
1457  }
1458  
1459  /*
1460   *	sched_call_thread:
1461   *
1462   *	Call out invoked by the scheduler.
1463   */
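      /*
       *	SCHED_CALL_BLOCK and SCHED_CALL_UNBLOCK keep the group's active and
       *	blocked counts accurate as callout threads block and resume in the
       *	kernel; blocking while work is still pending kicks thread_call_wake()
       *	so another worker (or the daemon) can pick it up.
       */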
1464  static void
1465  sched_call_thread(
1466  	int                             type,
1467  	thread_t                thread)
1468  {
1469  	thread_call_group_t             group;
1470  
1471  	assert(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT);
1472  	assert(thread->thc_state != NULL);
1473  
1474  	group = thread->thc_state->thc_group;
1475  	assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);
1476  
1477  	thread_call_lock_spin(group);
1478  
1479  	switch (type) {
1480  	case SCHED_CALL_BLOCK:
1481  		assert(group->active_count);
1482  		--group->active_count;
1483  		group->blocked_count++;
1484  		if (group->pending_count > 0) {
1485  			thread_call_wake(group);
1486  		}
1487  		break;
1488  
1489  	case SCHED_CALL_UNBLOCK:
1490  		assert(group->blocked_count);
1491  		--group->blocked_count;
1492  		group->active_count++;
1493  		break;
1494  	}
1495  
1496  	thread_call_unlock(group);
1497  }
1498  
1499  /*
1500   * Interrupts disabled, lock held; returns the same way.
1501   * Only called on thread calls whose storage we own.  Wakes up
1502   * anyone who might be waiting on this work item and frees it
1503   * if the client has so requested.
1504   */
1505  static bool
1506  thread_call_finish(thread_call_t call, thread_call_group_t group, spl_t *s)
1507  {
1508  	assert(thread_call_get_group(call) == group);
1509  
1510  	bool repend = false;
1511  	bool signal = call->tc_flags & THREAD_CALL_SIGNAL;
1512  	bool alloc = call->tc_flags & THREAD_CALL_ALLOC;
1513  
1514  	call->tc_finish_count++;
1515  
1516  	if (!signal && alloc) {
1517  		/* The thread call thread owns a ref until the call is finished */
1518  		if (call->tc_refs <= 0) {
1519  			panic("thread_call_finish: detected over-released thread call: %p", call);
1520  		}
1521  		call->tc_refs--;
1522  	}
1523  
1524  	thread_call_flags_t old_flags = call->tc_flags;
1525  	call->tc_flags &= ~(THREAD_CALL_RESCHEDULE | THREAD_CALL_RUNNING | THREAD_CALL_WAIT);
1526  
1527  	if ((!alloc || call->tc_refs != 0) &&
1528  	    (old_flags & THREAD_CALL_RESCHEDULE) != 0) {
1529  		assert(old_flags & THREAD_CALL_ONCE);
1530  		thread_call_flavor_t flavor = thread_call_get_flavor(call);
1531  
1532  		if (old_flags & THREAD_CALL_DELAYED) {
1533  			uint64_t now = mach_absolute_time();
1534  			if (flavor == TCF_CONTINUOUS) {
1535  				now = absolutetime_to_continuoustime(now);
1536  			}
1537  			if (call->tc_soft_deadline <= now) {
1538  				/* The deadline has already expired, go straight to pending */
1539  				call->tc_flags &= ~(THREAD_CALL_DELAYED | THREAD_CALL_RATELIMITED);
1540  				call->tc_pqlink.deadline = 0;
1541  			}
1542  		}
1543  
1544  		if (call->tc_pqlink.deadline) {
1545  			_delayed_call_enqueue(call, group, call->tc_pqlink.deadline, flavor);
1546  			if (!signal) {
1547  				_arm_delayed_call_timer(call, group, flavor);
1548  			}
1549  		} else if (signal) {
1550  			call->tc_submit_count++;
1551  			repend = true;
1552  		} else {
1553  			_pending_call_enqueue(call, group, mach_absolute_time());
1554  		}
1555  	}
1556  
1557  	if (!signal && alloc && call->tc_refs == 0) {
1558  		if ((old_flags & THREAD_CALL_WAIT) != 0) {
1559  			panic("Someone waiting on a thread call that is scheduled for free: %p\n", call->tc_func);
1560  		}
1561  
1562  		assert(call->tc_finish_count == call->tc_submit_count);
1563  
1564  		enable_ints_and_unlock(group, *s);
1565  
1566  		zfree(thread_call_zone, call);
1567  
1568  		*s = disable_ints_and_lock(group);
1569  	}
1570  
1571  	if ((old_flags & THREAD_CALL_WAIT) != 0) {
1572  		/*
1573  		 * This may wake up a thread with a registered sched_call.
1574  		 * That call might need the group lock, so we drop the lock
1575  		 * to avoid deadlocking.
1576  		 *
1577  		 * We also must use a separate waitq from the idle waitq, as
1578  		 * this path goes waitq lock->thread lock->group lock, but
1579  		 * the idle wait goes group lock->waitq_lock->thread_lock.
1580  		 */
1581  		thread_call_unlock(group);
1582  
1583  		waitq_wakeup64_all(&group->waiters_waitq, CAST_EVENT64_T(call),
1584  		    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
1585  
1586  		thread_call_lock_spin(group);
1587  		/* THREAD_CALL_SIGNAL call may have been freed */
1588  	}
1589  
1590  	return repend;
1591  }
1592  
1593  /*
1594   * thread_call_invoke
1595   *
1596   * Invoke the function provided for this thread call
1597   *
1598   * Note that the thread call object can be deallocated by the function if we do not control its storage.
1599   */
1600  static void __attribute__((noinline))
1601  thread_call_invoke(thread_call_func_t func,
1602      thread_call_param_t param0,
1603      thread_call_param_t param1,
1604      __unused thread_call_t call)
1605  {
1606  #if DEVELOPMENT || DEBUG
1607  	KERNEL_DEBUG_CONSTANT(
1608  		MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_START,
1609  		VM_KERNEL_UNSLIDE(func), VM_KERNEL_ADDRHIDE(param0), VM_KERNEL_ADDRHIDE(param1), 0, 0);
1610  #endif /* DEVELOPMENT || DEBUG */
1611  
1612  #if CONFIG_DTRACE
1613  	uint64_t tc_ttd = call->tc_ttd;
1614  	boolean_t is_delayed = call->tc_flags & THREAD_CALL_DELAYED;
1615  	DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
1616  	    (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
1617  #endif
1618  
1619  	(*func)(param0, param1);
1620  
1621  #if CONFIG_DTRACE
1622  	DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
1623  	    (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
1624  #endif
1625  
1626  #if DEVELOPMENT || DEBUG
1627  	KERNEL_DEBUG_CONSTANT(
1628  		MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_END,
1629  		VM_KERNEL_UNSLIDE(func), 0, 0, 0, 0);
1630  #endif /* DEVELOPMENT || DEBUG */
1631  }
1632  
1633  /*
1634   *	thread_call_thread:
1635   */
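      /*
       *	Continuation for a callout worker thread: drains the group's pending
       *	queue, invoking each call with interrupts enabled and the group lock
       *	dropped, then parks on the group's idle waitq (or terminates if the
       *	group already has enough idle threads).
       */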
1636  static void
1637  thread_call_thread(
1638  	thread_call_group_t             group,
1639  	wait_result_t                   wres)
1640  {
1641  	thread_t self = current_thread();
1642  
1643  	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
1644  		(void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT);
1645  	}
1646  
1647  	/*
1648  	 * A wakeup with THREAD_INTERRUPTED indicates that
1649  	 * we should terminate.
1650  	 */
1651  	if (wres == THREAD_INTERRUPTED) {
1652  		thread_terminate(self);
1653  
1654  		/* NOTREACHED */
1655  		panic("thread_terminate() returned?");
1656  	}
1657  
1658  	spl_t s = disable_ints_and_lock(group);
1659  
1660  	struct thread_call_thread_state thc_state = { .thc_group = group };
1661  	self->thc_state = &thc_state;
1662  
1663  	thread_sched_call(self, sched_call_thread);
1664  
1665  	while (group->pending_count > 0) {
1666  		thread_call_t call = qe_dequeue_head(&group->pending_queue,
1667  		    struct thread_call, tc_qlink);
1668  		assert(call != NULL);
1669  
1670  		group->pending_count--;
1671  		if (group->pending_count == 0) {
1672  			assert(queue_empty(&group->pending_queue));
1673  		}
1674  
1675  		thread_call_func_t  func   = call->tc_func;
1676  		thread_call_param_t param0 = call->tc_param0;
1677  		thread_call_param_t param1 = call->tc_param1;
1678  
1679  		call->tc_queue = NULL;
1680  
1681  		if (_is_internal_call(call)) {
1682  			_internal_call_release(call);
1683  		}
1684  
1685  		/*
1686  		 * Can only do wakeups for thread calls whose storage
1687  		 * we control.
1688  		 */
1689  		bool needs_finish = false;
1690  		if (call->tc_flags & THREAD_CALL_ALLOC) {
1691  			call->tc_refs++;        /* Delay free until we're done */
1692  		}
1693  		if (call->tc_flags & (THREAD_CALL_ALLOC | THREAD_CALL_ONCE)) {
1694  			/*
1695  			 * If THREAD_CALL_ONCE is used, and the timer wasn't
1696  			 * THREAD_CALL_ALLOC, then clients swear they will use
1697  			 * thread_call_cancel_wait() before destroying
1698  			 * the thread call.
1699  			 *
1700  			 * Else, the storage for the thread call might have
1701  			 * disappeared when thread_call_invoke() ran.
1702  			 */
1703  			needs_finish = true;
1704  			call->tc_flags |= THREAD_CALL_RUNNING;
1705  		}
1706  
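      		/*
      		 * Record the callout this worker is about to run in its per-thread
      		 * thc_state, so thread_call_start_iotes_invocation() can verify and
      		 * timestamp the in-flight call.
      		 */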
1707  		thc_state.thc_call = call;
1708  		thc_state.thc_call_pending_timestamp = call->tc_pending_timestamp;
1709  		thc_state.thc_call_soft_deadline = call->tc_soft_deadline;
1710  		thc_state.thc_call_hard_deadline = call->tc_pqlink.deadline;
1711  		thc_state.thc_func = func;
1712  		thc_state.thc_param0 = param0;
1713  		thc_state.thc_param1 = param1;
1714  		thc_state.thc_IOTES_invocation_timestamp = 0;
1715  
1716  		enable_ints_and_unlock(group, s);
1717  
1718  		thc_state.thc_call_start = mach_absolute_time();
1719  
1720  		thread_call_invoke(func, param0, param1, call);
1721  
1722  		thc_state.thc_call = NULL;
1723  
1724  		if (get_preemption_level() != 0) {
1725  			int pl = get_preemption_level();
1726  			panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
1727  			    pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
1728  		}
1729  
1730  		s = disable_ints_and_lock(group);
1731  
1732  		if (needs_finish) {
1733  			/* Release refcount, may free, may temporarily drop lock */
1734  			thread_call_finish(call, group, &s);
1735  		}
1736  	}
1737  
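      	/*
      	 * Out of pending work: detach the sched_call and remove this thread
      	 * from the group's active count before idling or terminating.
      	 */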
1738  	thread_sched_call(self, NULL);
1739  	group->active_count--;
1740  
1741  #ifndef __DARLING__
1742  	if (self->callout_woken_from_icontext && !self->callout_woke_thread) {
1743  		ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, 1);
1744  		if (self->callout_woken_from_platform_idle) {
1745  			ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, 1);
1746  		}
1747  	}
1748  #endif // __DARLING__
1749  
1750  	self->callout_woken_from_icontext = FALSE;
1751  	self->callout_woken_from_platform_idle = FALSE;
1752  	self->callout_woke_thread = FALSE;
1753  
1754  	self->thc_state = NULL;
1755  
1756  	if (group_isparallel(group)) {
1757  		/*
1758  		 * For the parallel (new-style) thread group, the thread always blocks here.
1759  		 * The first thread to go idle stamps idle_timestamp, which anchors the
1760  		 * deallocation interval.  If we have more threads than the group's
1761  		 * target and the deallocation timer isn't already active, arm it so a
1762  		 * thread is deallocated if the surplus persists.
1763  		 */
1764  		group->idle_count++;
1765  
1766  		if (group->idle_count == 1) {
1767  			group->idle_timestamp = mach_absolute_time();
1768  		}
1769  
1770  		if (((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0) &&
1771  		    ((group->active_count + group->idle_count) > group->target_thread_count)) {
1772  			thread_call_start_deallocate_timer(group);
1773  		}
1774  
1775  		/* Wait for more work (or termination) */
1776  		wres = waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_INTERRUPTIBLE, 0);
1777  		if (wres != THREAD_WAITING) {
1778  			panic("kcall worker unable to assert wait?");
1779  		}
1780  
1781  		enable_ints_and_unlock(group, s);
1782  
1783  		thread_block_parameter((thread_continue_t)thread_call_thread, group);
1784  	} else {
1785  		if (group->idle_count < group->target_thread_count) {
1786  			group->idle_count++;
1787  
1788  			waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_UNINT, 0); /* Interrupted means to exit */
1789  
1790  			enable_ints_and_unlock(group, s);
1791  
1792  			thread_block_parameter((thread_continue_t)thread_call_thread, group);
1793  			/* NOTREACHED */
1794  		}
1795  	}
1796  
1797  	enable_ints_and_unlock(group, s);
1798  
1799  	thread_terminate(self);
1800  	/* NOTREACHED */
1801  }
1802  
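      /*
       * Stamp the start time of an IOTES invocation on the current thread-call
       * worker's thc_state; no-op when called from a thread that is not a
       * thread-call worker.
       */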
1803  void
1804  thread_call_start_iotes_invocation(__assert_only thread_call_t call)
1805  {
1806  	thread_t self = current_thread();
1807  
1808  	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
1809  		/* not a thread call thread, might be a workloop IOTES */
1810  		return;
1811  	}
1812  
1813  	assert(self->thc_state);
1814  	assert(self->thc_state->thc_call == call);
1815  
1816  	self->thc_state->thc_IOTES_invocation_timestamp = mach_absolute_time();
1817  }
1818  
1819  
1820  /*
1821   *	thread_call_daemon: walk list of groups, allocating
1822   *	threads if appropriate (as determined by
1823   *	thread_call_group_should_add_thread()).
1824   */
1825  static void
1826  thread_call_daemon_continue(__unused void *arg)
1827  {
1828  	do {
1829  		os_atomic_store(&thread_call_daemon_awake, false, relaxed);
1830  
1831  		/* Starting at zero happens to be high-priority first. */
1832  		for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
1833  			thread_call_group_t group = &thread_call_groups[i];
1834  
1835  			spl_t s = disable_ints_and_lock(group);
1836  
1837  			while (thread_call_group_should_add_thread(group)) {
1838  				group->active_count++;
1839  
1840  				enable_ints_and_unlock(group, s);
1841  
1842  				thread_call_thread_create(group);
1843  
1844  				s = disable_ints_and_lock(group);
1845  			}
1846  
1847  			enable_ints_and_unlock(group, s);
1848  		}
1849  	} while (os_atomic_load(&thread_call_daemon_awake, relaxed));
1850  
1851  	waitq_assert_wait64(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake), THREAD_UNINT, 0);
1852  
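      	/*
      	 * A group may have set thread_call_daemon_awake (and issued its wakeup)
      	 * between the scan above and the assert_wait; if so, cancel the wait so
      	 * the daemon rescans instead of sleeping through the request.
      	 */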
1853  	if (os_atomic_load(&thread_call_daemon_awake, relaxed)) {
1854  		clear_wait(current_thread(), THREAD_AWAKENED);
1855  	}
1856  
1857  	thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL);
1858  	/* NOTREACHED */
1859  }
1860  
1861  static void
1862  thread_call_daemon(
1863  	__unused void    *arg)
1864  {
1865  	thread_t        self = current_thread();
1866  
1867  	self->options |= TH_OPT_VMPRIV;
1868  	vm_page_free_reserve(2);        /* XXX */
1869  
1870  	thread_set_thread_name(self, "thread_call_daemon");
1871  
1872  	thread_call_daemon_continue(NULL);
1873  	/* NOTREACHED */
1874  }
1875  
1876  /*
1877   * Schedule a timer to deallocate a worker thread if we have a surplus
1878   * of threads (in excess of the group's target) and at least one thread
1879   * remains idle for the entire deallocation interval.
1880   */
1881  static void
1882  thread_call_start_deallocate_timer(thread_call_group_t group)
1883  {
1884  	__assert_only bool already_enqueued;
1885  
1886  	assert(group->idle_count > 0);
1887  	assert((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0);
1888  
1889  	group->tcg_flags |= TCG_DEALLOC_ACTIVE;
1890  
1891  	uint64_t deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;
1892  
1893  	already_enqueued = timer_call_enter(&group->dealloc_timer, deadline, 0);
1894  
1895  	assert(already_enqueued == false);
1896  }
1897  
1898  /* non-static so dtrace can find it rdar://problem/31156135&31379348 */
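      /*
       * Expiration handler for a group's delayed-call timer (one per flavor):
       * moves calls whose soft deadline has passed onto the pending queue (or,
       * for THREAD_CALL_SIGNAL calls, invokes them directly at timer-interrupt
       * context), then re-arms the timer for the earliest remaining deadline.
       */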
1899  void
1900  thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1)
1901  {
1902  	thread_call_group_t  group  = (thread_call_group_t)  p0;
1903  	thread_call_flavor_t flavor = (thread_call_flavor_t) p1;
1904  
1905  	thread_call_t   call;
1906  	uint64_t        now;
1907  
1908  	thread_call_lock_spin(group);
1909  
1910  	if (flavor == TCF_CONTINUOUS) {
1911  		now = mach_continuous_time();
1912  	} else if (flavor == TCF_ABSOLUTE) {
1913  		now = mach_absolute_time();
1914  	} else {
1915  		panic("invalid timer flavor: %d", flavor);
1916  	}
1917  
1918  	while ((call = priority_queue_min(&group->delayed_pqueues[flavor],
1919  	    struct thread_call, tc_pqlink)) != NULL) {
1920  		assert(thread_call_get_group(call) == group);
1921  		assert(thread_call_get_flavor(call) == flavor);
1922  
1923  		/*
1924  		 * if we hit a call that isn't yet ready to expire,
1925  		 * then we're done for now
1926  		 * TODO: The next timer in the list could have a larger leeway
1927  		 *       and therefore be ready to expire.
1928  		 */
1929  		if (call->tc_soft_deadline > now) {
1930  			break;
1931  		}
1932  
1933  		/*
1934  		 * If we hit a rate-limited timer, don't eagerly wake it up.
1935  		 * Wait until it reaches the end of the leeway window.
1936  		 *
1937  		 * TODO: What if the next timer is not rate-limited?
1938  		 *       Have a separate rate-limited queue to avoid this
1939  		 */
1940  		if ((call->tc_flags & THREAD_CALL_RATELIMITED) &&
1941  		    (call->tc_pqlink.deadline > now) &&
1942  		    (ml_timer_forced_evaluation() == FALSE)) {
1943  			break;
1944  		}
1945  
1946  		if (THREAD_CALL_SIGNAL & call->tc_flags) {
1947  			__assert_only queue_head_t *old_queue;
1948  			old_queue = thread_call_dequeue(call);
1949  			assert(old_queue == &group->delayed_queues[flavor]);
1950  
1951  			do {
1952  				thread_call_func_t  func   = call->tc_func;
1953  				thread_call_param_t param0 = call->tc_param0;
1954  				thread_call_param_t param1 = call->tc_param1;
1955  
1956  				call->tc_flags |= THREAD_CALL_RUNNING;
1957  
1958  				thread_call_unlock(group);
1959  				thread_call_invoke(func, param0, param1, call);
1960  				thread_call_lock_spin(group);
1961  
1962  				/* finish may detect that the call has been re-pended */
1963  			} while (thread_call_finish(call, group, NULL));
1964  			/* call may have been freed by the finish */
1965  		} else {
1966  			_pending_call_enqueue(call, group, now);
1967  		}
1968  	}
1969  
1970  	_arm_delayed_call_timer(call, group, flavor);
1971  
1972  	thread_call_unlock(group);
1973  }
1974  
1975  static void
1976  thread_call_delayed_timer_rescan(thread_call_group_t group,
1977      thread_call_flavor_t flavor)
1978  {
1979  	thread_call_t call;
1980  	uint64_t now;
1981  
1982  	spl_t s = disable_ints_and_lock(group);
1983  
1984  	assert(ml_timer_forced_evaluation() == TRUE);
1985  
1986  	if (flavor == TCF_CONTINUOUS) {
1987  		now = mach_continuous_time();
1988  	} else {
1989  		now = mach_absolute_time();
1990  	}
1991  
1992  	qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_qlink) {
1993  		if (call->tc_soft_deadline <= now) {
1994  			_pending_call_enqueue(call, group, now);
1995  		} else {
1996  			uint64_t skew = call->tc_pqlink.deadline - call->tc_soft_deadline;
1997  			assert(call->tc_pqlink.deadline >= call->tc_soft_deadline);
1998  			/*
1999  			 * On a latency quality-of-service level change,
2000  			 * re-sort potentially rate-limited callouts. The platform
2001  			 * layer determines which timers require this.
2002  			 *
2003  			 * This trick works by updating the deadline value to
2004  			 * equal soft-deadline, effectively crushing away
2005  			 * timer coalescing slop values for any armed
2006  			 * timer in the queue.
2007  			 *
2008  			 * TODO: keep a hint on the timer to tell whether its inputs changed, so we
2009  			 * only have to crush coalescing for timers that need it.
2010  			 *
2011  			 * TODO: Keep a separate queue of timers above the re-sort
2012  			 * threshold, so we only have to look at those.
2013  			 */
2014  			if (timer_resort_threshold(skew)) {
2015  				_call_dequeue(call, group);
2016  				_delayed_call_enqueue(call, group, call->tc_soft_deadline, flavor);
2017  			}
2018  		}
2019  	}
2020  
2021  	_arm_delayed_call_timer(NULL, group, flavor);
2022  
2023  	enable_ints_and_unlock(group, s);
2024  }
2025  
2026  void
2027  thread_call_delayed_timer_rescan_all(void)
2028  {
2029  	for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
2030  		for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
2031  			thread_call_delayed_timer_rescan(&thread_call_groups[i], flavor);
2032  		}
2033  	}
2034  }
2035  
2036  /*
2037   * Timer callback to tell a thread to terminate if
2038   * we have an excess of threads and at least one has been
2039   * idle for a long time.
2040   */
2041  static void
2042  thread_call_dealloc_timer(
2043  	timer_call_param_t              p0,
2044  	__unused timer_call_param_t     p1)
2045  {
2046  	thread_call_group_t group = (thread_call_group_t)p0;
2047  	uint64_t now;
2048  	kern_return_t res;
2049  	bool terminated = false;
2050  
2051  	thread_call_lock_spin(group);
2052  
2053  	assert(group->tcg_flags & TCG_DEALLOC_ACTIVE);
2054  
2055  	now = mach_absolute_time();
2056  
2057  	if (group->idle_count > 0) {
2058  		if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
2059  			terminated = true;
2060  			group->idle_count--;
2061  			res = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
2062  			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES);
2063  			if (res != KERN_SUCCESS) {
2064  				panic("Unable to wake up idle thread for termination?");
2065  			}
2066  		}
2067  	}
2068  
2069  	group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;
2070  
2071  	/*
2072  	 * If we still have an excess of threads, schedule another
2073  	 * invocation of this function.
2074  	 */
2075  	if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
2076  		/*
2077  		 * If we killed someone just now, push out the
2078  		 * next deadline.
2079  		 */
2080  		if (terminated) {
2081  			group->idle_timestamp = now;
2082  		}
2083  
2084  		thread_call_start_deallocate_timer(group);
2085  	}
2086  
2087  	thread_call_unlock(group);
2088  }
2089  
2090  /*
2091   * Wait for the invocation of the thread call to complete
2092   * We know there's only one in flight because of the 'once' flag.
2093   *
2094   * If a subsequent invocation comes in before we wake up, that's OK
2095   *
2096   * TODO: Here is where we will add priority inheritance to the thread executing
2097   * the thread call in case it's lower priority than the current thread
2098   *      <rdar://problem/30321792> Priority inheritance for thread_call_wait_once
2099   *
2100   * Called with the thread call (group) lock held; returns with it unlocked.
2101   *      This lets us avoid a spurious take/drop after waking up from thread_block
2102   *
2103   * This thread could be a thread call thread itself, blocking and therefore making a
2104   * sched_call upcall into the thread call subsystem, needing the group lock.
2105   * However, we're saved from deadlock because the 'block' upcall is made in
2106   * thread_block, not in assert_wait.
2107   */
2108  static bool
2109  thread_call_wait_once_locked(thread_call_t call, spl_t s)
2110  {
2111  	assert(call->tc_flags & THREAD_CALL_ALLOC);
2112  	assert(call->tc_flags & THREAD_CALL_ONCE);
2113  
2114  	thread_call_group_t group = thread_call_get_group(call);
2115  
2116  	if ((call->tc_flags & THREAD_CALL_RUNNING) == 0) {
2117  		enable_ints_and_unlock(group, s);
2118  		return false;
2119  	}
2120  
2121  	/* call is running, so we have to wait for it */
2122  	call->tc_flags |= THREAD_CALL_WAIT;
2123  
2124  	wait_result_t res = waitq_assert_wait64(&group->waiters_waitq, CAST_EVENT64_T(call), THREAD_UNINT, 0);
2125  	if (res != THREAD_WAITING) {
2126  		panic("Unable to assert wait: %d", res);
2127  	}
2128  
2129  	enable_ints_and_unlock(group, s);
2130  
2131  	res = thread_block(THREAD_CONTINUE_NULL);
2132  	if (res != THREAD_AWAKENED) {
2133  		panic("Awoken with %d?", res);
2134  	}
2135  
2136  	/* returns unlocked */
2137  	return true;
2138  }
2139  
2140  /*
2141   * Wait for an in-flight invocation to complete
2142   * Does NOT try to cancel, so the client doesn't need to hold their
2143   * lock while calling this function.
2144   *
2145   * Returns whether or not it had to wait.
2146   *
2147   * Only works for THREAD_CALL_ONCE calls.
2148   */
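      /*
       * Illustrative sketch (not code in this file) of the cancel/wait/free
       * pattern this function supports, assuming the public thread_call KPI
       * from <kern/thread_call.h>; 'my_func', 'my_param' and 'deadline' are
       * placeholders:
       *
       *	thread_call_t tc = thread_call_allocate_with_options(my_func, my_param,
       *	    THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
       *	thread_call_enter_delayed(tc, deadline);
       *	...
       *	if (!thread_call_cancel(tc)) {
       *		(void) thread_call_wait_once(tc);	 wait out an in-flight invocation
       *	}
       *	thread_call_free(tc);
       */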
2149  boolean_t
2150  thread_call_wait_once(thread_call_t call)
2151  {
2152  	if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
2153  		panic("thread_call_wait_once: can't wait on thread call whose storage I don't own");
2154  	}
2155  
2156  	if ((call->tc_flags & THREAD_CALL_ONCE) == 0) {
2157  		panic("thread_call_wait_once: can't wait_once on a non-once call");
2158  	}
2159  
2160  	if (!ml_get_interrupts_enabled()) {
2161  		panic("unsafe thread_call_wait_once");
2162  	}
2163  
2164  	thread_t self = current_thread();
2165  
2166  	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
2167  	    self->thc_state && self->thc_state->thc_call == call) {
2168  		panic("thread_call_wait_once: deadlock waiting on self from inside call: %p to function %p",
2169  		    call, call->tc_func);
2170  	}
2171  
2172  	thread_call_group_t group = thread_call_get_group(call);
2173  
2174  	spl_t s = disable_ints_and_lock(group);
2175  
2176  	bool waited = thread_call_wait_once_locked(call, s);
2177  	/* thread call lock unlocked */
2178  
2179  	return waited;
2180  }
2181  
2182  
2183  /*
2184   * Wait for all requested invocations of a thread call prior to now
2185   * to finish.  Can only be invoked on thread calls whose storage we manage.
2186   * Just waits for the finish count to catch up to the submit count we find
2187   * at the beginning of our wait.
2188   *
2189   * Called with thread_call_lock held.  Returns with lock released.
2190   */
2191  static void
2192  thread_call_wait_locked(thread_call_t call, spl_t s)
2193  {
2194  	thread_call_group_t group = thread_call_get_group(call);
2195  
2196  	assert(call->tc_flags & THREAD_CALL_ALLOC);
2197  
2198  	uint64_t submit_count = call->tc_submit_count;
2199  
2200  	while (call->tc_finish_count < submit_count) {
2201  		call->tc_flags |= THREAD_CALL_WAIT;
2202  
2203  		wait_result_t res = waitq_assert_wait64(&group->waiters_waitq,
2204  		    CAST_EVENT64_T(call), THREAD_UNINT, 0);
2205  
2206  		if (res != THREAD_WAITING) {
2207  			panic("Unable to assert wait: %d", res);
2208  		}
2209  
2210  		enable_ints_and_unlock(group, s);
2211  
2212  		res = thread_block(THREAD_CONTINUE_NULL);
2213  		if (res != THREAD_AWAKENED) {
2214  			panic("Awoken with %d?", res);
2215  		}
2216  
2217  		s = disable_ints_and_lock(group);
2218  	}
2219  
2220  	enable_ints_and_unlock(group, s);
2221  }
2222  
2223  /*
2224   * Determine whether a thread call is either on a queue or
2225   * currently being executed.
2226   */
2227  boolean_t
2228  thread_call_isactive(thread_call_t call)
2229  {
2230  	thread_call_group_t group = thread_call_get_group(call);
2231  
2232  	spl_t s = disable_ints_and_lock(group);
2233  	boolean_t active = (call->tc_submit_count > call->tc_finish_count);
2234  	enable_ints_and_unlock(group, s);
2235  
2236  	return active;
2237  }
2238  
2239  /*
2240   * adjust_cont_time_thread_calls
2241   * on wake, reenqueue delayed call timer for continuous time thread call groups
2242   */
2243  void
2244  adjust_cont_time_thread_calls(void)
2245  {
2246  	for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
2247  		thread_call_group_t group = &thread_call_groups[i];
2248  		spl_t s = disable_ints_and_lock(group);
2249  
2250  		/* only the continuous timers need to be re-armed */
2251  
2252  		_arm_delayed_call_timer(NULL, group, TCF_CONTINUOUS);
2253  		enable_ints_and_unlock(group, s);
2254  	}
2255  }