/* duct-tape / xnu: osfmk/kern/coalition.c */
   1  /*
   2   * Copyright (c) 2019-2020 Apple Inc. All rights reserved.
   3   *
   4   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5   *
   6   * This file contains Original Code and/or Modifications of Original Code
   7   * as defined in and that are subject to the Apple Public Source License
   8   * Version 2.0 (the 'License'). You may not use this file except in
   9   * compliance with the License. The rights granted to you under the License
  10   * may not be used to create, or enable the creation or redistribution of,
  11   * unlawful or unlicensed copies of an Apple operating system, or to
  12   * circumvent, violate, or enable the circumvention or violation of, any
  13   * terms of an Apple operating system software license agreement.
  14   *
  15   * Please obtain a copy of the License at
  16   * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17   *
  18   * The Original Code and all software distributed under the License are
  19   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23   * Please see the License for the specific language governing rights and
  24   * limitations under the License.
  25   *
  26   * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27   */
  28  
  29  #include <kern/kern_types.h>
  30  #include <mach/mach_types.h>
  31  #include <mach/boolean.h>
  32  
  33  #include <kern/coalition.h>
  34  #include <kern/exc_resource.h>
  35  #include <kern/host.h>
  36  #include <kern/ledger.h>
  37  #include <kern/mach_param.h> /* for TASK_CHUNK */
  38  #if MONOTONIC
  39  #include <kern/monotonic.h>
  40  #endif /* MONOTONIC */
  41  #include <kern/policy_internal.h>
  42  #include <kern/task.h>
  43  #include <kern/thread_group.h>
  44  #include <kern/zalloc.h>
  45  
  46  #include <libkern/OSAtomic.h>
  47  
  48  #include <mach/coalition_notification_server.h>
  49  #include <mach/host_priv.h>
  50  #include <mach/host_special_ports.h>
  51  
  52  #include <os/log.h>
  53  
  54  #include <sys/errno.h>
  55  
  56  /*
  57   * BSD interface functions
  58   */
  59  int coalitions_get_list(int type, struct procinfo_coalinfo *coal_list, int list_sz);
  60  coalition_t task_get_coalition(task_t task, int type);
  61  boolean_t coalition_is_leader(task_t task, coalition_t coal);
  62  task_t coalition_get_leader(coalition_t coal);
  63  int coalition_get_task_count(coalition_t coal);
  64  uint64_t coalition_get_page_count(coalition_t coal, int *ntasks);
  65  int coalition_get_pid_list(coalition_t coal, uint32_t rolemask, int sort_order,
  66      int *pid_list, int list_sz);
  67  
  68  /* defined in task.c */
  69  extern ledger_template_t task_ledger_template;
  70  
  71  /*
  72   * Templates; task template is copied due to potential allocation limits on
  73   * task ledgers.
  74   */
  75  ledger_template_t coalition_task_ledger_template = NULL;
  76  ledger_template_t coalition_ledger_template = NULL;
  77  
  78  extern int      proc_selfpid(void);
  79  /*
  80   * Coalition zone needs limits. We expect there will be as many coalitions as
  81   * tasks (same order of magnitude), so use the task zone's limits.
  82   * */
  83  #define CONFIG_COALITION_MAX CONFIG_TASK_MAX
  84  #define COALITION_CHUNK TASK_CHUNK
  85  
  86  int unrestrict_coalition_syscalls;
  87  int merge_adaptive_coalitions;
  88  
  89  LCK_GRP_DECLARE(coalitions_lck_grp, "coalition");
  90  
  91  /* coalitions_list_lock protects coalition_count, coalitions queue, next_coalition_id. */
  92  static LCK_RW_DECLARE(coalitions_list_lock, &coalitions_lck_grp);
  93  static uint64_t coalition_count;
  94  static uint64_t coalition_next_id = 1;
  95  static queue_head_t coalitions_q;
  96  
  97  coalition_t init_coalition[COALITION_NUM_TYPES];
  98  coalition_t corpse_coalition[COALITION_NUM_TYPES];
  99  
 100  static const char *
 101  coal_type_str(int type)
 102  {
 103  	switch (type) {
 104  	case COALITION_TYPE_RESOURCE:
 105  		return "RESOURCE";
 106  	case COALITION_TYPE_JETSAM:
 107  		return "JETSAM";
 108  	default:
 109  		return "<unknown>";
 110  	}
 111  }
 112  
/*
 * Vtable of per-type coalition operations. One entry per coalition type is
 * registered in s_coalition_types and dispatched through coal_call().
 */
struct coalition_type {
	int type;        /* COALITION_TYPE_* this entry implements */
	int has_default; /* non-zero if a default coalition of this type exists */
	/*
	 * init
	 * pre-condition: coalition just allocated (unlocked), unreferenced,
	 *                type field set
	 */
	kern_return_t (*init)(coalition_t coal, boolean_t privileged);

	/*
	 * dealloc
	 * pre-condition: coalition unlocked
	 * pre-condition: coalition refcount=0, active_count=0,
	 *                termrequested=1, terminated=1, reaped=1
	 */
	void          (*dealloc)(coalition_t coal);

	/*
	 * adopt_task
	 * pre-condition: coalition locked
	 * pre-condition: coalition !reaped and !terminated
	 */
	kern_return_t (*adopt_task)(coalition_t coal, task_t task);

	/*
	 * remove_task
	 * pre-condition: coalition locked
	 * pre-condition: task has been removed from coalition's task list
	 */
	kern_return_t (*remove_task)(coalition_t coal, task_t task);

	/*
	 * set_taskrole
	 * pre-condition: coalition locked
	 * pre-condition: task added to coalition's task list,
	 *                active_count >= 1 (at least the given task is active)
	 */
	kern_return_t (*set_taskrole)(coalition_t coal, task_t task, int role);

	/*
	 * get_taskrole
	 * pre-condition: coalition locked
	 * pre-condition: task added to coalition's task list,
	 *                active_count >= 1 (at least the given task is active)
	 */
	int (*get_taskrole)(coalition_t coal, task_t task);

	/*
	 * iterate_tasks
	 * pre-condition: coalition locked
	 */
	void (*iterate_tasks)(coalition_t coal, void *ctx, void (*callback)(coalition_t, void *, task_t));
};
 167  
 168  /*
 169   * COALITION_TYPE_RESOURCE
 170   */
 171  
 172  static kern_return_t i_coal_resource_init(coalition_t coal, boolean_t privileged);
 173  static void          i_coal_resource_dealloc(coalition_t coal);
 174  static kern_return_t i_coal_resource_adopt_task(coalition_t coal, task_t task);
 175  static kern_return_t i_coal_resource_remove_task(coalition_t coal, task_t task);
 176  static kern_return_t i_coal_resource_set_taskrole(coalition_t coal,
 177      task_t task, int role);
 178  static int           i_coal_resource_get_taskrole(coalition_t coal, task_t task);
 179  static void          i_coal_resource_iterate_tasks(coalition_t coal, void *ctx,
 180      void (*callback)(coalition_t, void *, task_t));
 181  
 182  /*
 183   * Ensure COALITION_NUM_THREAD_QOS_TYPES defined in mach/coalition.h still
 184   * matches THREAD_QOS_LAST defined in mach/thread_policy.h
 185   */
 186  static_assert(COALITION_NUM_THREAD_QOS_TYPES == THREAD_QOS_LAST);
 187  
/*
 * Private state for COALITION_TYPE_RESOURCE coalitions: rolled-up resource
 * accounting across member tasks. The counters below hold totals for tasks
 * that already exited (updated in i_coal_resource_remove_task); live-task
 * usage is added on top at query time.
 */
struct i_resource_coalition {
	/*
	 * This keeps track of resource utilization of tasks that are no longer active
	 * in the coalition and is updated when a task is removed from the coalition.
	 */
	ledger_t ledger;
	uint64_t bytesread;    /* disk bytes read by exited tasks */
	uint64_t byteswritten; /* total I/O minus disk reads, for exited tasks */
	uint64_t energy;       /* task_energy() rollup (arm/arm64 only) */
	uint64_t gpu_time;     /* task_gpu_utilisation() rollup (x86_64 only) */
	/* logical write counters, split internal vs. external storage */
	uint64_t logical_immediate_writes;
	uint64_t logical_deferred_writes;
	uint64_t logical_invalidated_writes;
	uint64_t logical_metadata_writes;
	uint64_t logical_immediate_writes_to_external;
	uint64_t logical_deferred_writes_to_external;
	uint64_t logical_invalidated_writes_to_external;
	uint64_t logical_metadata_writes_to_external;
	uint64_t cpu_ptime;    /* task_cpu_ptime() rollup of exited tasks */
	uint64_t cpu_time_eqos[COALITION_NUM_THREAD_QOS_TYPES];      /* cpu time per effective QoS class */
	uint64_t cpu_time_rqos[COALITION_NUM_THREAD_QOS_TYPES];      /* cpu time per requested QoS class */
	uint64_t cpu_instructions; /* fixed hardware counter rollup (MONOTONIC) */
	uint64_t cpu_cycles;       /* fixed hardware counter rollup (MONOTONIC) */

	uint64_t task_count;      /* tasks that have started in this coalition */
	uint64_t dead_task_count; /* tasks that have exited in this coalition;
	                           *  subtract from task_count to get count
	                           *  of "active" tasks */
	/*
	 * Count the length of time this coalition had at least one active task.
	 * This can be a 'denominator' to turn e.g. cpu_time to %cpu.
	 * */
	uint64_t last_became_nonempty_time;
	uint64_t time_nonempty;

	queue_head_t tasks;         /* List of active tasks in the coalition */
	/*
	 * This ledger is used for triggering resource exception. For the tracked resources, this is updated
	 * when the member tasks' resource usage changes.
	 */
	ledger_t resource_monitor_ledger;
#if CONFIG_PHYS_WRITE_ACCT
	uint64_t fs_metadata_writes;
#endif /* CONFIG_PHYS_WRITE_ACCT */
};
 233  
 234  /*
 235   * COALITION_TYPE_JETSAM
 236   */
 237  
 238  static kern_return_t i_coal_jetsam_init(coalition_t coal, boolean_t privileged);
 239  static void          i_coal_jetsam_dealloc(coalition_t coal);
 240  static kern_return_t i_coal_jetsam_adopt_task(coalition_t coal, task_t task);
 241  static kern_return_t i_coal_jetsam_remove_task(coalition_t coal, task_t task);
 242  static kern_return_t i_coal_jetsam_set_taskrole(coalition_t coal,
 243      task_t task, int role);
 244  int           i_coal_jetsam_get_taskrole(coalition_t coal, task_t task);
 245  static void          i_coal_jetsam_iterate_tasks(coalition_t coal, void *ctx,
 246      void (*callback)(coalition_t, void *, task_t));
 247  
/*
 * Private state for COALITION_TYPE_JETSAM coalitions: a leader task, member
 * tasks bucketed by role, and an associated thread group.
 */
struct i_jetsam_coalition {
	task_t       leader;       /* coalition leader task, if any */
	queue_head_t extensions;   /* member tasks, bucketed by role — presumably
	                            *  extension / service / other; confirm against
	                            *  the jetsam adopt/set_taskrole callbacks */
	queue_head_t services;
	queue_head_t other;
	struct thread_group *thread_group; /* see kern/thread_group.h */
};
 255  
 256  
 257  /*
 258   * main coalition structure
 259   */
/*
 * main coalition structure
 */
struct coalition {
	uint64_t id;                /* monotonically increasing */
	uint32_t type;              /* COALITION_TYPE_*; selects the live union member below */
	uint32_t role;              /* default task role (background, adaptive, interactive, etc) */
	uint32_t ref_count;         /* Number of references to the memory containing this struct */
	uint32_t active_count;      /* Number of members of (tasks in) the
	                             *  coalition, plus vouchers referring
	                             *  to the coalition */
	uint32_t focal_task_count;   /* Number of TASK_FOREGROUND_APPLICATION tasks in the coalition */
	uint32_t nonfocal_task_count; /* Number of TASK_BACKGROUND_APPLICATION tasks in the coalition */

	/* coalition flags */
	uint32_t privileged : 1;    /* Members of this coalition may create
	                             *  and manage coalitions and may posix_spawn
	                             *  processes into selected coalitions */
	/* ast? */
	/* voucher */
	uint32_t termrequested : 1; /* launchd has requested termination when coalition becomes empty */
	uint32_t terminated : 1;    /* coalition became empty and spawns are now forbidden */
	uint32_t reaped : 1;        /* reaped, invisible to userspace, but waiting for ref_count to go to zero */
	uint32_t notified : 1;      /* no-more-processes notification was sent via special port */
	uint32_t efficient : 1;     /* launchd has marked the coalition as efficient */
#if DEVELOPMENT || DEBUG
	uint32_t should_notify : 1; /* should this coalition send notifications (default: yes) */
#endif

	queue_chain_t coalitions;   /* global list of coalitions */

	decl_lck_mtx_data(, lock);    /* Coalition lock. */

	/* put coalition type-specific structures here */
	union {
		struct i_resource_coalition  r;
		struct i_jetsam_coalition    j;
	};
};
 296  
 297  /*
 298   * register different coalition types:
 299   * these must be kept in the order specified in coalition.h
 300   */
 301  static const struct coalition_type
 302      s_coalition_types[COALITION_NUM_TYPES] = {
 303  	{
 304  		COALITION_TYPE_RESOURCE,
 305  		1,
 306  		i_coal_resource_init,
 307  		i_coal_resource_dealloc,
 308  		i_coal_resource_adopt_task,
 309  		i_coal_resource_remove_task,
 310  		i_coal_resource_set_taskrole,
 311  		i_coal_resource_get_taskrole,
 312  		i_coal_resource_iterate_tasks,
 313  	},
 314  	{
 315  		COALITION_TYPE_JETSAM,
 316  		1,
 317  		i_coal_jetsam_init,
 318  		i_coal_jetsam_dealloc,
 319  		i_coal_jetsam_adopt_task,
 320  		i_coal_jetsam_remove_task,
 321  		i_coal_jetsam_set_taskrole,
 322  		i_coal_jetsam_get_taskrole,
 323  		i_coal_jetsam_iterate_tasks,
 324  	},
 325  };
 326  
 327  ZONE_DECLARE(coalition_zone, "coalitions",
 328      sizeof(struct coalition), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
 329  
 330  #define coal_call(coal, func, ...) \
 331  	(s_coalition_types[(coal)->type].func)(coal, ## __VA_ARGS__)
 332  
 333  
 334  #define coalition_lock(c) do{ lck_mtx_lock(&c->lock); }while(0)
 335  #define coalition_unlock(c) do{ lck_mtx_unlock(&c->lock); }while(0)
 336  
 337  /*
 338   * Define the coalition type to track focal tasks.
 339   * On embedded, track them using jetsam coalitions since they have associated thread
 340   * groups which reflect this property as a flag (and pass it down to CLPC).
 341   * On non-embedded platforms, since not all coalitions have jetsam coalitions
 342   * track focal counts on the resource coalition.
 343   */
 344  #if !XNU_TARGET_OS_OSX
 345  #define COALITION_FOCAL_TASKS_ACCOUNTING  COALITION_TYPE_JETSAM
 346  #else /* !XNU_TARGET_OS_OSX */
 347  #define COALITION_FOCAL_TASKS_ACCOUNTING  COALITION_TYPE_RESOURCE
 348  #endif /* !XNU_TARGET_OS_OSX */
 349  
 350  
 351  /*
 352   *
 353   * Coalition ledger implementation
 354   *
 355   */
 356  
 357  struct coalition_ledger_indices coalition_ledgers =
 358  {.logical_writes = -1, };
 359  void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_COALITION_IS_CAUSING_TOO_MUCH_IO(int flavor);
 360  
 361  ledger_t
 362  coalition_ledger_get_from_task(task_t task)
 363  {
 364  	ledger_t ledger = LEDGER_NULL;
 365  	coalition_t coal = task->coalition[COALITION_TYPE_RESOURCE];
 366  
 367  	if (coal != NULL && (!queue_empty(&task->task_coalition[COALITION_TYPE_RESOURCE]))) {
 368  		ledger = coal->r.resource_monitor_ledger;
 369  		ledger_reference(ledger);
 370  	}
 371  	return ledger;
 372  }
 373  
 374  
/* Commands for coalition_io_monitor_ctl(). */
enum {
	COALITION_IO_LEDGER_ENABLE,  /* arm limit and refill period on the I/O ledger */
	COALITION_IO_LEDGER_DISABLE  /* tear down refill and callback */
};
 379  
 380  void
 381  coalition_io_monitor_ctl(struct coalition *coalition, uint32_t flags, int64_t limit)
 382  {
 383  	ledger_t ledger = coalition->r.resource_monitor_ledger;
 384  
 385  	if (flags == COALITION_IO_LEDGER_ENABLE) {
 386  		/* Configure the logical I/O ledger */
 387  		ledger_set_limit(ledger, coalition_ledgers.logical_writes, (limit * 1024 * 1024), 0);
 388  		ledger_set_period(ledger, coalition_ledgers.logical_writes, (COALITION_LEDGER_MONITOR_INTERVAL_SECS * NSEC_PER_SEC));
 389  	} else if (flags == COALITION_IO_LEDGER_DISABLE) {
 390  		ledger_disable_refill(ledger, coalition_ledgers.logical_writes);
 391  		ledger_disable_callback(ledger, coalition_ledgers.logical_writes);
 392  	}
 393  }
 394  
 395  int
 396  coalition_ledger_set_logical_writes_limit(struct coalition *coalition, int64_t limit)
 397  {
 398  	int error = 0;
 399  
 400  	/*  limit = -1 will be used to disable the limit and the callback */
 401  	if (limit > COALITION_MAX_LOGICAL_WRITES_LIMIT || limit == 0 || limit < -1) {
 402  		error = EINVAL;
 403  		goto out;
 404  	}
 405  
 406  	coalition_lock(coalition);
 407  	if (limit == -1) {
 408  		coalition_io_monitor_ctl(coalition, COALITION_IO_LEDGER_DISABLE, limit);
 409  	} else {
 410  		coalition_io_monitor_ctl(coalition, COALITION_IO_LEDGER_ENABLE, limit);
 411  	}
 412  	coalition_unlock(coalition);
 413  out:
 414  	return error;
 415  }
 416  
/*
 * Resource-exception path, invoked via coalition_io_rate_exceeded() when the
 * current task's resource coalition exceeds its logical-writes ledger limit.
 * Logs the violation, notifies the resource-violation listener, then raises
 * the limit exponentially (or disables monitoring past the cap) so repeated
 * violations do not spam the listener.
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_COALITION_IS_CAUSING_TOO_MUCH_IO(int flavor)
{
	int pid = proc_selfpid();
	ledger_amount_t new_limit;
	task_t task = current_task();
	struct ledger_entry_info lei;
	kern_return_t kr;
	ledger_t ledger;
	struct coalition *coalition = task->coalition[COALITION_TYPE_RESOURCE];

	assert(coalition != NULL);
	ledger = coalition->r.resource_monitor_ledger;

	switch (flavor) {
	case FLAVOR_IO_LOGICAL_WRITES:
		/* lei is only populated on this path; unknown flavors bail out */
		ledger_get_entry_info(ledger, coalition_ledgers.logical_writes, &lei);
		trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
		break;
	default:
		goto Exit;
	}

	os_log(OS_LOG_DEFAULT, "Coalition [%lld] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]. Triggered by process [%d]\n",
	    coalition->id, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)),
	    (lei.lei_refill_period / NSEC_PER_SEC), pid);

	kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
	if (kr) {
		os_log(OS_LOG_DEFAULT, "ERROR %#x returned from send_resource_violation(disk_writes, ...)\n", kr);
	}

	/*
	 * Continue to monitor the coalition after it hits the initial limit, but increase
	 * the limit exponentially (4x, computed in MB) so that we don't spam the listener.
	 */
	new_limit = (lei.lei_limit / 1024 / 1024) * 4;
	coalition_lock(coalition);
	if (new_limit > COALITION_MAX_LOGICAL_WRITES_LIMIT) {
		/* Past the cap: stop monitoring this coalition entirely. */
		coalition_io_monitor_ctl(coalition, COALITION_IO_LEDGER_DISABLE, -1);
	} else {
		coalition_io_monitor_ctl(coalition, COALITION_IO_LEDGER_ENABLE, new_limit);
	}
	coalition_unlock(coalition);

Exit:
	return;
}
 465  
 466  void
 467  coalition_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
 468  {
 469  	if (warning == 0) {
 470  		SENDING_NOTIFICATION__THIS_COALITION_IS_CAUSING_TOO_MUCH_IO((int)param0);
 471  	}
 472  }
 473  
 474  void
 475  init_coalition_ledgers(void)
 476  {
 477  	ledger_template_t t;
 478  	assert(coalition_ledger_template == NULL);
 479  
 480  	if ((t = ledger_template_create("Per-coalition ledgers")) == NULL) {
 481  		panic("couldn't create coalition ledger template");
 482  	}
 483  
 484  	coalition_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
 485  
 486  	if (coalition_ledgers.logical_writes < 0) {
 487  		panic("couldn't create entries for coaliton ledger template");
 488  	}
 489  
 490  	ledger_set_callback(t, coalition_ledgers.logical_writes, coalition_io_rate_exceeded, (void *)FLAVOR_IO_LOGICAL_WRITES, NULL);
 491  	ledger_template_complete(t);
 492  
 493  	coalition_task_ledger_template = ledger_template_copy(task_ledger_template, "Coalition task ledgers");
 494  
 495  	if (coalition_task_ledger_template == NULL) {
 496  		panic("couldn't create coalition task ledger template");
 497  	}
 498  
 499  	ledger_template_complete(coalition_task_ledger_template);
 500  
 501  	coalition_ledger_template = t;
 502  }
 503  
 504  void
 505  coalition_io_ledger_update(task_t task, int32_t flavor, boolean_t is_credit, uint32_t io_size)
 506  {
 507  	ledger_t ledger;
 508  	coalition_t coal = task->coalition[COALITION_TYPE_RESOURCE];
 509  
 510  	assert(coal != NULL);
 511  	ledger = coal->r.resource_monitor_ledger;
 512  	if (LEDGER_VALID(ledger)) {
 513  		if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
 514  			if (is_credit) {
 515  				ledger_credit(ledger, coalition_ledgers.logical_writes, io_size);
 516  			} else {
 517  				ledger_debit(ledger, coalition_ledgers.logical_writes, io_size);
 518  			}
 519  		}
 520  	}
 521  }
 522  
 523  static void
 524  coalition_notify_user(uint64_t id, uint32_t flags)
 525  {
 526  	mach_port_t user_port;
 527  	kern_return_t kr;
 528  
 529  	kr = host_get_coalition_port(host_priv_self(), &user_port);
 530  	if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(user_port)) {
 531  		return;
 532  	}
 533  
 534  	coalition_notification(user_port, id, flags);
 535  	ipc_port_release_send(user_port);
 536  }
 537  
 538  /*
 539   *
 540   * COALITION_TYPE_RESOURCE
 541   *
 542   */
 543  static kern_return_t
 544  i_coal_resource_init(coalition_t coal, boolean_t privileged)
 545  {
 546  	(void)privileged;
 547  	assert(coal && coal->type == COALITION_TYPE_RESOURCE);
 548  	coal->r.ledger = ledger_instantiate(coalition_task_ledger_template,
 549  	    LEDGER_CREATE_ACTIVE_ENTRIES);
 550  	if (coal->r.ledger == NULL) {
 551  		return KERN_RESOURCE_SHORTAGE;
 552  	}
 553  
 554  	coal->r.resource_monitor_ledger = ledger_instantiate(coalition_ledger_template,
 555  	    LEDGER_CREATE_ACTIVE_ENTRIES);
 556  	if (coal->r.resource_monitor_ledger == NULL) {
 557  		return KERN_RESOURCE_SHORTAGE;
 558  	}
 559  
 560  	queue_init(&coal->r.tasks);
 561  
 562  	return KERN_SUCCESS;
 563  }
 564  
 565  static void
 566  i_coal_resource_dealloc(coalition_t coal)
 567  {
 568  	assert(coal && coal->type == COALITION_TYPE_RESOURCE);
 569  
 570  	ledger_dereference(coal->r.ledger);
 571  	ledger_dereference(coal->r.resource_monitor_ledger);
 572  }
 573  
 574  static kern_return_t
 575  i_coal_resource_adopt_task(coalition_t coal, task_t task)
 576  {
 577  	struct i_resource_coalition *cr;
 578  
 579  	assert(coal && coal->type == COALITION_TYPE_RESOURCE);
 580  	assert(queue_empty(&task->task_coalition[COALITION_TYPE_RESOURCE]));
 581  
 582  	cr = &coal->r;
 583  	cr->task_count++;
 584  
 585  	if (cr->task_count < cr->dead_task_count) {
 586  		panic("%s: coalition %p id:%llu type:%s task_count(%llu) < dead_task_count(%llu)",
 587  		    __func__, coal, coal->id, coal_type_str(coal->type),
 588  		    cr->task_count, cr->dead_task_count);
 589  	}
 590  
 591  	/* If moving from 0->1 active tasks */
 592  	if (cr->task_count - cr->dead_task_count == 1) {
 593  		cr->last_became_nonempty_time = mach_absolute_time();
 594  	}
 595  
 596  	/* put the task on the coalition's list of tasks */
 597  	enqueue_tail(&cr->tasks, &task->task_coalition[COALITION_TYPE_RESOURCE]);
 598  
 599  	coal_dbg("Added PID:%d to id:%llu, task_count:%llu, dead_count:%llu, nonempty_time:%llu",
 600  	    task_pid(task), coal->id, cr->task_count, cr->dead_task_count,
 601  	    cr->last_became_nonempty_time);
 602  
 603  	return KERN_SUCCESS;
 604  }
 605  
/*
 * remove_task callback for resource coalitions: roll the departing task's
 * resource usage up into the coalition's dead-task totals (so usage survives
 * task deallocation), update the non-empty clock, and unlink the task from
 * the member list. Coalition is locked; the task is still on the list on
 * entry.
 */
static kern_return_t
i_coal_resource_remove_task(coalition_t coal, task_t task)
{
	struct i_resource_coalition *cr;

	assert(coal && coal->type == COALITION_TYPE_RESOURCE);
	assert(task->coalition[COALITION_TYPE_RESOURCE] == coal);
	assert(!queue_empty(&task->task_coalition[COALITION_TYPE_RESOURCE]));

	/*
	 * handle resource coalition accounting rollup for dead tasks
	 */
	cr = &coal->r;

	cr->dead_task_count++;

	if (cr->task_count < cr->dead_task_count) {
		panic("%s: coalition %p id:%llu type:%s task_count(%llu) < dead_task_count(%llu)",
		    __func__, coal, coal->id, coal_type_str(coal->type), cr->task_count, cr->dead_task_count);
	}

	/* If moving from 1->0 active tasks */
	if (cr->task_count - cr->dead_task_count == 0) {
		uint64_t last_time_nonempty = mach_absolute_time() - cr->last_became_nonempty_time;
		cr->last_became_nonempty_time = 0;
		cr->time_nonempty += last_time_nonempty;
	}

	/* Do not roll up for exec'd task or exec copy task */
	if (!task_is_exec_copy(task) && !task_did_exec(task)) {
		ledger_rollup(cr->ledger, task->ledger);
		cr->bytesread += task->task_io_stats->disk_reads.size;
		cr->byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size;
#if defined(__x86_64__)
		cr->gpu_time += task_gpu_utilisation(task);
#endif /* defined(__x86_64__) */

#if defined(__arm__) || defined(__arm64__)
		cr->energy += task_energy(task);
#endif /* defined(__arm__) || defined(__arm64__) */

		cr->logical_immediate_writes += task->task_writes_counters_internal.task_immediate_writes;
		cr->logical_deferred_writes += task->task_writes_counters_internal.task_deferred_writes;
		cr->logical_invalidated_writes += task->task_writes_counters_internal.task_invalidated_writes;
		cr->logical_metadata_writes += task->task_writes_counters_internal.task_metadata_writes;
		cr->logical_immediate_writes_to_external += task->task_writes_counters_external.task_immediate_writes;
		cr->logical_deferred_writes_to_external += task->task_writes_counters_external.task_deferred_writes;
		cr->logical_invalidated_writes_to_external += task->task_writes_counters_external.task_invalidated_writes;
		cr->logical_metadata_writes_to_external += task->task_writes_counters_external.task_metadata_writes;
#if CONFIG_PHYS_WRITE_ACCT
		cr->fs_metadata_writes += task->task_fs_metadata_writes;
#endif /* CONFIG_PHYS_WRITE_ACCT */
		cr->cpu_ptime += task_cpu_ptime(task);
		task_update_cpu_time_qos_stats(task, cr->cpu_time_eqos, cr->cpu_time_rqos);
#if MONOTONIC
		/* Fold the task's fixed hardware counters into the coalition totals. */
		uint64_t counts[MT_CORE_NFIXED] = {};
		(void)mt_fixed_task_counts(task, counts);
		cr->cpu_cycles += counts[MT_CORE_CYCLES];
#if defined(MT_CORE_INSTRS)
		cr->cpu_instructions += counts[MT_CORE_INSTRS];
#endif /* defined(MT_CORE_INSTRS) */
#endif /* MONOTONIC */
	}

	/* remove the task from the coalition's list */
	remqueue(&task->task_coalition[COALITION_TYPE_RESOURCE]);
	queue_chain_init(task->task_coalition[COALITION_TYPE_RESOURCE]);

	coal_dbg("removed PID:%d from id:%llu, task_count:%llu, dead_count:%llu",
	    task_pid(task), coal->id, cr->task_count, cr->dead_task_count);

	return KERN_SUCCESS;
}
 679  
/*
 * set_taskrole callback for resource coalitions: per-task roles are not
 * tracked for this type, so any assignment trivially succeeds.
 */
static kern_return_t
i_coal_resource_set_taskrole(__unused coalition_t coal,
    __unused task_t task, __unused int role)
{
	return KERN_SUCCESS;
}
 686  
 687  static int
 688  i_coal_resource_get_taskrole(__unused coalition_t coal, __unused task_t task)
 689  {
 690  	task_t t;
 691  
 692  	assert(coal && coal->type == COALITION_TYPE_RESOURCE);
 693  
 694  	qe_foreach_element(t, &coal->r.tasks, task_coalition[COALITION_TYPE_RESOURCE]) {
 695  		if (t == task) {
 696  			return COALITION_TASKROLE_UNDEF;
 697  		}
 698  	}
 699  
 700  	return -1;
 701  }
 702  
 703  static void
 704  i_coal_resource_iterate_tasks(coalition_t coal, void *ctx, void (*callback)(coalition_t, void *, task_t))
 705  {
 706  	task_t t;
 707  	assert(coal && coal->type == COALITION_TYPE_RESOURCE);
 708  
 709  	qe_foreach_element(t, &coal->r.tasks, task_coalition[COALITION_TYPE_RESOURCE])
 710  	callback(coal, ctx, t);
 711  }
 712  
 713  #if CONFIG_PHYS_WRITE_ACCT
 714  extern uint64_t kernel_pm_writes;
 715  #endif /* CONFIG_PHYS_WRITE_ACCT */
 716  
 717  kern_return_t
 718  coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_usage *cru_out)
 719  {
 720  	kern_return_t kr;
 721  	ledger_amount_t credit, debit;
 722  	int i;
 723  
 724  	if (coal->type != COALITION_TYPE_RESOURCE) {
 725  		return KERN_INVALID_ARGUMENT;
 726  	}
 727  
 728  	/* Return KERN_INVALID_ARGUMENT for Corpse coalition */
 729  	for (i = 0; i < COALITION_NUM_TYPES; i++) {
 730  		if (coal == corpse_coalition[i]) {
 731  			return KERN_INVALID_ARGUMENT;
 732  		}
 733  	}
 734  
 735  	ledger_t sum_ledger = ledger_instantiate(coalition_task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES);
 736  	if (sum_ledger == LEDGER_NULL) {
 737  		return KERN_RESOURCE_SHORTAGE;
 738  	}
 739  
 740  	coalition_lock(coal);
 741  
 742  	/*
 743  	 * Start with the coalition's ledger, which holds the totals from all
 744  	 * the dead tasks.
 745  	 */
 746  	ledger_rollup(sum_ledger, coal->r.ledger);
 747  	uint64_t bytesread = coal->r.bytesread;
 748  	uint64_t byteswritten = coal->r.byteswritten;
 749  	uint64_t gpu_time = coal->r.gpu_time;
 750  	uint64_t energy = coal->r.energy;
 751  	uint64_t logical_immediate_writes = coal->r.logical_immediate_writes;
 752  	uint64_t logical_deferred_writes = coal->r.logical_deferred_writes;
 753  	uint64_t logical_invalidated_writes = coal->r.logical_invalidated_writes;
 754  	uint64_t logical_metadata_writes = coal->r.logical_metadata_writes;
 755  	uint64_t logical_immediate_writes_to_external = coal->r.logical_immediate_writes_to_external;
 756  	uint64_t logical_deferred_writes_to_external = coal->r.logical_deferred_writes_to_external;
 757  	uint64_t logical_invalidated_writes_to_external = coal->r.logical_invalidated_writes_to_external;
 758  	uint64_t logical_metadata_writes_to_external = coal->r.logical_metadata_writes_to_external;
 759  #if CONFIG_PHYS_WRITE_ACCT
 760  	uint64_t fs_metadata_writes = coal->r.fs_metadata_writes;
 761  #endif /* CONFIG_PHYS_WRITE_ACCT */
 762  	int64_t cpu_time_billed_to_me = 0;
 763  	int64_t cpu_time_billed_to_others = 0;
 764  	int64_t energy_billed_to_me = 0;
 765  	int64_t energy_billed_to_others = 0;
 766  	uint64_t cpu_ptime = coal->r.cpu_ptime;
 767  	uint64_t cpu_time_eqos[COALITION_NUM_THREAD_QOS_TYPES];
 768  	memcpy(cpu_time_eqos, coal->r.cpu_time_eqos, sizeof(cpu_time_eqos));
 769  	uint64_t cpu_time_rqos[COALITION_NUM_THREAD_QOS_TYPES];
 770  	memcpy(cpu_time_rqos, coal->r.cpu_time_rqos, sizeof(cpu_time_rqos));
 771  	uint64_t cpu_instructions = coal->r.cpu_instructions;
 772  	uint64_t cpu_cycles = coal->r.cpu_cycles;
 773  
 774  	/*
 775  	 * Add to that all the active tasks' ledgers. Tasks cannot deallocate
 776  	 * out from under us, since we hold the coalition lock.
 777  	 */
 778  	task_t task;
 779  	qe_foreach_element(task, &coal->r.tasks, task_coalition[COALITION_TYPE_RESOURCE]) {
 780  		/*
 781  		 * Rolling up stats for exec copy task or exec'd task will lead to double accounting.
 782  		 * Cannot take task lock after taking coaliton lock
 783  		 */
 784  		if (task_is_exec_copy(task) || task_did_exec(task)) {
 785  			continue;
 786  		}
 787  
 788  		ledger_rollup(sum_ledger, task->ledger);
 789  		bytesread += task->task_io_stats->disk_reads.size;
 790  		byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size;
 791  #if defined(__x86_64__)
 792  		gpu_time += task_gpu_utilisation(task);
 793  #endif /* defined(__x86_64__) */
 794  
 795  #if defined(__arm__) || defined(__arm64__)
 796  		energy += task_energy(task);
 797  #endif /* defined(__arm__) || defined(__arm64__) */
 798  
 799  		logical_immediate_writes += task->task_writes_counters_internal.task_immediate_writes;
 800  		logical_deferred_writes += task->task_writes_counters_internal.task_deferred_writes;
 801  		logical_invalidated_writes += task->task_writes_counters_internal.task_invalidated_writes;
 802  		logical_metadata_writes += task->task_writes_counters_internal.task_metadata_writes;
 803  		logical_immediate_writes_to_external += task->task_writes_counters_external.task_immediate_writes;
 804  		logical_deferred_writes_to_external += task->task_writes_counters_external.task_deferred_writes;
 805  		logical_invalidated_writes_to_external += task->task_writes_counters_external.task_invalidated_writes;
 806  		logical_metadata_writes_to_external += task->task_writes_counters_external.task_metadata_writes;
 807  #if CONFIG_PHYS_WRITE_ACCT
 808  		fs_metadata_writes += task->task_fs_metadata_writes;
 809  #endif /* CONFIG_PHYS_WRITE_ACCT */
 810  
 811  		cpu_ptime += task_cpu_ptime(task);
 812  		task_update_cpu_time_qos_stats(task, cpu_time_eqos, cpu_time_rqos);
 813  #if MONOTONIC
 814  		uint64_t counts[MT_CORE_NFIXED] = {};
 815  		(void)mt_fixed_task_counts(task, counts);
 816  		cpu_cycles += counts[MT_CORE_CYCLES];
 817  #if defined(MT_CORE_INSTRS)
 818  		cpu_instructions += counts[MT_CORE_INSTRS];
 819  #endif /* defined(MT_CORE_INSTRS) */
 820  #endif /* MONOTONIC */
 821  	}
 822  
 823  	kr = ledger_get_balance(sum_ledger, task_ledgers.cpu_time_billed_to_me, (int64_t *)&cpu_time_billed_to_me);
 824  	if (kr != KERN_SUCCESS || cpu_time_billed_to_me < 0) {
 825  		cpu_time_billed_to_me = 0;
 826  	}
 827  
 828  	kr = ledger_get_balance(sum_ledger, task_ledgers.cpu_time_billed_to_others, (int64_t *)&cpu_time_billed_to_others);
 829  	if (kr != KERN_SUCCESS || cpu_time_billed_to_others < 0) {
 830  		cpu_time_billed_to_others = 0;
 831  	}
 832  
 833  	kr = ledger_get_balance(sum_ledger, task_ledgers.energy_billed_to_me, (int64_t *)&energy_billed_to_me);
 834  	if (kr != KERN_SUCCESS || energy_billed_to_me < 0) {
 835  		energy_billed_to_me = 0;
 836  	}
 837  
 838  	kr = ledger_get_balance(sum_ledger, task_ledgers.energy_billed_to_others, (int64_t *)&energy_billed_to_others);
 839  	if (kr != KERN_SUCCESS || energy_billed_to_others < 0) {
 840  		energy_billed_to_others = 0;
 841  	}
 842  
 843  	/* collect information from the coalition itself */
 844  	cru_out->tasks_started = coal->r.task_count;
 845  	cru_out->tasks_exited = coal->r.dead_task_count;
 846  
 847  	uint64_t time_nonempty = coal->r.time_nonempty;
 848  	uint64_t last_became_nonempty_time = coal->r.last_became_nonempty_time;
 849  
 850  	coalition_unlock(coal);
 851  
 852  	/* Copy the totals out of sum_ledger */
 853  	kr = ledger_get_entries(sum_ledger, task_ledgers.cpu_time,
 854  	    &credit, &debit);
 855  	if (kr != KERN_SUCCESS) {
 856  		credit = 0;
 857  	}
 858  	cru_out->cpu_time = credit;
 859  	cru_out->cpu_time_billed_to_me = (uint64_t)cpu_time_billed_to_me;
 860  	cru_out->cpu_time_billed_to_others = (uint64_t)cpu_time_billed_to_others;
 861  	cru_out->energy_billed_to_me = (uint64_t)energy_billed_to_me;
 862  	cru_out->energy_billed_to_others = (uint64_t)energy_billed_to_others;
 863  
 864  	kr = ledger_get_entries(sum_ledger, task_ledgers.interrupt_wakeups,
 865  	    &credit, &debit);
 866  	if (kr != KERN_SUCCESS) {
 867  		credit = 0;
 868  	}
 869  	cru_out->interrupt_wakeups = credit;
 870  
 871  	kr = ledger_get_entries(sum_ledger, task_ledgers.platform_idle_wakeups,
 872  	    &credit, &debit);
 873  	if (kr != KERN_SUCCESS) {
 874  		credit = 0;
 875  	}
 876  	cru_out->platform_idle_wakeups = credit;
 877  
 878  	cru_out->bytesread = bytesread;
 879  	cru_out->byteswritten = byteswritten;
 880  	cru_out->gpu_time = gpu_time;
 881  	cru_out->energy = energy;
 882  	cru_out->logical_immediate_writes = logical_immediate_writes;
 883  	cru_out->logical_deferred_writes = logical_deferred_writes;
 884  	cru_out->logical_invalidated_writes = logical_invalidated_writes;
 885  	cru_out->logical_metadata_writes = logical_metadata_writes;
 886  	cru_out->logical_immediate_writes_to_external = logical_immediate_writes_to_external;
 887  	cru_out->logical_deferred_writes_to_external = logical_deferred_writes_to_external;
 888  	cru_out->logical_invalidated_writes_to_external = logical_invalidated_writes_to_external;
 889  	cru_out->logical_metadata_writes_to_external = logical_metadata_writes_to_external;
 890  #if CONFIG_PHYS_WRITE_ACCT
 891  	cru_out->fs_metadata_writes = fs_metadata_writes;
 892  #else
 893  	cru_out->fs_metadata_writes = 0;
 894  #endif /* CONFIG_PHYS_WRITE_ACCT */
 895  	cru_out->cpu_ptime = cpu_ptime;
 896  	cru_out->cpu_time_eqos_len = COALITION_NUM_THREAD_QOS_TYPES;
 897  	memcpy(cru_out->cpu_time_eqos, cpu_time_eqos, sizeof(cru_out->cpu_time_eqos));
 898  	cru_out->cpu_cycles = cpu_cycles;
 899  	cru_out->cpu_instructions = cpu_instructions;
 900  	ledger_dereference(sum_ledger);
 901  	sum_ledger = LEDGER_NULL;
 902  
 903  #if CONFIG_PHYS_WRITE_ACCT
 904  	// kernel_pm_writes are only recorded under kernel_task coalition
 905  	if (coalition_id(coal) == COALITION_ID_KERNEL) {
 906  		cru_out->pm_writes = kernel_pm_writes;
 907  	} else {
 908  		cru_out->pm_writes = 0;
 909  	}
 910  #else
 911  	cru_out->pm_writes = 0;
 912  #endif /* CONFIG_PHYS_WRITE_ACCT */
 913  
 914  	if (last_became_nonempty_time) {
 915  		time_nonempty += mach_absolute_time() - last_became_nonempty_time;
 916  	}
 917  	absolutetime_to_nanoseconds(time_nonempty, &cru_out->time_nonempty);
 918  
 919  	return KERN_SUCCESS;
 920  }
 921  
 922  /*
 923   *
 924   * COALITION_TYPE_JETSAM
 925   *
 926   */
/*
 * Initialize the jetsam-specific state of a new coalition: no leader and
 * three empty role queues. The 'privileged' flag is unused for this type.
 */
static kern_return_t
i_coal_jetsam_init(coalition_t coal, boolean_t privileged)
{
	assert(coal && coal->type == COALITION_TYPE_JETSAM);
	(void)privileged;

	coal->j.leader = TASK_NULL;
	queue_head_init(coal->j.extensions);
	queue_head_init(coal->j.services);
	queue_head_init(coal->j.other);

#if CONFIG_THREAD_GROUPS
	/*
	 * Bind the coalition to a thread group based on its role: system and
	 * background coalitions share the corresponding well-known groups;
	 * adaptive coalitions share one group only when merge_adaptive_coalitions
	 * is set. All other roles get a private thread group. Every path leaves
	 * a retained reference that is dropped in i_coal_jetsam_dealloc().
	 */
	switch (coal->role) {
	case COALITION_ROLE_SYSTEM:
		coal->j.thread_group = thread_group_find_by_id_and_retain(THREAD_GROUP_SYSTEM);
		break;
	case COALITION_ROLE_BACKGROUND:
		coal->j.thread_group = thread_group_find_by_id_and_retain(THREAD_GROUP_BACKGROUND);
		break;
	case COALITION_ROLE_ADAPTIVE:
		if (merge_adaptive_coalitions) {
			coal->j.thread_group = thread_group_find_by_id_and_retain(THREAD_GROUP_ADAPTIVE);
		} else {
			coal->j.thread_group = thread_group_create_and_retain();
		}
		break;
	default:
		coal->j.thread_group = thread_group_create_and_retain();
	}
	assert(coal->j.thread_group != NULL);
#endif
	return KERN_SUCCESS;
}
 960  
/*
 * Tear down the jetsam-specific state of a coalition. Invoked via
 * coal_call from coalition_release() with the coalition lock held,
 * after the last reference is dropped; all role queues must be empty.
 */
static void
i_coal_jetsam_dealloc(__unused coalition_t coal)
{
	assert(coal && coal->type == COALITION_TYPE_JETSAM);

	/* the coalition should be completely clear at this point */
	assert(queue_empty(&coal->j.extensions));
	assert(queue_empty(&coal->j.services));
	assert(queue_empty(&coal->j.other));
	assert(coal->j.leader == TASK_NULL);

#if CONFIG_THREAD_GROUPS
	/* disassociate from the thread group */
	assert(coal->j.thread_group != NULL);
	/* drops the reference taken in i_coal_jetsam_init() (or the swapped-in one) */
	thread_group_release(coal->j.thread_group);
	coal->j.thread_group = NULL;
#endif
}
 979  
 980  static kern_return_t
 981  i_coal_jetsam_adopt_task(coalition_t coal, task_t task)
 982  {
 983  	struct i_jetsam_coalition *cj;
 984  	assert(coal && coal->type == COALITION_TYPE_JETSAM);
 985  
 986  	cj = &coal->j;
 987  
 988  	assert(queue_empty(&task->task_coalition[COALITION_TYPE_JETSAM]));
 989  
 990  	/* put each task initially in the "other" list */
 991  	enqueue_tail(&cj->other, &task->task_coalition[COALITION_TYPE_JETSAM]);
 992  	coal_dbg("coalition %lld adopted PID:%d as UNDEF",
 993  	    coal->id, task_pid(task));
 994  
 995  	return KERN_SUCCESS;
 996  }
 997  
 998  static kern_return_t
 999  i_coal_jetsam_remove_task(coalition_t coal, task_t task)
1000  {
1001  	assert(coal && coal->type == COALITION_TYPE_JETSAM);
1002  	assert(task->coalition[COALITION_TYPE_JETSAM] == coal);
1003  
1004  	coal_dbg("removing PID:%d from coalition id:%lld",
1005  	    task_pid(task), coal->id);
1006  
1007  	if (task == coal->j.leader) {
1008  		coal->j.leader = NULL;
1009  		coal_dbg("    PID:%d was the leader!", task_pid(task));
1010  	} else {
1011  		assert(!queue_empty(&task->task_coalition[COALITION_TYPE_JETSAM]));
1012  	}
1013  
1014  	/* remove the task from the specific coalition role queue */
1015  	remqueue(&task->task_coalition[COALITION_TYPE_JETSAM]);
1016  	queue_chain_init(task->task_coalition[COALITION_TYPE_RESOURCE]);
1017  
1018  	return KERN_SUCCESS;
1019  }
1020  
/*
 * Assign 'role' to 'task' within jetsam coalition 'coal' by moving the task
 * onto the queue (or leader slot) for that role. Callers in this file take
 * the coalition lock before invoking this via coal_call (see
 * coalitions_set_roles).
 */
static kern_return_t
i_coal_jetsam_set_taskrole(coalition_t coal, task_t task, int role)
{
	struct i_jetsam_coalition *cj;
	queue_t q = NULL;
	assert(coal && coal->type == COALITION_TYPE_JETSAM);
	assert(task->coalition[COALITION_TYPE_JETSAM] == coal);

	cj = &coal->j;

	switch (role) {
	case COALITION_TASKROLE_LEADER:
		coal_dbg("setting PID:%d as LEADER of %lld",
		    task_pid(task), coal->id);
		if (cj->leader != TASK_NULL) {
			/* re-queue the exiting leader onto the "other" list */
			coal_dbg("    re-queue existing leader (%d) as OTHER",
			    task_pid(cj->leader));
			re_queue_tail(&cj->other, &cj->leader->task_coalition[COALITION_TYPE_JETSAM]);
		}
		/*
		 * remove the task from the "other" list
		 * (where it was put by default)
		 */
		remqueue(&task->task_coalition[COALITION_TYPE_JETSAM]);
		queue_chain_init(task->task_coalition[COALITION_TYPE_JETSAM]);

		/* set the coalition leader (the leader lives off-queue) */
		cj->leader = task;
		break;
	case COALITION_TASKROLE_XPC:
		coal_dbg("setting PID:%d as XPC in %lld",
		    task_pid(task), coal->id);
		q = (queue_t)&cj->services;
		break;
	case COALITION_TASKROLE_EXT:
		coal_dbg("setting PID:%d as EXT in %lld",
		    task_pid(task), coal->id);
		q = (queue_t)&cj->extensions;
		break;
	case COALITION_TASKROLE_NONE:
		/*
		 * Tasks with a role of "none" should fall through to an
		 * undefined role so long as the task is currently a member
		 * of the coalition. This scenario can happen if a task is
		 * killed (usually via jetsam) during exec.
		 */
		if (task->coalition[COALITION_TYPE_JETSAM] != coal) {
			panic("%s: task %p attempting to set role %d "
			    "in coalition %p to which it does not belong!", __func__, task, role, coal);
		}
		OS_FALLTHROUGH;
	case COALITION_TASKROLE_UNDEF:
		coal_dbg("setting PID:%d as UNDEF in %lld",
		    task_pid(task), coal->id);
		q = (queue_t)&cj->other;
		break;
	default:
		panic("%s: invalid role(%d) for task", __func__, role);
		return KERN_INVALID_ARGUMENT;
	}

	/* for non-leader roles, move the task onto the selected role queue */
	if (q != NULL) {
		re_queue_tail(q, &task->task_coalition[COALITION_TYPE_JETSAM]);
	}

	return KERN_SUCCESS;
}
1089  
1090  int
1091  i_coal_jetsam_get_taskrole(coalition_t coal, task_t task)
1092  {
1093  	struct i_jetsam_coalition *cj;
1094  	task_t t;
1095  
1096  	assert(coal && coal->type == COALITION_TYPE_JETSAM);
1097  	assert(task->coalition[COALITION_TYPE_JETSAM] == coal);
1098  
1099  	cj = &coal->j;
1100  
1101  	if (task == cj->leader) {
1102  		return COALITION_TASKROLE_LEADER;
1103  	}
1104  
1105  	qe_foreach_element(t, &cj->services, task_coalition[COALITION_TYPE_JETSAM]) {
1106  		if (t == task) {
1107  			return COALITION_TASKROLE_XPC;
1108  		}
1109  	}
1110  
1111  	qe_foreach_element(t, &cj->extensions, task_coalition[COALITION_TYPE_JETSAM]) {
1112  		if (t == task) {
1113  			return COALITION_TASKROLE_EXT;
1114  		}
1115  	}
1116  
1117  	qe_foreach_element(t, &cj->other, task_coalition[COALITION_TYPE_JETSAM]) {
1118  		if (t == task) {
1119  			return COALITION_TASKROLE_UNDEF;
1120  		}
1121  	}
1122  
1123  	/* task not in the coalition?! */
1124  	return COALITION_TASKROLE_NONE;
1125  }
1126  
1127  static void
1128  i_coal_jetsam_iterate_tasks(coalition_t coal, void *ctx, void (*callback)(coalition_t, void *, task_t))
1129  {
1130  	struct i_jetsam_coalition *cj;
1131  	task_t t;
1132  
1133  	assert(coal && coal->type == COALITION_TYPE_JETSAM);
1134  
1135  	cj = &coal->j;
1136  
1137  	if (cj->leader) {
1138  		callback(coal, ctx, cj->leader);
1139  	}
1140  
1141  	qe_foreach_element(t, &cj->services, task_coalition[COALITION_TYPE_JETSAM])
1142  	callback(coal, ctx, t);
1143  
1144  	qe_foreach_element(t, &cj->extensions, task_coalition[COALITION_TYPE_JETSAM])
1145  	callback(coal, ctx, t);
1146  
1147  	qe_foreach_element(t, &cj->other, task_coalition[COALITION_TYPE_JETSAM])
1148  	callback(coal, ctx, t);
1149  }
1150  
1151  
1152  /*
1153   *
1154   * Main Coalition implementation
1155   *
1156   */
1157  
1158  /*
1159   * coalition_create_internal
1160   * Returns: New coalition object, referenced for the caller and unlocked.
1161   * Condition: coalitions_list_lock must be UNLOCKED.
1162   */
kern_return_t
coalition_create_internal(int type, int role, boolean_t privileged, coalition_t *out, uint64_t *coalition_id)
{
	kern_return_t kr;
	struct coalition *new_coal;
	uint64_t cid;
	uint32_t ctype;

	/* reject out-of-range coalition types */
	if (type < 0 || type > COALITION_TYPE_MAX) {
		return KERN_INVALID_ARGUMENT;
	}

	new_coal = (struct coalition *)zalloc(coalition_zone);
	if (new_coal == COALITION_NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}
	bzero(new_coal, sizeof(*new_coal));

	new_coal->type = type;
	new_coal->role = role;

	/* initialize type-specific resources */
	kr = coal_call(new_coal, init, privileged);
	if (kr != KERN_SUCCESS) {
		/* type-specific init failed; undo the allocation */
		zfree(coalition_zone, new_coal);
		return kr;
	}

	/* One for caller, one for coalitions list */
	new_coal->ref_count = 2;

	new_coal->privileged = privileged ? TRUE : FALSE;
#if DEVELOPMENT || DEBUG
	new_coal->should_notify = 1;
#endif

	lck_mtx_init(&new_coal->lock, &coalitions_lck_grp, LCK_ATTR_NULL);

	/* assign the id and publish on the global list under the list lock */
	lck_rw_lock_exclusive(&coalitions_list_lock);
	new_coal->id = coalition_next_id++;
	coalition_count++;
	enqueue_tail(&coalitions_q, &new_coal->coalitions);

#if CONFIG_THREAD_GROUPS
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_COALITION, MACH_COALITION_NEW),
	    new_coal->id, new_coal->type,
	    (new_coal->type == COALITION_TYPE_JETSAM && new_coal->j.thread_group) ?
	    thread_group_get_id(new_coal->j.thread_group) : 0);

#else
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_COALITION, MACH_COALITION_NEW),
	    new_coal->id, new_coal->type);
#endif
	/* snapshot id/type so they can be reported after dropping the lock */
	cid = new_coal->id;
	ctype = new_coal->type;
	lck_rw_unlock_exclusive(&coalitions_list_lock);

	coal_dbg("id:%llu, type:%s", cid, coal_type_str(ctype));

	/* optionally report the new coalition id to the caller */
	if (coalition_id != NULL) {
		*coalition_id = cid;
	}

	*out = new_coal;
	return KERN_SUCCESS;
}
1229  
1230  /*
1231   * coalition_release
1232   * Condition: coalition must be UNLOCKED.
1233   * */
void
coalition_release(coalition_t coal)
{
	/* TODO: This can be done with atomics. */
	coalition_lock(coal);
	coal->ref_count--;

#if COALITION_DEBUG
	/* rc/ac feed coal_dbg below, which compiles away without COALITION_DEBUG */
	uint32_t rc = coal->ref_count;
	uint32_t ac = coal->active_count;
#endif /* COALITION_DEBUG */

	coal_dbg("id:%llu type:%s ref_count:%u active_count:%u%s",
	    coal->id, coal_type_str(coal->type), rc, ac,
	    rc <= 0 ? ", will deallocate now" : "");

	/* other references remain: nothing more to do */
	if (coal->ref_count > 0) {
		coalition_unlock(coal);
		return;
	}

	/* last reference: the coalition must be fully terminated and reaped */
	assert(coal->termrequested);
	assert(coal->terminated);
	assert(coal->active_count == 0);
	assert(coal->reaped);
	assert(coal->focal_task_count == 0);
	assert(coal->nonfocal_task_count == 0);
#if CONFIG_THREAD_GROUPS
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_COALITION, MACH_COALITION_FREE),
	    coal->id, coal->type,
	    coal->type == COALITION_TYPE_JETSAM ?
	    coal->j.thread_group : 0);
#else
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_COALITION, MACH_COALITION_FREE),
	    coal->id, coal->type);
#endif

	/* release type-specific state (e.g. the jetsam thread group) */
	coal_call(coal, dealloc);

	coalition_unlock(coal);

	lck_mtx_destroy(&coal->lock, &coalitions_lck_grp);

	zfree(coalition_zone, coal);
}
1279  
1280  /*
1281   * coalition_find_by_id_internal
1282   * Returns: Coalition object with specified id, NOT referenced.
1283   *          If not found, returns COALITION_NULL.
1284   *          If found, returns a locked coalition.
1285   *
1286   * Condition: No locks held
1287   */
1288  static coalition_t
1289  coalition_find_by_id_internal(uint64_t coal_id)
1290  {
1291  	coalition_t coal;
1292  
1293  	if (coal_id == 0) {
1294  		return COALITION_NULL;
1295  	}
1296  
1297  	lck_rw_lock_shared(&coalitions_list_lock);
1298  	qe_foreach_element(coal, &coalitions_q, coalitions) {
1299  		if (coal->id == coal_id) {
1300  			coalition_lock(coal);
1301  			lck_rw_unlock_shared(&coalitions_list_lock);
1302  			return coal;
1303  		}
1304  	}
1305  	lck_rw_unlock_shared(&coalitions_list_lock);
1306  
1307  	return COALITION_NULL;
1308  }
1309  
1310  /*
1311   * coalition_find_by_id
1312   * Returns: Coalition object with specified id, referenced.
1313   * Condition: coalitions_list_lock must be UNLOCKED.
1314   */
coalition_t
coalition_find_by_id(uint64_t cid)
{
	coalition_t coal = coalition_find_by_id_internal(cid);

	if (coal == COALITION_NULL) {
		return COALITION_NULL;
	}

	/* coal is locked */

	/* a reaped coalition is on its way to deallocation; don't hand it out */
	if (coal->reaped) {
		coalition_unlock(coal);
		return COALITION_NULL;
	}

	/* a zero ref_count here means the last reference was already dropped */
	if (coal->ref_count == 0) {
		panic("resurrecting coalition %p id:%llu type:%s, active_count:%u\n",
		    coal, coal->id, coal_type_str(coal->type), coal->active_count);
	}
	/* take the reference promised to the caller */
	coal->ref_count++;
#if COALITION_DEBUG
	uint32_t rc = coal->ref_count; /* only consumed by coal_dbg below */
#endif

	coalition_unlock(coal);

	coal_dbg("id:%llu type:%s ref_count:%u",
	    coal->id, coal_type_str(coal->type), rc);

	return coal;
}
1347  
1348  /*
1349   * coalition_find_and_activate_by_id
1350   * Returns: Coalition object with specified id, referenced, and activated.
1351   * Condition: coalitions_list_lock must be UNLOCKED.
1352   * This is the function to use when putting a 'new' thing into a coalition,
1353   * like posix_spawn of an XPC service by launchd.
1354   * See also coalition_extend_active.
1355   */
coalition_t
coalition_find_and_activate_by_id(uint64_t cid)
{
	coalition_t coal = coalition_find_by_id_internal(cid);

	if (coal == COALITION_NULL) {
		return COALITION_NULL;
	}

	/* coal is locked */

	if (coal->reaped || coal->terminated) {
		/* Too late to put something new into this coalition, it's
		 * already on its way out the door */
		coalition_unlock(coal);
		return COALITION_NULL;
	}

	/* a zero ref_count here means the last reference was already dropped */
	if (coal->ref_count == 0) {
		panic("resurrecting coalition %p id:%llu type:%s, active_count:%u\n",
		    coal, coal->id, coal_type_str(coal->type), coal->active_count);
	}

	/* take both a reference and an activation for the caller */
	coal->ref_count++;
	coal->active_count++;

#if COALITION_DEBUG
	/* rc/ac only feed coal_dbg, which compiles away without COALITION_DEBUG */
	uint32_t rc = coal->ref_count;
	uint32_t ac = coal->active_count;
#endif

	coalition_unlock(coal);

	coal_dbg("id:%llu type:%s ref_count:%u, active_count:%u",
	    coal->id, coal_type_str(coal->type), rc, ac);

	return coal;
}
1394  
/* Return the coalition's unique 64-bit id; coal must be non-NULL. */
uint64_t
coalition_id(coalition_t coal)
{
	assert(coal != COALITION_NULL);
	return coal->id;
}
1401  
1402  void
1403  task_coalition_ids(task_t task, uint64_t ids[COALITION_NUM_TYPES])
1404  {
1405  	int i;
1406  	for (i = 0; i < COALITION_NUM_TYPES; i++) {
1407  		if (task->coalition[i]) {
1408  			ids[i] = task->coalition[i]->id;
1409  		} else {
1410  			ids[i] = 0;
1411  		}
1412  	}
1413  }
1414  
1415  void
1416  task_coalition_roles(task_t task, int roles[COALITION_NUM_TYPES])
1417  {
1418  	int i;
1419  	memset(roles, 0, COALITION_NUM_TYPES * sizeof(roles[0]));
1420  
1421  	for (i = 0; i < COALITION_NUM_TYPES; i++) {
1422  		if (task->coalition[i]) {
1423  			coalition_lock(task->coalition[i]);
1424  			roles[i] = coal_call(task->coalition[i],
1425  			    get_taskrole, task);
1426  			coalition_unlock(task->coalition[i]);
1427  		} else {
1428  			roles[i] = COALITION_TASKROLE_NONE;
1429  		}
1430  	}
1431  }
1432  
1433  
/* Return the coalition's type (COALITION_TYPE_*). */
int
coalition_type(coalition_t coal)
{
	return coal->type;
}
1439  
1440  boolean_t
1441  coalition_term_requested(coalition_t coal)
1442  {
1443  	return coal->termrequested;
1444  }
1445  
1446  boolean_t
1447  coalition_is_terminated(coalition_t coal)
1448  {
1449  	return coal->terminated;
1450  }
1451  
1452  boolean_t
1453  coalition_is_reaped(coalition_t coal)
1454  {
1455  	return coal->reaped;
1456  }
1457  
1458  boolean_t
1459  coalition_is_privileged(coalition_t coal)
1460  {
1461  	return coal->privileged || unrestrict_coalition_syscalls;
1462  }
1463  
1464  boolean_t
1465  task_is_in_privileged_coalition(task_t task, int type)
1466  {
1467  	if (type < 0 || type > COALITION_TYPE_MAX) {
1468  		return FALSE;
1469  	}
1470  	if (unrestrict_coalition_syscalls) {
1471  		return TRUE;
1472  	}
1473  	if (!task->coalition[type]) {
1474  		return FALSE;
1475  	}
1476  	return task->coalition[type]->privileged;
1477  }
1478  
1479  void
1480  task_coalition_update_gpu_stats(task_t task, uint64_t gpu_ns_delta)
1481  {
1482  	coalition_t coal;
1483  
1484  	assert(task != TASK_NULL);
1485  	if (gpu_ns_delta == 0) {
1486  		return;
1487  	}
1488  
1489  	coal = task->coalition[COALITION_TYPE_RESOURCE];
1490  	assert(coal != COALITION_NULL);
1491  
1492  	coalition_lock(coal);
1493  	coal->r.gpu_time += gpu_ns_delta;
1494  	coalition_unlock(coal);
1495  }
1496  
1497  boolean_t
1498  task_coalition_adjust_focal_count(task_t task, int count, uint32_t *new_count)
1499  {
1500  	coalition_t coal = task->coalition[COALITION_FOCAL_TASKS_ACCOUNTING];
1501  	if (coal == COALITION_NULL) {
1502  		return FALSE;
1503  	}
1504  
1505  	*new_count = os_atomic_add(&coal->focal_task_count, count, relaxed);
1506  	assert(*new_count != UINT32_MAX);
1507  	return TRUE;
1508  }
1509  
1510  uint32_t
1511  task_coalition_focal_count(task_t task)
1512  {
1513  	coalition_t coal = task->coalition[COALITION_FOCAL_TASKS_ACCOUNTING];
1514  	if (coal == COALITION_NULL) {
1515  		return 0;
1516  	}
1517  
1518  	return coal->focal_task_count;
1519  }
1520  
1521  boolean_t
1522  task_coalition_adjust_nonfocal_count(task_t task, int count, uint32_t *new_count)
1523  {
1524  	coalition_t coal = task->coalition[COALITION_FOCAL_TASKS_ACCOUNTING];
1525  	if (coal == COALITION_NULL) {
1526  		return FALSE;
1527  	}
1528  
1529  	*new_count = os_atomic_add(&coal->nonfocal_task_count, count, relaxed);
1530  	assert(*new_count != UINT32_MAX);
1531  	return TRUE;
1532  }
1533  
1534  uint32_t
1535  task_coalition_nonfocal_count(task_t task)
1536  {
1537  	coalition_t coal = task->coalition[COALITION_FOCAL_TASKS_ACCOUNTING];
1538  	if (coal == COALITION_NULL) {
1539  		return 0;
1540  	}
1541  
1542  	return coal->nonfocal_task_count;
1543  }
1544  
/* Mark the coalition as "efficient", under the coalition lock. */
void
coalition_set_efficient(coalition_t coal)
{
	coalition_lock(coal);
	coal->efficient = TRUE;
	coalition_unlock(coal);
}
1552  
1553  #if CONFIG_THREAD_GROUPS
1554  struct thread_group *
1555  task_coalition_get_thread_group(task_t task)
1556  {
1557  	coalition_t coal = task->coalition[COALITION_TYPE_JETSAM];
1558  	/* return system thread group for non-jetsam coalitions */
1559  	if (coal == COALITION_NULL) {
1560  		return init_coalition[COALITION_TYPE_JETSAM]->j.thread_group;
1561  	}
1562  	return coal->j.thread_group;
1563  }
1564  
1565  
1566  struct thread_group *
1567  kdp_coalition_get_thread_group(coalition_t coal)
1568  {
1569  	if (coal->type != COALITION_TYPE_JETSAM) {
1570  		return NULL;
1571  	}
1572  	assert(coal->j.thread_group != NULL);
1573  	return coal->j.thread_group;
1574  }
1575  
1576  struct thread_group *
1577  coalition_get_thread_group(coalition_t coal)
1578  {
1579  	if (coal->type != COALITION_TYPE_JETSAM) {
1580  		return NULL;
1581  	}
1582  	assert(coal->j.thread_group != NULL);
1583  	return thread_group_retain(coal->j.thread_group);
1584  }
1585  
1586  void
1587  coalition_set_thread_group(coalition_t coal, struct thread_group *tg)
1588  {
1589  	assert(coal != COALITION_NULL);
1590  	assert(tg != NULL);
1591  
1592  	if (coal->type != COALITION_TYPE_JETSAM) {
1593  		return;
1594  	}
1595  	struct thread_group *old_tg = coal->j.thread_group;
1596  	assert(old_tg != NULL);
1597  	coal->j.thread_group = tg;
1598  
1599  	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_COALITION, MACH_COALITION_THREAD_GROUP_SET),
1600  	    coal->id, coal->type, thread_group_get_id(tg));
1601  
1602  	thread_group_release(old_tg);
1603  }
1604  
1605  void
1606  task_coalition_thread_group_focal_update(task_t task)
1607  {
1608  	assert(task->coalition[COALITION_FOCAL_TASKS_ACCOUNTING] != COALITION_NULL);
1609  	thread_group_flags_update_lock();
1610  	uint32_t focal_count = task_coalition_focal_count(task);
1611  	if (focal_count) {
1612  		thread_group_set_flags_locked(task_coalition_get_thread_group(task), THREAD_GROUP_FLAGS_UI_APP);
1613  	} else {
1614  		thread_group_clear_flags_locked(task_coalition_get_thread_group(task), THREAD_GROUP_FLAGS_UI_APP);
1615  	}
1616  	thread_group_flags_update_unlock();
1617  }
1618  
1619  #endif
1620  
/*
 * Invoke 'callback(coal, ctx, task)' on every task in 'coal', via the
 * type-specific iterator, while holding the coalition lock.
 */
void
coalition_for_each_task(coalition_t coal, void *ctx,
    void (*callback)(coalition_t, void *, task_t))
{
	assert(coal != COALITION_NULL);

	coal_dbg("iterating tasks in coalition %p id:%llu type:%s, active_count:%u",
	    coal, coal->id, coal_type_str(coal->type), coal->active_count);

	coalition_lock(coal);

	coal_call(coal, iterate_tasks, ctx, callback);

	coalition_unlock(coal);
}
1636  
1637  
/*
 * Drop one activation on the coalition. If termination was requested and
 * this was the last activation, mark the coalition terminated and (at
 * most once) notify userspace, outside the coalition lock.
 */
void
coalition_remove_active(coalition_t coal)
{
	coalition_lock(coal);

	assert(!coal->reaped);
	assert(coal->active_count > 0);

	coal->active_count--;

	boolean_t do_notify = FALSE;
	uint64_t notify_id = 0;
	uint32_t notify_flags = 0;
	if (coal->termrequested && coal->active_count == 0) {
		/* We only notify once, when active_count reaches zero.
		 * We just decremented, so if it reached zero, we mustn't have
		 * notified already.
		 */
		assert(!coal->terminated);
		coal->terminated = TRUE;

		assert(!coal->notified);

		coal->notified = TRUE;
#if DEVELOPMENT || DEBUG
		/* notification can be suppressed on dev/debug kernels */
		do_notify = coal->should_notify;
#else
		do_notify = TRUE;
#endif
		/* capture id/flags now; the notify happens after unlock */
		notify_id = coal->id;
		notify_flags = 0;
	}

#if COALITION_DEBUG
	/* only consumed by coal_dbg, which compiles away otherwise */
	uint64_t cid = coal->id;
	uint32_t rc = coal->ref_count;
	int      ac = coal->active_count;
	int      ct = coal->type;
#endif
	coalition_unlock(coal);

	coal_dbg("id:%llu type:%s ref_count:%u, active_count:%u,%s",
	    cid, coal_type_str(ct), rc, ac, do_notify ? " NOTIFY" : " ");

	if (do_notify) {
		coalition_notify_user(notify_id, notify_flags);
	}
}
1686  
1687  /* Used for kernel_task, launchd, launchd's early boot tasks... */
1688  kern_return_t
1689  coalitions_adopt_init_task(task_t task)
1690  {
1691  	kern_return_t kr;
1692  	kr = coalitions_adopt_task(init_coalition, task);
1693  	if (kr != KERN_SUCCESS) {
1694  		panic("failed to adopt task %p into default coalition: %d", task, kr);
1695  	}
1696  	return kr;
1697  }
1698  
1699  /* Used for forked corpses. */
1700  kern_return_t
1701  coalitions_adopt_corpse_task(task_t task)
1702  {
1703  	kern_return_t kr;
1704  	kr = coalitions_adopt_task(corpse_coalition, task);
1705  	if (kr != KERN_SUCCESS) {
1706  		panic("failed to adopt task %p into corpse coalition: %d", task, kr);
1707  	}
1708  	return kr;
1709  }
1710  
1711  /*
1712   * coalition_adopt_task_internal
1713   * Condition: Coalition must be referenced and unlocked. Will fail if coalition
1714   * is already terminated.
1715   */
static kern_return_t
coalition_adopt_task_internal(coalition_t coal, task_t task)
{
	kern_return_t kr;

	/* a task may belong to at most one coalition of each type */
	if (task->coalition[coal->type]) {
		return KERN_ALREADY_IN_SET;
	}

	coalition_lock(coal);

	/* can't join a coalition that is already on its way out */
	if (coal->reaped || coal->terminated) {
		coalition_unlock(coal);
		return KERN_TERMINATED;
	}

	kr = coal_call(coal, adopt_task, task);
	if (kr != KERN_SUCCESS) {
		goto out_unlock;
	}

	/* the task holds both an activation and a reference on the coalition */
	coal->active_count++;

	coal->ref_count++;

	task->coalition[coal->type] = coal;

out_unlock:
#if COALITION_DEBUG
	(void)coal; /* need expression after label */
	/* only consumed by coal_dbg, which compiles away otherwise */
	uint64_t cid = coal->id;
	uint32_t rc = coal->ref_count;
	uint32_t ct = coal->type;
#endif
	if (get_task_uniqueid(task) != UINT64_MAX) {
		/* On 32-bit targets, uniqueid will get truncated to 32 bits */
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_COALITION, MACH_COALITION_ADOPT),
		    coal->id, get_task_uniqueid(task));
	}

	coalition_unlock(coal);

	coal_dbg("task:%d, id:%llu type:%s ref_count:%u, kr=%d",
	    task_pid(task), cid, coal_type_str(ct), rc, kr);
	return kr;
}
1762  
/*
 * Remove 'task' from its coalition of the given type (if any), then drop
 * the activation that the task held via coalition_remove_active().
 * Succeeds trivially when the task has no coalition of that type.
 */
static kern_return_t
coalition_remove_task_internal(task_t task, int type)
{
	kern_return_t kr;

	coalition_t coal = task->coalition[type];

	if (!coal) {
		return KERN_SUCCESS;
	}

	assert(coal->type == (uint32_t)type);

	coalition_lock(coal);

	kr = coal_call(coal, remove_task, task);

#if COALITION_DEBUG
	/* only consumed by coal_dbg, which compiles away otherwise */
	uint64_t cid = coal->id;
	uint32_t rc = coal->ref_count;
	int      ac = coal->active_count;
	int      ct = coal->type;
#endif
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_COALITION, MACH_COALITION_REMOVE),
	    coal->id, get_task_uniqueid(task));
	coalition_unlock(coal);

	coal_dbg("id:%llu type:%s ref_count:%u, active_count:%u, kr=%d",
	    cid, coal_type_str(ct), rc, ac, kr);

	/* drop the activation the task held; must be called unlocked */
	coalition_remove_active(coal);

	return kr;
}
1797  
1798  /*
1799   * coalitions_adopt_task
1800   * Condition: All coalitions must be referenced and unlocked.
1801   * Will fail if any coalition is already terminated.
1802   */
kern_return_t
coalitions_adopt_task(coalition_t *coals, task_t task)
{
	int i;
	kern_return_t kr;

	/* a resource coalition is mandatory */
	if (!coals || coals[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	/* verify that the incoming coalitions are what they say they are */
	for (i = 0; i < COALITION_NUM_TYPES; i++) {
		if (coals[i] && coals[i]->type != (uint32_t)i) {
			return KERN_INVALID_ARGUMENT;
		}
	}

	for (i = 0; i < COALITION_NUM_TYPES; i++) {
		kr = KERN_SUCCESS;
		if (coals[i]) {
			kr = coalition_adopt_task_internal(coals[i], task);
		}
		if (kr != KERN_SUCCESS) {
			/* dis-associate any coalitions that just adopted this task */
			while (--i >= 0) {
				if (task->coalition[i]) {
					coalition_remove_task_internal(task, i);
				}
			}
			break;
		}
	}
	/* KERN_SUCCESS only if every coalition type adopted (or was absent) */
	return kr;
}
1837  
1838  /*
1839   * coalitions_remove_task
1840   * Condition: task must be referenced and UNLOCKED; all task's coalitions must be UNLOCKED
1841   */
1842  kern_return_t
1843  coalitions_remove_task(task_t task)
1844  {
1845  	kern_return_t kr;
1846  	int i;
1847  
1848  	task_lock(task);
1849  	if (!task_is_coalition_member(task)) {
1850  		task_unlock(task);
1851  		return KERN_SUCCESS;
1852  	}
1853  
1854  	task_clear_coalition_member(task);
1855  	task_unlock(task);
1856  
1857  	for (i = 0; i < COALITION_NUM_TYPES; i++) {
1858  		kr = coalition_remove_task_internal(task, i);
1859  		assert(kr == KERN_SUCCESS);
1860  	}
1861  
1862  	return kr;
1863  }
1864  
1865  /*
1866   * task_release_coalitions
1867   * helper function to release references to all coalitions in which
1868   * 'task' is a member.
1869   */
1870  void
1871  task_release_coalitions(task_t task)
1872  {
1873  	int i;
1874  	for (i = 0; i < COALITION_NUM_TYPES; i++) {
1875  		if (task->coalition[i]) {
1876  			coalition_release(task->coalition[i]);
1877  		} else if (i == COALITION_TYPE_RESOURCE) {
1878  			panic("deallocating task %p was not a member of a resource coalition", task);
1879  		}
1880  	}
1881  }
1882  
1883  /*
1884   * coalitions_set_roles
1885   * for each type of coalition, if the task is a member of a coalition of
1886   * that type (given in the coalitions parameter) then set the role of
 * the task within that coalition.
1888   */
1889  kern_return_t
1890  coalitions_set_roles(coalition_t coalitions[COALITION_NUM_TYPES],
1891      task_t task, int roles[COALITION_NUM_TYPES])
1892  {
1893  	kern_return_t kr = KERN_SUCCESS;
1894  	int i;
1895  
1896  	for (i = 0; i < COALITION_NUM_TYPES; i++) {
1897  		if (!coalitions[i]) {
1898  			continue;
1899  		}
1900  		coalition_lock(coalitions[i]);
1901  		kr = coal_call(coalitions[i], set_taskrole, task, roles[i]);
1902  		coalition_unlock(coalitions[i]);
1903  		assert(kr == KERN_SUCCESS);
1904  	}
1905  
1906  	return kr;
1907  }
1908  
1909  /*
 * coalition_request_terminate_internal
1911   * Condition: Coalition must be referenced and UNLOCKED.
1912   */
kern_return_t
coalition_request_terminate_internal(coalition_t coal)
{
	assert(coal->type >= 0 && coal->type <= COALITION_TYPE_MAX);

	/* the default (init) coalition of each type can never be terminated */
	if (coal == init_coalition[coal->type]) {
		return KERN_DEFAULT_SET;
	}

	coalition_lock(coal);

	/* already reaped: this coalition's id is no longer a valid name */
	if (coal->reaped) {
		coalition_unlock(coal);
		return KERN_INVALID_NAME;
	}

	/* a termination request is already in flight (or has completed) */
	if (coal->terminated || coal->termrequested) {
		coalition_unlock(coal);
		return KERN_TERMINATED;
	}

	coal->termrequested = TRUE;

	/* capture notification parameters under the lock; deliver after unlock */
	boolean_t do_notify = FALSE;
	uint64_t note_id = 0;
	uint32_t note_flags = 0;

	if (coal->active_count == 0) {
		/*
		 * We only notify once, when active_count reaches zero.
		 * We just set termrequested to zero. If the active count
		 * was already at zero (tasks died before we could request
		 * a termination notification), we should notify.
		 */
		assert(!coal->terminated);
		coal->terminated = TRUE;

		assert(!coal->notified);

		coal->notified = TRUE;
#if DEVELOPMENT || DEBUG
		/* dev/debug kernels can suppress notification per-coalition */
		do_notify = coal->should_notify;
#else
		do_notify = TRUE;
#endif
		note_id = coal->id;
		note_flags = 0;
	}

	coalition_unlock(coal);

	/* user notification is sent outside the coalition lock */
	if (do_notify) {
		coalition_notify_user(note_id, note_flags);
	}

	return KERN_SUCCESS;
}
1970  
1971  /*
1972   * coalition_reap_internal
1973   * Condition: Coalition must be referenced and UNLOCKED.
1974   */
kern_return_t
coalition_reap_internal(coalition_t coal)
{
	assert(coal->type <= COALITION_TYPE_MAX);

	/* the default (init) coalition of each type can never be reaped */
	if (coal == init_coalition[coal->type]) {
		return KERN_DEFAULT_SET;
	}

	coalition_lock(coal);
	/* reaping is one-shot */
	if (coal->reaped) {
		coalition_unlock(coal);
		return KERN_TERMINATED;
	}
	/* a coalition must have fully terminated before it can be reaped */
	if (!coal->terminated) {
		coalition_unlock(coal);
		return KERN_FAILURE;
	}
	assert(coal->termrequested);
	/* live (active) tasks remain: not yet reapable */
	if (coal->active_count > 0) {
		coalition_unlock(coal);
		return KERN_FAILURE;
	}

	coal->reaped = TRUE;

	/* Caller, launchd, and coalitions list should each have a reference */
	assert(coal->ref_count > 2);

	/* drop the coalition lock before taking the global list lock */
	coalition_unlock(coal);

	lck_rw_lock_exclusive(&coalitions_list_lock);
	coalition_count--;
	remqueue(&coal->coalitions);
	lck_rw_unlock_exclusive(&coalitions_list_lock);

	/* Release the list's reference and launchd's reference. */
	coalition_release(coal);
	coalition_release(coal);

	return KERN_SUCCESS;
}
2017  
2018  #if DEVELOPMENT || DEBUG
2019  int
2020  coalition_should_notify(coalition_t coal)
2021  {
2022  	int should;
2023  	if (!coal) {
2024  		return -1;
2025  	}
2026  	coalition_lock(coal);
2027  	should = coal->should_notify;
2028  	coalition_unlock(coal);
2029  
2030  	return should;
2031  }
2032  
2033  void
2034  coalition_set_notify(coalition_t coal, int notify)
2035  {
2036  	if (!coal) {
2037  		return;
2038  	}
2039  	coalition_lock(coal);
2040  	coal->should_notify = !!notify;
2041  	coalition_unlock(coal);
2042  }
2043  #endif
2044  
void
coalitions_init(void)
{
	kern_return_t kr;
	int i;
	const struct coalition_type *ctype;

	queue_head_init(coalitions_q);

	/* boot-arg may relax the restrictions on coalition syscalls */
	if (!PE_parse_boot_argn("unrestrict_coalition_syscalls", &unrestrict_coalition_syscalls,
	    sizeof(unrestrict_coalition_syscalls))) {
		unrestrict_coalition_syscalls = 0;
	}

	/* "tg_adaptive" boot-arg controls merging of adaptive coalitions */
	if (!PE_parse_boot_argn("tg_adaptive", &merge_adaptive_coalitions,
	    sizeof(merge_adaptive_coalitions))) {
		merge_adaptive_coalitions = 0;
	}

	init_task_ledgers();

	init_coalition_ledgers();

	for (i = 0, ctype = &s_coalition_types[0]; i < COALITION_NUM_TYPES; ctype++, i++) {
		/* verify the entry in the global coalition types array */
		if (ctype->type != i ||
		    !ctype->init ||
		    !ctype->dealloc ||
		    !ctype->adopt_task ||
		    !ctype->remove_task) {
			panic("%s: Malformed coalition type %s(%d) in slot for type:%s(%d)",
			    __func__, coal_type_str(ctype->type), ctype->type, coal_type_str(i), i);
		}
		/* some types may not provide a default coalition */
		if (!ctype->has_default) {
			continue;
		}
		/* create the privileged default ("init") coalition for this type */
		kr = coalition_create_internal(ctype->type, COALITION_ROLE_SYSTEM, TRUE, &init_coalition[ctype->type], NULL);
		if (kr != KERN_SUCCESS) {
			panic("%s: could not create init %s coalition: kr:%d",
			    __func__, coal_type_str(i), kr);
		}
		if (i == COALITION_TYPE_RESOURCE) {
			/* the default resource coalition must carry the kernel's id */
			assert(COALITION_ID_KERNEL == init_coalition[ctype->type]->id);
		}
		/* corpse tasks get a separate per-type coalition */
		kr = coalition_create_internal(ctype->type, COALITION_ROLE_SYSTEM, FALSE, &corpse_coalition[ctype->type], NULL);
		if (kr != KERN_SUCCESS) {
			panic("%s: could not create corpse %s coalition: kr:%d",
			    __func__, coal_type_str(i), kr);
		}
	}

	/* "Leak" our reference to the global object */
}
2098  
2099  /*
2100   * BSD Kernel interface functions
2101   *
2102   */
2103  static void
2104  coalition_fill_procinfo(struct coalition *coal,
2105      struct procinfo_coalinfo *coalinfo)
2106  {
2107  	coalinfo->coalition_id = coal->id;
2108  	coalinfo->coalition_type = coal->type;
2109  	coalinfo->coalition_tasks = coalition_get_task_count(coal);
2110  }
2111  
2112  
2113  int
2114  coalitions_get_list(int type, struct procinfo_coalinfo *coal_list, int list_sz)
2115  {
2116  	int ncoals = 0;
2117  	struct coalition *coal;
2118  
2119  	lck_rw_lock_shared(&coalitions_list_lock);
2120  	qe_foreach_element(coal, &coalitions_q, coalitions) {
2121  		if (!coal->reaped && (type < 0 || type == (int)coal->type)) {
2122  			if (coal_list && ncoals < list_sz) {
2123  				coalition_fill_procinfo(coal, &coal_list[ncoals]);
2124  			}
2125  			++ncoals;
2126  		}
2127  	}
2128  	lck_rw_unlock_shared(&coalitions_list_lock);
2129  
2130  	return ncoals;
2131  }
2132  
2133  /*
 * Return the coalition of the given type to which the task belongs.
2135   */
2136  coalition_t
2137  task_get_coalition(task_t task, int coal_type)
2138  {
2139  	coalition_t c;
2140  
2141  	if (task == NULL || coal_type > COALITION_TYPE_MAX) {
2142  		return COALITION_NULL;
2143  	}
2144  
2145  	c = task->coalition[coal_type];
2146  	assert(c == COALITION_NULL || (int)c->type == coal_type);
2147  	return c;
2148  }
2149  
2150  /*
2151   * Report if the given task is the leader of the given jetsam coalition.
2152   */
2153  boolean_t
2154  coalition_is_leader(task_t task, coalition_t coal)
2155  {
2156  	boolean_t ret = FALSE;
2157  
2158  	if (coal != COALITION_NULL) {
2159  		coalition_lock(coal);
2160  
2161  		ret = (coal->type == COALITION_TYPE_JETSAM && coal->j.leader == task);
2162  
2163  		coalition_unlock(coal);
2164  	}
2165  
2166  	return ret;
2167  }
2168  
kern_return_t
coalition_iterate_stackshot(coalition_iterate_fn_t callout, void *arg, uint32_t coalition_type)
{
	coalition_t coal;
	int i = 0;

	/*
	 * NOTE(review): walks coalitions_q without taking
	 * coalitions_list_lock; presumably only safe in the quiesced
	 * stackshot/debugger context this is named for — confirm callers.
	 */
	qe_foreach_element(coal, &coalitions_q, coalitions) {
		/* validate each element before dereferencing: stackshot must
		 * not fault on stale or unmapped memory */
		if (coal == NULL || !ml_validate_nofault((vm_offset_t)coal, sizeof(struct coalition))) {
			return KERN_FAILURE;
		}

		/* invoke the callout only for coalitions of the requested type */
		if (coalition_type == coal->type) {
			callout(arg, i++, coal);
		}
	}

	return KERN_SUCCESS;
}
2187  
2188  task_t
2189  kdp_coalition_get_leader(coalition_t coal)
2190  {
2191  	if (!coal) {
2192  		return TASK_NULL;
2193  	}
2194  
2195  	if (coal->type == COALITION_TYPE_JETSAM) {
2196  		return coal->j.leader;
2197  	}
2198  	return TASK_NULL;
2199  }
2200  
2201  task_t
2202  coalition_get_leader(coalition_t coal)
2203  {
2204  	task_t leader = TASK_NULL;
2205  
2206  	if (!coal) {
2207  		return TASK_NULL;
2208  	}
2209  
2210  	coalition_lock(coal);
2211  	if (coal->type != COALITION_TYPE_JETSAM) {
2212  		goto out_unlock;
2213  	}
2214  
2215  	leader = coal->j.leader;
2216  	if (leader != TASK_NULL) {
2217  		task_reference(leader);
2218  	}
2219  
2220  out_unlock:
2221  	coalition_unlock(coal);
2222  	return leader;
2223  }
2224  
2225  
2226  int
2227  coalition_get_task_count(coalition_t coal)
2228  {
2229  	int ntasks = 0;
2230  	struct queue_entry *qe;
2231  	if (!coal) {
2232  		return 0;
2233  	}
2234  
2235  	coalition_lock(coal);
2236  	switch (coal->type) {
2237  	case COALITION_TYPE_RESOURCE:
2238  		qe_foreach(qe, &coal->r.tasks)
2239  		ntasks++;
2240  		break;
2241  	case COALITION_TYPE_JETSAM:
2242  		if (coal->j.leader) {
2243  			ntasks++;
2244  		}
2245  		qe_foreach(qe, &coal->j.other)
2246  		ntasks++;
2247  		qe_foreach(qe, &coal->j.extensions)
2248  		ntasks++;
2249  		qe_foreach(qe, &coal->j.services)
2250  		ntasks++;
2251  		break;
2252  	default:
2253  		break;
2254  	}
2255  	coalition_unlock(coal);
2256  
2257  	return ntasks;
2258  }
2259  
2260  
/*
 * i_get_list_footprint
 * Sum the physical footprint (bytes) of every task on 'list', a coalition
 * membership queue linked through task_coalition[type]. Increments *ntasks
 * by the number of tasks visited. Callers in this file invoke it with the
 * owning coalition's lock held.
 */
static uint64_t
i_get_list_footprint(queue_t list, int type, int *ntasks)
{
	task_t task;
	uint64_t bytes = 0;

	qe_foreach_element(task, list, task_coalition[type]) {
		bytes += get_task_phys_footprint(task);
		/* NOTE(review): 'bytes' here is the running total, not this
		 * task's own footprint — the debug line prints a cumulative
		 * value */
		coal_dbg("    [%d] task_pid:%d, type:%d, footprint:%lld",
		    *ntasks, task_pid(task), type, bytes);
		*ntasks += 1;
	}

	return bytes;
}
2276  
2277  uint64_t
2278  coalition_get_page_count(coalition_t coal, int *ntasks)
2279  {
2280  	uint64_t bytes = 0;
2281  	int num_tasks = 0;
2282  
2283  	if (ntasks) {
2284  		*ntasks = 0;
2285  	}
2286  	if (!coal) {
2287  		return bytes;
2288  	}
2289  
2290  	coalition_lock(coal);
2291  
2292  	switch (coal->type) {
2293  	case COALITION_TYPE_RESOURCE:
2294  		bytes += i_get_list_footprint(&coal->r.tasks, COALITION_TYPE_RESOURCE, &num_tasks);
2295  		break;
2296  	case COALITION_TYPE_JETSAM:
2297  		if (coal->j.leader) {
2298  			bytes += get_task_phys_footprint(coal->j.leader);
2299  			num_tasks = 1;
2300  		}
2301  		bytes += i_get_list_footprint(&coal->j.extensions, COALITION_TYPE_JETSAM, &num_tasks);
2302  		bytes += i_get_list_footprint(&coal->j.services, COALITION_TYPE_JETSAM, &num_tasks);
2303  		bytes += i_get_list_footprint(&coal->j.other, COALITION_TYPE_JETSAM, &num_tasks);
2304  		break;
2305  	default:
2306  		break;
2307  	}
2308  
2309  	coalition_unlock(coal);
2310  
2311  	if (ntasks) {
2312  		*ntasks = num_tasks;
2313  	}
2314  
2315  	return bytes / PAGE_SIZE_64;
2316  }
2317  
2318  struct coal_sort_s {
2319  	int pid;
2320  	int usr_order;
2321  	uint64_t bytes;
2322  };
2323  
2324  /*
2325   * return < 0 for a < b
2326   *          0 for a == b
2327   *        > 0 for a > b
2328   */
2329  typedef int (*cmpfunc_t)(const void *a, const void *b);
2330  
2331  extern void
2332  qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
2333  
2334  static int
2335  dflt_cmp(const void *a, const void *b)
2336  {
2337  	const struct coal_sort_s *csA = (const struct coal_sort_s *)a;
2338  	const struct coal_sort_s *csB = (const struct coal_sort_s *)b;
2339  
2340  	/*
2341  	 * if both A and B are equal, use a memory descending sort
2342  	 */
2343  	if (csA->usr_order == csB->usr_order) {
2344  		return (int)((int64_t)csB->bytes - (int64_t)csA->bytes);
2345  	}
2346  
2347  	/* otherwise, return the relationship between user specified orders */
2348  	return csA->usr_order - csB->usr_order;
2349  }
2350  
2351  static int
2352  mem_asc_cmp(const void *a, const void *b)
2353  {
2354  	const struct coal_sort_s *csA = (const struct coal_sort_s *)a;
2355  	const struct coal_sort_s *csB = (const struct coal_sort_s *)b;
2356  
2357  	return (int)((int64_t)csA->bytes - (int64_t)csB->bytes);
2358  }
2359  
2360  static int
2361  mem_dec_cmp(const void *a, const void *b)
2362  {
2363  	const struct coal_sort_s *csA = (const struct coal_sort_s *)a;
2364  	const struct coal_sort_s *csB = (const struct coal_sort_s *)b;
2365  
2366  	return (int)((int64_t)csB->bytes - (int64_t)csA->bytes);
2367  }
2368  
2369  static int
2370  usr_asc_cmp(const void *a, const void *b)
2371  {
2372  	const struct coal_sort_s *csA = (const struct coal_sort_s *)a;
2373  	const struct coal_sort_s *csB = (const struct coal_sort_s *)b;
2374  
2375  	return csA->usr_order - csB->usr_order;
2376  }
2377  
2378  static int
2379  usr_dec_cmp(const void *a, const void *b)
2380  {
2381  	const struct coal_sort_s *csA = (const struct coal_sort_s *)a;
2382  	const struct coal_sort_s *csB = (const struct coal_sort_s *)b;
2383  
2384  	return csB->usr_order - csA->usr_order;
2385  }
2386  
2387  /* avoid dynamic allocation in this path */
2388  #define MAX_SORTED_PIDS  80
2389  
/*
 * coalition_get_sort_list
 * Fill 'sort_array' (capacity array_sz) with one entry per task on 'list',
 * populating the field(s) relevant to 'sort_order'. A NULL 'list' is the
 * special case that captures a jetsam coalition's leader. Returns the
 * number of entries written. Callers in this file hold the coalition lock.
 */
static int
coalition_get_sort_list(coalition_t coal, int sort_order, queue_t list,
    struct coal_sort_s *sort_array, int array_sz)
{
	int ntasks = 0;
	task_t task;

	assert(sort_array != NULL);

	if (array_sz <= 0) {
		return 0;
	}

	if (!list) {
		/*
		 * this function will only be called with a NULL
		 * list for JETSAM-type coalitions, and is intended
		 * to investigate the leader process
		 */
		if (coal->type != COALITION_TYPE_JETSAM ||
		    coal->j.leader == TASK_NULL) {
			return 0;
		}
		sort_array[0].pid = task_pid(coal->j.leader);
		switch (sort_order) {
		case COALITION_SORT_DEFAULT:
			sort_array[0].usr_order = 0;
			/* default sort needs both usr_order and bytes */
			OS_FALLTHROUGH;
		case COALITION_SORT_MEM_ASC:
		case COALITION_SORT_MEM_DEC:
			sort_array[0].bytes = get_task_phys_footprint(coal->j.leader);
			break;
		case COALITION_SORT_USER_ASC:
		case COALITION_SORT_USER_DEC:
			sort_array[0].usr_order = 0;
			break;
		default:
			break;
		}
		return 1;
	}

	qe_foreach_element(task, list, task_coalition[coal->type]) {
		if (ntasks >= array_sz) {
			/* NOTE(review): the message always cites MAX_SORTED_PIDS,
			 * but the limit actually hit here is array_sz, which may be
			 * smaller when the caller has already consumed slots */
			printf("WARNING: more than %d pids in coalition %llu\n",
			    MAX_SORTED_PIDS, coal->id);
			break;
		}

		sort_array[ntasks].pid = task_pid(task);

		/* populate only the field(s) the chosen comparator will read */
		switch (sort_order) {
		case COALITION_SORT_DEFAULT:
			sort_array[ntasks].usr_order = 0;
			OS_FALLTHROUGH;
		case COALITION_SORT_MEM_ASC:
		case COALITION_SORT_MEM_DEC:
			sort_array[ntasks].bytes = get_task_phys_footprint(task);
			break;
		case COALITION_SORT_USER_ASC:
		case COALITION_SORT_USER_DEC:
			sort_array[ntasks].usr_order = 0;
			break;
		default:
			break;
		}

		ntasks++;
	}

	return ntasks;
}
2462  
/*
 * coalition_get_pid_list
 * Copy into 'pid_list' (capacity list_sz) the pids of coalition members
 * whose role matches 'rolemask', sorted per 'sort_order'. Returns the total
 * number of matching tasks found (which may exceed list_sz), or a negative
 * errno on bad parameters / unsupported sort order.
 */
int
coalition_get_pid_list(coalition_t coal, uint32_t rolemask, int sort_order,
    int *pid_list, int list_sz)
{
	struct i_jetsam_coalition *cj;
	int ntasks = 0;
	cmpfunc_t cmp_func = NULL;
	struct coal_sort_s sort_array[MAX_SORTED_PIDS] = { {0, 0, 0} }; /* keep to < 2k */

	if (!coal ||
	    !(rolemask & COALITION_ROLEMASK_ALLROLES) ||
	    !pid_list || list_sz < 1) {
		coal_dbg("Invalid parameters: coal:%p, type:%d, rolemask:0x%x, "
		    "pid_list:%p, list_sz:%d", coal, coal ? coal->type : -1,
		    rolemask, pid_list, list_sz);
		return -EINVAL;
	}

	/* map the requested sort order to a comparator (NULL = unsorted) */
	switch (sort_order) {
	case COALITION_SORT_NOSORT:
		cmp_func = NULL;
		break;
	case COALITION_SORT_DEFAULT:
		cmp_func = dflt_cmp;
		break;
	case COALITION_SORT_MEM_ASC:
		cmp_func = mem_asc_cmp;
		break;
	case COALITION_SORT_MEM_DEC:
		cmp_func = mem_dec_cmp;
		break;
	case COALITION_SORT_USER_ASC:
		cmp_func = usr_asc_cmp;
		break;
	case COALITION_SORT_USER_DEC:
		cmp_func = usr_dec_cmp;
		break;
	default:
		return -ENOTSUP;
	}

	coalition_lock(coal);

	/* resource coalitions keep all member tasks on a single list */
	if (coal->type == COALITION_TYPE_RESOURCE) {
		ntasks += coalition_get_sort_list(coal, sort_order, &coal->r.tasks,
		    sort_array, MAX_SORTED_PIDS);
		goto unlock_coal;
	}

	cj = &coal->j;

	/* jetsam coalitions: gather each requested role bucket in turn */
	if (rolemask & COALITION_ROLEMASK_UNDEF) {
		ntasks += coalition_get_sort_list(coal, sort_order, &cj->other,
		    sort_array + ntasks,
		    MAX_SORTED_PIDS - ntasks);
	}

	if (rolemask & COALITION_ROLEMASK_XPC) {
		ntasks += coalition_get_sort_list(coal, sort_order, &cj->services,
		    sort_array + ntasks,
		    MAX_SORTED_PIDS - ntasks);
	}

	if (rolemask & COALITION_ROLEMASK_EXT) {
		ntasks += coalition_get_sort_list(coal, sort_order, &cj->extensions,
		    sort_array + ntasks,
		    MAX_SORTED_PIDS - ntasks);
	}

	/* the leader is captured via the NULL-list special case */
	if (rolemask & COALITION_ROLEMASK_LEADER) {
		ntasks += coalition_get_sort_list(coal, sort_order, NULL,
		    sort_array + ntasks,
		    MAX_SORTED_PIDS - ntasks);
	}

unlock_coal:
	coalition_unlock(coal);

	/* sort based on the chosen criterion (no sense sorting 1 item) */
	if (cmp_func && ntasks > 1) {
		qsort(sort_array, ntasks, sizeof(struct coal_sort_s), cmp_func);
	}

	/* copy out at most list_sz pids; return value is the total found */
	for (int i = 0; i < ntasks; i++) {
		if (i >= list_sz) {
			break;
		}
		coal_dbg(" [%d] PID:%d, footprint:%lld, usr_order:%d",
		    i, sort_array[i].pid, sort_array[i].bytes,
		    sort_array[i].usr_order);
		pid_list[i] = sort_array[i].pid;
	}

	return ntasks;
}