/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <vm/vm_compressor.h>

#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_compressor_algorithms.h>
#include <vm/vm_fault.h>
#include <vm/vm_protos.h>
#include <mach/mach_host.h>             /* for host_info() */
#include <kern/ledger.h>
#include <kern/policy_internal.h>
#include <kern/thread_group.h>
#include <san/kasan.h>

#if defined(__x86_64__)
#include <i386/misc_protos.h>
#endif
#if defined(__arm64__)
#include <arm/machine_routines.h>
#endif

#include <IOKit/IOHibernatePrivate.h>

extern boolean_t vm_darkwake_mode;
extern zone_t vm_page_zone;

#if DEVELOPMENT || DEBUG
/* sysctl defined in bsd/dev/arm64/sysctl.c */
int do_cseg_wedge_thread(void);
int do_cseg_unwedge_thread(void);
static event_t debug_cseg_wait_event = NULL;
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_FREEZE
bool freezer_incore_cseg_acct = TRUE; /* Only count incore compressed memory for jetsams. */
void task_disown_frozen_csegs(task_t owner_task);
#endif /* CONFIG_FREEZE */

#if POPCOUNT_THE_COMPRESSED_DATA
boolean_t popcount_c_segs = TRUE;

static inline uint32_t
vmc_pop(uintptr_t ins, int sz)
{
	uint32_t rv = 0;

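	/*
	 * When popcount validation is disabled, hand back a fixed,
	 * recognizable sentinel instead of a real count, so a stale
	 * c_pop_cdata value stands out when inspected in a debugger.
	 */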
	if (__probable(popcount_c_segs == FALSE)) {
		return 0xDEAD707C;
	}

	while (sz >= 16) {
		uint32_t rv1, rv2;
		uint64_t *ins64 = (uint64_t *) ins;
		uint64_t *ins642 = (uint64_t *) (ins + 8);
		rv1 = __builtin_popcountll(*ins64);
		rv2 = __builtin_popcountll(*ins642);
		rv += rv1 + rv2;
		sz -= 16;
		ins += 16;
	}

	while (sz >= 4) {
		uint32_t *ins32 = (uint32_t *) ins;
		rv += __builtin_popcount(*ins32);
		sz -= 4;
		ins += 4;
	}

	while (sz > 0) {
		char *ins8 = (char *)ins;
		rv += __builtin_popcount(*ins8);
		sz--;
		ins++;
	}
	return rv;
}
#endif

#if VALIDATE_C_SEGMENTS
boolean_t validate_c_segs = TRUE;
#endif
/*
 * vm_compressor_mode has a hierarchy of control to set its value.
 * boot-args are checked first, then device-tree, and finally
 * the default value that is defined below. See vm_fault_init() for
 * the boot-arg & device-tree code.
 */

#if !XNU_TARGET_OS_OSX

#if CONFIG_FREEZE
int     vm_compressor_mode = VM_PAGER_FREEZER_DEFAULT;
struct  freezer_context freezer_context_global;
#else /* CONFIG_FREEZE */
int     vm_compressor_mode = VM_PAGER_NOT_CONFIGURED;
#endif /* CONFIG_FREEZE */

#else /* !XNU_TARGET_OS_OSX */
int             vm_compressor_mode = VM_PAGER_COMPRESSOR_WITH_SWAP;

#endif /* !XNU_TARGET_OS_OSX */

TUNABLE(uint32_t, vm_compression_limit, "vm_compression_limit", 0);
int             vm_compressor_is_active = 0;
int             vm_compressor_available = 0;

extern uint64_t vm_swap_get_max_configured_space(void);
extern void     vm_pageout_io_throttle(void);

#if CHECKSUM_THE_DATA || CHECKSUM_THE_SWAP || CHECKSUM_THE_COMPRESSED_DATA
extern unsigned int hash_string(char *cp, int len);
static unsigned int vmc_hash(char *, int);
boolean_t checksum_c_segs = TRUE;

unsigned int
vmc_hash(char *cp, int len)
{
	if (__probable(checksum_c_segs == FALSE)) {
		return 0xDEAD7A37;
	}
	return hash_string(cp, len);
}
#endif

#define UNPACK_C_SIZE(cs)       ((cs->c_size == (PAGE_SIZE-1)) ? PAGE_SIZE : cs->c_size)
#define PACK_C_SIZE(cs, size)   (cs->c_size = ((size == PAGE_SIZE) ? PAGE_SIZE - 1 : size))
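/*
 * c_size cannot hold the value PAGE_SIZE itself (the field is narrower
 * than a full page count), so a slot storing an entire, incompressible
 * page is packed as PAGE_SIZE - 1.  Worked example with 4K pages:
 * PACK_C_SIZE(cs, 4096) stores 4095, and UNPACK_C_SIZE() maps 4095
 * back to 4096; any smaller size is stored and returned unchanged.
 */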


struct c_sv_hash_entry {
	union {
		struct  {
			uint32_t        c_sv_he_ref;
			uint32_t        c_sv_he_data;
		} c_sv_he;
		uint64_t        c_sv_he_record;
	} c_sv_he_un;
};

#define he_ref  c_sv_he_un.c_sv_he.c_sv_he_ref
#define he_data c_sv_he_un.c_sv_he.c_sv_he_data
#define he_record c_sv_he_un.c_sv_he_record

#define C_SV_HASH_MAX_MISS      32
#define C_SV_HASH_SIZE          ((1 << 10))
#define C_SV_HASH_MASK          ((1 << 10) - 1)
#define C_SV_CSEG_ID            ((1 << 22) - 1)
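/*
 * "sv" == single value.  A page whose every 32-bit word is identical
 * (most commonly all zeros) isn't handed to the compressor; its value
 * is refcounted in this (1 << 10)-entry hash instead.  Each entry
 * overlays its {refcount, value} pair with one 64-bit he_record so the
 * pair can be read and updated as a single unit.  C_SV_CSEG_ID is a
 * reserved segment number used in slot mappings to flag data that
 * lives in this hash rather than in a real c_segment.
 */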


union c_segu {
	c_segment_t     c_seg;
	uintptr_t       c_segno;
};

#define C_SLOT_ASSERT_PACKABLE(ptr) \
	VM_ASSERT_POINTER_PACKABLE((vm_offset_t)(ptr), C_SLOT_PACKED_PTR);

#define C_SLOT_PACK_PTR(ptr) \
	VM_PACK_POINTER((vm_offset_t)(ptr), C_SLOT_PACKED_PTR)

#define C_SLOT_UNPACK_PTR(cslot) \
	(c_slot_mapping_t)VM_UNPACK_POINTER((cslot)->c_packed_ptr, C_SLOT_PACKED_PTR)

/* for debugging purposes */
SECURITY_READ_ONLY_EARLY(vm_packing_params_t) c_slot_packing_params =
    VM_PACKING_PARAMS(C_SLOT_PACKED_PTR);

uint32_t        c_segment_count = 0;
uint32_t        c_segment_count_max = 0;

uint64_t        c_generation_id = 0;
uint64_t        c_generation_id_flush_barrier;


#define         HIBERNATE_FLUSHING_SECS_TO_COMPLETE     120

boolean_t       hibernate_no_swapspace = FALSE;
clock_sec_t     hibernate_flushing_deadline = 0;


#if RECORD_THE_COMPRESSED_DATA
char    *c_compressed_record_sbuf;
char    *c_compressed_record_ebuf;
char    *c_compressed_record_cptr;
#endif


queue_head_t    c_age_list_head;
queue_head_t    c_swappedin_list_head;
queue_head_t    c_swapout_list_head;
queue_head_t    c_swapio_list_head;
queue_head_t    c_swappedout_list_head;
queue_head_t    c_swappedout_sparse_list_head;
queue_head_t    c_major_list_head;
queue_head_t    c_filling_list_head;
queue_head_t    c_bad_list_head;

uint32_t        c_age_count = 0;
uint32_t        c_swappedin_count = 0;
uint32_t        c_swapout_count = 0;
uint32_t        c_swapio_count = 0;
uint32_t        c_swappedout_count = 0;
uint32_t        c_swappedout_sparse_count = 0;
uint32_t        c_major_count = 0;
uint32_t        c_filling_count = 0;
uint32_t        c_empty_count = 0;
uint32_t        c_bad_count = 0;


queue_head_t    c_minor_list_head;
uint32_t        c_minor_count = 0;

int             c_overage_swapped_count = 0;
int             c_overage_swapped_limit = 0;

int             c_seg_fixed_array_len;
union  c_segu   *c_segments;
vm_offset_t     c_buffers;
vm_size_t       c_buffers_size;
caddr_t         c_segments_next_page;
boolean_t       c_segments_busy;
uint32_t        c_segments_available;
uint32_t        c_segments_limit;
uint32_t        c_segments_nearing_limit;

uint32_t        c_segment_svp_in_hash;
uint32_t        c_segment_svp_hash_succeeded;
uint32_t        c_segment_svp_hash_failed;
uint32_t        c_segment_svp_zero_compressions;
uint32_t        c_segment_svp_nonzero_compressions;
uint32_t        c_segment_svp_zero_decompressions;
uint32_t        c_segment_svp_nonzero_decompressions;

uint32_t        c_segment_noncompressible_pages;

uint32_t        c_segment_pages_compressed = 0; /* Tracks # of uncompressed pages fed into the compressor */
#if CONFIG_FREEZE
int32_t         c_segment_pages_compressed_incore = 0; /* Tracks # of uncompressed pages fed into the compressor that are in memory */
uint32_t        c_segments_incore_limit = 0; /* Tracks # of segments allowed to be in-core. Based on compressor pool size */
#endif /* CONFIG_FREEZE */

uint32_t        c_segment_pages_compressed_limit;
uint32_t        c_segment_pages_compressed_nearing_limit;
uint32_t        c_free_segno_head = (uint32_t)-1;

uint32_t        vm_compressor_minorcompact_threshold_divisor = 10;
uint32_t        vm_compressor_majorcompact_threshold_divisor = 10;
uint32_t        vm_compressor_unthrottle_threshold_divisor = 10;
uint32_t        vm_compressor_catchup_threshold_divisor = 10;

uint32_t        vm_compressor_minorcompact_threshold_divisor_overridden = 0;
uint32_t        vm_compressor_majorcompact_threshold_divisor_overridden = 0;
uint32_t        vm_compressor_unthrottle_threshold_divisor_overridden = 0;
uint32_t        vm_compressor_catchup_threshold_divisor_overridden = 0;

#define         C_SEGMENTS_PER_PAGE     (PAGE_SIZE / sizeof(union c_segu))

LCK_GRP_DECLARE(vm_compressor_lck_grp, "vm_compressor");
LCK_RW_DECLARE(c_master_lock, &vm_compressor_lck_grp);
LCK_MTX_DECLARE(c_list_lock_storage, &vm_compressor_lck_grp);

boolean_t       decompressions_blocked = FALSE;

zone_t          compressor_segment_zone;
int             c_compressor_swap_trigger = 0;

uint32_t        compressor_cpus;
char            *compressor_scratch_bufs;
char            *kdp_compressor_scratch_buf;
char            *kdp_compressor_decompressed_page;
addr64_t        kdp_compressor_decompressed_page_paddr;
ppnum_t         kdp_compressor_decompressed_page_ppnum;

clock_sec_t     start_of_sample_period_sec = 0;
clock_nsec_t    start_of_sample_period_nsec = 0;
clock_sec_t     start_of_eval_period_sec = 0;
clock_nsec_t    start_of_eval_period_nsec = 0;
uint32_t        sample_period_decompression_count = 0;
uint32_t        sample_period_compression_count = 0;
uint32_t        last_eval_decompression_count = 0;
uint32_t        last_eval_compression_count = 0;

#define         DECOMPRESSION_SAMPLE_MAX_AGE            (60 * 30)

boolean_t       vm_swapout_ripe_segments = FALSE;
uint32_t        vm_ripe_target_age = (60 * 60 * 48);

uint32_t        swapout_target_age = 0;
uint32_t        age_of_decompressions_during_sample_period[DECOMPRESSION_SAMPLE_MAX_AGE];
uint32_t        overage_decompressions_during_sample_period = 0;


void            do_fastwake_warmup(queue_head_t *, boolean_t);
boolean_t       fastwake_warmup = FALSE;
boolean_t       fastwake_recording_in_progress = FALSE;
clock_sec_t     dont_trim_until_ts = 0;

uint64_t        c_segment_warmup_count;
uint64_t        first_c_segment_to_warm_generation_id = 0;
uint64_t        last_c_segment_to_warm_generation_id = 0;
boolean_t       hibernate_flushing = FALSE;

int64_t         c_segment_input_bytes __attribute__((aligned(8))) = 0;
int64_t         c_segment_compressed_bytes __attribute__((aligned(8))) = 0;
int64_t         compressor_bytes_used __attribute__((aligned(8))) = 0;


struct c_sv_hash_entry c_segment_sv_hash_table[C_SV_HASH_SIZE]  __attribute__ ((aligned(8)));

static boolean_t compressor_needs_to_swap(void);
static void vm_compressor_swap_trigger_thread(void);
static void vm_compressor_do_delayed_compactions(boolean_t);
static void vm_compressor_compact_and_swap(boolean_t);
static void vm_compressor_age_swapped_in_segments(boolean_t);

struct vm_compressor_swapper_stats vmcs_stats;

#if XNU_TARGET_OS_OSX
static void vm_compressor_take_paging_space_action(void);
#endif /* XNU_TARGET_OS_OSX */

void compute_swapout_target_age(void);

boolean_t c_seg_major_compact(c_segment_t, c_segment_t);
boolean_t c_seg_major_compact_ok(c_segment_t, c_segment_t);

int  c_seg_minor_compaction_and_unlock(c_segment_t, boolean_t);
int  c_seg_do_minor_compaction_and_unlock(c_segment_t, boolean_t, boolean_t, boolean_t);
void c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg);

void c_seg_move_to_sparse_list(c_segment_t);
void c_seg_insert_into_q(queue_head_t *, c_segment_t);

uint64_t vm_available_memory(void);
uint64_t vm_compressor_pages_compressed(void);

/*
 * indicate the need to do a major compaction if
 * the overall set of in-use compression segments
 * becomes sparse... on systems that support pressure
 * driven swapping, this will also cause swapouts to
 * be initiated.
 */
static inline boolean_t
vm_compressor_needs_to_major_compact()
{
	uint32_t        incore_seg_count;

	incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;

	if ((c_segment_count >= (c_segments_nearing_limit / 8)) &&
	    ((incore_seg_count * C_SEG_MAX_PAGES) - VM_PAGE_COMPRESSOR_COUNT) >
	    ((incore_seg_count / 8) * C_SEG_MAX_PAGES)) {
		return 1;
	}
	return 0;
}
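/*
 * Worked example of the sparseness test above, assuming an
 * illustrative C_SEG_MAX_PAGES of 64: with 1000 in-core segments the
 * set could hold 64000 compressed pages.  A major compaction is
 * indicated once VM_PAGE_COMPRESSOR_COUNT falls below 56000 -- i.e.
 * more than 1/8th of the in-core capacity is sitting unused -- and
 * the segment count is at least 1/8th of c_segments_nearing_limit.
 */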


uint64_t
vm_available_memory(void)
{
	return ((uint64_t)AVAILABLE_NON_COMPRESSED_MEMORY) * PAGE_SIZE_64;
}


uint64_t
vm_compressor_pages_compressed(void)
{
	return c_segment_pages_compressed * PAGE_SIZE_64;
}


boolean_t
vm_compressor_low_on_space(void)
{
#if CONFIG_FREEZE
	uint64_t incore_seg_count;
	uint32_t incore_compressed_pages;
	if (freezer_incore_cseg_acct) {
		incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
		incore_compressed_pages = c_segment_pages_compressed_incore;
	} else {
		incore_seg_count = c_segment_count;
		incore_compressed_pages = c_segment_pages_compressed;
	}

	if ((incore_compressed_pages > c_segment_pages_compressed_nearing_limit) ||
	    (incore_seg_count > c_segments_nearing_limit)) {
		return TRUE;
	}
#else /* CONFIG_FREEZE */
	if ((c_segment_pages_compressed > c_segment_pages_compressed_nearing_limit) ||
	    (c_segment_count > c_segments_nearing_limit)) {
		return TRUE;
	}
#endif /* CONFIG_FREEZE */
	return FALSE;
}


boolean_t
vm_compressor_out_of_space(void)
{
#if CONFIG_FREEZE
	uint64_t incore_seg_count;
	uint32_t incore_compressed_pages;
	if (freezer_incore_cseg_acct) {
		incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
		incore_compressed_pages = c_segment_pages_compressed_incore;
	} else {
		incore_seg_count = c_segment_count;
		incore_compressed_pages = c_segment_pages_compressed;
	}

	if ((incore_compressed_pages >= c_segment_pages_compressed_limit) ||
	    (incore_seg_count > c_segments_incore_limit)) {
		return TRUE;
	}
#else /* CONFIG_FREEZE */
	if ((c_segment_pages_compressed >= c_segment_pages_compressed_limit) ||
	    (c_segment_count >= c_segments_limit)) {
		return TRUE;
	}
#endif /* CONFIG_FREEZE */
	return FALSE;
}


int
vm_wants_task_throttled(task_t task)
{
	if (task == kernel_task) {
		return 0;
	}

	if (VM_CONFIG_SWAP_IS_ACTIVE) {
		if ((vm_compressor_low_on_space() || HARD_THROTTLE_LIMIT_REACHED()) &&
		    (unsigned int)pmap_compressed(task->map->pmap) > (c_segment_pages_compressed / 4)) {
			return 1;
		}
	}
	return 0;
}


#if DEVELOPMENT || DEBUG
/*
 * On compressor/swap exhaustion, kill the largest process regardless of
 * its chosen process policy.
 */
TUNABLE(bool, kill_on_no_paging_space, "-kill_on_no_paging_space", false);
#endif /* DEVELOPMENT || DEBUG */

#if XNU_TARGET_OS_OSX

static uint32_t no_paging_space_action_in_progress = 0;
extern void memorystatus_send_low_swap_note(void);

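/*
 * Single-flight guard: the OSCompareAndSwap below allows exactly one
 * thread at a time to run no_paging_space_action(); racers that lose
 * the 0 -> 1 swap return immediately and rely on the winner's action.
 */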
static void
vm_compressor_take_paging_space_action(void)
{
	if (no_paging_space_action_in_progress == 0) {
		if (OSCompareAndSwap(0, 1, (UInt32 *)&no_paging_space_action_in_progress)) {
			if (no_paging_space_action()) {
#if DEVELOPMENT || DEBUG
				if (kill_on_no_paging_space) {
					/*
					 * Since we are choosing to always kill a process, we don't need the
					 * "out of application memory" dialog box in this mode, and hence we
					 * won't send the knote.
					 */
					no_paging_space_action_in_progress = 0;
					return;
				}
#endif /* DEVELOPMENT || DEBUG */
				memorystatus_send_low_swap_note();
			}

			no_paging_space_action_in_progress = 0;
		}
	}
}
#endif /* XNU_TARGET_OS_OSX */


void
vm_decompressor_lock(void)
{
	PAGE_REPLACEMENT_ALLOWED(TRUE);

	decompressions_blocked = TRUE;

	PAGE_REPLACEMENT_ALLOWED(FALSE);
}

void
vm_decompressor_unlock(void)
{
	PAGE_REPLACEMENT_ALLOWED(TRUE);

	decompressions_blocked = FALSE;

	PAGE_REPLACEMENT_ALLOWED(FALSE);

	thread_wakeup((event_t)&decompressions_blocked);
}

static inline void
cslot_copy(c_slot_t cdst, c_slot_t csrc)
{
#if CHECKSUM_THE_DATA
	cdst->c_hash_data = csrc->c_hash_data;
#endif
#if CHECKSUM_THE_COMPRESSED_DATA
	cdst->c_hash_compressed_data = csrc->c_hash_compressed_data;
#endif
#if POPCOUNT_THE_COMPRESSED_DATA
	cdst->c_pop_cdata = csrc->c_pop_cdata;
#endif
	cdst->c_size = csrc->c_size;
	cdst->c_packed_ptr = csrc->c_packed_ptr;
#if defined(__arm__) || defined(__arm64__)
	cdst->c_codec = csrc->c_codec;
#endif
#if __ARM_WKDM_POPCNT__
	cdst->c_inline_popcount = csrc->c_inline_popcount;
#endif
}

vm_map_t compressor_map;
uint64_t compressor_pool_max_size;
uint64_t compressor_pool_size;
uint32_t compressor_pool_multiplier;

#if DEVELOPMENT || DEBUG
/*
 * Compressor segments are write-protected in development/debug
 * kernels to help debug memory corruption.
 * In cases where performance is a concern, this can be disabled
 * via the boot-arg "-disable_cseg_write_protection".
 */
boolean_t write_protect_c_segs = TRUE;
int vm_compressor_test_seg_wp;
uint32_t vm_ktrace_enabled;
#endif /* DEVELOPMENT || DEBUG */

void
vm_compressor_init(void)
{
	thread_t        thread;
	int             attempts = 1;
	kern_return_t   retval = KERN_SUCCESS;
	vm_offset_t     start_addr = 0;
	vm_size_t       c_segments_arr_size = 0, compressor_submap_size = 0;
	vm_map_kernel_flags_t vmk_flags;
#if RECORD_THE_COMPRESSED_DATA
	vm_size_t       c_compressed_record_sbuf_size = 0;
#endif /* RECORD_THE_COMPRESSED_DATA */

#if DEVELOPMENT || DEBUG || CONFIG_FREEZE
	char bootarg_name[32];
#endif /* DEVELOPMENT || DEBUG || CONFIG_FREEZE */

#if DEVELOPMENT || DEBUG
	if (PE_parse_boot_argn("-disable_cseg_write_protection", bootarg_name, sizeof(bootarg_name))) {
		write_protect_c_segs = FALSE;
	}
	int vmcval = 1;
	PE_parse_boot_argn("vm_compressor_validation", &vmcval, sizeof(vmcval));

	if (kern_feature_override(KF_COMPRSV_OVRD)) {
		vmcval = 0;
	}
	if (vmcval == 0) {
#if POPCOUNT_THE_COMPRESSED_DATA
		popcount_c_segs = FALSE;
#endif
#if CHECKSUM_THE_DATA || CHECKSUM_THE_COMPRESSED_DATA
		checksum_c_segs = FALSE;
#endif
#if VALIDATE_C_SEGMENTS
		validate_c_segs = FALSE;
#endif
		write_protect_c_segs = FALSE;
	}
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_FREEZE
	if (PE_parse_boot_argn("-disable_freezer_cseg_acct", bootarg_name, sizeof(bootarg_name))) {
		freezer_incore_cseg_acct = FALSE;
	}
#endif /* CONFIG_FREEZE */

	assert((C_SEGMENTS_PER_PAGE * sizeof(union c_segu)) == PAGE_SIZE);

#if !XNU_TARGET_OS_OSX
	vm_compressor_minorcompact_threshold_divisor = 20;
	vm_compressor_majorcompact_threshold_divisor = 30;
	vm_compressor_unthrottle_threshold_divisor = 40;
	vm_compressor_catchup_threshold_divisor = 60;
#else /* !XNU_TARGET_OS_OSX */
	if (max_mem <= (3ULL * 1024ULL * 1024ULL * 1024ULL)) {
		vm_compressor_minorcompact_threshold_divisor = 11;
		vm_compressor_majorcompact_threshold_divisor = 13;
		vm_compressor_unthrottle_threshold_divisor = 20;
		vm_compressor_catchup_threshold_divisor = 35;
	} else {
		vm_compressor_minorcompact_threshold_divisor = 20;
		vm_compressor_majorcompact_threshold_divisor = 25;
		vm_compressor_unthrottle_threshold_divisor = 35;
		vm_compressor_catchup_threshold_divisor = 50;
	}
#endif /* !XNU_TARGET_OS_OSX */

	queue_init(&c_bad_list_head);
	queue_init(&c_age_list_head);
	queue_init(&c_minor_list_head);
	queue_init(&c_major_list_head);
	queue_init(&c_filling_list_head);
	queue_init(&c_swapout_list_head);
	queue_init(&c_swapio_list_head);
	queue_init(&c_swappedin_list_head);
	queue_init(&c_swappedout_list_head);
	queue_init(&c_swappedout_sparse_list_head);

	c_free_segno_head = -1;
	c_segments_available = 0;

	if (vm_compression_limit) {
		compressor_pool_size = ptoa_64(vm_compression_limit);
	}

	compressor_pool_max_size = C_SEG_MAX_LIMIT;
	compressor_pool_max_size *= C_SEG_BUFSIZE;

#if XNU_TARGET_OS_OSX

	if (vm_compression_limit == 0) {
		if (max_mem <= (4ULL * 1024ULL * 1024ULL * 1024ULL)) {
			compressor_pool_size = 16ULL * max_mem;
		} else if (max_mem <= (8ULL * 1024ULL * 1024ULL * 1024ULL)) {
			compressor_pool_size = 8ULL * max_mem;
		} else if (max_mem <= (32ULL * 1024ULL * 1024ULL * 1024ULL)) {
			compressor_pool_size = 4ULL * max_mem;
		} else {
			compressor_pool_size = 2ULL * max_mem;
		}
	}
	if (max_mem <= (8ULL * 1024ULL * 1024ULL * 1024ULL)) {
		compressor_pool_multiplier = 1;
	} else if (max_mem <= (32ULL * 1024ULL * 1024ULL * 1024ULL)) {
		compressor_pool_multiplier = 2;
	} else {
		compressor_pool_multiplier = 4;
	}

#elif defined(__arm__)

#define VM_RESERVE_SIZE                 (1024 * 1024 * 256)
#define MAX_COMPRESSOR_POOL_SIZE        (1024 * 1024 * 450)

	if (compressor_pool_max_size > MAX_COMPRESSOR_POOL_SIZE) {
		compressor_pool_max_size = MAX_COMPRESSOR_POOL_SIZE;
	}

	if (vm_compression_limit == 0) {
		compressor_pool_size = ((kernel_map->max_offset - kernel_map->min_offset) - kernel_map->size) - VM_RESERVE_SIZE;
	}
	compressor_pool_multiplier = 1;

#elif defined(__arm64__) && defined(XNU_TARGET_OS_WATCH)

	/*
	 * On M9 watches the compressor can become big and can lead to
	 * churn in the working set, resulting in audio drops. Setting a cap
	 * on the compressor size favors reclaiming unused memory
	 * sitting in the idle band via jetsam.
	 */

#define COMPRESSOR_CAP_PERCENTAGE        37ULL

	if (compressor_pool_max_size > max_mem) {
		compressor_pool_max_size = max_mem;
	}

	if (vm_compression_limit == 0) {
		compressor_pool_size = (max_mem * COMPRESSOR_CAP_PERCENTAGE) / 100ULL;
	}
	compressor_pool_multiplier = 1;

#else

	if (compressor_pool_max_size > max_mem) {
		compressor_pool_max_size = max_mem;
	}

	if (vm_compression_limit == 0) {
		compressor_pool_size = max_mem;
	}
	compressor_pool_multiplier = 1;
#endif
	if (compressor_pool_size > compressor_pool_max_size) {
		compressor_pool_size = compressor_pool_max_size;
	}

try_again:
	c_segments_limit = (uint32_t)(compressor_pool_size / (vm_size_t)(C_SEG_ALLOCSIZE));
	c_segments_nearing_limit = (uint32_t)(((uint64_t)c_segments_limit * 98ULL) / 100ULL);

	c_segment_pages_compressed_limit = (c_segments_limit * (C_SEG_BUFSIZE / PAGE_SIZE) * compressor_pool_multiplier);

	if (c_segment_pages_compressed_limit < (uint32_t)(max_mem / PAGE_SIZE)) {
		if (!vm_compression_limit) {
			c_segment_pages_compressed_limit = (uint32_t)(max_mem / PAGE_SIZE);
		}
	}

	c_segment_pages_compressed_nearing_limit = (uint32_t)(((uint64_t)c_segment_pages_compressed_limit * 98ULL) / 100ULL);

#if CONFIG_FREEZE
	/*
	 * Our in-core limits are based on the size of the compressor pool.
	 * The c_segments_nearing_limit is also based on the compressor pool
	 * size and calculated above.
	 */
	c_segments_incore_limit = c_segments_limit;

	if (freezer_incore_cseg_acct) {
		/*
		 * Add enough segments to track all frozen c_segs that can be stored in swap.
		 */
		c_segments_limit += (uint32_t)(vm_swap_get_max_configured_space() / (vm_size_t)(C_SEG_ALLOCSIZE));
	}
#endif
	/*
	 * Submap needs space for:
	 * - c_segments
	 * - c_buffers
	 * - swap reclamations -- C_SEG_BUFSIZE
	 */
	c_segments_arr_size = vm_map_round_page((sizeof(union c_segu) * c_segments_limit), VM_MAP_PAGE_MASK(kernel_map));
	c_buffers_size = vm_map_round_page(((vm_size_t)C_SEG_ALLOCSIZE * (vm_size_t)c_segments_limit), VM_MAP_PAGE_MASK(kernel_map));

	compressor_submap_size = c_segments_arr_size + c_buffers_size + C_SEG_BUFSIZE;

#if RECORD_THE_COMPRESSED_DATA
	c_compressed_record_sbuf_size = (vm_size_t)C_SEG_ALLOCSIZE + (PAGE_SIZE * 2);
	compressor_submap_size += c_compressed_record_sbuf_size;
#endif /* RECORD_THE_COMPRESSED_DATA */

	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	vmk_flags.vmkf_permanent = TRUE;
	retval = kmem_suballoc(kernel_map, &start_addr, compressor_submap_size,
	    FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_COMPRESSOR,
	    &compressor_map);

	if (retval != KERN_SUCCESS) {
		if (++attempts > 3) {
			panic("vm_compressor_init: kmem_suballoc failed - 0x%llx", (uint64_t)compressor_submap_size);
		}

		compressor_pool_size = compressor_pool_size / 2;

		kprintf("retrying creation of the compressor submap at 0x%llx bytes\n", compressor_pool_size);
		goto try_again;
	}
	if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&c_segments),
	    (sizeof(union c_segu) * c_segments_limit), 0,
	    KMA_KOBJECT | KMA_VAONLY | KMA_PERMANENT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
		panic("vm_compressor_init: kernel_memory_allocate failed - c_segments\n");
	}
	if (kernel_memory_allocate(compressor_map, &c_buffers, c_buffers_size, 0,
	    KMA_COMPRESSOR | KMA_VAONLY | KMA_PERMANENT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
		panic("vm_compressor_init: kernel_memory_allocate failed - c_buffers\n");
	}


	/*
	 * Pick a good size that will minimize fragmentation in zalloc
	 * by minimizing the fragmentation in a 16k run.
	 *
	 * C_SEG_SLOT_VAR_ARRAY_MIN_LEN is larger on 4k systems than 16k ones,
	 * making the fragmentation in a 4k page terrible. Using 16k for all
	 * systems matches zalloc() and will minimize fragmentation.
	 */
	uint32_t c_segment_size = sizeof(struct c_segment) + (C_SEG_SLOT_VAR_ARRAY_MIN_LEN * sizeof(struct c_slot));
	uint32_t cnt  = (16 << 10) / c_segment_size;
	uint32_t frag = (16 << 10) % c_segment_size;

	c_seg_fixed_array_len = C_SEG_SLOT_VAR_ARRAY_MIN_LEN;

	while (cnt * sizeof(struct c_slot) < frag) {
		c_segment_size += sizeof(struct c_slot);
		c_seg_fixed_array_len++;
		frag -= cnt * sizeof(struct c_slot);
	}
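	/*
	 * Worked example of the loop above (sizes illustrative only):
	 * if c_segment_size starts out at 1500 bytes, a 16K run fits
	 * cnt = 10 segments with frag = 1384 bytes left over.  Each pass
	 * grows every segment in the run by one slot -- assuming an
	 * 8-byte c_slot, that costs cnt * 8 = 80 bytes -- so roughly 17
	 * passes convert the leftover into extra fixed slots before frag
	 * drops below the per-pass cost.  Note that cnt is not
	 * recomputed as c_segment_size grows; this is a first-order fit.
	 */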

	compressor_segment_zone = zone_create("compressor_segment",
	    c_segment_size, ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);

	c_segments_busy = FALSE;

	c_segments_next_page = (caddr_t)c_segments;
	vm_compressor_algorithm_init();

	{
		host_basic_info_data_t hinfo;
		mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
		size_t bufsize;
		char *buf;

#define BSD_HOST 1
		host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);

		compressor_cpus = hinfo.max_cpus;

		bufsize = PAGE_SIZE;
		bufsize += compressor_cpus * vm_compressor_get_decode_scratch_size();
		bufsize += vm_compressor_get_decode_scratch_size();
#if CONFIG_FREEZE
		bufsize += vm_compressor_get_encode_scratch_size();
#endif
#if RECORD_THE_COMPRESSED_DATA
		bufsize += c_compressed_record_sbuf_size;
#endif

		if (kernel_memory_allocate(kernel_map, (vm_offset_t *)&buf, bufsize,
		    PAGE_MASK, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_COMPRESSOR)) {
			panic("vm_compressor_init: Unable to allocate %zd bytes", bufsize);
		}

		/*
		 * kdp_compressor_decompressed_page must be page aligned because we access
		 * it through the physical aperture by page number.
		 */
		kdp_compressor_decompressed_page = buf;
		kdp_compressor_decompressed_page_paddr = kvtophys((vm_offset_t)kdp_compressor_decompressed_page);
		kdp_compressor_decompressed_page_ppnum = (ppnum_t) atop(kdp_compressor_decompressed_page_paddr);
		buf += PAGE_SIZE;
		bufsize -= PAGE_SIZE;

		compressor_scratch_bufs = buf;
		buf += compressor_cpus * vm_compressor_get_decode_scratch_size();
		bufsize -= compressor_cpus * vm_compressor_get_decode_scratch_size();

		kdp_compressor_scratch_buf = buf;
		buf += vm_compressor_get_decode_scratch_size();
		bufsize -= vm_compressor_get_decode_scratch_size();

#if CONFIG_FREEZE
		freezer_context_global.freezer_ctx_compressor_scratch_buf = buf;
		buf += vm_compressor_get_encode_scratch_size();
		bufsize -= vm_compressor_get_encode_scratch_size();
#endif

#if RECORD_THE_COMPRESSED_DATA
		c_compressed_record_sbuf = buf;
		c_compressed_record_cptr = buf;
		c_compressed_record_ebuf = c_compressed_record_sbuf + c_compressed_record_sbuf_size;
		buf += c_compressed_record_sbuf_size;
		bufsize -= c_compressed_record_sbuf_size;
#endif
		assert(bufsize == 0);
	}

	if (kernel_thread_start_priority((thread_continue_t)vm_compressor_swap_trigger_thread, NULL,
	    BASEPRI_VM, &thread) != KERN_SUCCESS) {
		panic("vm_compressor_swap_trigger_thread: create failed");
	}
	thread_deallocate(thread);

	if (vm_pageout_internal_start() != KERN_SUCCESS) {
		panic("vm_compressor_init: Failed to start the internal pageout thread.\n");
	}
	if (VM_CONFIG_SWAP_IS_PRESENT) {
		vm_compressor_swap_init();
	}

	if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
		vm_compressor_is_active = 1;
	}

#if CONFIG_FREEZE
	memorystatus_freeze_enabled = TRUE;
#endif /* CONFIG_FREEZE */

	vm_compressor_available = 1;

	vm_page_reactivate_all_throttled();

	bzero(&vmcs_stats, sizeof(struct vm_compressor_swapper_stats));
}


#if VALIDATE_C_SEGMENTS

static void
c_seg_validate(c_segment_t c_seg, boolean_t must_be_compact)
{
	uint16_t        c_indx;
	int32_t         bytes_used;
	uint32_t        c_rounded_size;
	uint32_t        c_size;
	c_slot_t        cs;

	if (__probable(validate_c_segs == FALSE)) {
		return;
	}
	if (c_seg->c_firstemptyslot < c_seg->c_nextslot) {
		c_indx = c_seg->c_firstemptyslot;
		cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);

		if (cs == NULL) {
			panic("c_seg_validate:  no slot backing c_firstemptyslot");
		}

		if (cs->c_size) {
			panic("c_seg_validate:  c_firstemptyslot has non-zero size (%d)\n", cs->c_size);
		}
	}
	bytes_used = 0;

	for (c_indx = 0; c_indx < c_seg->c_nextslot; c_indx++) {
		cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);

		c_size = UNPACK_C_SIZE(cs);

		c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;

		bytes_used += c_rounded_size;

#if CHECKSUM_THE_COMPRESSED_DATA
		unsigned csvhash;
		if (c_size && cs->c_hash_compressed_data != (csvhash = vmc_hash((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size))) {
			addr64_t csvphys = kvtophys((vm_offset_t)&c_seg->c_store.c_buffer[cs->c_offset]);
			panic("Compressed data doesn't match original %p phys: 0x%llx %d %p %d %d 0x%x 0x%x", c_seg, csvphys, cs->c_offset, cs, c_indx, c_size, cs->c_hash_compressed_data, csvhash);
		}
#endif
#if POPCOUNT_THE_COMPRESSED_DATA
		unsigned csvpop;
		if (c_size) {
			uintptr_t csvaddr = (uintptr_t) &c_seg->c_store.c_buffer[cs->c_offset];
			if (cs->c_pop_cdata != (csvpop = vmc_pop(csvaddr, c_size))) {
				panic("Compressed data popcount doesn't match original, bit distance: %d %p (phys: %p) %p %p 0x%llx 0x%x 0x%x 0x%x", (csvpop - cs->c_pop_cdata), (void *)csvaddr, (void *) kvtophys(csvaddr), c_seg, cs, (uint64_t)cs->c_offset, c_size, csvpop, cs->c_pop_cdata);
			}
		}
#endif
	}

	if (bytes_used != c_seg->c_bytes_used) {
		panic("c_seg_validate: bytes_used mismatch - found %d, segment has %d\n", bytes_used, c_seg->c_bytes_used);
	}

	if (c_seg->c_bytes_used > C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset)) {
		panic("c_seg_validate: c_bytes_used > c_nextoffset - c_nextoffset = %d,  c_bytes_used = %d\n",
		    (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used);
	}

	if (must_be_compact) {
		if (c_seg->c_bytes_used != C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset)) {
			panic("c_seg_validate: c_bytes_used doesn't match c_nextoffset - c_nextoffset = %d,  c_bytes_used = %d\n",
			    (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used);
		}
	}
}

#endif


void
c_seg_need_delayed_compaction(c_segment_t c_seg, boolean_t c_list_lock_held)
{
	boolean_t       clear_busy = FALSE;

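	/*
	 * Lock ordering is c_list_lock, then the segment lock.  We arrive
	 * here holding only c_seg->c_lock, so try c_list_lock
	 * opportunistically; if that fails, mark the segment busy so it
	 * can't be freed out from under us, drop its lock, and retake
	 * both locks in the canonical order.
	 */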
	if (c_list_lock_held == FALSE) {
		if (!lck_mtx_try_lock_spin_always(c_list_lock)) {
			C_SEG_BUSY(c_seg);

			lck_mtx_unlock_always(&c_seg->c_lock);
			lck_mtx_lock_spin_always(c_list_lock);
			lck_mtx_lock_spin_always(&c_seg->c_lock);

			clear_busy = TRUE;
		}
	}
	assert(c_seg->c_state != C_IS_FILLING);

	if (!c_seg->c_on_minorcompact_q && !(C_SEG_IS_ON_DISK_OR_SOQ(c_seg))) {
		queue_enter(&c_minor_list_head, c_seg, c_segment_t, c_list);
		c_seg->c_on_minorcompact_q = 1;
		c_minor_count++;
	}
	if (c_list_lock_held == FALSE) {
		lck_mtx_unlock_always(c_list_lock);
	}

	if (clear_busy == TRUE) {
		C_SEG_WAKEUP_DONE(c_seg);
	}
}


unsigned int c_seg_moved_to_sparse_list = 0;

void
c_seg_move_to_sparse_list(c_segment_t c_seg)
{
	boolean_t       clear_busy = FALSE;

	if (!lck_mtx_try_lock_spin_always(c_list_lock)) {
		C_SEG_BUSY(c_seg);

		lck_mtx_unlock_always(&c_seg->c_lock);
		lck_mtx_lock_spin_always(c_list_lock);
		lck_mtx_lock_spin_always(&c_seg->c_lock);

		clear_busy = TRUE;
	}
	c_seg_switch_state(c_seg, C_ON_SWAPPEDOUTSPARSE_Q, FALSE);

	c_seg_moved_to_sparse_list++;

	lck_mtx_unlock_always(c_list_lock);

	if (clear_busy == TRUE) {
		C_SEG_WAKEUP_DONE(c_seg);
	}
}

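/*
 * Insert c_seg into qhead, keeping the queue sorted by ascending
 * c_generation_id via a linear walk from the head.
 */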
void
c_seg_insert_into_q(queue_head_t *qhead, c_segment_t c_seg)
{
	c_segment_t c_seg_next;

	if (queue_empty(qhead)) {
		queue_enter(qhead, c_seg, c_segment_t, c_age_list);
	} else {
		c_seg_next = (c_segment_t)queue_first(qhead);

		while (TRUE) {
			if (c_seg->c_generation_id < c_seg_next->c_generation_id) {
				queue_insert_before(qhead, c_seg, c_seg_next, c_segment_t, c_age_list);
				break;
			}
			c_seg_next = (c_segment_t) queue_next(&c_seg_next->c_age_list);

			if (queue_end(qhead, (queue_entry_t) c_seg_next)) {
				queue_enter(qhead, c_seg, c_segment_t, c_age_list);
				break;
			}
		}
	}
}


int try_minor_compaction_failed = 0;
int try_minor_compaction_succeeded = 0;

void
c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg)
{
	assert(c_seg->c_on_minorcompact_q);
	/*
	 * c_seg is currently on the delayed minor compaction
	 * queue and we have c_seg locked... if we can get the
	 * c_list_lock w/o blocking (if we blocked we could deadlock
	 * because the lock order is c_list_lock then c_seg's lock)
	 * we'll pull it from the delayed list and free it directly
	 */
	if (!lck_mtx_try_lock_spin_always(c_list_lock)) {
		/*
		 * c_list_lock is held, we need to bail
		 */
		try_minor_compaction_failed++;

		lck_mtx_unlock_always(&c_seg->c_lock);
	} else {
		try_minor_compaction_succeeded++;

		C_SEG_BUSY(c_seg);
		c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, FALSE);
	}
}


int
c_seg_do_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy, boolean_t need_list_lock, boolean_t disallow_page_replacement)
{
	int     c_seg_freed;

	assert(c_seg->c_busy);
	assert(!C_SEG_IS_ON_DISK_OR_SOQ(c_seg));

	/*
	 * check for the case that can occur when we are not swapping
	 * and this segment has been major compacted in the past
	 * and moved to the majorcompact q to remove it from further
	 * consideration... if the occupancy falls too low we need
	 * to put it back on the age_q so that it will be considered
	 * in the next major compaction sweep... if we don't do this
	 * we will eventually run into the c_segments_limit
	 */
	if (c_seg->c_state == C_ON_MAJORCOMPACT_Q && C_SEG_SHOULD_MAJORCOMPACT_NOW(c_seg)) {
		c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
	}
	if (!c_seg->c_on_minorcompact_q) {
		if (clear_busy == TRUE) {
			C_SEG_WAKEUP_DONE(c_seg);
		}

		lck_mtx_unlock_always(&c_seg->c_lock);

		return 0;
	}
	queue_remove(&c_minor_list_head, c_seg, c_segment_t, c_list);
	c_seg->c_on_minorcompact_q = 0;
	c_minor_count--;

	lck_mtx_unlock_always(c_list_lock);

	if (disallow_page_replacement == TRUE) {
		lck_mtx_unlock_always(&c_seg->c_lock);

		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(&c_seg->c_lock);
	}
	c_seg_freed = c_seg_minor_compaction_and_unlock(c_seg, clear_busy);

	if (disallow_page_replacement == TRUE) {
		PAGE_REPLACEMENT_DISALLOWED(FALSE);
	}

	if (need_list_lock == TRUE) {
		lck_mtx_lock_spin_always(c_list_lock);
	}

	return c_seg_freed;
}

void
kdp_compressor_busy_find_owner(event64_t wait_event, thread_waitinfo_t *waitinfo)
{
	c_segment_t c_seg = (c_segment_t) wait_event;

	waitinfo->owner = thread_tid(c_seg->c_busy_for_thread);
	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(c_seg);
}

#if DEVELOPMENT || DEBUG
int
do_cseg_wedge_thread(void)
{
	struct c_segment c_seg;
	c_seg.c_busy_for_thread = current_thread();

	debug_cseg_wait_event = (event_t) &c_seg;

	thread_set_pending_block_hint(current_thread(), kThreadWaitCompressor);
	assert_wait((event_t) (&c_seg), THREAD_INTERRUPTIBLE);

	thread_block(THREAD_CONTINUE_NULL);

	return 0;
}

int
do_cseg_unwedge_thread(void)
{
	thread_wakeup(debug_cseg_wait_event);
	debug_cseg_wait_event = NULL;

	return 0;
}
#endif /* DEVELOPMENT || DEBUG */

void
c_seg_wait_on_busy(c_segment_t c_seg)
{
	c_seg->c_wanted = 1;

	thread_set_pending_block_hint(current_thread(), kThreadWaitCompressor);
	assert_wait((event_t) (c_seg), THREAD_UNINT);

	lck_mtx_unlock_always(&c_seg->c_lock);
	thread_block(THREAD_CONTINUE_NULL);
}

#if CONFIG_FREEZE
/*
 * We don't have the task lock held while updating the task's
 * c_seg queues. We can do that because of the following restrictions:
 *
 * - SINGLE FREEZER CONTEXT:
 *   We 'insert' c_segs into the task list on the task_freeze path.
 *   There can only be one such freeze in progress and the task
 *   isn't disappearing because we have the VM map lock held throughout
 *   and we have a reference on the proc too.
 *
 * - SINGLE TASK DISOWN CONTEXT:
 *   We 'disown' c_segs of a task ONLY from the task_terminate context. So
 *   we don't need the task lock but we need the c_list_lock and the
 *   compressor master lock (shared). We also hold the individual
 *   c_seg locks (exclusive).
 *
 *   If we either:
 *   - can't get the c_seg lock on a try, then we start again because maybe
 *   the c_seg is part of a compaction and might get freed. So we can't trust
 *   that linkage and need to restart our queue traversal.
 *   - OR, we run into a busy c_seg (say being swapped in or free-ing) we
 *   drop all locks again and wait and restart our queue traversal.
 *
 * - The new_owner_task below is currently only the kernel or NULL.
 *
 */
void
c_seg_update_task_owner(c_segment_t c_seg, task_t new_owner_task)
{
	task_t          owner_task = c_seg->c_task_owner;
	uint64_t        uncompressed_bytes = ((c_seg->c_slots_used) * PAGE_SIZE_64);

	LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&c_seg->c_lock, LCK_MTX_ASSERT_OWNED);

	if (owner_task) {
		task_update_frozen_to_swap_acct(owner_task, uncompressed_bytes, DEBIT_FROM_SWAP);
		queue_remove(&owner_task->task_frozen_cseg_q, c_seg,
		    c_segment_t, c_task_list_next_cseg);
	}

	if (new_owner_task) {
		queue_enter(&new_owner_task->task_frozen_cseg_q, c_seg,
		    c_segment_t, c_task_list_next_cseg);
		task_update_frozen_to_swap_acct(new_owner_task, uncompressed_bytes, CREDIT_TO_SWAP);
	}

	c_seg->c_task_owner = new_owner_task;
}

void
task_disown_frozen_csegs(task_t owner_task)
{
	c_segment_t c_seg = NULL, next_cseg = NULL;

again:
	PAGE_REPLACEMENT_DISALLOWED(TRUE);
	lck_mtx_lock_spin_always(c_list_lock);

	for (c_seg = (c_segment_t) queue_first(&owner_task->task_frozen_cseg_q);
	    !queue_end(&owner_task->task_frozen_cseg_q, (queue_entry_t) c_seg);
	    c_seg = next_cseg) {
		next_cseg = (c_segment_t) queue_next(&c_seg->c_task_list_next_cseg);

		if (!lck_mtx_try_lock_spin_always(&c_seg->c_lock)) {
			lck_mtx_unlock(c_list_lock);
			PAGE_REPLACEMENT_DISALLOWED(FALSE);
			goto again;
		}

		if (c_seg->c_busy) {
			lck_mtx_unlock(c_list_lock);
			PAGE_REPLACEMENT_DISALLOWED(FALSE);

			c_seg_wait_on_busy(c_seg);

			goto again;
		}
		assert(c_seg->c_task_owner == owner_task);
		c_seg_update_task_owner(c_seg, kernel_task);
		lck_mtx_unlock_always(&c_seg->c_lock);
	}

	lck_mtx_unlock(c_list_lock);
	PAGE_REPLACEMENT_DISALLOWED(FALSE);
}
#endif /* CONFIG_FREEZE */

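/*
 * Summary of the segment lifecycle, as encoded by the state
 * assertions below:
 *
 *	C_IS_EMPTY -> C_IS_FILLING -> C_ON_AGE_Q
 *	C_ON_AGE_Q -> C_ON_MAJORCOMPACT_Q (compacted, parked) or
 *	C_ON_AGE_Q -> C_ON_SWAPOUT_Q -> C_ON_SWAPIO_Q -> C_ON_SWAPPEDOUT_Q
 *	C_ON_SWAPPEDOUT_Q -> C_ON_SWAPPEDOUTSPARSE_Q (mostly empty on disk)
 *	swapped-out segments come back via C_ON_SWAPPEDIN_Q -> C_ON_AGE_Q
 *
 * C_ON_BAD_Q collects segments coming off the swapped-out queues that
 * can no longer be used, and most states may transition to C_IS_FREE
 * once a segment is emptied.
 */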
void
c_seg_switch_state(c_segment_t c_seg, int new_state, boolean_t insert_head)
{
	int     old_state = c_seg->c_state;

#if XNU_TARGET_OS_OSX
#if     DEVELOPMENT || DEBUG
	if (new_state != C_IS_FILLING) {
		LCK_MTX_ASSERT(&c_seg->c_lock, LCK_MTX_ASSERT_OWNED);
	}
	LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED);
#endif
#endif /* XNU_TARGET_OS_OSX */
	switch (old_state) {
	case C_IS_EMPTY:
		assert(new_state == C_IS_FILLING || new_state == C_IS_FREE);

		c_empty_count--;
		break;

	case C_IS_FILLING:
		assert(new_state == C_ON_AGE_Q || new_state == C_ON_SWAPOUT_Q);

		queue_remove(&c_filling_list_head, c_seg, c_segment_t, c_age_list);
		c_filling_count--;
		break;

	case C_ON_AGE_Q:
		assert(new_state == C_ON_SWAPOUT_Q || new_state == C_ON_MAJORCOMPACT_Q ||
		    new_state == C_IS_FREE);

		queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list);
		c_age_count--;
		break;

	case C_ON_SWAPPEDIN_Q:
		assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE);

		queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
		c_swappedin_count--;
		break;

	case C_ON_SWAPOUT_Q:
		assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE || new_state == C_IS_EMPTY || new_state == C_ON_SWAPIO_Q);

#if CONFIG_FREEZE
		if (c_seg->c_task_owner && (new_state != C_ON_SWAPIO_Q)) {
			c_seg_update_task_owner(c_seg, NULL);
		}
#endif /* CONFIG_FREEZE */

		queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
		thread_wakeup((event_t)&compaction_swapper_running);
		c_swapout_count--;
		break;

	case C_ON_SWAPIO_Q:
		assert(new_state == C_ON_SWAPPEDOUT_Q || new_state == C_ON_SWAPPEDOUTSPARSE_Q || new_state == C_ON_AGE_Q);

		queue_remove(&c_swapio_list_head, c_seg, c_segment_t, c_age_list);
		c_swapio_count--;
		break;

	case C_ON_SWAPPEDOUT_Q:
		assert(new_state == C_ON_SWAPPEDIN_Q || new_state == C_ON_AGE_Q ||
		    new_state == C_ON_SWAPPEDOUTSPARSE_Q ||
		    new_state == C_ON_BAD_Q || new_state == C_IS_EMPTY || new_state == C_IS_FREE);

		queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
		c_swappedout_count--;
		break;

	case C_ON_SWAPPEDOUTSPARSE_Q:
		assert(new_state == C_ON_SWAPPEDIN_Q || new_state == C_ON_AGE_Q ||
		    new_state == C_ON_BAD_Q || new_state == C_IS_EMPTY || new_state == C_IS_FREE);

		queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
		c_swappedout_sparse_count--;
		break;

	case C_ON_MAJORCOMPACT_Q:
		assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE);

		queue_remove(&c_major_list_head, c_seg, c_segment_t, c_age_list);
		c_major_count--;
		break;

	case C_ON_BAD_Q:
		assert(new_state == C_IS_FREE);

		queue_remove(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
		c_bad_count--;
		break;

	default:
		panic("c_seg %p has bad c_state = %d\n", c_seg, old_state);
	}

	switch (new_state) {
	case C_IS_FREE:
		assert(old_state != C_IS_FILLING);

		break;

	case C_IS_EMPTY:
		assert(old_state == C_ON_SWAPOUT_Q || old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);

		c_empty_count++;
		break;

	case C_IS_FILLING:
		assert(old_state == C_IS_EMPTY);

		queue_enter(&c_filling_list_head, c_seg, c_segment_t, c_age_list);
		c_filling_count++;
		break;

	case C_ON_AGE_Q:
		assert(old_state == C_IS_FILLING || old_state == C_ON_SWAPPEDIN_Q ||
		    old_state == C_ON_SWAPOUT_Q || old_state == C_ON_SWAPIO_Q ||
		    old_state == C_ON_MAJORCOMPACT_Q || old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);

		if (old_state == C_IS_FILLING) {
			queue_enter(&c_age_list_head, c_seg, c_segment_t, c_age_list);
		} else {
			if (!queue_empty(&c_age_list_head)) {
				c_segment_t     c_first;

				c_first = (c_segment_t)queue_first(&c_age_list_head);
				c_seg->c_creation_ts = c_first->c_creation_ts;
			}
			queue_enter_first(&c_age_list_head, c_seg, c_segment_t, c_age_list);
		}
		c_age_count++;
		break;

	case C_ON_SWAPPEDIN_Q:
		assert(old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);

		if (insert_head == TRUE) {
			queue_enter_first(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
		} else {
			queue_enter(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
		}
		c_swappedin_count++;
		break;

	case C_ON_SWAPOUT_Q:
		assert(old_state == C_ON_AGE_Q || old_state == C_IS_FILLING);

		if (insert_head == TRUE) {
			queue_enter_first(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
		} else {
			queue_enter(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
		}
		c_swapout_count++;
		break;

	case C_ON_SWAPIO_Q:
		assert(old_state == C_ON_SWAPOUT_Q);

		if (insert_head == TRUE) {
			queue_enter_first(&c_swapio_list_head, c_seg, c_segment_t, c_age_list);
		} else {
			queue_enter(&c_swapio_list_head, c_seg, c_segment_t, c_age_list);
		}
		c_swapio_count++;
		break;

	case C_ON_SWAPPEDOUT_Q:
		assert(old_state == C_ON_SWAPIO_Q);

		if (insert_head == TRUE) {
			queue_enter_first(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
		} else {
			queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
		}
		c_swappedout_count++;
		break;

	case C_ON_SWAPPEDOUTSPARSE_Q:
		assert(old_state == C_ON_SWAPIO_Q || old_state == C_ON_SWAPPEDOUT_Q);

		if (insert_head == TRUE) {
			queue_enter_first(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
		} else {
			queue_enter(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
		}

		c_swappedout_sparse_count++;
		break;

	case C_ON_MAJORCOMPACT_Q:
		assert(old_state == C_ON_AGE_Q);

		if (insert_head == TRUE) {
			queue_enter_first(&c_major_list_head, c_seg, c_segment_t, c_age_list);
		} else {
			queue_enter(&c_major_list_head, c_seg, c_segment_t, c_age_list);
		}
		c_major_count++;
		break;

	case C_ON_BAD_Q:
		assert(old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);

		if (insert_head == TRUE) {
			queue_enter_first(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
		} else {
			queue_enter(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
		}
		c_bad_count++;
		break;

	default:
		panic("c_seg %p requesting bad c_state = %d\n", c_seg, new_state);
	}
	c_seg->c_state = new_state;
}



void
c_seg_free(c_segment_t c_seg)
{
	assert(c_seg->c_busy);

	lck_mtx_unlock_always(&c_seg->c_lock);
	lck_mtx_lock_spin_always(c_list_lock);
	lck_mtx_lock_spin_always(&c_seg->c_lock);

	c_seg_free_locked(c_seg);
}


void
c_seg_free_locked(c_segment_t c_seg)
{
	int             segno;
	int             pages_populated = 0;
	int32_t         *c_buffer = NULL;
	uint64_t        c_swap_handle = 0;

	assert(c_seg->c_busy);
	assert(c_seg->c_slots_used == 0);
	assert(!c_seg->c_on_minorcompact_q);
	assert(!c_seg->c_busy_swapping);

	if (c_seg->c_overage_swap == TRUE) {
		c_overage_swapped_count--;
		c_seg->c_overage_swap = FALSE;
	}
	if (!(C_SEG_IS_ONDISK(c_seg))) {
		c_buffer = c_seg->c_store.c_buffer;
	} else {
		c_swap_handle = c_seg->c_store.c_swap_handle;
	}

	c_seg_switch_state(c_seg, C_IS_FREE, FALSE);

	if (c_buffer) {
		pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE;
		c_seg->c_store.c_buffer = NULL;
	} else {
#if CONFIG_FREEZE
		c_seg_update_task_owner(c_seg, NULL);
#endif /* CONFIG_FREEZE */

		c_seg->c_store.c_swap_handle = (uint64_t)-1;
	}

	lck_mtx_unlock_always(&c_seg->c_lock);

	lck_mtx_unlock_always(c_list_lock);

	if (c_buffer) {
		if (pages_populated) {
			kernel_memory_depopulate(compressor_map, (vm_offset_t)c_buffer,
			    pages_populated * PAGE_SIZE, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
		}
	} else if (c_swap_handle) {
		/*
		 * Free swap space on disk.
		 */
		vm_swap_free(c_swap_handle);
	}
	lck_mtx_lock_spin_always(&c_seg->c_lock);
	/*
	 * c_seg must remain busy until
	 * after the call to vm_swap_free
	 */
	C_SEG_WAKEUP_DONE(c_seg);
	lck_mtx_unlock_always(&c_seg->c_lock);

	segno = c_seg->c_mysegno;

	lck_mtx_lock_spin_always(c_list_lock);
	/*
	 * because the c_buffer is now associated with the segno,
	 * we can't put the segno back on the free list until
	 * after we have depopulated the c_buffer range, or
	 * we run the risk of depopulating a range that is
	 * now being used in one of the compressor heads
	 */
	c_segments[segno].c_segno = c_free_segno_head;
	c_free_segno_head = segno;
	c_segment_count--;

	lck_mtx_unlock_always(c_list_lock);

	lck_mtx_destroy(&c_seg->c_lock, &vm_compressor_lck_grp);

	if (c_seg->c_slot_var_array_len) {
		kheap_free(KHEAP_DATA_BUFFERS, c_seg->c_slot_var_array,
		    sizeof(struct c_slot) * c_seg->c_slot_var_array_len);
	}

	zfree(compressor_segment_zone, c_seg);
}

#if DEVELOPMENT || DEBUG
int c_seg_trim_page_count = 0;
#endif

1635  void
1636  c_seg_trim_tail(c_segment_t c_seg)
1637  {
1638  	c_slot_t        cs;
1639  	uint32_t        c_size;
1640  	uint32_t        c_offset;
1641  	uint32_t        c_rounded_size;
1642  	uint16_t        current_nextslot;
1643  	uint32_t        current_populated_offset;
1644  
1645  	if (c_seg->c_bytes_used == 0) {
1646  		return;
1647  	}
1648  	current_nextslot = c_seg->c_nextslot;
1649  	current_populated_offset = c_seg->c_populated_offset;
1650  
1651  	while (c_seg->c_nextslot) {
1652  		cs = C_SEG_SLOT_FROM_INDEX(c_seg, (c_seg->c_nextslot - 1));
1653  
1654  		c_size = UNPACK_C_SIZE(cs);
1655  
1656  		if (c_size) {
1657  			if (current_nextslot != c_seg->c_nextslot) {
1658  				c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
1659  				c_offset = cs->c_offset + C_SEG_BYTES_TO_OFFSET(c_rounded_size);
1660  
1661  				c_seg->c_nextoffset = c_offset;
1662  				c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) &
1663  				    ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1);
1664  
1665  				if (c_seg->c_firstemptyslot > c_seg->c_nextslot) {
1666  					c_seg->c_firstemptyslot = c_seg->c_nextslot;
1667  				}
1668  #if DEVELOPMENT || DEBUG
1669  				c_seg_trim_page_count += ((round_page_32(C_SEG_OFFSET_TO_BYTES(current_populated_offset)) -
1670  				    round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) /
1671  				    PAGE_SIZE);
1672  #endif
1673  			}
1674  			break;
1675  		}
1676  		c_seg->c_nextslot--;
1677  	}
1678  	assert(c_seg->c_nextslot);
1679  }
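
/*
 * Illustrative sketch (not part of this file): a minimal user-space model of
 * the tail trim above.  A slot whose size is 0 is unused; trimming walks
 * backward from the last slot, drops trailing empty slots, and recomputes the
 * next offset from the last live slot, rounded up to the alignment and page
 * granularities.  All toy_* names and the 16-byte "page" are assumptions made
 * only for this example, which is fenced off so it is never compiled.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define TOY_ALIGN 4u            /* stands in for C_SEG_OFFSET_ALIGNMENT_MASK + 1 */
#define TOY_PAGE  16u           /* toy page size for the populated-offset rounding */

struct toy_slot {
	uint32_t offset;        /* byte offset of the compressed data */
	uint32_t size;          /* 0 means the slot is unused */
};

/* returns the new populated offset, page-rounded from the new next offset */
static uint32_t
toy_trim_tail(struct toy_slot *slots, uint32_t *nextslot, uint32_t *nextoffset)
{
	while (*nextslot) {
		struct toy_slot *cs = &slots[*nextslot - 1];

		if (cs->size) {
			uint32_t rounded = (cs->size + (TOY_ALIGN - 1)) & ~(TOY_ALIGN - 1);

			*nextoffset = cs->offset + rounded;
			return (*nextoffset + (TOY_PAGE - 1)) & ~(TOY_PAGE - 1);
		}
		(*nextslot)--;          /* trailing empty slot: drop it */
	}
	*nextoffset = 0;
	return 0;
}

int
main(void)
{
	struct toy_slot slots[4] = {{0, 10}, {12, 6}, {20, 0}, {24, 0}};
	uint32_t nextslot = 4, nextoffset = 28;

	uint32_t populated = toy_trim_tail(slots, &nextslot, &nextoffset);
	printf("nextslot=%u nextoffset=%u populated=%u\n", nextslot, nextoffset, populated);
	/* prints: nextslot=2 nextoffset=20 populated=32 */
	return 0;
}
#endif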
1680  
1681  
1682  int
1683  c_seg_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy)
1684  {
1685  	c_slot_mapping_t slot_ptr;
1686  	uint32_t        c_offset = 0;
1687  	uint32_t        old_populated_offset;
1688  	uint32_t        c_rounded_size;
1689  	uint32_t        c_size;
1690  	uint16_t        c_indx = 0;
1691  	int             i;
1692  	c_slot_t        c_dst;
1693  	c_slot_t        c_src;
1694  
1695  	assert(c_seg->c_busy);
1696  
1697  #if VALIDATE_C_SEGMENTS
1698  	c_seg_validate(c_seg, FALSE);
1699  #endif
1700  	if (c_seg->c_bytes_used == 0) {
1701  		c_seg_free(c_seg);
1702  		return 1;
1703  	}
1704  	lck_mtx_unlock_always(&c_seg->c_lock);
1705  
1706  	if (c_seg->c_firstemptyslot >= c_seg->c_nextslot || C_SEG_UNUSED_BYTES(c_seg) < PAGE_SIZE) {
1707  		goto done;
1708  	}
1709  
1710  /* TODO: assert first emptyslot's c_size is actually 0 */
1711  
1712  #if DEVELOPMENT || DEBUG
1713  	C_SEG_MAKE_WRITEABLE(c_seg);
1714  #endif
1715  
1716  #if VALIDATE_C_SEGMENTS
1717  	c_seg->c_was_minor_compacted++;
1718  #endif
1719  	c_indx = c_seg->c_firstemptyslot;
1720  	c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
1721  
1722  	old_populated_offset = c_seg->c_populated_offset;
1723  	c_offset = c_dst->c_offset;
1724  
1725  	for (i = c_indx + 1; i < c_seg->c_nextslot && c_offset < c_seg->c_nextoffset; i++) {
1726  		c_src = C_SEG_SLOT_FROM_INDEX(c_seg, i);
1727  
1728  		c_size = UNPACK_C_SIZE(c_src);
1729  
1730  		if (c_size == 0) {
1731  			continue;
1732  		}
1733  
1734  		c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
1735  /* N.B.: This memcpy may be an overlapping copy */
1736  		memcpy(&c_seg->c_store.c_buffer[c_offset], &c_seg->c_store.c_buffer[c_src->c_offset], c_rounded_size);
1737  
1738  		cslot_copy(c_dst, c_src);
1739  		c_dst->c_offset = c_offset;
1740  
1741  		slot_ptr = C_SLOT_UNPACK_PTR(c_dst);
1742  		slot_ptr->s_cindx = c_indx;
1743  
1744  		c_offset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
1745  		PACK_C_SIZE(c_src, 0);
1746  		c_indx++;
1747  
1748  		c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
1749  	}
1750  	c_seg->c_firstemptyslot = c_indx;
1751  	c_seg->c_nextslot = c_indx;
1752  	c_seg->c_nextoffset = c_offset;
1753  	c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1);
1754  	c_seg->c_bytes_unused = 0;
1755  
1756  #if VALIDATE_C_SEGMENTS
1757  	c_seg_validate(c_seg, TRUE);
1758  #endif
1759  	if (old_populated_offset > c_seg->c_populated_offset) {
1760  		uint32_t        gc_size;
1761  		int32_t         *gc_ptr;
1762  
1763  		gc_size = C_SEG_OFFSET_TO_BYTES(old_populated_offset - c_seg->c_populated_offset);
1764  		gc_ptr = &c_seg->c_store.c_buffer[c_seg->c_populated_offset];
1765  
1766  		kernel_memory_depopulate(compressor_map, (vm_offset_t)gc_ptr, gc_size,
1767  		    KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
1768  	}
1769  
1770  #if DEVELOPMENT || DEBUG
1771  	C_SEG_WRITE_PROTECT(c_seg);
1772  #endif
1773  
1774  done:
1775  	if (clear_busy == TRUE) {
1776  		lck_mtx_lock_spin_always(&c_seg->c_lock);
1777  		C_SEG_WAKEUP_DONE(c_seg);
1778  		lck_mtx_unlock_always(&c_seg->c_lock);
1779  	}
1780  	return 0;
1781  }
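
/*
 * Illustrative sketch (not part of this file): the core of a minor compaction
 * is to slide every live slot's data down over the holes left by freed slots
 * and then shrink the next/populated offsets, as the routine above does
 * in-place on the segment buffer.  Below is a self-contained user-space model
 * of that loop; it uses memmove because the regions can overlap, and every
 * toy_* name is an assumption made only for this example, which is fenced off
 * so it is never compiled.
 */
#if 0
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_slot {
	uint32_t offset;
	uint32_t size;          /* 0 == slot freed */
};

/* returns the new next offset (== bytes used after compaction) */
static uint32_t
toy_minor_compact(uint8_t *buf, struct toy_slot *slots, uint32_t nslots)
{
	uint32_t dst = 0;       /* index of the next destination slot */
	uint32_t off = 0;       /* next free byte offset in buf */

	for (uint32_t i = 0; i < nslots; i++) {
		if (slots[i].size == 0) {
			continue;                       /* skip holes */
		}
		memmove(&buf[off], &buf[slots[i].offset], slots[i].size);
		slots[dst].offset = off;
		slots[dst].size = slots[i].size;
		off += slots[i].size;
		dst++;
	}
	for (uint32_t i = dst; i < nslots; i++) {       /* clear trailing slots */
		slots[i].size = 0;
	}
	return off;
}

int
main(void)
{
	uint8_t buf[32] = "AAAA....BBBBBBBB....CCCC";
	struct toy_slot slots[3] = {{0, 4}, {8, 0}, {20, 4}};   /* middle slot freed */

	uint32_t used = toy_minor_compact(buf, slots, 3);
	printf("used=%u data=%.*s\n", used, (int)used, (char *)buf);    /* used=8 data=AAAACCCC */
	return 0;
}
#endif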
1782  
1783  
1784  static void
1785  c_seg_alloc_nextslot(c_segment_t c_seg)
1786  {
1787  	struct c_slot   *old_slot_array = NULL;
1788  	struct c_slot   *new_slot_array = NULL;
1789  	int             newlen;
1790  	int             oldlen;
1791  
1792  	if (c_seg->c_nextslot < c_seg_fixed_array_len) {
1793  		return;
1794  	}
1795  
1796  	if ((c_seg->c_nextslot - c_seg_fixed_array_len) >= c_seg->c_slot_var_array_len) {
1797  		oldlen = c_seg->c_slot_var_array_len;
1798  		old_slot_array = c_seg->c_slot_var_array;
1799  
1800  		if (oldlen == 0) {
1801  			newlen = C_SEG_SLOT_VAR_ARRAY_MIN_LEN;
1802  		} else {
1803  			newlen = oldlen * 2;
1804  		}
1805  
1806  		new_slot_array = kheap_alloc(KHEAP_DATA_BUFFERS,
1807  		    sizeof(struct c_slot) * newlen, Z_WAITOK);
1808  
1809  		lck_mtx_lock_spin_always(&c_seg->c_lock);
1810  
1811  		if (old_slot_array) {
1812  			memcpy(new_slot_array, old_slot_array,
1813  			    sizeof(struct c_slot) * oldlen);
1814  		}
1815  
1816  		c_seg->c_slot_var_array_len = newlen;
1817  		c_seg->c_slot_var_array = new_slot_array;
1818  
1819  		lck_mtx_unlock_always(&c_seg->c_lock);
1820  
1821  		if (old_slot_array) {
1822  			kheap_free(KHEAP_DATA_BUFFERS, old_slot_array,
1823  			    sizeof(struct c_slot) * oldlen);
1824  		}
1825  	}
1826  }
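
/*
 * Illustrative sketch (not part of this file): the variable slot array grows
 * geometrically -- a fixed minimum the first time, then doubling -- so the
 * amortized cost of appending a slot stays constant and reallocations stay
 * rare.  A minimal user-space model of that growth policy follows, with
 * malloc/free standing in for kheap_alloc/kheap_free; the toy_* names are
 * assumptions made only for this example, which is fenced off so it is never
 * compiled.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TOY_MIN_LEN 4u          /* stands in for C_SEG_SLOT_VAR_ARRAY_MIN_LEN */

struct toy_slot { unsigned offset, size; };

struct toy_slot_array {
	struct toy_slot *slots;
	unsigned         len;
};

/* make sure index 'want' is usable, growing by doubling if needed */
static int
toy_ensure_slot(struct toy_slot_array *a, unsigned want)
{
	if (want < a->len) {
		return 0;
	}
	unsigned newlen = a->len ? a->len * 2 : TOY_MIN_LEN;
	while (newlen <= want) {
		newlen *= 2;
	}
	struct toy_slot *ns = malloc(sizeof(*ns) * newlen);
	if (ns == NULL) {
		return -1;
	}
	if (a->slots) {
		memcpy(ns, a->slots, sizeof(*ns) * a->len);     /* copy, then retire the old array */
		free(a->slots);
	}
	a->slots = ns;
	a->len = newlen;
	return 0;
}

int
main(void)
{
	struct toy_slot_array a = { NULL, 0 };

	for (unsigned i = 0; i < 40; i++) {
		if (toy_ensure_slot(&a, i) != 0) {
			return 1;
		}
		a.slots[i].offset = i;
		a.slots[i].size = 1;
	}
	printf("len=%u\n", a.len);      /* len=64: grew 4 -> 8 -> 16 -> 32 -> 64 */
	free(a.slots);
	return 0;
}
#endif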
1827  
1828  
1829  #define C_SEG_MAJOR_COMPACT_STATS_MAX   (30)
1830  
1831  struct {
1832  	uint64_t asked_permission;
1833  	uint64_t compactions;
1834  	uint64_t moved_slots;
1835  	uint64_t moved_bytes;
1836  	uint64_t wasted_space_in_swapouts;
1837  	uint64_t count_of_swapouts;
1838  	uint64_t count_of_freed_segs;
1839  	uint64_t bailed_compactions;
1840  	uint64_t bytes_freed_rate_us;
1841  } c_seg_major_compact_stats[C_SEG_MAJOR_COMPACT_STATS_MAX];
1842  
1843  int c_seg_major_compact_stats_now = 0;
1844  
1845  
1846  #define C_MAJOR_COMPACTION_SIZE_APPROPRIATE     ((C_SEG_BUFSIZE * 90) / 100)
1847  
1848  
1849  boolean_t
1850  c_seg_major_compact_ok(
1851  	c_segment_t c_seg_dst,
1852  	c_segment_t c_seg_src)
1853  {
1854  	c_seg_major_compact_stats[c_seg_major_compact_stats_now].asked_permission++;
1855  
1856  	if (c_seg_src->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE &&
1857  	    c_seg_dst->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE) {
1858  		return FALSE;
1859  	}
1860  
1861  	if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
1862  		/*
1863  		 * destination segment is full... can't compact
1864  		 */
1865  		return FALSE;
1866  	}
1867  
1868  	return TRUE;
1869  }
1870  
1871  
1872  boolean_t
1873  c_seg_major_compact(
1874  	c_segment_t c_seg_dst,
1875  	c_segment_t c_seg_src)
1876  {
1877  	c_slot_mapping_t slot_ptr;
1878  	uint32_t        c_rounded_size;
1879  	uint32_t        c_size;
1880  	uint16_t        dst_slot;
1881  	int             i;
1882  	c_slot_t        c_dst;
1883  	c_slot_t        c_src;
1884  	boolean_t       keep_compacting = TRUE;
1885  
1886  	/*
1887  	 * segments are not locked but they are both marked c_busy
1888  	 * which keeps c_decompress from working on them...
1889  	 * we can safely allocate new pages, move compressed data
1890  	 * from c_seg_src to c_seg_dst and update both c_segment's
1891  	 * state w/o holding the master lock
1892  	 */
1893  #if DEVELOPMENT || DEBUG
1894  	C_SEG_MAKE_WRITEABLE(c_seg_dst);
1895  #endif
1896  
1897  #if VALIDATE_C_SEGMENTS
1898  	c_seg_dst->c_was_major_compacted++;
1899  	c_seg_src->c_was_major_donor++;
1900  #endif
1901  	c_seg_major_compact_stats[c_seg_major_compact_stats_now].compactions++;
1902  
1903  	dst_slot = c_seg_dst->c_nextslot;
1904  
1905  	for (i = 0; i < c_seg_src->c_nextslot; i++) {
1906  		c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, i);
1907  
1908  		c_size = UNPACK_C_SIZE(c_src);
1909  
1910  		if (c_size == 0) {
1911  			/* BATCH: move what we have so far; */
1912  			continue;
1913  		}
1914  
1915  		if (C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset - c_seg_dst->c_nextoffset) < (unsigned) c_size) {
1916  			int     size_to_populate;
1917  
1918  			/* doesn't fit */
1919  			size_to_populate = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset);
1920  
1921  			if (size_to_populate == 0) {
1922  				/* can't fit */
1923  				keep_compacting = FALSE;
1924  				break;
1925  			}
1926  			if (size_to_populate > C_SEG_MAX_POPULATE_SIZE) {
1927  				size_to_populate = C_SEG_MAX_POPULATE_SIZE;
1928  			}
1929  
1930  			kernel_memory_populate(compressor_map,
1931  			    (vm_offset_t) &c_seg_dst->c_store.c_buffer[c_seg_dst->c_populated_offset],
1932  			    size_to_populate,
1933  			    KMA_COMPRESSOR,
1934  			    VM_KERN_MEMORY_COMPRESSOR);
1935  
1936  			c_seg_dst->c_populated_offset += C_SEG_BYTES_TO_OFFSET(size_to_populate);
1937  			assert(C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) <= C_SEG_BUFSIZE);
1938  		}
1939  		c_seg_alloc_nextslot(c_seg_dst);
1940  
1941  		c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot);
1942  
1943  		memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size);
1944  
1945  		c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
1946  
1947  		c_seg_major_compact_stats[c_seg_major_compact_stats_now].moved_slots++;
1948  		c_seg_major_compact_stats[c_seg_major_compact_stats_now].moved_bytes += c_size;
1949  
1950  		cslot_copy(c_dst, c_src);
1951  		c_dst->c_offset = c_seg_dst->c_nextoffset;
1952  
1953  		if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot) {
1954  			c_seg_dst->c_firstemptyslot++;
1955  		}
1956  		c_seg_dst->c_slots_used++;
1957  		c_seg_dst->c_nextslot++;
1958  		c_seg_dst->c_bytes_used += c_rounded_size;
1959  		c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
1960  
1961  		PACK_C_SIZE(c_src, 0);
1962  
1963  		c_seg_src->c_bytes_used -= c_rounded_size;
1964  		c_seg_src->c_bytes_unused += c_rounded_size;
1965  		c_seg_src->c_firstemptyslot = 0;
1966  
1967  		assert(c_seg_src->c_slots_used);
1968  		c_seg_src->c_slots_used--;
1969  
1970  		if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
1971  			/* dest segment is now full */
1972  			keep_compacting = FALSE;
1973  			break;
1974  		}
1975  	}
1976  #if DEVELOPMENT || DEBUG
1977  	C_SEG_WRITE_PROTECT(c_seg_dst);
1978  #endif
1979  	if (dst_slot < c_seg_dst->c_nextslot) {
1980  		PAGE_REPLACEMENT_ALLOWED(TRUE);
1981  		/*
1982  		 * we've now locked out c_decompress from
1983  		 * converting the slot passed into it into
1984  		 * a c_segment_t which allows us to use
1985  		 * the backptr to change which c_segment and
1986  		 * index the slot points to
1987  		 */
1988  		while (dst_slot < c_seg_dst->c_nextslot) {
1989  			c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot);
1990  
1991  			slot_ptr = C_SLOT_UNPACK_PTR(c_dst);
1992  			/* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
1993  			slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1;
1994  			slot_ptr->s_cindx = dst_slot++;
1995  		}
1996  		PAGE_REPLACEMENT_ALLOWED(FALSE);
1997  	}
1998  	return keep_compacting;
1999  }
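
/*
 * Illustrative sketch (not part of this file): the essence of a major
 * compaction is to append every live slot of a donor segment to a destination
 * segment until the destination runs out of room, freeing the donor's copy as
 * it goes.  The back-pointer fix-up that the routine above performs under
 * PAGE_REPLACEMENT_ALLOWED is omitted here.  A minimal user-space model of
 * the copy loop follows; the toy_* names, fixed-size buffers and struct
 * layout are assumptions made only for this example, which is fenced off so
 * it is never compiled.
 */
#if 0
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOY_BUFSIZE  32u
#define TOY_NSLOTS   8u

struct toy_slot { uint32_t offset, size; };

struct toy_seg {
	uint8_t         buf[TOY_BUFSIZE];
	struct toy_slot slots[TOY_NSLOTS];
	uint32_t        nextslot;
	uint32_t        nextoffset;
};

/* returns 1 if every live slot of src was moved, 0 if dst filled up first */
static int
toy_major_compact(struct toy_seg *dst, struct toy_seg *src)
{
	for (uint32_t i = 0; i < src->nextslot; i++) {
		struct toy_slot *s = &src->slots[i];

		if (s->size == 0) {
			continue;                               /* already freed */
		}
		if (dst->nextslot == TOY_NSLOTS ||
		    dst->nextoffset + s->size > TOY_BUFSIZE) {
			return 0;                               /* destination is full */
		}
		memcpy(&dst->buf[dst->nextoffset], &src->buf[s->offset], s->size);
		dst->slots[dst->nextslot] = (struct toy_slot){ dst->nextoffset, s->size };
		dst->nextoffset += s->size;
		dst->nextslot++;
		s->size = 0;                                    /* the slot now lives in dst */
	}
	return 1;
}

int
main(void)
{
	struct toy_seg dst = { .nextslot = 0, .nextoffset = 0 };
	struct toy_seg src = { .nextslot = 2, .nextoffset = 10 };

	memcpy(src.buf, "HELLOworld", 10);
	src.slots[0] = (struct toy_slot){ 0, 5 };       /* "HELLO" */
	src.slots[1] = (struct toy_slot){ 5, 5 };       /* "world" */

	int done = toy_major_compact(&dst, &src);
	printf("done=%d dst=%.*s\n", done, (int)dst.nextoffset, (char *)dst.buf);
	/* prints: done=1 dst=HELLOworld */
	return 0;
}
#endif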
2000  
2001  
2002  uint64_t
2003  vm_compressor_compute_elapsed_msecs(clock_sec_t end_sec, clock_nsec_t end_nsec, clock_sec_t start_sec, clock_nsec_t start_nsec)
2004  {
2005  	uint64_t end_msecs;
2006  	uint64_t start_msecs;
2007  
2008  	end_msecs = (end_sec * 1000) + end_nsec / 1000000;
2009  	start_msecs = (start_sec * 1000) + start_nsec / 1000000;
2010  
2011  	return end_msecs - start_msecs;
2012  }
2013  
2014  
2015  
2016  uint32_t compressor_eval_period_in_msecs = 250;
2017  uint32_t compressor_sample_min_in_msecs = 500;
2018  uint32_t compressor_sample_max_in_msecs = 10000;
2019  uint32_t compressor_thrashing_threshold_per_10msecs = 50;
2020  uint32_t compressor_thrashing_min_per_10msecs = 20;
2021  
2022  /* When true, reset sample data next chance we get. */
2023  static boolean_t        compressor_need_sample_reset = FALSE;
2024  
2025  
2026  void
2027  compute_swapout_target_age(void)
2028  {
2029  	clock_sec_t     cur_ts_sec;
2030  	clock_nsec_t    cur_ts_nsec;
2031  	uint32_t        min_operations_needed_in_this_sample;
2032  	uint64_t        elapsed_msecs_in_eval;
2033  	uint64_t        elapsed_msecs_in_sample;
2034  	boolean_t       need_eval_reset = FALSE;
2035  
2036  	clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);
2037  
2038  	elapsed_msecs_in_sample = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_sample_period_sec, start_of_sample_period_nsec);
2039  
2040  	if (compressor_need_sample_reset ||
2041  	    elapsed_msecs_in_sample >= compressor_sample_max_in_msecs) {
2042  		compressor_need_sample_reset = TRUE;
2043  		need_eval_reset = TRUE;
2044  		goto done;
2045  	}
2046  	elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_eval_period_sec, start_of_eval_period_nsec);
2047  
2048  	if (elapsed_msecs_in_eval < compressor_eval_period_in_msecs) {
2049  		goto done;
2050  	}
2051  	need_eval_reset = TRUE;
2052  
2053  	KERNEL_DEBUG(0xe0400020 | DBG_FUNC_START, elapsed_msecs_in_eval, sample_period_compression_count, sample_period_decompression_count, 0, 0);
2054  
2055  	min_operations_needed_in_this_sample = (compressor_thrashing_min_per_10msecs * (uint32_t)elapsed_msecs_in_eval) / 10;
2056  
2057  	if ((sample_period_compression_count - last_eval_compression_count) < min_operations_needed_in_this_sample ||
2058  	    (sample_period_decompression_count - last_eval_decompression_count) < min_operations_needed_in_this_sample) {
2059  		KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_compression_count - last_eval_compression_count,
2060  		    sample_period_decompression_count - last_eval_decompression_count, 0, 1, 0);
2061  
2062  		swapout_target_age = 0;
2063  
2064  		compressor_need_sample_reset = TRUE;
2065  		need_eval_reset = TRUE;
2066  		goto done;
2067  	}
2068  	last_eval_compression_count = sample_period_compression_count;
2069  	last_eval_decompression_count = sample_period_decompression_count;
2070  
2071  	if (elapsed_msecs_in_sample < compressor_sample_min_in_msecs) {
2072  		KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, 0, 0, 5, 0);
2073  		goto done;
2074  	}
2075  	if (sample_period_decompression_count > ((compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10)) {
2076  		uint64_t        running_total;
2077  		uint64_t        working_target;
2078  		uint64_t        aging_target;
2079  		uint32_t        oldest_age_of_csegs_sampled = 0;
2080  		uint64_t        working_set_approximation = 0;
2081  
2082  		swapout_target_age = 0;
2083  
2084  		working_target = (sample_period_decompression_count / 100) * 95;                /* 95 percent */
2085  		aging_target = (sample_period_decompression_count / 100) * 1;                   /* 1 percent */
2086  		running_total = 0;
2087  
2088  		for (oldest_age_of_csegs_sampled = 0; oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE; oldest_age_of_csegs_sampled++) {
2089  			running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];
2090  
2091  			working_set_approximation += oldest_age_of_csegs_sampled * age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];
2092  
2093  			if (running_total >= working_target) {
2094  				break;
2095  			}
2096  		}
2097  		if (oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE) {
2098  			working_set_approximation = (working_set_approximation * 1000) / elapsed_msecs_in_sample;
2099  
2100  			if (working_set_approximation < VM_PAGE_COMPRESSOR_COUNT) {
2101  				running_total = overage_decompressions_during_sample_period;
2102  
2103  				for (oldest_age_of_csegs_sampled = DECOMPRESSION_SAMPLE_MAX_AGE - 1; oldest_age_of_csegs_sampled; oldest_age_of_csegs_sampled--) {
2104  					running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];
2105  
2106  					if (running_total >= aging_target) {
2107  						break;
2108  					}
2109  				}
2110  				swapout_target_age = (uint32_t)cur_ts_sec - oldest_age_of_csegs_sampled;
2111  
2112  				KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 2, 0);
2113  			} else {
2114  				KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 0, 3, 0);
2115  			}
2116  		} else {
2117  			KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_target, running_total, 0, 4, 0);
2118  		}
2119  
2120  		compressor_need_sample_reset = TRUE;
2121  		need_eval_reset = TRUE;
2122  	} else {
2123  		KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_decompression_count, (compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10, 0, 6, 0);
2124  	}
2125  done:
2126  	if (compressor_need_sample_reset == TRUE) {
2127  		bzero(age_of_decompressions_during_sample_period, sizeof(age_of_decompressions_during_sample_period));
2128  		overage_decompressions_during_sample_period = 0;
2129  
2130  		start_of_sample_period_sec = cur_ts_sec;
2131  		start_of_sample_period_nsec = cur_ts_nsec;
2132  		sample_period_decompression_count = 0;
2133  		sample_period_compression_count = 0;
2134  		last_eval_decompression_count = 0;
2135  		last_eval_compression_count = 0;
2136  		compressor_need_sample_reset = FALSE;
2137  	}
2138  	if (need_eval_reset == TRUE) {
2139  		start_of_eval_period_sec = cur_ts_sec;
2140  		start_of_eval_period_nsec = cur_ts_nsec;
2141  	}
2142  }
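
/*
 * Illustrative sketch (not part of this file): the heuristic above walks a
 * histogram of recent decompressions bucketed by segment age.  Walking
 * forward, it finds the youngest age that already accounts for ~95% of the
 * samples (the working target); walking backward from the oldest bucket, it
 * finds the age that accounts for the last ~1% (the aging target), which
 * becomes the swapout cutoff.  The working-set-fits-in-the-compressor check
 * is omitted here.  A minimal user-space model of the two walks follows; the
 * toy_* names and small bucket count are assumptions made only for this
 * example, which is fenced off so it is never compiled.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define TOY_MAX_AGE 8u

/* returns the aging-target bucket, i.e. how old a segment must be to be swapped */
static uint32_t
toy_swapout_target_age(const uint64_t hist[TOY_MAX_AGE], uint64_t total)
{
	uint64_t working_target = (total / 100) * 95;
	uint64_t aging_target   = (total / 100) * 1;
	uint64_t running = 0;
	uint32_t age;

	for (age = 0; age < TOY_MAX_AGE; age++) {       /* forward walk: 95% coverage */
		running += hist[age];
		if (running >= working_target) {
			break;
		}
	}
	if (age == TOY_MAX_AGE) {
		return 0;                               /* never reached the working target */
	}
	running = 0;
	for (age = TOY_MAX_AGE - 1; age; age--) {       /* backward walk: last 1% */
		running += hist[age];
		if (running >= aging_target) {
			break;
		}
	}
	return age;
}

int
main(void)
{
	/* most decompressions hit very young segments, a trickle hits old ones */
	uint64_t hist[TOY_MAX_AGE] = { 600, 250, 80, 40, 15, 10, 3, 2 };

	printf("cutoff age = %u\n", toy_swapout_target_age(hist, 1000));        /* cutoff age = 5 */
	return 0;
}
#endif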
2143  
2144  
2145  int             compaction_swapper_init_now = 0;
2146  int             compaction_swapper_running = 0;
2147  int             compaction_swapper_awakened = 0;
2148  int             compaction_swapper_abort = 0;
2149  
2150  
2151  #if CONFIG_JETSAM
2152  boolean_t       memorystatus_kill_on_VM_compressor_thrashing(boolean_t);
2153  boolean_t       memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);
2154  boolean_t       memorystatus_kill_on_FC_thrashing(boolean_t);
2155  int             compressor_thrashing_induced_jetsam = 0;
2156  int             filecache_thrashing_induced_jetsam = 0;
2157  static boolean_t        vm_compressor_thrashing_detected = FALSE;
2158  #endif /* CONFIG_JETSAM */
2159  
2160  static boolean_t
2161  compressor_needs_to_swap(void)
2162  {
2163  	boolean_t       should_swap = FALSE;
2164  
2165  	if (vm_swapout_ripe_segments == TRUE && c_overage_swapped_count < c_overage_swapped_limit) {
2166  		c_segment_t     c_seg;
2167  		clock_sec_t     now;
2168  		clock_sec_t     age;
2169  		clock_nsec_t    nsec;
2170  
2171  		clock_get_system_nanotime(&now, &nsec);
2172  		age = 0;
2173  
2174  		lck_mtx_lock_spin_always(c_list_lock);
2175  
2176  		if (!queue_empty(&c_age_list_head)) {
2177  			c_seg = (c_segment_t) queue_first(&c_age_list_head);
2178  
2179  			age = now - c_seg->c_creation_ts;
2180  		}
2181  		lck_mtx_unlock_always(c_list_lock);
2182  
2183  		if (age >= vm_ripe_target_age) {
2184  			should_swap = TRUE;
2185  			goto check_if_low_space;
2186  		}
2187  	}
2188  	if (VM_CONFIG_SWAP_IS_ACTIVE) {
2189  		if (COMPRESSOR_NEEDS_TO_SWAP()) {
2190  			should_swap = TRUE;
2191  			vmcs_stats.compressor_swap_threshold_exceeded++;
2192  			goto check_if_low_space;
2193  		}
2194  		if (VM_PAGE_Q_THROTTLED(&vm_pageout_queue_external) && vm_page_anonymous_count < (vm_page_inactive_count / 20)) {
2195  			should_swap = TRUE;
2196  			vmcs_stats.external_q_throttled++;
2197  			goto check_if_low_space;
2198  		}
2199  		if (vm_page_free_count < (vm_page_free_reserved - (COMPRESSOR_FREE_RESERVED_LIMIT * 2))) {
2200  			should_swap = TRUE;
2201  			vmcs_stats.free_count_below_reserve++;
2202  			goto check_if_low_space;
2203  		}
2204  	}
2205  
2206  #if (XNU_TARGET_OS_OSX && __arm64__)
2207  	/*
2208  	 * Thrashing detection disabled.
2209  	 */
2210  #else /* (XNU_TARGET_OS_OSX && __arm64__) */
2211  
2212  	compute_swapout_target_age();
2213  
2214  	if (swapout_target_age) {
2215  		c_segment_t     c_seg;
2216  
2217  		lck_mtx_lock_spin_always(c_list_lock);
2218  
2219  		if (!queue_empty(&c_age_list_head)) {
2220  			c_seg = (c_segment_t) queue_first(&c_age_list_head);
2221  
2222  			if (c_seg->c_creation_ts > swapout_target_age) {
2223  				swapout_target_age = 0;
2224  			}
2225  		}
2226  		lck_mtx_unlock_always(c_list_lock);
2227  	}
2228  #if CONFIG_PHANTOM_CACHE
2229  	if (vm_phantom_cache_check_pressure()) {
2230  		should_swap = TRUE;
2231  	}
2232  #endif
2233  	if (swapout_target_age) {
2234  		should_swap = TRUE;
2235  		vmcs_stats.thrashing_detected++;
2236  	}
2237  #endif /* (XNU_TARGET_OS_OSX && __arm64__) */
2238  
2239  check_if_low_space:
2240  
2241  #if CONFIG_JETSAM
2242  	if (should_swap || vm_compressor_low_on_space() == TRUE) {
2243  		if (vm_compressor_thrashing_detected == FALSE) {
2244  			vm_compressor_thrashing_detected = TRUE;
2245  
2246  			if (swapout_target_age) {
2247  				/* The compressor is thrashing. */
2248  				memorystatus_kill_on_VM_compressor_thrashing(TRUE /* async */);
2249  				compressor_thrashing_induced_jetsam++;
2250  			} else if (vm_compressor_low_on_space() == TRUE) {
2251  				/* The compressor is running low on space. */
2252  				memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
2253  				compressor_thrashing_induced_jetsam++;
2254  			} else {
2255  				memorystatus_kill_on_FC_thrashing(TRUE /* async */);
2256  				filecache_thrashing_induced_jetsam++;
2257  			}
2258  		}
2259  		/*
2260  		 * let the jetsam take precedence over
2261  		 * any major compactions we might have
2262  		 * been able to do... otherwise we run
2263  		 * the risk of doing major compactions
2264  		 * on segments we're about to free up
2265  		 * due to the jetsam activity.
2266  		 */
2267  		should_swap = FALSE;
2268  	}
2269  
2270  #else /* CONFIG_JETSAM */
2271  	if (should_swap && vm_swap_low_on_space()) {
2272  		vm_compressor_take_paging_space_action();
2273  	}
2274  #endif /* CONFIG_JETSAM */
2275  
2276  	if (should_swap == FALSE) {
2277  		/*
2278  		 * vm_compressor_needs_to_major_compact returns true only if we're
2279  		 * about to run out of available compressor segments... in this
2280  		 * case, we absolutely need to run a major compaction even if
2281  		 * we've just kicked off a jetsam or we don't otherwise need to
2282  		 * swap... terminating objects releases
2283  		 * pages back to the uncompressed cache, but does not guarantee
2284  		 * that we will free up even a single compression segment
2285  		 */
2286  		should_swap = vm_compressor_needs_to_major_compact();
2287  		if (should_swap) {
2288  			vmcs_stats.fragmentation_detected++;
2289  #if (XNU_TARGET_OS_OSX && __arm64__)
2290  			/*
2291  			 * SSD based systems don't need the fragmentation
2292  			 * swapout trigger because that was designed for
2293  			 * systems where the swapout latencies could be long
2294  			 * enough that the pressure, if allowed to build up,
2295  			 * would be tightly tied to the swapouts later on.
2296  			 */
2297  			should_swap = FALSE;
2298  #endif /* (XNU_TARGET_OS_OSX && __arm64__) */
2299  		}
2300  	}
2301  
2302  	/*
2303  	 * returning TRUE when swap_supported == FALSE
2304  	 * will cause the major compaction engine to
2305  	 * run, but will not trigger any swapping...
2306  	 * segments that have been major compacted
2307  	 * will be moved to the majorcompact queue
2308  	 */
2309  	return should_swap;
2310  }
2311  
2312  #if CONFIG_JETSAM
2313  /*
2314   * This function is called from the jetsam thread after killing something to
2315   * mitigate thrashing.
2316   *
2317   * We need to restart our thrashing detection heuristics since memory pressure
2318   * has potentially changed significantly, and we don't want to detect on old
2319   * data from before the jetsam.
2320   */
2321  void
2322  vm_thrashing_jetsam_done(void)
2323  {
2324  	vm_compressor_thrashing_detected = FALSE;
2325  
2326  	/* Were we compressor-thrashing or filecache-thrashing? */
2327  	if (swapout_target_age) {
2328  		swapout_target_age = 0;
2329  		compressor_need_sample_reset = TRUE;
2330  	}
2331  #if CONFIG_PHANTOM_CACHE
2332  	else {
2333  		vm_phantom_cache_restart_sample();
2334  	}
2335  #endif
2336  }
2337  #endif /* CONFIG_JETSAM */
2338  
2339  uint32_t vm_wake_compactor_swapper_calls = 0;
2340  uint32_t vm_run_compactor_already_running = 0;
2341  uint32_t vm_run_compactor_empty_minor_q = 0;
2342  uint32_t vm_run_compactor_did_compact = 0;
2343  uint32_t vm_run_compactor_waited = 0;
2344  
2345  void
2346  vm_run_compactor(void)
2347  {
2348  	if (c_segment_count == 0) {
2349  		return;
2350  	}
2351  
2352  	lck_mtx_lock_spin_always(c_list_lock);
2353  
2354  	if (c_minor_count == 0) {
2355  		vm_run_compactor_empty_minor_q++;
2356  
2357  		lck_mtx_unlock_always(c_list_lock);
2358  		return;
2359  	}
2360  	if (compaction_swapper_running) {
2361  		if (vm_pageout_state.vm_restricted_to_single_processor == FALSE) {
2362  			vm_run_compactor_already_running++;
2363  
2364  			lck_mtx_unlock_always(c_list_lock);
2365  			return;
2366  		}
2367  		vm_run_compactor_waited++;
2368  
2369  		assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
2370  
2371  		lck_mtx_unlock_always(c_list_lock);
2372  
2373  		thread_block(THREAD_CONTINUE_NULL);
2374  
2375  		return;
2376  	}
2377  	vm_run_compactor_did_compact++;
2378  
2379  	fastwake_warmup = FALSE;
2380  	compaction_swapper_running = 1;
2381  
2382  	vm_compressor_do_delayed_compactions(FALSE);
2383  
2384  	compaction_swapper_running = 0;
2385  
2386  	lck_mtx_unlock_always(c_list_lock);
2387  
2388  	thread_wakeup((event_t)&compaction_swapper_running);
2389  }
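
/*
 * Illustrative sketch (not part of this file): vm_run_compactor and the
 * routines below rely on a "single runner" convention -- a flag protected by
 * c_list_lock marks the compaction in progress, latecomers either return or
 * sleep on the flag's address, and the runner wakes them when it clears the
 * flag.  Below is a minimal pthread model of that convention, with a
 * mutex/condvar pair standing in for the list lock plus
 * assert_wait/thread_wakeup; the toy_* names are assumptions made only for
 * this example, which is fenced off so it is never compiled.
 */
#if 0
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t toy_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  toy_done = PTHREAD_COND_INITIALIZER;
static int             toy_running = 0;

static void
toy_do_compactions(void)
{
	printf("compacting\n");         /* stand-in for the delayed-compaction work */
}

static void *
toy_run_compactor(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&toy_lock);

	if (toy_running) {
		/* someone else is compacting: wait for them rather than duplicate the work */
		while (toy_running) {
			pthread_cond_wait(&toy_done, &toy_lock);
		}
		pthread_mutex_unlock(&toy_lock);
		return NULL;
	}
	toy_running = 1;
	pthread_mutex_unlock(&toy_lock);

	toy_do_compactions();           /* only one thread at a time reaches this point */

	pthread_mutex_lock(&toy_lock);
	toy_running = 0;
	pthread_cond_broadcast(&toy_done);      /* wake every waiter, like thread_wakeup */
	pthread_mutex_unlock(&toy_lock);
	return NULL;
}

int
main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++) {
		pthread_create(&t[i], NULL, toy_run_compactor, NULL);
	}
	for (int i = 0; i < 4; i++) {
		pthread_join(t[i], NULL);
	}
	return 0;
}
#endif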
2390  
2391  
2392  void
2393  vm_wake_compactor_swapper(void)
2394  {
2395  	if (compaction_swapper_running || compaction_swapper_awakened || c_segment_count == 0) {
2396  		return;
2397  	}
2398  
2399  	if (c_minor_count || vm_compressor_needs_to_major_compact()) {
2400  		lck_mtx_lock_spin_always(c_list_lock);
2401  
2402  		fastwake_warmup = FALSE;
2403  
2404  		if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) {
2405  			vm_wake_compactor_swapper_calls++;
2406  
2407  			compaction_swapper_awakened = 1;
2408  			thread_wakeup((event_t)&c_compressor_swap_trigger);
2409  		}
2410  		lck_mtx_unlock_always(c_list_lock);
2411  	}
2412  }
2413  
2414  
2415  void
2416  vm_consider_swapping()
2417  {
2418  	c_segment_t     c_seg, c_seg_next;
2419  	clock_sec_t     now;
2420  	clock_nsec_t    nsec;
2421  
2422  	assert(VM_CONFIG_SWAP_IS_PRESENT);
2423  
2424  	lck_mtx_lock_spin_always(c_list_lock);
2425  
2426  	compaction_swapper_abort = 1;
2427  
2428  	while (compaction_swapper_running) {
2429  		assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
2430  
2431  		lck_mtx_unlock_always(c_list_lock);
2432  
2433  		thread_block(THREAD_CONTINUE_NULL);
2434  
2435  		lck_mtx_lock_spin_always(c_list_lock);
2436  	}
2437  	compaction_swapper_abort = 0;
2438  	compaction_swapper_running = 1;
2439  
2440  	vm_swapout_ripe_segments = TRUE;
2441  
2442  	if (!queue_empty(&c_major_list_head)) {
2443  		clock_get_system_nanotime(&now, &nsec);
2444  
2445  		c_seg = (c_segment_t)queue_first(&c_major_list_head);
2446  
2447  		while (!queue_end(&c_major_list_head, (queue_entry_t)c_seg)) {
2448  			if (c_overage_swapped_count >= c_overage_swapped_limit) {
2449  				break;
2450  			}
2451  
2452  			c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list);
2453  
2454  			if ((now - c_seg->c_creation_ts) >= vm_ripe_target_age) {
2455  				lck_mtx_lock_spin_always(&c_seg->c_lock);
2456  
2457  				c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
2458  
2459  				lck_mtx_unlock_always(&c_seg->c_lock);
2460  			}
2461  			c_seg = c_seg_next;
2462  		}
2463  	}
2464  	vm_compressor_compact_and_swap(FALSE);
2465  
2466  	compaction_swapper_running = 0;
2467  
2468  	vm_swapout_ripe_segments = FALSE;
2469  
2470  	lck_mtx_unlock_always(c_list_lock);
2471  
2472  	thread_wakeup((event_t)&compaction_swapper_running);
2473  }
2474  
2475  
2476  void
2477  vm_consider_waking_compactor_swapper(void)
2478  {
2479  	boolean_t       need_wakeup = FALSE;
2480  
2481  	if (c_segment_count == 0) {
2482  		return;
2483  	}
2484  
2485  	if (compaction_swapper_running || compaction_swapper_awakened) {
2486  		return;
2487  	}
2488  
2489  	if (!compaction_swapper_inited && !compaction_swapper_init_now) {
2490  		compaction_swapper_init_now = 1;
2491  		need_wakeup = TRUE;
2492  	}
2493  
2494  	if (c_minor_count && (COMPRESSOR_NEEDS_TO_MINOR_COMPACT())) {
2495  		need_wakeup = TRUE;
2496  	} else if (compressor_needs_to_swap()) {
2497  		need_wakeup = TRUE;
2498  	} else if (c_minor_count) {
2499  		uint64_t        total_bytes;
2500  
2501  		total_bytes = compressor_object->resident_page_count * PAGE_SIZE_64;
2502  
2503  		if ((total_bytes - compressor_bytes_used) > total_bytes / 10) {
2504  			need_wakeup = TRUE;
2505  		}
2506  	}
2507  	if (need_wakeup == TRUE) {
2508  		lck_mtx_lock_spin_always(c_list_lock);
2509  
2510  		fastwake_warmup = FALSE;
2511  
2512  		if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) {
2513  			memoryshot(VM_WAKEUP_COMPACTOR_SWAPPER, DBG_FUNC_NONE);
2514  
2515  			compaction_swapper_awakened = 1;
2516  			thread_wakeup((event_t)&c_compressor_swap_trigger);
2517  		}
2518  		lck_mtx_unlock_always(c_list_lock);
2519  	}
2520  }
2521  
2522  
2523  #define C_SWAPOUT_LIMIT                 4
2524  #define DELAYED_COMPACTIONS_PER_PASS    30
2525  
2526  void
2527  vm_compressor_do_delayed_compactions(boolean_t flush_all)
2528  {
2529  	c_segment_t     c_seg;
2530  	int             number_compacted = 0;
2531  	boolean_t       needs_to_swap = FALSE;
2532  
2533  
2534  	VM_DEBUG_CONSTANT_EVENT(vm_compressor_do_delayed_compactions, VM_COMPRESSOR_DO_DELAYED_COMPACTIONS, DBG_FUNC_START, c_minor_count, flush_all, 0, 0);
2535  
2536  #if XNU_TARGET_OS_OSX
2537  	LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED);
2538  #endif /* XNU_TARGET_OS_OSX */
2539  
2540  	while (!queue_empty(&c_minor_list_head) && needs_to_swap == FALSE) {
2541  		c_seg = (c_segment_t)queue_first(&c_minor_list_head);
2542  
2543  		lck_mtx_lock_spin_always(&c_seg->c_lock);
2544  
2545  		if (c_seg->c_busy) {
2546  			lck_mtx_unlock_always(c_list_lock);
2547  			c_seg_wait_on_busy(c_seg);
2548  			lck_mtx_lock_spin_always(c_list_lock);
2549  
2550  			continue;
2551  		}
2552  		C_SEG_BUSY(c_seg);
2553  
2554  		c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, TRUE);
2555  
2556  		if (VM_CONFIG_SWAP_IS_ACTIVE && (number_compacted++ > DELAYED_COMPACTIONS_PER_PASS)) {
2557  			if ((flush_all == TRUE || compressor_needs_to_swap() == TRUE) && c_swapout_count < C_SWAPOUT_LIMIT) {
2558  				needs_to_swap = TRUE;
2559  			}
2560  
2561  			number_compacted = 0;
2562  		}
2563  		lck_mtx_lock_spin_always(c_list_lock);
2564  	}
2565  
2566  	VM_DEBUG_CONSTANT_EVENT(vm_compressor_do_delayed_compactions, VM_COMPRESSOR_DO_DELAYED_COMPACTIONS, DBG_FUNC_END, c_minor_count, number_compacted, needs_to_swap, 0);
2567  }
2568  
2569  
2570  #define C_SEGMENT_SWAPPEDIN_AGE_LIMIT   10
2571  
2572  static void
2573  vm_compressor_age_swapped_in_segments(boolean_t flush_all)
2574  {
2575  	c_segment_t     c_seg;
2576  	clock_sec_t     now;
2577  	clock_nsec_t    nsec;
2578  
2579  	clock_get_system_nanotime(&now, &nsec);
2580  
2581  	while (!queue_empty(&c_swappedin_list_head)) {
2582  		c_seg = (c_segment_t)queue_first(&c_swappedin_list_head);
2583  
2584  		if (flush_all == FALSE && (now - c_seg->c_swappedin_ts) < C_SEGMENT_SWAPPEDIN_AGE_LIMIT) {
2585  			break;
2586  		}
2587  
2588  		lck_mtx_lock_spin_always(&c_seg->c_lock);
2589  
2590  		c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
2591  		c_seg->c_agedin_ts = (uint32_t) now;
2592  
2593  		lck_mtx_unlock_always(&c_seg->c_lock);
2594  	}
2595  }
2596  
2597  
2598  extern  int     vm_num_swap_files;
2599  extern  int     vm_num_pinned_swap_files;
2600  extern  int     vm_swappin_enabled;
2601  
2602  extern  unsigned int    vm_swapfile_total_segs_used;
2603  extern  unsigned int    vm_swapfile_total_segs_alloced;
2604  
2605  
2606  void
2607  vm_compressor_flush(void)
2608  {
2609  	uint64_t        vm_swap_put_failures_at_start;
2610  	wait_result_t   wait_result = 0;
2611  	AbsoluteTime    startTime, endTime;
2612  	clock_sec_t     now_sec;
2613  	clock_nsec_t    now_nsec;
2614  	uint64_t        nsec;
2615  
2616  	HIBLOG("vm_compressor_flush - starting\n");
2617  
2618  	clock_get_uptime(&startTime);
2619  
2620  	lck_mtx_lock_spin_always(c_list_lock);
2621  
2622  	fastwake_warmup = FALSE;
2623  	compaction_swapper_abort = 1;
2624  
2625  	while (compaction_swapper_running) {
2626  		assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
2627  
2628  		lck_mtx_unlock_always(c_list_lock);
2629  
2630  		thread_block(THREAD_CONTINUE_NULL);
2631  
2632  		lck_mtx_lock_spin_always(c_list_lock);
2633  	}
2634  	compaction_swapper_abort = 0;
2635  	compaction_swapper_running = 1;
2636  
2637  	hibernate_flushing = TRUE;
2638  	hibernate_no_swapspace = FALSE;
2639  	c_generation_id_flush_barrier = c_generation_id + 1000;
2640  
2641  	clock_get_system_nanotime(&now_sec, &now_nsec);
2642  	hibernate_flushing_deadline = now_sec + HIBERNATE_FLUSHING_SECS_TO_COMPLETE;
2643  
2644  	vm_swap_put_failures_at_start = vm_swap_put_failures;
2645  
2646  	vm_compressor_compact_and_swap(TRUE);
2647  
2648  	while (!queue_empty(&c_swapout_list_head)) {
2649  		assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 5000, 1000 * NSEC_PER_USEC);
2650  
2651  		lck_mtx_unlock_always(c_list_lock);
2652  
2653  		wait_result = thread_block(THREAD_CONTINUE_NULL);
2654  
2655  		lck_mtx_lock_spin_always(c_list_lock);
2656  
2657  		if (wait_result == THREAD_TIMED_OUT) {
2658  			break;
2659  		}
2660  	}
2661  	hibernate_flushing = FALSE;
2662  	compaction_swapper_running = 0;
2663  
2664  	if (vm_swap_put_failures > vm_swap_put_failures_at_start) {
2665  		HIBLOG("vm_compressor_flush failed to clean %llu segments - vm_page_compressor_count(%d)\n",
2666  		    vm_swap_put_failures - vm_swap_put_failures_at_start, VM_PAGE_COMPRESSOR_COUNT);
2667  	}
2668  
2669  	lck_mtx_unlock_always(c_list_lock);
2670  
2671  	thread_wakeup((event_t)&compaction_swapper_running);
2672  
2673  	clock_get_uptime(&endTime);
2674  	SUB_ABSOLUTETIME(&endTime, &startTime);
2675  	absolutetime_to_nanoseconds(endTime, &nsec);
2676  
2677  	HIBLOG("vm_compressor_flush completed - took %qd msecs - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d, vm_swappin_enabled = %d\n",
2678  	    nsec / 1000000ULL, vm_num_swap_files, vm_num_pinned_swap_files, vm_swappin_enabled);
2679  }
2680  
2681  
2682  int             compaction_swap_trigger_thread_awakened = 0;
2683  
2684  static void
2685  vm_compressor_swap_trigger_thread(void)
2686  {
2687  	current_thread()->options |= TH_OPT_VMPRIV;
2688  
2689  	/*
2690  	 * compaction_swapper_init_now is set when the first call to
2691  	 * vm_consider_waking_compactor_swapper is made from
2692  	 * vm_pageout_scan... since this function is called upon
2693  	 * thread creation, we want to make sure to delay adjusting
2694  	 * the tuneables until we are awakened via vm_pageout_scan
2695  	 * so that we are at a point where the vm_swapfile_open will
2696  	 * be operating on the correct directory (in case the default
2697  	 * of using the VM volume is overridden by the dynamic_pager)
2698  	 */
2699  	if (compaction_swapper_init_now) {
2700  		vm_compaction_swapper_do_init();
2701  
2702  		if (vm_pageout_state.vm_restricted_to_single_processor == TRUE) {
2703  			thread_vm_bind_group_add();
2704  		}
2705  #if CONFIG_THREAD_GROUPS
2706  		thread_group_vm_add();
2707  #endif
2708  		thread_set_thread_name(current_thread(), "VM_cswap_trigger");
2709  		compaction_swapper_init_now = 0;
2710  	}
2711  	lck_mtx_lock_spin_always(c_list_lock);
2712  
2713  	compaction_swap_trigger_thread_awakened++;
2714  	compaction_swapper_awakened = 0;
2715  
2716  	if (compaction_swapper_running == 0) {
2717  		compaction_swapper_running = 1;
2718  
2719  		vm_compressor_compact_and_swap(FALSE);
2720  
2721  		compaction_swapper_running = 0;
2722  	}
2723  	assert_wait((event_t)&c_compressor_swap_trigger, THREAD_UNINT);
2724  
2725  	if (compaction_swapper_running == 0) {
2726  		thread_wakeup((event_t)&compaction_swapper_running);
2727  	}
2728  
2729  	lck_mtx_unlock_always(c_list_lock);
2730  
2731  	thread_block((thread_continue_t)vm_compressor_swap_trigger_thread);
2732  
2733  	/* NOTREACHED */
2734  }
2735  
2736  
2737  void
2738  vm_compressor_record_warmup_start(void)
2739  {
2740  	c_segment_t     c_seg;
2741  
2742  	lck_mtx_lock_spin_always(c_list_lock);
2743  
2744  	if (first_c_segment_to_warm_generation_id == 0) {
2745  		if (!queue_empty(&c_age_list_head)) {
2746  			c_seg = (c_segment_t)queue_last(&c_age_list_head);
2747  
2748  			first_c_segment_to_warm_generation_id = c_seg->c_generation_id;
2749  		} else {
2750  			first_c_segment_to_warm_generation_id = 0;
2751  		}
2752  
2753  		fastwake_recording_in_progress = TRUE;
2754  	}
2755  	lck_mtx_unlock_always(c_list_lock);
2756  }
2757  
2758  
2759  void
2760  vm_compressor_record_warmup_end(void)
2761  {
2762  	c_segment_t     c_seg;
2763  
2764  	lck_mtx_lock_spin_always(c_list_lock);
2765  
2766  	if (fastwake_recording_in_progress == TRUE) {
2767  		if (!queue_empty(&c_age_list_head)) {
2768  			c_seg = (c_segment_t)queue_last(&c_age_list_head);
2769  
2770  			last_c_segment_to_warm_generation_id = c_seg->c_generation_id;
2771  		} else {
2772  			last_c_segment_to_warm_generation_id = first_c_segment_to_warm_generation_id;
2773  		}
2774  
2775  		fastwake_recording_in_progress = FALSE;
2776  
2777  		HIBLOG("vm_compressor_record_warmup (%qd - %qd)\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id);
2778  	}
2779  	lck_mtx_unlock_always(c_list_lock);
2780  }
2781  
2782  
2783  #define DELAY_TRIM_ON_WAKE_SECS         25
2784  
2785  void
2786  vm_compressor_delay_trim(void)
2787  {
2788  	clock_sec_t     sec;
2789  	clock_nsec_t    nsec;
2790  
2791  	clock_get_system_nanotime(&sec, &nsec);
2792  	dont_trim_until_ts = sec + DELAY_TRIM_ON_WAKE_SECS;
2793  }
2794  
2795  
2796  void
2797  vm_compressor_do_warmup(void)
2798  {
2799  	lck_mtx_lock_spin_always(c_list_lock);
2800  
2801  	if (first_c_segment_to_warm_generation_id == last_c_segment_to_warm_generation_id) {
2802  		first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0;
2803  
2804  		lck_mtx_unlock_always(c_list_lock);
2805  		return;
2806  	}
2807  
2808  	if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) {
2809  		fastwake_warmup = TRUE;
2810  
2811  		compaction_swapper_awakened = 1;
2812  		thread_wakeup((event_t)&c_compressor_swap_trigger);
2813  	}
2814  	lck_mtx_unlock_always(c_list_lock);
2815  }
2816  
2817  void
2818  do_fastwake_warmup_all(void)
2819  {
2820  	lck_mtx_lock_spin_always(c_list_lock);
2821  
2822  	if (queue_empty(&c_swappedout_list_head) && queue_empty(&c_swappedout_sparse_list_head)) {
2823  		lck_mtx_unlock_always(c_list_lock);
2824  		return;
2825  	}
2826  
2827  	fastwake_warmup = TRUE;
2828  
2829  	do_fastwake_warmup(&c_swappedout_list_head, TRUE);
2830  
2831  	do_fastwake_warmup(&c_swappedout_sparse_list_head, TRUE);
2832  
2833  	fastwake_warmup = FALSE;
2834  
2835  	lck_mtx_unlock_always(c_list_lock);
2836  }
2837  
2838  void
2839  do_fastwake_warmup(queue_head_t *c_queue, boolean_t consider_all_cseg)
2840  {
2841  	c_segment_t     c_seg = NULL;
2842  	AbsoluteTime    startTime, endTime;
2843  	uint64_t        nsec;
2844  
2845  
2846  	HIBLOG("vm_compressor_fastwake_warmup (%qd - %qd) - starting\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id);
2847  
2848  	clock_get_uptime(&startTime);
2849  
2850  	lck_mtx_unlock_always(c_list_lock);
2851  
2852  	proc_set_thread_policy(current_thread(),
2853  	    TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
2854  
2855  	PAGE_REPLACEMENT_DISALLOWED(TRUE);
2856  
2857  	lck_mtx_lock_spin_always(c_list_lock);
2858  
2859  	while (!queue_empty(c_queue) && fastwake_warmup == TRUE) {
2860  		c_seg = (c_segment_t) queue_first(c_queue);
2861  
2862  		if (consider_all_cseg == FALSE) {
2863  			if (c_seg->c_generation_id < first_c_segment_to_warm_generation_id ||
2864  			    c_seg->c_generation_id > last_c_segment_to_warm_generation_id) {
2865  				break;
2866  			}
2867  
2868  			if (vm_page_free_count < (AVAILABLE_MEMORY / 4)) {
2869  				break;
2870  			}
2871  		}
2872  
2873  		lck_mtx_lock_spin_always(&c_seg->c_lock);
2874  		lck_mtx_unlock_always(c_list_lock);
2875  
2876  		if (c_seg->c_busy) {
2877  			PAGE_REPLACEMENT_DISALLOWED(FALSE);
2878  			c_seg_wait_on_busy(c_seg);
2879  			PAGE_REPLACEMENT_DISALLOWED(TRUE);
2880  		} else {
2881  			if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {
2882  				lck_mtx_unlock_always(&c_seg->c_lock);
2883  			}
2884  			c_segment_warmup_count++;
2885  
2886  			PAGE_REPLACEMENT_DISALLOWED(FALSE);
2887  			vm_pageout_io_throttle();
2888  			PAGE_REPLACEMENT_DISALLOWED(TRUE);
2889  		}
2890  		lck_mtx_lock_spin_always(c_list_lock);
2891  	}
2892  	lck_mtx_unlock_always(c_list_lock);
2893  
2894  	PAGE_REPLACEMENT_DISALLOWED(FALSE);
2895  
2896  	proc_set_thread_policy(current_thread(),
2897  	    TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER0);
2898  
2899  	clock_get_uptime(&endTime);
2900  	SUB_ABSOLUTETIME(&endTime, &startTime);
2901  	absolutetime_to_nanoseconds(endTime, &nsec);
2902  
2903  	HIBLOG("vm_compressor_fastwake_warmup completed - took %qd msecs\n", nsec / 1000000ULL);
2904  
2905  	lck_mtx_lock_spin_always(c_list_lock);
2906  
2907  	if (consider_all_cseg == FALSE) {
2908  		first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0;
2909  	}
2910  }
2911  
2912  int min_csegs_per_major_compaction = DELAYED_COMPACTIONS_PER_PASS;
2913  extern bool     vm_swapout_thread_running;
2914  extern boolean_t        compressor_store_stop_compaction;
2915  
2916  void
2917  vm_compressor_compact_and_swap(boolean_t flush_all)
2918  {
2919  	c_segment_t     c_seg, c_seg_next;
2920  	boolean_t       keep_compacting, switch_state;
2921  	clock_sec_t     now;
2922  	clock_nsec_t    nsec;
2923  	mach_timespec_t start_ts, end_ts;
2924  	unsigned int    number_considered, wanted_cseg_found, yield_after_considered_per_pass, number_yields;
2925  	uint64_t        bytes_to_free, bytes_freed, delta_usec;
2926  
2927  	VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_START, c_age_count, c_minor_count, c_major_count, vm_page_free_count);
2928  
2929  	if (fastwake_warmup == TRUE) {
2930  		uint64_t        starting_warmup_count;
2931  
2932  		starting_warmup_count = c_segment_warmup_count;
2933  
2934  		KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_START, c_segment_warmup_count,
2935  		    first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id, 0, 0);
2936  		do_fastwake_warmup(&c_swappedout_list_head, FALSE);
2937  		KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_END, c_segment_warmup_count, c_segment_warmup_count - starting_warmup_count, 0, 0, 0);
2938  
2939  		fastwake_warmup = FALSE;
2940  	}
2941  
2942  	/*
2943  	 * it's possible for the c_age_list_head to be empty if we
2944  	 * hit our limits for growing the compressor pool and we subsequently
2945  	 * hibernated... on the next hibernation we could see the queue as
2946  	 * empty and not proceed even though we have a bunch of segments on
2947  	 * the swapped in queue that need to be dealt with.
2948  	 */
2949  	vm_compressor_do_delayed_compactions(flush_all);
2950  
2951  	vm_compressor_age_swapped_in_segments(flush_all);
2952  
2953  	/*
2954  	 * we only need to grab the timestamp once per
2955  	 * invocation of this function since the
2956  	 * timescale we're interested in is measured
2957  	 * in days
2958  	 */
2959  	clock_get_system_nanotime(&now, &nsec);
2960  
2961  	start_ts.tv_sec = (int) now;
2962  	start_ts.tv_nsec = nsec;
2963  	delta_usec = 0;
2964  	number_considered = 0;
2965  	wanted_cseg_found = 0;
2966  	number_yields = 0;
2967  	bytes_to_free = 0;
2968  	bytes_freed = 0;
2969  	yield_after_considered_per_pass = MAX(min_csegs_per_major_compaction, DELAYED_COMPACTIONS_PER_PASS);
2970  
2971  	while (!queue_empty(&c_age_list_head) && !compaction_swapper_abort && !compressor_store_stop_compaction) {
2972  		if (hibernate_flushing == TRUE) {
2973  			clock_sec_t     sec;
2974  
2975  			if (hibernate_should_abort()) {
2976  				HIBLOG("vm_compressor_flush - hibernate_should_abort returned TRUE\n");
2977  				break;
2978  			}
2979  			if (hibernate_no_swapspace == TRUE) {
2980  				HIBLOG("vm_compressor_flush - out of swap space\n");
2981  				break;
2982  			}
2983  			if (vm_swap_files_pinned() == FALSE) {
2984  				HIBLOG("vm_compressor_flush - unpinned swap files\n");
2985  				break;
2986  			}
2987  			if (hibernate_in_progress_with_pinned_swap == TRUE &&
2988  			    (vm_swapfile_total_segs_alloced == vm_swapfile_total_segs_used)) {
2989  				HIBLOG("vm_compressor_flush - out of pinned swap space\n");
2990  				break;
2991  			}
2992  			clock_get_system_nanotime(&sec, &nsec);
2993  
2994  			if (sec > hibernate_flushing_deadline) {
2995  				HIBLOG("vm_compressor_flush - failed to finish before deadline\n");
2996  				break;
2997  			}
2998  		}
2999  		if (!vm_swap_out_of_space() && c_swapout_count >= C_SWAPOUT_LIMIT) {
3000  			assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 100, 1000 * NSEC_PER_USEC);
3001  
3002  			if (!vm_swapout_thread_running) {
3003  				thread_wakeup((event_t)&c_swapout_list_head);
3004  			}
3005  
3006  			lck_mtx_unlock_always(c_list_lock);
3007  
3008  			VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 1, c_swapout_count, 0, 0);
3009  
3010  			thread_block(THREAD_CONTINUE_NULL);
3011  
3012  			lck_mtx_lock_spin_always(c_list_lock);
3013  		}
3014  		/*
3015  		 * Minor compactions
3016  		 */
3017  		vm_compressor_do_delayed_compactions(flush_all);
3018  
3019  		vm_compressor_age_swapped_in_segments(flush_all);
3020  
3021  		if (!vm_swap_out_of_space() && c_swapout_count >= C_SWAPOUT_LIMIT) {
3022  			/*
3023  			 * we timed out on the above thread_block
3024  			 * let's loop around and try again
3025  			 * the timeout allows us to continue
3026  			 * to do minor compactions to make
3027  			 * more memory available
3028  			 */
3029  			VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 2, c_swapout_count, 0, 0);
3030  
3031  			continue;
3032  		}
3033  
3034  		/*
3035  		 * Swap out segments?
3036  		 */
3037  		if (flush_all == FALSE) {
3038  			boolean_t       needs_to_swap;
3039  
3040  			lck_mtx_unlock_always(c_list_lock);
3041  
3042  			needs_to_swap = compressor_needs_to_swap();
3043  
3044  			lck_mtx_lock_spin_always(c_list_lock);
3045  
3046  			VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 3, needs_to_swap, 0, 0);
3047  
3048  			if (needs_to_swap == FALSE) {
3049  				break;
3050  			}
3051  		}
3052  		if (queue_empty(&c_age_list_head)) {
3053  			VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 4, c_age_count, 0, 0);
3054  			break;
3055  		}
3056  		c_seg = (c_segment_t) queue_first(&c_age_list_head);
3057  
3058  		assert(c_seg->c_state == C_ON_AGE_Q);
3059  
3060  		if (flush_all == TRUE && c_seg->c_generation_id > c_generation_id_flush_barrier) {
3061  			VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 5, 0, 0, 0);
3062  			break;
3063  		}
3064  
3065  		lck_mtx_lock_spin_always(&c_seg->c_lock);
3066  
3067  		if (c_seg->c_busy) {
3068  			VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 6, (void*) VM_KERNEL_ADDRPERM(c_seg), 0, 0);
3069  
3070  			lck_mtx_unlock_always(c_list_lock);
3071  			c_seg_wait_on_busy(c_seg);
3072  			lck_mtx_lock_spin_always(c_list_lock);
3073  
3074  			continue;
3075  		}
3076  		C_SEG_BUSY(c_seg);
3077  
3078  		if (c_seg_do_minor_compaction_and_unlock(c_seg, FALSE, TRUE, TRUE)) {
3079  			/*
3080  			 * found an empty c_segment and freed it
3081  			 * so go grab the next guy in the queue
3082  			 */
3083  			VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 7, 0, 0, 0);
3084  			c_seg_major_compact_stats[c_seg_major_compact_stats_now].count_of_freed_segs++;
3085  			continue;
3086  		}
3087  		/*
3088  		 * Major compaction
3089  		 */
3090  		keep_compacting = TRUE;
3091  		switch_state = TRUE;
3092  
3093  		while (keep_compacting == TRUE) {
3094  			assert(c_seg->c_busy);
3095  
3096  			/* look for another segment to consolidate */
3097  
3098  			c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list);
3099  
3100  			if (queue_end(&c_age_list_head, (queue_entry_t)c_seg_next)) {
3101  				break;
3102  			}
3103  
3104  			assert(c_seg_next->c_state == C_ON_AGE_Q);
3105  
3106  			number_considered++;
3107  
3108  			if (c_seg_major_compact_ok(c_seg, c_seg_next) == FALSE) {
3109  				break;
3110  			}
3111  
3112  			lck_mtx_lock_spin_always(&c_seg_next->c_lock);
3113  
3114  			if (c_seg_next->c_busy) {
3115  				/*
3116  				 * We are going to block for our neighbor.
3117  				 * If our c_seg is wanted, we should unbusy
3118  				 * it because we don't know how long we might
3119  				 * have to block here.
3120  				 */
3121  				if (c_seg->c_wanted) {
3122  					lck_mtx_unlock_always(&c_seg_next->c_lock);
3123  					switch_state = FALSE;
3124  					c_seg_major_compact_stats[c_seg_major_compact_stats_now].bailed_compactions++;
3125  					wanted_cseg_found++;
3126  					break;
3127  				}
3128  
3129  				lck_mtx_unlock_always(c_list_lock);
3130  
3131  				VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 8, (void*) VM_KERNEL_ADDRPERM(c_seg_next), 0, 0);
3132  
3133  				c_seg_wait_on_busy(c_seg_next);
3134  				lck_mtx_lock_spin_always(c_list_lock);
3135  
3136  				continue;
3137  			}
3138  			/* grab that segment */
3139  			C_SEG_BUSY(c_seg_next);
3140  
3141  			bytes_to_free = C_SEG_OFFSET_TO_BYTES(c_seg_next->c_populated_offset);
3142  			if (c_seg_do_minor_compaction_and_unlock(c_seg_next, FALSE, TRUE, TRUE)) {
3143  				/*
3144  				 * found an empty c_segment and freed it
3145  				 * so we can't continue to use c_seg_next
3146  				 */
3147  				bytes_freed += bytes_to_free;
3148  				c_seg_major_compact_stats[c_seg_major_compact_stats_now].count_of_freed_segs++;
3149  				continue;
3150  			}
3151  
3152  			/* unlock the list ... */
3153  			lck_mtx_unlock_always(c_list_lock);
3154  
3155  			/* do the major compaction */
3156  
3157  			keep_compacting = c_seg_major_compact(c_seg, c_seg_next);
3158  
3159  			VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 9, keep_compacting, 0, 0);
3160  
3161  			PAGE_REPLACEMENT_DISALLOWED(TRUE);
3162  
3163  			lck_mtx_lock_spin_always(&c_seg_next->c_lock);
3164  			/*
3165  			 * run a minor compaction on the donor segment
3166  			 * since we pulled at least some of its
3167  			 * data into our target...  if we've emptied
3168  			 * it, now is a good time to free it which
3169  			 * c_seg_minor_compaction_and_unlock also takes care of
3170  			 *
3171  			 * by passing TRUE, we ask for c_busy to be cleared
3172  			 * and c_wanted to be taken care of
3173  			 */
3174  			bytes_to_free = C_SEG_OFFSET_TO_BYTES(c_seg_next->c_populated_offset);
3175  			if (c_seg_minor_compaction_and_unlock(c_seg_next, TRUE)) {
3176  				bytes_freed += bytes_to_free;
3177  				c_seg_major_compact_stats[c_seg_major_compact_stats_now].count_of_freed_segs++;
3178  			} else {
3179  				bytes_to_free -= C_SEG_OFFSET_TO_BYTES(c_seg_next->c_populated_offset);
3180  				bytes_freed += bytes_to_free;
3181  			}
3182  
3183  			PAGE_REPLACEMENT_DISALLOWED(FALSE);
3184  
3185  			/* relock the list */
3186  			lck_mtx_lock_spin_always(c_list_lock);
3187  
3188  			if (c_seg->c_wanted) {
3189  				/*
3190  				 * Our c_seg is in demand. Let's
3191  				 * unbusy it and wakeup the waiters
3192  				 * instead of continuing the compaction
3193  				 * because we could be in this loop
3194  				 * for a while.
3195  				 */
3196  				switch_state = FALSE;
3197  				wanted_cseg_found++;
3198  				c_seg_major_compact_stats[c_seg_major_compact_stats_now].bailed_compactions++;
3199  				break;
3200  			}
3201  		} /* major compaction */
3202  
3203  		VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 10, number_considered, wanted_cseg_found, 0);
3204  
3205  		lck_mtx_lock_spin_always(&c_seg->c_lock);
3206  
3207  		assert(c_seg->c_busy);
3208  		assert(!c_seg->c_on_minorcompact_q);
3209  
3210  		if (switch_state) {
3211  			if (VM_CONFIG_SWAP_IS_ACTIVE) {
3212  				/*
3213  				 * This mode of putting a generic c_seg on the swapout list is
3214  				 * only supported when we have general swapping enabled
3215  				 */
3216  				clock_sec_t lnow;
3217  				clock_nsec_t lnsec;
3218  				clock_get_system_nanotime(&lnow, &lnsec);
3219  				if (c_seg->c_agedin_ts && (lnow - c_seg->c_agedin_ts) < 30) {
3220  					vmcs_stats.unripe_under_30s++;
3221  				} else if (c_seg->c_agedin_ts && (lnow - c_seg->c_agedin_ts) < 60) {
3222  					vmcs_stats.unripe_under_60s++;
3223  				} else if (c_seg->c_agedin_ts && (lnow - c_seg->c_agedin_ts) < 300) {
3224  					vmcs_stats.unripe_under_300s++;
3225  				}
3226  
3227  				c_seg_switch_state(c_seg, C_ON_SWAPOUT_Q, FALSE);
3228  			} else {
3229  				if ((vm_swapout_ripe_segments == TRUE && c_overage_swapped_count < c_overage_swapped_limit)) {
3230  					assert(VM_CONFIG_SWAP_IS_PRESENT);
3231  					/*
3232  					 * we are running compressor sweeps with swap-behind
3233  					 * make sure the c_seg has aged enough before swapping it
3234  					 * out...
3235  					 */
3236  					if ((now - c_seg->c_creation_ts) >= vm_ripe_target_age) {
3237  						c_seg->c_overage_swap = TRUE;
3238  						c_overage_swapped_count++;
3239  						c_seg_switch_state(c_seg, C_ON_SWAPOUT_Q, FALSE);
3240  					}
3241  				}
3242  			}
3243  			if (c_seg->c_state == C_ON_AGE_Q) {
3244  				/*
3245  				 * this c_seg didn't get moved to the swapout queue
3246  				 * so we need to move it out of the way...
3247  				 * we just did a major compaction on it so put it
3248  				 * on that queue
3249  				 */
3250  				c_seg_switch_state(c_seg, C_ON_MAJORCOMPACT_Q, FALSE);
3251  			} else {
3252  				c_seg_major_compact_stats[c_seg_major_compact_stats_now].wasted_space_in_swapouts += C_SEG_BUFSIZE - c_seg->c_bytes_used;
3253  				c_seg_major_compact_stats[c_seg_major_compact_stats_now].count_of_swapouts++;
3254  			}
3255  		}
3256  
3257  		C_SEG_WAKEUP_DONE(c_seg);
3258  
3259  		lck_mtx_unlock_always(&c_seg->c_lock);
3260  
3261  		if (c_swapout_count) {
3262  			/*
3263  			 * We don't pause/yield here because we will either
3264  			 * yield below or at the top of the loop with the
3265  			 * assert_wait_timeout.
3266  			 */
3267  			if (!vm_swapout_thread_running) {
3268  				thread_wakeup((event_t)&c_swapout_list_head);
3269  			}
3270  		}
3271  
3272  		if (number_considered >= yield_after_considered_per_pass) {
3273  			if (wanted_cseg_found) {
3274  				/*
3275  				 * We stopped major compactions on a c_seg
3276  				 * that is wanted. Unfortunately we don't
3277  				 * know the priority of the waiter, but we
3278  				 * are running at a very high priority, so,
3279  				 * just in case the waiter is a critical
3280  				 * system daemon or UI thread, let's give up
3281  				 * the CPU while the system runs a few CPU
3282  				 * intensive tasks.
3283  				 */
3284  				lck_mtx_unlock_always(c_list_lock);
3285  
3286  				mutex_pause(2); /* 100us yield */
3287  
3288  				number_yields++;
3289  
3290  				VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 11, number_considered, number_yields, 0);
3291  
3292  				lck_mtx_lock_spin_always(c_list_lock);
3293  			}
3294  
3295  			number_considered = 0;
3296  			wanted_cseg_found = 0;
3297  		}
3298  	}
3299  	clock_get_system_nanotime(&now, &nsec);
3300  	end_ts.tv_sec = (int) now;
3301  	end_ts.tv_nsec = nsec;
3302  
3303  	SUB_MACH_TIMESPEC(&end_ts, &start_ts);
3304  
3305  	delta_usec = (end_ts.tv_sec * USEC_PER_SEC) + (end_ts.tv_nsec / NSEC_PER_USEC) - (number_yields * 100);
3306  
3307  	delta_usec = MAX(1, delta_usec); /* we could have 0 usec run if conditions weren't right */
3308  
3309  	c_seg_major_compact_stats[c_seg_major_compact_stats_now].bytes_freed_rate_us = (bytes_freed / delta_usec);
3310  
3311  	if ((c_seg_major_compact_stats_now + 1) == C_SEG_MAJOR_COMPACT_STATS_MAX) {
3312  		c_seg_major_compact_stats_now = 0;
3313  	} else {
3314  		c_seg_major_compact_stats_now++;
3315  	}
3316  
3317  	assert(c_seg_major_compact_stats_now < C_SEG_MAJOR_COMPACT_STATS_MAX);
3318  
3319  	VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_END, c_age_count, c_minor_count, c_major_count, vm_page_free_count);
3320  }
3321  
3322  
3323  static c_segment_t
3324  c_seg_allocate(c_segment_t *current_chead)
3325  {
3326  	c_segment_t     c_seg;
3327  	int             min_needed;
3328  	int             size_to_populate;
3329  
3330  #if XNU_TARGET_OS_OSX
3331  	if (vm_compressor_low_on_space()) {
3332  		vm_compressor_take_paging_space_action();
3333  	}
3334  #endif /* XNU_TARGET_OS_OSX */
3335  
3336  	if ((c_seg = *current_chead) == NULL) {
3337  		uint32_t        c_segno;
3338  
3339  		lck_mtx_lock_spin_always(c_list_lock);
3340  
3341  		while (c_segments_busy == TRUE) {
3342  			assert_wait((event_t) (&c_segments_busy), THREAD_UNINT);
3343  
3344  			lck_mtx_unlock_always(c_list_lock);
3345  
3346  			thread_block(THREAD_CONTINUE_NULL);
3347  
3348  			lck_mtx_lock_spin_always(c_list_lock);
3349  		}
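      		/*
      		 * a c_free_segno_head of -1 means the free list of
      		 * c_segments entries is empty... grow the table by
      		 * populating one more physical page of entries
      		 */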
3350  		if (c_free_segno_head == (uint32_t)-1) {
3351  			uint32_t        c_segments_available_new;
3352  			uint32_t        compressed_pages;
3353  
3354  #if CONFIG_FREEZE
3355  			if (freezer_incore_cseg_acct) {
3356  				compressed_pages = c_segment_pages_compressed_incore;
3357  			} else {
3358  				compressed_pages = c_segment_pages_compressed;
3359  			}
3360  #else
3361  			compressed_pages = c_segment_pages_compressed;
3362  #endif /* CONFIG_FREEZE */
3363  
3364  			if (c_segments_available >= c_segments_limit || compressed_pages >= c_segment_pages_compressed_limit) {
3365  				lck_mtx_unlock_always(c_list_lock);
3366  
3367  				return NULL;
3368  			}
3369  			c_segments_busy = TRUE;
3370  			lck_mtx_unlock_always(c_list_lock);
3371  
3372  			kernel_memory_populate(compressor_map, (vm_offset_t)c_segments_next_page,
3373  			    PAGE_SIZE, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR);
3374  			c_segments_next_page += PAGE_SIZE;
3375  
3376  			c_segments_available_new = c_segments_available + C_SEGMENTS_PER_PAGE;
3377  
3378  			if (c_segments_available_new > c_segments_limit) {
3379  				c_segments_available_new = c_segments_limit;
3380  			}
3381  
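      			/*
      			 * chain the new entries into a singly linked free
      			 * list... for a free entry, the c_segno field of
      			 * the union holds the index of the next free entry
      			 */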
3382  			for (c_segno = c_segments_available + 1; c_segno < c_segments_available_new; c_segno++) {
3383  				c_segments[c_segno - 1].c_segno = c_segno;
3384  			}
3385  
3386  			lck_mtx_lock_spin_always(c_list_lock);
3387  
3388  			c_segments[c_segno - 1].c_segno = c_free_segno_head;
3389  			c_free_segno_head = c_segments_available;
3390  			c_segments_available = c_segments_available_new;
3391  
3392  			c_segments_busy = FALSE;
3393  			thread_wakeup((event_t) (&c_segments_busy));
3394  		}
3395  		c_segno = c_free_segno_head;
3396  		assert(c_segno < c_segments_limit);
3397  
3398  		c_free_segno_head = (uint32_t)c_segments[c_segno].c_segno;
3399  
3400  		/*
3401  		 * do the rest of the bookkeeping now while we're still behind
3402  		 * the list lock... the generation id itself is assigned later,
3403  		 * in c_current_seg_filled(), once the segment has been filled
3404  		 */
3405  		c_segment_count++;
3406  		if (c_segment_count > c_segment_count_max) {
3407  			c_segment_count_max = c_segment_count;
3408  		}
3409  
3410  		lck_mtx_unlock_always(c_list_lock);
3411  
3412  		c_seg = zalloc_flags(compressor_segment_zone, Z_WAITOK | Z_ZERO);
3413  
3414  		c_seg->c_store.c_buffer = (int32_t *)C_SEG_BUFFER_ADDRESS(c_segno);
3415  
3416  		lck_mtx_init(&c_seg->c_lock, &vm_compressor_lck_grp, LCK_ATTR_NULL);
3417  
3418  		c_seg->c_state = C_IS_EMPTY;
3419  		c_seg->c_firstemptyslot = C_SLOT_MAX_INDEX;
3420  		c_seg->c_mysegno = c_segno;
3421  
3422  		lck_mtx_lock_spin_always(c_list_lock);
3423  		c_empty_count++;
3424  		c_seg_switch_state(c_seg, C_IS_FILLING, FALSE);
3425  		c_segments[c_segno].c_seg = c_seg;
3426  		assert(c_segments[c_segno].c_segno > c_segments_available);
3427  		lck_mtx_unlock_always(c_list_lock);
3428  
3429  		*current_chead = c_seg;
3430  
3431  #if DEVELOPMENT || DEBUG
3432  		C_SEG_MAKE_WRITEABLE(c_seg);
3433  #endif
3434  	}
3435  	c_seg_alloc_nextslot(c_seg);
3436  
3437  	size_to_populate = C_SEG_ALLOCSIZE - C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset);
3438  
3439  	if (size_to_populate) {
3440  		min_needed = PAGE_SIZE + (C_SEG_ALLOCSIZE - C_SEG_BUFSIZE);
3441  
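      		/*
      		 * only populate more backing pages when the room between
      		 * c_nextoffset and c_populated_offset has dropped below a
      		 * page (plus the allocation slop beyond C_SEG_BUFSIZE)
      		 */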
3442  		if (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset) < (unsigned) min_needed) {
3443  			if (size_to_populate > C_SEG_MAX_POPULATE_SIZE) {
3444  				size_to_populate = C_SEG_MAX_POPULATE_SIZE;
3445  			}
3446  
3447  			OSAddAtomic64(size_to_populate / PAGE_SIZE, &vm_pageout_vminfo.vm_compressor_pages_grabbed);
3448  
3449  			kernel_memory_populate(compressor_map,
3450  			    (vm_offset_t) &c_seg->c_store.c_buffer[c_seg->c_populated_offset],
3451  			    size_to_populate,
3452  			    KMA_COMPRESSOR,
3453  			    VM_KERN_MEMORY_COMPRESSOR);
3454  		} else {
3455  			size_to_populate = 0;
3456  		}
3457  	}
3458  	PAGE_REPLACEMENT_DISALLOWED(TRUE);
3459  
3460  	lck_mtx_lock_spin_always(&c_seg->c_lock);
3461  
3462  	if (size_to_populate) {
3463  		c_seg->c_populated_offset += C_SEG_BYTES_TO_OFFSET(size_to_populate);
3464  	}
3465  
3466  	return c_seg;
3467  }
3468  
3469  #if DEVELOPMENT || DEBUG
3470  #if CONFIG_FREEZE
3471  extern boolean_t memorystatus_freeze_to_memory;
3472  #endif /* CONFIG_FREEZE */
3473  #endif /* DEVELOPMENT || DEBUG */
3474  
3475  static void
3476  c_current_seg_filled(c_segment_t c_seg, c_segment_t *current_chead)
3477  {
3478  	uint32_t        unused_bytes;
3479  	uint32_t        offset_to_depopulate;
3480  	int             new_state = C_ON_AGE_Q;
3481  	clock_sec_t     sec;
3482  	clock_nsec_t    nsec;
3483  	boolean_t       head_insert = FALSE;
3484  
3485  	unused_bytes = trunc_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset));
3486  
3487  	if (unused_bytes) {
3488  		offset_to_depopulate = C_SEG_BYTES_TO_OFFSET(round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_nextoffset)));
3489  
3490  		/*
3491  		 *  release the extra physical page(s) at the end of the segment
3492  		 */
3493  		lck_mtx_unlock_always(&c_seg->c_lock);
3494  
3495  		kernel_memory_depopulate(
3496  			compressor_map,
3497  			(vm_offset_t) &c_seg->c_store.c_buffer[offset_to_depopulate],
3498  			unused_bytes,
3499  			KMA_COMPRESSOR,
3500  			VM_KERN_MEMORY_COMPRESSOR);
3501  
3502  		lck_mtx_lock_spin_always(&c_seg->c_lock);
3503  
3504  		c_seg->c_populated_offset = offset_to_depopulate;
3505  	}
3506  	assert(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset) <= C_SEG_BUFSIZE);
3507  
3508  #if DEVELOPMENT || DEBUG
3509  	{
3510  		boolean_t       c_seg_was_busy = FALSE;
3511  
3512  		if (!c_seg->c_busy) {
3513  			C_SEG_BUSY(c_seg);
3514  		} else {
3515  			c_seg_was_busy = TRUE;
3516  		}
3517  
3518  		lck_mtx_unlock_always(&c_seg->c_lock);
3519  
3520  		C_SEG_WRITE_PROTECT(c_seg);
3521  
3522  		lck_mtx_lock_spin_always(&c_seg->c_lock);
3523  
3524  		if (c_seg_was_busy == FALSE) {
3525  			C_SEG_WAKEUP_DONE(c_seg);
3526  		}
3527  	}
3528  #endif
3529  
3530  #if CONFIG_FREEZE
3531  	if (current_chead == (c_segment_t*) &(freezer_context_global.freezer_ctx_chead) &&
3532  	    VM_CONFIG_SWAP_IS_PRESENT &&
3533  	    VM_CONFIG_FREEZER_SWAP_IS_ACTIVE
3534  #if DEVELOPMENT || DEBUG
3535  	    && !memorystatus_freeze_to_memory
3536  #endif /* DEVELOPMENT || DEBUG */
3537  	    ) {
3538  		new_state = C_ON_SWAPOUT_Q;
3539  	}
3540  #endif /* CONFIG_FREEZE */
3541  
3542  	if (vm_darkwake_mode == TRUE) {
3543  		new_state = C_ON_SWAPOUT_Q;
3544  		head_insert = TRUE;
3545  	}
3546  
3547  	clock_get_system_nanotime(&sec, &nsec);
3548  	c_seg->c_creation_ts = (uint32_t)sec;
3549  
3550  	lck_mtx_lock_spin_always(c_list_lock);
3551  
3552  	c_seg->c_generation_id = c_generation_id++;
3553  	c_seg_switch_state(c_seg, new_state, head_insert);
3554  
3555  #if CONFIG_FREEZE
3556  	if (c_seg->c_state == C_ON_SWAPOUT_Q) {
3557  		/*
3558  		 * darkwake and the freezer can't coexist.
3559  		 * We'll need to fix this accounting as a start.
3560  		 */
3561  		assert(vm_darkwake_mode == FALSE);
3562  		c_seg_update_task_owner(c_seg, freezer_context_global.freezer_ctx_task);
3563  		freezer_context_global.freezer_ctx_swapped_bytes += c_seg->c_bytes_used;
3564  	}
3565  #endif /* CONFIG_FREEZE */
3566  
3567  	if (c_seg->c_state == C_ON_AGE_Q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
3568  #if CONFIG_FREEZE
3569  		assert(c_seg->c_task_owner == NULL);
3570  #endif /* CONFIG_FREEZE */
3571  		c_seg_need_delayed_compaction(c_seg, TRUE);
3572  	}
3573  
3574  	lck_mtx_unlock_always(c_list_lock);
3575  
3576  	if (c_seg->c_state == C_ON_SWAPOUT_Q) {
3577  		/*
3578  		 * Darkwake and Freeze configs always
3579  		 * wake up the swapout thread because
3580  		 * the compactor thread that normally handles
3581  		 * it may not be running as much in these
3582  		 * configs.
3583  		 */
3584  		thread_wakeup((event_t)&c_swapout_list_head);
3585  	}
3586  
3587  	*current_chead = NULL;
3588  }
3589  
3590  
3591  /*
3592   * returns with c_seg locked
3593   */
3594  void
3595  c_seg_swapin_requeue(c_segment_t c_seg, boolean_t has_data, boolean_t minor_compact_ok, boolean_t age_on_swapin_q)
3596  {
3597  	clock_sec_t     sec;
3598  	clock_nsec_t    nsec;
3599  
3600  	clock_get_system_nanotime(&sec, &nsec);
3601  
3602  	lck_mtx_lock_spin_always(c_list_lock);
3603  	lck_mtx_lock_spin_always(&c_seg->c_lock);
3604  
3605  	assert(c_seg->c_busy_swapping);
3606  	assert(c_seg->c_busy);
3607  
3608  	c_seg->c_busy_swapping = 0;
3609  
3610  	if (c_seg->c_overage_swap == TRUE) {
3611  		c_overage_swapped_count--;
3612  		c_seg->c_overage_swap = FALSE;
3613  	}
3614  	if (has_data == TRUE) {
3615  		if (age_on_swapin_q == TRUE) {
3616  			c_seg_switch_state(c_seg, C_ON_SWAPPEDIN_Q, FALSE);
3617  		} else {
3618  			c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
3619  		}
3620  
3621  		if (minor_compact_ok == TRUE && !c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
3622  			c_seg_need_delayed_compaction(c_seg, TRUE);
3623  		}
3624  	} else {
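      		/*
      		 * no data made it back into the buffer (e.g. the swap
      		 * read failed)... drop the buffer pointer and park the
      		 * segment on the bad queue
      		 */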
3625  		c_seg->c_store.c_buffer = (int32_t*) NULL;
3626  		c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0);
3627  
3628  		c_seg_switch_state(c_seg, C_ON_BAD_Q, FALSE);
3629  	}
3630  	c_seg->c_swappedin_ts = (uint32_t)sec;
3631  
3632  	lck_mtx_unlock_always(c_list_lock);
3633  }
3634  
3635  
3636  
3637  /*
3638   * c_seg has to be locked and is returned locked if the c_seg isn't freed
3639   * PAGE_REPLACEMENT_DISALLOWED has to be TRUE on entry and is returned TRUE
3640   * c_seg_swapin returns 1 if the c_seg was freed, 0 otherwise
3641   */
3642  
3643  int
3644  c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction, boolean_t age_on_swapin_q)
3645  {
3646  	vm_offset_t     addr = 0;
3647  	uint32_t        io_size = 0;
3648  	uint64_t        f_offset;
3649  
3650  	assert(C_SEG_IS_ONDISK(c_seg));
3651  
3652  #if !CHECKSUM_THE_SWAP
3653  	c_seg_trim_tail(c_seg);
3654  #endif
3655  	io_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
3656  	f_offset = c_seg->c_store.c_swap_handle;
3657  
3658  	C_SEG_BUSY(c_seg);
3659  	c_seg->c_busy_swapping = 1;
3660  
3661  	/*
3662  	 * This thread is likely going to block for I/O.
3663  	 * Make sure it is ready to run when the I/O completes because
3664  	 * it needs to clear the busy bit on the c_seg so that other
3665  	 * waiting threads can make progress too. To do that, bump
3666  	 * the rwlock_count, which boosts this thread's priority.
3667  	 */
3668  	set_thread_rwlock_boost();
3669  	lck_mtx_unlock_always(&c_seg->c_lock);
3670  
3671  	PAGE_REPLACEMENT_DISALLOWED(FALSE);
3672  
3673  	addr = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
3674  	c_seg->c_store.c_buffer = (int32_t*) addr;
3675  
3676  	kernel_memory_populate(compressor_map, addr, io_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
3677  
3678  	if (vm_swap_get(c_seg, f_offset, io_size) != KERN_SUCCESS) {
3679  		PAGE_REPLACEMENT_DISALLOWED(TRUE);
3680  
3681  		kernel_memory_depopulate(compressor_map, addr, io_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
3682  
3683  		c_seg_swapin_requeue(c_seg, FALSE, TRUE, age_on_swapin_q);
3684  	} else {
3685  #if ENCRYPTED_SWAP
3686  		vm_swap_decrypt(c_seg);
3687  #endif /* ENCRYPTED_SWAP */
3688  
3689  #if CHECKSUM_THE_SWAP
3690  		if (c_seg->cseg_swap_size != io_size) {
3691  			panic("swapin size doesn't match swapout size");
3692  		}
3693  
3694  		if (c_seg->cseg_hash != vmc_hash((char*) c_seg->c_store.c_buffer, (int)io_size)) {
3695  			panic("c_seg_swapin - Swap hash mismatch\n");
3696  		}
3697  #endif /* CHECKSUM_THE_SWAP */
3698  
3699  		PAGE_REPLACEMENT_DISALLOWED(TRUE);
3700  
3701  		c_seg_swapin_requeue(c_seg, TRUE, force_minor_compaction == TRUE ? FALSE : TRUE, age_on_swapin_q);
3702  
3703  #if CONFIG_FREEZE
3704  		/*
3705  		 * c_seg_swapin_requeue() returns with the c_seg lock held.
3706  		 */
3707  		if (!lck_mtx_try_lock_spin_always(c_list_lock)) {
3708  			assert(c_seg->c_busy);
3709  
3710  			lck_mtx_unlock_always(&c_seg->c_lock);
3711  			lck_mtx_lock_spin_always(c_list_lock);
3712  			lck_mtx_lock_spin_always(&c_seg->c_lock);
3713  		}
3714  
3715  		if (c_seg->c_task_owner) {
3716  			c_seg_update_task_owner(c_seg, NULL);
3717  		}
3718  
3719  		lck_mtx_unlock_always(c_list_lock);
3720  
3721  		OSAddAtomic(c_seg->c_slots_used, &c_segment_pages_compressed_incore);
3722  #endif /* CONFIG_FREEZE */
3723  
3724  		OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
3725  
3726  		if (force_minor_compaction == TRUE) {
3727  			if (c_seg_minor_compaction_and_unlock(c_seg, FALSE)) {
3728  				/*
3729  				 * c_seg was completely empty so it was freed,
3730  				 * so be careful not to reference it again
3731  				 *
3732  				 * Drop the rwlock_count so that the thread priority
3733  				 * is returned back to where it is supposed to be.
3734  				 */
3735  				clear_thread_rwlock_boost();
3736  				return 1;
3737  			}
3738  
3739  			lck_mtx_lock_spin_always(&c_seg->c_lock);
3740  		}
3741  	}
3742  	C_SEG_WAKEUP_DONE(c_seg);
3743  
3744  	/*
3745  	 * Drop the rwlock_count so that the thread priority
3746  	 * is returned back to where it is supposed to be.
3747  	 */
3748  	clear_thread_rwlock_boost();
3749  
3750  	return 0;
3751  }
3752  
3753  
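      /*
       * atomically drop a reference on a single-value hash entry...
       * the (refcount, data) pair is updated with a 64-bit
       * compare-and-swap, and an entry whose refcount reaches 0
       * becomes reusable
       */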
3754  static void
3755  c_segment_sv_hash_drop_ref(int hash_indx)
3756  {
3757  	struct c_sv_hash_entry o_sv_he, n_sv_he;
3758  
3759  	while (1) {
3760  		o_sv_he.he_record = c_segment_sv_hash_table[hash_indx].he_record;
3761  
3762  		n_sv_he.he_ref = o_sv_he.he_ref - 1;
3763  		n_sv_he.he_data = o_sv_he.he_data;
3764  
3765  		if (OSCompareAndSwap64((UInt64)o_sv_he.he_record, (UInt64)n_sv_he.he_record, (UInt64 *) &c_segment_sv_hash_table[hash_indx].he_record) == TRUE) {
3766  			if (n_sv_he.he_ref == 0) {
3767  				OSAddAtomic(-1, &c_segment_svp_in_hash);
3768  			}
3769  			break;
3770  		}
3771  	}
3772  }
3773  
3774  
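      /*
       * insert 'data' into the single-value hash, an open-addressed
       * table probed linearly for up to C_SV_HASH_MAX_MISS slots...
       * each entry packs a refcount and the 32-bit value into one
       * 64-bit record so it can be claimed lock-free with a
       * compare-and-swap... returns the entry's index, or -1 if no
       * slot could be claimed
       */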
3775  static int
3776  c_segment_sv_hash_insert(uint32_t data)
3777  {
3778  	int             hash_sindx;
3779  	int             misses;
3780  	struct c_sv_hash_entry o_sv_he, n_sv_he;
3781  	boolean_t       got_ref = FALSE;
3782  
3783  	if (data == 0) {
3784  		OSAddAtomic(1, &c_segment_svp_zero_compressions);
3785  	} else {
3786  		OSAddAtomic(1, &c_segment_svp_nonzero_compressions);
3787  	}
3788  
3789  	hash_sindx = data & C_SV_HASH_MASK;
3790  
3791  	for (misses = 0; misses < C_SV_HASH_MAX_MISS; misses++) {
3792  		o_sv_he.he_record = c_segment_sv_hash_table[hash_sindx].he_record;
3793  
3794  		while (o_sv_he.he_data == data || o_sv_he.he_ref == 0) {
3795  			n_sv_he.he_ref = o_sv_he.he_ref + 1;
3796  			n_sv_he.he_data = data;
3797  
3798  			if (OSCompareAndSwap64((UInt64)o_sv_he.he_record, (UInt64)n_sv_he.he_record, (UInt64 *) &c_segment_sv_hash_table[hash_sindx].he_record) == TRUE) {
3799  				if (n_sv_he.he_ref == 1) {
3800  					OSAddAtomic(1, &c_segment_svp_in_hash);
3801  				}
3802  				got_ref = TRUE;
3803  				break;
3804  			}
3805  			o_sv_he.he_record = c_segment_sv_hash_table[hash_sindx].he_record;
3806  		}
3807  		if (got_ref == TRUE) {
3808  			break;
3809  		}
3810  		hash_sindx++;
3811  
3812  		if (hash_sindx == C_SV_HASH_SIZE) {
3813  			hash_sindx = 0;
3814  		}
3815  	}
3816  	if (got_ref == FALSE) {
3817  		return -1;
3818  	}
3819  
3820  	return hash_sindx;
3821  }
3822  
3823  
3824  #if RECORD_THE_COMPRESSED_DATA
3825  
3826  static void
3827  c_compressed_record_data(char *src, int c_size)
3828  {
3829  	if ((c_compressed_record_cptr + c_size + 4) >= c_compressed_record_ebuf) {
3830  		panic("c_compressed_record_cptr >= c_compressed_record_ebuf");
3831  	}
3832  
3833  	*(int *)((void *)c_compressed_record_cptr) = c_size;
3834  
3835  	c_compressed_record_cptr += 4;
3836  
3837  	memcpy(c_compressed_record_cptr, src, c_size);
3838  	c_compressed_record_cptr += c_size;
3839  }
3840  #endif
3841  
3842  
3843  static int
3844  c_compress_page(char *src, c_slot_mapping_t slot_ptr, c_segment_t *current_chead, char *scratch_buf)
3845  {
3846  	int             c_size;
3847  	int             c_rounded_size = 0;
3848  	int             max_csize;
3849  	c_slot_t        cs;
3850  	c_segment_t     c_seg;
3851  
3852  	KERNEL_DEBUG(0xe0400000 | DBG_FUNC_START, *current_chead, 0, 0, 0, 0);
3853  retry:
3854  	if ((c_seg = c_seg_allocate(current_chead)) == NULL) {
3855  		return 1;
3856  	}
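      	/*
      	 * a NULL c_seg means c_seg_allocate() hit the segment or
      	 * compressed-page limits... returning 1 tells the caller
      	 * that the compressor pool is exhausted
      	 */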
3857  	/*
3858  	 * returns with c_seg lock held
3859  	 * and PAGE_REPLACEMENT_DISALLOWED(TRUE)...
3860  	 * c_nextslot has been allocated and
3861  	 * c_store.c_buffer populated
3862  	 */
3863  	assert(c_seg->c_state == C_IS_FILLING);
3864  
3865  	cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_seg->c_nextslot);
3866  
3867  	C_SLOT_ASSERT_PACKABLE(slot_ptr);
3868  	cs->c_packed_ptr = C_SLOT_PACK_PTR(slot_ptr);
3869  
3870  	cs->c_offset = c_seg->c_nextoffset;
3871  
3872  	max_csize = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)cs->c_offset);
3873  
3874  	if (max_csize > PAGE_SIZE) {
3875  		max_csize = PAGE_SIZE;
3876  	}
3877  
3878  #if CHECKSUM_THE_DATA
3879  	cs->c_hash_data = vmc_hash(src, PAGE_SIZE);
3880  #endif
3881  	boolean_t incomp_copy = FALSE;
3882  	int max_csize_adj = (max_csize - 4);
3883  
3884  	if (vm_compressor_algorithm() != VM_COMPRESSOR_DEFAULT_CODEC) {
3885  #if defined(__arm__) || defined(__arm64__)
3886  		uint16_t ccodec = CINVALID;
3887  		uint32_t inline_popcount;
3888  		if (max_csize >= C_SEG_OFFSET_ALIGNMENT_BOUNDARY) {
3889  			c_size = metacompressor((const uint8_t *) src,
3890  			    (uint8_t *) &c_seg->c_store.c_buffer[cs->c_offset],
3891  			    max_csize_adj, &ccodec,
3892  			    scratch_buf, &incomp_copy, &inline_popcount);
3893  #if __ARM_WKDM_POPCNT__
3894  			cs->c_inline_popcount = inline_popcount;
3895  #else
3896  			assert(inline_popcount == C_SLOT_NO_POPCOUNT);
3897  #endif
3898  
3899  #if C_SEG_OFFSET_ALIGNMENT_BOUNDARY > 4
3900  			if (c_size > max_csize_adj) {
3901  				c_size = -1;
3902  			}
3903  #endif
3904  		} else {
3905  			c_size = -1;
3906  		}
3907  		assert(ccodec == CCWK || ccodec == CCLZ4);
3908  		cs->c_codec = ccodec;
3909  #endif
3910  	} else {
3911  #if defined(__arm__) || defined(__arm64__)
3912  		cs->c_codec = CCWK;
3913  #endif
3914  #if defined(__arm64__)
3915  		__unreachable_ok_push
3916  		if (PAGE_SIZE == 4096) {
3917  			c_size = WKdm_compress_4k((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3918  			    (WK_word *)(uintptr_t)scratch_buf, max_csize_adj);
3919  		} else {
3920  			c_size = WKdm_compress_16k((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3921  			    (WK_word *)(uintptr_t)scratch_buf, max_csize_adj);
3922  		}
3923  		__unreachable_ok_pop
3924  #else
3925  		c_size = WKdm_compress_new((const WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3926  		    (WK_word *)(uintptr_t)scratch_buf, max_csize_adj);
3927  #endif
3928  	}
3929  	assertf(((c_size <= max_csize_adj) && (c_size >= -1)),
3930  	    "c_size invalid (%d, %d), cur compressions: %d", c_size, max_csize_adj, c_segment_pages_compressed);
3931  
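      	/*
      	 * c_size == -1 means the codec could not compress the page
      	 * into the space remaining in this segment... c_size == 0
      	 * means the page is a single repeated 32-bit value
      	 */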
3932  	if (c_size == -1) {
3933  		if (max_csize < PAGE_SIZE) {
3934  			c_current_seg_filled(c_seg, current_chead);
3935  			assert(*current_chead == NULL);
3936  
3937  			lck_mtx_unlock_always(&c_seg->c_lock);
3938  			/* TODO: it may be worth requiring codecs to distinguish
3939  			 * between incompressible inputs and failures due to
3940  			 * budget exhaustion.
3941  			 */
3942  			PAGE_REPLACEMENT_DISALLOWED(FALSE);
3943  			goto retry;
3944  		}
3945  		c_size = PAGE_SIZE;
3946  
3947  		if (incomp_copy == FALSE) {
3948  			memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size);
3949  		}
3950  
3951  		OSAddAtomic(1, &c_segment_noncompressible_pages);
3952  	} else if (c_size == 0) {
3953  		int             hash_index;
3954  
3955  		/*
3956  		 * special case - this is a page completely full of a single 32 bit value
3957  		 */
3958  		hash_index = c_segment_sv_hash_insert(*(uint32_t *)(uintptr_t)src);
3959  
3960  		if (hash_index != -1) {
3961  			slot_ptr->s_cindx = hash_index;
3962  			slot_ptr->s_cseg = C_SV_CSEG_ID;
3963  
3964  			OSAddAtomic(1, &c_segment_svp_hash_succeeded);
3965  #if RECORD_THE_COMPRESSED_DATA
3966  			c_compressed_record_data(src, 4);
3967  #endif
3968  			goto sv_compression;
3969  		}
3970  		c_size = 4;
3971  
3972  		memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size);
3973  
3974  		OSAddAtomic(1, &c_segment_svp_hash_failed);
3975  	}
3976  
3977  #if RECORD_THE_COMPRESSED_DATA
3978  	c_compressed_record_data((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size);
3979  #endif
3980  #if CHECKSUM_THE_COMPRESSED_DATA
3981  	cs->c_hash_compressed_data = vmc_hash((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size);
3982  #endif
3983  #if POPCOUNT_THE_COMPRESSED_DATA
3984  	cs->c_pop_cdata = vmc_pop((uintptr_t) &c_seg->c_store.c_buffer[cs->c_offset], c_size);
3985  #endif
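      	/* round the compressed size up to the segment's offset granularity (C_SEG_OFFSET_ALIGNMENT_MASK + 1 bytes) */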
3986  	c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
3987  
3988  	PACK_C_SIZE(cs, c_size);
3989  	c_seg->c_bytes_used += c_rounded_size;
3990  	c_seg->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
3991  	c_seg->c_slots_used++;
3992  
3993  	slot_ptr->s_cindx = c_seg->c_nextslot++;
3994  	/* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
3995  	slot_ptr->s_cseg = c_seg->c_mysegno + 1;
3996  
3997  sv_compression:
3998  	if (c_seg->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg->c_nextslot >= C_SLOT_MAX_INDEX) {
3999  		c_current_seg_filled(c_seg, current_chead);
4000  		assert(*current_chead == NULL);
4001  	}
4002  	lck_mtx_unlock_always(&c_seg->c_lock);
4003  
4004  	PAGE_REPLACEMENT_DISALLOWED(FALSE);
4005  
4006  #if RECORD_THE_COMPRESSED_DATA
4007  	if ((c_compressed_record_cptr - c_compressed_record_sbuf) >= C_SEG_ALLOCSIZE) {
4008  		c_compressed_record_write(c_compressed_record_sbuf, (int)(c_compressed_record_cptr - c_compressed_record_sbuf));
4009  		c_compressed_record_cptr = c_compressed_record_sbuf;
4010  	}
4011  #endif
4012  	if (c_size) {
4013  		OSAddAtomic64(c_size, &c_segment_compressed_bytes);
4014  		OSAddAtomic64(c_rounded_size, &compressor_bytes_used);
4015  	}
4016  	OSAddAtomic64(PAGE_SIZE, &c_segment_input_bytes);
4017  
4018  	OSAddAtomic(1, &c_segment_pages_compressed);
4019  #if CONFIG_FREEZE
4020  	OSAddAtomic(1, &c_segment_pages_compressed_incore);
4021  #endif /* CONFIG_FREEZE */
4022  	OSAddAtomic(1, &sample_period_compression_count);
4023  
4024  	KERNEL_DEBUG(0xe0400000 | DBG_FUNC_END, *current_chead, c_size, c_segment_input_bytes, c_segment_compressed_bytes, 0);
4025  
4026  	return 0;
4027  }
4028  
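      /*
       * fill a page with a repeating 32-bit pattern... used to
       * rematerialize pages that compressed down to a single value
       */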
4029  static inline void
4030  sv_decompress(int32_t *ddst, int32_t pattern)
4031  {
4032  //	assert(__builtin_constant_p(PAGE_SIZE) != 0);
4033  #if defined(__x86_64__)
4034  	memset_word(ddst, pattern, PAGE_SIZE / sizeof(int32_t));
4035  #elif defined(__arm64__)
4036  	assert((PAGE_SIZE % 128) == 0);
4037  	if (pattern == 0) {
4038  		fill32_dczva((addr64_t)ddst, PAGE_SIZE);
4039  	} else {
4040  		fill32_nt((addr64_t)ddst, PAGE_SIZE, pattern);
4041  	}
4042  #else
4043  	size_t          i;
4044  
4045  	/* Unroll the pattern fill loop 4x to encourage the
4046  	 * compiler to emit NEON stores, cf.
4047  	 * <rdar://problem/25839866> Loop autovectorization
4048  	 * anomalies.
4049  	 *
4050  	 * We use separate loops for each PAGE_SIZE
4051  	 * to allow the autovectorizer to engage, as PAGE_SIZE
4052  	 * may not be a constant.
4053  	 */
4054  
4055  	__unreachable_ok_push
4056  	if (PAGE_SIZE == 4096) {
4057  		for (i = 0; i < (4096U / sizeof(int32_t)); i += 4) {
4058  			*ddst++ = pattern;
4059  			*ddst++ = pattern;
4060  			*ddst++ = pattern;
4061  			*ddst++ = pattern;
4062  		}
4063  	} else {
4064  		assert(PAGE_SIZE == 16384);
4065  		for (i = 0; i < (int)(16384U / sizeof(int32_t)); i += 4) {
4066  			*ddst++ = pattern;
4067  			*ddst++ = pattern;
4068  			*ddst++ = pattern;
4069  			*ddst++ = pattern;
4070  		}
4071  	}
4072  	__unreachable_ok_pop
4073  #endif
4074  }
4075  
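      /*
       * decompress the page backing 'slot_ptr' into 'dst', or, when
       * 'dst' is NULL, just free the compressed data...
       * returns 0 on success, 1 if the segment first had to be
       * swapped in, -1 on a swap-in or decompression failure, and
       * -2 if C_DONT_BLOCK was set and the operation would have blocked
       */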
4076  static int
4077  c_decompress_page(char *dst, volatile c_slot_mapping_t slot_ptr, int flags, int *zeroslot)
4078  {
4079  	c_slot_t        cs;
4080  	c_segment_t     c_seg;
4081  	uint32_t        c_segno;
4082  	uint16_t        c_indx;
4083  	int             c_rounded_size;
4084  	uint32_t        c_size;
4085  	int             retval = 0;
4086  	boolean_t       need_unlock = TRUE;
4087  	boolean_t       consider_defragmenting = FALSE;
4088  	boolean_t       kdp_mode = FALSE;
4089  
4090  	if (__improbable(flags & C_KDP)) {
4091  		if (not_in_kdp) {
4092  			panic("C_KDP passed to decompress page from outside of debugger context");
4093  		}
4094  
4095  		assert((flags & C_KEEP) == C_KEEP);
4096  		assert((flags & C_DONT_BLOCK) == C_DONT_BLOCK);
4097  
4098  		if ((flags & (C_DONT_BLOCK | C_KEEP)) != (C_DONT_BLOCK | C_KEEP)) {
4099  			return -2;
4100  		}
4101  
4102  		kdp_mode = TRUE;
4103  		*zeroslot = 0;
4104  	}
4105  
4106  ReTry:
4107  	if (__probable(!kdp_mode)) {
4108  		PAGE_REPLACEMENT_DISALLOWED(TRUE);
4109  	} else {
4110  		if (kdp_lck_rw_lock_is_acquired_exclusive(&c_master_lock)) {
4111  			return -2;
4112  		}
4113  	}
4114  
4115  #if HIBERNATION
4116  	/*
4117  	 * if hibernation is enabled, it indicates (via a call
4118  	 * to 'vm_decompressor_lock') that no further
4119  	 * decompressions are allowed once it reaches
4120  	 * the point of flushing all of the currently dirty
4121  	 * anonymous memory through the compressor and out
4122  	 * to disk... in this state we allow freeing of compressed
4123  	 * pages and must honor the C_DONT_BLOCK case
4124  	 */
4125  	if (__improbable(dst && decompressions_blocked == TRUE)) {
4126  		if (flags & C_DONT_BLOCK) {
4127  			if (__probable(!kdp_mode)) {
4128  				PAGE_REPLACEMENT_DISALLOWED(FALSE);
4129  			}
4130  
4131  			*zeroslot = 0;
4132  			return -2;
4133  		}
4134  		/*
4135  		 * it's safe to atomically assert and block behind the
4136  		 * lock held in shared mode because "decompressions_blocked" is
4137  		 * only set and cleared, and the thread_wakeup issued, while
4138  		 * the lock is held exclusively
4139  		 */
4140  		assert_wait((event_t)&decompressions_blocked, THREAD_UNINT);
4141  
4142  		PAGE_REPLACEMENT_DISALLOWED(FALSE);
4143  
4144  		thread_block(THREAD_CONTINUE_NULL);
4145  
4146  		goto ReTry;
4147  	}
4148  #endif
4149  	/* s_cseg is actually "segno+1" */
4150  	c_segno = slot_ptr->s_cseg - 1;
4151  
4152  	if (__improbable(c_segno >= c_segments_available)) {
4153  		panic("c_decompress_page: c_segno %d >= c_segments_available %d, slot_ptr(%p), slot_data(%x)",
4154  		    c_segno, c_segments_available, slot_ptr, *(int *)((void *)slot_ptr));
4155  	}
4156  
4157  	if (__improbable(c_segments[c_segno].c_segno < c_segments_available)) {
4158  		panic("c_decompress_page: c_segno %d is free, slot_ptr(%p), slot_data(%x)",
4159  		    c_segno, slot_ptr, *(int *)((void *)slot_ptr));
4160  	}
4161  
4162  	c_seg = c_segments[c_segno].c_seg;
4163  
4164  	if (__probable(!kdp_mode)) {
4165  		lck_mtx_lock_spin_always(&c_seg->c_lock);
4166  	} else {
4167  		if (kdp_lck_mtx_lock_spin_is_acquired(&c_seg->c_lock)) {
4168  			return -2;
4169  		}
4170  	}
4171  
4172  	assert(c_seg->c_state != C_IS_EMPTY && c_seg->c_state != C_IS_FREE);
4173  
4174  	if (dst == NULL && c_seg->c_busy_swapping) {
4175  		assert(c_seg->c_busy);
4176  
4177  		goto bypass_busy_check;
4178  	}
4179  	if (flags & C_DONT_BLOCK) {
4180  		if (c_seg->c_busy || (C_SEG_IS_ONDISK(c_seg) && dst)) {
4181  			*zeroslot = 0;
4182  
4183  			retval = -2;
4184  			goto done;
4185  		}
4186  	}
4187  	if (c_seg->c_busy) {
4188  		PAGE_REPLACEMENT_DISALLOWED(FALSE);
4189  
4190  		c_seg_wait_on_busy(c_seg);
4191  
4192  		goto ReTry;
4193  	}
4194  bypass_busy_check:
4195  
4196  	c_indx = slot_ptr->s_cindx;
4197  
4198  	if (__improbable(c_indx >= c_seg->c_nextslot)) {
4199  		panic("c_decompress_page: c_indx %d >= c_nextslot %d, c_seg(%p), slot_ptr(%p), slot_data(%x)",
4200  		    c_indx, c_seg->c_nextslot, c_seg, slot_ptr, *(int *)((void *)slot_ptr));
4201  	}
4202  
4203  	cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
4204  
4205  	c_size = UNPACK_C_SIZE(cs);
4206  
4207  	if (__improbable(c_size == 0)) {
4208  		panic("c_decompress_page: c_size == 0, c_seg(%p), slot_ptr(%p), slot_data(%x)",
4209  		    c_seg, slot_ptr, *(int *)((void *)slot_ptr));
4210  	}
4211  
4212  	c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
4213  
4214  	if (dst) {
4215  		uint32_t        age_of_cseg;
4216  		clock_sec_t     cur_ts_sec;
4217  		clock_nsec_t    cur_ts_nsec;
4218  
4219  		if (C_SEG_IS_ONDISK(c_seg)) {
4220  #if CONFIG_FREEZE
4221  			if (freezer_incore_cseg_acct) {
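      				/*
      				 * swapping this segment back in will bring its
      				 * slots into core... if doing so would push us
      				 * past the compressed-page or segment limits,
      				 * trigger a kill for compressor space shortage
      				 * and retry
      				 */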
4222  				if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) {
4223  					PAGE_REPLACEMENT_DISALLOWED(FALSE);
4224  					lck_mtx_unlock_always(&c_seg->c_lock);
4225  
4226  					memorystatus_kill_on_VM_compressor_space_shortage(FALSE /* async */);
4227  
4228  					goto ReTry;
4229  				}
4230  
4231  				uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
4232  				if ((incore_seg_count + 1) >= c_segments_nearing_limit) {
4233  					PAGE_REPLACEMENT_DISALLOWED(FALSE);
4234  					lck_mtx_unlock_always(&c_seg->c_lock);
4235  
4236  					memorystatus_kill_on_VM_compressor_space_shortage(FALSE /* async */);
4237  
4238  					goto ReTry;
4239  				}
4240  			}
4241  #endif /* CONFIG_FREEZE */
4242  			assert(kdp_mode == FALSE);
4243  			retval = c_seg_swapin(c_seg, FALSE, TRUE);
4244  			assert(retval == 0);
4245  
4246  			retval = 1;
4247  		}
4248  		if (c_seg->c_state == C_ON_BAD_Q) {
4249  			assert(c_seg->c_store.c_buffer == NULL);
4250  			*zeroslot = 0;
4251  
4252  			retval = -1;
4253  			goto done;
4254  		}
4255  
4256  #if POPCOUNT_THE_COMPRESSED_DATA
4257  		unsigned csvpop;
4258  		uintptr_t csvaddr = (uintptr_t) &c_seg->c_store.c_buffer[cs->c_offset];
4259  		if (cs->c_pop_cdata != (csvpop = vmc_pop(csvaddr, c_size))) {
4260  			panic("Compressed data popcount doesn't match original, bit distance: %d %p (phys: %p) %p %p 0x%x 0x%x 0x%x 0x%x", (csvpop - cs->c_pop_cdata), (void *)csvaddr, (void *) kvtophys(csvaddr), c_seg, cs, cs->c_offset, c_size, csvpop, cs->c_pop_cdata);
4261  		}
4262  #endif
4263  
4264  #if CHECKSUM_THE_COMPRESSED_DATA
4265  		unsigned csvhash;
4266  		if (cs->c_hash_compressed_data != (csvhash = vmc_hash((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size))) {
4267  			panic("Compressed data doesn't match original %p %p %u %u %u", c_seg, cs, c_size, cs->c_hash_compressed_data, csvhash);
4268  		}
4269  #endif
4270  		if (c_rounded_size == PAGE_SIZE) {
4271  			/*
4272  			 * page wasn't compressible... just copy it out
4273  			 */
4274  			memcpy(dst, &c_seg->c_store.c_buffer[cs->c_offset], PAGE_SIZE);
4275  		} else if (c_size == 4) {
4276  			int32_t         data;
4277  			int32_t         *dptr;
4278  
4279  			/*
4280  			 * page was populated with a single value
4281  			 * that didn't fit into our fast hash,
4282  			 * so we stored it as a single non-compressed
4283  			 * 32-bit value that we now fill the page with
4284  			 */
4285  			dptr = (int32_t *)(uintptr_t)dst;
4286  			data = *(int32_t *)(&c_seg->c_store.c_buffer[cs->c_offset]);
4287  			sv_decompress(dptr, data);
4288  		} else {
4289  			uint32_t        my_cpu_no;
4290  			char            *scratch_buf;
4291  
4292  			if (__probable(!kdp_mode)) {
4293  				/*
4294  				 * we're behind the c_seg lock held in spin mode
4295  				 * which means pre-emption is disabled... therefore
4296  				 * the following sequence is atomic and safe
4297  				 */
4298  				my_cpu_no = cpu_number();
4299  
4300  				assert(my_cpu_no < compressor_cpus);
4301  
4302  				scratch_buf = &compressor_scratch_bufs[my_cpu_no * vm_compressor_get_decode_scratch_size()];
4303  			} else {
4304  				scratch_buf = kdp_compressor_scratch_buf;
4305  			}
4306  
4307  			if (vm_compressor_algorithm() != VM_COMPRESSOR_DEFAULT_CODEC) {
4308  #if defined(__arm__) || defined(__arm64__)
4309  				uint16_t c_codec = cs->c_codec;
4310  				uint32_t inline_popcount;
4311  				if (!metadecompressor((const uint8_t *) &c_seg->c_store.c_buffer[cs->c_offset],
4312  				    (uint8_t *)dst, c_size, c_codec, (void *)scratch_buf, &inline_popcount)) {
4313  					retval = -1;
4314  				} else {
4315  #if __ARM_WKDM_POPCNT__
4316  					if (inline_popcount != cs->c_inline_popcount) {
4317  						/*
4318  						 * The codec choice in compression and
4319  						 * decompression must agree, so there
4320  						 * should never be a disagreement in
4321  						 * whether an inline population count
4322  						 * was performed.
4323  						 */
4324  						assert(inline_popcount != C_SLOT_NO_POPCOUNT);
4325  						assert(cs->c_inline_popcount != C_SLOT_NO_POPCOUNT);
4326  						printf("decompression failure from physical region %llx+%05x: popcount mismatch (%d != %d)\n",
4327  						    (unsigned long long)kvtophys((uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset]), c_size,
4328  						    inline_popcount,
4329  						    cs->c_inline_popcount);
4330  						retval = -1;
4331  					}
4332  #else
4333  					assert(inline_popcount == C_SLOT_NO_POPCOUNT);
4334  #endif /* __ARM_WKDM_POPCNT__ */
4335  				}
4336  #endif
4337  			} else {
4338  #if defined(__arm64__)
4339  				__unreachable_ok_push
4340  				if (PAGE_SIZE == 4096) {
4341  					WKdm_decompress_4k((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
4342  					    (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size);
4343  				} else {
4344  					WKdm_decompress_16k((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
4345  					    (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size);
4346  				}
4347  				__unreachable_ok_pop
4348  #else
4349  				WKdm_decompress_new((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
4350  				    (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size);
4351  #endif
4352  			}
4353  		}
4354  
4355  #if CHECKSUM_THE_DATA
4356  		if (cs->c_hash_data != vmc_hash(dst, PAGE_SIZE)) {
4357  #if     defined(__arm__) || defined(__arm64__)
4358  			int32_t *dinput = &c_seg->c_store.c_buffer[cs->c_offset];
4359  			panic("decompressed data doesn't match original cs: %p, hash: 0x%x, offset: %d, c_size: %d, c_rounded_size: %d, codec: %d, header: 0x%x 0x%x 0x%x", cs, cs->c_hash_data, cs->c_offset, c_size, c_rounded_size, cs->c_codec, *dinput, *(dinput + 1), *(dinput + 2));
4360  #else
4361  			panic("decompressed data doesn't match original cs: %p, hash: %d, offset: 0x%x, c_size: %d", cs, cs->c_hash_data, cs->c_offset, c_size);
4362  #endif
4363  		}
4364  #endif
4365  		if (c_seg->c_swappedin_ts == 0 && !kdp_mode) {
4366  			clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);
4367  
4368  			age_of_cseg = (uint32_t)cur_ts_sec - c_seg->c_creation_ts;
4369  			if (age_of_cseg < DECOMPRESSION_SAMPLE_MAX_AGE) {
4370  				OSAddAtomic(1, &age_of_decompressions_during_sample_period[age_of_cseg]);
4371  			} else {
4372  				OSAddAtomic(1, &overage_decompressions_during_sample_period);
4373  			}
4374  
4375  			OSAddAtomic(1, &sample_period_decompression_count);
4376  		}
4377  	}
4378  #if CONFIG_FREEZE
4379  	else {
4380  		/*
4381  		 * We are freeing an uncompressed page from this c_seg and so balance the ledgers.
4382  		 */
4383  		if (C_SEG_IS_ONDISK(c_seg)) {
4384  			/*
4385  			 * The compression sweep feature will push out anonymous pages to disk
4386  			 * without going through the freezer path and so those c_segs, while
4387  			 * swapped out, won't have an owner.
4388  			 */
4389  			if (c_seg->c_task_owner) {
4390  				task_update_frozen_to_swap_acct(c_seg->c_task_owner, PAGE_SIZE_64, DEBIT_FROM_SWAP);
4391  			}
4392  
4393  			/*
4394  			 * We are freeing a page in swap without swapping it in. We bump the in-core
4395  			 * count here to simulate a swapin of a page so that we can accurately
4396  			 * decrement it below.
4397  			 */
4398  			OSAddAtomic(1, &c_segment_pages_compressed_incore);
4399  		}
4400  	}
4401  #endif /* CONFIG_FREEZE */
4402  
4403  	if (flags & C_KEEP) {
4404  		*zeroslot = 0;
4405  		goto done;
4406  	}
4407  	assert(kdp_mode == FALSE);
4408  
4409  	c_seg->c_bytes_unused += c_rounded_size;
4410  	c_seg->c_bytes_used -= c_rounded_size;
4411  
4412  	assert(c_seg->c_slots_used);
4413  	c_seg->c_slots_used--;
4414  
4415  	PACK_C_SIZE(cs, 0);
4416  
4417  	if (c_indx < c_seg->c_firstemptyslot) {
4418  		c_seg->c_firstemptyslot = c_indx;
4419  	}
4420  
4421  	OSAddAtomic(-1, &c_segment_pages_compressed);
4422  #if CONFIG_FREEZE
4423  	OSAddAtomic(-1, &c_segment_pages_compressed_incore);
4424  	assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);
4425  #endif /* CONFIG_FREEZE */
4426  
4427  	if (c_seg->c_state != C_ON_BAD_Q && !(C_SEG_IS_ONDISK(c_seg))) {
4428  		/*
4429  		 * C_SEG_IS_ONDISK == TRUE can occur when we're doing a
4430  		 * free of a compressed page (i.e. dst == NULL)
4431  		 */
4432  		OSAddAtomic64(-c_rounded_size, &compressor_bytes_used);
4433  	}
4434  	if (c_seg->c_busy_swapping) {
4435  		/*
4436  		 * bypass case for c_busy_swapping...
4437  		 * let the swapin/swapout paths deal with putting
4438  		 * the c_seg on the minor compaction queue if needed
4439  		 */
4440  		assert(c_seg->c_busy);
4441  		goto done;
4442  	}
4443  	assert(!c_seg->c_busy);
4444  
4445  	if (c_seg->c_state != C_IS_FILLING) {
4446  		if (c_seg->c_bytes_used == 0) {
4447  			if (!(C_SEG_IS_ONDISK(c_seg))) {
4448  				int     pages_populated;
4449  
4450  				pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE;
4451  				c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0);
4452  
4453  				if (pages_populated) {
4454  					assert(c_seg->c_state != C_ON_BAD_Q);
4455  					assert(c_seg->c_store.c_buffer != NULL);
4456  
4457  					C_SEG_BUSY(c_seg);
4458  					lck_mtx_unlock_always(&c_seg->c_lock);
4459  
4460  					kernel_memory_depopulate(compressor_map,
4461  					    (vm_offset_t) c_seg->c_store.c_buffer,
4462  					    pages_populated * PAGE_SIZE, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
4463  
4464  					lck_mtx_lock_spin_always(&c_seg->c_lock);
4465  					C_SEG_WAKEUP_DONE(c_seg);
4466  				}
4467  				if (!c_seg->c_on_minorcompact_q && c_seg->c_state != C_ON_SWAPOUT_Q && c_seg->c_state != C_ON_SWAPIO_Q) {
4468  					c_seg_need_delayed_compaction(c_seg, FALSE);
4469  				}
4470  			} else {
4471  				if (c_seg->c_state != C_ON_SWAPPEDOUTSPARSE_Q) {
4472  					c_seg_move_to_sparse_list(c_seg);
4473  					consider_defragmenting = TRUE;
4474  				}
4475  			}
4476  		} else if (c_seg->c_on_minorcompact_q) {
4477  			assert(c_seg->c_state != C_ON_BAD_Q);
4478  			assert(!C_SEG_IS_ON_DISK_OR_SOQ(c_seg));
4479  
4480  			if (C_SEG_SHOULD_MINORCOMPACT_NOW(c_seg)) {
4481  				c_seg_try_minor_compaction_and_unlock(c_seg);
4482  				need_unlock = FALSE;
4483  			}
4484  		} else if (!(C_SEG_IS_ONDISK(c_seg))) {
4485  			if (c_seg->c_state != C_ON_BAD_Q && c_seg->c_state != C_ON_SWAPOUT_Q && c_seg->c_state != C_ON_SWAPIO_Q &&
4486  			    C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
4487  				c_seg_need_delayed_compaction(c_seg, FALSE);
4488  			}
4489  		} else if (c_seg->c_state != C_ON_SWAPPEDOUTSPARSE_Q && C_SEG_ONDISK_IS_SPARSE(c_seg)) {
4490  			c_seg_move_to_sparse_list(c_seg);
4491  			consider_defragmenting = TRUE;
4492  		}
4493  	}
4494  done:
4495  	if (__improbable(kdp_mode)) {
4496  		return retval;
4497  	}
4498  
4499  	if (need_unlock == TRUE) {
4500  		lck_mtx_unlock_always(&c_seg->c_lock);
4501  	}
4502  
4503  	PAGE_REPLACEMENT_DISALLOWED(FALSE);
4504  
4505  	if (consider_defragmenting == TRUE) {
4506  		vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
4507  	}
4508  
4509  #if !XNU_TARGET_OS_OSX
4510  	if ((c_minor_count && COMPRESSOR_NEEDS_TO_MINOR_COMPACT()) || vm_compressor_needs_to_major_compact()) {
4511  		vm_wake_compactor_swapper();
4512  	}
4513  #endif /* !XNU_TARGET_OS_OSX */
4514  
4515  	return retval;
4516  }
4517  
4518  
4519  int
4520  vm_compressor_get(ppnum_t pn, int *slot, int flags)
4521  {
4522  	c_slot_mapping_t  slot_ptr;
4523  	char    *dst;
4524  	int     zeroslot = 1;
4525  	int     retval;
4526  
4527  	dst = pmap_map_compressor_page(pn);
4528  	slot_ptr = (c_slot_mapping_t)slot;
4529  
4530  	assert(dst != NULL);
4531  
4532  	if (slot_ptr->s_cseg == C_SV_CSEG_ID) {
4533  		int32_t         data;
4534  		int32_t         *dptr;
4535  
4536  		/*
4537  		 * page was populated with a single value
4538  		 * that found a home in our hash table...
4539  		 * grab that value from the hash and use it
4540  		 * to fill the page
4541  		 */
4542  		dptr = (int32_t *)(uintptr_t)dst;
4543  		data = c_segment_sv_hash_table[slot_ptr->s_cindx].he_data;
4544  		sv_decompress(dptr, data);
4545  		if (!(flags & C_KEEP)) {
4546  			c_segment_sv_hash_drop_ref(slot_ptr->s_cindx);
4547  
4548  			OSAddAtomic(-1, &c_segment_pages_compressed);
4549  #if CONFIG_FREEZE
4550  			OSAddAtomic(-1, &c_segment_pages_compressed_incore);
4551  			assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count 0x%x", c_segment_pages_compressed_incore);
4552  #endif /* CONFIG_FREEZE */
4553  			*slot = 0;
4554  		}
4555  		if (data) {
4556  			OSAddAtomic(1, &c_segment_svp_nonzero_decompressions);
4557  		} else {
4558  			OSAddAtomic(1, &c_segment_svp_zero_decompressions);
4559  		}
4560  
4561  		pmap_unmap_compressor_page(pn, dst);
4562  		return 0;
4563  	}
4564  
4565  	retval = c_decompress_page(dst, slot_ptr, flags, &zeroslot);
4566  
4567  	/*
4568  	 * zeroslot will be set to 0 by c_decompress_page if (flags & C_KEEP)
4569  	 * or (flags & C_DONT_BLOCK) and we found 'c_busy' or 'C_SEG_IS_ONDISK' to be TRUE
4570  	 */
4571  	if (zeroslot) {
4572  		*slot = 0;
4573  	}
4574  
4575  	pmap_unmap_compressor_page(pn, dst);
4576  
4577  	/*
4578  	 * returns 0 if we successfully decompressed a page from a segment already in memory
4579  	 * returns 1 if we had to first swap in the segment, before successfully decompressing the page
4580  	 * returns -1 if we encountered an error swapping in the segment - decompression failed
4581  	 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' or 'C_SEG_IS_ONDISK' to be true
4582  	 */
4583  	return retval;
4584  }
4585  
4586  #if DEVELOPMENT || DEBUG
4587  
4588  void
4589  vm_compressor_inject_error(int *slot)
4590  {
4591  	c_slot_mapping_t slot_ptr = (c_slot_mapping_t)slot;
4592  
4593  	/* No error detection for single-value compression. */
4594  	if (slot_ptr->s_cseg == C_SV_CSEG_ID) {
4595  		printf("%s(): cannot inject errors in SV-compressed pages\n", __func__);
4596  		return;
4597  	}
4598  
4599  	/* s_cseg is actually "segno+1" */
4600  	const uint32_t c_segno = slot_ptr->s_cseg - 1;
4601  
4602  	assert(c_segno < c_segments_available);
4603  	assert(c_segments[c_segno].c_segno >= c_segments_available);
4604  
4605  	const c_segment_t c_seg = c_segments[c_segno].c_seg;
4606  
4607  	PAGE_REPLACEMENT_DISALLOWED(TRUE);
4608  
4609  	lck_mtx_lock_spin_always(&c_seg->c_lock);
4610  	assert(c_seg->c_state != C_IS_EMPTY && c_seg->c_state != C_IS_FREE);
4611  
4612  	const uint16_t c_indx = slot_ptr->s_cindx;
4613  	assert(c_indx < c_seg->c_nextslot);
4614  
4615  	/*
4616  	 * To safely make this segment temporarily writable, we need to mark
4617  	 * the segment busy, which allows us to release the segment lock.
4618  	 */
4619  	while (c_seg->c_busy) {
4620  		c_seg_wait_on_busy(c_seg);
4621  		lck_mtx_lock_spin_always(&c_seg->c_lock);
4622  	}
4623  	C_SEG_BUSY(c_seg);
4624  
4625  	bool already_writable = (c_seg->c_state == C_IS_FILLING);
4626  	if (!already_writable) {
4627  		/*
4628  		 * Protection update must be performed preemptibly, so temporarily drop
4629  		 * the lock. Having set c_busy will prevent most other concurrent
4630  		 * operations.
4631  		 */
4632  		lck_mtx_unlock_always(&c_seg->c_lock);
4633  		C_SEG_MAKE_WRITEABLE(c_seg);
4634  		lck_mtx_lock_spin_always(&c_seg->c_lock);
4635  	}
4636  
4637  	/*
4638  	 * Once we've released the lock following our c_state == C_IS_FILLING check,
4639  	 * c_current_seg_filled() can (re-)write-protect the segment. However, it
4640  	 * will transition from C_IS_FILLING before releasing the c_seg lock, so we
4641  	 * can detect this by re-checking after we've reobtained the lock.
4642  	 */
4643  	if (already_writable && c_seg->c_state != C_IS_FILLING) {
4644  		lck_mtx_unlock_always(&c_seg->c_lock);
4645  		C_SEG_MAKE_WRITEABLE(c_seg);
4646  		lck_mtx_lock_spin_always(&c_seg->c_lock);
4647  		already_writable = false;
4648  		/* Segment can't be freed while c_busy is set. */
4649  		assert(c_seg->c_state != C_IS_FILLING);
4650  	}
4651  
4652  	c_slot_t cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
4653  	int32_t *data = &c_seg->c_store.c_buffer[cs->c_offset];
4654  	/* assume that the compressed data holds more than one int32_t */
4655  	assert(UNPACK_C_SIZE(cs) > sizeof(*data));
4656  	/*
4657  	 * This bit is known to be in the payload of a MISS packet resulting from
4658  	 * the pattern used in the test pattern from decompression_failure.c.
4659  	 * Flipping it should result in many corrupted bits in the test page.
4660  	 */
4661  	data[0] ^= 0x00000100;
4662  	if (!already_writable) {
4663  		lck_mtx_unlock_always(&c_seg->c_lock);
4664  		C_SEG_WRITE_PROTECT(c_seg);
4665  		lck_mtx_lock_spin_always(&c_seg->c_lock);
4666  	}
4667  
4668  	C_SEG_WAKEUP_DONE(c_seg);
4669  	lck_mtx_unlock_always(&c_seg->c_lock);
4670  
4671  	PAGE_REPLACEMENT_DISALLOWED(FALSE);
4672  }
4673  
4674  #endif /* DEVELOPMENT || DEBUG */
4675  
4676  int
4677  vm_compressor_free(int *slot, int flags)
4678  {
4679  	c_slot_mapping_t  slot_ptr;
4680  	int     zeroslot = 1;
4681  	int     retval;
4682  
4683  	assert(flags == 0 || flags == C_DONT_BLOCK);
4684  
4685  	slot_ptr = (c_slot_mapping_t)slot;
4686  
4687  	if (slot_ptr->s_cseg == C_SV_CSEG_ID) {
4688  		c_segment_sv_hash_drop_ref(slot_ptr->s_cindx);
4689  		OSAddAtomic(-1, &c_segment_pages_compressed);
4690  #if CONFIG_FREEZE
4691  		OSAddAtomic(-1, &c_segment_pages_compressed_incore);
4692  		assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count 0x%x", c_segment_pages_compressed_incore);
4693  #endif /* CONFIG_FREEZE */
4694  
4695  		*slot = 0;
4696  		return 0;
4697  	}
4698  	retval = c_decompress_page(NULL, slot_ptr, flags, &zeroslot);
4699  	/*
4700  	 * returns 0 if we successfully freed the specified compressed page
4701  	 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' set
4702  	 */
4703  
4704  	if (retval == 0) {
4705  		*slot = 0;
4706  	} else {
4707  		assert(retval == -2);
4708  	}
4709  
4710  	return retval;
4711  }
4712  
4713  
4714  int
4715  vm_compressor_put(ppnum_t pn, int *slot, void  **current_chead, char *scratch_buf)
4716  {
4717  	char    *src;
4718  	int     retval;
4719  
4720  	src = pmap_map_compressor_page(pn);
4721  	assert(src != NULL);
4722  
4723  	retval = c_compress_page(src, (c_slot_mapping_t)slot, (c_segment_t *)current_chead, scratch_buf);
4724  	pmap_unmap_compressor_page(pn, src);
4725  
4726  	return retval;
4727  }
4728  
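      /*
       * repoint a compressed slot's backpointer from src_slot_p to
       * dst_slot_p, transferring ownership of the slot mapping
       * without touching the compressed data itself
       */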
4729  void
4730  vm_compressor_transfer(
4731  	int     *dst_slot_p,
4732  	int     *src_slot_p)
4733  {
4734  	c_slot_mapping_t        dst_slot, src_slot;
4735  	c_segment_t             c_seg;
4736  	uint16_t                c_indx;
4737  	c_slot_t                cs;
4738  
4739  	src_slot = (c_slot_mapping_t) src_slot_p;
4740  
4741  	if (src_slot->s_cseg == C_SV_CSEG_ID) {
4742  		*dst_slot_p = *src_slot_p;
4743  		*src_slot_p = 0;
4744  		return;
4745  	}
4746  	dst_slot = (c_slot_mapping_t) dst_slot_p;
4747  Retry:
4748  	PAGE_REPLACEMENT_DISALLOWED(TRUE);
4749  	/* get segment for src_slot */
4750  	c_seg = c_segments[src_slot->s_cseg - 1].c_seg;
4751  	/* lock segment */
4752  	lck_mtx_lock_spin_always(&c_seg->c_lock);
4753  	/* wait if it's busy */
4754  	if (c_seg->c_busy && !c_seg->c_busy_swapping) {
4755  		PAGE_REPLACEMENT_DISALLOWED(FALSE);
4756  		c_seg_wait_on_busy(c_seg);
4757  		goto Retry;
4758  	}
4759  	/* find the c_slot */
4760  	c_indx = src_slot->s_cindx;
4761  	cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
4762  	/* point the c_slot back to dst_slot instead of src_slot */
4763  	C_SLOT_ASSERT_PACKABLE(dst_slot);
4764  	cs->c_packed_ptr = C_SLOT_PACK_PTR(dst_slot);
4765  	/* transfer */
4766  	*dst_slot_p = *src_slot_p;
4767  	*src_slot_p = 0;
4768  	lck_mtx_unlock_always(&c_seg->c_lock);
4769  	PAGE_REPLACEMENT_DISALLOWED(FALSE);
4770  }
4771  
4772  #if CONFIG_FREEZE
4773  
4774  int     freezer_finished_filling = 0;
4775  
4776  void
4777  vm_compressor_finished_filling(
4778  	void    **current_chead)
4779  {
4780  	c_segment_t     c_seg;
4781  
4782  	if ((c_seg = *(c_segment_t *)current_chead) == NULL) {
4783  		return;
4784  	}
4785  
4786  	assert(c_seg->c_state == C_IS_FILLING);
4787  
4788  	lck_mtx_lock_spin_always(&c_seg->c_lock);
4789  
4790  	c_current_seg_filled(c_seg, (c_segment_t *)current_chead);
4791  
4792  	lck_mtx_unlock_always(&c_seg->c_lock);
4793  
4794  	freezer_finished_filling++;
4795  }
4796  
4797  
4798  /*
4799   * This routine is used to transfer the compressed chunks from
4800   * the c_seg/cindx pointed to by slot_p into a new c_seg headed
4801   * by the current_chead and a new cindx within that c_seg.
4802   *
4803   * Currently, this routine is only used by the "freezer backed by
4804   * compressor with swap" mode to create a series of c_segs that
4805   * only contain compressed data belonging to one task. So, we
4806   * move a task's previously compressed data into a set of new
4807   * c_segs which will also hold the task's yet to be compressed data.
4808   */
4809  
4810  kern_return_t
4811  vm_compressor_relocate(
4812  	void            **current_chead,
4813  	int             *slot_p)
4814  {
4815  	c_slot_mapping_t        slot_ptr;
4816  	c_slot_mapping_t        src_slot;
4817  	uint32_t                c_rounded_size;
4818  	uint32_t                c_size;
4819  	uint16_t                dst_slot;
4820  	c_slot_t                c_dst;
4821  	c_slot_t                c_src;
4822  	uint16_t                c_indx;
4823  	c_segment_t             c_seg_dst = NULL;
4824  	c_segment_t             c_seg_src = NULL;
4825  	kern_return_t           kr = KERN_SUCCESS;
4826  
4827  
4828  	src_slot = (c_slot_mapping_t) slot_p;
4829  
4830  	if (src_slot->s_cseg == C_SV_CSEG_ID) {
4831  		/*
4832  		 * no need to relocate... this is a page full of a single
4833  		 * value which is hashed to a single entry not contained
4834  		 * in a c_segment_t
4835  		 */
4836  		return kr;
4837  	}
4838  
4839  Relookup_dst:
4840  	c_seg_dst = c_seg_allocate((c_segment_t *)current_chead);
4841  	/*
4842  	 * returns with c_seg lock held
4843  	 * and PAGE_REPLACEMENT_DISALLOWED(TRUE)...
4844  	 * c_nextslot has been allocated and
4845  	 * c_store.c_buffer populated
4846  	 */
4847  	if (c_seg_dst == NULL) {
4848  		/*
4849  		 * Out of compression segments?
4850  		 */
4851  		kr = KERN_RESOURCE_SHORTAGE;
4852  		goto out;
4853  	}
4854  
4855  	assert(c_seg_dst->c_busy == 0);
4856  
4857  	C_SEG_BUSY(c_seg_dst);
4858  
4859  	dst_slot = c_seg_dst->c_nextslot;
4860  
4861  	lck_mtx_unlock_always(&c_seg_dst->c_lock);
4862  
4863  Relookup_src:
4864  	c_seg_src = c_segments[src_slot->s_cseg - 1].c_seg;
4865  
4866  	assert(c_seg_dst != c_seg_src);
4867  
4868  	lck_mtx_lock_spin_always(&c_seg_src->c_lock);
4869  
4870  	if (C_SEG_IS_ON_DISK_OR_SOQ(c_seg_src) ||
4871  	    c_seg_src->c_state == C_IS_FILLING) {
4872  		/*
4873  		 * Skip this page if:
4874  		 * a) the src c_seg is already on-disk (or on its way there)
4875  		 *    A "thaw" can mark a process as eligible for
4876  		 * another freeze cycle without bringing any of
4877  		 * its swapped out c_segs back from disk (because
4878  		 * that is done on-demand).
4879  		 *    Or, this page may be mapped elsewhere in the task's map,
4880  		 * and we may have marked it for swap already.
4881  		 *
4882  		 * b) Or, the src c_seg is being filled by the compressor
4883  		 * thread. We don't want the added latency of waiting for
4884  		 * this c_seg in the freeze path and so we skip it.
4885  		 */
4886  
4887  		PAGE_REPLACEMENT_DISALLOWED(FALSE);
4888  
4889  		lck_mtx_unlock_always(&c_seg_src->c_lock);
4890  
4891  		c_seg_src = NULL;
4892  
4893  		goto out;
4894  	}
4895  
4896  	if (c_seg_src->c_busy) {
4897  		PAGE_REPLACEMENT_DISALLOWED(FALSE);
4898  		c_seg_wait_on_busy(c_seg_src);
4899  
4900  		c_seg_src = NULL;
4901  
4902  		PAGE_REPLACEMENT_DISALLOWED(TRUE);
4903  
4904  		goto Relookup_src;
4905  	}

	C_SEG_BUSY(c_seg_src);

	lck_mtx_unlock_always(&c_seg_src->c_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);

	/* find the c_slot */
	c_indx = src_slot->s_cindx;

	c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, c_indx);

	c_size = UNPACK_C_SIZE(c_src);

	assert(c_size);
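
	/*
	 * The assert above relies on the convention that a packed size of
	 * zero marks a free slot; see PACK_C_SIZE(c_src, 0) below, which
	 * frees the source slot once its data has been copied out.  A slot
	 * still referenced by a live mapping must unpack to a nonzero size.
	 */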
	if (c_size > (uint32_t)(C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)c_seg_dst->c_nextoffset))) {
		/*
		 * The destination segment doesn't have room for this
		 * slot's data. Close it out and allocate a new one.
		 */

		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(&c_seg_src->c_lock);
		C_SEG_WAKEUP_DONE(c_seg_src);
		lck_mtx_unlock_always(&c_seg_src->c_lock);

		c_seg_src = NULL;

		lck_mtx_lock_spin_always(&c_seg_dst->c_lock);

		assert(c_seg_dst->c_busy);
		assert(c_seg_dst->c_state == C_IS_FILLING);
		assert(!c_seg_dst->c_on_minorcompact_q);

		c_current_seg_filled(c_seg_dst, (c_segment_t *)current_chead);
		assert(*current_chead == NULL);

		C_SEG_WAKEUP_DONE(c_seg_dst);

		lck_mtx_unlock_always(&c_seg_dst->c_lock);

		c_seg_dst = NULL;

		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		goto Relookup_dst;
	}
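
	/*
	 * The capacity test above in plain terms: c_nextoffset is kept in
	 * alignment units, so the remaining room in the destination buffer
	 * is C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES(c_nextoffset).  When
	 * c_size exceeds that, the destination is retired via
	 * c_current_seg_filled() and a fresh segment is picked up at
	 * Relookup_dst.
	 */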

	c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot);

	memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size);
	/*
	 * Is platform alignment actually necessary since wkdm aligns its output?
	 */
	c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
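
	/*
	 * Worked example with illustrative numbers: if the offset unit is
	 * 4 bytes (mask 0x3), a c_size of 1741 rounds up to
	 * (1741 + 3) & ~3 == 1744, keeping every slot's starting offset
	 * aligned in the segment buffer.
	 */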

	cslot_copy(c_dst, c_src);
	c_dst->c_offset = c_seg_dst->c_nextoffset;

	if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot) {
		c_seg_dst->c_firstemptyslot++;
	}

	c_seg_dst->c_slots_used++;
	c_seg_dst->c_nextslot++;
	c_seg_dst->c_bytes_used += c_rounded_size;
	c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);

	PACK_C_SIZE(c_src, 0);

	c_seg_src->c_bytes_used -= c_rounded_size;
	c_seg_src->c_bytes_unused += c_rounded_size;

	assert(c_seg_src->c_slots_used);
	c_seg_src->c_slots_used--;

	if (c_indx < c_seg_src->c_firstemptyslot) {
		c_seg_src->c_firstemptyslot = c_indx;
	}
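
	/*
	 * Net effect of the bookkeeping above: the rounded byte count is
	 * charged to the destination and, once the source slot is freed
	 * with PACK_C_SIZE(c_src, 0), moved from the source's c_bytes_used
	 * to c_bytes_unused, leaving the source sparser and a better
	 * candidate for minor compaction.
	 */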

	c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot);

	PAGE_REPLACEMENT_ALLOWED(TRUE);
	slot_ptr = C_SLOT_UNPACK_PTR(c_dst);
	/* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
	slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1;
	slot_ptr->s_cindx = dst_slot;

	PAGE_REPLACEMENT_ALLOWED(FALSE);

out:
	if (c_seg_src) {
		lck_mtx_lock_spin_always(&c_seg_src->c_lock);

		C_SEG_WAKEUP_DONE(c_seg_src);

		if (c_seg_src->c_bytes_used == 0 && c_seg_src->c_state != C_IS_FILLING) {
			if (!c_seg_src->c_on_minorcompact_q) {
				c_seg_need_delayed_compaction(c_seg_src, FALSE);
			}
		}

		lck_mtx_unlock_always(&c_seg_src->c_lock);
	}
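
	/*
	 * If the relocation drained the source segment completely (and the
	 * segment isn't still being filled), it was queued above for
	 * delayed minor compaction so its now-unused memory can eventually
	 * be reclaimed.
	 */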

	if (c_seg_dst) {
		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(&c_seg_dst->c_lock);

		if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
			/*
			 * We are nearing (or have exceeded) the maximum
			 * slot and offset capacity, so close this
			 * segment out now.
			 */
			assert(c_seg_dst->c_busy);
			assert(c_seg_dst->c_state == C_IS_FILLING);
			assert(!c_seg_dst->c_on_minorcompact_q);

			c_current_seg_filled(c_seg_dst, (c_segment_t *)current_chead);
			assert(*current_chead == NULL);
		}

		C_SEG_WAKEUP_DONE(c_seg_dst);

		lck_mtx_unlock_always(&c_seg_dst->c_lock);

		c_seg_dst = NULL;

		PAGE_REPLACEMENT_DISALLOWED(FALSE);
	}
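
	/*
	 * Unlike the mid-copy "out of room" case, the close-out above is
	 * proactive: once the destination nears its slot or offset limits
	 * it is retired via c_current_seg_filled(), so the next call starts
	 * with a fresh segment hung off current_chead.
	 */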

	return kr;
}
#endif /* CONFIG_FREEZE */