vm_compressor.c
/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <vm/vm_compressor.h>

#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_compressor_algorithms.h>
#include <vm/vm_fault.h>
#include <vm/vm_protos.h>
#include <mach/mach_host.h>             /* for host_info() */
#include <kern/ledger.h>
#include <kern/policy_internal.h>
#include <kern/thread_group.h>
#include <san/kasan.h>

#if defined(__x86_64__)
#include <i386/misc_protos.h>
#endif
#if defined(__arm64__)
#include <arm/machine_routines.h>
#endif

#include <IOKit/IOHibernatePrivate.h>

extern boolean_t vm_darkwake_mode;
extern zone_t vm_page_zone;

#if DEVELOPMENT || DEBUG
/* sysctl defined in bsd/dev/arm64/sysctl.c */
int do_cseg_wedge_thread(void);
int do_cseg_unwedge_thread(void);
static event_t debug_cseg_wait_event = NULL;
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_FREEZE
bool freezer_incore_cseg_acct = TRUE; /* Only count incore compressed memory for jetsams. */
void task_disown_frozen_csegs(task_t owner_task);
#endif /* CONFIG_FREEZE */

#if POPCOUNT_THE_COMPRESSED_DATA
boolean_t popcount_c_segs = TRUE;

static inline uint32_t
vmc_pop(uintptr_t ins, int sz)
{
	uint32_t rv = 0;

	if (__probable(popcount_c_segs == FALSE)) {
		return 0xDEAD707C;
	}

	while (sz >= 16) {
		uint32_t rv1, rv2;
		uint64_t *ins64 = (uint64_t *) ins;
		uint64_t *ins642 = (uint64_t *) (ins + 8);
		rv1 = __builtin_popcountll(*ins64);
		rv2 = __builtin_popcountll(*ins642);
		rv += rv1 + rv2;
		sz -= 16;
		ins += 16;
	}

	while (sz >= 4) {
		uint32_t *ins32 = (uint32_t *) ins;
		rv += __builtin_popcount(*ins32);
		sz -= 4;
		ins += 4;
	}

	while (sz > 0) {
		char *ins8 = (char *)ins;
		rv += __builtin_popcount(*ins8);
		sz--;
		ins++;
	}
	return rv;
}
#endif

#if VALIDATE_C_SEGMENTS
boolean_t validate_c_segs = TRUE;
#endif
/*
 * vm_compressor_mode has a hierarchy of control to set its value.
116 * boot-args are checked first, then device-tree, and finally 117 * the default value that is defined below. See vm_fault_init() for 118 * the boot-arg & device-tree code. 119 */ 120 121 #if !XNU_TARGET_OS_OSX 122 123 #if CONFIG_FREEZE 124 int vm_compressor_mode = VM_PAGER_FREEZER_DEFAULT; 125 struct freezer_context freezer_context_global; 126 #else /* CONFIG_FREEZE */ 127 int vm_compressor_mode = VM_PAGER_NOT_CONFIGURED; 128 #endif /* CONFIG_FREEZE */ 129 130 #else /* !XNU_TARGET_OS_OSX */ 131 int vm_compressor_mode = VM_PAGER_COMPRESSOR_WITH_SWAP; 132 133 #endif /* !XNU_TARGET_OS_OSX */ 134 135 TUNABLE(uint32_t, vm_compression_limit, "vm_compression_limit", 0); 136 int vm_compressor_is_active = 0; 137 int vm_compressor_available = 0; 138 139 extern uint64_t vm_swap_get_max_configured_space(void); 140 extern void vm_pageout_io_throttle(void); 141 142 #if CHECKSUM_THE_DATA || CHECKSUM_THE_SWAP || CHECKSUM_THE_COMPRESSED_DATA 143 extern unsigned int hash_string(char *cp, int len); 144 static unsigned int vmc_hash(char *, int); 145 boolean_t checksum_c_segs = TRUE; 146 147 unsigned int 148 vmc_hash(char *cp, int len) 149 { 150 if (__probable(checksum_c_segs == FALSE)) { 151 return 0xDEAD7A37; 152 } 153 return hash_string(cp, len); 154 } 155 #endif 156 157 #define UNPACK_C_SIZE(cs) ((cs->c_size == (PAGE_SIZE-1)) ? PAGE_SIZE : cs->c_size) 158 #define PACK_C_SIZE(cs, size) (cs->c_size = ((size == PAGE_SIZE) ? PAGE_SIZE - 1 : size)) 159 160 161 struct c_sv_hash_entry { 162 union { 163 struct { 164 uint32_t c_sv_he_ref; 165 uint32_t c_sv_he_data; 166 } c_sv_he; 167 uint64_t c_sv_he_record; 168 } c_sv_he_un; 169 }; 170 171 #define he_ref c_sv_he_un.c_sv_he.c_sv_he_ref 172 #define he_data c_sv_he_un.c_sv_he.c_sv_he_data 173 #define he_record c_sv_he_un.c_sv_he_record 174 175 #define C_SV_HASH_MAX_MISS 32 176 #define C_SV_HASH_SIZE ((1 << 10)) 177 #define C_SV_HASH_MASK ((1 << 10) - 1) 178 #define C_SV_CSEG_ID ((1 << 22) - 1) 179 180 181 union c_segu { 182 c_segment_t c_seg; 183 uintptr_t c_segno; 184 }; 185 186 #define C_SLOT_ASSERT_PACKABLE(ptr) \ 187 VM_ASSERT_POINTER_PACKABLE((vm_offset_t)(ptr), C_SLOT_PACKED_PTR); 188 189 #define C_SLOT_PACK_PTR(ptr) \ 190 VM_PACK_POINTER((vm_offset_t)(ptr), C_SLOT_PACKED_PTR) 191 192 #define C_SLOT_UNPACK_PTR(cslot) \ 193 (c_slot_mapping_t)VM_UNPACK_POINTER((cslot)->c_packed_ptr, C_SLOT_PACKED_PTR) 194 195 /* for debugging purposes */ 196 SECURITY_READ_ONLY_EARLY(vm_packing_params_t) c_slot_packing_params = 197 VM_PACKING_PARAMS(C_SLOT_PACKED_PTR); 198 199 uint32_t c_segment_count = 0; 200 uint32_t c_segment_count_max = 0; 201 202 uint64_t c_generation_id = 0; 203 uint64_t c_generation_id_flush_barrier; 204 205 206 #define HIBERNATE_FLUSHING_SECS_TO_COMPLETE 120 207 208 boolean_t hibernate_no_swapspace = FALSE; 209 clock_sec_t hibernate_flushing_deadline = 0; 210 211 212 #if RECORD_THE_COMPRESSED_DATA 213 char *c_compressed_record_sbuf; 214 char *c_compressed_record_ebuf; 215 char *c_compressed_record_cptr; 216 #endif 217 218 219 queue_head_t c_age_list_head; 220 queue_head_t c_swappedin_list_head; 221 queue_head_t c_swapout_list_head; 222 queue_head_t c_swapio_list_head; 223 queue_head_t c_swappedout_list_head; 224 queue_head_t c_swappedout_sparse_list_head; 225 queue_head_t c_major_list_head; 226 queue_head_t c_filling_list_head; 227 queue_head_t c_bad_list_head; 228 229 uint32_t c_age_count = 0; 230 uint32_t c_swappedin_count = 0; 231 uint32_t c_swapout_count = 0; 232 uint32_t c_swapio_count = 0; 233 uint32_t c_swappedout_count = 0; 234 uint32_t 
c_swappedout_sparse_count = 0; 235 uint32_t c_major_count = 0; 236 uint32_t c_filling_count = 0; 237 uint32_t c_empty_count = 0; 238 uint32_t c_bad_count = 0; 239 240 241 queue_head_t c_minor_list_head; 242 uint32_t c_minor_count = 0; 243 244 int c_overage_swapped_count = 0; 245 int c_overage_swapped_limit = 0; 246 247 int c_seg_fixed_array_len; 248 union c_segu *c_segments; 249 vm_offset_t c_buffers; 250 vm_size_t c_buffers_size; 251 caddr_t c_segments_next_page; 252 boolean_t c_segments_busy; 253 uint32_t c_segments_available; 254 uint32_t c_segments_limit; 255 uint32_t c_segments_nearing_limit; 256 257 uint32_t c_segment_svp_in_hash; 258 uint32_t c_segment_svp_hash_succeeded; 259 uint32_t c_segment_svp_hash_failed; 260 uint32_t c_segment_svp_zero_compressions; 261 uint32_t c_segment_svp_nonzero_compressions; 262 uint32_t c_segment_svp_zero_decompressions; 263 uint32_t c_segment_svp_nonzero_decompressions; 264 265 uint32_t c_segment_noncompressible_pages; 266 267 uint32_t c_segment_pages_compressed = 0; /* Tracks # of uncompressed pages fed into the compressor */ 268 #if CONFIG_FREEZE 269 int32_t c_segment_pages_compressed_incore = 0; /* Tracks # of uncompressed pages fed into the compressor that are in memory */ 270 uint32_t c_segments_incore_limit = 0; /* Tracks # of segments allowed to be in-core. Based on compressor pool size */ 271 #endif /* CONFIG_FREEZE */ 272 273 uint32_t c_segment_pages_compressed_limit; 274 uint32_t c_segment_pages_compressed_nearing_limit; 275 uint32_t c_free_segno_head = (uint32_t)-1; 276 277 uint32_t vm_compressor_minorcompact_threshold_divisor = 10; 278 uint32_t vm_compressor_majorcompact_threshold_divisor = 10; 279 uint32_t vm_compressor_unthrottle_threshold_divisor = 10; 280 uint32_t vm_compressor_catchup_threshold_divisor = 10; 281 282 uint32_t vm_compressor_minorcompact_threshold_divisor_overridden = 0; 283 uint32_t vm_compressor_majorcompact_threshold_divisor_overridden = 0; 284 uint32_t vm_compressor_unthrottle_threshold_divisor_overridden = 0; 285 uint32_t vm_compressor_catchup_threshold_divisor_overridden = 0; 286 287 #define C_SEGMENTS_PER_PAGE (PAGE_SIZE / sizeof(union c_segu)) 288 289 LCK_GRP_DECLARE(vm_compressor_lck_grp, "vm_compressor"); 290 LCK_RW_DECLARE(c_master_lock, &vm_compressor_lck_grp); 291 LCK_MTX_DECLARE(c_list_lock_storage, &vm_compressor_lck_grp); 292 293 boolean_t decompressions_blocked = FALSE; 294 295 zone_t compressor_segment_zone; 296 int c_compressor_swap_trigger = 0; 297 298 uint32_t compressor_cpus; 299 char *compressor_scratch_bufs; 300 char *kdp_compressor_scratch_buf; 301 char *kdp_compressor_decompressed_page; 302 addr64_t kdp_compressor_decompressed_page_paddr; 303 ppnum_t kdp_compressor_decompressed_page_ppnum; 304 305 clock_sec_t start_of_sample_period_sec = 0; 306 clock_nsec_t start_of_sample_period_nsec = 0; 307 clock_sec_t start_of_eval_period_sec = 0; 308 clock_nsec_t start_of_eval_period_nsec = 0; 309 uint32_t sample_period_decompression_count = 0; 310 uint32_t sample_period_compression_count = 0; 311 uint32_t last_eval_decompression_count = 0; 312 uint32_t last_eval_compression_count = 0; 313 314 #define DECOMPRESSION_SAMPLE_MAX_AGE (60 * 30) 315 316 boolean_t vm_swapout_ripe_segments = FALSE; 317 uint32_t vm_ripe_target_age = (60 * 60 * 48); 318 319 uint32_t swapout_target_age = 0; 320 uint32_t age_of_decompressions_during_sample_period[DECOMPRESSION_SAMPLE_MAX_AGE]; 321 uint32_t overage_decompressions_during_sample_period = 0; 322 323 324 void do_fastwake_warmup(queue_head_t *, boolean_t); 325 boolean_t 
fastwake_warmup = FALSE; 326 boolean_t fastwake_recording_in_progress = FALSE; 327 clock_sec_t dont_trim_until_ts = 0; 328 329 uint64_t c_segment_warmup_count; 330 uint64_t first_c_segment_to_warm_generation_id = 0; 331 uint64_t last_c_segment_to_warm_generation_id = 0; 332 boolean_t hibernate_flushing = FALSE; 333 334 int64_t c_segment_input_bytes __attribute__((aligned(8))) = 0; 335 int64_t c_segment_compressed_bytes __attribute__((aligned(8))) = 0; 336 int64_t compressor_bytes_used __attribute__((aligned(8))) = 0; 337 338 339 struct c_sv_hash_entry c_segment_sv_hash_table[C_SV_HASH_SIZE] __attribute__ ((aligned(8))); 340 341 static boolean_t compressor_needs_to_swap(void); 342 static void vm_compressor_swap_trigger_thread(void); 343 static void vm_compressor_do_delayed_compactions(boolean_t); 344 static void vm_compressor_compact_and_swap(boolean_t); 345 static void vm_compressor_age_swapped_in_segments(boolean_t); 346 347 struct vm_compressor_swapper_stats vmcs_stats; 348 349 #if XNU_TARGET_OS_OSX 350 static void vm_compressor_take_paging_space_action(void); 351 #endif /* XNU_TARGET_OS_OSX */ 352 353 void compute_swapout_target_age(void); 354 355 boolean_t c_seg_major_compact(c_segment_t, c_segment_t); 356 boolean_t c_seg_major_compact_ok(c_segment_t, c_segment_t); 357 358 int c_seg_minor_compaction_and_unlock(c_segment_t, boolean_t); 359 int c_seg_do_minor_compaction_and_unlock(c_segment_t, boolean_t, boolean_t, boolean_t); 360 void c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg); 361 362 void c_seg_move_to_sparse_list(c_segment_t); 363 void c_seg_insert_into_q(queue_head_t *, c_segment_t); 364 365 uint64_t vm_available_memory(void); 366 uint64_t vm_compressor_pages_compressed(void); 367 368 /* 369 * indicate the need to do a major compaction if 370 * the overall set of in-use compression segments 371 * becomes sparse... on systems that support pressure 372 * driven swapping, this will also cause swapouts to 373 * be initiated. 
374 */ 375 static inline boolean_t 376 vm_compressor_needs_to_major_compact() 377 { 378 uint32_t incore_seg_count; 379 380 incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count; 381 382 if ((c_segment_count >= (c_segments_nearing_limit / 8)) && 383 ((incore_seg_count * C_SEG_MAX_PAGES) - VM_PAGE_COMPRESSOR_COUNT) > 384 ((incore_seg_count / 8) * C_SEG_MAX_PAGES)) { 385 return 1; 386 } 387 return 0; 388 } 389 390 391 uint64_t 392 vm_available_memory(void) 393 { 394 return ((uint64_t)AVAILABLE_NON_COMPRESSED_MEMORY) * PAGE_SIZE_64; 395 } 396 397 398 uint64_t 399 vm_compressor_pages_compressed(void) 400 { 401 return c_segment_pages_compressed * PAGE_SIZE_64; 402 } 403 404 405 boolean_t 406 vm_compressor_low_on_space(void) 407 { 408 #if CONFIG_FREEZE 409 uint64_t incore_seg_count; 410 uint32_t incore_compressed_pages; 411 if (freezer_incore_cseg_acct) { 412 incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count; 413 incore_compressed_pages = c_segment_pages_compressed_incore; 414 } else { 415 incore_seg_count = c_segment_count; 416 incore_compressed_pages = c_segment_pages_compressed; 417 } 418 419 if ((incore_compressed_pages > c_segment_pages_compressed_nearing_limit) || 420 (incore_seg_count > c_segments_nearing_limit)) { 421 return TRUE; 422 } 423 #else /* CONFIG_FREEZE */ 424 if ((c_segment_pages_compressed > c_segment_pages_compressed_nearing_limit) || 425 (c_segment_count > c_segments_nearing_limit)) { 426 return TRUE; 427 } 428 #endif /* CONFIG_FREEZE */ 429 return FALSE; 430 } 431 432 433 boolean_t 434 vm_compressor_out_of_space(void) 435 { 436 #if CONFIG_FREEZE 437 uint64_t incore_seg_count; 438 uint32_t incore_compressed_pages; 439 if (freezer_incore_cseg_acct) { 440 incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count; 441 incore_compressed_pages = c_segment_pages_compressed_incore; 442 } else { 443 incore_seg_count = c_segment_count; 444 incore_compressed_pages = c_segment_pages_compressed; 445 } 446 447 if ((incore_compressed_pages >= c_segment_pages_compressed_limit) || 448 (incore_seg_count > c_segments_incore_limit)) { 449 return TRUE; 450 } 451 #else /* CONFIG_FREEZE */ 452 if ((c_segment_pages_compressed >= c_segment_pages_compressed_limit) || 453 (c_segment_count >= c_segments_limit)) { 454 return TRUE; 455 } 456 #endif /* CONFIG_FREEZE */ 457 return FALSE; 458 } 459 460 461 int 462 vm_wants_task_throttled(task_t task) 463 { 464 if (task == kernel_task) { 465 return 0; 466 } 467 468 if (VM_CONFIG_SWAP_IS_ACTIVE) { 469 if ((vm_compressor_low_on_space() || HARD_THROTTLE_LIMIT_REACHED()) && 470 (unsigned int)pmap_compressed(task->map->pmap) > (c_segment_pages_compressed / 4)) { 471 return 1; 472 } 473 } 474 return 0; 475 } 476 477 478 #if DEVELOPMENT || DEBUG 479 /* 480 * On compressor/swap exhaustion, kill the largest process regardless of 481 * its chosen process policy. 
482 */ 483 TUNABLE(bool, kill_on_no_paging_space, "-kill_on_no_paging_space", false); 484 #endif /* DEVELOPMENT || DEBUG */ 485 486 #if XNU_TARGET_OS_OSX 487 488 static uint32_t no_paging_space_action_in_progress = 0; 489 extern void memorystatus_send_low_swap_note(void); 490 491 static void 492 vm_compressor_take_paging_space_action(void) 493 { 494 if (no_paging_space_action_in_progress == 0) { 495 if (OSCompareAndSwap(0, 1, (UInt32 *)&no_paging_space_action_in_progress)) { 496 if (no_paging_space_action()) { 497 #if DEVELOPMENT || DEBUG 498 if (kill_on_no_paging_space) { 499 /* 500 * Since we are choosing to always kill a process, we don't need the 501 * "out of application memory" dialog box in this mode. And, hence we won't 502 * send the knote. 503 */ 504 no_paging_space_action_in_progress = 0; 505 return; 506 } 507 #endif /* DEVELOPMENT || DEBUG */ 508 memorystatus_send_low_swap_note(); 509 } 510 511 no_paging_space_action_in_progress = 0; 512 } 513 } 514 } 515 #endif /* XNU_TARGET_OS_OSX */ 516 517 518 void 519 vm_decompressor_lock(void) 520 { 521 PAGE_REPLACEMENT_ALLOWED(TRUE); 522 523 decompressions_blocked = TRUE; 524 525 PAGE_REPLACEMENT_ALLOWED(FALSE); 526 } 527 528 void 529 vm_decompressor_unlock(void) 530 { 531 PAGE_REPLACEMENT_ALLOWED(TRUE); 532 533 decompressions_blocked = FALSE; 534 535 PAGE_REPLACEMENT_ALLOWED(FALSE); 536 537 thread_wakeup((event_t)&decompressions_blocked); 538 } 539 540 static inline void 541 cslot_copy(c_slot_t cdst, c_slot_t csrc) 542 { 543 #if CHECKSUM_THE_DATA 544 cdst->c_hash_data = csrc->c_hash_data; 545 #endif 546 #if CHECKSUM_THE_COMPRESSED_DATA 547 cdst->c_hash_compressed_data = csrc->c_hash_compressed_data; 548 #endif 549 #if POPCOUNT_THE_COMPRESSED_DATA 550 cdst->c_pop_cdata = csrc->c_pop_cdata; 551 #endif 552 cdst->c_size = csrc->c_size; 553 cdst->c_packed_ptr = csrc->c_packed_ptr; 554 #if defined(__arm__) || defined(__arm64__) 555 cdst->c_codec = csrc->c_codec; 556 #endif 557 #if __ARM_WKDM_POPCNT__ 558 cdst->c_inline_popcount = csrc->c_inline_popcount; 559 #endif 560 } 561 562 vm_map_t compressor_map; 563 uint64_t compressor_pool_max_size; 564 uint64_t compressor_pool_size; 565 uint32_t compressor_pool_multiplier; 566 567 #if DEVELOPMENT || DEBUG 568 /* 569 * Compressor segments are write-protected in development/debug 570 * kernels to help debug memory corruption. 571 * In cases where performance is a concern, this can be disabled 572 * via the boot-arg "-disable_cseg_write_protection". 
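 *
 * The writers in this file (for example c_seg_minor_compaction_and_unlock()
 * and c_seg_major_compact()) temporarily drop that protection with
 * C_SEG_MAKE_WRITEABLE() and re-arm it with C_SEG_WRITE_PROTECT() around
 * their buffer updates.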
573 */ 574 boolean_t write_protect_c_segs = TRUE; 575 int vm_compressor_test_seg_wp; 576 uint32_t vm_ktrace_enabled; 577 #endif /* DEVELOPMENT || DEBUG */ 578 579 void 580 vm_compressor_init(void) 581 { 582 thread_t thread; 583 int attempts = 1; 584 kern_return_t retval = KERN_SUCCESS; 585 vm_offset_t start_addr = 0; 586 vm_size_t c_segments_arr_size = 0, compressor_submap_size = 0; 587 vm_map_kernel_flags_t vmk_flags; 588 #if RECORD_THE_COMPRESSED_DATA 589 vm_size_t c_compressed_record_sbuf_size = 0; 590 #endif /* RECORD_THE_COMPRESSED_DATA */ 591 592 #if DEVELOPMENT || DEBUG || CONFIG_FREEZE 593 char bootarg_name[32]; 594 #endif /* DEVELOPMENT || DEBUG || CONFIG_FREEZE */ 595 596 #if DEVELOPMENT || DEBUG 597 if (PE_parse_boot_argn("-disable_cseg_write_protection", bootarg_name, sizeof(bootarg_name))) { 598 write_protect_c_segs = FALSE; 599 } 600 int vmcval = 1; 601 PE_parse_boot_argn("vm_compressor_validation", &vmcval, sizeof(vmcval)); 602 603 if (kern_feature_override(KF_COMPRSV_OVRD)) { 604 vmcval = 0; 605 } 606 if (vmcval == 0) { 607 #if POPCOUNT_THE_COMPRESSED_DATA 608 popcount_c_segs = FALSE; 609 #endif 610 #if CHECKSUM_THE_DATA || CHECKSUM_THE_COMPRESSED_DATA 611 checksum_c_segs = FALSE; 612 #endif 613 #if VALIDATE_C_SEGMENTS 614 validate_c_segs = FALSE; 615 #endif 616 write_protect_c_segs = FALSE; 617 } 618 #endif /* DEVELOPMENT || DEBUG */ 619 620 #if CONFIG_FREEZE 621 if (PE_parse_boot_argn("-disable_freezer_cseg_acct", bootarg_name, sizeof(bootarg_name))) { 622 freezer_incore_cseg_acct = FALSE; 623 } 624 #endif /* CONFIG_FREEZE */ 625 626 assert((C_SEGMENTS_PER_PAGE * sizeof(union c_segu)) == PAGE_SIZE); 627 628 #if !XNU_TARGET_OS_OSX 629 vm_compressor_minorcompact_threshold_divisor = 20; 630 vm_compressor_majorcompact_threshold_divisor = 30; 631 vm_compressor_unthrottle_threshold_divisor = 40; 632 vm_compressor_catchup_threshold_divisor = 60; 633 #else /* !XNU_TARGET_OS_OSX */ 634 if (max_mem <= (3ULL * 1024ULL * 1024ULL * 1024ULL)) { 635 vm_compressor_minorcompact_threshold_divisor = 11; 636 vm_compressor_majorcompact_threshold_divisor = 13; 637 vm_compressor_unthrottle_threshold_divisor = 20; 638 vm_compressor_catchup_threshold_divisor = 35; 639 } else { 640 vm_compressor_minorcompact_threshold_divisor = 20; 641 vm_compressor_majorcompact_threshold_divisor = 25; 642 vm_compressor_unthrottle_threshold_divisor = 35; 643 vm_compressor_catchup_threshold_divisor = 50; 644 } 645 #endif /* !XNU_TARGET_OS_OSX */ 646 647 queue_init(&c_bad_list_head); 648 queue_init(&c_age_list_head); 649 queue_init(&c_minor_list_head); 650 queue_init(&c_major_list_head); 651 queue_init(&c_filling_list_head); 652 queue_init(&c_swapout_list_head); 653 queue_init(&c_swapio_list_head); 654 queue_init(&c_swappedin_list_head); 655 queue_init(&c_swappedout_list_head); 656 queue_init(&c_swappedout_sparse_list_head); 657 658 c_free_segno_head = -1; 659 c_segments_available = 0; 660 661 if (vm_compression_limit) { 662 compressor_pool_size = ptoa_64(vm_compression_limit); 663 } 664 665 compressor_pool_max_size = C_SEG_MAX_LIMIT; 666 compressor_pool_max_size *= C_SEG_BUFSIZE; 667 668 #if XNU_TARGET_OS_OSX 669 670 if (vm_compression_limit == 0) { 671 if (max_mem <= (4ULL * 1024ULL * 1024ULL * 1024ULL)) { 672 compressor_pool_size = 16ULL * max_mem; 673 } else if (max_mem <= (8ULL * 1024ULL * 1024ULL * 1024ULL)) { 674 compressor_pool_size = 8ULL * max_mem; 675 } else if (max_mem <= (32ULL * 1024ULL * 1024ULL * 1024ULL)) { 676 compressor_pool_size = 4ULL * max_mem; 677 } else { 678 compressor_pool_size = 2ULL * 
max_mem; 679 } 680 } 681 if (max_mem <= (8ULL * 1024ULL * 1024ULL * 1024ULL)) { 682 compressor_pool_multiplier = 1; 683 } else if (max_mem <= (32ULL * 1024ULL * 1024ULL * 1024ULL)) { 684 compressor_pool_multiplier = 2; 685 } else { 686 compressor_pool_multiplier = 4; 687 } 688 689 #elif defined(__arm__) 690 691 #define VM_RESERVE_SIZE (1024 * 1024 * 256) 692 #define MAX_COMPRESSOR_POOL_SIZE (1024 * 1024 * 450) 693 694 if (compressor_pool_max_size > MAX_COMPRESSOR_POOL_SIZE) { 695 compressor_pool_max_size = MAX_COMPRESSOR_POOL_SIZE; 696 } 697 698 if (vm_compression_limit == 0) { 699 compressor_pool_size = ((kernel_map->max_offset - kernel_map->min_offset) - kernel_map->size) - VM_RESERVE_SIZE; 700 } 701 compressor_pool_multiplier = 1; 702 703 #elif defined(__arm64__) && defined(XNU_TARGET_OS_WATCH) 704 705 /* 706 * On M9 watches the compressor can become big and can lead to 707 * churn in workingset resulting in audio drops. Setting a cap 708 * on the compressor size favors reclaiming unused memory 709 * sitting in idle band via jetsams 710 */ 711 712 #define COMPRESSOR_CAP_PERCENTAGE 37ULL 713 714 if (compressor_pool_max_size > max_mem) { 715 compressor_pool_max_size = max_mem; 716 } 717 718 if (vm_compression_limit == 0) { 719 compressor_pool_size = (max_mem * COMPRESSOR_CAP_PERCENTAGE) / 100ULL; 720 } 721 compressor_pool_multiplier = 1; 722 723 #else 724 725 if (compressor_pool_max_size > max_mem) { 726 compressor_pool_max_size = max_mem; 727 } 728 729 if (vm_compression_limit == 0) { 730 compressor_pool_size = max_mem; 731 } 732 compressor_pool_multiplier = 1; 733 #endif 734 if (compressor_pool_size > compressor_pool_max_size) { 735 compressor_pool_size = compressor_pool_max_size; 736 } 737 738 try_again: 739 c_segments_limit = (uint32_t)(compressor_pool_size / (vm_size_t)(C_SEG_ALLOCSIZE)); 740 c_segments_nearing_limit = (uint32_t)(((uint64_t)c_segments_limit * 98ULL) / 100ULL); 741 742 c_segment_pages_compressed_limit = (c_segments_limit * (C_SEG_BUFSIZE / PAGE_SIZE) * compressor_pool_multiplier); 743 744 if (c_segment_pages_compressed_limit < (uint32_t)(max_mem / PAGE_SIZE)) { 745 if (!vm_compression_limit) { 746 c_segment_pages_compressed_limit = (uint32_t)(max_mem / PAGE_SIZE); 747 } 748 } 749 750 c_segment_pages_compressed_nearing_limit = (uint32_t)(((uint64_t)c_segment_pages_compressed_limit * 98ULL) / 100ULL); 751 752 #if CONFIG_FREEZE 753 /* 754 * Our in-core limits are based on the size of the compressor pool. 755 * The c_segments_nearing_limit is also based on the compressor pool 756 * size and calculated above. 757 */ 758 c_segments_incore_limit = c_segments_limit; 759 760 if (freezer_incore_cseg_acct) { 761 /* 762 * Add enough segments to track all frozen c_segs that can be stored in swap. 
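		 *
		 * Illustrative arithmetic (numbers are hypothetical, not taken from
		 * this configuration): with 4GB of configured swap and a 256KB
		 * C_SEG_ALLOCSIZE, the line below would add 4GB / 256KB = 16384
		 * extra segment slots on top of the in-core limit computed above.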
		 */
		c_segments_limit += (uint32_t)(vm_swap_get_max_configured_space() / (vm_size_t)(C_SEG_ALLOCSIZE));
	}
#endif
	/*
	 * Submap needs space for:
	 * - c_segments
	 * - c_buffers
	 * - swap reclamations -- C_SEG_BUFSIZE
	 */
	c_segments_arr_size = vm_map_round_page((sizeof(union c_segu) * c_segments_limit), VM_MAP_PAGE_MASK(kernel_map));
	c_buffers_size = vm_map_round_page(((vm_size_t)C_SEG_ALLOCSIZE * (vm_size_t)c_segments_limit), VM_MAP_PAGE_MASK(kernel_map));

	compressor_submap_size = c_segments_arr_size + c_buffers_size + C_SEG_BUFSIZE;

#if RECORD_THE_COMPRESSED_DATA
	c_compressed_record_sbuf_size = (vm_size_t)C_SEG_ALLOCSIZE + (PAGE_SIZE * 2);
	compressor_submap_size += c_compressed_record_sbuf_size;
#endif /* RECORD_THE_COMPRESSED_DATA */

	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	vmk_flags.vmkf_permanent = TRUE;
	retval = kmem_suballoc(kernel_map, &start_addr, compressor_submap_size,
	    FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_COMPRESSOR,
	    &compressor_map);

	if (retval != KERN_SUCCESS) {
		if (++attempts > 3) {
			panic("vm_compressor_init: kmem_suballoc failed - 0x%llx", (uint64_t)compressor_submap_size);
		}

		compressor_pool_size = compressor_pool_size / 2;

		kprintf("retrying creation of the compressor submap at 0x%llx bytes\n", compressor_pool_size);
		goto try_again;
	}
	if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&c_segments),
	    (sizeof(union c_segu) * c_segments_limit), 0,
	    KMA_KOBJECT | KMA_VAONLY | KMA_PERMANENT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
		panic("vm_compressor_init: kernel_memory_allocate failed - c_segments\n");
	}
	if (kernel_memory_allocate(compressor_map, &c_buffers, c_buffers_size, 0,
	    KMA_COMPRESSOR | KMA_VAONLY | KMA_PERMANENT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
		panic("vm_compressor_init: kernel_memory_allocate failed - c_buffers\n");
	}

	/*
	 * Pick a good size that will minimize fragmentation in zalloc
	 * by minimizing the fragmentation in a 16k run.
	 *
	 * C_SEG_SLOT_VAR_ARRAY_MIN_LEN is larger on 4k systems than 16k ones,
	 * making the fragmentation in a 4k page terrible. Using 16k for all
	 * systems matches zalloc() and will minimize fragmentation.
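	 *
	 * Illustrative sizing (hypothetical numbers, not from this build): if
	 * sizeof(struct c_segment) plus the minimum slot array came to 300 bytes,
	 * then cnt = 16384 / 300 = 54 segments fit in a 16k run and
	 * frag = 16384 % 300 = 184 bytes are left over.  The loop below keeps
	 * growing every segment by one struct c_slot for as long as
	 * cnt * sizeof(struct c_slot) still fits in the remaining frag, folding
	 * the leftover bytes into larger fixed slot arrays instead of wasting them.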
	 */
	uint32_t c_segment_size = sizeof(struct c_segment) + (C_SEG_SLOT_VAR_ARRAY_MIN_LEN * sizeof(struct c_slot));
	uint32_t cnt = (16 << 10) / c_segment_size;
	uint32_t frag = (16 << 10) % c_segment_size;

	c_seg_fixed_array_len = C_SEG_SLOT_VAR_ARRAY_MIN_LEN;

	while (cnt * sizeof(struct c_slot) < frag) {
		c_segment_size += sizeof(struct c_slot);
		c_seg_fixed_array_len++;
		frag -= cnt * sizeof(struct c_slot);
	}

	compressor_segment_zone = zone_create("compressor_segment",
	    c_segment_size, ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);

	c_segments_busy = FALSE;

	c_segments_next_page = (caddr_t)c_segments;
	vm_compressor_algorithm_init();

	{
		host_basic_info_data_t hinfo;
		mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
		size_t bufsize;
		char *buf;

#define BSD_HOST 1
		host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);

		compressor_cpus = hinfo.max_cpus;

		bufsize = PAGE_SIZE;
		bufsize += compressor_cpus * vm_compressor_get_decode_scratch_size();
		bufsize += vm_compressor_get_decode_scratch_size();
#if CONFIG_FREEZE
		bufsize += vm_compressor_get_encode_scratch_size();
#endif
#if RECORD_THE_COMPRESSED_DATA
		bufsize += c_compressed_record_sbuf_size;
#endif

		if (kernel_memory_allocate(kernel_map, (vm_offset_t *)&buf, bufsize,
		    PAGE_MASK, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_COMPRESSOR)) {
			panic("vm_compressor_init: Unable to allocate %zd bytes", bufsize);
		}

		/*
		 * kdp_compressor_decompressed_page must be page aligned because we access
		 * it through the physical aperture by page number.
		 */
		kdp_compressor_decompressed_page = buf;
		kdp_compressor_decompressed_page_paddr = kvtophys((vm_offset_t)kdp_compressor_decompressed_page);
		kdp_compressor_decompressed_page_ppnum = (ppnum_t) atop(kdp_compressor_decompressed_page_paddr);
		buf += PAGE_SIZE;
		bufsize -= PAGE_SIZE;

		compressor_scratch_bufs = buf;
		buf += compressor_cpus * vm_compressor_get_decode_scratch_size();
		bufsize -= compressor_cpus * vm_compressor_get_decode_scratch_size();

		kdp_compressor_scratch_buf = buf;
		buf += vm_compressor_get_decode_scratch_size();
		bufsize -= vm_compressor_get_decode_scratch_size();

#if CONFIG_FREEZE
		freezer_context_global.freezer_ctx_compressor_scratch_buf = buf;
		buf += vm_compressor_get_encode_scratch_size();
		bufsize -= vm_compressor_get_encode_scratch_size();
#endif

#if RECORD_THE_COMPRESSED_DATA
		c_compressed_record_sbuf = buf;
		c_compressed_record_cptr = buf;
		c_compressed_record_ebuf = c_compressed_record_sbuf + c_compressed_record_sbuf_size;
		buf += c_compressed_record_sbuf_size;
		bufsize -= c_compressed_record_sbuf_size;
#endif
		assert(bufsize == 0);
	}

	if (kernel_thread_start_priority((thread_continue_t)vm_compressor_swap_trigger_thread, NULL,
	    BASEPRI_VM, &thread) != KERN_SUCCESS) {
		panic("vm_compressor_swap_trigger_thread: create failed");
	}
	thread_deallocate(thread);

	if (vm_pageout_internal_start() != KERN_SUCCESS) {
		panic("vm_compressor_init: Failed to start the internal pageout thread.\n");
	}
	if (VM_CONFIG_SWAP_IS_PRESENT) {
		vm_compressor_swap_init();
	}

	if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
		vm_compressor_is_active = 1;
	}

#if CONFIG_FREEZE
	memorystatus_freeze_enabled = TRUE;
#endif /* CONFIG_FREEZE */

	vm_compressor_available
= 1; 920 921 vm_page_reactivate_all_throttled(); 922 923 bzero(&vmcs_stats, sizeof(struct vm_compressor_swapper_stats)); 924 } 925 926 927 #if VALIDATE_C_SEGMENTS 928 929 static void 930 c_seg_validate(c_segment_t c_seg, boolean_t must_be_compact) 931 { 932 uint16_t c_indx; 933 int32_t bytes_used; 934 uint32_t c_rounded_size; 935 uint32_t c_size; 936 c_slot_t cs; 937 938 if (__probable(validate_c_segs == FALSE)) { 939 return; 940 } 941 if (c_seg->c_firstemptyslot < c_seg->c_nextslot) { 942 c_indx = c_seg->c_firstemptyslot; 943 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 944 945 if (cs == NULL) { 946 panic("c_seg_validate: no slot backing c_firstemptyslot"); 947 } 948 949 if (cs->c_size) { 950 panic("c_seg_validate: c_firstemptyslot has non-zero size (%d)\n", cs->c_size); 951 } 952 } 953 bytes_used = 0; 954 955 for (c_indx = 0; c_indx < c_seg->c_nextslot; c_indx++) { 956 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 957 958 c_size = UNPACK_C_SIZE(cs); 959 960 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 961 962 bytes_used += c_rounded_size; 963 964 #if CHECKSUM_THE_COMPRESSED_DATA 965 unsigned csvhash; 966 if (c_size && cs->c_hash_compressed_data != (csvhash = vmc_hash((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size))) { 967 addr64_t csvphys = kvtophys((vm_offset_t)&c_seg->c_store.c_buffer[cs->c_offset]); 968 panic("Compressed data doesn't match original %p phys: 0x%llx %d %p %d %d 0x%x 0x%x", c_seg, csvphys, cs->c_offset, cs, c_indx, c_size, cs->c_hash_compressed_data, csvhash); 969 } 970 #endif 971 #if POPCOUNT_THE_COMPRESSED_DATA 972 unsigned csvpop; 973 if (c_size) { 974 uintptr_t csvaddr = (uintptr_t) &c_seg->c_store.c_buffer[cs->c_offset]; 975 if (cs->c_pop_cdata != (csvpop = vmc_pop(csvaddr, c_size))) { 976 panic("Compressed data popcount doesn't match original, bit distance: %d %p (phys: %p) %p %p 0x%llx 0x%x 0x%x 0x%x", (csvpop - cs->c_pop_cdata), (void *)csvaddr, (void *) kvtophys(csvaddr), c_seg, cs, (uint64_t)cs->c_offset, c_size, csvpop, cs->c_pop_cdata); 977 } 978 } 979 #endif 980 } 981 982 if (bytes_used != c_seg->c_bytes_used) { 983 panic("c_seg_validate: bytes_used mismatch - found %d, segment has %d\n", bytes_used, c_seg->c_bytes_used); 984 } 985 986 if (c_seg->c_bytes_used > C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset)) { 987 panic("c_seg_validate: c_bytes_used > c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n", 988 (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used); 989 } 990 991 if (must_be_compact) { 992 if (c_seg->c_bytes_used != C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset)) { 993 panic("c_seg_validate: c_bytes_used doesn't match c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n", 994 (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used); 995 } 996 } 997 } 998 999 #endif 1000 1001 1002 void 1003 c_seg_need_delayed_compaction(c_segment_t c_seg, boolean_t c_list_lock_held) 1004 { 1005 boolean_t clear_busy = FALSE; 1006 1007 if (c_list_lock_held == FALSE) { 1008 if (!lck_mtx_try_lock_spin_always(c_list_lock)) { 1009 C_SEG_BUSY(c_seg); 1010 1011 lck_mtx_unlock_always(&c_seg->c_lock); 1012 lck_mtx_lock_spin_always(c_list_lock); 1013 lck_mtx_lock_spin_always(&c_seg->c_lock); 1014 1015 clear_busy = TRUE; 1016 } 1017 } 1018 assert(c_seg->c_state != C_IS_FILLING); 1019 1020 if (!c_seg->c_on_minorcompact_q && !(C_SEG_IS_ON_DISK_OR_SOQ(c_seg))) { 1021 queue_enter(&c_minor_list_head, c_seg, c_segment_t, c_list); 1022 c_seg->c_on_minorcompact_q = 1; 
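		/*
		 * c_minor_count shadows the length of c_minor_list_head; the queue,
		 * the flag above and the counter below are all updated while holding
		 * c_list_lock, which is held here on both paths through this function.
		 */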
1023 c_minor_count++; 1024 } 1025 if (c_list_lock_held == FALSE) { 1026 lck_mtx_unlock_always(c_list_lock); 1027 } 1028 1029 if (clear_busy == TRUE) { 1030 C_SEG_WAKEUP_DONE(c_seg); 1031 } 1032 } 1033 1034 1035 unsigned int c_seg_moved_to_sparse_list = 0; 1036 1037 void 1038 c_seg_move_to_sparse_list(c_segment_t c_seg) 1039 { 1040 boolean_t clear_busy = FALSE; 1041 1042 if (!lck_mtx_try_lock_spin_always(c_list_lock)) { 1043 C_SEG_BUSY(c_seg); 1044 1045 lck_mtx_unlock_always(&c_seg->c_lock); 1046 lck_mtx_lock_spin_always(c_list_lock); 1047 lck_mtx_lock_spin_always(&c_seg->c_lock); 1048 1049 clear_busy = TRUE; 1050 } 1051 c_seg_switch_state(c_seg, C_ON_SWAPPEDOUTSPARSE_Q, FALSE); 1052 1053 c_seg_moved_to_sparse_list++; 1054 1055 lck_mtx_unlock_always(c_list_lock); 1056 1057 if (clear_busy == TRUE) { 1058 C_SEG_WAKEUP_DONE(c_seg); 1059 } 1060 } 1061 1062 1063 void 1064 c_seg_insert_into_q(queue_head_t *qhead, c_segment_t c_seg) 1065 { 1066 c_segment_t c_seg_next; 1067 1068 if (queue_empty(qhead)) { 1069 queue_enter(qhead, c_seg, c_segment_t, c_age_list); 1070 } else { 1071 c_seg_next = (c_segment_t)queue_first(qhead); 1072 1073 while (TRUE) { 1074 if (c_seg->c_generation_id < c_seg_next->c_generation_id) { 1075 queue_insert_before(qhead, c_seg, c_seg_next, c_segment_t, c_age_list); 1076 break; 1077 } 1078 c_seg_next = (c_segment_t) queue_next(&c_seg_next->c_age_list); 1079 1080 if (queue_end(qhead, (queue_entry_t) c_seg_next)) { 1081 queue_enter(qhead, c_seg, c_segment_t, c_age_list); 1082 break; 1083 } 1084 } 1085 } 1086 } 1087 1088 1089 int try_minor_compaction_failed = 0; 1090 int try_minor_compaction_succeeded = 0; 1091 1092 void 1093 c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg) 1094 { 1095 assert(c_seg->c_on_minorcompact_q); 1096 /* 1097 * c_seg is currently on the delayed minor compaction 1098 * queue and we have c_seg locked... if we can get the 1099 * c_list_lock w/o blocking (if we blocked we could deadlock 1100 * because the lock order is c_list_lock then c_seg's lock) 1101 * we'll pull it from the delayed list and free it directly 1102 */ 1103 if (!lck_mtx_try_lock_spin_always(c_list_lock)) { 1104 /* 1105 * c_list_lock is held, we need to bail 1106 */ 1107 try_minor_compaction_failed++; 1108 1109 lck_mtx_unlock_always(&c_seg->c_lock); 1110 } else { 1111 try_minor_compaction_succeeded++; 1112 1113 C_SEG_BUSY(c_seg); 1114 c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, FALSE); 1115 } 1116 } 1117 1118 1119 int 1120 c_seg_do_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy, boolean_t need_list_lock, boolean_t disallow_page_replacement) 1121 { 1122 int c_seg_freed; 1123 1124 assert(c_seg->c_busy); 1125 assert(!C_SEG_IS_ON_DISK_OR_SOQ(c_seg)); 1126 1127 /* 1128 * check for the case that can occur when we are not swapping 1129 * and this segment has been major compacted in the past 1130 * and moved to the majorcompact q to remove it from further 1131 * consideration... if the occupancy falls too low we need 1132 * to put it back on the age_q so that it will be considered 1133 * in the next major compaction sweep... 
if we don't do this 1134 * we will eventually run into the c_segments_limit 1135 */ 1136 if (c_seg->c_state == C_ON_MAJORCOMPACT_Q && C_SEG_SHOULD_MAJORCOMPACT_NOW(c_seg)) { 1137 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE); 1138 } 1139 if (!c_seg->c_on_minorcompact_q) { 1140 if (clear_busy == TRUE) { 1141 C_SEG_WAKEUP_DONE(c_seg); 1142 } 1143 1144 lck_mtx_unlock_always(&c_seg->c_lock); 1145 1146 return 0; 1147 } 1148 queue_remove(&c_minor_list_head, c_seg, c_segment_t, c_list); 1149 c_seg->c_on_minorcompact_q = 0; 1150 c_minor_count--; 1151 1152 lck_mtx_unlock_always(c_list_lock); 1153 1154 if (disallow_page_replacement == TRUE) { 1155 lck_mtx_unlock_always(&c_seg->c_lock); 1156 1157 PAGE_REPLACEMENT_DISALLOWED(TRUE); 1158 1159 lck_mtx_lock_spin_always(&c_seg->c_lock); 1160 } 1161 c_seg_freed = c_seg_minor_compaction_and_unlock(c_seg, clear_busy); 1162 1163 if (disallow_page_replacement == TRUE) { 1164 PAGE_REPLACEMENT_DISALLOWED(FALSE); 1165 } 1166 1167 if (need_list_lock == TRUE) { 1168 lck_mtx_lock_spin_always(c_list_lock); 1169 } 1170 1171 return c_seg_freed; 1172 } 1173 1174 void 1175 kdp_compressor_busy_find_owner(event64_t wait_event, thread_waitinfo_t *waitinfo) 1176 { 1177 c_segment_t c_seg = (c_segment_t) wait_event; 1178 1179 waitinfo->owner = thread_tid(c_seg->c_busy_for_thread); 1180 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(c_seg); 1181 } 1182 1183 #if DEVELOPMENT || DEBUG 1184 int 1185 do_cseg_wedge_thread(void) 1186 { 1187 struct c_segment c_seg; 1188 c_seg.c_busy_for_thread = current_thread(); 1189 1190 debug_cseg_wait_event = (event_t) &c_seg; 1191 1192 thread_set_pending_block_hint(current_thread(), kThreadWaitCompressor); 1193 assert_wait((event_t) (&c_seg), THREAD_INTERRUPTIBLE); 1194 1195 thread_block(THREAD_CONTINUE_NULL); 1196 1197 return 0; 1198 } 1199 1200 int 1201 do_cseg_unwedge_thread(void) 1202 { 1203 thread_wakeup(debug_cseg_wait_event); 1204 debug_cseg_wait_event = NULL; 1205 1206 return 0; 1207 } 1208 #endif /* DEVELOPMENT || DEBUG */ 1209 1210 void 1211 c_seg_wait_on_busy(c_segment_t c_seg) 1212 { 1213 c_seg->c_wanted = 1; 1214 1215 thread_set_pending_block_hint(current_thread(), kThreadWaitCompressor); 1216 assert_wait((event_t) (c_seg), THREAD_UNINT); 1217 1218 lck_mtx_unlock_always(&c_seg->c_lock); 1219 thread_block(THREAD_CONTINUE_NULL); 1220 } 1221 1222 #if CONFIG_FREEZE 1223 /* 1224 * We don't have the task lock held while updating the task's 1225 * c_seg queues. We can do that because of the following restrictions: 1226 * 1227 * - SINGLE FREEZER CONTEXT: 1228 * We 'insert' c_segs into the task list on the task_freeze path. 1229 * There can only be one such freeze in progress and the task 1230 * isn't disappearing because we have the VM map lock held throughout 1231 * and we have a reference on the proc too. 1232 * 1233 * - SINGLE TASK DISOWN CONTEXT: 1234 * We 'disown' c_segs of a task ONLY from the task_terminate context. So 1235 * we don't need the task lock but we need the c_list_lock and the 1236 * compressor master lock (shared). We also hold the individual 1237 * c_seg locks (exclusive). 1238 * 1239 * If we either: 1240 * - can't get the c_seg lock on a try, then we start again because maybe 1241 * the c_seg is part of a compaction and might get freed. So we can't trust 1242 * that linkage and need to restart our queue traversal. 1243 * - OR, we run into a busy c_seg (say being swapped in or free-ing) we 1244 * drop all locks again and wait and restart our queue traversal. 
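 *
 * In both cases the ordering is PAGE_REPLACEMENT_DISALLOWED(), then
 * c_list_lock, then an individual c_seg try-lock; a failed try-lock forces
 * a full restart, and a busy c_seg is only waited on after c_list_lock has
 * been dropped, so we never block on a segment while holding the list lock.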
1245 * 1246 * - The new_owner_task below is currently only the kernel or NULL. 1247 * 1248 */ 1249 void 1250 c_seg_update_task_owner(c_segment_t c_seg, task_t new_owner_task) 1251 { 1252 task_t owner_task = c_seg->c_task_owner; 1253 uint64_t uncompressed_bytes = ((c_seg->c_slots_used) * PAGE_SIZE_64); 1254 1255 LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED); 1256 LCK_MTX_ASSERT(&c_seg->c_lock, LCK_MTX_ASSERT_OWNED); 1257 1258 if (owner_task) { 1259 task_update_frozen_to_swap_acct(owner_task, uncompressed_bytes, DEBIT_FROM_SWAP); 1260 queue_remove(&owner_task->task_frozen_cseg_q, c_seg, 1261 c_segment_t, c_task_list_next_cseg); 1262 } 1263 1264 if (new_owner_task) { 1265 queue_enter(&new_owner_task->task_frozen_cseg_q, c_seg, 1266 c_segment_t, c_task_list_next_cseg); 1267 task_update_frozen_to_swap_acct(new_owner_task, uncompressed_bytes, CREDIT_TO_SWAP); 1268 } 1269 1270 c_seg->c_task_owner = new_owner_task; 1271 } 1272 1273 void 1274 task_disown_frozen_csegs(task_t owner_task) 1275 { 1276 c_segment_t c_seg = NULL, next_cseg = NULL; 1277 1278 again: 1279 PAGE_REPLACEMENT_DISALLOWED(TRUE); 1280 lck_mtx_lock_spin_always(c_list_lock); 1281 1282 for (c_seg = (c_segment_t) queue_first(&owner_task->task_frozen_cseg_q); 1283 !queue_end(&owner_task->task_frozen_cseg_q, (queue_entry_t) c_seg); 1284 c_seg = next_cseg) { 1285 next_cseg = (c_segment_t) queue_next(&c_seg->c_task_list_next_cseg);; 1286 1287 if (!lck_mtx_try_lock_spin_always(&c_seg->c_lock)) { 1288 lck_mtx_unlock(c_list_lock); 1289 PAGE_REPLACEMENT_DISALLOWED(FALSE); 1290 goto again; 1291 } 1292 1293 if (c_seg->c_busy) { 1294 lck_mtx_unlock(c_list_lock); 1295 PAGE_REPLACEMENT_DISALLOWED(FALSE); 1296 1297 c_seg_wait_on_busy(c_seg); 1298 1299 goto again; 1300 } 1301 assert(c_seg->c_task_owner == owner_task); 1302 c_seg_update_task_owner(c_seg, kernel_task); 1303 lck_mtx_unlock_always(&c_seg->c_lock); 1304 } 1305 1306 lck_mtx_unlock(c_list_lock); 1307 PAGE_REPLACEMENT_DISALLOWED(FALSE); 1308 } 1309 #endif /* CONFIG_FREEZE */ 1310 1311 void 1312 c_seg_switch_state(c_segment_t c_seg, int new_state, boolean_t insert_head) 1313 { 1314 int old_state = c_seg->c_state; 1315 1316 #if XNU_TARGET_OS_OSX 1317 #if DEVELOPMENT || DEBUG 1318 if (new_state != C_IS_FILLING) { 1319 LCK_MTX_ASSERT(&c_seg->c_lock, LCK_MTX_ASSERT_OWNED); 1320 } 1321 LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED); 1322 #endif 1323 #endif /* XNU_TARGET_OS_OSX */ 1324 switch (old_state) { 1325 case C_IS_EMPTY: 1326 assert(new_state == C_IS_FILLING || new_state == C_IS_FREE); 1327 1328 c_empty_count--; 1329 break; 1330 1331 case C_IS_FILLING: 1332 assert(new_state == C_ON_AGE_Q || new_state == C_ON_SWAPOUT_Q); 1333 1334 queue_remove(&c_filling_list_head, c_seg, c_segment_t, c_age_list); 1335 c_filling_count--; 1336 break; 1337 1338 case C_ON_AGE_Q: 1339 assert(new_state == C_ON_SWAPOUT_Q || new_state == C_ON_MAJORCOMPACT_Q || 1340 new_state == C_IS_FREE); 1341 1342 queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list); 1343 c_age_count--; 1344 break; 1345 1346 case C_ON_SWAPPEDIN_Q: 1347 assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE); 1348 1349 queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list); 1350 c_swappedin_count--; 1351 break; 1352 1353 case C_ON_SWAPOUT_Q: 1354 assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE || new_state == C_IS_EMPTY || new_state == C_ON_SWAPIO_Q); 1355 1356 #if CONFIG_FREEZE 1357 if (c_seg->c_task_owner && (new_state != C_ON_SWAPIO_Q)) { 1358 c_seg_update_task_owner(c_seg, NULL); 1359 } 1360 #endif /* 
CONFIG_FREEZE */ 1361 1362 queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list); 1363 thread_wakeup((event_t)&compaction_swapper_running); 1364 c_swapout_count--; 1365 break; 1366 1367 case C_ON_SWAPIO_Q: 1368 assert(new_state == C_ON_SWAPPEDOUT_Q || new_state == C_ON_SWAPPEDOUTSPARSE_Q || new_state == C_ON_AGE_Q); 1369 1370 queue_remove(&c_swapio_list_head, c_seg, c_segment_t, c_age_list); 1371 c_swapio_count--; 1372 break; 1373 1374 case C_ON_SWAPPEDOUT_Q: 1375 assert(new_state == C_ON_SWAPPEDIN_Q || new_state == C_ON_AGE_Q || 1376 new_state == C_ON_SWAPPEDOUTSPARSE_Q || 1377 new_state == C_ON_BAD_Q || new_state == C_IS_EMPTY || new_state == C_IS_FREE); 1378 1379 queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); 1380 c_swappedout_count--; 1381 break; 1382 1383 case C_ON_SWAPPEDOUTSPARSE_Q: 1384 assert(new_state == C_ON_SWAPPEDIN_Q || new_state == C_ON_AGE_Q || 1385 new_state == C_ON_BAD_Q || new_state == C_IS_EMPTY || new_state == C_IS_FREE); 1386 1387 queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list); 1388 c_swappedout_sparse_count--; 1389 break; 1390 1391 case C_ON_MAJORCOMPACT_Q: 1392 assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE); 1393 1394 queue_remove(&c_major_list_head, c_seg, c_segment_t, c_age_list); 1395 c_major_count--; 1396 break; 1397 1398 case C_ON_BAD_Q: 1399 assert(new_state == C_IS_FREE); 1400 1401 queue_remove(&c_bad_list_head, c_seg, c_segment_t, c_age_list); 1402 c_bad_count--; 1403 break; 1404 1405 default: 1406 panic("c_seg %p has bad c_state = %d\n", c_seg, old_state); 1407 } 1408 1409 switch (new_state) { 1410 case C_IS_FREE: 1411 assert(old_state != C_IS_FILLING); 1412 1413 break; 1414 1415 case C_IS_EMPTY: 1416 assert(old_state == C_ON_SWAPOUT_Q || old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q); 1417 1418 c_empty_count++; 1419 break; 1420 1421 case C_IS_FILLING: 1422 assert(old_state == C_IS_EMPTY); 1423 1424 queue_enter(&c_filling_list_head, c_seg, c_segment_t, c_age_list); 1425 c_filling_count++; 1426 break; 1427 1428 case C_ON_AGE_Q: 1429 assert(old_state == C_IS_FILLING || old_state == C_ON_SWAPPEDIN_Q || 1430 old_state == C_ON_SWAPOUT_Q || old_state == C_ON_SWAPIO_Q || 1431 old_state == C_ON_MAJORCOMPACT_Q || old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q); 1432 1433 if (old_state == C_IS_FILLING) { 1434 queue_enter(&c_age_list_head, c_seg, c_segment_t, c_age_list); 1435 } else { 1436 if (!queue_empty(&c_age_list_head)) { 1437 c_segment_t c_first; 1438 1439 c_first = (c_segment_t)queue_first(&c_age_list_head); 1440 c_seg->c_creation_ts = c_first->c_creation_ts; 1441 } 1442 queue_enter_first(&c_age_list_head, c_seg, c_segment_t, c_age_list); 1443 } 1444 c_age_count++; 1445 break; 1446 1447 case C_ON_SWAPPEDIN_Q: 1448 assert(old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q); 1449 1450 if (insert_head == TRUE) { 1451 queue_enter_first(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list); 1452 } else { 1453 queue_enter(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list); 1454 } 1455 c_swappedin_count++; 1456 break; 1457 1458 case C_ON_SWAPOUT_Q: 1459 assert(old_state == C_ON_AGE_Q || old_state == C_IS_FILLING); 1460 1461 if (insert_head == TRUE) { 1462 queue_enter_first(&c_swapout_list_head, c_seg, c_segment_t, c_age_list); 1463 } else { 1464 queue_enter(&c_swapout_list_head, c_seg, c_segment_t, c_age_list); 1465 } 1466 c_swapout_count++; 1467 break; 1468 1469 case C_ON_SWAPIO_Q: 1470 assert(old_state == 
C_ON_SWAPOUT_Q); 1471 1472 if (insert_head == TRUE) { 1473 queue_enter_first(&c_swapio_list_head, c_seg, c_segment_t, c_age_list); 1474 } else { 1475 queue_enter(&c_swapio_list_head, c_seg, c_segment_t, c_age_list); 1476 } 1477 c_swapio_count++; 1478 break; 1479 1480 case C_ON_SWAPPEDOUT_Q: 1481 assert(old_state == C_ON_SWAPIO_Q); 1482 1483 if (insert_head == TRUE) { 1484 queue_enter_first(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); 1485 } else { 1486 queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); 1487 } 1488 c_swappedout_count++; 1489 break; 1490 1491 case C_ON_SWAPPEDOUTSPARSE_Q: 1492 assert(old_state == C_ON_SWAPIO_Q || old_state == C_ON_SWAPPEDOUT_Q); 1493 1494 if (insert_head == TRUE) { 1495 queue_enter_first(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list); 1496 } else { 1497 queue_enter(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list); 1498 } 1499 1500 c_swappedout_sparse_count++; 1501 break; 1502 1503 case C_ON_MAJORCOMPACT_Q: 1504 assert(old_state == C_ON_AGE_Q); 1505 1506 if (insert_head == TRUE) { 1507 queue_enter_first(&c_major_list_head, c_seg, c_segment_t, c_age_list); 1508 } else { 1509 queue_enter(&c_major_list_head, c_seg, c_segment_t, c_age_list); 1510 } 1511 c_major_count++; 1512 break; 1513 1514 case C_ON_BAD_Q: 1515 assert(old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q); 1516 1517 if (insert_head == TRUE) { 1518 queue_enter_first(&c_bad_list_head, c_seg, c_segment_t, c_age_list); 1519 } else { 1520 queue_enter(&c_bad_list_head, c_seg, c_segment_t, c_age_list); 1521 } 1522 c_bad_count++; 1523 break; 1524 1525 default: 1526 panic("c_seg %p requesting bad c_state = %d\n", c_seg, new_state); 1527 } 1528 c_seg->c_state = new_state; 1529 } 1530 1531 1532 1533 void 1534 c_seg_free(c_segment_t c_seg) 1535 { 1536 assert(c_seg->c_busy); 1537 1538 lck_mtx_unlock_always(&c_seg->c_lock); 1539 lck_mtx_lock_spin_always(c_list_lock); 1540 lck_mtx_lock_spin_always(&c_seg->c_lock); 1541 1542 c_seg_free_locked(c_seg); 1543 } 1544 1545 1546 void 1547 c_seg_free_locked(c_segment_t c_seg) 1548 { 1549 int segno; 1550 int pages_populated = 0; 1551 int32_t *c_buffer = NULL; 1552 uint64_t c_swap_handle = 0; 1553 1554 assert(c_seg->c_busy); 1555 assert(c_seg->c_slots_used == 0); 1556 assert(!c_seg->c_on_minorcompact_q); 1557 assert(!c_seg->c_busy_swapping); 1558 1559 if (c_seg->c_overage_swap == TRUE) { 1560 c_overage_swapped_count--; 1561 c_seg->c_overage_swap = FALSE; 1562 } 1563 if (!(C_SEG_IS_ONDISK(c_seg))) { 1564 c_buffer = c_seg->c_store.c_buffer; 1565 } else { 1566 c_swap_handle = c_seg->c_store.c_swap_handle; 1567 } 1568 1569 c_seg_switch_state(c_seg, C_IS_FREE, FALSE); 1570 1571 if (c_buffer) { 1572 pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE; 1573 c_seg->c_store.c_buffer = NULL; 1574 } else { 1575 #if CONFIG_FREEZE 1576 c_seg_update_task_owner(c_seg, NULL); 1577 #endif /* CONFIG_FREEZE */ 1578 1579 c_seg->c_store.c_swap_handle = (uint64_t)-1; 1580 } 1581 1582 lck_mtx_unlock_always(&c_seg->c_lock); 1583 1584 lck_mtx_unlock_always(c_list_lock); 1585 1586 if (c_buffer) { 1587 if (pages_populated) { 1588 kernel_memory_depopulate(compressor_map, (vm_offset_t)c_buffer, 1589 pages_populated * PAGE_SIZE, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR); 1590 } 1591 } else if (c_swap_handle) { 1592 /* 1593 * Free swap space on disk. 
1594 */ 1595 vm_swap_free(c_swap_handle); 1596 } 1597 lck_mtx_lock_spin_always(&c_seg->c_lock); 1598 /* 1599 * c_seg must remain busy until 1600 * after the call to vm_swap_free 1601 */ 1602 C_SEG_WAKEUP_DONE(c_seg); 1603 lck_mtx_unlock_always(&c_seg->c_lock); 1604 1605 segno = c_seg->c_mysegno; 1606 1607 lck_mtx_lock_spin_always(c_list_lock); 1608 /* 1609 * because the c_buffer is now associated with the segno, 1610 * we can't put the segno back on the free list until 1611 * after we have depopulated the c_buffer range, or 1612 * we run the risk of depopulating a range that is 1613 * now being used in one of the compressor heads 1614 */ 1615 c_segments[segno].c_segno = c_free_segno_head; 1616 c_free_segno_head = segno; 1617 c_segment_count--; 1618 1619 lck_mtx_unlock_always(c_list_lock); 1620 1621 lck_mtx_destroy(&c_seg->c_lock, &vm_compressor_lck_grp); 1622 1623 if (c_seg->c_slot_var_array_len) { 1624 kheap_free(KHEAP_DATA_BUFFERS, c_seg->c_slot_var_array, 1625 sizeof(struct c_slot) * c_seg->c_slot_var_array_len); 1626 } 1627 1628 zfree(compressor_segment_zone, c_seg); 1629 } 1630 1631 #if DEVELOPMENT || DEBUG 1632 int c_seg_trim_page_count = 0; 1633 #endif 1634 1635 void 1636 c_seg_trim_tail(c_segment_t c_seg) 1637 { 1638 c_slot_t cs; 1639 uint32_t c_size; 1640 uint32_t c_offset; 1641 uint32_t c_rounded_size; 1642 uint16_t current_nextslot; 1643 uint32_t current_populated_offset; 1644 1645 if (c_seg->c_bytes_used == 0) { 1646 return; 1647 } 1648 current_nextslot = c_seg->c_nextslot; 1649 current_populated_offset = c_seg->c_populated_offset; 1650 1651 while (c_seg->c_nextslot) { 1652 cs = C_SEG_SLOT_FROM_INDEX(c_seg, (c_seg->c_nextslot - 1)); 1653 1654 c_size = UNPACK_C_SIZE(cs); 1655 1656 if (c_size) { 1657 if (current_nextslot != c_seg->c_nextslot) { 1658 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 1659 c_offset = cs->c_offset + C_SEG_BYTES_TO_OFFSET(c_rounded_size); 1660 1661 c_seg->c_nextoffset = c_offset; 1662 c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & 1663 ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1); 1664 1665 if (c_seg->c_firstemptyslot > c_seg->c_nextslot) { 1666 c_seg->c_firstemptyslot = c_seg->c_nextslot; 1667 } 1668 #if DEVELOPMENT || DEBUG 1669 c_seg_trim_page_count += ((round_page_32(C_SEG_OFFSET_TO_BYTES(current_populated_offset)) - 1670 round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / 1671 PAGE_SIZE); 1672 #endif 1673 } 1674 break; 1675 } 1676 c_seg->c_nextslot--; 1677 } 1678 assert(c_seg->c_nextslot); 1679 } 1680 1681 1682 int 1683 c_seg_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy) 1684 { 1685 c_slot_mapping_t slot_ptr; 1686 uint32_t c_offset = 0; 1687 uint32_t old_populated_offset; 1688 uint32_t c_rounded_size; 1689 uint32_t c_size; 1690 uint16_t c_indx = 0; 1691 int i; 1692 c_slot_t c_dst; 1693 c_slot_t c_src; 1694 1695 assert(c_seg->c_busy); 1696 1697 #if VALIDATE_C_SEGMENTS 1698 c_seg_validate(c_seg, FALSE); 1699 #endif 1700 if (c_seg->c_bytes_used == 0) { 1701 c_seg_free(c_seg); 1702 return 1; 1703 } 1704 lck_mtx_unlock_always(&c_seg->c_lock); 1705 1706 if (c_seg->c_firstemptyslot >= c_seg->c_nextslot || C_SEG_UNUSED_BYTES(c_seg) < PAGE_SIZE) { 1707 goto done; 1708 } 1709 1710 /* TODO: assert first emptyslot's c_size is actually 0 */ 1711 1712 #if DEVELOPMENT || DEBUG 1713 C_SEG_MAKE_WRITEABLE(c_seg); 1714 #endif 1715 1716 #if VALIDATE_C_SEGMENTS 1717 c_seg->c_was_minor_compacted++; 1718 #endif 1719 c_indx = c_seg->c_firstemptyslot; 1720 c_dst = 
C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 1721 1722 old_populated_offset = c_seg->c_populated_offset; 1723 c_offset = c_dst->c_offset; 1724 1725 for (i = c_indx + 1; i < c_seg->c_nextslot && c_offset < c_seg->c_nextoffset; i++) { 1726 c_src = C_SEG_SLOT_FROM_INDEX(c_seg, i); 1727 1728 c_size = UNPACK_C_SIZE(c_src); 1729 1730 if (c_size == 0) { 1731 continue; 1732 } 1733 1734 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 1735 /* N.B.: This memcpy may be an overlapping copy */ 1736 memcpy(&c_seg->c_store.c_buffer[c_offset], &c_seg->c_store.c_buffer[c_src->c_offset], c_rounded_size); 1737 1738 cslot_copy(c_dst, c_src); 1739 c_dst->c_offset = c_offset; 1740 1741 slot_ptr = C_SLOT_UNPACK_PTR(c_dst); 1742 slot_ptr->s_cindx = c_indx; 1743 1744 c_offset += C_SEG_BYTES_TO_OFFSET(c_rounded_size); 1745 PACK_C_SIZE(c_src, 0); 1746 c_indx++; 1747 1748 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 1749 } 1750 c_seg->c_firstemptyslot = c_indx; 1751 c_seg->c_nextslot = c_indx; 1752 c_seg->c_nextoffset = c_offset; 1753 c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1); 1754 c_seg->c_bytes_unused = 0; 1755 1756 #if VALIDATE_C_SEGMENTS 1757 c_seg_validate(c_seg, TRUE); 1758 #endif 1759 if (old_populated_offset > c_seg->c_populated_offset) { 1760 uint32_t gc_size; 1761 int32_t *gc_ptr; 1762 1763 gc_size = C_SEG_OFFSET_TO_BYTES(old_populated_offset - c_seg->c_populated_offset); 1764 gc_ptr = &c_seg->c_store.c_buffer[c_seg->c_populated_offset]; 1765 1766 kernel_memory_depopulate(compressor_map, (vm_offset_t)gc_ptr, gc_size, 1767 KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR); 1768 } 1769 1770 #if DEVELOPMENT || DEBUG 1771 C_SEG_WRITE_PROTECT(c_seg); 1772 #endif 1773 1774 done: 1775 if (clear_busy == TRUE) { 1776 lck_mtx_lock_spin_always(&c_seg->c_lock); 1777 C_SEG_WAKEUP_DONE(c_seg); 1778 lck_mtx_unlock_always(&c_seg->c_lock); 1779 } 1780 return 0; 1781 } 1782 1783 1784 static void 1785 c_seg_alloc_nextslot(c_segment_t c_seg) 1786 { 1787 struct c_slot *old_slot_array = NULL; 1788 struct c_slot *new_slot_array = NULL; 1789 int newlen; 1790 int oldlen; 1791 1792 if (c_seg->c_nextslot < c_seg_fixed_array_len) { 1793 return; 1794 } 1795 1796 if ((c_seg->c_nextslot - c_seg_fixed_array_len) >= c_seg->c_slot_var_array_len) { 1797 oldlen = c_seg->c_slot_var_array_len; 1798 old_slot_array = c_seg->c_slot_var_array; 1799 1800 if (oldlen == 0) { 1801 newlen = C_SEG_SLOT_VAR_ARRAY_MIN_LEN; 1802 } else { 1803 newlen = oldlen * 2; 1804 } 1805 1806 new_slot_array = kheap_alloc(KHEAP_DATA_BUFFERS, 1807 sizeof(struct c_slot) * newlen, Z_WAITOK); 1808 1809 lck_mtx_lock_spin_always(&c_seg->c_lock); 1810 1811 if (old_slot_array) { 1812 memcpy(new_slot_array, old_slot_array, 1813 sizeof(struct c_slot) * oldlen); 1814 } 1815 1816 c_seg->c_slot_var_array_len = newlen; 1817 c_seg->c_slot_var_array = new_slot_array; 1818 1819 lck_mtx_unlock_always(&c_seg->c_lock); 1820 1821 if (old_slot_array) { 1822 kheap_free(KHEAP_DATA_BUFFERS, old_slot_array, 1823 sizeof(struct c_slot) * oldlen); 1824 } 1825 } 1826 } 1827 1828 1829 #define C_SEG_MAJOR_COMPACT_STATS_MAX (30) 1830 1831 struct { 1832 uint64_t asked_permission; 1833 uint64_t compactions; 1834 uint64_t moved_slots; 1835 uint64_t moved_bytes; 1836 uint64_t wasted_space_in_swapouts; 1837 uint64_t count_of_swapouts; 1838 uint64_t count_of_freed_segs; 1839 uint64_t bailed_compactions; 1840 uint64_t bytes_freed_rate_us; 1841 } c_seg_major_compact_stats[C_SEG_MAJOR_COMPACT_STATS_MAX]; 
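/*
 * c_seg_major_compact_stats[] is an array of counter buckets;
 * c_seg_major_compact_stats_now (below) selects the bucket that the major
 * compaction path charges its asked_permission / compactions / moved_slots /
 * moved_bytes updates to.
 */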
1842 1843 int c_seg_major_compact_stats_now = 0; 1844 1845 1846 #define C_MAJOR_COMPACTION_SIZE_APPROPRIATE ((C_SEG_BUFSIZE * 90) / 100) 1847 1848 1849 boolean_t 1850 c_seg_major_compact_ok( 1851 c_segment_t c_seg_dst, 1852 c_segment_t c_seg_src) 1853 { 1854 c_seg_major_compact_stats[c_seg_major_compact_stats_now].asked_permission++; 1855 1856 if (c_seg_src->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE && 1857 c_seg_dst->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE) { 1858 return FALSE; 1859 } 1860 1861 if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) { 1862 /* 1863 * destination segment is full... can't compact 1864 */ 1865 return FALSE; 1866 } 1867 1868 return TRUE; 1869 } 1870 1871 1872 boolean_t 1873 c_seg_major_compact( 1874 c_segment_t c_seg_dst, 1875 c_segment_t c_seg_src) 1876 { 1877 c_slot_mapping_t slot_ptr; 1878 uint32_t c_rounded_size; 1879 uint32_t c_size; 1880 uint16_t dst_slot; 1881 int i; 1882 c_slot_t c_dst; 1883 c_slot_t c_src; 1884 boolean_t keep_compacting = TRUE; 1885 1886 /* 1887 * segments are not locked but they are both marked c_busy 1888 * which keeps c_decompress from working on them... 1889 * we can safely allocate new pages, move compressed data 1890 * from c_seg_src to c_seg_dst and update both c_segment's 1891 * state w/o holding the master lock 1892 */ 1893 #if DEVELOPMENT || DEBUG 1894 C_SEG_MAKE_WRITEABLE(c_seg_dst); 1895 #endif 1896 1897 #if VALIDATE_C_SEGMENTS 1898 c_seg_dst->c_was_major_compacted++; 1899 c_seg_src->c_was_major_donor++; 1900 #endif 1901 c_seg_major_compact_stats[c_seg_major_compact_stats_now].compactions++; 1902 1903 dst_slot = c_seg_dst->c_nextslot; 1904 1905 for (i = 0; i < c_seg_src->c_nextslot; i++) { 1906 c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, i); 1907 1908 c_size = UNPACK_C_SIZE(c_src); 1909 1910 if (c_size == 0) { 1911 /* BATCH: move what we have so far; */ 1912 continue; 1913 } 1914 1915 if (C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset - c_seg_dst->c_nextoffset) < (unsigned) c_size) { 1916 int size_to_populate; 1917 1918 /* doesn't fit */ 1919 size_to_populate = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset); 1920 1921 if (size_to_populate == 0) { 1922 /* can't fit */ 1923 keep_compacting = FALSE; 1924 break; 1925 } 1926 if (size_to_populate > C_SEG_MAX_POPULATE_SIZE) { 1927 size_to_populate = C_SEG_MAX_POPULATE_SIZE; 1928 } 1929 1930 kernel_memory_populate(compressor_map, 1931 (vm_offset_t) &c_seg_dst->c_store.c_buffer[c_seg_dst->c_populated_offset], 1932 size_to_populate, 1933 KMA_COMPRESSOR, 1934 VM_KERN_MEMORY_COMPRESSOR); 1935 1936 c_seg_dst->c_populated_offset += C_SEG_BYTES_TO_OFFSET(size_to_populate); 1937 assert(C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) <= C_SEG_BUFSIZE); 1938 } 1939 c_seg_alloc_nextslot(c_seg_dst); 1940 1941 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot); 1942 1943 memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size); 1944 1945 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 1946 1947 c_seg_major_compact_stats[c_seg_major_compact_stats_now].moved_slots++; 1948 c_seg_major_compact_stats[c_seg_major_compact_stats_now].moved_bytes += c_size; 1949 1950 cslot_copy(c_dst, c_src); 1951 c_dst->c_offset = c_seg_dst->c_nextoffset; 1952 1953 if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot) { 1954 c_seg_dst->c_firstemptyslot++; 1955 } 1956 c_seg_dst->c_slots_used++; 1957 
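		/*
		 * Bookkeeping sketch (illustrative numbers only): the updates below
		 * charge the destination segment with the rounded slot size and debit
		 * the source by the same amount.  If the alignment granule were
		 * 4 bytes, a 1,201-byte slot would round to c_rounded_size == 1,204,
		 * so c_bytes_used and c_nextoffset on the destination grow by 1,204
		 * while the source's c_bytes_used shrinks and c_bytes_unused grows by
		 * the same 1,204 bytes.
		 */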
c_seg_dst->c_nextslot++; 1958 c_seg_dst->c_bytes_used += c_rounded_size; 1959 c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size); 1960 1961 PACK_C_SIZE(c_src, 0); 1962 1963 c_seg_src->c_bytes_used -= c_rounded_size; 1964 c_seg_src->c_bytes_unused += c_rounded_size; 1965 c_seg_src->c_firstemptyslot = 0; 1966 1967 assert(c_seg_src->c_slots_used); 1968 c_seg_src->c_slots_used--; 1969 1970 if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) { 1971 /* dest segment is now full */ 1972 keep_compacting = FALSE; 1973 break; 1974 } 1975 } 1976 #if DEVELOPMENT || DEBUG 1977 C_SEG_WRITE_PROTECT(c_seg_dst); 1978 #endif 1979 if (dst_slot < c_seg_dst->c_nextslot) { 1980 PAGE_REPLACEMENT_ALLOWED(TRUE); 1981 /* 1982 * we've now locked out c_decompress from 1983 * converting the slot passed into it into 1984 * a c_segment_t which allows us to use 1985 * the backptr to change which c_segment and 1986 * index the slot points to 1987 */ 1988 while (dst_slot < c_seg_dst->c_nextslot) { 1989 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot); 1990 1991 slot_ptr = C_SLOT_UNPACK_PTR(c_dst); 1992 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */ 1993 slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1; 1994 slot_ptr->s_cindx = dst_slot++; 1995 } 1996 PAGE_REPLACEMENT_ALLOWED(FALSE); 1997 } 1998 return keep_compacting; 1999 } 2000 2001 2002 uint64_t 2003 vm_compressor_compute_elapsed_msecs(clock_sec_t end_sec, clock_nsec_t end_nsec, clock_sec_t start_sec, clock_nsec_t start_nsec) 2004 { 2005 uint64_t end_msecs; 2006 uint64_t start_msecs; 2007 2008 end_msecs = (end_sec * 1000) + end_nsec / 1000000; 2009 start_msecs = (start_sec * 1000) + start_nsec / 1000000; 2010 2011 return end_msecs - start_msecs; 2012 } 2013 2014 2015 2016 uint32_t compressor_eval_period_in_msecs = 250; 2017 uint32_t compressor_sample_min_in_msecs = 500; 2018 uint32_t compressor_sample_max_in_msecs = 10000; 2019 uint32_t compressor_thrashing_threshold_per_10msecs = 50; 2020 uint32_t compressor_thrashing_min_per_10msecs = 20; 2021 2022 /* When true, reset sample data next chance we get. 
*/ 2023 static boolean_t compressor_need_sample_reset = FALSE; 2024 2025 2026 void 2027 compute_swapout_target_age(void) 2028 { 2029 clock_sec_t cur_ts_sec; 2030 clock_nsec_t cur_ts_nsec; 2031 uint32_t min_operations_needed_in_this_sample; 2032 uint64_t elapsed_msecs_in_eval; 2033 uint64_t elapsed_msecs_in_sample; 2034 boolean_t need_eval_reset = FALSE; 2035 2036 clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec); 2037 2038 elapsed_msecs_in_sample = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_sample_period_sec, start_of_sample_period_nsec); 2039 2040 if (compressor_need_sample_reset || 2041 elapsed_msecs_in_sample >= compressor_sample_max_in_msecs) { 2042 compressor_need_sample_reset = TRUE; 2043 need_eval_reset = TRUE; 2044 goto done; 2045 } 2046 elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_eval_period_sec, start_of_eval_period_nsec); 2047 2048 if (elapsed_msecs_in_eval < compressor_eval_period_in_msecs) { 2049 goto done; 2050 } 2051 need_eval_reset = TRUE; 2052 2053 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_START, elapsed_msecs_in_eval, sample_period_compression_count, sample_period_decompression_count, 0, 0); 2054 2055 min_operations_needed_in_this_sample = (compressor_thrashing_min_per_10msecs * (uint32_t)elapsed_msecs_in_eval) / 10; 2056 2057 if ((sample_period_compression_count - last_eval_compression_count) < min_operations_needed_in_this_sample || 2058 (sample_period_decompression_count - last_eval_decompression_count) < min_operations_needed_in_this_sample) { 2059 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_compression_count - last_eval_compression_count, 2060 sample_period_decompression_count - last_eval_decompression_count, 0, 1, 0); 2061 2062 swapout_target_age = 0; 2063 2064 compressor_need_sample_reset = TRUE; 2065 need_eval_reset = TRUE; 2066 goto done; 2067 } 2068 last_eval_compression_count = sample_period_compression_count; 2069 last_eval_decompression_count = sample_period_decompression_count; 2070 2071 if (elapsed_msecs_in_sample < compressor_sample_min_in_msecs) { 2072 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, 0, 0, 5, 0); 2073 goto done; 2074 } 2075 if (sample_period_decompression_count > ((compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10)) { 2076 uint64_t running_total; 2077 uint64_t working_target; 2078 uint64_t aging_target; 2079 uint32_t oldest_age_of_csegs_sampled = 0; 2080 uint64_t working_set_approximation = 0; 2081 2082 swapout_target_age = 0; 2083 2084 working_target = (sample_period_decompression_count / 100) * 95; /* 95 percent */ 2085 aging_target = (sample_period_decompression_count / 100) * 1; /* 1 percent */ 2086 running_total = 0; 2087 2088 for (oldest_age_of_csegs_sampled = 0; oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE; oldest_age_of_csegs_sampled++) { 2089 running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled]; 2090 2091 working_set_approximation += oldest_age_of_csegs_sampled * age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled]; 2092 2093 if (running_total >= working_target) { 2094 break; 2095 } 2096 } 2097 if (oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE) { 2098 working_set_approximation = (working_set_approximation * 1000) / elapsed_msecs_in_sample; 2099 2100 if (working_set_approximation < VM_PAGE_COMPRESSOR_COUNT) { 2101 running_total = overage_decompressions_during_sample_period; 2102 2103 for (oldest_age_of_csegs_sampled = 
DECOMPRESSION_SAMPLE_MAX_AGE - 1; oldest_age_of_csegs_sampled; oldest_age_of_csegs_sampled--) { 2104 running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled]; 2105 2106 if (running_total >= aging_target) { 2107 break; 2108 } 2109 } 2110 swapout_target_age = (uint32_t)cur_ts_sec - oldest_age_of_csegs_sampled; 2111 2112 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 2, 0); 2113 } else { 2114 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 0, 3, 0); 2115 } 2116 } else { 2117 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_target, running_total, 0, 4, 0); 2118 } 2119 2120 compressor_need_sample_reset = TRUE; 2121 need_eval_reset = TRUE; 2122 } else { 2123 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_decompression_count, (compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10, 0, 6, 0); 2124 } 2125 done: 2126 if (compressor_need_sample_reset == TRUE) { 2127 bzero(age_of_decompressions_during_sample_period, sizeof(age_of_decompressions_during_sample_period)); 2128 overage_decompressions_during_sample_period = 0; 2129 2130 start_of_sample_period_sec = cur_ts_sec; 2131 start_of_sample_period_nsec = cur_ts_nsec; 2132 sample_period_decompression_count = 0; 2133 sample_period_compression_count = 0; 2134 last_eval_decompression_count = 0; 2135 last_eval_compression_count = 0; 2136 compressor_need_sample_reset = FALSE; 2137 } 2138 if (need_eval_reset == TRUE) { 2139 start_of_eval_period_sec = cur_ts_sec; 2140 start_of_eval_period_nsec = cur_ts_nsec; 2141 } 2142 } 2143 2144 2145 int compaction_swapper_init_now = 0; 2146 int compaction_swapper_running = 0; 2147 int compaction_swapper_awakened = 0; 2148 int compaction_swapper_abort = 0; 2149 2150 2151 #if CONFIG_JETSAM 2152 boolean_t memorystatus_kill_on_VM_compressor_thrashing(boolean_t); 2153 boolean_t memorystatus_kill_on_VM_compressor_space_shortage(boolean_t); 2154 boolean_t memorystatus_kill_on_FC_thrashing(boolean_t); 2155 int compressor_thrashing_induced_jetsam = 0; 2156 int filecache_thrashing_induced_jetsam = 0; 2157 static boolean_t vm_compressor_thrashing_detected = FALSE; 2158 #endif /* CONFIG_JETSAM */ 2159 2160 static boolean_t 2161 compressor_needs_to_swap(void) 2162 { 2163 boolean_t should_swap = FALSE; 2164 2165 if (vm_swapout_ripe_segments == TRUE && c_overage_swapped_count < c_overage_swapped_limit) { 2166 c_segment_t c_seg; 2167 clock_sec_t now; 2168 clock_sec_t age; 2169 clock_nsec_t nsec; 2170 2171 clock_get_system_nanotime(&now, &nsec); 2172 age = 0; 2173 2174 lck_mtx_lock_spin_always(c_list_lock); 2175 2176 if (!queue_empty(&c_age_list_head)) { 2177 c_seg = (c_segment_t) queue_first(&c_age_list_head); 2178 2179 age = now - c_seg->c_creation_ts; 2180 } 2181 lck_mtx_unlock_always(c_list_lock); 2182 2183 if (age >= vm_ripe_target_age) { 2184 should_swap = TRUE; 2185 goto check_if_low_space; 2186 } 2187 } 2188 if (VM_CONFIG_SWAP_IS_ACTIVE) { 2189 if (COMPRESSOR_NEEDS_TO_SWAP()) { 2190 should_swap = TRUE; 2191 vmcs_stats.compressor_swap_threshold_exceeded++; 2192 goto check_if_low_space; 2193 } 2194 if (VM_PAGE_Q_THROTTLED(&vm_pageout_queue_external) && vm_page_anonymous_count < (vm_page_inactive_count / 20)) { 2195 should_swap = TRUE; 2196 vmcs_stats.external_q_throttled++; 2197 goto check_if_low_space; 2198 } 2199 if (vm_page_free_count < (vm_page_free_reserved - (COMPRESSOR_FREE_RESERVED_LIMIT * 2))) { 2200 should_swap = TRUE; 2201 
vmcs_stats.free_count_below_reserve++; 2202 goto check_if_low_space; 2203 } 2204 } 2205 2206 #if (XNU_TARGET_OS_OSX && __arm64__) 2207 /* 2208 * Thrashing detection disabled. 2209 */ 2210 #else /* (XNU_TARGET_OS_OSX && __arm64__) */ 2211 2212 compute_swapout_target_age(); 2213 2214 if (swapout_target_age) { 2215 c_segment_t c_seg; 2216 2217 lck_mtx_lock_spin_always(c_list_lock); 2218 2219 if (!queue_empty(&c_age_list_head)) { 2220 c_seg = (c_segment_t) queue_first(&c_age_list_head); 2221 2222 if (c_seg->c_creation_ts > swapout_target_age) { 2223 swapout_target_age = 0; 2224 } 2225 } 2226 lck_mtx_unlock_always(c_list_lock); 2227 } 2228 #if CONFIG_PHANTOM_CACHE 2229 if (vm_phantom_cache_check_pressure()) { 2230 should_swap = TRUE; 2231 } 2232 #endif 2233 if (swapout_target_age) { 2234 should_swap = TRUE; 2235 vmcs_stats.thrashing_detected++; 2236 } 2237 #endif /* (XNU_TARGET_OS_OSX && __arm64__) */ 2238 2239 check_if_low_space: 2240 2241 #if CONFIG_JETSAM 2242 if (should_swap || vm_compressor_low_on_space() == TRUE) { 2243 if (vm_compressor_thrashing_detected == FALSE) { 2244 vm_compressor_thrashing_detected = TRUE; 2245 2246 if (swapout_target_age) { 2247 /* The compressor is thrashing. */ 2248 memorystatus_kill_on_VM_compressor_thrashing(TRUE /* async */); 2249 compressor_thrashing_induced_jetsam++; 2250 } else if (vm_compressor_low_on_space() == TRUE) { 2251 /* The compressor is running low on space. */ 2252 memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */); 2253 compressor_thrashing_induced_jetsam++; 2254 } else { 2255 memorystatus_kill_on_FC_thrashing(TRUE /* async */); 2256 filecache_thrashing_induced_jetsam++; 2257 } 2258 } 2259 /* 2260 * let the jetsam take precedence over 2261 * any major compactions we might have 2262 * been able to do... otherwise we run 2263 * the risk of doing major compactions 2264 * on segments we're about to free up 2265 * due to the jetsam activity. 2266 */ 2267 should_swap = FALSE; 2268 } 2269 2270 #else /* CONFIG_JETSAM */ 2271 if (should_swap && vm_swap_low_on_space()) { 2272 vm_compressor_take_paging_space_action(); 2273 } 2274 #endif /* CONFIG_JETSAM */ 2275 2276 if (should_swap == FALSE) { 2277 /* 2278 * vm_compressor_needs_to_major_compact returns true only if we're 2279 * about to run out of available compressor segments... in this 2280 * case, we absolutely need to run a major compaction even if 2281 * we've just kicked off a jetsam or we don't otherwise need to 2282 * swap... terminating objects releases 2283 * pages back to the uncompressed cache, but does not guarantee 2284 * that we will free up even a single compression segment 2285 */ 2286 should_swap = vm_compressor_needs_to_major_compact(); 2287 if (should_swap) { 2288 vmcs_stats.fragmentation_detected++; 2289 #if (XNU_TARGET_OS_OSX && __arm64__) 2290 /* 2291 * SSD based systems don't need the fragmentation 2292 * swapout trigger because that was designed for 2293 * systems where the swapout latencies could be long 2294 * enough that the pressure, if allowed to build up, 2295 * would be tightly tied to the swapouts later on. 2296 */ 2297 should_swap = FALSE; 2298 #endif /* (XNU_TARGET_OS_OSX && __arm64__) */ 2299 } 2300 } 2301 2302 /* 2303 * returning TRUE when swap_supported == FALSE 2304 * will cause the major compaction engine to 2305 * run, but will not trigger any swapping... 
2306 * segments that have been major compacted 2307 * will be moved to the majorcompact queue 2308 */ 2309 return should_swap; 2310 } 2311 2312 #if CONFIG_JETSAM 2313 /* 2314 * This function is called from the jetsam thread after killing something to 2315 * mitigate thrashing. 2316 * 2317 * We need to restart our thrashing detection heuristics since memory pressure 2318 * has potentially changed significantly, and we don't want to detect on old 2319 * data from before the jetsam. 2320 */ 2321 void 2322 vm_thrashing_jetsam_done(void) 2323 { 2324 vm_compressor_thrashing_detected = FALSE; 2325 2326 /* Were we compressor-thrashing or filecache-thrashing? */ 2327 if (swapout_target_age) { 2328 swapout_target_age = 0; 2329 compressor_need_sample_reset = TRUE; 2330 } 2331 #if CONFIG_PHANTOM_CACHE 2332 else { 2333 vm_phantom_cache_restart_sample(); 2334 } 2335 #endif 2336 } 2337 #endif /* CONFIG_JETSAM */ 2338 2339 uint32_t vm_wake_compactor_swapper_calls = 0; 2340 uint32_t vm_run_compactor_already_running = 0; 2341 uint32_t vm_run_compactor_empty_minor_q = 0; 2342 uint32_t vm_run_compactor_did_compact = 0; 2343 uint32_t vm_run_compactor_waited = 0; 2344 2345 void 2346 vm_run_compactor(void) 2347 { 2348 if (c_segment_count == 0) { 2349 return; 2350 } 2351 2352 lck_mtx_lock_spin_always(c_list_lock); 2353 2354 if (c_minor_count == 0) { 2355 vm_run_compactor_empty_minor_q++; 2356 2357 lck_mtx_unlock_always(c_list_lock); 2358 return; 2359 } 2360 if (compaction_swapper_running) { 2361 if (vm_pageout_state.vm_restricted_to_single_processor == FALSE) { 2362 vm_run_compactor_already_running++; 2363 2364 lck_mtx_unlock_always(c_list_lock); 2365 return; 2366 } 2367 vm_run_compactor_waited++; 2368 2369 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT); 2370 2371 lck_mtx_unlock_always(c_list_lock); 2372 2373 thread_block(THREAD_CONTINUE_NULL); 2374 2375 return; 2376 } 2377 vm_run_compactor_did_compact++; 2378 2379 fastwake_warmup = FALSE; 2380 compaction_swapper_running = 1; 2381 2382 vm_compressor_do_delayed_compactions(FALSE); 2383 2384 compaction_swapper_running = 0; 2385 2386 lck_mtx_unlock_always(c_list_lock); 2387 2388 thread_wakeup((event_t)&compaction_swapper_running); 2389 } 2390 2391 2392 void 2393 vm_wake_compactor_swapper(void) 2394 { 2395 if (compaction_swapper_running || compaction_swapper_awakened || c_segment_count == 0) { 2396 return; 2397 } 2398 2399 if (c_minor_count || vm_compressor_needs_to_major_compact()) { 2400 lck_mtx_lock_spin_always(c_list_lock); 2401 2402 fastwake_warmup = FALSE; 2403 2404 if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) { 2405 vm_wake_compactor_swapper_calls++; 2406 2407 compaction_swapper_awakened = 1; 2408 thread_wakeup((event_t)&c_compressor_swap_trigger); 2409 } 2410 lck_mtx_unlock_always(c_list_lock); 2411 } 2412 } 2413 2414 2415 void 2416 vm_consider_swapping() 2417 { 2418 c_segment_t c_seg, c_seg_next; 2419 clock_sec_t now; 2420 clock_nsec_t nsec; 2421 2422 assert(VM_CONFIG_SWAP_IS_PRESENT); 2423 2424 lck_mtx_lock_spin_always(c_list_lock); 2425 2426 compaction_swapper_abort = 1; 2427 2428 while (compaction_swapper_running) { 2429 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT); 2430 2431 lck_mtx_unlock_always(c_list_lock); 2432 2433 thread_block(THREAD_CONTINUE_NULL); 2434 2435 lck_mtx_lock_spin_always(c_list_lock); 2436 } 2437 compaction_swapper_abort = 0; 2438 compaction_swapper_running = 1; 2439 2440 vm_swapout_ripe_segments = TRUE; 2441 2442 if (!queue_empty(&c_major_list_head)) { 2443 
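		/*
		 * Walk the major-compact queue and move any segment that has aged
		 * past vm_ripe_target_age back onto the age queue; with
		 * vm_swapout_ripe_segments set TRUE above, the call to
		 * vm_compressor_compact_and_swap() below will then consider those
		 * ripe segments for swapout.
		 */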
clock_get_system_nanotime(&now, &nsec); 2444 2445 c_seg = (c_segment_t)queue_first(&c_major_list_head); 2446 2447 while (!queue_end(&c_major_list_head, (queue_entry_t)c_seg)) { 2448 if (c_overage_swapped_count >= c_overage_swapped_limit) { 2449 break; 2450 } 2451 2452 c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list); 2453 2454 if ((now - c_seg->c_creation_ts) >= vm_ripe_target_age) { 2455 lck_mtx_lock_spin_always(&c_seg->c_lock); 2456 2457 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE); 2458 2459 lck_mtx_unlock_always(&c_seg->c_lock); 2460 } 2461 c_seg = c_seg_next; 2462 } 2463 } 2464 vm_compressor_compact_and_swap(FALSE); 2465 2466 compaction_swapper_running = 0; 2467 2468 vm_swapout_ripe_segments = FALSE; 2469 2470 lck_mtx_unlock_always(c_list_lock); 2471 2472 thread_wakeup((event_t)&compaction_swapper_running); 2473 } 2474 2475 2476 void 2477 vm_consider_waking_compactor_swapper(void) 2478 { 2479 boolean_t need_wakeup = FALSE; 2480 2481 if (c_segment_count == 0) { 2482 return; 2483 } 2484 2485 if (compaction_swapper_running || compaction_swapper_awakened) { 2486 return; 2487 } 2488 2489 if (!compaction_swapper_inited && !compaction_swapper_init_now) { 2490 compaction_swapper_init_now = 1; 2491 need_wakeup = TRUE; 2492 } 2493 2494 if (c_minor_count && (COMPRESSOR_NEEDS_TO_MINOR_COMPACT())) { 2495 need_wakeup = TRUE; 2496 } else if (compressor_needs_to_swap()) { 2497 need_wakeup = TRUE; 2498 } else if (c_minor_count) { 2499 uint64_t total_bytes; 2500 2501 total_bytes = compressor_object->resident_page_count * PAGE_SIZE_64; 2502 2503 if ((total_bytes - compressor_bytes_used) > total_bytes / 10) { 2504 need_wakeup = TRUE; 2505 } 2506 } 2507 if (need_wakeup == TRUE) { 2508 lck_mtx_lock_spin_always(c_list_lock); 2509 2510 fastwake_warmup = FALSE; 2511 2512 if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) { 2513 memoryshot(VM_WAKEUP_COMPACTOR_SWAPPER, DBG_FUNC_NONE); 2514 2515 compaction_swapper_awakened = 1; 2516 thread_wakeup((event_t)&c_compressor_swap_trigger); 2517 } 2518 lck_mtx_unlock_always(c_list_lock); 2519 } 2520 } 2521 2522 2523 #define C_SWAPOUT_LIMIT 4 2524 #define DELAYED_COMPACTIONS_PER_PASS 30 2525 2526 void 2527 vm_compressor_do_delayed_compactions(boolean_t flush_all) 2528 { 2529 c_segment_t c_seg; 2530 int number_compacted = 0; 2531 boolean_t needs_to_swap = FALSE; 2532 2533 2534 VM_DEBUG_CONSTANT_EVENT(vm_compressor_do_delayed_compactions, VM_COMPRESSOR_DO_DELAYED_COMPACTIONS, DBG_FUNC_START, c_minor_count, flush_all, 0, 0); 2535 2536 #if XNU_TARGET_OS_OSX 2537 LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED); 2538 #endif /* XNU_TARGET_OS_OSX */ 2539 2540 while (!queue_empty(&c_minor_list_head) && needs_to_swap == FALSE) { 2541 c_seg = (c_segment_t)queue_first(&c_minor_list_head); 2542 2543 lck_mtx_lock_spin_always(&c_seg->c_lock); 2544 2545 if (c_seg->c_busy) { 2546 lck_mtx_unlock_always(c_list_lock); 2547 c_seg_wait_on_busy(c_seg); 2548 lck_mtx_lock_spin_always(c_list_lock); 2549 2550 continue; 2551 } 2552 C_SEG_BUSY(c_seg); 2553 2554 c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, TRUE); 2555 2556 if (VM_CONFIG_SWAP_IS_ACTIVE && (number_compacted++ > DELAYED_COMPACTIONS_PER_PASS)) { 2557 if ((flush_all == TRUE || compressor_needs_to_swap() == TRUE) && c_swapout_count < C_SWAPOUT_LIMIT) { 2558 needs_to_swap = TRUE; 2559 } 2560 2561 number_compacted = 0; 2562 } 2563 lck_mtx_lock_spin_always(c_list_lock); 2564 } 2565 2566 VM_DEBUG_CONSTANT_EVENT(vm_compressor_do_delayed_compactions, VM_COMPRESSOR_DO_DELAYED_COMPACTIONS, 
DBG_FUNC_END, c_minor_count, number_compacted, needs_to_swap, 0); 2567 } 2568 2569 2570 #define C_SEGMENT_SWAPPEDIN_AGE_LIMIT 10 2571 2572 static void 2573 vm_compressor_age_swapped_in_segments(boolean_t flush_all) 2574 { 2575 c_segment_t c_seg; 2576 clock_sec_t now; 2577 clock_nsec_t nsec; 2578 2579 clock_get_system_nanotime(&now, &nsec); 2580 2581 while (!queue_empty(&c_swappedin_list_head)) { 2582 c_seg = (c_segment_t)queue_first(&c_swappedin_list_head); 2583 2584 if (flush_all == FALSE && (now - c_seg->c_swappedin_ts) < C_SEGMENT_SWAPPEDIN_AGE_LIMIT) { 2585 break; 2586 } 2587 2588 lck_mtx_lock_spin_always(&c_seg->c_lock); 2589 2590 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE); 2591 c_seg->c_agedin_ts = (uint32_t) now; 2592 2593 lck_mtx_unlock_always(&c_seg->c_lock); 2594 } 2595 } 2596 2597 2598 extern int vm_num_swap_files; 2599 extern int vm_num_pinned_swap_files; 2600 extern int vm_swappin_enabled; 2601 2602 extern unsigned int vm_swapfile_total_segs_used; 2603 extern unsigned int vm_swapfile_total_segs_alloced; 2604 2605 2606 void 2607 vm_compressor_flush(void) 2608 { 2609 uint64_t vm_swap_put_failures_at_start; 2610 wait_result_t wait_result = 0; 2611 AbsoluteTime startTime, endTime; 2612 clock_sec_t now_sec; 2613 clock_nsec_t now_nsec; 2614 uint64_t nsec; 2615 2616 HIBLOG("vm_compressor_flush - starting\n"); 2617 2618 clock_get_uptime(&startTime); 2619 2620 lck_mtx_lock_spin_always(c_list_lock); 2621 2622 fastwake_warmup = FALSE; 2623 compaction_swapper_abort = 1; 2624 2625 while (compaction_swapper_running) { 2626 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT); 2627 2628 lck_mtx_unlock_always(c_list_lock); 2629 2630 thread_block(THREAD_CONTINUE_NULL); 2631 2632 lck_mtx_lock_spin_always(c_list_lock); 2633 } 2634 compaction_swapper_abort = 0; 2635 compaction_swapper_running = 1; 2636 2637 hibernate_flushing = TRUE; 2638 hibernate_no_swapspace = FALSE; 2639 c_generation_id_flush_barrier = c_generation_id + 1000; 2640 2641 clock_get_system_nanotime(&now_sec, &now_nsec); 2642 hibernate_flushing_deadline = now_sec + HIBERNATE_FLUSHING_SECS_TO_COMPLETE; 2643 2644 vm_swap_put_failures_at_start = vm_swap_put_failures; 2645 2646 vm_compressor_compact_and_swap(TRUE); 2647 2648 while (!queue_empty(&c_swapout_list_head)) { 2649 assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 5000, 1000 * NSEC_PER_USEC); 2650 2651 lck_mtx_unlock_always(c_list_lock); 2652 2653 wait_result = thread_block(THREAD_CONTINUE_NULL); 2654 2655 lck_mtx_lock_spin_always(c_list_lock); 2656 2657 if (wait_result == THREAD_TIMED_OUT) { 2658 break; 2659 } 2660 } 2661 hibernate_flushing = FALSE; 2662 compaction_swapper_running = 0; 2663 2664 if (vm_swap_put_failures > vm_swap_put_failures_at_start) { 2665 HIBLOG("vm_compressor_flush failed to clean %llu segments - vm_page_compressor_count(%d)\n", 2666 vm_swap_put_failures - vm_swap_put_failures_at_start, VM_PAGE_COMPRESSOR_COUNT); 2667 } 2668 2669 lck_mtx_unlock_always(c_list_lock); 2670 2671 thread_wakeup((event_t)&compaction_swapper_running); 2672 2673 clock_get_uptime(&endTime); 2674 SUB_ABSOLUTETIME(&endTime, &startTime); 2675 absolutetime_to_nanoseconds(endTime, &nsec); 2676 2677 HIBLOG("vm_compressor_flush completed - took %qd msecs - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d, vm_swappin_enabled = %d\n", 2678 nsec / 1000000ULL, vm_num_swap_files, vm_num_pinned_swap_files, vm_swappin_enabled); 2679 } 2680 2681 2682 int compaction_swap_trigger_thread_awakened = 0; 2683 2684 static void 2685 
vm_compressor_swap_trigger_thread(void) 2686 { 2687 current_thread()->options |= TH_OPT_VMPRIV; 2688 2689 /* 2690 * compaction_swapper_init_now is set when the first call to 2691 * vm_consider_waking_compactor_swapper is made from 2692 * vm_pageout_scan... since this function is called upon 2693 * thread creation, we want to make sure to delay adjusting 2694 * the tuneables until we are awakened via vm_pageout_scan 2695 * so that we are at a point where the vm_swapfile_open will 2696 * be operating on the correct directory (in case the default 2697 * of using the VM volume is overridden by the dynamic_pager) 2698 */ 2699 if (compaction_swapper_init_now) { 2700 vm_compaction_swapper_do_init(); 2701 2702 if (vm_pageout_state.vm_restricted_to_single_processor == TRUE) { 2703 thread_vm_bind_group_add(); 2704 } 2705 #if CONFIG_THREAD_GROUPS 2706 thread_group_vm_add(); 2707 #endif 2708 thread_set_thread_name(current_thread(), "VM_cswap_trigger"); 2709 compaction_swapper_init_now = 0; 2710 } 2711 lck_mtx_lock_spin_always(c_list_lock); 2712 2713 compaction_swap_trigger_thread_awakened++; 2714 compaction_swapper_awakened = 0; 2715 2716 if (compaction_swapper_running == 0) { 2717 compaction_swapper_running = 1; 2718 2719 vm_compressor_compact_and_swap(FALSE); 2720 2721 compaction_swapper_running = 0; 2722 } 2723 assert_wait((event_t)&c_compressor_swap_trigger, THREAD_UNINT); 2724 2725 if (compaction_swapper_running == 0) { 2726 thread_wakeup((event_t)&compaction_swapper_running); 2727 } 2728 2729 lck_mtx_unlock_always(c_list_lock); 2730 2731 thread_block((thread_continue_t)vm_compressor_swap_trigger_thread); 2732 2733 /* NOTREACHED */ 2734 } 2735 2736 2737 void 2738 vm_compressor_record_warmup_start(void) 2739 { 2740 c_segment_t c_seg; 2741 2742 lck_mtx_lock_spin_always(c_list_lock); 2743 2744 if (first_c_segment_to_warm_generation_id == 0) { 2745 if (!queue_empty(&c_age_list_head)) { 2746 c_seg = (c_segment_t)queue_last(&c_age_list_head); 2747 2748 first_c_segment_to_warm_generation_id = c_seg->c_generation_id; 2749 } else { 2750 first_c_segment_to_warm_generation_id = 0; 2751 } 2752 2753 fastwake_recording_in_progress = TRUE; 2754 } 2755 lck_mtx_unlock_always(c_list_lock); 2756 } 2757 2758 2759 void 2760 vm_compressor_record_warmup_end(void) 2761 { 2762 c_segment_t c_seg; 2763 2764 lck_mtx_lock_spin_always(c_list_lock); 2765 2766 if (fastwake_recording_in_progress == TRUE) { 2767 if (!queue_empty(&c_age_list_head)) { 2768 c_seg = (c_segment_t)queue_last(&c_age_list_head); 2769 2770 last_c_segment_to_warm_generation_id = c_seg->c_generation_id; 2771 } else { 2772 last_c_segment_to_warm_generation_id = first_c_segment_to_warm_generation_id; 2773 } 2774 2775 fastwake_recording_in_progress = FALSE; 2776 2777 HIBLOG("vm_compressor_record_warmup (%qd - %qd)\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id); 2778 } 2779 lck_mtx_unlock_always(c_list_lock); 2780 } 2781 2782 2783 #define DELAY_TRIM_ON_WAKE_SECS 25 2784 2785 void 2786 vm_compressor_delay_trim(void) 2787 { 2788 clock_sec_t sec; 2789 clock_nsec_t nsec; 2790 2791 clock_get_system_nanotime(&sec, &nsec); 2792 dont_trim_until_ts = sec + DELAY_TRIM_ON_WAKE_SECS; 2793 } 2794 2795 2796 void 2797 vm_compressor_do_warmup(void) 2798 { 2799 lck_mtx_lock_spin_always(c_list_lock); 2800 2801 if (first_c_segment_to_warm_generation_id == last_c_segment_to_warm_generation_id) { 2802 first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0; 2803 2804 lck_mtx_unlock_always(c_list_lock); 2805 return; 2806 } 
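	/*
	 * A non-empty generation-id range was recorded at hibernation time, so
	 * hand the warmup to the swap trigger thread: setting fastwake_warmup
	 * and waking c_compressor_swap_trigger causes
	 * vm_compressor_compact_and_swap() to run do_fastwake_warmup() over the
	 * recorded generation-id range before doing any compaction work.
	 */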
2807 2808 if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) { 2809 fastwake_warmup = TRUE; 2810 2811 compaction_swapper_awakened = 1; 2812 thread_wakeup((event_t)&c_compressor_swap_trigger); 2813 } 2814 lck_mtx_unlock_always(c_list_lock); 2815 } 2816 2817 void 2818 do_fastwake_warmup_all(void) 2819 { 2820 lck_mtx_lock_spin_always(c_list_lock); 2821 2822 if (queue_empty(&c_swappedout_list_head) && queue_empty(&c_swappedout_sparse_list_head)) { 2823 lck_mtx_unlock_always(c_list_lock); 2824 return; 2825 } 2826 2827 fastwake_warmup = TRUE; 2828 2829 do_fastwake_warmup(&c_swappedout_list_head, TRUE); 2830 2831 do_fastwake_warmup(&c_swappedout_sparse_list_head, TRUE); 2832 2833 fastwake_warmup = FALSE; 2834 2835 lck_mtx_unlock_always(c_list_lock); 2836 } 2837 2838 void 2839 do_fastwake_warmup(queue_head_t *c_queue, boolean_t consider_all_cseg) 2840 { 2841 c_segment_t c_seg = NULL; 2842 AbsoluteTime startTime, endTime; 2843 uint64_t nsec; 2844 2845 2846 HIBLOG("vm_compressor_fastwake_warmup (%qd - %qd) - starting\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id); 2847 2848 clock_get_uptime(&startTime); 2849 2850 lck_mtx_unlock_always(c_list_lock); 2851 2852 proc_set_thread_policy(current_thread(), 2853 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2); 2854 2855 PAGE_REPLACEMENT_DISALLOWED(TRUE); 2856 2857 lck_mtx_lock_spin_always(c_list_lock); 2858 2859 while (!queue_empty(c_queue) && fastwake_warmup == TRUE) { 2860 c_seg = (c_segment_t) queue_first(c_queue); 2861 2862 if (consider_all_cseg == FALSE) { 2863 if (c_seg->c_generation_id < first_c_segment_to_warm_generation_id || 2864 c_seg->c_generation_id > last_c_segment_to_warm_generation_id) { 2865 break; 2866 } 2867 2868 if (vm_page_free_count < (AVAILABLE_MEMORY / 4)) { 2869 break; 2870 } 2871 } 2872 2873 lck_mtx_lock_spin_always(&c_seg->c_lock); 2874 lck_mtx_unlock_always(c_list_lock); 2875 2876 if (c_seg->c_busy) { 2877 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2878 c_seg_wait_on_busy(c_seg); 2879 PAGE_REPLACEMENT_DISALLOWED(TRUE); 2880 } else { 2881 if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) { 2882 lck_mtx_unlock_always(&c_seg->c_lock); 2883 } 2884 c_segment_warmup_count++; 2885 2886 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2887 vm_pageout_io_throttle(); 2888 PAGE_REPLACEMENT_DISALLOWED(TRUE); 2889 } 2890 lck_mtx_lock_spin_always(c_list_lock); 2891 } 2892 lck_mtx_unlock_always(c_list_lock); 2893 2894 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2895 2896 proc_set_thread_policy(current_thread(), 2897 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER0); 2898 2899 clock_get_uptime(&endTime); 2900 SUB_ABSOLUTETIME(&endTime, &startTime); 2901 absolutetime_to_nanoseconds(endTime, &nsec); 2902 2903 HIBLOG("vm_compressor_fastwake_warmup completed - took %qd msecs\n", nsec / 1000000ULL); 2904 2905 lck_mtx_lock_spin_always(c_list_lock); 2906 2907 if (consider_all_cseg == FALSE) { 2908 first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0; 2909 } 2910 } 2911 2912 int min_csegs_per_major_compaction = DELAYED_COMPACTIONS_PER_PASS; 2913 extern bool vm_swapout_thread_running; 2914 extern boolean_t compressor_store_stop_compaction; 2915 2916 void 2917 vm_compressor_compact_and_swap(boolean_t flush_all) 2918 { 2919 c_segment_t c_seg, c_seg_next; 2920 boolean_t keep_compacting, switch_state; 2921 clock_sec_t now; 2922 clock_nsec_t nsec; 2923 mach_timespec_t start_ts, end_ts; 2924 unsigned int number_considered, wanted_cseg_found, 
yield_after_considered_per_pass, number_yields; 2925 uint64_t bytes_to_free, bytes_freed, delta_usec; 2926 2927 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_START, c_age_count, c_minor_count, c_major_count, vm_page_free_count); 2928 2929 if (fastwake_warmup == TRUE) { 2930 uint64_t starting_warmup_count; 2931 2932 starting_warmup_count = c_segment_warmup_count; 2933 2934 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_START, c_segment_warmup_count, 2935 first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id, 0, 0); 2936 do_fastwake_warmup(&c_swappedout_list_head, FALSE); 2937 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_END, c_segment_warmup_count, c_segment_warmup_count - starting_warmup_count, 0, 0, 0); 2938 2939 fastwake_warmup = FALSE; 2940 } 2941 2942 /* 2943 * it's possible for the c_age_list_head to be empty if we 2944 * hit our limits for growing the compressor pool and we subsequently 2945 * hibernated... on the next hibernation we could see the queue as 2946 * empty and not proceeed even though we have a bunch of segments on 2947 * the swapped in queue that need to be dealt with. 2948 */ 2949 vm_compressor_do_delayed_compactions(flush_all); 2950 2951 vm_compressor_age_swapped_in_segments(flush_all); 2952 2953 /* 2954 * we only need to grab the timestamp once per 2955 * invocation of this function since the 2956 * timescale we're interested in is measured 2957 * in days 2958 */ 2959 clock_get_system_nanotime(&now, &nsec); 2960 2961 start_ts.tv_sec = (int) now; 2962 start_ts.tv_nsec = nsec; 2963 delta_usec = 0; 2964 number_considered = 0; 2965 wanted_cseg_found = 0; 2966 number_yields = 0; 2967 bytes_to_free = 0; 2968 bytes_freed = 0; 2969 yield_after_considered_per_pass = MAX(min_csegs_per_major_compaction, DELAYED_COMPACTIONS_PER_PASS); 2970 2971 while (!queue_empty(&c_age_list_head) && !compaction_swapper_abort && !compressor_store_stop_compaction) { 2972 if (hibernate_flushing == TRUE) { 2973 clock_sec_t sec; 2974 2975 if (hibernate_should_abort()) { 2976 HIBLOG("vm_compressor_flush - hibernate_should_abort returned TRUE\n"); 2977 break; 2978 } 2979 if (hibernate_no_swapspace == TRUE) { 2980 HIBLOG("vm_compressor_flush - out of swap space\n"); 2981 break; 2982 } 2983 if (vm_swap_files_pinned() == FALSE) { 2984 HIBLOG("vm_compressor_flush - unpinned swap files\n"); 2985 break; 2986 } 2987 if (hibernate_in_progress_with_pinned_swap == TRUE && 2988 (vm_swapfile_total_segs_alloced == vm_swapfile_total_segs_used)) { 2989 HIBLOG("vm_compressor_flush - out of pinned swap space\n"); 2990 break; 2991 } 2992 clock_get_system_nanotime(&sec, &nsec); 2993 2994 if (sec > hibernate_flushing_deadline) { 2995 HIBLOG("vm_compressor_flush - failed to finish before deadline\n"); 2996 break; 2997 } 2998 } 2999 if (!vm_swap_out_of_space() && c_swapout_count >= C_SWAPOUT_LIMIT) { 3000 assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 100, 1000 * NSEC_PER_USEC); 3001 3002 if (!vm_swapout_thread_running) { 3003 thread_wakeup((event_t)&c_swapout_list_head); 3004 } 3005 3006 lck_mtx_unlock_always(c_list_lock); 3007 3008 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 1, c_swapout_count, 0, 0); 3009 3010 thread_block(THREAD_CONTINUE_NULL); 3011 3012 lck_mtx_lock_spin_always(c_list_lock); 3013 } 3014 /* 3015 * Minor compactions 3016 */ 3017 vm_compressor_do_delayed_compactions(flush_all); 3018 3019 
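		/*
		 * Re-age anything that was swapped in while we were working: segments
		 * that have sat on the swapped-in queue longer than
		 * C_SEGMENT_SWAPPEDIN_AGE_LIMIT seconds move back to the age queue,
		 * making them candidates for the major compaction pass below.
		 */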
vm_compressor_age_swapped_in_segments(flush_all); 3020 3021 if (!vm_swap_out_of_space() && c_swapout_count >= C_SWAPOUT_LIMIT) { 3022 /* 3023 * we timed out on the above thread_block 3024 * let's loop around and try again 3025 * the timeout allows us to continue 3026 * to do minor compactions to make 3027 * more memory available 3028 */ 3029 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 2, c_swapout_count, 0, 0); 3030 3031 continue; 3032 } 3033 3034 /* 3035 * Swap out segments? 3036 */ 3037 if (flush_all == FALSE) { 3038 boolean_t needs_to_swap; 3039 3040 lck_mtx_unlock_always(c_list_lock); 3041 3042 needs_to_swap = compressor_needs_to_swap(); 3043 3044 lck_mtx_lock_spin_always(c_list_lock); 3045 3046 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 3, needs_to_swap, 0, 0); 3047 3048 if (needs_to_swap == FALSE) { 3049 break; 3050 } 3051 } 3052 if (queue_empty(&c_age_list_head)) { 3053 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 4, c_age_count, 0, 0); 3054 break; 3055 } 3056 c_seg = (c_segment_t) queue_first(&c_age_list_head); 3057 3058 assert(c_seg->c_state == C_ON_AGE_Q); 3059 3060 if (flush_all == TRUE && c_seg->c_generation_id > c_generation_id_flush_barrier) { 3061 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 5, 0, 0, 0); 3062 break; 3063 } 3064 3065 lck_mtx_lock_spin_always(&c_seg->c_lock); 3066 3067 if (c_seg->c_busy) { 3068 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 6, (void*) VM_KERNEL_ADDRPERM(c_seg), 0, 0); 3069 3070 lck_mtx_unlock_always(c_list_lock); 3071 c_seg_wait_on_busy(c_seg); 3072 lck_mtx_lock_spin_always(c_list_lock); 3073 3074 continue; 3075 } 3076 C_SEG_BUSY(c_seg); 3077 3078 if (c_seg_do_minor_compaction_and_unlock(c_seg, FALSE, TRUE, TRUE)) { 3079 /* 3080 * found an empty c_segment and freed it 3081 * so go grab the next guy in the queue 3082 */ 3083 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 7, 0, 0, 0); 3084 c_seg_major_compact_stats[c_seg_major_compact_stats_now].count_of_freed_segs++; 3085 continue; 3086 } 3087 /* 3088 * Major compaction 3089 */ 3090 keep_compacting = TRUE; 3091 switch_state = TRUE; 3092 3093 while (keep_compacting == TRUE) { 3094 assert(c_seg->c_busy); 3095 3096 /* look for another segment to consolidate */ 3097 3098 c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list); 3099 3100 if (queue_end(&c_age_list_head, (queue_entry_t)c_seg_next)) { 3101 break; 3102 } 3103 3104 assert(c_seg_next->c_state == C_ON_AGE_Q); 3105 3106 number_considered++; 3107 3108 if (c_seg_major_compact_ok(c_seg, c_seg_next) == FALSE) { 3109 break; 3110 } 3111 3112 lck_mtx_lock_spin_always(&c_seg_next->c_lock); 3113 3114 if (c_seg_next->c_busy) { 3115 /* 3116 * We are going to block for our neighbor. 3117 * If our c_seg is wanted, we should unbusy 3118 * it because we don't know how long we might 3119 * have to block here. 
3120 */ 3121 if (c_seg->c_wanted) { 3122 lck_mtx_unlock_always(&c_seg_next->c_lock); 3123 switch_state = FALSE; 3124 c_seg_major_compact_stats[c_seg_major_compact_stats_now].bailed_compactions++; 3125 wanted_cseg_found++; 3126 break; 3127 } 3128 3129 lck_mtx_unlock_always(c_list_lock); 3130 3131 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 8, (void*) VM_KERNEL_ADDRPERM(c_seg_next), 0, 0); 3132 3133 c_seg_wait_on_busy(c_seg_next); 3134 lck_mtx_lock_spin_always(c_list_lock); 3135 3136 continue; 3137 } 3138 /* grab that segment */ 3139 C_SEG_BUSY(c_seg_next); 3140 3141 bytes_to_free = C_SEG_OFFSET_TO_BYTES(c_seg_next->c_populated_offset); 3142 if (c_seg_do_minor_compaction_and_unlock(c_seg_next, FALSE, TRUE, TRUE)) { 3143 /* 3144 * found an empty c_segment and freed it 3145 * so we can't continue to use c_seg_next 3146 */ 3147 bytes_freed += bytes_to_free; 3148 c_seg_major_compact_stats[c_seg_major_compact_stats_now].count_of_freed_segs++; 3149 continue; 3150 } 3151 3152 /* unlock the list ... */ 3153 lck_mtx_unlock_always(c_list_lock); 3154 3155 /* do the major compaction */ 3156 3157 keep_compacting = c_seg_major_compact(c_seg, c_seg_next); 3158 3159 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 9, keep_compacting, 0, 0); 3160 3161 PAGE_REPLACEMENT_DISALLOWED(TRUE); 3162 3163 lck_mtx_lock_spin_always(&c_seg_next->c_lock); 3164 /* 3165 * run a minor compaction on the donor segment 3166 * since we pulled at least some of it's 3167 * data into our target... if we've emptied 3168 * it, now is a good time to free it which 3169 * c_seg_minor_compaction_and_unlock also takes care of 3170 * 3171 * by passing TRUE, we ask for c_busy to be cleared 3172 * and c_wanted to be taken care of 3173 */ 3174 bytes_to_free = C_SEG_OFFSET_TO_BYTES(c_seg_next->c_populated_offset); 3175 if (c_seg_minor_compaction_and_unlock(c_seg_next, TRUE)) { 3176 bytes_freed += bytes_to_free; 3177 c_seg_major_compact_stats[c_seg_major_compact_stats_now].count_of_freed_segs++; 3178 } else { 3179 bytes_to_free -= C_SEG_OFFSET_TO_BYTES(c_seg_next->c_populated_offset); 3180 bytes_freed += bytes_to_free; 3181 } 3182 3183 PAGE_REPLACEMENT_DISALLOWED(FALSE); 3184 3185 /* relock the list */ 3186 lck_mtx_lock_spin_always(c_list_lock); 3187 3188 if (c_seg->c_wanted) { 3189 /* 3190 * Our c_seg is in demand. Let's 3191 * unbusy it and wakeup the waiters 3192 * instead of continuing the compaction 3193 * because we could be in this loop 3194 * for a while. 
3195 */ 3196 switch_state = FALSE; 3197 wanted_cseg_found++; 3198 c_seg_major_compact_stats[c_seg_major_compact_stats_now].bailed_compactions++; 3199 break; 3200 } 3201 } /* major compaction */ 3202 3203 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 10, number_considered, wanted_cseg_found, 0); 3204 3205 lck_mtx_lock_spin_always(&c_seg->c_lock); 3206 3207 assert(c_seg->c_busy); 3208 assert(!c_seg->c_on_minorcompact_q); 3209 3210 if (switch_state) { 3211 if (VM_CONFIG_SWAP_IS_ACTIVE) { 3212 /* 3213 * This mode of putting a generic c_seg on the swapout list is 3214 * only supported when we have general swapping enabled 3215 */ 3216 clock_sec_t lnow; 3217 clock_nsec_t lnsec; 3218 clock_get_system_nanotime(&lnow, &lnsec); 3219 if (c_seg->c_agedin_ts && (lnow - c_seg->c_agedin_ts) < 30) { 3220 vmcs_stats.unripe_under_30s++; 3221 } else if (c_seg->c_agedin_ts && (lnow - c_seg->c_agedin_ts) < 60) { 3222 vmcs_stats.unripe_under_60s++; 3223 } else if (c_seg->c_agedin_ts && (lnow - c_seg->c_agedin_ts) < 300) { 3224 vmcs_stats.unripe_under_300s++; 3225 } 3226 3227 c_seg_switch_state(c_seg, C_ON_SWAPOUT_Q, FALSE); 3228 } else { 3229 if ((vm_swapout_ripe_segments == TRUE && c_overage_swapped_count < c_overage_swapped_limit)) { 3230 assert(VM_CONFIG_SWAP_IS_PRESENT); 3231 /* 3232 * we are running compressor sweeps with swap-behind 3233 * make sure the c_seg has aged enough before swapping it 3234 * out... 3235 */ 3236 if ((now - c_seg->c_creation_ts) >= vm_ripe_target_age) { 3237 c_seg->c_overage_swap = TRUE; 3238 c_overage_swapped_count++; 3239 c_seg_switch_state(c_seg, C_ON_SWAPOUT_Q, FALSE); 3240 } 3241 } 3242 } 3243 if (c_seg->c_state == C_ON_AGE_Q) { 3244 /* 3245 * this c_seg didn't get moved to the swapout queue 3246 * so we need to move it out of the way... 3247 * we just did a major compaction on it so put it 3248 * on that queue 3249 */ 3250 c_seg_switch_state(c_seg, C_ON_MAJORCOMPACT_Q, FALSE); 3251 } else { 3252 c_seg_major_compact_stats[c_seg_major_compact_stats_now].wasted_space_in_swapouts += C_SEG_BUFSIZE - c_seg->c_bytes_used; 3253 c_seg_major_compact_stats[c_seg_major_compact_stats_now].count_of_swapouts++; 3254 } 3255 } 3256 3257 C_SEG_WAKEUP_DONE(c_seg); 3258 3259 lck_mtx_unlock_always(&c_seg->c_lock); 3260 3261 if (c_swapout_count) { 3262 /* 3263 * We don't pause/yield here because we will either 3264 * yield below or at the top of the loop with the 3265 * assert_wait_timeout. 3266 */ 3267 if (!vm_swapout_thread_running) { 3268 thread_wakeup((event_t)&c_swapout_list_head); 3269 } 3270 } 3271 3272 if (number_considered >= yield_after_considered_per_pass) { 3273 if (wanted_cseg_found) { 3274 /* 3275 * We stopped major compactions on a c_seg 3276 * that is wanted. We don't know the priority 3277 * of the waiter unfortunately but we are at 3278 * a very high priority and so, just in case 3279 * the waiter is a critical system daemon or 3280 * UI thread, let's give up the CPU in case 3281 * the system is running a few CPU intensive 3282 * tasks. 
3283 */ 3284 lck_mtx_unlock_always(c_list_lock); 3285 3286 mutex_pause(2); /* 100us yield */ 3287 3288 number_yields++; 3289 3290 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 11, number_considered, number_yields, 0); 3291 3292 lck_mtx_lock_spin_always(c_list_lock); 3293 } 3294 3295 number_considered = 0; 3296 wanted_cseg_found = 0; 3297 } 3298 } 3299 clock_get_system_nanotime(&now, &nsec); 3300 end_ts.tv_sec = (int) now; 3301 end_ts.tv_nsec = nsec; 3302 3303 SUB_MACH_TIMESPEC(&end_ts, &start_ts); 3304 3305 delta_usec = (end_ts.tv_sec * USEC_PER_SEC) + (end_ts.tv_nsec / NSEC_PER_USEC) - (number_yields * 100); 3306 3307 delta_usec = MAX(1, delta_usec); /* we could have 0 usec run if conditions weren't right */ 3308 3309 c_seg_major_compact_stats[c_seg_major_compact_stats_now].bytes_freed_rate_us = (bytes_freed / delta_usec); 3310 3311 if ((c_seg_major_compact_stats_now + 1) == C_SEG_MAJOR_COMPACT_STATS_MAX) { 3312 c_seg_major_compact_stats_now = 0; 3313 } else { 3314 c_seg_major_compact_stats_now++; 3315 } 3316 3317 assert(c_seg_major_compact_stats_now < C_SEG_MAJOR_COMPACT_STATS_MAX); 3318 3319 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_END, c_age_count, c_minor_count, c_major_count, vm_page_free_count); 3320 } 3321 3322 3323 static c_segment_t 3324 c_seg_allocate(c_segment_t *current_chead) 3325 { 3326 c_segment_t c_seg; 3327 int min_needed; 3328 int size_to_populate; 3329 3330 #if XNU_TARGET_OS_OSX 3331 if (vm_compressor_low_on_space()) { 3332 vm_compressor_take_paging_space_action(); 3333 } 3334 #endif /* XNU_TARGET_OS_OSX */ 3335 3336 if ((c_seg = *current_chead) == NULL) { 3337 uint32_t c_segno; 3338 3339 lck_mtx_lock_spin_always(c_list_lock); 3340 3341 while (c_segments_busy == TRUE) { 3342 assert_wait((event_t) (&c_segments_busy), THREAD_UNINT); 3343 3344 lck_mtx_unlock_always(c_list_lock); 3345 3346 thread_block(THREAD_CONTINUE_NULL); 3347 3348 lck_mtx_lock_spin_always(c_list_lock); 3349 } 3350 if (c_free_segno_head == (uint32_t)-1) { 3351 uint32_t c_segments_available_new; 3352 uint32_t compressed_pages; 3353 3354 #if CONFIG_FREEZE 3355 if (freezer_incore_cseg_acct) { 3356 compressed_pages = c_segment_pages_compressed_incore; 3357 } else { 3358 compressed_pages = c_segment_pages_compressed; 3359 } 3360 #else 3361 compressed_pages = c_segment_pages_compressed; 3362 #endif /* CONFIG_FREEZE */ 3363 3364 if (c_segments_available >= c_segments_limit || compressed_pages >= c_segment_pages_compressed_limit) { 3365 lck_mtx_unlock_always(c_list_lock); 3366 3367 return NULL; 3368 } 3369 c_segments_busy = TRUE; 3370 lck_mtx_unlock_always(c_list_lock); 3371 3372 kernel_memory_populate(compressor_map, (vm_offset_t)c_segments_next_page, 3373 PAGE_SIZE, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR); 3374 c_segments_next_page += PAGE_SIZE; 3375 3376 c_segments_available_new = c_segments_available + C_SEGMENTS_PER_PAGE; 3377 3378 if (c_segments_available_new > c_segments_limit) { 3379 c_segments_available_new = c_segments_limit; 3380 } 3381 3382 for (c_segno = c_segments_available + 1; c_segno < c_segments_available_new; c_segno++) { 3383 c_segments[c_segno - 1].c_segno = c_segno; 3384 } 3385 3386 lck_mtx_lock_spin_always(c_list_lock); 3387 3388 c_segments[c_segno - 1].c_segno = c_free_segno_head; 3389 c_free_segno_head = c_segments_available; 3390 c_segments_available = c_segments_available_new; 3391 3392 c_segments_busy = FALSE; 3393 thread_wakeup((event_t) (&c_segments_busy)); 3394 } 3395 
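		/*
		 * At this point c_segments[] is guaranteed to contain at least one
		 * free entry.  Free entries form a singly linked list threaded
		 * through the c_segu union: c_free_segno_head is the index of the
		 * first free entry and each free entry's c_segno field holds the
		 * index of the next, so popping the head is just the two assignments
		 * below.
		 */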
c_segno = c_free_segno_head; 3396 assert(c_segno >= 0 && c_segno < c_segments_limit); 3397 3398 c_free_segno_head = (uint32_t)c_segments[c_segno].c_segno; 3399 3400 /* 3401 * do the rest of the bookkeeping now while we're still behind 3402 * the list lock and grab our generation id now into a local 3403 * so that we can install it once we have the c_seg allocated 3404 */ 3405 c_segment_count++; 3406 if (c_segment_count > c_segment_count_max) { 3407 c_segment_count_max = c_segment_count; 3408 } 3409 3410 lck_mtx_unlock_always(c_list_lock); 3411 3412 c_seg = zalloc_flags(compressor_segment_zone, Z_WAITOK | Z_ZERO); 3413 3414 c_seg->c_store.c_buffer = (int32_t *)C_SEG_BUFFER_ADDRESS(c_segno); 3415 3416 lck_mtx_init(&c_seg->c_lock, &vm_compressor_lck_grp, LCK_ATTR_NULL); 3417 3418 c_seg->c_state = C_IS_EMPTY; 3419 c_seg->c_firstemptyslot = C_SLOT_MAX_INDEX; 3420 c_seg->c_mysegno = c_segno; 3421 3422 lck_mtx_lock_spin_always(c_list_lock); 3423 c_empty_count++; 3424 c_seg_switch_state(c_seg, C_IS_FILLING, FALSE); 3425 c_segments[c_segno].c_seg = c_seg; 3426 assert(c_segments[c_segno].c_segno > c_segments_available); 3427 lck_mtx_unlock_always(c_list_lock); 3428 3429 *current_chead = c_seg; 3430 3431 #if DEVELOPMENT || DEBUG 3432 C_SEG_MAKE_WRITEABLE(c_seg); 3433 #endif 3434 } 3435 c_seg_alloc_nextslot(c_seg); 3436 3437 size_to_populate = C_SEG_ALLOCSIZE - C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset); 3438 3439 if (size_to_populate) { 3440 min_needed = PAGE_SIZE + (C_SEG_ALLOCSIZE - C_SEG_BUFSIZE); 3441 3442 if (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset) < (unsigned) min_needed) { 3443 if (size_to_populate > C_SEG_MAX_POPULATE_SIZE) { 3444 size_to_populate = C_SEG_MAX_POPULATE_SIZE; 3445 } 3446 3447 OSAddAtomic64(size_to_populate / PAGE_SIZE, &vm_pageout_vminfo.vm_compressor_pages_grabbed); 3448 3449 kernel_memory_populate(compressor_map, 3450 (vm_offset_t) &c_seg->c_store.c_buffer[c_seg->c_populated_offset], 3451 size_to_populate, 3452 KMA_COMPRESSOR, 3453 VM_KERN_MEMORY_COMPRESSOR); 3454 } else { 3455 size_to_populate = 0; 3456 } 3457 } 3458 PAGE_REPLACEMENT_DISALLOWED(TRUE); 3459 3460 lck_mtx_lock_spin_always(&c_seg->c_lock); 3461 3462 if (size_to_populate) { 3463 c_seg->c_populated_offset += C_SEG_BYTES_TO_OFFSET(size_to_populate); 3464 } 3465 3466 return c_seg; 3467 } 3468 3469 #if DEVELOPMENT || DEBUG 3470 #if CONFIG_FREEZE 3471 extern boolean_t memorystatus_freeze_to_memory; 3472 #endif /* CONFIG_FREEZE */ 3473 #endif /* DEVELOPMENT || DEBUG */ 3474 3475 static void 3476 c_current_seg_filled(c_segment_t c_seg, c_segment_t *current_chead) 3477 { 3478 uint32_t unused_bytes; 3479 uint32_t offset_to_depopulate; 3480 int new_state = C_ON_AGE_Q; 3481 clock_sec_t sec; 3482 clock_nsec_t nsec; 3483 boolean_t head_insert = FALSE; 3484 3485 unused_bytes = trunc_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset)); 3486 3487 if (unused_bytes) { 3488 offset_to_depopulate = C_SEG_BYTES_TO_OFFSET(round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_nextoffset))); 3489 3490 /* 3491 * release the extra physical page(s) at the end of the segment 3492 */ 3493 lck_mtx_unlock_always(&c_seg->c_lock); 3494 3495 kernel_memory_depopulate( 3496 compressor_map, 3497 (vm_offset_t) &c_seg->c_store.c_buffer[offset_to_depopulate], 3498 unused_bytes, 3499 KMA_COMPRESSOR, 3500 VM_KERN_MEMORY_COMPRESSOR); 3501 3502 lck_mtx_lock_spin_always(&c_seg->c_lock); 3503 3504 c_seg->c_populated_offset = offset_to_depopulate; 3505 } 3506 
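	/*
	 * Worked example of the trim above (illustrative numbers, assuming 4 KB
	 * pages): if the filled segment had 40 KB populated but only 33 KB of
	 * compressed data, offset_to_depopulate corresponds to round_page(33 KB)
	 * == 36 KB and unused_bytes == 4 KB, so exactly one physical page is
	 * returned to the compressor pool and c_populated_offset is pulled back
	 * to the 36 KB mark.
	 */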
assert(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset) <= C_SEG_BUFSIZE); 3507 3508 #if DEVELOPMENT || DEBUG 3509 { 3510 boolean_t c_seg_was_busy = FALSE; 3511 3512 if (!c_seg->c_busy) { 3513 C_SEG_BUSY(c_seg); 3514 } else { 3515 c_seg_was_busy = TRUE; 3516 } 3517 3518 lck_mtx_unlock_always(&c_seg->c_lock); 3519 3520 C_SEG_WRITE_PROTECT(c_seg); 3521 3522 lck_mtx_lock_spin_always(&c_seg->c_lock); 3523 3524 if (c_seg_was_busy == FALSE) { 3525 C_SEG_WAKEUP_DONE(c_seg); 3526 } 3527 } 3528 #endif 3529 3530 #if CONFIG_FREEZE 3531 if (current_chead == (c_segment_t*) &(freezer_context_global.freezer_ctx_chead) && 3532 VM_CONFIG_SWAP_IS_PRESENT && 3533 VM_CONFIG_FREEZER_SWAP_IS_ACTIVE 3534 #if DEVELOPMENT || DEBUG 3535 && !memorystatus_freeze_to_memory 3536 #endif /* DEVELOPMENT || DEBUG */ 3537 ) { 3538 new_state = C_ON_SWAPOUT_Q; 3539 } 3540 #endif /* CONFIG_FREEZE */ 3541 3542 if (vm_darkwake_mode == TRUE) { 3543 new_state = C_ON_SWAPOUT_Q; 3544 head_insert = TRUE; 3545 } 3546 3547 clock_get_system_nanotime(&sec, &nsec); 3548 c_seg->c_creation_ts = (uint32_t)sec; 3549 3550 lck_mtx_lock_spin_always(c_list_lock); 3551 3552 c_seg->c_generation_id = c_generation_id++; 3553 c_seg_switch_state(c_seg, new_state, head_insert); 3554 3555 #if CONFIG_FREEZE 3556 if (c_seg->c_state == C_ON_SWAPOUT_Q) { 3557 /* 3558 * darkwake and freezer can't co-exist together 3559 * We'll need to fix this accounting as a start. 3560 */ 3561 assert(vm_darkwake_mode == FALSE); 3562 c_seg_update_task_owner(c_seg, freezer_context_global.freezer_ctx_task); 3563 freezer_context_global.freezer_ctx_swapped_bytes += c_seg->c_bytes_used; 3564 } 3565 #endif /* CONFIG_FREEZE */ 3566 3567 if (c_seg->c_state == C_ON_AGE_Q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) { 3568 #if CONFIG_FREEZE 3569 assert(c_seg->c_task_owner == NULL); 3570 #endif /* CONFIG_FREEZE */ 3571 c_seg_need_delayed_compaction(c_seg, TRUE); 3572 } 3573 3574 lck_mtx_unlock_always(c_list_lock); 3575 3576 if (c_seg->c_state == C_ON_SWAPOUT_Q) { 3577 /* 3578 * Darkwake and Freeze configs always 3579 * wake up the swapout thread because 3580 * the compactor thread that normally handles 3581 * it may not be running as much in these 3582 * configs. 
3583 */ 3584 thread_wakeup((event_t)&c_swapout_list_head); 3585 } 3586 3587 *current_chead = NULL; 3588 } 3589 3590 3591 /* 3592 * returns with c_seg locked 3593 */ 3594 void 3595 c_seg_swapin_requeue(c_segment_t c_seg, boolean_t has_data, boolean_t minor_compact_ok, boolean_t age_on_swapin_q) 3596 { 3597 clock_sec_t sec; 3598 clock_nsec_t nsec; 3599 3600 clock_get_system_nanotime(&sec, &nsec); 3601 3602 lck_mtx_lock_spin_always(c_list_lock); 3603 lck_mtx_lock_spin_always(&c_seg->c_lock); 3604 3605 assert(c_seg->c_busy_swapping); 3606 assert(c_seg->c_busy); 3607 3608 c_seg->c_busy_swapping = 0; 3609 3610 if (c_seg->c_overage_swap == TRUE) { 3611 c_overage_swapped_count--; 3612 c_seg->c_overage_swap = FALSE; 3613 } 3614 if (has_data == TRUE) { 3615 if (age_on_swapin_q == TRUE) { 3616 c_seg_switch_state(c_seg, C_ON_SWAPPEDIN_Q, FALSE); 3617 } else { 3618 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE); 3619 } 3620 3621 if (minor_compact_ok == TRUE && !c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) { 3622 c_seg_need_delayed_compaction(c_seg, TRUE); 3623 } 3624 } else { 3625 c_seg->c_store.c_buffer = (int32_t*) NULL; 3626 c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0); 3627 3628 c_seg_switch_state(c_seg, C_ON_BAD_Q, FALSE); 3629 } 3630 c_seg->c_swappedin_ts = (uint32_t)sec; 3631 3632 lck_mtx_unlock_always(c_list_lock); 3633 } 3634 3635 3636 3637 /* 3638 * c_seg has to be locked and is returned locked if the c_seg isn't freed 3639 * PAGE_REPLACMENT_DISALLOWED has to be TRUE on entry and is returned TRUE 3640 * c_seg_swapin returns 1 if the c_seg was freed, 0 otherwise 3641 */ 3642 3643 int 3644 c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction, boolean_t age_on_swapin_q) 3645 { 3646 vm_offset_t addr = 0; 3647 uint32_t io_size = 0; 3648 uint64_t f_offset; 3649 3650 assert(C_SEG_IS_ONDISK(c_seg)); 3651 3652 #if !CHECKSUM_THE_SWAP 3653 c_seg_trim_tail(c_seg); 3654 #endif 3655 io_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset)); 3656 f_offset = c_seg->c_store.c_swap_handle; 3657 3658 C_SEG_BUSY(c_seg); 3659 c_seg->c_busy_swapping = 1; 3660 3661 /* 3662 * This thread is likely going to block for I/O. 3663 * Make sure it is ready to run when the I/O completes because 3664 * it needs to clear the busy bit on the c_seg so that other 3665 * waiting threads can make progress too. To do that, boost 3666 * the rwlock_count so that the priority is boosted. 
3667 */ 3668 set_thread_rwlock_boost(); 3669 lck_mtx_unlock_always(&c_seg->c_lock); 3670 3671 PAGE_REPLACEMENT_DISALLOWED(FALSE); 3672 3673 addr = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno); 3674 c_seg->c_store.c_buffer = (int32_t*) addr; 3675 3676 kernel_memory_populate(compressor_map, addr, io_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR); 3677 3678 if (vm_swap_get(c_seg, f_offset, io_size) != KERN_SUCCESS) { 3679 PAGE_REPLACEMENT_DISALLOWED(TRUE); 3680 3681 kernel_memory_depopulate(compressor_map, addr, io_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR); 3682 3683 c_seg_swapin_requeue(c_seg, FALSE, TRUE, age_on_swapin_q); 3684 } else { 3685 #if ENCRYPTED_SWAP 3686 vm_swap_decrypt(c_seg); 3687 #endif /* ENCRYPTED_SWAP */ 3688 3689 #if CHECKSUM_THE_SWAP 3690 if (c_seg->cseg_swap_size != io_size) { 3691 panic("swapin size doesn't match swapout size"); 3692 } 3693 3694 if (c_seg->cseg_hash != vmc_hash((char*) c_seg->c_store.c_buffer, (int)io_size)) { 3695 panic("c_seg_swapin - Swap hash mismatch\n"); 3696 } 3697 #endif /* CHECKSUM_THE_SWAP */ 3698 3699 PAGE_REPLACEMENT_DISALLOWED(TRUE); 3700 3701 c_seg_swapin_requeue(c_seg, TRUE, force_minor_compaction == TRUE ? FALSE : TRUE, age_on_swapin_q); 3702 3703 #if CONFIG_FREEZE 3704 /* 3705 * c_seg_swapin_requeue() returns with the c_seg lock held. 3706 */ 3707 if (!lck_mtx_try_lock_spin_always(c_list_lock)) { 3708 assert(c_seg->c_busy); 3709 3710 lck_mtx_unlock_always(&c_seg->c_lock); 3711 lck_mtx_lock_spin_always(c_list_lock); 3712 lck_mtx_lock_spin_always(&c_seg->c_lock); 3713 } 3714 3715 if (c_seg->c_task_owner) { 3716 c_seg_update_task_owner(c_seg, NULL); 3717 } 3718 3719 lck_mtx_unlock_always(c_list_lock); 3720 3721 OSAddAtomic(c_seg->c_slots_used, &c_segment_pages_compressed_incore); 3722 #endif /* CONFIG_FREEZE */ 3723 3724 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used); 3725 3726 if (force_minor_compaction == TRUE) { 3727 if (c_seg_minor_compaction_and_unlock(c_seg, FALSE)) { 3728 /* 3729 * c_seg was completely empty so it was freed, 3730 * so be careful not to reference it again 3731 * 3732 * Drop the rwlock_count so that the thread priority 3733 * is returned back to where it is supposed to be. 3734 */ 3735 clear_thread_rwlock_boost(); 3736 return 1; 3737 } 3738 3739 lck_mtx_lock_spin_always(&c_seg->c_lock); 3740 } 3741 } 3742 C_SEG_WAKEUP_DONE(c_seg); 3743 3744 /* 3745 * Drop the rwlock_count so that the thread priority 3746 * is returned back to where it is supposed to be. 
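 * (Returning 0 here: the segment stayed resident and, per the contract above c_seg_swapin(), it is returned locked; the freed case returned 1 above.)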
3747 */ 3748 clear_thread_rwlock_boost(); 3749 3750 return 0; 3751 } 3752 3753 3754 static void 3755 c_segment_sv_hash_drop_ref(int hash_indx) 3756 { 3757 struct c_sv_hash_entry o_sv_he, n_sv_he; 3758 3759 while (1) { 3760 o_sv_he.he_record = c_segment_sv_hash_table[hash_indx].he_record; 3761 3762 n_sv_he.he_ref = o_sv_he.he_ref - 1; 3763 n_sv_he.he_data = o_sv_he.he_data; 3764 3765 if (OSCompareAndSwap64((UInt64)o_sv_he.he_record, (UInt64)n_sv_he.he_record, (UInt64 *) &c_segment_sv_hash_table[hash_indx].he_record) == TRUE) { 3766 if (n_sv_he.he_ref == 0) { 3767 OSAddAtomic(-1, &c_segment_svp_in_hash); 3768 } 3769 break; 3770 } 3771 } 3772 } 3773 3774 3775 static int 3776 c_segment_sv_hash_insert(uint32_t data) 3777 { 3778 int hash_sindx; 3779 int misses; 3780 struct c_sv_hash_entry o_sv_he, n_sv_he; 3781 boolean_t got_ref = FALSE; 3782 3783 if (data == 0) { 3784 OSAddAtomic(1, &c_segment_svp_zero_compressions); 3785 } else { 3786 OSAddAtomic(1, &c_segment_svp_nonzero_compressions); 3787 } 3788 3789 hash_sindx = data & C_SV_HASH_MASK; 3790 3791 for (misses = 0; misses < C_SV_HASH_MAX_MISS; misses++) { 3792 o_sv_he.he_record = c_segment_sv_hash_table[hash_sindx].he_record; 3793 3794 while (o_sv_he.he_data == data || o_sv_he.he_ref == 0) { 3795 n_sv_he.he_ref = o_sv_he.he_ref + 1; 3796 n_sv_he.he_data = data; 3797 3798 if (OSCompareAndSwap64((UInt64)o_sv_he.he_record, (UInt64)n_sv_he.he_record, (UInt64 *) &c_segment_sv_hash_table[hash_sindx].he_record) == TRUE) { 3799 if (n_sv_he.he_ref == 1) { 3800 OSAddAtomic(1, &c_segment_svp_in_hash); 3801 } 3802 got_ref = TRUE; 3803 break; 3804 } 3805 o_sv_he.he_record = c_segment_sv_hash_table[hash_sindx].he_record; 3806 } 3807 if (got_ref == TRUE) { 3808 break; 3809 } 3810 hash_sindx++; 3811 3812 if (hash_sindx == C_SV_HASH_SIZE) { 3813 hash_sindx = 0; 3814 } 3815 } 3816 if (got_ref == FALSE) { 3817 return -1; 3818 } 3819 3820 return hash_sindx; 3821 } 3822 3823 3824 #if RECORD_THE_COMPRESSED_DATA 3825 3826 static void 3827 c_compressed_record_data(char *src, int c_size) 3828 { 3829 if ((c_compressed_record_cptr + c_size + 4) >= c_compressed_record_ebuf) { 3830 panic("c_compressed_record_cptr >= c_compressed_record_ebuf"); 3831 } 3832 3833 *(int *)((void *)c_compressed_record_cptr) = c_size; 3834 3835 c_compressed_record_cptr += 4; 3836 3837 memcpy(c_compressed_record_cptr, src, c_size); 3838 c_compressed_record_cptr += c_size; 3839 } 3840 #endif 3841 3842 3843 static int 3844 c_compress_page(char *src, c_slot_mapping_t slot_ptr, c_segment_t *current_chead, char *scratch_buf) 3845 { 3846 int c_size; 3847 int c_rounded_size = 0; 3848 int max_csize; 3849 c_slot_t cs; 3850 c_segment_t c_seg; 3851 3852 KERNEL_DEBUG(0xe0400000 | DBG_FUNC_START, *current_chead, 0, 0, 0, 0); 3853 retry: 3854 if ((c_seg = c_seg_allocate(current_chead)) == NULL) { 3855 return 1; 3856 } 3857 /* 3858 * returns with c_seg lock held 3859 * and PAGE_REPLACEMENT_DISALLOWED(TRUE)... 
3860 * c_nextslot has been allocated and 3861 * c_store.c_buffer populated 3862 */ 3863 assert(c_seg->c_state == C_IS_FILLING); 3864 3865 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_seg->c_nextslot); 3866 3867 C_SLOT_ASSERT_PACKABLE(slot_ptr); 3868 cs->c_packed_ptr = C_SLOT_PACK_PTR(slot_ptr); 3869 3870 cs->c_offset = c_seg->c_nextoffset; 3871 3872 max_csize = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)cs->c_offset); 3873 3874 if (max_csize > PAGE_SIZE) { 3875 max_csize = PAGE_SIZE; 3876 } 3877 3878 #if CHECKSUM_THE_DATA 3879 cs->c_hash_data = vmc_hash(src, PAGE_SIZE); 3880 #endif 3881 boolean_t incomp_copy = FALSE; 3882 int max_csize_adj = (max_csize - 4); 3883 3884 if (vm_compressor_algorithm() != VM_COMPRESSOR_DEFAULT_CODEC) { 3885 #if defined(__arm__) || defined(__arm64__) 3886 uint16_t ccodec = CINVALID; 3887 uint32_t inline_popcount; 3888 if (max_csize >= C_SEG_OFFSET_ALIGNMENT_BOUNDARY) { 3889 c_size = metacompressor((const uint8_t *) src, 3890 (uint8_t *) &c_seg->c_store.c_buffer[cs->c_offset], 3891 max_csize_adj, &ccodec, 3892 scratch_buf, &incomp_copy, &inline_popcount); 3893 #if __ARM_WKDM_POPCNT__ 3894 cs->c_inline_popcount = inline_popcount; 3895 #else 3896 assert(inline_popcount == C_SLOT_NO_POPCOUNT); 3897 #endif 3898 3899 #if C_SEG_OFFSET_ALIGNMENT_BOUNDARY > 4 3900 if (c_size > max_csize_adj) { 3901 c_size = -1; 3902 } 3903 #endif 3904 } else { 3905 c_size = -1; 3906 } 3907 assert(ccodec == CCWK || ccodec == CCLZ4); 3908 cs->c_codec = ccodec; 3909 #endif 3910 } else { 3911 #if defined(__arm__) || defined(__arm64__) 3912 cs->c_codec = CCWK; 3913 #endif 3914 #if defined(__arm64__) 3915 __unreachable_ok_push 3916 if (PAGE_SIZE == 4096) { 3917 c_size = WKdm_compress_4k((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset], 3918 (WK_word *)(uintptr_t)scratch_buf, max_csize_adj); 3919 } else { 3920 c_size = WKdm_compress_16k((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset], 3921 (WK_word *)(uintptr_t)scratch_buf, max_csize_adj); 3922 } 3923 __unreachable_ok_pop 3924 #else 3925 c_size = WKdm_compress_new((const WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset], 3926 (WK_word *)(uintptr_t)scratch_buf, max_csize_adj); 3927 #endif 3928 } 3929 assertf(((c_size <= max_csize_adj) && (c_size >= -1)), 3930 "c_size invalid (%d, %d), cur compressions: %d", c_size, max_csize_adj, c_segment_pages_compressed); 3931 3932 if (c_size == -1) { 3933 if (max_csize < PAGE_SIZE) { 3934 c_current_seg_filled(c_seg, current_chead); 3935 assert(*current_chead == NULL); 3936 3937 lck_mtx_unlock_always(&c_seg->c_lock); 3938 /* TODO: it may be worth requiring codecs to distinguish 3939 * between incompressible inputs and failures due to 3940 * budget exhaustion. 
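 * As written, a -1 result when the slot's budget was short (max_csize < PAGE_SIZE) is treated as budget exhaustion: the current segment was closed out above and we retry the page in a fresh one, while a -1 with a full page of budget is treated as incompressible and the page is stored verbatim below.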
3941 */ 3942 PAGE_REPLACEMENT_DISALLOWED(FALSE); 3943 goto retry; 3944 } 3945 c_size = PAGE_SIZE; 3946 3947 if (incomp_copy == FALSE) { 3948 memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size); 3949 } 3950 3951 OSAddAtomic(1, &c_segment_noncompressible_pages); 3952 } else if (c_size == 0) { 3953 int hash_index; 3954 3955 /* 3956 * special case - this is a page completely full of a single 32 bit value 3957 */ 3958 hash_index = c_segment_sv_hash_insert(*(uint32_t *)(uintptr_t)src); 3959 3960 if (hash_index != -1) { 3961 slot_ptr->s_cindx = hash_index; 3962 slot_ptr->s_cseg = C_SV_CSEG_ID; 3963 3964 OSAddAtomic(1, &c_segment_svp_hash_succeeded); 3965 #if RECORD_THE_COMPRESSED_DATA 3966 c_compressed_record_data(src, 4); 3967 #endif 3968 goto sv_compression; 3969 } 3970 c_size = 4; 3971 3972 memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size); 3973 3974 OSAddAtomic(1, &c_segment_svp_hash_failed); 3975 } 3976 3977 #if RECORD_THE_COMPRESSED_DATA 3978 c_compressed_record_data((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size); 3979 #endif 3980 #if CHECKSUM_THE_COMPRESSED_DATA 3981 cs->c_hash_compressed_data = vmc_hash((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size); 3982 #endif 3983 #if POPCOUNT_THE_COMPRESSED_DATA 3984 cs->c_pop_cdata = vmc_pop((uintptr_t) &c_seg->c_store.c_buffer[cs->c_offset], c_size); 3985 #endif 3986 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 3987 3988 PACK_C_SIZE(cs, c_size); 3989 c_seg->c_bytes_used += c_rounded_size; 3990 c_seg->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size); 3991 c_seg->c_slots_used++; 3992 3993 slot_ptr->s_cindx = c_seg->c_nextslot++; 3994 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */ 3995 slot_ptr->s_cseg = c_seg->c_mysegno + 1; 3996 3997 sv_compression: 3998 if (c_seg->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg->c_nextslot >= C_SLOT_MAX_INDEX) { 3999 c_current_seg_filled(c_seg, current_chead); 4000 assert(*current_chead == NULL); 4001 } 4002 lck_mtx_unlock_always(&c_seg->c_lock); 4003 4004 PAGE_REPLACEMENT_DISALLOWED(FALSE); 4005 4006 #if RECORD_THE_COMPRESSED_DATA 4007 if ((c_compressed_record_cptr - c_compressed_record_sbuf) >= C_SEG_ALLOCSIZE) { 4008 c_compressed_record_write(c_compressed_record_sbuf, (int)(c_compressed_record_cptr - c_compressed_record_sbuf)); 4009 c_compressed_record_cptr = c_compressed_record_sbuf; 4010 } 4011 #endif 4012 if (c_size) { 4013 OSAddAtomic64(c_size, &c_segment_compressed_bytes); 4014 OSAddAtomic64(c_rounded_size, &compressor_bytes_used); 4015 } 4016 OSAddAtomic64(PAGE_SIZE, &c_segment_input_bytes); 4017 4018 OSAddAtomic(1, &c_segment_pages_compressed); 4019 #if CONFIG_FREEZE 4020 OSAddAtomic(1, &c_segment_pages_compressed_incore); 4021 #endif /* CONFIG_FREEZE */ 4022 OSAddAtomic(1, &sample_period_compression_count); 4023 4024 KERNEL_DEBUG(0xe0400000 | DBG_FUNC_END, *current_chead, c_size, c_segment_input_bytes, c_segment_compressed_bytes, 0); 4025 4026 return 0; 4027 } 4028 4029 static inline void 4030 sv_decompress(int32_t *ddst, int32_t pattern) 4031 { 4032 // assert(__builtin_constant_p(PAGE_SIZE) != 0); 4033 #if defined(__x86_64__) 4034 memset_word(ddst, pattern, PAGE_SIZE / sizeof(int32_t)); 4035 #elif defined(__arm64__) 4036 assert((PAGE_SIZE % 128) == 0); 4037 if (pattern == 0) { 4038 fill32_dczva((addr64_t)ddst, PAGE_SIZE); 4039 } else { 4040 fill32_nt((addr64_t)ddst, PAGE_SIZE, pattern); 4041 } 4042 #else 4043 size_t i; 4044 4045 /* Unroll the pattern fill loop 4x to encourage the 4046 * compiler to emit NEON 
stores, cf. 4047 * <rdar://problem/25839866> Loop autovectorization 4048 * anomalies. 4049 */ 4050 /* * We use separate loops for each PAGE_SIZE 4051 * to allow the autovectorizer to engage, as PAGE_SIZE 4052 * may not be a constant. 4053 */ 4054 4055 __unreachable_ok_push 4056 if (PAGE_SIZE == 4096) { 4057 for (i = 0; i < (4096U / sizeof(int32_t)); i += 4) { 4058 *ddst++ = pattern; 4059 *ddst++ = pattern; 4060 *ddst++ = pattern; 4061 *ddst++ = pattern; 4062 } 4063 } else { 4064 assert(PAGE_SIZE == 16384); 4065 for (i = 0; i < (int)(16384U / sizeof(int32_t)); i += 4) { 4066 *ddst++ = pattern; 4067 *ddst++ = pattern; 4068 *ddst++ = pattern; 4069 *ddst++ = pattern; 4070 } 4071 } 4072 __unreachable_ok_pop 4073 #endif 4074 } 4075 4076 static int 4077 c_decompress_page(char *dst, volatile c_slot_mapping_t slot_ptr, int flags, int *zeroslot) 4078 { 4079 c_slot_t cs; 4080 c_segment_t c_seg; 4081 uint32_t c_segno; 4082 uint16_t c_indx; 4083 int c_rounded_size; 4084 uint32_t c_size; 4085 int retval = 0; 4086 boolean_t need_unlock = TRUE; 4087 boolean_t consider_defragmenting = FALSE; 4088 boolean_t kdp_mode = FALSE; 4089 4090 if (__improbable(flags & C_KDP)) { 4091 if (not_in_kdp) { 4092 panic("C_KDP passed to decompress page from outside of debugger context"); 4093 } 4094 4095 assert((flags & C_KEEP) == C_KEEP); 4096 assert((flags & C_DONT_BLOCK) == C_DONT_BLOCK); 4097 4098 if ((flags & (C_DONT_BLOCK | C_KEEP)) != (C_DONT_BLOCK | C_KEEP)) { 4099 return -2; 4100 } 4101 4102 kdp_mode = TRUE; 4103 *zeroslot = 0; 4104 } 4105 4106 ReTry: 4107 if (__probable(!kdp_mode)) { 4108 PAGE_REPLACEMENT_DISALLOWED(TRUE); 4109 } else { 4110 if (kdp_lck_rw_lock_is_acquired_exclusive(&c_master_lock)) { 4111 return -2; 4112 } 4113 } 4114 4115 #if HIBERNATION 4116 /* 4117 * if hibernation is enabled, it indicates (via a call 4118 * to 'vm_decompressor_lock' that no further 4119 * decompressions are allowed once it reaches 4120 * the point of flushing all of the currently dirty 4121 * anonymous memory through the compressor and out 4122 * to disk... 
in this state we allow freeing of compressed 4123 * pages and must honor the C_DONT_BLOCK case 4124 */ 4125 if (__improbable(dst && decompressions_blocked == TRUE)) { 4126 if (flags & C_DONT_BLOCK) { 4127 if (__probable(!kdp_mode)) { 4128 PAGE_REPLACEMENT_DISALLOWED(FALSE); 4129 } 4130 4131 *zeroslot = 0; 4132 return -2; 4133 } 4134 /* 4135 * it's safe to atomically assert and block behind the 4136 * lock held in shared mode because "decompressions_blocked" is 4137 * only set and cleared and the thread_wakeup done when the lock 4138 * is held exclusively 4139 */ 4140 assert_wait((event_t)&decompressions_blocked, THREAD_UNINT); 4141 4142 PAGE_REPLACEMENT_DISALLOWED(FALSE); 4143 4144 thread_block(THREAD_CONTINUE_NULL); 4145 4146 goto ReTry; 4147 } 4148 #endif 4149 /* s_cseg is actually "segno+1" */ 4150 c_segno = slot_ptr->s_cseg - 1; 4151 4152 if (__improbable(c_segno >= c_segments_available)) { 4153 panic("c_decompress_page: c_segno %d >= c_segments_available %d, slot_ptr(%p), slot_data(%x)", 4154 c_segno, c_segments_available, slot_ptr, *(int *)((void *)slot_ptr)); 4155 } 4156 4157 if (__improbable(c_segments[c_segno].c_segno < c_segments_available)) { 4158 panic("c_decompress_page: c_segno %d is free, slot_ptr(%p), slot_data(%x)", 4159 c_segno, slot_ptr, *(int *)((void *)slot_ptr)); 4160 } 4161 4162 c_seg = c_segments[c_segno].c_seg; 4163 4164 if (__probable(!kdp_mode)) { 4165 lck_mtx_lock_spin_always(&c_seg->c_lock); 4166 } else { 4167 if (kdp_lck_mtx_lock_spin_is_acquired(&c_seg->c_lock)) { 4168 return -2; 4169 } 4170 } 4171 4172 assert(c_seg->c_state != C_IS_EMPTY && c_seg->c_state != C_IS_FREE); 4173 4174 if (dst == NULL && c_seg->c_busy_swapping) { 4175 assert(c_seg->c_busy); 4176 4177 goto bypass_busy_check; 4178 } 4179 if (flags & C_DONT_BLOCK) { 4180 if (c_seg->c_busy || (C_SEG_IS_ONDISK(c_seg) && dst)) { 4181 *zeroslot = 0; 4182 4183 retval = -2; 4184 goto done; 4185 } 4186 } 4187 if (c_seg->c_busy) { 4188 PAGE_REPLACEMENT_DISALLOWED(FALSE); 4189 4190 c_seg_wait_on_busy(c_seg); 4191 4192 goto ReTry; 4193 } 4194 bypass_busy_check: 4195 4196 c_indx = slot_ptr->s_cindx; 4197 4198 if (__improbable(c_indx >= c_seg->c_nextslot)) { 4199 panic("c_decompress_page: c_indx %d >= c_nextslot %d, c_seg(%p), slot_ptr(%p), slot_data(%x)", 4200 c_indx, c_seg->c_nextslot, c_seg, slot_ptr, *(int *)((void *)slot_ptr)); 4201 } 4202 4203 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 4204 4205 c_size = UNPACK_C_SIZE(cs); 4206 4207 if (__improbable(c_size == 0)) { 4208 panic("c_decompress_page: c_size == 0, c_seg(%p), slot_ptr(%p), slot_data(%x)", 4209 c_seg, slot_ptr, *(int *)((void *)slot_ptr)); 4210 } 4211 4212 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 4213 4214 if (dst) { 4215 uint32_t age_of_cseg; 4216 clock_sec_t cur_ts_sec; 4217 clock_nsec_t cur_ts_nsec; 4218 4219 if (C_SEG_IS_ONDISK(c_seg)) { 4220 #if CONFIG_FREEZE 4221 if (freezer_incore_cseg_acct) { 4222 if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) { 4223 PAGE_REPLACEMENT_DISALLOWED(FALSE); 4224 lck_mtx_unlock_always(&c_seg->c_lock); 4225 4226 memorystatus_kill_on_VM_compressor_space_shortage(FALSE /* async */); 4227 4228 goto ReTry; 4229 } 4230 4231 uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count; 4232 if ((incore_seg_count + 1) >= c_segments_nearing_limit) { 4233 PAGE_REPLACEMENT_DISALLOWED(FALSE); 4234 lck_mtx_unlock_always(&c_seg->c_lock); 4235 4236 
memorystatus_kill_on_VM_compressor_space_shortage(FALSE /* async */); 4237 4238 goto ReTry; 4239 } 4240 } 4241 #endif /* CONFIG_FREEZE */ 4242 assert(kdp_mode == FALSE); 4243 retval = c_seg_swapin(c_seg, FALSE, TRUE); 4244 assert(retval == 0); 4245 4246 retval = 1; 4247 } 4248 if (c_seg->c_state == C_ON_BAD_Q) { 4249 assert(c_seg->c_store.c_buffer == NULL); 4250 *zeroslot = 0; 4251 4252 retval = -1; 4253 goto done; 4254 } 4255 4256 #if POPCOUNT_THE_COMPRESSED_DATA 4257 unsigned csvpop; 4258 uintptr_t csvaddr = (uintptr_t) &c_seg->c_store.c_buffer[cs->c_offset]; 4259 if (cs->c_pop_cdata != (csvpop = vmc_pop(csvaddr, c_size))) { 4260 panic("Compressed data popcount doesn't match original, bit distance: %d %p (phys: %p) %p %p 0x%x 0x%x 0x%x 0x%x", (csvpop - cs->c_pop_cdata), (void *)csvaddr, (void *) kvtophys(csvaddr), c_seg, cs, cs->c_offset, c_size, csvpop, cs->c_pop_cdata); 4261 } 4262 #endif 4263 4264 #if CHECKSUM_THE_COMPRESSED_DATA 4265 unsigned csvhash; 4266 if (cs->c_hash_compressed_data != (csvhash = vmc_hash((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size))) { 4267 panic("Compressed data doesn't match original %p %p %u %u %u", c_seg, cs, c_size, cs->c_hash_compressed_data, csvhash); 4268 } 4269 #endif 4270 if (c_rounded_size == PAGE_SIZE) { 4271 /* 4272 * page wasn't compressible... just copy it out 4273 */ 4274 memcpy(dst, &c_seg->c_store.c_buffer[cs->c_offset], PAGE_SIZE); 4275 } else if (c_size == 4) { 4276 int32_t data; 4277 int32_t *dptr; 4278 4279 /* 4280 * page was populated with a single value 4281 * that didn't fit into our fast hash 4282 * so we packed it in as a single non-compressed value 4283 * that we need to populate the page with 4284 */ 4285 dptr = (int32_t *)(uintptr_t)dst; 4286 data = *(int32_t *)(&c_seg->c_store.c_buffer[cs->c_offset]); 4287 sv_decompress(dptr, data); 4288 } else { 4289 uint32_t my_cpu_no; 4290 char *scratch_buf; 4291 4292 if (__probable(!kdp_mode)) { 4293 /* 4294 * we're behind the c_seg lock held in spin mode 4295 * which means pre-emption is disabled... therefore 4296 * the following sequence is atomic and safe 4297 */ 4298 my_cpu_no = cpu_number(); 4299 4300 assert(my_cpu_no < compressor_cpus); 4301 4302 scratch_buf = &compressor_scratch_bufs[my_cpu_no * vm_compressor_get_decode_scratch_size()]; 4303 } else { 4304 scratch_buf = kdp_compressor_scratch_buf; 4305 } 4306 4307 if (vm_compressor_algorithm() != VM_COMPRESSOR_DEFAULT_CODEC) { 4308 #if defined(__arm__) || defined(__arm64__) 4309 uint16_t c_codec = cs->c_codec; 4310 uint32_t inline_popcount; 4311 if (!metadecompressor((const uint8_t *) &c_seg->c_store.c_buffer[cs->c_offset], 4312 (uint8_t *)dst, c_size, c_codec, (void *)scratch_buf, &inline_popcount)) { 4313 retval = -1; 4314 } else { 4315 #if __ARM_WKDM_POPCNT__ 4316 if (inline_popcount != cs->c_inline_popcount) { 4317 /* 4318 * The codec choice in compression and 4319 * decompression must agree, so there 4320 * should never be a disagreement in 4321 * whether an inline population count 4322 * was performed. 
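 * Both sides therefore hold real popcount values here (the asserts below check that neither is C_SLOT_NO_POPCOUNT); a mismatch is reported via printf and surfaced to the caller as a decompression failure (retval = -1).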
4323 */ 4324 assert(inline_popcount != C_SLOT_NO_POPCOUNT); 4325 assert(cs->c_inline_popcount != C_SLOT_NO_POPCOUNT); 4326 printf("decompression failure from physical region %llx+%05x: popcount mismatch (%d != %d)\n", 4327 (unsigned long long)kvtophys((uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset]), c_size, 4328 inline_popcount, 4329 cs->c_inline_popcount); 4330 retval = -1; 4331 } 4332 #else 4333 assert(inline_popcount == C_SLOT_NO_POPCOUNT); 4334 #endif /* __ARM_WKDM_POPCNT__ */ 4335 } 4336 #endif 4337 } else { 4338 #if defined(__arm64__) 4339 __unreachable_ok_push 4340 if (PAGE_SIZE == 4096) { 4341 WKdm_decompress_4k((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset], 4342 (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size); 4343 } else { 4344 WKdm_decompress_16k((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset], 4345 (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size); 4346 } 4347 __unreachable_ok_pop 4348 #else 4349 WKdm_decompress_new((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset], 4350 (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size); 4351 #endif 4352 } 4353 } 4354 4355 #if CHECKSUM_THE_DATA 4356 if (cs->c_hash_data != vmc_hash(dst, PAGE_SIZE)) { 4357 #if defined(__arm__) || defined(__arm64__) 4358 int32_t *dinput = &c_seg->c_store.c_buffer[cs->c_offset]; 4359 panic("decompressed data doesn't match original cs: %p, hash: 0x%x, offset: %d, c_size: %d, c_rounded_size: %d, codec: %d, header: 0x%x 0x%x 0x%x", cs, cs->c_hash_data, cs->c_offset, c_size, c_rounded_size, cs->c_codec, *dinput, *(dinput + 1), *(dinput + 2)); 4360 #else 4361 panic("decompressed data doesn't match original cs: %p, hash: %d, offset: 0x%x, c_size: %d", cs, cs->c_hash_data, cs->c_offset, c_size); 4362 #endif 4363 } 4364 #endif 4365 if (c_seg->c_swappedin_ts == 0 && !kdp_mode) { 4366 clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec); 4367 4368 age_of_cseg = (uint32_t)cur_ts_sec - c_seg->c_creation_ts; 4369 if (age_of_cseg < DECOMPRESSION_SAMPLE_MAX_AGE) { 4370 OSAddAtomic(1, &age_of_decompressions_during_sample_period[age_of_cseg]); 4371 } else { 4372 OSAddAtomic(1, &overage_decompressions_during_sample_period); 4373 } 4374 4375 OSAddAtomic(1, &sample_period_decompression_count); 4376 } 4377 } 4378 #if CONFIG_FREEZE 4379 else { 4380 /* 4381 * We are freeing an uncompressed page from this c_seg and so balance the ledgers. 4382 */ 4383 if (C_SEG_IS_ONDISK(c_seg)) { 4384 /* 4385 * The compression sweep feature will push out anonymous pages to disk 4386 * without going through the freezer path and so those c_segs, while 4387 * swapped out, won't have an owner. 4388 */ 4389 if (c_seg->c_task_owner) { 4390 task_update_frozen_to_swap_acct(c_seg->c_task_owner, PAGE_SIZE_64, DEBIT_FROM_SWAP); 4391 } 4392 4393 /* 4394 * We are freeing a page in swap without swapping it in. We bump the in-core 4395 * count here to simulate a swapin of a page so that we can accurately 4396 * decrement it below. 
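 * (The matching decrement is the OSAddAtomic(-1, &c_segment_pages_compressed_incore) in the common slot-free path further down.)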
4397 */ 4398 OSAddAtomic(1, &c_segment_pages_compressed_incore); 4399 } 4400 } 4401 #endif /* CONFIG_FREEZE */ 4402 4403 if (flags & C_KEEP) { 4404 *zeroslot = 0; 4405 goto done; 4406 } 4407 assert(kdp_mode == FALSE); 4408 4409 c_seg->c_bytes_unused += c_rounded_size; 4410 c_seg->c_bytes_used -= c_rounded_size; 4411 4412 assert(c_seg->c_slots_used); 4413 c_seg->c_slots_used--; 4414 4415 PACK_C_SIZE(cs, 0); 4416 4417 if (c_indx < c_seg->c_firstemptyslot) { 4418 c_seg->c_firstemptyslot = c_indx; 4419 } 4420 4421 OSAddAtomic(-1, &c_segment_pages_compressed); 4422 #if CONFIG_FREEZE 4423 OSAddAtomic(-1, &c_segment_pages_compressed_incore); 4424 assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore); 4425 #endif /* CONFIG_FREEZE */ 4426 4427 if (c_seg->c_state != C_ON_BAD_Q && !(C_SEG_IS_ONDISK(c_seg))) { 4428 /* 4429 * C_SEG_IS_ONDISK == TRUE can occur when we're doing a 4430 * free of a compressed page (i.e. dst == NULL) 4431 */ 4432 OSAddAtomic64(-c_rounded_size, &compressor_bytes_used); 4433 } 4434 if (c_seg->c_busy_swapping) { 4435 /* 4436 * bypass case for c_busy_swapping... 4437 * let the swapin/swapout paths deal with putting 4438 * the c_seg on the minor compaction queue if needed 4439 */ 4440 assert(c_seg->c_busy); 4441 goto done; 4442 } 4443 assert(!c_seg->c_busy); 4444 4445 if (c_seg->c_state != C_IS_FILLING) { 4446 if (c_seg->c_bytes_used == 0) { 4447 if (!(C_SEG_IS_ONDISK(c_seg))) { 4448 int pages_populated; 4449 4450 pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE; 4451 c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0); 4452 4453 if (pages_populated) { 4454 assert(c_seg->c_state != C_ON_BAD_Q); 4455 assert(c_seg->c_store.c_buffer != NULL); 4456 4457 C_SEG_BUSY(c_seg); 4458 lck_mtx_unlock_always(&c_seg->c_lock); 4459 4460 kernel_memory_depopulate(compressor_map, 4461 (vm_offset_t) c_seg->c_store.c_buffer, 4462 pages_populated * PAGE_SIZE, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR); 4463 4464 lck_mtx_lock_spin_always(&c_seg->c_lock); 4465 C_SEG_WAKEUP_DONE(c_seg); 4466 } 4467 if (!c_seg->c_on_minorcompact_q && c_seg->c_state != C_ON_SWAPOUT_Q && c_seg->c_state != C_ON_SWAPIO_Q) { 4468 c_seg_need_delayed_compaction(c_seg, FALSE); 4469 } 4470 } else { 4471 if (c_seg->c_state != C_ON_SWAPPEDOUTSPARSE_Q) { 4472 c_seg_move_to_sparse_list(c_seg); 4473 consider_defragmenting = TRUE; 4474 } 4475 } 4476 } else if (c_seg->c_on_minorcompact_q) { 4477 assert(c_seg->c_state != C_ON_BAD_Q); 4478 assert(!C_SEG_IS_ON_DISK_OR_SOQ(c_seg)); 4479 4480 if (C_SEG_SHOULD_MINORCOMPACT_NOW(c_seg)) { 4481 c_seg_try_minor_compaction_and_unlock(c_seg); 4482 need_unlock = FALSE; 4483 } 4484 } else if (!(C_SEG_IS_ONDISK(c_seg))) { 4485 if (c_seg->c_state != C_ON_BAD_Q && c_seg->c_state != C_ON_SWAPOUT_Q && c_seg->c_state != C_ON_SWAPIO_Q && 4486 C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) { 4487 c_seg_need_delayed_compaction(c_seg, FALSE); 4488 } 4489 } else if (c_seg->c_state != C_ON_SWAPPEDOUTSPARSE_Q && C_SEG_ONDISK_IS_SPARSE(c_seg)) { 4490 c_seg_move_to_sparse_list(c_seg); 4491 consider_defragmenting = TRUE; 4492 } 4493 } 4494 done: 4495 if (__improbable(kdp_mode)) { 4496 return retval; 4497 } 4498 4499 if (need_unlock == TRUE) { 4500 lck_mtx_unlock_always(&c_seg->c_lock); 4501 } 4502 4503 PAGE_REPLACEMENT_DISALLOWED(FALSE); 4504 4505 if (consider_defragmenting == TRUE) { 4506 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE); 4507 } 4508 4509 #if !XNU_TARGET_OS_OSX 4510 if ((c_minor_count && 
COMPRESSOR_NEEDS_TO_MINOR_COMPACT()) || vm_compressor_needs_to_major_compact()) { 4511 vm_wake_compactor_swapper(); 4512 } 4513 #endif /* !XNU_TARGET_OS_OSX */ 4514 4515 return retval; 4516 } 4517 4518 4519 int 4520 vm_compressor_get(ppnum_t pn, int *slot, int flags) 4521 { 4522 c_slot_mapping_t slot_ptr; 4523 char *dst; 4524 int zeroslot = 1; 4525 int retval; 4526 4527 dst = pmap_map_compressor_page(pn); 4528 slot_ptr = (c_slot_mapping_t)slot; 4529 4530 assert(dst != NULL); 4531 4532 if (slot_ptr->s_cseg == C_SV_CSEG_ID) { 4533 int32_t data; 4534 int32_t *dptr; 4535 4536 /* 4537 * page was populated with a single value 4538 * that found a home in our hash table 4539 * grab that value from the hash and populate the page 4540 * that we need to populate the page with 4541 */ 4542 dptr = (int32_t *)(uintptr_t)dst; 4543 data = c_segment_sv_hash_table[slot_ptr->s_cindx].he_data; 4544 sv_decompress(dptr, data); 4545 if (!(flags & C_KEEP)) { 4546 c_segment_sv_hash_drop_ref(slot_ptr->s_cindx); 4547 4548 OSAddAtomic(-1, &c_segment_pages_compressed); 4549 #if CONFIG_FREEZE 4550 OSAddAtomic(-1, &c_segment_pages_compressed_incore); 4551 assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count 0x%x", c_segment_pages_compressed_incore); 4552 #endif /* CONFIG_FREEZE */ 4553 *slot = 0; 4554 } 4555 if (data) { 4556 OSAddAtomic(1, &c_segment_svp_nonzero_decompressions); 4557 } else { 4558 OSAddAtomic(1, &c_segment_svp_zero_decompressions); 4559 } 4560 4561 pmap_unmap_compressor_page(pn, dst); 4562 return 0; 4563 } 4564 4565 retval = c_decompress_page(dst, slot_ptr, flags, &zeroslot); 4566 4567 /* 4568 * zeroslot will be set to 0 by c_decompress_page if (flags & C_KEEP) 4569 * or (flags & C_DONT_BLOCK) and we found 'c_busy' or 'C_SEG_IS_ONDISK' to be TRUE 4570 */ 4571 if (zeroslot) { 4572 *slot = 0; 4573 } 4574 4575 pmap_unmap_compressor_page(pn, dst); 4576 4577 /* 4578 * returns 0 if we successfully decompressed a page from a segment already in memory 4579 * returns 1 if we had to first swap in the segment, before successfully decompressing the page 4580 * returns -1 if we encountered an error swapping in the segment - decompression failed 4581 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' or 'C_SEG_IS_ONDISK' to be true 4582 */ 4583 return retval; 4584 } 4585 4586 #if DEVELOPMENT || DEBUG 4587 4588 void 4589 vm_compressor_inject_error(int *slot) 4590 { 4591 c_slot_mapping_t slot_ptr = (c_slot_mapping_t)slot; 4592 4593 /* No error detection for single-value compression. */ 4594 if (slot_ptr->s_cseg == C_SV_CSEG_ID) { 4595 printf("%s(): cannot inject errors in SV-compressed pages\n", __func__ ); 4596 return; 4597 } 4598 4599 /* s_cseg is actually "segno+1" */ 4600 const uint32_t c_segno = slot_ptr->s_cseg - 1; 4601 4602 assert(c_segno < c_segments_available); 4603 assert(c_segments[c_segno].c_segno >= c_segments_available); 4604 4605 const c_segment_t c_seg = c_segments[c_segno].c_seg; 4606 4607 PAGE_REPLACEMENT_DISALLOWED(TRUE); 4608 4609 lck_mtx_lock_spin_always(&c_seg->c_lock); 4610 assert(c_seg->c_state != C_IS_EMPTY && c_seg->c_state != C_IS_FREE); 4611 4612 const uint16_t c_indx = slot_ptr->s_cindx; 4613 assert(c_indx < c_seg->c_nextslot); 4614 4615 /* 4616 * To safely make this segment temporarily writable, we need to mark 4617 * the segment busy, which allows us to release the segment lock. 
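 * While c_busy is set the segment also cannot be freed out from under us (as noted further down), so dropping the lock here is safe.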
4618 */ 4619 while (c_seg->c_busy) { 4620 c_seg_wait_on_busy(c_seg); 4621 lck_mtx_lock_spin_always(&c_seg->c_lock); 4622 } 4623 C_SEG_BUSY(c_seg); 4624 4625 bool already_writable = (c_seg->c_state == C_IS_FILLING); 4626 if (!already_writable) { 4627 /* 4628 * Protection update must be performed preemptibly, so temporarily drop 4629 * the lock. Having set c_busy will prevent most other concurrent 4630 * operations. 4631 */ 4632 lck_mtx_unlock_always(&c_seg->c_lock); 4633 C_SEG_MAKE_WRITEABLE(c_seg); 4634 lck_mtx_lock_spin_always(&c_seg->c_lock); 4635 } 4636 4637 /* 4638 * Once we've released the lock following our c_state == C_IS_FILLING check, 4639 * c_current_seg_filled() can (re-)write-protect the segment. However, it 4640 * will transition from C_IS_FILLING before releasing the c_seg lock, so we 4641 * can detect this by re-checking after we've reobtained the lock. 4642 */ 4643 if (already_writable && c_seg->c_state != C_IS_FILLING) { 4644 lck_mtx_unlock_always(&c_seg->c_lock); 4645 C_SEG_MAKE_WRITEABLE(c_seg); 4646 lck_mtx_lock_spin_always(&c_seg->c_lock); 4647 already_writable = false; 4648 /* Segment can't be freed while c_busy is set. */ 4649 assert(c_seg->c_state != C_IS_FILLING); 4650 } 4651 4652 c_slot_t cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 4653 int32_t *data = &c_seg->c_store.c_buffer[cs->c_offset]; 4654 /* assume that the compressed data holds at least one int32_t */ 4655 assert(UNPACK_C_SIZE(cs) > sizeof(*data)); 4656 /* 4657 * This bit is known to be in the payload of a MISS packet resulting from 4658 * the pattern used in the test pattern from decompression_failure.c. 4659 * Flipping it should result in many corrupted bits in the test page. 4660 */ 4661 data[0] ^= 0x00000100; 4662 if (!already_writable) { 4663 lck_mtx_unlock_always(&c_seg->c_lock); 4664 C_SEG_WRITE_PROTECT(c_seg); 4665 lck_mtx_lock_spin_always(&c_seg->c_lock); 4666 } 4667 4668 C_SEG_WAKEUP_DONE(c_seg); 4669 lck_mtx_unlock_always(&c_seg->c_lock); 4670 4671 PAGE_REPLACEMENT_DISALLOWED(FALSE); 4672 } 4673 4674 #endif /* DEVELOPMENT || DEBUG */ 4675 4676 int 4677 vm_compressor_free(int *slot, int flags) 4678 { 4679 c_slot_mapping_t slot_ptr; 4680 int zeroslot = 1; 4681 int retval; 4682 4683 assert(flags == 0 || flags == C_DONT_BLOCK); 4684 4685 slot_ptr = (c_slot_mapping_t)slot; 4686 4687 if (slot_ptr->s_cseg == C_SV_CSEG_ID) { 4688 c_segment_sv_hash_drop_ref(slot_ptr->s_cindx); 4689 OSAddAtomic(-1, &c_segment_pages_compressed); 4690 #if CONFIG_FREEZE 4691 OSAddAtomic(-1, &c_segment_pages_compressed_incore); 4692 assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count 0x%x", c_segment_pages_compressed_incore); 4693 #endif /* CONFIG_FREEZE */ 4694 4695 *slot = 0; 4696 return 0; 4697 } 4698 retval = c_decompress_page(NULL, slot_ptr, flags, &zeroslot); 4699 /* 4700 * returns 0 if we successfully freed the specified compressed page 4701 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' set 4702 */ 4703 4704 if (retval == 0) { 4705 *slot = 0; 4706 } else { 4707 assert(retval == -2); 4708 } 4709 4710 return retval; 4711 } 4712 4713 4714 int 4715 vm_compressor_put(ppnum_t pn, int *slot, void **current_chead, char *scratch_buf) 4716 { 4717 char *src; 4718 int retval; 4719 4720 src = pmap_map_compressor_page(pn); 4721 assert(src != NULL); 4722 4723 retval = c_compress_page(src, (c_slot_mapping_t)slot, (c_segment_t *)current_chead, scratch_buf); 4724 pmap_unmap_compressor_page(pn, src); 4725 4726 return retval; 4727 } 4728 4729 void 4730 vm_compressor_transfer( 4731 int *dst_slot_p, 4732 int 
*src_slot_p) 4733 { 4734 c_slot_mapping_t dst_slot, src_slot; 4735 c_segment_t c_seg; 4736 uint16_t c_indx; 4737 c_slot_t cs; 4738 4739 src_slot = (c_slot_mapping_t) src_slot_p; 4740 4741 if (src_slot->s_cseg == C_SV_CSEG_ID) { 4742 *dst_slot_p = *src_slot_p; 4743 *src_slot_p = 0; 4744 return; 4745 } 4746 dst_slot = (c_slot_mapping_t) dst_slot_p; 4747 Retry: 4748 PAGE_REPLACEMENT_DISALLOWED(TRUE); 4749 /* get segment for src_slot */ 4750 c_seg = c_segments[src_slot->s_cseg - 1].c_seg; 4751 /* lock segment */ 4752 lck_mtx_lock_spin_always(&c_seg->c_lock); 4753 /* wait if it's busy */ 4754 if (c_seg->c_busy && !c_seg->c_busy_swapping) { 4755 PAGE_REPLACEMENT_DISALLOWED(FALSE); 4756 c_seg_wait_on_busy(c_seg); 4757 goto Retry; 4758 } 4759 /* find the c_slot */ 4760 c_indx = src_slot->s_cindx; 4761 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 4762 /* point the c_slot back to dst_slot instead of src_slot */ 4763 C_SLOT_ASSERT_PACKABLE(dst_slot); 4764 cs->c_packed_ptr = C_SLOT_PACK_PTR(dst_slot); 4765 /* transfer */ 4766 *dst_slot_p = *src_slot_p; 4767 *src_slot_p = 0; 4768 lck_mtx_unlock_always(&c_seg->c_lock); 4769 PAGE_REPLACEMENT_DISALLOWED(FALSE); 4770 } 4771 4772 #if CONFIG_FREEZE 4773 4774 int freezer_finished_filling = 0; 4775 4776 void 4777 vm_compressor_finished_filling( 4778 void **current_chead) 4779 { 4780 c_segment_t c_seg; 4781 4782 if ((c_seg = *(c_segment_t *)current_chead) == NULL) { 4783 return; 4784 } 4785 4786 assert(c_seg->c_state == C_IS_FILLING); 4787 4788 lck_mtx_lock_spin_always(&c_seg->c_lock); 4789 4790 c_current_seg_filled(c_seg, (c_segment_t *)current_chead); 4791 4792 lck_mtx_unlock_always(&c_seg->c_lock); 4793 4794 freezer_finished_filling++; 4795 } 4796 4797 4798 /* 4799 * This routine is used to transfer the compressed chunks from 4800 * the c_seg/cindx pointed to by slot_p into a new c_seg headed 4801 * by the current_chead and a new cindx within that c_seg. 4802 * 4803 * Currently, this routine is only used by the "freezer backed by 4804 * compressor with swap" mode to create a series of c_segs that 4805 * only contain compressed data belonging to one task. So, we 4806 * move a task's previously compressed data into a set of new 4807 * c_segs which will also hold the task's yet to be compressed data. 4808 */ 4809 4810 kern_return_t 4811 vm_compressor_relocate( 4812 void **current_chead, 4813 int *slot_p) 4814 { 4815 c_slot_mapping_t slot_ptr; 4816 c_slot_mapping_t src_slot; 4817 uint32_t c_rounded_size; 4818 uint32_t c_size; 4819 uint16_t dst_slot; 4820 c_slot_t c_dst; 4821 c_slot_t c_src; 4822 uint16_t c_indx; 4823 c_segment_t c_seg_dst = NULL; 4824 c_segment_t c_seg_src = NULL; 4825 kern_return_t kr = KERN_SUCCESS; 4826 4827 4828 src_slot = (c_slot_mapping_t) slot_p; 4829 4830 if (src_slot->s_cseg == C_SV_CSEG_ID) { 4831 /* 4832 * no need to relocate... this is a page full of a single 4833 * value which is hashed to a single entry not contained 4834 * in a c_segment_t 4835 */ 4836 return kr; 4837 } 4838 4839 Relookup_dst: 4840 c_seg_dst = c_seg_allocate((c_segment_t *)current_chead); 4841 /* 4842 * returns with c_seg lock held 4843 * and PAGE_REPLACEMENT_DISALLOWED(TRUE)... 4844 * c_nextslot has been allocated and 4845 * c_store.c_buffer populated 4846 */ 4847 if (c_seg_dst == NULL) { 4848 /* 4849 * Out of compression segments? 
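 * c_seg_allocate() returns NULL when it cannot set up another segment, so report KERN_RESOURCE_SHORTAGE to the caller.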
4850 */ 4851 kr = KERN_RESOURCE_SHORTAGE; 4852 goto out; 4853 } 4854 4855 assert(c_seg_dst->c_busy == 0); 4856 4857 C_SEG_BUSY(c_seg_dst); 4858 4859 dst_slot = c_seg_dst->c_nextslot; 4860 4861 lck_mtx_unlock_always(&c_seg_dst->c_lock); 4862 4863 Relookup_src: 4864 c_seg_src = c_segments[src_slot->s_cseg - 1].c_seg; 4865 4866 assert(c_seg_dst != c_seg_src); 4867 4868 lck_mtx_lock_spin_always(&c_seg_src->c_lock); 4869 4870 if (C_SEG_IS_ON_DISK_OR_SOQ(c_seg_src) || 4871 c_seg_src->c_state == C_IS_FILLING) { 4872 /* 4873 * Skip this page if :- 4874 * a) the src c_seg is already on-disk (or on its way there) 4875 * A "thaw" can mark a process as eligible for 4876 * another freeze cycle without bringing any of 4877 * its swapped out c_segs back from disk (because 4878 * that is done on-demand). 4879 * Or, this page may be mapped elsewhere in the task's map, 4880 * and we may have marked it for swap already. 4881 * 4882 * b) Or, the src c_seg is being filled by the compressor 4883 * thread. We don't want the added latency of waiting for 4884 * this c_seg in the freeze path and so we skip it. 4885 */ 4886 4887 PAGE_REPLACEMENT_DISALLOWED(FALSE); 4888 4889 lck_mtx_unlock_always(&c_seg_src->c_lock); 4890 4891 c_seg_src = NULL; 4892 4893 goto out; 4894 } 4895 4896 if (c_seg_src->c_busy) { 4897 PAGE_REPLACEMENT_DISALLOWED(FALSE); 4898 c_seg_wait_on_busy(c_seg_src); 4899 4900 c_seg_src = NULL; 4901 4902 PAGE_REPLACEMENT_DISALLOWED(TRUE); 4903 4904 goto Relookup_src; 4905 } 4906 4907 C_SEG_BUSY(c_seg_src); 4908 4909 lck_mtx_unlock_always(&c_seg_src->c_lock); 4910 4911 PAGE_REPLACEMENT_DISALLOWED(FALSE); 4912 4913 /* find the c_slot */ 4914 c_indx = src_slot->s_cindx; 4915 4916 c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, c_indx); 4917 4918 c_size = UNPACK_C_SIZE(c_src); 4919 4920 assert(c_size); 4921 4922 if (c_size > (uint32_t)(C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)c_seg_dst->c_nextoffset))) { 4923 /* 4924 * This segment is full. We need a new one. 4925 */ 4926 4927 PAGE_REPLACEMENT_DISALLOWED(TRUE); 4928 4929 lck_mtx_lock_spin_always(&c_seg_src->c_lock); 4930 C_SEG_WAKEUP_DONE(c_seg_src); 4931 lck_mtx_unlock_always(&c_seg_src->c_lock); 4932 4933 c_seg_src = NULL; 4934 4935 lck_mtx_lock_spin_always(&c_seg_dst->c_lock); 4936 4937 assert(c_seg_dst->c_busy); 4938 assert(c_seg_dst->c_state == C_IS_FILLING); 4939 assert(!c_seg_dst->c_on_minorcompact_q); 4940 4941 c_current_seg_filled(c_seg_dst, (c_segment_t *)current_chead); 4942 assert(*current_chead == NULL); 4943 4944 C_SEG_WAKEUP_DONE(c_seg_dst); 4945 4946 lck_mtx_unlock_always(&c_seg_dst->c_lock); 4947 4948 c_seg_dst = NULL; 4949 4950 PAGE_REPLACEMENT_DISALLOWED(FALSE); 4951 4952 goto Relookup_dst; 4953 } 4954 4955 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot); 4956 4957 memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size); 4958 /* 4959 * Is platform alignment actually necessary since wkdm aligns its output? 
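 * (The rounding below charges each slot in whole alignment granules: assuming a 4-byte granule, a 37-byte payload is accounted as (37 + 3) & ~3 = 40 bytes before c_nextoffset is advanced.)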
4960 */ 4961 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 4962 4963 cslot_copy(c_dst, c_src); 4964 c_dst->c_offset = c_seg_dst->c_nextoffset; 4965 4966 if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot) { 4967 c_seg_dst->c_firstemptyslot++; 4968 } 4969 4970 c_seg_dst->c_slots_used++; 4971 c_seg_dst->c_nextslot++; 4972 c_seg_dst->c_bytes_used += c_rounded_size; 4973 c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size); 4974 4975 4976 PACK_C_SIZE(c_src, 0); 4977 4978 c_seg_src->c_bytes_used -= c_rounded_size; 4979 c_seg_src->c_bytes_unused += c_rounded_size; 4980 4981 assert(c_seg_src->c_slots_used); 4982 c_seg_src->c_slots_used--; 4983 4984 if (c_indx < c_seg_src->c_firstemptyslot) { 4985 c_seg_src->c_firstemptyslot = c_indx; 4986 } 4987 4988 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot); 4989 4990 PAGE_REPLACEMENT_ALLOWED(TRUE); 4991 slot_ptr = C_SLOT_UNPACK_PTR(c_dst); 4992 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */ 4993 slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1; 4994 slot_ptr->s_cindx = dst_slot; 4995 4996 PAGE_REPLACEMENT_ALLOWED(FALSE); 4997 4998 out: 4999 if (c_seg_src) { 5000 lck_mtx_lock_spin_always(&c_seg_src->c_lock); 5001 5002 C_SEG_WAKEUP_DONE(c_seg_src); 5003 5004 if (c_seg_src->c_bytes_used == 0 && c_seg_src->c_state != C_IS_FILLING) { 5005 if (!c_seg_src->c_on_minorcompact_q) { 5006 c_seg_need_delayed_compaction(c_seg_src, FALSE); 5007 } 5008 } 5009 5010 lck_mtx_unlock_always(&c_seg_src->c_lock); 5011 } 5012 5013 if (c_seg_dst) { 5014 PAGE_REPLACEMENT_DISALLOWED(TRUE); 5015 5016 lck_mtx_lock_spin_always(&c_seg_dst->c_lock); 5017 5018 if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) { 5019 /* 5020 * Nearing or exceeded maximum slot and offset capacity. 5021 */ 5022 assert(c_seg_dst->c_busy); 5023 assert(c_seg_dst->c_state == C_IS_FILLING); 5024 assert(!c_seg_dst->c_on_minorcompact_q); 5025 5026 c_current_seg_filled(c_seg_dst, (c_segment_t *)current_chead); 5027 assert(*current_chead == NULL); 5028 } 5029 5030 C_SEG_WAKEUP_DONE(c_seg_dst); 5031 5032 lck_mtx_unlock_always(&c_seg_dst->c_lock); 5033 5034 c_seg_dst = NULL; 5035 5036 PAGE_REPLACEMENT_DISALLOWED(FALSE); 5037 } 5038 5039 return kr; 5040 } 5041 #endif /* CONFIG_FREEZE */
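/*
 * Usage sketch (editor's illustration; not part of the original source and
 * not compiled): how the external interfaces above fit together, based on
 * the return-code comments in this file. The slot value handed back by
 * vm_compressor_put() encodes <s_cseg = segno + 1, s_cindx>, with
 * s_cseg == C_SV_CSEG_ID marking a single-value page that lives in
 * c_segment_sv_hash_table rather than in a c_segment. The names pn, slot,
 * chead and scratch below are hypothetical.
 *
 *	int slot = 0;
 *	void *chead = NULL;
 *
 *	// 0 == page compressed; 1 == no compression segment available
 *	if (vm_compressor_put(pn, &slot, &chead, scratch) == 0) {
 *		switch (vm_compressor_get(pn, &slot, C_DONT_BLOCK)) {
 *		case 0:		// decompressed from an in-core segment
 *			break;
 *		case 1:		// segment had to be swapped in first
 *			break;
 *		case -1:	// swap-in failed; decompression did not happen
 *			break;
 *		case -2:	// C_DONT_BLOCK and the segment was busy or on disk
 *			break;
 *		}
 *	}
 *
 *	// or, to discard the compressed copy without decompressing it:
 *	// vm_compressor_free(&slot, 0);
 */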