/* vm_phantom_cache.c */
/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_phantom_cache.h>
#include <vm/vm_compressor.h>

/*
 * The "phantom cache" is a ghost cache of recently evicted file-cache
 * pages.  When an evicted page is read back in soon after eviction, that
 * is evidence the file cache is thrashing; vm_phantom_cache_check_pressure()
 * samples these hit/add counters per evaluation period to decide whether
 * memory pressure should be declared.
 */

/* Length of one sampling/evaluation period, in milliseconds. */
uint32_t phantom_cache_eval_period_in_msecs = 250;
/*
 * Per-period ghost-hit counts above which we consider the cache to be
 * thrashing.  The threshold is latency dependent, hence separate values
 * for SSD-backed and (slower) non-SSD-backed objects.
 */
uint32_t phantom_cache_thrashing_threshold_ssd = 1000;
#if !XNU_TARGET_OS_OSX
uint32_t phantom_cache_thrashing_threshold = 500;
#else /* !XNU_TARGET_OS_OSX */
uint32_t phantom_cache_thrashing_threshold = 50;
#endif /* !XNU_TARGET_OS_OSX */

/*
 * Number of consecutive thrashing periods required before
 * vm_phantom_cache_check_pressure() returns true.
 */
#if !XNU_TARGET_OS_OSX
unsigned phantom_cache_contiguous_periods = 4;
#else /* !XNU_TARGET_OS_OSX */
unsigned phantom_cache_contiguous_periods = 2;
#endif /* !XNU_TARGET_OS_OSX */

/* Timestamp of the start of the current evaluation period. */
clock_sec_t pc_start_of_eval_period_sec = 0;
clock_nsec_t pc_start_of_eval_period_nsec = 0;
/* Set (possibly from another thread) to force a new evaluation period. */
boolean_t pc_need_eval_reset = FALSE;

/* One bit per recent sampling period. Bit 0 = current period. */
uint32_t pc_history = 0;

/* Per-period counters, split by backing-store type (SSD vs. not). */
uint32_t sample_period_ghost_added_count = 0;
uint32_t sample_period_ghost_added_count_ssd = 0;
uint32_t sample_period_ghost_found_count = 0;
uint32_t sample_period_ghost_found_count_ssd = 0;

/*
 * Next id handed out to a vm_object on its first ghost insertion.
 * 0 means "no phantom entries for this object", so ids start at 1 and,
 * on 32-bit wrap, restart well above the ids likely still live.
 */
uint32_t vm_phantom_object_id = 1;
#define VM_PHANTOM_OBJECT_ID_AFTER_WRAP 1000000

/* Ring of ghost entries; index 0 is reserved (0 == "no entry"). */
vm_ghost_t vm_phantom_cache;
uint32_t vm_phantom_cache_nindx = 1;            /* next ring slot to claim */
uint32_t vm_phantom_cache_num_entries = 0;      /* power of 2; 0 == disabled */
uint32_t vm_phantom_cache_size;                 /* ring size in bytes */

typedef uint32_t vm_phantom_hash_entry_t;
/* Hash table mapping (obj_id, offset) -> ring index; buckets chain via g_next_index. */
vm_phantom_hash_entry_t *vm_phantom_cache_hash;
uint32_t vm_phantom_cache_hash_size;
uint32_t vm_ghost_hash_mask;            /* Mask for hash function */
uint32_t vm_ghost_bucket_hash;          /* Basic bucket hash */


/*
 * One ghost entry covers VM_GHOST_PAGES_PER_ENTRY (4) consecutive pages;
 * pg_masks selects the per-page bit within g_pages_held.
 */
int pg_masks[4] = {
	0x1, 0x2, 0x4, 0x8
};


#define vm_phantom_hash(obj_id, offset) (\
	( (natural_t)((uintptr_t)obj_id * vm_ghost_bucket_hash) + (offset ^ vm_ghost_bucket_hash)) & vm_ghost_hash_mask)


/* Event counters for debugging/inspection; updated without locking beyond the caller's. */
struct phantom_cache_stats {
	uint32_t pcs_wrapped;
	uint32_t pcs_added_page_to_entry;
	uint32_t pcs_added_new_entry;
	uint32_t pcs_replaced_entry;

	uint32_t pcs_lookup_found_page_in_cache;
	uint32_t pcs_lookup_entry_not_in_cache;
	uint32_t pcs_lookup_page_not_in_entry;

	uint32_t pcs_updated_phantom_state;
} phantom_cache_stats;


/*
 * Size and allocate the ghost ring and its hash table, and derive the
 * multiplicative hash constant.  No-op unless the compressor is active;
 * if the initial kernel allocations fail we panic.  The cache is sized to
 * track a fixed fraction of physical memory (1/10th of pages on embedded,
 * 1/4 on macOS), rounded up to a power of 2 and clamped to what the
 * g_next_index bitfield can index.
 */
void
vm_phantom_cache_init()
{
	unsigned int num_entries;
	unsigned int log1;
	unsigned int size;

	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
		return;
	}
#if !XNU_TARGET_OS_OSX
	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 10) / VM_GHOST_PAGES_PER_ENTRY);
#else /* !XNU_TARGET_OS_OSX */
	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 4) / VM_GHOST_PAGES_PER_ENTRY);
#endif /* !XNU_TARGET_OS_OSX */
	vm_phantom_cache_num_entries = 1;

	/* round up to a power of 2 so vm_ghost_hash_mask works as a mask */
	while (vm_phantom_cache_num_entries < num_entries) {
		vm_phantom_cache_num_entries <<= 1;
	}

	/*
	 * We index this with g_next_index, so don't exceed the width of that bitfield.
	 */
	if (vm_phantom_cache_num_entries > (1 << VM_GHOST_INDEX_BITS)) {
		vm_phantom_cache_num_entries = (1 << VM_GHOST_INDEX_BITS);
	}

	vm_phantom_cache_size = sizeof(struct vm_ghost) * vm_phantom_cache_num_entries;
	vm_phantom_cache_hash_size = sizeof(vm_phantom_hash_entry_t) * vm_phantom_cache_num_entries;

	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache), vm_phantom_cache_size, 0, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS) {
		panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
	}
	bzero(vm_phantom_cache, vm_phantom_cache_size);

	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache_hash), vm_phantom_cache_hash_size, 0, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS) {
		panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
	}
	bzero(vm_phantom_cache_hash, vm_phantom_cache_hash_size);


	vm_ghost_hash_mask = vm_phantom_cache_num_entries - 1;

	/*
	 * Calculate object_id shift value for hashing algorithm:
	 * O = log2(sizeof(struct vm_object))
	 * B = log2(vm_page_bucket_count)
	 * hash shifts the object_id left by
	 * B/2 - O
	 */
	size = vm_phantom_cache_num_entries;
	for (log1 = 0; size > 1; log1++) {
		size /= 2;
	}

	vm_ghost_bucket_hash = 1 << ((log1 + 1) >> 1);          /* Get (ceiling of sqrt of table size) */
	vm_ghost_bucket_hash |= 1 << ((log1 + 1) >> 2);         /* Get (ceiling of quadroot of table size) */
	vm_ghost_bucket_hash |= 1;                              /* Set bottom bit - always must be 1 to ensure a unique series */

	if (vm_ghost_hash_mask & vm_phantom_cache_num_entries) {
		printf("vm_phantom_cache_init: WARNING -- strange page hash\n");
	}
}


/*
 * Record page 'm' as evicted from the file cache.
 *
 * Caller must hold vm_page_queue_lock and the page's object lock
 * exclusively (both asserted below).  If the object already has a ghost
 * entry covering this page's 4-page group, just set this page's bit in
 * it; otherwise claim the next slot in the LRU ring, unhashing whatever
 * entry previously occupied it, and hash the new entry in.  Also bumps
 * the per-sample-period "added" counter (SSD or non-SSD flavor).
 */
void
vm_phantom_cache_add_ghost(vm_page_t m)
{
	vm_ghost_t vpce;
	vm_object_t object;
	int ghost_index;
	int pg_mask;
	boolean_t isSSD = FALSE;
	vm_phantom_hash_entry_t ghost_hash_index;

	object = VM_PAGE_OBJECT(m);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(object);

	if (vm_phantom_cache_num_entries == 0) {
		return;
	}

	pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];

	if (object->phantom_object_id == 0) {
		/*
		 * First ghost insertion for this object: assign it an id and
		 * remember whether its pager is SSD-backed so later samples
		 * are counted against the right thrashing threshold.
		 */
		vnode_pager_get_isSSD(object->pager, &isSSD);

		if (isSSD == TRUE) {
			object->phantom_isssd = TRUE;
		}

		object->phantom_object_id = vm_phantom_object_id++;

		if (vm_phantom_object_id == 0) {
			/* id wrapped; restart above ids likely still in use */
			vm_phantom_object_id = VM_PHANTOM_OBJECT_ID_AFTER_WRAP;
		}
	} else {
		if ((vpce = vm_phantom_cache_lookup_ghost(m, 0))) {
			/* entry for this 4-page group already exists; just mark this page */
			vpce->g_pages_held |= pg_mask;

			phantom_cache_stats.pcs_added_page_to_entry++;
			goto done;
		}
	}
	/*
	 * if we're here then the vm_ghost_t of this vm_page_t
	 * is not present in the phantom cache... take the next
	 * available entry in the LRU first evicting the existing
	 * entry if we've wrapped the ring
	 */
	ghost_index = vm_phantom_cache_nindx++;

	if (vm_phantom_cache_nindx == vm_phantom_cache_num_entries) {
		/* slot 0 is reserved as the "no entry" sentinel, so wrap to 1 */
		vm_phantom_cache_nindx = 1;

		phantom_cache_stats.pcs_wrapped++;
	}
	vpce = &vm_phantom_cache[ghost_index];

	if (vpce->g_obj_id) {
		/*
		 * we're going to replace an existing entry
		 * so first remove it from the hash
		 */
		vm_ghost_t nvpce;

		ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);

		nvpce = &vm_phantom_cache[vm_phantom_cache_hash[ghost_hash_index]];

		if (nvpce == vpce) {
			/* victim is the bucket head */
			vm_phantom_cache_hash[ghost_hash_index] = vpce->g_next_index;
		} else {
			/* walk the bucket chain until we find the victim's predecessor */
			for (;;) {
				if (nvpce->g_next_index == 0) {
					panic("didn't find ghost in hash\n");
				}

				if (&vm_phantom_cache[nvpce->g_next_index] == vpce) {
					nvpce->g_next_index = vpce->g_next_index;
					break;
				}
				nvpce = &vm_phantom_cache[nvpce->g_next_index];
			}
		}
		phantom_cache_stats.pcs_replaced_entry++;
	} else {
		phantom_cache_stats.pcs_added_new_entry++;
	}

	/* fill in the new entry and push it onto its hash bucket chain */
	vpce->g_pages_held = pg_mask;
	vpce->g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;
	vpce->g_obj_id = object->phantom_object_id;

	ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);
	vpce->g_next_index = vm_phantom_cache_hash[ghost_hash_index];
	vm_phantom_cache_hash[ghost_hash_index] = ghost_index;

done:
	vm_pageout_vminfo.vm_phantom_cache_added_ghost++;

	if (object->phantom_isssd) {
		OSAddAtomic(1, &sample_period_ghost_added_count_ssd);
	} else {
		OSAddAtomic(1, &sample_period_ghost_added_count);
	}
}


/*
 * Look up the ghost entry covering page 'm', or NULL if none.
 *
 * pg_mask selects the specific page bit within the entry's 4-page group;
 * pass 0 to match the entry regardless of which pages it currently holds
 * (used by vm_phantom_cache_add_ghost).  Returns NULL both when no entry
 * exists for the (object, offset) pair and when an entry exists but does
 * not hold the requested page.
 */
vm_ghost_t
vm_phantom_cache_lookup_ghost(vm_page_t m, uint32_t pg_mask)
{
	uint64_t g_obj_offset;
	uint32_t g_obj_id;
	uint32_t ghost_index;
	vm_object_t object;

	object = VM_PAGE_OBJECT(m);

	if ((g_obj_id = object->phantom_object_id) == 0) {
		/*
		 * no entries in phantom cache for this object
		 */
		return NULL;
	}
	g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;

	ghost_index = vm_phantom_cache_hash[vm_phantom_hash(g_obj_id, g_obj_offset)];

	/* walk the bucket chain; index 0 terminates the chain */
	while (ghost_index) {
		vm_ghost_t vpce;

		vpce = &vm_phantom_cache[ghost_index];

		if (vpce->g_obj_id == g_obj_id && vpce->g_obj_offset == g_obj_offset) {
			if (pg_mask == 0 || (vpce->g_pages_held & pg_mask)) {
				phantom_cache_stats.pcs_lookup_found_page_in_cache++;

				return vpce;
			}
			phantom_cache_stats.pcs_lookup_page_not_in_entry++;

			return NULL;
		}
		ghost_index = vpce->g_next_index;
	}
	phantom_cache_stats.pcs_lookup_entry_not_in_cache++;

	return NULL;
}



/*
 * Note a re-reference of page 'm': if it is present in the phantom cache,
 * clear its bit (it is no longer "only" a ghost) and count the hit in the
 * current sample period — this is the "recently evicted page was read
 * back" signal that vm_phantom_cache_check_pressure() evaluates.
 *
 * Caller must hold vm_page_queue_lock and the page's object lock
 * exclusively (asserted).  NOTE(review): callers appear to invoke this
 * when a page re-enters the cache — confirm against call sites.
 */
void
vm_phantom_cache_update(vm_page_t m)
{
	int pg_mask;
	vm_ghost_t vpce;
	vm_object_t object;

	object = VM_PAGE_OBJECT(m);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(object);

	if (vm_phantom_cache_num_entries == 0) {
		return;
	}

	pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];

	if ((vpce = vm_phantom_cache_lookup_ghost(m, pg_mask))) {
		vpce->g_pages_held &= ~pg_mask;

		phantom_cache_stats.pcs_updated_phantom_state++;
		vm_pageout_vminfo.vm_phantom_cache_found_ghost++;

		if (object->phantom_isssd) {
			OSAddAtomic(1, &sample_period_ghost_found_count_ssd);
		} else {
			OSAddAtomic(1, &sample_period_ghost_found_count);
		}
	}
}


#define PHANTOM_CACHE_DEBUG 1

#if PHANTOM_CACHE_DEBUG

/* Ring buffer recording stats for the last 256 sample periods. */
int sample_period_ghost_counts_indx = 0;

struct {
	uint32_t added;
	uint32_t found;
	uint32_t added_ssd;
	uint32_t found_ssd;
	uint32_t elapsed_ms;
	boolean_t pressure_detected;
} sample_period_ghost_counts[256];

#endif

/*
 * Determine if the file cache is thrashing from sampling interval statistics.
 *
 * Pages added to the phantom cache = pages evicted from the file cache.
 * Pages found in the phantom cache = reads of pages that were recently evicted.
 * Threshold is the latency-dependent number of reads we consider thrashing.
 */
static boolean_t
is_thrashing(uint32_t added, uint32_t found, uint32_t threshold)
{
	/* Ignore normal activity below the threshold. */
	if (added < threshold || found < threshold) {
		return FALSE;
	}

	/*
	 * When thrashing in a way that we can mitigate, most of the pages read
	 * into the file cache were recently evicted, and 'found' will be close
	 * to 'added'.
	 *
	 * When replacing the current working set because a new app is
	 * launched, we see very high read traffic with sporadic phantom cache
	 * hits.
	 *
	 * This is not thrashing, or freeing up memory wouldn't help much
	 * anyway.
	 */
	if (found < added / 2) {
		return FALSE;
	}

	return TRUE;
}

/*
 * the following function is never called
 * from multiple threads simultaneously due
 * to a condition variable used to serialize
 * at the compressor level... thus no need
 * to provide locking for the sample processing
 */
boolean_t
vm_phantom_cache_check_pressure()
{
	clock_sec_t cur_ts_sec;
	clock_nsec_t cur_ts_nsec;
	uint64_t elapsed_msecs_in_eval;
	boolean_t pressure_detected = FALSE;

	clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);

	elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, pc_start_of_eval_period_sec, pc_start_of_eval_period_nsec);

	/*
	 * Reset evaluation period after phantom_cache_eval_period_in_msecs or
	 * whenever vm_phantom_cache_restart_sample has been called.
	 */
	if (elapsed_msecs_in_eval >= phantom_cache_eval_period_in_msecs) {
		pc_need_eval_reset = TRUE;
	}

	if (pc_need_eval_reset == TRUE) {
#if PHANTOM_CACHE_DEBUG
		/*
		 * maintain some info about the last 256 sample periods
		 */
		sample_period_ghost_counts[sample_period_ghost_counts_indx].added = sample_period_ghost_added_count;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].found = sample_period_ghost_found_count;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].added_ssd = sample_period_ghost_added_count_ssd;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].found_ssd = sample_period_ghost_found_count_ssd;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].elapsed_ms = (uint32_t)elapsed_msecs_in_eval;

		sample_period_ghost_counts_indx++;

		if (sample_period_ghost_counts_indx >= 256) {
			sample_period_ghost_counts_indx = 0;
		}
#endif
		/* start a fresh sample period: zero the counters and shift history */
		sample_period_ghost_added_count = 0;
		sample_period_ghost_found_count = 0;
		sample_period_ghost_added_count_ssd = 0;
		sample_period_ghost_found_count_ssd = 0;

		pc_start_of_eval_period_sec = cur_ts_sec;
		pc_start_of_eval_period_nsec = cur_ts_nsec;
		pc_history <<= 1;
		pc_need_eval_reset = FALSE;
	} else {
		/*
		 * Since the thrashing rate is really a function of the read latency of the disk
		 * we have to consider both the SSD and spinning disk case since the file cache
		 * could be backed by either or even both flavors. When the object is first
		 * assigned a phantom_object_id, we query the pager to determine if the
		 * backing media is an SSD and remember that answer in the vm_object. We use
		 * that info to maintain counts for both the SSD and spinning disk cases.
		 */
		if (is_thrashing(sample_period_ghost_added_count,
		    sample_period_ghost_found_count,
		    phantom_cache_thrashing_threshold) ||
		    is_thrashing(sample_period_ghost_added_count_ssd,
		    sample_period_ghost_found_count_ssd,
		    phantom_cache_thrashing_threshold_ssd)) {
			/* Thrashing in the current period: Set bit 0. */
			pc_history |= 1;
		}
	}

	/*
	 * Declare pressure_detected after phantom_cache_contiguous_periods.
	 *
	 * Create a bitmask with the N low bits set. These bits must all be set
	 * in pc_history. The high bits of pc_history are ignored.
	 */
	uint32_t bitmask = (1u << phantom_cache_contiguous_periods) - 1;
	if ((pc_history & bitmask) == bitmask) {
		pressure_detected = TRUE;
	}

	/* Plenty of file-backed memory still resident: suppress the signal. */
	if (vm_page_external_count > ((AVAILABLE_MEMORY) * 50) / 100) {
		pressure_detected = FALSE;
	}

#if PHANTOM_CACHE_DEBUG
	sample_period_ghost_counts[sample_period_ghost_counts_indx].pressure_detected = pressure_detected;
#endif
	return pressure_detected;
}

/*
 * Restart the current sampling because conditions have changed significantly,
 * and we don't want to react to old data.
 *
 * This function can be called from any thread.
 */
void
vm_phantom_cache_restart_sample(void)
{
	pc_need_eval_reset = TRUE;
}