/* zalloc_internal.h */
1 /* 2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 /* 29 * @OSF_COPYRIGHT@ 30 */ 31 /* 32 * Mach Operating System 33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University 34 * All Rights Reserved. 35 * 36 * Permission to use, copy, modify and distribute this software and its 37 * documentation is hereby granted, provided that both the copyright 38 * notice and this permission notice appear in all copies of the 39 * software, derivative works or modified versions, and any portions 40 * thereof, and that both notices appear in supporting documentation. 41 * 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 43 * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 45 * 46 * Carnegie Mellon requests users of this software to return to 47 * 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 49 * School of Computer Science 50 * Carnegie Mellon University 51 * Pittsburgh PA 15213-3890 52 * 53 * any improvements or extensions that they make and grant Carnegie Mellon 54 * the rights to redistribute these changes. 55 */ 56 /* 57 */ 58 59 #ifndef _KERN_ZALLOC_INTERNAL_H_ 60 #define _KERN_ZALLOC_INTERNAL_H_ 61 62 #include <kern/zalloc.h> 63 #include <kern/locks.h> 64 #include <kern/btlog.h> 65 #include <kern/simple_lock.h> 66 67 #include <os/atomic_private.h> 68 #include <sys/queue.h> 69 70 #if KASAN 71 #include <san/kasan.h> 72 #include <kern/spl.h> 73 #endif /* !KASAN */ 74 75 /*! 76 * @file <kern/zalloc_internal.h> 77 * 78 * @abstract 79 * Exposes some guts of zalloc to interact with the VM, debugging, copyio and 80 * kalloc subsystems. 81 */ 82 83 __BEGIN_DECLS 84 85 #pragma GCC visibility push(hidden) 86 87 #if CONFIG_GZALLOC 88 typedef struct gzalloc_data { 89 uint32_t gzfc_index; 90 vm_offset_t *gzfc; 91 } gzalloc_data_t; 92 #endif 93 94 /* 95 * A zone is a collection of fixed size blocks for which there 96 * is fast allocation/deallocation access. Kernel routines can 97 * use zones to manage data structures dynamically, creating a zone 98 * for each type of data structure to be managed. 99 * 100 */ 101 102 /*! 103 * @typedef zone_pva_t 104 * 105 * @brief 106 * Type used to point to a page virtual address in the zone allocator. 107 * 108 * @description 109 * - Valid pages have the top bit set. 110 * - 0 represents the "NULL" page 111 * - non 0 values with the top bit cleared do not represent any valid page. 112 * the zone freelists use this space to encode "queue" addresses. 
113 */ 114 typedef struct zone_packed_virtual_address { 115 uint32_t packed_address; 116 } zone_pva_t; 117 118 /*! 119 * @struct zone_stats 120 * 121 * @abstract 122 * Per-cpu structure used for basic zone stats. 123 * 124 * @discussion 125 * The values aren't scaled for per-cpu zones. 126 */ 127 struct zone_stats { 128 uint64_t zs_mem_allocated; 129 uint64_t zs_mem_freed; 130 uint32_t zs_poison_seqno; /* counter for poisoning every N frees */ 131 uint32_t zs_alloc_rr; /* allocation rr bias */ 132 }; 133 134 STAILQ_HEAD(zone_depot, zone_magazine); 135 136 struct zone { 137 /* 138 * Readonly / rarely written fields 139 */ 140 141 /* 142 * The first 4 fields match a zone_view. 143 * 144 * z_self points back to the zone when the zone is initialized, 145 * or is NULL else. 146 */ 147 struct zone *z_self; 148 zone_stats_t z_stats; 149 const char *z_name; 150 struct zone_view *z_views; 151 152 struct thread *z_expander; 153 struct zone_cache *__zpercpu z_pcpu_cache; 154 155 uint16_t z_chunk_pages; /* size used for more memory in pages */ 156 uint16_t z_chunk_elems; /* count of allocations per chunk */ 157 uint16_t z_elems_rsv; /* maintain a free reserve of elements */ 158 uint16_t z_elem_size; /* size of an element */ 159 160 uint64_t 161 /* 162 * Lifecycle state (Mutable after creation) 163 */ 164 z_destroyed :1, /* zone is (being) destroyed */ 165 z_async_refilling :1, /* asynchronous allocation pending? */ 166 z_replenish_wait :1, /* someone is waiting on the replenish thread */ 167 z_expanding_wait :1, /* is thread waiting for expansion? 
*/ 168 z_expander_vm_priv :1, /* a vm privileged thread is expanding */ 169 170 /* 171 * Security sensitive configuration bits 172 */ 173 z_allows_foreign :1, /* allow non-zalloc space */ 174 z_destructible :1, /* zone can be zdestroy()ed */ 175 kalloc_heap :2, /* zone_kheap_id_t when part of a kalloc heap */ 176 z_noencrypt :1, /* do not encrypt pages when hibernating */ 177 z_submap_idx :2, /* a Z_SUBMAP_IDX_* value */ 178 z_va_sequester :1, /* page sequester: no VA reuse with other zones */ 179 z_free_zeroes :1, /* clear memory of elements on free and assert on alloc */ 180 181 /* 182 * Behavior configuration bits 183 */ 184 z_percpu :1, /* the zone is percpu */ 185 z_permanent :1, /* the zone allocations are permanent */ 186 z_replenishes :1, /* uses the async replenish mechanism for VM */ 187 z_nocaching :1, /* disallow zone caching for this zone */ 188 collectable :1, /* garbage collect empty pages */ 189 exhaustible :1, /* merely return if empty? */ 190 expandable :1, /* expand zone (with message)? */ 191 no_callout :1, 192 193 _reserved :26, 194 195 /* 196 * Debugging features 197 */ 198 alignment_required :1, /* element alignment needs to be preserved */ 199 gzalloc_tracked :1, /* this zone is tracked by gzalloc */ 200 gzalloc_exempt :1, /* this zone doesn't participate with gzalloc */ 201 kasan_fakestacks :1, 202 kasan_noquarantine :1, /* whether to use the kasan quarantine */ 203 tag_zone_index :7, 204 tags :1, 205 tags_inline :1, 206 zleak_on :1, /* Are we collecting allocation information? */ 207 zone_logging :1; /* Enable zone logging for this zone. */ 208 209 /* 210 * often mutated fields 211 */ 212 213 lck_spin_t z_lock; 214 struct zone_depot z_recirc; 215 216 /* 217 * Page accounting (wired / VA) 218 * 219 * Those numbers are unscaled for z_percpu zones 220 * (zone_scale_for_percpu() needs to be used to find the true value). 
221 */ 222 uint32_t z_wired_max; /* how large can this zone grow */ 223 uint32_t z_wired_hwm; /* z_wired_cur high watermark */ 224 uint32_t z_wired_cur; /* number of pages used by this zone */ 225 uint32_t z_wired_empty; /* pages collectable by GC */ 226 uint32_t z_va_cur; /* amount of VA used by this zone */ 227 228 /* 229 * list of metadata structs, which maintain per-page free element lists 230 * 231 * Note: Due to the index packing in page metadata, 232 * these pointers can't be at the beginning of the zone struct. 233 */ 234 zone_pva_t z_pageq_empty; /* populated, completely empty pages */ 235 zone_pva_t z_pageq_partial;/* populated, partially filled pages */ 236 zone_pva_t z_pageq_full; /* populated, completely full pages */ 237 zone_pva_t z_pageq_va; /* non-populated VA pages */ 238 239 /* 240 * Zone statistics 241 * 242 * z_contention_wma: 243 * weighted moving average of the number of contentions per second, 244 * in Z_CONTENTION_WMA_UNIT units (fixed point decimal). 245 * 246 * z_contention_cur: 247 * count of recorded contentions that will be fused in z_contention_wma 248 * at the next period. 249 * 250 * z_recirc_cur: 251 * number of magazines in the recirculation depot. 252 * 253 * z_elems_free: 254 * number of free elements in the zone. 255 * 256 * z_elems_{min,max}: 257 * tracks the low/high watermark of z_elems_free for the current 258 * weighted moving average period. 259 * 260 * z_elems_free_wss: 261 * weighted moving average of the (z_elems_free_max - z_elems_free_min) 262 * amplited which is used by the GC for trim operations. 263 * 264 * z_elems_avail: 265 * number of elements in the zone (at all). 
266 */ 267 #define Z_CONTENTION_WMA_UNIT (1u << 8) 268 uint32_t z_contention_wma; 269 uint32_t z_contention_cur; 270 uint32_t z_recirc_cur; 271 uint32_t z_elems_free_max; 272 uint32_t z_elems_free_wss; 273 uint32_t z_elems_free_min; 274 uint32_t z_elems_free; /* Number of free elements */ 275 uint32_t z_elems_avail; /* Number of elements available */ 276 277 #if CONFIG_ZLEAKS 278 uint32_t zleak_capture; /* per-zone counter for capturing every N allocations */ 279 #endif 280 #if CONFIG_GZALLOC 281 gzalloc_data_t gz; 282 #endif 283 #if KASAN_ZALLOC 284 uint32_t z_kasan_redzone; 285 spl_t z_kasan_spl; 286 #endif 287 #if DEBUG || DEVELOPMENT || CONFIG_ZLEAKS 288 /* zone logging structure to hold stacks and element references to those stacks. */ 289 btlog_t *zlog_btlog; 290 #endif 291 }; 292 293 294 __options_decl(zone_security_options_t, uint64_t, { 295 /* 296 * Zsecurity option to enable sequestering VA of zones 297 */ 298 ZSECURITY_OPTIONS_SEQUESTER = 0x00000001, 299 /* 300 * Zsecurity option to enable creating separate kalloc zones for 301 * bags of bytes 302 */ 303 ZSECURITY_OPTIONS_SUBMAP_USER_DATA = 0x00000004, 304 /* 305 * Zsecurity option to enable sequestering of kalloc zones used by 306 * kexts (KHEAP_KEXT heap) 307 */ 308 ZSECURITY_OPTIONS_SEQUESTER_KEXT_KALLOC = 0x00000008, 309 /* 310 * Zsecurity option to enable strict free of iokit objects to zone 311 * or heap they were allocated from. 312 */ 313 ZSECURITY_OPTIONS_STRICT_IOKIT_FREE = 0x00000010, 314 }); 315 316 #define KALLOC_MINALIGN (1 << KALLOC_LOG2_MINALIGN) 317 #define KALLOC_DLUT_SIZE (2048 / KALLOC_MINALIGN) 318 319 struct kheap_zones { 320 struct kalloc_zone_cfg *cfg; 321 struct kalloc_heap *views; 322 zone_kheap_id_t heap_id; 323 uint16_t max_k_zone; 324 uint8_t dlut[KALLOC_DLUT_SIZE]; /* table of indices into k_zone[] */ 325 uint8_t k_zindex_start; 326 /* If there's no hit in the DLUT, then start searching from k_zindex_start. 
*/ 327 zone_t *k_zone; 328 }; 329 330 extern zone_security_options_t zsecurity_options; 331 extern zone_id_t _Atomic num_zones; 332 extern uint32_t zone_view_count; 333 extern struct zone zone_array[]; 334 extern const char * const kalloc_heap_names[KHEAP_ID_COUNT]; 335 extern bool panic_include_zprint; 336 #if CONFIG_ZLEAKS 337 extern bool panic_include_ztrace; 338 extern struct ztrace *top_ztrace; 339 #endif 340 extern mach_memory_info_t *panic_kext_memory_info; 341 extern vm_size_t panic_kext_memory_size; 342 extern unsigned int zone_map_jetsam_limit; 343 344 #define zone_index_foreach(i) \ 345 for (zone_id_t i = 1, num_zones_##i = os_atomic_load(&num_zones, acquire); \ 346 i < num_zones_##i; i++) 347 348 #define zone_foreach(z) \ 349 for (zone_t z = &zone_array[1], \ 350 last_zone_##z = &zone_array[os_atomic_load(&num_zones, acquire)]; \ 351 z < last_zone_##z; z++) 352 353 struct zone_map_range { 354 vm_offset_t min_address; 355 vm_offset_t max_address; 356 } __attribute__((aligned(2 * sizeof(vm_offset_t)))); 357 358 __pure2 359 static inline vm_offset_t 360 zone_elem_size(zone_t zone) 361 { 362 return zone->z_elem_size; 363 } 364 365 static inline uint32_t 366 zone_count_allocated(zone_t zone) 367 { 368 return zone->z_elems_avail - zone->z_elems_free; 369 } 370 371 static inline vm_size_t 372 zone_scale_for_percpu(zone_t zone, vm_size_t size) 373 { 374 if (zone->z_percpu) { 375 size *= zpercpu_count(); 376 } 377 return size; 378 } 379 380 static inline vm_size_t 381 zone_size_wired(zone_t zone) 382 { 383 /* 384 * this either require the zone lock, 385 * or to be used for statistics purposes only. 
386 */ 387 vm_size_t size = ptoa(os_atomic_load(&zone->z_wired_cur, relaxed)); 388 return zone_scale_for_percpu(zone, size); 389 } 390 391 static inline vm_size_t 392 zone_size_free(zone_t zone) 393 { 394 return zone_scale_for_percpu(zone, 395 (vm_size_t)zone->z_elem_size * zone->z_elems_free); 396 } 397 398 static inline vm_size_t 399 zone_size_allocated(zone_t zone) 400 { 401 return zone_scale_for_percpu(zone, 402 (vm_size_t)zone->z_elem_size * zone_count_allocated(zone)); 403 } 404 405 static inline vm_size_t 406 zone_size_wasted(zone_t zone) 407 { 408 return zone_size_wired(zone) - zone_scale_for_percpu(zone, 409 (vm_size_t)zone->z_elem_size * zone->z_elems_avail); 410 } 411 412 /* 413 * For sysctl kern.zones_collectable_bytes used by memory_maintenance to check if a 414 * userspace reboot is needed. The only other way to query for this information 415 * is via mach_memory_info() which is unavailable on release kernels. 416 */ 417 extern uint64_t get_zones_collectable_bytes(void); 418 419 /*! 420 * @enum zone_gc_level_t 421 * 422 * @const ZONE_GC_TRIM 423 * Request a trimming GC: it will trim allocations in excess 424 * of the working set size estimate only. 425 * 426 * @const ZONE_GC_DRAIN 427 * Request a draining GC: this is an aggressive mode that will 428 * cause all caches to be drained and all free pages returned to the system. 429 * 430 * @const ZONE_GC_JETSAM 431 * Request to consider a jetsam, and then fallback to @c ZONE_GC_TRIM or 432 * @c ZONE_GC_DRAIN depending on the state of the zone map. 433 * To avoid deadlocks, only @c vm_pageout_garbage_collect() should ever 434 * request a @c ZONE_GC_JETSAM level. 435 */ 436 __enum_closed_decl(zone_gc_level_t, uint32_t, { 437 ZONE_GC_TRIM, 438 ZONE_GC_DRAIN, 439 ZONE_GC_JETSAM, 440 }); 441 442 /*! 443 * @function zone_gc 444 * 445 * @brief 446 * Reduces memory used by zones by trimming caches and freelists. 
447 * 448 * @discussion 449 * @c zone_gc() is called: 450 * - by the pageout daemon when the system needs more free pages. 451 * - by the VM when contiguous page allocation requests get stuck 452 * (see vm_page_find_contiguous()). 453 * 454 * @param level The zone GC level requested. 455 */ 456 extern void zone_gc(zone_gc_level_t level); 457 458 extern void zone_gc_trim(void); 459 extern void zone_gc_drain(void); 460 461 #define ZONE_WSS_UPDATE_PERIOD 10 462 /*! 463 * @function compute_zone_working_set_size 464 * 465 * @brief 466 * Recomputes the working set size for every zone 467 * 468 * @discussion 469 * This runs about every @c ZONE_WSS_UPDATE_PERIOD seconds (10), 470 * computing an exponential moving average with a weight of 75%, 471 * so that the history of the last minute is the dominating factor. 472 */ 473 extern void compute_zone_working_set_size(void *); 474 475 /* Debug logging for zone-map-exhaustion jetsams. */ 476 extern void get_zone_map_size(uint64_t *current_size, uint64_t *capacity); 477 extern void get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size); 478 479 /* Bootstrap zone module (create zone zone) */ 480 extern void zone_bootstrap(void); 481 482 /*! 483 * @function zone_foreign_mem_init 484 * 485 * @brief 486 * Steal memory from pmap (prior to initialization of zalloc) 487 * for the special vm zones that allow foreign memory and store 488 * the range so as to facilitate range checking in zfree. 489 */ 490 __startup_func 491 extern vm_offset_t zone_foreign_mem_init( 492 vm_size_t size); 493 494 /*! 495 * @function zone_get_foreign_alloc_size 496 * 497 * @brief 498 * Compute the correct size (greater than @c ptoa(min_pages)) that is a multiple 499 * of the allocation granule for the zone with the given creation flags and 500 * element size. 
501 */ 502 __startup_func 503 extern vm_size_t zone_get_foreign_alloc_size( 504 const char *name __unused, 505 vm_size_t elem_size, 506 zone_create_flags_t flags, 507 uint16_t min_pages); 508 509 /*! 510 * @function zone_cram_foreign 511 * 512 * @brief 513 * Cram memory allocated with @c zone_foreign_mem_init() into a zone. 514 * 515 * @param zone The zone to cram memory into. 516 * @param newmem The base address for the memory to cram. 517 * @param size The size of the memory to cram into the zone. 518 */ 519 __startup_func 520 extern void zone_cram_foreign( 521 zone_t zone, 522 vm_offset_t newmem, 523 vm_size_t size); 524 525 extern bool zone_maps_owned( 526 vm_address_t addr, 527 vm_size_t size); 528 529 extern void zone_map_sizes( 530 vm_map_size_t *psize, 531 vm_map_size_t *pfree, 532 vm_map_size_t *plargest_free); 533 534 extern bool 535 zone_map_nearing_exhaustion(void); 536 537 #if defined(__LP64__) 538 #define ZONE_POISON 0xdeadbeefdeadbeef 539 #else 540 #define ZONE_POISON 0xdeadbeef 541 #endif 542 543 static inline vm_tag_t 544 zalloc_flags_get_tag(zalloc_flags_t flags) 545 { 546 return (vm_tag_t)((flags & Z_VM_TAG_MASK) >> Z_VM_TAG_SHIFT); 547 } 548 549 extern void *zalloc_ext( 550 zone_t zone, 551 zone_stats_t zstats, 552 zalloc_flags_t flags); 553 554 extern void zfree_ext( 555 zone_t zone, 556 zone_stats_t zstats, 557 void *addr); 558 559 /*! 560 * @function zone_replenish_configure 561 * 562 * @brief 563 * Used by zones backing the VM to maintain a reserve of free elements. 564 * 565 * @discussion 566 * This function should not be used by anyone else than the VM. 567 */ 568 extern void zone_replenish_configure( 569 zone_t zone); 570 571 extern vm_size_t zone_element_size( 572 void *addr, 573 zone_t *z); 574 575 /*! 576 * @function zone_owns 577 * 578 * @abstract 579 * This function is a soft version of zone_require that checks if a given 580 * pointer belongs to the specified zone and should not be used outside 581 * allocator code. 
582 * 583 * @discussion 584 * Note that zone_owns() can only work with: 585 * - zones not allowing foreign memory 586 * - zones in the general submap. 587 * 588 * @param zone the zone the address needs to belong to. 589 * @param addr the element address to check. 590 */ 591 extern bool zone_owns( 592 zone_t zone, 593 void *addr); 594 595 /* 596 * Structure for keeping track of a backtrace, used for leak detection. 597 * This is in the .h file because it is used during panic, see kern/debug.c 598 * A non-zero size indicates that the trace is in use. 599 */ 600 struct ztrace { 601 vm_size_t zt_size; /* How much memory are all the allocations referring to this trace taking up? */ 602 uint32_t zt_depth; /* depth of stack (0 to MAX_ZTRACE_DEPTH) */ 603 void* zt_stack[MAX_ZTRACE_DEPTH]; /* series of return addresses from OSBacktrace */ 604 uint32_t zt_collisions; /* How many times did a different stack land here while it was occupied? */ 605 uint32_t zt_hit_count; /* for determining effectiveness of hash function */ 606 }; 607 608 #ifndef VM_MAX_TAG_ZONES 609 #error MAX_TAG_ZONES 610 #endif 611 #if VM_MAX_TAG_ZONES 612 613 extern uint32_t zone_index_from_tag_index( 614 uint32_t tag_zone_index, 615 vm_size_t *elem_size); 616 617 #endif /* VM_MAX_TAG_ZONES */ 618 619 static inline void 620 zone_lock(zone_t zone) 621 { 622 #if KASAN_ZALLOC 623 spl_t s = 0; 624 if (zone->kasan_fakestacks) { 625 s = splsched(); 626 } 627 #endif /* KASAN_ZALLOC */ 628 lck_spin_lock(&zone->z_lock); 629 #if KASAN_ZALLOC 630 zone->z_kasan_spl = s; 631 #endif /* KASAN_ZALLOC */ 632 } 633 634 static inline void 635 zone_unlock(zone_t zone) 636 { 637 #if KASAN_ZALLOC 638 spl_t s = zone->z_kasan_spl; 639 zone->z_kasan_spl = 0; 640 #endif /* KASAN_ZALLOC */ 641 lck_spin_unlock(&zone->z_lock); 642 #if KASAN_ZALLOC 643 if (zone->kasan_fakestacks) { 644 splx(s); 645 } 646 #endif /* KASAN_ZALLOC */ 647 } 648 649 #if CONFIG_GZALLOC 650 void gzalloc_init(vm_size_t); 651 void gzalloc_zone_init(zone_t); 652 
void gzalloc_empty_free_cache(zone_t); 653 boolean_t gzalloc_enabled(void); 654 655 vm_offset_t gzalloc_alloc(zone_t, zone_stats_t zstats, zalloc_flags_t flags); 656 void gzalloc_free(zone_t, zone_stats_t zstats, void *); 657 boolean_t gzalloc_element_size(void *, zone_t *, vm_size_t *); 658 #endif /* CONFIG_GZALLOC */ 659 660 #define MAX_ZONE_NAME 32 /* max length of a zone name we can take from the boot-args */ 661 int track_this_zone(const char *zonename, const char *logname); 662 663 #if DEBUG || DEVELOPMENT 664 extern boolean_t run_zone_test(void); 665 extern void zone_gc_replenish_test(void); 666 extern void zone_alloc_replenish_test(void); 667 extern vm_size_t zone_element_info(void *addr, vm_tag_t * ptag); 668 extern bool zalloc_disable_copyio_check; 669 #else 670 #define zalloc_disable_copyio_check false 671 #endif /* DEBUG || DEVELOPMENT */ 672 673 #pragma GCC visibility pop 674 675 __END_DECLS 676 677 #endif /* _KERN_ZALLOC_INTERNAL_H_ */