stack.c
/*
 * Copyright (c) 2003-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 *	Kernel stack management routines.
 */

#include <mach/mach_host.h>
#include <mach/mach_types.h>
#include <mach/processor_set.h>

#include <kern/kern_types.h>
#include <kern/lock_group.h>
#include <kern/mach_param.h>
#include <kern/percpu.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <mach_debug.h>
#include <san/kasan.h>

/*
 *	We allocate stacks from generic kernel VM.
 *
 *	The stack_free_list can only be accessed at splsched,
 *	because stack_alloc_try/thread_invoke operate at splsched.
 */

decl_simple_lock_data(static, stack_lock_data);
#define stack_lock()	simple_lock(&stack_lock_data, LCK_GRP_NULL)
#define stack_unlock()	simple_unlock(&stack_lock_data)

#define STACK_CACHE_SIZE	2

static vm_offset_t	stack_free_list;

static unsigned int	stack_free_count, stack_free_hiwat;	/* free list count */
static unsigned int	stack_hiwat;
unsigned int		stack_total;		/* current total count */
unsigned long long	stack_allocs;		/* total count of allocations */

static unsigned int	stack_free_target;
static int		stack_free_delta;

static unsigned int	stack_new_count;	/* total new stack allocations */

static vm_offset_t	stack_addr_mask;

unsigned int		kernel_stack_pages;
vm_offset_t		kernel_stack_size;
vm_offset_t		kernel_stack_mask;
vm_offset_t		kernel_stack_depth_max;

struct stack_cache {
	vm_offset_t	free;
	unsigned int	count;
};
static struct stack_cache PERCPU_DATA(stack_cache);

/*
 *	The next field is at the base of the stack,
 *	so the low end is left unsullied.
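 *	(Stacks grow down, so the "base" is the highest address of
 *	the stack region; the link is kept in the last vm_offset_t
 *	sized slot, which is where stack_next() points.)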
 */
#define stack_next(stack)	\
	(*((vm_offset_t *)((stack) + kernel_stack_size) - 1))

/*
 *	Returns floor(log2(size)): the index of the highest set bit.
 *	The loop must stop at size > 1; stopping at size > 0 would
 *	return the bit length instead, causing roundup_pow2() below
 *	to round a power of two up to twice its value.
 */
static inline int
log2(vm_offset_t size)
{
	int result;
	for (result = 0; size > 1; result++) {
		size >>= 1;
	}
	return result;
}

/*
 *	Round size up to the next power of two (size must be >= 2).
 */
static inline vm_offset_t
roundup_pow2(vm_offset_t size)
{
	return 1UL << (log2(size - 1) + 1);
}

static vm_offset_t stack_alloc_internal(void);
static void stack_free_stack(vm_offset_t);

void
stack_init(void)
{
	simple_lock_init(&stack_lock_data, 0);

	kernel_stack_pages = KERNEL_STACK_SIZE / PAGE_SIZE;
	kernel_stack_size = KERNEL_STACK_SIZE;
	kernel_stack_mask = -KERNEL_STACK_SIZE;
	kernel_stack_depth_max = 0;

	if (PE_parse_boot_argn("kernel_stack_pages",
	    &kernel_stack_pages,
	    sizeof(kernel_stack_pages))) {
		kernel_stack_size = kernel_stack_pages * PAGE_SIZE;
		printf("stack_init: kernel_stack_pages=%d kernel_stack_size=%p\n",
		    kernel_stack_pages, (void *) kernel_stack_size);
	}

	if (kernel_stack_size < round_page(kernel_stack_size)) {
		panic("stack_init: stack size %p not a multiple of page size %d\n",
		    (void *) kernel_stack_size, PAGE_SIZE);
	}

	stack_addr_mask = roundup_pow2(kernel_stack_size) - 1;
	kernel_stack_mask = ~stack_addr_mask;
}

/*
 *	stack_alloc:
 *
 *	Allocate a stack for a thread, may
 *	block.
 */

static vm_offset_t
stack_alloc_internal(void)
{
	vm_offset_t stack = 0;
	spl_t s;
	int flags = 0;
	kern_return_t kr = KERN_SUCCESS;

	s = splsched();
	stack_lock();
	stack_allocs++;
	stack = stack_free_list;
	if (stack != 0) {
		stack_free_list = stack_next(stack);
		stack_free_count--;
	} else {
		if (++stack_total > stack_hiwat) {
			stack_hiwat = stack_total;
		}
		stack_new_count++;
	}
	stack_free_delta--;
	stack_unlock();
	splx(s);

	if (stack == 0) {
		/*
		 * Request guard pages on either side of the stack. Ask
		 * kernel_memory_allocate() for two extra pages to account
		 * for these.
		 */

		flags = KMA_GUARD_FIRST | KMA_GUARD_LAST | KMA_KSTACK | KMA_KOBJECT | KMA_ZERO;
		kr = kernel_memory_allocate(kernel_map, &stack,
		    kernel_stack_size + (2 * PAGE_SIZE),
		    stack_addr_mask,
		    flags,
		    VM_KERN_MEMORY_STACK);
		if (kr != KERN_SUCCESS) {
			panic("stack_alloc: kernel_memory_allocate(size:0x%llx, mask: 0x%llx, flags: 0x%x) failed with %d\n",
			    (uint64_t)(kernel_stack_size + (2 * PAGE_SIZE)),
			    (uint64_t)stack_addr_mask, flags, kr);
		}

		/*
		 * The stack address that comes back is the address of the lower
		 * guard page. Skip past it to get the actual stack base address.
		 */

		stack += PAGE_SIZE;
	}
	return stack;
}

void
stack_alloc(
	thread_t	thread)
{
	assert(thread->kernel_stack == 0);
	machine_stack_attach(thread, stack_alloc_internal());
}

void
stack_handoff(thread_t from, thread_t to)
{
	assert(from == current_thread());
	machine_stack_handoff(from, to);
}

/*
 *	stack_free:
 *
 *	Detach and free the stack for a thread.
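 *	If the detached stack is the thread's reserved stack,
 *	it is kept for later reuse rather than freed.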
 */
void
stack_free(
	thread_t	thread)
{
	vm_offset_t stack = machine_stack_detach(thread);

	assert(stack);
	if (stack != thread->reserved_stack) {
		stack_free_stack(stack);
	}
}

void
stack_free_reserved(
	thread_t	thread)
{
	if (thread->reserved_stack != thread->kernel_stack) {
		stack_free_stack(thread->reserved_stack);
	}
}

static void
stack_free_stack(
	vm_offset_t	stack)
{
	struct stack_cache *cache;
	spl_t s;

#if KASAN_DEBUG
	/* Sanity check - stack should be unpoisoned by now */
	assert(kasan_check_shadow(stack, kernel_stack_size, 0));
#endif

	s = splsched();
	cache = PERCPU_GET(stack_cache);
	if (cache->count < STACK_CACHE_SIZE) {
		stack_next(stack) = cache->free;
		cache->free = stack;
		cache->count++;
	} else {
		stack_lock();
		stack_next(stack) = stack_free_list;
		stack_free_list = stack;
		if (++stack_free_count > stack_free_hiwat) {
			stack_free_hiwat = stack_free_count;
		}
		stack_free_delta++;
		stack_unlock();
	}
	splx(s);
}

/*
 *	stack_alloc_try:
 *
 *	Non-blocking attempt to allocate a
 *	stack for a thread.
 *
 *	Returns TRUE on success.
 *
 *	Called at splsched.
 */
boolean_t
stack_alloc_try(
	thread_t	thread)
{
	struct stack_cache *cache;
	vm_offset_t stack;

	cache = PERCPU_GET(stack_cache);
	stack = cache->free;
	if (stack != 0) {
		cache->free = stack_next(stack);
		cache->count--;
	} else {
		if (stack_free_list != 0) {
			stack_lock();
			stack = stack_free_list;
			if (stack != 0) {
				stack_free_list = stack_next(stack);
				stack_free_count--;
				stack_free_delta--;
			}
			stack_unlock();
		}
	}

	if (stack != 0 || (stack = thread->reserved_stack) != 0) {
		machine_stack_attach(thread, stack);
		return TRUE;
	}

	return FALSE;
}

static unsigned int stack_collect_tick, last_stack_tick;

/*
 *	stack_collect:
 *
 *	Free excess kernel stacks, may
 *	block.
 */
void
stack_collect(void)
{
	if (stack_collect_tick != last_stack_tick) {
		unsigned int target;
		vm_offset_t stack;
		spl_t s;

		s = splsched();
		stack_lock();

		target = stack_free_target + (STACK_CACHE_SIZE * processor_count);
		target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;

		while (stack_free_count > target) {
			stack = stack_free_list;
			stack_free_list = stack_next(stack);
			stack_free_count--; stack_total--;
			stack_unlock();
			splx(s);

			/*
			 * Get the stack base address, then decrement by one page
			 * to account for the lower guard page. Add two extra pages
			 * to the size to account for the guard pages on both ends
			 * that were originally requested when the stack was allocated
			 * back in stack_alloc().
			 */

			stack = (vm_offset_t)vm_map_trunc_page(
				stack,
				VM_MAP_PAGE_MASK(kernel_map));
			stack -= PAGE_SIZE;
			if (vm_map_remove(
				    kernel_map,
				    stack,
				    stack + kernel_stack_size + (2 * PAGE_SIZE),
				    VM_MAP_REMOVE_KUNWIRE)
			    != KERN_SUCCESS) {
				panic("stack_collect: vm_map_remove");
			}
			stack = 0;

			s = splsched();
			stack_lock();
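
			/*
			 * The free list and stack_free_delta may have changed
			 * while the lock was dropped for vm_map_remove(), so
			 * recompute the target before re-testing the loop
			 * condition.
			 */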
			target = stack_free_target + (STACK_CACHE_SIZE * processor_count);
			target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;
		}

		last_stack_tick = stack_collect_tick;

		stack_unlock();
		splx(s);
	}
}

/*
 *	compute_stack_target:
 *
 *	Computes a new target free list count
 *	based on recent alloc / free activity.
 *
 *	Limits stack collection to once per
 *	computation period.
 */
void
compute_stack_target(
	__unused void	*arg)
{
	spl_t s;

	s = splsched();
	stack_lock();

	if (stack_free_target > 5) {
		stack_free_target = (4 * stack_free_target) / 5;
	} else if (stack_free_target > 0) {
		stack_free_target--;
	}

	stack_free_target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;

	stack_free_delta = 0;
	stack_collect_tick++;

	stack_unlock();
	splx(s);
}

/* OBSOLETE */
void	stack_privilege(
	thread_t	thread);

void
stack_privilege(
	__unused thread_t	thread)
{
	/* OBSOLETE */
}

/*
 *	Return info on stack usage for threads in a specific processor set
 */
kern_return_t
processor_set_stack_usage(
	processor_set_t	pset,
	unsigned int	*totalp,
	vm_size_t	*spacep,
	vm_size_t	*residentp,
	vm_size_t	*maxusagep,
	vm_offset_t	*maxstackp)
{
#if !MACH_DEBUG
	return KERN_NOT_SUPPORTED;
#else
	unsigned int total;
	vm_size_t maxusage;
	vm_offset_t maxstack;

	thread_t *thread_list;
	thread_t thread;

	unsigned int actual;	/* this many things */
	unsigned int i;

	vm_size_t size, size_needed;
	void *addr;

	if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
		return KERN_INVALID_ARGUMENT;
	}

	size = 0;
	addr = NULL;

	for (;;) {
		lck_mtx_lock(&tasks_threads_lock);

		actual = threads_count;

		/* do we have the memory we need? */

		size_needed = actual * sizeof(thread_t);
		if (size_needed <= size) {
			break;
		}

		lck_mtx_unlock(&tasks_threads_lock);

		if (size != 0) {
			kheap_free(KHEAP_TEMP, addr, size);
		}

		assert(size_needed > 0);
		size = size_needed;

		addr = kheap_alloc(KHEAP_TEMP, size, Z_WAITOK);
		if (addr == 0) {
			return KERN_RESOURCE_SHORTAGE;
		}
	}

	/* OK, have memory and list is locked */
	thread_list = (thread_t *) addr;
	for (i = 0, thread = (thread_t)(void *) queue_first(&threads);
	    !queue_end(&threads, (queue_entry_t) thread);
	    thread = (thread_t)(void *) queue_next(&thread->threads)) {
		thread_reference_internal(thread);
		thread_list[i++] = thread;
	}
	assert(i <= actual);

	lck_mtx_unlock(&tasks_threads_lock);

	/* calculate maxusage and free thread references */

	total = 0;
	maxusage = 0;
	maxstack = 0;
	while (i > 0) {
		thread_t threadref = thread_list[--i];

		if (threadref->kernel_stack != 0) {
			total++;
		}

		thread_deallocate(threadref);
	}

	if (size != 0) {
		kheap_free(KHEAP_TEMP, addr, size);
	}

	*totalp = total;
	*residentp = *spacep = total * round_page(kernel_stack_size);
	*maxusagep = maxusage;
	*maxstackp = maxstack;
	return KERN_SUCCESS;

#endif /* MACH_DEBUG */
}

vm_offset_t
min_valid_stack_address(void)
{
	return (vm_offset_t)vm_map_min(kernel_map);
}

vm_offset_t
max_valid_stack_address(void)
{
	return (vm_offset_t)vm_map_max(kernel_map);
}