kern_support.c
1 /* 2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 /* Copyright (c) 1995-2005 Apple Computer, Inc. 
All Rights Reserved */ 29 /* 30 * pthread_synch.c 31 */ 32 33 #pragma mark - Front Matter 34 35 #define _PTHREAD_CONDATTR_T 36 #define _PTHREAD_COND_T 37 #define _PTHREAD_MUTEXATTR_T 38 #define _PTHREAD_MUTEX_T 39 #define _PTHREAD_RWLOCKATTR_T 40 #define _PTHREAD_RWLOCK_T 41 42 #undef pthread_mutexattr_t 43 #undef pthread_mutex_t 44 #undef pthread_condattr_t 45 #undef pthread_cond_t 46 #undef pthread_rwlockattr_t 47 #undef pthread_rwlock_t 48 49 #include <sys/cdefs.h> 50 #include <os/log.h> 51 52 // <rdar://problem/26158937> panic() should be marked noreturn 53 extern void panic(const char *string, ...) __printflike(1,2) __dead2; 54 55 #include <sys/param.h> 56 #include <sys/queue.h> 57 #include <sys/resourcevar.h> 58 //#include <sys/proc_internal.h> 59 #include <sys/kauth.h> 60 #include <sys/systm.h> 61 #include <sys/timeb.h> 62 #include <sys/times.h> 63 #include <sys/acct.h> 64 #include <sys/kernel.h> 65 #include <sys/wait.h> 66 #include <sys/signalvar.h> 67 #include <sys/sysctl.h> 68 #include <sys/syslog.h> 69 #include <sys/stat.h> 70 #include <sys/lock.h> 71 #include <sys/kdebug.h> 72 //#include <sys/sysproto.h> 73 #include <sys/vm.h> 74 #include <sys/user.h> /* for coredump */ 75 #include <sys/proc_info.h> /* for fill_procworkqueue */ 76 77 #include <mach/mach_port.h> 78 #include <mach/mach_types.h> 79 #include <mach/semaphore.h> 80 #include <mach/sync_policy.h> 81 #include <mach/task.h> 82 #include <mach/vm_prot.h> 83 #include <kern/kern_types.h> 84 #include <kern/task.h> 85 #include <kern/clock.h> 86 #include <mach/kern_return.h> 87 #include <kern/thread.h> 88 #include <kern/zalloc.h> 89 #include <kern/sched_prim.h> /* for thread_exception_return */ 90 #include <kern/processor.h> 91 #include <kern/assert.h> 92 #include <mach/mach_vm.h> 93 #include <mach/mach_param.h> 94 #include <mach/thread_status.h> 95 #include <mach/thread_policy.h> 96 #include <mach/message.h> 97 #include <mach/port.h> 98 //#include <vm/vm_protos.h> 99 #include <vm/vm_fault.h> 100 
#include <vm/vm_map.h> 101 #include <mach/thread_act.h> /* for thread_resume */ 102 #include <machine/machine_routines.h> 103 #include <mach/shared_region.h> 104 105 #include <libkern/OSAtomic.h> 106 #include <libkern/libkern.h> 107 108 #include "kern_internal.h" 109 110 #ifndef WQ_SETUP_EXIT_THREAD 111 #define WQ_SETUP_EXIT_THREAD 8 112 #endif 113 114 // XXX: Ditto for thread tags from kern/thread.h 115 #define THREAD_TAG_MAINTHREAD 0x1 116 #define THREAD_TAG_PTHREAD 0x10 117 #define THREAD_TAG_WORKQUEUE 0x20 118 119 lck_grp_attr_t *pthread_lck_grp_attr; 120 lck_grp_t *pthread_lck_grp; 121 lck_attr_t *pthread_lck_attr; 122 123 #define C_32_STK_ALIGN 16 124 #define C_64_STK_ALIGN 16 125 126 // WORKQ use the largest alignment any platform needs 127 #define C_WORKQ_STK_ALIGN 16 128 129 #if defined(__arm64__) 130 /* Pull the pthread_t into the same page as the top of the stack so we dirty one less page. 131 * <rdar://problem/19941744> The _pthread struct at the top of the stack shouldn't be page-aligned 132 */ 133 #define PTHREAD_T_OFFSET (12*1024) 134 #else 135 #define PTHREAD_T_OFFSET 0 136 #endif 137 138 /* 139 * Flags filed passed to bsdthread_create and back in pthread_start 140 31 <---------------------------------> 0 141 _________________________________________ 142 | flags(8) | policy(8) | importance(16) | 143 ----------------------------------------- 144 */ 145 146 #define PTHREAD_START_CUSTOM 0x01000000 // <rdar://problem/34501401> 147 #define PTHREAD_START_SETSCHED 0x02000000 148 // was PTHREAD_START_DETACHED 0x04000000 149 #define PTHREAD_START_QOSCLASS 0x08000000 150 #define PTHREAD_START_TSD_BASE_SET 0x10000000 151 #define PTHREAD_START_SUSPENDED 0x20000000 152 #define PTHREAD_START_QOSCLASS_MASK 0x00ffffff 153 #define PTHREAD_START_POLICY_BITSHIFT 16 154 #define PTHREAD_START_POLICY_MASK 0xff 155 #define PTHREAD_START_IMPORTANCE_MASK 0xffff 156 157 #define SCHED_OTHER POLICY_TIMESHARE 158 #define SCHED_FIFO POLICY_FIFO 159 #define SCHED_RR POLICY_RR 160 
#define BASEPRI_DEFAULT 31

/* Non-zero enables the PTHREAD_TRACE kdebug tracepoints in this file. */
uint32_t pthread_debug_tracing = 1;

/*
 * Default mutex policy reported back to userspace in the bsdthread_register
 * reply (data.mutex_default_policy).  May be overridden at boot via the
 * "pthread_mutex_default_policy" boot-arg (see _pthread_init) and at runtime
 * through the kern.pthread_mutex_default_policy sysctl below.
 */
static uint32_t pthread_mutex_default_policy;

SYSCTL_INT(_kern, OID_AUTO, pthread_mutex_default_policy, CTLFLAG_RW | CTLFLAG_LOCKED,
    &pthread_mutex_default_policy, 0, "");

#pragma mark - Process/Thread Setup/Teardown syscalls

/*
 * Compute an ASLR-slid base address hint for pthread stack allocations in
 * process 'p'.  The layout is architecture specific; the result is stored on
 * the proc at registration time (see _bsdthread_register) and later consumed
 * by workq_create_threadstack() and returned to userspace in the
 * registration reply.
 */
static mach_vm_offset_t
stack_addr_hint(proc_t p, vm_map_t vmap)
{
	mach_vm_offset_t stackaddr;
	mach_vm_offset_t aslr_offset;
	bool proc64bit = proc_is64bit(p);
	bool proc64bit_data = proc_is64bit_data(p);

	// We can't safely take random values % something unless its a power-of-two
	_Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two");

#if defined(__i386__) || defined(__x86_64__)
	(void)proc64bit_data;
	if (proc64bit) {
		// Matches vm_map_get_max_aslr_slide_pages's image shift in xnu
		aslr_offset = random() % (1 << 28); // about 512 stacks
	} else {
		// Actually bigger than the image shift, we've got ~256MB to work with
		aslr_offset = random() % (16 * PTH_DEFAULT_STACKSIZE);
	}
	// Slide must be page-aligned for the target map.
	aslr_offset = vm_map_trunc_page_mask(aslr_offset, vm_map_page_mask(vmap));
	if (proc64bit) {
		// Above nanomalloc range (see NANOZONE_SIGNATURE)
		stackaddr = 0x700000000000 + aslr_offset;
	} else {
		stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386 + aslr_offset;
	}
#elif defined(__arm__) || defined(__arm64__)
	user_addr_t main_thread_stack_top = 0;
	if (pthread_kern->proc_get_user_stack) {
		main_thread_stack_top = pthread_kern->proc_get_user_stack(p);
	}
	if (proc64bit && main_thread_stack_top) {
		// The main thread stack position is randomly slid by xnu (c.f.
		// load_main() in mach_loader.c), so basing pthread stack allocations
		// where the main thread stack ends is already ASLRd and doing so
		// avoids creating a gap in the process address space that may cause
		// extra PTE memory usage. rdar://problem/33328206
		stackaddr = vm_map_trunc_page_mask((vm_map_offset_t)main_thread_stack_top,
				vm_map_page_mask(vmap));
	} else {
		// vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better
		aslr_offset = random() % ((proc64bit ? 4 : 2) * PTH_DEFAULT_STACKSIZE);
		aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset,
				vm_map_page_mask(vmap));
		if (proc64bit) {
			// 64 stacks below shared region
			stackaddr = SHARED_REGION_BASE_ARM64 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset;
		} else {
			// If you try to slide down from this point, you risk ending up in memory consumed by malloc
			if (proc64bit_data) {
				stackaddr = SHARED_REGION_BASE_ARM64_32;
			} else {
				stackaddr = SHARED_REGION_BASE_ARM;
			}

			stackaddr -= 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
		}
	}
#else
#error Need to define a stack address hint for this architecture
#endif
	return stackaddr;
}

/*
 * Unpack a userspace pthread_priority_t into a thread QoS policy.
 *
 * Returns false (and leaves *data partially filled) when the priority does
 * not encode a QoS tier, or when the relative priority falls outside
 * [THREAD_QOS_MIN_TIER_IMPORTANCE, 0]; returns true otherwise.
 */
static bool
_pthread_priority_to_policy(pthread_priority_t priority,
		thread_qos_policy_data_t *data)
{
	data->qos_tier = _pthread_priority_thread_qos(priority);
	data->tier_importance = _pthread_priority_relpri(priority);
	if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 ||
			data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
		return false;
	}
	return true;
}

/**
 * bsdthread_create system call. Used by pthread_create.
 *
 * Creates and configures a new kernel thread for the calling task: sets its
 * TSD base, initial register state (entry point, args, stack pointer), and
 * scheduling/QoS policy, then resumes it unless PTHREAD_START_SUSPENDED was
 * passed.  On success returns 0 and stores the user pthread_t address in
 * *retval; on failure the half-built thread and its port are torn down.
 */
int
_bsdthread_create(struct proc *p,
		__unused user_addr_t user_func, __unused user_addr_t user_funcarg,
		user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags,
		user_addr_t *retval)
{
	kern_return_t kret;
	void * sright;
	int error = 0;
	mach_vm_offset_t th_tsd_base;
	mach_port_name_t th_thport;
	thread_t th;
	task_t ctask = current_task();
	unsigned int policy, importance;
	uint32_t tsd_offset;
	bool start_suspended = (flags & PTHREAD_START_SUSPENDED);

	/* The process must have successfully called bsdthread_register first. */
	if (pthread_kern->proc_get_register(p) == 0) {
		return EINVAL;
	}

	PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0);

	kret = pthread_kern->thread_create(ctask, &th);
	if (kret != KERN_SUCCESS)
		return(ENOMEM);
	/* Extra reference so the thread survives until we resume/terminate it. */
	thread_reference(th);

	pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD);

	/* Hand a send right for the new thread's port out to the process. */
	sright = (void *)pthread_kern->convert_thread_to_port(th);
	th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));
	if (!MACH_PORT_VALID(th_thport)) {
		error = EMFILE; // userland will convert this into a crash
		goto out;
	}

	/* Only the "custom" stack setup protocol is supported. */
	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		error = EINVAL;
		goto out;
	}

	PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3);

	tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
	if (tsd_offset) {
		/* TSD lives inside the userspace pthread_t at a fixed offset. */
		th_tsd_base = user_pthread + tsd_offset;
		kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
		if (kret == KERN_SUCCESS) {
			flags |= PTHREAD_START_TSD_BASE_SET;
		}
	}
	/*
	 * Strip PTHREAD_START_SUSPENDED so that libpthread can observe the kernel
	 * supports this flag (after the fact).
	 */
	flags &= ~PTHREAD_START_SUSPENDED;

	/*
	 * Set up registers & function call: entry point, the six start-routine
	 * arguments, and the stack pointer, per architecture/ABI.
	 */
#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit_data(p)) {
		x86_thread_state64_t state = {
			.rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
			.rdi = (uint64_t)user_pthread,
			.rsi = (uint64_t)th_thport,
			.rdx = (uint64_t)user_func,    /* golang wants this */
			.rcx = (uint64_t)user_funcarg, /* golang wants this */
			.r8  = (uint64_t)user_stack,   /* golang wants this */
			.r9  = (uint64_t)flags,

			.rsp = (uint64_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
	} else {
		x86_thread_state32_t state = {
			.eip = (uint32_t)pthread_kern->proc_get_threadstart(p),
			.eax = (uint32_t)user_pthread,
			.ebx = (uint32_t)th_thport,
			.ecx = (uint32_t)user_func,    /* golang wants this */
			.edx = (uint32_t)user_funcarg, /* golang wants this */
			.edi = (uint32_t)user_stack,   /* golang wants this */
			.esi = (uint32_t)flags,

			.esp = (uint32_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
	}
#elif defined(__arm__) || defined(__arm64__)
	if (proc_is64bit_data(p)) {
#ifdef __arm64__
		arm_thread_state64_t state = {
			.pc   = (uint64_t)pthread_kern->proc_get_threadstart(p),
			.x[0] = (uint64_t)user_pthread,
			.x[1] = (uint64_t)th_thport,
			.x[2] = (uint64_t)user_func,    /* golang wants this */
			.x[3] = (uint64_t)user_funcarg, /* golang wants this */
			.x[4] = (uint64_t)user_stack,   /* golang wants this */
			.x[5] = (uint64_t)flags,

			.sp   = (uint64_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
#else
		panic("Shouldn't have a 64-bit thread on a 32-bit kernel...");
#endif // defined(__arm64__)
	} else {
		arm_thread_state_t state = {
			.pc   = (uint32_t)pthread_kern->proc_get_threadstart(p),
			.r[0] = (uint32_t)user_pthread,
			.r[1] = (uint32_t)th_thport,
			.r[2] = (uint32_t)user_func,    /* golang wants this */
			.r[3] = (uint32_t)user_funcarg, /* golang wants this */
			.r[4] = (uint32_t)user_stack,   /* golang wants this */
			.r[5] = (uint32_t)flags,

			.sp   = (uint32_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
	}
#else
#error bsdthread_create not defined for this architecture
#endif

	if (flags & PTHREAD_START_SETSCHED) {
		/* Set scheduling parameters if needed */
		thread_extended_policy_data_t extinfo;
		thread_precedence_policy_data_t precedinfo;

		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

		if (policy == SCHED_OTHER) {
			extinfo.timeshare = 1;
		} else {
			extinfo.timeshare = 0;
		}

		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

		/* Importance is expressed relative to BASEPRI_DEFAULT. */
		precedinfo.importance = (importance - BASEPRI_DEFAULT);
		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
	} else if (flags & PTHREAD_START_QOSCLASS) {
		/* Set thread QoS class if requested. */
		thread_qos_policy_data_t qos;

		if (!_pthread_priority_to_policy(flags & PTHREAD_START_QOSCLASS_MASK, &qos)) {
			error = EINVAL;
			goto out;
		}
		pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY,
				(thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
	}

	/*
	 * Seed the new thread's mach_thread_self TSD slot with its port name so
	 * userspace can read it without a trap.  Width matches the process ABI.
	 */
	if (pthread_kern->proc_get_mach_thread_self_tsd_offset) {
		uint64_t mach_thread_self_offset =
				pthread_kern->proc_get_mach_thread_self_tsd_offset(p);
		if (mach_thread_self_offset && tsd_offset) {
			bool proc64bit = proc_is64bit(p);
			if (proc64bit) {
				uint64_t th_thport_tsd = (uint64_t)th_thport;
				error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			} else {
				uint32_t th_thport_tsd = (uint32_t)th_thport;
				error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			}
			if (error) {
				goto out;
			}
		}
	}

	if (!start_suspended) {
		kret = pthread_kern->thread_resume(th);
		if (kret != KERN_SUCCESS) {
			error = EINVAL;
			goto out;
		}
	}
	thread_deallocate(th);	/* drop the creator reference */

	PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_END, error, user_pthread, 0, 0);

	*retval = user_pthread;
	return(0);

out:
	/* Unwind: release the userspace send right, then destroy the thread. */
	(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	(void)thread_deallocate(th);
	return(error);
}

/**
 * bsdthread_terminate system call.
 * Used by pthread_terminate.
 *
 * Tears down the calling thread: optionally releases/deallocates its stack,
 * terminates the thread, signals the joiner semaphore 'sem' (if any), and
 * deallocates the thread's own port name 'kthport'.  Never returns.
 */
int
_bsdthread_terminate(__unused struct proc *p,
		user_addr_t stackaddr,
		size_t size,
		uint32_t kthport,
		uint32_t sem,
		__unused int32_t *retval)
{
	mach_vm_offset_t freeaddr;
	mach_vm_size_t freesize;
	kern_return_t kret;
	thread_t th = current_thread();

	freeaddr = (mach_vm_offset_t)stackaddr;
	freesize = size;

	PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff);

	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
		if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD){
			/*
			 * Main-thread stack: keep the allocation but mark it reusable
			 * and inaccessible rather than deallocating it.
			 */
			vm_map_t user_map = pthread_kern->current_map();
			freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map));
			kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
#if MACH_ASSERT
			if (kret != KERN_SUCCESS && kret != KERN_INVALID_ADDRESS) {
				os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kret);
			}
#endif
			kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE);
			assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
		} else {
			/* Ordinary pthread stack: give the memory back to the map. */
			kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
			if (kret != KERN_SUCCESS) {
				PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
			}
		}
	}

	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	if (sem != MACH_PORT_NULL) {
		/* Wake a pthread_join() waiter. */
		kret = pthread_kern->semaphore_signal_internal_trap(sem);
		if (kret != KERN_SUCCESS) {
			PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
		}
	}

	if (kthport != MACH_PORT_NULL) {
		pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
	}

	PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0);

	pthread_kern->thread_exception_return();
	__builtin_unreachable();
}

/**
 * bsdthread_register system call. Performs per-process setup. Responsible for
 * returning capabilitiy bits to userspace and receiving userspace function addresses.
 */
int
_bsdthread_register(struct proc *p,
		user_addr_t threadstart,
		user_addr_t wqthread,
		int pthsize,
		user_addr_t pthread_init_data,
		user_addr_t pthread_init_data_size,
		uint64_t dispatchqueue_offset,
		int32_t *retval)
{
	struct _pthread_registration_data data = {};
	uint32_t max_tsd_offset;
	kern_return_t kr;
	size_t pthread_init_sz = 0;

	/* syscall randomizer test can pass bogus values */
	if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {
		return(EINVAL);
	}
	/*
	 * if we have pthread_init_data, then we use that and target_concptr
	 * (which is an offset) get data.
	 */
	if (pthread_init_data != 0) {
		if (pthread_init_data_size < sizeof(data.version)) {
			return EINVAL;
		}
		/* Copy in only as much as both sides agree on (versioned struct). */
		pthread_init_sz = MIN(sizeof(data), (size_t)pthread_init_data_size);
		int ret = copyin(pthread_init_data, &data, pthread_init_sz);
		if (ret) {
			return ret;
		}
		if (data.version != (size_t)pthread_init_data_size) {
			return EINVAL;
		}
	} else {
		data.dispatch_queue_offset = dispatchqueue_offset;
	}

	/* We have to do this before proc_get_register so that it resets after fork */
	mach_vm_offset_t stackaddr = stack_addr_hint(p, pthread_kern->current_map());
	pthread_kern->proc_set_stack_addr_hint(p, (user_addr_t)stackaddr);

	/* prevent multiple registrations */
	if (pthread_kern->proc_get_register(p) != 0) {
		return(EINVAL);
	}

	pthread_kern->proc_set_threadstart(p, threadstart);
	pthread_kern->proc_set_wqthread(p, wqthread);
	pthread_kern->proc_set_pthsize(p, pthsize);
	pthread_kern->proc_set_register(p);

	/*
	 * Validate the TSD offset: a TSD slot (pointer-sized for the process
	 * ABI) starting at tsd_offset must fit inside the pthread_t, otherwise
	 * all TSD-relative offsets are disabled.
	 */
	uint32_t tsd_slot_sz = proc_is64bit(p) ? sizeof(uint64_t) : sizeof(uint32_t);
	if ((uint32_t)pthsize >= tsd_slot_sz &&
			data.tsd_offset <= (uint32_t)(pthsize - tsd_slot_sz)) {
		max_tsd_offset = ((uint32_t)pthsize - data.tsd_offset - tsd_slot_sz);
	} else {
		data.tsd_offset = 0;
		max_tsd_offset = 0;
	}
	pthread_kern->proc_set_pthread_tsd_offset(p, data.tsd_offset);

	/* Each optional per-slot offset is clamped to 0 if it can't fit. */
	if (data.dispatch_queue_offset > max_tsd_offset) {
		data.dispatch_queue_offset = 0;
	}
	pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);

	if (pthread_kern->proc_set_return_to_kernel_offset) {
		if (data.return_to_kernel_offset > max_tsd_offset) {
			data.return_to_kernel_offset = 0;
		}
		pthread_kern->proc_set_return_to_kernel_offset(p,
				data.return_to_kernel_offset);
	}

	if (pthread_kern->proc_set_mach_thread_self_tsd_offset) {
		if (data.mach_thread_self_offset > max_tsd_offset) {
			data.mach_thread_self_offset = 0;
		}
		pthread_kern->proc_set_mach_thread_self_tsd_offset(p,
				data.mach_thread_self_offset);
	}

	if (pthread_init_data != 0) {
		/* Outgoing data that userspace expects as a reply */
		data.version = sizeof(struct _pthread_registration_data);
		data.main_qos = _pthread_unspecified_priority();

		if (pthread_kern->qos_main_thread_active()) {
			mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
			thread_qos_policy_data_t qos;
			boolean_t gd = FALSE;

			kr = pthread_kern->thread_policy_get(current_thread(),
					THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
			if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
				/*
				 * Unspecified threads means the kernel wants us
				 * to impose legacy upon the thread.
				 */
				qos.qos_tier = THREAD_QOS_LEGACY;
				qos.tier_importance = 0;

				kr = pthread_kern->thread_policy_set_internal(current_thread(),
						THREAD_QOS_POLICY, (thread_policy_t)&qos,
						THREAD_QOS_POLICY_COUNT);
			}

			if (kr == KERN_SUCCESS) {
				data.main_qos = _pthread_priority_make_from_thread_qos(
						qos.qos_tier, 0, 0);
			}
		}

		data.stack_addr_hint = stackaddr;
		data.mutex_default_policy = pthread_mutex_default_policy;

		kr = copyout(&data, pthread_init_data, pthread_init_sz);
		if (kr != KERN_SUCCESS) {
			return EINVAL;
		}
	}

	/* return the supported feature set as the return value. */
	*retval = PTHREAD_FEATURE_SUPPORTED;

	return(0);
}


#pragma mark - Workqueue Thread Support

/*
 * Total allocation size for one workqueue thread: guard page + default
 * stack + page-rounded pthread_t (including PTHREAD_T_OFFSET).  Optionally
 * reports the guard size via *guardsize_out.
 */
static mach_vm_size_t
workq_thread_allocsize(proc_t p, vm_map_t wq_map,
		mach_vm_size_t *guardsize_out)
{
	mach_vm_size_t guardsize = vm_map_page_size(wq_map);
	mach_vm_size_t pthread_size = vm_map_round_page_mask(
			pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET,
			vm_map_page_mask(wq_map));
	if (guardsize_out) *guardsize_out = guardsize;
	return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
}

/*
 * Allocate a stack (plus guard page and pthread_t area) for a workqueue
 * thread.  First tries near the per-process stack address hint, then falls
 * back to an anywhere-allocation.  Returns 0 on success (address in
 * *out_addr) or a kern_return_t error code.
 */
int
workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr)
{
	mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
	mach_vm_size_t guardsize, th_allocsize;
	kern_return_t kret;

	th_allocsize = workq_thread_allocsize(p, vmap, &guardsize);
	kret = mach_vm_map(vmap, &stackaddr, th_allocsize, page_size - 1,
			VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL, 0, FALSE,
			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

	if (kret != KERN_SUCCESS) {
		kret = mach_vm_allocate(vmap, &stackaddr, th_allocsize,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
	}

	if (kret != KERN_SUCCESS) {
		goto fail;
	}

	/*
	 * The guard page is at the lowest address
	 * The stack base is the highest address
	 */
	kret = mach_vm_protect(vmap, stackaddr, guardsize, FALSE, VM_PROT_NONE);
	if (kret != KERN_SUCCESS) {
		goto fail_vm_deallocate;
	}

	if (out_addr) {
		*out_addr = stackaddr;
	}
	return 0;

fail_vm_deallocate:
	(void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
fail:
	return kret;
}

/*
 * Free an entire workqueue thread stack allocation made by
 * workq_create_threadstack().
 */
int
workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr)
{
	return mach_vm_deallocate(vmap, stackaddr,
			workq_thread_allocsize(p, vmap, NULL));
}

/*
 * Mark a parked workqueue thread's stack pages (above the guard page) as
 * reusable so the VM system can reclaim them while the thread is idle.
 */
void
workq_markfree_threadstack(proc_t OS_UNUSED p, thread_t OS_UNUSED th,
		vm_map_t vmap, user_addr_t stackaddr)
{
	// Keep this in sync with workq_setup_thread()
	const vm_size_t guardsize = vm_map_page_size(vmap);
	const user_addr_t freeaddr = (user_addr_t)stackaddr + guardsize;
	const vm_map_offset_t freesize = vm_map_trunc_page_mask(
			(PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1,
			vm_map_page_mask(vmap)) - guardsize;

	__assert_only kern_return_t kr = mach_vm_behavior_set(vmap, freeaddr,
			freesize, VM_BEHAVIOR_REUSABLE);
#if MACH_ASSERT
	if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
		os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr);
	}
#endif
}

/*
 * Key userspace addresses within a workqueue thread's allocation:
 * the pthread_t, the stack bottom (just above the guard page), and the
 * current 16-byte-aligned stack top.
 */
struct workq_thread_addrs {
	user_addr_t self;          /* userspace pthread_t */
	user_addr_t stack_bottom;  /* lowest usable stack address */
	user_addr_t stack_top;     /* initial stack pointer (aligned down) */
};

/* Set the stack top, aligned down to C_WORKQ_STK_ALIGN. */
static inline void
workq_thread_set_top_addr(struct workq_thread_addrs *th_addrs, user_addr_t addr)
{
	th_addrs->stack_top = (addr & -C_WORKQ_STK_ALIGN);
}

/*
 * Derive the pthread_t / stack-top / stack-bottom addresses from the base
 * of a workqueue thread allocation (see the layout diagram above
 * workq_setup_thread()).
 */
static void
workq_thread_get_addrs(vm_map_t map, user_addr_t stackaddr,
		struct workq_thread_addrs *th_addrs)
{
	const vm_size_t guardsize = vm_map_page_size(map);

	th_addrs->self = (user_addr_t)(stackaddr + PTH_DEFAULT_STACKSIZE +
			guardsize + PTHREAD_T_OFFSET);
	workq_thread_set_top_addr(th_addrs, th_addrs->self);
	th_addrs->stack_bottom = (user_addr_t)(stackaddr + guardsize);
}

/*
 * Load the initial register state for a workqueue thread so that, on return
 * to userspace, it enters the process's registered wqthread entry point with
 * the six _pthread_wqthread() arguments in place (per-architecture ABI).
 */
static inline void
workq_set_register_state(proc_t p, thread_t th,
		struct workq_thread_addrs *addrs, mach_port_name_t kport,
		user_addr_t kevent_list, uint32_t upcall_flags, int kevent_count)
{
	user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
	if (!wqstart_fnptr) {
		panic("workqueue thread start function pointer is NULL");
	}

#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit_data(p) == 0) {
		x86_thread_state32_t state = {
			.eip = (unsigned int)wqstart_fnptr,
			.eax = /* arg0 */ (unsigned int)addrs->self,
			.ebx = /* arg1 */ (unsigned int)kport,
			.ecx = /* arg2 */ (unsigned int)addrs->stack_bottom,
			.edx = /* arg3 */ (unsigned int)kevent_list,
			.edi = /* arg4 */ (unsigned int)upcall_flags,
			.esi = /* arg5 */ (unsigned int)kevent_count,

			.esp = (int)((vm_offset_t)addrs->stack_top),
		};

		int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
	} else {
		x86_thread_state64_t state64 = {
			// x86-64 already passes all the arguments in registers, so we just put them in their final place here
			.rip = (uint64_t)wqstart_fnptr,
			.rdi = (uint64_t)addrs->self,
			.rsi = (uint64_t)kport,
			.rdx = (uint64_t)addrs->stack_bottom,
			.rcx = (uint64_t)kevent_list,
			.r8  = (uint64_t)upcall_flags,
			.r9  = (uint64_t)kevent_count,

			.rsp = (uint64_t)(addrs->stack_top)
		};

		int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
	}
#elif defined(__arm__) || defined(__arm64__)
	if (!proc_is64bit_data(p)) {
		arm_thread_state_t state = {
			.pc   = (int)wqstart_fnptr,
			.r[0] = (unsigned int)addrs->self,
			.r[1] = (unsigned int)kport,
			.r[2] = (unsigned int)addrs->stack_bottom,
			.r[3] = (unsigned int)kevent_list,
			// will be pushed onto the stack as arg4/5
			.r[4] = (unsigned int)upcall_flags,
			.r[5] = (unsigned int)kevent_count,

			.sp   = (int)(addrs->stack_top)
		};

		int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
	} else {
#if defined(__arm64__)
		arm_thread_state64_t state = {
			.pc   = (uint64_t)wqstart_fnptr,
			.x[0] = (uint64_t)addrs->self,
			.x[1] = (uint64_t)kport,
			.x[2] = (uint64_t)addrs->stack_bottom,
			.x[3] = (uint64_t)kevent_list,
			.x[4] = (uint64_t)upcall_flags,
			.x[5] = (uint64_t)kevent_count,

			.sp   = (uint64_t)((vm_offset_t)addrs->stack_top),
		};

		int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
#else /* defined(__arm64__) */
		panic("Shouldn't have a 64-bit thread on a 32-bit kernel...");
#endif /* defined(__arm64__) */
	}
#else
#error setup_wqthread not defined for this architecture
#endif
}

/*
 * Pull pending kevents for a workqueue thread, staging the event list and
 * data buffer on the thread's own stack just below the pthread_t, then move
 * the usable stack top below the staged data.  On error (or no events) the
 * out-parameters report an empty list.
 */
static inline int
workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs,
		user_addr_t eventlist, int nevents, int kevent_flags,
		user_addr_t *kevent_list_out, int *kevent_count_out)
{
	int ret;

	user_addr_t kevent_list = th_addrs->self -
			WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
	user_addr_t data_buf = kevent_list - WQ_KEVENT_DATA_SIZE;
	user_size_t data_available = WQ_KEVENT_DATA_SIZE;

	ret = pthread_kern->kevent_workq_internal(p, eventlist, nevents,
			kevent_list, WQ_KEVENT_LIST_LEN,
			data_buf, &data_available,
			kevent_flags, kevent_count_out);

	// squash any errors into just empty output
	if (ret != 0 || *kevent_count_out == -1) {
		*kevent_list_out = NULL;
		*kevent_count_out = 0;
		return ret;
	}

	// data_available was updated to reflect the unconsumed portion of the buffer
	workq_thread_set_top_addr(th_addrs, data_buf + data_available);
	*kevent_list_out = kevent_list;
	return ret;
}

/**
 * configures initial thread stack/registers to jump into:
 * _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int upcall_flags, int nkevents);
 * to get there we jump through assembly stubs in pthread_asm.s. Those
 * routines setup a stack frame, using the current stack pointer, and marshall
 * arguments from registers to the stack as required by the ABI.
 *
 * One odd thing we do here is to start the pthread_t 4k below what would be the
 * top of the stack otherwise. This is because usually only the first 4k of the
 * pthread_t will be used and so we want to put it on the same 16k page as the
 * top of the stack to save memory.
 *
 * When we are done the stack will look like:
 * |-----------| th_stackaddr + th_allocsize
 * |pthread_t  | th_stackaddr + DEFAULT_STACKSIZE + guardsize + PTHREAD_STACK_OFFSET
 * |kevent list| optionally - at most WQ_KEVENT_LIST_LEN events
 * |kevent data| optionally - at most WQ_KEVENT_DATA_SIZE bytes
 * |stack gap  | bottom aligned to 16 bytes
 * |   STACK   |
 * |     ⇓     |
 * |           |
 * |guard page | guardsize
 * |-----------| th_stackaddr
 */
__attribute__((noreturn,noinline))
void
workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr,
		mach_port_name_t kport, int th_qos __unused, int setup_flags, int upcall_flags)
{
	struct workq_thread_addrs th_addrs;
	bool first_use = (setup_flags & WQ_SETUP_FIRST_USE);
	user_addr_t kevent_list = NULL;
	int kevent_count = 0;

	workq_thread_get_addrs(map, stackaddr, &th_addrs);

	if (first_use) {
		uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
		if (tsd_offset) {
			mach_vm_offset_t th_tsd_base = th_addrs.self + tsd_offset;
			kern_return_t kret = pthread_kern->thread_set_tsd_base(th,
					th_tsd_base);
			if (kret == KERN_SUCCESS) {
				upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET;
			}
		}

		/*
		 * Pre-fault the first page of the new thread's stack and the page that will
		 * contain the pthread_t structure.
		 */
		vm_map_offset_t mask = vm_map_page_mask(map);
		vm_map_offset_t th_page = vm_map_trunc_page_mask(th_addrs.self, mask);
		vm_map_offset_t stk_page = vm_map_trunc_page_mask(th_addrs.stack_top - 1, mask);
		if (th_page != stk_page) {
			vm_fault(map, stk_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
		}
		vm_fault(map, th_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
	}

	if (setup_flags & WQ_SETUP_EXIT_THREAD) {
		kevent_count = WORKQ_EXIT_THREAD_NKEVENT;
	} else if (upcall_flags & WQ_FLAG_THREAD_KEVENT) {
		unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
		workq_kevent(p, &th_addrs, NULL, 0, flags, &kevent_list, &kevent_count);
	}

	workq_set_register_state(p, th, &th_addrs, kport,
			kevent_list, upcall_flags, kevent_count);

	if (first_use) {
		pthread_kern->thread_bootstrap_return();
	} else {
		pthread_kern->unix_syscall_return(EJUSTRETURN);
	}
	__builtin_unreachable();
}

/*
 * Process kevents for a parked workqueue thread using its existing stack.
 * If events remain to be delivered, resets the thread's voucher and jumps
 * back to the wqthread entry point (does not return in that case); otherwise
 * returns 0 or the kevent error.
 */
int
workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map,
		user_addr_t stackaddr, mach_port_name_t kport,
		user_addr_t events, int nevents, int upcall_flags)
{
	struct workq_thread_addrs th_addrs;
	user_addr_t kevent_list = NULL;
	int kevent_count = 0, error;
	__assert_only kern_return_t kr;

	workq_thread_get_addrs(map, stackaddr, &th_addrs);

	unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE |
			KEVENT_FLAG_PARKING;
	error = workq_kevent(p, &th_addrs, events, nevents, flags,
			&kevent_list, &kevent_count);

	if (error || kevent_count == 0) {
		return error;
	}

	kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
	assert(kr == KERN_SUCCESS);

	workq_set_register_state(p, th, &th_addrs, kport,
			kevent_list, upcall_flags, kevent_count);

	pthread_kern->unix_syscall_return(EJUSTRETURN);
	__builtin_unreachable();
}

/*
 * thread_selfid system call: returns the current thread's unique 64-bit
 * thread id via *retval.
 */
int
_thread_selfid(__unused struct proc *p, uint64_t *retval)
{
	thread_t thread = current_thread();
	*retval = thread_tid(thread);
	return KERN_SUCCESS;
}

/*
 * One-time module initialization: sets up the pthread lock group and
 * attributes, the psynch global hash, cleanup thread-call and zones, reads
 * the "pthread_mutex_default_policy" boot-arg, and registers its sysctl.
 */
void
_pthread_init(void)
{
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();
	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);

	pth_global_hashinit();
	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
	psynch_zoneinit();

	int policy_bootarg;
	if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) {
		pthread_mutex_default_policy = policy_bootarg;
	}

	sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy);
}