affinity.c
/*
 * Copyright (c) 2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/affinity.h>
#include <kern/task.h>
#include <kern/kalloc.h>
#include <machine/cpu_affinity.h>

/*
 * Affinity involves 2 objects:
 * - affinity namespace:
 *	shared by a task family, this controls affinity tag lookup and
 *	allocation; it anchors all affinity sets in one namespace
 * - affinity set:
 *	anchors all threads with membership of this affinity set
 *	and which share an affinity tag in the owning namespace.
 *
 * Locking:
 * - The task lock protects the creation of an affinity namespace.
 * - The affinity namespace mutex protects the inheritance of a namespace
 *   and its thread membership. This includes its destruction when the task
 *   reference count goes to zero.
 * - The thread mutex protects a thread's affinity set membership, but in
 *   addition, the thread_lock is taken to write thread->affinity_set since this
 *   field (representing the active affinity set) is read by the scheduler.
 *
 * The lock ordering is: task lock, thread mutex, namespace mutex, thread lock.
 */

#if AFFINITY_DEBUG
#define DBG(x...)	kprintf("DBG: " x)
#else
#define DBG(x...)
#endif

/*
 * An affinity namespace: one per task family, shared across fork().
 * All affinity sets belonging to the family hang off aspc_affinities.
 */
struct affinity_space {
	lck_mtx_t	aspc_lock;		/* guards the two fields below and set membership */
	uint32_t	aspc_task_count;	/* tasks referencing this namespace; freed at 0 */
	queue_head_t	aspc_affinities;	/* queue of affinity_set_t in this namespace */
};
typedef struct affinity_space *affinity_space_t;

/* Internal helpers; locking requirements are documented at each definition. */
static affinity_space_t affinity_space_alloc(void);
static void affinity_space_free(affinity_space_t aspc);
static affinity_set_t affinity_set_alloc(void);
static void affinity_set_free(affinity_set_t aset);
static affinity_set_t affinity_set_find(affinity_space_t aspc, uint32_t tag);
static void affinity_set_place(affinity_space_t aspc, affinity_set_t aset);
static void affinity_set_add(affinity_set_t aset, thread_t thread);
static affinity_set_t affinity_set_remove(affinity_set_t aset, thread_t thread);

/*
 * The following globals may be modified by the sysctls
 *   kern.affinity_sets_enabled	- disables hinting if cleared
 *   kern.affinity_sets_mapping	- controls cache distribution policy
 * See bsd/kern_sysctl.c
 *
 * Affinity sets are not used on embedded, which typically only
 * has a single pset, and last-processor affinity is
 * more important than pset affinity.
 */
#if !defined(XNU_TARGET_OS_OSX)
boolean_t affinity_sets_enabled = FALSE;
int affinity_sets_mapping = 0;
#else /* !defined(XNU_TARGET_OS_OSX) */
boolean_t affinity_sets_enabled = TRUE;
int affinity_sets_mapping = 1;
#endif /* !defined(XNU_TARGET_OS_OSX) */

/*
 * thread_affinity_is_supported()
 *	TRUE if the machine layer exposes at least one hardware affinity set.
 */
boolean_t
thread_affinity_is_supported(void)
{
	return ml_get_max_affinity_sets() != 0;
}


/*
 * thread_affinity_get()
 *	Return the affinity tag for a thread.
 *	Called with the thread mutex held.
 *	Returns THREAD_AFFINITY_TAG_NULL if the thread belongs to no set.
 */
uint32_t
thread_affinity_get(thread_t thread)
{
	uint32_t tag;

	if (thread->affinity_set != NULL) {
		tag = thread->affinity_set->aset_tag;
	} else {
		tag = THREAD_AFFINITY_TAG_NULL;
	}

	return tag;
}


/*
 * thread_affinity_set()
 *	Place a thread in an affinity set identified by a tag.
 *	Called with thread referenced but not locked.
 *
 *	A tag of THREAD_AFFINITY_TAG_NULL simply removes the thread from
 *	its current set. Returns KERN_TERMINATED if the thread dies before
 *	the thread mutex is acquired, KERN_RESOURCE_SHORTAGE on allocation
 *	failure, otherwise KERN_SUCCESS.
 */
kern_return_t
thread_affinity_set(thread_t thread, uint32_t tag)
{
	affinity_set_t aset;
	affinity_set_t empty_aset = NULL;
	affinity_space_t aspc;
	affinity_space_t new_aspc = NULL;

	DBG("thread_affinity_set(%p,%u)\n", thread, tag);

	/*
	 * Ensure the task has an affinity namespace. Allocation cannot be
	 * done under the task lock, so drop it, allocate, retake it and
	 * re-check in case another thread installed a namespace meanwhile.
	 */
	task_lock(thread->task);
	aspc = thread->task->affinity_space;
	if (aspc == NULL) {
		task_unlock(thread->task);
		new_aspc = affinity_space_alloc();
		if (new_aspc == NULL) {
			return KERN_RESOURCE_SHORTAGE;
		}
		task_lock(thread->task);
		if (thread->task->affinity_space == NULL) {
			thread->task->affinity_space = new_aspc;
			new_aspc = NULL;
		}
		aspc = thread->task->affinity_space;
	}
	task_unlock(thread->task);
	if (new_aspc) {
		/* We lost the install race; discard our unused namespace. */
		affinity_space_free(new_aspc);
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		/* Beaten to lock and the thread is dead */
		thread_mtx_unlock(thread);
		return KERN_TERMINATED;
	}

	/* Lock ordering: thread mutex, then namespace mutex. */
	lck_mtx_lock(&aspc->aspc_lock);
	aset = thread->affinity_set;
	if (aset != NULL) {
		/*
		 * Remove thread from current affinity set.
		 * If the set becomes empty it is returned for reuse below.
		 */
		DBG("thread_affinity_set(%p,%u) removing from aset %p\n",
		    thread, tag, aset);
		empty_aset = affinity_set_remove(aset, thread);
	}

	if (tag != THREAD_AFFINITY_TAG_NULL) {
		aset = affinity_set_find(aspc, tag);
		if (aset != NULL) {
			/*
			 * Add thread to existing affinity set
			 */
			DBG("thread_affinity_set(%p,%u) found aset %p\n",
			    thread, tag, aset);
		} else {
			/*
			 * Use the new affinity set, add this thread
			 * and place it in a suitable processor set.
			 * Prefer recycling the set we just emptied.
			 */
			if (empty_aset != NULL) {
				aset = empty_aset;
				empty_aset = NULL;
			} else {
				aset = affinity_set_alloc();
				if (aset == NULL) {
					lck_mtx_unlock(&aspc->aspc_lock);
					thread_mtx_unlock(thread);
					return KERN_RESOURCE_SHORTAGE;
				}
			}
			DBG("thread_affinity_set(%p,%u) (re-)using aset %p\n",
			    thread, tag, aset);
			aset->aset_tag = tag;
			affinity_set_place(aspc, aset);
		}
		affinity_set_add(aset, thread);
	}

	lck_mtx_unlock(&aspc->aspc_lock);
	thread_mtx_unlock(thread);

	/*
	 * If we wound up not using an empty aset we created,
	 * free it here.
	 */
	if (empty_aset != NULL) {
		affinity_set_free(empty_aset);
	}

	if (thread == current_thread()) {
		/* Force a reschedule so the new placement takes effect now. */
		thread_block(THREAD_CONTINUE_NULL);
	}

	return KERN_SUCCESS;
}

/*
 * task_affinity_create()
 *	Called from task create.
 *	The parent must already own a namespace (asserted).
 */
void
task_affinity_create(task_t parent_task, task_t child_task)
{
	affinity_space_t aspc = parent_task->affinity_space;

	DBG("task_affinity_create(%p,%p)\n", parent_task, child_task);

	assert(aspc);

	/*
	 * Bump the task reference count on the shared namespace and
	 * give it to the child.
	 */
	lck_mtx_lock(&aspc->aspc_lock);
	aspc->aspc_task_count++;
	child_task->affinity_space = aspc;
	lck_mtx_unlock(&aspc->aspc_lock);
}

/*
 * task_affinity_deallocate()
 *	Called from task_deallocate() when there's a namespace to dereference.
 *	Frees the namespace when the last task reference is dropped.
 */
void
task_affinity_deallocate(task_t task)
{
	affinity_space_t aspc = task->affinity_space;

	DBG("task_affinity_deallocate(%p) aspc %p task_count %d\n",
	    task, aspc, aspc->aspc_task_count);

	lck_mtx_lock(&aspc->aspc_lock);
	if (--(aspc->aspc_task_count) == 0) {
		/* Last task gone: all sets must already have been emptied. */
		assert(queue_empty(&aspc->aspc_affinities));
		lck_mtx_unlock(&aspc->aspc_lock);
		affinity_space_free(aspc);
	} else {
		lck_mtx_unlock(&aspc->aspc_lock);
	}
}

/*
 * task_affinity_info()
 *	Return affinity tag info (number, min, max) for the task.
 *
 * Conditions: task is locked.
 *	NOTE(review): the incoming *task_info_count is overwritten without a
 *	size check — callers (task_info()) are expected to validate it first.
 */
kern_return_t
task_affinity_info(
	task_t                  task,
	task_info_t             task_info_out,
	mach_msg_type_number_t  *task_info_count)
{
	affinity_set_t aset;
	affinity_space_t aspc;
	task_affinity_tag_info_t info;

	*task_info_count = TASK_AFFINITY_TAG_INFO_COUNT;
	info = (task_affinity_tag_info_t) task_info_out;
	info->set_count = 0;
	info->task_count = 0;
	info->min = THREAD_AFFINITY_TAG_NULL;
	info->max = THREAD_AFFINITY_TAG_NULL;

	aspc = task->affinity_space;
	if (aspc) {
		lck_mtx_lock(&aspc->aspc_lock);
		queue_iterate(&aspc->aspc_affinities,
		    aset, affinity_set_t, aset_affinities) {
			info->set_count++;
			if (info->min == THREAD_AFFINITY_TAG_NULL ||
			    aset->aset_tag < (uint32_t) info->min) {
				info->min = aset->aset_tag;
			}
			if (info->max == THREAD_AFFINITY_TAG_NULL ||
			    aset->aset_tag > (uint32_t) info->max) {
				info->max = aset->aset_tag;
			}
		}
		info->task_count = aspc->aspc_task_count;
		lck_mtx_unlock(&aspc->aspc_lock);
	}
	return KERN_SUCCESS;
}

/*
 * Called
from thread_dup() during fork() with child's mutex held. 316 * Set the child into the parent's affinity set. 317 * Note the affinity space is shared. 318 */ 319 void 320 thread_affinity_dup(thread_t parent, thread_t child) 321 { 322 affinity_set_t aset; 323 affinity_space_t aspc; 324 325 thread_mtx_lock(parent); 326 aset = parent->affinity_set; 327 DBG("thread_affinity_dup(%p,%p) aset %p\n", parent, child, aset); 328 if (aset == NULL) { 329 thread_mtx_unlock(parent); 330 return; 331 } 332 333 aspc = aset->aset_space; 334 assert(aspc == parent->task->affinity_space); 335 assert(aspc == child->task->affinity_space); 336 337 lck_mtx_lock(&aspc->aspc_lock); 338 affinity_set_add(aset, child); 339 lck_mtx_unlock(&aspc->aspc_lock); 340 341 thread_mtx_unlock(parent); 342 } 343 344 /* 345 * thread_affinity_terminate() 346 * Remove thread from any affinity set. 347 * Called with the thread mutex locked. 348 */ 349 void 350 thread_affinity_terminate(thread_t thread) 351 { 352 affinity_set_t aset = thread->affinity_set; 353 affinity_space_t aspc; 354 355 DBG("thread_affinity_terminate(%p)\n", thread); 356 357 aspc = aset->aset_space; 358 lck_mtx_lock(&aspc->aspc_lock); 359 if (affinity_set_remove(aset, thread)) { 360 affinity_set_free(aset); 361 } 362 lck_mtx_unlock(&aspc->aspc_lock); 363 } 364 365 /* 366 * thread_affinity_exec() 367 * Called from execve() to cancel any current affinity - a new image implies 368 * the calling thread terminates any expressed or inherited affinity. 369 */ 370 void 371 thread_affinity_exec(thread_t thread) 372 { 373 if (thread->affinity_set != AFFINITY_SET_NULL) { 374 thread_affinity_terminate(thread); 375 } 376 } 377 378 /* 379 * Create an empty affinity namespace data structure. 
380 */ 381 static affinity_space_t 382 affinity_space_alloc(void) 383 { 384 affinity_space_t aspc; 385 386 aspc = (affinity_space_t) kalloc(sizeof(struct affinity_space)); 387 if (aspc == NULL) { 388 return NULL; 389 } 390 391 lck_mtx_init(&aspc->aspc_lock, &task_lck_grp, &task_lck_attr); 392 queue_init(&aspc->aspc_affinities); 393 aspc->aspc_task_count = 1; 394 395 DBG("affinity_space_create() returns %p\n", aspc); 396 return aspc; 397 } 398 399 /* 400 * Destroy the given empty affinity namespace data structure. 401 */ 402 static void 403 affinity_space_free(affinity_space_t aspc) 404 { 405 assert(queue_empty(&aspc->aspc_affinities)); 406 407 lck_mtx_destroy(&aspc->aspc_lock, &task_lck_grp); 408 DBG("affinity_space_free(%p)\n", aspc); 409 kfree(aspc, sizeof(struct affinity_space)); 410 } 411 412 413 /* 414 * Create an empty affinity set data structure 415 * entering it into a list anchored by the owning task. 416 */ 417 static affinity_set_t 418 affinity_set_alloc(void) 419 { 420 affinity_set_t aset; 421 422 aset = (affinity_set_t) kalloc(sizeof(struct affinity_set)); 423 if (aset == NULL) { 424 return NULL; 425 } 426 427 aset->aset_thread_count = 0; 428 queue_init(&aset->aset_affinities); 429 queue_init(&aset->aset_threads); 430 aset->aset_num = 0; 431 aset->aset_pset = PROCESSOR_SET_NULL; 432 aset->aset_space = NULL; 433 434 DBG("affinity_set_create() returns %p\n", aset); 435 return aset; 436 } 437 438 /* 439 * Destroy the given empty affinity set data structure 440 * after removing it from the parent task. 441 */ 442 static void 443 affinity_set_free(affinity_set_t aset) 444 { 445 assert(queue_empty(&aset->aset_threads)); 446 447 DBG("affinity_set_free(%p)\n", aset); 448 kfree(aset, sizeof(struct affinity_set)); 449 } 450 451 /* 452 * Add a thread to an affinity set. 453 * The caller must have the thread mutex and space locked. 
 */
static void
affinity_set_add(affinity_set_t aset, thread_t thread)
{
	spl_t s;

	DBG("affinity_set_add(%p,%p)\n", aset, thread);
	queue_enter(&aset->aset_threads,
	    thread, thread_t, affinity_threads);
	aset->aset_thread_count++;
	/*
	 * thread->affinity_set is read by the scheduler, so it is only
	 * written under the thread lock at splsched. If hinting has been
	 * disabled via sysctl, membership is tracked but the scheduler
	 * hint is left NULL.
	 */
	s = splsched();
	thread_lock(thread);
	thread->affinity_set = affinity_sets_enabled ? aset : NULL;
	thread_unlock(thread);
	splx(s);
}

/*
 * affinity_set_remove()
 *	Remove a thread from an affinity set returning the set if now empty.
 *	The caller must have the thread mutex and space locked.
 *	An emptied set is unlinked from its namespace and reset so the
 *	caller may recycle or free it; otherwise NULL is returned.
 */
static affinity_set_t
affinity_set_remove(affinity_set_t aset, thread_t thread)
{
	spl_t s;

	/* Clear the scheduler-visible hint first, under the thread lock. */
	s = splsched();
	thread_lock(thread);
	thread->affinity_set = NULL;
	thread_unlock(thread);
	splx(s);

	aset->aset_thread_count--;
	queue_remove(&aset->aset_threads,
	    thread, thread_t, affinity_threads);
	if (queue_empty(&aset->aset_threads)) {
		/* Last member: detach from the namespace and reset the set. */
		queue_remove(&aset->aset_space->aspc_affinities,
		    aset, affinity_set_t, aset_affinities);
		assert(aset->aset_thread_count == 0);
		aset->aset_tag = THREAD_AFFINITY_TAG_NULL;
		aset->aset_num = 0;
		aset->aset_pset = PROCESSOR_SET_NULL;
		aset->aset_space = NULL;
		DBG("affinity_set_remove(%p,%p) set now empty\n", aset, thread);
		return aset;
	} else {
		DBG("affinity_set_remove(%p,%p)\n", aset, thread);
		return NULL;
	}
}

/*
 * affinity_set_find()
 *	Find an affinity set in the parent namespace with the given affinity tag.
 *	The caller must have the space locked.
 *	Returns NULL if no set carries the tag.
 */
static affinity_set_t
affinity_set_find(affinity_space_t space, uint32_t tag)
{
	affinity_set_t aset;

	queue_iterate(&space->aspc_affinities,
	    aset, affinity_set_t, aset_affinities) {
		if (aset->aset_tag == tag) {
			DBG("affinity_set_find(%p,%u) finds %p\n",
			    space, tag, aset);
			return aset;
		}
	}
	DBG("affinity_set_find(%p,%u) not found\n", space, tag);
	return NULL;
}

/*
 * affinity_set_place() assigns an affinity set to a suitable processor_set.
 * The selection criteria is:
 * - the set currently occupied by the least number of affinities
 *   belonging to the owning the task.
 * The caller must have the space locked.
 */
static void
affinity_set_place(affinity_space_t aspc, affinity_set_t new_aset)
{
	unsigned short set_occupancy[MAX_CPUS] = { 0 };
	unsigned num_cpu_asets = ml_get_max_affinity_sets();
	unsigned i_least_occupied;
	affinity_set_t aset;

	if (__improbable(num_cpu_asets > MAX_CPUS)) {
		// If this triggers then the array needs to be made bigger.
		panic("num_cpu_asets = %d > %d too big in %s\n", num_cpu_asets, MAX_CPUS, __FUNCTION__);
	}

	/*
	 * Scan the affinity sets calculating the number of sets
	 * occupying each of the available physical affinities.
	 */
	queue_iterate(&aspc->aspc_affinities,
	    aset, affinity_set_t, aset_affinities) {
		if (aset->aset_num < num_cpu_asets) {
			set_occupancy[aset->aset_num]++;
		} else {
			panic("aset_num = %d in %s\n", aset->aset_num, __FUNCTION__);
		}
	}

	/*
	 * Find the least occupied set (or the first empty set).
	 * To distribute placements somewhat, start searching from
	 * a cpu affinity chosen randomly per namespace:
	 *	[(unsigned int)aspc % 127] % num_cpu_asets
	 * unless this mapping policy is overridden.
	 */
	if (affinity_sets_mapping == 0) {
		i_least_occupied = 0;
	} else {
		i_least_occupied = (unsigned int)(((uintptr_t)aspc % 127) % num_cpu_asets);
	}
	for (unsigned i = 0; i < num_cpu_asets; i++) {
		unsigned int j = (i_least_occupied + i) % num_cpu_asets;
		if (set_occupancy[j] == 0) {
			/* An empty physical affinity wins outright. */
			i_least_occupied = j;
			break;
		}
		if (set_occupancy[j] < set_occupancy[i_least_occupied]) {
			i_least_occupied = j;
		}
	}
	new_aset->aset_num = i_least_occupied;
	new_aset->aset_pset = ml_affinity_to_pset(i_least_occupied);

	/* Add the new affinity set to the group */
	new_aset->aset_space = aspc;
	queue_enter(&aspc->aspc_affinities,
	    new_aset, affinity_set_t, aset_affinities);

	DBG("affinity_set_place(%p,%p) selected affinity %u pset %p\n",
	    aspc, new_aset, new_aset->aset_num, new_aset->aset_pset);
}