pkg_jobs_schedule.c
1 /*- 2 * Copyright (c) 2024 The FreeBSD Foundation 3 * 4 * This software was developed by Isaac Freund <ifreund@freebsdfoundation.org> 5 * under sponsorship from the FreeBSD Foundation. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer 12 * in this position and unchanged. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 #include <assert.h> 29 30 #include "pkg.h" 31 #include "pkg/vec.h" 32 #include "private/event.h" 33 #include "private/pkg.h" 34 #include "private/pkg_jobs.h" 35 36 #define dbg(x, ...) pkg_dbg(PKG_DBG_SCHEDULER, x, __VA_ARGS__) 37 38 extern struct pkg_ctx ctx; 39 40 static const char * 41 pkg_jobs_schedule_job_type_string(struct pkg_solved *job) 42 { 43 switch (job->type) { 44 case PKG_SOLVED_INSTALL: 45 return "install"; 46 case PKG_SOLVED_DELETE: 47 return "delete"; 48 case PKG_SOLVED_UPGRADE: 49 return "upgrade"; 50 case PKG_SOLVED_UPGRADE_INSTALL: 51 return "split upgrade install"; 52 case PKG_SOLVED_UPGRADE_REMOVE: 53 return "split upgrade delete"; 54 default: 55 assert(false); 56 } 57 } 58 59 /* 60 * Returns true if pkg a directly depends on pkg b. 61 * 62 * Checking only direct dependencies is sufficient to define the edges in a 63 * graph that models indirect dependencies as well as long as all of the 64 * intermediate dependencies are also nodes in the graph. 65 */ 66 static bool pkg_jobs_schedule_direct_depends(struct pkg *a, struct pkg *b) 67 { 68 struct pkg_dep *dep = NULL; 69 while (pkg_deps(a, &dep) == EPKG_OK) { 70 if (STREQ(b->uid, dep->uid)) { 71 return (true); 72 } 73 } 74 return (false); 75 } 76 77 enum pkg_jobs_schedule_graph_edge_type { 78 PKG_SCHEDULE_EDGE_NONE, 79 PKG_SCHEDULE_EDGE_NEW_DEP_NEW, 80 PKG_SCHEDULE_EDGE_OLD_DEP_OLD, 81 PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW, 82 PKG_SCHEDULE_EDGE_SPLIT_UPGRADE, 83 }; 84 85 /* 86 * Jobs are nodes in a directed graph. Edges represent job scheduling order 87 * requirements. The existence of an edge from node A to node B indicates 88 * that job A must be executed before job B. 89 * 90 * There is a directed edge from node A to node B if and only if 91 * one of the following conditions holds: 92 * 93 * 1. B's new package depends on A's new package 94 * 2. A's old package depends on B's old package 95 * 3. A's old package conflicts with B's new package 96 * 4. A and B are the two halves of a split upgrade job 97 * and A is the delete half. 98 * 99 * There is one exception made to rule 2 in order to avoid splitting in 100 * the common case of upgrading packages X and Y where Xold depends on Yold 101 * and Xnew depends on Ynew. In this case, rule 2 is ignored and there is no 102 * edge from X to Y. 103 */ 104 static enum pkg_jobs_schedule_graph_edge_type 105 pkg_jobs_schedule_graph_edge(struct pkg_solved *a, struct pkg_solved *b) 106 { 107 if (a == b) { 108 return (PKG_SCHEDULE_EDGE_NONE); 109 } 110 111 if (a->xlink == b || b->xlink == a) { 112 assert(a->xlink == b && b->xlink == a); 113 assert(a->type == PKG_SOLVED_UPGRADE_INSTALL || 114 a->type == PKG_SOLVED_UPGRADE_REMOVE); 115 assert(b->type == PKG_SOLVED_UPGRADE_INSTALL || 116 b->type == PKG_SOLVED_UPGRADE_REMOVE); 117 assert(a->type != b->type); 118 if (a->type == PKG_SOLVED_UPGRADE_REMOVE) { 119 return (PKG_SCHEDULE_EDGE_SPLIT_UPGRADE); 120 } 121 return (PKG_SCHEDULE_EDGE_NONE); 122 } 123 124 /* TODO: These switches would be unnecessary if delete jobs used 125 * items[1] rather than items[0]. I suspect other cleanups could 126 * be made as well. */ 127 struct pkg *a_new = NULL; 128 struct pkg *a_old = NULL; 129 switch (a->type) { 130 case PKG_SOLVED_INSTALL: 131 case PKG_SOLVED_UPGRADE_INSTALL: 132 a_new = a->items[0]->pkg; 133 break; 134 case PKG_SOLVED_DELETE: 135 case PKG_SOLVED_UPGRADE_REMOVE: 136 a_old = a->items[0]->pkg; 137 break; 138 case PKG_SOLVED_UPGRADE: 139 a_new = a->items[0]->pkg; 140 a_old = a->items[1]->pkg; 141 break; 142 default: 143 assert(false); 144 } 145 146 struct pkg *b_new = NULL; 147 struct pkg *b_old = NULL; 148 switch (b->type) { 149 case PKG_SOLVED_INSTALL: 150 case PKG_SOLVED_UPGRADE_INSTALL: 151 b_new = b->items[0]->pkg; 152 break; 153 case PKG_SOLVED_DELETE: 154 case PKG_SOLVED_UPGRADE_REMOVE: 155 b_old = b->items[0]->pkg; 156 break; 157 case PKG_SOLVED_UPGRADE: 158 b_new = b->items[0]->pkg; 159 b_old = b->items[1]->pkg; 160 break; 161 default: 162 assert(false); 163 } 164 165 if (a_new != NULL && b_new != NULL && 166 pkg_jobs_schedule_direct_depends(b_new, a_new)) { 167 return (PKG_SCHEDULE_EDGE_NEW_DEP_NEW); 168 } 169 if (a_old != NULL && b_new != NULL) { 170 struct pkg_conflict *conflict = NULL; 171 while (pkg_conflicts(a_old, &conflict) == EPKG_OK) { 172 if (STREQ(b_new->uid, conflict->uid)) { 173 return (PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW); 174 } 175 } 176 } 177 if (a_old != NULL && b_old != NULL && 178 pkg_jobs_schedule_direct_depends(a_old, b_old)) { 179 if (!(a_new != NULL && b_new != NULL && 180 pkg_jobs_schedule_direct_depends(a_new, b_new))) { 181 return (PKG_SCHEDULE_EDGE_OLD_DEP_OLD); 182 } 183 } 184 185 return (PKG_SCHEDULE_EDGE_NONE); 186 } 187 188 static void 189 pkg_jobs_schedule_dbg_job(pkg_solved_list *jobs, struct pkg_solved *job) 190 { 191 if (ctx.debug_level < 4) { 192 return; 193 } 194 195 dbg(4, "job: %s %s", pkg_jobs_schedule_job_type_string(job), 196 job->items[0]->pkg->uid); 197 198 vec_foreach(*jobs, i) { 199 struct pkg_solved *other = jobs->d[i]; 200 if (jobs->d[i] == NULL) 201 continue; 202 switch (pkg_jobs_schedule_graph_edge(job, other)) { 203 case PKG_SCHEDULE_EDGE_NONE: 204 break; 205 case PKG_SCHEDULE_EDGE_NEW_DEP_NEW: 206 dbg(4, " edge to %s %s, new depends on new", 207 pkg_jobs_schedule_job_type_string(other), 208 other->items[0]->pkg->uid); 209 break; 210 case PKG_SCHEDULE_EDGE_OLD_DEP_OLD: 211 dbg(4, " edge to %s %s, old depends on old", 212 pkg_jobs_schedule_job_type_string(other), 213 other->items[0]->pkg->uid); 214 break; 215 case PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW: 216 dbg(4, " edge to %s %s, old conflicts with new", 217 pkg_jobs_schedule_job_type_string(other), 218 other->items[0]->pkg->uid); 219 break; 220 case PKG_SCHEDULE_EDGE_SPLIT_UPGRADE: 221 dbg(4, " edge to %s %s, split upgrade", 222 pkg_jobs_schedule_job_type_string(other), 223 other->items[0]->pkg->uid); 224 break; 225 default: 226 assert(false); 227 } 228 } 229 } 230 231 static void 232 pkg_job_schedule_fprint_node_id(FILE *file, struct pkg_solved *node) 233 { 234 fprintf(file, "\"%s %s\"", pkg_jobs_schedule_job_type_string(node), 235 node->items[0]->pkg->uid); 236 } 237 238 static void 239 pkg_jobs_schedule_debug_dot_file(pkg_solved_list *nodes) 240 { 241 const char *path = pkg_object_string(pkg_config_get("DEBUG_SCHEDULER_DOT_FILE")); 242 if (path == NULL) { 243 return; 244 } 245 FILE *file = fopen(path, "we"); 246 if (file == NULL) { 247 pkg_emit_errno("fopen", path); 248 return; 249 } 250 fprintf(file, "digraph {\n"); 251 for (size_t i = 0; i < vec_len(nodes); i++) { 252 for (size_t j = 0; j < vec_len(nodes); j++) { 253 enum pkg_jobs_schedule_graph_edge_type edge = 254 pkg_jobs_schedule_graph_edge(nodes->d[i], nodes->d[j]); 255 if (edge == PKG_SCHEDULE_EDGE_NONE) { 256 continue; 257 } 258 pkg_job_schedule_fprint_node_id(file, nodes->d[i]); 259 fprintf(file, " -> "); 260 pkg_job_schedule_fprint_node_id(file, nodes->d[j]); 261 fprintf(file, "\n"); 262 switch (edge) { 263 case PKG_SCHEDULE_EDGE_NEW_DEP_NEW: 264 fprintf(file, " [label=\"new dep new\"]\n"); 265 break; 266 case PKG_SCHEDULE_EDGE_OLD_DEP_OLD: 267 fprintf(file, " [label=\"old dep old\"]\n"); 268 break; 269 case PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW: 270 fprintf(file, " [label=\"old conflict new\"]\n"); 271 break; 272 case PKG_SCHEDULE_EDGE_SPLIT_UPGRADE: 273 fprintf(file, " [label=\"split upgrade\"]\n"); 274 break; 275 case PKG_SCHEDULE_EDGE_NONE: 276 default: 277 assert(false); 278 } 279 } 280 } 281 fprintf(file, "}\n"); 282 fclose(file); 283 } 284 285 286 static bool 287 pkg_jobs_schedule_has_incoming_edge(pkg_solved_list *nodes, 288 struct pkg_solved *node) 289 { 290 vec_foreach(*nodes, i) { 291 if (pkg_jobs_schedule_graph_edge(nodes->d[i], node) != PKG_SCHEDULE_EDGE_NONE) { 292 return (true); 293 } 294 } 295 return (false); 296 } 297 298 /* 299 * Prioritizing the install jobs and deprioritizing the delete jobs of split 300 * upgrades reduces the distance between the two halves of the split job in the 301 * final execution order. 302 */ 303 static int 304 pkg_jobs_schedule_priority(struct pkg_solved *node) 305 { 306 switch (node->type) { 307 case PKG_SOLVED_UPGRADE_INSTALL: 308 return 1; 309 case PKG_SOLVED_UPGRADE_REMOVE: 310 return -1; 311 default: 312 return 0; 313 } 314 } 315 316 /* This comparison function is used as a tiebreaker in the topological sort. */ 317 static int 318 pkg_jobs_schedule_cmp_available(struct pkg_solved *a, struct pkg_solved *b) 319 { 320 int ret = pkg_jobs_schedule_priority(a) - pkg_jobs_schedule_priority(b); 321 if (ret == 0) { 322 /* Falling back to lexicographical ordering ensures that job execution 323 * order is always consistent and makes testing easier. */ 324 return strcmp(b->items[0]->pkg->uid, a->items[0]->pkg->uid); 325 } else { 326 return ret; 327 } 328 } 329 330 /* 331 * Move job nodes with no incoming edges from unavailable to available. 332 * If changed is non-NULL, only check jobs with an incoming edge from changed. */ 333 static void 334 pkg_jobs_schedule_update_available(pkg_solved_list *unavailable, 335 pkg_solved_list *available, struct pkg_solved *changed) 336 { 337 for (size_t i = 0; i < unavailable->len;) { 338 struct pkg_solved *job = unavailable->d[i]; 339 if ((changed == NULL || 340 pkg_jobs_schedule_graph_edge(changed, job) != PKG_SCHEDULE_EDGE_NONE) && 341 !pkg_jobs_schedule_has_incoming_edge(unavailable, job) && 342 !pkg_jobs_schedule_has_incoming_edge(available, job)) { 343 vec_push(available, job); 344 vec_swap_remove(unavailable, i); 345 } else { 346 i++; 347 } 348 } 349 } 350 351 /* Topological sort based on Kahn's algorithm with a tiebreaker */ 352 static void 353 pkg_jobs_schedule_topological_sort(pkg_solved_list *jobs) 354 { 355 pkg_solved_list unavailable = *jobs; 356 pkg_solved_list available = vec_init(); 357 *jobs = (pkg_solved_list)vec_init(); 358 359 pkg_jobs_schedule_update_available(&unavailable, &available, NULL); 360 361 while (available.len > 0) { 362 /* Add the highest priority job from the set of available jobs 363 * to the sorted list */ 364 size_t max = 0; 365 for (size_t i = 1; i < available.len; i++) { 366 if (pkg_jobs_schedule_cmp_available(available.d[i], available.d[max]) > 0) { 367 max = i; 368 } 369 } 370 struct pkg_solved *node = available.d[max]; 371 vec_push(jobs, node); 372 vec_swap_remove(&available, max); 373 374 /* Again, place all job nodes with no incoming edges in the set 375 * of available jobs, ignoring any incoming edges from job nodes 376 * already added to the sorted list */ 377 pkg_jobs_schedule_update_available(&unavailable, &available, node); 378 } 379 380 /* The unavailable set will only be non-empty at this point if there is a 381 * cycle in the graph and all cycles must be eliminated by splitting 382 * upgrade jobs before calling this function. */ 383 assert(unavailable.len == 0); 384 385 vec_free(&unavailable); 386 vec_free(&available); 387 } 388 389 /* 390 * This is a depth-first search that keeps track of the path taken to the 391 * current node in the graph. If a node on this path is encountered a 392 * second time a cycle has been found. 393 */ 394 static struct pkg_solved * 395 pkg_jobs_schedule_find_cycle(pkg_solved_list *jobs, 396 struct pkg_solved **path, struct pkg_solved *node) 397 { 398 /* Push node to path */ 399 assert(node->mark == PKG_SOLVED_CYCLE_MARK_NONE); 400 node->mark = PKG_SOLVED_CYCLE_MARK_PATH; 401 assert(node->path_prev == NULL); 402 node->path_prev = *path; 403 *path = node; 404 405 vec_foreach(*jobs, i) { 406 if (pkg_jobs_schedule_graph_edge(node, jobs->d[i]) != PKG_SCHEDULE_EDGE_NONE) { 407 switch (jobs->d[i]->mark){ 408 case PKG_SOLVED_CYCLE_MARK_NONE:; 409 struct pkg_solved *cycle = 410 pkg_jobs_schedule_find_cycle(jobs, path, jobs->d[i]); 411 if (cycle != NULL) { 412 return (cycle); 413 } 414 break; 415 case PKG_SOLVED_CYCLE_MARK_DONE: 416 break; 417 case PKG_SOLVED_CYCLE_MARK_PATH: 418 return (jobs->d[i]); /* Found a cycle */ 419 default: 420 assert(false); 421 } 422 } 423 } 424 425 /* Pop node from path */ 426 assert(node->mark == PKG_SOLVED_CYCLE_MARK_PATH); 427 node->mark = PKG_SOLVED_CYCLE_MARK_DONE; 428 *path = node->path_prev; 429 node->path_prev = NULL; 430 431 return (NULL); 432 } 433 434 int pkg_jobs_schedule(struct pkg_jobs *j) 435 { 436 pkg_jobs_schedule_debug_dot_file(&j->jobs); 437 438 while (true) { 439 dbg(3, "checking job scheduling graph for cycles..."); 440 441 vec_foreach(j->jobs, i) { 442 j->jobs.d[i]->mark = PKG_SOLVED_CYCLE_MARK_NONE; 443 j->jobs.d[i]->path_prev = NULL; 444 445 pkg_jobs_schedule_dbg_job(&j->jobs, j->jobs.d[i]); 446 } 447 448 /* The graph may not be connected, in which case it is necessary to 449 * run multiple searches for cycles from different start nodes. */ 450 struct pkg_solved *path = NULL; 451 struct pkg_solved *cycle = NULL; 452 vec_foreach(j->jobs, i) { 453 switch (j->jobs.d[i]->mark) { 454 case PKG_SOLVED_CYCLE_MARK_NONE: 455 cycle = pkg_jobs_schedule_find_cycle(&j->jobs, &path, j->jobs.d[i]); 456 break; 457 case PKG_SOLVED_CYCLE_MARK_DONE: 458 break; 459 case PKG_SOLVED_CYCLE_MARK_PATH: 460 default: 461 assert(false); 462 } 463 if (cycle != NULL) { 464 break; 465 } 466 } 467 468 if (cycle == NULL) { 469 dbg(3, "no job scheduling graph cycles found"); 470 assert(path == NULL); 471 break; 472 } 473 474 dbg(3, "job scheduling graph cycle found"); 475 assert(path != NULL); 476 assert(path->path_prev != NULL); 477 assert(path != cycle); 478 479 /* Close the path into a cycle to eliminate some edge cases in the loop. */ 480 cycle->path_prev = path; 481 482 /* Not all upgrade jobs would break the cycle if split. 483 * It is helpful to think of each upgrade job as two separate 484 * nodes, the remove half and the install half. If a cycle only 485 * involved one half of the upgrade job, splitting the upgrade 486 * job would not break the cycle or even change its length. 487 * 488 * Furthermore, since there is an edge from the remove half of 489 * an upgrade job to the install half, the install half must 490 * come *before* the remove half in the cycle. Otherwise 491 * splitting the upgrade job will only make the cycle one node 492 * longer. 493 */ 494 enum pkg_jobs_schedule_graph_edge_type out = 495 pkg_jobs_schedule_graph_edge(path, cycle); 496 assert(out != PKG_SCHEDULE_EDGE_NONE); 497 enum pkg_jobs_schedule_graph_edge_type in = 498 pkg_jobs_schedule_graph_edge(path->path_prev, path); 499 assert(in != PKG_SCHEDULE_EDGE_NONE); 500 while (true) { 501 if (path->type == PKG_SOLVED_UPGRADE) { 502 assert(out != PKG_SCHEDULE_EDGE_SPLIT_UPGRADE); 503 assert(in != PKG_SCHEDULE_EDGE_SPLIT_UPGRADE); 504 if ((out == PKG_SCHEDULE_EDGE_OLD_DEP_OLD || 505 out == PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW) && 506 (in == PKG_SCHEDULE_EDGE_NEW_DEP_NEW || 507 in == PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW)) { 508 /* Found an upgrade job that would break 509 * the cycle if split. */ 510 break; 511 } 512 } 513 if (path == cycle) { 514 pkg_emit_error("failed to break job scheduling cycle"); 515 return (EPKG_FATAL); 516 } 517 path = path->path_prev; 518 assert(path != NULL); 519 out = in; 520 in = pkg_jobs_schedule_graph_edge(path->path_prev, path); 521 assert(in != PKG_SCHEDULE_EDGE_NONE); 522 } 523 524 /* path is now the upgrade job chosen to be split */ 525 dbg(2, "splitting upgrade %s job", path->items[0]->pkg->uid); 526 527 struct pkg_solved *new = xcalloc(1, sizeof(struct pkg_solved)); 528 new->type = PKG_SOLVED_UPGRADE_REMOVE; 529 new->items[0] = path->items[1]; 530 new->xlink = path; 531 path->type = PKG_SOLVED_UPGRADE_INSTALL; 532 path->items[1] = NULL; 533 path->xlink = new; 534 vec_push(&j->jobs, new); 535 } 536 537 pkg_jobs_schedule_topological_sort(&j->jobs); 538 539 dbg(3, "finished job scheduling"); 540 541 return (EPKG_OK); 542 }