/ libpkg / pkg_jobs_schedule.c
pkg_jobs_schedule.c
  1  /*-
  2   * Copyright (c) 2024 The FreeBSD Foundation
  3   *
  4   * This software was developed by Isaac Freund <ifreund@freebsdfoundation.org>
  5   * under sponsorship from the FreeBSD Foundation.
  6   *
  7   * Redistribution and use in source and binary forms, with or without
  8   * modification, are permitted provided that the following conditions
  9   * are met:
 10   * 1. Redistributions of source code must retain the above copyright
 11   *    notice, this list of conditions and the following disclaimer
 12   *    in this position and unchanged.
 13   * 2. Redistributions in binary form must reproduce the above copyright
 14   *    notice, this list of conditions and the following disclaimer in the
 15   *    documentation and/or other materials provided with the distribution.
 16   *
 17   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 18   * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 19   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 20   * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 21   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 22   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23   * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 26   * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27   */
 28  #include <assert.h>
 29  
 30  #include "pkg.h"
 31  #include "pkg/vec.h"
 32  #include "private/event.h"
 33  #include "private/pkg.h"
 34  #include "private/pkg_jobs.h"
 35  
 36  #define dbg(x, ...) pkg_dbg(PKG_DBG_SCHEDULER, x, __VA_ARGS__)
 37  
 38  extern struct pkg_ctx ctx;
 39  
 40  static const char *
 41  pkg_jobs_schedule_job_type_string(struct pkg_solved *job)
 42  {
 43  	switch (job->type) {
 44  	case PKG_SOLVED_INSTALL:
 45  		return "install";
 46  	case PKG_SOLVED_DELETE:
 47  		return "delete";
 48  	case PKG_SOLVED_UPGRADE:
 49  		return "upgrade";
 50  	case PKG_SOLVED_UPGRADE_INSTALL:
 51  		return "split upgrade install";
 52  	case PKG_SOLVED_UPGRADE_REMOVE:
 53  		return "split upgrade delete";
 54  	default:
 55  		assert(false);
 56  	}
 57  }
 58  
 59  /*
 60   * Returns true if pkg a directly depends on pkg b.
 61   *
 62   * Checking only direct dependencies is sufficient to define the edges in a
 63   * graph that models indirect dependencies as well as long as all of the
 64   * intermediate dependencies are also nodes in the graph.
 65   */
 66  static bool pkg_jobs_schedule_direct_depends(struct pkg *a, struct pkg *b)
 67  {
 68  	struct pkg_dep *dep = NULL;
 69  	while (pkg_deps(a, &dep) == EPKG_OK) {
 70  		if (STREQ(b->uid, dep->uid)) {
 71  			return (true);
 72  		}
 73  	}
 74  	return (false);
 75  }
 76  
 77  enum pkg_jobs_schedule_graph_edge_type {
 78  	PKG_SCHEDULE_EDGE_NONE,
 79  	PKG_SCHEDULE_EDGE_NEW_DEP_NEW,
 80  	PKG_SCHEDULE_EDGE_OLD_DEP_OLD,
 81  	PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW,
 82  	PKG_SCHEDULE_EDGE_SPLIT_UPGRADE,
 83  };
 84  
 85  /*
 86   * Jobs are nodes in a directed graph. Edges represent job scheduling order
 87   * requirements. The existence of an edge from node A to node B indicates
 88   * that job A must be executed before job B.
 89   *
 90   * There is a directed edge from node A to node B if and only if
 91   * one of the following conditions holds:
 92   *
 93   * 1. B's new package depends on A's new package
 94   * 2. A's old package depends on B's old package
 95   * 3. A's old package conflicts with B's new package
 96   * 4. A and B are the two halves of a split upgrade job
 97   *    and A is the delete half.
 98   *
 99   * There is one exception made to rule 2 in order to avoid splitting in
100   * the common case of upgrading packages X and Y where Xold depends on Yold
101   * and Xnew depends on Ynew. In this case, rule 2 is ignored and there is no
102   * edge from X to Y.
103   */
104  static enum pkg_jobs_schedule_graph_edge_type
105  pkg_jobs_schedule_graph_edge(struct pkg_solved *a, struct pkg_solved *b)
106  {
107  	if (a == b) {
108  		return (PKG_SCHEDULE_EDGE_NONE);
109  	}
110  
111  	if (a->xlink == b || b->xlink == a) {
112  		assert(a->xlink == b && b->xlink == a);
113  		assert(a->type == PKG_SOLVED_UPGRADE_INSTALL ||
114  		       a->type == PKG_SOLVED_UPGRADE_REMOVE);
115  		assert(b->type == PKG_SOLVED_UPGRADE_INSTALL ||
116  		       b->type == PKG_SOLVED_UPGRADE_REMOVE);
117  		assert(a->type != b->type);
118  		if (a->type == PKG_SOLVED_UPGRADE_REMOVE) {
119  			return (PKG_SCHEDULE_EDGE_SPLIT_UPGRADE);
120  		}
121  		return (PKG_SCHEDULE_EDGE_NONE);
122  	}
123  
124  	/* TODO: These switches would be unnecessary if delete jobs used
125  	 * items[1] rather than items[0]. I suspect other cleanups could
126  	 * be made as well. */
127  	struct pkg *a_new = NULL;
128  	struct pkg *a_old = NULL;
129  	switch (a->type) {
130  	case PKG_SOLVED_INSTALL:
131  	case PKG_SOLVED_UPGRADE_INSTALL:
132  		a_new = a->items[0]->pkg;
133  		break;
134  	case PKG_SOLVED_DELETE:
135  	case PKG_SOLVED_UPGRADE_REMOVE:
136  		a_old = a->items[0]->pkg;
137  		break;
138  	case PKG_SOLVED_UPGRADE:
139  		a_new = a->items[0]->pkg;
140  		a_old = a->items[1]->pkg;
141  		break;
142  	default:
143  		assert(false);
144  	}
145  
146  	struct pkg *b_new = NULL;
147  	struct pkg *b_old = NULL;
148  	switch (b->type) {
149  	case PKG_SOLVED_INSTALL:
150  	case PKG_SOLVED_UPGRADE_INSTALL:
151  		b_new = b->items[0]->pkg;
152  		break;
153  	case PKG_SOLVED_DELETE:
154  	case PKG_SOLVED_UPGRADE_REMOVE:
155  		b_old = b->items[0]->pkg;
156  		break;
157  	case PKG_SOLVED_UPGRADE:
158  		b_new = b->items[0]->pkg;
159  		b_old = b->items[1]->pkg;
160  		break;
161  	default:
162  		assert(false);
163  	}
164  
165  	if (a_new != NULL && b_new != NULL &&
166  	    pkg_jobs_schedule_direct_depends(b_new, a_new)) {
167  		return (PKG_SCHEDULE_EDGE_NEW_DEP_NEW);
168  	}
169  	if (a_old != NULL && b_new != NULL) {
170  		struct pkg_conflict *conflict = NULL;
171  		while (pkg_conflicts(a_old, &conflict) == EPKG_OK) {
172  			if (STREQ(b_new->uid, conflict->uid)) {
173  				return (PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW);
174  			}
175  		}
176  	}
177  	if (a_old != NULL && b_old != NULL &&
178  	    pkg_jobs_schedule_direct_depends(a_old, b_old)) {
179  		if (!(a_new != NULL && b_new != NULL &&
180  		    pkg_jobs_schedule_direct_depends(a_new, b_new))) {
181  			return (PKG_SCHEDULE_EDGE_OLD_DEP_OLD);
182  		}
183  	}
184  
185  	return (PKG_SCHEDULE_EDGE_NONE);
186  }
187  
188  static void
189  pkg_jobs_schedule_dbg_job(pkg_solved_list *jobs, struct pkg_solved *job)
190  {
191  	if (ctx.debug_level < 4) {
192  		return;
193  	}
194  
195  	dbg(4, "job: %s %s", pkg_jobs_schedule_job_type_string(job),
196  	    job->items[0]->pkg->uid);
197  
198  	vec_foreach(*jobs, i) {
199  		struct pkg_solved *other = jobs->d[i];
200  		if (jobs->d[i] == NULL)
201  			continue;
202  		switch (pkg_jobs_schedule_graph_edge(job, other)) {
203  		case PKG_SCHEDULE_EDGE_NONE:
204  			break;
205  		case PKG_SCHEDULE_EDGE_NEW_DEP_NEW:
206  			dbg(4, "  edge to %s %s, new depends on new",
207  			    pkg_jobs_schedule_job_type_string(other),
208  			    other->items[0]->pkg->uid);
209  			break;
210  		case PKG_SCHEDULE_EDGE_OLD_DEP_OLD:
211  			dbg(4, "  edge to %s %s, old depends on old",
212  			    pkg_jobs_schedule_job_type_string(other),
213  			    other->items[0]->pkg->uid);
214  			break;
215  		case PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW:
216  			dbg(4, "  edge to %s %s, old conflicts with new",
217  			    pkg_jobs_schedule_job_type_string(other),
218  			    other->items[0]->pkg->uid);
219  			break;
220  		case PKG_SCHEDULE_EDGE_SPLIT_UPGRADE:
221  			dbg(4, "  edge to %s %s, split upgrade",
222  			    pkg_jobs_schedule_job_type_string(other),
223  			    other->items[0]->pkg->uid);
224  			break;
225  		default:
226  			assert(false);
227  		}
228  	}
229  }
230  
231  static void
232  pkg_job_schedule_fprint_node_id(FILE *file, struct pkg_solved *node)
233  {
234  	fprintf(file, "\"%s %s\"", pkg_jobs_schedule_job_type_string(node),
235  		node->items[0]->pkg->uid);
236  }
237  
238  static void
239  pkg_jobs_schedule_debug_dot_file(pkg_solved_list *nodes)
240  {
241  	const char *path = pkg_object_string(pkg_config_get("DEBUG_SCHEDULER_DOT_FILE"));
242  	if (path == NULL) {
243  		return;
244  	}
245  	FILE *file = fopen(path, "we");
246  	if (file == NULL) {
247  		pkg_emit_errno("fopen", path);
248  		return;
249  	}
250  	fprintf(file, "digraph {\n");
251  	for (size_t i = 0; i < vec_len(nodes); i++) {
252  		for (size_t j = 0; j < vec_len(nodes); j++) {
253  			enum pkg_jobs_schedule_graph_edge_type edge =
254  				pkg_jobs_schedule_graph_edge(nodes->d[i], nodes->d[j]);
255  			if (edge == PKG_SCHEDULE_EDGE_NONE) {
256  				continue;
257  			}
258  			pkg_job_schedule_fprint_node_id(file, nodes->d[i]);
259  			fprintf(file, " -> ");
260  			pkg_job_schedule_fprint_node_id(file, nodes->d[j]);
261  			fprintf(file, "\n");
262  			switch (edge) {
263  			case PKG_SCHEDULE_EDGE_NEW_DEP_NEW:
264  				fprintf(file, " [label=\"new dep new\"]\n");
265  				break;
266  			case PKG_SCHEDULE_EDGE_OLD_DEP_OLD:
267  				fprintf(file, " [label=\"old dep old\"]\n");
268  				break;
269  			case PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW:
270  				fprintf(file, " [label=\"old conflict new\"]\n");
271  				break;
272  			case PKG_SCHEDULE_EDGE_SPLIT_UPGRADE:
273  				fprintf(file, " [label=\"split upgrade\"]\n");
274  				break;
275  			case PKG_SCHEDULE_EDGE_NONE:
276  			default:
277  				assert(false);
278  			}
279  		}
280  	}
281  	fprintf(file, "}\n");
282  	fclose(file);
283  }
284  
285  
286  static bool
287  pkg_jobs_schedule_has_incoming_edge(pkg_solved_list *nodes,
288      struct pkg_solved *node)
289  {
290  	vec_foreach(*nodes, i) {
291  		if (pkg_jobs_schedule_graph_edge(nodes->d[i], node) != PKG_SCHEDULE_EDGE_NONE) {
292  			return (true);
293  		}
294  	}
295  	return (false);
296  }
297  
298  /*
299   * Prioritizing the install jobs and deprioritizing the delete jobs of split
300   * upgrades reduces the distance between the two halves of the split job in the
301   * final execution order.
302   */
303  static int
304  pkg_jobs_schedule_priority(struct pkg_solved *node)
305  {
306  	switch (node->type) {
307  	case PKG_SOLVED_UPGRADE_INSTALL:
308  		return 1;
309  	case PKG_SOLVED_UPGRADE_REMOVE:
310  		return -1;
311  	default:
312  		return 0;
313  	}
314  }
315  
316  /* This comparison function is used as a tiebreaker in the topological sort. */
317  static int
318  pkg_jobs_schedule_cmp_available(struct pkg_solved *a, struct pkg_solved *b)
319  {
320  	int ret = pkg_jobs_schedule_priority(a) - pkg_jobs_schedule_priority(b);
321  	if (ret == 0) {
322  		/* Falling back to lexicographical ordering ensures that job execution
323  		 * order is always consistent and makes testing easier. */
324  		return strcmp(b->items[0]->pkg->uid, a->items[0]->pkg->uid);
325  	} else {
326  		return ret;
327  	}
328  }
329  
330  /*
331   * Move job nodes with no incoming edges from unavailable to available.
332   * If changed is non-NULL, only check jobs with an incoming edge from changed. */
333  static void
334  pkg_jobs_schedule_update_available(pkg_solved_list *unavailable,
335      pkg_solved_list *available, struct pkg_solved *changed)
336  {
337  	for (size_t i = 0; i < unavailable->len;) {
338  		struct pkg_solved *job = unavailable->d[i];
339  		if ((changed == NULL ||
340  		    pkg_jobs_schedule_graph_edge(changed, job) != PKG_SCHEDULE_EDGE_NONE) &&
341  		    !pkg_jobs_schedule_has_incoming_edge(unavailable, job) &&
342  		    !pkg_jobs_schedule_has_incoming_edge(available, job)) {
343  			vec_push(available, job);
344  			vec_swap_remove(unavailable, i);
345  		} else {
346  			i++;
347  		}
348  	}
349  }
350  
351  /* Topological sort based on Kahn's algorithm with a tiebreaker */
352  static void
353  pkg_jobs_schedule_topological_sort(pkg_solved_list *jobs)
354  {
355  	pkg_solved_list unavailable = *jobs;
356  	pkg_solved_list available = vec_init();
357  	*jobs = (pkg_solved_list)vec_init();
358  
359  	pkg_jobs_schedule_update_available(&unavailable, &available, NULL);
360  
361  	while (available.len > 0) {
362  		/* Add the highest priority job from the set of available jobs
363  		 * to the sorted list */
364  		size_t max = 0;
365  		for (size_t i = 1; i < available.len; i++) {
366  			if (pkg_jobs_schedule_cmp_available(available.d[i], available.d[max]) > 0) {
367  				max = i;
368  			}
369  		}
370  		struct pkg_solved *node = available.d[max];
371  		vec_push(jobs, node);
372  		vec_swap_remove(&available, max);
373  
374  		/* Again, place all job nodes with no incoming edges in the set
375  		 * of available jobs, ignoring any incoming edges from job nodes
376  		 * already added to the sorted list */
377  		pkg_jobs_schedule_update_available(&unavailable, &available, node);
378  	}
379  
380  	/* The unavailable set will only be non-empty at this point if there is a
381  	 * cycle in the graph and all cycles must be eliminated by splitting
382  	 * upgrade jobs before calling this function. */
383  	assert(unavailable.len == 0);
384  
385  	vec_free(&unavailable);
386  	vec_free(&available);
387  }
388  
389  /*
390   * This is a depth-first search that keeps track of the path taken to the
391   * current node in the graph. If a node on this path is encountered a
392   * second time a cycle has been found.
393   */
394  static struct pkg_solved *
395  pkg_jobs_schedule_find_cycle(pkg_solved_list *jobs,
396      struct pkg_solved **path, struct pkg_solved *node)
397  {
398  	/* Push node to path */
399  	assert(node->mark == PKG_SOLVED_CYCLE_MARK_NONE);
400  	node->mark = PKG_SOLVED_CYCLE_MARK_PATH;
401  	assert(node->path_prev == NULL);
402  	node->path_prev = *path;
403  	*path = node;
404  
405  	vec_foreach(*jobs, i) {
406  		if (pkg_jobs_schedule_graph_edge(node, jobs->d[i]) != PKG_SCHEDULE_EDGE_NONE) {
407  			switch (jobs->d[i]->mark){
408  			case PKG_SOLVED_CYCLE_MARK_NONE:;
409  				struct pkg_solved *cycle =
410  				    pkg_jobs_schedule_find_cycle(jobs, path, jobs->d[i]);
411  				if (cycle != NULL) {
412  					return (cycle);
413  				}
414  				break;
415  			case PKG_SOLVED_CYCLE_MARK_DONE:
416  				break;
417  			case PKG_SOLVED_CYCLE_MARK_PATH:
418  				return (jobs->d[i]); /* Found a cycle */
419  			default:
420  				assert(false);
421  			}
422  		}
423  	}
424  
425  	/* Pop node from path */
426  	assert(node->mark == PKG_SOLVED_CYCLE_MARK_PATH);
427  	node->mark = PKG_SOLVED_CYCLE_MARK_DONE;
428  	*path = node->path_prev;
429  	node->path_prev = NULL;
430  
431  	return (NULL);
432  }
433  
434  int pkg_jobs_schedule(struct pkg_jobs *j)
435  {
436  	pkg_jobs_schedule_debug_dot_file(&j->jobs);
437  
438  	while (true) {
439  		dbg(3, "checking job scheduling graph for cycles...");
440  
441  		vec_foreach(j->jobs, i) {
442  			j->jobs.d[i]->mark = PKG_SOLVED_CYCLE_MARK_NONE;
443  			j->jobs.d[i]->path_prev = NULL;
444  
445  			pkg_jobs_schedule_dbg_job(&j->jobs, j->jobs.d[i]);
446  		}
447  
448  		/* The graph may not be connected, in which case it is necessary to
449  		 * run multiple searches for cycles from different start nodes. */
450  		struct pkg_solved *path = NULL;
451  		struct pkg_solved *cycle = NULL;
452  		vec_foreach(j->jobs, i) {
453  			switch (j->jobs.d[i]->mark) {
454  			case PKG_SOLVED_CYCLE_MARK_NONE:
455  				cycle = pkg_jobs_schedule_find_cycle(&j->jobs, &path, j->jobs.d[i]);
456  				break;
457  			case PKG_SOLVED_CYCLE_MARK_DONE:
458  				break;
459  			case PKG_SOLVED_CYCLE_MARK_PATH:
460  			default:
461  				assert(false);
462  			}
463  			if (cycle != NULL) {
464  				break;
465  			}
466  		}
467  
468  		if (cycle == NULL) {
469  			dbg(3, "no job scheduling graph cycles found");
470  			assert(path == NULL);
471  			break;
472  		}
473  
474  		dbg(3, "job scheduling graph cycle found");
475  		assert(path != NULL);
476  		assert(path->path_prev != NULL);
477  		assert(path != cycle);
478  
479  		/* Close the path into a cycle to eliminate some edge cases in the loop. */
480  		cycle->path_prev = path;
481  
482  		/* Not all upgrade jobs would break the cycle if split.
483  		 * It is helpful to think of each upgrade job as two separate
484  		 * nodes, the remove half and the install half. If a cycle only
485  		 * involved one half of the upgrade job, splitting the upgrade
486  		 * job would not break the cycle or even change its length.
487  		 *
488  		 * Furthermore, since there is an edge from the remove half of
489  		 * an upgrade job to the install half, the install half must
490  		 * come *before* the remove half in the cycle. Otherwise
491  		 * splitting the upgrade job will only make the cycle one node
492  		 * longer.
493  		 */
494  		enum pkg_jobs_schedule_graph_edge_type out =
495  		    pkg_jobs_schedule_graph_edge(path, cycle);
496  		assert(out != PKG_SCHEDULE_EDGE_NONE);
497  		enum pkg_jobs_schedule_graph_edge_type in =
498  		    pkg_jobs_schedule_graph_edge(path->path_prev, path);
499  		assert(in != PKG_SCHEDULE_EDGE_NONE);
500  		while (true) {
501  			if (path->type == PKG_SOLVED_UPGRADE) {
502  				assert(out != PKG_SCHEDULE_EDGE_SPLIT_UPGRADE);
503  				assert(in != PKG_SCHEDULE_EDGE_SPLIT_UPGRADE);
504  				if ((out == PKG_SCHEDULE_EDGE_OLD_DEP_OLD ||
505  				     out == PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW) &&
506  				    (in == PKG_SCHEDULE_EDGE_NEW_DEP_NEW ||
507  				     in == PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW)) {
508  					/* Found an upgrade job that would break
509  					 * the cycle if split. */
510  					break;
511  				}
512  			}
513  			if (path == cycle) {
514  				pkg_emit_error("failed to break job scheduling cycle");
515  			 	return (EPKG_FATAL);
516  			}
517  			path = path->path_prev;
518  			assert(path != NULL);
519  			out = in;
520  			in = pkg_jobs_schedule_graph_edge(path->path_prev, path);
521  			assert(in != PKG_SCHEDULE_EDGE_NONE);
522  		}
523  
524  		/* path is now the upgrade job chosen to be split */
525  		dbg(2, "splitting upgrade %s job", path->items[0]->pkg->uid);
526  
527  		struct pkg_solved *new = xcalloc(1, sizeof(struct pkg_solved));
528  		new->type = PKG_SOLVED_UPGRADE_REMOVE;
529  		new->items[0] = path->items[1];
530  		new->xlink = path;
531  		path->type = PKG_SOLVED_UPGRADE_INSTALL;
532  		path->items[1] = NULL;
533  		path->xlink = new;
534  		vec_push(&j->jobs, new);
535  	}
536  
537  	pkg_jobs_schedule_topological_sort(&j->jobs);
538  
539  	dbg(3, "finished job scheduling");
540  
541  	return (EPKG_OK);
542  }