use std::array::IntoIter; use std::cell::Cell; use std::collections::HashMap; use std::collections::HashSet; use std::hash::Hash; type Order = usize; /// Directed Graph with dynamic topological sorting /// /// Design and implementation based "A Dynamic Topological Sort Algorithm for Directed Acyclic /// Graphs" by David J. Pearce and Paul H.J. Kelly which can be found on [the author's /// website][paper]. /// /// Variable- and method names have been chosen to reflect most closely resemble the names in the /// original paper. /// /// This digraph tracks its own topological order and updates it when new edges are added to the /// graph. If a cycle is added that would create a cycle, that edge is rejected and the graph is not /// visibly changed. /// /// [paper]: https://whileydave.com/publications/pk07_jea/ #[derive(Default, Debug)] pub struct DiGraph where V: Eq + Hash + Copy, { nodes: HashMap>, /// Next topological sort order next_ord: Order, } #[derive(Debug)] struct Node where V: Eq + Hash + Clone, { in_edges: HashSet, out_edges: HashSet, // The "Ord" field is a Cell to ensure we can update it in an immutable context. // `std::collections::HashMap` doesn't let you have multiple mutable references to elements, but // this way we can use immutable references and still update `ord`. This saves quite a few // hashmap lookups in the final reorder function. ord: Cell, } impl DiGraph where V: Eq + Hash + Copy, { /// Add a new node to the graph. /// /// If the node already existed, this function does not add it and uses the existing node data. /// The function returns mutable references to the in-edges, out-edges, and finally the index of /// the node in the topological order. /// /// New nodes are appended to the end of the topological order when added. fn add_node(&mut self, n: V) -> (&mut HashSet, &mut HashSet, Order) { let next_ord = &mut self.next_ord; let node = self.nodes.entry(n).or_insert_with(|| { let order = *next_ord; *next_ord = next_ord.checked_add(1).expect("Topological order overflow"); Node { ord: Cell::new(order), in_edges: Default::default(), out_edges: Default::default(), } }); (&mut node.in_edges, &mut node.out_edges, node.ord.get()) } pub(crate) fn remove_node(&mut self, n: V) -> bool { match self.nodes.remove(&n) { None => false, Some(Node { out_edges, in_edges, .. }) => { out_edges.into_iter().for_each(|m| { self.nodes.get_mut(&m).unwrap().in_edges.remove(&n); }); in_edges.into_iter().for_each(|m| { self.nodes.get_mut(&m).unwrap().out_edges.remove(&n); }); true } } } /// Attempt to add an edge to the graph /// /// Nodes, both from and to, are created as needed when creating new edges. If the new edge /// would introduce a cycle, the edge is rejected and `false` is returned. pub(crate) fn add_edge(&mut self, x: V, y: V) -> bool { if x == y { // self-edges are not considered cycles return true; } let (_, out_edges, ub) = self.add_node(x); if !out_edges.insert(y) { // Edge already exists, nothing to be done return true; } let (in_edges, _, lb) = self.add_node(y); in_edges.insert(x); if lb < ub { // This edge might introduce a cycle, need to recompute the topological sort let mut visited = IntoIter::new([x, y]).collect(); let mut delta_f = Vec::new(); let mut delta_b = Vec::new(); if !self.dfs_f(&self.nodes[&y], ub, &mut visited, &mut delta_f) { // This edge introduces a cycle, so we want to reject it and remove it from the // graph again to keep the "does not contain cycles" invariant. // We use map instead of unwrap to avoid an `unwrap()` but we know that these // entries are present as we just added them above. self.nodes.get_mut(&y).map(|node| node.in_edges.remove(&x)); self.nodes.get_mut(&x).map(|node| node.out_edges.remove(&y)); // No edge was added return false; } // No need to check as we should've found the cycle on the forward pass self.dfs_b(&self.nodes[&x], lb, &mut visited, &mut delta_b); // Original paper keeps it around but this saves us from clearing it drop(visited); self.reorder(delta_f, delta_b); } true } /// Forwards depth-first-search fn dfs_f<'a>( &'a self, n: &'a Node, ub: Order, visited: &mut HashSet, delta_f: &mut Vec<&'a Node>, ) -> bool { delta_f.push(n); n.out_edges.iter().all(|w| { let node = &self.nodes[w]; let ord = node.ord.get(); if ord == ub { // Found a cycle false } else if !visited.contains(w) && ord < ub { // Need to check recursively visited.insert(*w); self.dfs_f(node, ub, visited, delta_f) } else { // Already seen this one or not interesting true } }) } /// Backwards depth-first-search fn dfs_b<'a>( &'a self, n: &'a Node, lb: Order, visited: &mut HashSet, delta_b: &mut Vec<&'a Node>, ) { delta_b.push(n); for w in &n.in_edges { let node = &self.nodes[w]; if !visited.contains(w) && lb < node.ord.get() { visited.insert(*w); self.dfs_b(node, lb, visited, delta_b); } } } fn reorder(&self, mut delta_f: Vec<&Node>, mut delta_b: Vec<&Node>) { self.sort(&mut delta_f); self.sort(&mut delta_b); let mut l = Vec::with_capacity(delta_f.len() + delta_b.len()); let mut orders = Vec::with_capacity(delta_f.len() + delta_b.len()); for v in delta_b.into_iter().chain(delta_f) { orders.push(v.ord.get()); l.push(v); } // Original paper cleverly merges the two lists by using that both are sorted. We just sort // again. This is slower but also much simpler. orders.sort_unstable(); for (node, order) in l.into_iter().zip(orders) { node.ord.set(order); } } fn sort(&self, ids: &mut [&Node]) { // Can use unstable sort because mutex ids should not be equal ids.sort_unstable_by_key(|v| &v.ord); } } #[cfg(test)] mod tests { use rand::seq::SliceRandom; use rand::thread_rng; use super::*; #[test] fn test_digraph() { let mut graph = DiGraph::default(); // Add some safe edges assert!(graph.add_edge(0, 1)); assert!(graph.add_edge(1, 2)); assert!(graph.add_edge(2, 3)); assert!(graph.add_edge(4, 2)); // Try to add an edge that introduces a cycle assert!(!graph.add_edge(3, 1)); // Add an edge that should reorder 0 to be after 4 assert!(graph.add_edge(4, 0)); } /// Fuzz the DiGraph implementation by adding a bunch of valid edges. /// /// This test generates all possible forward edges in a 100-node graph consisting of natural /// numbers, shuffles them, then adds them to the graph. This will always be a valid directed, /// acyclic graph because there is a trivial order (the natural numbers) but because the edges /// are added in a random order the DiGraph will still occassionally need to reorder nodes. #[test] fn fuzz_digraph() { // Note: this fuzzer is quadratic in the number of nodes, so this cannot be too large or it // will slow down the tests too much. const NUM_NODES: usize = 100; let mut edges = Vec::with_capacity(NUM_NODES * NUM_NODES); for i in 0..NUM_NODES { for j in i..NUM_NODES { edges.push((i, j)); } } edges.shuffle(&mut thread_rng()); let mut graph = DiGraph::default(); for (x, y) in edges { assert!(graph.add_edge(x, y)); } } }