From 39b493a8714c4b28e548375869530a571a7bb344 Mon Sep 17 00:00:00 2001 From: Bert Peters Date: Mon, 24 May 2021 12:57:21 +0200 Subject: [PATCH] Merge hash maps in graph structures This saves quite a few hash-map lookups which improves performance by about 25%. --- src/graph.rs | 114 +++++++++++++++++++++++++++++---------------------- 1 file changed, 65 insertions(+), 49 deletions(-) diff --git a/src/graph.rs b/src/graph.rs index aea1839..3f0b2d2 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -1,3 +1,4 @@ +use std::array::IntoIter; use std::collections::HashMap; use std::collections::HashSet; use std::hash::Hash; @@ -18,17 +19,24 @@ type Order = usize; /// visibly changed. /// /// [paper]: https://whileydave.com/publications/pk07_jea/ -#[derive(Clone, Default, Debug)] +#[derive(Default, Debug)] pub struct DiGraph where V: Eq + Hash + Copy, { - in_edges: HashMap>, - out_edges: HashMap>, + nodes: HashMap>, /// Next topological sort order next_ord: Order, - /// Topological sort order. Order is not guaranteed to be contiguous - ord: HashMap, +} + +#[derive(Debug)] +struct Node +where + V: Eq + Hash + Clone, +{ + in_edges: HashSet, + out_edges: HashSet, + ord: Order, } impl DiGraph @@ -44,29 +52,36 @@ where /// New nodes are appended to the end of the topological order when added. fn add_node(&mut self, n: V) -> (&mut HashSet, &mut HashSet, Order) { let next_ord = &mut self.next_ord; - let in_edges = self.in_edges.entry(n).or_default(); - let out_edges = self.out_edges.entry(n).or_default(); - let order = *self.ord.entry(n).or_insert_with(|| { + let node = self.nodes.entry(n).or_insert_with(|| { let order = *next_ord; *next_ord = next_ord.checked_add(1).expect("Topological order overflow"); - order + + Node { + ord: order, + in_edges: Default::default(), + out_edges: Default::default(), + } }); - (in_edges, out_edges, order) + (&mut node.in_edges, &mut node.out_edges, node.ord) } pub(crate) fn remove_node(&mut self, n: V) -> bool { - match self.out_edges.remove(&n) { + match self.nodes.remove(&n) { None => false, - Some(out_edges) => { - for other in out_edges { - self.in_edges.get_mut(&other).unwrap().remove(&n); - } + Some(Node { + out_edges, + in_edges, + .. + }) => { + out_edges.into_iter().for_each(|m| { + self.nodes.get_mut(&m).unwrap().in_edges.remove(&n); + }); - for other in self.in_edges.remove(&n).unwrap() { - self.out_edges.get_mut(&other).unwrap().remove(&n); - } + in_edges.into_iter().for_each(|m| { + self.nodes.get_mut(&m).unwrap().out_edges.remove(&n); + }); true } @@ -96,25 +111,25 @@ where if lb < ub { // This edge might introduce a cycle, need to recompute the topological sort - let mut visited = HashSet::new(); - let mut delta_f = Vec::new(); - let mut delta_b = Vec::new(); + let mut visited = IntoIter::new([x, y]).collect(); + let mut delta_f = vec![y]; + let mut delta_b = vec![x]; - if !self.dfs_f(y, ub, &mut visited, &mut delta_f) { + if !self.dfs_f(&self.nodes[&y], ub, &mut visited, &mut delta_f) { // This edge introduces a cycle, so we want to reject it and remove it from the // graph again to keep the "does not contain cycles" invariant. // We use map instead of unwrap to avoid an `unwrap()` but we know that these // entries are present as we just added them above. - self.in_edges.get_mut(&y).map(|nodes| nodes.remove(&x)); - self.out_edges.get_mut(&x).map(|nodes| nodes.remove(&y)); + self.nodes.get_mut(&y).map(|node| node.in_edges.remove(&x)); + self.nodes.get_mut(&x).map(|node| node.out_edges.remove(&x)); // No edge was added return false; } // No need to check as we should've found the cycle on the forward pass - self.dfs_b(x, lb, &mut visited, &mut delta_b); + self.dfs_b(&self.nodes[&x], lb, &mut visited, &mut delta_b); // Original paper keeps it around but this saves us from clearing it drop(visited); @@ -126,19 +141,24 @@ where } /// Forwards depth-first-search - fn dfs_f(&self, n: V, ub: Order, visited: &mut HashSet, delta_f: &mut Vec) -> bool { - visited.insert(n); - delta_f.push(n); + fn dfs_f( + &self, + n: &Node, + ub: Order, + visited: &mut HashSet, + delta_f: &mut Vec, + ) -> bool { + n.out_edges.iter().all(|w| { + let node = &self.nodes[w]; - self.out_edges[&n].iter().all(|w| { - let order = self.ord[w]; - - if order == ub { + if node.ord == ub { // Found a cycle false - } else if !visited.contains(w) && order < ub { + } else if !visited.contains(w) && node.ord < ub { // Need to check recursively - self.dfs_f(*w, ub, visited, delta_f) + visited.insert(*w); + delta_f.push(*w); + self.dfs_f(node, ub, visited, delta_f) } else { // Already seen this one or not interesting true @@ -147,13 +167,14 @@ where } /// Backwards depth-first-search - fn dfs_b(&self, n: V, lb: Order, visited: &mut HashSet, delta_b: &mut Vec) { - visited.insert(n); - delta_b.push(n); + fn dfs_b(&self, n: &Node, lb: Order, visited: &mut HashSet, delta_b: &mut Vec) { + for w in &n.in_edges { + let node = &self.nodes[w]; + if !visited.contains(w) && lb < node.ord { + visited.insert(*w); + delta_b.push(*w); - for w in &self.in_edges[&n] { - if !visited.contains(w) && lb < self.ord[w] { - self.dfs_b(*w, lb, visited, delta_b); + self.dfs_b(node, lb, visited, delta_b); } } } @@ -165,13 +186,8 @@ where let mut l = Vec::with_capacity(delta_f.len() + delta_b.len()); let mut orders = Vec::with_capacity(delta_f.len() + delta_b.len()); - for w in delta_b { - orders.push(self.ord[&w]); - l.push(w); - } - - for v in delta_f { - orders.push(self.ord[&v]); + for v in delta_b.into_iter().chain(delta_f) { + orders.push(self.nodes[&v].ord); l.push(v); } @@ -180,13 +196,13 @@ where orders.sort_unstable(); for (node, order) in l.into_iter().zip(orders) { - self.ord.insert(node, order); + self.nodes.get_mut(&node).unwrap().ord = order; } } fn sort(&self, ids: &mut [V]) { // Can use unstable sort because mutex ids should not be equal - ids.sort_unstable_by_key(|v| self.ord[v]); + ids.sort_unstable_by_key(|v| self.nodes[&v].ord); } }