From 6ef9cb12f871cd55cc4ef2908c3e0a43da9c89aa Mon Sep 17 00:00:00 2001 From: Bert Peters Date: Mon, 24 May 2021 12:13:21 +0200 Subject: [PATCH 1/6] Implement basic fuzz testing for the digraph impl --- Cargo.toml | 3 +++ src/graph.rs | 31 +++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 1c5645b..6b175ef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,6 @@ repository = "https://github.com/bertptrs/tracing-mutex" [dependencies] lazy_static = "1" + +[dev-dependencies] +rand = "0.8" diff --git a/src/graph.rs b/src/graph.rs index 81e8a04..759f874 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -192,6 +192,9 @@ where #[cfg(test)] mod tests { + use rand::prelude::SliceRandom; + use rand::thread_rng; + use super::*; #[test] @@ -210,4 +213,32 @@ mod tests { // Add an edge that should reorder 0 to be after 4 assert!(graph.add_edge(4, 0)); } + + /// Fuzz the DiGraph implementation by adding a bunch of valid edges. + /// + /// This test generates all possible forward edges in a 100-node graph consisting of natural + /// numbers, shuffles them, then adds them to the graph. This will always be a valid directed, + /// acyclic graph because there is a trivial order (the natural numbers) but because the edges + /// are added in a random order the DiGraph will still occassionally need to reorder nodes. + #[test] + fn fuzz_digraph() { + // Note: this fuzzer is quadratic in the number of nodes, so this cannot be too large or it + // will slow down the tests too much. + const NUM_NODES: usize = 100; + let mut edges = Vec::with_capacity(NUM_NODES * NUM_NODES); + + for i in 0..NUM_NODES { + for j in i..NUM_NODES { + edges.push((i, j)); + } + } + + edges.shuffle(&mut thread_rng()); + + let mut graph = DiGraph::default(); + + for (x, y) in edges { + assert!(graph.add_edge(x, y)); + } + } } From cca3cf7827a93538945ec1947c416e65cd40bd4e Mon Sep 17 00:00:00 2001 From: Bert Peters Date: Mon, 24 May 2021 12:15:14 +0200 Subject: [PATCH 2/6] Fix unintentional exponential order ids --- src/graph.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graph.rs b/src/graph.rs index 759f874..aea1839 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -49,7 +49,7 @@ where let order = *self.ord.entry(n).or_insert_with(|| { let order = *next_ord; - *next_ord += next_ord.checked_add(1).expect("Topological order overflow"); + *next_ord = next_ord.checked_add(1).expect("Topological order overflow"); order }); From 39b493a8714c4b28e548375869530a571a7bb344 Mon Sep 17 00:00:00 2001 From: Bert Peters Date: Mon, 24 May 2021 12:57:21 +0200 Subject: [PATCH 3/6] Merge hash maps in graph structures This saves quite a few hash-map lookups which improves performance by about 25%. --- src/graph.rs | 114 +++++++++++++++++++++++++++++---------------------- 1 file changed, 65 insertions(+), 49 deletions(-) diff --git a/src/graph.rs b/src/graph.rs index aea1839..3f0b2d2 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -1,3 +1,4 @@ +use std::array::IntoIter; use std::collections::HashMap; use std::collections::HashSet; use std::hash::Hash; @@ -18,17 +19,24 @@ type Order = usize; /// visibly changed. /// /// [paper]: https://whileydave.com/publications/pk07_jea/ -#[derive(Clone, Default, Debug)] +#[derive(Default, Debug)] pub struct DiGraph where V: Eq + Hash + Copy, { - in_edges: HashMap>, - out_edges: HashMap>, + nodes: HashMap>, /// Next topological sort order next_ord: Order, - /// Topological sort order. Order is not guaranteed to be contiguous - ord: HashMap, +} + +#[derive(Debug)] +struct Node +where + V: Eq + Hash + Clone, +{ + in_edges: HashSet, + out_edges: HashSet, + ord: Order, } impl DiGraph @@ -44,29 +52,36 @@ where /// New nodes are appended to the end of the topological order when added. fn add_node(&mut self, n: V) -> (&mut HashSet, &mut HashSet, Order) { let next_ord = &mut self.next_ord; - let in_edges = self.in_edges.entry(n).or_default(); - let out_edges = self.out_edges.entry(n).or_default(); - let order = *self.ord.entry(n).or_insert_with(|| { + let node = self.nodes.entry(n).or_insert_with(|| { let order = *next_ord; *next_ord = next_ord.checked_add(1).expect("Topological order overflow"); - order + + Node { + ord: order, + in_edges: Default::default(), + out_edges: Default::default(), + } }); - (in_edges, out_edges, order) + (&mut node.in_edges, &mut node.out_edges, node.ord) } pub(crate) fn remove_node(&mut self, n: V) -> bool { - match self.out_edges.remove(&n) { + match self.nodes.remove(&n) { None => false, - Some(out_edges) => { - for other in out_edges { - self.in_edges.get_mut(&other).unwrap().remove(&n); - } + Some(Node { + out_edges, + in_edges, + .. + }) => { + out_edges.into_iter().for_each(|m| { + self.nodes.get_mut(&m).unwrap().in_edges.remove(&n); + }); - for other in self.in_edges.remove(&n).unwrap() { - self.out_edges.get_mut(&other).unwrap().remove(&n); - } + in_edges.into_iter().for_each(|m| { + self.nodes.get_mut(&m).unwrap().out_edges.remove(&n); + }); true } @@ -96,25 +111,25 @@ where if lb < ub { // This edge might introduce a cycle, need to recompute the topological sort - let mut visited = HashSet::new(); - let mut delta_f = Vec::new(); - let mut delta_b = Vec::new(); + let mut visited = IntoIter::new([x, y]).collect(); + let mut delta_f = vec![y]; + let mut delta_b = vec![x]; - if !self.dfs_f(y, ub, &mut visited, &mut delta_f) { + if !self.dfs_f(&self.nodes[&y], ub, &mut visited, &mut delta_f) { // This edge introduces a cycle, so we want to reject it and remove it from the // graph again to keep the "does not contain cycles" invariant. // We use map instead of unwrap to avoid an `unwrap()` but we know that these // entries are present as we just added them above. - self.in_edges.get_mut(&y).map(|nodes| nodes.remove(&x)); - self.out_edges.get_mut(&x).map(|nodes| nodes.remove(&y)); + self.nodes.get_mut(&y).map(|node| node.in_edges.remove(&x)); + self.nodes.get_mut(&x).map(|node| node.out_edges.remove(&x)); // No edge was added return false; } // No need to check as we should've found the cycle on the forward pass - self.dfs_b(x, lb, &mut visited, &mut delta_b); + self.dfs_b(&self.nodes[&x], lb, &mut visited, &mut delta_b); // Original paper keeps it around but this saves us from clearing it drop(visited); @@ -126,19 +141,24 @@ where } /// Forwards depth-first-search - fn dfs_f(&self, n: V, ub: Order, visited: &mut HashSet, delta_f: &mut Vec) -> bool { - visited.insert(n); - delta_f.push(n); + fn dfs_f( + &self, + n: &Node, + ub: Order, + visited: &mut HashSet, + delta_f: &mut Vec, + ) -> bool { + n.out_edges.iter().all(|w| { + let node = &self.nodes[w]; - self.out_edges[&n].iter().all(|w| { - let order = self.ord[w]; - - if order == ub { + if node.ord == ub { // Found a cycle false - } else if !visited.contains(w) && order < ub { + } else if !visited.contains(w) && node.ord < ub { // Need to check recursively - self.dfs_f(*w, ub, visited, delta_f) + visited.insert(*w); + delta_f.push(*w); + self.dfs_f(node, ub, visited, delta_f) } else { // Already seen this one or not interesting true @@ -147,13 +167,14 @@ where } /// Backwards depth-first-search - fn dfs_b(&self, n: V, lb: Order, visited: &mut HashSet, delta_b: &mut Vec) { - visited.insert(n); - delta_b.push(n); + fn dfs_b(&self, n: &Node, lb: Order, visited: &mut HashSet, delta_b: &mut Vec) { + for w in &n.in_edges { + let node = &self.nodes[w]; + if !visited.contains(w) && lb < node.ord { + visited.insert(*w); + delta_b.push(*w); - for w in &self.in_edges[&n] { - if !visited.contains(w) && lb < self.ord[w] { - self.dfs_b(*w, lb, visited, delta_b); + self.dfs_b(node, lb, visited, delta_b); } } } @@ -165,13 +186,8 @@ where let mut l = Vec::with_capacity(delta_f.len() + delta_b.len()); let mut orders = Vec::with_capacity(delta_f.len() + delta_b.len()); - for w in delta_b { - orders.push(self.ord[&w]); - l.push(w); - } - - for v in delta_f { - orders.push(self.ord[&v]); + for v in delta_b.into_iter().chain(delta_f) { + orders.push(self.nodes[&v].ord); l.push(v); } @@ -180,13 +196,13 @@ where orders.sort_unstable(); for (node, order) in l.into_iter().zip(orders) { - self.ord.insert(node, order); + self.nodes.get_mut(&node).unwrap().ord = order; } } fn sort(&self, ids: &mut [V]) { // Can use unstable sort because mutex ids should not be equal - ids.sort_unstable_by_key(|v| self.ord[v]); + ids.sort_unstable_by_key(|v| self.nodes[&v].ord); } } From d242ac5bc2bbf71edcce9e9dd118c506ef6f93e0 Mon Sep 17 00:00:00 2001 From: Bert Peters Date: Mon, 24 May 2021 14:25:44 +0200 Subject: [PATCH 4/6] Use interior mutability for updating graph order --- src/graph.rs | 54 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/src/graph.rs b/src/graph.rs index 3f0b2d2..0a55729 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -1,4 +1,5 @@ use std::array::IntoIter; +use std::cell::Cell; use std::collections::HashMap; use std::collections::HashSet; use std::hash::Hash; @@ -36,7 +37,11 @@ where { in_edges: HashSet, out_edges: HashSet, - ord: Order, + // The "Ord" field is a Cell to ensure we can update it in an immutable context. + // `std::collections::HashMap` doesn't let you have multiple mutable references to elements, but + // this way we can use immutable references and still update `ord`. This saves quite a few + // hashmap lookups in the final reorder function. + ord: Cell, } impl DiGraph @@ -58,13 +63,13 @@ where *next_ord = next_ord.checked_add(1).expect("Topological order overflow"); Node { - ord: order, + ord: Cell::new(order), in_edges: Default::default(), out_edges: Default::default(), } }); - (&mut node.in_edges, &mut node.out_edges, node.ord) + (&mut node.in_edges, &mut node.out_edges, node.ord.get()) } pub(crate) fn remove_node(&mut self, n: V) -> bool { @@ -112,8 +117,8 @@ where if lb < ub { // This edge might introduce a cycle, need to recompute the topological sort let mut visited = IntoIter::new([x, y]).collect(); - let mut delta_f = vec![y]; - let mut delta_b = vec![x]; + let mut delta_f = Vec::new(); + let mut delta_b = Vec::new(); if !self.dfs_f(&self.nodes[&y], ub, &mut visited, &mut delta_f) { // This edge introduces a cycle, so we want to reject it and remove it from the @@ -141,23 +146,25 @@ where } /// Forwards depth-first-search - fn dfs_f( - &self, - n: &Node, + fn dfs_f<'a>( + &'a self, + n: &'a Node, ub: Order, visited: &mut HashSet, - delta_f: &mut Vec, + delta_f: &mut Vec<&'a Node>, ) -> bool { + delta_f.push(n); + n.out_edges.iter().all(|w| { let node = &self.nodes[w]; + let ord = node.ord.get(); - if node.ord == ub { + if ord == ub { // Found a cycle false - } else if !visited.contains(w) && node.ord < ub { + } else if !visited.contains(w) && ord < ub { // Need to check recursively visited.insert(*w); - delta_f.push(*w); self.dfs_f(node, ub, visited, delta_f) } else { // Already seen this one or not interesting @@ -167,19 +174,26 @@ where } /// Backwards depth-first-search - fn dfs_b(&self, n: &Node, lb: Order, visited: &mut HashSet, delta_b: &mut Vec) { + fn dfs_b<'a>( + &'a self, + n: &'a Node, + lb: Order, + visited: &mut HashSet, + delta_b: &mut Vec<&'a Node>, + ) { + delta_b.push(n); + for w in &n.in_edges { let node = &self.nodes[w]; - if !visited.contains(w) && lb < node.ord { + if !visited.contains(w) && lb < node.ord.get() { visited.insert(*w); - delta_b.push(*w); self.dfs_b(node, lb, visited, delta_b); } } } - fn reorder(&mut self, mut delta_f: Vec, mut delta_b: Vec) { + fn reorder(&self, mut delta_f: Vec<&Node>, mut delta_b: Vec<&Node>) { self.sort(&mut delta_f); self.sort(&mut delta_b); @@ -187,7 +201,7 @@ where let mut orders = Vec::with_capacity(delta_f.len() + delta_b.len()); for v in delta_b.into_iter().chain(delta_f) { - orders.push(self.nodes[&v].ord); + orders.push(v.ord.get()); l.push(v); } @@ -196,13 +210,13 @@ where orders.sort_unstable(); for (node, order) in l.into_iter().zip(orders) { - self.nodes.get_mut(&node).unwrap().ord = order; + node.ord.set(order); } } - fn sort(&self, ids: &mut [V]) { + fn sort(&self, ids: &mut [&Node]) { // Can use unstable sort because mutex ids should not be equal - ids.sort_unstable_by_key(|v| self.nodes[&v].ord); + ids.sort_unstable_by_key(|v| &v.ord); } } From ca12ae6b0e33ff080cf1250af784f70a4eb6de76 Mon Sep 17 00:00:00 2001 From: Bert Peters Date: Mon, 24 May 2021 14:39:43 +0200 Subject: [PATCH 5/6] Add changelog --- CHANGELOG.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..4ca9de7 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,20 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project +adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Changed +- New data structure for interal dependency graph, resulting in quicker graph updates. + +### Fixed +- Fixed an issue where internal graph ordering indices were exponential rather than sequential. + +## [0.1.0] - 2020-05-16 + +Initial release. + +[Unreleased]: https://github.com/bertptrs/tracing-mutex/compare/v0.1.0...HEAD +[0.1.0]: https://github.com/bertptrs/tracing-mutex/releases/tag/v0.1.0 From ebb8132cf8d429754aecf1fe3c0a0d0e2b16d126 Mon Sep 17 00:00:00 2001 From: Bert Peters Date: Mon, 24 May 2021 15:10:41 +0200 Subject: [PATCH 6/6] Add a fuzz-test for the mutex ID's graph --- src/graph.rs | 2 +- src/lib.rs | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/graph.rs b/src/graph.rs index 0a55729..b1a8de3 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -222,7 +222,7 @@ where #[cfg(test)] mod tests { - use rand::prelude::SliceRandom; + use rand::seq::SliceRandom; use rand::thread_rng; use super::*; diff --git a/src/lib.rs b/src/lib.rs index 10a6cf4..8aae2f0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -89,9 +89,6 @@ lazy_static! { /// /// This type is currently private to prevent usage while the exact implementation is figured out, /// but it will likely be public in the future. -/// -/// One possible alteration is to make this type not `Copy` but `Drop`, and handle deregistering -/// the lock from there. struct MutexId(usize); impl MutexId { @@ -271,6 +268,9 @@ fn get_dependency_graph() -> impl DerefMut> { #[cfg(test)] mod tests { + use rand::seq::SliceRandom; + use rand::thread_rng; + use super::*; #[test] @@ -303,4 +303,32 @@ mod tests { // If b's destructor correctly ran correctly we can now add an edge from c to a. assert!(get_dependency_graph().add_edge(c.value(), a.value())); } + + /// Fuzz the global dependency graph by fake-acquiring lots of mutexes in a valid order. + /// + /// This test generates all possible forward edges in a 100-node graph consisting of natural + /// numbers, shuffles them, then adds them to the graph. This will always be a valid directed, + /// acyclic graph because there is a trivial order (the natural numbers) but because the edges + /// are added in a random order the DiGraph will still occassionally need to reorder nodes. + #[test] + fn fuzz_mutex_id() { + const NUM_NODES: usize = 100; + + let ids: Vec = (0..NUM_NODES).map(|_| Default::default()).collect(); + + let mut edges = Vec::with_capacity(NUM_NODES * NUM_NODES); + for i in 0..NUM_NODES { + for j in i..NUM_NODES { + edges.push((i, j)); + } + } + + edges.shuffle(&mut thread_rng()); + + for (x, y) in edges { + // Acquire the mutexes, smallest first to ensure a cycle-free graph + let _ignored = ids[x].get_borrowed(); + let _ = ids[y].get_borrowed(); + } + } }