Merge pull request #1 from bertptrs/improve-digraph

This commit is contained in:
2021-05-24 15:13:37 +02:00
committed by GitHub
4 changed files with 162 additions and 50 deletions

20
CHANGELOG.md Normal file
View File

@@ -0,0 +1,20 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project
adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Changed
- New data structure for interal dependency graph, resulting in quicker graph updates.
### Fixed
- Fixed an issue where internal graph ordering indices were exponential rather than sequential.
## [0.1.0] - 2020-05-16
Initial release.
[Unreleased]: https://github.com/bertptrs/tracing-mutex/compare/v0.1.0...HEAD
[0.1.0]: https://github.com/bertptrs/tracing-mutex/releases/tag/v0.1.0

View File

@@ -13,3 +13,6 @@ repository = "https://github.com/bertptrs/tracing-mutex"
[dependencies] [dependencies]
lazy_static = "1" lazy_static = "1"
[dev-dependencies]
rand = "0.8"

View File

@@ -1,3 +1,5 @@
use std::array::IntoIter;
use std::cell::Cell;
use std::collections::HashMap; use std::collections::HashMap;
use std::collections::HashSet; use std::collections::HashSet;
use std::hash::Hash; use std::hash::Hash;
@@ -18,17 +20,28 @@ type Order = usize;
/// visibly changed. /// visibly changed.
/// ///
/// [paper]: https://whileydave.com/publications/pk07_jea/ /// [paper]: https://whileydave.com/publications/pk07_jea/
#[derive(Clone, Default, Debug)] #[derive(Default, Debug)]
pub struct DiGraph<V> pub struct DiGraph<V>
where where
V: Eq + Hash + Copy, V: Eq + Hash + Copy,
{ {
in_edges: HashMap<V, HashSet<V>>, nodes: HashMap<V, Node<V>>,
out_edges: HashMap<V, HashSet<V>>,
/// Next topological sort order /// Next topological sort order
next_ord: Order, next_ord: Order,
/// Topological sort order. Order is not guaranteed to be contiguous }
ord: HashMap<V, Order>,
#[derive(Debug)]
struct Node<V>
where
V: Eq + Hash + Clone,
{
in_edges: HashSet<V>,
out_edges: HashSet<V>,
// The "Ord" field is a Cell to ensure we can update it in an immutable context.
// `std::collections::HashMap` doesn't let you have multiple mutable references to elements, but
// this way we can use immutable references and still update `ord`. This saves quite a few
// hashmap lookups in the final reorder function.
ord: Cell<Order>,
} }
impl<V> DiGraph<V> impl<V> DiGraph<V>
@@ -44,29 +57,36 @@ where
/// New nodes are appended to the end of the topological order when added. /// New nodes are appended to the end of the topological order when added.
fn add_node(&mut self, n: V) -> (&mut HashSet<V>, &mut HashSet<V>, Order) { fn add_node(&mut self, n: V) -> (&mut HashSet<V>, &mut HashSet<V>, Order) {
let next_ord = &mut self.next_ord; let next_ord = &mut self.next_ord;
let in_edges = self.in_edges.entry(n).or_default();
let out_edges = self.out_edges.entry(n).or_default();
let order = *self.ord.entry(n).or_insert_with(|| { let node = self.nodes.entry(n).or_insert_with(|| {
let order = *next_ord; let order = *next_ord;
*next_ord += next_ord.checked_add(1).expect("Topological order overflow"); *next_ord = next_ord.checked_add(1).expect("Topological order overflow");
order
Node {
ord: Cell::new(order),
in_edges: Default::default(),
out_edges: Default::default(),
}
}); });
(in_edges, out_edges, order) (&mut node.in_edges, &mut node.out_edges, node.ord.get())
} }
pub(crate) fn remove_node(&mut self, n: V) -> bool { pub(crate) fn remove_node(&mut self, n: V) -> bool {
match self.out_edges.remove(&n) { match self.nodes.remove(&n) {
None => false, None => false,
Some(out_edges) => { Some(Node {
for other in out_edges { out_edges,
self.in_edges.get_mut(&other).unwrap().remove(&n); in_edges,
} ..
}) => {
out_edges.into_iter().for_each(|m| {
self.nodes.get_mut(&m).unwrap().in_edges.remove(&n);
});
for other in self.in_edges.remove(&n).unwrap() { in_edges.into_iter().for_each(|m| {
self.out_edges.get_mut(&other).unwrap().remove(&n); self.nodes.get_mut(&m).unwrap().out_edges.remove(&n);
} });
true true
} }
@@ -96,25 +116,25 @@ where
if lb < ub { if lb < ub {
// This edge might introduce a cycle, need to recompute the topological sort // This edge might introduce a cycle, need to recompute the topological sort
let mut visited = HashSet::new(); let mut visited = IntoIter::new([x, y]).collect();
let mut delta_f = Vec::new(); let mut delta_f = Vec::new();
let mut delta_b = Vec::new(); let mut delta_b = Vec::new();
if !self.dfs_f(y, ub, &mut visited, &mut delta_f) { if !self.dfs_f(&self.nodes[&y], ub, &mut visited, &mut delta_f) {
// This edge introduces a cycle, so we want to reject it and remove it from the // This edge introduces a cycle, so we want to reject it and remove it from the
// graph again to keep the "does not contain cycles" invariant. // graph again to keep the "does not contain cycles" invariant.
// We use map instead of unwrap to avoid an `unwrap()` but we know that these // We use map instead of unwrap to avoid an `unwrap()` but we know that these
// entries are present as we just added them above. // entries are present as we just added them above.
self.in_edges.get_mut(&y).map(|nodes| nodes.remove(&x)); self.nodes.get_mut(&y).map(|node| node.in_edges.remove(&x));
self.out_edges.get_mut(&x).map(|nodes| nodes.remove(&y)); self.nodes.get_mut(&x).map(|node| node.out_edges.remove(&x));
// No edge was added // No edge was added
return false; return false;
} }
// No need to check as we should've found the cycle on the forward pass // No need to check as we should've found the cycle on the forward pass
self.dfs_b(x, lb, &mut visited, &mut delta_b); self.dfs_b(&self.nodes[&x], lb, &mut visited, &mut delta_b);
// Original paper keeps it around but this saves us from clearing it // Original paper keeps it around but this saves us from clearing it
drop(visited); drop(visited);
@@ -126,19 +146,26 @@ where
} }
/// Forwards depth-first-search /// Forwards depth-first-search
fn dfs_f(&self, n: V, ub: Order, visited: &mut HashSet<V>, delta_f: &mut Vec<V>) -> bool { fn dfs_f<'a>(
visited.insert(n); &'a self,
n: &'a Node<V>,
ub: Order,
visited: &mut HashSet<V>,
delta_f: &mut Vec<&'a Node<V>>,
) -> bool {
delta_f.push(n); delta_f.push(n);
self.out_edges[&n].iter().all(|w| { n.out_edges.iter().all(|w| {
let order = self.ord[w]; let node = &self.nodes[w];
let ord = node.ord.get();
if order == ub { if ord == ub {
// Found a cycle // Found a cycle
false false
} else if !visited.contains(w) && order < ub { } else if !visited.contains(w) && ord < ub {
// Need to check recursively // Need to check recursively
self.dfs_f(*w, ub, visited, delta_f) visited.insert(*w);
self.dfs_f(node, ub, visited, delta_f)
} else { } else {
// Already seen this one or not interesting // Already seen this one or not interesting
true true
@@ -147,31 +174,34 @@ where
} }
/// Backwards depth-first-search /// Backwards depth-first-search
fn dfs_b(&self, n: V, lb: Order, visited: &mut HashSet<V>, delta_b: &mut Vec<V>) { fn dfs_b<'a>(
visited.insert(n); &'a self,
n: &'a Node<V>,
lb: Order,
visited: &mut HashSet<V>,
delta_b: &mut Vec<&'a Node<V>>,
) {
delta_b.push(n); delta_b.push(n);
for w in &self.in_edges[&n] { for w in &n.in_edges {
if !visited.contains(w) && lb < self.ord[w] { let node = &self.nodes[w];
self.dfs_b(*w, lb, visited, delta_b); if !visited.contains(w) && lb < node.ord.get() {
visited.insert(*w);
self.dfs_b(node, lb, visited, delta_b);
} }
} }
} }
fn reorder(&mut self, mut delta_f: Vec<V>, mut delta_b: Vec<V>) { fn reorder(&self, mut delta_f: Vec<&Node<V>>, mut delta_b: Vec<&Node<V>>) {
self.sort(&mut delta_f); self.sort(&mut delta_f);
self.sort(&mut delta_b); self.sort(&mut delta_b);
let mut l = Vec::with_capacity(delta_f.len() + delta_b.len()); let mut l = Vec::with_capacity(delta_f.len() + delta_b.len());
let mut orders = Vec::with_capacity(delta_f.len() + delta_b.len()); let mut orders = Vec::with_capacity(delta_f.len() + delta_b.len());
for w in delta_b { for v in delta_b.into_iter().chain(delta_f) {
orders.push(self.ord[&w]); orders.push(v.ord.get());
l.push(w);
}
for v in delta_f {
orders.push(self.ord[&v]);
l.push(v); l.push(v);
} }
@@ -180,18 +210,21 @@ where
orders.sort_unstable(); orders.sort_unstable();
for (node, order) in l.into_iter().zip(orders) { for (node, order) in l.into_iter().zip(orders) {
self.ord.insert(node, order); node.ord.set(order);
} }
} }
fn sort(&self, ids: &mut [V]) { fn sort(&self, ids: &mut [&Node<V>]) {
// Can use unstable sort because mutex ids should not be equal // Can use unstable sort because mutex ids should not be equal
ids.sort_unstable_by_key(|v| self.ord[v]); ids.sort_unstable_by_key(|v| &v.ord);
} }
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use rand::seq::SliceRandom;
use rand::thread_rng;
use super::*; use super::*;
#[test] #[test]
@@ -210,4 +243,32 @@ mod tests {
// Add an edge that should reorder 0 to be after 4 // Add an edge that should reorder 0 to be after 4
assert!(graph.add_edge(4, 0)); assert!(graph.add_edge(4, 0));
} }
/// Fuzz the DiGraph implementation by adding a bunch of valid edges.
///
/// This test generates all possible forward edges in a 100-node graph consisting of natural
/// numbers, shuffles them, then adds them to the graph. This will always be a valid directed,
/// acyclic graph because there is a trivial order (the natural numbers) but because the edges
/// are added in a random order the DiGraph will still occassionally need to reorder nodes.
#[test]
fn fuzz_digraph() {
// Note: this fuzzer is quadratic in the number of nodes, so this cannot be too large or it
// will slow down the tests too much.
const NUM_NODES: usize = 100;
let mut edges = Vec::with_capacity(NUM_NODES * NUM_NODES);
for i in 0..NUM_NODES {
for j in i..NUM_NODES {
edges.push((i, j));
}
}
edges.shuffle(&mut thread_rng());
let mut graph = DiGraph::default();
for (x, y) in edges {
assert!(graph.add_edge(x, y));
}
}
} }

View File

@@ -89,9 +89,6 @@ lazy_static! {
/// ///
/// This type is currently private to prevent usage while the exact implementation is figured out, /// This type is currently private to prevent usage while the exact implementation is figured out,
/// but it will likely be public in the future. /// but it will likely be public in the future.
///
/// One possible alteration is to make this type not `Copy` but `Drop`, and handle deregistering
/// the lock from there.
struct MutexId(usize); struct MutexId(usize);
impl MutexId { impl MutexId {
@@ -271,6 +268,9 @@ fn get_dependency_graph() -> impl DerefMut<Target = DiGraph<usize>> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use rand::seq::SliceRandom;
use rand::thread_rng;
use super::*; use super::*;
#[test] #[test]
@@ -303,4 +303,32 @@ mod tests {
// If b's destructor correctly ran correctly we can now add an edge from c to a. // If b's destructor correctly ran correctly we can now add an edge from c to a.
assert!(get_dependency_graph().add_edge(c.value(), a.value())); assert!(get_dependency_graph().add_edge(c.value(), a.value()));
} }
/// Fuzz the global dependency graph by fake-acquiring lots of mutexes in a valid order.
///
/// This test generates all possible forward edges in a 100-node graph consisting of natural
/// numbers, shuffles them, then adds them to the graph. This will always be a valid directed,
/// acyclic graph because there is a trivial order (the natural numbers) but because the edges
/// are added in a random order the DiGraph will still occassionally need to reorder nodes.
#[test]
fn fuzz_mutex_id() {
const NUM_NODES: usize = 100;
let ids: Vec<MutexId> = (0..NUM_NODES).map(|_| Default::default()).collect();
let mut edges = Vec::with_capacity(NUM_NODES * NUM_NODES);
for i in 0..NUM_NODES {
for j in i..NUM_NODES {
edges.push((i, j));
}
}
edges.shuffle(&mut thread_rng());
for (x, y) in edges {
// Acquire the mutexes, smallest first to ensure a cycle-free graph
let _ignored = ids[x].get_borrowed();
let _ = ids[y].get_borrowed();
}
}
} }