Implement fast dynamic topsort algorithm

This commit is contained in:
2021-03-27 17:20:37 +01:00
parent 5638ebffd8
commit c196589cfd
3 changed files with 235 additions and 56 deletions

View File

@@ -1,38 +1,76 @@
use std::collections::HashMap;
use std::collections::HashSet;
use crate::MutexID;
use crate::MutexId;
type Order = usize;
/// Directed Graph with dynamic topological sorting
///
/// Design and implementation based "A Dynamic Topological Sort Algorithm for
/// Directed Acyclic Graphs" by David J. Pearce and Paul H.J. Kelly which can
/// be found on [the author's website][paper].
///
/// Variable- and method names have been chosen to reflect most closely
/// resemble the names in the original paper.
///
/// This digraph tracks its own topological order and updates it when new edges
/// are added to the graph. After a cycle has been introduced, the order is no
/// longer kept up to date as it doesn't exist, but new edges are still
/// tracked. Nodes are added implicitly when they're used in edges.
///
/// [paper]: https://whileydave.com/publications/pk07_jea/
#[derive(Clone, Default, Debug)]
pub struct DiGraph {
in_edges: HashMap<MutexID, Vec<MutexID>>,
out_edges: HashMap<MutexID, Vec<MutexID>>,
in_edges: HashMap<MutexId, Vec<MutexId>>,
out_edges: HashMap<MutexId, Vec<MutexId>>,
/// Next topological sort order
next_ord: Order,
/// Poison flag, set if a cycle is detected when adding a new edge and
/// unset when removing a node successfully removed the cycle.
contains_cycle: bool,
/// Topological sort order. Order is not guaranteed to be contiguous
ord: HashMap<MutexId, Order>,
}
impl DiGraph {
fn add_node(&mut self, node: MutexID) -> (&mut Vec<MutexID>, &mut Vec<MutexID>) {
let in_edges = self.in_edges.entry(node).or_default();
let out_edges = self.out_edges.entry(node).or_default();
/// Add a new node to the graph.
///
/// If the node already existed, this function does not add it and uses the
/// existing node data. The function returns mutable references to the
/// in-edges, out-edges, and finally the index of the node in the topological
/// order.
///
/// New nodes are appended to the end of the topological order when added.
fn add_node(&mut self, n: MutexId) -> (&mut Vec<MutexId>, &mut Vec<MutexId>, Order) {
let next_ord = &mut self.next_ord;
let in_edges = self.in_edges.entry(n).or_default();
let out_edges = self.out_edges.entry(n).or_default();
(in_edges, out_edges)
let order = *self.ord.entry(n).or_insert_with(|| {
let order = *next_ord;
*next_ord += next_ord.checked_add(1).expect("Topological order overflow");
order
});
(in_edges, out_edges, order)
}
pub(crate) fn remove_node(&mut self, node: MutexID) -> bool {
match self.out_edges.remove(&node) {
pub(crate) fn remove_node(&mut self, n: MutexId) -> bool {
match self.out_edges.remove(&n) {
None => false,
Some(out_edges) => {
for other in out_edges {
self.in_edges
.get_mut(&other)
.unwrap()
.retain(|c| c != &node);
self.in_edges.get_mut(&other).unwrap().retain(|c| c != &n);
}
for other in self.in_edges.remove(&node).unwrap() {
self.out_edges
.get_mut(&other)
.unwrap()
.retain(|c| c != &node);
for other in self.in_edges.remove(&n).unwrap() {
self.out_edges.get_mut(&other).unwrap().retain(|c| c != &n);
}
if self.contains_cycle {
// Need to build a valid topological order
self.recompute_topological_order();
}
true
@@ -43,58 +81,192 @@ impl DiGraph {
/// Add an edge to the graph
///
/// Nodes, both from and to, are created as needed when creating new edges.
pub(crate) fn add_edge(&mut self, from: MutexID, to: MutexID) -> bool {
if from == to {
pub(crate) fn add_edge(&mut self, x: MutexId, y: MutexId) -> bool {
if x == y {
// self-edges are not considered cycles
return false;
}
let (_, out_edges) = self.add_node(from);
let (_, out_edges, ub) = self.add_node(x);
out_edges.push(to);
if out_edges.contains(&y) {
// Edge already exists, nothing to be done
return false;
}
let (in_edges, _) = self.add_node(to);
out_edges.push(y);
// No need for existence check assuming the datastructure is consistent
in_edges.push(from);
let (in_edges, _, lb) = self.add_node(y);
in_edges.push(x);
if !self.contains_cycle && lb < ub {
// This edge might introduce a cycle, need to recompute the topological sort
let mut visited = HashSet::new();
let mut delta_f = Vec::new();
let mut delta_b = Vec::new();
if !self.dfs_f(y, ub, &mut visited, &mut delta_f) {
self.contains_cycle = true;
return true;
}
// No need to check as we should've found the cycle on the forward pass
self.dfs_b(x, lb, &mut visited, &mut delta_b);
// Original paper keeps it around but this saves us from clearing it
drop(visited);
self.reorder(delta_f, delta_b);
}
true
}
pub fn has_cycles(&self) -> bool {
let mut marks = HashSet::new();
let mut temp = HashSet::new();
/// Forwards depth-first-search
fn dfs_f(
&self,
n: MutexId,
ub: Order,
visited: &mut HashSet<MutexId>,
delta_f: &mut Vec<MutexId>,
) -> bool {
visited.insert(n);
delta_f.push(n);
self.out_edges
.keys()
.copied()
.any(|node| !self.visit(node, &mut marks, &mut temp))
self.out_edges[&n].iter().all(|w| {
let order = self.ord[w];
if order == ub {
// Found a cycle
false
} else if !visited.contains(w) && order < ub {
// Need to check recursively
self.dfs_f(*w, ub, visited, delta_f)
} else {
// Already seen this one or not interesting
true
}
})
}
/// Backwards depth-first-search
fn dfs_b(
&self,
n: MutexId,
lb: Order,
visited: &mut HashSet<MutexId>,
delta_b: &mut Vec<MutexId>,
) {
visited.insert(n);
delta_b.push(n);
for w in &self.in_edges[&n] {
if !visited.contains(w) && lb < self.ord[w] {
self.dfs_b(*w, lb, visited, delta_b);
}
}
}
fn reorder(&mut self, mut delta_f: Vec<MutexId>, mut delta_b: Vec<MutexId>) {
self.sort(&mut delta_f);
self.sort(&mut delta_b);
let mut l = Vec::with_capacity(delta_f.len() + delta_b.len());
let mut orders = Vec::with_capacity(delta_f.len() + delta_b.len());
for w in delta_b {
orders.push(self.ord[&w]);
l.push(w);
}
for v in delta_f {
orders.push(self.ord[&v]);
l.push(v);
}
// Original paper cleverly merges the two lists by using that both are
// sorted. We just sort again. This is slower but also much simpler.
orders.sort_unstable();
for (node, order) in l.into_iter().zip(orders) {
self.ord.insert(node, order);
}
}
fn sort(&self, ids: &mut [MutexId]) {
// Can use unstable sort because mutex ids should not be equal
ids.sort_unstable_by_key(|v| self.ord[v]);
}
pub fn has_cycles(&self) -> bool {
self.contains_cycle
}
/// Attempt to recompute a valid topological order.
///
/// This method implements the DFS method to find leave nodes to find the reverse order
fn recompute_topological_order(&mut self) {
// This function should only be called when the graph contains a cycle.
debug_assert!(self.contains_cycle);
let mut permanent_marks = HashSet::with_capacity(self.out_edges.len());
let mut temporary_marks = HashSet::new();
let mut rev_order = Vec::with_capacity(self.out_edges.len());
for node in self.out_edges.keys() {
if permanent_marks.contains(node) {
continue;
}
if !self.visit(
*node,
&mut permanent_marks,
&mut temporary_marks,
&mut rev_order,
) {
// Cycle found, no order possible
return;
}
}
// We didn't find a cycle, so we can reset
self.contains_cycle = false;
// Newly allocated order is contiguous 0..rev_order.len()
self.next_ord = rev_order.len();
self.ord.clear();
self.ord
.extend(rev_order.into_iter().rev().enumerate().map(|(k, v)| (v, k)))
}
/// Helper function for `Self::recompute_topological_order`.
fn visit(
&self,
node: MutexID,
marks: &mut HashSet<MutexID>,
temp: &mut HashSet<MutexID>,
v: MutexId,
permanent_marks: &mut HashSet<MutexId>,
temporary_marks: &mut HashSet<MutexId>,
rev_order: &mut Vec<MutexId>,
) -> bool {
if marks.contains(&node) {
if permanent_marks.contains(&v) {
return true;
}
if !temp.insert(node) {
if !temporary_marks.insert(v) {
return false;
}
if self.out_edges[&node]
if !self.out_edges[&v]
.iter()
.copied()
.any(|node| !self.visit(node, marks, temp))
.all(|&w| self.visit(w, permanent_marks, temporary_marks, rev_order))
{
return false;
}
temp.remove(&node);
temporary_marks.remove(&v);
permanent_marks.insert(v);
marks.insert(node);
rev_order.push(v);
true
}
@@ -103,11 +275,11 @@ impl DiGraph {
#[cfg(test)]
mod tests {
use super::*;
use crate::MutexID;
use crate::MutexId;
#[test]
fn test_digraph() {
let id: Vec<MutexID> = (0..5).map(|_| MutexID::new()).collect();
let id: Vec<MutexId> = (0..5).map(|_| MutexId::new()).collect();
let mut graph = DiGraph::default();
// Add some safe edges

View File

@@ -1,4 +1,5 @@
use std::cell::RefCell;
use std::fmt;
use std::ops::DerefMut;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
@@ -22,7 +23,7 @@ thread_local! {
///
/// Assuming that locks are roughly released in the reverse order in which they were acquired,
/// a stack should be more efficient to keep track of the current state than a set would be.
static HELD_LOCKS: RefCell<Vec<MutexID>> = RefCell::new(Vec::new());
static HELD_LOCKS: RefCell<Vec<MutexId>> = RefCell::new(Vec::new());
}
lazy_static! {
@@ -38,10 +39,10 @@ lazy_static! {
///
/// One possible alteration is to make this type not `Copy` but `Drop`, and handle deregistering
/// the lock from there.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
struct MutexID(usize);
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
struct MutexId(usize);
impl MutexID {
impl MutexId {
/// Get a new, unique, mutex ID.
///
/// This ID is guaranteed to be unique within the runtime of the program.
@@ -86,8 +87,14 @@ impl MutexID {
}
}
impl fmt::Debug for MutexId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "MutexID({:?})", self.0)
}
}
#[derive(Debug)]
struct BorrowedMutex(MutexID);
struct BorrowedMutex(MutexId);
/// Drop a lock held by the current thread.
///
@@ -133,8 +140,8 @@ mod tests {
#[test]
fn test_next_mutex_id() {
let initial = MutexID::new();
let next = MutexID::new();
let initial = MutexId::new();
let next = MutexId::new();
// Can't assert N + 1 because multiple threads running tests
assert!(initial.0 < next.0);

View File

@@ -30,13 +30,13 @@ use std::sync::TryLockResult;
use crate::get_depedency_graph;
use crate::BorrowedMutex;
use crate::MutexID;
use crate::MutexId;
/// Wrapper for `std::sync::Mutex`
#[derive(Debug)]
pub struct TracingMutex<T> {
inner: Mutex<T>,
id: MutexID,
id: MutexId,
}
/// Wrapper for `std::sync::MutexGuard`
@@ -73,7 +73,7 @@ impl<T> TracingMutex<T> {
pub fn new(t: T) -> Self {
Self {
inner: Mutex::new(t),
id: MutexID::new(),
id: MutexId::new(),
}
}
@@ -168,7 +168,7 @@ impl<'a, T: fmt::Display> fmt::Display for TracingMutexGuard<'a, T> {
#[derive(Debug)]
pub struct TracingRwLock<T> {
inner: RwLock<T>,
id: MutexID,
id: MutexId,
}
/// Hybrid wrapper for both `std::sync::RwLockReadGuard` and `std::sync::RwLockWriteGuard`.
@@ -189,7 +189,7 @@ impl<T> TracingRwLock<T> {
pub fn new(t: T) -> Self {
Self {
inner: RwLock::new(t),
id: MutexID::new(),
id: MutexId::new(),
}
}