Implement fast dynamic topsort algorithm

2026-02-27 19:20:38 +01:00 · 2021-03-27 17:20:37 +01:00
parent 5638ebffd8
commit c196589cfd
3 changed files with 235 additions and 56 deletions
--- a/src/graph.rs
+++ b/src/graph.rs
@@ -1,38 +1,76 @@
 use std::collections::HashMap;
 use std::collections::HashSet;
-use crate::MutexID;
+use crate::MutexId;
 type Order = usize;
 /// Directed Graph with dynamic topological sorting
 ///
 /// Design and implementation based "A Dynamic Topological Sort Algorithm for
 /// Directed Acyclic Graphs" by David J. Pearce and Paul H.J. Kelly which can
 /// be found on [the author's website][paper].
 ///
 /// Variable- and method names have been chosen to reflect most closely
 /// resemble the names in the original paper.
 ///
 /// This digraph tracks its own topological order and updates it when new edges
 /// are added to the graph. After a cycle has been introduced, the order is no
 /// longer kept up to date as it doesn't exist, but new edges are still
 /// tracked. Nodes are added implicitly when they're used in edges.
 ///
 /// [paper]: https://whileydave.com/publications/pk07_jea/
 #[derive(Clone, Default, Debug)]
 pub struct DiGraph {
-    in_edges: HashMap<MutexID, Vec<MutexID>>,
+    in_edges: HashMap<MutexId, Vec<MutexId>>,
-    out_edges: HashMap<MutexID, Vec<MutexID>>,
+    out_edges: HashMap<MutexId, Vec<MutexId>>,
    /// Next topological sort order
    next_ord: Order,
    /// Poison flag, set if a cycle is detected when adding a new edge and
    /// unset when removing a node successfully removed the cycle.
    contains_cycle: bool,
    /// Topological sort order. Order is not guaranteed to be contiguous
    ord: HashMap<MutexId, Order>,
 }
 impl DiGraph {
-    fn add_node(&mut self, node: MutexID) -> (&mut Vec<MutexID>, &mut Vec<MutexID>) {
+    /// Add a new node to the graph.
-        let in_edges = self.in_edges.entry(node).or_default();
+    ///
-        let out_edges = self.out_edges.entry(node).or_default();
+    /// If the node already existed, this function does not add it and uses the
    /// existing node data. The function returns mutable references to the
    /// in-edges, out-edges, and finally the index of the node in the topological
    /// order.
    ///
    /// New nodes are appended to the end of the topological order when added.
    fn add_node(&mut self, n: MutexId) -> (&mut Vec<MutexId>, &mut Vec<MutexId>, Order) {
        let next_ord = &mut self.next_ord;
        let in_edges = self.in_edges.entry(n).or_default();
        let out_edges = self.out_edges.entry(n).or_default();
-        (in_edges, out_edges)
+        let order = *self.ord.entry(n).or_insert_with(|| {
            let order = *next_ord;
            *next_ord += next_ord.checked_add(1).expect("Topological order overflow");
            order
        });
        (in_edges, out_edges, order)
    }
-    pub(crate) fn remove_node(&mut self, node: MutexID) -> bool {
+    pub(crate) fn remove_node(&mut self, n: MutexId) -> bool {
-        match self.out_edges.remove(&node) {
+        match self.out_edges.remove(&n) {
            None => false,
            Some(out_edges) => {
                for other in out_edges {
-                    self.in_edges
+                    self.in_edges.get_mut(&other).unwrap().retain(|c| c != &n);
                        .get_mut(&other)
                        .unwrap()
                        .retain(|c| c != &node);
                }
-                for other in self.in_edges.remove(&node).unwrap() {
+                for other in self.in_edges.remove(&n).unwrap() {
-                    self.out_edges
+                    self.out_edges.get_mut(&other).unwrap().retain(|c| c != &n);
-                        .get_mut(&other)
+                }
-                        .unwrap()
+
-                        .retain(|c| c != &node);
+                if self.contains_cycle {
                    // Need to build a valid topological order
                    self.recompute_topological_order();
                }
                true
@@ -43,58 +81,192 @@ impl DiGraph {
    /// Add an edge to the graph
    ///
    /// Nodes, both from and to, are created as needed when creating new edges.
-    pub(crate) fn add_edge(&mut self, from: MutexID, to: MutexID) -> bool {
+    pub(crate) fn add_edge(&mut self, x: MutexId, y: MutexId) -> bool {
-        if from == to {
+        if x == y {
            // self-edges are not considered cycles
            return false;
        }
-        let (_, out_edges) = self.add_node(from);
+        let (_, out_edges, ub) = self.add_node(x);
-        out_edges.push(to);
+        if out_edges.contains(&y) {
            // Edge already exists, nothing to be done
            return false;
        }
-        let (in_edges, _) = self.add_node(to);
+        out_edges.push(y);
-        // No need for existence check assuming the datastructure is consistent
+        let (in_edges, _, lb) = self.add_node(y);
-        in_edges.push(from);
+
        in_edges.push(x);
        if !self.contains_cycle && lb < ub {
            // This edge might introduce a cycle, need to recompute the topological sort
            let mut visited = HashSet::new();
            let mut delta_f = Vec::new();
            let mut delta_b = Vec::new();
            if !self.dfs_f(y, ub, &mut visited, &mut delta_f) {
                self.contains_cycle = true;
                return true;
            }
            // No need to check as we should've found the cycle on the forward pass
            self.dfs_b(x, lb, &mut visited, &mut delta_b);
            // Original paper keeps it around but this saves us from clearing it
            drop(visited);
            self.reorder(delta_f, delta_b);
        }
        true
    }
-    pub fn has_cycles(&self) -> bool {
+    /// Forwards depth-first-search
-        let mut marks = HashSet::new();
+    fn dfs_f(
-        let mut temp = HashSet::new();
+        &self,
        n: MutexId,
        ub: Order,
        visited: &mut HashSet<MutexId>,
        delta_f: &mut Vec<MutexId>,
    ) -> bool {
        visited.insert(n);
        delta_f.push(n);
-        self.out_edges
+        self.out_edges[&n].iter().all(|w| {
-            .keys()
+            let order = self.ord[w];
-            .copied()
+
-            .any(|node| !self.visit(node, &mut marks, &mut temp))
+            if order == ub {
                // Found a cycle
                false
            } else if !visited.contains(w) && order < ub {
                // Need to check recursively
                self.dfs_f(*w, ub, visited, delta_f)
            } else {
                // Already seen this one or not interesting
                true
            }
        })
    }
    /// Backwards depth-first-search
    fn dfs_b(
        &self,
        n: MutexId,
        lb: Order,
        visited: &mut HashSet<MutexId>,
        delta_b: &mut Vec<MutexId>,
    ) {
        visited.insert(n);
        delta_b.push(n);
        for w in &self.in_edges[&n] {
            if !visited.contains(w) && lb < self.ord[w] {
                self.dfs_b(*w, lb, visited, delta_b);
            }
        }
    }
    fn reorder(&mut self, mut delta_f: Vec<MutexId>, mut delta_b: Vec<MutexId>) {
        self.sort(&mut delta_f);
        self.sort(&mut delta_b);
        let mut l = Vec::with_capacity(delta_f.len() + delta_b.len());
        let mut orders = Vec::with_capacity(delta_f.len() + delta_b.len());
        for w in delta_b {
            orders.push(self.ord[&w]);
            l.push(w);
        }
        for v in delta_f {
            orders.push(self.ord[&v]);
            l.push(v);
        }
        // Original paper cleverly merges the two lists by using that both are
        // sorted. We just sort again. This is slower but also much simpler.
        orders.sort_unstable();
        for (node, order) in l.into_iter().zip(orders) {
            self.ord.insert(node, order);
        }
    }
    fn sort(&self, ids: &mut [MutexId]) {
        // Can use unstable sort because mutex ids should not be equal
        ids.sort_unstable_by_key(|v| self.ord[v]);
    }
    pub fn has_cycles(&self) -> bool {
        self.contains_cycle
    }
    /// Attempt to recompute a valid topological order.
    ///
    /// This method implements the DFS method to find leave nodes to find the reverse order
    fn recompute_topological_order(&mut self) {
        // This function should only be called when the graph contains a cycle.
        debug_assert!(self.contains_cycle);
        let mut permanent_marks = HashSet::with_capacity(self.out_edges.len());
        let mut temporary_marks = HashSet::new();
        let mut rev_order = Vec::with_capacity(self.out_edges.len());
        for node in self.out_edges.keys() {
            if permanent_marks.contains(node) {
                continue;
            }
            if !self.visit(
                *node,
                &mut permanent_marks,
                &mut temporary_marks,
                &mut rev_order,
            ) {
                // Cycle found, no order possible
                return;
            }
        }
        // We didn't find a cycle, so we can reset
        self.contains_cycle = false;
        // Newly allocated order is contiguous 0..rev_order.len()
        self.next_ord = rev_order.len();
        self.ord.clear();
        self.ord
            .extend(rev_order.into_iter().rev().enumerate().map(|(k, v)| (v, k)))
    }
    /// Helper function for `Self::recompute_topological_order`.
    fn visit(
        &self,
-        node: MutexID,
+        v: MutexId,
-        marks: &mut HashSet<MutexID>,
+        permanent_marks: &mut HashSet<MutexId>,
-        temp: &mut HashSet<MutexID>,
+        temporary_marks: &mut HashSet<MutexId>,
        rev_order: &mut Vec<MutexId>,
    ) -> bool {
-        if marks.contains(&node) {
+        if permanent_marks.contains(&v) {
            return true;
        }
-        if !temp.insert(node) {
+        if !temporary_marks.insert(v) {
            return false;
        }
-        if self.out_edges[&node]
+        if !self.out_edges[&v]
            .iter()
-            .copied()
+            .all(|&w| self.visit(w, permanent_marks, temporary_marks, rev_order))
            .any(|node| !self.visit(node, marks, temp))
        {
            return false;
        }
-        temp.remove(&node);
+        temporary_marks.remove(&v);
        permanent_marks.insert(v);
-        marks.insert(node);
+        rev_order.push(v);
        true
    }
@@ -103,11 +275,11 @@ impl DiGraph {
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::MutexID;
+    use crate::MutexId;
    #[test]
    fn test_digraph() {
-        let id: Vec<MutexID> = (0..5).map(|_| MutexID::new()).collect();
+        let id: Vec<MutexId> = (0..5).map(|_| MutexId::new()).collect();
        let mut graph = DiGraph::default();
        // Add some safe edges
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,5 @@
 use std::cell::RefCell;
 use std::fmt;
 use std::ops::DerefMut;
 use std::sync::atomic::AtomicUsize;
 use std::sync::atomic::Ordering;
@@ -22,7 +23,7 @@ thread_local! {
    ///
    /// Assuming that locks are roughly released in the reverse order in which they were acquired,
    /// a stack should be more efficient to keep track of the current state than a set would be.
-    static HELD_LOCKS: RefCell<Vec<MutexID>> = RefCell::new(Vec::new());
+    static HELD_LOCKS: RefCell<Vec<MutexId>> = RefCell::new(Vec::new());
 }
 lazy_static! {
@@ -38,10 +39,10 @@ lazy_static! {
 ///
 /// One possible alteration is to make this type not `Copy` but `Drop`, and handle deregistering
 /// the lock from there.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
-struct MutexID(usize);
+struct MutexId(usize);
-impl MutexID {
+impl MutexId {
    /// Get a new, unique, mutex ID.
    ///
    /// This ID is guaranteed to be unique within the runtime of the program.
@@ -86,8 +87,14 @@ impl MutexID {
    }
 }
 impl fmt::Debug for MutexId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "MutexID({:?})", self.0)
    }
 }
 #[derive(Debug)]
-struct BorrowedMutex(MutexID);
+struct BorrowedMutex(MutexId);
 /// Drop a lock held by the current thread.
 ///
@@ -133,8 +140,8 @@ mod tests {
    #[test]
    fn test_next_mutex_id() {
-        let initial = MutexID::new();
+        let initial = MutexId::new();
-        let next = MutexID::new();
+        let next = MutexId::new();
        // Can't assert N + 1 because multiple threads running tests
        assert!(initial.0 < next.0);
--- a/src/stdsync.rs
+++ b/src/stdsync.rs
@@ -30,13 +30,13 @@ use std::sync::TryLockResult;
 use crate::get_depedency_graph;
 use crate::BorrowedMutex;
-use crate::MutexID;
+use crate::MutexId;
 /// Wrapper for `std::sync::Mutex`
 #[derive(Debug)]
 pub struct TracingMutex<T> {
    inner: Mutex<T>,
-    id: MutexID,
+    id: MutexId,
 }
 /// Wrapper for `std::sync::MutexGuard`
@@ -73,7 +73,7 @@ impl<T> TracingMutex<T> {
    pub fn new(t: T) -> Self {
        Self {
            inner: Mutex::new(t),
-            id: MutexID::new(),
+            id: MutexId::new(),
        }
    }
@@ -168,7 +168,7 @@ impl<'a, T: fmt::Display> fmt::Display for TracingMutexGuard<'a, T> {
 #[derive(Debug)]
 pub struct TracingRwLock<T> {
    inner: RwLock<T>,
-    id: MutexID,
+    id: MutexId,
 }
 /// Hybrid wrapper for both `std::sync::RwLockReadGuard` and `std::sync::RwLockWriteGuard`.
@@ -189,7 +189,7 @@ impl<T> TracingRwLock<T> {
    pub fn new(t: T) -> Self {
        Self {
            inner: RwLock::new(t),
-            id: MutexID::new(),
+            id: MutexId::new(),
        }
    }