From c3b28bf6c72c25fb31a4ba6556c185b93e009655 Mon Sep 17 00:00:00 2001 From: "Tom D." Date: Sun, 11 Feb 2024 08:29:00 +0100 Subject: [PATCH 1/2] refactor(tsort): Switch to Knuth algorithm T This commit is a complete rewrite of the tsort util, resulting in a x10 performance improvement BREAKING CHANGE: the order is no longer the same as gnu tsort in some fringe cases. --- src/uu/tsort/src/tsort.rs | 180 +++++++++++++++++++++++++------------- 1 file changed, 117 insertions(+), 63 deletions(-) diff --git a/src/uu/tsort/src/tsort.rs b/src/uu/tsort/src/tsort.rs index ca0319ecf4..4a61f27103 100644 --- a/src/uu/tsort/src/tsort.rs +++ b/src/uu/tsort/src/tsort.rs @@ -2,8 +2,10 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +//spell-checker:ignore TAOCP use clap::{crate_version, Arg, Command}; -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::{HashMap, HashSet, VecDeque}; +use std::fmt::Write; use std::fs::File; use std::io::{stdin, BufReader, Read}; use std::path::Path; @@ -45,7 +47,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let mut input_buffer = String::new(); reader.read_to_string(&mut input_buffer)?; - let mut g = Graph::new(); + let mut g = Graph::default(); for line in input_buffer.lines() { let tokens: Vec<_> = line.split_whitespace().collect(); @@ -68,22 +70,26 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } } - g.run_tsort(); - - if !g.is_acyclic() { - return Err(USimpleError::new( - 1, - format!("{input}, input contains a loop:"), - )); - } - - for x in &g.result { - println!("{x}"); + match g.run_tsort() { + Err(cycle) => { + let mut error_message = format!( + "{}: {}: input contains a loop:\n", + uucore::util_name(), + input + ); + for node in &cycle { + writeln!(error_message, "{}: {}", uucore::util_name(), node).unwrap(); + } + eprint!("{}", error_message); + println!("{}", cycle.join("\n")); + Err(USimpleError::new(1, "")) + } + Ok(ordering) => { + println!("{}", ordering.join("\n")); + Ok(()) + } } - - Ok(()) } - pub fn uu_app() -> Command { Command::new(uucore::util_name()) .version(crate_version!()) @@ -100,77 +106,125 @@ pub fn uu_app() -> Command { // We use String as a representation of node here // but using integer may improve performance. -#[derive(Default)] -struct Graph<'input> { - in_edges: BTreeMap<&'input str, BTreeSet<&'input str>>, - out_edges: BTreeMap<&'input str, Vec<&'input str>>, - result: Vec<&'input str>, + +struct Node<'input> { + successor_names: Vec<&'input str>, + predecessor_count: usize, } -impl<'input> Graph<'input> { +impl<'input> Node<'input> { fn new() -> Self { - Self::default() - } - - fn has_node(&self, n: &str) -> bool { - self.in_edges.contains_key(n) + Node { + successor_names: Vec::new(), + predecessor_count: 0, + } } - fn has_edge(&self, from: &str, to: &str) -> bool { - self.in_edges[to].contains(from) + fn add_successor(&mut self, successor_name: &'input str) { + self.successor_names.push(successor_name); } +} +#[derive(Default)] +struct Graph<'input> { + nodes: HashMap<&'input str, Node<'input>>, +} - fn init_node(&mut self, n: &'input str) { - self.in_edges.insert(n, BTreeSet::new()); - self.out_edges.insert(n, vec![]); +impl<'input> Graph<'input> { + fn add_node(&mut self, name: &'input str) { + self.nodes.entry(name).or_insert_with(Node::new); } fn add_edge(&mut self, from: &'input str, to: &'input str) { - if !self.has_node(to) { - self.init_node(to); - } + self.add_node(from); + if from != to { + self.add_node(to); + + let from_node = self.nodes.get_mut(from).unwrap(); + from_node.add_successor(to); - if !self.has_node(from) { - self.init_node(from); + let to_node = self.nodes.get_mut(to).unwrap(); + to_node.predecessor_count += 1; + } + } + /// Implementation of algorithm T from TAOCP (Don. Knuth), vol. 1. + fn run_tsort(&mut self) -> Result, Vec<&'input str>> { + let mut result = Vec::with_capacity(self.nodes.len()); + // First, we find a node that have no prerequisites (independent nodes) + // If no such node exists, then there is a cycle. + let mut independent_nodes_queue: VecDeque<&'input str> = self + .nodes + .iter() + .filter_map(|(&name, node)| { + if node.predecessor_count == 0 { + Some(name) + } else { + None + } + }) + .collect(); + independent_nodes_queue.make_contiguous().sort_unstable(); // to make sure the resulting ordering is deterministic we need to order independent nodes + // FIXME: this doesn't comply entirely with the GNU coreutils implementation. + + // we remove each independent node, from the graph, updating each successor predecessor_count variable as we do. + while let Some(name_of_next_node_to_process) = independent_nodes_queue.pop_front() { + result.push(name_of_next_node_to_process); + if let Some(node_to_process) = self.nodes.remove(name_of_next_node_to_process) { + for successor_name in node_to_process.successor_names { + let successor_node = self.nodes.get_mut(successor_name).unwrap(); + successor_node.predecessor_count -= 1; + if successor_node.predecessor_count == 0 { + // if we find nodes without any other prerequisites, we add them to the queue. + independent_nodes_queue.push_back(successor_name); + } + } + } } - if from != to && !self.has_edge(from, to) { - self.in_edges.get_mut(to).unwrap().insert(from); - self.out_edges.get_mut(from).unwrap().push(to); + // if the graph has no cycle (it's a dependency tree), the graph should be empty now, as all nodes have been deleted. + if self.nodes.is_empty() { + Ok(result) + } else { + // otherwise, we detect and show a cycle to the user (as the GNU coreutils implementation does) + Err(self.detect_cycle()) } } - // Kahn's algorithm - // O(|V|+|E|) - fn run_tsort(&mut self) { - let mut start_nodes = vec![]; - for (n, edges) in &self.in_edges { - if edges.is_empty() { - start_nodes.push(*n); + fn detect_cycle(&self) -> Vec<&'input str> { + let mut visited = HashSet::new(); + let mut stack = Vec::with_capacity(self.nodes.len()); + for &node in self.nodes.keys() { + if !visited.contains(node) && self.dfs(node, &mut visited, &mut stack) { + return stack; } } + unreachable!(); + } - while !start_nodes.is_empty() { - let n = start_nodes.remove(0); - - self.result.push(n); + fn dfs( + &self, + node: &'input str, + visited: &mut HashSet<&'input str>, + stack: &mut Vec<&'input str>, + ) -> bool { + if stack.contains(&node) { + return true; + } + if visited.contains(&node) { + return false; + } - let n_out_edges = self.out_edges.get_mut(&n).unwrap(); - #[allow(clippy::explicit_iter_loop)] - for m in n_out_edges.iter() { - let m_in_edges = self.in_edges.get_mut(m).unwrap(); - m_in_edges.remove(&n); + visited.insert(node); + stack.push(node); - // If m doesn't have other in-coming edges add it to start_nodes - if m_in_edges.is_empty() { - start_nodes.push(m); + if let Some(successor_names) = self.nodes.get(node).map(|n| &n.successor_names) { + for &successor in successor_names { + if self.dfs(successor, visited, stack) { + return true; } } - n_out_edges.clear(); } - } - fn is_acyclic(&self) -> bool { - self.out_edges.values().all(|edge| edge.is_empty()) + stack.pop(); + false } } From 66c3efa0b7aa430f5058131a6b0ca2fd373e8bea Mon Sep 17 00:00:00 2001 From: "Tom D." Date: Sat, 19 Oct 2024 12:06:08 +0200 Subject: [PATCH 2/2] doc(tsort): Add benchmarking documentation --- src/uu/tsort/BENCHMARKING.md | 64 ++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 src/uu/tsort/BENCHMARKING.md diff --git a/src/uu/tsort/BENCHMARKING.md b/src/uu/tsort/BENCHMARKING.md new file mode 100644 index 0000000000..52b9dfc57e --- /dev/null +++ b/src/uu/tsort/BENCHMARKING.md @@ -0,0 +1,64 @@ +# Benchmarking `tsort` + +Much of what makes `tsort` fast is the efficiency of its algorithm and implementation for topological sorting. +Our implementation of `tsort` also outputs a cycle whenever such ordering does not exist, just like GNU `tsort`. + +## Strategies + +To test `tsort`'s performance for its nominal use case, we need to test it with a DAG. One of the worst cases is when all nodes are just representing a succession of independent steps. +We should also test cycle detection for good measure. + +### Random acyclic graph (DAG) + +This will output a DAG composed of 1 million pairs of edges between nodes numbered from 0 to 10,000, ensuring that the graph is acyclic by always assigning the edge with the smallest id to the node with the highest one. + +```python +import random + +N = 10000 + +for i in range(100*N): + a = random.randint(0, N) + b = random.randint(0, N) + print(f"{min(a, b)} {max(a, b)}") +``` + +### Random graph with cycles + +The following will output a graph with multiples edges, it also allows some degree of tuning to test different cases. + +```python +import random + +# Parameters for the graph +num_nodes = 100 +num_edges = 150 +cycle_percentage = 0.10 +max_cycle_size = 6 + +num_cycles = int(num_edges * cycle_percentage) + +for _ in range(num_edges - num_cycles): + a = random.randint(0, num_nodes) + b = random.randint(0, num_nodes) + print(f"{a} {b}") + + +for _ in range(num_cycles): + cycle_size = random.randint(3, max_cycle_size) + cycle_nodes = random.sample(range(num_nodes), cycle_size) + for i in range(cycle_size): + print(f"{cycle_nodes[i]} {cycle_nodes[(i + 1) % cycle_size]}") +``` + +## Running Benchmarks +The above scripts will output the generated graphs to the standard output. They can therefore be used directly as tests. In order to run a Benchmark, the output should be redirected to a file. +Use [`hyperfine`](https://github.com/sharkdp/hyperfine) to compare the performance of different `tsort` versions. For example, you can compare the performance of GNU `tsort` and another implementation with the following command: + +```sh +hyperfine 'tsort random_graph.txt' 'uu_tsort random_graph.txt' +``` + +## Note + +Benchmark results from the above scripts are fuzzy and change from run to run unless a seed is set.