diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 607ae681d25..a52ea5c3799 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -613,11 +613,20 @@ public: VecDeque queue; queue.reserve(std::max(256, 2 * m_todo.Count())); - // Create an initial entry with m_todo as undecided. Also use it as best if not provided, - // so that during the work processing loop below, and during the add_fn/split_fn calls, we - // do not need to deal with the best=empty case. - if (best.feerate.IsEmpty()) best = SetInfo(m_depgraph, m_todo); - queue.emplace_back(SetInfo{}, SetType{m_todo}); + // Create initial entries per connected component of m_todo. While clusters themselves are + // generally connected, this is not necessarily true after some parts have already been + // removed from m_todo. Without this, effort can be wasted on searching "inc" sets that + // span multiple components. + auto to_cover = m_todo; + do { + auto component = m_depgraph.FindConnectedComponent(to_cover); + to_cover -= component; + // If best is not provided, set it to the first component, so that during the work + // processing loop below, and during the add_fn/split_fn calls, we do not need to deal + // with the best=empty case. + if (best.feerate.IsEmpty()) best = SetInfo(m_depgraph, component); + queue.emplace_back(/*inc=*/SetInfo{}, /*und=*/std::move(component)); + } while (to_cover.Any()); /** Local copy of the iteration limit. */ uint64_t iterations_left = max_iterations; @@ -645,7 +654,7 @@ public: // space runs out (see below), we know that no reallocation of the queue should ever // occur. Assume(queue.size() < queue.capacity()); - queue.emplace_back(std::move(inc), std::move(und)); + queue.emplace_back(/*inc=*/std::move(inc), /*und=*/std::move(und)); }; /** Internal process function. It takes an existing work item, and splits it in two: one diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index 2dfdfbb41de..abbcab0907b 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -165,6 +165,23 @@ std::pair, bool> SimpleLinearize(const DepGraph +void MakeConnected(DepGraph& depgraph) +{ + auto todo = BS::Fill(depgraph.TxCount()); + auto comp = depgraph.FindConnectedComponent(todo); + Assume(depgraph.IsConnected(comp)); + todo -= comp; + while (todo.Any()) { + auto nextcomp = depgraph.FindConnectedComponent(todo); + Assume(depgraph.IsConnected(nextcomp)); + depgraph.AddDependency(comp.Last(), nextcomp.First()); + todo -= nextcomp; + comp = nextcomp; + } +} + /** Given a dependency graph, and a todo set, read a topological subset of todo from reader. */ template SetType ReadTopologicalSet(const DepGraph& depgraph, const SetType& todo, SpanReader& reader) @@ -369,6 +386,20 @@ FUZZ_TARGET(clusterlin_components) assert(depgraph.FindConnectedComponent(todo).None()); } +FUZZ_TARGET(clusterlin_make_connected) +{ + // Verify that MakeConnected makes graphs connected. + + SpanReader reader(buffer); + DepGraph depgraph; + try { + reader >> Using(depgraph); + } catch (const std::ios_base::failure&) {} + MakeConnected(depgraph); + SanityCheck(depgraph); + assert(depgraph.IsConnected()); +} + FUZZ_TARGET(clusterlin_chunking) { // Verify the correctness of the ChunkLinearization function. @@ -468,13 +499,17 @@ FUZZ_TARGET(clusterlin_search_finder) // and comparing with the results from SimpleCandidateFinder, ExhaustiveCandidateFinder, and // AncestorCandidateFinder. - // Retrieve an RNG seed and a depgraph from the fuzz input. + // Retrieve an RNG seed, a depgraph, and whether to make it connected, from the fuzz input. SpanReader reader(buffer); DepGraph depgraph; uint64_t rng_seed{0}; + uint8_t make_connected{1}; try { - reader >> Using(depgraph) >> rng_seed; + reader >> Using(depgraph) >> rng_seed >> make_connected; } catch (const std::ios_base::failure&) {} + // The most complicated graphs are connected ones (other ones just split up). Optionally force + // the graph to be connected. + if (make_connected) MakeConnected(depgraph); // Instantiate ALL the candidate finders. SearchCandidateFinder src_finder(depgraph, rng_seed); @@ -513,9 +548,11 @@ FUZZ_TARGET(clusterlin_search_finder) assert(found.transactions.IsSupersetOf(depgraph.Ancestors(i) & todo)); } - // At most 2^N-1 iterations can be required: the number of non-empty subsets a graph with N - // transactions has. - assert(iterations_done <= ((uint64_t{1} << todo.Count()) - 1)); + // At most 2^(N-1) iterations can be required: the maximum number of non-empty topological + // subsets a (connected) cluster with N transactions can have. Even when the cluster is no + // longer connected after removing certain transactions, this holds, because the connected + // components are searched separately. + assert(iterations_done <= (uint64_t{1} << (todo.Count() - 1))); // Perform quality checks only if SearchCandidateFinder claims an optimal result. if (iterations_done < max_iterations) { @@ -685,14 +722,19 @@ FUZZ_TARGET(clusterlin_linearize) { // Verify the behavior of Linearize(). - // Retrieve an RNG seed, an iteration count, and a depgraph from the fuzz input. + // Retrieve an RNG seed, an iteration count, a depgraph, and whether to make it connected from + // the fuzz input. SpanReader reader(buffer); DepGraph depgraph; uint64_t rng_seed{0}; uint64_t iter_count{0}; + uint8_t make_connected{1}; try { - reader >> VARINT(iter_count) >> Using(depgraph) >> rng_seed; + reader >> VARINT(iter_count) >> Using(depgraph) >> rng_seed >> make_connected; } catch (const std::ios_base::failure&) {} + // The most complicated graphs are connected ones (other ones just split up). Optionally force + // the graph to be connected. + if (make_connected) MakeConnected(depgraph); // Optionally construct an old linearization for it. std::vector old_linearization; @@ -721,10 +763,10 @@ FUZZ_TARGET(clusterlin_linearize) } // If the iteration count is sufficiently high, an optimal linearization must be found. - // Each linearization step can use up to 2^k iterations, with steps k=1..n. That sum is - // 2 * (2^n - 1) + // Each linearization step can use up to 2^(k-1) iterations, with steps k=1..n. That sum is + // 2^n - 1. const uint64_t n = depgraph.TxCount(); - if (n <= 18 && iter_count > 2U * ((uint64_t{1} << n) - 1U)) { + if (n <= 19 && iter_count > (uint64_t{1} << n)) { assert(optimal); }