From 0aa874a357865dd4768091f26dff238e66fb8d83 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Fri, 25 Oct 2024 14:11:50 -0400 Subject: [PATCH 01/25] clusterlin: Add FixLinearization function + fuzz test This function takes an existing ordering for transactions in a DepGraph, and makes it a valid linearization for it (i.e., topological). Any topological prefix of the input remains untouched. --- src/cluster_linearize.h | 32 +++++++++++++++ src/test/fuzz/cluster_linearize.cpp | 62 +++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 7c7401706f6..32cedf9f840 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -1336,6 +1336,38 @@ std::vector MergeLinearizations(const DepGraph& depgraph, return ret; } +/** Make linearization topological, retaining its ordering where possible. */ +template +void FixLinearization(const DepGraph& depgraph, std::span linearization) noexcept +{ + // This algorithm can be summarized as moving every element in the linearization backwards + // until it is placed after all its ancestors. + SetType done; + const auto len = linearization.size(); + // Iterate over the elements of linearization from back to front (i is distance from back). + for (ClusterIndex i = 0; i < len; ++i) { + /** The element at that position. */ + ClusterIndex elem = linearization[len - 1 - i]; + /** j represents how far from the back of the linearization elem should be placed. */ + ClusterIndex j = i; + // Figure out which elements need to be moved before elem. + SetType place_before = done & depgraph.Ancestors(elem); + // Find which position to place elem in (updating j), continuously moving the elements + // in between forward. + while (place_before.Any()) { + // j cannot be 0 here; if it was, then there was necessarily nothing earlier which + // elem needs to be placed before anymore, and place_before would be empty. 
+ Assume(j > 0); + auto to_swap = linearization[len - 1 - (j - 1)]; + place_before.Reset(to_swap); + linearization[len - 1 - (j--)] = to_swap; + } + // Put elem in its final position and mark it as done. + linearization[len - 1 - j] = elem; + done.Set(elem); + } +} + } // namespace cluster_linearize #endif // BITCOIN_CLUSTER_LINEARIZE_H diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index 5b3770636ab..de066237b2a 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -1118,3 +1118,65 @@ FUZZ_TARGET(clusterlin_merge) auto cmp2 = CompareChunks(chunking_merged, chunking2); assert(cmp2 >= 0); } + +FUZZ_TARGET(clusterlin_fix_linearization) +{ + // Verify expected properties of FixLinearization() on arbitrary linearizations. + + // Retrieve a depgraph from the fuzz input. + SpanReader reader(buffer); + DepGraph depgraph; + try { + reader >> Using(depgraph); + } catch (const std::ios_base::failure&) {} + + // Construct an arbitrary linearization (not necessarily topological for depgraph). + std::vector linearization; + /** Which transactions of depgraph are yet to be included in linearization. */ + TestBitSet todo = depgraph.Positions(); + while (todo.Any()) { + // Read a number from the fuzz input in range [0, todo.Count()). + uint64_t val{0}; + try { + reader >> VARINT(val); + } catch (const std::ios_base::failure&) {} + val %= todo.Count(); + // Find the val'th element in todo, remove it from todo, and append it to linearization. + for (auto idx : todo) { + if (val == 0) { + linearization.push_back(idx); + todo.Reset(idx); + break; + } + --val; + } + } + assert(linearization.size() == depgraph.TxCount()); + + // Determine what prefix of linearization is topological, i.e., the position of the first entry + // in linearization which corresponds to a transaction that is not preceded by all its + // ancestors. 
+ size_t topo_prefix = 0; + todo = depgraph.Positions(); + while (topo_prefix < linearization.size()) { + ClusterIndex idx = linearization[topo_prefix]; + todo.Reset(idx); + if (todo.Overlaps(depgraph.Ancestors(idx))) break; + ++topo_prefix; + } + + // Then make a fixed copy of linearization. + auto linearization_fixed = linearization; + FixLinearization(depgraph, linearization_fixed); + // Sanity check it (which includes testing whether it is topological). + SanityCheck(depgraph, linearization_fixed); + + // FixLinearization does not modify the topological prefix of linearization. + assert(std::equal(linearization.begin(), linearization.begin() + topo_prefix, + linearization_fixed.begin())); + // This also means that if linearization was entirely topological, FixLinearization cannot have + // modified it. This is implied by the assertion above already, but repeat it explicitly. + if (topo_prefix == linearization.size()) { + assert(linearization == linearization_fixed); + } +} From bfeb69f6e00d94b94171cebf351fac69bec489cc Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 12 Nov 2024 15:13:58 -0500 Subject: [PATCH 02/25] clusterlin: Make IsAcyclic() a DepGraph member function ... instead of being a separate test-only function. Also add a fuzz test for it returning false. --- src/cluster_linearize.h | 11 ++++++++++ src/test/fuzz/cluster_linearize.cpp | 33 +++++++++++++++++++++++++++-- src/test/util/cluster_linearize.h | 14 +----------- 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 32cedf9f840..28dde840ef4 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -309,6 +309,17 @@ public: return a < b; }); } + + /** Check if this graph is acyclic. 
*/ + bool IsAcyclic() const noexcept + { + for (auto i : Positions()) { + if ((Ancestors(i) & Descendants(i)) != SetType::Singleton(i)) { + return false; + } + } + return true; + } }; /** A set of transactions together with their aggregate feerate. */ diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index de066237b2a..f5c0c897c98 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -401,13 +401,42 @@ FUZZ_TARGET(clusterlin_depgraph_serialization) // Construct a graph by deserializing. SpanReader reader(buffer); DepGraph depgraph; + ClusterIndex par_code{0}, chl_code{0}; try { - reader >> Using(depgraph); + reader >> Using(depgraph) >> VARINT(par_code) >> VARINT(chl_code); } catch (const std::ios_base::failure&) {} SanityCheck(depgraph); // Verify the graph is a DAG. - assert(IsAcyclic(depgraph)); + assert(depgraph.IsAcyclic()); + + // Introduce a cycle, and then test that IsAcyclic returns false. + if (depgraph.TxCount() < 2) return; + ClusterIndex par(0), chl(0); + // Pick any transaction of depgraph as parent. + par_code %= depgraph.TxCount(); + for (auto i : depgraph.Positions()) { + if (par_code == 0) { + par = i; + break; + } + --par_code; + } + // Pick any ancestor of par (excluding itself) as child, if any. + auto ancestors = depgraph.Ancestors(par) - TestBitSet::Singleton(par); + if (ancestors.None()) return; + chl_code %= ancestors.Count(); + for (auto i : ancestors) { + if (chl_code == 0) { + chl = i; + break; + } + --chl_code; + } + // Add the cycle-introducing dependency. + depgraph.AddDependencies(TestBitSet::Singleton(par), chl); + // Check that we now detect a cycle. 
+ assert(!depgraph.IsAcyclic()); } FUZZ_TARGET(clusterlin_components) diff --git a/src/test/util/cluster_linearize.h b/src/test/util/cluster_linearize.h index 7ae56232bac..3db51a6b80b 100644 --- a/src/test/util/cluster_linearize.h +++ b/src/test/util/cluster_linearize.h @@ -23,18 +23,6 @@ using namespace cluster_linearize; using TestBitSet = BitSet<32>; -/** Check if a graph is acyclic. */ -template -bool IsAcyclic(const DepGraph& depgraph) noexcept -{ - for (ClusterIndex i : depgraph.Positions()) { - if ((depgraph.Ancestors(i) & depgraph.Descendants(i)) != SetType::Singleton(i)) { - return false; - } - } - return true; -} - /** A formatter for a bespoke serialization for acyclic DepGraph objects. * * The serialization format outputs information about transactions in a topological order (parents @@ -337,7 +325,7 @@ void SanityCheck(const DepGraph& depgraph) assert((depgraph.Descendants(child) & children).IsSubsetOf(SetType::Singleton(child))); } } - if (IsAcyclic(depgraph)) { + if (depgraph.IsAcyclic()) { // If DepGraph is acyclic, serialize + deserialize must roundtrip. std::vector ser; VectorWriter writer(ser, 0); From d4497738999873c8432d02fd71e14f1afc2065a8 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Fri, 31 Jan 2025 16:26:06 -0500 Subject: [PATCH 03/25] scripted-diff: (refactor) ClusterIndex -> DepGraphIndex Since cluster_linearize.h does not actually have a Cluster type anymore, it is more appropriate to rename the index type to DepGraphIndex. 
-BEGIN VERIFY SCRIPT- sed -i 's/Data type to represent transaction indices in clusters./Data type to represent transaction indices in DepGraphs and the clusters they represent./' $(git grep -l 'using ClusterIndex') sed -i 's|\<ClusterIndex\>|DepGraphIndex|g' $(git grep -l 'ClusterIndex') -END VERIFY SCRIPT- --- src/bench/cluster_linearize.cpp | 44 ++++----- src/cluster_linearize.h | 132 +++++++++++++-------------- src/test/cluster_linearize_tests.cpp | 4 +- src/test/fuzz/cluster_linearize.cpp | 70 +++++++------- src/test/util/cluster_linearize.h | 36 ++++---- 5 files changed, 143 insertions(+), 143 deletions(-) diff --git a/src/bench/cluster_linearize.cpp b/src/bench/cluster_linearize.cpp index 7d011975ddb..cb06f3fc28a 100644 --- a/src/bench/cluster_linearize.cpp +++ b/src/bench/cluster_linearize.cpp @@ -23,10 +23,10 @@ namespace { * remaining transaction, whose removal requires updating all remaining transactions' ancestor * set feerates. */ template -DepGraph MakeLinearGraph(ClusterIndex ntx) +DepGraph MakeLinearGraph(DepGraphIndex ntx) { DepGraph depgraph; - for (ClusterIndex i = 0; i < ntx; ++i) { + for (DepGraphIndex i = 0; i < ntx; ++i) { depgraph.AddTransaction({-int32_t(i), 1}); if (i > 0) depgraph.AddDependencies(SetType::Singleton(i - 1), i); } @@ -38,10 +38,10 @@ DepGraph MakeLinearGraph(ClusterIndex ntx) * rechunking is needed after every candidate (the last transaction gets picked every time). */ template -DepGraph MakeWideGraph(ClusterIndex ntx) +DepGraph MakeWideGraph(DepGraphIndex ntx) { DepGraph depgraph; - for (ClusterIndex i = 0; i < ntx; ++i) { + for (DepGraphIndex i = 0; i < ntx; ++i) { depgraph.AddTransaction({int32_t(i) + 1, 1}); if (i > 0) depgraph.AddDependencies(SetType::Singleton(0), i); } @@ -51,10 +51,10 @@ DepGraph MakeWideGraph(ClusterIndex ntx) // Construct a difficult graph. These need at least sqrt(2^(n-1)) iterations in the implemented // algorithm (purely empirically determined). 
template -DepGraph MakeHardGraph(ClusterIndex ntx) +DepGraph MakeHardGraph(DepGraphIndex ntx) { DepGraph depgraph; - for (ClusterIndex i = 0; i < ntx; ++i) { + for (DepGraphIndex i = 0; i < ntx; ++i) { if (ntx & 1) { // Odd cluster size. // @@ -121,7 +121,7 @@ DepGraph MakeHardGraph(ClusterIndex ntx) * iterations difference. */ template -void BenchLinearizeWorstCase(ClusterIndex ntx, benchmark::Bench& bench, uint64_t iter_limit) +void BenchLinearizeWorstCase(DepGraphIndex ntx, benchmark::Bench& bench, uint64_t iter_limit) { const auto depgraph = MakeHardGraph(ntx); uint64_t rng_seed = 0; @@ -147,12 +147,12 @@ void BenchLinearizeWorstCase(ClusterIndex ntx, benchmark::Bench& bench, uint64_t * cheap. */ template -void BenchLinearizeNoItersWorstCaseAnc(ClusterIndex ntx, benchmark::Bench& bench) +void BenchLinearizeNoItersWorstCaseAnc(DepGraphIndex ntx, benchmark::Bench& bench) { const auto depgraph = MakeLinearGraph(ntx); uint64_t rng_seed = 0; - std::vector old_lin(ntx); - for (ClusterIndex i = 0; i < ntx; ++i) old_lin[i] = i; + std::vector old_lin(ntx); + for (DepGraphIndex i = 0; i < ntx; ++i) old_lin[i] = i; bench.run([&] { Linearize(depgraph, /*max_iterations=*/0, rng_seed++, old_lin); }); @@ -167,41 +167,41 @@ void BenchLinearizeNoItersWorstCaseAnc(ClusterIndex ntx, benchmark::Bench& bench * AncestorCandidateFinder is cheap. 
*/ template -void BenchLinearizeNoItersWorstCaseLIMO(ClusterIndex ntx, benchmark::Bench& bench) +void BenchLinearizeNoItersWorstCaseLIMO(DepGraphIndex ntx, benchmark::Bench& bench) { const auto depgraph = MakeWideGraph(ntx); uint64_t rng_seed = 0; - std::vector old_lin(ntx); - for (ClusterIndex i = 0; i < ntx; ++i) old_lin[i] = i; + std::vector old_lin(ntx); + for (DepGraphIndex i = 0; i < ntx; ++i) old_lin[i] = i; bench.run([&] { Linearize(depgraph, /*max_iterations=*/0, rng_seed++, old_lin); }); } template -void BenchPostLinearizeWorstCase(ClusterIndex ntx, benchmark::Bench& bench) +void BenchPostLinearizeWorstCase(DepGraphIndex ntx, benchmark::Bench& bench) { DepGraph depgraph = MakeWideGraph(ntx); - std::vector lin(ntx); + std::vector lin(ntx); bench.run([&] { - for (ClusterIndex i = 0; i < ntx; ++i) lin[i] = i; + for (DepGraphIndex i = 0; i < ntx; ++i) lin[i] = i; PostLinearize(depgraph, lin); }); } template -void BenchMergeLinearizationsWorstCase(ClusterIndex ntx, benchmark::Bench& bench) +void BenchMergeLinearizationsWorstCase(DepGraphIndex ntx, benchmark::Bench& bench) { DepGraph depgraph; - for (ClusterIndex i = 0; i < ntx; ++i) { + for (DepGraphIndex i = 0; i < ntx; ++i) { depgraph.AddTransaction({i, 1}); if (i) depgraph.AddDependencies(SetType::Singleton(0), i); } - std::vector lin1; - std::vector lin2; + std::vector lin1; + std::vector lin2; lin1.push_back(0); lin2.push_back(0); - for (ClusterIndex i = 1; i < ntx; ++i) { + for (DepGraphIndex i = 1; i < ntx; ++i) { lin1.push_back(i); lin2.push_back(ntx - i); } @@ -214,7 +214,7 @@ template void BenchLinearizeOptimally(benchmark::Bench& bench, const std::array& serialized) { // Determine how many transactions the serialized cluster has. 
- ClusterIndex num_tx{0}; + DepGraphIndex num_tx{0}; { SpanReader reader{serialized}; DepGraph> depgraph; diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 28dde840ef4..b01daedf4bf 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -19,8 +19,8 @@ namespace cluster_linearize { -/** Data type to represent transaction indices in clusters. */ -using ClusterIndex = uint32_t; +/** Data type to represent transaction indices in DepGraphs and the clusters they represent. */ +using DepGraphIndex = uint32_t; /** Data structure that holds a transaction graph's preprocessed data (fee, size, ancestors, * descendants). */ @@ -86,11 +86,11 @@ public: * * Complexity: O(N^2) where N=depgraph.TxCount(). */ - DepGraph(const DepGraph& depgraph, std::span mapping, ClusterIndex pos_range) noexcept : entries(pos_range) + DepGraph(const DepGraph& depgraph, std::span mapping, DepGraphIndex pos_range) noexcept : entries(pos_range) { Assume(mapping.size() == depgraph.PositionRange()); Assume((pos_range == 0) == (depgraph.TxCount() == 0)); - for (ClusterIndex i : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { auto new_idx = mapping[i]; Assume(new_idx < pos_range); // Add transaction. @@ -100,7 +100,7 @@ public: // Fill in fee and size. entries[new_idx].feerate = depgraph.entries[i].feerate; } - for (ClusterIndex i : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { // Fill in dependencies by mapping direct parents. SetType parents; for (auto j : depgraph.GetReducedParents(i)) parents.Set(mapping[j]); @@ -113,29 +113,29 @@ public: /** Get the set of transactions positions in use. Complexity: O(1). */ const SetType& Positions() const noexcept { return m_used; } /** Get the range of positions in this DepGraph. All entries in Positions() are in [0, PositionRange() - 1]. 
*/ - ClusterIndex PositionRange() const noexcept { return entries.size(); } + DepGraphIndex PositionRange() const noexcept { return entries.size(); } /** Get the number of transactions in the graph. Complexity: O(1). */ auto TxCount() const noexcept { return m_used.Count(); } /** Get the feerate of a given transaction i. Complexity: O(1). */ - const FeeFrac& FeeRate(ClusterIndex i) const noexcept { return entries[i].feerate; } + const FeeFrac& FeeRate(DepGraphIndex i) const noexcept { return entries[i].feerate; } /** Get the mutable feerate of a given transaction i. Complexity: O(1). */ - FeeFrac& FeeRate(ClusterIndex i) noexcept { return entries[i].feerate; } + FeeFrac& FeeRate(DepGraphIndex i) noexcept { return entries[i].feerate; } /** Get the ancestors of a given transaction i. Complexity: O(1). */ - const SetType& Ancestors(ClusterIndex i) const noexcept { return entries[i].ancestors; } + const SetType& Ancestors(DepGraphIndex i) const noexcept { return entries[i].ancestors; } /** Get the descendants of a given transaction i. Complexity: O(1). */ - const SetType& Descendants(ClusterIndex i) const noexcept { return entries[i].descendants; } + const SetType& Descendants(DepGraphIndex i) const noexcept { return entries[i].descendants; } /** Add a new unconnected transaction to this transaction graph (in the first available - * position), and return its ClusterIndex. + * position), and return its DepGraphIndex. * * Complexity: O(1) (amortized, due to resizing of backing vector). 
*/ - ClusterIndex AddTransaction(const FeeFrac& feefrac) noexcept + DepGraphIndex AddTransaction(const FeeFrac& feefrac) noexcept { static constexpr auto ALL_POSITIONS = SetType::Fill(SetType::Size()); auto available = ALL_POSITIONS - m_used; Assume(available.Any()); - ClusterIndex new_idx = available.First(); + DepGraphIndex new_idx = available.First(); if (new_idx == entries.size()) { entries.emplace_back(feefrac, SetType::Singleton(new_idx), SetType::Singleton(new_idx)); } else { @@ -174,7 +174,7 @@ public: * * Complexity: O(N) where N=TxCount(). */ - void AddDependencies(const SetType& parents, ClusterIndex child) noexcept + void AddDependencies(const SetType& parents, DepGraphIndex child) noexcept { Assume(m_used[child]); Assume(parents.IsSubsetOf(m_used)); @@ -205,7 +205,7 @@ public: * * Complexity: O(N) where N=Ancestors(i).Count() (which is bounded by TxCount()). */ - SetType GetReducedParents(ClusterIndex i) const noexcept + SetType GetReducedParents(DepGraphIndex i) const noexcept { SetType parents = Ancestors(i); parents.Reset(i); @@ -226,7 +226,7 @@ public: * * Complexity: O(N) where N=Descendants(i).Count() (which is bounded by TxCount()). */ - SetType GetReducedChildren(ClusterIndex i) const noexcept + SetType GetReducedChildren(DepGraphIndex i) const noexcept { SetType children = Descendants(i); children.Reset(i); @@ -298,11 +298,11 @@ public: * * Complexity: O(select.Count() * log(select.Count())). 
*/ - void AppendTopo(std::vector& list, const SetType& select) const noexcept + void AppendTopo(std::vector& list, const SetType& select) const noexcept { - ClusterIndex old_len = list.size(); + DepGraphIndex old_len = list.size(); for (auto i : select) list.push_back(i); - std::sort(list.begin() + old_len, list.end(), [&](ClusterIndex a, ClusterIndex b) noexcept { + std::sort(list.begin() + old_len, list.end(), [&](DepGraphIndex a, DepGraphIndex b) noexcept { const auto a_anc_count = entries[a].ancestors.Count(); const auto b_anc_count = entries[b].ancestors.Count(); if (a_anc_count != b_anc_count) return a_anc_count < b_anc_count; @@ -338,7 +338,7 @@ struct SetInfo SetInfo(const SetType& txn, const FeeFrac& fr) noexcept : transactions(txn), feerate(fr) {} /** Construct a SetInfo for a given transaction in a depgraph. */ - explicit SetInfo(const DepGraph& depgraph, ClusterIndex pos) noexcept : + explicit SetInfo(const DepGraph& depgraph, DepGraphIndex pos) noexcept : transactions(SetType::Singleton(pos)), feerate(depgraph.FeeRate(pos)) {} /** Construct a SetInfo for a set of transactions in a depgraph. */ @@ -346,7 +346,7 @@ struct SetInfo transactions(txn), feerate(depgraph.FeeRate(txn)) {} /** Add a transaction to this SetInfo (which must not yet be in it). */ - void Set(const DepGraph& depgraph, ClusterIndex pos) noexcept + void Set(const DepGraph& depgraph, DepGraphIndex pos) noexcept { Assume(!transactions[pos]); transactions.Set(pos); @@ -382,10 +382,10 @@ struct SetInfo /** Compute the feerates of the chunks of linearization. */ template -std::vector ChunkLinearization(const DepGraph& depgraph, std::span linearization) noexcept +std::vector ChunkLinearization(const DepGraph& depgraph, std::span linearization) noexcept { std::vector ret; - for (ClusterIndex i : linearization) { + for (DepGraphIndex i : linearization) { /** The new chunk to be added, initially a singleton. 
*/ auto new_chunk = depgraph.FeeRate(i); // As long as the new chunk has a higher feerate than the last chunk so far, absorb it. @@ -407,13 +407,13 @@ class LinearizationChunking const DepGraph& m_depgraph; /** The linearization we started from, possibly with removed prefix stripped. */ - std::span m_linearization; + std::span m_linearization; /** Chunk sets and their feerates, of what remains of the linearization. */ std::vector> m_chunks; /** How large a prefix of m_chunks corresponds to removed transactions. */ - ClusterIndex m_chunks_skip{0}; + DepGraphIndex m_chunks_skip{0}; /** Which transactions remain in the linearization. */ SetType m_todo; @@ -448,7 +448,7 @@ class LinearizationChunking public: /** Initialize a LinearizationSubset object for a given length of linearization. */ - explicit LinearizationChunking(const DepGraph& depgraph LIFETIMEBOUND, std::span lin LIFETIMEBOUND) noexcept : + explicit LinearizationChunking(const DepGraph& depgraph LIFETIMEBOUND, std::span lin LIFETIMEBOUND) noexcept : m_depgraph(depgraph), m_linearization(lin) { // Mark everything in lin as todo still. @@ -459,10 +459,10 @@ public: } /** Determine how many chunks remain in the linearization. */ - ClusterIndex NumChunksLeft() const noexcept { return m_chunks.size() - m_chunks_skip; } + DepGraphIndex NumChunksLeft() const noexcept { return m_chunks.size() - m_chunks_skip; } /** Access a chunk. Chunk 0 is the highest-feerate prefix of what remains. */ - const SetInfo& GetChunk(ClusterIndex n) const noexcept + const SetInfo& GetChunk(DepGraphIndex n) const noexcept { Assume(n + m_chunks_skip < m_chunks.size()); return m_chunks[n + m_chunks_skip]; @@ -505,7 +505,7 @@ public: Assume(subset.transactions.IsSubsetOf(m_todo)); SetInfo accumulator; // Iterate over all chunks of the remaining linearization. - for (ClusterIndex i = 0; i < NumChunksLeft(); ++i) { + for (DepGraphIndex i = 0; i < NumChunksLeft(); ++i) { // Find what (if any) intersection the chunk has with subset. 
const SetType to_add = GetChunk(i).transactions & subset.transactions; if (to_add.Any()) { @@ -557,13 +557,13 @@ public: m_ancestor_set_feerates(depgraph.PositionRange()) { // Precompute ancestor-set feerates. - for (ClusterIndex i : m_depgraph.Positions()) { + for (DepGraphIndex i : m_depgraph.Positions()) { /** The remaining ancestors for transaction i. */ SetType anc_to_add = m_depgraph.Ancestors(i); FeeFrac anc_feerate; // Reuse accumulated feerate from first ancestor, if usable. Assume(anc_to_add.Any()); - ClusterIndex first = anc_to_add.First(); + DepGraphIndex first = anc_to_add.First(); if (first < i) { anc_feerate = m_ancestor_set_feerates[first]; Assume(!anc_feerate.IsEmpty()); @@ -603,7 +603,7 @@ public: } /** Count the number of remaining unlinearized transactions. */ - ClusterIndex NumRemaining() const noexcept + DepGraphIndex NumRemaining() const noexcept { return m_todo.Count(); } @@ -616,7 +616,7 @@ public: SetInfo FindCandidateSet() const noexcept { Assume(!AllDone()); - std::optional best; + std::optional best; for (auto i : m_todo) { if (best.has_value()) { Assume(!m_ancestor_set_feerates[i].IsEmpty()); @@ -644,9 +644,9 @@ class SearchCandidateFinder /** Internal RNG. */ InsecureRandomContext m_rng; /** m_sorted_to_original[i] is the original position that sorted transaction position i had. */ - std::vector m_sorted_to_original; + std::vector m_sorted_to_original; /** m_original_to_sorted[i] is the sorted position original transaction position i has. */ - std::vector m_original_to_sorted; + std::vector m_original_to_sorted; /** Internal dependency graph for the cluster (with transactions in decreasing individual * feerate order). */ DepGraph m_sorted_depgraph; @@ -684,7 +684,7 @@ public: { // Determine reordering mapping, by sorting by decreasing feerate. Unused positions are // not included, as they will never be looked up anyway. 
- ClusterIndex sorted_pos{0}; + DepGraphIndex sorted_pos{0}; for (auto i : depgraph.Positions()) { m_sorted_to_original[sorted_pos++] = i; } @@ -694,7 +694,7 @@ public: return feerate_cmp > 0; }); // Compute reverse mapping. - for (ClusterIndex i = 0; i < m_sorted_to_original.size(); ++i) { + for (DepGraphIndex i = 0; i < m_sorted_to_original.size(); ++i) { m_original_to_sorted[m_sorted_to_original[i]] = i; } // Compute reordered dependency graph. @@ -793,7 +793,7 @@ public: /** The set of transactions in m_todo which have feerate > best's. */ SetType imp = m_todo; while (imp.Any()) { - ClusterIndex check = imp.Last(); + DepGraphIndex check = imp.Last(); if (m_sorted_depgraph.FeeRate(check) >> best.feerate) break; imp.Reset(check); } @@ -850,7 +850,7 @@ public: best = inc; // See if we can remove any entries from imp now. while (imp.Any()) { - ClusterIndex check = imp.Last(); + DepGraphIndex check = imp.Last(); if (m_sorted_depgraph.FeeRate(check) >> best.feerate) break; imp.Reset(check); } @@ -891,7 +891,7 @@ public: // If pot is empty, then so is inc. Assume(elem.inc.feerate.IsEmpty() == elem.pot_feerate.IsEmpty()); - const ClusterIndex first = elem.und.First(); + const DepGraphIndex first = elem.und.First(); if (!elem.inc.feerate.IsEmpty()) { // If no undecided transactions remain with feerate higher than best, this entry // cannot be improved beyond best. @@ -917,17 +917,17 @@ public: // most. Let I(t) be the size of the undecided set after including t, and E(t) the size // of the undecided set after excluding t. Then choose the split transaction t such // that 2^I(t) + 2^E(t) is minimal, tie-breaking by highest individual feerate for t. - ClusterIndex split = 0; + DepGraphIndex split = 0; const auto select = elem.und & m_sorted_depgraph.Ancestors(first); Assume(select.Any()); - std::optional> split_counts; + std::optional> split_counts; for (auto t : select) { // Call max = max(I(t), E(t)) and min = min(I(t), E(t)). Let counts = {max,min}. 
// Sorting by the tuple counts is equivalent to sorting by 2^I(t) + 2^E(t). This // expression is equal to 2^max + 2^min = 2^max * (1 + 1/2^(max - min)). The second // factor (1 + 1/2^(max - min)) there is in (1,2]. Thus increasing max will always // increase it, even when min decreases. Because of this, we can first sort by max. - std::pair counts{ + std::pair counts{ (elem.und - m_sorted_depgraph.Ancestors(t)).Count(), (elem.und - m_sorted_depgraph.Descendants(t)).Count()}; if (counts.first < counts.second) std::swap(counts.first, counts.second); @@ -1027,13 +1027,13 @@ public: * Complexity: possibly O(N * min(max_iterations + N, sqrt(2^N))) where N=depgraph.TxCount(). */ template -std::pair, bool> Linearize(const DepGraph& depgraph, uint64_t max_iterations, uint64_t rng_seed, std::span old_linearization = {}) noexcept +std::pair, bool> Linearize(const DepGraph& depgraph, uint64_t max_iterations, uint64_t rng_seed, std::span old_linearization = {}) noexcept { Assume(old_linearization.empty() || old_linearization.size() == depgraph.TxCount()); if (depgraph.TxCount() == 0) return {{}, true}; uint64_t iterations_left = max_iterations; - std::vector linearization; + std::vector linearization; AncestorCandidateFinder anc_finder(depgraph); std::optional> src_finder; @@ -1121,7 +1121,7 @@ std::pair, bool> Linearize(const DepGraph& de * postlinearize" process. */ template -void PostLinearize(const DepGraph& depgraph, std::span linearization) +void PostLinearize(const DepGraph& depgraph, std::span linearization) { // This algorithm performs a number of passes (currently 2); the even ones operate from back to // front, the odd ones from front to back. Each results in an equal-or-better linearization @@ -1159,9 +1159,9 @@ void PostLinearize(const DepGraph& depgraph, std::span li // entries[0]. /** Index of the sentinel in the entries array below. 
*/ - static constexpr ClusterIndex SENTINEL{0}; + static constexpr DepGraphIndex SENTINEL{0}; /** Indicator that a group has no previous transaction. */ - static constexpr ClusterIndex NO_PREV_TX{0}; + static constexpr DepGraphIndex NO_PREV_TX{0}; /** Data structure per transaction entry. */ @@ -1169,16 +1169,16 @@ void PostLinearize(const DepGraph& depgraph, std::span li { /** The index of the previous transaction in this group; NO_PREV_TX if this is the first * entry of a group. */ - ClusterIndex prev_tx; + DepGraphIndex prev_tx; // The fields below are only used for transactions that are the last one in a group // (referred to as tail transactions below). /** Index of the first transaction in this group, possibly itself. */ - ClusterIndex first_tx; + DepGraphIndex first_tx; /** Index of the last transaction in the previous group. The first group (the sentinel) * points back to the last group here, making it a singly-linked circular list. */ - ClusterIndex prev_group; + DepGraphIndex prev_group; /** All transactions in the group. Empty for the sentinel. */ SetType group; /** All dependencies of the group (descendants in even passes; ancestors in odd ones). */ @@ -1221,12 +1221,12 @@ void PostLinearize(const DepGraph& depgraph, std::span li Assume(entries[SENTINEL].feerate.IsEmpty()); // Iterate over all elements in the existing linearization. - for (ClusterIndex i = 0; i < linearization.size(); ++i) { + for (DepGraphIndex i = 0; i < linearization.size(); ++i) { // Even passes are from back to front; odd passes from front to back. - ClusterIndex idx = linearization[rev ? linearization.size() - 1 - i : i]; + DepGraphIndex idx = linearization[rev ? linearization.size() - 1 - i : i]; // Construct a new group containing just idx. In even passes, the meaning of // parent/child and high/low feerate are swapped. - ClusterIndex cur_group = idx + 1; + DepGraphIndex cur_group = idx + 1; entries[cur_group].group = SetType::Singleton(idx); entries[cur_group].deps = rev ? 
depgraph.Descendants(idx): depgraph.Ancestors(idx); entries[cur_group].feerate = depgraph.FeeRate(idx); @@ -1238,8 +1238,8 @@ void PostLinearize(const DepGraph& depgraph, std::span li entries[SENTINEL].prev_group = cur_group; // Start merge/swap cycle. - ClusterIndex next_group = SENTINEL; // We inserted at the end, so next group is sentinel. - ClusterIndex prev_group = entries[cur_group].prev_group; + DepGraphIndex next_group = SENTINEL; // We inserted at the end, so next group is sentinel. + DepGraphIndex prev_group = entries[cur_group].prev_group; // Continue as long as the current group has higher feerate than the previous one. while (entries[cur_group].feerate >> entries[prev_group].feerate) { // prev_group/cur_group/next_group refer to (the last transactions of) 3 @@ -1267,7 +1267,7 @@ void PostLinearize(const DepGraph& depgraph, std::span li entries[cur_group].prev_group = prev_group; } else { // There is no dependency between cur_group and prev_group; swap them. - ClusterIndex preprev_group = entries[prev_group].prev_group; + DepGraphIndex preprev_group = entries[prev_group].prev_group; // If PP, P, C, N were the old preprev, prev, cur, next groups, then the new // layout becomes [PP, C, P, N]. Update prev_groups to reflect that order. entries[next_group].prev_group = prev_group; @@ -1282,10 +1282,10 @@ void PostLinearize(const DepGraph& depgraph, std::span li } // Convert the entries back to linearization (overwriting the existing one). - ClusterIndex cur_group = entries[0].prev_group; - ClusterIndex done = 0; + DepGraphIndex cur_group = entries[0].prev_group; + DepGraphIndex done = 0; while (cur_group != SENTINEL) { - ClusterIndex cur_tx = cur_group; + DepGraphIndex cur_tx = cur_group; // Traverse the transactions of cur_group (from back to front), and write them in the // same order during odd passes, and reversed (front to back) in even passes. 
if (rev) { @@ -1310,7 +1310,7 @@ void PostLinearize(const DepGraph& depgraph, std::span li * Complexity: O(N^2) where N=depgraph.TxCount(); O(N) if both inputs are identical. */ template -std::vector MergeLinearizations(const DepGraph& depgraph, std::span lin1, std::span lin2) +std::vector MergeLinearizations(const DepGraph& depgraph, std::span lin1, std::span lin2) { Assume(lin1.size() == depgraph.TxCount()); Assume(lin2.size() == depgraph.TxCount()); @@ -1318,7 +1318,7 @@ std::vector MergeLinearizations(const DepGraph& depgraph, /** Chunkings of what remains of both input linearizations. */ LinearizationChunking chunking1(depgraph, lin1), chunking2(depgraph, lin2); /** Output linearization. */ - std::vector ret; + std::vector ret; if (depgraph.TxCount() == 0) return ret; ret.reserve(depgraph.TxCount()); @@ -1349,18 +1349,18 @@ std::vector MergeLinearizations(const DepGraph& depgraph, /** Make linearization topological, retaining its ordering where possible. */ template -void FixLinearization(const DepGraph& depgraph, std::span linearization) noexcept +void FixLinearization(const DepGraph& depgraph, std::span linearization) noexcept { // This algorithm can be summarized as moving every element in the linearization backwards // until it is placed after all its ancestors. SetType done; const auto len = linearization.size(); // Iterate over the elements of linearization from back to front (i is distance from back). - for (ClusterIndex i = 0; i < len; ++i) { + for (DepGraphIndex i = 0; i < len; ++i) { /** The element at that position. */ - ClusterIndex elem = linearization[len - 1 - i]; + DepGraphIndex elem = linearization[len - 1 - i]; /** j represents how far from the back of the linearization elem should be placed. */ - ClusterIndex j = i; + DepGraphIndex j = i; // Figure out which elements need to be moved before elem. 
SetType place_before = done & depgraph.Ancestors(elem); // Find which position to place elem in (updating j), continuously moving the elements diff --git a/src/test/cluster_linearize_tests.cpp b/src/test/cluster_linearize_tests.cpp index 265ccdc805e..3413af4a219 100644 --- a/src/test/cluster_linearize_tests.cpp +++ b/src/test/cluster_linearize_tests.cpp @@ -28,11 +28,11 @@ void TestDepGraphSerialization(const std::vector>& c // Construct DepGraph from cluster argument. DepGraph depgraph; SetType holes; - for (ClusterIndex i = 0; i < cluster.size(); ++i) { + for (DepGraphIndex i = 0; i < cluster.size(); ++i) { depgraph.AddTransaction(cluster[i].first); if (cluster[i] == HOLE) holes.Set(i); } - for (ClusterIndex i = 0; i < cluster.size(); ++i) { + for (DepGraphIndex i = 0; i < cluster.size(); ++i) { depgraph.AddDependencies(cluster[i].second, i); } depgraph.RemoveTransactions(holes); diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index f5c0c897c98..c7e40a833da 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -149,9 +149,9 @@ public: * than AncestorCandidateFinder and SearchCandidateFinder. */ template -std::pair, bool> SimpleLinearize(const DepGraph& depgraph, uint64_t max_iterations) +std::pair, bool> SimpleLinearize(const DepGraph& depgraph, uint64_t max_iterations) { - std::vector linearization; + std::vector linearization; SimpleCandidateFinder finder(depgraph); SetType todo = depgraph.Positions(); bool optimal = true; @@ -203,9 +203,9 @@ SetType ReadTopologicalSet(const DepGraph& depgraph, const SetType& tod /** Given a dependency graph, construct any valid linearization for it, reading from a SpanReader. 
*/ template -std::vector ReadLinearization(const DepGraph& depgraph, SpanReader& reader) +std::vector ReadLinearization(const DepGraph& depgraph, SpanReader& reader) { - std::vector linearization; + std::vector linearization; TestBitSet todo = depgraph.Positions(); // In every iteration one topologically-valid transaction is appended to linearization. while (todo.Any()) { @@ -253,18 +253,18 @@ FUZZ_TARGET(clusterlin_depgraph_sim) * sim[i]->first is its individual feerate, and sim[i]->second is its set of ancestors. */ std::array>, TestBitSet::Size()> sim; /** The number of non-nullopt position in sim. */ - ClusterIndex num_tx_sim{0}; + DepGraphIndex num_tx_sim{0}; /** Read a valid index of a transaction from the provider. */ auto idx_fn = [&]() { - auto offset = provider.ConsumeIntegralInRange(0, num_tx_sim - 1); - for (ClusterIndex i = 0; i < sim.size(); ++i) { + auto offset = provider.ConsumeIntegralInRange(0, num_tx_sim - 1); + for (DepGraphIndex i = 0; i < sim.size(); ++i) { if (!sim[i].has_value()) continue; if (offset == 0) return i; --offset; } assert(false); - return ClusterIndex(-1); + return DepGraphIndex(-1); }; /** Read a valid subset of the transactions from the provider. 
*/ @@ -273,7 +273,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) const auto mask = provider.ConsumeIntegralInRange(0, range); auto mask_shifted = mask; TestBitSet subset; - for (ClusterIndex i = 0; i < sim.size(); ++i) { + for (DepGraphIndex i = 0; i < sim.size(); ++i) { if (!sim[i].has_value()) continue; if (mask_shifted & 1) { subset.Set(i); @@ -289,7 +289,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) auto range = (uint64_t{1} << sim.size()) - 1; const auto mask = provider.ConsumeIntegralInRange(0, range); TestBitSet set; - for (ClusterIndex i = 0; i < sim.size(); ++i) { + for (DepGraphIndex i = 0; i < sim.size(); ++i) { if ((mask >> i) & 1) { set.Set(i); } @@ -301,7 +301,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) auto anc_update_fn = [&]() { while (true) { bool updates{false}; - for (ClusterIndex chl = 0; chl < sim.size(); ++chl) { + for (DepGraphIndex chl = 0; chl < sim.size(); ++chl) { if (!sim[chl].has_value()) continue; for (auto par : sim[chl]->second) { if (!sim[chl]->second.IsSupersetOf(sim[par]->second)) { @@ -315,7 +315,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) }; /** Compare the state of transaction i in the simulation with the real one. */ - auto check_fn = [&](ClusterIndex i) { + auto check_fn = [&](DepGraphIndex i) { // Compare used positions. assert(real.Positions()[i] == sim[i].has_value()); if (sim[i].has_value()) { @@ -338,7 +338,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) auto idx = real.AddTransaction(feerate); // Verify that the returned index is correct. assert(!sim[idx].has_value()); - for (ClusterIndex i = 0; i < TestBitSet::Size(); ++i) { + for (DepGraphIndex i = 0; i < TestBitSet::Size(); ++i) { if (!sim[i].has_value()) { assert(idx == i); break; @@ -351,7 +351,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) } if ((command % 3) <= 1 && num_tx_sim > 0) { // AddDependencies. - ClusterIndex child = idx_fn(); + DepGraphIndex child = idx_fn(); auto parents = subset_fn(); // Apply to DepGraph. 
real.AddDependencies(parents, child); @@ -370,7 +370,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) // Apply to DepGraph. real.RemoveTransactions(del); // Apply to sim. - for (ClusterIndex i = 0; i < sim.size(); ++i) { + for (DepGraphIndex i = 0; i < sim.size(); ++i) { if (sim[i].has_value()) { if (del[i]) { --num_tx_sim; @@ -388,7 +388,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) // Compare the real obtained depgraph against the simulation. anc_update_fn(); - for (ClusterIndex i = 0; i < sim.size(); ++i) check_fn(i); + for (DepGraphIndex i = 0; i < sim.size(); ++i) check_fn(i); assert(real.TxCount() == num_tx_sim); // Sanity check the result (which includes round-tripping serialization, if applicable). SanityCheck(real); @@ -401,7 +401,7 @@ FUZZ_TARGET(clusterlin_depgraph_serialization) // Construct a graph by deserializing. SpanReader reader(buffer); DepGraph depgraph; - ClusterIndex par_code{0}, chl_code{0}; + DepGraphIndex par_code{0}, chl_code{0}; try { reader >> Using(depgraph) >> VARINT(par_code) >> VARINT(chl_code); } catch (const std::ios_base::failure&) {} @@ -412,7 +412,7 @@ FUZZ_TARGET(clusterlin_depgraph_serialization) // Introduce a cycle, and then test that IsAcyclic returns false. if (depgraph.TxCount() < 2) return; - ClusterIndex par(0), chl(0); + DepGraphIndex par(0), chl(0); // Pick any transaction of depgraph as parent. 
par_code %= depgraph.TxCount(); for (auto i : depgraph.Positions()) { @@ -498,7 +498,7 @@ FUZZ_TARGET(clusterlin_components) reader >> VARINT(subset_bits); } catch (const std::ios_base::failure&) {} TestBitSet subset; - for (ClusterIndex i : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { if (todo[i]) { if (subset_bits & 1) subset.Set(i); subset_bits >>= 1; @@ -555,7 +555,7 @@ FUZZ_TARGET(clusterlin_chunking) for (const auto& chunk_feerate : chunking) { assert(todo.Any()); SetInfo accumulator, best; - for (ClusterIndex idx : linearization) { + for (DepGraphIndex idx : linearization) { if (todo[idx]) { accumulator.Set(depgraph, idx); if (best.feerate.IsEmpty() || accumulator.feerate >> best.feerate) { @@ -766,7 +766,7 @@ FUZZ_TARGET(clusterlin_linearization_chunking) assert(chunking.NumChunksLeft() > 0); // Construct linearization with just todo. - std::vector linearization_left; + std::vector linearization_left; for (auto i : linearization) { if (todo[i]) linearization_left.push_back(i); } @@ -776,13 +776,13 @@ FUZZ_TARGET(clusterlin_linearization_chunking) // Verify that it matches the feerates of the chunks of chunking. assert(chunking.NumChunksLeft() == chunking_left.size()); - for (ClusterIndex i = 0; i < chunking.NumChunksLeft(); ++i) { + for (DepGraphIndex i = 0; i < chunking.NumChunksLeft(); ++i) { assert(chunking.GetChunk(i).feerate == chunking_left[i]); } // Check consistency of chunking. TestBitSet combined; - for (ClusterIndex i = 0; i < chunking.NumChunksLeft(); ++i) { + for (DepGraphIndex i = 0; i < chunking.NumChunksLeft(); ++i) { const auto& chunk_info = chunking.GetChunk(i); // Chunks must be non-empty. assert(chunk_info.transactions.Any()); @@ -833,7 +833,7 @@ FUZZ_TARGET(clusterlin_linearization_chunking) // - No non-empty intersection between the intersection and a prefix of the chunks of the // remainder of the linearization may be better than the intersection. 
TestBitSet prefix; - for (ClusterIndex i = 0; i < chunking.NumChunksLeft(); ++i) { + for (DepGraphIndex i = 0; i < chunking.NumChunksLeft(); ++i) { prefix |= chunking.GetChunk(i).transactions; auto reintersect = SetInfo(depgraph, prefix & intersect.transactions); if (!reintersect.feerate.IsEmpty()) { @@ -875,7 +875,7 @@ FUZZ_TARGET(clusterlin_linearize) if (make_connected) MakeConnected(depgraph); // Optionally construct an old linearization for it. - std::vector old_linearization; + std::vector old_linearization; { uint8_t have_old_linearization{0}; try { @@ -934,8 +934,8 @@ FUZZ_TARGET(clusterlin_linearize) // Only for very small clusters, test every topologically-valid permutation. if (depgraph.TxCount() <= 7) { - std::vector perm_linearization; - for (ClusterIndex i : depgraph.Positions()) perm_linearization.push_back(i); + std::vector perm_linearization; + for (DepGraphIndex i : depgraph.Positions()) perm_linearization.push_back(i); // Iterate over all valid permutations. do { // Determine whether perm_linearization is topological. @@ -971,7 +971,7 @@ FUZZ_TARGET(clusterlin_postlinearize) } catch (const std::ios_base::failure&) {} // Retrieve a linearization from the fuzz input. - std::vector linearization; + std::vector linearization; linearization = ReadLinearization(depgraph, reader); SanityCheck(depgraph, linearization); @@ -1019,7 +1019,7 @@ FUZZ_TARGET(clusterlin_postlinearize_tree) // Now construct a new graph, copying the nodes, but leaving only the first parent (even // direction) or the first child (odd direction). 
DepGraph depgraph_tree; - for (ClusterIndex i = 0; i < depgraph_gen.PositionRange(); ++i) { + for (DepGraphIndex i = 0; i < depgraph_gen.PositionRange(); ++i) { if (depgraph_gen.Positions()[i]) { depgraph_tree.AddTransaction(depgraph_gen.FeeRate(i)); } else { @@ -1031,14 +1031,14 @@ FUZZ_TARGET(clusterlin_postlinearize_tree) depgraph_tree.RemoveTransactions(TestBitSet::Fill(depgraph_gen.PositionRange()) - depgraph_gen.Positions()); if (direction & 1) { - for (ClusterIndex i = 0; i < depgraph_gen.TxCount(); ++i) { + for (DepGraphIndex i = 0; i < depgraph_gen.TxCount(); ++i) { auto children = depgraph_gen.GetReducedChildren(i); if (children.Any()) { depgraph_tree.AddDependencies(TestBitSet::Singleton(i), children.First()); } } } else { - for (ClusterIndex i = 0; i < depgraph_gen.TxCount(); ++i) { + for (DepGraphIndex i = 0; i < depgraph_gen.TxCount(); ++i) { auto parents = depgraph_gen.GetReducedParents(i); if (parents.Any()) { depgraph_tree.AddDependencies(TestBitSet::Singleton(parents.First()), i); @@ -1047,7 +1047,7 @@ FUZZ_TARGET(clusterlin_postlinearize_tree) } // Retrieve a linearization from the fuzz input. - std::vector linearization; + std::vector linearization; linearization = ReadLinearization(depgraph_tree, reader); SanityCheck(depgraph_tree, linearization); @@ -1104,7 +1104,7 @@ FUZZ_TARGET(clusterlin_postlinearize_moved_leaf) // Construct a linearization identical to lin, but with the tail end of lin_leaf moved to the // back. - std::vector lin_moved; + std::vector lin_moved; for (auto i : lin) { if (i != lin_leaf.back()) lin_moved.push_back(i); } @@ -1160,7 +1160,7 @@ FUZZ_TARGET(clusterlin_fix_linearization) } catch (const std::ios_base::failure&) {} // Construct an arbitrary linearization (not necessarily topological for depgraph). - std::vector linearization; + std::vector linearization; /** Which transactions of depgraph are yet to be included in linearization. 
*/ TestBitSet todo = depgraph.Positions(); while (todo.Any()) { @@ -1188,7 +1188,7 @@ FUZZ_TARGET(clusterlin_fix_linearization) size_t topo_prefix = 0; todo = depgraph.Positions(); while (topo_prefix < linearization.size()) { - ClusterIndex idx = linearization[topo_prefix]; + DepGraphIndex idx = linearization[topo_prefix]; todo.Reset(idx); if (todo.Overlaps(depgraph.Ancestors(idx))) break; ++topo_prefix; diff --git a/src/test/util/cluster_linearize.h b/src/test/util/cluster_linearize.h index 3db51a6b80b..fa7945aac65 100644 --- a/src/test/util/cluster_linearize.h +++ b/src/test/util/cluster_linearize.h @@ -122,10 +122,10 @@ struct DepGraphFormatter static void Ser(Stream& s, const DepGraph& depgraph) { /** Construct a topological order to serialize the transactions in. */ - std::vector topo_order; + std::vector topo_order; topo_order.reserve(depgraph.TxCount()); for (auto i : depgraph.Positions()) topo_order.push_back(i); - std::sort(topo_order.begin(), topo_order.end(), [&](ClusterIndex a, ClusterIndex b) { + std::sort(topo_order.begin(), topo_order.end(), [&](DepGraphIndex a, DepGraphIndex b) { auto anc_a = depgraph.Ancestors(a).Count(), anc_b = depgraph.Ancestors(b).Count(); if (anc_a != anc_b) return anc_a < anc_b; return a < b; @@ -136,9 +136,9 @@ struct DepGraphFormatter SetType done; // Loop over the transactions in topological order. - for (ClusterIndex topo_idx = 0; topo_idx < topo_order.size(); ++topo_idx) { + for (DepGraphIndex topo_idx = 0; topo_idx < topo_order.size(); ++topo_idx) { /** Which depgraph index we are currently writing. */ - ClusterIndex idx = topo_order[topo_idx]; + DepGraphIndex idx = topo_order[topo_idx]; // Write size, which must be larger than 0. s << VARINT_MODE(depgraph.FeeRate(idx).size, VarIntMode::NONNEGATIVE_SIGNED); // Write fee, encoded as an unsigned varint (odd=negative, even=non-negative). @@ -146,9 +146,9 @@ struct DepGraphFormatter // Write dependency information. 
SetType written_parents; uint64_t diff = 0; //!< How many potential parent/child relations we have skipped over. - for (ClusterIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { + for (DepGraphIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { /** Which depgraph index we are currently considering as parent of idx. */ - ClusterIndex dep_idx = topo_order[topo_idx - 1 - dep_dist]; + DepGraphIndex dep_idx = topo_order[topo_idx - 1 - dep_dist]; // Ignore transactions which are already known to be ancestors. if (depgraph.Descendants(dep_idx).Overlaps(written_parents)) continue; if (depgraph.Ancestors(idx)[dep_idx]) { @@ -191,9 +191,9 @@ struct DepGraphFormatter DepGraph topo_depgraph; /** Mapping from serialization order to cluster order, used later to reconstruct the * cluster order. */ - std::vector reordering; + std::vector reordering; /** How big the entries vector in the reconstructed depgraph will be (including holes). */ - ClusterIndex total_size{0}; + DepGraphIndex total_size{0}; // Read transactions in topological order. while (true) { @@ -217,9 +217,9 @@ struct DepGraphFormatter // Read dependency information. auto topo_idx = reordering.size(); s >> VARINT(diff); - for (ClusterIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { + for (DepGraphIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { /** Which topo_depgraph index we are currently considering as parent of topo_idx. */ - ClusterIndex dep_topo_idx = topo_idx - 1 - dep_dist; + DepGraphIndex dep_topo_idx = topo_idx - 1 - dep_dist; // Ignore transactions which are already known ancestors of topo_idx. if (new_ancestors[dep_topo_idx]) continue; if (diff == 0) { @@ -286,9 +286,9 @@ template void SanityCheck(const DepGraph& depgraph) { // Verify Positions and PositionRange consistency. 
- ClusterIndex num_positions{0}; - ClusterIndex position_range{0}; - for (ClusterIndex i : depgraph.Positions()) { + DepGraphIndex num_positions{0}; + DepGraphIndex position_range{0}; + for (DepGraphIndex i : depgraph.Positions()) { ++num_positions; position_range = i + 1; } @@ -297,7 +297,7 @@ void SanityCheck(const DepGraph& depgraph) assert(position_range >= num_positions); assert(position_range <= SetType::Size()); // Consistency check between ancestors internally. - for (ClusterIndex i : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { // Transactions include themselves as ancestors. assert(depgraph.Ancestors(i)[i]); // If a is an ancestor of b, then b's ancestors must include all of a's ancestors. @@ -306,8 +306,8 @@ void SanityCheck(const DepGraph& depgraph) } } // Consistency check between ancestors and descendants. - for (ClusterIndex i : depgraph.Positions()) { - for (ClusterIndex j : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { + for (DepGraphIndex j : depgraph.Positions()) { assert(depgraph.Ancestors(i)[j] == depgraph.Descendants(j)[i]); } // No transaction is a parent or child of itself. @@ -348,7 +348,7 @@ void SanityCheck(const DepGraph& depgraph) // In acyclic graphs, the union of parents with parents of parents etc. yields the // full ancestor set (and similar for children and descendants). std::vector parents(depgraph.PositionRange()), children(depgraph.PositionRange()); - for (ClusterIndex i : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { parents[i] = depgraph.GetReducedParents(i); children[i] = depgraph.GetReducedChildren(i); } @@ -380,7 +380,7 @@ void SanityCheck(const DepGraph& depgraph) /** Perform a sanity check on a linearization. */ template -void SanityCheck(const DepGraph& depgraph, std::span linearization) +void SanityCheck(const DepGraph& depgraph, std::span linearization) { // Check completeness. 
assert(linearization.size() == depgraph.TxCount()); From 6eab3b2d7380b8ff818e3a1cefeb7731b7342e04 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 30 Jan 2025 17:14:52 -0500 Subject: [PATCH 04/25] feefrac: Introduce tagged wrappers to distinguish vsize/WU rates --- src/util/feefrac.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/util/feefrac.h b/src/util/feefrac.h index 6097d1ec638..2a1754ac514 100644 --- a/src/util/feefrac.h +++ b/src/util/feefrac.h @@ -156,4 +156,26 @@ struct FeeFrac */ std::partial_ordering CompareChunks(std::span chunks0, std::span chunks1); +/** Tagged wrapper around FeeFrac to avoid unit confusion. */ +template +struct FeePerUnit : public FeeFrac +{ + // Inherit FeeFrac constructors. + using FeeFrac::FeeFrac; + + /** Convert a FeeFrac to a FeePerUnit. */ + static FeePerUnit FromFeeFrac(const FeeFrac& feefrac) noexcept + { + return {feefrac.fee, feefrac.size}; + } +}; + +// FeePerUnit instance for satoshi / vbyte. +struct VSizeTag {}; +using FeePerVSize = FeePerUnit; + +// FeePerUnit instance for satoshi / WU. +struct WeightTag {}; +using FeePerWeight = FeePerUnit; + #endif // BITCOIN_UTIL_FEEFRAC_H From 8ad3ed26818a620cb973cd4e5eaa7b49313f562b Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 21 Aug 2024 14:37:00 -0400 Subject: [PATCH 05/25] txgraph: Add initial version (feature) This adds an initial version of the txgraph module, with the TxGraph class. It encapsulates knowledge about the fees, sizes, and dependencies between all mempool transactions, but nothing else. In particular, it lacks knowledge about txids, inputs, outputs, CTransactions, ... and so forth. Instead, it exposes a generic TxGraph::Ref type to reference nodes in the TxGraph, which can be passed around and stored by layers on top. 
--- src/CMakeLists.txt | 1 + src/txgraph.cpp | 1138 ++++++++++++++++++++++++++++++++++++++++++++ src/txgraph.h | 139 ++++++ 3 files changed, 1278 insertions(+) create mode 100644 src/txgraph.cpp create mode 100644 src/txgraph.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 41577b2ad6d..e9a67faa51a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -301,6 +301,7 @@ add_library(bitcoin_node STATIC EXCLUDE_FROM_ALL signet.cpp torcontrol.cpp txdb.cpp + txgraph.cpp txmempool.cpp txorphanage.cpp txrequest.cpp diff --git a/src/txgraph.cpp b/src/txgraph.cpp new file mode 100644 index 00000000000..81802e2ddf4 --- /dev/null +++ b/src/txgraph.cpp @@ -0,0 +1,1138 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace { + +using namespace cluster_linearize; + +// Forward declare the TxGraph implementation class. +class TxGraphImpl; + +/** Position of a DepGraphIndex within a Cluster::m_linearization. */ +using LinearizationIndex = uint32_t; +/** Position of a Cluster within Graph::m_clusters. */ +using ClusterSetIndex = uint32_t; + +/** Quality levels for cached cluster linearizations. */ +enum class QualityLevel +{ + /** This cluster may have multiple disconnected components, which are all NEEDS_RELINEARIZE. */ + NEEDS_SPLIT, + /** This cluster has undergone changes that warrant re-linearization. */ + NEEDS_RELINEARIZE, + /** The minimal level of linearization has been performed, but it is not known to be optimal. */ + ACCEPTABLE, + /** The linearization is known to be optimal. */ + OPTIMAL, + /** This cluster is not registered in any m_clusters. + * This must be the last entry in QualityLevel as m_clusters is sized using it. */ + NONE, +}; + +/** A grouping of connected transactions inside a TxGraphImpl. 
*/ +class Cluster +{ + friend class TxGraphImpl; + using GraphIndex = TxGraph::GraphIndex; + using SetType = BitSet; + /** The DepGraph for this cluster, holding all feerates, and ancestors/descendants. */ + DepGraph m_depgraph; + /** m_mapping[i] gives the GraphIndex for the position i transaction in m_depgraph. Values for + * positions i that do not exist in m_depgraph shouldn't ever be accessed and thus don't + * matter. m_mapping.size() equals m_depgraph.PositionRange(). */ + std::vector m_mapping; + /** The current linearization of the cluster. m_linearization.size() equals + * m_depgraph.TxCount(). This is always kept topological. */ + std::vector m_linearization; + /** The quality level of m_linearization. */ + QualityLevel m_quality{QualityLevel::NONE}; + /** Which position this Cluster has in Graph::m_clusters[m_quality]. */ + ClusterSetIndex m_setindex{ClusterSetIndex(-1)}; + +public: + /** Construct an empty Cluster. */ + Cluster() noexcept = default; + /** Construct a singleton Cluster. */ + explicit Cluster(TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept; + + // Cannot move or copy (would invalidate Cluster* in Locator and TxGraphImpl). + Cluster(const Cluster&) = delete; + Cluster& operator=(const Cluster&) = delete; + Cluster(Cluster&&) = delete; + Cluster& operator=(Cluster&&) = delete; + + // Generic helper functions. + + /** Whether the linearization of this Cluster can be exposed. */ + bool IsAcceptable() const noexcept + { + return m_quality == QualityLevel::ACCEPTABLE || m_quality == QualityLevel::OPTIMAL; + } + /** Whether the linearization of this Cluster is optimal. */ + bool IsOptimal() const noexcept + { + return m_quality == QualityLevel::OPTIMAL; + } + /** Whether this cluster requires splitting. */ + bool NeedsSplitting() const noexcept + { + return m_quality == QualityLevel::NEEDS_SPLIT; + } + /** Get the number of transactions in this Cluster.
*/ + LinearizationIndex GetTxCount() const noexcept { return m_linearization.size(); } + /** Only called by Graph::SwapIndexes. */ + void UpdateMapping(DepGraphIndex cluster_idx, GraphIndex graph_idx) noexcept { m_mapping[cluster_idx] = graph_idx; } + /** Push changes to Cluster and its linearization to the TxGraphImpl Entry objects. */ + void Updated(TxGraphImpl& graph) noexcept; + + // Functions that implement the Cluster-specific side of internal TxGraphImpl mutations. + + /** Apply all removals from the front of to_remove that apply to this Cluster, popping them + * off. There must be at least one such entry. */ + void ApplyRemovals(TxGraphImpl& graph, std::span& to_remove) noexcept; + /** Split this cluster (must have a NEEDS_SPLIT* quality). Returns whether to delete this + * Cluster afterwards. */ + [[nodiscard]] bool Split(TxGraphImpl& graph) noexcept; + /** Move all transactions from cluster to *this (as separate components). */ + void Merge(TxGraphImpl& graph, Cluster& cluster) noexcept; + /** Given a span of (parent, child) pairs that all belong to this Cluster, apply them. */ + void ApplyDependencies(TxGraphImpl& graph, std::span> to_apply) noexcept; + /** Improve the linearization of this Cluster. */ + void Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept; + + // Functions that implement the Cluster-specific side of public TxGraph functions. + + /** Get a vector of Refs for the ancestors of a given Cluster element. */ + std::vector GetAncestorRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept; + /** Get a vector of Refs for the descendants of a given Cluster element. */ + std::vector GetDescendantRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept; + /** Get a vector of Refs for all elements of this Cluster, in linearization order. */ + std::vector GetClusterRefs(const TxGraphImpl& graph) noexcept; + /** Get the individual transaction feerate of a Cluster element.
*/ + FeePerWeight GetIndividualFeerate(DepGraphIndex idx) noexcept; + /** Modify the fee of a Cluster element. */ + void SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcept; +}; + +/** The transaction graph. + * + * The overall design of the data structure consists of 3 interlinked representations: + * - The transactions (held as a vector of TxGraphImpl::Entry inside TxGraphImpl). + * - The clusters (Cluster objects in per-quality vectors inside TxGraphImpl). + * - The Refs (TxGraph::Ref objects, held externally by users of the TxGraph class). + * + * Clusters and Refs contain the index of the Entry objects they refer to, and the Entry objects + * refer back to the Clusters and Refs the corresponding transaction is contained in. + * + * While redundant, this permits moving all of them independently, without invalidating things + * or costly iteration to fix up everything: + * - Entry objects can be moved to fill holes left by removed transactions in the Entry vector + * (see TxGraphImpl::Compact). + * - Clusters can be rewritten continuously (removals can cause them to split, new dependencies + * can cause them to be merged). + * - Ref objects can be held outside the class, while permitting them to be moved around, and + * inherited from. + */ +class TxGraphImpl final : public TxGraph +{ + friend class Cluster; +private: + /** Internal RNG. */ + FastRandomContext m_rng; + + /** Information about one group of Clusters to be merged. */ + struct GroupEntry + { + /** Which clusters are to be merged. */ + std::vector m_clusters; + /** Which dependencies are to be applied to those merged clusters, as (parent, child) + * pairs. */ + std::vector> m_deps; + }; + + /** The vectors of clusters, one vector per quality level. ClusterSetIndex indexes into each. */ + std::array>, int(QualityLevel::NONE)> m_clusters; + /** Which removals have yet to be applied. */ + std::vector m_to_remove; + /** Which dependencies are to be added ((parent,child) pairs). 
GroupEntry::m_deps holds + * per-group copies of these. */ + std::vector> m_deps_to_add; + /** Information about the merges to be performed, if known. */ + std::optional> m_group_data = std::vector{}; + /** Total number of transactions in this graph (sum of all transaction counts in all Clusters). + * */ + GraphIndex m_txcount{0}; + + /** A Locator that describes whether, where, and in which Cluster an Entry appears. */ + struct Locator + { + /** Which Cluster the Entry appears in (nullptr = missing). */ + Cluster* cluster{nullptr}; + /** Where in the Cluster it appears (only if cluster != nullptr). */ + DepGraphIndex index{0}; + + /** Mark this Locator as missing. */ + void SetMissing() noexcept { cluster = nullptr; index = 0; } + /** Mark this Locator as present, in the specified Cluster. */ + void SetPresent(Cluster* c, DepGraphIndex i) noexcept { cluster = c; index = i; } + /** Check if this Locator is present (in some Cluster). */ + bool IsPresent() const noexcept { return cluster != nullptr; } + }; + + /** Internal information about each transaction in a TxGraphImpl. */ + struct Entry + { + /** Pointer to the corresponding Ref object if any, or nullptr if unlinked. */ + Ref* m_ref{nullptr}; + /** Which Cluster and position therein this Entry appears in. */ + Locator m_locator; + }; + + /** The set of all transactions. GraphIndex values index into this. */ + std::vector m_entries; + + /** Set of Entries which have no linked Ref anymore. */ + std::vector m_unlinked; + +public: + /** Construct a new TxGraphImpl. */ + explicit TxGraphImpl() noexcept {} + + // Cannot move or copy (would invalidate TxGraphImpl* in Ref, MiningOrder, EvictionOrder). + TxGraphImpl(const TxGraphImpl&) = delete; + TxGraphImpl& operator=(const TxGraphImpl&) = delete; + TxGraphImpl(TxGraphImpl&&) = delete; + TxGraphImpl& operator=(TxGraphImpl&&) = delete; + + // Simple helper functions. + + /** Swap the Entrys referred to by a and b. 
*/ + void SwapIndexes(GraphIndex a, GraphIndex b) noexcept; + /** Extract a Cluster. */ + std::unique_ptr ExtractCluster(QualityLevel quality, ClusterSetIndex setindex) noexcept; + /** Delete a Cluster. */ + void DeleteCluster(Cluster& cluster) noexcept; + /** Insert a Cluster. */ + ClusterSetIndex InsertCluster(std::unique_ptr&& cluster, QualityLevel quality) noexcept; + /** Change the QualityLevel of a Cluster (identified by old_quality and old_index). */ + void SetClusterQuality(QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept; + + // Functions for handling Refs. + + /** Only called by Ref's move constructor/assignment to update Ref locations. */ + void UpdateRef(GraphIndex idx, Ref& new_location) noexcept final + { + auto& entry = m_entries[idx]; + Assume(entry.m_ref != nullptr); + entry.m_ref = &new_location; + } + + /** Only called by Ref::~Ref to unlink Refs, and Ref's move assignment. */ + void UnlinkRef(GraphIndex idx) noexcept final + { + auto& entry = m_entries[idx]; + Assume(entry.m_ref != nullptr); + entry.m_ref = nullptr; + m_unlinked.push_back(idx); + Compact(); + } + + // Functions related to various normalization/application steps. + /** Get rid of unlinked Entry objects in m_entries, if possible (this changes the GraphIndex + * values for remaining Entrys, so this only does something when no to-be-applied operations + * referring to GraphIndexes remain). */ + void Compact() noexcept; + /** Apply all removals queued up in m_to_remove to the relevant Clusters (which get a + * NEEDS_SPLIT QualityLevel). */ + void ApplyRemovals() noexcept; + /** Split an individual cluster. */ + void Split(Cluster& cluster) noexcept; + /** Split all clusters that need splitting. */ + void SplitAll() noexcept; + /** Populate m_group_data based on m_deps_to_add. */ + void GroupClusters() noexcept; + /** Merge the specified clusters. 
*/ + void Merge(std::span to_merge) noexcept; + /** Apply all m_deps_to_add to the relevant Clusters. */ + void ApplyDependencies() noexcept; + /** Make a specified Cluster have quality ACCEPTABLE or OPTIMAL. */ + void MakeAcceptable(Cluster& cluster) noexcept; + + // Implementations for the public TxGraph interface. + + Ref AddTransaction(const FeePerWeight& feerate) noexcept final; + void RemoveTransaction(const Ref& arg) noexcept final; + void AddDependency(const Ref& parent, const Ref& child) noexcept final; + void SetTransactionFee(const Ref&, int64_t fee) noexcept final; + + bool Exists(const Ref& arg) noexcept final; + FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept final; + std::vector GetCluster(const Ref& arg) noexcept final; + std::vector GetAncestors(const Ref& arg) noexcept final; + std::vector GetDescendants(const Ref& arg) noexcept final; + GraphIndex GetTransactionCount() noexcept final; +}; + +void Cluster::Updated(TxGraphImpl& graph) noexcept +{ + // Update all the Locators for this Cluster's Entrys, so each points at this Cluster and at the transaction's DepGraph position. + for (DepGraphIndex idx : m_linearization) { + auto& entry = graph.m_entries[m_mapping[idx]]; + entry.m_locator.SetPresent(this, idx); + } +} + +void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove) noexcept +{ + // Iterate over the prefix of to_remove that applies to this cluster. + Assume(!to_remove.empty()); + SetType todo; + do { + GraphIndex idx = to_remove.front(); + Assume(idx < graph.m_entries.size()); + auto& entry = graph.m_entries[idx]; + auto& locator = entry.m_locator; + // Stop once we hit an entry that applies to another Cluster. + if (locator.cluster != this) break; + // - Remember it in a set of to-remove DepGraphIndexes. + todo.Set(locator.index); + // - Remove from m_mapping. This isn't strictly necessary as unused positions in m_mapping + // are just never accessed, but set it to -1 here to increase the ability to detect a bug + // that causes it to be accessed regardless. 
+ m_mapping[locator.index] = GraphIndex(-1); + // - Mark it as removed in the Entry's locator. + locator.SetMissing(); + to_remove = to_remove.subspan(1); + --graph.m_txcount; + } while(!to_remove.empty()); + + Assume(todo.Any()); + // Wipe from the Cluster's DepGraph (this is O(n) regardless of the number of entries + // removed, so we benefit from batching all the removals). + m_depgraph.RemoveTransactions(todo); + m_mapping.resize(m_depgraph.PositionRange()); + + // Filter removals out of m_linearization. + m_linearization.erase(std::remove_if( + m_linearization.begin(), + m_linearization.end(), + [&](auto pos) { return todo[pos]; }), m_linearization.end()); + + graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); + Updated(graph); +} + +bool Cluster::Split(TxGraphImpl& graph) noexcept +{ + // This function can only be called when the Cluster needs splitting. + Assume(NeedsSplitting()); + /** Which positions are still left in this Cluster. */ + auto todo = m_depgraph.Positions(); + /** Mapping from transaction positions in this Cluster to the Cluster where it ends up, and + * its position therein. */ + std::vector> remap(m_depgraph.PositionRange()); + std::vector new_clusters; + bool first{true}; + // Iterate over the connected components of this Cluster's m_depgraph. + while (todo.Any()) { + auto component = m_depgraph.FindConnectedComponent(todo); + if (first && component == todo) { + // The existing Cluster is an entire component. Leave it be, but update its quality. + Assume(todo == m_depgraph.Positions()); + graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + // Push the (unchanged) Cluster contents to the Entry locators. + Updated(graph); + return false; + } + first = false; + // Construct a new Cluster to hold the found component. + auto new_cluster = std::make_unique(); + new_clusters.push_back(new_cluster.get()); + // Remember that all the component's transactions go to this new Cluster. 
The positions + // will be determined below, so use -1 for now. + for (auto i : component) { + remap[i] = {new_cluster.get(), DepGraphIndex(-1)}; + } + graph.InsertCluster(std::move(new_cluster), QualityLevel::NEEDS_RELINEARIZE); + todo -= component; + } + // Redistribute the transactions. + for (auto i : m_linearization) { + /** The Cluster to which the transaction originally in position i is moved. */ + Cluster* new_cluster = remap[i].first; + // Copy the transaction to the new cluster's depgraph, and remember the position. + remap[i].second = new_cluster->m_depgraph.AddTransaction(m_depgraph.FeeRate(i)); + // Create new mapping entry. + new_cluster->m_mapping.push_back(m_mapping[i]); + // Create a new linearization entry. As we're only appending transactions, they equal the + // DepGraphIndex. + new_cluster->m_linearization.push_back(remap[i].second); + } + // Redistribute the dependencies. + for (auto i : m_linearization) { + /** The Cluster to which the transaction in position i is moved. */ + Cluster* new_cluster = remap[i].first; + // Copy its parents, translating positions. + SetType new_parents; + for (auto par : m_depgraph.GetReducedParents(i)) new_parents.Set(remap[par].second); + new_cluster->m_depgraph.AddDependencies(new_parents, remap[i].second); + } + // Update all the Locators of moved transactions. + for (Cluster* new_cluster : new_clusters) { + new_cluster->Updated(graph); + } + // Wipe this Cluster, and return that it needs to be deleted. + m_depgraph = DepGraph{}; + m_mapping.clear(); + m_linearization.clear(); + return true; +} + +void Cluster::Merge(TxGraphImpl& graph, Cluster& other) noexcept +{ + /** Vector to store the positions in this Cluster for each position in other. */ + std::vector remap(other.m_depgraph.PositionRange()); + // Iterate over all transactions in the other Cluster (the one being absorbed). 
+ for (auto pos : other.m_linearization) { + auto idx = other.m_mapping[pos]; + // Copy the transaction into this Cluster, and remember its position. + auto new_pos = m_depgraph.AddTransaction(other.m_depgraph.FeeRate(pos)); + remap[pos] = new_pos; + if (new_pos == m_mapping.size()) { + m_mapping.push_back(idx); + } else { + m_mapping[new_pos] = idx; + } + m_linearization.push_back(new_pos); + // Copy the transaction's dependencies, translating them using remap. Note that since + // pos iterates over other.m_linearization, which is in topological order, all parents + // of pos should already be in remap. + SetType parents; + for (auto par : other.m_depgraph.GetReducedParents(pos)) { + parents.Set(remap[par]); + } + m_depgraph.AddDependencies(parents, remap[pos]); + // Update the transaction's Locator. There is no need to call Updated() to update chunk + // feerates, as Updated() will be invoked by Cluster::ApplyDependencies on the resulting + // merged Cluster later anyway. + graph.m_entries[idx].m_locator.SetPresent(this, new_pos); + } + // Purge the other Cluster, now that everything has been moved. + other.m_depgraph = DepGraph{}; + other.m_linearization.clear(); + other.m_mapping.clear(); +} + +void Cluster::ApplyDependencies(TxGraphImpl& graph, std::span> to_apply) noexcept +{ + // This function is invoked by TxGraphImpl::ApplyDependencies after merging groups of Clusters + // between which dependencies are added, which simply concatenates their linearizations. Invoke + // PostLinearize, which has the effect that the linearization becomes a merge-sort of the + // constituent linearizations. Do this here rather than in Cluster::Merge, because this + // function is only invoked once per merged Cluster, rather than once per constituent one. + // This concatenation + post-linearization could be replaced with an explicit merge-sort. 
+ PostLinearize(m_depgraph, m_linearization); + + // Sort the list of dependencies to apply by child, so those can be applied in batch. + std::sort(to_apply.begin(), to_apply.end(), [](auto& a, auto& b) { return a.second < b.second; }); + // Iterate over groups of to-be-added dependencies with the same child. + auto it = to_apply.begin(); + while (it != to_apply.end()) { + auto& first_child = graph.m_entries[it->second].m_locator; + const auto child_idx = first_child.index; + // Iterate over all to-be-added dependencies within that same child, gather the relevant + // parents. + SetType parents; + while (it != to_apply.end()) { + auto& child = graph.m_entries[it->second].m_locator; + auto& parent = graph.m_entries[it->first].m_locator; + Assume(child.cluster == this && parent.cluster == this); + if (child.index != child_idx) break; + parents.Set(parent.index); + ++it; + } + // Push all dependencies to the underlying DepGraph. Note that this is O(N) in the size of + // the cluster, regardless of the number of parents being added, so batching them together + // has a performance benefit. + m_depgraph.AddDependencies(parents, child_idx); + } + + // Finally fix the linearization, as the new dependencies may have invalidated the + // linearization, and post-linearize it to fix up the worst problems with it. + FixLinearization(m_depgraph, m_linearization); + PostLinearize(m_depgraph, m_linearization); + + // Finally push the changes to graph.m_entries. + Updated(graph); +} + +std::unique_ptr TxGraphImpl::ExtractCluster(QualityLevel quality, ClusterSetIndex setindex) noexcept +{ + Assume(quality != QualityLevel::NONE); + + auto& quality_clusters = m_clusters[int(quality)]; + Assume(setindex < quality_clusters.size()); + + // Extract the Cluster-owning unique_ptr. + std::unique_ptr ret = std::move(quality_clusters[setindex]); + ret->m_quality = QualityLevel::NONE; + ret->m_setindex = ClusterSetIndex(-1); + + // Clean up space in quality_clusters. 
+ auto max_setindex = quality_clusters.size() - 1; + if (setindex != max_setindex) { + // If the cluster was not the last element of quality_clusters, move that to take its place. + quality_clusters.back()->m_setindex = setindex; + quality_clusters[setindex] = std::move(quality_clusters.back()); + } + // The last element of quality_clusters is now unused; drop it. + quality_clusters.pop_back(); + + return ret; +} + +ClusterSetIndex TxGraphImpl::InsertCluster(std::unique_ptr&& cluster, QualityLevel quality) noexcept +{ + // Cannot insert with quality level NONE (as that would mean not inserted). + Assume(quality != QualityLevel::NONE); + // The passed-in Cluster must not currently be in the TxGraphImpl. + Assume(cluster->m_quality == QualityLevel::NONE); + + // Append it at the end of the relevant TxGraphImpl::m_clusters vector. + auto& quality_clusters = m_clusters[int(quality)]; + ClusterSetIndex ret = quality_clusters.size(); + cluster->m_quality = quality; + cluster->m_setindex = ret; + quality_clusters.push_back(std::move(cluster)); + return ret; +} + +void TxGraphImpl::SetClusterQuality(QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept +{ + Assume(new_quality != QualityLevel::NONE); + + // Don't do anything if the quality did not change. + if (old_quality == new_quality) return; + // Extract the cluster from where it currently resides. + auto cluster_ptr = ExtractCluster(old_quality, old_index); + // And re-insert it where it belongs. + InsertCluster(std::move(cluster_ptr), new_quality); +} + +void TxGraphImpl::DeleteCluster(Cluster& cluster) noexcept +{ + // Extract the cluster from where it currently resides. + auto cluster_ptr = ExtractCluster(cluster.m_quality, cluster.m_setindex); + // And throw it away. + cluster_ptr.reset(); +} + +void TxGraphImpl::ApplyRemovals() noexcept +{ + auto& to_remove = m_to_remove; + // Skip if there is nothing to remove. 
+ if (to_remove.empty()) return; + // Group the set of to-be-removed entries by Cluster*. + std::sort(m_to_remove.begin(), m_to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { + return std::less{}(m_entries[a].m_locator.cluster, m_entries[b].m_locator.cluster); + }); + // Process per Cluster. + std::span to_remove_span{m_to_remove}; + while (!to_remove_span.empty()) { + Cluster* cluster = m_entries[to_remove_span.front()].m_locator.cluster; + if (cluster != nullptr) { + // If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it + // can pop off whatever applies to it. + cluster->ApplyRemovals(*this, to_remove_span); + } else { + // Otherwise, skip this already-removed entry. This may happen when RemoveTransaction + // was called twice on the same Ref. + to_remove_span = to_remove_span.subspan(1); + } + } + m_to_remove.clear(); + Compact(); +} + +void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept +{ + Assume(a < m_entries.size()); + Assume(b < m_entries.size()); + // Swap the Entry objects. + std::swap(m_entries[a], m_entries[b]); + // Fix up the back-references (Ref index and Cluster mapping) of both swapped entries. + for (int i = 0; i < 2; ++i) { + GraphIndex idx = i ? b : a; + Entry& entry = m_entries[idx]; + // Update linked Ref. + if (entry.m_ref) GetRefIndex(*entry.m_ref) = idx; + // Update the locator. The rest of the Entry information will not change, so no need to + // invoke Cluster::Updated(). + Locator& locator = entry.m_locator; + if (locator.IsPresent()) { + locator.cluster->UpdateMapping(locator.index, idx); + } + } +} + +void TxGraphImpl::Compact() noexcept +{ + // We cannot compact while any to-be-applied operations remain, as we'd need to rewrite them. + // It is easier to delay the compaction until they have been applied. + if (!m_deps_to_add.empty()) return; + if (!m_to_remove.empty()) return; + + // Sort the GraphIndexes that need to be cleaned up. They are sorted in reverse, so the last + // ones get processed first. 
This means earlier-processed GraphIndexes will not cause moving of + // later-processed ones during the "swap with end of m_entries" step below (which might + // invalidate them). + std::sort(m_unlinked.begin(), m_unlinked.end(), std::greater{}); + + auto last = GraphIndex(-1); + for (GraphIndex idx : m_unlinked) { + // m_unlinked should never contain the same GraphIndex twice (the code below would fail + // if so, because GraphIndexes get invalidated by removing them). + Assume(idx != last); + last = idx; + + // Make sure the entry is unlinked. + Entry& entry = m_entries[idx]; + Assume(entry.m_ref == nullptr); + // Make sure the entry does not occur in the graph. + Assume(!entry.m_locator.IsPresent()); + + // Move the entry to the end. + if (idx != m_entries.size() - 1) SwapIndexes(idx, m_entries.size() - 1); + // Drop the entry for idx, now that it is at the end. + m_entries.pop_back(); + } + m_unlinked.clear(); +} + +void TxGraphImpl::Split(Cluster& cluster) noexcept +{ + // To split a Cluster, first make sure all removals are applied (as we might need to split + // again afterwards otherwise). + ApplyRemovals(); + bool del = cluster.Split(*this); + if (del) { + // Cluster::Split reports whether the Cluster is to be deleted. + DeleteCluster(cluster); + } +} + +void TxGraphImpl::SplitAll() noexcept +{ + // Before splitting all Clusters, first make sure all removals are applied. + ApplyRemovals(); + auto& queue = m_clusters[int(QualityLevel::NEEDS_SPLIT)]; + while (!queue.empty()) { + Split(*queue.back().get()); + } +} + +void TxGraphImpl::GroupClusters() noexcept +{ + // If the groupings have been computed already, nothing is left to be done. + if (m_group_data.has_value()) return; + + // Before computing which Clusters need to be merged together, first apply all removals and + // split the Clusters into connected components. If we would group first, we might end up + // with inefficient Clusters which just end up being split again anyway. 
+ SplitAll(); + + /** Annotated clusters: an entry for each Cluster, together with the representative for the + * partition it is in if known, or with nullptr if not yet known. */ + std::vector> an_clusters; + /** Annotated dependencies: an entry for each m_deps_to_add entry (excluding ones that apply + * to removed transactions), together with the representative root of the partition of + * Clusters it applies to. */ + std::vector, Cluster*>> an_deps; + + // Construct an an_clusters entry for every parent and child in the to-be-applied dependencies. + for (const auto& [par, chl] : m_deps_to_add) { + auto par_cluster = m_entries[par].m_locator.cluster; + auto chl_cluster = m_entries[chl].m_locator.cluster; + // Skip dependencies for which the parent or child transaction is removed. + if (par_cluster == nullptr || chl_cluster == nullptr) continue; + an_clusters.emplace_back(par_cluster, nullptr); + // Do not include a duplicate when parent and child are identical, as it'll be removed + // below anyway. + if (chl_cluster != par_cluster) an_clusters.emplace_back(chl_cluster, nullptr); + } + // Sort and deduplicate an_clusters, so we end up with a sorted list of all involved Clusters + // to which dependencies apply. + std::sort(an_clusters.begin(), an_clusters.end()); + an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end()); + + // Run the union-find algorithm to find partitions of the input Clusters which need to be + // grouped together. See https://en.wikipedia.org/wiki/Disjoint-set_data_structure. + { + /** Each PartitionData entry contains information about a single input Cluster. */ + struct PartitionData + { + /** The cluster this holds information for. */ + Cluster* cluster; + /** All PartitionData entries belonging to the same partition are organized in a tree. + * Each element points to its parent, or to itself if it is the root. 
The root is then + * a representative for the entire tree, and can be found by walking upwards from any + * element. */ + PartitionData* parent; + /** (only if this is a root, so when parent == this) An upper bound on the height of + * the tree for this partition. */ + unsigned rank; + }; + /** Information about each input Cluster. Sorted by Cluster* pointer. */ + std::vector partition_data; + + /** Given a Cluster, find its corresponding PartitionData. */ + auto locate_fn = [&](Cluster* arg) noexcept -> PartitionData* { + auto it = std::lower_bound(partition_data.begin(), partition_data.end(), arg, + [](auto& a, Cluster* ptr) noexcept { return a.cluster < ptr; }); + Assume(it != partition_data.end()); + Assume(it->cluster == arg); + return &*it; + }; + + /** Given a PartitionData, find the root of the tree it is in (its representative). */ + static constexpr auto find_root_fn = [](PartitionData* data) noexcept -> PartitionData* { + while (data->parent != data) { + // Replace pointers to parents with pointers to grandparents. + // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Finding_set_representatives. + auto par = data->parent; + data->parent = par->parent; + data = par; + } + return data; + }; + + /** Given two PartitionDatas, union the partitions they are in. */ + static constexpr auto union_fn = [](PartitionData* arg1, PartitionData* arg2) noexcept { + // Find the roots of the trees, and bail out if they are already equal (which would + // mean they are in the same partition already). + auto rep1 = find_root_fn(arg1); + auto rep2 = find_root_fn(arg2); + if (rep1 == rep2) return; + // Pick the lower-rank root to become a child of the higher-rank one. + // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Union_by_rank. + if (rep1->rank < rep2->rank) std::swap(rep1, rep2); + rep2->parent = rep1; + rep1->rank += (rep1->rank == rep2->rank); + }; + + // Start by initializing every Cluster as its own singleton partition. 
+ partition_data.resize(an_clusters.size()); + for (size_t i = 0; i < an_clusters.size(); ++i) { + partition_data[i].cluster = an_clusters[i].first; + partition_data[i].parent = &partition_data[i]; + partition_data[i].rank = 0; + } + + // Run through all parent/child pairs in m_deps_to_add, and union the + // partitions their Clusters are in. + for (const auto& [par, chl] : m_deps_to_add) { + auto par_cluster = m_entries[par].m_locator.cluster; + auto chl_cluster = m_entries[chl].m_locator.cluster; + // Nothing to do if parent and child are in the same Cluster. + if (par_cluster == chl_cluster) continue; + // Nothing to do if either parent or child transaction is removed already. + if (par_cluster == nullptr || chl_cluster == nullptr) continue; + Assume(par != chl); + union_fn(locate_fn(par_cluster), locate_fn(chl_cluster)); + } + + // Populate the an_clusters and an_deps data structures with the list of input Clusters, + // and the input dependencies, annotated with the representative of the Cluster partition + // it applies to. + for (size_t i = 0; i < partition_data.size(); ++i) { + auto& data = partition_data[i]; + // Find the representative of the partition Cluster i is in, and store it with the + // Cluster. + auto rep = find_root_fn(&data)->cluster; + Assume(an_clusters[i].second == nullptr); + an_clusters[i].second = rep; + } + an_deps.reserve(m_deps_to_add.size()); + for (auto [par, chl] : m_deps_to_add) { + auto chl_cluster = m_entries[chl].m_locator.cluster; + auto par_cluster = m_entries[par].m_locator.cluster; + // Nothing to do if either parent or child transaction is removed already. + if (par_cluster == nullptr || chl_cluster == nullptr) continue; + // Find the representative of the partition which this dependency's child is in (which + // should be the same as the one for the parent). + auto rep = find_root_fn(locate_fn(chl_cluster))->cluster; + // Create an_deps entry. 
+ an_deps.emplace_back(std::pair{par, chl}, rep); + } + } + + // Sort both an_clusters and an_deps by representative of the partition they are in, grouping + // all those applying to the same partition together. + std::sort(an_deps.begin(), an_deps.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); + std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); + + // Translate the resulting cluster groups to the m_group_data structure. + m_group_data = std::vector{}; + auto an_deps_it = an_deps.begin(); + auto an_clusters_it = an_clusters.begin(); + while (an_clusters_it != an_clusters.end()) { + // Process all clusters/dependencies belonging to the partition with representative rep. + auto rep = an_clusters_it->second; + // Create and initialize a new GroupEntry for the partition. + auto& new_entry = m_group_data->emplace_back(); + // Add all its clusters to it (copying those from an_clusters to m_clusters). + while (an_clusters_it != an_clusters.end() && an_clusters_it->second == rep) { + new_entry.m_clusters.push_back(an_clusters_it->first); + ++an_clusters_it; + } + // Add all its dependencies to it (copying those back from an_deps to m_deps). + while (an_deps_it != an_deps.end() && an_deps_it->second == rep) { + new_entry.m_deps.push_back(an_deps_it->first); + ++an_deps_it; + } + } + Assume(an_deps_it == an_deps.end()); + Assume(an_clusters_it == an_clusters.end()); + Compact(); +} + +void TxGraphImpl::Merge(std::span to_merge) noexcept +{ + Assume(!to_merge.empty()); + // Nothing to do if a group consists of just a single Cluster. + if (to_merge.size() == 1) return; + + // Move the largest Cluster to the front of to_merge. As all transactions in other to-be-merged + // Clusters will be moved to that one, putting the largest one first minimizes the number of + // moves. 
+ size_t max_size_pos{0}; + DepGraphIndex max_size = to_merge[max_size_pos]->GetTxCount(); + for (size_t i = 1; i < to_merge.size(); ++i) { + DepGraphIndex size = to_merge[i]->GetTxCount(); + if (size > max_size) { + max_size_pos = i; + max_size = size; + } + } + if (max_size_pos != 0) std::swap(to_merge[0], to_merge[max_size_pos]); + + // Merge all further Clusters in the group into the first one, and delete them. + for (size_t i = 1; i < to_merge.size(); ++i) { + to_merge[0]->Merge(*this, *to_merge[i]); + DeleteCluster(*to_merge[i]); + } +} + +void TxGraphImpl::ApplyDependencies() noexcept +{ + // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits). + GroupClusters(); + Assume(m_group_data.has_value()); + // Nothing to do if there are no dependencies to be added. + if (m_deps_to_add.empty()) return; + + // For each group of to-be-merged Clusters. + for (auto& group_data : *m_group_data) { + // Invoke Merge() to merge them into a single Cluster. + Merge(group_data.m_clusters); + // Actually apply all to-be-added dependencies (all parents and children from this grouping + // belong to the same Cluster at this point because of the merging above, so the child of the first dependency identifies it). + const auto& loc = m_entries[group_data.m_deps[0].second].m_locator; + Assume(loc.IsPresent()); + loc.cluster->ApplyDependencies(*this, group_data.m_deps); + } + + // Wipe the list of to-be-added dependencies now that they are applied. + m_deps_to_add.clear(); + Compact(); + // Also no further Cluster mergings are needed (note that we clear, but don't set to + // std::nullopt, as that would imply the groupings are unknown). + m_group_data = std::vector{}; +} + +void Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept +{ + // We can only relinearize Clusters that do not need splitting. + Assume(!NeedsSplitting()); + // No work is required for Clusters which are already optimally linearized. 
+ if (IsOptimal()) return; + // Invoke the actual linearization algorithm (passing in the existing one). + uint64_t rng_seed = graph.m_rng.rand64(); + auto [linearization, optimal] = Linearize(m_depgraph, max_iters, rng_seed, m_linearization); + // Postlinearize if the result isn't optimal already. This guarantees (among other things) + // that the chunks of the resulting linearization are all connected. + if (!optimal) PostLinearize(m_depgraph, linearization); + // Update the linearization. + m_linearization = std::move(linearization); + // Update the Cluster's quality. + auto new_quality = optimal ? QualityLevel::OPTIMAL : QualityLevel::ACCEPTABLE; + graph.SetClusterQuality(m_quality, m_setindex, new_quality); + // Update the Entry objects. + Updated(graph); +} + +void TxGraphImpl::MakeAcceptable(Cluster& cluster) noexcept +{ + // Relinearize the Cluster if needed, passing a fixed max_iters of 10000. + if (!cluster.NeedsSplitting() && !cluster.IsAcceptable()) { + cluster.Relinearize(*this, 10000); + } +} + +Cluster::Cluster(TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept +{ + // Create a new transaction in the DepGraph, and remember its position in m_mapping. + auto cluster_idx = m_depgraph.AddTransaction(feerate); + m_mapping.push_back(graph_index); + m_linearization.push_back(cluster_idx); +} + +TxGraph::Ref TxGraphImpl::AddTransaction(const FeePerWeight& feerate) noexcept +{ + // Construct a new Ref. + Ref ret; + // Construct a new Entry, and link it with the Ref. + auto idx = m_entries.size(); + m_entries.emplace_back(); + auto& entry = m_entries.back(); + entry.m_ref = &ret; + GetRefGraph(ret) = this; + GetRefIndex(ret) = idx; + // Construct a new singleton Cluster (which is necessarily optimally linearized). + auto cluster = std::make_unique(*this, feerate, idx); + auto cluster_ptr = cluster.get(); + InsertCluster(std::move(cluster), QualityLevel::OPTIMAL); + cluster_ptr->Updated(*this); + ++m_txcount; + // Return the Ref. 
+ return ret; +} + +void TxGraphImpl::RemoveTransaction(const Ref& arg) noexcept +{ + // Don't do anything if the Ref is empty (which may be indicative of the transaction already + // having been removed). + if (GetRefGraph(arg) == nullptr) return; + Assume(GetRefGraph(arg) == this); + // Find the Cluster the transaction is in, and stop if it isn't in any. + auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return; + // Remember that the transaction is to be removed. + m_to_remove.push_back(GetRefIndex(arg)); + // Wipe m_group_data (as it will need to be recomputed). + m_group_data.reset(); +} + +void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept +{ + // Don't do anything if either Ref is empty (which may be indicative of it having already been + // removed). + if (GetRefGraph(parent) == nullptr || GetRefGraph(child) == nullptr) return; + Assume(GetRefGraph(parent) == this && GetRefGraph(child) == this); + // Don't do anything if this is a dependency on self. + if (GetRefIndex(parent) == GetRefIndex(child)) return; + // Find the Cluster the parent and child transaction are in, and stop if either appears to be + // already removed. + auto par_cluster = m_entries[GetRefIndex(parent)].m_locator.cluster; + if (par_cluster == nullptr) return; + auto chl_cluster = m_entries[GetRefIndex(child)].m_locator.cluster; + if (chl_cluster == nullptr) return; + // Remember that this dependency is to be applied. + m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); + // Wipe m_group_data (as it will need to be recomputed). + m_group_data.reset(); +} + +bool TxGraphImpl::Exists(const Ref& arg) noexcept +{ + if (GetRefGraph(arg) == nullptr) return false; + Assume(GetRefGraph(arg) == this); + // Make sure the transaction isn't scheduled for removal. 
+ ApplyRemovals(); + return m_entries[GetRefIndex(arg)].m_locator.IsPresent(); +} + +std::vector Cluster::GetAncestorRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept +{ + std::vector ret; + ret.reserve(m_depgraph.Ancestors(idx).Count()); + // Translate all ancestors (in arbitrary order) to Refs (if they have any), and return them. + for (auto idx : m_depgraph.Ancestors(idx)) { + const auto& entry = graph.m_entries[m_mapping[idx]]; + ret.push_back(entry.m_ref); + } + return ret; +} + +std::vector Cluster::GetDescendantRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept +{ + std::vector ret; + ret.reserve(m_depgraph.Descendants(idx).Count()); + // Translate all descendants (in arbitrary order) to Refs (if they have any), and return them. + for (auto idx : m_depgraph.Descendants(idx)) { + const auto& entry = graph.m_entries[m_mapping[idx]]; + ret.push_back(entry.m_ref); + } + return ret; +} + +std::vector Cluster::GetClusterRefs(const TxGraphImpl& graph) noexcept +{ + std::vector ret; + ret.reserve(m_linearization.size()); + // Translate all transactions in the Cluster (in linearization order) to Refs. + for (auto idx : m_linearization) { + const auto& entry = graph.m_entries[m_mapping[idx]]; + ret.push_back(entry.m_ref); + } + return ret; +} + +FeePerWeight Cluster::GetIndividualFeerate(DepGraphIndex idx) noexcept +{ + return FeePerWeight::FromFeeFrac(m_depgraph.FeeRate(idx)); +} + +std::vector TxGraphImpl::GetAncestors(const Ref& arg) noexcept +{ + // Return the empty vector if the Ref is empty. + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all removals and dependencies, as the result might be incorrect otherwise. + ApplyDependencies(); + // Find the Cluster the argument is in, and return the empty vector if it isn't in any. + auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return {}; + // Dispatch to the Cluster. 
+ return cluster->GetAncestorRefs(*this, m_entries[GetRefIndex(arg)].m_locator.index); +} + +std::vector TxGraphImpl::GetDescendants(const Ref& arg) noexcept +{ + // Return the empty vector if the Ref is empty. + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all removals and dependencies, as the result might be incorrect otherwise. + ApplyDependencies(); + // Find the Cluster the argument is in, and return the empty vector if it isn't in any. + auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return {}; + // Dispatch to the Cluster. + return cluster->GetDescendantRefs(*this, m_entries[GetRefIndex(arg)].m_locator.index); +} + +std::vector TxGraphImpl::GetCluster(const Ref& arg) noexcept +{ + // Return the empty vector if the Ref is empty. + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all removals and dependencies, as the result might be incorrect otherwise. + ApplyDependencies(); + // Find the Cluster the argument is in, and return the empty vector if it isn't in any. + auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return {}; + // Make sure the Cluster has an acceptable quality level, and then dispatch to it. + MakeAcceptable(*cluster); + return cluster->GetClusterRefs(*this); +} + +TxGraph::GraphIndex TxGraphImpl::GetTransactionCount() noexcept +{ + ApplyRemovals(); + return m_txcount; +} + +FeePerWeight TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept +{ + // Return the empty FeePerWeight if the passed Ref is empty. + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply removals, so that we can correctly report FeePerWeight{} for non-existing transaction. + ApplyRemovals(); + // Find the cluster the argument is in, and return the empty FeePerWeight if it isn't in any. 
+ auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return {}; + // Dispatch to the Cluster. + return cluster->GetIndividualFeerate(m_entries[GetRefIndex(arg)].m_locator.index); +} + +void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcept +{ + // Make sure the specified DepGraphIndex exists in this Cluster. + Assume(m_depgraph.Positions()[idx]); + // Bail out if the fee isn't actually being changed. + if (m_depgraph.FeeRate(idx).fee == fee) return; + // Update the fee, remember that relinearization will be necessary, and update the Entries + // in the same Cluster. + m_depgraph.FeeRate(idx).fee = fee; + if (!NeedsSplitting()) { + graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + } + Updated(graph); +} + +void TxGraphImpl::SetTransactionFee(const Ref& ref, int64_t fee) noexcept +{ + // Don't do anything if the passed Ref is empty. + if (GetRefGraph(ref) == nullptr) return; + Assume(GetRefGraph(ref) == this); + // Find the entry, its locator, and inform its Cluster about the new feerate, if any. + auto& entry = m_entries[GetRefIndex(ref)]; + auto& locator = entry.m_locator; + if (locator.IsPresent()) { + locator.cluster->SetFee(*this, locator.index, fee); + } +} + +} // namespace + +TxGraph::Ref::~Ref() +{ + if (m_graph) { + // Inform the TxGraph about the Ref being destroyed. + m_graph->UnlinkRef(m_index); + m_graph = nullptr; + } +} + +TxGraph::Ref& TxGraph::Ref::operator=(Ref&& other) noexcept +{ + // Unlink the current graph, if any. + if (m_graph) m_graph->UnlinkRef(m_index); + // Inform the other's graph about the move, if any. + if (other.m_graph) other.m_graph->UpdateRef(other.m_index, *this); + // Actually update the contents. 
+ m_graph = other.m_graph; + m_index = other.m_index; + other.m_graph = nullptr; + other.m_index = GraphIndex(-1); + return *this; +} + +TxGraph::Ref::Ref(Ref&& other) noexcept +{ + // Inform the TxGraph of other that its Ref is being moved. + if (other.m_graph) other.m_graph->UpdateRef(other.m_index, *this); + // Actually move the contents. + std::swap(m_graph, other.m_graph); + std::swap(m_index, other.m_index); +} + +std::unique_ptr MakeTxGraph() noexcept +{ + return std::make_unique(); +} diff --git a/src/txgraph.h b/src/txgraph.h new file mode 100644 index 00000000000..0a20ad232d2 --- /dev/null +++ b/src/txgraph.h @@ -0,0 +1,139 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include +#include + +#include + +#ifndef BITCOIN_TXGRAPH_H +#define BITCOIN_TXGRAPH_H + +/** No connected component within TxGraph is allowed to exceed this number of transactions. */ +static constexpr unsigned CLUSTER_COUNT_LIMIT{64}; + +/** Data structure to encapsulate fees, sizes, and dependencies for a set of transactions. + * + * The connected components within the transaction graph are called clusters: whenever one + * transaction is reachable from another, through any sequence of is-parent-of or is-child-of + * relations, they belong to the same cluster (so clusters include parents, children, but also + * grandparents, siblings, cousins twice removed, ...). + * + * TxGraph implicitly defines an associated total ordering on its transactions (its linearization) + * that respects topology (parents go before their children), aiming for it to be close to the + * optimal order those transactions should be mined in if the goal is fee maximization, though this + * is a best effort only, not a strong guarantee. 
+ * + * For more explanation, see https://delvingbitcoin.org/t/introduction-to-cluster-linearization/1032 + * + * The interface is designed to accommodate an implementation that only stores the transitive + * closure of dependencies, so if B spends C, it does not distinguish between "A spending B" and + * "A spending both B and C". + */ +class TxGraph +{ +public: + /** Internal identifier for a transaction within a TxGraph. */ + using GraphIndex = uint32_t; + + /** Data type used to reference transactions within a TxGraph. + * + * Every transaction within a TxGraph has exactly one corresponding TxGraph::Ref, held by users + * of the class. Refs can only be destroyed after the transaction is removed from the graph. + * + * Users of the class can inherit from TxGraph::Ref. If all Refs are inherited this way, the + * Ref* pointers returned by TxGraph functions can be cast to, and used as, this inherited type. + */ + class Ref; + + /** Virtual destructor, so inheriting is safe. */ + virtual ~TxGraph() = default; + /** Construct a new transaction with the specified feerate, and return a Ref to it. In all + * further calls, only Refs created by AddTransaction() are allowed to be passed to this + * TxGraph object (or empty Ref objects). */ + [[nodiscard]] virtual Ref AddTransaction(const FeePerWeight& feerate) noexcept = 0; + /** Remove the specified transaction. This is a no-op if the transaction was already removed. + * + * TxGraph may internally reorder transaction removals with dependency additions for + * performance reasons. If together with any transaction removal all its descendants, or all + * its ancestors, are removed as well (which is what always happens in realistic scenarios), + * this reordering will not affect the behavior of TxGraph. + * + * As an example, imagine 3 transactions A,B,C where B depends on A. If a dependency of C on B + * is added, and then B is deleted, C will still depend on A. 
If the deletion of B is reordered + * before the C->B dependency is added, the dependency adding has no effect. If, together with + * the deletion of B also either A or C is deleted, there is no distinction between the + * original order case and the reordered case. + */ + virtual void RemoveTransaction(const Ref& arg) noexcept = 0; + /** Add a dependency between two specified transactions. Parent may not be a descendant of + * child already (but may be an ancestor of it already, in which case this is a no-op). If + * either transaction is already removed, this is a no-op. */ + virtual void AddDependency(const Ref& parent, const Ref& child) noexcept = 0; + /** Modify the fee of the specified transaction. If the transaction does not exist (or was + * removed), this has no effect. */ + virtual void SetTransactionFee(const Ref& arg, int64_t fee) noexcept = 0; + + /** Determine whether arg exists in this graph (i.e., was not removed). */ + virtual bool Exists(const Ref& arg) noexcept = 0; + /** Get the individual transaction feerate of transaction arg. Returns the empty FeePerWeight + * if arg does not exist. */ + virtual FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept = 0; + /** Get pointers to all transactions in the cluster which arg is in. The transactions will be + * returned in graph order. Returns {} if arg does not exist in the graph. */ + virtual std::vector GetCluster(const Ref& arg) noexcept = 0; + /** Get pointers to all ancestors of the specified transaction (including the transaction + * itself), in unspecified order. Returns {} if arg does not exist in the graph. */ + virtual std::vector GetAncestors(const Ref& arg) noexcept = 0; + /** Get pointers to all descendants of the specified transaction (including the transaction + * itself), in unspecified order. Returns {} if arg does not exist in the graph. */ + virtual std::vector GetDescendants(const Ref& arg) noexcept = 0; + /** Get the total number of transactions in the graph. 
*/ + virtual GraphIndex GetTransactionCount() noexcept = 0; + +protected: + // Allow TxGraph::Ref to call UpdateRef and UnlinkRef. + friend class TxGraph::Ref; + /** Inform the TxGraph implementation that a TxGraph::Ref has moved. */ + virtual void UpdateRef(GraphIndex index, Ref& new_location) noexcept = 0; + /** Inform the TxGraph implementation that a TxGraph::Ref was destroyed. */ + virtual void UnlinkRef(GraphIndex index) noexcept = 0; + // Allow TxGraph implementations (inheriting from it) to access Ref internals. + static TxGraph*& GetRefGraph(Ref& arg) noexcept { return arg.m_graph; } + static TxGraph* GetRefGraph(const Ref& arg) noexcept { return arg.m_graph; } + static GraphIndex& GetRefIndex(Ref& arg) noexcept { return arg.m_index; } + static GraphIndex GetRefIndex(const Ref& arg) noexcept { return arg.m_index; } + +public: + class Ref + { + // Allow TxGraph's GetRefGraph and GetRefIndex to access internals. + friend class TxGraph; + /** Which Graph the Entry lives in. nullptr if this Ref is empty. */ + TxGraph* m_graph = nullptr; + /** Index into the Graph's m_entries. Only used if m_graph != nullptr. */ + GraphIndex m_index = GraphIndex(-1); + public: + /** Construct an empty Ref. Non-empty Refs can only be created using + * TxGraph::AddTransaction. */ + Ref() noexcept = default; + /** Destroy this Ref. This is only allowed when it is empty, or the transaction it refers + * to has been removed from the graph. */ + virtual ~Ref(); + // Support moving a Ref. + Ref& operator=(Ref&& other) noexcept; + Ref(Ref&& other) noexcept; + // Do not permit copy constructing or copy assignment. A TxGraph entry can have at most one + // Ref pointing to it. + Ref& operator=(const Ref&) = delete; + Ref(const Ref&) = delete; + }; +}; + +/** Construct a new TxGraph. 
*/ +std::unique_ptr MakeTxGraph() noexcept; + +#endif // BITCOIN_TXGRAPH_H From 05abf336f997f477c6f48412809ab540fccf1cb0 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Fri, 15 Nov 2024 14:15:12 -0500 Subject: [PATCH 06/25] txgraph: Add simulation fuzz test (tests) This adds a simulation fuzz test for txgraph, by comparing with a naive reimplementation that models the entire graph as a single DepGraph, and clusters in TxGraph as connected components within that DepGraph. --- src/test/fuzz/CMakeLists.txt | 1 + src/test/fuzz/txgraph.cpp | 420 +++++++++++++++++++++++++++++++++++ 2 files changed, 421 insertions(+) create mode 100644 src/test/fuzz/txgraph.cpp diff --git a/src/test/fuzz/CMakeLists.txt b/src/test/fuzz/CMakeLists.txt index e99c6d91f47..846afeeb474 100644 --- a/src/test/fuzz/CMakeLists.txt +++ b/src/test/fuzz/CMakeLists.txt @@ -124,6 +124,7 @@ add_executable(fuzz tx_in.cpp tx_out.cpp tx_pool.cpp + txgraph.cpp txorphan.cpp txrequest.cpp # Visual Studio 2022 version 17.12 introduced a bug diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp new file mode 100644 index 00000000000..1d7fc8345a0 --- /dev/null +++ b/src/test/fuzz/txgraph.cpp @@ -0,0 +1,420 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace cluster_linearize; + +namespace { + +/** Data type representing a naive simulated TxGraph, keeping all transactions (even from + * disconnected components) in a single DepGraph. */ +struct SimTxGraph +{ + /** Maximum number of transactions to support simultaneously. Set this higher than txgraph's + * cluster count, so we can exercise situations with more transactions than fit in one + * cluster. 
*/ + static constexpr unsigned MAX_TRANSACTIONS = CLUSTER_COUNT_LIMIT * 2; + /** Set type to use in the simulation. */ + using SetType = BitSet; + /** Data type for representing positions within SimTxGraph::graph. */ + using Pos = DepGraphIndex; + /** Constant to mean "missing in this graph". */ + static constexpr auto MISSING = Pos(-1); + + /** The dependency graph (for all transactions in the simulation, regardless of + * connectivity/clustering). */ + DepGraph graph; + /** For each position in graph, which TxGraph::Ref it corresponds with (if any). */ + std::array, MAX_TRANSACTIONS> simmap; + /** For each TxGraph::Ref in graph, the position it corresponds with. */ + std::map simrevmap; + /** The set of TxGraph::Ref entries that have been removed, but not yet destroyed. */ + std::vector> removed; + + /** Determine the number of (non-removed) transactions in the graph. */ + DepGraphIndex GetTransactionCount() const { return graph.TxCount(); } + + /** Get the position where ref occurs in this simulated graph, or -1 if it does not. */ + Pos Find(const TxGraph::Ref* ref) const + { + auto it = simrevmap.find(ref); + if (it != simrevmap.end()) return it->second; + return MISSING; + } + + /** Given a position in this simulated graph, get the corresponding TxGraph::Ref. */ + TxGraph::Ref* GetRef(Pos pos) + { + assert(graph.Positions()[pos]); + assert(simmap[pos]); + return simmap[pos].get(); + } + + /** Add a new transaction to the simulation. */ + TxGraph::Ref* AddTransaction(const FeePerWeight& feerate) + { + assert(graph.TxCount() < MAX_TRANSACTIONS); + auto simpos = graph.AddTransaction(feerate); + assert(graph.Positions()[simpos]); + simmap[simpos] = std::make_unique(); + auto ptr = simmap[simpos].get(); + simrevmap[ptr] = simpos; + return ptr; + } + + /** Add a dependency between two positions in this graph. 
*/ + void AddDependency(TxGraph::Ref* parent, TxGraph::Ref* child) + { + auto par_pos = Find(parent); + if (par_pos == MISSING) return; + auto chl_pos = Find(child); + if (chl_pos == MISSING) return; + graph.AddDependencies(SetType::Singleton(par_pos), chl_pos); + } + + /** Modify the transaction fee of a ref, if it exists. */ + void SetTransactionFee(TxGraph::Ref* ref, int64_t fee) + { + auto pos = Find(ref); + if (pos == MISSING) return; + graph.FeeRate(pos).fee = fee; + } + + /** Remove the transaction in the specified position from the graph. */ + void RemoveTransaction(TxGraph::Ref* ref) + { + auto pos = Find(ref); + if (pos == MISSING) return; + graph.RemoveTransactions(SetType::Singleton(pos)); + simrevmap.erase(simmap[pos].get()); + // Retain the TxGraph::Ref corresponding to this position, so the Ref destruction isn't + // invoked until the simulation explicitly decided to do so. + removed.push_back(std::move(simmap[pos])); + simmap[pos].reset(); + } + + /** Construct the set with all positions in this graph corresponding to the specified + * TxGraph::Refs. All of them must occur in this graph and not be removed. */ + SetType MakeSet(std::span arg) + { + SetType ret; + for (TxGraph::Ref* ptr : arg) { + auto pos = Find(ptr); + assert(pos != Pos(-1)); + ret.Set(pos); + } + return ret; + } + + /** Get the set of ancestors (desc=false) or descendants (desc=true) in this graph. */ + SetType GetAncDesc(TxGraph::Ref* arg, bool desc) + { + auto pos = Find(arg); + if (pos == MISSING) return {}; + return desc ? graph.Descendants(pos) : graph.Ancestors(pos); + } + + /** Given a set of Refs (given as a vector of pointers), expand the set to include all its + * ancestors (desc=false) or all its descendants (desc=true) in this graph. */ + void IncludeAncDesc(std::vector& arg, bool desc) + { + std::vector ret; + for (auto ptr : arg) { + auto simpos = Find(ptr); + if (simpos != MISSING) { + for (auto i : desc ? 
graph.Descendants(simpos) : graph.Ancestors(simpos)) { + ret.push_back(simmap[i].get()); + } + } else { + ret.push_back(ptr); + } + } + // Deduplicate. + std::sort(ret.begin(), ret.end()); + ret.erase(std::unique(ret.begin(), ret.end()), ret.end()); + // Replace input. + arg = std::move(ret); + } +}; + +} // namespace + +FUZZ_TARGET(txgraph) +{ + // This is a big simulation test for TxGraph, which performs a fuzz-derived sequence of valid + // operations on a TxGraph instance, as well as on a simpler (mostly) reimplementation (see + // SimTxGraph above), comparing the outcome of functions that return a result, and finally + // performing a full comparison between the two. + + SeedRandomStateForTest(SeedRand::ZEROS); + FuzzedDataProvider provider(buffer.data(), buffer.size()); + + /** Internal test RNG, used only for decisions which would require significant amount of data + * to be read from the provider, without realistically impacting test sensitivity. */ + InsecureRandomContext rng(0xdecade2009added + buffer.size()); + + /** Variable used whenever an empty TxGraph::Ref is needed. */ + TxGraph::Ref empty_ref; + + // Construct a real and a simulated graph. + auto real = MakeTxGraph(); + SimTxGraph sim; + + /** Function to pick any Ref (from sim.simmap or sim.removed, or the empty Ref). */ + auto pick_fn = [&]() noexcept -> TxGraph::Ref* { + auto tx_count = sim.GetTransactionCount(); + /** The number of possible choices. */ + size_t choices = tx_count + sim.removed.size() + 1; + /** Pick one of them. */ + auto choice = provider.ConsumeIntegralInRange(0, choices - 1); + if (choice < tx_count) { + // Return from real. + for (auto i : sim.graph.Positions()) { + if (choice == 0) return sim.GetRef(i); + --choice; + } + assert(false); + } else { + choice -= tx_count; + } + if (choice < sim.removed.size()) { + // Return from removed. + return sim.removed[choice].get(); + } else { + choice -= sim.removed.size(); + } + // Return empty. 
+ assert(choice == 0); + return &empty_ref; + }; + + LIMITED_WHILE(provider.remaining_bytes() > 0, 200) { + // Read a one-byte command. + int command = provider.ConsumeIntegral(); + // Treat its lowest bit as a flag (which selects a variant of some of the operations), and + // leave the rest of the bits in command. + bool alt = command & 1; + command >>= 1; + + // Keep decrementing command for each applicable operation, until one is hit. Multiple + // iterations may be necessary. + while (true) { + if (sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) { + // AddTransaction. + int64_t fee; + int32_t size; + if (alt) { + // If alt is true, pick fee and size from the entire range. + fee = provider.ConsumeIntegralInRange(-0x8000000000000, 0x7ffffffffffff); + size = provider.ConsumeIntegralInRange(1, 0x3fffff); + } else { + // Otherwise, use a smaller range which consumes fewer fuzz input bytes, as just + // these are likely sufficient to trigger all interesting code paths already. + fee = provider.ConsumeIntegral(); + size = provider.ConsumeIntegral() + 1; + } + FeePerWeight feerate{fee, size}; + // Create a real TxGraph::Ref. + auto ref = real->AddTransaction(feerate); + // Create a unique_ptr place in the simulation to put the Ref in. + auto ref_loc = sim.AddTransaction(feerate); + // Move it in place. + *ref_loc = std::move(ref); + break; + } else if (sim.GetTransactionCount() + sim.removed.size() > 1 && command-- == 0) { + // AddDependency. + auto par = pick_fn(); + auto chl = pick_fn(); + auto pos_par = sim.Find(par); + auto pos_chl = sim.Find(chl); + if (pos_par != SimTxGraph::MISSING && pos_chl != SimTxGraph::MISSING) { + // Determine if adding this would introduce a cycle (not allowed by TxGraph), + // and if so, skip. + if (sim.graph.Ancestors(pos_par)[pos_chl]) break; + // Determine if adding this would violate CLUSTER_COUNT_LIMIT, and if so, skip.
+ auto temp_depgraph = sim.graph; + temp_depgraph.AddDependencies(SimTxGraph::SetType::Singleton(pos_par), pos_chl); + auto todo = temp_depgraph.Positions(); + bool oversize{false}; + while (todo.Any()) { + auto component = temp_depgraph.FindConnectedComponent(todo); + if (component.Count() > CLUSTER_COUNT_LIMIT) oversize = true; + todo -= component; + } + if (oversize) break; + } + sim.AddDependency(par, chl); + real->AddDependency(*par, *chl); + break; + } else if (sim.removed.size() < 100 && command-- == 0) { + // RemoveTransaction. Either all its ancestors or all its descendants are also + // removed (if any), to make sure TxGraph's reordering of removals and dependencies + // has no effect. + std::vector to_remove; + to_remove.push_back(pick_fn()); + sim.IncludeAncDesc(to_remove, alt); + // The order in which these ancestors/descendants are removed should not matter; + // randomly shuffle them. + std::shuffle(to_remove.begin(), to_remove.end(), rng); + for (TxGraph::Ref* ptr : to_remove) { + real->RemoveTransaction(*ptr); + sim.RemoveTransaction(ptr); + } + break; + } else if (sim.removed.size() > 0 && command-- == 0) { + // ~Ref. Destroying a TxGraph::Ref has an observable effect on the TxGraph it + // refers to, so this simulation permits doing so separately from other actions on + // TxGraph. + + // Pick a Ref of sim.removed to destroy. + auto removed_pos = provider.ConsumeIntegralInRange(0, sim.removed.size() - 1); + if (removed_pos != sim.removed.size() - 1) { + std::swap(sim.removed[removed_pos], sim.removed.back()); + } + sim.removed.pop_back(); + break; + } else if (command-- == 0) { + // SetTransactionFee. + int64_t fee; + if (alt) { + fee = provider.ConsumeIntegralInRange(-0x8000000000000, 0x7ffffffffffff); + } else { + fee = provider.ConsumeIntegral(); + } + auto ref = pick_fn(); + real->SetTransactionFee(*ref, fee); + sim.SetTransactionFee(ref, fee); + break; + } else if (command-- == 0) { + // GetTransactionCount. 
+ assert(real->GetTransactionCount() == sim.GetTransactionCount()); + break; + } else if (command-- == 0) { + // Exists. + auto ref = pick_fn(); + bool exists = real->Exists(*ref); + bool should_exist = sim.Find(ref) != SimTxGraph::MISSING; + assert(exists == should_exist); + break; + } else if (command-- == 0) { + // GetIndividualFeerate. + auto ref = pick_fn(); + auto feerate = real->GetIndividualFeerate(*ref); + auto simpos = sim.Find(ref); + if (simpos == SimTxGraph::MISSING) { + assert(feerate.IsEmpty()); + } else { + assert(feerate == sim.graph.FeeRate(simpos)); + } + break; + } else if (command-- == 0) { + // GetAncestors/GetDescendants. + auto ref = pick_fn(); + auto result_set = sim.MakeSet(alt ? real->GetDescendants(*ref) : + real->GetAncestors(*ref)); + auto expect_set = sim.GetAncDesc(ref, alt); + assert(result_set == expect_set); + break; + } else if (command-- == 0) { + // GetCluster. + auto ref = pick_fn(); + auto result = real->GetCluster(*ref); + // Check cluster count limit. + assert(result.size() <= CLUSTER_COUNT_LIMIT); + // Require the result to be topologically valid and not contain duplicates. + auto left = sim.graph.Positions(); + for (auto refptr : result) { + auto simpos = sim.Find(refptr); + assert(simpos != SimTxGraph::MISSING); + assert(left[simpos]); + left.Reset(simpos); + assert(!sim.graph.Ancestors(simpos).Overlaps(left)); + } + // Require the set to be connected. + auto result_set = sim.MakeSet(result); + assert(sim.graph.IsConnected(result_set)); + // If ref exists, the result must contain it. If not, it must be empty. + auto simpos = sim.Find(ref); + if (simpos != SimTxGraph::MISSING) { + assert(result_set[simpos]); + } else { + assert(result_set.None()); + } + // Require the set not to have ancestors or descendants outside of it. 
+ for (auto i : result_set) { + assert(sim.graph.Ancestors(i).IsSubsetOf(result_set)); + assert(sim.graph.Descendants(i).IsSubsetOf(result_set)); + } + break; + } + } + } + // Compare simple properties of the graph with the simulation. + assert(real->GetTransactionCount() == sim.GetTransactionCount()); + + // Perform a full comparison. + auto todo = sim.graph.Positions(); + // Iterate over all connected components of the resulting (simulated) graph, each of which + // should correspond to a cluster in the real one. + while (todo.Any()) { + auto component = sim.graph.FindConnectedComponent(todo); + todo -= component; + // Iterate over the transactions in that component. + for (auto i : component) { + // Check its individual feerate against simulation. + assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(*sim.GetRef(i))); + // Check its ancestors against simulation. + auto expect_anc = sim.graph.Ancestors(i); + auto anc = sim.MakeSet(real->GetAncestors(*sim.GetRef(i))); + assert(anc == expect_anc); + // Check its descendants against simulation. + auto expect_desc = sim.graph.Descendants(i); + auto desc = sim.MakeSet(real->GetDescendants(*sim.GetRef(i))); + assert(desc == expect_desc); + // Check the cluster the transaction is part of. + auto cluster = real->GetCluster(*sim.GetRef(i)); + assert(sim.MakeSet(cluster) == component); + // Check that the cluster is reported in a valid topological order (its + // linearization). + std::vector simlin; + SimTxGraph::SetType done; + for (TxGraph::Ref* ptr : cluster) { + auto simpos = sim.Find(ptr); + assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); + done.Set(simpos); + assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); + simlin.push_back(simpos); + } + // Construct a chunking object for the simulated graph, using the reported cluster + // linearization as ordering. 
+ cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); + for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { + auto chunk = simlinchunk.GetChunk(chunknum); + // Require that the chunks of cluster linearizations are connected (this must + // be the case as all linearizations inside are PostLinearized). + assert(sim.graph.IsConnected(chunk.transactions)); + } + } + } + + // Remove all remaining transactions, because Refs cannot be destroyed otherwise (this will be + // addressed in a follow-up commit). + for (auto i : sim.graph.Positions()) { + auto ref = sim.GetRef(i); + real->RemoveTransaction(*ref); + } +} From ee57e93099f243cf9fbf9c10265057a53f06e062 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 14 Nov 2024 22:45:46 -0500 Subject: [PATCH 07/25] txgraph: Add internal sanity check function (tests) To make testing more powerful, expose a function to perform an internal sanity check on the state of a TxGraph. This is especially important as TxGraphImpl contains many redundantly represented pieces of information: * graph contains clusters, which refer to entries, but the entries refer back * graph maintains pointers to Ref objects, which point back to the graph. This lets us make sure they are always in sync. --- src/test/fuzz/txgraph.cpp | 8 +++ src/txgraph.cpp | 123 ++++++++++++++++++++++++++++++++++++++ src/txgraph.h | 3 + 3 files changed, 134 insertions(+) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 1d7fc8345a0..367ded4b146 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -363,6 +363,11 @@ FUZZ_TARGET(txgraph) } } } + + // After running all modifications, perform an internal sanity check (before invoking + // inspectors that may modify the internal state). + real->SanityCheck(); + // Compare simple properties of the graph with the simulation. 
assert(real->GetTransactionCount() == sim.GetTransactionCount()); @@ -411,6 +416,9 @@ FUZZ_TARGET(txgraph) } } + // Sanity check again (because invoking inspectors may modify internal unobservable state). + real->SanityCheck(); + // Remove all remaining transactions, because Refs cannot be destroyed otherwise (this will be // addressed in a follow-up commit). for (auto i : sim.graph.Positions()) { diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 81802e2ddf4..c31e7a087d2 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -94,6 +95,8 @@ public: } /** Get the number of transactions in this Cluster. */ LinearizationIndex GetTxCount() const noexcept { return m_linearization.size(); } + /** Given a DepGraphIndex into this Cluster, find the corresponding GraphIndex. */ + GraphIndex GetClusterEntry(DepGraphIndex index) const noexcept { return m_mapping[index]; } /** Only called by Graph::SwapIndexes. */ void UpdateMapping(DepGraphIndex cluster_idx, GraphIndex graph_idx) noexcept { m_mapping[cluster_idx] = graph_idx; } /** Push changes to Cluster and its linearization to the TxGraphImpl Entry objects. */ @@ -126,6 +129,10 @@ public: FeePerWeight GetIndividualFeerate(DepGraphIndex idx) noexcept; /** Modify the fee of a Cluster element. */ void SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcept; + + // Debugging functions. + + void SanityCheck(const TxGraphImpl& graph) const; }; /** The transaction graph. @@ -189,6 +196,8 @@ private: void SetMissing() noexcept { cluster = nullptr; index = 0; } /** Mark this Locator as present, in the specified Cluster. */ void SetPresent(Cluster* c, DepGraphIndex i) noexcept { cluster = c; index = i; } + /** Check if this Locator is missing. */ + bool IsMissing() const noexcept { return cluster == nullptr && index == 0; } /** Check if this Locator is present (in some Cluster). 
*/ bool IsPresent() const noexcept { return cluster != nullptr; } }; @@ -285,6 +294,8 @@ public: std::vector GetAncestors(const Ref& arg) noexcept final; std::vector GetDescendants(const Ref& arg) noexcept final; GraphIndex GetTransactionCount() noexcept final; + + void SanityCheck() const final; }; void Cluster::Updated(TxGraphImpl& graph) noexcept @@ -1098,6 +1109,118 @@ void TxGraphImpl::SetTransactionFee(const Ref& ref, int64_t fee) noexcept } } +void Cluster::SanityCheck(const TxGraphImpl& graph) const +{ + // There must be an m_mapping for each m_depgraph position (including holes). + assert(m_depgraph.PositionRange() == m_mapping.size()); + // The linearization for this Cluster must contain every transaction once. + assert(m_depgraph.TxCount() == m_linearization.size()); + // m_quality and m_setindex are checked in TxGraphImpl::SanityCheck. + + // Compute the chunking of m_linearization. + LinearizationChunking linchunking(m_depgraph, m_linearization); + + // Verify m_linearization. + SetType m_done; + assert(m_depgraph.IsAcyclic()); + for (auto lin_pos : m_linearization) { + assert(lin_pos < m_mapping.size()); + const auto& entry = graph.m_entries[m_mapping[lin_pos]]; + // Check that the linearization is topological. + m_done.Set(lin_pos); + assert(m_done.IsSupersetOf(m_depgraph.Ancestors(lin_pos))); + // Check that the Entry has a locator pointing back to this Cluster & position within it. + assert(entry.m_locator.cluster == this); + assert(entry.m_locator.index == lin_pos); + // Check linearization position. + if (!linchunking.GetChunk(0).transactions[lin_pos]) { + linchunking.MarkDone(linchunking.GetChunk(0).transactions); + } + // If this Cluster has an acceptable quality level, its chunks must be connected. + if (IsAcceptable()) { + assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions)); + } + } + // Verify that each element of m_depgraph occurred in m_linearization. 
+ assert(m_done == m_depgraph.Positions()); +} + +void TxGraphImpl::SanityCheck() const +{ + /** Which GraphIndexes ought to occur in m_unlinked, based on m_entries. */ + std::set expected_unlinked; + /** Which Clusters ought to occur in m_clusters, based on m_entries. */ + std::set expected_clusters; + + // Go over all Entry objects in m_entries. + for (GraphIndex idx = 0; idx < m_entries.size(); ++idx) { + const auto& entry = m_entries[idx]; + if (entry.m_ref == nullptr) { + // Unlinked Entry must have indexes appear in m_unlinked. + expected_unlinked.insert(idx); + } else { + // Every non-unlinked Entry must have a Ref that points back to it. + assert(GetRefGraph(*entry.m_ref) == this); + assert(GetRefIndex(*entry.m_ref) == idx); + } + const auto& locator = entry.m_locator; + // Every Locator must be in exactly one of these 2 states. + assert(locator.IsMissing() + locator.IsPresent() == 1); + if (locator.IsPresent()) { + // Verify that the Cluster agrees with where the Locator claims the transaction is. + assert(locator.cluster->GetClusterEntry(locator.index) == idx); + // Remember that we expect said Cluster to appear in the m_clusters. + expected_clusters.insert(locator.cluster); + } + + } + + std::set actual_clusters; + // For all quality levels... + for (int qual = 0; qual < int(QualityLevel::NONE); ++qual) { + QualityLevel quality{qual}; + const auto& quality_clusters = m_clusters[qual]; + // ... for all clusters in them ... + for (ClusterSetIndex setindex = 0; setindex < quality_clusters.size(); ++setindex) { + const auto& cluster = *quality_clusters[setindex]; + // Remember we saw this Cluster (only if it is non-empty; empty Clusters aren't + // expected to be referenced by the Entry vector). + if (cluster.GetTxCount() != 0) { + actual_clusters.insert(&cluster); + } + // Sanity check the cluster, according to the Cluster's internal rules. 
+ cluster.SanityCheck(*this); + // Check that the cluster's quality and setindex matches its position in the quality list. + assert(cluster.m_quality == quality); + assert(cluster.m_setindex == setindex); + } + } + + // Verify that all to-be-removed transactions have valid identifiers, and aren't removed yet. + for (GraphIndex idx : m_to_remove) { + assert(idx < m_entries.size()); + assert(m_entries[idx].m_locator.IsPresent()); + } + + // Verify that all to-be-added dependencies have valid identifiers. + for (auto [par_idx, chl_idx] : m_deps_to_add) { + assert(par_idx != chl_idx); + assert(par_idx < m_entries.size()); + assert(chl_idx < m_entries.size()); + } + + // Verify that the actually encountered clusters match the ones occurring in Entry vector. + assert(actual_clusters == expected_clusters); + + // Verify that the contents of m_unlinked matches what was expected based on the Entry vector. + std::set actual_unlinked(m_unlinked.begin(), m_unlinked.end()); + assert(actual_unlinked == expected_unlinked); + + // If no to-be-removed transactions, or to-be-added dependencies remain, m_unlinked must be + // empty (to prevent memory leaks due to an ever-growing m_entries vector). + if (m_to_remove.empty() && m_deps_to_add.empty()) assert(actual_unlinked.empty()); +} + } // namespace TxGraph::Ref::~Ref() diff --git a/src/txgraph.h b/src/txgraph.h index 0a20ad232d2..709b4ba7c2b 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -94,6 +94,9 @@ public: /** Get the total number of transactions in the graph. */ virtual GraphIndex GetTransactionCount() noexcept = 0; + /** Perform an internal consistency check on this object. */ + virtual void SanityCheck() const = 0; + protected: // Allow TxGraph::Ref to call UpdateRef and UnlinkRef. 
friend class TxGraph::Ref; From c80aecc24ddd878c62be9753a2746e36860e3a97 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Sat, 25 Jan 2025 23:23:28 -0500 Subject: [PATCH 08/25] txgraph: Avoid per-group vectors for clusters & dependencies (optimization) Instead construct a single vector with the list of all clusters in all groups, and then store per-group offset/range in that list. For dependencies, reuse m_deps_to_add, and store offset/range into that. --- src/txgraph.cpp | 63 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 18 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index c31e7a087d2..378a2947c81 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -164,11 +164,23 @@ private: /** Information about one group of Clusters to be merged. */ struct GroupEntry { - /** Which clusters are to be merged. */ - std::vector m_clusters; - /** Which dependencies are to be applied to those merged clusters, as (parent, child) - * pairs. */ - std::vector> m_deps; + /** Where the clusters to be merged start in m_group_clusters. */ + uint32_t m_cluster_offset; + /** How many clusters to merge. */ + uint32_t m_cluster_count; + /** Where the dependencies for this cluster group in m_deps_to_add start. */ + uint32_t m_deps_offset; + /** How many dependencies to add. */ + uint32_t m_deps_count; + }; + + /** Information about all groups of Clusters to be merged. */ + struct GroupData + { + /** The groups of Clusters to be merged. */ + std::vector m_groups; + /** Which clusters are to be merged. GroupEntry::m_cluster_offset indexes into this. */ + std::vector m_group_clusters; }; /** The vectors of clusters, one vector per quality level. ClusterSetIndex indexes into each. */ @@ -179,7 +191,7 @@ private: * into this. */ std::vector> m_deps_to_add; /** Information about the merges to be performed, if known. 
*/ - std::optional> m_group_data = std::vector{}; + std::optional m_group_data = GroupData{}; /** Total number of transactions in this graph (sum of all transaction counts in all Clusters). * */ GraphIndex m_txcount{0}; @@ -796,24 +808,34 @@ void TxGraphImpl::GroupClusters() noexcept std::sort(an_deps.begin(), an_deps.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); - // Translate the resulting cluster groups to the m_group_data structure. - m_group_data = std::vector{}; + // Translate the resulting cluster groups to the m_group_data structure, and the dependencies + // back to m_deps_to_add. + m_group_data = GroupData{}; + m_group_data->m_group_clusters.reserve(an_clusters.size()); + m_deps_to_add.clear(); + m_deps_to_add.reserve(an_deps.size()); auto an_deps_it = an_deps.begin(); auto an_clusters_it = an_clusters.begin(); while (an_clusters_it != an_clusters.end()) { // Process all clusters/dependencies belonging to the partition with representative rep. auto rep = an_clusters_it->second; // Create and initialize a new GroupData entry for the partition. - auto& new_entry = m_group_data->emplace_back(); - // Add all its clusters to it (copying those from an_clusters to m_clusters). + auto& new_entry = m_group_data->m_groups.emplace_back(); + new_entry.m_cluster_offset = m_group_data->m_group_clusters.size(); + new_entry.m_cluster_count = 0; + new_entry.m_deps_offset = m_deps_to_add.size(); + new_entry.m_deps_count = 0; + // Add all its clusters to it (copying those from an_clusters to m_group_clusters). 
while (an_clusters_it != an_clusters.end() && an_clusters_it->second == rep) { - new_entry.m_clusters.push_back(an_clusters_it->first); + m_group_data->m_group_clusters.push_back(an_clusters_it->first); ++an_clusters_it; + ++new_entry.m_cluster_count; } - // Add all its dependencies to it (copying those back from an_deps to m_deps). + // Add all its dependencies to it (copying those back from an_deps to m_deps_to_add). while (an_deps_it != an_deps.end() && an_deps_it->second == rep) { - new_entry.m_deps.push_back(an_deps_it->first); + m_deps_to_add.push_back(an_deps_it->first); ++an_deps_it; + ++new_entry.m_deps_count; } } Assume(an_deps_it == an_deps.end()); @@ -857,14 +879,19 @@ void TxGraphImpl::ApplyDependencies() noexcept if (m_deps_to_add.empty()) return; // For each group of to-be-merged Clusters. - for (auto& group_data : *m_group_data) { + for (const auto& group_data : m_group_data->m_groups) { // Invoke Merge() to merge them into a single Cluster. - Merge(group_data.m_clusters); + auto cluster_span = std::span{m_group_data->m_group_clusters} + .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); + Merge(cluster_span); // Actually apply all to-be-added dependencies (all parents and children from this grouping // belong to the same Cluster at this point because of the merging above). - const auto& loc = m_entries[group_data.m_deps[0].second].m_locator; + auto deps_span = std::span{m_deps_to_add} + .subspan(group_data.m_deps_offset, group_data.m_deps_count); + Assume(!deps_span.empty()); + const auto& loc = m_entries[deps_span[0].second].m_locator; Assume(loc.IsPresent()); - loc.cluster->ApplyDependencies(*this, group_data.m_deps); + loc.cluster->ApplyDependencies(*this, deps_span); } // Wipe the list of to-be-added dependencies now that they are applied. 
@@ -872,7 +899,7 @@ void TxGraphImpl::ApplyDependencies() noexcept Compact(); // Also no further Cluster mergings are needed (note that we clear, but don't set to // std::nullopt, as that would imply the groupings are unknown). - m_group_data = std::vector{}; + m_group_data = GroupData{}; } void Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept From 1d27b74c8e3bf055fb8b0a5fc5d664bd5048bec6 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 19 Mar 2025 16:22:25 -0400 Subject: [PATCH 09/25] txgraph: Add GetChunkFeerate function (feature) This adds a function to query the chunk feerate of a transaction, by caching it inside the Entry objects. --- src/test/fuzz/txgraph.cpp | 23 +++++++++++++++++++++- src/txgraph.cpp | 41 ++++++++++++++++++++++++++++++++++++++- src/txgraph.h | 9 +++++++++ 3 files changed, 71 insertions(+), 2 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 367ded4b146..cec1cd56edc 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -321,6 +321,19 @@ FUZZ_TARGET(txgraph) assert(feerate == sim.graph.FeeRate(simpos)); } break; + } else if (command-- == 0) { + // GetChunkFeerate. + auto ref = pick_fn(); + auto feerate = real->GetChunkFeerate(*ref); + auto simpos = sim.Find(ref); + if (simpos == SimTxGraph::MISSING) { + assert(feerate.IsEmpty()); + } else { + // Just do some quick checks that the reported value is in range. A full + // recomputation of expected chunk feerates is done at the end. + assert(feerate.size >= sim.graph.FeeRate(simpos).size); + } + break; } else if (command-- == 0) { // GetAncestors/GetDescendants. auto ref = pick_fn(); @@ -405,13 +418,21 @@ FUZZ_TARGET(txgraph) simlin.push_back(simpos); } // Construct a chunking object for the simulated graph, using the reported cluster - // linearization as ordering. + // linearization as ordering, and compare it against the reported chunk feerates. 
cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); + DepGraphIndex idx{0}; for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { auto chunk = simlinchunk.GetChunk(chunknum); // Require that the chunks of cluster linearizations are connected (this must // be the case as all linearizations inside are PostLinearized). assert(sim.graph.IsConnected(chunk.transactions)); + // Check the chunk feerates of all transactions in the cluster. + while (chunk.transactions.Any()) { + assert(chunk.transactions[simlin[idx]]); + chunk.transactions.Reset(simlin[idx]); + assert(chunk.feerate == real->GetChunkFeerate(*cluster[idx])); + ++idx; + } } } } diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 378a2947c81..4b49cd6c6f5 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -221,6 +221,8 @@ private: Ref* m_ref{nullptr}; /** Which Cluster and position therein this Entry appears in. */ Locator m_locator; + /** The chunk feerate of this transaction (if not missing). */ + FeePerWeight m_chunk_feerate; }; /** The set of all transactions. GraphIndex values index into this. */ @@ -301,6 +303,7 @@ public: void SetTransactionFee(const Ref&, int64_t fee) noexcept final; bool Exists(const Ref& arg) noexcept final; + FeePerWeight GetChunkFeerate(const Ref& arg) noexcept final; FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept final; std::vector GetCluster(const Ref& arg) noexcept final; std::vector GetAncestors(const Ref& arg) noexcept final; @@ -317,6 +320,24 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept auto& entry = graph.m_entries[m_mapping[idx]]; entry.m_locator.SetPresent(this, idx); } + + // Compute its chunking and store its information in the Entry's m_chunk_feerate. + LinearizationChunking chunking(m_depgraph, m_linearization); + LinearizationIndex lin_idx{0}; + // Iterate over the chunks. 
+ for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) { + auto chunk = chunking.GetChunk(chunk_idx); + Assume(chunk.transactions.Any()); + // Iterate over the transactions in the linearization, which must match those in chunk. + do { + DepGraphIndex idx = m_linearization[lin_idx++]; + GraphIndex graph_idx = m_mapping[idx]; + auto& entry = graph.m_entries[graph_idx]; + entry.m_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate); + Assume(chunk.transactions[idx]); + chunk.transactions.Reset(idx); + } while(chunk.transactions.Any()); + } } void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove) noexcept @@ -1108,6 +1129,23 @@ FeePerWeight TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept return cluster->GetIndividualFeerate(m_entries[GetRefIndex(arg)].m_locator.index); } +FeePerWeight TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept +{ + // Return the empty FeePerWeight if the passed Ref is empty. + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all removals and dependencies, as the result might be inaccurate otherwise. + ApplyDependencies(); + // Find the cluster the argument is in, and return the empty FeePerWeight if it isn't in any. + auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return {}; + // Make sure the Cluster has an acceptable quality level, and then return the transaction's + // chunk feerate. + MakeAcceptable(*cluster); + const auto& entry = m_entries[GetRefIndex(arg)]; + return entry.m_chunk_feerate; +} + void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcept { // Make sure the specified DepGraphIndex exists in this Cluster. @@ -1159,10 +1197,11 @@ void Cluster::SanityCheck(const TxGraphImpl& graph) const // Check that the Entry has a locator pointing back to this Cluster & position within it. 
assert(entry.m_locator.cluster == this); assert(entry.m_locator.index == lin_pos); - // Check linearization position. + // Check linearization position and chunk feerate. if (!linchunking.GetChunk(0).transactions[lin_pos]) { linchunking.MarkDone(linchunking.GetChunk(0).transactions); } + assert(entry.m_chunk_feerate == linchunking.GetChunk(0).feerate); // If this Cluster has an acceptable quality level, its chunks must be connected. if (IsAcceptable()) { assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions)); diff --git a/src/txgraph.h b/src/txgraph.h index 709b4ba7c2b..f92cfbd7950 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -29,6 +29,12 @@ static constexpr unsigned CLUSTER_COUNT_LIMIT{64}; * * For more explanation, see https://delvingbitcoin.org/t/introduction-to-cluster-linearization/1032 * + * This linearization is partitioned into chunks: groups of transactions that according to this + * order would be mined together. Each chunk consists of the highest-feerate prefix of what remains + * of the linearization after removing previous chunks. TxGraph guarantees that the maintained + * linearization always results in chunks consisting of transactions that are connected. A chunk's + * transactions always belong to the same cluster. + * * The interface is designed to accommodate an implementation that only stores the transitive * closure of dependencies, so if B spends C, it does not distinguish between "A spending B" and * "A spending both B and C". @@ -82,6 +88,9 @@ public: /** Get the individual transaction feerate of transaction arg. Returns the empty FeePerWeight * if arg does not exist. */ virtual FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept = 0; + /** Get the feerate of the chunk which transaction arg is in. Returns the empty FeePerWeight if + * arg does not exist. */ + virtual FeePerWeight GetChunkFeerate(const Ref& arg) noexcept = 0; /** Get pointers to all transactions in the cluster which arg is in. 
The transactions will be * returned in graph order. Returns {} if arg does not exist in the graph. */ virtual std::vector GetCluster(const Ref& arg) noexcept = 0; From 64f69ec8c383436d1a657add1b8a7eee3e75f61f Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Sun, 24 Nov 2024 08:53:50 -0500 Subject: [PATCH 10/25] txgraph: Make max cluster count configurable and "oversize" state (feature) Instead of leaving the responsibility on higher layers to guarantee that no connected component within TxGraph (a barely exposed concept, except through GetCluster()) exceeds the cluster count limit, move this responsibility to TxGraph itself: * TxGraph retains a cluster count limit, but it becomes configurable at construction time (this primarily helps with testing that it is properly enforced). * It is always allowed to perform mutators on TxGraph, even if they would cause the cluster count limit to be exceeded. Instead, TxGraph exposes an IsOversized() function, which queries whether it is in a special "oversize" state. * During oversize state, many inspectors are unavailable, but mutators remain valid, so the higher layer can "fix" the oversize state before continuing. --- src/test/fuzz/txgraph.cpp | 169 +++++++++++++++++++++++--------------- src/txgraph.cpp | 51 ++++++++++-- src/txgraph.h | 31 ++++--- 3 files changed, 166 insertions(+), 85 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index cec1cd56edc..b6163ed5544 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -27,7 +27,7 @@ struct SimTxGraph /** Maximum number of transactions to support simultaneously. Set this higher than txgraph's * cluster count, so we can exercise situations with more transactions than fit in one * cluster. */ - static constexpr unsigned MAX_TRANSACTIONS = CLUSTER_COUNT_LIMIT * 2; + static constexpr unsigned MAX_TRANSACTIONS = MAX_CLUSTER_COUNT_LIMIT * 2; /** Set type to use in the simulation. 
*/ using SetType = BitSet; /** Data type for representing positions within SimTxGraph::graph. */ @@ -44,6 +44,31 @@ struct SimTxGraph std::map simrevmap; /** The set of TxGraph::Ref entries that have been removed, but not yet destroyed. */ std::vector> removed; + /** Whether the graph is oversized (true = yes, false = no, std::nullopt = unknown). */ + std::optional oversized; + /** The configured maximum number of transactions per cluster. */ + DepGraphIndex max_cluster_count; + + /** Construct a new SimTxGraph with the specified maximum cluster count. */ + explicit SimTxGraph(DepGraphIndex max_cluster) : max_cluster_count(max_cluster) {} + + /** Check whether this graph is oversized (contains a connected component whose number of + * transactions exceeds max_cluster_count). */ + bool IsOversized() + { + if (!oversized.has_value()) { + // Only recompute when oversized isn't already known. + oversized = false; + auto todo = graph.Positions(); + // Iterate over all connected components of the graph. + while (todo.Any()) { + auto component = graph.FindConnectedComponent(todo); + if (component.Count() > max_cluster_count) oversized = true; + todo -= component; + } + } + return *oversized; + } /** Determine the number of (non-removed) transactions in the graph. */ DepGraphIndex GetTransactionCount() const { return graph.TxCount(); } @@ -84,6 +109,8 @@ struct SimTxGraph auto chl_pos = Find(child); if (chl_pos == MISSING) return; graph.AddDependencies(SetType::Singleton(par_pos), chl_pos); + // This may invalidate our cached oversized value. + if (oversized.has_value() && !*oversized) oversized = std::nullopt; } /** Modify the transaction fee of a ref, if it exists. */ @@ -105,6 +132,8 @@ struct SimTxGraph // invoked until the simulation explicitly decided to do so. removed.push_back(std::move(simmap[pos])); simmap[pos].reset(); + // This may invalidate our cached oversized value. 
+ if (oversized.has_value() && *oversized) oversized = std::nullopt; } /** Construct the set with all positions in this graph corresponding to the specified @@ -170,9 +199,12 @@ FUZZ_TARGET(txgraph) /** Variable used whenever an empty TxGraph::Ref is needed. */ TxGraph::Ref empty_ref; + // Decide the maximum number of transactions per cluster we will use in this simulation. + auto max_count = provider.ConsumeIntegralInRange(1, MAX_CLUSTER_COUNT_LIMIT); + // Construct a real and a simulated graph. - auto real = MakeTxGraph(); - SimTxGraph sim; + auto real = MakeTxGraph(max_count); + SimTxGraph sim(max_count); /** Function to pick any Ref (from sim.simmap or sim.removed, or the empty Ref). */ auto pick_fn = [&]() noexcept -> TxGraph::Ref* { @@ -245,17 +277,6 @@ FUZZ_TARGET(txgraph) // Determine if adding this would introduce a cycle (not allowed by TxGraph), // and if so, skip. if (sim.graph.Ancestors(pos_par)[pos_chl]) break; - // Determine if adding this would violate CLUSTER_COUNT_LIMIT, and if so, skip. - auto temp_depgraph = sim.graph; - temp_depgraph.AddDependencies(SimTxGraph::SetType::Singleton(pos_par), pos_chl); - auto todo = temp_depgraph.Positions(); - bool oversize{false}; - while (todo.Any()) { - auto component = temp_depgraph.FindConnectedComponent(todo); - if (component.Count() > CLUSTER_COUNT_LIMIT) oversize = true; - todo -= component; - } - if (oversize) break; } sim.AddDependency(par, chl); real->AddDependency(*par, *chl); @@ -310,6 +331,10 @@ FUZZ_TARGET(txgraph) bool should_exist = sim.Find(ref) != SimTxGraph::MISSING; assert(exists == should_exist); break; + } else if (command-- == 0) { + // IsOversized. + assert(sim.IsOversized() == real->IsOversized()); + break; } else if (command-- == 0) { // GetIndividualFeerate. auto ref = pick_fn(); @@ -321,7 +346,7 @@ FUZZ_TARGET(txgraph) assert(feerate == sim.graph.FeeRate(simpos)); } break; - } else if (command-- == 0) { + } else if (!sim.IsOversized() && command-- == 0) { // GetChunkFeerate. 
auto ref = pick_fn(); auto feerate = real->GetChunkFeerate(*ref); @@ -334,20 +359,22 @@ FUZZ_TARGET(txgraph) assert(feerate.size >= sim.graph.FeeRate(simpos).size); } break; - } else if (command-- == 0) { + } else if (!sim.IsOversized() && command-- == 0) { // GetAncestors/GetDescendants. auto ref = pick_fn(); - auto result_set = sim.MakeSet(alt ? real->GetDescendants(*ref) : - real->GetAncestors(*ref)); + auto result = alt ? real->GetDescendants(*ref) : real->GetAncestors(*ref); + assert(result.size() <= max_count); + auto result_set = sim.MakeSet(result); + assert(result.size() == result_set.Count()); auto expect_set = sim.GetAncDesc(ref, alt); assert(result_set == expect_set); break; - } else if (command-- == 0) { + } else if (!sim.IsOversized() && command-- == 0) { // GetCluster. auto ref = pick_fn(); auto result = real->GetCluster(*ref); // Check cluster count limit. - assert(result.size() <= CLUSTER_COUNT_LIMIT); + assert(result.size() <= max_count); // Require the result to be topologically valid and not contain duplicates. auto left = sim.graph.Positions(); for (auto refptr : result) { @@ -382,56 +409,62 @@ FUZZ_TARGET(txgraph) real->SanityCheck(); // Compare simple properties of the graph with the simulation. + assert(real->IsOversized() == sim.IsOversized()); assert(real->GetTransactionCount() == sim.GetTransactionCount()); - // Perform a full comparison. - auto todo = sim.graph.Positions(); - // Iterate over all connected components of the resulting (simulated) graph, each of which - // should correspond to a cluster in the real one. - while (todo.Any()) { - auto component = sim.graph.FindConnectedComponent(todo); - todo -= component; - // Iterate over the transactions in that component. - for (auto i : component) { - // Check its individual feerate against simulation. - assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(*sim.GetRef(i))); - // Check its ancestors against simulation. 
- auto expect_anc = sim.graph.Ancestors(i); - auto anc = sim.MakeSet(real->GetAncestors(*sim.GetRef(i))); - assert(anc == expect_anc); - // Check its descendants against simulation. - auto expect_desc = sim.graph.Descendants(i); - auto desc = sim.MakeSet(real->GetDescendants(*sim.GetRef(i))); - assert(desc == expect_desc); - // Check the cluster the transaction is part of. - auto cluster = real->GetCluster(*sim.GetRef(i)); - assert(sim.MakeSet(cluster) == component); - // Check that the cluster is reported in a valid topological order (its - // linearization). - std::vector simlin; - SimTxGraph::SetType done; - for (TxGraph::Ref* ptr : cluster) { - auto simpos = sim.Find(ptr); - assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); - done.Set(simpos); - assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); - simlin.push_back(simpos); - } - // Construct a chunking object for the simulated graph, using the reported cluster - // linearization as ordering, and compare it against the reported chunk feerates. - cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); - DepGraphIndex idx{0}; - for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { - auto chunk = simlinchunk.GetChunk(chunknum); - // Require that the chunks of cluster linearizations are connected (this must - // be the case as all linearizations inside are PostLinearized). - assert(sim.graph.IsConnected(chunk.transactions)); - // Check the chunk feerates of all transactions in the cluster. - while (chunk.transactions.Any()) { - assert(chunk.transactions[simlin[idx]]); - chunk.transactions.Reset(simlin[idx]); - assert(chunk.feerate == real->GetChunkFeerate(*cluster[idx])); - ++idx; + // If the graph (and the simulation) are not oversized, perform a full comparison. 
+ if (!sim.IsOversized()) { + auto todo = sim.graph.Positions(); + // Iterate over all connected components of the resulting (simulated) graph, each of which + // should correspond to a cluster in the real one. + while (todo.Any()) { + auto component = sim.graph.FindConnectedComponent(todo); + todo -= component; + // Iterate over the transactions in that component. + for (auto i : component) { + // Check its individual feerate against simulation. + assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(*sim.GetRef(i))); + // Check its ancestors against simulation. + auto expect_anc = sim.graph.Ancestors(i); + auto anc = sim.MakeSet(real->GetAncestors(*sim.GetRef(i))); + assert(anc.Count() <= max_count); + assert(anc == expect_anc); + // Check its descendants against simulation. + auto expect_desc = sim.graph.Descendants(i); + auto desc = sim.MakeSet(real->GetDescendants(*sim.GetRef(i))); + assert(desc.Count() <= max_count); + assert(desc == expect_desc); + // Check the cluster the transaction is part of. + auto cluster = real->GetCluster(*sim.GetRef(i)); + assert(cluster.size() <= max_count); + assert(sim.MakeSet(cluster) == component); + // Check that the cluster is reported in a valid topological order (its + // linearization). + std::vector simlin; + SimTxGraph::SetType done; + for (TxGraph::Ref* ptr : cluster) { + auto simpos = sim.Find(ptr); + assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); + done.Set(simpos); + assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); + simlin.push_back(simpos); + } + // Construct a chunking object for the simulated graph, using the reported cluster + // linearization as ordering, and compare it against the reported chunk feerates. 
+ cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); + DepGraphIndex idx{0}; + for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { + auto chunk = simlinchunk.GetChunk(chunknum); + // Require that the chunks of cluster linearizations are connected (this must + // be the case as all linearizations inside are PostLinearized). + assert(sim.graph.IsConnected(chunk.transactions)); + // Check the chunk feerates of all transactions in the cluster. + while (chunk.transactions.Any()) { + assert(chunk.transactions[simlin[idx]]); + chunk.transactions.Reset(simlin[idx]); + assert(chunk.feerate == real->GetChunkFeerate(*cluster[idx])); + ++idx; + } } } } diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 4b49cd6c6f5..ae0da8ba0d2 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -49,7 +49,7 @@ class Cluster { friend class TxGraphImpl; using GraphIndex = TxGraph::GraphIndex; - using SetType = BitSet; + using SetType = BitSet; /** The DepGraph for this cluster, holding all feerates, and ancestors/descendants. */ DepGraph m_depgraph; /** m_mapping[i] gives the GraphIndex for the position i transaction in m_depgraph. Values for @@ -160,6 +160,8 @@ class TxGraphImpl final : public TxGraph private: /** Internal RNG. */ FastRandomContext m_rng; + /** This TxGraphImpl's maximum cluster count limit. */ + const DepGraphIndex m_max_cluster_count; /** Information about one group of Clusters to be merged. */ struct GroupEntry @@ -181,6 +183,9 @@ private: std::vector m_groups; /** Which clusters are to be merged. GroupEntry::m_cluster_offset indexes into this. */ std::vector m_group_clusters; + /** Whether at least one of the groups cannot be applied because it would result in a + * Cluster that violates the cluster count limit. */ + bool m_group_oversized; }; /** The vectors of clusters, one vector per quality level. ClusterSetIndex indexes into each. 
*/ @@ -232,8 +237,13 @@ private: std::vector m_unlinked; public: - /** Construct a new TxGraphImpl. */ - explicit TxGraphImpl() noexcept {} + /** Construct a new TxGraphImpl with the specified maximum cluster count. */ + explicit TxGraphImpl(DepGraphIndex max_cluster_count) noexcept : + m_max_cluster_count(max_cluster_count) + { + Assume(max_cluster_count >= 1); + Assume(max_cluster_count <= MAX_CLUSTER_COUNT_LIMIT); + } // Cannot move or copy (would invalidate TxGraphImpl* in Ref, MiningOrder, EvictionOrder). TxGraphImpl(const TxGraphImpl&) = delete; @@ -309,6 +319,7 @@ public: std::vector GetAncestors(const Ref& arg) noexcept final; std::vector GetDescendants(const Ref& arg) noexcept final; GraphIndex GetTransactionCount() noexcept final; + bool IsOversized() noexcept final; void SanityCheck() const final; }; @@ -696,7 +707,7 @@ void TxGraphImpl::GroupClusters() noexcept // Before computing which Clusters need to be merged together, first apply all removals and // split the Clusters into connected components. If we would group first, we might end up - // with inefficient Clusters which just end up being split again anyway. + // with inefficient and/or oversized Clusters which just end up being split again anyway. SplitAll(); /** Annotated clusters: an entry for each Cluster, together with the representative for the @@ -833,6 +844,7 @@ void TxGraphImpl::GroupClusters() noexcept // back to m_deps_to_add. m_group_data = GroupData{}; m_group_data->m_group_clusters.reserve(an_clusters.size()); + m_group_data->m_group_oversized = false; m_deps_to_add.clear(); m_deps_to_add.reserve(an_deps.size()); auto an_deps_it = an_deps.begin(); @@ -846,9 +858,11 @@ void TxGraphImpl::GroupClusters() noexcept new_entry.m_cluster_count = 0; new_entry.m_deps_offset = m_deps_to_add.size(); new_entry.m_deps_count = 0; + uint32_t total_count{0}; // Add all its clusters to it (copying those from an_clusters to m_group_clusters). 
while (an_clusters_it != an_clusters.end() && an_clusters_it->second == rep) { m_group_data->m_group_clusters.push_back(an_clusters_it->first); + total_count += an_clusters_it->first->GetTxCount(); ++an_clusters_it; ++new_entry.m_cluster_count; } @@ -858,6 +872,10 @@ void TxGraphImpl::GroupClusters() noexcept ++an_deps_it; ++new_entry.m_deps_count; } + // Detect oversizedness. + if (total_count > m_max_cluster_count) { + m_group_data->m_group_oversized = true; + } } Assume(an_deps_it == an_deps.end()); Assume(an_clusters_it == an_clusters.end()); @@ -898,6 +916,8 @@ void TxGraphImpl::ApplyDependencies() noexcept Assume(m_group_data.has_value()); // Nothing to do if there are no dependencies to be added. if (m_deps_to_add.empty()) return; + // Dependencies cannot be applied if it would result in oversized clusters. + if (m_group_data->m_group_oversized) return; // For each group of to-be-merged Clusters. for (const auto& group_data : m_group_data->m_groups) { @@ -1073,6 +1093,8 @@ std::vector TxGraphImpl::GetAncestors(const Ref& arg) noexcept Assume(GetRefGraph(arg) == this); // Apply all removals and dependencies, as the result might be incorrect otherwise. ApplyDependencies(); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1087,6 +1109,8 @@ std::vector TxGraphImpl::GetDescendants(const Ref& arg) noexcept Assume(GetRefGraph(arg) == this); // Apply all removals and dependencies, as the result might be incorrect otherwise. ApplyDependencies(); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. 
auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1101,6 +1125,8 @@ std::vector TxGraphImpl::GetCluster(const Ref& arg) noexcept Assume(GetRefGraph(arg) == this); // Apply all removals and dependencies, as the result might be incorrect otherwise. ApplyDependencies(); + // Cluster linearization cannot be known if unapplied dependencies remain. + Assume(m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1136,6 +1162,8 @@ FeePerWeight TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept Assume(GetRefGraph(arg) == this); // Apply all removals and dependencies, as the result might be inaccurate otherwise. ApplyDependencies(); + // Chunk feerates cannot be accurately known if unapplied dependencies remain. + Assume(m_deps_to_add.empty()); // Find the cluster the argument is in, and return the empty FeePerWeight if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1146,6 +1174,15 @@ FeePerWeight TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept return entry.m_chunk_feerate; } +bool TxGraphImpl::IsOversized() noexcept +{ + // Find which Clusters will need to be merged together, as that is where the oversize + // property is assessed. + GroupClusters(); + Assume(m_group_data.has_value()); + return m_group_data->m_group_oversized; +} + void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcept { // Make sure the specified DepGraphIndex exists in this Cluster. @@ -1180,6 +1217,8 @@ void Cluster::SanityCheck(const TxGraphImpl& graph) const assert(m_depgraph.PositionRange() == m_mapping.size()); // The linearization for this Cluster must contain every transaction once. 
assert(m_depgraph.TxCount() == m_linearization.size()); + // The number of transactions in a Cluster cannot exceed m_max_cluster_count. + assert(m_linearization.size() <= graph.m_max_cluster_count); // m_quality and m_setindex are checked in TxGraphImpl::SanityCheck. // Compute the chunking of m_linearization. @@ -1321,7 +1360,7 @@ TxGraph::Ref::Ref(Ref&& other) noexcept std::swap(m_index, other.m_index); } -std::unique_ptr MakeTxGraph() noexcept +std::unique_ptr MakeTxGraph(unsigned max_cluster_count) noexcept { - return std::make_unique(); + return std::make_unique(max_cluster_count); } diff --git a/src/txgraph.h b/src/txgraph.h index f92cfbd7950..83411dc8801 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -12,8 +12,7 @@ #ifndef BITCOIN_TXGRAPH_H #define BITCOIN_TXGRAPH_H -/** No connected component within TxGraph is allowed to exceed this number of transactions. */ -static constexpr unsigned CLUSTER_COUNT_LIMIT{64}; +static constexpr unsigned MAX_CLUSTER_COUNT_LIMIT{64}; /** Data structure to encapsulate fees, sizes, and dependencies for a set of transactions. * @@ -83,24 +82,33 @@ public: * removed), this has no effect. */ virtual void SetTransactionFee(const Ref& arg, int64_t fee) noexcept = 0; - /** Determine whether arg exists in this graph (i.e., was not removed). */ + /** Determine whether the graph is oversized (contains a connected component of more than the + * configured maximum cluster count). Some of the functions below are not available + * for oversized graphs. The mutators above are always available. */ + virtual bool IsOversized() noexcept = 0; + /** Determine whether arg exists in this graph (i.e., was not removed). This is available even + * for oversized graphs. */ virtual bool Exists(const Ref& arg) noexcept = 0; /** Get the individual transaction feerate of transaction arg. Returns the empty FeePerWeight - * if arg does not exist. */ + * if arg does not exist. This is available even for oversized graphs. 
*/ virtual FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept = 0; /** Get the feerate of the chunk which transaction arg is in. Returns the empty FeePerWeight if - * arg does not exist. */ + * arg does not exist. The graph must not be oversized. */ virtual FeePerWeight GetChunkFeerate(const Ref& arg) noexcept = 0; /** Get pointers to all transactions in the cluster which arg is in. The transactions will be - * returned in graph order. Returns {} if arg does not exist in the graph. */ + * returned in graph order. The graph must not be oversized. Returns {} if arg does not exist + * in the graph. */ virtual std::vector GetCluster(const Ref& arg) noexcept = 0; /** Get pointers to all ancestors of the specified transaction (including the transaction - * itself), in unspecified order. Returns {} if arg does not exist in the graph. */ + * itself), in unspecified order. The graph must not be oversized. Returns {} if arg does not + * exist in the graph. */ virtual std::vector GetAncestors(const Ref& arg) noexcept = 0; /** Get pointers to all descendants of the specified transaction (including the transaction - * itself), in unspecified order. Returns {} if arg does not exist in the graph. */ + * itself), in unspecified order. The graph must not be oversized. Returns {} if arg does not + * exist in the graph. */ virtual std::vector GetDescendants(const Ref& arg) noexcept = 0; - /** Get the total number of transactions in the graph. */ + /** Get the total number of transactions in the graph. This is available even for oversized + * graphs. */ virtual GraphIndex GetTransactionCount() noexcept = 0; /** Perform an internal consistency check on this object. */ @@ -145,7 +153,8 @@ public: }; }; -/** Construct a new TxGraph. */ -std::unique_ptr MakeTxGraph() noexcept; +/** Construct a new TxGraph with the specified limit on transactions within a cluster. That + * number cannot exceed MAX_CLUSTER_COUNT_LIMIT. 
*/ +std::unique_ptr MakeTxGraph(unsigned max_cluster_count) noexcept; #endif // BITCOIN_TXGRAPH_H From 1171953ac6091950f06646a8cc85ca10683023ce Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 22 Jan 2025 14:53:32 -0500 Subject: [PATCH 11/25] txgraph: Avoid representative lookup for each dependency (optimization) The m_deps_to_add vector is sorted by child Cluster*, which matches the order of an_clusters. This means we can walk through m_deps_to_add while doing the representative lookups for an_clusters, and reuse them. --- src/txgraph.cpp | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index ae0da8ba0d2..acceaf064b6 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -734,6 +734,15 @@ void TxGraphImpl::GroupClusters() noexcept std::sort(an_clusters.begin(), an_clusters.end()); an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end()); + // Sort the dependencies by child Cluster. + std::sort(m_deps_to_add.begin(), m_deps_to_add.end(), [&](auto& a, auto& b) noexcept { + auto [_a_par, a_chl] = a; + auto [_b_par, b_chl] = b; + auto a_chl_cluster = m_entries[a_chl].m_locator.cluster; + auto b_chl_cluster = m_entries[b_chl].m_locator.cluster; + return std::less{}(a_chl_cluster, b_chl_cluster); + }); + // Run the union-find algorithm to to find partitions of the input Clusters which need to be // grouped together. See https://en.wikipedia.org/wiki/Disjoint-set_data_structure. { @@ -813,6 +822,8 @@ void TxGraphImpl::GroupClusters() noexcept // Populate the an_clusters and an_deps data structures with the list of input Clusters, // and the input dependencies, annotated with the representative of the Cluster partition // it applies to. 
+ an_deps.reserve(m_deps_to_add.size()); + auto deps_it = m_deps_to_add.begin(); for (size_t i = 0; i < partition_data.size(); ++i) { auto& data = partition_data[i]; // Find the representative of the partition Cluster i is in, and store it with the @@ -820,18 +831,20 @@ void TxGraphImpl::GroupClusters() noexcept auto rep = find_root_fn(&data)->cluster; Assume(an_clusters[i].second == nullptr); an_clusters[i].second = rep; - } - an_deps.reserve(m_deps_to_add.size()); - for (auto [par, chl] : m_deps_to_add) { - auto chl_cluster = m_entries[chl].m_locator.cluster; - auto par_cluster = m_entries[par].m_locator.cluster; - // Nothing to do if either parent or child transaction is removed already. - if (par_cluster == nullptr || chl_cluster == nullptr) continue; - // Find the representative of the partition which this dependency's child is in (which - // should be the same as the one for the parent). - auto rep = find_root_fn(locate_fn(chl_cluster))->cluster; - // Create an_deps entry. - an_deps.emplace_back(std::pair{par, chl}, rep); + // Find all dependencies whose child Cluster is Cluster i, and annotate them with rep. + while (deps_it != m_deps_to_add.end()) { + auto [par, chl] = *deps_it; + auto chl_cluster = m_entries[chl].m_locator.cluster; + if (std::greater{}(chl_cluster, data.cluster)) break; + // Skip dependencies that apply to earlier Clusters (those necessary are for + // deleted transactions, as otherwise we'd have processed them already). + if (chl_cluster == data.cluster) { + auto par_cluster = m_entries[par].m_locator.cluster; + // Also filter out dependencies applying to a removed parent. 
+ if (par_cluster != nullptr) an_deps.emplace_back(*deps_it, rep); + } + ++deps_it; + } } } From 57f5499882afe170612e0afd4ef6d91561738288 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 22 Jan 2025 14:36:00 -0500 Subject: [PATCH 12/25] txgraph: Avoid looking up the same child cluster repeatedly (optimization) Since m_deps_to_add has been sorted by child Cluster* already, all dependencies with the same child will be processed consecutively. Take advantage of this by remembering the last partition merged with, and reusing that if applicable. --- src/txgraph.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index acceaf064b6..2ef2f7b2c3e 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -784,18 +784,20 @@ void TxGraphImpl::GroupClusters() noexcept return data; }; - /** Given two PartitionDatas, union the partitions they are in. */ + /** Given two PartitionDatas, union the partitions they are in, and return their + * representative. */ static constexpr auto union_fn = [](PartitionData* arg1, PartitionData* arg2) noexcept { // Find the roots of the trees, and bail out if they are already equal (which would // mean they are in the same partition already). auto rep1 = find_root_fn(arg1); auto rep2 = find_root_fn(arg2); - if (rep1 == rep2) return; + if (rep1 == rep2) return rep1; // Pick the lower-rank root to become a child of the higher-rank one. // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Union_by_rank. if (rep1->rank < rep2->rank) std::swap(rep1, rep2); rep2->parent = rep1; rep1->rank += (rep1->rank == rep2->rank); + return rep1; }; // Start by initializing every Cluster as its own singleton partition. @@ -808,6 +810,8 @@ void TxGraphImpl::GroupClusters() noexcept // Run through all parent/child pairs in m_deps_to_add, and union the // the partitions their Clusters are in.
+ Cluster* last_chl_cluster{nullptr}; + PartitionData* last_partition{nullptr}; for (const auto& [par, chl] : m_deps_to_add) { auto par_cluster = m_entries[par].m_locator.cluster; auto chl_cluster = m_entries[chl].m_locator.cluster; @@ -816,7 +820,15 @@ void TxGraphImpl::GroupClusters() noexcept // Nothing to do if either parent or child transaction is removed already. if (par_cluster == nullptr || chl_cluster == nullptr) continue; Assume(par != chl); - union_fn(locate_fn(par_cluster), locate_fn(chl_cluster)); + if (chl_cluster == last_chl_cluster) { + // If the child Clusters is the same as the previous iteration, union with the + // tree they were in, avoiding the need for another lookup. Note that m_deps_to_add + // is sorted by child Cluster, so batches with the same child are expected. + last_partition = union_fn(locate_fn(par_cluster), last_partition); + } else { + last_chl_cluster = chl_cluster; + last_partition = union_fn(locate_fn(par_cluster), locate_fn(chl_cluster)); + } } // Populate the an_clusters and an_deps data structures with the list of input Clusters, From 5801e0fb2b99f44ac24531779acf0d44ec35b98c Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Fri, 15 Nov 2024 13:31:23 -0500 Subject: [PATCH 13/25] txgraph: Delay chunking while sub-acceptable (optimization) Chunk-based information (primarily, chunk feerates) is never accessed without first bringing the relevant Clusters to an "acceptable" quality level. Thus, while operations are ongoing and Clusters are not acceptable, we can omit computing the chunkings and chunk feerates for Clusters.
--- src/txgraph.cpp | 50 +++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 2ef2f7b2c3e..28a2790cc81 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -331,23 +331,27 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept auto& entry = graph.m_entries[m_mapping[idx]]; entry.m_locator.SetPresent(this, idx); } - - // Compute its chunking and store its information in the Entry's m_chunk_feerate. - LinearizationChunking chunking(m_depgraph, m_linearization); - LinearizationIndex lin_idx{0}; - // Iterate over the chunks. - for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) { - auto chunk = chunking.GetChunk(chunk_idx); - Assume(chunk.transactions.Any()); - // Iterate over the transactions in the linearization, which must match those in chunk. - do { - DepGraphIndex idx = m_linearization[lin_idx++]; - GraphIndex graph_idx = m_mapping[idx]; - auto& entry = graph.m_entries[graph_idx]; - entry.m_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate); - Assume(chunk.transactions[idx]); - chunk.transactions.Reset(idx); - } while(chunk.transactions.Any()); + // If the Cluster's quality is ACCEPTABLE or OPTIMAL, compute its chunking and store its + // information in the Entry's m_chunk_feerate. These fields are only accessed after making + // the entire graph ACCEPTABLE, so it is pointless to compute these if we haven't reached that + // quality level yet. + if (IsAcceptable()) { + LinearizationChunking chunking(m_depgraph, m_linearization); + LinearizationIndex lin_idx{0}; + // Iterate over the chunks. + for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) { + auto chunk = chunking.GetChunk(chunk_idx); + Assume(chunk.transactions.Any()); + // Iterate over the transactions in the linearization, which must match those in chunk. 
+ do { + DepGraphIndex idx = m_linearization[lin_idx++]; + GraphIndex graph_idx = m_mapping[idx]; + auto& entry = graph.m_entries[graph_idx]; + entry.m_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate); + Assume(chunk.transactions[idx]); + chunk.transactions.Reset(idx); + } while(chunk.transactions.Any()); + } } } @@ -409,8 +413,6 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept // The existing Cluster is an entire component. Leave it be, but update its quality. Assume(todo == m_depgraph.Positions()); graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); - // We need to recompute and cache its chunking. - Updated(graph); return false; } first = false; @@ -1262,12 +1264,12 @@ void Cluster::SanityCheck(const TxGraphImpl& graph) const assert(entry.m_locator.cluster == this); assert(entry.m_locator.index == lin_pos); // Check linearization position and chunk feerate. - if (!linchunking.GetChunk(0).transactions[lin_pos]) { - linchunking.MarkDone(linchunking.GetChunk(0).transactions); - } - assert(entry.m_chunk_feerate == linchunking.GetChunk(0).feerate); - // If this Cluster has an acceptable quality level, its chunks must be connected. if (IsAcceptable()) { + if (!linchunking.GetChunk(0).transactions[lin_pos]) { + linchunking.MarkDone(linchunking.GetChunk(0).transactions); + } + assert(entry.m_chunk_feerate == linchunking.GetChunk(0).feerate); + // If this Cluster has an acceptable quality level, its chunks must be connected. 
+ assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions)); } } From 36dd5edca5b00f4140f19f364ff93a5a7dd4bbe3 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 14 Nov 2024 18:10:24 -0500 Subject: [PATCH 14/25] txgraph: Special-case removal of tail of cluster (Optimization) When transactions are removed from the tail of a cluster, we know the existing linearization remains acceptable (if it already was), but the cluster may just need splitting and postlinearization, so special-case these into separate quality levels. --- src/txgraph.cpp | 69 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 15 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 28a2790cc81..72cd932f34c 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -33,6 +33,8 @@ enum class QualityLevel { /** This cluster may have multiple disconnected components, which are all NEEDS_RELINEARIZE. */ NEEDS_SPLIT, + /** This cluster may have multiple disconnected components, which are all ACCEPTABLE. */ + NEEDS_SPLIT_ACCEPTABLE, /** This cluster has undergone changes that warrant re-linearization. */ NEEDS_RELINEARIZE, /** The minimal level of linearization has been performed, but it is not known to be optimal. */ @@ -79,9 +81,10 @@ public: // Generic helper functions. /** Whether the linearization of this Cluster can be exposed. */ - bool IsAcceptable() const noexcept + bool IsAcceptable(bool after_split = false) const noexcept { - return m_quality == QualityLevel::ACCEPTABLE || m_quality == QualityLevel::OPTIMAL; + return m_quality == QualityLevel::ACCEPTABLE || m_quality == QualityLevel::OPTIMAL || + (after_split && m_quality == QualityLevel::NEEDS_SPLIT_ACCEPTABLE); } /** Whether the linearization of this Cluster is optimal. */ bool IsOptimal() const noexcept @@ -91,7 +94,8 @@ public: /** Whether this cluster requires splitting.
*/ bool NeedsSplitting() const noexcept { - return m_quality == QualityLevel::NEEDS_SPLIT; + return m_quality == QualityLevel::NEEDS_SPLIT || + m_quality == QualityLevel::NEEDS_SPLIT_ACCEPTABLE; } /** Get the number of transactions in this Cluster. */ LinearizationIndex GetTxCount() const noexcept { return m_linearization.size(); } @@ -379,19 +383,35 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove --graph.m_txcount; } while(!to_remove.empty()); + auto quality = m_quality; Assume(todo.Any()); // Wipe from the Cluster's DepGraph (this is O(n) regardless of the number of entries // removed, so we benefit from batching all the removals). m_depgraph.RemoveTransactions(todo); m_mapping.resize(m_depgraph.PositionRange()); - // Filter removals out of m_linearization. - m_linearization.erase(std::remove_if( - m_linearization.begin(), - m_linearization.end(), - [&](auto pos) { return todo[pos]; }), m_linearization.end()); - - graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); + // First remove all removals at the end of the linearization. + while (!m_linearization.empty() && todo[m_linearization.back()]) { + todo.Reset(m_linearization.back()); + m_linearization.pop_back(); + } + if (todo.None()) { + // If no further removals remain, and thus all removals were at the end, we may be able + // to leave the cluster at a better quality level. + if (IsAcceptable(/*after_split=*/true)) { + quality = QualityLevel::NEEDS_SPLIT_ACCEPTABLE; + } else { + quality = QualityLevel::NEEDS_SPLIT; + } + } else { + // If more removals remain, filter those out of m_linearization. 
+ m_linearization.erase(std::remove_if( + m_linearization.begin(), + m_linearization.end(), + [&](auto pos) { return todo[pos]; }), m_linearization.end()); + quality = QualityLevel::NEEDS_SPLIT; + } + graph.SetClusterQuality(m_quality, m_setindex, quality); Updated(graph); } @@ -399,6 +419,18 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept { // This function can only be called when the Cluster needs splitting. Assume(NeedsSplitting()); + // Determine the new quality the split-off Clusters will have. + QualityLevel new_quality = IsAcceptable(/*after_split=*/true) ? QualityLevel::ACCEPTABLE + : QualityLevel::NEEDS_RELINEARIZE; + // If we're going to produce ACCEPTABLE clusters (i.e., when in NEEDS_SPLIT_ACCEPTABLE), we + // need to post-linearize to make sure the split-out versions are all connected (as + // connectivity may have changed by removing part of the cluster). This could be done on each + // resulting split-out cluster separately, but it is simpler to do it once up front before + // splitting. This step is not necessary if the resulting clusters are NEEDS_RELINEARIZE, as + // they will be post-linearized anyway in MakeAcceptable(). + if (new_quality == QualityLevel::ACCEPTABLE) { + PostLinearize(m_depgraph, m_linearization); + } /** Which positions are still left in this Cluster. */ auto todo = m_depgraph.Positions(); /** Mapping from transaction positions in this Cluster to the Cluster where it ends up, and @@ -412,7 +444,10 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept if (first && component == todo) { // The existing Cluster is an entire component. Leave it be, but update its quality. Assume(todo == m_depgraph.Positions()); - graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + graph.SetClusterQuality(m_quality, m_setindex, new_quality); + // If this made the quality ACCEPTABLE or OPTIMAL, we need to compute and cache its + // chunking. 
+ Updated(graph); return false; } first = false; @@ -424,7 +459,7 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept for (auto i : component) { remap[i] = {new_cluster.get(), DepGraphIndex(-1)}; } - graph.InsertCluster(std::move(new_cluster), QualityLevel::NEEDS_RELINEARIZE); + graph.InsertCluster(std::move(new_cluster), new_quality); todo -= component; } // Redistribute the transactions. @@ -696,9 +731,11 @@ void TxGraphImpl::SplitAll() noexcept { // Before splitting all Cluster, first make sure all removals are applied. ApplyRemovals(); - auto& queue = m_clusters[int(QualityLevel::NEEDS_SPLIT)]; - while (!queue.empty()) { - Split(*queue.back().get()); + for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE}) { + auto& queue = m_clusters[int(quality)]; + while (!queue.empty()) { + Split(*queue.back().get()); + } } } @@ -1221,6 +1258,8 @@ void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcep m_depgraph.FeeRate(idx).fee = fee; if (!NeedsSplitting()) { graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + } else { + graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); } Updated(graph); } From 34aa3da5adea40615d80588bb0ff8b78d6d292a8 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Sun, 24 Nov 2024 08:37:53 -0500 Subject: [PATCH 15/25] txgraph: Group per-graph data in ClusterSet (refactor) This is a preparation for a next commit where a TxGraph will start representing potentially two distinct graphs (a main one, and a staging one with proposed changes). --- src/txgraph.cpp | 153 ++++++++++++++++++++++++++---------------------- 1 file changed, 83 insertions(+), 70 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 72cd932f34c..4ea4a7f0f9b 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -25,7 +25,7 @@ class TxGraphImpl; /** Position of a DepGraphIndex within a Cluster::m_linearization. 
*/ using LinearizationIndex = uint32_t; -/** Position of a Cluster within Graph::m_clusters. */ +/** Position of a Cluster within Graph::ClusterSet::m_clusters. */ using ClusterSetIndex = uint32_t; /** Quality levels for cached cluster linearizations. */ @@ -41,12 +41,12 @@ enum class QualityLevel ACCEPTABLE, /** The linearization is known to be optimal. */ OPTIMAL, - /** This cluster is not registered in any m_clusters. - * This must be the last entry in QualityLevel as m_clusters is sized using it. */ + /** This cluster is not registered in any ClusterSet::m_clusters. + * This must be the last entry in QualityLevel as ClusterSet::m_clusters is sized using it. */ NONE, }; -/** A grouping of connected transactions inside a TxGraphImpl. */ +/** A grouping of connected transactions inside a TxGraphImpl::ClusterSet. */ class Cluster { friend class TxGraphImpl; @@ -63,7 +63,7 @@ class Cluster std::vector m_linearization; /** The quality level of m_linearization. */ QualityLevel m_quality{QualityLevel::NONE}; - /** Which position this Cluster has in Graph::m_clusters[m_quality]. */ + /** Which position this Cluster has in Graph::ClusterSet::m_clusters[m_quality]. */ ClusterSetIndex m_setindex{ClusterSetIndex(-1)}; public: @@ -72,7 +72,7 @@ public: /** Construct a singleton Cluster. */ explicit Cluster(TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept; - // Cannot move or copy (would invalidate Cluster* in Locator and TxGraphImpl). */ + // Cannot move or copy (would invalidate Cluster* in Locator and TxGraphImpl::ClusterSet). */ Cluster(const Cluster&) = delete; Cluster& operator=(const Cluster&) = delete; Cluster(Cluster&&) = delete; @@ -192,18 +192,25 @@ private: bool m_group_oversized; }; - /** The vectors of clusters, one vector per quality level. ClusterSetIndex indexes into each. */ - std::array>, int(QualityLevel::NONE)> m_clusters; - /** Which removals have yet to be applied. 
*/ - std::vector m_to_remove; - /** Which dependencies are to be added ((parent,child) pairs). GroupData::m_deps_offset indexes - * into this. */ - std::vector> m_deps_to_add; - /** Information about the merges to be performed, if known. */ - std::optional m_group_data = GroupData{}; - /** Total number of transactions in this graph (sum of all transaction counts in all Clusters). - * */ - GraphIndex m_txcount{0}; + /** The collection of all Clusters in main or staged. */ + struct ClusterSet + { + /** The vectors of clusters, one vector per quality level. ClusterSetIndex indexes into each. */ + std::array>, int(QualityLevel::NONE)> m_clusters; + /** Which removals have yet to be applied. */ + std::vector m_to_remove; + /** Which dependencies are to be added ((parent,child) pairs). GroupData::m_deps_offset indexes + * into this. */ + std::vector> m_deps_to_add; + /** Information about the merges to be performed, if known. */ + std::optional m_group_data = GroupData{}; + /** Total number of transactions in this graph (sum of all transaction counts in all + * Clusters). */ + GraphIndex m_txcount{0}; + }; + + /** The ClusterSet for this TxGraphImpl. */ + ClusterSet m_clusterset; /** A Locator that describes whether, where, and in which Cluster an Entry appears. */ struct Locator @@ -380,7 +387,7 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove // - Mark it as removed in the Entry's locator. locator.SetMissing(); to_remove = to_remove.subspan(1); - --graph.m_txcount; + --graph.m_clusterset.m_txcount; } while(!to_remove.empty()); auto quality = m_quality; @@ -576,7 +583,7 @@ std::unique_ptr TxGraphImpl::ExtractCluster(QualityLevel quality, Clust { Assume(quality != QualityLevel::NONE); - auto& quality_clusters = m_clusters[int(quality)]; + auto& quality_clusters = m_clusterset.m_clusters[int(quality)]; Assume(setindex < quality_clusters.size()); // Extract the Cluster-owning unique_ptr. 
@@ -605,7 +612,7 @@ ClusterSetIndex TxGraphImpl::InsertCluster(std::unique_ptr&& cluster, Q Assume(cluster->m_quality == QualityLevel::NONE); // Append it at the end of the relevant TxGraphImpl::m_cluster. - auto& quality_clusters = m_clusters[int(quality)]; + auto& quality_clusters = m_clusterset.m_clusters[int(quality)]; ClusterSetIndex ret = quality_clusters.size(); cluster->m_quality = quality; cluster->m_setindex = ret; @@ -635,15 +642,16 @@ void TxGraphImpl::DeleteCluster(Cluster& cluster) noexcept void TxGraphImpl::ApplyRemovals() noexcept { - auto& to_remove = m_to_remove; + auto& clusterset = m_clusterset; + auto& to_remove = clusterset.m_to_remove; // Skip if there is nothing to remove. if (to_remove.empty()) return; // Group the set of to-be-removed entries by Cluster*. - std::sort(m_to_remove.begin(), m_to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { + std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { return std::less{}(m_entries[a].m_locator.cluster, m_entries[b].m_locator.cluster); }); // Process per Cluster. - std::span to_remove_span{m_to_remove}; + std::span to_remove_span{to_remove}; while (!to_remove_span.empty()) { Cluster* cluster = m_entries[to_remove_span.front()].m_locator.cluster; if (cluster != nullptr) { @@ -656,7 +664,7 @@ void TxGraphImpl::ApplyRemovals() noexcept to_remove_span = to_remove_span.subspan(1); } } - m_to_remove.clear(); + to_remove.clear(); Compact(); } @@ -685,8 +693,8 @@ void TxGraphImpl::Compact() noexcept { // We cannot compact while any to-be-applied operations remain, as we'd need to rewrite them. // It is easier to delay the compaction until they have been applied. - if (!m_deps_to_add.empty()) return; - if (!m_to_remove.empty()) return; + if (!m_clusterset.m_deps_to_add.empty()) return; + if (!m_clusterset.m_to_remove.empty()) return; // Sort the GraphIndexes that need to be cleaned up. They are sorted in reverse, so the last // ones get processed first. 
This means earlier-processed GraphIndexes will not cause moving of @@ -732,7 +740,7 @@ void TxGraphImpl::SplitAll() noexcept // Before splitting all Cluster, first make sure all removals are applied. ApplyRemovals(); for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE}) { - auto& queue = m_clusters[int(quality)]; + auto& queue = m_clusterset.m_clusters[int(quality)]; while (!queue.empty()) { Split(*queue.back().get()); } @@ -741,8 +749,9 @@ void TxGraphImpl::SplitAll() noexcept void TxGraphImpl::GroupClusters() noexcept { + auto& clusterset = m_clusterset; // If the groupings have been computed already, nothing is left to be done. - if (m_group_data.has_value()) return; + if (clusterset.m_group_data.has_value()) return; // Before computing which Clusters need to be merged together, first apply all removals and // split the Clusters into connected components. If we would group first, we might end up @@ -758,7 +767,7 @@ void TxGraphImpl::GroupClusters() noexcept std::vector, Cluster*>> an_deps; // Construct a an_clusters entry for every parent and child in the to-be-applied dependencies. - for (const auto& [par, chl] : m_deps_to_add) { + for (const auto& [par, chl] : clusterset.m_deps_to_add) { auto par_cluster = m_entries[par].m_locator.cluster; auto chl_cluster = m_entries[chl].m_locator.cluster; // Skip dependencies for which the parent or child transaction is removed. @@ -774,7 +783,7 @@ void TxGraphImpl::GroupClusters() noexcept an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end()); // Sort the dependencies by child Cluster. 
- std::sort(m_deps_to_add.begin(), m_deps_to_add.end(), [&](auto& a, auto& b) noexcept { + std::sort(clusterset.m_deps_to_add.begin(), clusterset.m_deps_to_add.end(), [&](auto& a, auto& b) noexcept { auto [_a_par, a_chl] = a; auto [_b_par, b_chl] = b; auto a_chl_cluster = m_entries[a_chl].m_locator.cluster; @@ -851,7 +860,7 @@ void TxGraphImpl::GroupClusters() noexcept // the partitions their Clusters are in. Cluster* last_chl_cluster{nullptr}; PartitionData* last_partition{nullptr}; - for (const auto& [par, chl] : m_deps_to_add) { + for (const auto& [par, chl] : clusterset.m_deps_to_add) { auto par_cluster = m_entries[par].m_locator.cluster; auto chl_cluster = m_entries[chl].m_locator.cluster; // Nothing to do if parent and child are in the same Cluster. @@ -873,8 +882,8 @@ void TxGraphImpl::GroupClusters() noexcept // Populate the an_clusters and an_deps data structures with the list of input Clusters, // and the input dependencies, annotated with the representative of the Cluster partition // it applies to. - an_deps.reserve(m_deps_to_add.size()); - auto deps_it = m_deps_to_add.begin(); + an_deps.reserve(clusterset.m_deps_to_add.size()); + auto deps_it = clusterset.m_deps_to_add.begin(); for (size_t i = 0; i < partition_data.size(); ++i) { auto& data = partition_data[i]; // Find the representative of the partition Cluster i is in, and store it with the @@ -883,7 +892,7 @@ void TxGraphImpl::GroupClusters() noexcept Assume(an_clusters[i].second == nullptr); an_clusters[i].second = rep; // Find all dependencies whose child Cluster is Cluster i, and annotate them with rep. 
- while (deps_it != m_deps_to_add.end()) { + while (deps_it != clusterset.m_deps_to_add.end()) { auto [par, chl] = *deps_it; auto chl_cluster = m_entries[chl].m_locator.cluster; if (std::greater{}(chl_cluster, data.cluster)) break; @@ -906,39 +915,39 @@ void TxGraphImpl::GroupClusters() noexcept // Translate the resulting cluster groups to the m_group_data structure, and the dependencies // back to m_deps_to_add. - m_group_data = GroupData{}; - m_group_data->m_group_clusters.reserve(an_clusters.size()); - m_group_data->m_group_oversized = false; - m_deps_to_add.clear(); - m_deps_to_add.reserve(an_deps.size()); + clusterset.m_group_data = GroupData{}; + clusterset.m_group_data->m_group_clusters.reserve(an_clusters.size()); + clusterset.m_group_data->m_group_oversized = false; + clusterset.m_deps_to_add.clear(); + clusterset.m_deps_to_add.reserve(an_deps.size()); auto an_deps_it = an_deps.begin(); auto an_clusters_it = an_clusters.begin(); while (an_clusters_it != an_clusters.end()) { // Process all clusters/dependencies belonging to the partition with representative rep. auto rep = an_clusters_it->second; // Create and initialize a new GroupData entry for the partition. - auto& new_entry = m_group_data->m_groups.emplace_back(); - new_entry.m_cluster_offset = m_group_data->m_group_clusters.size(); + auto& new_entry = clusterset.m_group_data->m_groups.emplace_back(); + new_entry.m_cluster_offset = clusterset.m_group_data->m_group_clusters.size(); new_entry.m_cluster_count = 0; - new_entry.m_deps_offset = m_deps_to_add.size(); + new_entry.m_deps_offset = clusterset.m_deps_to_add.size(); new_entry.m_deps_count = 0; uint32_t total_count{0}; // Add all its clusters to it (copying those from an_clusters to m_group_clusters). 
while (an_clusters_it != an_clusters.end() && an_clusters_it->second == rep) { - m_group_data->m_group_clusters.push_back(an_clusters_it->first); + clusterset.m_group_data->m_group_clusters.push_back(an_clusters_it->first); total_count += an_clusters_it->first->GetTxCount(); ++an_clusters_it; ++new_entry.m_cluster_count; } // Add all its dependencies to it (copying those back from an_deps to m_deps_to_add). while (an_deps_it != an_deps.end() && an_deps_it->second == rep) { - m_deps_to_add.push_back(an_deps_it->first); + clusterset.m_deps_to_add.push_back(an_deps_it->first); ++an_deps_it; ++new_entry.m_deps_count; } // Detect oversizedness. if (total_count > m_max_cluster_count) { - m_group_data->m_group_oversized = true; + clusterset.m_group_data->m_group_oversized = true; } } Assume(an_deps_it == an_deps.end()); @@ -975,23 +984,24 @@ void TxGraphImpl::Merge(std::span to_merge) noexcept void TxGraphImpl::ApplyDependencies() noexcept { + auto& clusterset = m_clusterset; // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits). GroupClusters(); - Assume(m_group_data.has_value()); + Assume(clusterset.m_group_data.has_value()); // Nothing to do if there are no dependencies to be added. - if (m_deps_to_add.empty()) return; + if (clusterset.m_deps_to_add.empty()) return; // Dependencies cannot be applied if it would result in oversized clusters. - if (m_group_data->m_group_oversized) return; + if (clusterset.m_group_data->m_group_oversized) return; // For each group of to-be-merged Clusters. - for (const auto& group_data : m_group_data->m_groups) { + for (const auto& group_data : clusterset.m_group_data->m_groups) { // Invoke Merge() to merge them into a single Cluster. 
- auto cluster_span = std::span{m_group_data->m_group_clusters} + auto cluster_span = std::span{clusterset.m_group_data->m_group_clusters} .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); Merge(cluster_span); // Actually apply all to-be-added dependencies (all parents and children from this grouping // belong to the same Cluster at this point because of the merging above). - auto deps_span = std::span{m_deps_to_add} + auto deps_span = std::span{clusterset.m_deps_to_add} .subspan(group_data.m_deps_offset, group_data.m_deps_count); Assume(!deps_span.empty()); const auto& loc = m_entries[deps_span[0].second].m_locator; @@ -1000,11 +1010,11 @@ void TxGraphImpl::ApplyDependencies() noexcept } // Wipe the list of to-be-added dependencies now that they are applied. - m_deps_to_add.clear(); + clusterset.m_deps_to_add.clear(); Compact(); // Also no further Cluster mergings are needed (note that we clear, but don't set to // std::nullopt, as that would imply the groupings are unknown). - m_group_data = GroupData{}; + clusterset.m_group_data = GroupData{}; } void Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept @@ -1060,7 +1070,7 @@ TxGraph::Ref TxGraphImpl::AddTransaction(const FeePerWeight& feerate) noexcept auto cluster_ptr = cluster.get(); InsertCluster(std::move(cluster), QualityLevel::OPTIMAL); cluster_ptr->Updated(*this); - ++m_txcount; + ++m_clusterset.m_txcount; // Return the Ref. return ret; } @@ -1075,9 +1085,9 @@ void TxGraphImpl::RemoveTransaction(const Ref& arg) noexcept auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return; // Remember that the transaction is to be removed. - m_to_remove.push_back(GetRefIndex(arg)); + m_clusterset.m_to_remove.push_back(GetRefIndex(arg)); // Wipe m_group_data (as it will need to be recomputed). 
- m_group_data.reset(); + m_clusterset.m_group_data.reset(); } void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept @@ -1095,9 +1105,9 @@ void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept auto chl_cluster = m_entries[GetRefIndex(child)].m_locator.cluster; if (chl_cluster == nullptr) return; // Remember that this dependency is to be applied. - m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); + m_clusterset.m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); // Wipe m_group_data (as it will need to be recomputed). - m_group_data.reset(); + m_clusterset.m_group_data.reset(); } bool TxGraphImpl::Exists(const Ref& arg) noexcept @@ -1158,7 +1168,7 @@ std::vector TxGraphImpl::GetAncestors(const Ref& arg) noexcept // Apply all removals and dependencies, as the result might be incorrect otherwise. ApplyDependencies(); // Ancestry cannot be known if unapplied dependencies remain. - Assume(m_deps_to_add.empty()); + Assume(m_clusterset.m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1174,7 +1184,7 @@ std::vector TxGraphImpl::GetDescendants(const Ref& arg) noexcept // Apply all removals and dependencies, as the result might be incorrect otherwise. ApplyDependencies(); // Ancestry cannot be known if unapplied dependencies remain. - Assume(m_deps_to_add.empty()); + Assume(m_clusterset.m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1190,7 +1200,7 @@ std::vector TxGraphImpl::GetCluster(const Ref& arg) noexcept // Apply all removals and dependencies, as the result might be incorrect otherwise. 
ApplyDependencies(); // Cluster linearization cannot be known if unapplied dependencies remain. - Assume(m_deps_to_add.empty()); + Assume(m_clusterset.m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1202,7 +1212,7 @@ std::vector TxGraphImpl::GetCluster(const Ref& arg) noexcept TxGraph::GraphIndex TxGraphImpl::GetTransactionCount() noexcept { ApplyRemovals(); - return m_txcount; + return m_clusterset.m_txcount; } FeePerWeight TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept @@ -1227,7 +1237,7 @@ FeePerWeight TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept // Apply all removals and dependencies, as the result might be inaccurate otherwise. ApplyDependencies(); // Chunk feerates cannot be accurately known if unapplied dependencies remain. - Assume(m_deps_to_add.empty()); + Assume(m_clusterset.m_deps_to_add.empty()); // Find the cluster the argument is in, and return the empty FeePerWeight if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1243,8 +1253,8 @@ bool TxGraphImpl::IsOversized() noexcept // Find which Clusters will need to be merged together, as that is where the oversize // property is assessed. GroupClusters(); - Assume(m_group_data.has_value()); - return m_group_data->m_group_oversized; + Assume(m_clusterset.m_group_data.has_value()); + return m_clusterset.m_group_data->m_group_oversized; } void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcept @@ -1346,11 +1356,12 @@ void TxGraphImpl::SanityCheck() const } + auto& clusterset = m_clusterset; std::set actual_clusters; // For all quality levels... 
for (int qual = 0; qual < int(QualityLevel::NONE); ++qual) { QualityLevel quality{qual}; - const auto& quality_clusters = m_clusters[qual]; + const auto& quality_clusters = clusterset.m_clusters[qual]; // ... for all clusters in them ... for (ClusterSetIndex setindex = 0; setindex < quality_clusters.size(); ++setindex) { const auto& cluster = *quality_clusters[setindex]; @@ -1368,13 +1379,13 @@ void TxGraphImpl::SanityCheck() const } // Verify that all to-be-removed transactions have valid identifiers, and aren't removed yet. - for (GraphIndex idx : m_to_remove) { + for (GraphIndex idx : m_clusterset.m_to_remove) { assert(idx < m_entries.size()); assert(m_entries[idx].m_locator.IsPresent()); } // Verify that all to-be-added dependencies have valid identifiers. - for (auto [par_idx, chl_idx] : m_deps_to_add) { + for (auto [par_idx, chl_idx] : m_clusterset.m_deps_to_add) { assert(par_idx != chl_idx); assert(par_idx < m_entries.size()); assert(chl_idx < m_entries.size()); @@ -1389,7 +1400,9 @@ void TxGraphImpl::SanityCheck() const // If no to-be-removed transactions, or to-be-added dependencies remain, m_unlinked must be // empty (to prevent memory leaks due to an ever-growing m_entries vector). - if (m_to_remove.empty() && m_deps_to_add.empty()) assert(actual_unlinked.empty()); + if (clusterset.m_to_remove.empty() && clusterset.m_deps_to_add.empty()) { + assert(actual_unlinked.empty()); + } } } // namespace From c99c7300b4443f70e452cb97c42b9c2513b372d7 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Sun, 24 Nov 2024 10:00:59 -0500 Subject: [PATCH 16/25] txgraph: Abstract out ClearLocator (refactor) Move a number of related modifications to TxGraphImpl into a separate function for removal of transactions. This is preparation for a later commit where this will be useful in more than one place. 
--- src/txgraph.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 4ea4a7f0f9b..00bfec0216f 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -274,6 +274,8 @@ public: ClusterSetIndex InsertCluster(std::unique_ptr&& cluster, QualityLevel quality) noexcept; /** Change the QualityLevel of a Cluster (identified by old_quality and old_index). */ void SetClusterQuality(QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept; + /** Make a transaction not exist. It must currently exist. */ + void ClearLocator(GraphIndex index) noexcept; // Functions for handling Refs. @@ -335,6 +337,16 @@ public: void SanityCheck() const final; }; +void TxGraphImpl::ClearLocator(GraphIndex idx) noexcept +{ + auto& entry = m_entries[idx]; + Assume(entry.m_locator.IsPresent()); + // Change the locator from Present to Missing. + entry.m_locator.SetMissing(); + // Update the transaction count. + --m_clusterset.m_txcount; +} + void Cluster::Updated(TxGraphImpl& graph) noexcept { // Update all the Locators for this Cluster's Entrys. @@ -385,9 +397,8 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove // that causes it to be accessed regardless. m_mapping[locator.index] = GraphIndex(-1); // - Mark it as removed in the Entry's locator. - locator.SetMissing(); + graph.ClearLocator(idx); to_remove = to_remove.subspan(1); - --graph.m_clusterset.m_txcount; } while(!to_remove.empty()); auto quality = m_quality; From 8c70688965bc4038f28f41e4490180e40a88b5ee Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 4 Dec 2024 09:40:53 -0500 Subject: [PATCH 17/25] txgraph: Add staging support (feature) In order to make it easy to evaluate proposed changes to a TxGraph, introduce a "staging" mode, where mutators (AddTransaction, AddDependency, RemoveTransaction) do not modify the actual graph, but just a staging version of it. 
That staging graph can then be committed (replacing the main one with it), or aborted (discarding the staging). --- src/test/fuzz/txgraph.cpp | 333 +++++++++------- src/txgraph.cpp | 812 ++++++++++++++++++++++++++++---------- src/txgraph.h | 101 +++-- 3 files changed, 863 insertions(+), 383 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index b6163ed5544..cc20f9e3c4f 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -21,7 +22,8 @@ using namespace cluster_linearize; namespace { /** Data type representing a naive simulated TxGraph, keeping all transactions (even from - * disconnected components) in a single DepGraph. */ + * disconnected components) in a single DepGraph. Unlike the real TxGraph, this only models + * a single graph, and multiple instances are used to simulate main/staging. */ struct SimTxGraph { /** Maximum number of transactions to support simultaneously. Set this higher than txgraph's @@ -38,20 +40,28 @@ struct SimTxGraph /** The dependency graph (for all transactions in the simulation, regardless of * connectivity/clustering). */ DepGraph graph; - /** For each position in graph, which TxGraph::Ref it corresponds with (if any). */ - std::array, MAX_TRANSACTIONS> simmap; + /** For each position in graph, which TxGraph::Ref it corresponds with (if any). Use shared_ptr + * so that a SimTxGraph can be copied to create a staging one, while sharing Refs with + * the main graph. */ + std::array, MAX_TRANSACTIONS> simmap; /** For each TxGraph::Ref in graph, the position it corresponds with. */ std::map simrevmap; /** The set of TxGraph::Ref entries that have been removed, but not yet destroyed. */ - std::vector> removed; + std::vector> removed; /** Whether the graph is oversized (true = yes, false = no, std::nullopt = unknown). */ std::optional oversized; /** The configured maximum number of transactions per cluster.
*/ DepGraphIndex max_cluster_count; - /** Construct a new SimData with the specified maximum cluster count. */ + /** Construct a new SimTxGraph with the specified maximum cluster count. */ explicit SimTxGraph(DepGraphIndex max_cluster) : max_cluster_count(max_cluster) {} + // Permit copying and moving. + SimTxGraph(const SimTxGraph&) noexcept = default; + SimTxGraph& operator=(const SimTxGraph&) noexcept = default; + SimTxGraph(SimTxGraph&&) noexcept = default; + SimTxGraph& operator=(SimTxGraph&&) noexcept = default; + /** Check whether this graph is oversized (contains a connected component whose number of * transactions exceeds max_cluster_count. */ bool IsOversized() @@ -95,7 +105,7 @@ struct SimTxGraph assert(graph.TxCount() < MAX_TRANSACTIONS); auto simpos = graph.AddTransaction(feerate); assert(graph.Positions()[simpos]); - simmap[simpos] = std::make_unique(); + simmap[simpos] = std::make_shared(); auto ptr = simmap[simpos].get(); simrevmap[ptr] = simpos; return ptr; @@ -202,32 +212,43 @@ FUZZ_TARGET(txgraph) // Decide the maximum number of transactions per cluster we will use in this simulation. auto max_count = provider.ConsumeIntegralInRange(1, MAX_CLUSTER_COUNT_LIMIT); - // Construct a real and a simulated graph. + // Construct a real graph, and a vector of simulated graphs (main, and possibly staging). auto real = MakeTxGraph(max_count); - SimTxGraph sim(max_count); + std::vector sims; + sims.reserve(2); + sims.emplace_back(max_count); - /** Function to pick any Ref (from sim.simmap or sim.removed, or the empty Ref). */ + /** Function to pick any Ref (for either sim in sims: from sim.simmap or sim.removed, or the + * empty Ref). */ auto pick_fn = [&]() noexcept -> TxGraph::Ref* { - auto tx_count = sim.GetTransactionCount(); + size_t tx_count[2] = {sims[0].GetTransactionCount(), 0}; /** The number of possible choices. 
*/ - size_t choices = tx_count + sim.removed.size() + 1; + size_t choices = tx_count[0] + sims[0].removed.size() + 1; + if (sims.size() == 2) { + tx_count[1] = sims[1].GetTransactionCount(); + choices += tx_count[1] + sims[1].removed.size(); + } /** Pick one of them. */ auto choice = provider.ConsumeIntegralInRange(0, choices - 1); - if (choice < tx_count) { - // Return from real. - for (auto i : sim.graph.Positions()) { - if (choice == 0) return sim.GetRef(i); - --choice; + // Consider both main and (if it exists) staging. + for (size_t level = 0; level < sims.size(); ++level) { + auto& sim = sims[level]; + if (choice < tx_count[level]) { + // Return from graph. + for (auto i : sim.graph.Positions()) { + if (choice == 0) return sim.GetRef(i); + --choice; + } + assert(false); + } else { + choice -= tx_count[level]; + } + if (choice < sim.removed.size()) { + // Return from removed. + return sim.removed[choice].get(); + } else { + choice -= sim.removed.size(); } - assert(false); - } else { - choice -= tx_count; - } - if (choice < sim.removed.size()) { - // Return from removed. - return sim.removed[choice].get(); - } else { - choice -= sim.removed.size(); } // Return empty. assert(choice == 0); @@ -237,15 +258,24 @@ FUZZ_TARGET(txgraph) LIMITED_WHILE(provider.remaining_bytes() > 0, 200) { // Read a one-byte command. int command = provider.ConsumeIntegral(); - // Treat it lowest bit as a flag (which selects a variant of some of the operations), and - // leave the rest of the bits in command. + // Treat the lowest bit of a command as a flag (which selects a variant of some of the + // operations), and the second-lowest bit as a way of selecting main vs. staging, and leave + // the rest of the bits in command. 
bool alt = command & 1; - command >>= 1; + bool use_main = command & 2; + command >>= 2; + + // Provide convenient aliases for the top simulated graph (main, or staging if it exists), + // one for the simulated graph selected based on use_main (for operations that can operate + // on both graphs), and one that always refers to the main graph. + auto& top_sim = sims.back(); + auto& sel_sim = use_main ? sims[0] : top_sim; + auto& main_sim = sims[0]; // Keep decrementing command for each applicable operation, until one is hit. Multiple // iterations may be necessary. while (true) { - if (sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) { + if (top_sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) { // AddTransaction. int64_t fee; int32_t size; @@ -262,51 +292,54 @@ FUZZ_TARGET(txgraph) FeePerWeight feerate{fee, size}; // Create a real TxGraph::Ref. auto ref = real->AddTransaction(feerate); - // Create a unique_ptr place in the simulation to put the Ref in. - auto ref_loc = sim.AddTransaction(feerate); + // Create a shared_ptr place in the simulation to put the Ref in. + auto ref_loc = top_sim.AddTransaction(feerate); // Move it in place. *ref_loc = std::move(ref); break; - } else if (sim.GetTransactionCount() + sim.removed.size() > 1 && command-- == 0) { + } else if (top_sim.GetTransactionCount() + top_sim.removed.size() > 1 && command-- == 0) { // AddDependency. auto par = pick_fn(); auto chl = pick_fn(); - auto pos_par = sim.Find(par); - auto pos_chl = sim.Find(chl); + auto pos_par = top_sim.Find(par); + auto pos_chl = top_sim.Find(chl); if (pos_par != SimTxGraph::MISSING && pos_chl != SimTxGraph::MISSING) { // Determine if adding this would introduce a cycle (not allowed by TxGraph), // and if so, skip. 
- if (sim.graph.Ancestors(pos_par)[pos_chl]) break; + if (top_sim.graph.Ancestors(pos_par)[pos_chl]) break; } - sim.AddDependency(par, chl); + top_sim.AddDependency(par, chl); real->AddDependency(*par, *chl); break; - } else if (sim.removed.size() < 100 && command-- == 0) { + } else if (top_sim.removed.size() < 100 && command-- == 0) { // RemoveTransaction. Either all its ancestors or all its descendants are also // removed (if any), to make sure TxGraph's reordering of removals and dependencies // has no effect. std::vector to_remove; to_remove.push_back(pick_fn()); - sim.IncludeAncDesc(to_remove, alt); + top_sim.IncludeAncDesc(to_remove, alt); // The order in which these ancestors/descendants are removed should not matter; // randomly shuffle them. std::shuffle(to_remove.begin(), to_remove.end(), rng); for (TxGraph::Ref* ptr : to_remove) { real->RemoveTransaction(*ptr); - sim.RemoveTransaction(ptr); + top_sim.RemoveTransaction(ptr); } break; - } else if (sim.removed.size() > 0 && command-- == 0) { + } else if (sel_sim.removed.size() > 0 && command-- == 0) { // ~Ref. Destroying a TxGraph::Ref has an observable effect on the TxGraph it // refers to, so this simulation permits doing so separately from other actions on // TxGraph. - // Pick a Ref of sim.removed to destroy. - auto removed_pos = provider.ConsumeIntegralInRange(0, sim.removed.size() - 1); - if (removed_pos != sim.removed.size() - 1) { - std::swap(sim.removed[removed_pos], sim.removed.back()); + // Pick a Ref of sel_sim.removed to destroy. Note that the same Ref may still occur + // in the other graph, and thus not actually trigger ~Ref yet (which is exactly + // what we want, as destroying Refs is only allowed when it does not refer to an + // existing transaction in either graph). 
+ auto removed_pos = provider.ConsumeIntegralInRange(0, sel_sim.removed.size() - 1); + if (removed_pos != sel_sim.removed.size() - 1) { + std::swap(sel_sim.removed[removed_pos], sel_sim.removed.back()); } - sim.removed.pop_back(); + sel_sim.removed.pop_back(); break; } else if (command-- == 0) { // SetTransactionFee. @@ -318,77 +351,83 @@ FUZZ_TARGET(txgraph) } auto ref = pick_fn(); real->SetTransactionFee(*ref, fee); - sim.SetTransactionFee(ref, fee); + for (auto& sim : sims) { + sim.SetTransactionFee(ref, fee); + } break; } else if (command-- == 0) { // GetTransactionCount. - assert(real->GetTransactionCount() == sim.GetTransactionCount()); + assert(real->GetTransactionCount(use_main) == sel_sim.GetTransactionCount()); break; } else if (command-- == 0) { // Exists. auto ref = pick_fn(); - bool exists = real->Exists(*ref); - bool should_exist = sim.Find(ref) != SimTxGraph::MISSING; + bool exists = real->Exists(*ref, use_main); + bool should_exist = sel_sim.Find(ref) != SimTxGraph::MISSING; assert(exists == should_exist); break; } else if (command-- == 0) { // IsOversized. - assert(sim.IsOversized() == real->IsOversized()); + assert(sel_sim.IsOversized() == real->IsOversized(use_main)); break; } else if (command-- == 0) { // GetIndividualFeerate. auto ref = pick_fn(); auto feerate = real->GetIndividualFeerate(*ref); - auto simpos = sim.Find(ref); - if (simpos == SimTxGraph::MISSING) { - assert(feerate.IsEmpty()); - } else { - assert(feerate == sim.graph.FeeRate(simpos)); + bool found{false}; + for (auto& sim : sims) { + auto simpos = sim.Find(ref); + if (simpos != SimTxGraph::MISSING) { + found = true; + assert(feerate == sim.graph.FeeRate(simpos)); + } } + if (!found) assert(feerate.IsEmpty()); break; - } else if (!sim.IsOversized() && command-- == 0) { - // GetChunkFeerate. + } else if (!main_sim.IsOversized() && command-- == 0) { + // GetMainChunkFeerate. 
auto ref = pick_fn(); - auto feerate = real->GetChunkFeerate(*ref); - auto simpos = sim.Find(ref); + auto feerate = real->GetMainChunkFeerate(*ref); + auto simpos = main_sim.Find(ref); if (simpos == SimTxGraph::MISSING) { assert(feerate.IsEmpty()); } else { // Just do some quick checks that the reported value is in range. A full // recomputation of expected chunk feerates is done at the end. - assert(feerate.size >= sim.graph.FeeRate(simpos).size); + assert(feerate.size >= main_sim.graph.FeeRate(simpos).size); } break; - } else if (!sim.IsOversized() && command-- == 0) { + } else if (!sel_sim.IsOversized() && command-- == 0) { // GetAncestors/GetDescendants. auto ref = pick_fn(); - auto result = alt ? real->GetDescendants(*ref) : real->GetAncestors(*ref); + auto result = alt ? real->GetDescendants(*ref, use_main) + : real->GetAncestors(*ref, use_main); assert(result.size() <= max_count); - auto result_set = sim.MakeSet(result); + auto result_set = sel_sim.MakeSet(result); assert(result.size() == result_set.Count()); - auto expect_set = sim.GetAncDesc(ref, alt); + auto expect_set = sel_sim.GetAncDesc(ref, alt); assert(result_set == expect_set); break; - } else if (!sim.IsOversized() && command-- == 0) { + } else if (!sel_sim.IsOversized() && command-- == 0) { // GetCluster. auto ref = pick_fn(); - auto result = real->GetCluster(*ref); + auto result = real->GetCluster(*ref, use_main); // Check cluster count limit. assert(result.size() <= max_count); // Require the result to be topologically valid and not contain duplicates. - auto left = sim.graph.Positions(); + auto left = sel_sim.graph.Positions(); for (auto refptr : result) { - auto simpos = sim.Find(refptr); + auto simpos = sel_sim.Find(refptr); assert(simpos != SimTxGraph::MISSING); assert(left[simpos]); left.Reset(simpos); - assert(!sim.graph.Ancestors(simpos).Overlaps(left)); + assert(!sel_sim.graph.Ancestors(simpos).Overlaps(left)); } // Require the set to be connected. 
- auto result_set = sim.MakeSet(result); - assert(sim.graph.IsConnected(result_set)); + auto result_set = sel_sim.MakeSet(result); + assert(sel_sim.graph.IsConnected(result_set)); // If ref exists, the result must contain it. If not, it must be empty. - auto simpos = sim.Find(ref); + auto simpos = sel_sim.Find(ref); if (simpos != SimTxGraph::MISSING) { assert(result_set[simpos]); } else { @@ -396,10 +435,29 @@ FUZZ_TARGET(txgraph) } // Require the set not to have ancestors or descendants outside of it. for (auto i : result_set) { - assert(sim.graph.Ancestors(i).IsSubsetOf(result_set)); - assert(sim.graph.Descendants(i).IsSubsetOf(result_set)); + assert(sel_sim.graph.Ancestors(i).IsSubsetOf(result_set)); + assert(sel_sim.graph.Descendants(i).IsSubsetOf(result_set)); } break; + } else if (command-- == 0) { + // HaveStaging. + assert((sims.size() == 2) == real->HaveStaging()); + break; + } else if (sims.size() < 2 && command-- == 0) { + // StartStaging. + sims.emplace_back(sims.back()); + real->StartStaging(); + break; + } else if (sims.size() > 1 && command-- == 0) { + // CommitStaging. + real->CommitStaging(); + sims.erase(sims.begin()); + break; + } else if (sims.size() > 1 && command-- == 0) { + // AbortStaging. + real->AbortStaging(); + sims.pop_back(); + break; } } } @@ -407,63 +465,70 @@ FUZZ_TARGET(txgraph) // After running all modifications, perform an internal sanity check (before invoking // inspectors that may modify the internal state). real->SanityCheck(); + assert(real->HaveStaging() == (sims.size() > 1)); - // Compare simple properties of the graph with the simulation. - assert(real->IsOversized() == sim.IsOversized()); - assert(real->GetTransactionCount() == sim.GetTransactionCount()); - - // If the graph (and the simulation) are not oversized, perform a full comparison. 
- if (!sim.IsOversized()) { - auto todo = sim.graph.Positions(); - // Iterate over all connected components of the resulting (simulated) graph, each of which - // should correspond to a cluster in the real one. - while (todo.Any()) { - auto component = sim.graph.FindConnectedComponent(todo); - todo -= component; - // Iterate over the transactions in that component. - for (auto i : component) { - // Check its individual feerate against simulation. - assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(*sim.GetRef(i))); - // Check its ancestors against simulation. - auto expect_anc = sim.graph.Ancestors(i); - auto anc = sim.MakeSet(real->GetAncestors(*sim.GetRef(i))); - assert(anc.Count() <= max_count); - assert(anc == expect_anc); - // Check its descendants against simulation. - auto expect_desc = sim.graph.Descendants(i); - auto desc = sim.MakeSet(real->GetDescendants(*sim.GetRef(i))); - assert(desc.Count() <= max_count); - assert(desc == expect_desc); - // Check the cluster the transaction is part of. - auto cluster = real->GetCluster(*sim.GetRef(i)); - assert(cluster.size() <= max_count); - assert(sim.MakeSet(cluster) == component); - // Check that the cluster is reported in a valid topological order (its - // linearization). - std::vector simlin; - SimTxGraph::SetType done; - for (TxGraph::Ref* ptr : cluster) { - auto simpos = sim.Find(ptr); - assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); - done.Set(simpos); - assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); - simlin.push_back(simpos); - } - // Construct a chunking object for the simulated graph, using the reported cluster - // linearization as ordering, and compare it against the reported chunk feerates. 
- cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); - DepGraphIndex idx{0}; - for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { - auto chunk = simlinchunk.GetChunk(chunknum); - // Require that the chunks of cluster linearizations are connected (this must - // be the case as all linearizations inside are PostLinearized). - assert(sim.graph.IsConnected(chunk.transactions)); - // Check the chunk feerates of all transactions in the cluster. - while (chunk.transactions.Any()) { - assert(chunk.transactions[simlin[idx]]); - chunk.transactions.Reset(simlin[idx]); - assert(chunk.feerate == real->GetChunkFeerate(*cluster[idx])); - ++idx; + // Try to run a full comparison, for both main_only=false and main_only=true in TxGraph + // inspector functions that support both. + for (int main_only = 0; main_only < 2; ++main_only) { + auto& sim = main_only ? sims[0] : sims.back(); + // Compare simple properties of the graph with the simulation. + assert(real->IsOversized(main_only) == sim.IsOversized()); + assert(real->GetTransactionCount(main_only) == sim.GetTransactionCount()); + // If the graph (and the simulation) are not oversized, perform a full comparison. + if (!sim.IsOversized()) { + auto todo = sim.graph.Positions(); + // Iterate over all connected components of the resulting (simulated) graph, each of which + // should correspond to a cluster in the real one. + while (todo.Any()) { + auto component = sim.graph.FindConnectedComponent(todo); + todo -= component; + // Iterate over the transactions in that component. + for (auto i : component) { + // Check its individual feerate against simulation. + assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(*sim.GetRef(i))); + // Check its ancestors against simulation. 
+ auto expect_anc = sim.graph.Ancestors(i); + auto anc = sim.MakeSet(real->GetAncestors(*sim.GetRef(i), main_only)); + assert(anc.Count() <= max_count); + assert(anc == expect_anc); + // Check its descendants against simulation. + auto expect_desc = sim.graph.Descendants(i); + auto desc = sim.MakeSet(real->GetDescendants(*sim.GetRef(i), main_only)); + assert(desc.Count() <= max_count); + assert(desc == expect_desc); + // Check the cluster the transaction is part of. + auto cluster = real->GetCluster(*sim.GetRef(i), main_only); + assert(cluster.size() <= max_count); + assert(sim.MakeSet(cluster) == component); + // Check that the cluster is reported in a valid topological order (its + // linearization). + std::vector simlin; + SimTxGraph::SetType done; + for (TxGraph::Ref* ptr : cluster) { + auto simpos = sim.Find(ptr); + assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); + done.Set(simpos); + assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); + simlin.push_back(simpos); + } + // Construct a chunking object for the simulated graph, using the reported cluster + // linearization as ordering, and compare it against the reported chunk feerates. + if (sims.size() == 1 || main_only) { + cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); + DepGraphIndex idx{0}; + for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { + auto chunk = simlinchunk.GetChunk(chunknum); + // Require that the chunks of cluster linearizations are connected (this must + // be the case as all linearizations inside are PostLinearized). + assert(sim.graph.IsConnected(chunk.transactions)); + // Check the chunk feerates of all transactions in the cluster. 
+ while (chunk.transactions.Any()) { + assert(chunk.transactions[simlin[idx]]); + chunk.transactions.Reset(simlin[idx]); + assert(chunk.feerate == real->GetMainChunkFeerate(*cluster[idx])); + ++idx; + } + } } } } @@ -475,8 +540,10 @@ FUZZ_TARGET(txgraph) // Remove all remaining transactions, because Refs cannot be destroyed otherwise (this will be // addressed in a follow-up commit). - for (auto i : sim.graph.Positions()) { - auto ref = sim.GetRef(i); - real->RemoveTransaction(*ref); + for (auto& sim : sims) { + for (auto i : sim.graph.Positions()) { + auto ref = sim.GetRef(i); + real->RemoveTransaction(*ref); + } } } diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 00bfec0216f..13bab582b50 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -20,6 +20,9 @@ namespace { using namespace cluster_linearize; +/** The maximum number of levels a TxGraph can have (0 = main, 1 = staging). */ +static constexpr int MAX_LEVELS{2}; + // Forward declare the TxGraph implementation class. class TxGraphImpl; @@ -65,6 +68,8 @@ class Cluster QualityLevel m_quality{QualityLevel::NONE}; /** Which position this Cluster has in Graph::ClusterSet::m_clusters[m_quality]. */ ClusterSetIndex m_setindex{ClusterSetIndex(-1)}; + /** Which level this Cluster is at in the graph (-1=not inserted, 0=main, 1=staging). */ + int m_level{-1}; public: /** Construct an empty Cluster. */ @@ -72,7 +77,7 @@ public: /** Construct a singleton Cluster. */ explicit Cluster(TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept; - // Cannot move or copy (would invalidate Cluster* in Locator and TxGraphImpl::ClusterSet). */ + // Cannot move or copy (would invalidate Cluster* in Locator and ClusterSet). 
*/ Cluster(const Cluster&) = delete; Cluster& operator=(const Cluster&) = delete; Cluster(Cluster&&) = delete; @@ -105,6 +110,17 @@ public: void UpdateMapping(DepGraphIndex cluster_idx, GraphIndex graph_idx) noexcept { m_mapping[cluster_idx] = graph_idx; } /** Push changes to Cluster and its linearization to the TxGraphImpl Entry objects. */ void Updated(TxGraphImpl& graph) noexcept; + /** Create a copy of this Cluster in staging, returning a pointer to it (used by PullIn). */ + Cluster* CopyToStaging(TxGraphImpl& graph) const noexcept; + /** Get the list of Clusters in main that conflict with this one (which is assumed to be in staging). */ + void GetConflicts(const TxGraphImpl& graph, std::vector& out) const noexcept; + /** Mark all the Entry objects belonging to this staging Cluster as missing. The Cluster must be + * deleted immediately after. */ + void MakeStagingTransactionsMissing(TxGraphImpl& graph) noexcept; + /** Remove all transactions from a Cluster. */ + void Clear(TxGraphImpl& graph) noexcept; + /** Change a Cluster's level from 1 (staging) to 0 (main). */ + void MoveToMain(TxGraphImpl& graph) noexcept; // Functions that implement the Cluster-specific side of internal TxGraphImpl mutations. @@ -136,16 +152,20 @@ public: // Debugging functions. - void SanityCheck(const TxGraphImpl& graph) const; + void SanityCheck(const TxGraphImpl& graph, int level) const; }; -/** The transaction graph. +/** The transaction graph, including staged changes. * * The overall design of the data structure consists of 3 interlinked representations: * - The transactions (held as a vector of TxGraphImpl::Entry inside TxGraphImpl). - * - The clusters (Cluster objects in per-quality vectors inside TxGraphImpl). + * - The clusters (Cluster objects in per-quality vectors inside TxGraphImpl::ClusterSet). 
* - The Refs (TxGraph::Ref objects, held externally by users of the TxGraph class) * + * The Clusters are kept in one or two ClusterSet objects, one for the "main" graph, and one for + * the proposed changes ("staging"). If a transaction occurs in both, they share the same Entry, + * but there will be a separate Cluster per graph. + * * Clusters and Refs contain the index of the Entry objects they refer to, and the Entry objects * refer back to the Clusters and Refs the corresponding transaction is contained in. * @@ -204,28 +224,68 @@ private: std::vector> m_deps_to_add; /** Information about the merges to be performed, if known. */ std::optional m_group_data = GroupData{}; + /** Which entries were removed in this ClusterSet (so they can be wiped on abort). This + * includes all entries which have an (R) removed locator at this level (staging only). */ + std::vector m_removed; /** Total number of transactions in this graph (sum of all transaction counts in all - * Clusters). */ + * Clusters, and for staging also those inherited from the main ClusterSet). */ GraphIndex m_txcount{0}; + + ClusterSet() noexcept = default; }; - /** The ClusterSet for this TxGraphImpl. */ - ClusterSet m_clusterset; + /** The main ClusterSet. */ + ClusterSet m_main_clusterset; + /** The staging ClusterSet, if any. */ + std::optional m_staging_clusterset; - /** A Locator that describes whether, where, and in which Cluster an Entry appears. */ + /** A Locator that describes whether, where, and in which Cluster an Entry appears. + * Every Entry has MAX_LEVELS locators, as it may appear in one Cluster per level. + * + * Each level of a Locator is in one of three states: + * + * - (P)resent: actually occurs in a Cluster at that level. + * + * - (M)issing: + * - In the main graph: the transaction does not exist in main. + * - In the staging graph: the transaction's existence is the same as in main. If it doesn't + * exist in main, (M) in staging means it does not exist there + * either. 
If it does exist in main, (M) in staging means the + * cluster it is in has not been modified in staging, and thus the + * transaction implicitly exists in staging too (without explicit + * Cluster object; see PullIn() to create it in staging too). + * + * - (R)emoved: only possible in staging; it means the transaction exists in main, but is + * removed in staging. + * + * The following combinations are possible: + * - (M,M): the transaction doesn't exist in either graph. + * - (P,M): the transaction exists in both, but only exists explicitly in a Cluster object in + * main. Its existence in staging is inherited from main. + * - (P,P): the transaction exists in both, and is materialized in both. Thus, the clusters + * and/or their linearizations may be different in main and staging. + * - (M,P): the transaction is added in staging, and does not exist in main. + * - (P,R): the transaction exists in main, but is removed in staging. + * + * When staging does not exist, only (M,M) and (P,M) are possible. + */ struct Locator { /** Which Cluster the Entry appears in (nullptr = missing). */ Cluster* cluster{nullptr}; - /** Where in the Cluster it appears (only if cluster != nullptr). */ + /** Where in the Cluster it appears (if cluster == nullptr: 0 = missing, -1 = removed). */ DepGraphIndex index{0}; - /** Mark this Locator as missing. */ + /** Mark this Locator as missing (= same as lower level, or non-existing if level 0). */ void SetMissing() noexcept { cluster = nullptr; index = 0; } + /** Mark this Locator as removed (not allowed in level 0). */ + void SetRemoved() noexcept { cluster = nullptr; index = DepGraphIndex(-1); } /** Mark this Locator as present, in the specified Cluster. */ void SetPresent(Cluster* c, DepGraphIndex i) noexcept { cluster = c; index = i; } /** Check if this Locator is missing. */ bool IsMissing() const noexcept { return cluster == nullptr && index == 0; } + /** Check if this Locator is removed. 
*/ + bool IsRemoved() const noexcept { return cluster == nullptr && index == DepGraphIndex(-1); } /** Check if this Locator is present (in some Cluster). */ bool IsPresent() const noexcept { return cluster != nullptr; } }; @@ -235,13 +295,13 @@ private: { /** Pointer to the corresponding Ref object if any, or nullptr if unlinked. */ Ref* m_ref{nullptr}; - /** Which Cluster and position therein this Entry appears in. */ - Locator m_locator; - /** The chunk feerate of this transaction (if not missing). */ - FeePerWeight m_chunk_feerate; + /** Which Cluster and position therein this Entry appears in. ([0] = main, [1] = staged). */ + Locator m_locator[MAX_LEVELS]; + /** The chunk feerate of this transaction in main (if present in m_locator[0]). */ + FeePerWeight m_main_chunk_feerate; }; - /** The set of all transactions. GraphIndex values index into this. */ + /** The set of all transactions (in all levels combined). GraphIndex values index into this. */ std::vector m_entries; /** Set of Entries which have no linked Ref anymore. */ @@ -266,16 +326,28 @@ public: /** Swap the Entrys referred to by a and b. */ void SwapIndexes(GraphIndex a, GraphIndex b) noexcept; - /** Extract a Cluster. */ - std::unique_ptr ExtractCluster(QualityLevel quality, ClusterSetIndex setindex) noexcept; + /** If idx exists in the specified level ClusterSet (explicitly, or in the level below and not + * removed), return the Cluster it is in. Otherwise, return nullptr. */ + Cluster* FindCluster(GraphIndex idx, int level) const noexcept; + /** Extract a Cluster from its ClusterSet. */ + std::unique_ptr ExtractCluster(int level, QualityLevel quality, ClusterSetIndex setindex) noexcept; /** Delete a Cluster. */ void DeleteCluster(Cluster& cluster) noexcept; - /** Insert a Cluster. */ - ClusterSetIndex InsertCluster(std::unique_ptr&& cluster, QualityLevel quality) noexcept; + /** Insert a Cluster into its ClusterSet. 
*/ + ClusterSetIndex InsertCluster(int level, std::unique_ptr&& cluster, QualityLevel quality) noexcept; /** Change the QualityLevel of a Cluster (identified by old_quality and old_index). */ - void SetClusterQuality(QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept; - /** Make a transaction not exist. It must currently exist. */ - void ClearLocator(GraphIndex index) noexcept; + void SetClusterQuality(int level, QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept; + /** Get the index of the top level ClusterSet (staging if it exists, main otherwise). */ + int GetTopLevel() const noexcept { return m_staging_clusterset.has_value(); } + /** Get the specified level (staging if it exists and main_only is not specified, main otherwise). */ + int GetSpecifiedLevel(bool main_only) const noexcept { return m_staging_clusterset.has_value() && !main_only; } + /** Get a reference to the ClusterSet at the specified level (which must exist). */ + ClusterSet& GetClusterSet(int level) noexcept; + const ClusterSet& GetClusterSet(int level) const noexcept; + /** Make a transaction not exist at a specified level. It must currently exist there. */ + void ClearLocator(int level, GraphIndex index) noexcept; + /** Find which Clusters in main conflict with ones in staging. */ + std::vector GetConflicts() const noexcept; // Functions for handling Refs. @@ -300,21 +372,25 @@ public: // Functions related to various normalization/application steps. /** Get rid of unlinked Entry objects in m_entries, if possible (this changes the GraphIndex * values for remaining Entrys, so this only does something when no to-be-applied operations - * referring to GraphIndexes remain). */ + * or staged removals referring to GraphIndexes remain). */ void Compact() noexcept; + /** If cluster is not in staging, copy it there, and return a pointer to it. 
This has no + * effect if only a main graph exists, but if staging exists this modifies the locators of its + * transactions from inherited (P,M) to explicit (P,P). */ + Cluster* PullIn(Cluster* cluster) noexcept; /** Apply all removals queued up in m_to_remove to the relevant Clusters (which get a - * NEEDS_SPLIT* QualityLevel). */ - void ApplyRemovals() noexcept; - /** Split an individual cluster. */ + * NEEDS_SPLIT* QualityLevel) in the specified level. */ + void ApplyRemovals(int level) noexcept; + /** Split an individual cluster (which must be in the top-level ClusterSet). */ void Split(Cluster& cluster) noexcept; - /** Split all clusters that need splitting. */ - void SplitAll() noexcept; - /** Populate m_group_data based on m_deps_to_add. */ - void GroupClusters() noexcept; + /** Split all clusters that need splitting in the specified level. */ + void SplitAll(int level) noexcept; + /** Populate m_group_data based on m_deps_to_add in the specified level. */ + void GroupClusters(int level) noexcept; /** Merge the specified clusters. */ void Merge(std::span to_merge) noexcept; - /** Apply all m_deps_to_add to the relevant Clusters. */ - void ApplyDependencies() noexcept; + /** Apply all m_deps_to_add to the relevant Clusters in the specified level. */ + void ApplyDependencies(int level) noexcept; /** Make a specified Cluster have quality ACCEPTABLE or OPTIMAL. 
*/ void MakeAcceptable(Cluster& cluster) noexcept; @@ -325,26 +401,53 @@ public: void AddDependency(const Ref& parent, const Ref& child) noexcept final; void SetTransactionFee(const Ref&, int64_t fee) noexcept final; - bool Exists(const Ref& arg) noexcept final; - FeePerWeight GetChunkFeerate(const Ref& arg) noexcept final; + void StartStaging() noexcept final; + void CommitStaging() noexcept final; + void AbortStaging() noexcept final; + bool HaveStaging() const noexcept final { return m_staging_clusterset.has_value(); } + + bool Exists(const Ref& arg, bool main_only = false) noexcept final; + FeePerWeight GetMainChunkFeerate(const Ref& arg) noexcept final; FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept final; - std::vector GetCluster(const Ref& arg) noexcept final; - std::vector GetAncestors(const Ref& arg) noexcept final; - std::vector GetDescendants(const Ref& arg) noexcept final; - GraphIndex GetTransactionCount() noexcept final; - bool IsOversized() noexcept final; + std::vector GetCluster(const Ref& arg, bool main_only = false) noexcept final; + std::vector GetAncestors(const Ref& arg, bool main_only = false) noexcept final; + std::vector GetDescendants(const Ref& arg, bool main_only = false) noexcept final; + GraphIndex GetTransactionCount(bool main_only = false) noexcept final; + bool IsOversized(bool main_only = false) noexcept final; void SanityCheck() const final; }; -void TxGraphImpl::ClearLocator(GraphIndex idx) noexcept +TxGraphImpl::ClusterSet& TxGraphImpl::GetClusterSet(int level) noexcept +{ + if (level == 0) return m_main_clusterset; + Assume(level == 1); + Assume(m_staging_clusterset.has_value()); + return *m_staging_clusterset; +} + +const TxGraphImpl::ClusterSet& TxGraphImpl::GetClusterSet(int level) const noexcept +{ + if (level == 0) return m_main_clusterset; + Assume(level == 1); + Assume(m_staging_clusterset.has_value()); + return *m_staging_clusterset; +} + +void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept { 
auto& entry = m_entries[idx]; - Assume(entry.m_locator.IsPresent()); - // Change the locator from Present to Missing. - entry.m_locator.SetMissing(); + auto& clusterset = GetClusterSet(level); + Assume(entry.m_locator[level].IsPresent()); + // Change the locator from Present to Missing or Removed. + if (level == 0 || !entry.m_locator[level - 1].IsPresent()) { + entry.m_locator[level].SetMissing(); + } else { + entry.m_locator[level].SetRemoved(); + clusterset.m_removed.push_back(idx); + } // Update the transaction count. - --m_clusterset.m_txcount; + --clusterset.m_txcount; } void Cluster::Updated(TxGraphImpl& graph) noexcept @@ -352,13 +455,13 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept // Update all the Locators for this Cluster's Entrys. for (DepGraphIndex idx : m_linearization) { auto& entry = graph.m_entries[m_mapping[idx]]; - entry.m_locator.SetPresent(this, idx); + entry.m_locator[m_level].SetPresent(this, idx); } - // If the Cluster's quality is ACCEPTABLE or OPTIMAL, compute its chunking and store its - // information in the Entry's m_chunk_feerate. These fields are only accessed after making - // the entire graph ACCEPTABLE, so it is pointless to compute these if we haven't reached that - // quality level yet. - if (IsAcceptable()) { + // If this is for the main graph (level = 0), and the Cluster's quality is ACCEPTABLE or + // OPTIMAL, compute its chunking and store its information in the Entry's m_main_chunk_feerate. + // These fields are only accessed after making the entire graph ACCEPTABLE, so it is pointless + // to compute these if we haven't reached that quality level yet. + if (m_level == 0 && IsAcceptable()) { LinearizationChunking chunking(m_depgraph, m_linearization); LinearizationIndex lin_idx{0}; // Iterate over the chunks. 
@@ -370,7 +473,7 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept DepGraphIndex idx = m_linearization[lin_idx++]; GraphIndex graph_idx = m_mapping[idx]; auto& entry = graph.m_entries[graph_idx]; - entry.m_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate); + entry.m_main_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate); Assume(chunk.transactions[idx]); chunk.transactions.Reset(idx); } while(chunk.transactions.Any()); @@ -378,6 +481,59 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept } } +void Cluster::GetConflicts(const TxGraphImpl& graph, std::vector& out) const noexcept +{ + Assume(m_level == 1); + for (auto i : m_linearization) { + auto& entry = graph.m_entries[m_mapping[i]]; + // For every transaction Entry in this Cluster, if it also exists in a lower-level Cluster, + // then that Cluster conflicts. + if (entry.m_locator[0].IsPresent()) { + out.push_back(entry.m_locator[0].cluster); + } + } +} + +std::vector TxGraphImpl::GetConflicts() const noexcept +{ + Assume(GetTopLevel() == 1); + auto& clusterset = GetClusterSet(1); + std::vector ret; + // All main Clusters containing transactions in m_removed (so (P,R) ones) are conflicts. + for (auto i : clusterset.m_removed) { + auto& entry = m_entries[i]; + Assume(entry.m_locator[0].IsPresent()); + ret.push_back(entry.m_locator[0].cluster); + } + // Then go over all Clusters at this level, and find their conflicts (the (P,P) ones). + for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + auto& clusters = clusterset.m_clusters[quality]; + for (const auto& cluster : clusters) { + cluster->GetConflicts(*this, ret); + } + } + // Deduplicate the result (the same Cluster may appear multiple times). + std::sort(ret.begin(), ret.end()); + ret.erase(std::unique(ret.begin(), ret.end()), ret.end()); + return ret; +} + +Cluster* Cluster::CopyToStaging(TxGraphImpl& graph) const noexcept +{ + // Construct an empty Cluster. 
+ auto ret = std::make_unique(); + auto ptr = ret.get(); + // Copy depgraph, mapping, and linearization. + ptr->m_depgraph = m_depgraph; + ptr->m_mapping = m_mapping; + ptr->m_linearization = m_linearization; + // Insert the new Cluster into the graph. + graph.InsertCluster(1, std::move(ret), m_quality); + // Update its Locators. + ptr->Updated(graph); + return ptr; +} + void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove) noexcept { // Iterate over the prefix of to_remove that applies to this cluster. @@ -387,7 +543,7 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove GraphIndex idx = to_remove.front(); Assume(idx < graph.m_entries.size()); auto& entry = graph.m_entries[idx]; - auto& locator = entry.m_locator; + auto& locator = entry.m_locator[m_level]; // Stop once we hit an entry that applies to another Cluster. if (locator.cluster != this) break; // - Remember it in a set of to-remove DepGraphIndexes. @@ -396,8 +552,8 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove // are just never accessed, but set it to -1 here to increase the ability to detect a bug // that causes it to be accessed regardless. m_mapping[locator.index] = GraphIndex(-1); - // - Mark it as removed in the Entry's locator. - graph.ClearLocator(idx); + // - Mark it as missing/removed in the Entry's locator.
+ graph.ClearLocator(m_level, idx); to_remove = to_remove.subspan(1); } while(!to_remove.empty()); @@ -429,7 +585,31 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove [&](auto pos) { return todo[pos]; }), m_linearization.end()); quality = QualityLevel::NEEDS_SPLIT; } - graph.SetClusterQuality(m_quality, m_setindex, quality); + graph.SetClusterQuality(m_level, m_quality, m_setindex, quality); + Updated(graph); +} + +void Cluster::Clear(TxGraphImpl& graph) noexcept +{ + for (auto i : m_linearization) { + graph.ClearLocator(m_level, m_mapping[i]); + } + m_depgraph = {}; + m_linearization.clear(); + m_mapping.clear(); +} + +void Cluster::MoveToMain(TxGraphImpl& graph) noexcept +{ + Assume(m_level == 1); + for (auto i : m_linearization) { + GraphIndex idx = m_mapping[i]; + auto& entry = graph.m_entries[idx]; + entry.m_locator[1].SetMissing(); + } + auto quality = m_quality; + auto cluster = graph.ExtractCluster(1, quality, m_setindex); + graph.InsertCluster(0, std::move(cluster), quality); Updated(graph); } @@ -462,7 +642,7 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept if (first && component == todo) { // The existing Cluster is an entire component. Leave it be, but update its quality. Assume(todo == m_depgraph.Positions()); - graph.SetClusterQuality(m_quality, m_setindex, new_quality); + graph.SetClusterQuality(m_level, m_quality, m_setindex, new_quality); // If this made the quality ACCEPTABLE or OPTIMAL, we need to compute and cache its // chunking. Updated(graph); @@ -477,7 +657,7 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept for (auto i : component) { remap[i] = {new_cluster.get(), DepGraphIndex(-1)}; } - graph.InsertCluster(std::move(new_cluster), new_quality); + graph.InsertCluster(m_level, std::move(new_cluster), new_quality); todo -= component; } // Redistribute the transactions. @@ -539,7 +719,7 @@ void Cluster::Merge(TxGraphImpl& graph, Cluster& other) noexcept // Update the transaction's Locator. 
There is no need to call Updated() to update chunk // feerates, as Updated() will be invoked by Cluster::ApplyDependencies on the resulting // merged Cluster later anyway). - graph.m_entries[idx].m_locator.SetPresent(this, new_pos); + graph.m_entries[idx].m_locator[m_level].SetPresent(this, new_pos); } // Purge the other Cluster, now that everything has been moved. other.m_depgraph = DepGraph{}; @@ -562,14 +742,14 @@ void Cluster::ApplyDependencies(TxGraphImpl& graph, std::spansecond].m_locator; + auto& first_child = graph.m_entries[it->second].m_locator[m_level]; const auto child_idx = first_child.index; // Iterate over all to-be-added dependencies within that same child, gather the relevant // parents. SetType parents; while (it != to_apply.end()) { - auto& child = graph.m_entries[it->second].m_locator; - auto& parent = graph.m_entries[it->first].m_locator; + auto& child = graph.m_entries[it->second].m_locator[m_level]; + auto& parent = graph.m_entries[it->first].m_locator[m_level]; Assume(child.cluster == this && parent.cluster == this); if (child.index != child_idx) break; parents.Set(parent.index); @@ -590,23 +770,26 @@ void Cluster::ApplyDependencies(TxGraphImpl& graph, std::span TxGraphImpl::ExtractCluster(QualityLevel quality, ClusterSetIndex setindex) noexcept +std::unique_ptr TxGraphImpl::ExtractCluster(int level, QualityLevel quality, ClusterSetIndex setindex) noexcept { Assume(quality != QualityLevel::NONE); - auto& quality_clusters = m_clusterset.m_clusters[int(quality)]; + auto& clusterset = GetClusterSet(level); + auto& quality_clusters = clusterset.m_clusters[int(quality)]; Assume(setindex < quality_clusters.size()); // Extract the Cluster-owning unique_ptr. std::unique_ptr ret = std::move(quality_clusters[setindex]); ret->m_quality = QualityLevel::NONE; ret->m_setindex = ClusterSetIndex(-1); + ret->m_level = -1; // Clean up space in quality_cluster. 
auto max_setindex = quality_clusters.size() - 1; if (setindex != max_setindex) { // If the cluster was not the last element of quality_clusters, move that to take its place. quality_clusters.back()->m_setindex = setindex; + quality_clusters.back()->m_level = level; quality_clusters[setindex] = std::move(quality_clusters.back()); } // The last element of quality_clusters is now unused; drop it. @@ -615,7 +798,7 @@ std::unique_ptr TxGraphImpl::ExtractCluster(QualityLevel quality, Clust return ret; } -ClusterSetIndex TxGraphImpl::InsertCluster(std::unique_ptr&& cluster, QualityLevel quality) noexcept +ClusterSetIndex TxGraphImpl::InsertCluster(int level, std::unique_ptr&& cluster, QualityLevel quality) noexcept { // Cannot insert with quality level NONE (as that would mean not inserted). Assume(quality != QualityLevel::NONE); @@ -623,48 +806,92 @@ ClusterSetIndex TxGraphImpl::InsertCluster(std::unique_ptr&& cluster, Q Assume(cluster->m_quality == QualityLevel::NONE); // Append it at the end of the relevant TxGraphImpl::m_cluster. - auto& quality_clusters = m_clusterset.m_clusters[int(quality)]; + auto& clusterset = GetClusterSet(level); + auto& quality_clusters = clusterset.m_clusters[int(quality)]; ClusterSetIndex ret = quality_clusters.size(); cluster->m_quality = quality; cluster->m_setindex = ret; + cluster->m_level = level; quality_clusters.push_back(std::move(cluster)); return ret; } -void TxGraphImpl::SetClusterQuality(QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept +void TxGraphImpl::SetClusterQuality(int level, QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept { Assume(new_quality != QualityLevel::NONE); // Don't do anything if the quality did not change. if (old_quality == new_quality) return; // Extract the cluster from where it currently resides. 
- auto cluster_ptr = ExtractCluster(old_quality, old_index); + auto cluster_ptr = ExtractCluster(level, old_quality, old_index); // And re-insert it where it belongs. - InsertCluster(std::move(cluster_ptr), new_quality); + InsertCluster(level, std::move(cluster_ptr), new_quality); } void TxGraphImpl::DeleteCluster(Cluster& cluster) noexcept { // Extract the cluster from where it currently resides. - auto cluster_ptr = ExtractCluster(cluster.m_quality, cluster.m_setindex); + auto cluster_ptr = ExtractCluster(cluster.m_level, cluster.m_quality, cluster.m_setindex); // And throw it away. cluster_ptr.reset(); } -void TxGraphImpl::ApplyRemovals() noexcept +Cluster* TxGraphImpl::FindCluster(GraphIndex idx, int level) const noexcept { - auto& clusterset = m_clusterset; + Assume(level >= 0 && level <= GetTopLevel()); + auto& entry = m_entries[idx]; + // Search the entry's locators from top to bottom. + for (int l = level; l >= 0; --l) { + // If the locator is missing, dig deeper; it may exist at a lower level and therefore be + // implicitly existing at this level too. + if (entry.m_locator[l].IsMissing()) continue; + // If the locator has the entry marked as explicitly removed, stop. + if (entry.m_locator[l].IsRemoved()) break; + // Otherwise, we have found the topmost ClusterSet that contains this entry. + return entry.m_locator[l].cluster; + } + // If no non-empty locator was found, or an explicitly removed one was hit, return nothing. + return nullptr; +} + +Cluster* TxGraphImpl::PullIn(Cluster* cluster) noexcept +{ + int to_level = GetTopLevel(); + if (to_level == 0) return cluster; + int level = cluster->m_level; + Assume(level <= to_level); + // Copy the Cluster from main to staging, if it's not already there. + if (level == 0) { + // Make the Cluster Acceptable before copying. This isn't strictly necessary, but doing it + // now avoids doing double work later.
+ MakeAcceptable(*cluster); + cluster = cluster->CopyToStaging(*this); + } + return cluster; +} + +void TxGraphImpl::ApplyRemovals(int level) noexcept +{ + auto& clusterset = GetClusterSet(level); auto& to_remove = clusterset.m_to_remove; // Skip if there is nothing to remove. if (to_remove.empty()) return; + // There cannot be removals to be applied in main when staging exists (they should have been + // applied in StartStaging already, and none can be added to main while staging exists). + Assume(level == GetTopLevel()); + // Pull in all Clusters that are not in staging. + for (GraphIndex index : to_remove) { + auto cluster = FindCluster(index, level); + PullIn(cluster); + } // Group the set of to-be-removed entries by Cluster*. std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { - return std::less{}(m_entries[a].m_locator.cluster, m_entries[b].m_locator.cluster); + return std::less{}(m_entries[a].m_locator[level].cluster, m_entries[b].m_locator[level].cluster); }); // Process per Cluster. std::span to_remove_span{to_remove}; while (!to_remove_span.empty()) { - Cluster* cluster = m_entries[to_remove_span.front()].m_locator.cluster; + Cluster* cluster = m_entries[to_remove_span.front()].m_locator[level].cluster; if (cluster != nullptr) { // If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it // can pop off whatever applies to it. @@ -691,21 +918,29 @@ void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept Entry& entry = m_entries[idx]; // Update linked Ref. if (entry.m_ref) GetRefIndex(*entry.m_ref) = idx; - // Update the locator. The rest of the Entry information will not change, so no need to - // invoke Cluster::Updated(). - Locator& locator = entry.m_locator; - if (locator.IsPresent()) { - locator.cluster->UpdateMapping(locator.index, idx); + // Update the locators for both levels. The rest of the Entry information will not change, + // so no need to invoke Cluster::Updated(). 
+ for (int level = 0; level < MAX_LEVELS; ++level) { + Locator& locator = entry.m_locator[level]; + if (locator.IsPresent()) { + locator.cluster->UpdateMapping(locator.index, idx); + } } } } void TxGraphImpl::Compact() noexcept { - // We cannot compact while any to-be-applied operations remain, as we'd need to rewrite them. - // It is easier to delay the compaction until they have been applied. - if (!m_clusterset.m_deps_to_add.empty()) return; - if (!m_clusterset.m_to_remove.empty()) return; + // We cannot compact while any to-be-applied operations or staged removals remain as we'd need + // to rewrite them. It is easier to delay the compaction until they have been applied. + if (!m_main_clusterset.m_deps_to_add.empty()) return; + if (!m_main_clusterset.m_to_remove.empty()) return; + if (!m_main_clusterset.m_removed.empty()) return; + if (m_staging_clusterset.has_value()) { + if (!m_staging_clusterset->m_deps_to_add.empty()) return; + if (!m_staging_clusterset->m_to_remove.empty()) return; + if (!m_staging_clusterset->m_removed.empty()) return; + } // Sort the GraphIndexes that need to be cleaned up. They are sorted in reverse, so the last // ones get processed first. This means earlier-processed GraphIndexes will not cause moving of @@ -724,7 +959,9 @@ void TxGraphImpl::Compact() noexcept Entry& entry = m_entries[idx]; Assume(entry.m_ref == nullptr); // Make sure the entry does not occur in the graph. - Assume(!entry.m_locator.IsPresent()); + for (int level = 0; level < MAX_LEVELS; ++level) { + Assume(!entry.m_locator[level].IsPresent()); + } // Move the entry to the end. if (idx != m_entries.size() - 1) SwapIndexes(idx, m_entries.size() - 1); @@ -738,7 +975,7 @@ void TxGraphImpl::Split(Cluster& cluster) noexcept { // To split a Cluster, first make sure all removals are applied (as we might need to split // again afterwards otherwise). 
- ApplyRemovals(); + ApplyRemovals(cluster.m_level); bool del = cluster.Split(*this); if (del) { // Cluster::Split reports whether the Cluster is to be deleted. @@ -746,28 +983,32 @@ void TxGraphImpl::Split(Cluster& cluster) noexcept } } -void TxGraphImpl::SplitAll() noexcept +void TxGraphImpl::SplitAll(int level) noexcept { // Before splitting all Cluster, first make sure all removals are applied. - ApplyRemovals(); + ApplyRemovals(level); for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE}) { - auto& queue = m_clusterset.m_clusters[int(quality)]; + auto& queue = GetClusterSet(level).m_clusters[int(quality)]; while (!queue.empty()) { Split(*queue.back().get()); } } } -void TxGraphImpl::GroupClusters() noexcept +void TxGraphImpl::GroupClusters(int level) noexcept { - auto& clusterset = m_clusterset; + auto& clusterset = GetClusterSet(level); // If the groupings have been computed already, nothing is left to be done. if (clusterset.m_group_data.has_value()) return; + // We should never need to compute main grouping while staging exists (it should have already + // been computed in StartStaging, and no modifications that invalidate it can be made while + // staging exists). + Assume(level == GetTopLevel()); // Before computing which Clusters need to be merged together, first apply all removals and // split the Clusters into connected components. If we would group first, we might end up // with inefficient and/or oversized Clusters which just end up being split again anyway. - SplitAll(); + SplitAll(level); /** Annotated clusters: an entry for each Cluster, together with the representative for the * partition it is in if known, or with nullptr if not yet known. */ @@ -779,8 +1020,8 @@ void TxGraphImpl::GroupClusters() noexcept // Construct a an_clusters entry for every parent and child in the to-be-applied dependencies.
for (const auto& [par, chl] : clusterset.m_deps_to_add) { - auto par_cluster = m_entries[par].m_locator.cluster; - auto chl_cluster = m_entries[chl].m_locator.cluster; + auto par_cluster = FindCluster(par, level); + auto chl_cluster = FindCluster(chl, level); // Skip dependencies for which the parent or child transaction is removed. if (par_cluster == nullptr || chl_cluster == nullptr) continue; an_clusters.emplace_back(par_cluster, nullptr); @@ -797,8 +1038,8 @@ void TxGraphImpl::GroupClusters() noexcept std::sort(clusterset.m_deps_to_add.begin(), clusterset.m_deps_to_add.end(), [&](auto& a, auto& b) noexcept { auto [_a_par, a_chl] = a; auto [_b_par, b_chl] = b; - auto a_chl_cluster = m_entries[a_chl].m_locator.cluster; - auto b_chl_cluster = m_entries[b_chl].m_locator.cluster; + auto a_chl_cluster = FindCluster(a_chl, level); + auto b_chl_cluster = FindCluster(b_chl, level); return std::less{}(a_chl_cluster, b_chl_cluster); }); @@ -872,8 +1113,8 @@ void TxGraphImpl::GroupClusters() noexcept Cluster* last_chl_cluster{nullptr}; PartitionData* last_partition{nullptr}; for (const auto& [par, chl] : clusterset.m_deps_to_add) { - auto par_cluster = m_entries[par].m_locator.cluster; - auto chl_cluster = m_entries[chl].m_locator.cluster; + auto par_cluster = FindCluster(par, level); + auto chl_cluster = FindCluster(chl, level); // Nothing to do if parent and child are in the same Cluster. if (par_cluster == chl_cluster) continue; // Nothing to do if either parent or child transaction is removed already. @@ -905,12 +1146,12 @@ void TxGraphImpl::GroupClusters() noexcept // Find all dependencies whose child Cluster is Cluster i, and annotate them with rep. 
while (deps_it != clusterset.m_deps_to_add.end()) { auto [par, chl] = *deps_it; - auto chl_cluster = m_entries[chl].m_locator.cluster; + auto chl_cluster = FindCluster(chl, level); if (std::greater{}(chl_cluster, data.cluster)) break; // Skip dependencies that apply to earlier Clusters (those necessary are for // deleted transactions, as otherwise we'd have processed them already). if (chl_cluster == data.cluster) { - auto par_cluster = m_entries[par].m_locator.cluster; + auto par_cluster = FindCluster(par, level); // Also filter out dependencies applying to a removed parent. if (par_cluster != nullptr) an_deps.emplace_back(*deps_it, rep); } @@ -993,11 +1234,11 @@ void TxGraphImpl::Merge(std::span to_merge) noexcept } } -void TxGraphImpl::ApplyDependencies() noexcept +void TxGraphImpl::ApplyDependencies(int level) noexcept { - auto& clusterset = m_clusterset; + auto& clusterset = GetClusterSet(level); // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits). - GroupClusters(); + GroupClusters(level); Assume(clusterset.m_group_data.has_value()); // Nothing to do if there are no dependencies to be added. if (clusterset.m_deps_to_add.empty()) return; @@ -1006,16 +1247,20 @@ void TxGraphImpl::ApplyDependencies() noexcept // For each group of to-be-merged Clusters. for (const auto& group_data : clusterset.m_group_data->m_groups) { - // Invoke Merge() to merge them into a single Cluster. auto cluster_span = std::span{clusterset.m_group_data->m_group_clusters} .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); + // Pull in all the Clusters that contain dependencies. + for (Cluster*& cluster : cluster_span) { + cluster = PullIn(cluster); + } + // Invoke Merge() to merge them into a single Cluster. Merge(cluster_span); // Actually apply all to-be-added dependencies (all parents and children from this grouping // belong to the same Cluster at this point because of the merging above). 
auto deps_span = std::span{clusterset.m_deps_to_add} .subspan(group_data.m_deps_offset, group_data.m_deps_count); Assume(!deps_span.empty()); - const auto& loc = m_entries[deps_span[0].second].m_locator; + const auto& loc = m_entries[deps_span[0].second].m_locator[level]; Assume(loc.IsPresent()); loc.cluster->ApplyDependencies(*this, deps_span); } @@ -1044,7 +1289,7 @@ void Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept m_linearization = std::move(linearization); // Update the Cluster's quality. auto new_quality = optimal ? QualityLevel::OPTIMAL : QualityLevel::ACCEPTABLE; - graph.SetClusterQuality(m_quality, m_setindex, new_quality); + graph.SetClusterQuality(m_level, m_quality, m_setindex, new_quality); // Update the Entry objects. Updated(graph); } @@ -1079,9 +1324,11 @@ TxGraph::Ref TxGraphImpl::AddTransaction(const FeePerWeight& feerate) noexcept // Construct a new singleton Cluster (which is necessarily optimally linearized). auto cluster = std::make_unique(*this, feerate, idx); auto cluster_ptr = cluster.get(); - InsertCluster(std::move(cluster), QualityLevel::OPTIMAL); + int level = GetTopLevel(); + auto& clusterset = GetClusterSet(level); + InsertCluster(level, std::move(cluster), QualityLevel::OPTIMAL); cluster_ptr->Updated(*this); - ++m_clusterset.m_txcount; + ++clusterset.m_txcount; // Return the Ref. return ret; } @@ -1093,12 +1340,14 @@ void TxGraphImpl::RemoveTransaction(const Ref& arg) noexcept if (GetRefGraph(arg) == nullptr) return; Assume(GetRefGraph(arg) == this); // Find the Cluster the transaction is in, and stop if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + int level = GetTopLevel(); + auto cluster = FindCluster(GetRefIndex(arg), level); if (cluster == nullptr) return; // Remember that the transaction is to be removed. 
- m_clusterset.m_to_remove.push_back(GetRefIndex(arg)); + auto& clusterset = GetClusterSet(level); + clusterset.m_to_remove.push_back(GetRefIndex(arg)); // Wipe m_group_data (as it will need to be recomputed). - m_clusterset.m_group_data.reset(); + clusterset.m_group_data.reset(); } void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept @@ -1111,23 +1360,27 @@ void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept if (GetRefIndex(parent) == GetRefIndex(child)) return; // Find the Cluster the parent and child transaction are in, and stop if either appears to be // already removed. - auto par_cluster = m_entries[GetRefIndex(parent)].m_locator.cluster; + int level = GetTopLevel(); + auto par_cluster = FindCluster(GetRefIndex(parent), level); if (par_cluster == nullptr) return; - auto chl_cluster = m_entries[GetRefIndex(child)].m_locator.cluster; + auto chl_cluster = FindCluster(GetRefIndex(child), level); if (chl_cluster == nullptr) return; // Remember that this dependency is to be applied. - m_clusterset.m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); + auto& clusterset = GetClusterSet(level); + clusterset.m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); // Wipe m_group_data (as it will need to be recomputed). - m_clusterset.m_group_data.reset(); + clusterset.m_group_data.reset(); } -bool TxGraphImpl::Exists(const Ref& arg) noexcept +bool TxGraphImpl::Exists(const Ref& arg, bool main_only) noexcept { if (GetRefGraph(arg) == nullptr) return false; Assume(GetRefGraph(arg) == this); + size_t level = GetSpecifiedLevel(main_only); // Make sure the transaction isn't scheduled for removal. 
- ApplyRemovals(); - return m_entries[GetRefIndex(arg)].m_locator.IsPresent(); + ApplyRemovals(level); + auto cluster = FindCluster(GetRefIndex(arg), level); + return cluster != nullptr; } std::vector Cluster::GetAncestorRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept @@ -1171,59 +1424,76 @@ FeePerWeight Cluster::GetIndividualFeerate(DepGraphIndex idx) noexcept return FeePerWeight::FromFeeFrac(m_depgraph.FeeRate(idx)); } -std::vector TxGraphImpl::GetAncestors(const Ref& arg) noexcept +void Cluster::MakeStagingTransactionsMissing(TxGraphImpl& graph) noexcept { - // Return the empty vector if the Ref is empty. - if (GetRefGraph(arg) == nullptr) return {}; - Assume(GetRefGraph(arg) == this); - // Apply all removals and dependencies, as the result might be incorrect otherwise. - ApplyDependencies(); - // Ancestry cannot be known if unapplied dependencies remain. - Assume(m_clusterset.m_deps_to_add.empty()); - // Find the Cluster the argument is in, and return the empty vector if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; - if (cluster == nullptr) return {}; - // Dispatch to the Cluster. - return cluster->GetAncestorRefs(*this, m_entries[GetRefIndex(arg)].m_locator.index); + Assume(m_level == 1); + // Mark all transactions of a Cluster missing, needed when aborting staging, so that the + // corresponding Locators don't retain references into aborted Clusters. + for (auto ci : m_linearization) { + GraphIndex idx = m_mapping[ci]; + auto& entry = graph.m_entries[idx]; + entry.m_locator[1].SetMissing(); + } } -std::vector TxGraphImpl::GetDescendants(const Ref& arg) noexcept +std::vector TxGraphImpl::GetAncestors(const Ref& arg, bool main_only) noexcept { // Return the empty vector if the Ref is empty. if (GetRefGraph(arg) == nullptr) return {}; Assume(GetRefGraph(arg) == this); // Apply all removals and dependencies, as the result might be incorrect otherwise. 
- ApplyDependencies(); + size_t level = GetSpecifiedLevel(main_only); + ApplyDependencies(level); // Ancestry cannot be known if unapplied dependencies remain. - Assume(m_clusterset.m_deps_to_add.empty()); + Assume(GetClusterSet(level).m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + auto cluster = FindCluster(GetRefIndex(arg), level); if (cluster == nullptr) return {}; // Dispatch to the Cluster. - return cluster->GetDescendantRefs(*this, m_entries[GetRefIndex(arg)].m_locator.index); + return cluster->GetAncestorRefs(*this, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index); } -std::vector TxGraphImpl::GetCluster(const Ref& arg) noexcept +std::vector TxGraphImpl::GetDescendants(const Ref& arg, bool main_only) noexcept { // Return the empty vector if the Ref is empty. if (GetRefGraph(arg) == nullptr) return {}; Assume(GetRefGraph(arg) == this); // Apply all removals and dependencies, as the result might be incorrect otherwise. - ApplyDependencies(); + size_t level = GetSpecifiedLevel(main_only); + ApplyDependencies(level); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(GetClusterSet(level).m_deps_to_add.empty()); + // Find the Cluster the argument is in, and return the empty vector if it isn't in any. + auto cluster = FindCluster(GetRefIndex(arg), level); + if (cluster == nullptr) return {}; + // Dispatch to the Cluster. + return cluster->GetDescendantRefs(*this, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index); +} + +std::vector TxGraphImpl::GetCluster(const Ref& arg, bool main_only) noexcept +{ + // Return the empty vector if the Ref is empty (which may be indicative of the transaction + // having been removed already). + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all removals and dependencies, as the result might be incorrect otherwise.
+ size_t level = GetSpecifiedLevel(main_only); + ApplyDependencies(level); // Cluster linearization cannot be known if unapplied dependencies remain. - Assume(m_clusterset.m_deps_to_add.empty()); + Assume(GetClusterSet(level).m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + auto cluster = FindCluster(GetRefIndex(arg), level); if (cluster == nullptr) return {}; // Make sure the Cluster has an acceptable quality level, and then dispatch to it. MakeAcceptable(*cluster); return cluster->GetClusterRefs(*this); } -TxGraph::GraphIndex TxGraphImpl::GetTransactionCount() noexcept +TxGraph::GraphIndex TxGraphImpl::GetTransactionCount(bool main_only) noexcept { - ApplyRemovals(); - return m_clusterset.m_txcount; + size_t level = GetSpecifiedLevel(main_only); + ApplyRemovals(level); + return GetClusterSet(level).m_txcount; } FeePerWeight TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept @@ -1231,41 +1501,121 @@ FeePerWeight TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept // Return the empty FeePerWeight if the passed Ref is empty. if (GetRefGraph(arg) == nullptr) return {}; Assume(GetRefGraph(arg) == this); - // Apply removals, so that we can correctly report FeePerWeight{} for non-existing transaction. - ApplyRemovals(); - // Find the cluster the argument is in, and return the empty FeePerWeight if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + // Find the cluster the argument is in (the level does not matter as individual feerates will + // be identical if it occurs in both), and return the empty FeePerWeight if it isn't in any. + Cluster* cluster{nullptr}; + for (int level = 0; level <= GetTopLevel(); ++level) { + // Apply removals, so that we can correctly report FeePerWeight{} for non-existing transaction. 
+ ApplyRemovals(level); + if (m_entries[GetRefIndex(arg)].m_locator[level].IsPresent()) { + cluster = m_entries[GetRefIndex(arg)].m_locator[level].cluster; + break; + } + } if (cluster == nullptr) return {}; // Dispatch to the Cluster. - return cluster->GetIndividualFeerate(m_entries[GetRefIndex(arg)].m_locator.index); + return cluster->GetIndividualFeerate(m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index); } -FeePerWeight TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept +FeePerWeight TxGraphImpl::GetMainChunkFeerate(const Ref& arg) noexcept { // Return the empty FeePerWeight if the passed Ref is empty. if (GetRefGraph(arg) == nullptr) return {}; Assume(GetRefGraph(arg) == this); // Apply all removals and dependencies, as the result might be inaccurate otherwise. - ApplyDependencies(); + ApplyDependencies(/*level=*/0); // Chunk feerates cannot be accurately known if unapplied dependencies remain. - Assume(m_clusterset.m_deps_to_add.empty()); + Assume(m_main_clusterset.m_deps_to_add.empty()); // Find the cluster the argument is in, and return the empty FeePerWeight if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + auto cluster = FindCluster(GetRefIndex(arg), 0); if (cluster == nullptr) return {}; // Make sure the Cluster has an acceptable quality level, and then return the transaction's // chunk feerate. MakeAcceptable(*cluster); const auto& entry = m_entries[GetRefIndex(arg)]; - return entry.m_chunk_feerate; + return entry.m_main_chunk_feerate; } -bool TxGraphImpl::IsOversized() noexcept +bool TxGraphImpl::IsOversized(bool main_only) noexcept { + size_t level = GetSpecifiedLevel(main_only); // Find which Clusters will need to be merged together, as that is where the oversize // property is assessed. 
- GroupClusters(); - Assume(m_clusterset.m_group_data.has_value()); - return m_clusterset.m_group_data->m_group_oversized; + GroupClusters(level); + auto& clusterset = GetClusterSet(level); + Assume(clusterset.m_group_data.has_value()); + return clusterset.m_group_data->m_group_oversized; +} + +void TxGraphImpl::StartStaging() noexcept +{ + // Staging cannot already exist. + Assume(!m_staging_clusterset.has_value()); + // Apply all remaining dependencies in main before creating a staging graph. Once staging + // exists, we cannot merge Clusters anymore (because of interference with Clusters being + // pulled into staging), so to make sure all inspectors are available (if not oversized), + // do all merging work now. This also involves applying all removals. + ApplyDependencies(0); + // Construct the staging ClusterSet. + m_staging_clusterset.emplace(); + // Copy statistics, precomputed data, and to-be-applied dependencies (only if oversized) to + // the new graph. To-be-applied removals will always be empty at this point. + m_staging_clusterset->m_txcount = m_main_clusterset.m_txcount; + m_staging_clusterset->m_deps_to_add = m_main_clusterset.m_deps_to_add; + m_staging_clusterset->m_group_data = m_main_clusterset.m_group_data; +} + +void TxGraphImpl::AbortStaging() noexcept +{ + // Staging must exist. + Assume(m_staging_clusterset.has_value()); + // Mark all removed transactions as Missing (so the staging locator for these transactions + // can be reused if another staging is created). + for (auto idx : m_staging_clusterset->m_removed) { + m_entries[idx].m_locator[1].SetMissing(); + } + // Do the same with the non-removed transactions in staging Clusters. + for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + for (auto& cluster : m_staging_clusterset->m_clusters[quality]) { + cluster->MakeStagingTransactionsMissing(*this); + } + } + // Destroy the staging ClusterSet. 
+ m_staging_clusterset.reset(); + Compact(); +} + +void TxGraphImpl::CommitStaging() noexcept +{ + // Staging must exist. + Assume(m_staging_clusterset.has_value()); + // Delete all conflicting Clusters in main, to make place for moving the staging ones + // there. All of these have been copied to staging in PullIn(). + auto conflicts = GetConflicts(); + for (Cluster* conflict : conflicts) { + conflict->Clear(*this); + DeleteCluster(*conflict); + } + // Mark the removed transactions as Missing (so the staging locator for these transactions + // can be reused if another staging is created). + for (auto idx : m_staging_clusterset->m_removed) { + m_entries[idx].m_locator[1].SetMissing(); + } + // Then move all Clusters in staging to main. + for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + auto& stage_sets = m_staging_clusterset->m_clusters[quality]; + while (!stage_sets.empty()) { + stage_sets.back()->MoveToMain(*this); + } + } + // Move all statistics, precomputed data, and to-be-applied removals and dependencies. + m_main_clusterset.m_deps_to_add = std::move(m_staging_clusterset->m_deps_to_add); + m_main_clusterset.m_to_remove = std::move(m_staging_clusterset->m_to_remove); + m_main_clusterset.m_group_data = std::move(m_staging_clusterset->m_group_data); + m_main_clusterset.m_txcount = std::move(m_staging_clusterset->m_txcount); + // Delete the old staging graph, after all its information was moved to main. + m_staging_clusterset.reset(); + Compact(); } void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcept @@ -1278,9 +1628,9 @@ void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcep // in the same Cluster. 
m_depgraph.FeeRate(idx).fee = fee; if (!NeedsSplitting()) { - graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + graph.SetClusterQuality(m_level, m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); } else { - graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); + graph.SetClusterQuality(m_level, m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); } Updated(graph); } @@ -1292,13 +1642,15 @@ void TxGraphImpl::SetTransactionFee(const Ref& ref, int64_t fee) noexcept Assume(GetRefGraph(ref) == this); // Find the entry, its locator, and inform its Cluster about the new feerate, if any. auto& entry = m_entries[GetRefIndex(ref)]; - auto& locator = entry.m_locator; - if (locator.IsPresent()) { - locator.cluster->SetFee(*this, locator.index, fee); + for (int level = 0; level < MAX_LEVELS; ++level) { + auto& locator = entry.m_locator[level]; + if (locator.IsPresent()) { + locator.cluster->SetFee(*this, locator.index, fee); + } } } -void Cluster::SanityCheck(const TxGraphImpl& graph) const +void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const { // There must be an m_mapping for each m_depgraph position (including holes). assert(m_depgraph.PositionRange() == m_mapping.size()); @@ -1306,6 +1658,8 @@ void Cluster::SanityCheck(const TxGraphImpl& graph) const assert(m_depgraph.TxCount() == m_linearization.size()); // The number of transactions in a Cluster cannot exceed m_max_cluster_count. assert(m_linearization.size() <= graph.m_max_cluster_count); + // The level must match the level the Cluster occurs in. + assert(m_level == level); // m_quality and m_setindex are checked in TxGraphImpl::SanityCheck. // Compute the chunking of m_linearization. @@ -1321,14 +1675,14 @@ void Cluster::SanityCheck(const TxGraphImpl& graph) const m_done.Set(lin_pos); assert(m_done.IsSupersetOf(m_depgraph.Ancestors(lin_pos))); // Check that the Entry has a locator pointing back to this Cluster & position within it. 
- assert(entry.m_locator.cluster == this); - assert(entry.m_locator.index == lin_pos); - // Check linearization position and chunk feerate. - if (IsAcceptable()) { + assert(entry.m_locator[level].cluster == this); + assert(entry.m_locator[level].index == lin_pos); + // For main-level entries, check linearization position and chunk feerate. + if (level == 0 && IsAcceptable()) { if (!linchunking.GetChunk(0).transactions[lin_pos]) { linchunking.MarkDone(linchunking.GetChunk(0).transactions); } - assert(entry.m_chunk_feerate == linchunking.GetChunk(0).feerate); + assert(entry.m_main_chunk_feerate == linchunking.GetChunk(0).feerate); // If this Cluster has an acceptable quality level, its chunks must be connected. assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions)); } @@ -1341,8 +1695,12 @@ void TxGraphImpl::SanityCheck() const { /** Which GraphIndexes ought to occur in m_unlinked, based on m_entries. */ std::set expected_unlinked; - /** Which Clusters ought to occur in m_clusters, based on m_entries. */ - std::set expected_clusters; + /** Which Clusters ought to occur in ClusterSet::m_clusters, based on m_entries. */ + std::set expected_clusters[MAX_LEVELS]; + /** Which GraphIndexes ought to occur in ClusterSet::m_removed, based on m_entries. */ + std::set expected_removed[MAX_LEVELS]; + /** Whether compaction is possible in the current state. */ + bool compact_possible{true}; // Go over all Entry objects in m_entries. for (GraphIndex idx = 0; idx < m_entries.size(); ++idx) { @@ -1355,63 +1713,91 @@ void TxGraphImpl::SanityCheck() const assert(GetRefGraph(*entry.m_ref) == this); assert(GetRefIndex(*entry.m_ref) == idx); } - const auto& locator = entry.m_locator; - // Every Locator must be in exactly one of these 2 states. - assert(locator.IsMissing() + locator.IsPresent() == 1); - if (locator.IsPresent()) { - // Verify that the Cluster agrees with where the Locator claims the transaction is. 
- assert(locator.cluster->GetClusterEntry(locator.index) == idx); - // Remember that we expect said Cluster to appear in the m_clusters. - expected_clusters.insert(locator.cluster); - } - - } - - auto& clusterset = m_clusterset; - std::set actual_clusters; - // For all quality levels... - for (int qual = 0; qual < int(QualityLevel::NONE); ++qual) { - QualityLevel quality{qual}; - const auto& quality_clusters = clusterset.m_clusters[qual]; - // ... for all clusters in them ... - for (ClusterSetIndex setindex = 0; setindex < quality_clusters.size(); ++setindex) { - const auto& cluster = *quality_clusters[setindex]; - // Remember we saw this Cluster (only if it is non-empty; empty Clusters aren't - // expected to be referenced by the Entry vector). - if (cluster.GetTxCount() != 0) { - actual_clusters.insert(&cluster); + // Verify the Entry m_locators. + bool was_present{false}, was_removed{false}; + for (int level = 0; level < MAX_LEVELS; ++level) { + const auto& locator = entry.m_locator[level]; + // Every Locator must be in exactly one of these 3 states. + assert(locator.IsMissing() + locator.IsRemoved() + locator.IsPresent() == 1); + if (locator.IsPresent()) { + // Once removed, a transaction cannot be revived. + assert(!was_removed); + // Verify that the Cluster agrees with where the Locator claims the transaction is. + assert(locator.cluster->GetClusterEntry(locator.index) == idx); + // Remember that we expect said Cluster to appear in the ClusterSet::m_clusters. + expected_clusters[level].insert(locator.cluster); + was_present = true; + } else if (locator.IsRemoved()) { + // Level 0 (main) cannot have IsRemoved locators (IsMissing there means non-existing). + assert(level > 0); + // A Locator can only be IsRemoved if it was IsPresent before, and only once. + assert(was_present && !was_removed); + // Remember that we expect this GraphIndex to occur in the ClusterSet::m_removed. 
+ expected_removed[level].insert(idx); + was_removed = true; } - // Sanity check the cluster, according to the Cluster's internal rules. - cluster.SanityCheck(*this); - // Check that the cluster's quality and setindex matches its position in the quality list. - assert(cluster.m_quality == quality); - assert(cluster.m_setindex == setindex); } } - // Verify that all to-be-removed transactions have valid identifiers, and aren't removed yet. - for (GraphIndex idx : m_clusterset.m_to_remove) { - assert(idx < m_entries.size()); - assert(m_entries[idx].m_locator.IsPresent()); - } + // For all levels (0 = main, 1 = staged)... + for (int level = 0; level <= GetTopLevel(); ++level) { + assert(level < MAX_LEVELS); + auto& clusterset = GetClusterSet(level); + std::set actual_clusters; - // Verify that all to-be-added dependencies have valid identifiers. - for (auto [par_idx, chl_idx] : m_clusterset.m_deps_to_add) { - assert(par_idx != chl_idx); - assert(par_idx < m_entries.size()); - assert(chl_idx < m_entries.size()); - } + // For all quality levels... + for (int qual = 0; qual < int(QualityLevel::NONE); ++qual) { + QualityLevel quality{qual}; + const auto& quality_clusters = clusterset.m_clusters[qual]; + // ... for all clusters in them ... + for (ClusterSetIndex setindex = 0; setindex < quality_clusters.size(); ++setindex) { + const auto& cluster = *quality_clusters[setindex]; + // Remember we saw this Cluster (only if it is non-empty; empty Clusters aren't + // expected to be referenced by the Entry vector). + if (cluster.GetTxCount() != 0) { + actual_clusters.insert(&cluster); + } + // Sanity check the cluster, according to the Cluster's internal rules. + cluster.SanityCheck(*this, level); + // Check that the cluster's quality and setindex matches its position in the quality list. + assert(cluster.m_quality == quality); + assert(cluster.m_setindex == setindex); + } + } - // Verify that the actually encountered clusters match the ones occurring in Entry vector. 
- assert(actual_clusters == expected_clusters); + // Verify that all to-be-removed transactions have valid identifiers, and aren't removed yet. + for (GraphIndex idx : clusterset.m_to_remove) { + assert(idx < m_entries.size()); + assert(FindCluster(idx, level) != nullptr); + } + + // Verify that all to-be-added dependencies have valid identifiers. + for (auto [par_idx, chl_idx] : clusterset.m_deps_to_add) { + assert(par_idx != chl_idx); + assert(par_idx < m_entries.size()); + assert(chl_idx < m_entries.size()); + } + + // Verify that the actually encountered clusters match the ones occurring in Entry vector. + assert(actual_clusters == expected_clusters[level]); + + // Verify that the contents of m_removed matches what was expected based on the Entry vector. + std::set actual_removed(clusterset.m_removed.begin(), clusterset.m_removed.end()); + assert(actual_removed == expected_removed[level]); + + // If any GraphIndex entries remain in this ClusterSet, compact is not possible. + if (!clusterset.m_deps_to_add.empty()) compact_possible = false; + if (!clusterset.m_to_remove.empty()) compact_possible = false; + if (!clusterset.m_removed.empty()) compact_possible = false; + } // Verify that the contents of m_unlinked matches what was expected based on the Entry vector. std::set actual_unlinked(m_unlinked.begin(), m_unlinked.end()); assert(actual_unlinked == expected_unlinked); - // If no to-be-removed transactions, or to-be-added dependencies remain, m_unlinked must be + // If compaction was possible, it should have been performed already, and m_unlinked must be // empty (to prevent memory leaks due to an ever-growing m_entries vector). 
- if (clusterset.m_to_remove.empty() && clusterset.m_deps_to_add.empty()) { + if (compact_possible) { assert(actual_unlinked.empty()); } } diff --git a/src/txgraph.h b/src/txgraph.h index 83411dc8801..a524e0468f7 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -16,15 +16,18 @@ static constexpr unsigned MAX_CLUSTER_COUNT_LIMIT{64}; /** Data structure to encapsulate fees, sizes, and dependencies for a set of transactions. * - * The connected components within the transaction graph are called clusters: whenever one + * Each TxGraph represents one or two such graphs ("main", and optionally "staging"), to allow for + * working with batches of changes that may still be discarded. + * + * The connected components within each transaction graph are called clusters: whenever one * transaction is reachable from another, through any sequence of is-parent-of or is-child-of * relations, they belong to the same cluster (so clusters include parents, children, but also * grandparents, siblings, cousins twice removed, ...). * - * TxGraph implicitly defines an associated total ordering on its transactions (its linearization) - * that respects topology (parents go before their children), aiming for it to be close to the - * optimal order those transactions should be mined in if the goal is fee maximization, though this - * is a best effort only, not a strong guarantee. + * For each graph, TxGraph implicitly defines an associated total ordering on its transactions + * (its linearization) that respects topology (parents go before their children), aiming for it to + * be close to the optimal order those transactions should be mined in if the goal is fee + * maximization, though this is a best effort only, not a strong guarantee. * * For more explanation, see https://delvingbitcoin.org/t/introduction-to-cluster-linearization/1032 * @@ -56,11 +59,13 @@ public: /** Virtual destructor, so inheriting is safe. 
*/ virtual ~TxGraph() = default; - /** Construct a new transaction with the specified feerate, and return a Ref to it. In all + /** Construct a new transaction with the specified feerate, and return a Ref to it. + * If a staging graph exists, the new transaction is only created there. In all * further calls, only Refs created by AddTransaction() are allowed to be passed to this * TxGraph object (or empty Ref objects). */ [[nodiscard]] virtual Ref AddTransaction(const FeePerWeight& feerate) noexcept = 0; - /** Remove the specified transaction. This is a no-op if the transaction was already removed. + /** Remove the specified transaction. If a staging graph exists, the removal only happens + * there. This is a no-op if the transaction was already removed. * * TxGraph may internally reorder transaction removals with dependency additions for * performance reasons. If together with any transaction removal all its descendants, or all @@ -74,42 +79,64 @@ public: * original order case and the reordered case. */ virtual void RemoveTransaction(const Ref& arg) noexcept = 0; - /** Add a dependency between two specified transactions. Parent may not be a descendant of - * child already (but may be an ancestor of it already, in which case this is a no-op). If - * either transaction is already removed, this is a no-op. */ + /** Add a dependency between two specified transactions. If a staging graph exists, the + * dependency is only added there. Parent may not be a descendant of child already (but may + * be an ancestor of it already, in which case this is a no-op). If either transaction is + * already removed, this is a no-op. */ virtual void AddDependency(const Ref& parent, const Ref& child) noexcept = 0; - /** Modify the fee of the specified transaction. If the transaction does not exist (or was - * removed), this has no effect. */ + /** Modify the fee of the specified transaction, in both the main graph and the staging + * graph if it exists. 
Wherever the transaction does not exist (or was removed), this has no + * effect. */ virtual void SetTransactionFee(const Ref& arg, int64_t fee) noexcept = 0; + /** Create a staging graph (which cannot exist already). This acts as if a full copy of + * the transaction graph is made, upon which further modifications are made. This copy can + * be inspected, and then either discarded, or the main graph can be replaced by it by + * committing it. */ + virtual void StartStaging() noexcept = 0; + /** Discard the existing active staging graph (which must exist). */ + virtual void AbortStaging() noexcept = 0; + /** Replace the main graph with the staging graph (which must exist). */ + virtual void CommitStaging() noexcept = 0; + /** Check whether a staging graph exists. */ + virtual bool HaveStaging() const noexcept = 0; + /** Determine whether the graph is oversized (contains a connected component of more than the - * configured maximum cluster count). Some of the functions below are not available + * configured maximum cluster count). If main_only is false and a staging graph exists, it is + * queried; otherwise the main graph is queried. Some of the functions below are not available * for oversized graphs. The mutators above are always available. */ - virtual bool IsOversized() noexcept = 0; - /** Determine whether arg exists in this graph (i.e., was not removed). This is available even - * for oversized graphs. */ - virtual bool Exists(const Ref& arg) noexcept = 0; + virtual bool IsOversized(bool main_only = false) noexcept = 0; + /** Determine whether arg exists in the graph (i.e., was not removed). If main_only is false + * and a staging graph exists, it is queried; otherwise the main graph is queried. This is + * available even for oversized graphs. */ + virtual bool Exists(const Ref& arg, bool main_only = false) noexcept = 0; /** Get the individual transaction feerate of transaction arg. Returns the empty FeePerWeight - * if arg does not exist.
This is available even for oversized graphs. */ - virtual FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept = 0; - /** Get the feerate of the chunk which transaction arg is in. Returns the empty FeePerWeight if - * arg does not exist. The graph must not be oversized. */ - virtual FeePerWeight GetChunkFeerate(const Ref& arg) noexcept = 0; - /** Get pointers to all transactions in the cluster which arg is in. The transactions will be - * returned in graph order. The graph must not be oversized. Returns {} if arg does not exist - * in the graph. */ - virtual std::vector GetCluster(const Ref& arg) noexcept = 0; - /** Get pointers to all ancestors of the specified transaction (including the transaction - * itself), in unspecified order. The graph must not be oversized. Returns {} if arg does not - * exist in the graph. */ - virtual std::vector GetAncestors(const Ref& arg) noexcept = 0; - /** Get pointers to all descendants of the specified transaction (including the transaction - * itself), in unspecified order. The graph must not be oversized. Returns {} if arg does not - * exist in the graph. */ - virtual std::vector GetDescendants(const Ref& arg) noexcept = 0; - /** Get the total number of transactions in the graph. This is available even for oversized + * if arg does not exist in either main or staging. This is available even for oversized * graphs. */ - virtual GraphIndex GetTransactionCount() noexcept = 0; + virtual FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept = 0; + /** Get the feerate of the chunk which transaction arg is in, in the main graph. Returns the + * empty FeePerWeight if arg does not exist in the main graph. The main graph must not be + * oversized. */ + virtual FeePerWeight GetMainChunkFeerate(const Ref& arg) noexcept = 0; + /** Get pointers to all transactions in the cluster which arg is in. The transactions are + * returned in graph order. 
If main_only is false and a staging graph exists, it is queried; + * otherwise the main graph is queried. The queried graph must not be oversized. Returns {} if + * arg does not exist in the queried graph. */ + virtual std::vector GetCluster(const Ref& arg, bool main_only = false) noexcept = 0; + /** Get pointers to all ancestors of the specified transaction (including the transaction + * itself), in unspecified order. If main_only is false and a staging graph exists, it is + * queried; otherwise the main graph is queried. The queried graph must not be oversized. + * Returns {} if arg does not exist in the graph. */ + virtual std::vector GetAncestors(const Ref& arg, bool main_only = false) noexcept = 0; + /** Get pointers to all descendants of the specified transaction (including the transaction + * itself), in unspecified order. If main_only is false and a staging graph exists, it is + * queried; otherwise the main graph is queried. The queried graph must not be oversized. + * Returns {} if arg does not exist in the graph. */ + virtual std::vector GetDescendants(const Ref& arg, bool main_only = false) noexcept = 0; + /** Get the total number of transactions in the graph. If main_only is false and a staging + * graph exists, it is queried; otherwise the main graph is queried. This is available even + * for oversized graphs. */ + virtual GraphIndex GetTransactionCount(bool main_only = false) noexcept = 0; /** Perform an internal consistency check on this object. */ virtual void SanityCheck() const = 0; @@ -141,7 +168,7 @@ public: * TxGraph::AddTransaction. */ Ref() noexcept = default; /** Destroy this Ref. This is only allowed when it is empty, or the transaction it refers - * to has been removed from the graph. */ + * to does not exist in the graph (in main nor staging). */ virtual ~Ref(); // Support moving a Ref. 
Ref& operator=(Ref&& other) noexcept; From 6b037ceddfd0160981bd401630c610ad2a3cf000 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Sat, 25 Jan 2025 00:27:52 -0500 Subject: [PATCH 18/25] txgraph: Cache oversizedness of graphs (optimization) --- src/txgraph.cpp | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 13bab582b50..e1ea1f9ea58 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -230,6 +230,9 @@ private: /** Total number of transactions in this graph (sum of all transaction counts in all * Clusters, and for staging also those inherited from the main ClusterSet). */ GraphIndex m_txcount{0}; + /** Whether this graph is oversized (if known). This roughly matches + * m_group_data->m_group_oversized, but may be known even if m_group_data is not. */ + std::optional m_oversized{false}; ClusterSet() noexcept = default; }; @@ -1204,6 +1207,7 @@ void TxGraphImpl::GroupClusters(int level) noexcept } Assume(an_deps_it == an_deps.end()); Assume(an_clusters_it == an_clusters.end()); + clusterset.m_oversized = clusterset.m_group_data->m_group_oversized; Compact(); } @@ -1237,6 +1241,8 @@ void TxGraphImpl::Merge(std::span to_merge) noexcept void TxGraphImpl::ApplyDependencies(int level) noexcept { auto& clusterset = GetClusterSet(level); + // Do not bother computing groups if we already know the result will be oversized. + if (clusterset.m_oversized == true) return; // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits). GroupClusters(level); Assume(clusterset.m_group_data.has_value()); @@ -1348,6 +1354,7 @@ void TxGraphImpl::RemoveTransaction(const Ref& arg) noexcept clusterset.m_to_remove.push_back(GetRefIndex(arg)); // Wipe m_group_data (as it will need to be recomputed). 
clusterset.m_group_data.reset(); + if (clusterset.m_oversized == true) clusterset.m_oversized = std::nullopt; } void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept @@ -1370,6 +1377,7 @@ void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept clusterset.m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); // Wipe m_group_data (as it will need to be recomputed). clusterset.m_group_data.reset(); + if (clusterset.m_oversized == false) clusterset.m_oversized = std::nullopt; } bool TxGraphImpl::Exists(const Ref& arg, bool main_only) noexcept @@ -1539,12 +1547,17 @@ FeePerWeight TxGraphImpl::GetMainChunkFeerate(const Ref& arg) noexcept bool TxGraphImpl::IsOversized(bool main_only) noexcept { size_t level = GetSpecifiedLevel(main_only); + auto& clusterset = GetClusterSet(level); + if (clusterset.m_oversized.has_value()) { + // Return cached value if known. + return *clusterset.m_oversized; + } // Find which Clusters will need to be merged together, as that is where the oversize // property is assessed. GroupClusters(level); - auto& clusterset = GetClusterSet(level); Assume(clusterset.m_group_data.has_value()); - return clusterset.m_group_data->m_group_oversized; + clusterset.m_oversized = clusterset.m_group_data->m_group_oversized; + return *clusterset.m_oversized; } void TxGraphImpl::StartStaging() noexcept @@ -1553,8 +1566,10 @@ void TxGraphImpl::StartStaging() noexcept Assume(!m_staging_clusterset.has_value()); // Apply all remaining dependencies in main before creating a staging graph. Once staging // exists, we cannot merge Clusters anymore (because of interference with Clusters being - // pulled into staging), so to make sure all inspectors are available (if not oversized), - // do all merging work now. This also involves applying all removals. + // pulled into staging), so to make sure all inspectors are available (if not oversized), do + // all merging work now. 
Call SplitAll() first, so that even if ApplyDependencies does not do + // anything due to knowing the result is oversized, splitting is still performed. + SplitAll(0); ApplyDependencies(0); // Construct the staging ClusterSet. m_staging_clusterset.emplace(); @@ -1563,6 +1578,8 @@ void TxGraphImpl::StartStaging() noexcept m_staging_clusterset->m_txcount = m_main_clusterset.m_txcount; m_staging_clusterset->m_deps_to_add = m_main_clusterset.m_deps_to_add; m_staging_clusterset->m_group_data = m_main_clusterset.m_group_data; + m_staging_clusterset->m_oversized = m_main_clusterset.m_oversized; + Assume(m_staging_clusterset->m_oversized.has_value()); } void TxGraphImpl::AbortStaging() noexcept @@ -1612,6 +1629,7 @@ void TxGraphImpl::CommitStaging() noexcept m_main_clusterset.m_deps_to_add = std::move(m_staging_clusterset->m_deps_to_add); m_main_clusterset.m_to_remove = std::move(m_staging_clusterset->m_to_remove); m_main_clusterset.m_group_data = std::move(m_staging_clusterset->m_group_data); + m_main_clusterset.m_oversized = std::move(m_staging_clusterset->m_oversized); m_main_clusterset.m_txcount = std::move(m_staging_clusterset->m_txcount); // Delete the old staging graph, after all its information was moved to main. m_staging_clusterset.reset(); @@ -1789,6 +1807,11 @@ void TxGraphImpl::SanityCheck() const if (!clusterset.m_deps_to_add.empty()) compact_possible = false; if (!clusterset.m_to_remove.empty()) compact_possible = false; if (!clusterset.m_removed.empty()) compact_possible = false; + + // If m_group_data exists, its m_group_oversized must match m_oversized. + if (clusterset.m_group_data.has_value()) { + assert(clusterset.m_oversized == clusterset.m_group_data->m_group_oversized); + } } // Verify that the contents of m_unlinked matches what was expected based on the Entry vector.
From 82fa3573e197f184054fc5096f13ea2520a8d219 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 3 Dec 2024 11:25:49 -0500 Subject: [PATCH 19/25] txgraph: Destroying Ref means removing transaction (feature) Before this commit, if a TxGraph::Ref object is destroyed, it becomes impossible to refer to, but the actual corresponding transaction node in the TxGraph remains, and remains indefinitely as there is no way to remove it. Fix this by making the destruction of TxGraph::Ref trigger immediate removal of the corresponding transaction in TxGraph, both in main and staging if it exists. --- src/test/fuzz/txgraph.cpp | 65 +++++++++++--- src/txgraph.cpp | 174 ++++++++++++++++++++++++++------------ src/txgraph.h | 11 ++- 3 files changed, 179 insertions(+), 71 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index cc20f9e3c4f..976839464b7 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -146,6 +146,30 @@ struct SimTxGraph if (oversized.has_value() && *oversized) oversized = std::nullopt; } + /** Destroy the transaction from the graph, including from the removed set. This will + * trigger TxGraph::Ref::~Ref. reset_oversize controls whether the cached oversized + * value is cleared (destroying does not clear oversizedness in TxGraph of the main + * graph while staging exists). */ + void DestroyTransaction(TxGraph::Ref* ref, bool reset_oversize) + { + auto pos = Find(ref); + if (pos == MISSING) { + // Wipe the ref, if it exists, from the removed vector. Use std::partition rather + // than std::erase because we don't care about the order of the entries that + // remain. + auto remove = std::partition(removed.begin(), removed.end(), [&](auto& arg) { return arg.get() != ref; }); + removed.erase(remove, removed.end()); + } else { + graph.RemoveTransactions(SetType::Singleton(pos)); + simrevmap.erase(simmap[pos].get()); + simmap[pos].reset(); + // This may invalidate our cached oversized value. 
+ if (reset_oversize && oversized.has_value() && *oversized) { + oversized = std::nullopt; + } + } + } + /** Construct the set with all positions in this graph corresponding to the specified * TxGraph::Refs. All of them must occur in this graph and not be removed. */ SetType MakeSet(std::span arg) @@ -327,9 +351,9 @@ FUZZ_TARGET(txgraph) } break; } else if (sel_sim.removed.size() > 0 && command-- == 0) { - // ~Ref. Destroying a TxGraph::Ref has an observable effect on the TxGraph it - // refers to, so this simulation permits doing so separately from other actions on - // TxGraph. + // ~Ref (of an already-removed transaction). Destroying a TxGraph::Ref has an + // observable effect on the TxGraph it refers to, so this simulation permits doing + // so separately from other actions on TxGraph. // Pick a Ref of sel_sim.removed to destroy. Note that the same Ref may still occur // in the other graph, and thus not actually trigger ~Ref yet (which is exactly @@ -341,6 +365,28 @@ FUZZ_TARGET(txgraph) } sel_sim.removed.pop_back(); break; + } else if (command-- == 0) { + // ~Ref (of any transaction). + std::vector to_destroy; + to_destroy.push_back(pick_fn()); + while (true) { + // Keep adding either the ancestors or descendants the already picked + // transactions have in both graphs (main and staging) combined. Destroying + // will trigger deletions in both, so to have consistent TxGraph behavior, the + // set must be closed under ancestors, or descendants, in both graphs. + auto old_size = to_destroy.size(); + for (auto& sim : sims) sim.IncludeAncDesc(to_destroy, alt); + if (to_destroy.size() == old_size) break; + } + // The order in which these ancestors/descendants are destroyed should not matter; + // randomly shuffle them. 
+ std::shuffle(to_destroy.begin(), to_destroy.end(), rng); + for (TxGraph::Ref* ptr : to_destroy) { + for (size_t level = 0; level < sims.size(); ++level) { + sims[level].DestroyTransaction(ptr, level == sims.size() - 1); + } + } + break; } else if (command-- == 0) { // SetTransactionFee. int64_t fee; @@ -457,6 +503,10 @@ FUZZ_TARGET(txgraph) // AbortStaging. real->AbortStaging(); sims.pop_back(); + // Reset the cached oversized value (if TxGraph::Ref destructions triggered + // removals of main transactions while staging was active, then aborting will + // cause it to be re-evaluated in TxGraph). + sims.back().oversized = std::nullopt; break; } } @@ -537,13 +587,4 @@ FUZZ_TARGET(txgraph) // Sanity check again (because invoking inspectors may modify internal unobservable state). real->SanityCheck(); - - // Remove all remaining transactions, because Refs cannot be destroyed otherwise (this will be - // addressed in a follow-up commit). - for (auto& sim : sims) { - for (auto i : sim.graph.Positions()) { - auto ref = sim.GetRef(i); - real->RemoveTransaction(*ref); - } - } } diff --git a/src/txgraph.cpp b/src/txgraph.cpp index e1ea1f9ea58..7312a402bf3 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -225,7 +225,8 @@ private: /** Information about the merges to be performed, if known. */ std::optional m_group_data = GroupData{}; /** Which entries were removed in this ClusterSet (so they can be wiped on abort). This - * includes all entries which have an (R) removed locator at this level (staging only). */ + * includes all entries which have an (R) removed locator at this level (staging only), + * plus optionally any transaction in m_unlinked. */ std::vector m_removed; /** Total number of transactions in this graph (sum of all transaction counts in all * Clusters, and for staging also those inherited from the main ClusterSet). 
*/ @@ -368,8 +369,34 @@ public: auto& entry = m_entries[idx]; Assume(entry.m_ref != nullptr); entry.m_ref = nullptr; + // Mark the transaction as to be removed in all levels where it explicitly or implicitly + // exists. + bool exists_anywhere{false}; + bool exists{false}; + for (int level = 0; level <= GetTopLevel(); ++level) { + if (entry.m_locator[level].IsPresent()) { + exists_anywhere = true; + exists = true; + } else if (entry.m_locator[level].IsRemoved()) { + exists = false; + } + if (exists) { + auto& clusterset = GetClusterSet(level); + clusterset.m_to_remove.push_back(idx); + // Force recomputation of grouping data. + clusterset.m_group_data = std::nullopt; + // Do not wipe the oversized state of main if staging exists. The reason for this + // is that the alternative would mean that cluster merges may need to be applied to + // a formerly-oversized main graph while staging exists (to satisfy chunk feerate + // queries into main, for example), and such merges could conflict with pulls of + // some of their constituents into staging. + if (level == GetTopLevel() && clusterset.m_oversized == true) { + clusterset.m_oversized = std::nullopt; + } + } + } m_unlinked.push_back(idx); - Compact(); + if (!exists_anywhere) Compact(); } // Functions related to various normalization/application steps. @@ -382,12 +409,12 @@ public: * transactions from inherited (P,M) to explicit (P,P). */ Cluster* PullIn(Cluster* cluster) noexcept; /** Apply all removals queued up in m_to_remove to the relevant Clusters (which get a - * NEEDS_SPLIT* QualityLevel) in the specified level. */ - void ApplyRemovals(int level) noexcept; - /** Split an individual cluster (which must be in the top-level ClusterSet). */ + * NEEDS_SPLIT* QualityLevel) up to the specified level. */ + void ApplyRemovals(int up_to_level) noexcept; + /** Split an individual cluster. */ void Split(Cluster& cluster) noexcept; - /** Split all clusters that need splitting in the specified level. 
*/ - void SplitAll(int level) noexcept; + /** Split all clusters that need splitting up to the specified level. */ + void SplitAll(int up_to_level) noexcept; /** Populate m_group_data based on m_deps_to_add in the specified level. */ void GroupClusters(int level) noexcept; /** Merge the specified clusters. */ @@ -451,6 +478,14 @@ void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept } // Update the transaction count. --clusterset.m_txcount; + // If clearing main, adjust the status of Locators of this transaction in staging, if it exists. + if (level == 0 && GetTopLevel() == 1) { + if (entry.m_locator[1].IsRemoved()) { + entry.m_locator[1].SetMissing(); + } else if (!entry.m_locator[1].IsPresent()) { + --m_staging_clusterset->m_txcount; + } + } } void Cluster::Updated(TxGraphImpl& graph) noexcept @@ -505,8 +540,9 @@ std::vector TxGraphImpl::GetConflicts() const noexcept // All main Clusters containing transactions in m_removed (so (P,R) ones) are conflicts. for (auto i : clusterset.m_removed) { auto& entry = m_entries[i]; - Assume(entry.m_locator[0].IsPresent()); - ret.push_back(entry.m_locator[0].cluster); + if (entry.m_locator[0].IsPresent()) { + ret.push_back(entry.m_locator[0].cluster); + } } // Then go over all Clusters at this level, and find their conflicts (the (P,P) ones). for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { @@ -873,39 +909,41 @@ Cluster* TxGraphImpl::PullIn(Cluster* cluster) noexcept return cluster; } -void TxGraphImpl::ApplyRemovals(int level) noexcept +void TxGraphImpl::ApplyRemovals(int up_to_level) noexcept { - auto& clusterset = GetClusterSet(level); - auto& to_remove = clusterset.m_to_remove; - // Skip if there is nothing to remove. - if (to_remove.empty()) return; - // There cannot be removals to be applied in main when staging exists (they should have been - // applied in StartStaging already, and none can be added to main while staging exists). 
- Assume(level == GetTopLevel()); - // Pull in all Clusters that are not in staging. - for (GraphIndex index : to_remove) { - auto cluster = FindCluster(index, level); - PullIn(cluster); - } - // Group the set of to-be-removed entries by Cluster*. - std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { - return std::less{}(m_entries[a].m_locator[level].cluster, m_entries[b].m_locator[level].cluster); - }); - // Process per Cluster. - std::span to_remove_span{to_remove}; - while (!to_remove_span.empty()) { - Cluster* cluster = m_entries[to_remove_span.front()].m_locator[level].cluster; - if (cluster != nullptr) { - // If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it - // can pop off whatever applies to it. - cluster->ApplyRemovals(*this, to_remove_span); - } else { - // Otherwise, skip this already-removed entry. This may happen when RemoveTransaction - // was called twice on the same Ref. - to_remove_span = to_remove_span.subspan(1); + Assume(up_to_level >= 0 && up_to_level <= GetTopLevel()); + for (int level = 0; level <= up_to_level; ++level) { + auto& clusterset = GetClusterSet(level); + auto& to_remove = clusterset.m_to_remove; + // Skip if there is nothing to remove in this level. + if (to_remove.empty()) continue; + // Pull in all Clusters that are not in staging. + if (level == 1) { + for (GraphIndex index : to_remove) { + auto cluster = FindCluster(index, level); + if (cluster != nullptr) PullIn(cluster); + } } + // Group the set of to-be-removed entries by Cluster*. + std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { + return std::less{}(m_entries[a].m_locator[level].cluster, m_entries[b].m_locator[level].cluster); + }); + // Process per Cluster. 
+ std::span to_remove_span{to_remove}; + while (!to_remove_span.empty()) { + Cluster* cluster = m_entries[to_remove_span.front()].m_locator[level].cluster; + if (cluster != nullptr) { + // If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it + // can pop off whatever applies to it. + cluster->ApplyRemovals(*this, to_remove_span); + } else { + // Otherwise, skip this already-removed entry. This may happen when + // RemoveTransaction was called twice on the same Ref, for example. + to_remove_span = to_remove_span.subspan(1); + } + } + to_remove.clear(); } - to_remove.clear(); Compact(); } @@ -919,7 +957,7 @@ void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept for (int i = 0; i < 2; ++i) { GraphIndex idx = i ? b : a; Entry& entry = m_entries[idx]; - // Update linked Ref. + // Update linked Ref, if any exists. if (entry.m_ref) GetRefIndex(*entry.m_ref) = idx; // Update the locators for both levels. The rest of the Entry information will not change, // so no need to invoke Cluster::Updated(). @@ -986,14 +1024,17 @@ void TxGraphImpl::Split(Cluster& cluster) noexcept } } -void TxGraphImpl::SplitAll(int level) noexcept +void TxGraphImpl::SplitAll(int up_to_level) noexcept { + Assume(up_to_level >= 0 && up_to_level <= GetTopLevel()); // Before splitting all Cluster, first make sure all removals are applied. 
- ApplyRemovals(level); - for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE}) { - auto& queue = GetClusterSet(level).m_clusters[int(quality)]; - while (!queue.empty()) { - Split(*queue.back().get()); + ApplyRemovals(up_to_level); + for (int level = 0; level <= up_to_level; ++level) { + for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE}) { + auto& queue = GetClusterSet(level).m_clusters[int(quality)]; + while (!queue.empty()) { + Split(*queue.back().get()); + } } } } @@ -1003,10 +1044,6 @@ void TxGraphImpl::GroupClusters(int level) noexcept auto& clusterset = GetClusterSet(level); // If the groupings have been computed already, nothing is left to be done. if (clusterset.m_group_data.has_value()) return; - // We should never need to compute main grouping while staging exists (it should have already - // been computing in StartStaging, and no modifications that invalidate it can be made while - // staging exists). - Assume(level == GetTopLevel()); // Before computing which Clusters need to be merged together, first apply all removals and // split the Clusters into connected components. If we would group first, we might end up @@ -1256,8 +1293,10 @@ void TxGraphImpl::ApplyDependencies(int level) noexcept auto cluster_span = std::span{clusterset.m_group_data->m_group_clusters} .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); // Pull in all the Clusters that contain dependencies. - for (Cluster*& cluster : cluster_span) { - cluster = PullIn(cluster); + if (level == 1) { + for (Cluster*& cluster : cluster_span) { + cluster = PullIn(cluster); + } } // Invoke Merge() to merge them into a single Cluster. Merge(cluster_span); @@ -1398,6 +1437,7 @@ std::vector Cluster::GetAncestorRefs(const TxGraphImpl& graph, De // Translate all ancestors (in arbitrary order) to Refs (if they have any), and return them. 
for (auto idx : m_depgraph.Ancestors(idx)) { const auto& entry = graph.m_entries[m_mapping[idx]]; + Assume(entry.m_ref != nullptr); ret.push_back(entry.m_ref); } return ret; @@ -1410,6 +1450,7 @@ std::vector Cluster::GetDescendantRefs(const TxGraphImpl& graph, // Translate all descendants (in arbitrary order) to Refs (if they have any), and return them. for (auto idx : m_depgraph.Descendants(idx)) { const auto& entry = graph.m_entries[m_mapping[idx]]; + Assume(entry.m_ref != nullptr); ret.push_back(entry.m_ref); } return ret; @@ -1422,6 +1463,7 @@ std::vector Cluster::GetClusterRefs(const TxGraphImpl& graph) noe // Translate all transactions in the Cluster (in linearization order) to Refs. for (auto idx : m_linearization) { const auto& entry = graph.m_entries[m_mapping[idx]]; + Assume(entry.m_ref != nullptr); ret.push_back(entry.m_ref); } return ret; @@ -1513,7 +1555,8 @@ FeePerWeight TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept // be identical if it occurs in both), and return the empty FeePerWeight if it isn't in any. Cluster* cluster{nullptr}; for (int level = 0; level <= GetTopLevel(); ++level) { - // Apply removals, so that we can correctly report FeePerWeight{} for non-existing transaction. + // Apply removals, so that we can correctly report FeePerWeight{} for non-existing + // transactions. ApplyRemovals(level); if (m_entries[GetRefIndex(arg)].m_locator[level].IsPresent()) { cluster = m_entries[GetRefIndex(arg)].m_locator[level].cluster; @@ -1600,6 +1643,11 @@ void TxGraphImpl::AbortStaging() noexcept // Destroy the staging ClusterSet. m_staging_clusterset.reset(); Compact(); + if (!m_main_clusterset.m_group_data.has_value()) { + // In case m_oversized in main was kept after a Ref destruction while staging exists, we + // need to re-evaluate m_oversized now. 
+ m_main_clusterset.m_oversized = std::nullopt; + } } void TxGraphImpl::CommitStaging() noexcept @@ -1783,10 +1831,13 @@ void TxGraphImpl::SanityCheck() const } } - // Verify that all to-be-removed transactions have valid identifiers, and aren't removed yet. + // Verify that all to-be-removed transactions have valid identifiers. for (GraphIndex idx : clusterset.m_to_remove) { assert(idx < m_entries.size()); - assert(FindCluster(idx, level) != nullptr); + // We cannot assert that all m_to_remove transactions are still present: ~Ref on a + // (P,M) transaction (present in main, inherited in staging) will cause an m_to_remove + // addition in both main and staging, but a subsequent ApplyRemovals in main will + // cause it to disappear from staging too, leaving the m_to_remove in place. } // Verify that all to-be-added dependencies have valid identifiers. @@ -1801,6 +1852,15 @@ void TxGraphImpl::SanityCheck() const // Verify that the contents of m_removed matches what was expected based on the Entry vector. std::set actual_removed(clusterset.m_removed.begin(), clusterset.m_removed.end()); + for (auto i : expected_unlinked) { + // If a transaction exists in both main and staging, and is removed from staging (adding + // it to m_removed there), and subsequently destroyed (wiping the locator completely), + // it can remain in m_removed despite not having an IsRemoved() locator. Exclude those + // transactions from the comparison here. + actual_removed.erase(i); + expected_removed[level].erase(i); + } + assert(actual_removed == expected_removed[level]); // If any GraphIndex entries remain in this ClusterSet, compact is not possible. @@ -1812,6 +1872,10 @@ void TxGraphImpl::SanityCheck() const if (clusterset.m_group_data.has_value()) { assert(clusterset.m_oversized == clusterset.m_group_data->m_group_oversized); } + + // For non-top levels, m_oversized must be known (as it cannot change until the level + // on top is gone).
+ if (level < GetTopLevel()) assert(clusterset.m_oversized.has_value()); } // Verify that the contents of m_unlinked matches what was expected based on the Entry vector. diff --git a/src/txgraph.h b/src/txgraph.h index a524e0468f7..8f9451921eb 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -50,7 +50,8 @@ public: /** Data type used to reference transactions within a TxGraph. * * Every transaction within a TxGraph has exactly one corresponding TxGraph::Ref, held by users - * of the class. Refs can only be destroyed after the transaction is removed from the graph. + * of the class. Destroying the TxGraph::Ref removes the corresponding transaction (in both the + * main and staging graphs). * * Users of the class can inherit from TxGraph::Ref. If all Refs are inherited this way, the * Ref* pointers returned by TxGraph functions can be cast to, and used as, this inherited type. @@ -104,7 +105,9 @@ public: /** Determine whether the graph is oversized (contains a connected component of more than the * configured maximum cluster count). If main_only is false and a staging graph exists, it is * queried; otherwise the main graph is queried. Some of the functions below are not available - * for oversized graphs. The mutators above are always available. */ + * for oversized graphs. The mutators above are always available. Removing a transaction by + * destroying its Ref while staging exists will not clear main's oversizedness until staging + * is aborted or committed. */ virtual bool IsOversized(bool main_only = false) noexcept = 0; /** Determine whether arg exists in the graph (i.e., was not removed). If main_only is false * and a staging graph exists, it is queried; otherwise the main graph is queried. This is @@ -167,8 +170,8 @@ public: /** Construct an empty Ref. Non-empty Refs can only be created using * TxGraph::AddTransaction. */ Ref() noexcept = default; - /** Destroy this Ref. 
This is only allowed when it is empty, or the transaction it refers - * to does not exist in the graph (in main nor staging). */ + /** Destroy this Ref. If it is not empty, the corresponding transaction is removed (in both + * main and staging, if it exists). */ virtual ~Ref(); // Support moving a Ref. Ref& operator=(Ref&& other) noexcept; From 22c68cd153b72f867dffcc7a62a3f65cef9038fb Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Fri, 21 Mar 2025 15:17:42 -0400 Subject: [PATCH 20/25] txgraph: Allow Refs to outlive the TxGraph (feature) --- src/test/fuzz/txgraph.cpp | 6 ++++++ src/txgraph.cpp | 14 ++++++++++++++ src/txgraph.h | 3 ++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 976839464b7..d85b81d6402 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -587,4 +587,10 @@ FUZZ_TARGET(txgraph) // Sanity check again (because invoking inspectors may modify internal unobservable state). real->SanityCheck(); + + // Kill the TxGraph object. + real.reset(); + // Kill the simulated graphs, with all remaining Refs in it. If any, this verifies that Refs + // can outlive the TxGraph that created them. + sims.clear(); } diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 7312a402bf3..b90911c5920 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -320,6 +320,9 @@ public: Assume(max_cluster_count <= MAX_CLUSTER_COUNT_LIMIT); } + /** Destructor. */ + ~TxGraphImpl() noexcept; + // Cannot move or copy (would invalidate TxGraphImpl* in Ref, MiningOrder, EvictionOrder). 
TxGraphImpl(const TxGraphImpl&) = delete; TxGraphImpl& operator=(const TxGraphImpl&) = delete; @@ -809,6 +812,17 @@ void Cluster::ApplyDependencies(TxGraphImpl& graph, std::span TxGraphImpl::ExtractCluster(int level, QualityLevel quality, ClusterSetIndex setindex) noexcept { Assume(quality != QualityLevel::NONE); diff --git a/src/txgraph.h b/src/txgraph.h index 8f9451921eb..27f6ab21751 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -63,7 +63,8 @@ public: /** Construct a new transaction with the specified feerate, and return a Ref to it. * If a staging graph exists, the new transaction is only created there. In all * further calls, only Refs created by AddTransaction() are allowed to be passed to this - * TxGraph object (or empty Ref objects). */ + * TxGraph object (or empty Ref objects). Ref objects may outlive the TxGraph they were + * created for. */ [[nodiscard]] virtual Ref AddTransaction(const FeePerWeight& feerate) noexcept = 0; /** Remove the specified transaction. If a staging graph exists, the removal only happens * there. This is a no-op if the transaction was already removed. From 295a1ca8bbbe5e61bd936158ca33cda5d5e58afd Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 14 Nov 2024 16:16:59 -0500 Subject: [PATCH 21/25] txgraph: Expose ability to compare transactions (feature) In order to make it possible for higher layers to compare transaction quality (ordering within the implicit total ordering on the mempool), expose a comparison function and test it. --- src/test/fuzz/txgraph.cpp | 66 +++++++++++++++++++++++++++++++++++++++ src/txgraph.cpp | 47 +++++++++++++++++++++++++--- src/txgraph.h | 3 ++ 3 files changed, 112 insertions(+), 4 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index d85b81d6402..fca253cfc8d 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -508,6 +508,24 @@ FUZZ_TARGET(txgraph) // cause it to be re-evaluated in TxGraph). 
sims.back().oversized = std::nullopt; break; + } else if (!main_sim.IsOversized() && command-- == 0) { + // CompareMainOrder. + auto ref_a = pick_fn(); + auto ref_b = pick_fn(); + auto sim_a = main_sim.Find(ref_a); + auto sim_b = main_sim.Find(ref_b); + // Both transactions must exist in the main graph. + if (sim_a == SimTxGraph::MISSING || sim_b == SimTxGraph::MISSING) break; + auto cmp = real->CompareMainOrder(*ref_a, *ref_b); + // Distinct transactions have distinct places. + if (sim_a != sim_b) assert(cmp != 0); + // Ancestors go before descendants. + if (main_sim.graph.Ancestors(sim_a)[sim_b]) assert(cmp >= 0); + if (main_sim.graph.Descendants(sim_a)[sim_b]) assert(cmp <= 0); + // Do not verify consistency with chunk feerates, as we cannot easily determine + // these here without making more calls to real, which could affect its internal + // state. A full comparison is done at the end. + break; } } } @@ -515,6 +533,54 @@ FUZZ_TARGET(txgraph) // After running all modifications, perform an internal sanity check (before invoking // inspectors that may modify the internal state). real->SanityCheck(); + + if (!sims[0].IsOversized()) { + // If the main graph is not oversized, verify the total ordering implied by + // CompareMainOrder. + // First construct two distinct randomized permutations of the positions in sims[0]. + std::vector vec1; + for (auto i : sims[0].graph.Positions()) vec1.push_back(i); + std::shuffle(vec1.begin(), vec1.end(), rng); + auto vec2 = vec1; + std::shuffle(vec2.begin(), vec2.end(), rng); + if (vec1 == vec2) std::next_permutation(vec2.begin(), vec2.end()); + // Sort both according to CompareMainOrder. By having randomized starting points, the order + // of CompareMainOrder invocations is somewhat randomized as well. 
+ auto cmp = [&](SimTxGraph::Pos a, SimTxGraph::Pos b) noexcept { + return real->CompareMainOrder(*sims[0].GetRef(a), *sims[0].GetRef(b)) < 0; + }; + std::sort(vec1.begin(), vec1.end(), cmp); + std::sort(vec2.begin(), vec2.end(), cmp); + + // Verify the resulting orderings are identical. This could only fail if the ordering was + // not total. + assert(vec1 == vec2); + + // Verify that the ordering is topological. + auto todo = sims[0].graph.Positions(); + for (auto i : vec1) { + todo.Reset(i); + assert(!sims[0].graph.Ancestors(i).Overlaps(todo)); + } + assert(todo.None()); + + // For every transaction in the total ordering, find a random one before it and after it, + // and compare their chunk feerates, which must be consistent with the ordering. + for (size_t pos = 0; pos < vec1.size(); ++pos) { + auto pos_feerate = real->GetMainChunkFeerate(*sims[0].GetRef(vec1[pos])); + if (pos > 0) { + size_t before = rng.randrange(pos); + auto before_feerate = real->GetMainChunkFeerate(*sims[0].GetRef(vec1[before])); + assert(FeeRateCompare(before_feerate, pos_feerate) >= 0); + } + if (pos + 1 < vec1.size()) { + size_t after = pos + 1 + rng.randrange(vec1.size() - 1 - pos); + auto after_feerate = real->GetMainChunkFeerate(*sims[0].GetRef(vec1[after])); + assert(FeeRateCompare(after_feerate, pos_feerate) <= 0); + } + } + } + assert(real->HaveStaging() == (sims.size() > 1)); // Try to run a full comparison, for both main_only=false and main_only=true in TxGraph diff --git a/src/txgraph.cpp b/src/txgraph.cpp index b90911c5920..2d96d2802c4 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -303,6 +303,8 @@ private: Locator m_locator[MAX_LEVELS]; /** The chunk feerate of this transaction in main (if present in m_locator[0]). */ FeePerWeight m_main_chunk_feerate; + /** The position this transaction has in the main linearization (if present). */ + LinearizationIndex m_main_lin_index; }; /** The set of all transactions (in all levels combined). GraphIndex values index into this. 
*/ @@ -447,6 +449,7 @@ public: std::vector GetDescendants(const Ref& arg, bool main_only = false) noexcept final; GraphIndex GetTransactionCount(bool main_only = false) noexcept final; bool IsOversized(bool main_only = false) noexcept final; + std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final; void SanityCheck() const final; }; @@ -499,9 +502,10 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept entry.m_locator[m_level].SetPresent(this, idx); } // If this is for the main graph (level = 0), and the Cluster's quality is ACCEPTABLE or - // OPTIMAL, compute its chunking and store its information in the Entry's m_main_chunk_feerate. - // These fields are only accessed after making the entire graph ACCEPTABLE, so it is pointless - // to compute these if we haven't reached that quality level yet. + // OPTIMAL, compute its chunking and store its information in the Entry's m_main_lin_index + // and m_main_chunk_feerate. These fields are only accessed after making the entire graph + // ACCEPTABLE, so it is pointless to compute these if we haven't reached that quality level + // yet. if (m_level == 0 && IsAcceptable()) { LinearizationChunking chunking(m_depgraph, m_linearization); LinearizationIndex lin_idx{0}; @@ -511,9 +515,10 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept Assume(chunk.transactions.Any()); // Iterate over the transactions in the linearization, which must match those in chunk. 
do { - DepGraphIndex idx = m_linearization[lin_idx++]; + DepGraphIndex idx = m_linearization[lin_idx]; GraphIndex graph_idx = m_mapping[idx]; auto& entry = graph.m_entries[graph_idx]; + entry.m_main_lin_index = lin_idx++; entry.m_main_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate); Assume(chunk.transactions[idx]); chunk.transactions.Reset(idx); @@ -594,6 +599,10 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove // are just never accessed, but set it to -1 here to increase the ability to detect a bug // that causes it to be accessed regardless. m_mapping[locator.index] = GraphIndex(-1); + // - Remove its linearization index from the Entry (if in main). + if (m_level == 0) { + entry.m_main_lin_index = LinearizationIndex(-1); + } // - Mark it as missing/removed in the Entry's locator. graph.ClearLocator(m_level, idx); to_remove = to_remove.subspan(1); @@ -1730,6 +1739,33 @@ void TxGraphImpl::SetTransactionFee(const Ref& ref, int64_t fee) noexcept } } +std::strong_ordering TxGraphImpl::CompareMainOrder(const Ref& a, const Ref& b) noexcept +{ + // The references must not be empty. + Assume(GetRefGraph(a) == this); + Assume(GetRefGraph(b) == this); + // Apply dependencies in main. + ApplyDependencies(0); + Assume(m_main_clusterset.m_deps_to_add.empty()); + // Make both involved Clusters acceptable, so chunk feerates are relevant. + const auto& entry_a = m_entries[GetRefIndex(a)]; + const auto& entry_b = m_entries[GetRefIndex(b)]; + const auto& locator_a = entry_a.m_locator[0]; + const auto& locator_b = entry_b.m_locator[0]; + Assume(locator_a.IsPresent()); + Assume(locator_b.IsPresent()); + MakeAcceptable(*locator_a.cluster); + MakeAcceptable(*locator_b.cluster); + // Compare chunk feerates, and return result if it differs. 
+ auto feerate_cmp = FeeRateCompare(entry_b.m_main_chunk_feerate, entry_a.m_main_chunk_feerate); + if (feerate_cmp < 0) return std::strong_ordering::less; + if (feerate_cmp > 0) return std::strong_ordering::greater; + // Compare Cluster* as tie-break for equal chunk feerates. + if (locator_a.cluster != locator_b.cluster) return locator_a.cluster <=> locator_b.cluster; + // As final tie-break, compare position within cluster linearization. + return entry_a.m_main_lin_index <=> entry_b.m_main_lin_index; +} + void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const { // There must be an m_mapping for each m_depgraph position (including holes). @@ -1747,6 +1783,7 @@ void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const // Verify m_linearization. SetType m_done; + LinearizationIndex linindex{0}; assert(m_depgraph.IsAcyclic()); for (auto lin_pos : m_linearization) { assert(lin_pos < m_mapping.size()); @@ -1759,6 +1796,8 @@ void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const assert(entry.m_locator[level].index == lin_pos); // For main-level entries, check linearization position and chunk feerate. if (level == 0 && IsAcceptable()) { + assert(entry.m_main_lin_index == linindex); + ++linindex; if (!linchunking.GetChunk(0).transactions[lin_pos]) { linchunking.MarkDone(linchunking.GetChunk(0).transactions); } diff --git a/src/txgraph.h b/src/txgraph.h index 27f6ab21751..e90d6b3ae46 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -141,6 +141,9 @@ public: * graph exists, it is queried; otherwise the main graph is queried. This is available even * for oversized graphs. */ virtual GraphIndex GetTransactionCount(bool main_only = false) noexcept = 0; + /** Compare two transactions according to their order in the main graph. Both transactions must + * be in the main graph. The main graph must not be oversized. 
*/ + virtual std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept = 0; /** Perform an internal consistency check on this object. */ virtual void SanityCheck() const = 0; From b685d322c9739ca03b9d0bb9fa57aabea1890060 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 9 Jan 2025 14:22:24 -0500 Subject: [PATCH 22/25] txgraph: Add DoWork function (feature) This can be called when the caller has time to spend now, and wants future operations to be fast. --- src/test/fuzz/txgraph.cpp | 4 ++++ src/txgraph.cpp | 22 ++++++++++++++++++++++ src/txgraph.h | 5 +++++ 3 files changed, 31 insertions(+) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index fca253cfc8d..250d0554dd9 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -526,6 +526,10 @@ FUZZ_TARGET(txgraph) // these here without making more calls to real, which could affect its internal // state. A full comparison is done at the end. break; + } else if (command-- == 0) { + // DoWork. + real->DoWork(); + break; } } } diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 2d96d2802c4..fe220406677 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -428,6 +428,8 @@ public: void ApplyDependencies(int level) noexcept; /** Make a specified Cluster have quality ACCEPTABLE or OPTIMAL. */ void MakeAcceptable(Cluster& cluster) noexcept; + /** Make all Clusters at the specified level have quality ACCEPTABLE or OPTIMAL. */ + void MakeAllAcceptable(int level) noexcept; // Implementations for the public TxGraph interface. 
@@ -436,6 +438,8 @@ public: void AddDependency(const Ref& parent, const Ref& child) noexcept final; void SetTransactionFee(const Ref&, int64_t fee) noexcept final; + void DoWork() noexcept final; + void StartStaging() noexcept final; void CommitStaging() noexcept final; void AbortStaging() noexcept final; @@ -1370,6 +1374,17 @@ void TxGraphImpl::MakeAcceptable(Cluster& cluster) noexcept } } +void TxGraphImpl::MakeAllAcceptable(int level) noexcept +{ + ApplyDependencies(level); + auto& clusterset = GetClusterSet(level); + if (clusterset.m_oversized == true) return; + auto& queue = clusterset.m_clusters[int(QualityLevel::NEEDS_RELINEARIZE)]; + while (!queue.empty()) { + MakeAcceptable(*queue.back().get()); + } +} + Cluster::Cluster(TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept { // Create a new transaction in the DepGraph, and remember its position in m_mapping. @@ -1942,6 +1957,13 @@ void TxGraphImpl::SanityCheck() const } } +void TxGraphImpl::DoWork() noexcept +{ + for (int level = 0; level <= GetTopLevel(); ++level) { + MakeAllAcceptable(level); + } +} + } // namespace TxGraph::Ref::~Ref() diff --git a/src/txgraph.h b/src/txgraph.h index e90d6b3ae46..163aaaa793a 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -91,6 +91,11 @@ public: * effect. */ virtual void SetTransactionFee(const Ref& arg, int64_t fee) noexcept = 0; + /** TxGraph is internally lazy, and will not compute many things until they are needed. + * Calling DoWork will compute everything now, so that future operations are fast. This can be + * invoked while oversized. */ + virtual void DoWork() noexcept = 0; + /** Create a staging graph (which cannot exist already). This acts as if a full copy of * the transaction graph is made, upon which further modifications are made. 
This copy can * be inspected, and then either discarded, or the main graph can be replaced by it by From aded04701925781ffe194e11e4782261e4736339 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 16 Jan 2025 16:00:10 -0500 Subject: [PATCH 23/25] txgraph: Add CountDistinctClusters function (feature) --- src/test/fuzz/txgraph.cpp | 44 +++++++++++++++++++++++++++++++++++++++ src/txgraph.cpp | 28 +++++++++++++++++++++++++ src/txgraph.h | 5 +++++ 3 files changed, 77 insertions(+) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 250d0554dd9..32147081827 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -526,6 +526,50 @@ FUZZ_TARGET(txgraph) // these here without making more calls to real, which could affect its internal // state. A full comparison is done at the end. break; + } else if (!sel_sim.IsOversized() && command-- == 0) { + // CountDistinctClusters. + std::vector refs; + // Gather a list of up to 15 (or up to 255) Ref pointers. + auto count = provider.ConsumeIntegralInRange(0, alt ? 255 : 15); + refs.resize(count); + for (size_t i = 0; i < count; ++i) { + refs[i] = pick_fn(); + } + // Their order should not matter, shuffle them. + std::shuffle(refs.begin(), refs.end(), rng); + // Invoke the real function. + auto result = real->CountDistinctClusters(refs, use_main); + // Build a vector with representatives of the clusters the Refs occur in in the + // simulated graph. For each, remember the lowest-index transaction SimPos in the + // cluster. + std::vector sim_reps; + for (auto ref : refs) { + // Skip Refs that do not occur in the simulated graph. + auto simpos = sel_sim.Find(ref); + if (simpos == SimTxGraph::MISSING) continue; + // Start with component equal to just the Ref's SimPos. + auto component = SimTxGraph::SetType::Singleton(simpos); + // Keep adding ancestors/descendants of all elements in component until it no + // longer changes. 
+ while (true) { + auto old_component = component; + for (auto i : component) { + component |= sel_sim.graph.Ancestors(i); + component |= sel_sim.graph.Descendants(i); + } + if (component == old_component) break; + } + // Remember the lowest-index SimPos in component, as a representative for it. + assert(component.Any()); + sim_reps.push_back(component.First()); + } + // Remove duplicates from sim_reps. + std::sort(sim_reps.begin(), sim_reps.end()); + sim_reps.erase(std::unique(sim_reps.begin(), sim_reps.end()), sim_reps.end()); + // Compare the number of deduplicated representatives with the value returned by + // the real function. + assert(result == sim_reps.size()); + break; } else if (command-- == 0) { // DoWork. real->DoWork(); diff --git a/src/txgraph.cpp b/src/txgraph.cpp index fe220406677..d6c6556f1af 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -454,6 +454,7 @@ public: GraphIndex GetTransactionCount(bool main_only = false) noexcept final; bool IsOversized(bool main_only = false) noexcept final; std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final; + GraphIndex CountDistinctClusters(std::span refs, bool main_only = false) noexcept final; void SanityCheck() const final; }; @@ -1781,6 +1782,33 @@ std::strong_ordering TxGraphImpl::CompareMainOrder(const Ref& a, const Ref& b) n return entry_a.m_main_lin_index <=> entry_b.m_main_lin_index; } +TxGraph::GraphIndex TxGraphImpl::CountDistinctClusters(std::span refs, bool main_only) noexcept +{ + size_t level = GetSpecifiedLevel(main_only); + ApplyDependencies(level); + auto& clusterset = GetClusterSet(level); + Assume(clusterset.m_deps_to_add.empty()); + // Build a vector of Clusters that the specified Refs occur in. 
+ std::vector clusters; + clusters.reserve(refs.size()); + for (const Ref* ref : refs) { + if (ref == nullptr) continue; + if (GetRefGraph(*ref) == nullptr) continue; + Assume(GetRefGraph(*ref) == this); + auto cluster = FindCluster(GetRefIndex(*ref), level); + if (cluster != nullptr) clusters.push_back(cluster); + } + // Count the number of distinct elements in clusters. + std::sort(clusters.begin(), clusters.end()); + Cluster* last{nullptr}; + GraphIndex ret{0}; + for (Cluster* cluster : clusters) { + ret += (cluster != last); + last = cluster; + } + return ret; +} + void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const { // There must be an m_mapping for each m_depgraph position (including holes). diff --git a/src/txgraph.h b/src/txgraph.h index 163aaaa793a..1a0c11b92cd 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -149,6 +149,11 @@ public: /** Compare two transactions according to their order in the main graph. Both transactions must * be in the main graph. The main graph must not be oversized. */ virtual std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept = 0; + /** Count the number of distinct clusters that the specified transactions belong to. If + * main_only is false and a staging graph exists, staging clusters are counted. Otherwise, + * main clusters are counted. Refs that do not exist in the queried graph are ignored. The + * queried graph must not be oversized. */ + virtual GraphIndex CountDistinctClusters(std::span, bool main_only = false) noexcept = 0; /** Perform an internal consistency check on this object. */ virtual void SanityCheck() const = 0; From 54bceddd3ab39918834d72e9c77eb14e41996652 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 5 Feb 2025 22:53:45 -0500 Subject: [PATCH 24/25] txgraph: Multiple inputs to Get{Ancestors,Descendant}Refs (preparation) This is a preparation for the next commit, which adds a feature to request the Refs to multiple ancestors/descendants at once. 
--- src/txgraph.cpp | 58 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index d6c6556f1af..847cf6ffee1 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -139,10 +139,12 @@ public: // Functions that implement the Cluster-specific side of public TxGraph functions. - /** Get a vector of Refs for the ancestors of a given Cluster element. */ - std::vector GetAncestorRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept; - /** Get a vector of Refs for the descendants of a given Cluster element. */ - std::vector GetDescendantRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept; + /** Process elements from the front of args that apply to this cluster, and append Refs for the + * union of their ancestors to output. */ + void GetAncestorRefs(const TxGraphImpl& graph, std::span>& args, std::vector& output) noexcept; + /** Process elements from the front of args that apply to this cluster, and append Refs for the + * union of their descendants to output. */ + void GetDescendantRefs(const TxGraphImpl& graph, std::span>& args, std::vector& output) noexcept; /** Get a vector of Refs for all elements of this Cluster, in linearization order. */ std::vector GetClusterRefs(const TxGraphImpl& graph) noexcept; /** Get the individual transaction feerate of a Cluster element. */ @@ -1469,30 +1471,42 @@ bool TxGraphImpl::Exists(const Ref& arg, bool main_only) noexcept return cluster != nullptr; } -std::vector Cluster::GetAncestorRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept +void Cluster::GetAncestorRefs(const TxGraphImpl& graph, std::span>& args, std::vector& output) noexcept { - std::vector ret; - ret.reserve(m_depgraph.Ancestors(idx).Count()); + /** The union of all ancestors to be returned. */ + SetType ancestors_union; + // Process elements from the front of args, as long as they apply. 
+ while (!args.empty()) { + if (args.front().first != this) break; + ancestors_union |= m_depgraph.Ancestors(args.front().second); + args = args.subspan(1); + } + Assume(ancestors_union.Any()); // Translate all ancestors (in arbitrary order) to Refs (if they have any), and return them. - for (auto idx : m_depgraph.Ancestors(idx)) { + for (auto idx : ancestors_union) { const auto& entry = graph.m_entries[m_mapping[idx]]; Assume(entry.m_ref != nullptr); - ret.push_back(entry.m_ref); + output.push_back(entry.m_ref); } - return ret; } -std::vector Cluster::GetDescendantRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept +void Cluster::GetDescendantRefs(const TxGraphImpl& graph, std::span>& args, std::vector& output) noexcept { - std::vector ret; - ret.reserve(m_depgraph.Descendants(idx).Count()); + /** The union of all descendants to be returned. */ + SetType descendants_union; + // Process elements from the front of args, as long as they apply. + while (!args.empty()) { + if (args.front().first != this) break; + descendants_union |= m_depgraph.Descendants(args.front().second); + args = args.subspan(1); + } + Assume(descendants_union.Any()); // Translate all descendants (in arbitrary order) to Refs (if they have any), and return them. - for (auto idx : m_depgraph.Descendants(idx)) { + for (auto idx : descendants_union) { const auto& entry = graph.m_entries[m_mapping[idx]]; Assume(entry.m_ref != nullptr); - ret.push_back(entry.m_ref); + output.push_back(entry.m_ref); } - return ret; } std::vector Cluster::GetClusterRefs(const TxGraphImpl& graph) noexcept @@ -1539,7 +1553,11 @@ std::vector TxGraphImpl::GetAncestors(const Ref& arg, bool main_o auto cluster = FindCluster(GetRefIndex(arg), level); if (cluster == nullptr) return {}; // Dispatch to the Cluster. 
- return cluster->GetAncestorRefs(*this, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index); + std::pair match = {cluster, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index}; + auto matches = std::span(&match, 1); + std::vector ret; + cluster->GetAncestorRefs(*this, matches, ret); + return ret; } std::vector TxGraphImpl::GetDescendants(const Ref& arg, bool main_only) noexcept @@ -1556,7 +1574,11 @@ std::vector TxGraphImpl::GetDescendants(const Ref& arg, bool main auto cluster = FindCluster(GetRefIndex(arg), level); if (cluster == nullptr) return {}; // Dispatch to the Cluster. - return cluster->GetDescendantRefs(*this, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index); + std::pair match = {cluster, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index}; + auto matches = std::span(&match, 1); + std::vector ret; + cluster->GetDescendantRefs(*this, matches, ret); + return ret; } std::vector TxGraphImpl::GetCluster(const Ref& arg, bool main_only) noexcept From b2ea3656481b4196acaf6a1b5f3949a9ba4cf48f Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Fri, 21 Mar 2025 23:21:20 -0400 Subject: [PATCH 25/25] txgraph: Add Get{Ancestors,Descendants}Union functions (feature) Like GetAncestors and GetDescendants, but for the union of multiple inputs. --- src/test/fuzz/txgraph.cpp | 22 +++++++++++++ src/txgraph.cpp | 66 +++++++++++++++++++++++++++++++++++++++ src/txgraph.h | 8 +++++ 3 files changed, 96 insertions(+) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 32147081827..010c9e951ed 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -454,6 +454,28 @@ FUZZ_TARGET(txgraph) auto expect_set = sel_sim.GetAncDesc(ref, alt); assert(result_set == expect_set); break; + } else if (!sel_sim.IsOversized() && command-- == 0) { + // GetAncestorsUnion/GetDescendantsUnion. + std::vector refs; + // Gather a list of up to 15 Ref pointers. 
+ auto count = provider.ConsumeIntegralInRange(0, 15); + refs.resize(count); + for (size_t i = 0; i < count; ++i) { + refs[i] = pick_fn(); + } + // Their order should not matter, shuffle them. + std::shuffle(refs.begin(), refs.end(), rng); + // Invoke the real function, and convert to SimPos set. + auto result = alt ? real->GetDescendantsUnion(refs, use_main) + : real->GetAncestorsUnion(refs, use_main); + auto result_set = sel_sim.MakeSet(result); + assert(result.size() == result_set.Count()); + // Compute the expected result. + SimTxGraph::SetType expect_set; + for (TxGraph::Ref* ref : refs) expect_set |= sel_sim.GetAncDesc(ref, alt); + // Compare. + assert(result_set == expect_set); + break; } else if (!sel_sim.IsOversized() && command-- == 0) { // GetCluster. auto ref = pick_fn(); diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 847cf6ffee1..f6d9eec5666 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -453,6 +453,8 @@ public: std::vector GetCluster(const Ref& arg, bool main_only = false) noexcept final; std::vector GetAncestors(const Ref& arg, bool main_only = false) noexcept final; std::vector GetDescendants(const Ref& arg, bool main_only = false) noexcept final; + std::vector GetAncestorsUnion(std::span args, bool main_only = false) noexcept final; + std::vector GetDescendantsUnion(std::span args, bool main_only = false) noexcept final; GraphIndex GetTransactionCount(bool main_only = false) noexcept final; bool IsOversized(bool main_only = false) noexcept final; std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final; @@ -1581,6 +1583,70 @@ std::vector TxGraphImpl::GetDescendants(const Ref& arg, bool main return ret; } +std::vector TxGraphImpl::GetAncestorsUnion(std::span args, bool main_only) noexcept +{ + // Apply all dependencies, as the result might be incorrect otherwise. + size_t level = GetSpecifiedLevel(main_only); + ApplyDependencies(level); + // Ancestry cannot be known if unapplied dependencies remain. 
+ Assume(GetClusterSet(level).m_deps_to_add.empty()); + + // Translate args to matches. + std::vector> matches; + matches.reserve(args.size()); + for (auto arg : args) { + // Skip empty Refs. + if (GetRefGraph(*arg) == nullptr) continue; + Assume(GetRefGraph(*arg) == this); + // Find the Cluster the argument is in, and skip if none is found. + auto cluster = FindCluster(GetRefIndex(*arg), level); + if (cluster == nullptr) continue; + // Append to matches. + matches.emplace_back(cluster, m_entries[GetRefIndex(*arg)].m_locator[cluster->m_level].index); + } + // Group by Cluster. + std::sort(matches.begin(), matches.end(), [](auto& a, auto& b) noexcept { return std::less{}(a.first, b.first); }); + // Dispatch to the Clusters. + std::span match_span(matches); + std::vector ret; + while (!match_span.empty()) { + match_span.front().first->GetAncestorRefs(*this, match_span, ret); + } + return ret; +} + +std::vector TxGraphImpl::GetDescendantsUnion(std::span args, bool main_only) noexcept +{ + // Apply all dependencies, as the result might be incorrect otherwise. + size_t level = GetSpecifiedLevel(main_only); + ApplyDependencies(level); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(GetClusterSet(level).m_deps_to_add.empty()); + + // Translate args to matches. + std::vector> matches; + matches.reserve(args.size()); + for (auto arg : args) { + // Skip empty Refs. + if (GetRefGraph(*arg) == nullptr) continue; + Assume(GetRefGraph(*arg) == this); + // Find the Cluster the argument is in, and skip if none is found. + auto cluster = FindCluster(GetRefIndex(*arg), level); + if (cluster == nullptr) continue; + // Append to matches. + matches.emplace_back(cluster, m_entries[GetRefIndex(*arg)].m_locator[cluster->m_level].index); + } + // Group by Cluster. + std::sort(matches.begin(), matches.end(), [](auto& a, auto& b) noexcept { return std::less{}(a.first, b.first); }); + // Dispatch to the Clusters. 
+ std::span match_span(matches); + std::vector ret; + while (!match_span.empty()) { + match_span.front().first->GetDescendantRefs(*this, match_span, ret); + } + return ret; +} + std::vector TxGraphImpl::GetCluster(const Ref& arg, bool main_only) noexcept { // Return the empty vector if the Ref is empty (which may be indicative of the transaction diff --git a/src/txgraph.h b/src/txgraph.h index 1a0c11b92cd..eba983cb5b9 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -142,6 +142,14 @@ public: * queried; otherwise the main graph is queried. The queried graph must not be oversized. * Returns {} if arg does not exist in the graph. */ virtual std::vector GetDescendants(const Ref& arg, bool main_only = false) noexcept = 0; + /** Like GetAncestors, but return the Refs for all transactions in the union of the provided + * arguments' ancestors (each transaction is only reported once). Refs that do not exist in + * the queried graph are ignored. */ + virtual std::vector GetAncestorsUnion(std::span args, bool main_only = false) noexcept = 0; + /** Like GetDescendants, but return the Refs for all transactions in the union of the provided + * arguments' descendants (each transaction is only reported once). Refs that do not exist in + * the queried graph are ignored. */ + virtual std::vector GetDescendantsUnion(std::span args, bool main_only = false) noexcept = 0; /** Get the total number of transactions in the graph. If main_only is false and a staging * graph exists, it is queried; otherwise the main graph is queried. This is available even * for oversized graphs. */