diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h
index ea0d6bd8346..774bc61734f 100644
--- a/src/cluster_linearize.h
+++ b/src/cluster_linearize.h
@@ -472,37 +472,76 @@ concept StrongComparator =
  * Linearize(), which just sorts by DepGraphIndex. */
 using IndexTxOrder = std::compare_three_way;
 
+/** A default cost model for SFL for SetType=BitSet<64>, based on benchmarks.
+ *
+ * Every *Mid() and *End() hook except GetLinearizationEnd() adds a linear function of its
+ * arguments to a running total; the *Begin() hooks do nothing. GetCost() returns that total.
+ *
+ * The numbers here were obtained in February 2026 by:
+ * - For a variety of machines:
+ *   - Run a fixed collection of ~385000 clusters found through random generation and fuzzing,
+ *     optimizing for difficulty of linearization.
+ *   - Linearize each ~3000 times, with different random seeds. Sometimes without input
+ *     linearization, sometimes with a bad one.
+ *   - Gather cycle counts for each of the operations included in this cost model,
+ *     broken down by their parameters.
+ *   - Correct the data by subtracting the runtime of obtaining the cycle count.
+ *   - Drop the 5% top and bottom samples from each cycle count dataset, and compute the average
+ *     of the remaining samples.
+ *   - For each operation, fit a least-squares linear function approximation through the samples.
+ * - Rescale all machine expressions to make their total time match, as we only care about
+ *   relative cost of each operation.
+ * - Take the per-operation average of operation expressions across all machines, to construct
+ *   expressions for an average machine.
+ * - Approximate the result with integer coefficients. Each cost unit corresponds to somewhere
+ *   between 0.5 ns and 2.5 ns, depending on the hardware.
+ */
 class SFLDefaultCostModel
 {
     uint64_t m_cost{0};
 
 public:
     inline void InitializeBegin() noexcept {}
-    inline void InitializeEnd(int num_txns, int num_deps) noexcept {}
+    inline void InitializeEnd(int num_txns, int num_deps) noexcept
+    {
+        // Cost of initialization.
+        m_cost += 39 * num_txns;
+        // Cost of producing linearization at the end.
+        m_cost += 48 * num_txns + 4 * num_deps;
+    }
     inline void GetLinearizationBegin() noexcept {}
-    inline void GetLinearizationEnd(int num_txns, int num_deps) noexcept {}
+    inline void GetLinearizationEnd(int num_txns, int num_deps) noexcept
+    {
+        // Note that we account for the cost of the final linearization at the beginning (see
+        // InitializeEnd), because the cost budget decision needs to be made before calling
+        // GetLinearization.
+        // This function exists here to allow overriding it easily for benchmark purposes.
+    }
     inline void MakeTopologicalBegin() noexcept {}
-    inline void MakeTopologicalEnd(int num_chunks, int num_steps) noexcept {}
+    inline void MakeTopologicalEnd(int num_chunks, int num_steps) noexcept
+    {
+        m_cost += 20 * num_chunks + 28 * num_steps;
+    }
     inline void StartOptimizingBegin() noexcept {}
-    inline void StartOptimizingEnd(int num_chunks) noexcept {}
+    inline void StartOptimizingEnd(int num_chunks) noexcept { m_cost += 13 * num_chunks; }
     inline void ActivateBegin() noexcept {}
-    inline void ActivateEnd(int num_deps) noexcept { m_cost += 38 * num_deps + 38; }
+    inline void ActivateEnd(int num_deps) noexcept { m_cost += 10 * num_deps + 1; }
     inline void DeactivateBegin() noexcept {}
-    inline void DeactivateEnd(int num_deps) noexcept { m_cost += 38 * num_deps + 38; }
+    inline void DeactivateEnd(int num_deps) noexcept { m_cost += 11 * num_deps + 8; }
     inline void MergeChunksBegin() noexcept {}
-    inline void MergeChunksMid(int num_txns) noexcept {}
-    inline void MergeChunksEnd(int num_steps) noexcept {}
+    inline void MergeChunksMid(int num_txns) noexcept { m_cost += 2 * num_txns; }
+    inline void MergeChunksEnd(int num_steps) noexcept { m_cost += 3 * num_steps + 5; }
     inline void PickMergeCandidateBegin() noexcept {}
-    inline void PickMergeCandidateEnd(int num_steps) noexcept {}
+    inline void PickMergeCandidateEnd(int num_steps) noexcept { m_cost += 8 * num_steps; }
     inline void PickChunkToOptimizeBegin() noexcept {}
-    inline void PickChunkToOptimizeEnd(int num_steps) noexcept {}
+    inline void PickChunkToOptimizeEnd(int num_steps) noexcept { m_cost += num_steps + 4; }
     inline void PickDependencyToSplitBegin() noexcept {}
-    inline void PickDependencyToSplitEnd(int num_txns) noexcept {}
+    inline void PickDependencyToSplitEnd(int num_txns) noexcept { m_cost += 8 * num_txns + 9; }
     inline void StartMinimizingBegin() noexcept {}
-    inline void StartMinimizingEnd(int num_chunks) noexcept {}
+    inline void StartMinimizingEnd(int num_chunks) noexcept { m_cost += 18 * num_chunks; }
     inline void MinimizeStepBegin() noexcept {}
-    inline void MinimizeStepMid(int num_txns) noexcept {}
-    inline void MinimizeStepEnd(bool split) noexcept {}
+    inline void MinimizeStepMid(int num_txns) noexcept { m_cost += 11 * num_txns + 11; }
+    inline void MinimizeStepEnd(bool split) noexcept { m_cost += 17 * split + 7; }
 
     inline uint64_t GetCost() const noexcept { return m_cost; }
 };
diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index 9b07bf00eb2..b735940100a 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -1021,6 +1021,7 @@ FUZZ_TARGET(clusterlin_linearize)
     try {
         reader >> VARINT(max_cost) >> Using<DepGraphFormatter>(depgraph) >> rng_seed >> flags;
     } catch (const std::ios_base::failure&) {}
+    if (depgraph.TxCount() <= 1) return;
     bool make_connected = flags & 1;
     // The following 3 booleans have 4 combinations:
     // - (flags & 6) == 0: do not provide input linearization.
diff --git a/src/test/util/cluster_linearize.h b/src/test/util/cluster_linearize.h
index f8ffb5ea4e5..b3f639e83ed 100644
--- a/src/test/util/cluster_linearize.h
+++ b/src/test/util/cluster_linearize.h
@@ -402,14 +402,14 @@ inline uint64_t MaxOptimalLinearizationCost(DepGraphIndex cluster_count)
     // *some* reasonable cost bound, optimal linearizations are always found.
     static constexpr uint64_t COSTS[65] = {
         0,
-        0, 176, 440, 1496, 3344, 6864, 10076, 16720,
-        19404, 22748, 29832, 41052, 45628, 60104, 64416, 75284,
-        92884, 111848, 134992, 137104, 177276, 152548, 234256, 237688,
-        285164, 315084, 327404, 360052, 389092, 411532, 488576, 504020,
-        518804, 553080, 593120, 627396, 639100, 546744, 636988, 888844,
-        824428, 729564, 1039368, 1253384, 1348688, 1452924, 1449448, 1440780,
-        1498024, 1153988, 1525128, 1672836, 1795816, 1368972, 1823712, 1494592,
-        1541056, 2605108, 1886368, 1816188, 1864060, 2280652, 2790040, 2949540
+        0, 545, 928, 1633, 2647, 4065, 5598, 8258,
+        9505, 11471, 14137, 19553, 20460, 26191, 28397, 32599,
+        41631, 47419, 56329, 57767, 72196, 63652, 95366, 96537,
+        115653, 125407, 131734, 145090, 156349, 164665, 194224, 203953,
+        207710, 225878, 239971, 252284, 256534, 222142, 251332, 357098,
+        325788, 295867, 410053, 497483, 533892, 576572, 577845, 572400,
+        592536, 455082, 609249, 659130, 714091, 544507, 718788, 562378,
+        601926, 1025081, 732725, 708896, 738224, 900445, 1092519, 1139946
     };
     assert(cluster_count < std::size(COSTS));
     // Multiply the table number by two, to account for the fact that they are not absolutes.