clusterlin: adopt trained cost model (feature)

See the comments for the SFLDefaultCostModel class for details on how
the numbers were obtained.
This commit is contained in:
Pieter Wuille 2026-02-05 16:02:19 -05:00
parent 4eefdfc5b7
commit 744d47fcee
3 changed files with 59 additions and 22 deletions

View File

@ -472,37 +472,73 @@ concept StrongComparator =
* Linearize(), which just sorts by DepGraphIndex. */
using IndexTxOrder = std::compare_three_way;
/** A default cost model for SFL for SetType=BitSet<64>, based on benchmarks.
 *
 * The numbers here were obtained in February 2026 by:
 * - For a variety of machines:
 *   - Running a fixed collection of ~385000 clusters found through random generation and fuzzing,
 *     optimizing for difficulty of linearization.
 *     - Linearizing each ~3000 times, with different random seeds. Sometimes without input
 *       linearization, sometimes with a bad one.
 *       - Gathering cycle counts for each of the operations included in this cost model,
 *         broken down by their parameters.
 *   - Correcting the data by subtracting the runtime of obtaining the cycle count.
 *   - Dropping the 5% top and bottom samples from each cycle count dataset, and computing the
 *     average of the remaining samples.
 *   - For each operation, fitting a least-squares linear function approximation through the
 *     samples.
 * - Rescaling all machine expressions to make their total time match, as we only care about
 *   the relative cost of each operation.
 * - Taking the per-operation average of operation expressions across all machines, to construct
 *   expressions for an average machine.
 * - Approximating the result with integer coefficients. Each cost unit corresponds to somewhere
 *   between 0.5 ns and 2.5 ns, depending on the hardware.
 *
 * Every operation has a *Begin() hook, which is a no-op in this model, and one or more
 * *Mid()/*End() hooks, which add a linear function of their arguments to the accumulated cost.
 * Each hook is defined exactly once.
 */
class SFLDefaultCostModel
{
    /** Total cost accumulated so far, in cost units. */
    uint64_t m_cost{0};

public:
    inline void InitializeBegin() noexcept {}
    inline void InitializeEnd(int num_txns, int num_deps) noexcept
    {
        // Cost of initialization.
        m_cost += 39 * num_txns;
        // Cost of producing linearization at the end.
        m_cost += 48 * num_txns + 4 * num_deps;
    }

    inline void GetLinearizationBegin() noexcept {}
    inline void GetLinearizationEnd(int num_txns, int num_deps) noexcept
    {
        // Note that we account for the cost of the final linearization at the beginning (see
        // InitializeEnd), because the cost budget decision needs to be made before calling
        // GetLinearization.
        // This function exists here to allow overriding it easily for benchmark purposes.
    }

    inline void MakeTopologicalBegin() noexcept {}
    inline void MakeTopologicalEnd(int num_chunks, int num_steps) noexcept
    {
        m_cost += 20 * num_chunks + 28 * num_steps;
    }

    inline void StartOptimizingBegin() noexcept {}
    inline void StartOptimizingEnd(int num_chunks) noexcept { m_cost += 13 * num_chunks; }

    inline void ActivateBegin() noexcept {}
    inline void ActivateEnd(int num_deps) noexcept { m_cost += 10 * num_deps + 1; }

    inline void DeactivateBegin() noexcept {}
    inline void DeactivateEnd(int num_deps) noexcept { m_cost += 11 * num_deps + 8; }

    inline void MergeChunksBegin() noexcept {}
    inline void MergeChunksMid(int num_txns) noexcept { m_cost += 2 * num_txns; }
    inline void MergeChunksEnd(int num_steps) noexcept { m_cost += 3 * num_steps + 5; }

    inline void PickMergeCandidateBegin() noexcept {}
    inline void PickMergeCandidateEnd(int num_steps) noexcept { m_cost += 8 * num_steps; }

    inline void PickChunkToOptimizeBegin() noexcept {}
    inline void PickChunkToOptimizeEnd(int num_steps) noexcept { m_cost += num_steps + 4; }

    inline void PickDependencyToSplitBegin() noexcept {}
    inline void PickDependencyToSplitEnd(int num_txns) noexcept { m_cost += 8 * num_txns + 9; }

    inline void StartMinimizingBegin() noexcept {}
    inline void StartMinimizingEnd(int num_chunks) noexcept { m_cost += 18 * num_chunks; }

    inline void MinimizeStepBegin() noexcept {}
    inline void MinimizeStepMid(int num_txns) noexcept { m_cost += 11 * num_txns + 11; }
    // `split` is promoted to 0 or 1, so a step that splits costs 24 units and one that does not
    // costs 7.
    inline void MinimizeStepEnd(bool split) noexcept { m_cost += 17 * split + 7; }

    /** Return the total cost accumulated by all hooks invoked so far. */
    inline uint64_t GetCost() const noexcept { return m_cost; }
};

View File

@ -1021,6 +1021,7 @@ FUZZ_TARGET(clusterlin_linearize)
try {
reader >> VARINT(max_cost) >> Using<DepGraphFormatter>(depgraph) >> rng_seed >> flags;
} catch (const std::ios_base::failure&) {}
if (depgraph.TxCount() <= 1) return;
bool make_connected = flags & 1;
// The following 3 booleans have 4 combinations:
// - (flags & 6) == 0: do not provide input linearization.

View File

@ -402,14 +402,14 @@ inline uint64_t MaxOptimalLinearizationCost(DepGraphIndex cluster_count)
// *some* reasonable cost bound, optimal linearizations are always found.
static constexpr uint64_t COSTS[65] = {
0,
0, 176, 440, 1496, 3344, 6864, 10076, 16720,
19404, 22748, 29832, 41052, 45628, 60104, 64416, 75284,
92884, 111848, 134992, 137104, 177276, 152548, 234256, 237688,
285164, 315084, 327404, 360052, 389092, 411532, 488576, 504020,
518804, 553080, 593120, 627396, 639100, 546744, 636988, 888844,
824428, 729564, 1039368, 1253384, 1348688, 1452924, 1449448, 1440780,
1498024, 1153988, 1525128, 1672836, 1795816, 1368972, 1823712, 1494592,
1541056, 2605108, 1886368, 1816188, 1864060, 2280652, 2790040, 2949540
0, 545, 928, 1633, 2647, 4065, 5598, 8258,
9505, 11471, 14137, 19553, 20460, 26191, 28397, 32599,
41631, 47419, 56329, 57767, 72196, 63652, 95366, 96537,
115653, 125407, 131734, 145090, 156349, 164665, 194224, 203953,
207710, 225878, 239971, 252284, 256534, 222142, 251332, 357098,
325788, 295867, 410053, 497483, 533892, 576572, 577845, 572400,
592536, 455082, 609249, 659130, 714091, 544507, 718788, 562378,
601926, 1025081, 732725, 708896, 738224, 900445, 1092519, 1139946
};
assert(cluster_count < std::size(COSTS));
// Multiply the table number by two, to account for the fact that they are not absolutes.