mirror of
https://github.com/bitcoin/bitcoin.git
synced 2026-03-02 09:46:14 +00:00
clusterlin: adopt trained cost model (feature)
See the comments for the SFLDefaultCostModel class for details on how the numbers were obtained.
This commit is contained in:
parent
4eefdfc5b7
commit
744d47fcee
@ -472,37 +472,73 @@ concept StrongComparator =
|
||||
* Linearize(), which just sorts by DepGraphIndex. */
|
||||
using IndexTxOrder = std::compare_three_way;
|
||||
|
||||
/** A default cost model for SFL for SetType=BitSet<64>, based on benchmarks.
|
||||
*
|
||||
* The numbers here were obtained in February 2026 by:
|
||||
* - For a variety of machines:
|
||||
* - Running a fixed collection of ~385000 clusters found through random generation and fuzzing,
|
||||
* optimizing for difficulty of linearization.
|
||||
* - Linearizing each ~3000 times with different random seeds, sometimes without an input
|
||||
* linearization, sometimes with a bad one.
|
||||
* - Gather cycle counts for each of the operations included in this cost model,
|
||||
* broken down by their parameters.
|
||||
* - Correct the data by subtracting the runtime of obtaining the cycle count.
|
||||
* - Drop the 5% top and bottom samples from each cycle count dataset, and compute the average
|
||||
* of the remaining samples.
|
||||
* - For each operation, fit a least-squares linear function approximation through the samples.
|
||||
* - Rescale all machine expressions to make their total time match, as we only care about
|
||||
* relative cost of each operation.
|
||||
* - Take the per-operation average of operation expressions across all machines, to construct
|
||||
* expressions for an average machine.
|
||||
* - Approximate the result with integer coefficients. Each cost unit corresponds to somewhere
|
||||
* between 0.5 ns and 2.5 ns, depending on the hardware.
|
||||
*/
|
||||
/* Cost accumulator: every *End / *Mid hook adds a fixed linear function of its arguments to a
 * running total, which GetCost() reports. The *Begin hooks are intentionally empty.
 *
 * Each hook is defined exactly once, using the trained coefficients; the superseded
 * definitions (empty stubs and the flat 38 * num_deps + 38 activation/deactivation cost)
 * are not kept, since a member function cannot be defined twice with the same signature. */
class SFLDefaultCostModel
{
    /** Total cost accumulated so far, in abstract cost units. */
    uint64_t m_cost{0};

public:
    inline void InitializeBegin() noexcept {}
    /** Charge for initialization and, up front, for producing the final linearization.
     *  @param num_txns  number of transactions
     *  @param num_deps  number of dependencies */
    inline void InitializeEnd(int num_txns, int num_deps) noexcept
    {
        // Cost of initialization.
        m_cost += 39 * num_txns;
        // Cost of producing linearization at the end.
        m_cost += 48 * num_txns + 4 * num_deps;
    }

    inline void GetLinearizationBegin() noexcept {}
    /** Deliberately charges nothing; see the note in the body. */
    inline void GetLinearizationEnd(int num_txns, int num_deps) noexcept
    {
        // Note that we account for the cost of the final linearization at the beginning (see
        // InitializeEnd), because the cost budget decision needs to be made before calling
        // GetLinearization.
        // This function exists here to allow overriding it easily for benchmark purposes.
    }

    inline void MakeTopologicalBegin() noexcept {}
    /** Adds 20 per chunk plus 28 per step. */
    inline void MakeTopologicalEnd(int num_chunks, int num_steps) noexcept
    {
        m_cost += 20 * num_chunks + 28 * num_steps;
    }

    inline void StartOptimizingBegin() noexcept {}
    /** Adds 13 per chunk. */
    inline void StartOptimizingEnd(int num_chunks) noexcept { m_cost += 13 * num_chunks; }

    inline void ActivateBegin() noexcept {}
    /** Adds 10 per dependency plus a constant 1. */
    inline void ActivateEnd(int num_deps) noexcept { m_cost += 10 * num_deps + 1; }

    inline void DeactivateBegin() noexcept {}
    /** Adds 11 per dependency plus a constant 8. */
    inline void DeactivateEnd(int num_deps) noexcept { m_cost += 11 * num_deps + 8; }

    inline void MergeChunksBegin() noexcept {}
    /** Adds 2 per transaction. */
    inline void MergeChunksMid(int num_txns) noexcept { m_cost += 2 * num_txns; }
    /** Adds 3 per step plus a constant 5. */
    inline void MergeChunksEnd(int num_steps) noexcept { m_cost += 3 * num_steps + 5; }

    inline void PickMergeCandidateBegin() noexcept {}
    /** Adds 8 per step. */
    inline void PickMergeCandidateEnd(int num_steps) noexcept { m_cost += 8 * num_steps; }

    inline void PickChunkToOptimizeBegin() noexcept {}
    /** Adds 1 per step plus a constant 4. */
    inline void PickChunkToOptimizeEnd(int num_steps) noexcept { m_cost += num_steps + 4; }

    inline void PickDependencyToSplitBegin() noexcept {}
    /** Adds 8 per transaction plus a constant 9. */
    inline void PickDependencyToSplitEnd(int num_txns) noexcept { m_cost += 8 * num_txns + 9; }

    inline void StartMinimizingBegin() noexcept {}
    /** Adds 18 per chunk. */
    inline void StartMinimizingEnd(int num_chunks) noexcept { m_cost += 18 * num_chunks; }

    inline void MinimizeStepBegin() noexcept {}
    /** Adds 11 per transaction plus a constant 11. */
    inline void MinimizeStepMid(int num_txns) noexcept { m_cost += 11 * num_txns + 11; }
    /** Adds 7, plus 17 more when split is true (the bool promotes to 0 or 1). */
    inline void MinimizeStepEnd(bool split) noexcept { m_cost += 17 * split + 7; }

    /** @return the total cost accumulated by all hooks called so far. */
    inline uint64_t GetCost() const noexcept { return m_cost; }
};
|
||||
|
||||
@ -1021,6 +1021,7 @@ FUZZ_TARGET(clusterlin_linearize)
|
||||
try {
|
||||
reader >> VARINT(max_cost) >> Using<DepGraphFormatter>(depgraph) >> rng_seed >> flags;
|
||||
} catch (const std::ios_base::failure&) {}
|
||||
if (depgraph.TxCount() <= 1) return;
|
||||
bool make_connected = flags & 1;
|
||||
// The following 3 booleans have 4 combinations:
|
||||
// - (flags & 6) == 0: do not provide input linearization.
|
||||
|
||||
@ -402,14 +402,14 @@ inline uint64_t MaxOptimalLinearizationCost(DepGraphIndex cluster_count)
|
||||
// *some* reasonable cost bound, optimal linearizations are always found.
|
||||
static constexpr uint64_t COSTS[65] = {
|
||||
0,
|
||||
0, 176, 440, 1496, 3344, 6864, 10076, 16720,
|
||||
19404, 22748, 29832, 41052, 45628, 60104, 64416, 75284,
|
||||
92884, 111848, 134992, 137104, 177276, 152548, 234256, 237688,
|
||||
285164, 315084, 327404, 360052, 389092, 411532, 488576, 504020,
|
||||
518804, 553080, 593120, 627396, 639100, 546744, 636988, 888844,
|
||||
824428, 729564, 1039368, 1253384, 1348688, 1452924, 1449448, 1440780,
|
||||
1498024, 1153988, 1525128, 1672836, 1795816, 1368972, 1823712, 1494592,
|
||||
1541056, 2605108, 1886368, 1816188, 1864060, 2280652, 2790040, 2949540
|
||||
0, 545, 928, 1633, 2647, 4065, 5598, 8258,
|
||||
9505, 11471, 14137, 19553, 20460, 26191, 28397, 32599,
|
||||
41631, 47419, 56329, 57767, 72196, 63652, 95366, 96537,
|
||||
115653, 125407, 131734, 145090, 156349, 164665, 194224, 203953,
|
||||
207710, 225878, 239971, 252284, 256534, 222142, 251332, 357098,
|
||||
325788, 295867, 410053, 497483, 533892, 576572, 577845, 572400,
|
||||
592536, 455082, 609249, 659130, 714091, 544507, 718788, 562378,
|
||||
601926, 1025081, 732725, 708896, 738224, 900445, 1092519, 1139946
|
||||
};
|
||||
assert(cluster_count < std::size(COSTS));
|
||||
// Multiply the table numbers by two, to account for the fact that they are not absolutes.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user