Merge bitcoin/bitcoin#33512: coins: use dirty entry count for flush warnings and disk space checks

afb1bc120ecce2bf663093e15c93f5592c0d4a98 validation: Use dirty entry count in flush warnings and disk space checks (Pieter Wuille)
b413491a1cdd9a51f2aa10b775650f54f6785e3e coins: Keep track of number of dirty entries in `CCoinsViewCache` (Pieter Wuille)
7e52b1b945c4137e0fb05715090635ce82ed04b3 fuzz: call `EmplaceCoinInternalDANGER` as well in `SimulationTest` (Lőrinc)

Pull request description:

  ### Problem
  Now that non-wiping flushes are possible (#28280, #28233), the cache may be mostly clean at flush time.
  But the flush warning, disk-space check, and benchmark logging still used total cache size, so a node with a 10 GiB cache that only needs to write a small fraction of dirty entries could still trigger a scary warning via the disk-space checks.

  The previous `DynamicMemoryUsage` metric was also fundamentally wrong for estimating disk writes, even before non-wiping flushes. In-memory coin size differs from on-disk write size due to LevelDB overhead, log doubling, and compaction.

  The warning also only fired in `FlushStateToDisk`, so `AssumeUTXO` snapshot loads never warned at all.

  ### Fix

  This PR tracks the actual number of dirty entries via `m_dirty_count` in `CCoinsViewCache`, maintained alongside the existing dirty-flag linked list, `SanityCheck` cross-validating both counts.

  The warning and benchmark log move from `FlushStateToDisk` down to `CCoinsViewDB::BatchWrite`, where the actual I/O happens. This is the single place all flush paths converge (regular flushes, syncs, and snapshot loads), so the warning now fires correctly for `AssumeUTXO` too.
  The threshold changes from 1 GiB of memory to 10 million dirty entries, which is roughly equivalent but avoids the in-memory vs on-disk size confusion.

  The disk-space safety check now uses `GetDirtyCount()` with the existing conservative 48-byte-per-entry estimate, preventing unnecessary shutdowns when the cache is large but mostly clean.

  ---

  Note: the first commit adds fuzz coverage for `EmplaceCoinInternalDANGER` in `SimulationTest` to exercise the accounting paths before modifying them.
  Note: this is a revival of #31703 with all outstanding review feedback addressed.

ACKs for top commit:
  Eunovo:
    Concept ACK afb1bc120e
  andrewtoth:
    re-ACK afb1bc120ecce2bf663093e15c93f5592c0d4a98
  sipa:
    Code review ACK afb1bc120ecce2bf663093e15c93f5592c0d4a98
  sedited:
    ACK afb1bc120ecce2bf663093e15c93f5592c0d4a98

Tree-SHA512: 4133c6669fd20836ae2fb62ed804cdf6ebaa61076927b54fc412e42455a2f0d4cadfab0844064f9c32431eacb1f5e47b78de8e5cde1b26ba7239a7becf92f369
This commit is contained in:
merge-script 2026-02-19 22:18:38 +01:00
commit 739f75c098
No known key found for this signature in database
GPG Key ID: 9B79B45691DB4173
6 changed files with 68 additions and 26 deletions

View File

@ -98,10 +98,12 @@ void CCoinsViewCache::AddCoin(const COutPoint &outpoint, Coin&& coin, bool possi
fresh = !it->second.IsDirty();
}
if (!inserted) {
m_dirty_count -= it->second.IsDirty();
cachedCoinsUsage -= it->second.coin.DynamicMemoryUsage();
}
it->second.coin = std::move(coin);
CCoinsCacheEntry::SetDirty(*it, m_sentinel);
++m_dirty_count;
if (fresh) CCoinsCacheEntry::SetFresh(*it, m_sentinel);
cachedCoinsUsage += it->second.coin.DynamicMemoryUsage();
TRACEPOINT(utxocache, add,
@ -117,6 +119,7 @@ void CCoinsViewCache::EmplaceCoinInternalDANGER(COutPoint&& outpoint, Coin&& coi
auto [it, inserted] = cacheCoins.try_emplace(std::move(outpoint), std::move(coin));
if (inserted) {
CCoinsCacheEntry::SetDirty(*it, m_sentinel);
++m_dirty_count;
cachedCoinsUsage += mem_usage;
}
}
@ -135,6 +138,7 @@ void AddCoins(CCoinsViewCache& cache, const CTransaction &tx, int nHeight, bool
bool CCoinsViewCache::SpendCoin(const COutPoint &outpoint, Coin* moveout) {
CCoinsMap::iterator it = FetchCoin(outpoint);
if (it == cacheCoins.end()) return false;
m_dirty_count -= it->second.IsDirty();
cachedCoinsUsage -= it->second.coin.DynamicMemoryUsage();
TRACEPOINT(utxocache, spent,
outpoint.hash.data(),
@ -149,6 +153,7 @@ bool CCoinsViewCache::SpendCoin(const COutPoint &outpoint, Coin* moveout) {
cacheCoins.erase(it);
} else {
CCoinsCacheEntry::SetDirty(*it, m_sentinel);
++m_dirty_count;
it->second.coin.Clear();
}
return true;
@ -207,8 +212,9 @@ void CCoinsViewCache::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& ha
} else {
entry.coin = it->second.coin;
}
cachedCoinsUsage += entry.coin.DynamicMemoryUsage();
CCoinsCacheEntry::SetDirty(*itUs, m_sentinel);
++m_dirty_count;
cachedCoinsUsage += entry.coin.DynamicMemoryUsage();
// We can mark it FRESH in the parent if it was FRESH in the child
// Otherwise it might have just been flushed from the parent's cache
// and already exist in the grandparent
@ -227,6 +233,7 @@ void CCoinsViewCache::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& ha
if (itUs->second.IsFresh() && it->second.coin.IsSpent()) {
// The grandparent cache does not have an entry, and the coin
// has been spent. We can just delete it from the parent cache.
m_dirty_count -= itUs->second.IsDirty();
cachedCoinsUsage -= itUs->second.coin.DynamicMemoryUsage();
cacheCoins.erase(itUs);
} else {
@ -240,7 +247,10 @@ void CCoinsViewCache::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& ha
itUs->second.coin = it->second.coin;
}
cachedCoinsUsage += itUs->second.coin.DynamicMemoryUsage();
CCoinsCacheEntry::SetDirty(*itUs, m_sentinel);
if (!itUs->second.IsDirty()) {
CCoinsCacheEntry::SetDirty(*itUs, m_sentinel);
++m_dirty_count;
}
// NOTE: It isn't safe to mark the coin as FRESH in the parent
// cache. If it already existed and was spent in the parent
// cache then marking it FRESH would prevent that spentness
@ -253,8 +263,9 @@ void CCoinsViewCache::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& ha
void CCoinsViewCache::Flush(bool reallocate_cache)
{
auto cursor{CoinsViewCacheCursor(m_sentinel, cacheCoins, /*will_erase=*/true)};
auto cursor{CoinsViewCacheCursor(m_dirty_count, m_sentinel, cacheCoins, /*will_erase=*/true)};
base->BatchWrite(cursor, hashBlock);
Assume(m_dirty_count == 0);
cacheCoins.clear();
if (reallocate_cache) {
ReallocateCache();
@ -264,8 +275,9 @@ void CCoinsViewCache::Flush(bool reallocate_cache)
void CCoinsViewCache::Sync()
{
auto cursor{CoinsViewCacheCursor(m_sentinel, cacheCoins, /*will_erase=*/false)};
auto cursor{CoinsViewCacheCursor(m_dirty_count, m_sentinel, cacheCoins, /*will_erase=*/false)};
base->BatchWrite(cursor, hashBlock);
Assume(m_dirty_count == 0);
if (m_sentinel.second.Next() != &m_sentinel) {
/* BatchWrite must clear flags of all entries */
throw std::logic_error("Not all unspent flagged entries were cleared");
@ -276,6 +288,7 @@ void CCoinsViewCache::Reset() noexcept
{
cacheCoins.clear();
cachedCoinsUsage = 0;
m_dirty_count = 0;
SetBestBlock(uint256::ZERO);
}
@ -348,7 +361,7 @@ void CCoinsViewCache::SanityCheck() const
// Count the number of entries actually in the list.
++count_linked;
}
assert(count_linked == count_dirty);
assert(count_dirty == count_linked && count_dirty == m_dirty_count);
assert(recomputed_usage == cachedCoinsUsage);
}

View File

@ -265,10 +265,11 @@ struct CoinsViewCacheCursor
//! This is an optimization compared to erasing all entries as the cursor iterates them when will_erase is set.
//! Calling CCoinsMap::clear() afterwards is faster because a CoinsCachePair cannot be coerced back into a
//! CCoinsMap::iterator to be erased, and must therefore be looked up again by key in the CCoinsMap before being erased.
CoinsViewCacheCursor(CoinsCachePair& sentinel LIFETIMEBOUND,
CoinsViewCacheCursor(size_t& dirty_count LIFETIMEBOUND,
CoinsCachePair& sentinel LIFETIMEBOUND,
CCoinsMap& map LIFETIMEBOUND,
bool will_erase) noexcept
: m_sentinel(sentinel), m_map(map), m_will_erase(will_erase) {}
: m_dirty_count(dirty_count), m_sentinel(sentinel), m_map(map), m_will_erase(will_erase) {}
inline CoinsCachePair* Begin() const noexcept { return m_sentinel.second.Next(); }
inline CoinsCachePair* End() const noexcept { return &m_sentinel; }
@ -277,6 +278,7 @@ struct CoinsViewCacheCursor
inline CoinsCachePair* NextAndMaybeErase(CoinsCachePair& current) noexcept
{
const auto next_entry{current.second.Next()};
m_dirty_count -= current.second.IsDirty();
// If we are not going to erase the cache, we must still erase spent entries.
// Otherwise, clear the state of the entry.
if (!m_will_erase) {
@ -291,7 +293,10 @@ struct CoinsViewCacheCursor
}
inline bool WillErase(CoinsCachePair& current) const noexcept { return m_will_erase || current.second.coin.IsSpent(); }
size_t GetDirtyCount() const noexcept { return m_dirty_count; }
size_t GetTotalCount() const noexcept { return m_map.size(); }
private:
size_t& m_dirty_count;
CoinsCachePair& m_sentinel;
CCoinsMap& m_map;
bool m_will_erase;
@ -369,6 +374,8 @@ protected:
/* Cached dynamic memory usage for the inner Coin objects. */
mutable size_t cachedCoinsUsage{0};
/* Running count of dirty Coin cache entries. */
mutable size_t m_dirty_count{0};
/**
* Discard all modifications made to this cache without flushing to the base view.
@ -458,9 +465,12 @@ public:
*/
void Uncache(const COutPoint &outpoint);
//! Calculate the size of the cache (in number of transaction outputs)
//! Size of the cache (in number of transaction outputs)
unsigned int GetCacheSize() const;
//! Number of dirty cache entries (transaction outputs)
size_t GetDirtyCount() const noexcept { return m_dirty_count; }
//! Calculate the size of the cache (in bytes)
size_t DynamicMemoryUsage() const;

View File

@ -95,6 +95,7 @@ public:
CCoinsMap& map() const { return cacheCoins; }
CoinsCachePair& sentinel() const { return m_sentinel; }
size_t& usage() const { return cachedCoinsUsage; }
size_t& dirty() const { return m_dirty_count; }
};
} // namespace
@ -189,8 +190,11 @@ void SimulationTest(CCoinsView* base, bool fake_best_block)
(coin.IsSpent() ? added_an_entry : updated_an_entry) = true;
coin = newcoin;
}
bool is_overwrite = !coin.IsSpent() || m_rng.rand32() & 1;
stack.back()->AddCoin(COutPoint(txid, 0), std::move(newcoin), is_overwrite);
if (COutPoint op(txid, 0); !stack.back()->map().contains(op) && !newcoin.out.scriptPubKey.IsUnspendable() && m_rng.randbool()) {
stack.back()->EmplaceCoinInternalDANGER(std::move(op), std::move(newcoin));
} else {
stack.back()->AddCoin(op, std::move(newcoin), /*possible_overwrite=*/!coin.IsSpent() || m_rng.randbool());
}
} else {
// Spend the coin.
removed_an_entry = true;
@ -653,8 +657,10 @@ static void WriteCoinsViewEntry(CCoinsView& view, const MaybeCoin& cache_coin)
CCoinsMapMemoryResource resource;
CCoinsMap map{0, CCoinsMap::hasher{}, CCoinsMap::key_equal{}, &resource};
if (cache_coin) InsertCoinsMapEntry(map, sentinel, *cache_coin);
auto cursor{CoinsViewCacheCursor(sentinel, map, /*will_erase=*/true)};
size_t dirty_count{cache_coin && cache_coin->IsDirty()};
auto cursor{CoinsViewCacheCursor(dirty_count, sentinel, map, /*will_erase=*/true)};
view.BatchWrite(cursor, {});
BOOST_CHECK_EQUAL(dirty_count, 0U);
}
class SingleEntryCacheTest
@ -664,7 +670,10 @@ public:
{
auto base_cache_coin{base_value == ABSENT ? MISSING : CoinEntry{base_value, CoinEntry::State::DIRTY}};
WriteCoinsViewEntry(base, base_cache_coin);
if (cache_coin) cache.usage() += InsertCoinsMapEntry(cache.map(), cache.sentinel(), *cache_coin);
if (cache_coin) {
cache.usage() += InsertCoinsMapEntry(cache.map(), cache.sentinel(), *cache_coin);
cache.dirty() += cache_coin->IsDirty();
}
}
CCoinsView root;
@ -1125,6 +1134,7 @@ BOOST_AUTO_TEST_CASE(ccoins_reset_guard)
const Coin coin{CTxOut{m_rng.randrange(10), CScript{} << m_rng.randbytes(CScriptBase::STATIC_SIZE + 1)}, 1, false};
cache.EmplaceCoinInternalDANGER(COutPoint{outpoint}, Coin{coin});
BOOST_CHECK_EQUAL(cache.GetDirtyCount(), 1U);
uint256 cache_best_block{m_rng.rand256()};
cache.SetBestBlock(cache_best_block);
@ -1134,12 +1144,14 @@ BOOST_AUTO_TEST_CASE(ccoins_reset_guard)
BOOST_CHECK(cache.AccessCoin(outpoint) == coin);
BOOST_CHECK(!cache.AccessCoin(outpoint).IsSpent());
BOOST_CHECK_EQUAL(cache.GetCacheSize(), 1);
BOOST_CHECK_EQUAL(cache.GetDirtyCount(), 1);
BOOST_CHECK_EQUAL(cache.GetBestBlock(), cache_best_block);
BOOST_CHECK(!root_cache.HaveCoinInCache(outpoint));
}
BOOST_CHECK(cache.AccessCoin(outpoint).IsSpent());
BOOST_CHECK_EQUAL(cache.GetCacheSize(), 0);
BOOST_CHECK_EQUAL(cache.GetDirtyCount(), 0);
BOOST_CHECK_EQUAL(cache.GetBestBlock(), base_best_block);
BOOST_CHECK(!root_cache.HaveCoinInCache(outpoint));
@ -1150,8 +1162,13 @@ BOOST_AUTO_TEST_CASE(ccoins_reset_guard)
BOOST_CHECK(cache.AccessCoin(outpoint).IsSpent());
BOOST_CHECK_EQUAL(cache.GetCacheSize(), 0);
BOOST_CHECK_EQUAL(cache.GetDirtyCount(), 0U);
BOOST_CHECK_EQUAL(cache.GetBestBlock(), base_best_block);
BOOST_CHECK(!root_cache.HaveCoinInCache(outpoint));
// Flush should be a no-op after reset.
cache.Flush();
BOOST_CHECK_EQUAL(cache.GetDirtyCount(), 0U);
}
BOOST_AUTO_TEST_SUITE_END()

View File

@ -139,6 +139,7 @@ void TestCoinsView(FuzzedDataProvider& fuzzed_data_provider, CCoinsView& backend
[&] {
CoinsCachePair sentinel{};
sentinel.second.SelfRef(sentinel);
size_t dirty_count{0};
CCoinsMapMemoryResource resource;
CCoinsMap coins_map{0, SaltedOutpointHasher{/*deterministic=*/true}, CCoinsMap::key_equal{}, &resource};
LIMITED_WHILE(good_data && fuzzed_data_provider.ConsumeBool(), 10'000)
@ -159,10 +160,11 @@ void TestCoinsView(FuzzedDataProvider& fuzzed_data_provider, CCoinsView& backend
auto it{coins_map.emplace(random_out_point, std::move(coins_cache_entry)).first};
if (dirty) CCoinsCacheEntry::SetDirty(*it, sentinel);
if (fresh) CCoinsCacheEntry::SetFresh(*it, sentinel);
dirty_count += dirty;
}
bool expected_code_path = false;
try {
auto cursor{CoinsViewCacheCursor(sentinel, coins_map, /*will_erase=*/true)};
auto cursor{CoinsViewCacheCursor(dirty_count, sentinel, coins_map, /*will_erase=*/true)};
uint256 best_block{coins_view_cache.GetBestBlock()};
if (fuzzed_data_provider.ConsumeBool()) best_block = ConsumeUInt256(fuzzed_data_provider);
// Set best block hash to non-null to satisfy the assertion in CCoinsViewDB::BatchWrite().

View File

@ -7,6 +7,7 @@
#include <coins.h>
#include <dbwrapper.h>
#include <logging/timer.h>
#include <primitives/transaction.h>
#include <random.h>
#include <serialize.h>
@ -25,6 +26,9 @@ static constexpr uint8_t DB_HEAD_BLOCKS{'H'};
// Keys used in previous version that might still be found in the DB:
static constexpr uint8_t DB_COINS{'c'};
// Threshold for warning when writing this many dirty cache entries to disk.
static constexpr size_t WARN_FLUSH_COINS_COUNT{10'000'000};
bool CCoinsViewDB::NeedsUpgrade()
{
std::unique_ptr<CDBIterator> cursor{m_db->NewIterator()};
@ -97,7 +101,7 @@ void CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& hashB
{
CDBBatch batch(*m_db);
size_t count = 0;
size_t changed = 0;
const size_t dirty_count{cursor.GetDirtyCount()};
assert(!hashBlock.IsNull());
uint256 old_tip = GetBestBlock();
@ -113,6 +117,10 @@ void CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& hashB
}
}
if (dirty_count > WARN_FLUSH_COINS_COUNT) LogWarning("Flushing large (%d entries) UTXO set to disk, it may take several minutes", dirty_count);
LOG_TIME_MILLIS_WITH_CATEGORY(strprintf("write coins cache to disk (%d out of %d cached coins)",
dirty_count, cursor.GetTotalCount()), BCLog::BENCH);
// In the first batch, mark the database as being in the middle of a
// transition from old_tip to hashBlock.
// A vector is used for future extensibility, as we may want to support
@ -128,8 +136,6 @@ void CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& hashB
} else {
batch.Write(entry, it->second.coin);
}
changed++;
}
count++;
it = cursor.NextAndMaybeErase(*it);
@ -154,7 +160,7 @@ void CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& hashB
LogDebug(BCLog::COINDB, "Writing final batch of %.2f MiB\n", batch.ApproximateSize() * (1.0 / 1048576.0));
m_db->WriteBatch(batch);
LogDebug(BCLog::COINDB, "Committed %u changed transaction outputs (out of %u) to coin database...\n", (unsigned int)changed, (unsigned int)count);
LogDebug(BCLog::COINDB, "Committed %u changed transaction outputs (out of %u) to coin database...", (unsigned int)dirty_count, (unsigned int)count);
}
size_t CCoinsViewDB::EstimateSize() const

View File

@ -89,8 +89,6 @@ using node::CBlockIndexHeightOnlyComparator;
using node::CBlockIndexWorkComparator;
using node::SnapshotMetadata;
/** Size threshold for warning about slow UTXO set flush to disk. */
static constexpr size_t WARN_FLUSH_COINS_SIZE = 1 << 30; // 1 GiB
/** Time window to wait between writing blocks/block index and chainstate to disk.
* Randomize writing time inside the window to prevent a situation where the
* network over time settles into a few cohorts of synchronized writers.
@ -2708,8 +2706,8 @@ bool Chainstate::FlushStateToDisk(
std::set<int> setFilesToPrune;
bool full_flush_completed = false;
const size_t coins_count = CoinsTip().GetCacheSize();
const size_t coins_mem_usage = CoinsTip().DynamicMemoryUsage();
[[maybe_unused]] const size_t coins_count{CoinsTip().GetCacheSize()};
[[maybe_unused]] const size_t coins_mem_usage{CoinsTip().DynamicMemoryUsage()};
try {
{
@ -2802,16 +2800,12 @@ bool Chainstate::FlushStateToDisk(
}
if (!CoinsTip().GetBestBlock().IsNull()) {
if (coins_mem_usage >= WARN_FLUSH_COINS_SIZE) LogWarning("Flushing large (%d GiB) UTXO set to disk, it may take several minutes", coins_mem_usage >> 30);
LOG_TIME_MILLIS_WITH_CATEGORY(strprintf("write coins cache to disk (%d coins, %.2fKiB)",
coins_count, coins_mem_usage >> 10), BCLog::BENCH);
// Typical Coin structures on disk are around 48 bytes in size.
// Pushing a new one to the database can cause it to be written
// twice (once in the log, and once in the tables). This is already
// an overestimation, as most will delete an existing entry or
// overwrite one. Still, use a conservative safety factor of 2.
if (!CheckDiskSpace(m_chainman.m_options.datadir, 48 * 2 * 2 * CoinsTip().GetCacheSize())) {
if (!CheckDiskSpace(m_chainman.m_options.datadir, 48 * 2 * 2 * CoinsTip().GetDirtyCount())) {
return FatalError(m_chainman.GetNotifications(), state, _("Disk space is too low!"));
}
// Flush the chainstate (which may refer to block index entries).