From 7e52b1b945c4137e0fb05715090635ce82ed04b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 30 Sep 2025 17:38:36 -0400 Subject: [PATCH 1/3] fuzz: call `EmplaceCoinInternalDANGER` as well in `SimulationTest` Adds test coverage by randomly calling `EmplaceCoinInternalDANGER` in `SimulationTest` to verify it remains correct as we modify it in a future commit. Co-authored-by: Andrew Toth --- src/test/coins_tests.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/test/coins_tests.cpp b/src/test/coins_tests.cpp index 8c0756d8528..d5219fceb07 100644 --- a/src/test/coins_tests.cpp +++ b/src/test/coins_tests.cpp @@ -189,8 +189,11 @@ void SimulationTest(CCoinsView* base, bool fake_best_block) (coin.IsSpent() ? added_an_entry : updated_an_entry) = true; coin = newcoin; } - bool is_overwrite = !coin.IsSpent() || m_rng.rand32() & 1; - stack.back()->AddCoin(COutPoint(txid, 0), std::move(newcoin), is_overwrite); + if (COutPoint op(txid, 0); !stack.back()->map().contains(op) && !newcoin.out.scriptPubKey.IsUnspendable() && m_rng.randbool()) { + stack.back()->EmplaceCoinInternalDANGER(std::move(op), std::move(newcoin)); + } else { + stack.back()->AddCoin(op, std::move(newcoin), /*possible_overwrite=*/!coin.IsSpent() || m_rng.randbool()); + } } else { // Spend the coin. removed_an_entry = true; From b413491a1cdd9a51f2aa10b775650f54f6785e3e Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 21 Jan 2025 14:02:10 -0500 Subject: [PATCH 2/3] coins: Keep track of number of dirty entries in `CCoinsViewCache` Adds `m_dirty_count` member to track the running count of dirty cache entries as follows: * Incremented when entries are marked dirty via `CCoinsCacheEntry::SetDirty` * Decremented when dirty entries are removed or cleaned * Passed through `CoinsViewCacheCursor` and updated during iteration The dirty count is needed because after non-wiping flushes (introduced in #28280 and #28233), the percentage of dirty entries in the cache may be far below 100%. Using total cache size for flush warnings and disk space checks is therefore misleading. Updates all test code to properly initialize and maintain the dirty count. Co-authored-by: l0rinc Co-authored-by: Andrew Toth Co-authored-by: optout <13562139+optout21@users.noreply.github.com> --- src/coins.cpp | 23 ++++++++++++++++++----- src/coins.h | 14 +++++++++++--- src/test/coins_tests.cpp | 18 ++++++++++++++++-- src/test/fuzz/coins_view.cpp | 4 +++- 4 files changed, 48 insertions(+), 11 deletions(-) diff --git a/src/coins.cpp b/src/coins.cpp index fc33c521617..1bc01d20a5b 100644 --- a/src/coins.cpp +++ b/src/coins.cpp @@ -98,10 +98,12 @@ void CCoinsViewCache::AddCoin(const COutPoint &outpoint, Coin&& coin, bool possi fresh = !it->second.IsDirty(); } if (!inserted) { + m_dirty_count -= it->second.IsDirty(); cachedCoinsUsage -= it->second.coin.DynamicMemoryUsage(); } it->second.coin = std::move(coin); CCoinsCacheEntry::SetDirty(*it, m_sentinel); + ++m_dirty_count; if (fresh) CCoinsCacheEntry::SetFresh(*it, m_sentinel); cachedCoinsUsage += it->second.coin.DynamicMemoryUsage(); TRACEPOINT(utxocache, add, @@ -117,6 +119,7 @@ void CCoinsViewCache::EmplaceCoinInternalDANGER(COutPoint&& outpoint, Coin&& coi auto [it, inserted] = cacheCoins.try_emplace(std::move(outpoint), std::move(coin)); if (inserted) { CCoinsCacheEntry::SetDirty(*it, m_sentinel); + ++m_dirty_count; cachedCoinsUsage += mem_usage; } } @@ -135,6 +138,7 @@ void AddCoins(CCoinsViewCache& cache, const CTransaction &tx, int nHeight, bool bool CCoinsViewCache::SpendCoin(const COutPoint &outpoint, Coin* moveout) { CCoinsMap::iterator it = FetchCoin(outpoint); if (it == cacheCoins.end()) return false; + m_dirty_count -= it->second.IsDirty(); cachedCoinsUsage -= it->second.coin.DynamicMemoryUsage(); TRACEPOINT(utxocache, spent, outpoint.hash.data(), @@ -149,6 +153,7 @@ bool CCoinsViewCache::SpendCoin(const COutPoint &outpoint, Coin* moveout) { cacheCoins.erase(it); } else { CCoinsCacheEntry::SetDirty(*it, m_sentinel); + ++m_dirty_count; it->second.coin.Clear(); } return true; @@ -207,8 +212,9 @@ void CCoinsViewCache::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& ha } else { entry.coin = it->second.coin; } - cachedCoinsUsage += entry.coin.DynamicMemoryUsage(); CCoinsCacheEntry::SetDirty(*itUs, m_sentinel); + ++m_dirty_count; + cachedCoinsUsage += entry.coin.DynamicMemoryUsage(); // We can mark it FRESH in the parent if it was FRESH in the child // Otherwise it might have just been flushed from the parent's cache // and already exist in the grandparent @@ -227,6 +233,7 @@ void CCoinsViewCache::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& ha if (itUs->second.IsFresh() && it->second.coin.IsSpent()) { // The grandparent cache does not have an entry, and the coin // has been spent. We can just delete it from the parent cache. + m_dirty_count -= itUs->second.IsDirty(); cachedCoinsUsage -= itUs->second.coin.DynamicMemoryUsage(); cacheCoins.erase(itUs); } else { @@ -240,7 +247,10 @@ void CCoinsViewCache::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& ha itUs->second.coin = it->second.coin; } cachedCoinsUsage += itUs->second.coin.DynamicMemoryUsage(); - CCoinsCacheEntry::SetDirty(*itUs, m_sentinel); + if (!itUs->second.IsDirty()) { + CCoinsCacheEntry::SetDirty(*itUs, m_sentinel); + ++m_dirty_count; + } // NOTE: It isn't safe to mark the coin as FRESH in the parent // cache. If it already existed and was spent in the parent // cache then marking it FRESH would prevent that spentness @@ -253,8 +263,9 @@ void CCoinsViewCache::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& ha void CCoinsViewCache::Flush(bool reallocate_cache) { - auto cursor{CoinsViewCacheCursor(m_sentinel, cacheCoins, /*will_erase=*/true)}; + auto cursor{CoinsViewCacheCursor(m_dirty_count, m_sentinel, cacheCoins, /*will_erase=*/true)}; base->BatchWrite(cursor, hashBlock); + Assume(m_dirty_count == 0); cacheCoins.clear(); if (reallocate_cache) { ReallocateCache(); @@ -264,8 +275,9 @@ void CCoinsViewCache::Flush(bool reallocate_cache) void CCoinsViewCache::Sync() { - auto cursor{CoinsViewCacheCursor(m_sentinel, cacheCoins, /*will_erase=*/false)}; + auto cursor{CoinsViewCacheCursor(m_dirty_count, m_sentinel, cacheCoins, /*will_erase=*/false)}; base->BatchWrite(cursor, hashBlock); + Assume(m_dirty_count == 0); if (m_sentinel.second.Next() != &m_sentinel) { /* BatchWrite must clear flags of all entries */ throw std::logic_error("Not all unspent flagged entries were cleared"); @@ -276,6 +288,7 @@ void CCoinsViewCache::Reset() noexcept { cacheCoins.clear(); cachedCoinsUsage = 0; + m_dirty_count = 0; SetBestBlock(uint256::ZERO); } @@ -348,7 +361,7 @@ void CCoinsViewCache::SanityCheck() const // Count the number of entries actually in the list. ++count_linked; } - assert(count_linked == count_dirty); + assert(count_dirty == count_linked && count_dirty == m_dirty_count); assert(recomputed_usage == cachedCoinsUsage); } diff --git a/src/coins.h b/src/coins.h index 4b39c0bacd2..9efd23c581d 100644 --- a/src/coins.h +++ b/src/coins.h @@ -265,10 +265,11 @@ struct CoinsViewCacheCursor //! This is an optimization compared to erasing all entries as the cursor iterates them when will_erase is set. //! Calling CCoinsMap::clear() afterwards is faster because a CoinsCachePair cannot be coerced back into a //! CCoinsMap::iterator to be erased, and must therefore be looked up again by key in the CCoinsMap before being erased. - CoinsViewCacheCursor(CoinsCachePair& sentinel LIFETIMEBOUND, + CoinsViewCacheCursor(size_t& dirty_count LIFETIMEBOUND, + CoinsCachePair& sentinel LIFETIMEBOUND, CCoinsMap& map LIFETIMEBOUND, bool will_erase) noexcept - : m_sentinel(sentinel), m_map(map), m_will_erase(will_erase) {} + : m_dirty_count(dirty_count), m_sentinel(sentinel), m_map(map), m_will_erase(will_erase) {} inline CoinsCachePair* Begin() const noexcept { return m_sentinel.second.Next(); } inline CoinsCachePair* End() const noexcept { return &m_sentinel; } @@ -277,6 +278,7 @@ struct CoinsViewCacheCursor inline CoinsCachePair* NextAndMaybeErase(CoinsCachePair& current) noexcept { const auto next_entry{current.second.Next()}; + m_dirty_count -= current.second.IsDirty(); // If we are not going to erase the cache, we must still erase spent entries. // Otherwise, clear the state of the entry. if (!m_will_erase) { @@ -292,6 +294,7 @@ struct CoinsViewCacheCursor inline bool WillErase(CoinsCachePair& current) const noexcept { return m_will_erase || current.second.coin.IsSpent(); } private: + size_t& m_dirty_count; CoinsCachePair& m_sentinel; CCoinsMap& m_map; bool m_will_erase; @@ -369,6 +372,8 @@ protected: /* Cached dynamic memory usage for the inner Coin objects. */ mutable size_t cachedCoinsUsage{0}; + /* Running count of dirty Coin cache entries. */ + mutable size_t m_dirty_count{0}; /** * Discard all modifications made to this cache without flushing to the base view. @@ -458,9 +463,12 @@ public: */ void Uncache(const COutPoint &outpoint); - //! Calculate the size of the cache (in number of transaction outputs) + //! Size of the cache (in number of transaction outputs) unsigned int GetCacheSize() const; + //! Number of dirty cache entries (transaction outputs) + size_t GetDirtyCount() const noexcept { return m_dirty_count; } + //! Calculate the size of the cache (in bytes) size_t DynamicMemoryUsage() const; diff --git a/src/test/coins_tests.cpp b/src/test/coins_tests.cpp index d5219fceb07..92935ba6c23 100644 --- a/src/test/coins_tests.cpp +++ b/src/test/coins_tests.cpp @@ -95,6 +95,7 @@ public: CCoinsMap& map() const { return cacheCoins; } CoinsCachePair& sentinel() const { return m_sentinel; } size_t& usage() const { return cachedCoinsUsage; } + size_t& dirty() const { return m_dirty_count; } }; } // namespace @@ -656,8 +657,10 @@ static void WriteCoinsViewEntry(CCoinsView& view, const MaybeCoin& cache_coin) CCoinsMapMemoryResource resource; CCoinsMap map{0, CCoinsMap::hasher{}, CCoinsMap::key_equal{}, &resource}; if (cache_coin) InsertCoinsMapEntry(map, sentinel, *cache_coin); - auto cursor{CoinsViewCacheCursor(sentinel, map, /*will_erase=*/true)}; + size_t dirty_count{cache_coin && cache_coin->IsDirty()}; + auto cursor{CoinsViewCacheCursor(dirty_count, sentinel, map, /*will_erase=*/true)}; view.BatchWrite(cursor, {}); + BOOST_CHECK_EQUAL(dirty_count, 0U); } class SingleEntryCacheTest @@ -667,7 +670,10 @@ public: { auto base_cache_coin{base_value == ABSENT ? MISSING : CoinEntry{base_value, CoinEntry::State::DIRTY}}; WriteCoinsViewEntry(base, base_cache_coin); - if (cache_coin) cache.usage() += InsertCoinsMapEntry(cache.map(), cache.sentinel(), *cache_coin); + if (cache_coin) { + cache.usage() += InsertCoinsMapEntry(cache.map(), cache.sentinel(), *cache_coin); + cache.dirty() += cache_coin->IsDirty(); + } } CCoinsView root; @@ -1128,6 +1134,7 @@ BOOST_AUTO_TEST_CASE(ccoins_reset_guard) const Coin coin{CTxOut{m_rng.randrange(10), CScript{} << m_rng.randbytes(CScriptBase::STATIC_SIZE + 1)}, 1, false}; cache.EmplaceCoinInternalDANGER(COutPoint{outpoint}, Coin{coin}); + BOOST_CHECK_EQUAL(cache.GetDirtyCount(), 1U); uint256 cache_best_block{m_rng.rand256()}; cache.SetBestBlock(cache_best_block); @@ -1137,12 +1144,14 @@ BOOST_AUTO_TEST_CASE(ccoins_reset_guard) BOOST_CHECK(cache.AccessCoin(outpoint) == coin); BOOST_CHECK(!cache.AccessCoin(outpoint).IsSpent()); BOOST_CHECK_EQUAL(cache.GetCacheSize(), 1); + BOOST_CHECK_EQUAL(cache.GetDirtyCount(), 1); BOOST_CHECK_EQUAL(cache.GetBestBlock(), cache_best_block); BOOST_CHECK(!root_cache.HaveCoinInCache(outpoint)); } BOOST_CHECK(cache.AccessCoin(outpoint).IsSpent()); BOOST_CHECK_EQUAL(cache.GetCacheSize(), 0); + BOOST_CHECK_EQUAL(cache.GetDirtyCount(), 0); BOOST_CHECK_EQUAL(cache.GetBestBlock(), base_best_block); BOOST_CHECK(!root_cache.HaveCoinInCache(outpoint)); @@ -1153,8 +1162,13 @@ BOOST_AUTO_TEST_CASE(ccoins_reset_guard) BOOST_CHECK(cache.AccessCoin(outpoint).IsSpent()); BOOST_CHECK_EQUAL(cache.GetCacheSize(), 0); + BOOST_CHECK_EQUAL(cache.GetDirtyCount(), 0U); BOOST_CHECK_EQUAL(cache.GetBestBlock(), base_best_block); BOOST_CHECK(!root_cache.HaveCoinInCache(outpoint)); + + // Flush should be a no-op after reset. + cache.Flush(); + BOOST_CHECK_EQUAL(cache.GetDirtyCount(), 0U); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/fuzz/coins_view.cpp b/src/test/fuzz/coins_view.cpp index 699a45e2c49..ec10dbb31f9 100644 --- a/src/test/fuzz/coins_view.cpp +++ b/src/test/fuzz/coins_view.cpp @@ -139,6 +139,7 @@ void TestCoinsView(FuzzedDataProvider& fuzzed_data_provider, CCoinsView& backend [&] { CoinsCachePair sentinel{}; sentinel.second.SelfRef(sentinel); + size_t dirty_count{0}; CCoinsMapMemoryResource resource; CCoinsMap coins_map{0, SaltedOutpointHasher{/*deterministic=*/true}, CCoinsMap::key_equal{}, &resource}; LIMITED_WHILE(good_data && fuzzed_data_provider.ConsumeBool(), 10'000) @@ -159,10 +160,11 @@ void TestCoinsView(FuzzedDataProvider& fuzzed_data_provider, CCoinsView& backend auto it{coins_map.emplace(random_out_point, std::move(coins_cache_entry)).first}; if (dirty) CCoinsCacheEntry::SetDirty(*it, sentinel); if (fresh) CCoinsCacheEntry::SetFresh(*it, sentinel); + dirty_count += dirty; } bool expected_code_path = false; try { - auto cursor{CoinsViewCacheCursor(sentinel, coins_map, /*will_erase=*/true)}; + auto cursor{CoinsViewCacheCursor(dirty_count, sentinel, coins_map, /*will_erase=*/true)}; uint256 best_block{coins_view_cache.GetBestBlock()}; if (fuzzed_data_provider.ConsumeBool()) best_block = ConsumeUInt256(fuzzed_data_provider); // Set best block hash to non-null to satisfy the assertion in CCoinsViewDB::BatchWrite(). From afb1bc120ecce2bf663093e15c93f5592c0d4a98 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 21 Jan 2025 14:20:54 -0500 Subject: [PATCH 3/3] validation: Use dirty entry count in flush warnings and disk space checks Changes flush warnings to use the actual number of dirty entries being written rather than total cache size or memory usage: * Moves warning from `FlushStateToDisk` to `CCoinsViewDB::BatchWrite` so it applies to both regular flushes and `AssumeUTXO` snapshot writes * Changes threshold from `WARN_FLUSH_COINS_SIZE` (1 GiB) to `WARN_FLUSH_COINS_COUNT` (10M entries), approximately equivalent - this also helps with the confusion caused by UTXO size difference on-disk vs in-memory * Moves benchmark logging to `BatchWrite` where the actual disk I/O occurs to make sure AssumeUTXO also warns * Uses dirty count for disk space check (48 bytes per entry estimate) * Removes redundant `changed` counter since `dirty_count` is now tracked This ensures users are warned appropriately even when only a fraction of the cache is dirty, and provides accurate warnings during `AssumeUTXO` loads. Co-authored-by: l0rinc --- src/coins.h | 2 ++ src/txdb.cpp | 14 ++++++++++---- src/validation.cpp | 12 +++--------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/coins.h b/src/coins.h index 9efd23c581d..8ff296059f8 100644 --- a/src/coins.h +++ b/src/coins.h @@ -293,6 +293,8 @@ struct CoinsViewCacheCursor } inline bool WillErase(CoinsCachePair& current) const noexcept { return m_will_erase || current.second.coin.IsSpent(); } + size_t GetDirtyCount() const noexcept { return m_dirty_count; } + size_t GetTotalCount() const noexcept { return m_map.size(); } private: size_t& m_dirty_count; CoinsCachePair& m_sentinel; diff --git a/src/txdb.cpp b/src/txdb.cpp index 21f6eb93867..2c39cf2767b 100644 --- a/src/txdb.cpp +++ b/src/txdb.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -25,6 +26,9 @@ static constexpr uint8_t DB_HEAD_BLOCKS{'H'}; // Keys used in previous version that might still be found in the DB: static constexpr uint8_t DB_COINS{'c'}; +// Threshold for warning when writing this many dirty cache entries to disk. +static constexpr size_t WARN_FLUSH_COINS_COUNT{10'000'000}; + bool CCoinsViewDB::NeedsUpgrade() { std::unique_ptr cursor{m_db->NewIterator()}; @@ -97,7 +101,7 @@ void CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& hashB { CDBBatch batch(*m_db); size_t count = 0; - size_t changed = 0; + const size_t dirty_count{cursor.GetDirtyCount()}; assert(!hashBlock.IsNull()); uint256 old_tip = GetBestBlock(); @@ -113,6 +117,10 @@ void CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& hashB } } + if (dirty_count > WARN_FLUSH_COINS_COUNT) LogWarning("Flushing large (%d entries) UTXO set to disk, it may take several minutes", dirty_count); + LOG_TIME_MILLIS_WITH_CATEGORY(strprintf("write coins cache to disk (%d out of %d cached coins)", + dirty_count, cursor.GetTotalCount()), BCLog::BENCH); + // In the first batch, mark the database as being in the middle of a // transition from old_tip to hashBlock. // A vector is used for future extensibility, as we may want to support @@ -128,8 +136,6 @@ void CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& hashB } else { batch.Write(entry, it->second.coin); } - - changed++; } count++; it = cursor.NextAndMaybeErase(*it); @@ -154,7 +160,7 @@ void CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256& hashB LogDebug(BCLog::COINDB, "Writing final batch of %.2f MiB\n", batch.ApproximateSize() * (1.0 / 1048576.0)); m_db->WriteBatch(batch); - LogDebug(BCLog::COINDB, "Committed %u changed transaction outputs (out of %u) to coin database...\n", (unsigned int)changed, (unsigned int)count); + LogDebug(BCLog::COINDB, "Committed %u changed transaction outputs (out of %u) to coin database...", (unsigned int)dirty_count, (unsigned int)count); } size_t CCoinsViewDB::EstimateSize() const diff --git a/src/validation.cpp b/src/validation.cpp index c200c3d6cd8..0a7c57df25e 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -89,8 +89,6 @@ using node::CBlockIndexHeightOnlyComparator; using node::CBlockIndexWorkComparator; using node::SnapshotMetadata; -/** Size threshold for warning about slow UTXO set flush to disk. */ -static constexpr size_t WARN_FLUSH_COINS_SIZE = 1 << 30; // 1 GiB /** Time window to wait between writing blocks/block index and chainstate to disk. * Randomize writing time inside the window to prevent a situation where the * network over time settles into a few cohorts of synchronized writers. @@ -2708,8 +2706,8 @@ bool Chainstate::FlushStateToDisk( std::set setFilesToPrune; bool full_flush_completed = false; - const size_t coins_count = CoinsTip().GetCacheSize(); - const size_t coins_mem_usage = CoinsTip().DynamicMemoryUsage(); + [[maybe_unused]] const size_t coins_count{CoinsTip().GetCacheSize()}; + [[maybe_unused]] const size_t coins_mem_usage{CoinsTip().DynamicMemoryUsage()}; try { { @@ -2802,16 +2800,12 @@ bool Chainstate::FlushStateToDisk( } if (!CoinsTip().GetBestBlock().IsNull()) { - if (coins_mem_usage >= WARN_FLUSH_COINS_SIZE) LogWarning("Flushing large (%d GiB) UTXO set to disk, it may take several minutes", coins_mem_usage >> 30); - LOG_TIME_MILLIS_WITH_CATEGORY(strprintf("write coins cache to disk (%d coins, %.2fKiB)", - coins_count, coins_mem_usage >> 10), BCLog::BENCH); - // Typical Coin structures on disk are around 48 bytes in size. // Pushing a new one to the database can cause it to be written // twice (once in the log, and once in the tables). This is already // an overestimation, as most will delete an existing entry or // overwrite one. Still, use a conservative safety factor of 2. - if (!CheckDiskSpace(m_chainman.m_options.datadir, 48 * 2 * 2 * CoinsTip().GetCacheSize())) { + if (!CheckDiskSpace(m_chainman.m_options.datadir, 48 * 2 * 2 * CoinsTip().GetDirtyCount())) { return FatalError(m_chainman.GetNotifications(), state, _("Disk space is too low!")); } // Flush the chainstate (which may refer to block index entries).