Merge bitcoin/bitcoin#29770: index: Check all necessary block data is available before starting to sync

fd06157d1465d93b960e8be6e8e419295abde9a1 test: Add coverage for restarted node without any block sync (Fabian Jahr)
3d7ab7ecb7dfcdfb8aaa45869388887b948841c8 rpc, test: Address feedback from #29668 (Fabian Jahr)
312919c9dd5dba7da20317604e1638bdc5010f14 test: Indices can not start based on block data without undo data (Fabian Jahr)
a9a3b29dd687b4c355e131fefc145e8e48b48b17 index: Check availability of undo data for indices (Fabian Jahr)
881ab4fc82fe3cf36b227cf1ba704448df160745 support multiple block status checks in CheckBlockDataAvailability (furszy)

Pull request description:

  Currently, we check that `BLOCK_HAVE_DATA` is available for all blocks an index needs to sync during startup. However, for `coinstatsindex` and `blockfilterindex` we also need the undo data for these blocks. If that data is missing in the blocks, we are currently still starting to sync each of these indices and then crash later when we encounter the missing data.

  This PR adds explicit knowledge of which block data is needed for each index and then checks its availability during startup before initializing the sync process on them.

  This also addresses a few open comments from #29668 in the last commit.

ACKs for top commit:
  achow101:
    ACK fd06157d1465d93b960e8be6e8e419295abde9a1
  furszy:
    utACK fd06157d1465d93b960e8be6e8e419295abde9a1
  sedited:
    Re-ACK fd06157d1465d93b960e8be6e8e419295abde9a1

Tree-SHA512: e2ed81c93372b02daa8ddf2819df4164f96d92de05b1d48855410ecac78d5fcd9612d7f0e63a9d57d7e75a0b46e1bea278e43ea87f2693af0220d1f9c600e416
This commit is contained in:
Ava Chow 2026-02-20 15:58:48 -08:00
commit d907d65acd
No known key found for this signature in database
GPG Key ID: 17565732E08E5E41
12 changed files with 163 additions and 48 deletions

View File

@ -122,9 +122,6 @@ protected:
void ChainStateFlushed(const kernel::ChainstateRole& role, const CBlockLocator& locator) override;
/// Return custom notification options for index.
[[nodiscard]] virtual interfaces::Chain::NotifyOptions CustomOptions() { return {}; }
/// Initialize internal state from the database and block index.
[[nodiscard]] virtual bool CustomInit(const std::optional<interfaces::BlockRef>& block) { return true; }
@ -151,6 +148,9 @@ public:
/// Get the name of the index for display in logs.
const std::string& GetName() const LIFETIMEBOUND { return m_name; }
/// Return custom notification options for index.
[[nodiscard]] virtual interfaces::Chain::NotifyOptions CustomOptions() { return {}; }
/// Blocks the current thread until the index is caught up to the current
/// state of the block chain. This only blocks if the index has gotten in
/// sync once and only needs to process blocks in the ValidationInterface

View File

@ -63,8 +63,6 @@ private:
std::optional<uint256> ReadFilterHeader(int height, const uint256& expected_block_hash);
protected:
interfaces::Chain::NotifyOptions CustomOptions() override;
bool CustomInit(const std::optional<interfaces::BlockRef>& block) override;
bool CustomCommit(CDBBatch& batch) override;
@ -80,6 +78,8 @@ public:
explicit BlockFilterIndex(std::unique_ptr<interfaces::Chain> chain, BlockFilterType filter_type,
size_t n_cache_size, bool f_memory = false, bool f_wipe = false);
interfaces::Chain::NotifyOptions CustomOptions() override;
BlockFilterType GetFilterType() const { return m_filter_type; }
/** Get a single filter by block. */

View File

@ -52,8 +52,6 @@ private:
bool AllowPrune() const override { return true; }
protected:
interfaces::Chain::NotifyOptions CustomOptions() override;
bool CustomInit(const std::optional<interfaces::BlockRef>& block) override;
bool CustomCommit(CDBBatch& batch) override;
@ -68,6 +66,8 @@ public:
// Constructs the index, which becomes available to be queried.
explicit CoinStatsIndex(std::unique_ptr<interfaces::Chain> chain, size_t n_cache_size, bool f_memory = false, bool f_wipe = false);
interfaces::Chain::NotifyOptions CustomOptions() override;
// Look up stats for a specific block using CBlockIndex
std::optional<kernel::CCoinsStats> LookUpStats(const CBlockIndex& block_index) const;
};

View File

@ -2308,41 +2308,70 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
bool StartIndexBackgroundSync(NodeContext& node)
{
// Find the oldest block among all indexes.
// This block is used to verify that we have the required blocks' data stored on disk,
// starting from that point up to the current tip.
// indexes_start_block='nullptr' means "start from height 0".
std::optional<const CBlockIndex*> indexes_start_block;
std::string older_index_name;
ChainstateManager& chainman = *Assert(node.chainman);
const Chainstate& chainstate = WITH_LOCK(::cs_main, return chainman.ValidatedChainstate());
const CChain& index_chain = chainstate.m_chain;
const int current_height = WITH_LOCK(::cs_main, return index_chain.Height());
for (auto index : node.indexes) {
const IndexSummary& summary = index->GetSummary();
if (summary.synced) continue;
// Skip checking data availability if we have not synced any blocks yet
if (current_height > 0) {
// Before starting index sync, verify that all required block data is available
// on disk from each index's current sync position up to the chain tip.
//
// This is done separately for undo and block data: First we verify block + undo
// data existence from tip down to the lowest height required by any index that
// needs undo data (e.g., coinstatsindex, blockfilterindex). Then, if any
// block-only index needs to sync from a lower height than previously covered,
// verify block data existence down to that lower height.
//
// This avoids checking undo data for blocks where no index requires it,
// though currently block and undo data availability are synchronized on disk
// under normal circumstances.
std::optional<const CBlockIndex*> block_start;
std::string block_start_name;
std::optional<const CBlockIndex*> undo_start;
std::string undo_start_name;
// Get the last common block between the index best block and the active chain
LOCK(::cs_main);
const CBlockIndex* pindex = chainman.m_blockman.LookupBlockIndex(summary.best_block_hash);
if (!index_chain.Contains(pindex)) {
pindex = index_chain.FindFork(pindex);
for (const auto& index : node.indexes) {
const IndexSummary& summary = index->GetSummary();
if (summary.synced) continue;
// Get the last common block between the index best block and the active chain
const CBlockIndex* pindex = nullptr;
{
LOCK(::cs_main);
pindex = chainman.m_blockman.LookupBlockIndex(summary.best_block_hash);
if (!index_chain.Contains(pindex)) {
pindex = index_chain.FindFork(pindex);
}
}
if (!pindex) {
pindex = index_chain.Genesis();
}
bool need_undo = index->CustomOptions().connect_undo_data;
auto& op_start_index = need_undo ? undo_start : block_start;
auto& name_index = need_undo ? undo_start_name : block_start_name;
if (op_start_index && pindex->nHeight >= op_start_index.value()->nHeight) continue;
op_start_index = pindex;
name_index = summary.name;
}
if (!indexes_start_block || !pindex || pindex->nHeight < indexes_start_block.value()->nHeight) {
indexes_start_block = pindex;
older_index_name = summary.name;
if (!pindex) break; // Starting from genesis so no need to look for earlier block.
// Verify all blocks needed to sync to current tip are present including undo data.
if (undo_start) {
LOCK(::cs_main);
if (!chainman.m_blockman.CheckBlockDataAvailability(*index_chain.Tip(), *Assert(undo_start.value()), BlockStatus{BLOCK_HAVE_DATA | BLOCK_HAVE_UNDO})) {
return InitError(Untranslated(strprintf("%s best block of the index goes beyond pruned data (including undo data). Please disable the index or reindex (which will download the whole blockchain again)", undo_start_name)));
}
}
};
// Verify all blocks needed to sync to current tip are present.
if (indexes_start_block) {
LOCK(::cs_main);
const CBlockIndex* start_block = *indexes_start_block;
if (!start_block) start_block = chainman.ActiveChain().Genesis();
if (!chainman.m_blockman.CheckBlockDataAvailability(*index_chain.Tip(), *Assert(start_block))) {
return InitError(Untranslated(strprintf("%s best block of the index goes beyond pruned data. Please disable the index or reindex (which will download the whole blockchain again)", older_index_name)));
// Verify all blocks needed to sync to current tip are present unless we already checked all of them above.
if (block_start && !(undo_start && undo_start.value()->nHeight <= block_start.value()->nHeight)) {
LOCK(::cs_main);
if (!chainman.m_blockman.CheckBlockDataAvailability(*index_chain.Tip(), *Assert(block_start.value()), BlockStatus{BLOCK_HAVE_DATA})) {
return InitError(Untranslated(strprintf("%s best block of the index goes beyond pruned data. Please disable the index or reindex (which will download the whole blockchain again)", block_start_name)));
}
}
}

View File

@ -631,10 +631,18 @@ const CBlockIndex& BlockManager::GetFirstBlock(const CBlockIndex& upper_block, u
return *last_block;
}
bool BlockManager::CheckBlockDataAvailability(const CBlockIndex& upper_block, const CBlockIndex& lower_block)
bool BlockManager::CheckBlockDataAvailability(const CBlockIndex& upper_block, const CBlockIndex& lower_block, BlockStatus block_status)
{
if (!(upper_block.nStatus & BLOCK_HAVE_DATA)) return false;
return &GetFirstBlock(upper_block, BLOCK_HAVE_DATA, &lower_block) == &lower_block;
if (!(upper_block.nStatus & block_status)) return false;
const auto& first_block = GetFirstBlock(upper_block, block_status, &lower_block);
// Special case: the genesis block has no undo data
if (block_status & BLOCK_HAVE_UNDO && lower_block.nHeight == 0 && first_block.nHeight == 1) {
// This might indicate missing data, or it could simply reflect the expected absence of undo data for the genesis block.
// To distinguish between the two, check if all required block data *except* undo is available up to the genesis block.
BlockStatus flags{block_status & ~BLOCK_HAVE_UNDO};
return first_block.pprev && first_block.pprev->nStatus & flags;
}
return &first_block == &lower_block;
}
// If we're using -prune with -reindex, then delete block files that will be ignored by the

View File

@ -412,10 +412,11 @@ public:
/** Calculate the amount of disk space the block & undo files currently use */
uint64_t CalculateCurrentUsage();
//! Check if all blocks in the [upper_block, lower_block] range have data available.
//! Check if all blocks in the [upper_block, lower_block] range have data available as
//! defined by the status mask.
//! The caller is responsible for ensuring that lower_block is an ancestor of upper_block
//! (part of the same chain).
bool CheckBlockDataAvailability(const CBlockIndex& upper_block, const CBlockIndex& lower_block) EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
bool CheckBlockDataAvailability(const CBlockIndex& upper_block, const CBlockIndex& lower_block, BlockStatus block_status = BLOCK_HAVE_DATA) EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
/**
* @brief Returns the earliest block with specified `status_mask` flags set after

View File

@ -893,7 +893,7 @@ std::optional<int> GetPruneHeight(const BlockManager& blockman, const CChain& ch
if (!first_block || !chain_tip) return std::nullopt;
// If the chain tip is pruned, everything is pruned.
if (!((chain_tip->nStatus & BLOCK_HAVE_MASK) == BLOCK_HAVE_MASK)) return chain_tip->nHeight;
if ((chain_tip->nStatus & BLOCK_HAVE_MASK) != BLOCK_HAVE_MASK) return chain_tip->nHeight;
const auto& first_unpruned{blockman.GetFirstBlock(*chain_tip, /*status_mask=*/BLOCK_HAVE_MASK, first_block)};
if (&first_unpruned == first_block) {
@ -1383,7 +1383,7 @@ RPCHelpMan getblockchaininfo()
{RPCResult::Type::STR_HEX, "chainwork", "total amount of work in active chain, in hexadecimal"},
{RPCResult::Type::NUM, "size_on_disk", "the estimated size of the block and undo files on disk"},
{RPCResult::Type::BOOL, "pruned", "if the blocks are subject to pruning"},
{RPCResult::Type::NUM, "pruneheight", /*optional=*/true, "height of the last block pruned, plus one (only present if pruning is enabled)"},
{RPCResult::Type::NUM, "pruneheight", /*optional=*/true, "the first block unpruned, all previous blocks were pruned (only present if pruning is enabled)"},
{RPCResult::Type::BOOL, "automatic_pruning", /*optional=*/true, "whether automatic pruning is enabled (only present if pruning is enabled)"},
{RPCResult::Type::NUM, "prune_target_size", /*optional=*/true, "the target size used by pruning (only present if automatic pruning is enabled)"},
{RPCResult::Type::STR_HEX, "signet_challenge", /*optional=*/true, "the block challenge (aka. block script), in hexadecimal (only present if the current network is a signet)"},

View File

@ -9,15 +9,18 @@
#include <core_io.h>
#include <streams.h>
#include <sync.h>
#include <threadsafety.h>
#include <util/fs.h>
#include <validation.h>
#include <any>
#include <cstdint>
#include <optional>
#include <vector>
class CBlock;
class CBlockIndex;
class CChain;
class Chainstate;
class UniValue;
namespace node {

View File

@ -80,7 +80,7 @@ BOOST_AUTO_TEST_CASE(get_difficulty_for_very_high_target)
//! Prune chain from height down to genesis block and check that
//! GetPruneHeight returns the correct value
static void CheckGetPruneHeight(node::BlockManager& blockman, CChain& chain, int height) EXCLUSIVE_LOCKS_REQUIRED(::cs_main)
static void CheckGetPruneHeight(const node::BlockManager& blockman, const CChain& chain, int height) EXCLUSIVE_LOCKS_REQUIRED(::cs_main)
{
AssertLockHeld(::cs_main);
@ -98,8 +98,8 @@ static void CheckGetPruneHeight(node::BlockManager& blockman, CChain& chain, int
BOOST_FIXTURE_TEST_CASE(get_prune_height, TestChain100Setup)
{
LOCK(::cs_main);
auto& chain = m_node.chainman->ActiveChain();
auto& blockman = m_node.chainman->m_blockman;
const auto& chain = m_node.chainman->ActiveChain();
const auto& blockman = m_node.chainman->m_blockman;
// Fresh chain of 100 blocks without any pruned blocks, so std::nullopt should be returned
BOOST_CHECK(!GetPruneHeight(blockman, chain).has_value());

View File

@ -126,6 +126,14 @@ BOOST_FIXTURE_TEST_CASE(blockmanager_block_data_availability, TestChain100Setup)
CBlockIndex* lower_block = chainman->ActiveChain()[tip.nHeight / 2];
BOOST_CHECK(blockman.CheckBlockDataAvailability(tip, *lower_block));
// Ensure we don't fail due to the expected absence of undo data in the genesis block
CBlockIndex* upper_block = chainman->ActiveChain()[2];
CBlockIndex* genesis = chainman->ActiveChain()[0];
BOOST_CHECK(blockman.CheckBlockDataAvailability(*upper_block, *genesis, BlockStatus{BLOCK_HAVE_DATA | BLOCK_HAVE_UNDO}));
// Ensure we detect absence of undo data in the first block
chainman->ActiveChain()[1]->nStatus &= ~BLOCK_HAVE_UNDO;
BOOST_CHECK(!blockman.CheckBlockDataAvailability(tip, *genesis, BlockStatus{BLOCK_HAVE_DATA | BLOCK_HAVE_UNDO}));
// Prune half of the blocks
int height_to_prune = tip.nHeight / 2;
CBlockIndex* first_available_block = chainman->ActiveChain()[height_to_prune + 1];
@ -136,6 +144,12 @@ BOOST_FIXTURE_TEST_CASE(blockmanager_block_data_availability, TestChain100Setup)
BOOST_CHECK_EQUAL(&blockman.GetFirstBlock(tip, BLOCK_HAVE_DATA), first_available_block);
BOOST_CHECK(blockman.CheckBlockDataAvailability(tip, *first_available_block));
BOOST_CHECK(!blockman.CheckBlockDataAvailability(tip, *last_pruned_block));
// Simulate that the first available block is missing undo data and
// detect this by using a status mask.
first_available_block->nStatus &= ~BLOCK_HAVE_UNDO;
BOOST_CHECK(!blockman.CheckBlockDataAvailability(tip, *first_available_block, BlockStatus{BLOCK_HAVE_DATA | BLOCK_HAVE_UNDO}));
BOOST_CHECK(blockman.CheckBlockDataAvailability(tip, *first_available_block, BlockStatus{BLOCK_HAVE_DATA}));
}
BOOST_FIXTURE_TEST_CASE(blockmanager_block_data_part, TestChain100Setup)

View File

@ -5,13 +5,28 @@
"""Test indices in conjunction with prune."""
import concurrent.futures
import os
from test_framework.authproxy import JSONRPCException
from test_framework.test_framework import BitcoinTestFramework
from test_framework.test_node import TestNode
from test_framework.util import (
assert_equal,
assert_greater_than,
assert_raises_rpc_error,
)
from typing import List, Any
def send_batch_request(node: TestNode, method: str, params: List[Any]) -> List[Any]:
"""Send batch request and parse all results"""
data = [{"method": method, "params": p} for p in params]
response = node.batch(data)
result = []
for item in response:
assert item["error"] is None, item["error"]
result.append(item["result"])
return result
class FeatureIndexPruneTest(BitcoinTestFramework):
def set_test_params(self):
@ -57,6 +72,13 @@ class FeatureIndexPruneTest(BitcoinTestFramework):
for i in range(3):
self.restart_node(i, extra_args=["-fastprune", "-prune=1"])
def check_for_block(self, node, hash):
try:
self.nodes[node].getblock(hash)
return True
except JSONRPCException:
return False
def run_test(self):
filter_nodes = [self.nodes[0], self.nodes[2]]
stats_nodes = [self.nodes[1], self.nodes[2]]
@ -130,12 +152,38 @@ class FeatureIndexPruneTest(BitcoinTestFramework):
self.stop_node(i)
self.log.info("make sure we get an init error when starting the nodes again with the indices")
filter_msg = "Error: basic block filter index best block of the index goes beyond pruned data. Please disable the index or reindex (which will download the whole blockchain again)"
stats_msg = "Error: coinstatsindex best block of the index goes beyond pruned data. Please disable the index or reindex (which will download the whole blockchain again)"
filter_msg = "Error: basic block filter index best block of the index goes beyond pruned data (including undo data). Please disable the index or reindex (which will download the whole blockchain again)"
stats_msg = "Error: coinstatsindex best block of the index goes beyond pruned data (including undo data). Please disable the index or reindex (which will download the whole blockchain again)"
end_msg = f"{os.linesep}Error: A fatal internal error occurred, see debug.log for details: Failed to start indexes, shutting down…"
for i, msg in enumerate([filter_msg, stats_msg, filter_msg]):
self.nodes[i].assert_start_raises_init_error(extra_args=self.extra_args[i], expected_msg=msg+end_msg)
self.log.info("fetching the missing blocks with getblockfrompeer doesn't work for block filter index and coinstatsindex")
# Only checking the first two nodes since this test takes a long time
# and the third node is kind of redundant in this context
for i, msg in enumerate([filter_msg, stats_msg]):
self.restart_node(i, extra_args=["-prune=1", "-fastprune"])
node = self.nodes[i]
prune_height = node.getblockchaininfo()["pruneheight"]
self.connect_nodes(i, 3)
peers = node.getpeerinfo()
assert_equal(len(peers), 1)
peer_id = peers[0]["id"]
# 1500 is the height to where the indices were able to sync previously
hashes = send_batch_request(node, "getblockhash", [[a] for a in range(1500, prune_height)])
send_batch_request(node, "getblockfrompeer", [[bh, peer_id] for bh in hashes])
# Ensure all necessary blocks have been fetched before proceeding
for bh in hashes:
self.wait_until(lambda: self.check_for_block(i, bh), timeout=10)
# Upon restart we expect the same errors as previously although all
# necessary blocks have been fetched. Both indices need the undo
# data of the blocks to be available as well and getblockfrompeer
# can not provide that.
self.stop_node(i)
node.assert_start_raises_init_error(extra_args=self.extra_args[i], expected_msg=msg+end_msg)
self.log.info("make sure the nodes start again with the indices and an additional -reindex arg")
for i in range(3):
restart_args = self.extra_args[i] + ["-reindex"]

View File

@ -26,8 +26,8 @@ class InitTest(BitcoinTestFramework):
"""
def set_test_params(self):
self.setup_clean_chain = False
self.num_nodes = 1
self.setup_clean_chain = True
self.num_nodes = 2
self.uses_wallet = None
def check_clean_start(self, node, extra_args):
@ -42,8 +42,10 @@ class InitTest(BitcoinTestFramework):
"""
- test terminating initialization after seeing a certain log line.
"""
self.stop_node(0)
self.start_node(0)
node = self.nodes[0]
self.generate(node, 200, sync_fun=self.no_op)
self.stop_node(0)
def sigterm_node():
if platform.system() == 'Windows':
@ -307,11 +309,21 @@ class InitTest(BitcoinTestFramework):
assert_equal(result["height"], current_height)
node.wait_until_stopped()
def init_empty_test(self):
self.log.info("Test that stopping and restarting a node that has done nothing is not causing a failure")
options = [
[],
["-txindex=1", "-blockfilterindex=1", "-coinstatsindex=1"],
]
for option in options:
self.restart_node(1, option)
def run_test(self):
self.init_pid_test()
self.init_stress_test_interrupt()
self.init_stress_test_removals()
self.break_wait_test()
self.init_empty_test()
if __name__ == '__main__':