From 66673f1c1302c986e344c7f44bb0b352213d5dc8 Mon Sep 17 00:00:00 2001 From: Sebastian Falbesoner Date: Thu, 4 Jul 2024 20:12:13 +0200 Subject: [PATCH 1/3] net: fix race condition in self-connect detection Initiating an outbound network connection currently involves the following steps after the socket connection is established (see `CConnman::OpenNetworkConnection` method): 1. set up node state 2. queue VERSION message 3. add new node to vector `m_nodes` If we connect to ourself, it can happen that the sent VERSION message (step 2) is received and processed locally *before* the node object is added to the connection manager's `m_nodes` vector (step 3). In this case, the self-connect remains undiscovered, as the detection doesn't find the outbound peer in `m_nodes` yet (see `CConnman::CheckIncomingNonce`). Fix this by swapping the order of 2. and 3., by taking the `PushNodeVersion` call out of `InitializeNode` and doing that in the `SendMessages` method instead, which is only called for `CNode` instances in `m_nodes`. Thanks go to vasild, mzumsande, dergoegge and sipa for suggestions on how to fix this. --- src/net.h | 2 +- src/net_processing.cpp | 16 +++++++++++++--- src/test/util/net.cpp | 3 ++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/net.h b/src/net.h index 8075975d620..050af0d5ac0 100644 --- a/src/net.h +++ b/src/net.h @@ -991,7 +991,7 @@ public: /** Mutex for anything that is only accessed via the msg processing thread */ static Mutex g_msgproc_mutex; - /** Initialize a peer (setup state, queue any initial messages) */ + /** Initialize a peer (setup state) */ virtual void InitializeNode(CNode& node, ServiceFlags our_services) = 0; /** Handle removal of a peer (clear state) */ diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 8137e17c980..080d6241c9b 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -243,6 +243,9 @@ struct Peer { * Most peers use headers-first syncing, which doesn't use this mechanism */ uint256 m_continuation_block GUARDED_BY(m_block_inv_mutex) {}; + /** Set to true once initial VERSION message was sent (only relevant for outbound peers). */ + bool m_outbound_version_message_sent GUARDED_BY(NetEventsInterface::g_msgproc_mutex){false}; + /** This peer's reported block height when we connected */ std::atomic m_starting_height{-1}; @@ -1677,9 +1680,6 @@ void PeerManagerImpl::InitializeNode(CNode& node, ServiceFlags our_services) LOCK(m_peer_mutex); m_peer_map.emplace_hint(m_peer_map.end(), nodeid, peer); } - if (!node.IsInboundConn()) { - PushNodeVersion(node, *peer); - } } void PeerManagerImpl::ReattemptInitialBroadcast(CScheduler& scheduler) @@ -5326,6 +5326,10 @@ bool PeerManagerImpl::ProcessMessages(CNode* pfrom, std::atomic& interrupt PeerRef peer = GetPeerRef(pfrom->GetId()); if (peer == nullptr) return false; + // For outbound connections, ensure that the initial VERSION message + // has been sent first before processing any incoming messages + if (!pfrom->IsInboundConn() && !peer->m_outbound_version_message_sent) return false; + { LOCK(peer->m_getdata_requests_mutex); if (!peer->m_getdata_requests.empty()) { @@ -5817,6 +5821,12 @@ bool PeerManagerImpl::SendMessages(CNode* pto) // disconnect misbehaving peers even before the version handshake is complete. if (MaybeDiscourageAndDisconnect(*pto, *peer)) return true; + // Initiate version handshake for outbound connections + if (!pto->IsInboundConn() && !peer->m_outbound_version_message_sent) { + PushNodeVersion(*pto, *peer); + peer->m_outbound_version_message_sent = true; + } + // Don't send anything until the version handshake is complete if (!pto->fSuccessfullyConnected || pto->fDisconnect) return true; diff --git a/src/test/util/net.cpp b/src/test/util/net.cpp index 9b38d85d582..e4b2ac57439 100644 --- a/src/test/util/net.cpp +++ b/src/test/util/net.cpp @@ -28,7 +28,8 @@ void ConnmanTestMsg::Handshake(CNode& node, auto& connman{*this}; peerman.InitializeNode(node, local_services); - FlushSendBuffer(node); // Drop the version message added by InitializeNode. + peerman.SendMessages(&node); + FlushSendBuffer(node); // Drop the version message added by SendMessages. CSerializedNetMsg msg_version{ NetMsg::Make(NetMsgType::VERSION, From 0dbcd4c14855fe2cba15a32245572b693dc18c4e Mon Sep 17 00:00:00 2001 From: Sebastian Falbesoner Date: Tue, 9 Jul 2024 12:18:23 +0200 Subject: [PATCH 2/3] net: prevent sending messages in `NetEventsInterface::InitializeNode` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the queueing of the VERSION messages has been moved out of `InitializeNode`, there is no need to pass a mutable `CNode` reference any more. With a const reference, trying to send messages in this method would lead to a compile-time error, e.g.: ---------------------------------------------------------------------------------------------------------------------------------- ... net_processing.cpp: In member function ‘virtual void {anonymous}::PeerManagerImpl::InitializeNode(const CNode&, ServiceFlags)’: net_processing.cpp:1683:21: error: binding reference of type ‘CNode&’ to ‘const CNode’ discards qualifiers 1683 | PushNodeVersion(node, *peer); ... ---------------------------------------------------------------------------------------------------------------------------------- --- src/net.h | 2 +- src/net_processing.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/net.h b/src/net.h index 050af0d5ac0..443f6a8de8d 100644 --- a/src/net.h +++ b/src/net.h @@ -992,7 +992,7 @@ public: static Mutex g_msgproc_mutex; /** Initialize a peer (setup state) */ - virtual void InitializeNode(CNode& node, ServiceFlags our_services) = 0; + virtual void InitializeNode(const CNode& node, ServiceFlags our_services) = 0; /** Handle removal of a peer (clear state) */ virtual void FinalizeNode(const CNode& node) = 0; diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 080d6241c9b..728bfc4c18f 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -501,7 +501,7 @@ public: EXCLUSIVE_LOCKS_REQUIRED(!m_most_recent_block_mutex); /** Implement NetEventsInterface */ - void InitializeNode(CNode& node, ServiceFlags our_services) override EXCLUSIVE_LOCKS_REQUIRED(!m_peer_mutex); + void InitializeNode(const CNode& node, ServiceFlags our_services) override EXCLUSIVE_LOCKS_REQUIRED(!m_peer_mutex); void FinalizeNode(const CNode& node) override EXCLUSIVE_LOCKS_REQUIRED(!m_peer_mutex, !m_headers_presync_mutex); bool HasAllDesirableServiceFlags(ServiceFlags services) const override; bool ProcessMessages(CNode* pfrom, std::atomic& interrupt) override @@ -1662,7 +1662,7 @@ void PeerManagerImpl::UpdateLastBlockAnnounceTime(NodeId node, int64_t time_in_s if (state) state->m_last_block_announcement = time_in_seconds; } -void PeerManagerImpl::InitializeNode(CNode& node, ServiceFlags our_services) +void PeerManagerImpl::InitializeNode(const CNode& node, ServiceFlags our_services) { NodeId nodeid = node.GetId(); { From 16bd283b3ad05daa41259a062aee0fc05b463fa6 Mon Sep 17 00:00:00 2001 From: Sebastian Falbesoner Date: Thu, 4 Jul 2024 20:35:22 +0200 Subject: [PATCH 3/3] Reapply "test: p2p: check that connecting to ourself leads to disconnect" This reverts commit 9ec2c53701a391629b55aeb2804e8060d2c453a4 with a tiny change included (identation of the wait_until call). --- test/functional/p2p_handshake.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test/functional/p2p_handshake.py b/test/functional/p2p_handshake.py index 21959ae522e..9536e748931 100755 --- a/test/functional/p2p_handshake.py +++ b/test/functional/p2p_handshake.py @@ -17,6 +17,7 @@ from test_framework.messages import ( NODE_WITNESS, ) from test_framework.p2p import P2PInterface +from test_framework.util import p2p_port # Desirable service flags for outbound non-pruned and pruned peers. Note that @@ -88,9 +89,11 @@ class P2PHandshakeTest(BitcoinTestFramework): with node.assert_debug_log([f"feeler connection completed"]): self.add_outbound_connection(node, "feeler", NODE_NONE, wait_for_disconnect=True) - # TODO: re-add test introduced in commit 5d2fb14bafe4e80c0a482d99e5ebde07c477f000 - # ("test: p2p: check that connecting to ourself leads to disconnect") once - # the race condition causing issue #30368 is fixed + self.log.info("Check that connecting to ourself leads to immediate disconnect") + with node.assert_debug_log(["connected to self", "disconnecting"]): + node_listen_addr = f"127.0.0.1:{p2p_port(0)}" + node.addconnection(node_listen_addr, "outbound-full-relay", self.options.v2transport) + self.wait_until(lambda: len(node.getpeerinfo()) == 0) if __name__ == '__main__':