From 57ce645f05d18d8ad10711c347a5989076f1f788 Mon Sep 17 00:00:00 2001 From: willcl-ark Date: Wed, 2 Apr 2025 09:46:56 +0100 Subject: [PATCH 1/3] net: filter for default routes in netlink responses Filter netlink responses to only consider default routes by checking the destination prefix length (rtm_dst_len == 0). Previously, we selected the first route with an RTA_GATEWAY attribute, which for IPv6 often resulted in choosing a non-default route instead of the actual default. This caused occasional PCP port mapping failures because a gateway for a non-default route was selected. --- src/common/netif.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/common/netif.cpp b/src/common/netif.cpp index 7424f977c7e..dbdd3ddde97 100644 --- a/src/common/netif.cpp +++ b/src/common/netif.cpp @@ -97,6 +97,11 @@ std::optional<CNetAddr> QueryDefaultGatewayImpl(sa_family_t family) rtmsg* r = (rtmsg*)NLMSG_DATA(hdr); int remaining_len = RTM_PAYLOAD(hdr); + // Only consider default routes (destination prefix length of 0). + if (r->rtm_dst_len != 0) { + continue; + } + // Iterate over the attributes. rtattr *rta_gateway = nullptr; int scope_id = 0; From 42e99ad77396e4e9b02d67daf46349e215e99a0f Mon Sep 17 00:00:00 2001 From: willcl-ark Date: Wed, 2 Apr 2025 09:47:27 +0100 Subject: [PATCH 2/3] net: skip non-route netlink responses This shouldn't usually be hit, but is a good belt-and-braces measure. --- src/common/netif.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/common/netif.cpp b/src/common/netif.cpp index dbdd3ddde97..ef0485b8a2d 100644 --- a/src/common/netif.cpp +++ b/src/common/netif.cpp @@ -97,6 +97,10 @@ std::optional<CNetAddr> QueryDefaultGatewayImpl(sa_family_t family) rtmsg* r = (rtmsg*)NLMSG_DATA(hdr); int remaining_len = RTM_PAYLOAD(hdr); + if (hdr->nlmsg_type != RTM_NEWROUTE) { + continue; // Skip non-route messages + } + // Only consider default routes (destination prefix length of 0). 
if (r->rtm_dst_len != 0) { continue; From 88db09bafe9ec363525e5e526c5f6cdd13691447 Mon Sep 17 00:00:00 2001 From: willcl-ark Date: Tue, 1 Apr 2025 14:15:50 +0100 Subject: [PATCH 3/3] net: handle multi-part netlink responses Handle multi-part netlink responses to prevent truncated results from large routing tables. Previously, we only made a single recv call, which led to incomplete results when the kernel split the message into multiple responses (which happens frequently with NLM_F_DUMP). Also guard against a potential hanging issue where the code would indefinitely wait for NLMSG_DONE for non-multi-part responses by detecting the NLM_F_MULTI flag and only continuing to wait when necessary. --- src/common/netif.cpp | 96 +++++++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 37 deletions(-) diff --git a/src/common/netif.cpp b/src/common/netif.cpp index ef0485b8a2d..33809288dfa 100644 --- a/src/common/netif.cpp +++ b/src/common/netif.cpp @@ -36,6 +36,9 @@ namespace { // will fail, so we skip that. #if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD_version >= 1400000) +// Good for responses containing ~ 10,000-15,000 routes. +static constexpr ssize_t NETLINK_MAX_RESPONSE_SIZE{1'048'576}; + std::optional<CNetAddr> QueryDefaultGatewayImpl(sa_family_t family) { // Create a netlink socket. @@ -84,49 +87,68 @@ std::optional<CNetAddr> QueryDefaultGatewayImpl(sa_family_t family) // Receive response. 
char response[4096]; - int64_t recv_result; - do { - recv_result = sock->Recv(response, sizeof(response), 0); - } while (recv_result < 0 && (errno == EINTR || errno == EAGAIN)); - if (recv_result < 0) { - LogPrintLevel(BCLog::NET, BCLog::Level::Error, "recv() from netlink socket: %s\n", NetworkErrorString(errno)); - return std::nullopt; - } - - for (nlmsghdr* hdr = (nlmsghdr*)response; NLMSG_OK(hdr, recv_result); hdr = NLMSG_NEXT(hdr, recv_result)) { - rtmsg* r = (rtmsg*)NLMSG_DATA(hdr); - int remaining_len = RTM_PAYLOAD(hdr); - - if (hdr->nlmsg_type != RTM_NEWROUTE) { - continue; // Skip non-route messages + ssize_t total_bytes_read{0}; + bool done{false}; + while (!done) { + int64_t recv_result; + do { + recv_result = sock->Recv(response, sizeof(response), 0); + } while (recv_result < 0 && (errno == EINTR || errno == EAGAIN)); + if (recv_result < 0) { + LogPrintLevel(BCLog::NET, BCLog::Level::Error, "recv() from netlink socket: %s\n", NetworkErrorString(errno)); + return std::nullopt; } - // Only consider default routes (destination prefix length of 0). - if (r->rtm_dst_len != 0) { - continue; + total_bytes_read += recv_result; + if (total_bytes_read > NETLINK_MAX_RESPONSE_SIZE) { + LogPrintLevel(BCLog::NET, BCLog::Level::Warning, "Netlink response exceeded size limit (%zu bytes, family=%d)\n", NETLINK_MAX_RESPONSE_SIZE, family); + return std::nullopt; } - // Iterate over the attributes. - rtattr *rta_gateway = nullptr; - int scope_id = 0; - for (rtattr* attr = RTM_RTA(r); RTA_OK(attr, remaining_len); attr = RTA_NEXT(attr, remaining_len)) { - if (attr->rta_type == RTA_GATEWAY) { - rta_gateway = attr; - } else if (attr->rta_type == RTA_OIF && sizeof(int) == RTA_PAYLOAD(attr)) { - std::memcpy(&scope_id, RTA_DATA(attr), sizeof(scope_id)); + for (nlmsghdr* hdr = (nlmsghdr*)response; NLMSG_OK(hdr, recv_result); hdr = NLMSG_NEXT(hdr, recv_result)) { + if (!(hdr->nlmsg_flags & NLM_F_MULTI)) { + done = true; } - } - // Found gateway? 
- if (rta_gateway != nullptr) { - if (family == AF_INET && sizeof(in_addr) == RTA_PAYLOAD(rta_gateway)) { - in_addr gw; - std::memcpy(&gw, RTA_DATA(rta_gateway), sizeof(gw)); - return CNetAddr(gw); - } else if (family == AF_INET6 && sizeof(in6_addr) == RTA_PAYLOAD(rta_gateway)) { - in6_addr gw; - std::memcpy(&gw, RTA_DATA(rta_gateway), sizeof(gw)); - return CNetAddr(gw, scope_id); + if (hdr->nlmsg_type == NLMSG_DONE) { + done = true; + break; + } + + rtmsg* r = (rtmsg*)NLMSG_DATA(hdr); + int remaining_len = RTM_PAYLOAD(hdr); + + if (hdr->nlmsg_type != RTM_NEWROUTE) { + continue; // Skip non-route messages + } + + // Only consider default routes (destination prefix length of 0). + if (r->rtm_dst_len != 0) { + continue; + } + + // Iterate over the attributes. + rtattr* rta_gateway = nullptr; + int scope_id = 0; + for (rtattr* attr = RTM_RTA(r); RTA_OK(attr, remaining_len); attr = RTA_NEXT(attr, remaining_len)) { + if (attr->rta_type == RTA_GATEWAY) { + rta_gateway = attr; + } else if (attr->rta_type == RTA_OIF && sizeof(int) == RTA_PAYLOAD(attr)) { + std::memcpy(&scope_id, RTA_DATA(attr), sizeof(scope_id)); + } + } + + // Found gateway? + if (rta_gateway != nullptr) { + if (family == AF_INET && sizeof(in_addr) == RTA_PAYLOAD(rta_gateway)) { + in_addr gw; + std::memcpy(&gw, RTA_DATA(rta_gateway), sizeof(gw)); + return CNetAddr(gw); + } else if (family == AF_INET6 && sizeof(in6_addr) == RTA_PAYLOAD(rta_gateway)) { + in6_addr gw; + std::memcpy(&gw, RTA_DATA(rta_gateway), sizeof(gw)); + return CNetAddr(gw, scope_id); + } } } }