From a0ca851d26f8a9d819708db06fec2465e9f6228c Mon Sep 17 00:00:00 2001 From: Matthew Zipkin Date: Mon, 23 Jun 2025 11:36:01 -0700 Subject: [PATCH 1/6] Make GetBindAddress() callable from outside net.cpp The function logic is moved-only from net.cpp to netbase.cpp and redeclared (as non-static) in netbase.h --- src/net.cpp | 14 -------------- src/netbase.cpp | 13 +++++++++++++ src/netbase.h | 3 +++ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/net.cpp b/src/net.cpp index bd3d0bb1f32..559319bff31 100644 --- a/src/net.cpp +++ b/src/net.cpp @@ -360,20 +360,6 @@ bool CConnman::CheckIncomingNonce(uint64_t nonce) return true; } -/** Get the bind address for a socket as CService. */ -static CService GetBindAddress(const Sock& sock) -{ - CService addr_bind; - struct sockaddr_storage sockaddr_bind; - socklen_t sockaddr_bind_len = sizeof(sockaddr_bind); - if (!sock.GetSockName((struct sockaddr*)&sockaddr_bind, &sockaddr_bind_len)) { - addr_bind.SetSockAddr((const struct sockaddr*)&sockaddr_bind, sockaddr_bind_len); - } else { - LogWarning("getsockname failed\n"); - } - return addr_bind; -} - CNode* CConnman::ConnectNode(CAddress addrConnect, const char* pszDest, bool fCountFailure, diff --git a/src/netbase.cpp b/src/netbase.cpp index 1cc8f095ea7..43d9d04b5d9 100644 --- a/src/netbase.cpp +++ b/src/netbase.cpp @@ -947,3 +947,16 @@ CService MaybeFlipIPv6toCJDNS(const CService& service) } return ret; } + +CService GetBindAddress(const Sock& sock) +{ + CService addr_bind; + struct sockaddr_storage sockaddr_bind; + socklen_t sockaddr_bind_len = sizeof(sockaddr_bind); + if (!sock.GetSockName((struct sockaddr*)&sockaddr_bind, &sockaddr_bind_len)) { + addr_bind.SetSockAddr((const struct sockaddr*)&sockaddr_bind, sockaddr_bind_len); + } else { + LogWarning("getsockname failed\n"); + } + return addr_bind; +} diff --git a/src/netbase.h b/src/netbase.h index d3c263f9e8a..9c4c626227c 100644 --- a/src/netbase.h +++ b/src/netbase.h @@ -362,4 +362,7 @@ bool 
IsBadPort(uint16_t port); */ CService MaybeFlipIPv6toCJDNS(const CService& service); +/** Get the bind address for a socket as CService. */ +CService GetBindAddress(const Sock& sock); + #endif // BITCOIN_NETBASE_H From 0b0d9125c19c04c1fc19fb127d7639ed9ea39bec Mon Sep 17 00:00:00 2001 From: Matthew Zipkin Date: Tue, 14 Oct 2025 12:49:14 -0400 Subject: [PATCH 2/6] Modernize GetBindAddress() Replace the C-style casting with C++ reinterpret_cast --- src/netbase.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/netbase.cpp b/src/netbase.cpp index 43d9d04b5d9..c1c03c57259 100644 --- a/src/netbase.cpp +++ b/src/netbase.cpp @@ -951,10 +951,13 @@ CService MaybeFlipIPv6toCJDNS(const CService& service) CService GetBindAddress(const Sock& sock) { CService addr_bind; - struct sockaddr_storage sockaddr_bind; - socklen_t sockaddr_bind_len = sizeof(sockaddr_bind); - if (!sock.GetSockName((struct sockaddr*)&sockaddr_bind, &sockaddr_bind_len)) { - addr_bind.SetSockAddr((const struct sockaddr*)&sockaddr_bind, sockaddr_bind_len); + sockaddr_storage storage; + socklen_t len = sizeof(storage); + + auto sa = reinterpret_cast(&storage); + + if (sock.GetSockName(sa, &len) == 0) { + addr_bind.SetSockAddr(sa, len); } else { LogWarning("getsockname failed\n"); } From 4e300df7123a402aef472aaaac30907b18a10c27 Mon Sep 17 00:00:00 2001 From: Matthew Zipkin Date: Wed, 12 Mar 2025 13:31:47 -0400 Subject: [PATCH 3/6] string: add `base` argument for ToIntegral to operate on hexadecimal --- src/test/util_tests.cpp | 33 +++++++++++++++++++++++++++++++++ src/util/strencodings.h | 7 ++++--- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/src/test/util_tests.cpp b/src/test/util_tests.cpp index 13821db4b95..daa5b8b334d 100644 --- a/src/test/util_tests.cpp +++ b/src/test/util_tests.cpp @@ -835,6 +835,39 @@ BOOST_AUTO_TEST_CASE(test_LocaleIndependentAtoi) BOOST_CHECK_EQUAL(LocaleIndependentAtoi("256"), 255U); } +BOOST_AUTO_TEST_CASE(test_ToIntegralHex) +{ + 
std::optional n; + // Valid values + n = ToIntegral("1234", 16); + BOOST_CHECK_EQUAL(*n, 0x1234); + n = ToIntegral("a", 16); + BOOST_CHECK_EQUAL(*n, 0xA); + n = ToIntegral("0000000a", 16); + BOOST_CHECK_EQUAL(*n, 0xA); + n = ToIntegral("100", 16); + BOOST_CHECK_EQUAL(*n, 0x100); + n = ToIntegral("DEADbeef", 16); + BOOST_CHECK_EQUAL(*n, 0xDEADbeef); + n = ToIntegral("FfFfFfFf", 16); + BOOST_CHECK_EQUAL(*n, 0xFfFfFfFf); + n = ToIntegral("123456789", 16); + BOOST_CHECK_EQUAL(*n, 0x123456789ULL); + n = ToIntegral("0", 16); + BOOST_CHECK_EQUAL(*n, 0); + n = ToIntegral("FfFfFfFfFfFfFfFf", 16); + BOOST_CHECK_EQUAL(*n, 0xFfFfFfFfFfFfFfFfULL); + n = ToIntegral("-1", 16); + BOOST_CHECK_EQUAL(*n, -1); + // Invalid values + BOOST_CHECK(!ToIntegral("", 16)); + BOOST_CHECK(!ToIntegral("-1", 16)); + BOOST_CHECK(!ToIntegral("10 00", 16)); + BOOST_CHECK(!ToIntegral("1 ", 16)); + BOOST_CHECK(!ToIntegral("0xAB", 16)); + BOOST_CHECK(!ToIntegral("FfFfFfFfFfFfFfFf0", 16)); +} + BOOST_AUTO_TEST_CASE(test_FormatParagraph) { BOOST_CHECK_EQUAL(FormatParagraph("", 79, 0), ""); diff --git a/src/util/strencodings.h b/src/util/strencodings.h index 01063858047..dc8493ff566 100644 --- a/src/util/strencodings.h +++ b/src/util/strencodings.h @@ -169,17 +169,18 @@ constexpr inline bool IsSpace(char c) noexcept { /** * Convert string to integral type T. Leading whitespace, a leading +, or any * trailing character fail the parsing. The required format expressed as regex - * is `-?[0-9]+`. The minus sign is only permitted for signed integer types. + * is `-?[0-9]+` by default (or `-?[0-9a-fA-F]+` if base = 16). + * The minus sign is only permitted for signed integer types. * * @returns std::nullopt if the entire string could not be parsed, or if the * parsed value is not in the range representable by the type T. 
*/ template -std::optional ToIntegral(std::string_view str) +std::optional ToIntegral(std::string_view str, size_t base = 10) { static_assert(std::is_integral_v); T result; - const auto [first_nonmatching, error_condition] = std::from_chars(str.data(), str.data() + str.size(), result); + const auto [first_nonmatching, error_condition] = std::from_chars(str.data(), str.data() + str.size(), result, base); if (first_nonmatching != str.data() + str.size() || error_condition != std::errc{}) { return std::nullopt; } From eea38787b9be99c3f192cb83fc18358397e4ab52 Mon Sep 17 00:00:00 2001 From: Matthew Zipkin Date: Thu, 30 May 2024 15:34:58 -0400 Subject: [PATCH 4/6] string: add AsciiCaseInsensitive{KeyEqual, Hash} for unordered map https://httpwg.org/specs/rfc9110.html#rfc.section.5.1 Field names in HTTP headers are case-insensitive. These structs will be used in the headers map to search by key. In libevent field names are also converted to lowercase for comparison: evhttp_find_header() evutil_ascii_strcasecmp() EVUTIL_TOLOWER_() --- src/test/util_string_tests.cpp | 35 ++++++++++++++++++++++++++++++++++ src/util/strencodings.h | 15 +++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/src/test/util_string_tests.cpp b/src/test/util_string_tests.cpp index 65ee140b6e9..4a49e5d0905 100644 --- a/src/test/util_string_tests.cpp +++ b/src/test/util_string_tests.cpp @@ -2,6 +2,7 @@ // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+#include #include #include @@ -146,4 +147,38 @@ BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec) HasReason{"tinyformat: Too many conversion specifiers in format string"}); } +BOOST_AUTO_TEST_CASE(ascii_case_insensitive_key_equal_test) +{ + AsciiCaseInsensitiveKeyEqual cmp; + BOOST_CHECK(!cmp("A", "B")); + BOOST_CHECK(!cmp("A", "b")); + BOOST_CHECK(!cmp("a", "B")); + BOOST_CHECK(!cmp("B", "A")); + BOOST_CHECK(!cmp("B", "a")); + BOOST_CHECK(!cmp("b", "A")); + BOOST_CHECK(!cmp("A", "AA")); + BOOST_CHECK(cmp("A-A", "a-a")); + BOOST_CHECK(cmp("A", "A")); + BOOST_CHECK(cmp("A", "a")); + BOOST_CHECK(cmp("a", "a")); + BOOST_CHECK(cmp("B", "b")); + BOOST_CHECK(cmp("ab", "aB")); + BOOST_CHECK(cmp("Ab", "aB")); + BOOST_CHECK(cmp("AB", "ab")); + + // Use a character with value > 127 + // to ensure we don't trigger implicit-integer-sign-change + BOOST_CHECK(!cmp("a", "\xe4")); +} + +BOOST_AUTO_TEST_CASE(ascii_case_insensitive_hash_test) +{ + AsciiCaseInsensitiveHash hsh; + BOOST_CHECK_NE(hsh("A"), hsh("B")); + BOOST_CHECK_NE(hsh("AA"), hsh("A")); + BOOST_CHECK_EQUAL(hsh("A"), hsh("a")); + BOOST_CHECK_EQUAL(hsh("Ab"), hsh("aB")); + BOOST_CHECK_EQUAL(hsh("A\xfe"), hsh("a\xfe")); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/util/strencodings.h b/src/util/strencodings.h index dc8493ff566..97a7b268fd6 100644 --- a/src/util/strencodings.h +++ b/src/util/strencodings.h @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -353,6 +354,20 @@ struct Hex { }; } // namespace detail +struct AsciiCaseInsensitiveKeyEqual { + bool operator()(std::string_view s1, std::string_view s2) const + { + return ToLower(s1) == ToLower(s2); + } +}; + +struct AsciiCaseInsensitiveHash { + size_t operator()(std::string_view s) const + { + return std::hash{}(ToLower(s)); + } +}; + /** * ""_hex is a compile-time user-defined literal returning a * `std::array`, equivalent to ParseHex(). 
Variants provided: From ee62405cce2bf3d14117bdb327832f12584968d6 Mon Sep 17 00:00:00 2001 From: Matthew Zipkin Date: Mon, 3 Jun 2024 13:37:12 -0400 Subject: [PATCH 5/6] time: implement and test RFC1123 timestamp string HTTP 1.1 responses require a timestamp header with a format specified (currently) by: https://datatracker.ietf.org/doc/html/rfc9110#section-5.6.7 This specific format is defined in RFC1123: https://www.rfc-editor.org/rfc/rfc1123#page-55 The libevent implementation can be referenced in evutil_time.c evutil_date_rfc1123() --- src/test/util_tests.cpp | 15 +++++++++++++++ src/util/time.cpp | 22 ++++++++++++++++++++++ src/util/time.h | 6 ++++++ 3 files changed, 43 insertions(+) diff --git a/src/test/util_tests.cpp b/src/test/util_tests.cpp index daa5b8b334d..eb6057a6ffb 100644 --- a/src/test/util_tests.cpp +++ b/src/test/util_tests.cpp @@ -385,6 +385,21 @@ BOOST_AUTO_TEST_CASE(util_FormatISO8601Date) BOOST_CHECK_EQUAL(FormatISO8601Date(1317425777), "2011-09-30"); } + +BOOST_AUTO_TEST_CASE(util_FormatRFC1123DateTime) +{ + BOOST_CHECK_EQUAL(FormatRFC1123DateTime(std::numeric_limits::max()), ""); + BOOST_CHECK_EQUAL(FormatRFC1123DateTime(253402300800), ""); + BOOST_CHECK_EQUAL(FormatRFC1123DateTime(253402300799), "Fri, 31 Dec 9999 23:59:59 GMT"); + BOOST_CHECK_EQUAL(FormatRFC1123DateTime(253402214400), "Fri, 31 Dec 9999 00:00:00 GMT"); + BOOST_CHECK_EQUAL(FormatRFC1123DateTime(1717429609), "Mon, 03 Jun 2024 15:46:49 GMT"); + BOOST_CHECK_EQUAL(FormatRFC1123DateTime(0), "Thu, 01 Jan 1970 00:00:00 GMT"); + BOOST_CHECK_EQUAL(FormatRFC1123DateTime(-1), "Wed, 31 Dec 1969 23:59:59 GMT"); + BOOST_CHECK_EQUAL(FormatRFC1123DateTime(-1717429609), "Sat, 31 Jul 1915 08:13:11 GMT"); + BOOST_CHECK_EQUAL(FormatRFC1123DateTime(-62167219200), "Sat, 01 Jan 0000 00:00:00 GMT"); + BOOST_CHECK_EQUAL(FormatRFC1123DateTime(-62167219201), ""); +} + BOOST_AUTO_TEST_CASE(util_FormatMoney) { BOOST_CHECK_EQUAL(FormatMoney(0), "0.00"); diff --git a/src/util/time.cpp b/src/util/time.cpp 
index 68af84a8fcc..9e0715e5250 100644 --- a/src/util/time.cpp +++ b/src/util/time.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -17,6 +18,9 @@ #include #include +static constexpr std::array weekdays{"Thu", "Fri", "Sat", "Sun", "Mon", "Tue", "Wed"}; // 1970-01-01 was a Thursday. +static constexpr std::array months{"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; + void UninterruptibleSleep(const std::chrono::microseconds& n) { std::this_thread::sleep_for(n); } static std::atomic g_mock_time{}; //!< For testing @@ -117,6 +121,24 @@ std::optional ParseISO8601DateTime(std::string_view str) return int64_t{TicksSinceEpoch(tp)}; } +std::string FormatRFC1123DateTime(int64_t time) +{ + if (time < -62167219200 || 253402300799 < time) { + // 4-digit year, so only support years 0 to 9999 + return ""; + } + const std::chrono::sys_seconds secs{std::chrono::seconds{time}}; + const auto days{std::chrono::floor(secs)}; + const auto w{days.time_since_epoch().count() % 7}; // will be in the range [-6, 6] + std::string_view weekday{weekdays.at(w >= 0 ? 
w : w + 7)}; + const std::chrono::year_month_day ymd{days}; + std::string_view month{months.at(unsigned{ymd.month()} - 1)}; + const std::chrono::hh_mm_ss hms{secs - days}; + // examples: Mon, 27 Jul 2009 12:28:53 GMT + // Fri, 31 May 2024 19:18:04 GMT + return strprintf("%03s, %02u %03s %04i %02i:%02i:%02i GMT", weekday, unsigned{ymd.day()}, month, signed{ymd.year()}, hms.hours().count(), hms.minutes().count(), hms.seconds().count()); +} + struct timeval MillisToTimeval(int64_t nTimeout) { struct timeval timeout; diff --git a/src/util/time.h b/src/util/time.h index 655db4475a5..0f98726d224 100644 --- a/src/util/time.h +++ b/src/util/time.h @@ -136,6 +136,12 @@ std::string FormatISO8601DateTime(int64_t nTime); std::string FormatISO8601Date(int64_t nTime); std::optional ParseISO8601DateTime(std::string_view str); +/** + * RFC1123 formatting https://www.rfc-editor.org/rfc/rfc1123#section-5.2.14 + * Used in HTTP/1.1 responses + */ +std::string FormatRFC1123DateTime(int64_t nTime); + /** * Convert milliseconds to a struct timeval for e.g. select. */ From 1911db8c6dc6b32c8971b14b2b271ec39d9f3ab9 Mon Sep 17 00:00:00 2001 From: Matthew Zipkin Date: Fri, 27 Sep 2024 15:22:17 -0400 Subject: [PATCH 6/6] string: add LineReader This is a helper struct to parse HTTP messages from data in buffers from sockets. HTTP messages begin with headers, which are lines terminated by LF or CRLF (\n or \r\n), followed by an arbitrary amount of body data. Leading and trailing whitespace is trimmed from the header lines but not from the body.
https://httpwg.org/specs/rfc9110.html#rfc.section.5 --- src/test/util_string_tests.cpp | 117 +++++++++++++++++++++++++++++++++ src/util/string.cpp | 54 +++++++++++++++ src/util/string.h | 35 ++++++++++ 3 files changed, 206 insertions(+) diff --git a/src/test/util_string_tests.cpp b/src/test/util_string_tests.cpp index 4a49e5d0905..9f7513fbb30 100644 --- a/src/test/util_string_tests.cpp +++ b/src/test/util_string_tests.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -41,6 +42,12 @@ void FailFmtWithError(const char* wrong_fmt, std::string_view error) BOOST_CHECK_EXCEPTION(CheckNumFormatSpecifiers(wrong_fmt), const char*, HasReason{error}); } +std::vector StringToBuffer(const std::string& str) +{ + auto span = std::as_bytes(std::span(str)); + return {span.begin(), span.end()}; +} + BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec) { PassFmt<0>(""); @@ -181,4 +188,114 @@ BOOST_AUTO_TEST_CASE(ascii_case_insensitive_hash_test) BOOST_CHECK_EQUAL(hsh("A\xfe"), hsh("a\xfe")); } +BOOST_AUTO_TEST_CASE(line_reader_test) +{ + { + // Check three lines terminated by \n and \r\n, trimming whitespace + const std::vector input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food\n")}; + LineReader reader(input, /*max_line_length=*/128); + std::optional line1{reader.ReadLine()}; + BOOST_CHECK_EQUAL(reader.Remaining(), 34); + std::optional line2{reader.ReadLine()}; + BOOST_CHECK_EQUAL(reader.Remaining(), 15); + std::optional line3{reader.ReadLine()}; + std::optional line4{reader.ReadLine()}; + BOOST_CHECK(line1); + BOOST_CHECK(line2); + BOOST_CHECK(line3); + BOOST_CHECK(!line4); + BOOST_CHECK_EQUAL(line1.value(), "once upon a time"); + BOOST_CHECK_EQUAL(line2.value(), "there was a dog"); + BOOST_CHECK_EQUAL(line3.value(), "who liked food"); + } + { + // Do not exceed max_line_length + 1 while searching for \n + // Test with 22-character line + \n + 23-character line + \n + const std::vector input{StringToBuffer("once upon a time there\nwas a dog 
who liked tea\n")}; + + LineReader reader1(input, /*max_line_length=*/22); + // First line is exactly the length of max_line_length + BOOST_CHECK_EQUAL(reader1.ReadLine(), "once upon a time there"); + // Second line is +1 character too long + BOOST_CHECK_EXCEPTION(reader1.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"}); + + // Increase max_line_length by 1 + LineReader reader2(input, /*max_line_length=*/23); + // Both lines fit within limit + BOOST_CHECK_EQUAL(reader2.ReadLine(), "once upon a time there"); + BOOST_CHECK_EQUAL(reader2.ReadLine(), "was a dog who liked tea"); + // End of buffer reached + BOOST_CHECK(!reader2.ReadLine()); + } + { + // Empty lines are empty + const std::vector input{StringToBuffer("\n")}; + LineReader reader(input, /*max_line_length=*/1024); + BOOST_CHECK_EQUAL(reader.ReadLine(), ""); + BOOST_CHECK(!reader.ReadLine()); + } + { + // Empty buffers are null + const std::vector input{StringToBuffer("")}; + LineReader reader(input, /*max_line_length=*/1024); + BOOST_CHECK(!reader.ReadLine()); + } + { + // Even one character is too long, if it's not \n + const std::vector input{StringToBuffer("ab\n")}; + LineReader reader(input, /*max_line_length=*/1); + // First line is +1 character too long + BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"}); + } + { + const std::vector input{StringToBuffer("a\nb\n")}; + LineReader reader(input, /*max_line_length=*/1); + BOOST_CHECK_EQUAL(reader.ReadLine(), "a"); + BOOST_CHECK_EQUAL(reader.ReadLine(), "b"); + BOOST_CHECK(!reader.ReadLine()); + } + { + // If ReadLine fails, the iterator is reset and we can ReadLength instead + const std::vector input{StringToBuffer("a\nbaboon\n")}; + LineReader reader(input, /*max_line_length=*/1); + BOOST_CHECK_EQUAL(reader.ReadLine(), "a"); + // "baboon" is too long + BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by 
LineReader"}); + BOOST_CHECK_EQUAL(reader.ReadLength(1), "b"); + BOOST_CHECK_EQUAL(reader.ReadLength(1), "a"); + BOOST_CHECK_EQUAL(reader.ReadLength(2), "bo"); + // "on" is too long + BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"}); + BOOST_CHECK_EQUAL(reader.ReadLength(1), "o"); + BOOST_CHECK_EQUAL(reader.ReadLine(), "n"); // now the remainder of the buffer fits in one line + BOOST_CHECK(!reader.ReadLine()); + } + { + // The end of the buffer (EOB) does not count as end of line \n + const std::vector input{StringToBuffer("once upon a time there")}; + + LineReader reader(input, /*max_line_length=*/22); + // First line is exactly the length of max_line_length, but that doesn't matter because \n is missing + BOOST_CHECK(!reader.ReadLine()); + // Data can still be read using ReadLength + BOOST_CHECK_EQUAL(reader.ReadLength(22), "once upon a time there"); + // End of buffer reached + BOOST_CHECK_EQUAL(reader.Remaining(), 0); + } + { + // Read specific number of bytes regardless of max_line_length or \n unless buffer is too short + const std::vector input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food")}; + LineReader reader(input, /*max_line_length=*/1); + BOOST_CHECK_EQUAL(reader.ReadLength(0), ""); + BOOST_CHECK_EQUAL(reader.ReadLength(3), "onc"); + BOOST_CHECK_EQUAL(reader.ReadLength(8), "e upon a"); + BOOST_CHECK_EQUAL(reader.ReadLength(8), " time\n t"); + BOOST_CHECK_EXCEPTION(reader.ReadLength(128), std::runtime_error, HasReason{"Not enough data in buffer"}); + // After the error the iterator is reset so we can try again + BOOST_CHECK_EQUAL(reader.ReadLength(31), "here was a dog \r\nwho liked food"); + // End of buffer reached + BOOST_CHECK_EQUAL(reader.Remaining(), 0); + } +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/util/string.cpp b/src/util/string.cpp index 507d9d31718..c3b4b474d34 100644 --- a/src/util/string.cpp +++ b/src/util/string.cpp @@ -13,4 +13,58 @@ void 
ReplaceAll(std::string& in_out, const std::string& search, const std::strin if (search.empty()) return; in_out = std::regex_replace(in_out, std::regex(search), substitute); } + +LineReader::LineReader(std::span buffer, size_t max_line_length) + : start(buffer.begin()), end(buffer.end()), max_line_length(max_line_length), it(buffer.begin()) {} + +std::optional LineReader::ReadLine() +{ + if (it == end) { + return std::nullopt; + } + + auto line_start = it; + size_t count = 0; + while (it != end) { + // Read a character from the incoming buffer and increment the iterator + auto c = static_cast(*it); + ++it; + ++count; + // If the character we just consumed was \n, the line is terminated. + // The \n itself does not count against max_line_length. + if (c == '\n') { + const std::string_view untrimmed_line(reinterpret_cast(std::to_address(line_start)), count); + const std::string_view line = TrimStringView(untrimmed_line); // delete leading and trailing whitespace including \r and \n + return std::string(line); + } + // If the character we just consumed gives us a line length greater + // than max_line_length, and we are not at the end of the line (or buffer) yet, + // that means the line we are currently reading is too long, and we throw. + if (count > max_line_length) { + // Reset iterator + it = line_start; + throw std::runtime_error("max_line_length exceeded by LineReader"); + } + } + // End of buffer reached without finding a \n or exceeding max_line_length. + // Reset the iterator so the rest of the buffer can be read granularly + // with ReadLength() and return null to indicate a line was not found. 
+ it = line_start; + return std::nullopt; +} + +// Ignores max_line_length but won't overflow +std::string LineReader::ReadLength(size_t len) +{ + if (len == 0) return ""; + if (Remaining() < len) throw std::runtime_error("Not enough data in buffer"); + std::string out(reinterpret_cast(std::to_address(it)), len); + it += len; + return out; +} + +size_t LineReader::Remaining() const +{ + return std::distance(it, end); +} } // namespace util diff --git a/src/util/string.h b/src/util/string.h index 330c2a2a61e..048e83ba6fa 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include // IWYU pragma: export #include // IWYU pragma: export @@ -260,6 +261,40 @@ template return obj.size() >= PREFIX_LEN && std::equal(std::begin(prefix), std::end(prefix), std::begin(obj)); } + +struct LineReader { + const std::span::iterator start; + const std::span::iterator end; + const size_t max_line_length; + std::span::iterator it; + + explicit LineReader(std::span buffer, size_t max_line_length); + + /** + * Returns a string from current iterator position up to (but not including) next \n + * and advances iterator to the character following the \n on success. + * Leading and trailing whitespace (including \r) is trimmed from the returned line, which is never longer than max_line_length. + * @returns the next string from the buffer. + * std::nullopt if end of buffer is reached without finding a \n (the iterator is not advanced). + * @throws a std::runtime_error if max_line_length + 1 bytes are read without finding \n (the iterator is not advanced). + */ + std::optional ReadLine(); + + /** + * Returns string from current iterator position of specified length + * if possible and advances iterator on success. + * May exceed max_line_length but will not read past end of buffer. + * @param[in] len The number of bytes to read from the buffer + * @returns a string of the expected length. + * @throws a std::runtime_error if there is not enough data in the buffer.
+ */ + std::string ReadLength(size_t len); + + /** + * Returns remaining size of bytes in buffer + */ + size_t Remaining() const; +}; } // namespace util #endif // BITCOIN_UTIL_STRING_H