mirror of
https://github.com/bitcoin/bitcoin.git
synced 2026-03-03 02:06:18 +00:00
1911db8c6dc6b32c8971b14b2b271ec39d9f3ab9 string: add LineReader (Matthew Zipkin)
ee62405cce2bf3d14117bdb327832f12584968d6 time: implement and test RFC1123 timestamp string (Matthew Zipkin)
eea38787b9be99c3f192cb83fc18358397e4ab52 string: add AsciiCaseInsensitive{KeyEqual, Hash} for unordered map (Matthew Zipkin)
4e300df7123a402aef472aaaac30907b18a10c27 string: add `base` argument for ToIntegral to operate on hexadecimal (Matthew Zipkin)
0b0d9125c19c04c1fc19fb127d7639ed9ea39bec Modernize GetBindAddress() (Matthew Zipkin)
a0ca851d26f8a9d819708db06fec2465e9f6228c Make GetBindAddress() callable from outside net.cpp (Matthew Zipkin)
Pull request description:
This is a component of [removing libevent as a dependency of the project](https://github.com/bitcoin/bitcoin/issues/31194). It is the first six commits of #32061 and provides a string-parsing utility (`LineReader`) that is also consumed by #34158.
These are the functions that are added / updated for HTTP and Torcontrol:
- `GetBindAddress()`: Given a socket, provides the bound address as a CService. Currently used by p2p but moved from `net` to `netbase` so other modules can call it.
- `ToIntegral()`: Already used to parse numbers from strings, added new argument `base = 10` so it can also be used to parse hexadecimal integers. HTTP chunked transfer-encoding uses hex-encoded integers to specify payload size: https://datatracker.ietf.org/doc/html/rfc7230.html#section-4.1
- `AsciiCaseInsensitive` comparators: Needed to store HTTP headers in an `unordered_map`. Headers are key-value pairs that are parsed with case-insensitive keys: https://httpwg.org/specs/rfc9110.html#rfc.section.5.1
- `FormatRFC1123DateTime()`: The required datetime format for HTTP headers (e.g. `Fri, 31 May 2024 19:18:04 GMT`)
- `LineReader`: Fields in HTTP requests are newline-terminated. This struct is given an input buffer and provides methods to read lines as strings.
ACKs for top commit:
maflcko:
review ACK 1911db8c6dc6b32c8971b14b2b271ec39d9f3ab9 👲
furszy:
utACK 1911db8c6dc6b32c8971b14b2b271ec39d9f3ab9
sedited:
ACK 1911db8c6dc6b32c8971b14b2b271ec39d9f3ab9
Tree-SHA512: bb8d3b7b18f158386fd391df6d377c9f5b181051dc258efbf2a896c42e20417a1b0b0d4637671ebd2829f6bc371daa15775625af989c19ef8aee76118660deff
301 lines
9.3 KiB
C++
301 lines
9.3 KiB
C++
// Copyright (c) 2019-present The Bitcoin Core developers
|
|
// Distributed under the MIT software license, see the accompanying
|
|
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
|
|
|
#ifndef BITCOIN_UTIL_STRING_H
|
|
#define BITCOIN_UTIL_STRING_H
|
|
|
|
#include <span.h>
|
|
|
|
#include <array>
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <locale>
|
|
#include <optional>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <string_view>
|
|
#include <vector>
|
|
|
|
namespace util {
|
|
namespace detail {
|
|
/**
 * Count the format specifiers in `str` and compare the total with `num_params`.
 *
 * The NUL-terminated string is scanned once. "%%" is skipped as an escaped
 * percent sign. For every other '%', the optional position ("N$"), the flags,
 * the width and the precision are consumed; a '*' width or precision counts as
 * one additional argument. The length modifier and the conversion type are not
 * inspected.
 *
 * @tparam num_params  Number of arguments the format string has to consume.
 * @param[in] str      NUL-terminated format string.
 * @throws const char* (a string literal) if a positional specifier uses
 *         position 0, if a specifier runs into the end of the string, if
 *         positional and non-positional specifiers are mixed, or if the
 *         resulting count differs from num_params.
 */
template <unsigned num_params>
constexpr static void CheckNumFormatSpecifiers(const char* str)
{
    unsigned sequential_args{0};  // Specifiers without a position, like %s
    unsigned highest_position{0}; // Largest N seen in positional specifiers, like %8$s

    auto is_digit = [](char c) { return '0' <= c && c <= '9'; };
    auto is_flag = [](char c) { return c == '#' || c == '0' || c == '-' || c == ' ' || c == '+'; };

    for (auto cur{str}; *cur != '\0'; ++cur) {
        // Ordinary characters need no handling.
        if (*cur != '%') continue;
        // Step onto the character after '%'; a second '%' is an escaped literal.
        ++cur;
        if (*cur == '%') continue;

        // Reads an optional run of digits. Followed by '$' it is a position
        // (like %8$s); otherwise the specifier takes the next sequential argument.
        auto register_argument = [&] {
            unsigned number{0};
            for (; is_digit(*cur); ++cur) {
                number = number * 10 + (*cur - '0');
            }

            if (*cur != '$') {
                ++sequential_args;
                return;
            }

            ++cur;
            if (number == 0) throw "Positional format specifier must have position of at least 1";
            if (number > highest_position) highest_position = number;
        };

        // Width or precision: '*' pulls its value from an argument, digits are static.
        auto skip_size = [&] {
            if (*cur != '*') {
                while (is_digit(*cur)) ++cur;
                return;
            }
            ++cur;
            register_argument();
        };

        // The specifier itself accounts for one argument.
        register_argument();

        while (is_flag(*cur)) ++cur;

        // Width.
        skip_size();

        // Precision, introduced by '.'.
        if (*cur == '.') {
            ++cur;
            skip_size();
        }

        if (*cur == '\0') throw "Format specifier incorrectly terminated by end of string";

        // Length and type in "[flags][width][.precision][length]type"
        // are not checked. The loop increment steps past the current
        // character and scanning resumes at the next '%'.
    }

    if (sequential_args != 0 && highest_position != 0) throw "Format specifiers must be all positional or all non-positional!";
    // At most one of the two counters is non-zero here, so OR-ing them yields that one.
    const unsigned total{sequential_args | highest_position};
    if (total != num_params) throw "Format specifier count must match the argument count!";
}
|
|
} // namespace detail
|
|
|
|
/**
|
|
* @brief A wrapper for a compile-time partially validated format string
|
|
*
|
|
* This struct can be used to enforce partial compile-time validation of format
|
|
* strings, to reduce the likelihood of tinyformat throwing exceptions at
|
|
* run-time. Validation is partial to try and prevent the most common errors
|
|
* while avoiding re-implementing the entire parsing logic.
|
|
*/
|
|
template <unsigned num_params>
|
|
struct ConstevalFormatString {
|
|
const char* const fmt;
|
|
consteval ConstevalFormatString(const char* str) : fmt{str} { detail::CheckNumFormatSpecifiers<num_params>(fmt); }
|
|
};
|
|
|
|
/**
 * Declaration only; the definition is not part of this header.
 *
 * NOTE(review): judging by the name and signature, this presumably replaces
 * occurrences of `search` inside `in_out` with `substitute`, modifying
 * `in_out` in place -- confirm against the definition.
 */
void ReplaceAll(std::string& in_out, const std::string& search, const std::string& substitute);
|
|
|
|
/** Split a string on any char found in separators, returning a vector.
|
|
*
|
|
* If sep does not occur in sp, a singleton with the entirety of sp is returned.
|
|
*
|
|
* @param[in] include_sep Whether to include the separator at the end of the left side of the splits.
|
|
*
|
|
* Note that this function does not care about braces, so splitting
|
|
* "foo(bar(1),2),3) on ',' will return {"foo(bar(1)", "2)", "3)"}.
|
|
*
|
|
* If include_sep == true, splitting "foo(bar(1),2),3) on ','
|
|
* will return:
|
|
* - foo(bar(1),
|
|
* - 2),
|
|
* - 3)
|
|
*/
|
|
template <typename T = std::span<const char>>
|
|
std::vector<T> Split(const std::span<const char>& sp, std::string_view separators, bool include_sep = false)
|
|
{
|
|
std::vector<T> ret;
|
|
auto it = sp.begin();
|
|
auto start = it;
|
|
while (it != sp.end()) {
|
|
if (separators.find(*it) != std::string::npos) {
|
|
if (include_sep) {
|
|
ret.emplace_back(start, it + 1);
|
|
} else {
|
|
ret.emplace_back(start, it);
|
|
}
|
|
start = it + 1;
|
|
}
|
|
++it;
|
|
}
|
|
ret.emplace_back(start, it);
|
|
return ret;
|
|
}
|
|
|
|
/** Split a string on every instance of sep, returning a vector.
|
|
*
|
|
* If sep does not occur in sp, a singleton with the entirety of sp is returned.
|
|
*
|
|
* Note that this function does not care about braces, so splitting
|
|
* "foo(bar(1),2),3) on ',' will return {"foo(bar(1)", "2)", "3)"}.
|
|
*/
|
|
template <typename T = std::span<const char>>
|
|
std::vector<T> Split(const std::span<const char>& sp, char sep, bool include_sep = false)
|
|
{
|
|
return Split<T>(sp, std::string_view{&sep, 1}, include_sep);
|
|
}
|
|
|
|
[[nodiscard]] inline std::vector<std::string> SplitString(std::string_view str, char sep)
|
|
{
|
|
return Split<std::string>(str, sep);
|
|
}
|
|
|
|
[[nodiscard]] inline std::vector<std::string> SplitString(std::string_view str, std::string_view separators)
|
|
{
|
|
return Split<std::string>(str, separators);
|
|
}
|
|
|
|
/**
 * Strip leading and trailing characters that occur in `pattern`.
 *
 * @param[in] str      Text to trim.
 * @param[in] pattern  Set of characters to strip; defaults to " \f\n\r\t\v".
 * @returns a view into `str` without the stripped ends, or a default-constructed
 *          (empty) view if `str` holds nothing but characters from `pattern`.
 */
[[nodiscard]] inline std::string_view TrimStringView(std::string_view str, std::string_view pattern = " \f\n\r\t\v")
{
    const auto first_kept{str.find_first_not_of(pattern)};
    if (first_kept == std::string_view::npos) return {};

    // A character outside `pattern` exists, so the backwards search finds one too.
    const auto last_kept{str.find_last_not_of(pattern)};
    str.remove_suffix(str.size() - (last_kept + 1));
    str.remove_prefix(first_kept);
    return str;
}
|
|
|
|
[[nodiscard]] inline std::string TrimString(std::string_view str, std::string_view pattern = " \f\n\r\t\v")
|
|
{
|
|
return std::string(TrimStringView(str, pattern));
|
|
}
|
|
|
|
[[nodiscard]] inline std::string_view RemoveSuffixView(std::string_view str, std::string_view suffix)
|
|
{
|
|
if (str.ends_with(suffix)) {
|
|
return str.substr(0, str.size() - suffix.size());
|
|
}
|
|
return str;
|
|
}
|
|
|
|
[[nodiscard]] inline std::string_view RemovePrefixView(std::string_view str, std::string_view prefix)
|
|
{
|
|
if (str.starts_with(prefix)) {
|
|
return str.substr(prefix.size());
|
|
}
|
|
return str;
|
|
}
|
|
|
|
[[nodiscard]] inline std::string RemovePrefix(std::string_view str, std::string_view prefix)
|
|
{
|
|
return std::string(RemovePrefixView(str, prefix));
|
|
}
|
|
|
|
/**
 * Concatenate the transformed items of a container, with a separator between
 * neighbouring items. Typically used for strings, but any element type works
 * as long as the result type supports `+=` with the separator and with the
 * transformed items.
 *
 * @param container The items to join
 * @param separator Appended between two consecutive items (not before the first, not after the last)
 * @param unary_op  Applied to each item; its return type is the type of the result
 * @returns the joined value; a default-constructed result if the container is empty.
 */
template <typename C, typename S, typename UnaryOp>
// NOLINTNEXTLINE(misc-no-recursion)
auto Join(const C& container, const S& separator, UnaryOp unary_op)
{
    using Result = decltype(unary_op(*container.begin()));

    Result joined;
    bool need_separator{false};
    for (const auto& element : container) {
        if (need_separator) {
            joined += separator;
        } else {
            // Nothing precedes the first element; every later one gets a separator.
            need_separator = true;
        }
        joined += unary_op(element);
    }
    return joined;
}
|
|
|
|
/**
 * Join the container items as they are: forwards to the three-argument Join
 * with an operator that returns a copy of each item.
 */
template <typename C, typename S>
auto Join(const C& container, const S& separator)
{
    auto copy_item = [](const auto& i) { return i; };
    return Join(container, separator, copy_item);
}
|
|
|
|
/**
|
|
* Create an unordered multi-line list of items.
|
|
*/
|
|
inline std::string MakeUnorderedList(const std::vector<std::string>& items)
|
|
{
|
|
return Join(items, "\n", [](const std::string& item) { return "- " + item; });
|
|
}
|
|
|
|
/**
 * Tell whether a string is free of embedded NUL (\0) characters.
 *
 * @returns true if no character of `str` equals 0 (which includes the empty
 *          string), false otherwise.
 */
[[nodiscard]] inline bool ContainsNoNUL(std::string_view str) noexcept
{
    return str.find('\0') == std::string_view::npos;
}
|
|
|
|
/**
 * Locale-independent counterpart of std::to_string.
 *
 * The value is streamed into a string stream that is imbued with the classic
 * ("C") locale, so the output does not depend on the global locale.
 */
template <typename T>
std::string ToString(const T& t)
{
    std::ostringstream stream;
    // imbue() returns the previously installed locale, which is of no interest here.
    stream.imbue(std::locale::classic());
    stream << t;
    return stream.str();
}
|
|
|
|
/**
 * Check whether a container begins with the given prefix.
 *
 * @param[in] obj     Container providing size() and begin iteration.
 * @param[in] prefix  The bytes `obj` has to start with.
 * @returns true if `obj` holds at least PREFIX_LEN elements and its first
 *          PREFIX_LEN elements compare equal to `prefix`.
 */
template <typename T1, size_t PREFIX_LEN>
[[nodiscard]] inline bool HasPrefix(const T1& obj,
                                    const std::array<uint8_t, PREFIX_LEN>& prefix)
{
    // Too short to contain the prefix; also keeps std::equal from reading past the end of obj.
    if (obj.size() < PREFIX_LEN) return false;

    return std::equal(prefix.begin(), prefix.end(), std::begin(obj));
}
|
|
|
|
struct LineReader {
|
|
const std::span<const std::byte>::iterator start;
|
|
const std::span<const std::byte>::iterator end;
|
|
const size_t max_line_length;
|
|
std::span<const std::byte>::iterator it;
|
|
|
|
explicit LineReader(std::span<const std::byte> buffer, size_t max_line_length);
|
|
|
|
/**
|
|
* Returns a string from current iterator position up to (but not including) next \n
|
|
* and advances iterator to the character following the \n on success.
|
|
* Will not return a line longer than max_line_length.
|
|
* @returns the next string from the buffer.
|
|
* std::nullopt if end of buffer is reached without finding a \n.
|
|
* @throws a std::runtime_error if max_line_length + 1 bytes are read without finding \n.
|
|
*/
|
|
std::optional<std::string> ReadLine();
|
|
|
|
/**
|
|
* Returns string from current iterator position of specified length
|
|
* if possible and advances iterator on success.
|
|
* May exceed max_line_length but will not read past end of buffer.
|
|
* @param[in] len The number of bytes to read from the buffer
|
|
* @returns a string of the expected length.
|
|
* @throws a std::runtime_error if there is not enough data in the buffer.
|
|
*/
|
|
std::string ReadLength(size_t len);
|
|
|
|
/**
|
|
* Returns remaining size of bytes in buffer
|
|
*/
|
|
size_t Remaining() const;
|
|
};
|
|
} // namespace util
|
|
|
|
#endif // BITCOIN_UTIL_STRING_H
|