From eea38787b9be99c3f192cb83fc18358397e4ab52 Mon Sep 17 00:00:00 2001
From: Matthew Zipkin
Date: Thu, 30 May 2024 15:34:58 -0400
Subject: [PATCH] string: add AsciiCaseInsensitive{KeyEqual, Hash} for unordered map

https://httpwg.org/specs/rfc9110.html#rfc.section.5.1
Field names in HTTP headers are case-insensitive. These
structs will be used in the headers map to search by key.

In libevent, field names are also converted to lowercase for comparison:
  evhttp_find_header()
  evutil_ascii_strcasecmp()
  EVUTIL_TOLOWER_()
---
 src/test/util_string_tests.cpp | 35 ++++++++++++++++++++++++++++++++++
 src/util/strencodings.h        | 15 +++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/src/test/util_string_tests.cpp b/src/test/util_string_tests.cpp
index 65ee140b6e9..4a49e5d0905 100644
--- a/src/test/util_string_tests.cpp
+++ b/src/test/util_string_tests.cpp
@@ -2,6 +2,7 @@
 // Distributed under the MIT software license, see the accompanying
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
 
+#include
 #include
 
 #include
@@ -146,4 +147,38 @@ BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec)
     HasReason{"tinyformat: Too many conversion specifiers in format string"});
 }
 
+BOOST_AUTO_TEST_CASE(ascii_case_insensitive_key_equal_test)
+{
+    AsciiCaseInsensitiveKeyEqual cmp;
+    BOOST_CHECK(!cmp("A", "B"));
+    BOOST_CHECK(!cmp("A", "b"));
+    BOOST_CHECK(!cmp("a", "B"));
+    BOOST_CHECK(!cmp("B", "A"));
+    BOOST_CHECK(!cmp("B", "a"));
+    BOOST_CHECK(!cmp("b", "A"));
+    BOOST_CHECK(!cmp("A", "AA"));
+    BOOST_CHECK(cmp("A-A", "a-a"));
+    BOOST_CHECK(cmp("A", "A"));
+    BOOST_CHECK(cmp("A", "a"));
+    BOOST_CHECK(cmp("a", "a"));
+    BOOST_CHECK(cmp("B", "b"));
+    BOOST_CHECK(cmp("ab", "aB"));
+    BOOST_CHECK(cmp("Ab", "aB"));
+    BOOST_CHECK(cmp("AB", "ab"));
+
+    // Use a character with value > 127
+    // to ensure we don't trigger implicit-integer-sign-change
+    BOOST_CHECK(!cmp("a", "\xe4"));
+}
+
+BOOST_AUTO_TEST_CASE(ascii_case_insensitive_hash_test)
+{
+    AsciiCaseInsensitiveHash hsh;
+    BOOST_CHECK_NE(hsh("A"), hsh("B"));
+    BOOST_CHECK_NE(hsh("AA"), hsh("A"));
+    BOOST_CHECK_EQUAL(hsh("A"), hsh("a"));
+    BOOST_CHECK_EQUAL(hsh("Ab"), hsh("aB"));
+    BOOST_CHECK_EQUAL(hsh("A\xfe"), hsh("a\xfe"));
+}
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/util/strencodings.h b/src/util/strencodings.h
index dc8493ff566..97a7b268fd6 100644
--- a/src/util/strencodings.h
+++ b/src/util/strencodings.h
@@ -13,6 +13,7 @@
 #include
 #include
 
+#include
 #include
 #include
 #include
@@ -353,6 +354,20 @@ struct Hex {
 };
 } // namespace detail
 
+struct AsciiCaseInsensitiveKeyEqual {
+    bool operator()(std::string_view s1, std::string_view s2) const
+    {
+        return ToLower(s1) == ToLower(s2);
+    }
+};
+
+struct AsciiCaseInsensitiveHash {
+    size_t operator()(std::string_view s) const
+    {
+        return std::hash<std::string>{}(ToLower(s));
+    }
+};
+
 /**
  * ""_hex is a compile-time user-defined literal returning a
  * `std::array`, equivalent to ParseHex(). Variants provided: