Update minisketch subtree to latest upstream

This commit is contained in:
Hennadii Stepanov 2025-10-13 12:33:23 +01:00
commit c235aa468b
No known key found for this signature in database
GPG Key ID: 410108112E7EA81F
9 changed files with 467 additions and 4 deletions

69
src/minisketch/.github/workflows/ci.yml vendored Normal file
View File

@ -0,0 +1,69 @@
# Continuous-integration workflow: configures, builds, tests and (where
# enabled) benchmarks the project on a native Windows runner.
name: CI
on:
pull_request:
push:
branches:
- '**'
tags-ignore:
- '**'
# Pull-request runs are grouped by github.ref and all other events by the
# unique github.run_id; cancel-in-progress applies within a group.
concurrency:
group: ${{ github.event_name != 'pull_request' && github.run_id || github.ref }}
cancel-in-progress: true
jobs:
windows-native:
name: ${{ matrix.configuration.job_name }}
# See: https://github.com/actions/runner-images#available-images.
runs-on: windows-2025
strategy:
fail-fast: false
# Each matrix entry supplies a job name and a build configuration, and may
# add extra CMake arguments (cmake_options) or disable the benchmark step
# (skip_benchmark).
matrix:
configuration:
- job_name: 'x64 (MSVC): Windows (VS 2022)'
build_configuration: 'Release'
- job_name: 'x64 (MSVC): Windows (VS 2022, fields=32)'
cmake_options: '-DMINISKETCH_FIELDS=32'
build_configuration: 'Release'
- job_name: 'x64 (MSVC): Windows (VS 2022, debug)'
build_configuration: 'Debug'
# TODO: Resolve the issue and re-enable benchmark.
# See: https://github.com/bitcoin-core/minisketch/pull/96.
skip_benchmark: true
- job_name: 'x64 (MSVC): Windows (VS 2022, shared)'
cmake_options: '-DBUILD_SHARED_LIBS=ON'
build_configuration: 'Release'
- job_name: 'x64 (clang-cl): Windows (VS 2022)'
cmake_options: '-T ClangCL'
build_configuration: 'Release'
# TODO: Resolve the issue and re-enable benchmark.
# See: https://github.com/bitcoin-core/minisketch/pull/96.
skip_benchmark: true
steps:
- name: Checkout
uses: actions/checkout@v4
# The benchmark is always built; whether it is run is decided by the
# "Benchmark" step below.
- name: Generate buildsystem
env:
# /WX is the MSVC option that treats warnings as errors.
CXXFLAGS: '/WX'
run: cmake -B build -DMINISKETCH_BUILD_BENCHMARK=ON ${{ matrix.configuration.cmake_options }}
- name: Build
run: cmake --build build --config ${{ matrix.configuration.build_configuration }}
# Print the file type of every produced .exe and .dll.
- name: Binaries info
shell: bash
run: |
cd build/bin/${{ matrix.configuration.build_configuration }}
file * | grep "\.exe\|\.dll"
- name: Check
working-directory: build
run: ctest --output-on-failure -j $env:NUMBER_OF_PROCESSORS -C ${{ matrix.configuration.build_configuration }}
# Runs only for matrix entries that do not set skip_benchmark.
- name: Benchmark
if: ${{ ! matrix.configuration.skip_benchmark }}
working-directory: build
run: bin\${{ matrix.configuration.build_configuration }}\bench.exe

View File

@ -33,3 +33,6 @@ stamp-h1
test*
bench
# CMake build directories.
/*build*

View File

@ -0,0 +1,122 @@
cmake_minimum_required(VERSION 3.22)

#=============================
# Project declaration
#=============================
project(minisketch
  VERSION 0.0.1
  DESCRIPTION "A library for BCH-based set reconciliation"
  HOMEPAGE_URL "https://github.com/bitcoin-core/minisketch"
  LANGUAGES CXX
)

#=============================
# Project-wide initialization
#=============================
enable_testing()
include(CTestUseLaunchers)

# Make the helper modules shipped in <source>/cmake available to include().
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")

# Default artifact locations inside the build tree. Each one is applied only
# when no value has been provided already.
if(NOT CMAKE_RUNTIME_OUTPUT_DIRECTORY)
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin")
endif()
if(NOT CMAKE_LIBRARY_OUTPUT_DIRECTORY)
  set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib")
endif()
if(NOT CMAKE_ARCHIVE_OUTPUT_DIRECTORY)
  set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib")
endif()

# Start with an empty include path so that include directories set by a
# parent project are not inherited by this one.
set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "")
#=============================
# C++ language standard
#=============================
# C++11 is used unless a standard has been chosen already; a chosen standard
# older than C++11 is rejected.
if(NOT DEFINED CMAKE_CXX_STANDARD)
  set(CMAKE_CXX_STANDARD 11)
elseif(CMAKE_CXX_STANDARD EQUAL 98 OR CMAKE_CXX_STANDARD LESS 11)
  message(FATAL_ERROR "This project requires at least C++11")
endif()
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Compiler-specific language extensions are turned off unless the setting was
# made explicitly elsewhere.
if(NOT DEFINED CMAKE_CXX_EXTENSIONS)
  set(CMAKE_CXX_EXTENSIONS OFF)
endif()
#=============================
# Configurable Options
#=============================
# Installation defaults to ON only when this is the top-level project; in a
# subproject the option is additionally marked as advanced.
option(MINISKETCH_INSTALL "Enable installation." ${PROJECT_IS_TOP_LEVEL})
if(NOT PROJECT_IS_TOP_LEVEL)
  mark_as_advanced(MINISKETCH_INSTALL)
endif()
option(MINISKETCH_BUILD_TESTS "Build tests." ON)
option(MINISKETCH_BUILD_BENCHMARK "Build benchmark." OFF)

# Field sizes 2 through 64 (inclusive) are supported. MINISKETCH_FIELDS selects
# the subset to build and defaults to all of them.
set(supported_fields "")
set(have_enabled_fields NO)
set(have_disabled_fields NO)
foreach(i RANGE 2 64)
  list(APPEND supported_fields ${i})
endforeach()
if(NOT DEFINED MINISKETCH_FIELDS)
  set(MINISKETCH_FIELDS ${supported_fields} CACHE STRING "Semicolon-separated list of field sizes to build. Default=all. Available sizes: ${supported_fields}.")
endif()

# Entries that are not supported field sizes have no effect on the build.
# Report them instead of dropping them silently, so that typos and
# out-of-range values are noticed.
foreach(requested_field IN LISTS MINISKETCH_FIELDS)
  if(NOT requested_field IN_LIST supported_fields)
    message(WARNING
      "Ignoring unsupported field size \"${requested_field}\" in MINISKETCH_FIELDS. "
      "Supported sizes are 2 through 64."
    )
  endif()
endforeach()

# Every supported size that was not requested is compiled out through a
# DISABLE_FIELD_<size> definition.
foreach(field IN LISTS supported_fields)
  if(field IN_LIST MINISKETCH_FIELDS)
    set(have_enabled_fields YES)
  else()
    set(have_disabled_fields YES)
    add_compile_definitions(DISABLE_FIELD_${field})
  endif()
endforeach()
if(NOT have_enabled_fields)
  message(FATAL_ERROR "No field sizes are enabled.")
endif()

# have_disabled_fields is deliberately left defined: print_configure_summary()
# reads it.
unset(have_enabled_fields)
unset(supported_fields)
#=============================
# Build options
#=============================
set(CMAKE_CXX_VISIBILITY_PRESET hidden)

if(MSVC)
  add_compile_options(/Zc:__cplusplus)
endif()

if(MINGW)
  add_link_options(-static)
endif()

#=============================
# Diagnostics options
#=============================
if(MSVC)
  # Applies to both cl.exe and clang-cl: /W3 is the production quality warning
  # level and enables Clang's core -Wall option.
  add_compile_options(/W3)
else()
  add_compile_options(-Wall)
endif()

if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
  add_compile_options(
    /wd4060  # C4060: switch statement contains no 'case' or 'default' labels.
    /wd4065  # C4065: switch statement contains 'default' but no 'case' labels.
    /wd4146  # C4146: unary minus operator applied to unsigned type, result still unsigned.
    /wd4244  # C4244: conversion from 'type1' to 'type2', possible loss of data.
  )
else()
  add_compile_options(-Wundef)
endif()
#=============================
# Main Processing
#=============================
# The order of the statements below matters.
# System checks come first so that their results are available when the
# targets in src/ are defined (src/ reads HAVE_CLMUL, CLMUL_CXXFLAGS and
# HAVE_CLZ).
include(SystemIntrospection)
add_subdirectory(src)
# The summary is printed last, once all options and checks have been processed.
include(PrintConfigureSummary)
print_configure_summary()

View File

@ -66,7 +66,7 @@ Below we compare the PinSketch algorithm (which `libminisketch` is an implementa
* **Difference type:** PinSketch can only compute the symmetric difference from a merged sketch, while CPISync and IBLT can distinguish which side certain elements were missing on. When the decoder has access to one of the sets, this generally doesn't matter, as he can look up each of the elements in the symmetric difference with one of the sets.
* **Secure sketch:** Whether the sketch satisfies the definition of a secure sketch<sup>[[1]](#myfootnote1)</sup>, which implies that only a minimal amount of information about a set can be extracted from a sketch by anyone who does not already know most of the elements. This makes the algorithm appropriate for applications like fingerprint authentication.
## Building
## Building with Autotools
The build system is very rudimentary for now, and [improvements](https://github.com/bitcoin-core/minisketch/pulls) are welcome.
@ -78,6 +78,44 @@ cd minisketch
./autogen.sh && ./configure && make
```
## Building with CMake
To maintain a pristine source tree, CMake encourages performing an out-of-source build by using a separate dedicated build directory.
### Building on POSIX systems
The following commands will produce the same `libminisketch.a` file as in the example [above](#building-with-autotools):
```bash
cmake -B build -DCMAKE_CXX_FLAGS="-g -O2" # Generate a build system in subdirectory "build"
cmake --build build # Run the actual build process
ctest --test-dir build # Run the test suite
sudo cmake --install build # Install the library into the system (optional)
```
Run `cmake -B build -LH` or `ccmake -B build` to see the full list of configurable build options.
### Cross compiling
The following example works on modern Ubuntu/Debian systems:
```bash
sudo apt install g++-mingw-w64-x86-64-posix
cmake -B build -DCMAKE_SYSTEM_NAME=Windows -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-g++-posix
cmake --build build
```
### Building on Windows
The following example assumes the use of Visual Studio 2022 and CMake v3.22 or newer.
In "Developer Command Prompt for VS 2022":
```cmd
cmake -B build
cmake --build build --config Release
```
## Usage
In this section Alice and Bob are trying to find the difference between their sets.
@ -167,7 +205,7 @@ The order of the output is arbitrary and will differ on different runs of minisk
## Applications
Communications efficient set reconciliation has been proposed to optimize Bitcoin transaction distribution<sup>[[8]](#myfootnote8)</sup>, which would allow Bitcoin nodes to have many more peers while reducing bandwidth usage. It could also be used for Bitcoin block distribution<sup>[[9]](#myfootnote9)</sup>, particularly for very low bandwidth links such as satellite. A similar approach (CPISync) is used by PGP SKS keyservers to synchronize their databases efficiently. Secure sketches can also be used as helper data to reliably extract a consistent cryptographic key from fuzzy biometric data while leaking minimal information<sup>[[1]](#myfootnote1)</sup>. They can be combined with [dcnets](https://en.wikipedia.org/wiki/Dining_cryptographers_problem) to create cryptographic multiparty anonymous communication<sup>[[10]](#myfootnote10)</sup>.
Communications efficient set reconciliation has been proposed to optimize Bitcoin transaction distribution<sup>[[8]](#myfootnote8)</sup>, which would allow Bitcoin nodes to have many more peers while reducing bandwidth usage. It could also be used for Bitcoin block distribution<sup>[[9]](#myfootnote9)</sup>, particularly for very low bandwidth links such as satellite. A similar approach (CPISync) is used by PGP SKS keyservers to synchronize their databases efficiently. Secure sketches can also be used as helper data to reliably extract a consistent cryptographic key from fuzzy biometric data while leaking minimal information<sup>[[1]](#myfootnote1)</sup>. They can be combined with [dcnets](https://en.wikipedia.org/wiki/Dining_cryptographers_problem) to create cryptographic multiparty anonymous communication<sup>[[10]](#myfootnote10)</sup>.
## Implementation notes

View File

@ -0,0 +1,112 @@
include_guard(GLOBAL)
# indent_message(<header> <content> <indent_num>)
#
# Prints "<header> <content>", prefixed with <indent_num> spaces. To keep the
# content column aligned with unindented lines, up to <indent_num> trailing
# dots are removed from <header>; a header that does not end in that many dots
# is printed unchanged.
function(indent_message header content indent_num)
  # Function scopes can read the caller's variables, so start from a known
  # empty prefix instead of whatever `indentation` may hold in the caller.
  set(indentation "")
  if(indent_num GREATER 0)
    string(REPEAT " " ${indent_num} indentation)
    # Escape the dots: an unescaped "." in a regular expression matches any
    # character, which would strip arbitrary trailing text from the header.
    string(REPEAT "\\." ${indent_num} tail)
    string(REGEX REPLACE "${tail}$" "" header "${header}")
  endif()
  message("${indentation}${header} ${content}")
endfunction()
# print_flags_per_config(<config> <indent_num>)
#
# Prints, on a best-effort basis, the C++ compiler flags for the build
# configuration <config>, indented by <indent_num> spaces. Besides the plain
# flag variables, the abstracted CMake settings that this project touches are
# included: language standard, position-independent code, symbol visibility
# and the directory-level compile options.
function(print_flags_per_config config indent_num)
  string(STRIP "${CMAKE_CXX_COMPILER_ARG1} ${CMAKE_CXX_FLAGS}" combined_cxx_flags)
  string(TOUPPER "${config}" config_uppercase)
  string(STRIP "${combined_cxx_flags} ${CMAKE_CXX_FLAGS_${config_uppercase}}" combined_cxx_flags)
  # The flag that selects the language standard depends on whether compiler
  # extensions are enabled (for example -std=gnu++11 versus -std=c++11).
  if(CMAKE_CXX_EXTENSIONS)
    set(standard_flag "${CMAKE_CXX${CMAKE_CXX_STANDARD}_EXTENSION_COMPILE_OPTION}")
  else()
    set(standard_flag "${CMAKE_CXX${CMAKE_CXX_STANDARD}_STANDARD_COMPILE_OPTION}")
  endif()
  string(STRIP "${combined_cxx_flags} ${standard_flag}" combined_cxx_flags)
  if(CMAKE_POSITION_INDEPENDENT_CODE)
    string(JOIN " " combined_cxx_flags ${combined_cxx_flags} ${CMAKE_CXX_COMPILE_OPTIONS_PIC})
  endif()
  if(CMAKE_CXX_COMPILE_OPTIONS_VISIBILITY AND CMAKE_CXX_VISIBILITY_PRESET)
    string(JOIN " " combined_cxx_flags ${combined_cxx_flags} ${CMAKE_CXX_COMPILE_OPTIONS_VISIBILITY}${CMAKE_CXX_VISIBILITY_PRESET})
  endif()
  # Options added with add_compile_options() in the calling directory.
  get_directory_property(compile_options COMPILE_OPTIONS)
  string(JOIN " " combined_cxx_flags ${combined_cxx_flags} ${compile_options})
  indent_message("CXXFLAGS .............................." "${combined_cxx_flags}" ${indent_num})
endfunction()
# print_configure_summary()
#
# Prints a human-readable overview of the configuration: library type, enabled
# field sizes, clmul and clz status, optional binaries, cross-compilation
# status, compiler, and the compiler flags of each build configuration.
# When only a subset of the field sizes is enabled and this is the top-level
# project, a warning is emitted as well.
function(print_configure_summary)
  message("")
  if(PROJECT_IS_TOP_LEVEL)
    message("Configure summary")
    message("=================")
  else()
    message("minisketch configure summary")
    message("============================")
  endif()

  if(BUILD_SHARED_LIBS)
    set(library_type "Shared")
  else()
    set(library_type "Static")
  endif()
  message("Library type .......................... ${library_type}")

  message("Build options:")
  if(have_disabled_fields)
    set(field_sizes "${MINISKETCH_FIELDS}")
  else()
    set(field_sizes "All")
  endif()
  message(" field sizes ........................ ${field_sizes}")
  if(HAVE_CLMUL)
    set(clmul_status "Enabled")
  else()
    set(clmul_status "Disabled")
  endif()
  message(" clmul fields ........................ ${clmul_status}")
  if(CMAKE_CXX_STANDARD GREATER_EQUAL 20)
    set(clz_status "C++20")
  elseif(HAVE_CLZ)
    set(clz_status "Compiler builtin")
  else()
    set(clz_status "Default")
  endif()
  message(" clz implementation .................. ${clz_status}")

  message("Optional binaries:")
  message(" benchmark ........................... ${MINISKETCH_BUILD_BENCHMARK}")
  message(" tests ............................... ${MINISKETCH_BUILD_TESTS}")
  message("")

  if(CMAKE_CROSSCOMPILING)
    set(cross_status "TRUE, for ${CMAKE_SYSTEM_NAME}, ${CMAKE_SYSTEM_PROCESSOR}")
  else()
    set(cross_status "FALSE")
  endif()
  message("Cross compiling ....................... ${cross_status}")
  message("C++ compiler .......................... ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}, ${CMAKE_CXX_COMPILER}")

  get_property(is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
  if(is_multi_config)
    list(JOIN CMAKE_CONFIGURATION_TYPES ", " configs)
    message("Available build configurations ........ ${configs}")
    if(CMAKE_GENERATOR MATCHES "Visual Studio")
      set(default_config "Debug")
    elseif(CMAKE_GENERATOR STREQUAL "Ninja Multi-Config" AND CMAKE_DEFAULT_BUILD_TYPE)
      # Ninja Multi-Config builds CMAKE_DEFAULT_BUILD_TYPE by default when it
      # is set, which need not be the first listed configuration.
      set(default_config "${CMAKE_DEFAULT_BUILD_TYPE}")
    else()
      list(GET CMAKE_CONFIGURATION_TYPES 0 default_config)
    endif()
    message("Default build configuration ........... ${default_config}")
    foreach(config IN LISTS CMAKE_CONFIGURATION_TYPES)
      message("'${config}' build configuration:")
      print_flags_per_config("${config}" 2)
    endforeach()
  else()
    message("CMAKE_BUILD_TYPE ...................... ${CMAKE_BUILD_TYPE}")
    print_flags_per_config("${CMAKE_BUILD_TYPE}" 0)
  endif()

  # The bracket argument below is printed verbatim, so its lines must stay
  # unindented.
  message([=[
NOTE: The summary above may not exactly match the final applied build flags
if any additional CMAKE_* or environment variables have been modified.
To see the exact flags applied, build with the --verbose option.]=]
  )

  if(have_disabled_fields AND PROJECT_IS_TOP_LEVEL)
    message("")
    message(WARNING
      "Only compiling in support for field sizes: ${MINISKETCH_FIELDS}\n"
      "This means the library will lack support for other field sizes entirely.\n"
    )
  endif()
  message("")
endfunction()

View File

@ -0,0 +1,43 @@
include_guard(GLOBAL)

include(CMakePushCheckState)
include(CheckCXXSourceCompiles)

# Changes to the CMAKE_REQUIRED_* variables made below are confined to the
# region between the push and the matching pop.
cmake_push_check_state(RESET)

# --- clmul instructions: result in HAVE_CLMUL ---
# Every compiler except MSVC is probed with -mpclmul.
if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
  set(CMAKE_REQUIRED_FLAGS "-mpclmul")
endif()
check_cxx_source_compiles("
#include <immintrin.h>
#include <stdint.h>
int main()
{
__m128i a = _mm_cvtsi64_si128((uint64_t)7);
__m128i b = _mm_clmulepi64_si128(a, a, 37);
__m128i c = _mm_srli_epi64(b, 41);
__m128i d = _mm_xor_si128(b, c);
uint64_t e = _mm_cvtsi128_si64(d);
return e == 0;
}
" HAVE_CLMUL
)
# Remember the flags the successful probe was compiled with; they are applied
# to the clmul source files.
if(HAVE_CLMUL)
  set(CLMUL_CXXFLAGS ${CMAKE_REQUIRED_FLAGS})
endif()

# --- clz builtins: result in HAVE_CLZ ---
# The probe is skipped when the language standard is C++20 or newer.
if(CMAKE_CXX_STANDARD LESS 20)
  check_cxx_source_compiles("
int main()
{
unsigned a = __builtin_clz(1);
unsigned long b = __builtin_clzl(1);
unsigned long long c = __builtin_clzll(1);
}
" HAVE_CLZ
  )
endif()

cmake_pop_check_state()

View File

@ -9,7 +9,7 @@ A sketch, for the purpose of this description, can be seen as a "set checksum" w
* Sketches have a predetermined capacity, and when the number of elements in the set is not higher than the capacity, minisketch will always recover the entire set from the sketch. A sketch of *b*-bit elements with capacity *c* can be stored in *bc* bits.
* The sketches of two sets can be combined by adding them (XOR) to obtain a sketch of the [symmetric difference](https://en.wikipedia.org/wiki/Symmetric_difference) between the two sets (*i.e.*, all elements that occur in one but not both input sets).
This overview explains how sets can be converted into a sketch and how a set can be recovered from a sketch.
This overview explains how sets can be converted into a sketch and how a set can be recovered from a sketch.
## From field elements to sketches

View File

@ -0,0 +1,76 @@
# Interface library that carries the per-field-size implementation files.
# Targets linking to it compile these sources as part of themselves.
add_library(minisketch_field_sources INTERFACE)
target_sources(minisketch_field_sources
  INTERFACE
    fields/generic_1byte.cpp
    fields/generic_2bytes.cpp
    fields/generic_3bytes.cpp
    fields/generic_4bytes.cpp
    fields/generic_5bytes.cpp
    fields/generic_6bytes.cpp
    fields/generic_7bytes.cpp
    fields/generic_8bytes.cpp
)

# The clmul implementations are added only when the configure-time check
# succeeded.
if(HAVE_CLMUL)
  set(clmul_sources
    fields/clmul_1byte.cpp
    fields/clmul_2bytes.cpp
    fields/clmul_3bytes.cpp
    fields/clmul_4bytes.cpp
    fields/clmul_5bytes.cpp
    fields/clmul_6bytes.cpp
    fields/clmul_7bytes.cpp
    fields/clmul_8bytes.cpp
  )
  target_sources(minisketch_field_sources INTERFACE ${clmul_sources})

  # Only the clmul files are compiled with the clmul flags ...
  set_property(SOURCE ${clmul_sources}
    PROPERTY COMPILE_OPTIONS ${CLMUL_CXXFLAGS}
  )
  # ... and only minisketch.cpp is told that they are available.
  set_property(SOURCE minisketch.cpp
    PROPERTY COMPILE_DEFINITIONS HAVE_CLMUL
  )
endif()
# Define HAVE_CLZ for everything in this directory when the clz builtins
# check succeeded.
if(HAVE_CLZ)
  add_compile_definitions(HAVE_CLZ)
endif()

# The library proper.
add_library(minisketch minisketch.cpp)
set_target_properties(minisketch PROPERTIES
  PUBLIC_HEADER ${PROJECT_SOURCE_DIR}/include/minisketch.h
)
target_include_directories(minisketch
  INTERFACE
    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
)
target_link_libraries(minisketch PRIVATE minisketch_field_sources)

# Variant built with MINISKETCH_VERIFY defined; it is not part of the default
# build and is only built when something depends on it.
add_library(minisketch_verify EXCLUDE_FROM_ALL minisketch.cpp)
target_compile_definitions(minisketch_verify
  PUBLIC
    MINISKETCH_VERIFY
)
target_link_libraries(minisketch_verify PRIVATE minisketch_field_sources)
# MSVC only: silence warning C4702 for the verify variant.
target_compile_options(minisketch_verify
  PRIVATE
    $<$<CXX_COMPILER_ID:MSVC>:/wd4702>
)
# test.cpp is built twice: once against the regular library and once against
# the MINISKETCH_VERIFY variant. Each executable is registered as a CTest test.
if(MINISKETCH_BUILD_TESTS)
  add_executable(test-noverify test.cpp)
  add_executable(test-verify test.cpp)

  target_link_libraries(test-noverify PRIVATE minisketch)
  target_link_libraries(test-verify PRIVATE minisketch_verify)

  add_test(NAME ${PROJECT_NAME}_test_noverify COMMAND test-noverify)
  add_test(NAME ${PROJECT_NAME}_test_verify COMMAND test-verify)
endif()

# Optional benchmark executable, linked against the regular library.
if(MINISKETCH_BUILD_BENCHMARK)
  add_executable(bench bench.cpp)
  target_link_libraries(bench PRIVATE minisketch)
endif()
# Installation rules, created only when MINISKETCH_INSTALL is enabled.
# GNUInstallDirs provides the standard destination directories.
if(MINISKETCH_INSTALL)
  include(GNUInstallDirs)
  install(TARGETS minisketch)
endif()

View File

@ -36,7 +36,7 @@ template<typename I, int BITS, I MOD> NO_SANITIZE_MEMORY I MulWithClMulReduce(I
__m128i product = _mm_clmulepi64_si128(_mm_cvtsi64_si128((uint64_t)a), _mm_cvtsi64_si128((uint64_t)b), 0x00);
if (BITS <= 32) {
__m128i high1 = _mm_srli_epi64(product, BITS);
__m128i red1 = _mm_clmulepi64_si128(high1, MOD128, 0x00);
__m128i red1 = _mm_clmulepi64_si128(high1, MOD128, 0x00);
__m128i high2 = _mm_srli_epi64(red1, BITS);
__m128i red2 = _mm_clmulepi64_si128(high2, MOD128, 0x00);
return _mm_cvtsi128_si64(_mm_xor_si128(_mm_xor_si128(product, red1), red2)) & MASK;