diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 87b170657b1..f4a6623416c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,6 +5,66 @@ on: pull_request: jobs: + build-netbsd: + runs-on: ubuntu-latest + name: build • netbsd ${{ matrix.release }} + defaults: + run: + shell: netbsd {0} + strategy: + fail-fast: false + matrix: + # Test all supported releases. + # See https://www.netbsd.org/releases/. + include: + - release: 9.4 + capnproto-cppflags: 'CPPFLAGS="-DKJ_NO_EXCEPTIONS=0 -DKJ_USE_KQUEUE=0"' + - release: 10.1 + capnproto-cppflags: 'CPPFLAGS="-DKJ_NO_EXCEPTIONS=0"' + steps: + - uses: actions/checkout@v6 + + - name: Start NetBSD VM + uses: vmactions/netbsd-vm@v1 + with: + release: ${{ matrix.release }} + # The installed compiler version must match the CXX variable + # defined in `ci/configs/netbsd.bash`. + prepare: | + pkg_add cmake ninja-build gcc14 + # capnproto prerequisites. + # See the following "Install capnproto" step. + run: | + set -e + pkg_add digest libtool-base mktools pkgconf cwrappers + pkg_admin -K /usr/pkg/pkgdb fetch-pkg-vulnerabilities + cd /usr + cvs -danoncvs@anoncvs.NetBSD.org:/cvsroot checkout -P \ + pkgsrc/devel/capnproto \ + pkgsrc/devel/libtool-base \ + pkgsrc/devel/pkgconf \ + pkgsrc/devel/zlib \ + `# gcc15 is referenced here because the pkgsrc framework requires lang/gcc15/version.mk to exist` \ + `# during the "make install" step below, even though we compile our project with gcc14.` \ + pkgsrc/lang/gcc15 \ + pkgsrc/mk \ + pkgsrc/pkgtools \ + pkgsrc/security/openssl \ + pkgsrc/sysutils/install-sh/files + sync: 'rsync' + copyback: false + + - name: Install capnproto + run: | + cd /usr/pkgsrc/devel/capnproto/ + unset PKG_PATH + make ${{ matrix.capnproto-cppflags }} install + + - name: Run CI script + run: | + cd ${{ github.workspace }} + CI_CONFIG="ci/configs/netbsd.bash" bash ci/scripts/ci.sh + build-openbsd: runs-on: ubuntu-latest name: build • openbsd @@ -18,17 +78,10 @@ jobs: uses: vmactions/openbsd-vm@v1 with: prepare: | - pkg_add -v cmake ninja git bash - run: | - git clone --depth=1 https://codeberg.org/OpenBSD/ports.git /usr/ports + pkg_add -v cmake ninja bash capnproto sync: 'rsync' copyback: false - - name: Install capnproto - run: | - cd /usr/ports/devel/capnproto/ - make install - - name: Run CI script run: | cd ${{ github.workspace }} @@ -76,6 +129,11 @@ jobs: build: runs-on: ubuntu-latest + env: + NIX_EXTRA_CONFIG_ACT: | + sandbox = false + filter-syscalls = false + strategy: fail-fast: false matrix: @@ -90,6 +148,10 @@ jobs: uses: cachix/install-nix-action@v31 # 2025-05-27, from https://github.com/cachix/install-nix-action/tags with: nix_path: nixpkgs=channel:nixos-25.05 # latest release + # Act executes inside an unprivileged container (Docker or Podman), + # so KVM support isn't available. + enable_kvm: "${{ github.actor != 'nektos/act' }}" + extra_nix_config: ${{ github.actor == 'nektos/act' && env.NIX_EXTRA_CONFIG_ACT || '' }} - name: Run CI script env: diff --git a/ci/README.md b/ci/README.md index 2ddaa9e1206..fef1c022b5c 100644 --- a/ci/README.md +++ b/ci/README.md @@ -24,3 +24,29 @@ CI_CONFIG=ci/configs/olddeps.bash ci/scripts/run.sh ``` By default CI jobs will reuse their build directories. `CI_CLEAN=1` can be specified to delete them before running instead. + +### Running workflows with `act` + +You can run either the entire workflow or a single matrix entry locally. On +macOS or Linux: + +1. Install [`act`](https://github.com/nektos/act) and either Docker or + Podman. +2. Inside the Podman VM, create a named volume for the Nix store (ext4, + case-sensitive) so builds persist across runs. Recreate it any time you want + a clean cache: + ```bash + podman volume create libmultiprocess-nix + ``` +3. From the repo root, launch the workflow. The example below targets the + sanitize matrix entry; drop the `--matrix` flag to run every configuration. + ```bash + act \ + --reuse \ + -P ubuntu-latest=ghcr.io/catthehacker/ubuntu:act-24.04 \ + --container-options "-v libmultiprocess-nix:/nix" \ + -j build \ + --matrix config:sanitize + ``` + + diff --git a/ci/configs/netbsd.bash b/ci/configs/netbsd.bash new file mode 100644 index 00000000000..c3e7d258e68 --- /dev/null +++ b/ci/configs/netbsd.bash @@ -0,0 +1,11 @@ +CI_DESC="CI config for NetBSD" +CI_DIR=build-netbsd +export CXXFLAGS="-Werror -Wall -Wextra -Wpedantic -Wno-unused-parameter" +# Hardcode GCC 14, since default GCC versions installed by NetBSD are older +# and may not be compatible with libmultiprocess. GCC 14 was chosen because +# it's the latest compiler available on all versions of NetBSD that we test. +# Note that the GCC version specified here must match the version specified +# in pkg_add in ci.yml. +export CXX="/usr/pkg/gcc14/bin/g++" +CMAKE_ARGS=(-G Ninja) +BUILD_ARGS=(-k 0) diff --git a/ci/scripts/run.sh b/ci/scripts/run.sh index 11b91845e12..4b7808314ce 100755 --- a/ci/scripts/run.sh +++ b/ci/scripts/run.sh @@ -10,4 +10,4 @@ set -o errexit -o nounset -o pipefail -o xtrace [ "${CI_CONFIG+x}" ] && source "$CI_CONFIG" -nix-shell --pure --keep CI_CONFIG --keep CI_CLEAN "${NIX_ARGS[@]+"${NIX_ARGS[@]}"}" --run ci/scripts/ci.sh shell.nix +nix develop --ignore-environment --keep CI_CONFIG --keep CI_CLEAN "${NIX_ARGS[@]+"${NIX_ARGS[@]}"}" -f shell.nix --command ci/scripts/ci.sh diff --git a/cmake/TargetCapnpSources.cmake b/cmake/TargetCapnpSources.cmake index 347ef4a010a..abcca70f94e 100644 --- a/cmake/TargetCapnpSources.cmake +++ b/cmake/TargetCapnpSources.cmake @@ -55,7 +55,7 @@ Example: function(target_capnp_sources target include_prefix) cmake_parse_arguments(PARSE_ARGV 2 "TCS" # prefix - "" # options + "ONLY_CAPNP" # options "" # one_value_keywords "IMPORT_PATHS" # multi_value_keywords ) @@ -85,11 +85,14 @@ function(target_capnp_sources target include_prefix) set_source_files_properties(${capnp_file}.c++ PROPERTIES SKIP_LINTING TRUE) # Ignored before cmake 3.27 target_sources(${target} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/${capnp_file}.c++ - ${CMAKE_CURRENT_BINARY_DIR}/${capnp_file}.proxy-client.c++ - ${CMAKE_CURRENT_BINARY_DIR}/${capnp_file}.proxy-server.c++ - ${CMAKE_CURRENT_BINARY_DIR}/${capnp_file}.proxy-types.c++ ) - + if(NOT TCS_ONLY_CAPNP) + target_sources(${target} PRIVATE + ${CMAKE_CURRENT_BINARY_DIR}/${capnp_file}.proxy-client.c++ + ${CMAKE_CURRENT_BINARY_DIR}/${capnp_file}.proxy-server.c++ + ${CMAKE_CURRENT_BINARY_DIR}/${capnp_file}.proxy-types.c++ + ) + endif() list(APPEND generated_headers ${capnp_file}.h) endforeach() @@ -111,5 +114,7 @@ function(target_capnp_sources target include_prefix) # dependencies explicitly because while cmake detect dependencies of non # generated files on generated headers, it does not reliably detect # dependencies of generated headers on other generated headers. - add_custom_target("${target}_headers" DEPENDS ${generated_headers}) + if(NOT TARGET "${target}_headers") + add_custom_target("${target}_headers" DEPENDS ${generated_headers}) + endif() endfunction() diff --git a/doc/design.md b/doc/design.md index 4422953d4dc..113cafc4af3 100644 --- a/doc/design.md +++ b/doc/design.md @@ -13,7 +13,7 @@ There is also optional support for thread mapping, so each thread making interpr Libmultiprocess acts as a pure wrapper or layer over the underlying protocol. Clients and servers written in other languages, but using a shared capnproto schema can communicate with interprocess counterparties using libmultiprocess without having to use libmultiprocess themselves or having to know about the implementation details of libmultiprocess. -### Internals +## Core Architecture The `ProxyClient` and `ProxyServer` generated classes are not directly exposed to the user, as described in [usage.md](usage.md). Instead, they wrap C++ interfaces and appear to the user as pointers to an interface. They are first instantiated when calling `ConnectStream` and `ServeStream` respectively for creating the `InitInterface`. These methods establish connections through sockets, internally creating `Connection` objects wrapping a `capnp::RpcSystem` configured for client and server mode respectively. @@ -25,7 +25,190 @@ When a generated method on the `ProxyClient` is called, it calls `clientInvoke` On the server side, the `capnp::RpcSystem` receives the capnp request and invokes the corresponding C++ method through the corresponding `ProxyServer` and the heavily templated `serverInvoke` triggering a `ServerCall`. The return values from the actual C++ methods are copied into capnp responses by `ServerRet` and exceptions are caught and copied by `ServerExcept`. The two are connected through `ServerField`. The main method driving execution of a request is `PassField`, which is invoked through `ServerField`. Instantiated interfaces, or capabilities in capnp speak, are tracked and owned by the server's `capnp::RpcSystem`. -## Interface descriptions +## Request and Response Flow + +Method parameters and return values are serialized using Cap'n Proto's Builder objects (for sending) and Reader objects (for receiving). Input parameters flow from the client to the server, while output parameters (return values) flow back from the server to the client. + +```mermaid +sequenceDiagram + participant clientInvoke + participant BuildField as BuildField
(Client) + participant ReadField_C as ReadField
(Client) + participant Request as Request
message + participant serverInvoke + participant ReadField as ReadField
(Server) + participant BuildField_S as BuildField
(Server) + participant Response as Response
message + + Note over clientInvoke,ReadField: Input Parameter Flow + clientInvoke->>BuildField: BuildField(input_arg) + BuildField->>Request: Serialize input + Request->>serverInvoke: Cap'n Proto message + serverInvoke->>ReadField: Deserialize input + + Note over clientInvoke,Response: Output Parameter Flow + serverInvoke-->>BuildField_S: BuildField(output) + BuildField_S-->Response: Serialize output + Response-->>ReadField_C: Cap'n Proto message + ReadField_C-->>clientInvoke: Deserialize output +``` + +### Detailed Serialization Mechanism + +Parameters are represented as Fields that must be set on Cap'n Proto Builder objects (for sending) and read from Reader objects (for receiving). + +#### Building Fields + +`BuildField` uses a generated parameter `Accessor` to set the appropriate field in the Cap'n Proto Builder object. + +```mermaid +sequenceDiagram + participant clientInvoke as clientInvoke or
serverInvoke + participant BuildField + participant Accessor + participant Builder as Params::Builder + + Note over clientInvoke,Builder: Serializing Parameters + clientInvoke->>BuildField: BuildField(param1) + BuildField->>Accessor: Use generated field accessor + Accessor->>Builder: builder.setField1(param1) + + clientInvoke->>BuildField: BuildField(param2) + BuildField->>Accessor: Use generated field Accessor + Accessor->>Builder: builder.setField2(param2) +``` + +#### Reading Fields + +`ReadField` uses a generated parameter `Accessor` to read the appropriate field from the Cap'n Proto Reader object and reconstruct C++ parameters. + +```mermaid +sequenceDiagram + participant serverInvoke as clientInvoke or
serverInvoke + participant ReadField + participant Accessor + participant Reader as Params::Reader + participant ServerCall + + Note over serverInvoke,ServerCall: Deserializing Parameters + serverInvoke->>ReadField: Read param1 + ReadField->>Accessor: Use generated field accessor + Accessor->>Reader: reader.getField1() + Reader-->>ServerCall: call function with param1 +``` + +## Server-Side Request Processing + +The generated server code uses a Russian nesting doll structure to process method fields. Each `ServerField` wraps another `ServerField` (for the next parameter), or wraps `ServerRet` (for the return value), which finally wraps `ServerCall` (which invokes the actual C++ method). + +Each `ServerField` invokes `PassField`, which: +1. Calls `ReadField` to deserialize the parameter from the `Params::Reader` +2. Calls the next nested layer's `invoke()` with the accumulated parameters +3. Calls `BuildField` to serialize the parameter back if it's an output parameter + +`ServerRet` invokes the next layer (typically `ServerCall`), stores the result, and calls `BuildField` to serialize it into the `Results::Builder`. + +`ServerCall` uses the generated `ProxyMethod::impl` pointer-to-member to invoke the actual C++ method on the wrapped implementation object. + +```mermaid +sequenceDiagram + participant serverInvoke + participant SF1 as ServerField
(param 1) + participant SF2 as ServerField
(param 2) + participant SR as ServerRet
(return value) + participant SC as ServerCall + participant PMT as ProxyMethodTraits + participant Impl as Actual C++ Method + + serverInvoke->>SF1: SF1::invoke + SF1->>SF2: SF2::invoke + SF2->>SR: SR::invoke + SR->>SC: SC::invoke + SC->>PMT: PMT::invoke + PMT->>Impl: Call impl method + Impl->>PMT: return + PMT->>SC: return + SC->>SR: return + SR->>SF2: return + SF2->>SF1: return + SF1->>serverInvoke: return +``` + +## Advanced Features + +### Callbacks + +Callbacks (passed as `std::function` arguments) are intercepted by `CustomBuildField` and converted into Cap'n Proto capabilities that can be invoked across process boundaries. On the receiving end, `CustomReadField` intercepts the capability and constructs a `ProxyCallFn` object with an `operator()` that sends function calls back over the socket to invoke the original callback. + +```mermaid +sequenceDiagram + participant CT as Client Thread + participant C as clientInvoke + participant CBF1 as CustomBuildField (Client) + participant S as Socket + participant CRF1 as CustomReadField (Server) + participant Srv as Server Code + participant PCF as ProxyCallFn + + C->>CBF1: send function parameter + CBF1->>S: creates a Server for the function and sends a capability + S->>CRF1: receives a capability and creates ProxyCallFn + CRF1->>Srv: + Srv->>PCF: call the callback + PCF-->>CT: sends request to Client +``` + +### Thread Mapping + +Thread mapping enables each client thread to have a dedicated server thread processing its requests, preserving thread-local state and allowing recursive mutex usage across process boundaries. + +Thread mapping is initialized by defining an interface method with a `ThreadMap` parameter and/or response. The example below adds `ThreadMap` to the `construct` method because libmultiprocess calls the `construct` method automatically. + +```capnp +interface InitInterface $Proxy.wrap("Init") { + construct @0 (threadMap: Proxy.ThreadMap) -> (threadMap :Proxy.ThreadMap); +} +``` + +- **ThreadMap in parameter**: The client's `CustomBuildField` creates a `ThreadMap::Server` capability and sends it to the server, where `CustomReadField` stores the `ThreadMap::Client` in `connection.m_thread_map` +- **ThreadMap in response**: The server's `CustomBuildField` creates a `ThreadMap::Server` capability and sends it to the client, where `CustomReadField` stores the `ThreadMap::Client` in `connection.m_thread_map` + +You can specify ThreadMap in the parameter only, response only, or both: +- **Parameter only**: Server can create threads on the client +- **Response only**: Client can create threads on the server +- **Both (as shown)**: Bidirectional thread creation + +When both parameter and response include ThreadMap, both processes end up with `ThreadMap::Client` capabilities pointing to each other's `ThreadMap::Server`, allowing both sides to create threads on the other process. + +### Async Processing with Context + +By adding a `Context` parameter to a method in the capnp interface file, you enable async processing where the client tells the server to execute the request in a separate worker thread. For example: + +```capnp +processData @5 (context :Proxy.Context, data :Data) -> (result :Result); +``` + +If a method does not have a `Context` parameter, then libmultiprocess will execute IPC requests invoking that method on the I/O event loop thread. This is fine for fast and non-blocking methods, but should be avoided for any methods that are slow or blocking or make any IPC calls(including callbacks to the client), since as long as the method is executing, the Cap'n Proto event loop will not be able to perform any I/O. + +When a method has a `Context` parameter: + +**Client side** (`CustomBuildField`): +If this is the first asynchronous request made from the current client thread, `CustomBuildField` will: +1. Call `connection.m_thread_map.makeThreadRequest()` to request a dedicated worker thread on the server (stored in `request_threads` map) +2. Set the remote thread capability in `Context.thread` +3. Create a local `Thread::Server` object for the current thread (stored in `callback_threads` map) +4. Set the local thread capability in `Context.callbackThread` + +Subsequent requests will reuse the existing thread capabilities held in `callback_threads` and `request_threads`. + +**Server side** (`PassField`): +1. Looks up the local `Thread::Server` object specified by `context.thread` +2. The worker thread: + - Stores `context.callbackThread` in its `request_threads` map (so callbacks go to the right client thread) + - Posts the work lambda to that thread's queue via `waiter->post(invoke)` + - Cleans up the `request_threads` entry + +## Interface Definitions As explained in the [usage](usage.md) document, interface descriptions need to be consumed both by the _libmultiprocess_ code generator, and by C++ code that calls and implements the interfaces. The C++ code only needs to know about C++ arguments and return types, while the code generator only needs to know about capnp arguments and return types, but both need to know class and method names, so the corresponding `.h` and `.capnp` source files contain some of the same information, and have to be kept in sync manually when methods or parameters change. Despite the redundancy, reconciling the interface definitions is designed to be _straightforward_ and _safe_. _Straightforward_ because there is no need to write manual serialization code or use awkward intermediate types like [`UniValue`](https://github.com/bitcoin/bitcoin/blob/master/src/univalue/include/univalue.h) instead of native types. _Safe_ because if there are any inconsistencies between API and data definitions (even minor ones like using a narrow int data type for a wider int API input), there are errors at build time instead of errors or bugs at runtime. diff --git a/doc/versions.md b/doc/versions.md new file mode 100644 index 00000000000..8623fcdb4fd --- /dev/null +++ b/doc/versions.md @@ -0,0 +1,46 @@ +# libmultiprocess Versions + +Library versions are tracked with simple +[tags](https://github.com/bitcoin-core/libmultiprocess/tags) and +[branches](https://github.com/bitcoin-core/libmultiprocess/branches). + +Versioning policy is described in the [version.h](../include/mp/version.h) +include. + +## v7 +- Current unstable version in master branch. +- Intended to be compatible with Bitcoin Core 30.1 and future releases. + +## v6.0 +- `EventLoop::addClient` and `EventLoop::removeClient` methods dropped, + requiring clients to use new `EventLoopRef` class instead. +- Compatible with Bitcoin Core 30.0 release. + +## v5.0 +- Broke up `proxy-types.h` into `type-*.h` files requiring clients to explicitly + include overloads needed for C++ ↔️ Cap'n Proto type conversions. +- Now requires C++ 20 support. +- Compatible with Bitcoin Core 29 releases. + +## v4.0 +- Added better cmake support, installing cmake package files so clients do not + need to use pkgconfig. +- Compatible with Bitcoin Core 28 releases. + +## v3.0 +- Dropped compatibility with Cap'n Proto versions before 0.7. +- Compatible with Bitcoin Core 27 releases. + +## v2.0 +- Changed `PassField` function signature. +- Now requires C++17 support. +- Compatible with Bitcoin Core 25 and 26 releases. + +## v1.0 +- Dropped hardcoded includes in generated files, now requiring `include` and + `includeTypes` annotations. +- Compatible with Bitcoin Core 22, 23, and 24 releases. + +## v0.0 +- Initial version used in a downstream release. +- Compatible with Bitcoin Core 21 releases. diff --git a/include/mp/proxy-io.h b/include/mp/proxy-io.h index f7367468fbe..b3c67a3206f 100644 --- a/include/mp/proxy-io.h +++ b/include/mp/proxy-io.h @@ -317,12 +317,12 @@ public: void* m_context; }; -//! Single element task queue used to handle recursive capnp calls. (If server -//! makes an callback into the client in the middle of a request, while client +//! Single element task queue used to handle recursive capnp calls. (If the +//! server makes a callback into the client in the middle of a request, while the client //! thread is blocked waiting for server response, this is what allows the -//! client to run the request in the same thread, the same way code would run in -//! single process, with the callback sharing same thread stack as the original -//! call. +//! client to run the request in the same thread, the same way code would run in a +//! single process, with the callback sharing the same thread stack as the original +//! call.) struct Waiter { Waiter() = default; diff --git a/include/mp/proxy-types.h b/include/mp/proxy-types.h index 22468b15b0c..7301aa59f4a 100644 --- a/include/mp/proxy-types.h +++ b/include/mp/proxy-types.h @@ -87,7 +87,7 @@ struct StructField // actually construct the read destination object. For example, if a std::string // is being read, the ReadField call will call the custom emplace_fn with char* // and size_t arguments, and the emplace function can decide whether to call the -// constructor via the operator or make_shared or emplace or just return a +// constructor via the operator, make_shared, emplace or just return a // temporary string that is moved from. template struct ReadDestEmplace @@ -205,11 +205,11 @@ void BuildField(TypeList, Context& context, Output&& output, Valu } } -// Adapter to let BuildField overloads methods work set & init list elements as -// if they were fields of a struct. If BuildField is changed to use some kind of -// accessor class instead of calling method pointers, then then maybe this could -// go away or be simplified, because would no longer be a need to return -// ListOutput method pointers emulating capnp struct method pointers.. +// Adapter that allows BuildField overloads to work with, set, and initialize list +// elements as if they were fields of a struct. If BuildField is changed to use some +// kind of accessor class instead of calling method pointers, then maybe this could +// go away or be simplified, because there would no longer be a need to return +// ListOutput method pointers emulating capnp struct method pointers. template struct ListOutput; diff --git a/include/mp/util.h b/include/mp/util.h index e5b4dd19b8e..6cd11fda2e3 100644 --- a/include/mp/util.h +++ b/include/mp/util.h @@ -221,13 +221,16 @@ using FdToArgsFn = std::function(int fd)>; //! Spawn a new process that communicates with the current process over a socket //! pair. Returns pid through an output argument, and file descriptor for the -//! local side of the socket. Invokes fd_to_args callback with the remote file -//! descriptor number which returns the command line arguments that should be -//! used to execute the process, and which should have the remote file -//! descriptor embedded in whatever format the child process expects. +//! local side of the socket. +//! The fd_to_args callback is invoked in the parent process before fork(). +//! It must not rely on child pid/state, and must return the command line +//! arguments that should be used to execute the process. Embed the remote file +//! descriptor number in whatever format the child process expects. int SpawnProcess(int& pid, FdToArgsFn&& fd_to_args); //! Call execvp with vector args. +//! Not safe to call in a post-fork child of a multi-threaded process. +//! Currently only used by mpgen at build time. void ExecProcess(const std::vector& args); //! Wait for a process to exit and return its exit code. diff --git a/include/mp/version.h b/include/mp/version.h new file mode 100644 index 00000000000..01583ad95a9 --- /dev/null +++ b/include/mp/version.h @@ -0,0 +1,26 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef MP_VERSION_H +#define MP_VERSION_H + +//! Major version number. Should be incremented in the master branch before +//! changes that introduce major new features or break API compatibility, if +//! there are clients relying on the previous API. (If an API changes multiple +//! times and nothing uses the intermediate changes, it is sufficient to +//! increment this only once before the first change.) +//! +//! Each time this is incremented, a new stable branch should be created. E.g. +//! when this is incremented to 8, a "v7" stable branch should be created +//! pointing at the prior merge commit. The /doc/versions.md file should also be +//! updated, noting any significant or incompatible changes made since the +//! previous version, and backported to the stable branch before it is tagged. +#define MP_MAJOR_VERSION 7 + +//! Minor version number. Should be incremented in stable branches after +//! backporting changes. The /doc/versions.md file in the master and stable +//! branches should also be updated to list the new minor version. +#define MP_MINOR_VERSION 0 + +#endif // MP_VERSION_H diff --git a/src/mp/gen.cpp b/src/mp/gen.cpp index 18400710e49..cedb4dc42f9 100644 --- a/src/mp/gen.cpp +++ b/src/mp/gen.cpp @@ -136,7 +136,7 @@ static bool BoxedType(const ::capnp::Type& type) // include_prefix can be used to control relative include paths used in // generated files. For example if src_file is "/a/b/c/d/file.canp" and // include_prefix is "/a/b/c" include lines like -// "#include " "#include "i +// "#include ", "#include " // will be generated. static void Generate(kj::StringPtr src_prefix, kj::StringPtr include_prefix, diff --git a/src/mp/proxy.cpp b/src/mp/proxy.cpp index 57545d379cd..9d81284ea4d 100644 --- a/src/mp/proxy.cpp +++ b/src/mp/proxy.cpp @@ -102,12 +102,12 @@ Connection::~Connection() // The ProxyClient cleanup handlers are synchronous because they are fast // and don't do anything besides release capnp resources and reset state so // future calls to client methods immediately throw exceptions instead of - // trying to communicating across the socket. The synchronous callbacks set + // trying to communicate across the socket. The synchronous callbacks set // ProxyClient capability pointers to null, so new method calls on client // objects fail without triggering i/o or relying on event loop which may go // out of scope or trigger obscure capnp i/o errors. // - // The ProxySever cleanup handlers call user defined destructors on server + // The ProxyServer cleanup handlers call user defined destructors on the server // object, which can run arbitrary blocking bitcoin code so they have to run // asynchronously in a different thread. The asynchronous cleanup functions // intentionally aren't started until after the synchronous cleanup @@ -136,7 +136,7 @@ Connection::~Connection() // // Either way disconnect code runs in the event loop thread and called both // on clean and unclean shutdowns. In unclean shutdown case when the - // connection is broken, sync and async cleanup lists will filled with + // connection is broken, sync and async cleanup lists will be filled with // callbacks. In the clean shutdown case both lists will be empty. Lock lock{m_loop->m_mutex}; while (!m_sync_cleanup_fns.empty()) { diff --git a/src/mp/util.cpp b/src/mp/util.cpp index 509913b8637..7b486082a06 100644 --- a/src/mp/util.cpp +++ b/src/mp/util.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,17 @@ namespace fs = std::filesystem; namespace mp { namespace { +std::vector MakeArgv(const std::vector& args) +{ + std::vector argv; + argv.reserve(args.size() + 1); + for (const auto& arg : args) { + argv.push_back(const_cast(arg.c_str())); + } + argv.push_back(nullptr); + return argv; +} + //! Return highest possible file descriptor. size_t MaxFd() { @@ -111,35 +123,57 @@ int SpawnProcess(int& pid, FdToArgsFn&& fd_to_args) throw std::system_error(errno, std::system_category(), "socketpair"); } + // Evaluate the callback and build the argv array before forking. + // + // The parent process may be multi-threaded and holding internal library + // locks at fork time. In that case, running code that allocates memory or + // takes locks in the child between fork() and exec() can deadlock + // indefinitely. Precomputing arguments in the parent avoids this. + const std::vector args{fd_to_args(fds[0])}; + const std::vector argv{MakeArgv(args)}; + pid = fork(); if (pid == -1) { throw std::system_error(errno, std::system_category(), "fork"); } - // Parent process closes the descriptor for socket 0, child closes the descriptor for socket 1. + // Parent process closes the descriptor for socket 0, child closes the + // descriptor for socket 1. On failure, the parent throws, but the child + // must _exit(126) (post-fork child must not throw). if (close(fds[pid ? 0 : 1]) != 0) { - throw std::system_error(errno, std::system_category(), "close"); + if (pid) { + (void)close(fds[1]); + throw std::system_error(errno, std::system_category(), "close"); + } + static constexpr char msg[] = "SpawnProcess(child): close(fds[1]) failed\n"; + const ssize_t writeResult = ::write(STDERR_FILENO, msg, sizeof(msg) - 1); + (void)writeResult; + _exit(126); } + if (!pid) { - // Child process must close all potentially open descriptors, except socket 0. + // Child process must close all potentially open descriptors, except + // socket 0. Do not throw, allocate, or do non-fork-safe work here. const int maxFd = MaxFd(); for (int fd = 3; fd < maxFd; ++fd) { if (fd != fds[0]) { close(fd); } } - ExecProcess(fd_to_args(fds[0])); + + execvp(argv[0], argv.data()); + // NOTE: perror() is not async-signal-safe; calling it here in a + // post-fork child may deadlock in multithreaded parents. + // TODO: Report errors to the parent via a pipe (e.g. write errno) + // so callers can get diagnostics without relying on perror(). + perror("execvp failed"); + _exit(127); } return fds[1]; } void ExecProcess(const std::vector& args) { - std::vector argv; - argv.reserve(args.size()); - for (const auto& arg : args) { - argv.push_back(const_cast(arg.c_str())); - } - argv.push_back(nullptr); + const std::vector argv{MakeArgv(args)}; if (execvp(argv[0], argv.data()) != 0) { perror("execvp failed"); if (errno == ENOENT && !args.empty()) { @@ -152,7 +186,7 @@ void ExecProcess(const std::vector& args) int WaitProcess(int pid) { int status; - if (::waitpid(pid, &status, 0 /* options */) != pid) { + if (::waitpid(pid, &status, /*options=*/0) != pid) { throw std::system_error(errno, std::system_category(), "waitpid"); } return status; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 2a1a7e9e7c2..1f21ba44ca0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -26,6 +26,7 @@ if(BUILD_TESTING AND TARGET CapnProto::kj-test) ${MP_PROXY_HDRS} mp/test/foo-types.h mp/test/foo.h + mp/test/spawn_tests.cpp mp/test/test.cpp ) include(${PROJECT_SOURCE_DIR}/cmake/TargetCapnpSources.cmake) diff --git a/test/mp/test/spawn_tests.cpp b/test/mp/test/spawn_tests.cpp new file mode 100644 index 00000000000..4c7edba4830 --- /dev/null +++ b/test/mp/test/spawn_tests.cpp @@ -0,0 +1,110 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +// Poll for child process exit using waitpid(..., WNOHANG) until the child exits +// or timeout expires. Returns true if the child exited and status_out was set. +// Returns false on timeout or error. +static bool WaitPidWithTimeout(int pid, std::chrono::milliseconds timeout, int& status_out) +{ + const auto deadline = std::chrono::steady_clock::now() + timeout; + while (std::chrono::steady_clock::now() < deadline) { + const int r = ::waitpid(pid, &status_out, WNOHANG); + if (r == pid) return true; + if (r == 0) { + std::this_thread::sleep_for(std::chrono::milliseconds{1}); + continue; + } + // waitpid error + return false; + } + return false; +} + +} // namespace + +KJ_TEST("SpawnProcess does not run callback in child") +{ + // This test is designed to fail deterministically if fd_to_args is invoked + // in the post-fork child: a mutex held by another parent thread at fork + // time appears locked forever in the child. + std::mutex target_mutex; + std::mutex control_mutex; + std::condition_variable control_cv; + bool locked{false}; + bool release{false}; + + // Holds target_mutex until the releaser thread updates release + std::thread locker([&] { + std::unique_lock target_lock(target_mutex); + { + std::lock_guard g(control_mutex); + locked = true; + } + control_cv.notify_one(); + + std::unique_lock control_lock(control_mutex); + control_cv.wait(control_lock, [&] { return release; }); + }); + + // Wait for target_mutex to be held by the locker thread. + { + std::unique_lock l(control_mutex); + control_cv.wait(l, [&] { return locked; }); + } + + // Release the lock shortly after SpawnProcess starts. + std::thread releaser([&] { + // In the unlikely event a CI machine overshoots this delay, a + // regression could be missed. This is preferable to spurious + // test failures. + std::this_thread::sleep_for(std::chrono::milliseconds{50}); + { + std::lock_guard g(control_mutex); + release = true; + } + control_cv.notify_one(); + }); + + int pid{-1}; + const int fd{mp::SpawnProcess(pid, [&](int child_fd) -> std::vector { + // If this callback runs in the post-fork child, target_mutex appears + // locked forever (the owning thread does not exist), so this deadlocks. + std::lock_guard g(target_mutex); + return {"true", std::to_string(child_fd)}; + })}; + ::close(fd); + + int status{0}; + // Give the child up to 1 second to exit. If it does not, terminate it and + // reap it to avoid leaving a zombie behind. + const bool exited{WaitPidWithTimeout(pid, std::chrono::milliseconds{1000}, status)}; + if (!exited) { + ::kill(pid, SIGKILL); + ::waitpid(pid, &status, /*options=*/0); + } + + releaser.join(); + locker.join(); + + KJ_EXPECT(exited, "Timeout waiting for child process to exit"); + KJ_EXPECT(WIFEXITED(status) && WEXITSTATUS(status) == 0); +} diff --git a/test/mp/test/test.cpp b/test/mp/test/test.cpp index e32365faeb0..aa155685977 100644 --- a/test/mp/test/test.cpp +++ b/test/mp/test/test.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -32,12 +33,19 @@ #include #include #include +#include #include #include namespace mp { namespace test { +/** Check version.h header values */ +constexpr auto kMP_MAJOR_VERSION{MP_MAJOR_VERSION}; +constexpr auto kMP_MINOR_VERSION{MP_MINOR_VERSION}; +static_assert(std::is_integral_v, "MP_MAJOR_VERSION must be an integral constant"); +static_assert(std::is_integral_v, "MP_MINOR_VERSION must be an integral constant"); + /** * Test setup class creating a two way connection between a * ProxyServer object and a ProxyClient.