diff --git a/CMakeLists.txt b/CMakeLists.txt index 77703a4661..b702cd012f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ option(WITH_THRIFT "With thrift framed protocol supported" OFF) option(WITH_BTHREAD_TRACER "With bthread tracer supported" OFF) option(WITH_SNAPPY "With snappy" OFF) option(WITH_RDMA "With RDMA" OFF) +option(WITH_UBRING "With UB" OFF) option(WITH_DEBUG_BTHREAD_SCHE_SAFETY "With debugging bthread sche safety" OFF) option(WITH_DEBUG_LOCK "With debugging lock" OFF) option(WITH_ASAN "With AddressSanitizer" OFF) @@ -104,6 +105,11 @@ if(WITH_RDMA) set(WITH_RDMA_VAL "1") endif() +set(WITH_UBRING_VAL "0") +if(WITH_UBRING) + set(WITH_UBRING_VAL "1") +endif() + set(WITH_DEBUG_BTHREAD_SCHE_SAFETY_VAL "0") if(WITH_DEBUG_BTHREAD_SCHE_SAFETY) set(WITH_DEBUG_BTHREAD_SCHE_SAFETY_VAL "1") @@ -136,7 +142,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -Wno-deprecated-declarations -Wno-inconsistent-missing-override") endif() -set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} ${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DBRPC_WITH_RDMA=${WITH_RDMA_VAL} -DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=${WITH_DEBUG_BTHREAD_SCHE_SAFETY_VAL} -DBRPC_DEBUG_LOCK=${WITH_DEBUG_LOCK_VAL}") +set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} ${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DBRPC_WITH_RDMA=${WITH_RDMA_VAL} -DBRPC_WITH_UBRING=${WITH_UBRING_VAL} -DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=${WITH_DEBUG_BTHREAD_SCHE_SAFETY_VAL} -DBRPC_DEBUG_LOCK=${WITH_DEBUG_LOCK_VAL}") if (WITH_ASAN) set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -fsanitize=address") set(CMAKE_C_FLAGS "${CMAKE_CPP_FLAGS} -fsanitize=address") @@ -322,6 +328,11 @@ if(WITH_RDMA) list(APPEND DYNAMIC_LIB ${RDMA_LIB}) endif() +if(WITH_UBRING) + message(STATUS "brpc compile with ubring") + list(APPEND DYNAMIC_LIB ${UB_LIB}) +endif() + set(BRPC_PRIVATE_LIBS "-lgflags -lprotobuf -lleveldb -lprotoc -lssl -lcrypto -ldl -lz") if(WITH_GLOG) @@ -564,6 +575,7 @@ set(SOURCES 
${MCPACK2PB_SOURCES} ${BRPC_SOURCES} ${THRIFT_SOURCES} + ${BRPC_C_SOURCES} ) add_subdirectory(src) diff --git a/README.md b/README.md index 1c4f78528b..d65366fafb 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,7 @@ You can use it to: * [FlatMap](docs/en/flatmap.md) * [Coroutine](docs/en/coroutine.md) * [Circuit Breaker](docs/en/circuit_breaker.md) + * [UBRing](docs/en/ubring.md) * [RDMA](docs/en/rdma.md) * [Bazel Support](docs/en/bazel_support.md) * [Wireshark baidu_std dissector plugin](docs/en/wireshark_baidu_std.md) diff --git a/README_cn.md b/README_cn.md index 6413f83fde..2cc686bd85 100644 --- a/README_cn.md +++ b/README_cn.md @@ -87,6 +87,7 @@ * [FlatMap](docs/cn/flatmap.md) * [协程](docs/cn/coroutine.md) * [熔断](docs/cn/circuit_breaker.md) + * [UBRing](docs/cn/ubring.md) * [RDMA](docs/cn/rdma.md) * [Bazel构建支持](docs/cn/bazel_support.md) * [Wireshark baidu_std协议解析插件](docs/cn/wireshark_baidu_std.md) diff --git a/docs/cn/ubring.md b/docs/cn/ubring.md new file mode 100644 index 0000000000..576930f539 --- /dev/null +++ b/docs/cn/ubring.md @@ -0,0 +1,184 @@ +# UBRing: 高性能共享内存 RPC + +UBRing 是 brpc 中的高性能 RPC 实现,它利用共享内存进行进程间通信(IPC)。它支持本地共享内存(POSIX IPC)和远端共享内存(ubs-mem)两种模式,提供微秒到纳秒级的进程间通信延迟。 + +## 技术背景 + +传统的 RPC 框架通常使用网络套接字进行通信,由于内核参与、上下文切换和数据拷贝等原因,会引入显著的开销。UBRing 通过使用共享内存作为通信介质来解决这个问题,允许进程之间直接内存访问,最小化内核干预。 + +UBRing 的主要优势: + +- **超低延迟**:微秒级 RPC 延迟 +- **高吞吐量**:每秒数百万次 RPC 调用 +- **减少数据拷贝**:进程间直接内存访问 +- **跨平台支持**:支持 Linux 和 macOS + +## 支持的共享内存后端 + +UBRing 支持两种共享内存后端,通过 `ub_shm_type` 参数控制: + +### 1. POSIX IPC 共享内存 (ub\_shm\_type = 1) + +这是默认模式,使用标准 POSIX 共享内存进行本地 IPC。同一机器上的进程可以通过共享内存区域直接通信。 + +### 2. 
UBS-Mem 远端共享内存 (ub\_shm\_type = 2) + +此模式使用 ubs-mem(Unified Block Storage Memory),这是来自 openEuler 的开源远端共享内存框架。它支持机架内节点之间的共享内存通信,类似于 RDMA 但部署要求更简单。 + +**UBS-Mem 开源地址**: https://atomgit.com/openeuler/ubs-mem + +### 未来扩展 + +该架构设计支持未来扩展基于 CXL(Compute Express Link)的远端共享内存,实现更灵活的分布式内存共享。 + +## 构建配置 + +### 使用 CMake 构建 + +要构建带有 UBRing 支持的 brpc,请使用以下命令: + +```bash +# 构建 brpc 并启用 UBRing 支持 +cd /path/to/brpc +cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DWITH_UBRING:BOOL=ON +cmake --build build -j 8 + +# 构建 ubring_performance 示例 +cd /path/to/brpc/example/ubring_performance +cmake -B build +cmake --build build -j 8 +``` + +### 使用 Bazel 构建 + +使用 Bazel 构建带有 UBRing 支持的 brpc: + +```bash +# 构建 brpc 并启用 UBRing 支持 +cd /path/to/brpc +bazel build //... --define=with_ubring=true + +# 构建 ubring_performance 示例 +bazel build //example/ubring_performance/... +``` + +### 选择共享内存后端 + +共享内存后端通过 `--ub_shm_type` 参数控制: + +```bash +# 使用 POSIX IPC(默认) +./your_program --ub_shm_type=1 + +# 使用 UBS-Mem +./your_program --ub_shm_type=2 +``` + +## 性能测试 + +### 示例: ubring\_performance + +brpc 在 `example/ubring_performance/` 目录提供了性能测试示例。 + +#### 构建示例 + +```bash +cd example/ubring_performance +mkdir -p build && cd build +cmake .. 
+make +``` + +#### 运行服务端 + +```bash +# 使用 POSIX IPC +./ubring_performance_server --ub_shm_type=1 + +# 使用 UBS-Mem +./ubring_performance_server --ub_shm_type=2 +``` + +#### 运行客户端 + +```bash +# 使用 POSIX IPC +./ubring_performance_client --ub_shm_type=1 --servers=127.0.0.1:8002 + +# 使用 UBS-Mem +./ubring_performance_client --ub_shm_type=2 --servers=${SERVER_IP}:8002 +``` + +#### 测试选项 + +| 选项 | 描述 | 默认值 | +| --------------- | ------------------------- | -------------- | +| `--ub_shm_type` | 共享内存类型 (1=IPC, 2=UBS-Mem) | 1 | +| `--servers` | 服务端地址,多个地址以 `+` 分隔 | 0.0.0.0:8002+0.0.0.0:8002 | +| `--thread_num` | 客户端线程数(0 表示依次测试 1、2、4…直到 `--max_thread_num`) | 0 | +| `--test_iterations` | 每线程请求数上限(0 表示不限,仅受 `--test_seconds` 限制) | 0 | +| `--rpc_timeout_ms` | 请求超时时间(毫秒) | 5000 | + +## 架构概述 + +```mermaid +graph TD + subgraph 客户端进程 + A[Client] + end + + subgraph 服务端进程 + B[Server] + end + + subgraph 共享内存层 + C[SHM Manager] + D[IPC Backend] + E[UBS-Mem Backend] + end + + A -->|直接内存访问| C + B -->|直接内存访问| C + C --> D + C --> E + + style A fill:#636,color:#fff,stroke:#333,stroke-width:2px + style B fill:#369,color:#fff,stroke:#333,stroke-width:2px + style C fill:#396,color:#fff,stroke:#333,stroke-width:2px +``` + +### 架构细节 + +UBRing 架构包含以下组件: + +1. **客户端/服务端进程**: 通过共享内存通信的应用进程 +2. **SHM Manager**: 共享内存操作的中央管理器 (`shm_mgr.cpp`) +3. **IPC Backend**: 用于本地通信的 POSIX 共享内存实现 +4. 
**UBS-Mem Backend**: 用于跨节点通信的远端共享内存实现 + +## 实现细节 + +### 共享内存管理 + +共享内存管理器 (`shm_mgr.cpp`) 为不同的共享内存后端提供统一接口: + +- **初始化**: `ShmMgrInit()` - 初始化共享内存子系统 +- **本地分配**: `ShmLocalMalloc()` - 分配本地共享内存 +- **远端分配**: `ShmRemoteMalloc()` - 分配远程节点可访问的共享内存 +- **释放**: `ShmFree()` - 释放共享内存资源 + +### 定时器管理 + +UBRing 使用高精度定时器系统 (`timer_mgr.cpp`) 进行连接管理和超时处理,支持 epoll(Linux)和 kqueue(macOS)。 + +## 参考资料 + +- [UBRing 特性提案](https://github.com/apache/brpc/issues/3226) +- [UBRing 技术讨论](https://github.com/apache/brpc/discussions/3217) +- [UBS-Mem 开源项目](https://atomgit.com/openeuler/ubs-mem) + +## 相关文档 + +- [UB Client](ub_client.md) - 访问 UB 服务 +- [RDMA 支持](rdma.md) - 远程直接内存访问 + diff --git a/docs/en/ubring.md b/docs/en/ubring.md new file mode 100644 index 0000000000..93b9be2054 --- /dev/null +++ b/docs/en/ubring.md @@ -0,0 +1,182 @@ +# UBRing: High-Performance Shared Memory RPC + +UBRing is a high-performance RPC implementation in brpc that leverages shared memory for inter-process communication (IPC). It supports both local shared memory (POSIX IPC) and remote shared memory (ubs-mem), providing ultra-low latency communication between processes. + +## Technical Background + +Traditional RPC frameworks typically use network sockets for communication, which introduces significant overhead due to kernel involvement, context switches, and data copying. UBRing addresses this by using shared memory as the communication medium, allowing direct memory access between processes with minimal kernel intervention. + +Key advantages of UBRing: +- **Ultra-low latency**: Microsecond-level RPC latency +- **High throughput**: Millions of RPC calls per second +- **Reduced data copying**: Direct memory access between processes +- **Cross-platform support**: Works on Linux and macOS + +## Supported Shared Memory Backends + +UBRing supports two types of shared memory backends, controlled by the `ub_shm_type` flag: + +### 1. 
POSIX IPC Shared Memory (ub_shm_type = 1) + +This is the default mode, using standard POSIX shared memory for local IPC. Processes on the same machine can communicate directly through shared memory regions. + +### 2. UBS-Mem Remote Shared Memory (ub_shm_type = 2) + +This mode uses ubs-mem (Unified Block Storage Memory), an open-source remote shared memory framework from openEuler. It enables shared memory communication across nodes in a rack, similar to RDMA but with simpler deployment requirements. + +**UBS-Mem Open Source**: https://atomgit.com/openeuler/ubs-mem + +### Future Expansion + +The architecture is designed to support CXL (Compute Express Link) based remote shared memory in the future, enabling even more flexible distributed memory sharing. + +## Build Configuration + +### Build with CMake + +To build brpc with UBRing support, use the following commands: + +```bash +# Build brpc with UBRing support +cd /path/to/brpc +cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DWITH_UBRING:BOOL=ON +cmake --build build -j 8 + +# Build the ubring_performance example +cd /path/to/brpc/example/ubring_performance +cmake -B build +cmake --build build -j 8 +``` + +### Build with Bazel + +To build brpc with UBRing support using Bazel: + +```bash +# Build brpc with UBRing support +cd /path/to/brpc +bazel build //... --define=with_ubring=true + +# Build the ubring_performance example +bazel build //example/ubring_performance/... +``` + +### Select Shared Memory Backend + +The shared memory backend is controlled by the `--ub_shm_type` flag: + +```bash +# Use POSIX IPC (default) +./your_program --ub_shm_type=1 + +# Use UBS-Mem +./your_program --ub_shm_type=2 +``` + +## Performance Testing + +### Example: ubring_performance + +brpc provides a performance test example at `example/ubring_performance/`. + +#### Build the Example + +```bash +cd example/ubring_performance +mkdir -p build && cd build +cmake .. 
+make +``` + +#### Run Server + +```bash +# Run with POSIX IPC +./ubring_performance_server --ub_shm_type=1 + +# Run with UBS-Mem +./ubring_performance_server --ub_shm_type=2 +``` + +#### Run Client + +```bash +# Run with POSIX IPC +./ubring_performance_client --ub_shm_type=1 --servers=127.0.0.1:8002 + +# Run with UBS-Mem +./ubring_performance_client --ub_shm_type=2 --servers=${SERVER_IP}:8002 +``` + +#### Test Options + +| Option | Description | Default | +|--------|-------------|---------| +| `--ub_shm_type` | Shared memory type (1=IPC, 2=UBS-Mem) | 1 | +| `--servers` | Server addresses, separated by `+` | 0.0.0.0:8002+0.0.0.0:8002 | +| `--thread_num` | Number of client threads (0 sweeps 1, 2, 4, ... up to `--max_thread_num`) | 0 | +| `--test_iterations` | Maximum requests per thread (0 = unlimited, bounded only by `--test_seconds`) | 0 | +| `--rpc_timeout_ms` | Request timeout in milliseconds | 5000 | + +## Architecture Overview + +```mermaid +graph TD + subgraph Client Process + A[Client] + end + + subgraph Server Process + B[Server] + end + + subgraph Shared Memory + C[SHM Manager] + D[IPC Backend] + E[UBS-Mem Backend] + end + + A -->|Direct Memory Access| C + B -->|Direct Memory Access| C + C --> D + C --> E + + style A fill:#636,color:#fff,stroke:#333,stroke-width:2px + style B fill:#369,color:#fff,stroke:#333,stroke-width:2px + style C fill:#396,color:#fff,stroke:#333,stroke-width:2px +``` + +### Architecture Details + +The UBRing architecture consists of: + +1. **Client/Server Processes**: Application processes that communicate via shared memory +2. **SHM Manager**: Central manager for shared memory operations (`shm_mgr.cpp`) +3. **IPC Backend**: POSIX shared memory implementation for local communication +4. 
**UBS-Mem Backend**: Remote shared memory implementation for cross-node communication + +## Implementation Details + +### Shared Memory Management + +The shared memory manager (`shm_mgr.cpp`) provides a unified interface for different shared memory backends: + +- **Initialization**: `ShmMgrInit()` - Initializes the shared memory subsystem +- **Local Allocation**: `ShmLocalMalloc()` - Allocates shared memory for local use +- **Remote Allocation**: `ShmRemoteMalloc()` - Allocates shared memory accessible by remote nodes +- **Free**: `ShmFree()` - Releases shared memory resources + +### Timer Management + +UBRing uses a high-precision timer system (`timer_mgr.cpp`) for connection management and timeout handling, supporting both epoll (Linux) and kqueue (macOS). + +## References + +- [UBRing Feature Proposal](https://github.com/apache/brpc/issues/3226) +- [UBRing Technical Discussion](https://github.com/apache/brpc/discussions/3217) +- [UBS-Mem Open Source](https://atomgit.com/openeuler/ubs-mem) + +## See Also + +- [UB Client](ub_client.md) - Accessing UB services +- [RDMA Support](rdma.md) - Remote direct memory access \ No newline at end of file diff --git a/example/http_c++/http_client.cpp b/example/http_c++/http_client.cpp index 23222dee9b..7df7461135 100644 --- a/example/http_c++/http_client.cpp +++ b/example/http_c++/http_client.cpp @@ -25,8 +25,11 @@ #include #include #include +#include "bthread/countdown_event.h" DEFINE_string(d, "", "POST this data to the http server"); +DEFINE_bool(progressive, false, "whether or not progressive read data from server"); +DEFINE_int32(progressive_read_timeout_ms, 5000, "progressive read data idle timeout in milliseconds"); DEFINE_string(load_balancer, "", "The algorithm for load balancing"); DEFINE_int32(timeout_ms, 2000, "RPC timeout in milliseconds"); DEFINE_int32(max_retry, 3, "Max retries(not including the first RPC)"); @@ -36,6 +39,25 @@ namespace brpc { DECLARE_bool(http_verbose); } +class PartDataReader: public 
brpc::ProgressiveReader { +public: + explicit PartDataReader(bthread::CountdownEvent* done): _done(done){} + + butil::Status OnReadOnePart(const void* data, size_t length) { + memcpy(_buffer, data, length); + LOG(INFO) << "data : " << _buffer << " size : " << length; + return butil::Status::OK(); + } + + void OnEndOfMessage(const butil::Status& status) { + _done->signal(); + LOG(INFO) << "progressive read data final status : " << status; + } +private: + char _buffer[1024]; + bthread::CountdownEvent* _done; +}; + int main(int argc, char* argv[]) { // Parse gflags. We recommend you to use gflags as well. GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); @@ -71,6 +93,11 @@ int main(int argc, char* argv[]) { cntl.request_attachment().append(FLAGS_d); } + if (FLAGS_progressive) { + cntl.set_progressive_read_timeout_ms(FLAGS_progressive_read_timeout_ms); + cntl.response_will_be_read_progressively(); + } + // Because `done'(last parameter) is NULL, this function waits until // the response comes back or error occurs(including timedout). channel.CallMethod(NULL, &cntl, NULL, NULL, NULL); @@ -78,6 +105,13 @@ int main(int argc, char* argv[]) { std::cerr << cntl.ErrorText() << std::endl; return -1; } + + if (FLAGS_progressive) { + bthread::CountdownEvent done(1); + cntl.ReadProgressiveAttachmentBy(new PartDataReader(&done)); + done.wait(); + LOG(INFO) << "wait client progressive read done safely"; + } // If -http_verbose is on, brpc already prints the response to stderr. 
if (!brpc::FLAGS_http_verbose) { std::cout << cntl.response_attachment() << std::endl; diff --git a/example/http_c++/http_server.cpp b/example/http_c++/http_server.cpp index 05c9a0ee4c..3cc4c63f86 100644 --- a/example/http_c++/http_server.cpp +++ b/example/http_c++/http_server.cpp @@ -31,6 +31,7 @@ DEFINE_int32(idle_timeout_s, -1, "Connection will be closed if there is no " DEFINE_string(certificate, "cert.pem", "Certificate file path to enable SSL"); DEFINE_string(private_key, "key.pem", "Private key file path to enable SSL"); DEFINE_string(ciphers, "", "Cipher suite used for SSL connections"); +DEFINE_bool(enable_progressive_timeout, false, "whether or not trigger progressive write attachement data timeout"); namespace example { @@ -104,6 +105,9 @@ class FileServiceImpl : public FileService { // sleep a while to send another part. bthread_usleep(10000); + if (FLAGS_enable_progressive_timeout && i > 50) { + bthread_usleep(100000000UL); + } } return NULL; } @@ -194,6 +198,9 @@ class HttpSSEServiceImpl : public HttpSSEService { // sleep a while to send another part. bthread_usleep(10000 * 10); + if (FLAGS_enable_progressive_timeout && i > 50) { + bthread_usleep(100000000UL); + } } return NULL; } diff --git a/example/ubring_performance/CMakeLists.txt b/example/ubring_performance/CMakeLists.txt new file mode 100644 index 0000000000..729381ccb8 --- /dev/null +++ b/example/ubring_performance/CMakeLists.txt @@ -0,0 +1,134 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +cmake_minimum_required(VERSION 2.8.10) +project(ubring_performance C CXX) + +option(LINK_SO "Whether examples are linked dynamically" OFF) + +execute_process( + COMMAND bash -c "find ${PROJECT_SOURCE_DIR}/../.. -type d -regex '.*output/include$' | head -n1 | xargs dirname | tr -d '\n'" + OUTPUT_VARIABLE OUTPUT_PATH +) + +set(CMAKE_PREFIX_PATH ${OUTPUT_PATH}) + +include(FindThreads) +include(FindProtobuf) +protobuf_generate_cpp(PROTO_SRC PROTO_HEADER test.proto) +# include PROTO_HEADER +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +# Search for libthrift* by best effort. If it is not found and brpc is +# compiled with thrift protocol enabled, a link error would be reported. 
+find_library(THRIFT_LIB NAMES thrift) +if (NOT THRIFT_LIB) + set(THRIFT_LIB "") +endif() + +find_path(BRPC_INCLUDE_PATH NAMES brpc/server.h) +if(LINK_SO) + find_library(BRPC_LIB NAMES brpc) +else() + find_library(BRPC_LIB NAMES libbrpc.a brpc) +endif() +if((NOT BRPC_INCLUDE_PATH) OR (NOT BRPC_LIB)) + message(FATAL_ERROR "Fail to find brpc") +endif() +include_directories(${BRPC_INCLUDE_PATH}) + +find_path(GFLAGS_INCLUDE_PATH gflags/gflags.h) +find_library(GFLAGS_LIBRARY NAMES gflags libgflags) +if((NOT GFLAGS_INCLUDE_PATH) OR (NOT GFLAGS_LIBRARY)) + message(FATAL_ERROR "Fail to find gflags") +endif() +include_directories(${GFLAGS_INCLUDE_PATH}) + +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + include(CheckFunctionExists) + CHECK_FUNCTION_EXISTS(clock_gettime HAVE_CLOCK_GETTIME) + if(NOT HAVE_CLOCK_GETTIME) + set(DEFINE_CLOCK_GETTIME "-DNO_CLOCK_GETTIME_IN_MAC") + endif() +endif() + +set(CMAKE_CPP_FLAGS "${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_UBRING=1") +set(CMAKE_CXX_FLAGS "${CMAKE_CPP_FLAGS} -DNDEBUG -O2 -D__const__=__unused__ -pipe -W -Wall -Wno-unused-parameter -fPIC -fno-omit-frame-pointer") + +if(CMAKE_VERSION VERSION_LESS "3.1.3") + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + endif() + if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + endif() +else() + set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD_REQUIRED ON) +endif() + +find_path(LEVELDB_INCLUDE_PATH NAMES leveldb/db.h) +find_library(LEVELDB_LIB NAMES leveldb) +if ((NOT LEVELDB_INCLUDE_PATH) OR (NOT LEVELDB_LIB)) + message(FATAL_ERROR "Fail to find leveldb") +endif() +include_directories(${LEVELDB_INCLUDE_PATH}) + +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set(OPENSSL_ROOT_DIR + "/usr/local/opt/openssl" # Homebrew installed OpenSSL + ) +endif() + +find_package(OpenSSL) +include_directories(${OPENSSL_INCLUDE_DIR}) + +set(DYNAMIC_LIB + ${CMAKE_THREAD_LIBS_INIT} + ${GFLAGS_LIBRARY} + ${PROTOBUF_LIBRARIES} + 
${LEVELDB_LIB} + ${OPENSSL_CRYPTO_LIBRARY} + ${OPENSSL_SSL_LIBRARY} + ${THRIFT_LIB} + dl + z + ) + +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set(DYNAMIC_LIB ${DYNAMIC_LIB} + pthread + "-framework CoreFoundation" + "-framework CoreGraphics" + "-framework CoreData" + "-framework CoreText" + "-framework Security" + "-framework Foundation" + "-Wl,-U,_MallocExtension_ReleaseFreeMemory" + "-Wl,-U,_ProfilerStart" + "-Wl,-U,_ProfilerStop" + "-Wl,-U,__Z13GetStackTracePPvii" + "-Wl,-U,_mallctl" + "-Wl,-U,_malloc_stats_print" + ) +endif() + +add_executable(ubring_performance_client client.cpp ${PROTO_SRC} ${PROTO_HEADER}) +add_executable(ubring_performance_server server.cpp ${PROTO_SRC} ${PROTO_HEADER}) + +target_link_libraries(ubring_performance_client ${BRPC_LIB} ${DYNAMIC_LIB}) +target_link_libraries(ubring_performance_server ${BRPC_LIB} ${DYNAMIC_LIB}) \ No newline at end of file diff --git a/example/ubring_performance/client.cpp b/example/ubring_performance/client.cpp new file mode 100644 index 0000000000..05b1d733e5 --- /dev/null +++ b/example/ubring_performance/client.cpp @@ -0,0 +1,328 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include +#include +#include +#include +#include "butil/atomicops.h" +#include "butil/fast_rand.h" +#include "butil/logging.h" +#include "brpc/server.h" +#include "brpc/channel.h" +#include "bthread/bthread.h" +#include "bvar/latency_recorder.h" +#include "bvar/variable.h" +#include "test.pb.h" + +#ifdef BRPC_WITH_UBRING + +DEFINE_int32(thread_num, 0, "How many threads are used"); +DEFINE_int32(queue_depth, 1, "How many requests can be pending in the queue"); +DEFINE_int32(expected_qps, 0, "The expected QPS"); +DEFINE_int32(max_thread_num, 16, "The max number of threads are used"); +DEFINE_int32(attachment_size, -1, "Attachment size is used (in Bytes)"); +DEFINE_bool(echo_attachment, false, "Select whether attachment should be echo"); +DEFINE_string(connection_type, "single", "Connection type of the channel"); +DEFINE_string(protocol, "baidu_std", "Protocol type."); +DEFINE_string(servers, "0.0.0.0:8002+0.0.0.0:8002", "IP Address of servers"); +DEFINE_bool(use_ubring, true, "Use UBRING or not"); +DEFINE_int32(rpc_timeout_ms, 5000, "RPC call timeout"); +DEFINE_int32(test_seconds, 20, "Test running time"); +DEFINE_int32(test_iterations, 0, "Test iterations"); +DEFINE_int32(dummy_port, 8001, "Dummy server port number"); + +bvar::LatencyRecorder g_latency_recorder("client"); +bvar::LatencyRecorder g_server_cpu_recorder("server_cpu"); +bvar::LatencyRecorder g_client_cpu_recorder("client_cpu"); +butil::atomic g_last_time(0); +butil::atomic g_total_bytes; +butil::atomic g_total_cnt; +std::vector g_servers; +int rr_index = 0; +volatile bool g_stop = false; + +butil::atomic g_token(10000); + +static void* GenerateToken(void* arg) { + int64_t start_time = butil::monotonic_time_ns(); + int64_t accumulative_token = g_token.load(butil::memory_order_relaxed); + while (!g_stop) { + bthread_usleep(100000); + int64_t now = butil::monotonic_time_ns(); + if (accumulative_token * 1000000000 / (now - start_time) < FLAGS_expected_qps) { + int64_t delta = FLAGS_expected_qps 
* (now - start_time) / 1000000000 - accumulative_token; + g_token.fetch_add(delta, butil::memory_order_relaxed); + accumulative_token += delta; + } + } + return NULL; +} + +class PerformanceTest { +public: + PerformanceTest(int attachment_size, bool echo_attachment) + : _addr(NULL) + , _channel(NULL) + , _start_time(0) + , _iterations(0) + , _stop(false) + { + if (attachment_size > 0) { + _addr = malloc(attachment_size); + butil::fast_rand_bytes(_addr, attachment_size); + _attachment.append(_addr, attachment_size); + } + _echo_attachment = echo_attachment; + } + + ~PerformanceTest() { + if (_addr) { + free(_addr); + } + delete _channel; + } + + inline bool IsStop() { return _stop; } + + int Init() { + brpc::ChannelOptions options; + options.socket_mode = FLAGS_use_ubring? brpc::SOCKET_MODE_UBRING : brpc::SOCKET_MODE_TCP; + options.protocol = FLAGS_protocol; + options.connection_type = FLAGS_connection_type; + options.timeout_ms = FLAGS_rpc_timeout_ms; + options.max_retry = 0; + // TODO A bug exists when the connection_group parameter is used. + // options.connection_group = std::to_string(reinterpret_cast(this)); + std::string server = g_servers[(rr_index++) % g_servers.size()]; + _channel = new brpc::Channel(); + if (_channel->Init(server.c_str(), &options) != 0) { + LOG(ERROR) << "Fail to initialize channel"; + return -1; + } + + // Add retry mechanism for RPC call + int retry = 3; + while (retry > 0) { + brpc::Controller cntl; + test::PerfTestResponse response; + test::PerfTestRequest request; + request.set_echo_attachment(_echo_attachment); + test::PerfTestService_Stub stub(_channel); + stub.Test(&cntl, &request, &response, NULL); + if (!cntl.Failed()) { + return 0; + } + LOG(WARNING) << "RPC call failed, retrying... 
(" << retry << " left): " << cntl.ErrorText(); + retry--; + bthread_usleep(1000000); // 100ms delay before retry + } + LOG(ERROR) << "RPC call failed after multiple retries"; + return -1; + } + + struct RespClosure { + brpc::Controller* cntl; + test::PerfTestResponse* resp; + PerformanceTest* test; + }; + + void SendRequest() { + if (FLAGS_expected_qps > 0) { + while (g_token.load(butil::memory_order_relaxed) <= 0) { + bthread_usleep(10); + } + g_token.fetch_sub(1, butil::memory_order_relaxed); + } + RespClosure* closure = new RespClosure; + test::PerfTestRequest request; + closure->resp = new test::PerfTestResponse(); + closure->cntl = new brpc::Controller(); + request.set_echo_attachment(_echo_attachment); + closure->cntl->request_attachment().append(_attachment); + closure->test = this; + google::protobuf::Closure* done = brpc::NewCallback(&HandleResponse, closure); + test::PerfTestService_Stub stub(_channel); + stub.Test(closure->cntl, &request, closure->resp, done); + } + + static void HandleResponse(RespClosure* closure) { + std::unique_ptr cntl_guard(closure->cntl); + std::unique_ptr response_guard(closure->resp); + if (closure->cntl->Failed()) { + LOG(DEBUG) << "RPC call failed: " << closure->cntl->ErrorText(); + // Don't stop the test immediately, just log the error and continue + } else { + g_latency_recorder << closure->cntl->latency_us(); + if (closure->resp->cpu_usage().size() > 0) { + g_server_cpu_recorder << atof(closure->resp->cpu_usage().c_str()) * 100; + } + g_total_bytes.fetch_add(closure->cntl->request_attachment().size(), butil::memory_order_relaxed); + g_total_cnt.fetch_add(1, butil::memory_order_relaxed); + } + + cntl_guard.reset(NULL); + response_guard.reset(NULL); + + if (closure->test->_iterations == 0 && FLAGS_test_iterations > 0) { + closure->test->_stop = true; + return; + } + --closure->test->_iterations; + uint64_t last = g_last_time.load(butil::memory_order_relaxed); + uint64_t now = butil::gettimeofday_us(); + if (now > last && now 
- last > 100000) { + if (g_last_time.exchange(now, butil::memory_order_relaxed) == last) { + g_client_cpu_recorder << + atof(bvar::Variable::describe_exposed("process_cpu_usage").c_str()) * 100; + } + } + if (now - closure->test->_start_time > FLAGS_test_seconds * 1000000u) { + closure->test->_stop = true; + return; + } + closure->test->SendRequest(); + } + + static void* RunTest(void* arg) { + PerformanceTest* test = (PerformanceTest*)arg; + test->_start_time = butil::gettimeofday_us(); + test->_iterations = FLAGS_test_iterations; + + for (int i = 0; i < FLAGS_queue_depth; ++i) { + test->SendRequest(); + } + + return NULL; + } + +private: + void* _addr; + brpc::Channel* _channel; + uint64_t _start_time; + uint32_t _iterations; + volatile bool _stop; + butil::IOBuf _attachment; + bool _echo_attachment; +}; + +static void* DeleteTest(void* arg) { + PerformanceTest* test = (PerformanceTest*)arg; + delete test; + return NULL; +} + +void Test(int thread_num, int attachment_size) { + std::cout << "[Threads: " << thread_num + << ", Depth: " << FLAGS_queue_depth + << ", Attachment: " << attachment_size << "B" + << ", UBRING: " << (FLAGS_use_ubring ? "yes" : "no") + << ", Echo: " << (FLAGS_echo_attachment ? 
"yes]" : "no]") + << std::endl; + g_total_bytes.store(0, butil::memory_order_relaxed); + g_total_cnt.store(0, butil::memory_order_relaxed); + std::vector tests; + for (int k = 0; k < thread_num; ++k) { + PerformanceTest* t = new PerformanceTest(attachment_size, FLAGS_echo_attachment); + if (t->Init() < 0) { + exit(1); + } + tests.push_back(t); + } + uint64_t start_time = butil::gettimeofday_us(); + bthread_t tid[thread_num]; + if (FLAGS_expected_qps > 0) { + bthread_t tid; + bthread_start_background(&tid, &BTHREAD_ATTR_NORMAL, GenerateToken, NULL); + } + for (int k = 0; k < thread_num; ++k) { + bthread_start_background(&tid[k], &BTHREAD_ATTR_NORMAL, + PerformanceTest::RunTest, tests[k]); + } + for (int k = 0; k < thread_num; ++k) { + while (!tests[k]->IsStop()) { + bthread_usleep(10000); + } + } + uint64_t end_time = butil::gettimeofday_us(); + double throughput = g_total_bytes / 1.048576 / (end_time - start_time); + if (FLAGS_test_iterations == 0) { + std::cout << "Avg-Latency: " << g_latency_recorder.latency(10) + << ", 90th-Latency: " << g_latency_recorder.latency_percentile(0.9) + << ", 99th-Latency: " << g_latency_recorder.latency_percentile(0.99) + << ", 99.9th-Latency: " << g_latency_recorder.latency_percentile(0.999) + << ", Throughput: " << throughput << "MB/s" + << ", QPS: " << (g_total_cnt.load(butil::memory_order_relaxed) * 1000 / (end_time - start_time)) << "k" + << ", Server CPU-utilization: " << g_server_cpu_recorder.latency(10) << "%" + << ", Client CPU-utilization: " << g_client_cpu_recorder.latency(10) << "%" + << std::endl; + } else { + std::cout << " Throughput: " << throughput << "MB/s" << std::endl; + } + g_stop = true; + for (int k = 0; k < thread_num; ++k) { + bthread_start_background(&tid[k], &BTHREAD_ATTR_NORMAL, DeleteTest, tests[k]); + } + for (int k = 0; k < thread_num; ++k) { + bthread_join(tid[k], NULL); + } +} + +int main(int argc, char* argv[]) { + GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + + 
brpc::StartDummyServerAt(FLAGS_dummy_port); + + std::string::size_type pos1 = 0; + std::string::size_type pos2 = FLAGS_servers.find('+'); + while (pos2 != std::string::npos) { + g_servers.push_back(FLAGS_servers.substr(pos1, pos2 - pos1)); + pos1 = pos2 + 1; + pos2 = FLAGS_servers.find('+', pos1); + } + g_servers.push_back(FLAGS_servers.substr(pos1)); + + if (FLAGS_thread_num > 0 && FLAGS_attachment_size >= 0) { + Test(FLAGS_thread_num, FLAGS_attachment_size); + } else if (FLAGS_thread_num <= 0 && FLAGS_attachment_size >= 0) { + for (int i = 1; i <= FLAGS_max_thread_num; i *= 2) { + Test(i, FLAGS_attachment_size); + } + } else if (FLAGS_thread_num > 0 && FLAGS_attachment_size < 0) { + for (int i = 1; i <= 1024; i *= 4) { + Test(FLAGS_thread_num, i); + } + } else { + for (int j = 1; j <= 1024; j *= 4) { + for (int i = 1; i <= FLAGS_max_thread_num; i *= 2) { + Test(i, j); + } + } + } + + return 0; +} + +#else + +int main(int argc, char* argv[]) { + LOG(ERROR) << " brpc is not compiled with ubring. To enable it, please refer to the ubring documentation"; + return 0; +} + +#endif diff --git a/example/ubring_performance/server.cpp b/example/ubring_performance/server.cpp new file mode 100644 index 0000000000..35277255e1 --- /dev/null +++ b/example/ubring_performance/server.cpp @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +#include +#include "butil/atomicops.h" +#include "butil/logging.h" +#include "butil/time.h" +#include "brpc/server.h" +#include "bvar/variable.h" +#include "test.pb.h" + +#ifdef BRPC_WITH_UBRING + +DEFINE_int32(port, 8002, "TCP Port of this server"); +DEFINE_bool(use_ubring, true, "Use UBRING or not"); + +butil::atomic g_last_time(0); + +namespace test { +class PerfTestServiceImpl : public PerfTestService { +public: + PerfTestServiceImpl() {} + ~PerfTestServiceImpl() {} + + void Test(google::protobuf::RpcController* cntl_base, + const PerfTestRequest* request, + PerfTestResponse* response, + google::protobuf::Closure* done) { + brpc::ClosureGuard done_guard(done); + uint64_t last = g_last_time.load(butil::memory_order_relaxed); + uint64_t now = butil::monotonic_time_us(); + if (now > last && now - last > 100000) { + if (g_last_time.exchange(now, butil::memory_order_relaxed) == last) { + response->set_cpu_usage(bvar::Variable::describe_exposed("process_cpu_usage")); + } else { + response->set_cpu_usage(""); + } + } else { + response->set_cpu_usage(""); + } + if (request->echo_attachment()) { + brpc::Controller* cntl = + static_cast(cntl_base); + cntl->response_attachment().append(cntl->request_attachment()); + } + } +}; +} + +int main(int argc, char* argv[]) { + GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + + brpc::Server server; + test::PerfTestServiceImpl perf_test_service_impl; + + if (server.AddService(&perf_test_service_impl, + brpc::SERVER_DOESNT_OWN_SERVICE) != 0) { + LOG(ERROR) << "Fail to add service"; + return -1; + } 
+ g_last_time.store(0, butil::memory_order_relaxed); + + brpc::ServerOptions options; + options.socket_mode = FLAGS_use_ubring? brpc::SOCKET_MODE_UBRING : brpc::SOCKET_MODE_TCP; + if (server.Start(FLAGS_port, &options) != 0) { + LOG(ERROR) << "Fail to start EchoServer"; + return -1; + } + + server.RunUntilAskedToQuit(); + return 0; +} + +#else + + +int main(int argc, char* argv[]) { + LOG(ERROR) << " brpc is not compiled with ubring. To enable it, please refer to the ubring documentation"; + return 0; +} + +#endif \ No newline at end of file diff --git a/example/ubring_performance/test.proto b/example/ubring_performance/test.proto new file mode 100644 index 0000000000..22646d113c --- /dev/null +++ b/example/ubring_performance/test.proto @@ -0,0 +1,33 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +syntax="proto2"; +option cc_generic_services = true; + +package test; + +message PerfTestRequest { + required bool echo_attachment = 1; +}; + +message PerfTestResponse { + required string cpu_usage = 1; +}; + +service PerfTestService { + rpc Test(PerfTestRequest) returns (PerfTestResponse); +}; \ No newline at end of file diff --git a/src/brpc/controller.cpp b/src/brpc/controller.cpp index 15c8c91887..7c413dd4e0 100644 --- a/src/brpc/controller.cpp +++ b/src/brpc/controller.cpp @@ -95,7 +95,8 @@ DEFINE_bool(graceful_quit_on_sigterm, false, "Register SIGTERM handle func to quit graceful"); DEFINE_bool(graceful_quit_on_sighup, false, "Register SIGHUP handle func to quit graceful"); - +DEFINE_bool(log_idle_progressive_read_close, false, + "Print log when an idle progressive read is closed"); const IdlNames idl_single_req_single_res = { "req", "res" }; const IdlNames idl_single_req_multi_res = { "req", "" }; const IdlNames idl_multi_req_single_res = { "", "res" }; @@ -174,6 +175,80 @@ class IgnoreAllRead : public ProgressiveReader { void OnEndOfMessage(const butil::Status&) {} }; +class ProgressiveTimeoutReader : public ProgressiveReader { +public: + explicit ProgressiveTimeoutReader(SocketId id, int32_t read_timeout_ms, ProgressiveReader* reader): + _socket_id(id), + _read_timeout_ms(read_timeout_ms), + _reader(reader), + _timeout_id(0), + _is_read_timeout(false) { + AddIdleReadTimeoutMonitor(); + } + + ~ProgressiveTimeoutReader() { + if(_timeout_id > 0) { + bthread_timer_del(_timeout_id); + } + } + + butil::Status OnReadOnePart(const void* data, size_t length) { + return _reader->OnReadOnePart(data, length); + } + + void OnEndOfMessage(const butil::Status& status) { + if (_is_read_timeout) { + _reader->OnEndOfMessage(butil::Status(EPROGREADTIMEOUT, "The progressive read timeout")); + } else { + _reader->OnEndOfMessage(status); + } + if(_timeout_id > 0) { + bthread_timer_del(_timeout_id); + _timeout_id = 0; + } + } + +private: + static void 
HandleIdleProgressiveReader(void* arg) { + if(arg == nullptr){ + LOG(ERROR) << "Controller::HandleIdleProgressiveReader arg is null."; + return; + } + ProgressiveTimeoutReader* reader = static_cast(arg); + SocketUniquePtr s; + if (Socket::Address(reader->_socket_id, &s) != 0) { + LOG(ERROR) << "not found the socket id : " << reader->_socket_id; + return; + } + auto log_idle = FLAGS_log_idle_progressive_read_close; + reader->_is_read_timeout = true; + LOG_IF(INFO, log_idle) << "progressive read timeout socket id : " << reader->_socket_id + << " progressive read timeout us : " << reader->_read_timeout_ms; + if (s->parsing_context() != NULL) { + s->parsing_context()->Destroy(); + } + s->ReleaseReferenceIfIdle(0); + } + void AddIdleReadTimeoutMonitor() { + if (_read_timeout_ms <= 0) { + return; + } + bthread_timer_add(&_timeout_id, + butil::milliseconds_from_now(_read_timeout_ms), + HandleIdleProgressiveReader, + this + ); + } + +private: + SocketId _socket_id; + int32_t _read_timeout_ms; + ProgressiveReader* _reader; + // Timer registered to trigger progressive timeout event + bthread_timer_t _timeout_id; + butil::atomic _is_read_timeout; +}; + static IgnoreAllRead* s_ignore_all_read = NULL; static pthread_once_t s_ignore_all_read_once = PTHREAD_ONCE_INIT; static void CreateIgnoreAllRead() { s_ignore_all_read = new IgnoreAllRead; } @@ -260,6 +335,7 @@ void Controller::ResetPods() { _backup_request_ms = UNSET_MAGIC_NUM; _backup_request_policy = NULL; _connect_timeout_ms = UNSET_MAGIC_NUM; + _progressive_read_timeout_ms = UNSET_MAGIC_NUM; _real_timeout_ms = UNSET_MAGIC_NUM; _deadline_us = -1; _timeout_id = 0; @@ -331,6 +407,15 @@ void Controller::Call::Reset() { stream_user_data = NULL; } +void Controller::set_progressive_read_timeout_ms(int32_t progressive_read_timeout_ms){ + if(progressive_read_timeout_ms <= 0x7fffffff){ + _progressive_read_timeout_ms = progressive_read_timeout_ms; + } else { + _progressive_read_timeout_ms = 0x7fffffff; + LOG(WARNING) << 
"progressive_read_timeout_seconds is limited to 0x7fffffff"; + } +} + void Controller::set_timeout_ms(int64_t timeout_ms) { if (timeout_ms <= 0x7fffffff) { _timeout_ms = timeout_ms; @@ -1040,6 +1125,7 @@ void Controller::SubmitSpan() { } } + void Controller::HandleSendFailed() { if (!FailedInline()) { SetFailed("Must be SetFailed() before calling HandleSendFailed()"); @@ -1566,6 +1652,10 @@ void Controller::ReadProgressiveAttachmentBy(ProgressiveReader* r) { __FUNCTION__)); } add_flag(FLAGS_PROGRESSIVE_READER); + if (progressive_read_timeout_ms() > 0) { + auto reader = new ProgressiveTimeoutReader(_rpa->GetSocketId(), _progressive_read_timeout_ms, r); + return _rpa->ReadProgressiveAttachmentBy(reader); + } return _rpa->ReadProgressiveAttachmentBy(r); } diff --git a/src/brpc/controller.h b/src/brpc/controller.h index 45f71b72f6..396aaa33a2 100644 --- a/src/brpc/controller.h +++ b/src/brpc/controller.h @@ -48,7 +48,6 @@ #include "brpc/grpc.h" #include "brpc/kvmap.h" #include "brpc/rpc_dump.h" - // EAUTH is defined in MAC #ifndef EAUTH #define EAUTH ERPCAUTH @@ -164,7 +163,6 @@ friend void policy::ProcessThriftRequest(InputMessageBase*); uint64_t log_id; std::string request_id; }; - public: Controller(); Controller(const Inheritable& parent_ctx); @@ -178,6 +176,9 @@ friend void policy::ProcessThriftRequest(InputMessageBase*); // Set/get timeout in milliseconds for the RPC call. Use // ChannelOptions.timeout_ms on unset. + void set_progressive_read_timeout_ms(int32_t progressive_read_timeout_ms); + int32_t progressive_read_timeout_ms() const { return _progressive_read_timeout_ms; } + void set_timeout_ms(int64_t timeout_ms); int64_t timeout_ms() const { return _timeout_ms; } @@ -212,6 +213,9 @@ friend void policy::ProcessThriftRequest(InputMessageBase*); // In client side it gets latency of the RPC call. While in server side, // it gets queue time before server processes the RPC call. 
int64_t latency_us() const { + if (_begin_time_us == 0) { + return 0; + } if (_end_time_us == UNSET_MAGIC_NUM) { return butil::cpuwide_time_us() - _begin_time_us; } @@ -324,7 +328,9 @@ friend void policy::ProcessThriftRequest(InputMessageBase*); // Make the RPC end when the HTTP response has complete headers and let // user read the remaining body by using ReadProgressiveAttachmentBy(). - void response_will_be_read_progressively() { add_flag(FLAGS_READ_PROGRESSIVELY); } + void response_will_be_read_progressively() { + add_flag(FLAGS_READ_PROGRESSIVELY); + } // Make the RPC end when the HTTP request has complete headers and let // user read the remaining body by using ReadProgressiveAttachmentBy(). void request_will_be_read_progressively() { add_flag(FLAGS_READ_PROGRESSIVELY); } @@ -838,6 +844,7 @@ friend void policy::ProcessThriftRequest(InputMessageBase*); int32_t _timeout_ms; int32_t _connect_timeout_ms; int32_t _backup_request_ms; + int32_t _progressive_read_timeout_ms; // Priority: `_backup_request_policy' > `_backup_request_ms'. 
BackupRequestPolicy* _backup_request_policy; // If this rpc call has retry/backup request,this var save the real timeout for current call diff --git a/src/brpc/errno.proto b/src/brpc/errno.proto index 26ffadc201..45e0c00568 100644 --- a/src/brpc/errno.proto +++ b/src/brpc/errno.proto @@ -41,7 +41,8 @@ enum Errno { ESSL = 1016; // SSL related error EH2RUNOUTSTREAMS = 1017; // The H2 socket was run out of streams EREJECT = 1018; // The Request is rejected - + EPROGREADTIMEOUT = 1019; // The Progressive read timeout + // Errno caused by server EINTERNAL = 2001; // Internal Server Error ERESPONSE = 2002; // Bad Response diff --git a/src/brpc/input_messenger.cpp b/src/brpc/input_messenger.cpp index c249cca22c..fa05423640 100644 --- a/src/brpc/input_messenger.cpp +++ b/src/brpc/input_messenger.cpp @@ -312,7 +312,7 @@ int InputMessenger::ProcessNewMessage( // not in the bthread where the polling bthread is located, because the // method for processing messages may call synchronization primitives, // causing the polling bthread to be scheduled out. - if (m->_socket_mode == SOCKET_MODE_RDMA) { + if (m->_socket_mode == SOCKET_MODE_RDMA || m->_socket_mode == SOCKET_MODE_UBRING) { m->_transport->QueueMessage(last_msg, &num_bthread_created, true); } if (num_bthread_created) { diff --git a/src/brpc/input_messenger.h b/src/brpc/input_messenger.h index 8482c3f3fc..5203c02505 100644 --- a/src/brpc/input_messenger.h +++ b/src/brpc/input_messenger.h @@ -29,6 +29,9 @@ namespace brpc { namespace rdma { class RdmaEndpoint; } +namespace ubring { +class UBShmEndpoint; +} class TcpTransport; struct InputMessageHandler { // The callback to cut a message from `source'. 
@@ -93,6 +96,7 @@ class InputMessenger : public SocketUser { friend class Socket; friend class TcpTransport; friend class rdma::RdmaEndpoint; +friend class ubring::UBShmEndpoint; public: explicit InputMessenger(size_t capacity = 128); ~InputMessenger(); diff --git a/src/brpc/policy/http_rpc_protocol.cpp b/src/brpc/policy/http_rpc_protocol.cpp index b03a961b52..86737e9b90 100644 --- a/src/brpc/policy/http_rpc_protocol.cpp +++ b/src/brpc/policy/http_rpc_protocol.cpp @@ -1201,6 +1201,7 @@ ParseResult ParseHttpMessage(butil::IOBuf *source, Socket *socket, LOG(FATAL) << "Fail to new HttpContext"; return MakeParseError(PARSE_ERROR_NO_RESOURCE); } + http_imsg->SetSocketId(socket->id()); // Parsing http is costly, parsing an incomplete http message from the // beginning repeatedly should be avoided, otherwise the cost may reach // O(n^2) in the worst case. Save incomplete http messages in sockets diff --git a/src/brpc/policy/http_rpc_protocol.h b/src/brpc/policy/http_rpc_protocol.h index bc8bd06593..2b2e9296ab 100644 --- a/src/brpc/policy/http_rpc_protocol.h +++ b/src/brpc/policy/http_rpc_protocol.h @@ -87,11 +87,20 @@ class HttpContext : public ReadableProgressiveAttachment , public InputMessageBase , public HttpMessage { public: + SocketId GetSocketId() override { + return _socket_id; + } + + void SetSocketId(SocketId id) { + _socket_id = id; + } + explicit HttpContext(bool read_body_progressively, HttpMethod request_method = HTTP_METHOD_GET) : InputMessageBase() , HttpMessage(read_body_progressively, request_method) - , _is_stage2(false) { + , _is_stage2(false) + , _socket_id(0) { // add one ref for Destroy butil::intrusive_ptr(this).detach(); } @@ -122,6 +131,7 @@ class HttpContext : public ReadableProgressiveAttachment private: bool _is_stage2; + SocketId _socket_id; }; // Implement functions required in protocol.h diff --git a/src/brpc/progressive_reader.h b/src/brpc/progressive_reader.h index 6f54ae68a7..860068e2e6 100644 --- a/src/brpc/progressive_reader.h +++ 
b/src/brpc/progressive_reader.h @@ -20,6 +20,7 @@ #define BRPC_PROGRESSIVE_READER_H #include "brpc/shared_object.h" +#include "brpc/socket.h" namespace brpc { @@ -84,6 +85,7 @@ class ReadableProgressiveAttachment : public SharedObject { // Any error occurred should destroy the reader by calling r->Destroy(). // r->Destroy() should be guaranteed to be called once and only once. virtual void ReadProgressiveAttachmentBy(ProgressiveReader* r) = 0; + virtual SocketId GetSocketId() = 0; }; } // namespace brpc diff --git a/src/brpc/rdma_transport.cpp b/src/brpc/rdma_transport.cpp index 88d89a7b06..4cd93994e2 100644 --- a/src/brpc/rdma_transport.cpp +++ b/src/brpc/rdma_transport.cpp @@ -50,7 +50,7 @@ void RdmaTransport::Init(Socket *socket, const SocketOptions &options) { if (options.need_on_edge_trigger && _on_edge_trigger == NULL) { _on_edge_trigger = rdma::RdmaEndpoint::OnNewDataFromTcp; } - _tcp_transport = std::make_shared(); + _tcp_transport = std::unique_ptr(); _tcp_transport->Init(socket, options); } diff --git a/src/brpc/socket.h b/src/brpc/socket.h index 816fccdf27..167cc8f418 100644 --- a/src/brpc/socket.h +++ b/src/brpc/socket.h @@ -57,7 +57,10 @@ namespace rdma { class RdmaEndpoint; class RdmaConnect; } - +namespace ubring { + class UBShmEndpoint; + class UBConnect; +} class Socket; class AuthContext; class EventDispatcher; @@ -317,6 +320,9 @@ friend class policy::RtmpContext; friend class schan::ChannelBalancer; friend class rdma::RdmaEndpoint; friend class rdma::RdmaConnect; +friend class ubring::UBShmEndpoint; +friend class ubring::UBConnect; +friend class UBShmTransport; friend class HealthCheckTask; friend class OnAppHealthCheckDone; friend class HealthCheckManager; diff --git a/src/brpc/socket_mode.h b/src/brpc/socket_mode.h index b5d42be4aa..b4ac7dfbca 100644 --- a/src/brpc/socket_mode.h +++ b/src/brpc/socket_mode.h @@ -20,7 +20,8 @@ namespace brpc { enum SocketMode { SOCKET_MODE_TCP = 0, - SOCKET_MODE_RDMA = 1 + SOCKET_MODE_RDMA = 1, + 
SOCKET_MODE_UBRING = 2 }; } // namespace brpc #endif //BRPC_SOCKET_MODE_H \ No newline at end of file diff --git a/src/brpc/transport_factory.cpp b/src/brpc/transport_factory.cpp index b689e2edd2..36fdaaed05 100644 --- a/src/brpc/transport_factory.cpp +++ b/src/brpc/transport_factory.cpp @@ -18,6 +18,7 @@ #include "brpc/transport_factory.h" #include "brpc/tcp_transport.h" #include "brpc/rdma_transport.h" +#include "brpc/ubshm_transport.h" namespace brpc { int TransportFactory::ContextInitOrDie(SocketMode mode, bool serverOrNot, const void* _options) { @@ -28,6 +29,11 @@ int TransportFactory::ContextInitOrDie(SocketMode mode, bool serverOrNot, const else if (mode == SOCKET_MODE_RDMA) { return RdmaTransport::ContextInitOrDie(serverOrNot, _options); } +#endif +#if BRPC_WITH_UBRING + else if (mode == SOCKET_MODE_UBRING) { + return UBShmTransport::ContextInitOrDie(serverOrNot, _options); + } #endif else { LOG(ERROR) << "unknown transport type " << mode; @@ -43,6 +49,11 @@ std::unique_ptr TransportFactory::CreateTransport(SocketMode mode) { else if (mode == SOCKET_MODE_RDMA) { return std::unique_ptr(new RdmaTransport()); } +#endif +#if BRPC_WITH_UBRING + else if (mode == SOCKET_MODE_UBRING) { + return std::unique_ptr(new UBShmTransport()); + } #endif else { LOG(ERROR) << "socket_mode set error"; diff --git a/src/brpc/ubshm/common/common.h b/src/brpc/ubshm/common/common.h new file mode 100644 index 0000000000..4390726954 --- /dev/null +++ b/src/brpc/ubshm/common/common.h @@ -0,0 +1,179 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BRPC_COMMON_H +#define BRPC_COMMON_H +#include +#include +#include +#include +#include "butil/logging.h" + +#define LIKELY(x) __builtin_expect(!!(x), 1) +#define UNLIKELY(x) __builtin_expect(!!(x), 0) + +#ifndef UNREFERENCE_PARAM +#define UNREFERENCE_PARAM(x) ((void)(x)) +#endif + +#ifdef UT +#define STATIC +#define INLINE +#define UBRING_STATISTICS_PATH ROOT_PATH "/ubring/run" +#else +#define STATIC static +#define INLINE inline +#define UBRING_STATISTICS_PATH "/opt/ubring/run" +#endif + +#ifdef __cplusplus +#include +using AtomicInt = std::atomic; +using AtomicBool = std::atomic; +using AtomicUintFast64 = std::atomic; +using AtomicUintFast8 = std::atomic; +#define ATOMIC_INIT(var, value) var.store(value) +#define ATOMIC_STORE(var, value) var.store(value) +#define ATOMIC_LOAD(var) var.load() +#define ATOMIC_ADD(var, value) var.fetch_add(value) +#define ATOMIC_SUB(var, value) var.fetch_sub(value) +#define ATOMIC_COMPARE_EXCHANGE_STRONG(var, expected, desired) \ + var.compare_exchange_strong((expected), (desired)) +#else +#include +typedef atomic_int AtomicInt; +typedef atomic_bool AtomicBool; +typedef atomic_uint_fast64_t AtomicUintFast64; +typedef atomic_uint_fast8_t AtomicUintFast8; +#define ATOMIC_INIT(var, value) atomic_init(&(var), value) +#define ATOMIC_STORE(var, value) atomic_store(&(var), value) +#define ATOMIC_LOAD(var) atomic_load(&(var)) +#define ATOMIC_ADD(var, value) atomic_fetch_add(&(var), value) +#define ATOMIC_SUB(var, value) atomic_fetch_sub(&(var), value) +#define ATOMIC_COMPARE_EXCHANGE_STRONG(var, expected, desired) 
\ + atomic_compare_exchange_strong(&(var), &(expected), (desired)) +#endif + +#define ISB() __asm__ __volatile__("isb" ::: "memory") +#define DSB() __asm__ __volatile__("dsb sy" ::: "memory") + +#ifndef errno_t +typedef int errno_t; +#endif +#ifndef EOK +#define EOK 0 +#endif + +#define MAX_NODE_NUM 8 +#define IPV4_FIRST_BYTE_OFFSET 24 +#define COPY_ALIGNED_DATA_BYTES 64 + +#if defined(OS_MACOSX) +#define EPOLLET 0x80000000 +#endif + +static inline int Copy64Byte(int8_t *dst, int8_t *src) { +#ifdef LS64 + asm volatile ( + "mov x12, %0\n" + "mov x13, %1\n" + "ldr x4, [x12]\n" + "ldr x5, [x12, #8]\n" + "ldr x6, [x12, #16]\n" + "ldr x7, [x12, #24]\n" + "ldr x8, [x12, #32]\n" + "ldr x9, [x12, #40]\n" + "ldr x10, [x12, #48]\n" + "ldr x11, [x12, #56]\n" + "ST64B x4, [x13]\n" + : + : "r" (src), "r" (dst) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13" + ); + return EOK; +#else + memcpy(dst, src, COPY_ALIGNED_DATA_BYTES); + return EOK; +#endif +} + +#define SEC_TO_NSEC 1000000000 +#define MSEC_TO_NSEC 1000000 +#define USEC_TO_NSEC 1000 +#define MSEC_TO_SEC 1000 +#define MAX_IP_PORT_STR_LEN 23 +#define DECIMAL_BASE 10 + +static inline uint64_t GetCurNanoSeconds(void) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + uint64_t timestamp = (uint64_t)ts.tv_sec * SEC_TO_NSEC + (uint64_t)ts.tv_nsec; + return timestamp; +} + +#define FREE_PTR(ptr) \ + do { \ + if ((ptr) != NULL) { \ + free(ptr); \ + (ptr) = NULL; \ + } \ + } while (0) + +typedef enum { + UBRING_OK = 0, + UBRING_ERR = -1, + UBRING_RETRY = -2, + UBRING_REENTRY = -3, + UBRING_ERR_TIMEOUT = -4, + SHM_ERR = -100, + SHM_ERR_INPUT_INVALID = -101, + SHM_ERR_EXIST = -102, + SHM_ERR_RESOURCE_ATTACHED = -103, + SHM_ERR_NOT_FOUND = -104, + SHM_ERR_UBSM_NET_ERR = -105, + MPA_UDP_ERR = -200, + MPA_UDP_NO_TRX = -201, + MPA_UDP_STATUS_NOT_JOINED = -202, + MPA_MUXER_NOT_READY = -203, + MPA_PORT_FULL = -204, + MPA_PORT_OUTRANGE = -205, + MPA_PORT_TAKEN = -206, + 
MPA_UDP_STATUS_NOT_CONNECTED = -207, + MPA_UDP_STATUS_ALREADY_CONNECTED = -208, + MPA_UDP_OLD_RDLIST = -209, + MPA_UDP_RDLIST_FULL = -210, + UBR_NOT_CONNECTED = -300, + UBR_ERR_ADDR_IN_USE = -301, +} RETURN_CODE; + +#define ALIGN_BYTES 0x40 +#define CHECKED_ALIGN_BITS (ALIGN_BYTES - 1) + +static inline size_t Aligned64Offset(uint8_t *addr) { + return ((ALIGN_BYTES - (((size_t)(addr)) & CHECKED_ALIGN_BITS)) & CHECKED_ALIGN_BITS); +} + +static inline RETURN_CODE HasTimedOut(const uint64_t startTime, const uint32_t timeout) { + uint64_t endTime = startTime + (uint64_t)timeout * SEC_TO_NSEC; + if (GetCurNanoSeconds() > endTime) { + LOG(ERROR) << "task time out " << timeout << " seconds."; + return UBRING_ERR; + } + return UBRING_OK; +} + +#endif // BRPC_COMMON_H \ No newline at end of file diff --git a/src/brpc/ubshm/common/thread_lock.h b/src/brpc/ubshm/common/thread_lock.h new file mode 100644 index 0000000000..8c07ce360d --- /dev/null +++ b/src/brpc/ubshm/common/thread_lock.h @@ -0,0 +1,118 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef BRPC_THREAD_LOCK_H +#define BRPC_THREAD_LOCK_H +#include +#include +#include +#include +#include +#include "brpc/ubshm/common/common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void UnlockMutex(pthread_mutex_t **mtx) +{ + if (LIKELY(mtx != NULL && *mtx != NULL)) { + pthread_mutex_unlock(*mtx); + } else { + LOG(ERROR) << "Invalid input for mtx."; + } +} + +#define LOCK_GUARD(mtxPtr) \ + pthread_mutex_t *__attribute__((cleanup(UnlockMutex))) _mtxPtr = ({ \ + pthread_mutex_lock(&(mtxPtr)); \ + &(mtxPtr); \ + }) + +static inline void UnlockSpinLock(pthread_spinlock_t **spinLock) +{ + if (LIKELY(spinLock != NULL && *spinLock != NULL)) { + pthread_spin_unlock(*spinLock); + } else { + LOG(ERROR) << "Invalid input for spinLock."; + } +} + +#define SPIN_LOCK_GUARD(spinLockPtr) \ + pthread_spinlock_t *__attribute__((cleanup(UnlockSpinLock))) _spinLockPtr = ({ \ + pthread_spin_lock(&(spinLockPtr)); \ + &(spinLockPtr); \ + }) + +static inline void UnlockRWLock(pthread_rwlock_t **rwLock) +{ + if (LIKELY(rwLock != NULL && *rwLock != NULL)) { + pthread_rwlock_unlock(*rwLock); + } else { + LOG(ERROR) << "Invalid input for rwLock."; + } +} + +#define R_LOCK_GUARD(readLockPtr) \ + pthread_rwlock_t *__attribute__((cleanup(UnlockRWLock))) _readLockPtr = ({ \ + pthread_rwlock_rdlock(&(readLockPtr)); \ + &(readLockPtr); \ + }) + +#define W_LOCK_GUARD(writeLockPtr) \ + pthread_rwlock_t *__attribute__((cleanup(UnlockRWLock))) _writeLockPtr = ({ \ + pthread_rwlock_wrlock(&(writeLockPtr)); \ + &(writeLockPtr); \ + }) + +static inline void PostSemWithClose(sem_t **sem) +{ + if (LIKELY(sem != NULL && *sem != NULL)) { + sem_post(*sem); + sem_close(*sem); + *sem = NULL; + sem = NULL; + } else { + LOG(ERROR) << "Invalid input for semaphore."; + } +} + +static inline void PostSem(sem_t **sem) +{ + if (LIKELY(sem != NULL && *sem != NULL)) { + sem_post(*sem); + } else { + LOG(ERROR) << "Invalid input for semaphore."; + } +} + +#define 
SEMAPHORE_WAIT_GUARD_WITH_CLOSE(semPtr) \ + sem_t *__attribute__((cleanup(PostSemWithClose))) _semPtr = ({ \ + sem_wait(semPtr); \ + semPtr; \ + }) + +#define SEMAPHORE_WAIT_GUARD(semPtr) \ + sem_t *__attribute__((cleanup(PostSem))) _semPtr = ({ \ + sem_wait(semPtr); \ + semPtr; \ + }) + +#ifdef __cplusplus +} +#endif +#endif //BRPC_THREAD_LOCK_H \ No newline at end of file diff --git a/src/brpc/ubshm/shm/shm_def.h b/src/brpc/ubshm/shm/shm_def.h new file mode 100644 index 0000000000..0c28084b96 --- /dev/null +++ b/src/brpc/ubshm/shm/shm_def.h @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BRPC_SHM_DEF_H +#define BRPC_SHM_DEF_H +#include +#include +#include + +#define PROT_READ 0x1 /* Page can be read. */ +#define PROT_WRITE 0x2 /* Page can be written. */ +#define PROT_EXEC 0x4 /* Page can be executed. */ +#define PROT_NONE 0x0 /* Page can not be accessed. */ +#define PROT_GROWSDOWN 0x01000000 /* Extend change to start of growsdown vma (mprotect only). */ +#define PROT_GROWSUP 0x02000000 /* Extend change to start of growsup vma (mprotect only). */ +/* Sharing types (must choose one and only one of these). */ +#define MAP_SHARED 0x01 /* Share changes. 
*/ +#define MAP_PRIVATE 0x02 /* Changes are private. */ +#define SHM_MAX_NAME_BUFF_LEN 48 // byte, buffer size, ubsm_sdk need name to be below 48byte +#define SHM_MAX_NAME_LEN (SHM_MAX_NAME_BUFF_LEN - 1) // byte, string length +#define SHM_ALLOC_UNIT_SIZE (4 * 1024 * 1024) // 4MB + +namespace brpc { +namespace ubring { +typedef enum { SHM_TYPE_UB, SHM_TYPE_IPC, SHM_TYPE_UBS, SHM_TYPE_UNSUPPORT } SHM_TYPE; + +typedef struct { + uint8_t *addr; + size_t len; + uint64_t memid; + char name[SHM_MAX_NAME_BUFF_LEN]; + uint32_t fd; +} SHM; + +typedef struct ShmListNode { + SHM shm; + struct ShmListNode *next; + struct ShmListNode *prev; +} ShmListNode; + +typedef struct { + ShmListNode* head; + ShmListNode* tail; + size_t size; + pthread_mutex_t shmLock; +} ShmList; +} +} +#endif //BRPC_SHM_DEF_H \ No newline at end of file diff --git a/src/brpc/ubshm/shm/shm_ipc.cpp b/src/brpc/ubshm/shm/shm_ipc.cpp new file mode 100644 index 0000000000..7e934c7568 --- /dev/null +++ b/src/brpc/ubshm/shm/shm_ipc.cpp @@ -0,0 +1,189 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include "brpc/ubshm/common/common.h" +#include "brpc/ubshm/shm/shm_def.h" +#include "brpc/ubshm/shm/shm_ipc.h" + +namespace brpc { +namespace ubring { +RETURN_CODE IpcShmLocalMalloc(SHM *shm) +{ + int fd = shm_open(shm->name, O_CREAT | O_EXCL | O_RDWR, SHM_IPC_MODE); + if (fd < 0) { + if (errno == EEXIST) { + LOG(ERROR) << "IPC Create shm=" << shm->name << " failed, shm exists."; + return SHM_ERR_EXIST; + } + + LOG(ERROR) << "IPC Open shm=" << shm->name << " failed, ret(" << errno << ")."; + return SHM_ERR; + } + + int ret = ftruncate(fd, (off_t)shm->len); + if (ret < 0) { + LOG(ERROR) << "IPC Set shm=" << shm->name << " length=" << shm->len << " failed, ret(" << errno << ")."; + close(fd); + shm_unlink(shm->name); + return SHM_ERR; + } + + shm->addr = (uint8_t*)mmap(NULL, shm->len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (shm->addr == (uint8_t*)MAP_FAILED) { + LOG(ERROR) << "IPC map shm=" << shm->name << " length=" << shm->len << " failed, ret(" << errno << ")."; + close(fd); + shm_unlink(shm->name); + return SHM_ERR; + } + + close(fd); + return UBRING_OK; +} + +RETURN_CODE IpcShmMunmap(SHM *shm) +{ + if (shm->addr == NULL) { + LOG(INFO) << "IPC unmap shm=" << shm->name << " already unmapped."; + return UBRING_OK; + } + + int ret = munmap(shm->addr, shm->len); + if (ret != UBRING_OK) { + LOG(ERROR) << "IPC unmap shm=" << shm->name << " failed, errno=" << errno; + return SHM_ERR; + } + + LOG(INFO) << "IPC unmap shm=" << shm->name << " length=" << shm->len << " success."; + return UBRING_OK; +} + +RETURN_CODE IpcShmFree(SHM *shm) +{ + // free + int ret = shm_unlink(shm->name); + if (ret != UBRING_OK) { + if (errno == EBUSY) { + LOG_EVERY_SECOND(ERROR) << "IPC free shm=" << shm->name << " failed, errno=" << errno; + return SHM_ERR_RESOURCE_ATTACHED; + } + if (errno == ENOENT) { + LOG(INFO) << "IPC free shm=" << shm->name << " already deleted."; + shm->addr = NULL; + return 
SHM_ERR_NOT_FOUND; + } + LOG_EVERY_SECOND(ERROR) << "IPC free shm=" << shm->name << " failed, errno=" << errno; + return SHM_ERR; + } + return UBRING_OK; +} + +RETURN_CODE IpcShmLocalFree(SHM *shm) +{ + if (shm->addr == NULL) { + LOG(INFO) << "IPC free local shm=" << shm->name << " already freed."; + return SHM_ERR_NOT_FOUND; + } + + int ret = munmap(shm->addr, shm->len); + if (ret != UBRING_OK) { + LOG(WARNING) << "IPC unmap shm=" << shm->name << " failed, ret=" << ret; + } + + ret = shm_unlink(shm->name); + if (ret != UBRING_OK) { + if (errno == EBUSY) { + LOG_EVERY_SECOND(ERROR) << "IPC delete shm=" << shm->name << " failed, ret=" << ret; + return SHM_ERR_RESOURCE_ATTACHED; + } + if (errno == ENOENT) { + LOG(INFO) << "IPC delete shm=" << shm->name << " already deleted by peer."; + shm->addr = NULL; + return SHM_ERR_NOT_FOUND; + } + LOG_EVERY_SECOND(ERROR) << "IPC delete shm=" << shm->name << " failed, ret=" << ret; + return SHM_ERR; + } + shm->addr = NULL; + LOG(INFO) << "IPC free local shm=" << shm->name << " success."; + return UBRING_OK; +} + +RETURN_CODE IpcShmRemoteMalloc(SHM *shm) +{ + int fd = shm_open(shm->name, O_RDWR, SHM_IPC_MODE); + if (fd < 0) { + LOG(ERROR) << "IPC open shm=" << shm->name << " failed, ret=" << errno; + return SHM_ERR; + } + + shm->addr = (uint8_t*)mmap(NULL, shm->len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (shm->addr == (uint8_t*)MAP_FAILED) { + LOG(ERROR) << "IPC map shm=" << shm->name << " failed, ret=" << errno; + close(fd); + return SHM_ERR; + } + + close(fd); + return UBRING_OK; +} + +RETURN_CODE IpcShmLocalMmap(SHM *shm, int prot) +{ + int fd = shm_open(shm->name, O_RDWR, SHM_IPC_MODE); + if (fd < 0) { + LOG(ERROR) << "IPC open shm=" << shm->name << " failed, ret=" << errno; + return SHM_ERR; + } + + shm->addr = (uint8_t*)mmap(NULL, shm->len, prot, MAP_SHARED, fd, 0); + if (shm->addr == (uint8_t*)MAP_FAILED) { + LOG(ERROR) << "IPC map shm=" << shm->name << " failed, ret=" << errno; + close(fd); + return SHM_ERR; + } 
+ + close(fd); + LOG(INFO) << "IPC mmap remote shm=" << shm->name << " length=" << shm->len << " success."; + return UBRING_OK; +} + +RETURN_CODE IpcShmRemoteFree(SHM *shm) +{ + if (shm->addr == NULL) { + LOG(INFO) << "IPC free remote shm=" << shm->name << " already freed."; + return UBRING_OK; + } + + int ret = munmap(shm->addr, shm->len); + if (ret != UBRING_OK) { + LOG(ERROR) << "IPC unmap shm=" << shm->name << " failed, ret=" << ret; + return SHM_ERR; + } + + LOG(INFO) << "IPC free remote shm=" << shm->name << " success."; + return UBRING_OK; +} +} +} diff --git a/src/brpc/ubshm/shm/shm_ipc.h b/src/brpc/ubshm/shm/shm_ipc.h new file mode 100644 index 0000000000..34e8307bb8 --- /dev/null +++ b/src/brpc/ubshm/shm/shm_ipc.h @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 

// POSIX shared-memory (shm_open/mmap) backend of the ubring shm layer.
#ifndef BRPC_SHM_IPC_H
#define BRPC_SHM_IPC_H

#include "shm_def.h"

// Mode argument passed to shm_open() by this backend.
#define SHM_IPC_MODE 0666

namespace brpc {
namespace ubring {
    // NOTE(review): implementation not visible in this chunk; presumably
    // creates and maps a new shared-memory object - confirm in shm_ipc.cpp.
    RETURN_CODE IpcShmLocalMalloc(SHM *shm);
    // NOTE(review): implementation not visible in this chunk.
    RETURN_CODE IpcShmMunmap(SHM *shm);
    // NOTE(review): only the tail of the implementation is visible in this chunk.
    RETURN_CODE IpcShmFree(SHM *shm);
    // munmap()s shm->addr, then shm_unlink()s shm->name.
    RETURN_CODE IpcShmLocalFree(SHM *shm);
    // Opens shm->name with O_RDWR and maps shm->len bytes read/write into shm->addr.
    RETURN_CODE IpcShmRemoteMalloc(SHM *shm);
    // munmap()s shm->addr only; the name is not unlinked.
    RETURN_CODE IpcShmRemoteFree(SHM *shm);
    // Opens shm->name with O_RDWR and maps shm->len bytes with protection `prot`.
    RETURN_CODE IpcShmLocalMmap(SHM *shm, int prot);
}
}

#endif //BRPC_SHM_IPC_H
\ No newline at end of file
diff --git a/src/brpc/ubshm/shm/shm_mgr.cpp b/src/brpc/ubshm/shm/shm_mgr.cpp
new file mode 100644
index 0000000000..cc588da8bd
--- /dev/null
+++ b/src/brpc/ubshm/shm/shm_mgr.cpp
@@ -0,0 +1,247 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
+ +#include +#include +#include +#include +#include +#include "brpc/ubshm/common/common.h" +#include "brpc/ubshm/shm/shm_ipc.h" +#include "brpc/ubshm/shm/shm_ubs.h" +#include "brpc/ubshm/shm/shm_mgr.h" + +namespace brpc { +namespace ubring { +DEFINE_int32(ub_shm_type, 1, "shm type: 1-ipc; 2-ub_ring"); +static SHM_TYPE g_shmType; + +static bool CheckInputShmParam(SHM *shm) { + if (shm == NULL) { + LOG(ERROR) << "Input Param shm is NULL."; + return false; + } + + size_t nameLen = strlen(shm->name); + if (nameLen <= 0 || nameLen > SHM_MAX_NAME_LEN) { + LOG(ERROR) << "Shm name=" << shm->name << ", length=" << shm->len + << ", which is not between 1 and " << SHM_MAX_NAME_LEN; + return false; + } + + if (shm->len <= 0) { + LOG(ERROR) << "Shm length=" << shm->len << " is invalid."; + return false; + } + + if (shm->len < SHM_ALLOC_UNIT_SIZE || (shm->len & (SHM_ALLOC_UNIT_SIZE - 1)) != 0) { + LOG(ERROR) << "Shm length=" << shm->len << " need to be (1..n) * 4MB."; + return false; + } + + return true; +} + +RETURN_CODE ShmMgrInit(void) { + if (UNLIKELY(FLAGS_ub_shm_type >= (uint32_t)SHM_TYPE_UNSUPPORT)) { + LOG(ERROR) << "Shm type config=" << FLAGS_ub_shm_type << " is not supported."; + return UBRING_ERR; + } + + g_shmType = (SHM_TYPE)FLAGS_ub_shm_type; + if (g_shmType == SHM_TYPE_UBS) { + if (UbsShmInit() != UBRING_OK) { + LOG(ERROR) << "Init beiming ubs shm failed."; + return UBRING_ERR; + } + } + LOG(INFO) << "shm mgr init success, shm type=" << g_shmType; + return UBRING_OK; +} + +void ShmMgrFini(void) { + if (g_shmType == SHM_TYPE_UBS) { + if (UbsShmFini() != UBRING_OK) { + LOG(ERROR) << "Fini beiming ubs shm failed."; + return; + } + } + LOG(INFO) << "shm mgr fini success, shm type=" << g_shmType; +} + +void SetShmType(SHM_TYPE type) { + g_shmType = type; +} + +RETURN_CODE ShmLocalMalloc(SHM *shm) { + if (UNLIKELY(!CheckInputShmParam(shm))) { + LOG(ERROR) << "Input param shm is invalid."; + return SHM_ERR_INPUT_INVALID; + } + + RETURN_CODE rc = UBRING_OK; + switch 
(g_shmType) { + case SHM_TYPE_IPC: + rc = IpcShmLocalMalloc(shm); + break; + case SHM_TYPE_UBS: + rc = UbsShmLocalMalloc(shm); + break; + default: + rc = SHM_ERR; + LOG(ERROR) << "Unsupported shm type."; + } + return rc; +} + +RETURN_CODE ShmLocalCalloc(SHM *shm) { + RETURN_CODE rc = ShmLocalMalloc(shm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Failed to alloc local shm."; + return rc; + } + memset(shm->addr, 0, shm->len); + return UBRING_OK; +} + +RETURN_CODE ShmLocalFree(SHM *shm) { + if (UNLIKELY(!CheckInputShmParam(shm))) { + LOG(ERROR) << "Input param shm is invalid."; + return SHM_ERR_INPUT_INVALID; + } + + RETURN_CODE rc = UBRING_OK; + switch (g_shmType) { + case SHM_TYPE_IPC: + rc = IpcShmLocalFree(shm); + break; + case SHM_TYPE_UBS: + rc = UbsShmLocalFree(shm); + break; + default: + rc = SHM_ERR; + LOG(ERROR) << "Unsupported shm type."; + } + return rc; +} + +RETURN_CODE ShmRemoteMalloc(SHM *shm) { + if (UNLIKELY(!CheckInputShmParam(shm))) { + LOG(ERROR) << "Input param shm is invalid."; + return SHM_ERR_INPUT_INVALID; + } + + RETURN_CODE rc = UBRING_OK; + switch (g_shmType) { + case SHM_TYPE_IPC: + rc = IpcShmRemoteMalloc(shm); + break; + case SHM_TYPE_UBS: + rc = UbsShmRemoteMalloc(shm); + break; + default: + rc = SHM_ERR; + LOG(ERROR) << "Unsupported shm type."; + } + return rc; +} + +RETURN_CODE ShmRemoteFree(SHM *shm) { + if (UNLIKELY(!CheckInputShmParam(shm))) { + LOG(ERROR) << "Input param shm is invalid."; + return SHM_ERR_INPUT_INVALID; + } + + RETURN_CODE rc = UBRING_OK; + switch (g_shmType) { + case SHM_TYPE_IPC: + rc = IpcShmRemoteFree(shm); + break; + case SHM_TYPE_UBS: + rc = UbsShmRemoteFree(shm); + break; + default: + rc = SHM_ERR; + LOG(ERROR) << "Unsupported shm type."; + } + return rc; +} + +RETURN_CODE ShmLocalMmap(SHM *shm, int prot) { + if (UNLIKELY(!CheckInputShmParam(shm))) { + LOG(ERROR) << "Input param shm is invalid."; + return SHM_ERR_INPUT_INVALID; + } + + RETURN_CODE rc = UBRING_OK; + switch (g_shmType) { + case 
SHM_TYPE_IPC: + rc = IpcShmLocalMmap(shm, prot); + break; + case SHM_TYPE_UBS: + rc = UbsShmLocalMmap(shm, prot); + break; + default: + rc = SHM_ERR; + LOG(ERROR) << "Unsupported shm type."; + } + return rc; +} + +RETURN_CODE ShmMunmap(SHM *shm) { + if (UNLIKELY(!CheckInputShmParam(shm))) { + LOG(ERROR) << "Input param shm is invalid."; + return SHM_ERR_INPUT_INVALID; + } + + RETURN_CODE rc = UBRING_OK; + switch (g_shmType) { + case SHM_TYPE_IPC: + rc = IpcShmMunmap(shm); + break; + case SHM_TYPE_UBS: + rc = UbsShmMunmap(shm); + break; + default: + rc = SHM_ERR; + LOG(ERROR) << "Unsupported shm type."; + } + return rc; +} + +RETURN_CODE ShmFree(SHM *shm) { + if (UNLIKELY(!CheckInputShmParam(shm))) { + LOG(ERROR) << "Input param shm is invalid."; + return SHM_ERR_INPUT_INVALID; + } + + RETURN_CODE rc = UBRING_OK; + switch (g_shmType) { + case SHM_TYPE_IPC: + rc = IpcShmFree(shm); + break; + case SHM_TYPE_UBS: + rc = UbsShmFree(shm); + break; + default: + rc = SHM_ERR; + LOG(ERROR) << "Unsupported shm type."; + } + return rc; +} +} +} \ No newline at end of file diff --git a/src/brpc/ubshm/shm/shm_mgr.h b/src/brpc/ubshm/shm/shm_mgr.h new file mode 100644 index 0000000000..597f5e4ba5 --- /dev/null +++ b/src/brpc/ubshm/shm/shm_mgr.h @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
// See the License for the
// specific language governing permissions and limitations
// under the License.

// Backend-independent shared-memory API. Apart from SetShmType/ShmMgrInit/
// ShmMgrFini, each function validates the descriptor and forwards to the Ipc*
// or Ubs* implementation selected by the configured shm type.
#ifndef BRPC_SHM_MGR_H
#define BRPC_SHM_MGR_H

// NOTE(review): the header name of the next include was stripped from this
// copy of the patch - restore it from the real file.
#include
#include "brpc/ubshm/common/common.h"
#include "brpc/ubshm/shm/shm_def.h"

namespace brpc {
namespace ubring {
// Overrides the active backend without initialising it.
void SetShmType(SHM_TYPE type);

// Selects the backend from --ub_shm_type and initialises the UBS backend when chosen.
RETURN_CODE ShmMgrInit(void);

// Finalises the UBS backend when it is the active one.
void ShmMgrFini(void);

RETURN_CODE ShmLocalMalloc(SHM *shm);

// ShmLocalMalloc() followed by zero-filling shm->len bytes at shm->addr.
RETURN_CODE ShmLocalCalloc(SHM *shm);

RETURN_CODE ShmLocalFree(SHM *shm);

RETURN_CODE ShmRemoteMalloc(SHM *shm);

RETURN_CODE ShmRemoteFree(SHM *shm);

// `prot` is passed through unchanged to the backend mapping call.
RETURN_CODE ShmLocalMmap(SHM *shm, int prot);

RETURN_CODE ShmMunmap(SHM *shm);

RETURN_CODE ShmFree(SHM *shm);
}
}

#endif //BRPC_SHM_MGR_H
\ No newline at end of file
diff --git a/src/brpc/ubshm/shm/shm_ubs.cpp b/src/brpc/ubshm/shm/shm_ubs.cpp
new file mode 100644
index 0000000000..74c8cfc967
--- /dev/null
+++ b/src/brpc/ubshm/shm/shm_ubs.cpp
@@ -0,0 +1,565 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
+ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include "brpc/ubshm/timer/timer_mgr.h" +#include "brpc/ubshm/common/thread_lock.h" +#include "brpc/ubshm/common/common.h" +#include "brpc/ubshm/shm/shm_def.h" +#include "brpc/ubshm/ub_ring_manager.h" +#include "brpc/ubshm/ubs_mem/ubs_mem.h" +#include "brpc/ubshm/ubs_mem/ubs_mem_def.h" +#ifdef UT +#include "ubs_mem.h" +#endif +#include "shm_ubs.h" + +namespace brpc { +namespace ubring { +#define UBRING_MK_UBSM(ret, fn, args) ret (*fn) args = NULL +#include "brpc/ubshm/ubs_mem/declare_shm_ubs.h" +#define SHM_RIGHT_MODE 0666 +#define UBRING_REGION_NAME_PREFIX "UbrONE2ALLRegion" +DEFINE_uint32(node_location, 1, "Location of the ub machine."); +DEFINE_bool(shm_wr_delay_comp, true, "Indicates whether to enable the write relay." + "0: relay; 1: non-relay."); +DEFINE_int32(ub_flying_io_timeout, 5, "Waiting time for stopping data" + "sending and receiving when the link is disconnected."); +char g_regionName[MAX_REGION_NAME_DESC_LENGTH] = {0}; +int g_shmTimerFd = 0; +ShmList *g_shmList = NULL; +static RETURN_CODE UbsShmInterfacesLoad(void); +char hostname[MAX_HOST_NAME_DESC_LENGTH]; + +RETURN_CODE UbsShmInterfacesLoad(void) +{ +#ifndef UT + const char *ubsmSdkLocation = "/usr/local/ubs_mem/lib/libubsm_sdk.so"; +#if defined(OS_LINUX) + void* dlhandler = dlmopen(LM_ID_NEWLM, ubsmSdkLocation, RTLD_NOW | RTLD_LOCAL | RTLD_NODELETE | RTLD_DEEPBIND); +#elif defined(OS_MACOSX) + void* dlhandler = dlopen(ubsmSdkLocation, RTLD_NOW | RTLD_LOCAL | RTLD_NODELETE); +#endif + if (dlhandler == NULL) { + LOG(ERROR) << "Dlopen libubsm_sdk.so in " << ubsmSdkLocation << " failed, error:" << dlerror(); + return UBRING_ERR; + } + +#define UBRING_MK_UBSM_OPTIONAL(ret, fn, args) \ + do { \ + fn = (decltype(fn))dlsym(dlhandler, #fn); \ + } while (0) + +#define UBRING_MK_UBSM(ret, fn, args) \ + do { \ + if ((fn) != NULL) { \ + break; \ + } \ + UBRING_MK_UBSM_OPTIONAL(ret, fn, args); \ + if ((fn) == NULL) { \ + 
LOG(ERROR) << "Fail load ubs_mem func " << #fn <<" error:" << dlerror(); \ + return UBRING_ERR; \ + } \ + } while (0) +#include "brpc/ubshm/ubs_mem/declare_shm_ubs.h" + + dlclose(dlhandler); + dlhandler = NULL; +#endif + return UBRING_OK; +} + +static RETURN_CODE CreateUbsShmRegion(const char *regionName) +{ + int ret = snprintf(g_regionName, MAX_REGION_NAME_DESC_LENGTH, "%s_%u", + UBRING_REGION_NAME_PREFIX, FLAGS_node_location); + if (ret < 0) { + LOG(ERROR) << "Snprintf_s region name failed, ret=" << ret; + return UBRING_ERR; + } + + ubsmem_regions_t regions = {0}; // 16 * (48 + 1) bytes, 约0.8k + ret = ubsmem_lookup_regions(®ions); + if (ret != UBSM_OK || regions.region[0].host_num <= 0) { + LOG(ERROR) << "Ubs lookup share region failed, ret=" << ret << ", region.num=" << regions.region[0].host_num; + return UBRING_ERR; + } + ubsmem_region_attributes_t regionAttr = {0}; + regionAttr.host_num = regions.region[0].host_num; + for (int i = 0; i < regionAttr.host_num; i++) { + strcpy(regionAttr.hosts[i].host_name, regions.region[0].hosts[i].host_name); + regionAttr.hosts[i].affinity = (strcmp(regionAttr.hosts[i].host_name, hostname) == 0) ? 
+ true : false; + } + + ret = ubsmem_create_region(regionName, 0, ®ionAttr); + if (ret == UBSM_ERR_ALREADY_EXIST) { + LOG(WARNING) << "Ubs region exists, region_name=" << regionName; + return UBRING_OK; + } else if (ret != UBSM_OK) { + LOG(ERROR) << "Ubsmem create region failed, ret=" << ret; + return UBRING_ERR; + } + + return UBRING_OK; +} + +static uint64_t AquireFlagIfWrDelayComp(const uint64_t flag) +{ + if (FLAGS_shm_wr_delay_comp == 0) { + return flag; + } + return flag | UBSM_FLAG_WR_DELAY_COMP; +} + +RETURN_CODE UbsShmLocalMalloc(SHM *shm) +{ + int ret = ubsmem_shmem_allocate(g_regionName, shm->name, shm->len, SHM_RIGHT_MODE, + AquireFlagIfWrDelayComp(UBSM_FLAG_ONLY_IMPORT_NONCACHE | UBSM_FLAG_MEM_ANONYMOUS)); +do { + if (ret == UBSM_ERR_ALREADY_EXIST) { + if (ubsmem_shmem_deallocate(shm->name) != UBSM_OK) { + LOG(ERROR) << "Ubs create shm name=" << shm->name << " failed, shm exists, ret=" << ret; + return SHM_ERR_EXIST; + } + LOG(INFO) << "Ubs delete shm name=" << shm->name << " success, try to recreate."; + ret = ubsmem_shmem_allocate(g_regionName, shm->name, shm->len, SHM_RIGHT_MODE, + AquireFlagIfWrDelayComp(UBSM_FLAG_ONLY_IMPORT_NONCACHE | UBSM_FLAG_MEM_ANONYMOUS)); + if (ret != UBSM_OK) { + LOG(ERROR) << "Ubs recreate shm name=" << shm->name << " failed, ret=" << ret; + return SHM_ERR; + } + } else if (ret != UBSM_OK) { + LOG(ERROR) << "Ubs create shm name=" << shm->name << " failed, ret=" << ret; + return SHM_ERR; + } +} while (0); + + ret = ubsmem_shmem_map(NULL, shm->len, PROT_READ | PROT_WRITE, MAP_SHARED, shm->name, 0, (void**)&(shm->addr)); + if (ret != UBSM_OK) { + LOG(ERROR) << "Ubs map shm=" << shm->name << " failed, ret=" << ret; + if (ret == UBSM_ERR_NOT_FOUND) { + return SHM_ERR_NOT_FOUND; + } + ubsmem_shmem_deallocate(shm->name); + return SHM_ERR; + } + + // 通过MXE获取memid + shm->memid = 1; // 暂时打桩 + LOG(INFO) << "Ubs malloc local shm=" << shm->name << " length=" << shm->len << " memid=" << shm->memid << " success."; + return UBRING_OK; +} 
+ +RETURN_CODE UbsShmMunmap(SHM *shm) +{ + // unmap + if (shm->addr == NULL) { + LOG(ERROR) << "Ubs input shm param is invalid, addr is NULL."; + return SHM_ERR_INPUT_INVALID; + } + + int ret = ubsmem_shmem_unmap(shm->addr, shm->len); + if (ret != UBSM_OK) { + if (ret == UBSM_ERR_NET) { + LOG(ERROR) << "Ubs unmap shm=" << shm->name << " failed, ubsm net err=" << ret; + AddShmToList(g_shmList, shm); + return SHM_ERR_UBSM_NET_ERR; + } + LOG(ERROR) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " failed, ret=" << ret; + return SHM_ERR; + } + + LOG(INFO) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " success."; + return UBRING_OK; +} + +RETURN_CODE UbsShmFree(SHM *shm) +{ + if (shm->addr == NULL) { + LOG(ERROR) << "Ubs input shm param is invalid, addr is NULL."; + return SHM_ERR_INPUT_INVALID; + } + + // free + int ret = ubsmem_shmem_deallocate(shm->name); + if (ret != UBSM_OK) { + if (ret == UBSM_ERR_IN_USING) { + LOG(INFO) << "Ubs free shm=" << shm->name << " failed, resource attached=" << ret; + return SHM_ERR_RESOURCE_ATTACHED; + } else if (ret == UBSM_ERR_NOT_FOUND) { + LOG(INFO) << "Ubs free shm=" << shm->name << " failed, resource not found=" << ret; + return SHM_ERR_NOT_FOUND; + } + LOG(ERROR) << "Ubs free shm="<< shm->name << " failed, ret=" << ret; + return SHM_ERR; + } + shm->addr = NULL; + LOG(INFO) << "Ubs free shm=" << shm->name << " length=" << shm->len << " success."; + return UBRING_OK; +} + +RETURN_CODE UbsShmLocalFree(SHM *shm) +{ + // unmap + if (shm->addr == NULL) { + LOG(ERROR) << "Ubs input shm param is invalid, addr is NULL."; + return SHM_ERR_INPUT_INVALID; + } + + int ret = ubsmem_shmem_unmap(shm->addr, shm->len); + if (ret != UBSM_OK) { + if (ret == UBSM_ERR_NET) { + LOG(ERROR) << "Ubs unmap shm=" << shm->name << " failed, ubsm net err=" << ret; + AddShmToList(g_shmList, shm); + return SHM_ERR_UBSM_NET_ERR; + } + LOG(WARNING) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " failed, ret=" << ret; 
+ } + + // free + ret = ubsmem_shmem_deallocate(shm->name); + if (ret != UBSM_OK) { + if (ret == UBSM_ERR_IN_USING) { + LOG_EVERY_SECOND(INFO) << "Ubs delete shm=" << shm->name << " failed, resource attached=" << ret; + return SHM_ERR_RESOURCE_ATTACHED; + } + LOG(ERROR) << "Ubs delete shm=" << shm->name << " failed, ret=" << ret; + return SHM_ERR; + } + shm->addr = NULL; + LOG(INFO) << "Ubs free local shm=" << shm->name << " length=" << shm->len << " success."; + return UBRING_OK; +} + +RETURN_CODE UbsShmRemoteMalloc(SHM *shm) +{ + int ret = ubsmem_shmem_map(NULL, shm->len, PROT_READ | PROT_WRITE, MAP_SHARED, shm->name, 0, (void**)&(shm->addr)); + if (ret != UBSM_OK) { + LOG(ERROR) << "Ubs map Shm=" << shm->name << " failed, ret=" << ret; + return SHM_ERR; + } + + LOG(INFO) << "Ubs malloc remote shm=" << shm->name << " length=" << shm->len << " success."; + return UBRING_OK; +} + +RETURN_CODE UbsShmLocalMmap(SHM *shm, int prot) +{ + int ret = ubsmem_shmem_map(NULL, shm->len, prot, MAP_SHARED, shm->name, 0, (void**)&(shm->addr)); + if (ret != UBSM_OK) { + LOG(ERROR) << "Ubs map Shm=" << shm->name << " failed, ret=" << ret; + return SHM_ERR; + } + + LOG(INFO) << "Ubs mmap remote shm=" << shm->name << " length=" << shm->len << " success."; + return UBRING_OK; +} + +RETURN_CODE UbsShmRemoteFree(SHM *shm) +{ + // unmap + if (shm->addr == NULL) { + LOG(ERROR) << "Ubs input shm param is invalid, addr is NULL."; + return SHM_ERR_INPUT_INVALID; + } + + int ret = ubsmem_shmem_unmap(shm->addr, shm->len); + if (ret != UBSM_OK) { + if (ret == UBSM_ERR_NET) { + LOG(ERROR) << "Ubs unmap shm=" << shm->name << " failed, ubsm net err=" << ret; + AddShmToList(g_shmList, shm); + return SHM_ERR_UBSM_NET_ERR; + } + LOG(ERROR) << "Ubs unmap shm=" << shm->name << " length=" << shm->len << " failed, ret=" << ret; + return SHM_ERR; + } + + LOG(INFO) << "Ubs free Remote shm=" << shm->name << " length=" << shm->len << " success."; + return UBRING_OK; +} + +void UbsMemLoggerPrint(int level, 
const char *msg) +{ + if (level == UBSM_LOG_ERROR_LEVEL) { + LOG(ERROR) << msg; + } else if (level == UBSM_LOG_WARN_LEVEL) { + LOG(WARNING) << msg; + } else { + LOG(INFO) << msg; + } + return; +} + +RETURN_CODE UbsShmInit(void) +{ + // 加载libubsm_sdk.so函数指针 + RETURN_CODE retCode = UbsShmInterfacesLoad(); + if (retCode != UBRING_OK) { + LOG(ERROR) << "Load ubs shm functions failed, ret=" << retCode; + return UBRING_ERR; + } + + if (gethostname(hostname, MAX_HOST_NAME_DESC_LENGTH) != 0) { + LOG(ERROR) << "ubring config gethostname failed, errno=" << errno; + return UBRING_ERR; + } + + int ret = ubsmem_set_extern_logger(UbsMemLoggerPrint); + if (ret != UBSM_OK) { + LOG(ERROR) << "Ubs set logger failed, ret=" << ret; + return UBRING_ERR; + } + + ret = ubsmem_set_logger_level(UBSM_LOG_INFO_LEVEL); + if (ret != UBSM_OK) { + LOG(ERROR) << "Ubs set logger level failed, ret=" << ret; + return UBRING_ERR; + } + + ubsmem_options_t options = {}; + ret = ubsmem_init_attributes(&options); + if (ret != UBSM_OK) { + LOG(ERROR) << "Ubs shm init attributes failed, ret=" << ret; + return UBRING_ERR; + } + + ret = ubsmem_initialize(&options); + if (ret != UBSM_OK) { + LOG(ERROR) << "Ubs shm initialize failed, ret=" << ret; + return UBRING_ERR; + } + + if (UNLIKELY(ubsmem_local_nid_query(&FLAGS_node_location) != UBSM_OK)) { + LOG(ERROR) << "Get local nid failed."; + return UBRING_ERR; + } + + if (UNLIKELY(ubsmem_shmem_faults_register(brpc::ubring::UBRingManager::UbEventCallback) != UBSM_OK)) { + LOG(ERROR) << "Failed to register the ub event callback function."; + return UBRING_ERR; + } + + if (CreateUbsShmRegion(g_regionName) != UBRING_OK) { + LOG(ERROR) << "Create Ubs region failed."; + return UBRING_ERR; + } + + if (InitShmTimer(&g_shmList) != UBRING_OK) { + LOG(ERROR) << "Ubs shm list init failed."; + return UBRING_ERR; + } + + LOG(INFO) << "Ubs shm init success."; + return UBRING_OK; +} + +RETURN_CODE UbsShmFini(void) +{ + int ret = ubsmem_finalize(); + if (ret != UBSM_OK) { + 
LOG(ERROR) << "Ubs shm finalize fail, ret=" << ret; + return UBRING_ERR; + } + + if (UNLIKELY(DestroyShmTimer(g_shmList) != UBRING_OK)) { + LOG(ERROR) << "Ubs shm list finalize failed."; + return UBRING_ERR; + } + + LOG(INFO) << "Ubs shm finalize success."; + return UBRING_OK; +} + +static void DeleteShmToList(ShmList* shmList) +{ + if (shmList == NULL || shmList->head == NULL) { + return; + } + + ShmListNode *curNode = shmList->head; + shmList->head = curNode->next; + if (shmList->head != NULL) { + shmList->head->prev = NULL; + } else { + shmList->tail = NULL; + } + LOG(INFO) << "Delete shm to list, name=" << curNode->shm.name << " size=" << shmList->size; + FREE_PTR(curNode); + shmList->size--; +} + +void *UbsShmCallback(void* args) +{ + ShmList *shmList = (ShmList*)args; + if (UNLIKELY(shmList == NULL)) { + LOG(ERROR) << "Shm list is null."; + return NULL; + } + + LOCK_GUARD(shmList->shmLock); + while (shmList->head != NULL) { + SHM shm = shmList->head->shm; + if (shm.addr == NULL) { + LOG(ERROR) << "Ubs input shm param is invalid, addr is NULL."; + return NULL; + } + + int ret = ubsmem_shmem_unmap(shm.addr, shm.len); + if (ret != UBSM_OK) { + if (ret == UBSM_ERR_NET) { + return NULL; + } + LOG(ERROR) << "Ubs unmap shm=" << shm.name << " length=" << shm.len << " failed, ret=" << ret; + return NULL; + } + LOG(INFO) << "Ubs unmap shm=" << shm.name << " length=" << shm.len << " success."; + + ret = ubsmem_shmem_deallocate(shm.name); + if (ret != UBSM_OK) { + DeleteShmToList(shmList); + LOG(ERROR) << "Ubs delete shm=" << shm.name << " failed, ret=" << ret; + return NULL; + } + DeleteShmToList(shmList); + LOG(INFO) << "Ubs free local shm=" << shm.name << " length=" << shm.len << " success."; + } + + return NULL; +} + +RETURN_CODE UbsShmAddTimer(ShmList *shmList) +{ + uint32_t timerInterval = FLAGS_ub_flying_io_timeout; + itimerspec timeSpec = { + .it_interval = {.tv_sec = timerInterval, .tv_nsec = 0}, + .it_value = {.tv_sec = 0, .tv_nsec = 1} + }; + int timerFd = 
TimerStart(&timeSpec, UbsShmCallback, (void*)shmList); + if (UNLIKELY(timerFd == -1)) { + LOG(ERROR) << "Start shm timer failed."; + return UBRING_ERR; + } + g_shmTimerFd = timerFd; + + return UBRING_OK; +} + +RETURN_CODE InitShmTimer(ShmList **shmList) +{ + *shmList = (ShmList *)malloc(sizeof(ShmList)); + if (*shmList == NULL) { + LOG(ERROR) << "Malloc shm list failed."; + return UBRING_ERR; + } + (*shmList)->head = NULL; + (*shmList)->tail = NULL; + (*shmList)->size = 0; + + if (pthread_mutex_init(&(*shmList)->shmLock, NULL) != 0) { + LOG(ERROR) << "Init shm list mutex failed."; + FREE_PTR(*shmList); + return UBRING_ERR; + } + + if (UbsShmAddTimer(*shmList) == UBRING_ERR) { + LOG(ERROR) << "Ubs add timer failed."; + FREE_PTR(*shmList); + return UBRING_ERR; + } + return UBRING_OK; +} + +RETURN_CODE DestroyShmTimer(ShmList *shmList) +{ + DeleteTimerSafe((uint32_t)g_shmTimerFd); + if (shmList == NULL) { + LOG(WARNING) << "Shm list is null."; + return UBRING_ERR; + } + ShmListNode* current = shmList->head; + ShmListNode* next; + + while (current != NULL) { + next = current->next; + free(current); + current = next; + } + pthread_mutex_destroy(&shmList->shmLock); + FREE_PTR(shmList); + return UBRING_OK; +} + +RETURN_CODE IsExistInShmList(ShmList *shmList, const SHM *shm) +{ + LOCK_GUARD(shmList->shmLock); + if (UNLIKELY(shmList == NULL)) { + LOG(ERROR) << "Shm list is null."; + return UBRING_ERR; + } + + ShmListNode *curNode = shmList->head; + while (curNode != NULL) { + if (strcmp(curNode->shm.name, shm->name) == 0 && curNode->shm.len == shm->len) { + return UBRING_OK; + } + curNode = curNode->next; + } + return UBRING_ERR; +} + +RETURN_CODE AddShmToList(ShmList *shmList, SHM *shm) +{ + if (shmList == NULL || shm == NULL) { + LOG(ERROR) << "Shm list or shm is null."; + return UBRING_ERR; + } + + if (IsExistInShmList(shmList, shm) == UBRING_OK) { + LOG(ERROR) << "Shm name=" << shm->name << " is exist in shm list."; + return UBRING_ERR; + } + + ShmListNode *newShmNode = 
(ShmListNode *)malloc(sizeof(ShmListNode)); + if (newShmNode == NULL) { + LOG(ERROR) << "Malloc shm node failed."; + return UBRING_ERR; + } + + memcpy(&newShmNode->shm, shm, sizeof(SHM)); + LOCK_GUARD(shmList->shmLock); + newShmNode->next = NULL; + newShmNode->prev = shmList->tail; + if (shmList->tail) { + shmList->tail->next = newShmNode; + shmList->tail = newShmNode; + } else { + shmList->head = newShmNode; + shmList->tail = newShmNode; + } + shmList->size++; + LOG(INFO) << "Add shm to list success, shm name=" << shm->name << " size=" << shmList->size; + return UBRING_OK; +} +} +} \ No newline at end of file diff --git a/src/brpc/ubshm/shm/shm_ubs.h b/src/brpc/ubshm/shm/shm_ubs.h new file mode 100644 index 0000000000..14b5916503 --- /dev/null +++ b/src/brpc/ubshm/shm/shm_ubs.h @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 

// ubs-mem (UBS) backend of the ubring shm layer, plus the deferred-release
// list that retries unmap/deallocate after a UBSM network error.
// NOTE(review): this header includes nothing itself, so RETURN_CODE, SHM,
// ShmList and DECLARE_int32 must already be declared by whoever includes it.
#ifndef BRPC_SHM_UBS_H
#define BRPC_SHM_UBS_H
namespace brpc {
namespace ubring {
// Period, in seconds, of the deferred-release retry timer.
DECLARE_int32(ub_flying_io_timeout);

// Severity values compared against the `level` argument of UbsMemLoggerPrint().
typedef enum TagUbsLogLevel {
    UBSM_LOG_DEBUG_LEVEL = 0,
    UBSM_LOG_INFO_LEVEL = 1,
    UBSM_LOG_WARN_LEVEL = 2,
    UBSM_LOG_ERROR_LEVEL = 3,
    UBSM_LOG_CLOSED_LEVEL = 4
} UbsLogLevel;

// Allocates shm->name in the region and maps it read/write.
RETURN_CODE UbsShmLocalMalloc(SHM *shm);
// Unmaps shm->addr; queues the descriptor for retry on a UBSM network error.
RETURN_CODE UbsShmMunmap(SHM *shm);
// Deallocates shm->name and clears shm->addr.
RETURN_CODE UbsShmFree(SHM *shm);
// Unmaps, then deallocates.
RETURN_CODE UbsShmLocalFree(SHM *shm);
// Maps an existing object read/write.
RETURN_CODE UbsShmRemoteMalloc(SHM *shm);
// Unmaps only; queues the descriptor for retry on a UBSM network error.
RETURN_CODE UbsShmRemoteFree(SHM *shm);
// Loads the SDK, initialises it, creates the region and starts the retry timer.
RETURN_CODE UbsShmInit(void);
// Finalises the SDK and destroys the retry timer and list.
RETURN_CODE UbsShmFini(void);
// Maps an existing object with caller-chosen protection `prot`.
RETURN_CODE UbsShmLocalMmap(SHM *shm, int prot);
// Logger callback installed into the SDK.
void UbsMemLoggerPrint(int level, const char *msg);

// Timer callback; `args` is the ShmList to drain.
void *UbsShmCallback(void* args);
// Starts the periodic retry timer for `shmList`.
RETURN_CODE UbsShmAddTimer(ShmList *shmList);
// Allocates the list into *shmList and starts its timer.
RETURN_CODE InitShmTimer(ShmList **shmList);
// Stops the timer and frees the list.
RETURN_CODE DestroyShmTimer(ShmList *shmList);
// Queues a copy of *shm unless the same name+length is already queued.
RETURN_CODE AddShmToList(ShmList *shmList, SHM *shm);
// UBRING_OK when an entry with the same name and length is queued.
RETURN_CODE IsExistInShmList(ShmList *shmList, const SHM *shm);
}
}
#endif //BRPC_SHM_UBS_H
\ No newline at end of file
diff --git a/src/brpc/ubshm/timer/timer_mgr.cpp b/src/brpc/ubshm/timer/timer_mgr.cpp
new file mode 100644
index 0000000000..e53833f95e
--- /dev/null
+++ b/src/brpc/ubshm/timer/timer_mgr.cpp
@@ -0,0 +1,468 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.
See the License for the +// specific language governing permissions and limitations +// under the License. + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include "brpc/ubshm/timer/timer_mgr.h" + +namespace brpc { +namespace ubring { + +int32_t g_epollFd = -1; +std::atomic g_totalTimerNum; +TimerFdCtx *g_timerFdCtxMap = NULL; +uint32_t maxSystemFd; +static pthread_t g_epollExecuteThread; +static int32_t g_timerModuleInitialized; + +#if defined(OS_MACOSX) +static int timerfd_create_macosx(int clockid, int flags); +static int timerfd_settime_macosx(int fd, int flags, + const itimerspec *new_value, + itimerspec *old_value); +#endif + +static RETURN_CODE DeleteTimerInner(uint32_t fd) { + if (g_timerFdCtxMap == NULL) { + return UBRING_OK; + } + + if (pthread_spin_lock(&g_timerFdCtxMap[fd].spinLock) != 0) { + return UBRING_ERR; + } + + if (g_timerFdCtxMap[fd].status == TIMER_CONTEXT_NOT_USING) { + pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); + return UBRING_OK; + } + + g_timerFdCtxMap[fd].status = TIMER_CONTEXT_NOT_USING; + g_timerFdCtxMap[fd].cb = NULL; + g_timerFdCtxMap[fd].args = NULL; + g_timerFdCtxMap[fd].periodical = 0; + g_timerFdCtxMap[fd].fd = 0; + + pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); + +#if defined(OS_LINUX) + epoll_ctl(g_epollFd, EPOLL_CTL_DEL, (int)fd, NULL); +#elif defined(OS_MACOSX) + struct kevent evt; + EV_SET(&evt, fd, EVFILT_TIMER, EV_DELETE, 0, 0, NULL); + kevent(g_epollFd, &evt, 1, NULL, 0, NULL); +#endif + + uint64_t exp = 0; + read((int)fd, &exp, sizeof(exp)); + + close((int)fd); + atomic_fetch_sub(&g_totalTimerNum, 1); + return UBRING_OK; +} + +static RETURN_CODE StartTimeEpoll(void) { +#if defined(OS_LINUX) + g_epollFd = epoll_create1(0); +#elif defined(OS_MACOSX) + g_epollFd = kqueue(); +#endif + if (UNLIKELY(g_epollFd == -1)) { + LOG(ERROR) << "Failed to create epoll/kqueue. 
errno=" << errno; + return UBRING_ERR; + } + + int ret = pthread_create(&g_epollExecuteThread, NULL, TimerEpoll, NULL); + if (UNLIKELY(ret != 0)) { + LOG(ERROR) << "Failed to create thread err=" << ret; + return UBRING_ERR; + } + return UBRING_OK; +} + +static RETURN_CODE TimerSpinLocksInit(void) { + if (g_timerFdCtxMap == NULL) { + LOG(ERROR) << "Timer module is not fully initialized."; + return UBRING_ERR; + } + + for (uint32_t fd = 0; fd < maxSystemFd; fd++) { + int ret = pthread_spin_init(&g_timerFdCtxMap[fd].spinLock, + PTHREAD_PROCESS_PRIVATE); + if (ret != EOK) { + LOG(ERROR) << "Failed to initialize spin lock for fd=" << fd; + for (uint32_t cleanupFd = 0; cleanupFd < fd; cleanupFd++) { + pthread_spin_destroy(&g_timerFdCtxMap[cleanupFd].spinLock); + } + return UBRING_ERR; + } + } + return UBRING_OK; +} + +static RETURN_CODE ExecuteCallback(int32_t timerFd) { + UnifiedCallback((void *)(&g_timerFdCtxMap[timerFd])); + return UBRING_OK; +} + +static RETURN_CODE TimerCtxMapCompletion(void) { + memset(g_timerFdCtxMap, 0, sizeof(TimerFdCtx) * maxSystemFd); + + RETURN_CODE ret = TimerSpinLocksInit(); + if (ret != UBRING_OK) { + LOG(ERROR) << "Failed to init spin locks for timer module."; + return UBRING_ERR; + } + return UBRING_OK; +} + +RETURN_CODE TimerInit(void) { + if (g_timerModuleInitialized > 0) { + return UBRING_OK; + } + + g_totalTimerNum.store(0); + + struct rlimit rlim; + if (getrlimit(RLIMIT_NOFILE, &rlim) != UBRING_OK) { + LOG(ERROR) << "Failed to get fd"; + return UBRING_ERR; + } + maxSystemFd = (uint32_t)rlim.rlim_cur; + + if (g_timerFdCtxMap == NULL) { + g_timerFdCtxMap = (TimerFdCtx *)malloc(sizeof(TimerFdCtx) * maxSystemFd); + if (UNLIKELY(!g_timerFdCtxMap)) { + LOG(ERROR) << "Fail to malloc space for timer modules. errno=%d", errno; + return UBRING_ERR; + } + + RETURN_CODE ret = TimerCtxMapCompletion(); + if (ret != UBRING_OK) { + LOG(ERROR) << "Failed to init main data structure of Time Module. 
ret=" << ret; + free(g_timerFdCtxMap); + g_timerFdCtxMap = NULL; + return UBRING_ERR; + } + } + + RETURN_CODE ret = StartTimeEpoll(); + if (ret != UBRING_OK) { + LOG(ERROR) << "Failed to start Timer Epoll. ret=" << ret; + if (LIKELY(g_timerFdCtxMap != NULL)) { + FREE_PTR(g_timerFdCtxMap); + } + return UBRING_ERR; + } + g_timerModuleInitialized = 1; + return UBRING_OK; +} + +void *UnifiedCallback(void *args) { + TimerFdCtx *ctx = (TimerFdCtx *)args; + if (pthread_spin_lock(&ctx->spinLock) != 0) { + return NULL; + } + + if (ctx->status == TIMER_CONTEXT_NOT_USING) { + pthread_spin_unlock(&ctx->spinLock); + return NULL; + } + + void *(*cb)(void *) = ctx->cb; + void *cbArgs = ctx->args; + uint32_t fd = ctx->fd; + int isPeriodical = ctx->periodical; + ctx->status = TIMER_CONTEXT_CALLBACK_ONGOING; + + pthread_spin_unlock(&ctx->spinLock); + + cb(cbArgs); + + if (!isPeriodical) { + DeleteTimerInner(fd); + } + return NULL; +} + +void *TimerEpoll(void *args) { + UNREFERENCE_PARAM(args); +#if defined(OS_LINUX) + struct epoll_event readyEvents[MAX_TIMER]; +#elif defined(OS_MACOSX) + struct kevent readyEvents[MAX_TIMER]; +#endif + + while (1) { + if (g_timerModuleInitialized <= 0) { + LOG(ERROR) << "The Timer module is not initialized."; + break; + } + +#if defined(OS_LINUX) + int32_t readyNum = epoll_wait(g_epollFd, readyEvents, MAX_TIMER, + TIMER_EPOLL_WAIT_TIMEOUT); +#elif defined(OS_MACOSX) + struct timespec timeout = {0, TIMER_EPOLL_WAIT_TIMEOUT * 1000000}; + int32_t readyNum = kevent(g_epollFd, NULL, 0, readyEvents, MAX_TIMER, &timeout); +#endif + + if (UNLIKELY(readyNum == -1)) { + errno_t err = errno; + if (err == EINTR) { + LOG_EVERY_SECOND(WARNING) << "Epoll/Kqueue wait was interrupted. errno=" << err; + continue; + } else if (err == EBADF) { + LOG(WARNING) << "The Timer module is destroyed."; + break; + } + LOG(ERROR) << "Epoll/Kqueue wait internal error. 
errno=" << err; + break; + } + + for (int32_t i = 0; i < readyNum; i++) { +#if defined(OS_LINUX) + struct epoll_event *event = &readyEvents[i]; + int32_t timerFd = event->data.fd; +#elif defined(OS_MACOSX) + struct kevent *event = &readyEvents[i]; + int32_t timerFd = event->ident; +#endif + + uint64_t exp = 0; + if (read(timerFd, &exp, sizeof(exp)) < 0) { + if (errno != EBADF) { + LOG(ERROR) << "Failed to read timerfd=" << timerFd << " errno=" << errno; + } + continue; + } + if (TimerFdCtxValidate((uint32_t)timerFd) != UBRING_OK) { + continue; + } + + RETURN_CODE ret = ExecuteCallback(timerFd); + if (ret != UBRING_OK) { + LOG(ERROR) << "Failed execute callback ret=" << ret; + DeleteTimerInner((uint32_t)timerFd); + continue; + } + } + } + return NULL; +} + +void DeleteTimerSafe(uint32_t fd) { + if (g_timerFdCtxMap == NULL) { + return; + } + + if (pthread_spin_lock(&g_timerFdCtxMap[fd].spinLock) != 0) { + return; + } + + if (g_timerFdCtxMap[fd].status == TIMER_CONTEXT_NOT_USING) { + pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); + return; + } + + g_timerFdCtxMap[fd].status = TIMER_CONTEXT_NOT_USING; + g_timerFdCtxMap[fd].cb = NULL; + g_timerFdCtxMap[fd].args = NULL; + g_timerFdCtxMap[fd].periodical = 0; + g_timerFdCtxMap[fd].fd = 0; + + pthread_spin_unlock(&g_timerFdCtxMap[fd].spinLock); + +#if defined(OS_LINUX) + epoll_ctl(g_epollFd, EPOLL_CTL_DEL, (int)fd, NULL); +#elif defined(OS_MACOSX) + struct kevent evt; + EV_SET(&evt, fd, EVFILT_TIMER, EV_DELETE, 0, 0, NULL); + kevent(g_epollFd, &evt, 1, NULL, 0, NULL); +#endif + + uint64_t exp = 0; + read((int)fd, &exp, sizeof(exp)); + + close((int)fd); + atomic_fetch_sub(&g_totalTimerNum, 1); +} + +void DeleteTimer(uint32_t fd) { + if (g_timerFdCtxMap == NULL) { + LOG(WARNING) << "The timer is not initialized."; + return; + } + + g_timerFdCtxMap[fd].periodical = 0; +} + +int32_t TimerStart(const itimerspec *time, void *(*cb)(void *), void *args) { + if (g_epollFd == -1) { + LOG(ERROR) << "Timer epoll/kqueue encountered 
internal error."; + return -1; + } + +#if defined(OS_LINUX) + int timerFd = timerfd_create(CLOCK_MONOTONIC, 0); +#elif defined(OS_MACOSX) + int timerFd = timerfd_create_macosx(CLOCK_MONOTONIC, 0); +#endif + + if (UNLIKELY(timerFd >= (int)maxSystemFd || timerFd == -1)) { + LOG(ERROR) << "Failed to create timerfd=" << timerFd << " errno=" << errno; + return -1; + } + + g_timerFdCtxMap[timerFd].status = TIMER_CONTEXT_EPOLL_WAITING; + g_timerFdCtxMap[timerFd].cb = cb; + g_timerFdCtxMap[timerFd].args = args; + g_timerFdCtxMap[timerFd].fd = (uint32_t)timerFd; + + if (LIKELY(time->it_interval.tv_sec > 0 || time->it_interval.tv_nsec > 0)) { + g_timerFdCtxMap[timerFd].periodical = 1; + } + +#if defined(OS_LINUX) + struct epoll_event event = { + .events = EPOLLIN, + .data = {.fd = timerFd} + }; + + int32_t ret = epoll_ctl(g_epollFd, EPOLL_CTL_ADD, timerFd, &event); +#elif defined(OS_MACOSX) + struct kevent event; + uint64_t timeout_nsec = time->it_value.tv_sec * 1000000000ULL + time->it_value.tv_nsec; + uint64_t interval_nsec = time->it_interval.tv_sec * 1000000000ULL + time->it_interval.tv_nsec; + EV_SET(&event, timerFd, EVFILT_TIMER, EV_ADD | EV_ENABLE, 0, + timeout_nsec / 1000000, NULL); + int32_t ret = kevent(g_epollFd, &event, 1, NULL, 0, NULL); +#endif + + if (UNLIKELY(ret != 0)) { + CloseTimerFd((uint32_t)timerFd); + LOG(ERROR) << "Failed to add event to epoll/kqueue. 
errno=" << errno; + return -1; + } + + atomic_fetch_add(&g_totalTimerNum, 1); + +#if defined(OS_LINUX) + ret = timerfd_settime(timerFd, 0, time, NULL); +#elif defined(OS_MACOSX) + ret = timerfd_settime_macosx(timerFd, 0, time, NULL); +#endif + + if (UNLIKELY(ret != 0)) { +#if defined(OS_LINUX) + if (epoll_ctl(g_epollFd, EPOLL_CTL_DEL, timerFd, NULL) != 0) { +#elif defined(OS_MACOSX) + struct kevent evt; + EV_SET(&evt, timerFd, EVFILT_TIMER, EV_DELETE, 0, 0, NULL); + if (kevent(g_epollFd, &evt, 1, NULL, 0, NULL) != 0) { +#endif + LOG(ERROR) << "Failed to delete the timer fd=" << timerFd << " with errno=" << errno; + } + CloseTimerFd((uint32_t)timerFd); + atomic_fetch_sub(&g_totalTimerNum, 1); + LOG(ERROR) << "Failed to set timer"; + return -1; + } + + return timerFd; +} + +uint32_t GetActiveTimerNum(void) { + return atomic_load(&g_totalTimerNum); +} + +void CloseTimerFd(uint32_t fd) { + g_timerFdCtxMap[fd].cb = NULL; + g_timerFdCtxMap[fd].args = NULL; + g_timerFdCtxMap[fd].status = TIMER_CONTEXT_NOT_USING; + g_timerFdCtxMap[fd].fd = 0; + g_timerFdCtxMap[fd].periodical = 0; + if (close((int)fd) != 0) { + LOG(ERROR) << "Failed to close timer fd=" << fd << " errno=" << errno; + return; + } +} + +void TimerModuleDestroy(void) { + uint32_t maxFd = maxSystemFd; + if (g_timerFdCtxMap) { + for (uint32_t fd = 0; fd < maxFd; fd++) { + if (g_timerFdCtxMap[fd].status != TIMER_CONTEXT_NOT_USING) { + DeleteTimerSafe(fd); + } + } + } + close(g_epollFd); + g_epollFd = -1; + g_totalTimerNum = 0; + g_timerModuleInitialized = 0; + int32_t ret = pthread_join(g_epollExecuteThread, NULL); + if (ret != EOK) { + LOG(ERROR) << "Failed to join pthread, during destroying timer module. 
ret=" << ret; + return; + } +} + +RETURN_CODE TimerFdCtxValidate(uint32_t fd) { + if (fd >= maxSystemFd) { + LOG(ERROR) << "TimerFd=" << fd << " is out of range=" << maxSystemFd; + return UBRING_ERR; + } + if (g_timerFdCtxMap[fd].status == TIMER_CONTEXT_NOT_USING) { + LOG(ERROR) << "TimerFd=" << fd << " has wrong status=" << g_timerFdCtxMap[fd].status; + return UBRING_ERR; + } + if (g_timerFdCtxMap[fd].cb == NULL) { + LOG(ERROR) << "The callback is not set."; + return UBRING_ERR; + } + + return UBRING_OK; +} + +#if defined(OS_MACOSX) +static int timerfd_create_macosx(int clockid, int flags) { + int pipefd[2]; + if (pipe(pipefd) == -1) { + return -1; + } + return pipefd[0]; +} + +static int timerfd_settime_macosx(int fd, int flags, + const itimerspec *new_value, + itimerspec *old_value) { + if (old_value != NULL) { + memset(old_value, 0, sizeof(itimerspec)); + } + return 0; +} +#endif + +} // namespace ubring +} // namespace brpc \ No newline at end of file diff --git a/src/brpc/ubshm/timer/timer_mgr.h b/src/brpc/ubshm/timer/timer_mgr.h new file mode 100644 index 0000000000..9630430a2c --- /dev/null +++ b/src/brpc/ubshm/timer/timer_mgr.h @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef BRPC_TIMER_MGR_H +#define BRPC_TIMER_MGR_H +#include +#include +#include "brpc/ubshm/common/common.h" + +#if defined(OS_LINUX) +#include +#include +#elif defined(OS_MACOSX) +#include +#include +#include +#endif + +#define MAX_TIMER 1024 +#define TIMER_EPOLL_WAIT_TIMEOUT 1000 + +#if defined(OS_MACOSX) +struct itimerspec +{ + struct timespec it_interval; + struct timespec it_value; +}; +#endif +namespace brpc { +namespace ubring { +typedef enum { + TIMER_CONTEXT_NOT_USING, + TIMER_CONTEXT_EPOLL_WAITING, + TIMER_CONTEXT_CALLBACK_ONGOING +} TimerFdCtxStatus; + +typedef struct { + void *(*cb)(void*); + void *args; + uint32_t fd; + TimerFdCtxStatus status; + uint32_t periodical; + pthread_spinlock_t spinLock; +} TimerFdCtx; + +RETURN_CODE TimerInit(void); +void TimerModuleDestroy(void); +void *UnifiedCallback(void *args); +void *TimerEpoll(void *args); +int32_t TimerStart(const itimerspec *time, void *(*cb)(void *), void *args); +uint32_t GetActiveTimerNum(void); +void CloseTimerFd(uint32_t fd); + +void DeleteTimerSafe(uint32_t fd); +void DeleteTimer(uint32_t fd); +RETURN_CODE TimerFdCtxValidate(uint32_t fd); +} +} +#endif //BRPC_TIMER_MGR_H \ No newline at end of file diff --git a/src/brpc/ubshm/ub_endpoint.cpp b/src/brpc/ubshm/ub_endpoint.cpp new file mode 100644 index 0000000000..24b3ffdd5c --- /dev/null +++ b/src/brpc/ubshm/ub_endpoint.cpp @@ -0,0 +1,917 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#if BRPC_WITH_UBRING + +#include +#include "butil/fd_utility.h" +#include "butil/logging.h" // CHECK, LOG +#include "butil/sys_byteorder.h" // HostToNet,NetToHost +#include "bthread/bthread.h" +#include "brpc/errno.pb.h" +#include "brpc/event_dispatcher.h" +#include "brpc/input_messenger.h" +#include "brpc/socket.h" +#include "brpc/reloadable_flags.h" +#include "brpc/ubshm/ub_helper.h" +#include "brpc/ubshm/ub_endpoint.h" +#include "brpc/ubshm/shm/shm_def.h" +#include "brpc/ubshm/common/common.h" +#include "brpc/ubshm_transport.h" +#include "brpc/ubshm/ubr_trx.h" + +DECLARE_int32(task_group_ntags); + +namespace brpc { +DECLARE_bool(log_connection_close); +namespace ubring { + +extern bool g_skip_ub_init; +DEFINE_int32(data_queue_size, 4, "data queue size for UB"); +DEFINE_bool(ub_trace_verbose, false, "Print log message verbosely"); +BRPC_VALIDATE_GFLAG(ub_trace_verbose, brpc::PassValidate); +DEFINE_int32(ub_poller_num, 1, "Poller number in ub polling mode."); +DEFINE_bool(ub_poller_yield, false, "Yield thread in RDMA polling mode."); +DEFINE_bool(ub_edisp_unsched, false, "Disable event dispatcher schedule"); +DEFINE_bool(ub_disable_bthread, false, "Disable bthread in RDMA"); + +static const size_t MIN_ONCE_READ = 4096; +static const size_t MAX_ONCE_READ = 524288; +static const size_t IOBUF_IOV_MAX = 256; + +static const char* MAGIC_STR = "UB"; +static const size_t MAGIC_STR_LEN = 2; +static const size_t HELLO_MSG_LEN_MIN = 64; +static const size_t ACK_MSG_LEN = 4; +static uint16_t g_ub_hello_msg_len = 64; +static uint16_t g_ub_hello_version = 
2; +static uint16_t g_ub_impl_version = 1; + +static const uint32_t ACK_MSG_UB_OK = 0x1; + +static butil::Mutex* g_ubring_resource_mutex = NULL; + +struct HelloMessage { + void Serialize(void* data) const; + void Deserialize(void* data); + std::string toString() const; + + uint16_t msg_len; + uint16_t hello_ver; + uint16_t impl_ver; + uint64_t len; + char shm_name[SHM_MAX_NAME_BUFF_LEN]; +}; + +void HelloMessage::Serialize(void* data) const { + uint16_t* current_pos = (uint16_t*)data; + *(current_pos++) = butil::HostToNet16(msg_len); + *(current_pos++) = butil::HostToNet16(hello_ver); + *(current_pos++) = butil::HostToNet16(impl_ver); + uint64_t* len_pos = (uint64_t*)current_pos; + *len_pos = butil::HostToNet64(len); + current_pos += 4; + memcpy(current_pos, shm_name, SHM_MAX_NAME_BUFF_LEN); +} + +void HelloMessage::Deserialize(void* data) { + uint16_t* current_pos = (uint16_t*)data; + msg_len = butil::NetToHost16(*current_pos++); + hello_ver = butil::NetToHost16(*current_pos++); + impl_ver = butil::NetToHost16(*current_pos++); + len = butil::NetToHost64(*(uint64_t*)current_pos); + current_pos += 4; // move forward 4 Bytes + memcpy(shm_name, current_pos, SHM_MAX_NAME_BUFF_LEN); +} + +std::string HelloMessage::toString() const { + constexpr size_t MAX_LEN = 16 + 6 + 16 + 6 + 16 + 6 + 20 + 6 + SHM_MAX_NAME_BUFF_LEN + 32; + std::array buf; + int n = snprintf(buf.data(), buf.size(), + "msg_len=%u, hello_ver=%u, impl_ver=%u, len=%lu, shm_name=%.*s", + msg_len, + hello_ver, + impl_ver, + static_cast(len), // 兼容32/64位 + static_cast(SHM_MAX_NAME_BUFF_LEN), // 限制最大输出长度 + shm_name + ); + return std::string(buf.data(), static_cast(n)); +} + +UBShmEndpoint::UBShmEndpoint(Socket* s) + : _socket(s) + , _state(UNINIT) + , _ub_ring(nullptr) + , _cq_sid(INVALID_SOCKET_ID) +{ + _read_butex = bthread::butex_create_checked>(); +} + +UBShmEndpoint::~UBShmEndpoint() { + Reset(); + bthread::butex_destroy(_read_butex); +} + +void UBShmEndpoint::Reset() { + DeallocateResources(); + + 
delete _ub_ring; + _ub_ring = nullptr; + _cq_sid = INVALID_SOCKET_ID; + _state = UNINIT; +} + +void UBConnect::StartConnect(const Socket* socket, + void (*done)(int err, void* data), + void* data) { + auto* ub_transport = static_cast(socket->_transport.get()); + CHECK(ub_transport->_ub_ep != NULL); + SocketUniquePtr s; + if (Socket::Address(socket->id(), &s) != 0) { + return; + } + if (!IsUBAvailable()) { + ub_transport->_ub_ep->_state = UBShmEndpoint::FALLBACK_TCP; + ub_transport->_ub_state = UBShmTransport::UB_OFF; + done(0, data); + return; + } + _done = done; + _data = data; + bthread_t tid; + bthread_attr_t attr = BTHREAD_ATTR_NORMAL; + bthread_attr_set_name(&attr, "UBProcessHandshakeAtClient"); + if (bthread_start_background(&tid, &attr, + UBShmEndpoint::ProcessHandshakeAtClient, ub_transport->_ub_ep) < 0) { + LOG(FATAL) << "Fail to start handshake bthread"; + Run(); + } else { + s.release(); + } +} + +void UBConnect::StopConnect(Socket* socket) { } + +void UBConnect::Run() { + _done(errno, _data); +} + +static void TryReadOnTcpDuringRdmaEst(Socket* s) { + int progress = Socket::PROGRESS_INIT; + while (true) { + uint8_t tmp; + ssize_t nr = read(s->fd(), &tmp, 1); + if (nr < 0) { + if (errno != EAGAIN) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to read from " << s; + s->SetFailed(saved_errno, "Fail to read from %s: %s", + s->description().c_str(), berror(saved_errno)); + return; + } + if (!s->MoreReadEvents(&progress)) { + break; + } + } else if (nr == 0) { + s->SetEOF(); + return; + } else { + LOG(WARNING) << "Read unexpected data from " << s; + s->SetFailed(EPROTO, "Read unexpected data from %s", + s->description().c_str()); + return; + } + } +} + +void UBShmEndpoint::OnNewDataFromTcp(Socket* m) { + auto* ub_transport = static_cast(m->_transport.get()); + UBShmEndpoint* ep = ub_transport->GetUBShmEp(); + CHECK(ep != NULL); + + int progress = Socket::PROGRESS_INIT; + while (true) { + if (ep->_state == UNINIT) { + if (!m->CreatedByConnect()) { 
+ if (!IsUBAvailable()) { + ep->_state = FALLBACK_TCP; + ub_transport->_ub_state = UBShmTransport::UB_OFF; + continue; + } + bthread_t tid; + ep->_state = S_HELLO_WAIT; + SocketUniquePtr s; + m->ReAddress(&s); + bthread_attr_t attr = BTHREAD_ATTR_NORMAL; + bthread_attr_set_name(&attr, "UBProcessHandshakeAtServer"); + if (bthread_start_background(&tid, &attr, + ProcessHandshakeAtServer, ep) < 0) { + ep->_state = UNINIT; + LOG(FATAL) << "Fail to start handshake bthread"; + } else { + s.release(); + } + } else { + // The connection may be closed or reset before the client + // starts handshake. This will be handled by client handshake. + // Ignore the exception here. + } + } else if (ep->_state < ESTABLISHED) { // during handshake + ep->_read_butex->fetch_add(1, butil::memory_order_release); + bthread::butex_wake(ep->_read_butex); + } else if (ep->_state == FALLBACK_TCP){ // handshake finishes + InputMessenger::OnNewMessages(m); + return; + } else if (ep->_state == ESTABLISHED) { + TryReadOnTcpDuringRdmaEst(ep->_socket); + return; + } + if (!m->MoreReadEvents(&progress)) { + break; + } + } +} +bool HelloNegotiationValid(HelloMessage& msg) { + if (msg.hello_ver == g_ub_hello_version && + msg.impl_ver == g_ub_impl_version) { + // This can be modified for future compatibility + return true; + } + return false; +} + +static const int WAIT_TIMEOUT_MS = 50; + +int UBShmEndpoint::ReadFromFd(void* data, size_t len) { + CHECK(data != NULL); + int nr = 0; + size_t received = 0; + do { + const timespec duetime = butil::milliseconds_from_now(WAIT_TIMEOUT_MS); + nr = read(_socket->fd(), (uint8_t*)data + received, len - received); + if (nr < 0) { + if (errno == EAGAIN) { + const int expected_val = _read_butex->load(butil::memory_order_acquire); + if (bthread::butex_wait(_read_butex, expected_val, &duetime) < 0) { + if (errno != EWOULDBLOCK && errno != ETIMEDOUT) { + return -1; + } + } + } else { + return -1; + } + } else if (nr == 0) { + errno = EEOF; + return -1; + } else { + 
received += nr; + } + } while (received < len); + return 0; +} + +int UBShmEndpoint::WriteToFd(void* data, size_t len) { + CHECK(data != NULL); + int nw = 0; + size_t written = 0; + do { + const timespec duetime = butil::milliseconds_from_now(WAIT_TIMEOUT_MS); + nw = write(_socket->fd(), (uint8_t*)data + written, len - written); + if (nw < 0) { + if (errno == EAGAIN) { + if (_socket->WaitEpollOut(_socket->fd(), true, &duetime) < 0) { + if (errno != ETIMEDOUT) { + return -1; + } + } + } else { + return -1; + } + } else { + written += nw; + } + } while (written < len); + return 0; +} + +inline void UBShmEndpoint::TryReadOnTcp() { + if (_socket->_nevent.fetch_add(1, butil::memory_order_acq_rel) == 0) { + if (_state == FALLBACK_TCP) { + InputMessenger::OnNewMessages(_socket); + } else if (_state == ESTABLISHED) { + TryReadOnTcpDuringRdmaEst(_socket); + } + } +} + +void* UBShmEndpoint::ProcessHandshakeAtClient(void* arg) { + UBShmEndpoint* ep = static_cast(arg); + SocketUniquePtr s(ep->_socket); + UBConnect::RunGuard rg((UBConnect*)s->_app_connect.get()); + + LOG_IF(INFO, FLAGS_ub_trace_verbose) + << "Start handshake on " << s->_local_side; + + uint8_t data[g_ub_hello_msg_len]; + + ep->_state = C_ALLOC_SHM; + auto* ub_transport = static_cast(s->_transport.get()); + size_t local_shm_len = (size_t)(FLAGS_data_queue_size) * MB_TO_BYTE; + SHM local_trx_shm = {NULL, local_shm_len, 0, {0}, (uint32_t)s->fd()}; + const char* shm_name = butil::endpoint2str(s->local_side()).c_str(); + if (ep->AllocateClientResources(&local_trx_shm, shm_name) < 0) { + LOG(WARNING) << "Fallback to tcp:" << s->description(); + ub_transport->_ub_state = UBShmTransport::UB_OFF; + ep->_state = FALLBACK_TCP; + return NULL; + } + + ep->_state = C_HELLO_SEND; + HelloMessage local_msg; + local_msg.msg_len = g_ub_hello_msg_len; + local_msg.hello_ver = g_ub_hello_version; + local_msg.impl_ver = g_ub_impl_version; + local_msg.len = local_shm_len; + memcpy(local_msg.shm_name, local_trx_shm.name, 
SHM_MAX_NAME_BUFF_LEN); + memcpy(data, MAGIC_STR, MAGIC_STR_LEN); + local_msg.Serialize((char*)data + MAGIC_STR_LEN); + if (ep->WriteToFd(data, g_ub_hello_msg_len) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to send hello message to server:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete ubring handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + LOG_IF(INFO, FLAGS_ub_trace_verbose) << "client handshake message : " << local_msg.toString(); + + ep->_state = C_HELLO_WAIT; + if (ep->ReadFromFd(data, MAGIC_STR_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to get hello message from server:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete ubring handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + if (memcmp(data, MAGIC_STR, MAGIC_STR_LEN) != 0) { + LOG(WARNING) << "Read unexpected data during handshake:" << s->description(); + s->SetFailed(EPROTO, "Fail to complete ubring handshake from %s: %s", + s->description().c_str(), berror(EPROTO)); + ep->_state = FAILED; + return NULL; + } + + if (ep->ReadFromFd(data, HELLO_MSG_LEN_MIN - MAGIC_STR_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to get Hello Message from server:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete ubring handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + HelloMessage remote_msg; + remote_msg.Deserialize(data); + if (remote_msg.msg_len < HELLO_MSG_LEN_MIN) { + LOG(WARNING) << "Fail to parse Hello Message length from server:" + << s->description(); + s->SetFailed(EPROTO, "Fail to complete ubring handshake from %s: %s", + s->description().c_str(), berror(EPROTO)); + ep->_state = FAILED; + return NULL; + } + + if (remote_msg.msg_len > HELLO_MSG_LEN_MIN) { + // TODO: Read Hello Message 
customized data + // Just for future use, should not happen now + } + + if (!HelloNegotiationValid(remote_msg)) { + LOG(WARNING) << "Fail to negotiate with server, fallback to tcp:" + << s->description(); + ub_transport->_ub_state = UBShmTransport::UB_OFF; + } else { + ep->_state = C_MAP_REMOTE_SHM; + if (ep->_ub_ring->UbrMapRemoteShm(&local_trx_shm, shm_name) < 0) { + LOG(WARNING) << "Fail to map the remote shm, fallback to tcp:" << s->description(); + ub_transport->_ub_state = UBShmTransport::UB_OFF; + } else { + ub_transport->_ub_state = UBShmTransport::UB_ON; + } + } + + ep->_state = C_ACK_SEND; + uint32_t flags = 0; + if (ub_transport->_ub_state != UBShmTransport::UB_OFF) { + flags |= ACK_MSG_UB_OK; + } + uint32_t* tmp = (uint32_t*)data; + *tmp = butil::HostToNet32(flags); + if (ep->WriteToFd(data, ACK_MSG_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to send Ack Message to server:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete ubring handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + + if (ub_transport->_ub_state == UBShmTransport::UB_ON) { + ep->_state = ESTABLISHED; + LOG_IF(INFO, FLAGS_ub_trace_verbose) + << "Client handshake ends (use ubring) on " << s->description(); + } else { + ep->_state = FALLBACK_TCP; + LOG_IF(INFO, FLAGS_ub_trace_verbose) + << "Client handshake ends (use tcp) on " << s->description(); + } + + errno = 0; + + return NULL; +} + +void* UBShmEndpoint::ProcessHandshakeAtServer(void* arg) { + UBShmEndpoint* ep = static_cast(arg); + SocketUniquePtr s(ep->_socket); + + LOG_IF(INFO, FLAGS_ub_trace_verbose) + << "Start handshake on " << s->description(); + + uint8_t data[g_ub_hello_msg_len]; + + ep->_state = S_HELLO_WAIT; + if (ep->ReadFromFd(data, MAGIC_STR_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to read Hello Message from client:" << s->description() << " " << s->_remote_side; + 
s->SetFailed(saved_errno, "Fail to complete ubring handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + auto* ub_transport = static_cast(s->_transport.get()); + if (memcmp(data, MAGIC_STR, MAGIC_STR_LEN) != 0) { + LOG_IF(INFO, FLAGS_ub_trace_verbose) << "It seems that the " + << "client does not use RDMA, fallback to TCP:" + << s->description(); + s->_read_buf.append(data, MAGIC_STR_LEN); + ep->_state = FALLBACK_TCP; + ub_transport->_ub_state = UBShmTransport::UB_OFF; + ep->TryReadOnTcp(); + return NULL; + } + + if (ep->ReadFromFd(data, g_ub_hello_msg_len - MAGIC_STR_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to read Hello Message from client:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete ubring handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + + HelloMessage remote_msg; + remote_msg.Deserialize(data); + LOG_IF(INFO, FLAGS_ub_trace_verbose) << "server receive handshake message : " << remote_msg.toString(); + if (remote_msg.msg_len < HELLO_MSG_LEN_MIN) { + LOG(WARNING) << "Fail to parse Hello Message length from client:" + << s->description(); + s->SetFailed(EPROTO, "Fail to complete ubring handshake from %s: %s", + s->description().c_str(), berror(EPROTO)); + ep->_state = FAILED; + return NULL; + } + if (remote_msg.msg_len > HELLO_MSG_LEN_MIN) { + // TODO: Read Hello Message customized header + // Just for future use, should not happen now + } + + if (!HelloNegotiationValid(remote_msg)) { + LOG(WARNING) << "Fail to negotiate with client, fallback to tcp:" + << s->description(); + ub_transport->_ub_state = UBShmTransport::UB_OFF; + } else { + ep->_state = S_ALLOC_SHM; + ubring::SHM remote_trx_shm = {NULL, remote_msg.len, 0, {0}, (uint8_t)ep->_socket->fd()}; + strncpy(remote_trx_shm.name, remote_msg.shm_name, SHM_MAX_NAME_BUFF_LEN); + + size_t local_shm_len = 
(size_t)(FLAGS_data_queue_size) * MB_TO_BYTE; + // server端共享内存名称 + ubring::SHM local_trx_shm = {NULL, local_shm_len, 0, {0}, (uint8_t)ep->_socket->fd()}; + char clientName[SHM_MAX_NAME_BUFF_LEN]; + strncpy(clientName, remote_msg.shm_name, SHM_MAX_NAME_BUFF_LEN); + + char *clientIpPort = strrchr(clientName, '_'); + if (clientIpPort != NULL) { + *clientIpPort = '\0'; + } + int result = snprintf(local_trx_shm.name, SHM_MAX_NAME_BUFF_LEN, "%s_%s", + clientName, SERVER_SHM_NAME_SUFFIX); + if (UNLIKELY(result < 0)) { + LOG(WARNING) << "Copy client shared memory name failed, ret=" << result; + ub_transport->_ub_state = UBShmTransport::UB_OFF; + } + if (result >= 0 && ep->AllocateServerResources(&remote_trx_shm, &local_trx_shm) < 0) { + LOG(WARNING) << "Fail to allocate ub resources, fallback to tcp:" + << s->description(); + ub_transport->_ub_state = UBShmTransport::UB_OFF; + } + } + + ep->_state = S_HELLO_SEND; + HelloMessage local_msg; + local_msg.msg_len = g_ub_hello_msg_len; + if (ub_transport->_ub_state == UBShmTransport::UB_OFF) { + local_msg.impl_ver = 0; + local_msg.hello_ver = 0; + } else { + local_msg.hello_ver = g_ub_hello_version; + local_msg.impl_ver = g_ub_impl_version; + local_msg.len = (FLAGS_data_queue_size) * MB_TO_BYTE; + memcpy(local_msg.shm_name, remote_msg.shm_name, SHM_MAX_NAME_BUFF_LEN); + } + memcpy(data, MAGIC_STR, MAGIC_STR_LEN); + local_msg.Serialize((char*)data + MAGIC_STR_LEN); + if (ep->WriteToFd(data, g_ub_hello_msg_len) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to send Hello Message to client:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete ub handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + + ep->_state = S_ACK_WAIT; + if (ep->ReadFromFd(data, ACK_MSG_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to read ack message from client:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete ubring 
handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + + uint32_t* tmp = (uint32_t*)data; + uint32_t flags = butil::NetToHost32(*tmp); + if (flags & ACK_MSG_UB_OK) { + if (ub_transport->_ub_state == UBShmTransport::UB_OFF) { + LOG(WARNING) << "Fail to parse Hello Message length from client:" + << s->description(); + s->SetFailed(EPROTO, "Fail to complete ub handshake from %s: %s", + s->description().c_str(), berror(EPROTO)); + ep->_state = FAILED; + return NULL; + } else { + ub_transport->_ub_state = UBShmTransport::UB_ON; + ep->_state = ESTABLISHED; + LOG_IF(INFO, FLAGS_ub_trace_verbose) + << "Server handshake ends (use ubring) on " << s->description(); + } + } else { + ub_transport->_ub_state = UBShmTransport::UB_OFF; + ep->_state = FALLBACK_TCP; + LOG_IF(INFO, FLAGS_ub_trace_verbose) + << "Server handshake ends (use tcp) on " << s->description(); + } + ep->TryReadOnTcp(); + + return NULL; +} + +bool UBShmEndpoint::IsWritable() const { + if (BAIDU_UNLIKELY(g_skip_ub_init)) { + // Just for UT + return false; + } + auto ret = _ub_ring->IsUbrTrxWriteable(EPOLLET); + if (ret == 0) { + return true; + } + return false; +} + +ssize_t UBShmEndpoint::CutFromIOBufList(butil::IOBuf** from, size_t ndata) { + if (BAIDU_UNLIKELY(g_skip_ub_init)) { + // Just for UT + errno = EAGAIN; + return -1; + } + if (BAIDU_UNLIKELY(ndata == 0)) { + return 0; + } + struct iovec vec[IOBUF_IOV_MAX]; + size_t nvec = 0; + for (size_t i = 0; i < ndata; ++i) { + const butil::IOBuf* p = from[i]; + const size_t nref = p->backing_block_num(); + for (size_t j = 0; j < nref && nvec < IOBUF_IOV_MAX; ++j, ++nvec) { + butil::StringPiece sp = p->backing_block(j); + vec[nvec].iov_base = const_cast(sp.data()); + vec[nvec].iov_len = sp.size(); + } + } + + ssize_t nw = 0; + nw = _ub_ring->UbrTrxWritev(vec, nvec); + if (UNLIKELY(nw == -1)) { + LOG(ERROR) << "Non-blocking send msg in failed, connection has been closed."; + errno = EPIPE; + } else if 
(UNLIKELY(nw == UBRING_RETRY)) { + errno = EAGAIN; + nw = -1; + } + if (nw <= 0) { + return nw; + } + size_t npop_all = nw; + for (size_t i = 0; i < ndata; ++i) { + npop_all -= from[i]->pop_front(npop_all); + if (npop_all == 0) { + break; + } + } + return nw; +} + +int UBShmEndpoint::AllocateClientResources(ubring::SHM* local_trx_shm, const char* shm_name) { + if (BAIDU_UNLIKELY(g_skip_ub_init)) { + // For UT + return 0; + } + + CHECK(_ub_ring == NULL); + // TODO: Pooling management + _ub_ring = new UBRing(); + + SocketOptions options; + options.user = this; + options.keytable_pool = _socket->_keytable_pool; + if (Socket::Create(options, &_cq_sid) < 0) { + PLOG(WARNING) << "Fail to create socket for cq"; + return -1; + } + int ret = _ub_ring->UbrAllocateLocalShm(local_trx_shm, shm_name); + if (ret != 0) { + return ret; + } + PollerRegisterEvent(CqSidOp::ADD, EPOLLIN); + return 0; +} + +int UBShmEndpoint::AllocateServerResources(ubring::SHM* remote_trx_shm, ubring::SHM* local_trx_shm) { + if (BAIDU_UNLIKELY(g_skip_ub_init)) { + // For UT + return 0; + } + + CHECK(_ub_ring == NULL); + // TODO: Pooling management + _ub_ring = new UBRing(); + + SocketOptions options; + options.user = this; + options.keytable_pool = _socket->_keytable_pool; + if (Socket::Create(options, &_cq_sid) < 0) { + PLOG(WARNING) << "Fail to create socket for cq"; + return -1; + } + int ret = _ub_ring->UbrAllocateServerShm(remote_trx_shm, local_trx_shm); + if (ret != 0) { + return ret; + } + // TODO mwj 是否应该在连接之后再进行轮询? 
+ PollerRegisterEvent(CqSidOp::ADD, EPOLLIN); + return ret; +} + +void UBShmEndpoint::DeallocateResources() { + if (!_ub_ring) { + return; + } + PollerRegisterEvent(CqSidOp::REMOVE); + _ub_ring->UbrTrxClose(); + if (INVALID_SOCKET_ID != _cq_sid) { + SocketUniquePtr s; + if (Socket::Address(_cq_sid, &s) == 0) { + s->_user = NULL; + s->_fd = -1; + s->SetFailed(); + } + } +} + +void UBShmEndpoint::PollIn(UBShmEndpoint* ep, uint32_t epEvent) { + SocketUniquePtr s; + if (Socket::Address(ep->_socket->id(), &s) < 0) { + return; + } + auto* ub_transport = static_cast(s->_transport.get()); + CHECK(ep == ub_transport->_ub_ep); + + InputMessageClosure last_msg; + while (true) { + int ret = ep->_ub_ring->IsUbrTrxReadable(epEvent); + if (ret < 0) { + return; + } + + bool read_eof = false; + while (!read_eof) { + const int64_t received_us = butil::cpuwide_time_us(); + const int64_t base_realtime = butil::gettimeofday_us() - received_us; + + size_t once_read = s->_avg_msg_size * 16; + if (once_read < MIN_ONCE_READ) { + once_read = MIN_ONCE_READ; + } else if (once_read > MAX_ONCE_READ) { + once_read = MAX_ONCE_READ; + } + + const ssize_t nr = s->_read_buf.append_from_reader(ep->_ub_ring, once_read); + if (nr <= 0) { + if (0 == nr) { + // Set `read_eof' flag and proceed to feed EOF into `Protocol' + // (implied by m->_read_buf.empty), which may produce a new + // `InputMessageBase' under some protocols such as HTTP + LOG_IF(WARNING, FLAGS_log_connection_close) << *s << " was closed by remote side"; + read_eof = true; + } else if (errno != EAGAIN) { + if (errno == EINTR) { + continue; + } + const int saved_errno = errno; + PLOG(WARNING) << "Fail to read from " << *s; + s->SetFailed(saved_errno, "Fail to read from %s: %s", + s->description().c_str(), berror(saved_errno)); + return; + } else { + return; + } + } + + InputMessenger* messenger = static_cast(s->user()); + if (messenger->ProcessNewMessage(s.get(), nr, read_eof, received_us, + base_realtime, last_msg) < 0) { + return; + } + 
} + + if (read_eof) { + s->SetEOF(); + } + } +} + +void UBShmEndpoint::PollOut(UBShmEndpoint* ep, uint32_t epEvent) { + SocketUniquePtr s; + if (Socket::Address(ep->_socket->id(), &s) < 0) { + return; + } + auto* ub_transport = static_cast(s->_transport.get()); + CHECK(ep == ub_transport->_ub_ep); + if (ep->IsWritable()) { + ep->_socket->WakeAsEpollOut(); + } + +} + +int UBShmEndpoint::GlobalInitialize() { + g_ubring_resource_mutex = new butil::Mutex; + _poller_groups = std::vector(FLAGS_task_group_ntags); + return 0; +} + +// Stop and join the pollers of every poller group that was actually created. +void UBShmEndpoint::GlobalRelease() { + // Iterate over the real vector size rather than FLAGS_task_group_ntags: + // ExitWithError() can reach this before GlobalInitialize() has sized + // _poller_groups, and indexing an empty vector is out of bounds. + for (size_t i = 0; i < _poller_groups.size(); ++i) { + PollingModeRelease((bthread_tag_t)i); + } +} + +std::vector UBShmEndpoint::_poller_groups; + +int UBShmEndpoint::PollingModeInitialize(bthread_tag_t tag, + std::function callback, + std::function init_fn, + std::function release_fn) { + auto& group = _poller_groups[tag]; + auto& pollers = group.pollers; + auto& running = group.running; + bool expected = false; + if (!running.compare_exchange_strong(expected, true)) { + return 0; + } + struct FnArgs { + Poller* poller; + std::atomic* running; + }; + auto fn = [](void* p) -> void* { + std::unique_ptr args(static_cast(p)); + auto poller = args->poller; + auto running = args->running; + std::unordered_set cq_sids; + CqSidOp op; + + if (poller->init_fn) { + poller->init_fn(); + } + while (running->load(std::memory_order_relaxed)) { + while (poller->op_queue.Dequeue(op)) { + if (op.type == CqSidOp::ADD) { + cq_sids.emplace(op); + } else if (op.type == CqSidOp::REMOVE) { + cq_sids.erase(op); + + } else if (op.type == CqSidOp::MOD) { + cq_sids.erase(op); + cq_sids.emplace(op); + } + } + for (auto cq : cq_sids) { + SocketUniquePtr s; + if (Socket::Address(cq.sid, &s) < 0) { + continue; + } + UBShmEndpoint* ep = static_cast(s->user()); + if (!ep) { + continue; + } + + if (cq.event & EPOLLIN) { + PollIn(ep, cq.event); + } + + if (cq.event & EPOLLOUT) { + PollOut(ep, cq.event); + } + } + if (poller->callback) { +
poller->callback(); + } + if (FLAGS_ub_poller_yield) { + bthread_yield(); + } + } + + if (poller->release_fn) { + poller->release_fn(); + } + + return nullptr; + }; + for (int i = 0; i < FLAGS_ub_poller_num; ++i) { + auto args = new FnArgs{&pollers[i], &running}; + auto attr = FLAGS_ub_disable_bthread ? BTHREAD_ATTR_PTHREAD + : BTHREAD_ATTR_NORMAL; + attr.tag = tag; + bthread_attr_set_name(&attr, "UBPolling"); + pollers[i].callback = callback; + pollers[i].init_fn = init_fn; + pollers[i].release_fn = release_fn; + auto rc = bthread_start_background(&pollers[i].tid, &attr, fn, args); + if (rc != 0) { + LOG(ERROR) << "Fail to start ubring polling bthread"; + return -1; + } + } + return 0; +} + +void UBShmEndpoint::PollingModeRelease(bthread_tag_t tag) { + auto& group = _poller_groups[tag]; + auto& pollers = group.pollers; + auto& running = group.running; + running.store(false, std::memory_order_relaxed); + for (int i = 0; i < FLAGS_ub_poller_num; ++i) { + bthread_join(pollers[i].tid, NULL); + } +} + +void UBShmEndpoint::PollerRegisterEvent(CqSidOp::OpType op, uint32_t events) { + auto index = butil::fmix32(_cq_sid) % FLAGS_ub_poller_num; + auto& group = _poller_groups[bthread_self_tag()]; + auto& pollers = group.pollers; + auto& poller = pollers[index]; + if (INVALID_SOCKET_ID != _cq_sid) { + poller.op_queue.Enqueue(CqSidOp{_cq_sid, events, op}); + } +} + +} // namespace ubring +} // namespace brpc + +#endif // if BRPC_WITH_UBRING diff --git a/src/brpc/ubshm/ub_endpoint.h b/src/brpc/ubshm/ub_endpoint.h new file mode 100644 index 0000000000..d199f5881a --- /dev/null +++ b/src/brpc/ubshm/ub_endpoint.h @@ -0,0 +1,234 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BRPC_UB_ENDPOINT_H +#define BRPC_UB_ENDPOINT_H + +#if BRPC_WITH_UBRING + +#include +#include +#include +#include +#include +#include "butil/atomicops.h" +#include "butil/iobuf.h" +#include "butil/macros.h" +#include "butil/containers/mpsc_queue.h" +#include "brpc/socket.h" +#include "brpc/ubshm/ub_helper.h" +#include "brpc/ubshm/ub_ring.h" +#include "brpc/ubshm/shm/shm_def.h" + + +namespace brpc { +class Socket; +namespace ubring { + +DECLARE_int32(ub_poller_num); +DECLARE_bool(ub_edisp_unsched); +DECLARE_bool(ub_disable_bthread); + +class UBConnect : public AppConnect { +public: + void StartConnect(const Socket* socket, + void (*done)(int err, void* data), void* data) override; + void StopConnect(Socket*) override; + struct RunGuard { + RunGuard(UBConnect* rc) { this_rc = rc; } + ~RunGuard() { if (this_rc) this_rc->Run(); } + UBConnect* this_rc; + }; + +private: + void Run(); + void (*_done)(int, void*){NULL}; + void* _data{NULL}; +}; + +class BAIDU_CACHELINE_ALIGNMENT UBShmEndpoint : public SocketUser { +friend class UBConnect; +friend class Socket; +public: + explicit UBShmEndpoint(Socket* s); + ~UBShmEndpoint() override; + + // Global initialization + // Return 0 if success, -1 if failed and errno set + static int GlobalInitialize(); + + static void GlobalRelease(); + + // Reset the endpoint (for next use) + void Reset(); + + // Cut data from the given IOBuf list and use 
UBRING to send + // Return bytes cut if success, -1 if failed and errno set + ssize_t CutFromIOBufList(butil::IOBuf** data, size_t ndata); + + // Whether the endpoint can send more data + bool IsWritable() const; + + void PollerRegisterEpollOut(bool pollin) { + uint32_t events = EPOLLOUT | EPOLLET; + if (pollin) { + PollerRegisterEvent(CqSidOp::MOD, events | EPOLLIN); + return; + } + PollerRegisterEvent(CqSidOp::ADD, events); + } + + void PollerUnRegisterEpollOut(bool pollin) { + uint32_t events = EPOLLIN | EPOLLET; + if (pollin) { + PollerRegisterEvent(CqSidOp::MOD, events); + return; + } + PollerRegisterEvent(CqSidOp::REMOVE); + } + + // Callback when there is new epollin event on TCP fd + static void OnNewDataFromTcp(Socket* m); + + // Initialize polling mode + static int PollingModeInitialize(bthread_tag_t tag, + std::function callback, + std::function init_fn, + std::function release_fn); + + static void PollingModeRelease(bthread_tag_t tag); + +private: + enum State { + UNINIT = 0x0, + C_ALLOC_SHM = 0x1, + C_HELLO_SEND = 0x2, + C_HELLO_WAIT = 0x3, + C_MAP_REMOTE_SHM = 0x4, + C_ACK_SEND = 0x5, + S_HELLO_WAIT = 0x11, + S_ALLOC_SHM = 0x12, + S_HELLO_SEND = 0x13, + S_ACK_WAIT = 0x14, + ESTABLISHED = 0x100, + FALLBACK_TCP = 0x200, + FAILED = 0x300 + }; + + // Process handshake at the client + static void* ProcessHandshakeAtClient(void* arg); + + // Process handshake at the server + static void* ProcessHandshakeAtServer(void* arg); + + // Allocate resources + // Return 0 if success, -1 if failed and errno set + int AllocateClientResources(SHM* local_trx_shm, const char* shm_name); + + int AllocateServerResources(SHM* remote_trx_shm, SHM* local_trx_shm); + + // Release resources + void DeallocateResources(); + + // Read at most len bytes from fd in _socket to data + // wait for _read_butex if encounter EAGAIN + // return -1 if encounter other errno (including EOF) + int ReadFromFd(void* data, size_t len); + + + // Write at most len bytes from data to fd in _socket + 
// wait for _epollout_butex if encounter EAGAIN + // return -1 if encounter other errno + int WriteToFd(void* data, size_t len); + + // Poll CQ and get the work completion + static void PollIn(UBShmEndpoint* ep, uint32_t epEvent); + + static void PollOut(UBShmEndpoint* ep, uint32_t epEvent); + + // Try to read data on TCP fd in _socket + inline void TryReadOnTcp(); + + // Not owner + Socket* _socket; + + State _state; + + // ub resource + ubring::UBRing* _ub_ring{nullptr}; + + SocketId _cq_sid; + + // butex for inform read events on TCP fd during handshake + butil::atomic *_read_butex; + + DISALLOW_COPY_AND_ASSIGN(UBShmEndpoint); + + struct CqSidOp { + enum OpType { + ADD, + REMOVE, + MOD + }; + SocketId sid; + uint32_t event; + OpType type; + }; + + struct CqSidOpHash { + std::size_t operator()(const CqSidOp& op) const { + return op.sid; + } + }; + + struct CqSidOpEqual { + bool operator()(const CqSidOp& lhs, const CqSidOp& rhs) const { + return lhs.sid == rhs.sid; + } + }; + + // Poller instance + struct BAIDU_CACHELINE_ALIGNMENT Poller { + bthread_t tid{INVALID_BTHREAD}; + butil::MPSCQueue> op_queue; + // Callback used for io_uring/spdk etc + std::function callback; + // Init and Destroy function + std::function init_fn; + std::function release_fn; + }; + // Poller group + struct BAIDU_CACHELINE_ALIGNMENT PollerGroup { + PollerGroup() : pollers(FLAGS_ub_poller_num), running(false) {} + std::vector pollers; + std::atomic running; + }; + static std::vector _poller_groups; + + void PollerRegisterEvent(CqSidOp::OpType op, uint32_t events = EPOLLET); +}; + +} // namespace ubring +} // namespace brpc + +#else // if BRPC_WITH_UBRING + +class UBShmEndpoint { }; + +#endif + +#endif //BRPC_UB_ENDPOINT_H diff --git a/src/brpc/ubshm/ub_helper.cpp b/src/brpc/ubshm/ub_helper.cpp new file mode 100644 index 0000000000..6c4c7a5fde --- /dev/null +++ b/src/brpc/ubshm/ub_helper.cpp @@ -0,0 +1,137 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more 
contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#if BRPC_WITH_UBRING + +#include // dlopen +#include +#include +#include +#include +#include "butil/logging.h" +#include "brpc/socket.h" +#include "brpc/ubshm/ub_endpoint.h" +#include "brpc/ubshm/ub_helper.h" +#include "brpc/ubshm/ub_ring_manager.h" + +namespace brpc { +namespace ubring { + +void* g_handle_ub = NULL; +bool g_skip_ub_init = false; + +butil::atomic g_ub_available(false); + +void GlobalRelease() { + g_ub_available.store(false, butil::memory_order_release); + UBShmEndpoint::GlobalRelease(); + UBRingManager::UbrMgrFini(); + ShmMgrFini(); +} + +static inline void ExitWithError() { + GlobalRelease(); + exit(1); +} + +static void GlobalUBInitializeOrDieImpl() { + if (BAIDU_UNLIKELY(g_skip_ub_init)) { + // Just for UT + return; + } + + if (UBRingManager::UbrMgrInit()) { + PLOG(ERROR) << "Fail to UbrMgrInit"; + ExitWithError(); + } + + if (TimerInit()) { + PLOG(ERROR) << "Fail to TimerInit"; + ExitWithError(); + } + + if (ShmMgrInit()) { + PLOG(ERROR) << "Fail to ShmMgrInit"; + ExitWithError(); + } + + if (UBShmEndpoint::GlobalInitialize() < 0) { + LOG(ERROR) << "ubring_recv_block_type incorrect " + << "(valid value: default/large/huge)"; + ExitWithError(); + } + + g_ub_available.store(true, butil::memory_order_relaxed); +} + +static 
pthread_once_t initialize_UB_once = PTHREAD_ONCE_INIT; + +void GlobalUBInitializeOrDie() { + if (pthread_once(&initialize_UB_once, + GlobalUBInitializeOrDieImpl) != 0) { + LOG(FATAL) << "Fail to pthread_once GlobalUBInitializeOrDie"; + exit(1); + } +} + +bool IsUBAvailable() { + return g_ub_available.load(butil::memory_order_acquire); +} + +void GlobalDisableUb() { + if (g_ub_available.exchange(false, butil::memory_order_acquire)) { + LOG(FATAL) << "ub is disabled due to some unrecoverable problem"; + } +} + +bool SupportedByUB(std::string protocol) { + if (protocol.compare("baidu_std") == 0) { + return true; + } + return false; +} + +bool InitPollingModeWithTag(bthread_tag_t tag, + std::function callback, + std::function init_fn, + std::function release_fn) { + if (UBShmEndpoint::PollingModeInitialize(tag, callback, init_fn, + release_fn) == 0) { + return true; + } + return false; +} + +} // namespace ubring +} // namespace brpc + +#else + +#include +#include "butil/logging.h" + +namespace brpc { +namespace ubring { +void GlobalUBInitializeOrDie() { + LOG(ERROR) << "brpc is not compiled with ubring. To enable it, please refer to " + << "https://github.com/apache/brpc/blob/master/docs/en/ubring.md"; + exit(1); +} +} +} + +#endif // if BRPC_WITH_UBRING \ No newline at end of file diff --git a/src/brpc/ubshm/ub_helper.h b/src/brpc/ubshm/ub_helper.h new file mode 100644 index 0000000000..6ad9ebe3eb --- /dev/null +++ b/src/brpc/ubshm/ub_helper.h @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BRPC_UB_HELPER_H +#define BRPC_UB_HELPER_H + +#if BRPC_WITH_UBRING + +#include +#include +#include "bthread/types.h" + +namespace brpc { +namespace ubring { + +void GlobalRelease(); + +void GlobalUBInitializeOrDie(); + +bool InitPollingModeWithTag(bthread_tag_t tag, + std::function callback = nullptr, + std::function init_fn = nullptr, + std::function release_fn = nullptr); + +bool IsUBAvailable(); + +void GlobalDisableUb(); + +bool SupportedByUB(std::string protocol); + +} // namespace ubring +} // namespace brpc + +#else + +namespace brpc { +namespace ubring { + +void GlobalRelease(); + +void GlobalUBInitializeOrDie(); + +} // namespace ubring +} // namespace brpc + +#endif // if BRPC_WITH_UBRING + +#endif // BRPC_UB_HELPER_H \ No newline at end of file diff --git a/src/brpc/ubshm/ub_ring.cpp b/src/brpc/ubshm/ub_ring.cpp new file mode 100644 index 0000000000..0ea64f07c1 --- /dev/null +++ b/src/brpc/ubshm/ub_ring.cpp @@ -0,0 +1,1083 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include "bthread/bthread.h" +#include "butil/logging.h" +#include "brpc/ubshm/ub_ring.h" +#include "brpc/ubshm/ub_ring_manager.h" +#include "brpc/ubshm/shm/shm_ipc.h" + +namespace brpc { +namespace ubring { +uint32_t g_sleepTime[UBR_TASK_STEP_NUM] = {0}; +#define TIME_COVERSION 1000 +DEFINE_int32(ub_disconnect_timeout, 5, "Ubshm disconnection timeout."); +DEFINE_int32(ub_connect_timeout, 1, "Ubshm connection timeout."); +DEFINE_int32(ub_hb_timer_interval, 5, "Heartbeat timer interval."); +DEFINE_int32(ub_hb_retry_cnt, 10, "Heartbeat retry times."); +DEFINE_int32(ub_event_queue_timer_interval, 100, "Interval of the disconnection timer."); + +UBRing::UBRing() +{} +UBRing::~UBRing() +{} + +RETURN_CODE UBRing::UbrTrxMapShm(SHM *localShm, SHM *remoteShm) +{ + RETURN_CODE rc = UbrTrxMapLocalShm(localShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Trx map local shared memory failed."; + return rc; + } + rc = UbrTrxMapRemoteShm(remoteShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Trx map remote shared memory failed."; + return rc; + } + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrTrxClose() { + RETURN_CODE closeCheckRc = UbrTrxCloseCheck(_trx); + if (UNLIKELY(closeCheckRc != UBRING_OK)) { + if (closeCheckRc == UBRING_REENTRY) { + LOG(INFO) << "Trx close skipped, already closing, local name=" << _trx->localShm.name; + return UBRING_OK; + } + return UBRING_ERR; + } + if (_trx->ubrRx.remoteTxEventQ.addr != nullptr) { + ((UbrEventQMsg *)_trx->ubrRx.remoteTxEventQ.addr)->flag = 
UBR_STATE_CLOSING; + } + + uint32_t disconnectTimeout = FLAGS_ub_disconnect_timeout; + uint64_t startTime = GetCurNanoSeconds(); + + if (_trx->ubrTx.localTxEventQ.addr != nullptr && ((UbrEventQMsg *)_trx->ubrTx.localTxEventQ.addr)->flag == UBR_STATE_CONNECTED) { + ((UbrEventQMsg *)_trx->ubrTx.localTxEventQ.addr)->flag = UBR_STATE_CLOSED; + _trx->ubrTx.trxState = UBR_STATE_CLOSED; + } + + if (_trx->ubrTx.remoteRxEventQ.addr != nullptr) { + ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->flag = UBR_STATE_CLOSED; + } + while (_trx->ubrRx.localRxEventQ.addr != nullptr && ((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->flag != UBR_STATE_CLOSED) { + UbrSetSleepTask(UBR_TASK_CLOSE); + if (HasTimedOut(startTime, disconnectTimeout) != UBRING_OK) { + LOG(WARNING) << "Local shm " << _trx->localShm.name + << " wait for the peer to close timed out, force cleanup."; + _trx->ubrRx.trxState = UBR_STATE_CLOSED; + // Force synchronous cleanup instead of relying on async timer + DeleteTimerSafe((uint32_t)_trx->timerFd); + DeleteTimerSafe((uint32_t)_trx->hbTimerFd); + if (_trx->ubrTx.remoteRxEventQ.addr != nullptr) { + ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->flag = UBR_STATE_CLOSED; + } + if (UNLIKELY(ShmRemoteFree(&_trx->remoteShm) != UBRING_OK)) { + LOG(WARNING) << "Force close, remote shm " << _trx->remoteShm.name << " free failed."; + } + if (UNLIKELY(UbrTrxFreeShm(_trx) != UBRING_OK)) { + LOG(WARNING) << "Force close, local shm " << _trx->localShm.name << " free failed."; + } + if (UNLIKELY(UBRingManager::ReleaseUbrTrxFromMgr(_trx) != UBRING_OK)) { + LOG(WARNING) << "Force close, release trx " << _trx->localShm.name << " failed."; + } + return UBRING_ERR_TIMEOUT; + } + bthread_usleep(1000); // 1ms, yield to other bthreads + } + _trx->ubrRx.trxState = UBR_STATE_CLOSED; + RETURN_CODE rc; + if (UNLIKELY((rc = ClearTrxResource(_trx, startTime, UBR_SEND_CLOSE)) != UBRING_OK)) { + if (rc == UBRING_REENTRY) { + LOG(INFO) << "Trx close, peer is closing, trx local name=" 
<< _trx->localShm.name; + return UBRING_OK; + } + LOG(ERROR) << "Trx close, clear trx resource failed, trx local name=" << _trx->localShm.name; + return UBRING_ERR; + } + // Unlink local shm name immediately so process exit does not leave visible leftovers. + RETURN_CODE unlinkRc = ShmFree(&_trx->localShm); + if (unlinkRc != UBRING_OK && unlinkRc != SHM_ERR_NOT_FOUND && unlinkRc != SHM_ERR_RESOURCE_ATTACHED) { + LOG(WARNING) << "Trx close, unlink local shm failed, trx local name=" << _trx->localShm.name + << ", rc=" << unlinkRc; + } + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrAddCloseTimer() { + if (UNLIKELY(_trx == NULL)) { + LOG(ERROR) << "Trx add close timer failed, trx is null."; + return UBRING_ERR; + } + + uint32_t eventQTimerInterval = FLAGS_ub_event_queue_timer_interval * TIME_COVERSION; + itimerspec timeSpec = { + .it_interval = {.tv_sec = 0, .tv_nsec = eventQTimerInterval}, + .it_value = {.tv_sec = 0, .tv_nsec = 1} + }; + int timerFd = TimerStart(&timeSpec, UbrTrxCloseCallback, (void*)_trx); + if (UNLIKELY(timerFd == -1)) { + LOG(ERROR) << "Start ubr close timer failed, trx local name=" << _trx->localShm.name; + return UBRING_ERR; + } + _trx->timerFd = timerFd; + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrAddTimer() { + if (UNLIKELY(UbrAddCloseTimer() != UBRING_OK)) { + LOG(ERROR) << "Ubr " << _trx->localShm.name << " add closed timer failed."; + return UBRING_ERR; + } + + if (UNLIKELY(UbrAddHBTimer() != UBRING_OK)) { + DeleteTimerSafe((uint32_t)_trx->timerFd); + LOG(ERROR) << "Ubr " << _trx->localShm.name << " add heartbeat timer failed."; + return UBRING_ERR; + } + return UBRING_OK; +} + +void* UBRing::UbrTrxCloseCallback(void* args) { + auto* trx = (UbrTrx*) args; + if (UNLIKELY(UBRing::UbrTrxCallbackCheck(trx) != UBRING_OK)) { + return nullptr; + } + + auto* localRxEventQ = (UbrEventQMsg *)trx->ubrRx.localRxEventQ.addr; + auto* localTxEventQ = (UbrEventQMsg *)trx->ubrTx.localTxEventQ.addr; + if (localRxEventQ->flag != UBR_STATE_CLOSED || 
localTxEventQ->flag == UBR_STATE_CLOSED) { + return nullptr; + } + trx->ubrRx.trxState = UBR_STATE_CLOSED; + int fd = (int)trx->localShm.fd; + do { + if (ATOMIC_LOAD(trx->closeCnt) == 0) { + break; + } + ATOMIC_SUB(trx->closeCnt, 1); + + uint64_t startTime = GetCurNanoSeconds(); + + if (localTxEventQ->flag == UBR_STATE_CONNECTED || ATOMIC_LOAD(trx->closeCnt) == 1) { + localTxEventQ->flag = UBR_STATE_CLOSED; + trx->ubrTx.trxState = UBR_STATE_CLOSED; + } + UbrEventQMsg* remoteRxEventQ = (UbrEventQMsg *)trx->ubrTx.remoteRxEventQ.addr; + if (remoteRxEventQ == nullptr) { + LOG(ERROR) << "Trx close callback failed, " << trx->localShm.name << " remoteRxEventQ is NULL."; + break; + } + remoteRxEventQ->flag = UBR_STATE_CLOSED; + RETURN_CODE clearRc = ClearTrxResource(trx, startTime, UBR_CALL_BACK_CLOSE, 1); + if (UNLIKELY(clearRc != UBRING_OK && clearRc != UBRING_REENTRY)) { + LOG(ERROR) << "Trx close callback failed, " << trx->localShm.name << " clear trx resource failed."; + break; + } + } while (0); + return nullptr; +} + +RETURN_CODE UBRing::UbrAddHBTimer() { + if (UNLIKELY(_trx == NULL)) { + LOG(ERROR) << "Trx add heartbeat timer failed, trx is null."; + return UBRING_ERR; + } + + itimerspec timeSpec = { + .it_interval = {.tv_sec = FLAGS_ub_hb_timer_interval, .tv_nsec = 0}, + .it_value = {.tv_sec = 0, .tv_nsec = 1} + }; + int timerFd = TimerStart(&timeSpec, UbrTrxHBCallback, (void*)_trx); + if (UNLIKELY(timerFd == -1)) { + LOG(ERROR) << "Start ubr heartbeat timer failed."; + return UBRING_ERR; + } + _trx->hbTimerFd = timerFd; + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrPassiveClearTrx(UbrTrx *trx, int fd, PASSIVE_DISC_TYPE type) { + RETURN_CODE passiveCloseCheckRc = UbrTrxCloseCheck(trx); + if (UNLIKELY(passiveCloseCheckRc != UBRING_OK)) { + if (passiveCloseCheckRc == UBRING_REENTRY) { + LOG(INFO) << "Passive close skipped, active close in progress, name=" << trx->localShm.name; + uint64_t startTime = GetCurNanoSeconds(); + return ClearTrxResource(trx, startTime, 
UBR_CALL_BACK_CLOSE); + } + return UBRING_ERR; + } + trx->ubrTx.trxState = UBR_STATE_CLOSED; + trx->ubrRx.trxState = UBR_STATE_CLOSED; + DeleteTimerSafe((uint32_t)trx->timerFd); + const char *typeName = NULL; + if (type == UBR_HEARTBEAT) { + DeleteTimer((uint32_t)trx->hbTimerFd); + typeName = "Trx heartbeat"; + } else if (type == UBR_UB_EVENT) { + DeleteTimerSafe((uint32_t)trx->hbTimerFd); + typeName = "Ub event callback"; + } + bthread_usleep(FLAGS_ub_flying_io_timeout * 1000000LL); // yield-friendly sleep + + int rc = ShmLocalFree(&trx->remoteShm); + if (rc != UBRING_OK) { + LOG(ERROR) << typeName << ", delete remote shm failed. ret=" << rc; + } + rc = ShmLocalFree(&trx->localShm); + if (rc != UBRING_OK) { + LOG(ERROR) << typeName << ", delete local shm failed. ret=" << rc; + } + + UBRingManager::ReleaseUbrTrxFromMgr(trx); + return UBRING_OK; +} + +void* UBRing::UbrTrxHBCallback(void* args) { + auto* trx = (UbrTrx*) args; + if (UNLIKELY(UbrTrxCallbackCheck(trx) != UBRING_OK)) { + return NULL; + } + + auto* localDataStatus = (UbrDataStatusQMsg *)trx->ubrTx.localDataStatusQ.addr; + auto* remoteDataStatus = (UbrDataStatusQMsg *)trx->ubrRx.remoteDataStatusQ.addr; + if (UNLIKELY(localDataStatus == NULL || remoteDataStatus == NULL)) { + LOG(ERROR) << "Heartbeat error, datastatus is NULL."; + return NULL; + } + + if (trx->ubrTx.trxState != UBR_STATE_CONNECTED || trx->ubrRx.trxState != UBR_STATE_CONNECTED) { + LOG_EVERY_SECOND(INFO) << "Heartbeat cannot be started, wait connected state."; + return NULL; + } + + remoteDataStatus->heartBeat = 1; + if (localDataStatus->heartBeat == 1) { + localDataStatus->heartBeat = 0; + trx->ubrTx.hbRetryCnt = 0; + return NULL; + } + + ++trx->ubrTx.hbRetryCnt; + if (trx->ubrTx.hbRetryCnt <= FLAGS_ub_hb_retry_cnt) { + return NULL; + } + + int fd = (int)trx->localShm.fd; + LOG(INFO) << "Hlc heartbeat, start to clear trx resource. 
hbTimerFd=" << fd << ", shmName=" << trx->localShm.name; + UbrPassiveClearTrx(trx, fd, UBR_HEARTBEAT); + LOG(INFO) << "Hlc heartbeat clear trx resource finish."; + return NULL; +} + +RETURN_CODE UBRing::UbrAddAsynClearTimer(UbrTrx *trx) { + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Trx add close timer failed, trx is null."; + return UBRING_ERR; + } + + if (trx->clearTimerFd > 0) { + return UBRING_OK; + } + + itimerspec timeSpec = { + .it_interval = {.tv_sec = 0, .tv_nsec = 0}, + .it_value = {.tv_sec = FLAGS_ub_flying_io_timeout, .tv_nsec = 0} + }; + + int timerFd = TimerStart(&timeSpec, UbrAsynClearCallback, (void*)trx); + if (UNLIKELY(timerFd == -1)) { + LOG(ERROR) << "Start ubr close timer failed, trx name=%s.", trx->localShm.name; + return UBRING_ERR; + } + trx->clearTimerFd = timerFd; + return UBRING_OK; +} + +void *UBRing::UbrAsynClearCallback(void *args) +{ + auto* trx = (UbrTrx*) args; + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Trx close, trx is null."; + return NULL; + } + + if (UNLIKELY(ShmRemoteFree(&trx->remoteShm) != UBRING_OK)) { + LOG(ERROR) << "Trx close, remote shm " << trx->remoteShm.name << " free failed."; + } + + if (UNLIKELY(UbrTrxFreeShm(trx) != UBRING_OK)) { + LOG(ERROR) << "Trx close, wait for local shm " << trx->localShm.name << " free fail."; + } + + if (UNLIKELY(UBRingManager::ReleaseUbrTrxFromMgr(trx) != UBRING_OK)) { + LOG(ERROR) << "Trx close, release shm " << trx->localShm.name << " trx failed."; + } + return NULL; +} + +int UBRing::UbrTrxSend(const void *buf, uint32_t bufLen) +{ + if (UNLIKELY(CheckTrxSendPreCheck(_trx) != UBRING_OK)) { + return UBRING_ERR; + } + // 1.2 计算空间 + auto *dataStatusMsg = (UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr; + auto *dataMsg = (UbrMsgFormat *)_trx->ubrTx.remoteDataQ.addr; + uint32_t cap = _trx->ubrTx.capacity; + uint32_t tail = dataStatusMsg->tail; + uint32_t remainChunkNum = + (_trx->ubrTx.writePos > tail) ? 
(tail + cap - _trx->ubrTx.writePos) : (tail - _trx->ubrTx.writePos); + uint32_t needMsgChunkNum = CalcUbrMsgChunkCnt(bufLen); + if (remainChunkNum < needMsgChunkNum) { + return UBRING_RETRY; + } + UbrMsgFormat *msg = &(_trx->ubrTx.localMsgSpace); + uint32_t totalSendLen = 0; + uint32_t remainBufLen = bufLen; + uint8_t isLastPkt = 0; + _trx->ubrTx.outIoId++; + ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->ioId = _trx->ubrTx.outIoId; + while (remainBufLen > 0) { + isLastPkt = (uint8_t)(remainBufLen <= UBR_MSG_PAYLOAD_LEN); + msg->header[UBR_MSG_FLAG_INDEX] = isLastPkt ? UBR_MSG_CHUNK_EOF : UBR_MSG_CHUNK_EXIST; + msg->header[UBR_MSG_LEN_INDEX] = isLastPkt ? (uint8_t)remainBufLen : UBR_MSG_PAYLOAD_LEN; + msg->header[UBR_MSG_CUR_INDEX] = 0; + memcpy(msg->payload.inner, (const uint8_t *)buf + totalSendLen, msg->header[UBR_MSG_LEN_INDEX]); + Copy64Byte((int8_t *)&dataMsg[_trx->ubrTx.writePos], (int8_t *)msg); + _trx->ubrTx.writePos = (_trx->ubrTx.writePos + 1) % cap; + totalSendLen += msg->header[UBR_MSG_LEN_INDEX]; + remainBufLen -= msg->header[UBR_MSG_LEN_INDEX]; + } + return (int)totalSendLen; +} + +int UBRing::UbrTrxRecv(void *buf, uint32_t bufLen) +{ + RETURN_CODE rc = UBRING_OK; + if (UNLIKELY((rc = CheckTrxRecvParam(_trx, buf, bufLen)) != UBRING_OK)) { + return (rc == UBR_NOT_CONNECTED) ? 0 : rc; + } + UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrRx.localDataQ.addr; + uint32_t readPosEnd = _trx->ubrRx.readPos; + uint8_t flag = dataMsg[readPosEnd].header[UBR_MSG_FLAG_INDEX]; + if (flag == UBR_MSG_CHUNK_NONE) { + return UBRING_RETRY; + } + return UbrTrxRecvBlockMode(static_cast(buf), bufLen); +} + +int UBRing::UbrTrxRecvBlockMode(uint8_t *dest, uint32_t bufLen) +{ + RETURN_CODE rc = UBRING_OK; + if (UNLIKELY((rc = CheckTrxRecvParam(_trx, dest, bufLen)) != UBRING_OK)) { + return (rc == UBR_NOT_CONNECTED) ? 
0 : rc; + } + + int32_t totalCopied = 0; + int32_t remainingLen = (int32_t)bufLen; + bool notEofEncountered = true; + + UbrRx *ubrRx = &_trx->ubrRx; + UbrMsgFormat *dataMsg = (UbrMsgFormat *)ubrRx->localDataQ.addr; + bool needUpdateEpollEofPos = ubrRx->readPos == ubrRx->epEofPos; + + while (notEofEncountered && remainingLen > 0) { + if (UNLIKELY(CheckTrxRecvPreCheck(_trx) != UBRING_OK)) { + return UBRING_ERR; + } + UbrMsgFormat *currentChunk = &dataMsg[ubrRx->readPos]; + uint8_t flag = currentChunk->header[UBR_MSG_FLAG_INDEX]; + if (flag == UBR_MSG_CHUNK_NONE) { + if (totalCopied > 0) { + break; + } + errno = EAGAIN; + return -1; + } + if (flag == UBR_MSG_CHUNK_EOF) { + notEofEncountered = false; + } + uint8_t chunkMsgLen = currentChunk->header[UBR_MSG_LEN_INDEX]; + uint8_t curIndex = currentChunk->header[UBR_MSG_CUR_INDEX]; + uint8_t availableData = chunkMsgLen - curIndex; + + int32_t copyLen = (remainingLen < availableData) ? remainingLen : availableData; + memcpy(dest + totalCopied, dataMsg[ubrRx->readPos].payload.inner + curIndex, (size_t)copyLen); + totalCopied += copyLen; + remainingLen -= copyLen; + currentChunk->header[UBR_MSG_CUR_INDEX] += (uint8_t)copyLen; + if (LIKELY(currentChunk->header[UBR_MSG_CUR_INDEX] == chunkMsgLen)) { + currentChunk->header[UBR_MSG_FLAG_INDEX] = UBR_MSG_CHUNK_NONE; + UpdateDataQTail(_trx); + ubrRx->readPos = (ubrRx->readPos + 1) % ubrRx->capacity; + } + } + if (needUpdateEpollEofPos) { + ubrRx->epEofPos = ubrRx->readPos; + } + return (int)totalCopied; +} + +ssize_t UBRing::UbrTrxWritev(const struct iovec *iov, int iovcnt) +{ + if (UNLIKELY(CheckTrxSendPreCheck(_trx) != UBRING_OK)) { + return UBRING_ERR; + } + + size_t bufLen = 0; + for (int i = 0; i < iovcnt; i++) { + bufLen += iov[i].iov_len; + } + RETURN_CODE rc = WritevHasEnoughSpace(bufLen); + if (rc != UBRING_OK) { + return rc; + } + + UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrTx.remoteDataQ.addr; + UbrMsgFormat *msg = &(_trx->ubrTx.localMsgSpace); + int curIov = 0; + 
size_t curIovPos = 0; + ssize_t totalSendLen = 0; + size_t pktRemainN = 0; + size_t iovRemain = 0; + size_t fulled = 0; + uint8_t isLastPkt = 0; + uint8_t curPktLen = 0; + _trx->ubrTx.outIoId++; + ((UbrEventQMsg *)_trx->ubrTx.remoteRxEventQ.addr)->ioId = _trx->ubrTx.outIoId; + while (bufLen > 0) { + isLastPkt = (uint8_t)(bufLen <= UBR_MSG_PAYLOAD_LEN); + curPktLen = isLastPkt ? (uint8_t)bufLen : UBR_MSG_PAYLOAD_LEN; + msg->header[UBR_MSG_FLAG_INDEX] = isLastPkt ? UBR_MSG_CHUNK_EOF : UBR_MSG_CHUNK_EXIST; + msg->header[UBR_MSG_LEN_INDEX] = curPktLen; + msg->header[UBR_MSG_CUR_INDEX] = 0; + pktRemainN = curPktLen; + while (curIov < iovcnt && pktRemainN > 0) { + iovRemain = (iov[curIov].iov_len - curIovPos); + fulled = iovRemain > pktRemainN ? pktRemainN : iovRemain; + memcpy((msg->payload.inner + (curPktLen - (uint8_t)pktRemainN)), + (uint8_t *)(iov[curIov].iov_base) + curIovPos, + fulled); + pktRemainN -= fulled; + curIovPos += fulled; + if (curIovPos == iov[curIov].iov_len) { + curIov++; + curIovPos = 0; + } + } + + Copy64Byte((int8_t *)&dataMsg[_trx->ubrTx.writePos], (int8_t *)msg); + _trx->ubrTx.writePos = (_trx->ubrTx.writePos + 1) % _trx->ubrTx.capacity; + totalSendLen += (ssize_t)curPktLen; + bufLen -= (int)curPktLen; + } + return totalSendLen; +} + +ssize_t UBRing::UbrTrxReadv(const struct iovec *iov, int iovcnt) +{ + RETURN_CODE rc = UBRING_OK; + if (UNLIKELY((rc = CheckTrxRecvParam(_trx, iov, (uint32_t)iovcnt)) != UBRING_OK)) { + return (rc == UBR_NOT_CONNECTED) ? 
0 : rc; + } + UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrRx.localDataQ.addr; + uint32_t readPosEnd = _trx->ubrRx.readPos; + uint8_t flag = dataMsg[readPosEnd].header[UBR_MSG_FLAG_INDEX]; + if (flag == UBR_MSG_CHUNK_NONE) { + errno = EAGAIN; + return -1; + } + ssize_t nr = UbrTrxReadvBlockMode(iov, iovcnt); + if (UNLIKELY(nr == -1)) { + LOG(ERROR) << "Non-blocking readv msg in failed, connection has been closed."; + errno = EPIPE; + return -1; + } + return nr; +} + +ssize_t UBRing::UbrTrxReadvBlockMode(const struct iovec *iov, int iovcnt) +{ + RETURN_CODE rc = UBRING_OK; + if (UNLIKELY((rc = CheckTrxRecvParam(_trx, iov, (uint32_t)iovcnt)) != UBRING_OK)) { + return (rc == UBR_NOT_CONNECTED) ? 0 : rc; + } + + size_t remainBufLen = 0; + for (int i = 0; i < iovcnt; i++) { + remainBufLen += iov[i].iov_len; + } + + bool needUpdateEpollEofPos = _trx->ubrRx.readPos == _trx->ubrRx.epEofPos; + ssize_t totalRecvLen = StartReadv(_trx, iov, iovcnt, remainBufLen); + + if (needUpdateEpollEofPos) { + _trx->ubrRx.epEofPos = _trx->ubrRx.readPos; + } + return totalRecvLen; +} + +RETURN_CODE UBRing::IsUbrTrxReadable(uint32_t epEvent) +{ + if (UNLIKELY(_trx == NULL)) { + LOG(ERROR) << "The trx to be checked is NULL."; + return UBRING_ERR; + } + if (UNLIKELY(_trx->localShm.addr == NULL)) { + LOG(ERROR) << "The trx localShm to be checked is NULL."; + return UBRING_ERR; + } + if (UNLIKELY(_trx->ubrTx.trxState != UBR_STATE_CONNECTED)) { + // TODO mwj 这几块的日志是否需要删除 + // LOG(ERROR) << "The trx is not connected state."; + return UBRING_ERR; + } + + uint64_t ioId = ((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->ioId; + if ((epEvent & EPOLLET) && ioId == _trx->ubrRx.inIoId) { + return MPA_MUXER_NOT_READY; + } + + uint32_t readPosEnd = _trx->ubrRx.readPos; + if (epEvent & EPOLLET) { + readPosEnd = _trx->ubrRx.epEofPos; + } + + UbrMsgFormat *dataMsg = (UbrMsgFormat *)_trx->ubrRx.localDataQ.addr; + uint8_t flag = dataMsg[readPosEnd].header[UBR_MSG_FLAG_INDEX]; + if (flag == 
UBR_MSG_CHUNK_NONE) { + return MPA_MUXER_NOT_READY; + } + if (epEvent & EPOLLET) { + _trx->ubrRx.inIoId = ioId; + } + return UBRING_OK; +} + +RETURN_CODE UBRing::IsUbrTrxWriteable(uint32_t epEvent) +{ + if (UNLIKELY(_trx == NULL)) { + LOG(ERROR) << "The trx to be checked is NULL."; + return UBRING_ERR; + } + if (UNLIKELY(_trx->localShm.addr == NULL)) { + LOG(ERROR) << "The trx localShm to be checked is NULL."; + return UBRING_ERR; + } + if (UNLIKELY((UbrEventQMsg *)_trx->ubrTx.localTxEventQ.addr == NULL)) { + LOG(ERROR) << "The trx localTxEventQ addr is NULL."; + return UBRING_ERR; + } + if (UNLIKELY((UbrEventQMsg *)_trx->ubrTx.localDataStatusQ.addr == NULL)) { + LOG(ERROR) << "The trx localDataStatusQ addr is NULL."; + return UBRING_ERR; + } + + if (UNLIKELY(_trx->ubrTx.trxState != UBR_STATE_CONNECTED)) { + LOG(ERROR) << "The trx is not connected state."; + return UBRING_ERR; + } + + UbrDataStatusQMsg *dataStatusMsg = (UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr; + uint32_t cap = _trx->ubrTx.capacity; + uint32_t tail = dataStatusMsg->tail; + uint32_t remainChunkNum = + (_trx->ubrTx.writePos > tail) ? 
(tail + cap - _trx->ubrTx.writePos) : (tail - _trx->ubrTx.writePos); + if (remainChunkNum == 0) { + _trx->ubrTx.epLastCap = remainChunkNum; + return MPA_MUXER_NOT_READY; + } + + if ((epEvent & EPOLLET) && (_trx->ubrTx.epLastCap >= remainChunkNum)) { + _trx->ubrTx.epLastCap = remainChunkNum; + return MPA_MUXER_NOT_READY; + } + _trx->ubrTx.epLastCap = remainChunkNum; + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrSetTimeout(UbrTaskStep taskType, int timeout) +{ + if (taskType >= UBR_TASK_STEP_NUM || timeout < 0) { + LOG(ERROR) << "Set timeout failed, invalid task type."; + return UBRING_ERR; + } + + g_sleepTime[taskType] = (uint32_t)timeout; + LOG(INFO) << "Set timeout success, taskType=" << taskType << ", timeout=" << timeout; + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrTrxFreeShm(UbrTrx *trx) +{ + if (trx == NULL) { + LOG(ERROR) << "Trx is NULL."; + return UBRING_ERR; + } + + RETURN_CODE rc = UBRING_OK; + rc = ShmMunmap(&trx->localShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Trx close, local unmap " << trx->localShm.name << " shm fail."; + return UBRING_ERR; + } + + rc = ShmFree(&trx->localShm); + if (UNLIKELY(rc != UBRING_OK)) { + if (rc != SHM_ERR_RESOURCE_ATTACHED && rc != SHM_ERR_NOT_FOUND) { + LOG(ERROR) << "Wait for " << trx->localShm.name << " local shm free fail."; + return UBRING_ERR; + } + LOG(INFO) << "Local shm " << trx->localShm.name << " already freed, continue to free remote shm."; + } + + RETURN_CODE remoteRc = UBRING_OK; + if (trx->remoteShm.addr != NULL) { + remoteRc = IpcShmRemoteFree(&trx->remoteShm); + } + if (remoteRc != UBRING_OK) { + LOG(WARNING) << "Free remote shm " << trx->remoteShm.name << " failed, rc=" << remoteRc; + } + + return UBRING_OK; +} + +void UBRing::PreWriteAddr(uint8_t *addr, size_t len) +{ + if (addr == NULL) { + return; + } + + size_t i = 0; + while (i < len) { + if (i + sizeof(uint64_t) <= len) { + *(uint64_t *)(addr + i) = (uint64_t)0; + i += sizeof(uint64_t); + } else if (i + sizeof(uint32_t) < len) 
{ + *(uint32_t *)(addr + i) = (uint32_t)0; + i += sizeof(uint32_t); + } else if (i + sizeof(uint16_t) < len) { + *(uint16_t *)(addr + i) = (uint16_t)0; + i += sizeof(uint16_t); + } else { + *(addr + i) = (uint8_t)0; + i += sizeof(uint8_t); + } + } +} + +void UBRing::PrewriteUbrTx(UbrTx *tx) +{ + if (tx == NULL) { + return; + } + PreWriteAddr(tx->remoteDataQ.addr, tx->capacity * sizeof(UbrMsgFormat)); +} + +void UBRing::PrewriteUbrRx(UbrRx *rx) +{ + if (rx == NULL) { + return; + } + PreWriteAddr(rx->localDataQ.addr, rx->capacity * sizeof(UbrMsgFormat)); +} + +RETURN_CODE UBRing::UbrTrxMapLocalShm(SHM *localShm) +{ + if (UNLIKELY(_trx == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, trx is null."; + return UBRING_ERR; + } + if (UNLIKELY(localShm == NULL || localShm->addr == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, localShm is null or addr is NULL."; + return UBRING_ERR; + } + _trx->localShm = *localShm; + _trx->ubrTx.localTxEventQ.addr = localShm->addr + TX_EVENTQ_ADDR_OFFSET; + _trx->ubrTx.localTxEventQ.len = UBR_EVENTQ_LEN; + _trx->ubrRx.localRxEventQ.addr = localShm->addr + RX_EVENTQ_ADDR_OFFSET; + _trx->ubrRx.localRxEventQ.len = UBR_EVENTQ_LEN; + _trx->ubrTx.localDataStatusQ.addr = localShm->addr + DATASTATUSQ_ADDR_OFFSET; + _trx->ubrTx.localDataStatusQ.len = UBR_DATASTATUSQ_LEN; + size_t addrAlignedOffset = Aligned64Offset(localShm->addr + DATAQ_ADDR_OFFSET); + _trx->ubrRx.localDataQ.addr = localShm->addr + DATAQ_ADDR_OFFSET + addrAlignedOffset; + _trx->ubrRx.localDataQ.len = localShm->len - DATAQ_ADDR_OFFSET - addrAlignedOffset; + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrTrxMapRemoteShm(SHM *remoteShm) +{ + if (UNLIKELY(_trx == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, trx is null."; + return UBRING_ERR; + } + if (UNLIKELY(remoteShm == NULL || remoteShm->addr == NULL)) { + LOG(ERROR) << "Trx map Shared memory failed, remoteShm is null or addr is NULL."; + return UBRING_ERR; + } + _trx->remoteShm = *remoteShm; + 
_trx->ubrRx.remoteTxEventQ.addr = remoteShm->addr + TX_EVENTQ_ADDR_OFFSET; + _trx->ubrRx.remoteTxEventQ.len = UBR_EVENTQ_LEN; + _trx->ubrTx.remoteRxEventQ.addr = remoteShm->addr + RX_EVENTQ_ADDR_OFFSET; + _trx->ubrTx.remoteRxEventQ.len = UBR_EVENTQ_LEN; + _trx->ubrRx.remoteDataStatusQ.addr = remoteShm->addr + DATASTATUSQ_ADDR_OFFSET; + _trx->ubrRx.remoteDataStatusQ.len = UBR_DATASTATUSQ_LEN; + size_t addrAlignedOffset = Aligned64Offset(remoteShm->addr + DATAQ_ADDR_OFFSET); + _trx->ubrTx.remoteDataQ.addr = remoteShm->addr + DATAQ_ADDR_OFFSET + addrAlignedOffset; + _trx->ubrTx.remoteDataQ.len = remoteShm->len - DATAQ_ADDR_OFFSET - addrAlignedOffset; + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrServerTrxInit(SHM *localShm, SHM *remoteShm) +{ + RETURN_CODE rc = UbrTrxMapShm(localShm, remoteShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) <<"Trx map shared memory failed."; + return rc; + } + + uint32_t localDataMsgCap = (uint32_t)(_trx->ubrRx.localDataQ.len / UBR_MSG_LEN); + uint32_t remoteDataMsgCap = (uint32_t)(_trx->ubrTx.remoteDataQ.len / UBR_MSG_LEN); + _trx->ubrRx.capacity = localDataMsgCap; + _trx->ubrTx.capacity = remoteDataMsgCap; + rc = UBRingManager::GetUbrDealMsgMaxCnt(_trx->ubrRx.capacity, &_trx->ubrRx.dealMsgMaxCnt); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Get ubring deal msg max cnt."; + return rc; + } + PrewriteUbrRx(&_trx->ubrRx); + PrewriteUbrTx(&_trx->ubrTx); + + ((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->tail = remoteDataMsgCap - 1; + ((UbrDataStatusQMsg *)(_trx->ubrRx.remoteDataStatusQ.addr))->tail = localDataMsgCap - 1; + + if (UNLIKELY(UbrAddTimer() != UBRING_OK)) { + LOG(ERROR) << "Ubr add timer failed, localName=" << localShm->name; + return UBRING_ERR; + } + + ((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->timeout = FLAGS_ub_connect_timeout; + ((UbrDataStatusQMsg *)(_trx->ubrRx.remoteDataStatusQ.addr))->timeout = FLAGS_ub_connect_timeout; + + ((UbrEventQMsg 
*)_trx->ubrTx.remoteRxEventQ.addr)->flag = UBR_STATE_CONNECTED; + ((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->flag = UBR_STATE_CONNECTED; + _trx->ubrTx.trxState = UBR_STATE_CONNECTED; + _trx->ubrRx.trxState = UBR_STATE_CONNECTED; + return UBRING_OK; +} + +int UBRing::UbrAllocateServerShm(SHM* remote_trx_shm, SHM* local_trx_shm) { + UbrSetSleepTask(UBR_TASK_ACCEPT_MAP_FRONT); + if (UNLIKELY((ShmRemoteMalloc(remote_trx_shm)) != UBRING_OK)) { + LOG(ERROR) << "Trx apply remote shared memory failed."; + return -1; + } + + if (UNLIKELY((ShmLocalCalloc(local_trx_shm)) != UBRING_OK)) { + LOG(ERROR) << "Trx apply local shared memory failed."; + return -1; + } + + UbrTrx **ubrTrxPtr = &_trx; + if (UNLIKELY((UBRingManager::AcquireUbrTrxFromMgr(ubrTrxPtr)) != UBRING_OK)) { + LOG(ERROR) << "Acquire ubrtrx failed."; + ShmRemoteFree(remote_trx_shm); + ShmLocalFree(local_trx_shm); + return -1; + } + _trx->type = TCP_TRX; + if (UNLIKELY((UbrServerTrxInit(local_trx_shm, remote_trx_shm)) != UBRING_OK)) { + LOG(ERROR) << "Server trx init failed."; + ShmRemoteFree(remote_trx_shm); + UbrTrxFreeShm(_trx); + UBRingManager::ReleaseUbrTrxFromMgr(_trx); + return -1; + } + return 0; +} + +int UBRing::UbrAllocateLocalShm(SHM *local_trx_shm, const char *shm_name) +{ + if (UNLIKELY((UBRingManager::AcquireUbrTrxFromMgr(&(_trx))) != UBRING_OK)) { + LOG(ERROR) << "Acquire ubrtrx failed, localName=" << shm_name; + return -1; + } + + _trx->type = TCP_TRX; + if (UNLIKELY((ApplyAndMapLocalShm(local_trx_shm, shm_name)) != UBRING_OK)) { + LOG(ERROR) << "Trx apply or map local shared memory failed, localName=" << shm_name; + return -1; + } + return 0; +} + +int UBRing::UbrMapRemoteShm(SHM *local_trx_shm, const char *local_name) +{ + RETURN_CODE rc = UbrMapRemoteShmAddTimer(local_trx_shm, local_name); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Connect Trx failed, local shm name=" << local_trx_shm->name; + return -1; + } + PrewriteUbrRx(&_trx->ubrRx); + PrewriteUbrTx(&_trx->ubrTx); + 
((UbrEventQMsg *)_trx->ubrRx.remoteTxEventQ.addr)->flag = UBR_STATE_CONNECTED; + ((UbrEventQMsg *)_trx->ubrRx.localRxEventQ.addr)->flag = UBR_STATE_CONNECTED; + _trx->ubrTx.trxState = UBR_STATE_CONNECTED; + _trx->ubrRx.trxState = UBR_STATE_CONNECTED; + return 0; +} + +RETURN_CODE UBRing::UbrMapRemoteShmAddTimer(SHM *localTrxShm, const char *localName) +{ + uint64_t startTime = GetCurNanoSeconds(); + + size_t remoteServerLen = UBR_MSG_LEN * (((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->tail + 1) + + UBR_MSG_LEN * ((DATAQ_ADDR_OFFSET / UBR_MSG_LEN) + 1); + SHM remoteTrxShm = {NULL, remoteServerLen, 0, {0}, localTrxShm->fd}; + int result = snprintf(remoteTrxShm.name, + SHM_MAX_NAME_BUFF_LEN, + "%s_%s_%s", + SHM_NAME_PREFIX, + localName, + SERVER_SHM_NAME_SUFFIX); + if (UNLIKELY(result < 0)) { + LOG(ERROR) << "Copy server shared memory name failed, localName=%s, ret=%d.", localName, result; + return UBRING_ERR; + } + UbrSetSleepTask(UBR_TASK_CONNECT_MAP_FRONT); + RETURN_CODE rc = ApplyAndMapRemoteShm(&remoteTrxShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Connect Trx map shared memory failed, remote shm=" << remoteTrxShm.name; + return rc; + } + + if (UNLIKELY(UbrAddTimer() != UBRING_OK)) { + LOG(ERROR) << "Ubr add timer failed, localName=" << localName; + ShmRemoteFree(&remoteTrxShm); + return UBRING_ERR; + } + + UbrSetSleepTask(UBR_TASK_CONNECT_MAP_AFTER); + + uint32_t timeout = ((UbrDataStatusQMsg *)(_trx->ubrTx.localDataStatusQ.addr))->timeout; + if (HasTimedOut(startTime, timeout) != UBRING_OK) { + LOG(ERROR) << "Local shm " << localTrxShm->name << " wait for connect remote map timeout."; + DeleteTimerSafe((uint32_t)_trx->hbTimerFd); + DeleteTimerSafe((uint32_t)_trx->timerFd); + ShmRemoteFree(&remoteTrxShm); + return UBRING_ERR_TIMEOUT; + } + + return UBRING_OK; +} + +RETURN_CODE UBRing::ApplyAndMapLocalShm(SHM *localTrxShm, const char *localName) +{ + if (UNLIKELY(_trx == NULL || localTrxShm == NULL)) { + LOG(ERROR) << "Trx map Shared 
memory failed, trx is null, localName=" << localName; + return UBRING_ERR; + } + int result = snprintf(localTrxShm->name, + SHM_MAX_NAME_BUFF_LEN, + "%s_%s_%s", + SHM_NAME_PREFIX, + localName, + CLIENT_SHM_NAME_SUFFIX); + if (UNLIKELY(result < 0)) { + LOG(ERROR) << "Copy client localTrx shared memory name failed, localName=" << localName << ", ret=" << result; + return UBRING_ERR; + } + + RETURN_CODE rc = ShmLocalCalloc(localTrxShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Trx apply local shared memory failed, local shm name=" << localTrxShm->name << ", rc=" << rc; + if (rc == SHM_ERR_EXIST || rc == SHM_ERR_NOT_FOUND) { + rc = UBR_ERR_ADDR_IN_USE; + } + UBRingManager::ReleaseUbrTrxFromMgr(_trx); + return rc; + } + rc = UbrTrxMapLocalShm(localTrxShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Trx map local shared memory failed, local shm name=" << localTrxShm->name; + ShmLocalFree(localTrxShm); + UBRingManager::ReleaseUbrTrxFromMgr(_trx); + return rc; + } + ((UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr)->timeout = FLAGS_ub_connect_timeout; + _trx->ubrRx.capacity = (uint32_t)(_trx->ubrRx.localDataQ.len / UBR_MSG_LEN); + rc = UBRingManager::GetUbrDealMsgMaxCnt(_trx->ubrRx.capacity, &_trx->ubrRx.dealMsgMaxCnt); + if (rc != UBRING_OK) { + LOG(ERROR) << "Get ubring deal msg max cnt, local shm name=" << localTrxShm->name; + ShmLocalFree(localTrxShm); + UBRingManager::ReleaseUbrTrxFromMgr(_trx); + return rc; + } + return UBRING_OK; +} + +RETURN_CODE UBRing::ApplyAndMapRemoteShm(SHM *remoteTrxShm) +{ + RETURN_CODE rc = ShmRemoteMalloc(remoteTrxShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Trx apply remote shared memory failed."; + return rc; + } + rc = UbrTrxMapRemoteShm(remoteTrxShm); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Trx map shared memory failed."; + ShmRemoteFree(remoteTrxShm); + return rc; + } + _trx->ubrTx.capacity = (uint32_t)(_trx->ubrTx.remoteDataQ.len / UBR_MSG_LEN); + return UBRING_OK; +} + +RETURN_CODE 
UBRing::WritevHasEnoughSpace(size_t bufLen) +{ + UbrDataStatusQMsg *dataStatusMsg = (UbrDataStatusQMsg *)_trx->ubrTx.localDataStatusQ.addr; + uint32_t cap = _trx->ubrTx.capacity; + uint32_t tail = dataStatusMsg->tail; + uint32_t remainChunkNum = + (_trx->ubrTx.writePos > tail) ? (tail + cap - _trx->ubrTx.writePos) : (tail - _trx->ubrTx.writePos); + uint32_t needMsgChunkNum = CalcUbrMsgChunkCnt((uint32_t)bufLen); + if (remainChunkNum < needMsgChunkNum) { + return UBRING_RETRY; + } + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrClearResourceCheck(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType) +{ + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Trx close failed, trx is null."; + return UBRING_ERR; + } + + UbrEventQMsg* localTxEventQ = (UbrEventQMsg *)trx->ubrTx.localTxEventQ.addr; + if (localTxEventQ->flag == UBR_STATE_CONNECTED) { + localTxEventQ->flag = UBR_STATE_CLOSING; + } + + if (closeType == UBR_SEND_CLOSE) { + DeleteTimerSafe((uint32_t)trx->timerFd); + } else { + DeleteTimer((uint32_t)trx->timerFd); + } + DeleteTimerSafe((uint32_t)trx->hbTimerFd); + + if (localTxEventQ->flag == UBR_STATE_CLOSING) { + localTxEventQ->flag = UBR_STATE_CLOSED; + trx->ubrTx.trxState = UBR_STATE_CLOSED; + } + + return UBRING_OK; +} + +RETURN_CODE UBRing::ClearTrxResource(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType, int op) +{ + RETURN_CODE rc = UbrClearResourceCheck(trx, startTime, closeType); + if (rc != UBRING_OK) { + return rc; + } + + rc = UbrAddAsynClearTimer(trx); + if (rc != UBRING_OK) { + LOG(ERROR) << "Trx close, add " << trx->localShm.name << " close clear timer failed."; + return UBRING_ERR; + } + + return UBRING_OK; +} + +RETURN_CODE UBRing::UbrTrxCloseCheck(UbrTrx *trx) +{ + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Trx close failed, client trx is null."; + return UBRING_ERR; + } + int expected = MAX_CLOSE_COUNT; + if (!ATOMIC_COMPARE_EXCHANGE_STRONG(trx->closeCnt, expected, MAX_CLOSE_COUNT - 1)) { + LOG(INFO) << "Trx close skipped, already 
// Scatter-read core shared by the readv entry points: drains chunks from the
// local data queue, starting at readPos, into the iovec array.
//
// trx          - transceiver whose RX ring is read.
// iov, iovcnt  - destination buffers.
// remainBufLen - total capacity of iov in bytes (computed by the caller).
//
// The loop stops when the buffers are full, after a chunk flagged
// UBR_MSG_CHUNK_EOF has been processed, or when the next slot is empty.
//
// Returns the number of bytes copied; -1 with errno = EAGAIN when the first
// slot is empty; UBRING_ERR when the RX side is no longer connected (this is
// returned even if some bytes were already copied).
ssize_t UBRing::StartReadv(UbrTrx *trx, const struct iovec *iov, int iovcnt, size_t remainBufLen)
{
    ssize_t totalRecvLen = 0;
    int iovIndex = 0;   // iovec entry currently being filled
    size_t iovPos = 0;  // bytes already written into iov[iovIndex]
    UbrMsgFormat *dataMsg = (UbrMsgFormat *)trx->ubrRx.localDataQ.addr;
    bool notEofEncountered = true;
    while (notEofEncountered && remainBufLen > 0) {
        // Re-check the connection state on every chunk.
        if (UNLIKELY(CheckTrxRecvPreCheck(trx) != UBRING_OK)) {
            return UBRING_ERR;
        }
        UbrMsgFormat *currentChunk = &dataMsg[trx->ubrRx.readPos];
        uint8_t flag = currentChunk->header[UBR_MSG_FLAG_INDEX];
        if (flag == UBR_MSG_CHUNK_NONE) {
            // Empty slot: return what was gathered so far, or EAGAIN if nothing.
            if (totalRecvLen > 0) {
                break;
            }
            errno = EAGAIN;
            return -1;
        }
        if (flag == UBR_MSG_CHUNK_EOF) {
            // This is the last chunk handled by this call.
            notEofEncountered = false;
        }
        uint8_t chunkMsgLen = currentChunk->header[UBR_MSG_LEN_INDEX];
        // Resume offset inside the chunk; non-zero after a partial read.
        uint8_t curIndex = currentChunk->header[UBR_MSG_CUR_INDEX];
        // Bytes to take from this chunk: the unread remainder, capped by the
        // space left in the caller's buffers.
        uint8_t recvLen =
            remainBufLen > (size_t)(chunkMsgLen - curIndex) ? (chunkMsgLen - curIndex) : (uint8_t)remainBufLen;
        while (iovIndex < iovcnt && recvLen > 0) {
            size_t copyLen =
                recvLen > (iov[iovIndex].iov_len - iovPos) ? iov[iovIndex].iov_len - iovPos : (size_t)recvLen;
            memcpy((uint8_t *)iov[iovIndex].iov_base + iovPos, currentChunk->payload.inner + curIndex, copyLen);
            recvLen -= (uint8_t)copyLen;
            iovPos += copyLen;
            curIndex += (uint8_t)copyLen;
            if (iovPos == iov[iovIndex].iov_len) {
                // Current iovec entry is full; move on to the next one.
                iovIndex++;
                iovPos = 0;
            }
            remainBufLen -= copyLen;
            totalRecvLen += (ssize_t)copyLen;
        }
        // Persist the resume offset in the chunk header.
        currentChunk->header[UBR_MSG_CUR_INDEX] = curIndex;
        if (currentChunk->header[UBR_MSG_CUR_INDEX] == chunkMsgLen) {
            // Chunk fully consumed: mark the slot empty, publish the current
            // readPos via UpdateDataQTail(), then advance the cursor.
            currentChunk->header[UBR_MSG_FLAG_INDEX] = UBR_MSG_CHUNK_NONE;
            UpdateDataQTail(trx);
            trx->ubrRx.readPos = (trx->ubrRx.readPos + 1) % trx->ubrRx.capacity;
        }
    }
    return totalRecvLen;
}
+ +#ifndef BRPC_UB_RING_H +#define BRPC_UB_RING_H + +#include +#include +#include "butil/macros.h" +#include "butil/reader_writer.h" +#include "brpc/ubshm/ubr_trx.h" +#include "brpc/ubshm/shm/shm_mgr.h" +#include "brpc/ubshm/timer/timer_mgr.h" + +namespace brpc { +namespace ubring { +DECLARE_int32(ub_flying_io_timeout); +extern uint32_t g_sleepTime[UBR_TASK_STEP_NUM]; + +class UBRing : public butil::IReader { +public: + UBRing(); + ~UBRing(); + DISALLOW_COPY_AND_ASSIGN(UBRing); + + ssize_t ReadV(const iovec* iov, int iovcnt) override { + return UbrTrxReadv(iov, iovcnt); + } + + RETURN_CODE UbrTrxMapShm(SHM *localShm, SHM *remoteShm); + + RETURN_CODE UbrTrxClose(); + + RETURN_CODE UbrAddCloseTimer(); + + RETURN_CODE UbrAddTimer(); + + static void *UbrTrxCloseCallback(void *args); + + RETURN_CODE UbrAddHBTimer(); + + static void *UbrTrxHBCallback(void *args); + + static RETURN_CODE UbrPassiveClearTrx(UbrTrx *trx, int fd, PASSIVE_DISC_TYPE type); + + static RETURN_CODE UbrAddAsynClearTimer(UbrTrx *trx); + + static void *UbrAsynClearCallback(void *args); + + int UbrTrxSend(const void *buf, uint32_t bufLen); + + int UbrTrxRecv(void *buf, uint32_t bufLen); + + int UbrTrxRecvBlockMode(uint8_t *dest, uint32_t bufLen); + + ssize_t UbrTrxWritev(const struct iovec *iov, int iovcnt); + ssize_t UbrTrxReadv(const struct iovec *iov, int iovcnt); + ssize_t UbrTrxReadvBlockMode(const struct iovec *iov, int iovcnt); + + RETURN_CODE IsUbrTrxReadable(uint32_t epEvent); + + RETURN_CODE IsUbrTrxWriteable(uint32_t epEvent); + + RETURN_CODE UbrSetTimeout(UbrTaskStep taskType, int timeout); + + static RETURN_CODE UbrTrxFreeShm(UbrTrx *trx); + + void PrewriteUbrTx(UbrTx *tx); + void PrewriteUbrRx(UbrRx *rx); + + static inline void UbrSetSleepTask(UbrTaskStep taskType) + { + if (taskType >= UBR_TASK_STEP_NUM || taskType < 0) { + return; + } + uint32_t type = (uint32_t)taskType; + sleep(g_sleepTime[type]); + return; + } + + static inline RETURN_CODE CheckTrxConnectParam(const char 
*listenerName, const char *localName) + { + if (UNLIKELY(listenerName == NULL)) { + LOG(ERROR) << "The request listener name is null."; + return UBRING_ERR; + } + if (UNLIKELY(localName == NULL)) { + LOG(ERROR) << "The request trx shared memory name is null."; + return UBRING_ERR; + } + return UBRING_OK; + } + + int UbrAllocateServerShm(SHM* remote_trx_shm, SHM* local_trx_shm); + + int UbrMapRemoteShm(SHM *local_trx_shm, const char *local_name); + + int UbrAllocateLocalShm(SHM *local_trx_shm, const char *shm_name); + + RETURN_CODE UbrMapRemoteShmAddTimer(SHM *localTrxShm, const char *localName); + + static inline RETURN_CODE CheckTrxSendPreCheck(UbrTrx *trx) + { + if (UNLIKELY(trx->ubrTx.trxState != UBR_STATE_CONNECTED)) { + LOG(ERROR) << "Trx send failed, trx is not connected state."; + return UBRING_ERR; + } + + return UBRING_OK; + } + static RETURN_CODE CheckTrxRecvParam(UbrTrx *trx, const void *buf, uint32_t bufLen) + { + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Trx recv failed, trx is null."; + return UBRING_ERR; + } + + if (UNLIKELY((UbrEventQMsg *)trx->ubrRx.localRxEventQ.addr == NULL)) { + LOG(ERROR) << "Trx send failed, localTxEventQ addr is NULL."; + return UBRING_ERR; + } + + if (UNLIKELY(trx->ubrRx.trxState != UBR_STATE_CONNECTED)) { + LOG(ERROR) << "Trx recv failed, trx is not connected statep=" << trx->ubrRx.trxState; + return UBR_NOT_CONNECTED; + } + if (UNLIKELY(buf == NULL)) { + LOG(ERROR) << "Trx recv failed, buf is null."; + return UBRING_ERR; + } + if (UNLIKELY(bufLen == 0)) { + LOG(ERROR) << "Trx recv failed, bufLen is 0."; + return UBRING_ERR; + } + return UBRING_OK; + } + + static inline RETURN_CODE CheckTrxRecvPreCheck(UbrTrx *trx) + { + if (UNLIKELY(trx->ubrRx.trxState != UBR_STATE_CONNECTED)) { + LOG(ERROR) << "Trx recv failed, trx is not connected state."; + return UBRING_ERR; + } + return UBRING_OK; + } + + static inline void UpdateDataQTail(UbrTrx *trx) + { + ((UbrDataStatusQMsg *)trx->ubrRx.remoteDataStatusQ.addr)->tail = 
trx->ubrRx.readPos; + } + + static RETURN_CODE UbrTrxCallbackCheck(UbrTrx *trx) + { + if (trx == NULL) { + LOG(ERROR) << "Trx close callback failed, trx is null."; + return UBRING_ERR; + } + if (UNLIKELY(trx->localShm.addr == NULL)) { + LOG(ERROR) << "Trx close failed, localShm addr is NULL."; + return UBRING_ERR; + } + if (UNLIKELY(trx->ubrRx.localRxEventQ.addr == NULL)) { + LOG(ERROR) << "Trx close failed, localRxEventQ addr is NULL."; + return UBRING_ERR; + } + if (UNLIKELY(trx->ubrTx.localTxEventQ.addr == NULL)) { + LOG(ERROR) << "Trx close failed, localTxEventQ addr is NULL."; + return UBRING_ERR; + } + return UBRING_OK; + } + +private: + RETURN_CODE UbrTrxMapLocalShm(SHM *localShm); + RETURN_CODE UbrTrxMapRemoteShm(SHM *remoteShm); + RETURN_CODE ApplyAndMapLocalShm(SHM *localTrxShm, const char *localName); + RETURN_CODE ApplyAndMapRemoteShm(SHM *remoteTrxShm); + static RETURN_CODE UbrTrxCloseCheck(UbrTrx *trx); + void ReleaseFileLock(int lockFd); + ssize_t StartReadv(UbrTrx *trx, const struct iovec *iov, int iovcnt, size_t remainBufLen); + void PreWriteAddr(uint8_t *addr, size_t len); + RETURN_CODE WritevHasEnoughSpace(size_t bufLen); + RETURN_CODE UbrServerTrxInit(SHM *localShm, SHM *remoteShm); + static RETURN_CODE UbrClearResourceCheck(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType); + static RETURN_CODE ClearTrxResource(UbrTrx *trx, uint64_t startTime, UbrCloseType closeType, int op=0); + + UbrTrx* _trx{nullptr}; +}; +} +} + +#endif //BRPC_UB_RING_H \ No newline at end of file diff --git a/src/brpc/ubshm/ub_ring_manager.cpp b/src/brpc/ubshm/ub_ring_manager.cpp new file mode 100644 index 0000000000..13df631f9e --- /dev/null +++ b/src/brpc/ubshm/ub_ring_manager.cpp @@ -0,0 +1,264 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include "brpc/ubshm/ub_ring.h" +#include "brpc/ubshm/ub_ring_manager.h" +#include "butil/logging.h" + +namespace brpc { +namespace ubring { +DEFINE_int32(ubr_max_managed_num, 1024, "maximum number of managed ubring"); +DEFINE_int32(tail_update_after_read, 8, "Position of the tail update after the read"); + +UbrMgr UBRingManager::g_ubrMgr; +UbrLinkInfoMgr UBRingManager::g_linkInfoMgr; +pthread_mutex_t UBRingManager::g_ubrTrxMgrMtx = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t UBRingManager::g_ubrListenerMgrMtx = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t UBRingManager::g_linkInfoMgrMtx = PTHREAD_MUTEX_INITIALIZER; + +uint64_t g_ubrTrxNum = 0; +uint64_t g_ubEventCnt = 0; +uint64_t g_ubrListenerNum = 0; + +RETURN_CODE UBRingManager::GetUbrDealMsgMaxCnt(const uint32_t capacity, uint32_t *dealMsgMaxCnt) { + if (UNLIKELY(dealMsgMaxCnt == NULL)) { + LOG(ERROR) << "Get update factor failed, dealMsgMaxCnt is null."; + return UBRING_ERR; + } + if (UNLIKELY(FLAGS_tail_update_after_read == 0)) { + LOG(ERROR) << "Get update factor failed, factor is 0."; + return UBRING_ERR; + } + *dealMsgMaxCnt = capacity / FLAGS_tail_update_after_read; + return UBRING_OK; +} + +RETURN_CODE UBRingManager::UbrMgrDefault() +{ + g_ubrMgr.trxNum = 0; + g_ubrMgr.trxCap = FLAGS_ubr_max_managed_num; + g_ubrMgr.trxMgrUnitStatus = NULL; + g_ubrMgr.trxMgr = NULL; + return UBRING_OK; +} + +RETURN_CODE 
UBRingManager::UbrMgrInit() { + RETURN_CODE rc = UbrMgrDefault(); + if (UNLIKELY(rc != UBRING_OK)) { + LOG(ERROR) << "Ubr manager set default values failed."; + return rc; + } + + size_t trxMgrSize = g_ubrMgr.trxCap * sizeof(UbrTrx); + g_ubrMgr.trxMgr = (UbrTrx *)malloc(trxMgrSize); + size_t trxMgrStatusSize = g_ubrMgr.trxCap * sizeof(UbrMgrUnitStatus); + g_ubrMgr.trxMgrUnitStatus = (UbrMgrUnitStatus *)malloc(trxMgrStatusSize); + if (UNLIKELY(g_ubrMgr.trxMgr == NULL || + g_ubrMgr.trxMgrUnitStatus == NULL)) { + LOG(ERROR) << "Ubr manager memory allocation failed."; + UbrMgrFini(); + return UBRING_ERR; + } + + memset(g_ubrMgr.trxMgr, 0, trxMgrSize); + memset(g_ubrMgr.trxMgrUnitStatus, UBR_MGR_UNIT_FREE, trxMgrStatusSize); + LinkInfoInit(); + return UBRING_OK; +} + +void UBRingManager::UbrMgrFini() { + { + LOCK_GUARD(g_ubrTrxMgrMtx); + FREE_PTR(g_ubrMgr.trxMgr); + FREE_PTR(g_ubrMgr.trxMgrUnitStatus); + } + { + LOCK_GUARD(g_ubrListenerMgrMtx); + } + g_ubrMgr.trxNum = 0; + g_ubrMgr.trxCap = 0; + LinkInfoFini(); +} + +RETURN_CODE UBRingManager::AcquireUbrTrxFromMgr(UbrTrx **trx) { + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Acquire trx failed, trx is null."; + return UBRING_ERR; + } + + if (UNLIKELY(g_ubrMgr.trxMgr == NULL)) { + LOG(ERROR) << "Acquire trx failed, trxMgr is null."; + return UBRING_ERR; + } + + LOCK_GUARD(g_ubrTrxMgrMtx); + if (g_ubrMgr.trxNum >= g_ubrMgr.trxCap) { + LOG(ERROR) << "Acquire trx failed, trx number is full."; + return UBRING_ERR; + } + + for (uint32_t i = 0; i < g_ubrMgr.trxCap; ++i) { + if (g_ubrMgr.trxMgrUnitStatus[i] == UBR_MGR_UNIT_FREE) { + memset(&g_ubrMgr.trxMgr[i], 0, sizeof(UbrTrx)); + g_ubrMgr.trxMgrUnitStatus[i] = UBR_MGR_UNIT_USED; + *trx = &g_ubrMgr.trxMgr[i]; + (*trx)->trxMgrIndex = i; + (*trx)->ubrId = g_ubrTrxNum; + (*trx)->closeState = UBR_CLOSE_FIRST; + (*trx)->closeCnt = MAX_CLOSE_COUNT; + ++g_ubrMgr.trxNum; + ++g_ubrTrxNum; + return UBRING_OK; + } + } + LOG(ERROR) << "Acquire trx failed, no available space."; + return 
UBRING_ERR; +} + +RETURN_CODE UBRingManager::ReleaseUbrTrxFromMgr(UbrTrx *trx) { + if (UNLIKELY(trx == NULL)) { + LOG(ERROR) << "Release trx failed, trx is null."; + return UBRING_ERR; + } + + trx->localShm.addr = NULL; + trx->ubrTx.localTxEventQ.addr = NULL; + trx->ubrTx.localDataStatusQ.addr = NULL; + trx->ubrRx.localRxEventQ.addr = NULL; + trx->ubrRx.remoteDataStatusQ.addr = NULL; + if (UNLIKELY(g_ubrMgr.trxMgr == NULL)) { + LOG(ERROR) << "Release trx failed, trxMgr is null."; + return UBRING_ERR; + } + + LOCK_GUARD(g_ubrTrxMgrMtx); + uint32_t idx = trx->trxMgrIndex; + if (g_ubrMgr.trxMgrUnitStatus[idx] == UBR_MGR_UNIT_FREE) { + LOG(INFO) << "Release trx already freed, name=" << trx->localShm.name; + return UBRING_OK; + } + + if (g_ubrMgr.trxNum == 0) { + LOG(ERROR) << "Release trx failed, trx number is 0."; + return UBRING_ERR; + } + + g_ubrMgr.trxMgrUnitStatus[idx] = UBR_MGR_UNIT_FREE; + --g_ubrMgr.trxNum; + return UBRING_OK; +} + +void UBRingManager::LinkInfoInit(void) { + + size_t linkInfoMgrSize = FLAGS_ubr_max_managed_num * sizeof(UbrLinkInfo); + g_linkInfoMgr.allLinkInfo = (UbrLinkInfo*) malloc(linkInfoMgrSize); + if (g_linkInfoMgr.allLinkInfo == NULL) { + LOG(ERROR) << "allLinkInfo is NULL"; + LinkInfoFini(); + return; + } + + g_linkInfoMgr.linkMgrUnitStatus = (UbrMgrUnitStatus*) malloc(linkInfoMgrSize); + if (g_linkInfoMgr.linkMgrUnitStatus == NULL) { + LinkInfoFini(); + return; + } + + memset(g_linkInfoMgr.allLinkInfo, 0, linkInfoMgrSize); + memset(g_linkInfoMgr.linkMgrUnitStatus, 0, linkInfoMgrSize); +} + +void UBRingManager::LinkInfoFini(void) { + if (g_linkInfoMgr.linkMgrUnitStatus == NULL || g_linkInfoMgr.allLinkInfo == NULL) { + LOG(ERROR) << "LinkInfo is NULL"; + return; + } + { + LOCK_GUARD(g_linkInfoMgrMtx); + FREE_PTR(g_linkInfoMgr.allLinkInfo); + FREE_PTR(g_linkInfoMgr.linkMgrUnitStatus); + } + + g_linkInfoMgr.linkNum = 0; +} + +void UBRingManager::AcquireLinkInfoToMgr(const char *listenerName, UbrTrx *trx) { + if (listenerName == NULL || trx 
== NULL) { + LOG(ERROR) << "LinkInfo acquire fail."; + return; + } + + if (g_linkInfoMgr.linkMgrUnitStatus == NULL || g_linkInfoMgr.allLinkInfo == NULL) { + LOG(ERROR) << "LinkInfo is NULL."; + return; + } + uint32_t ubrIndex = trx->trxMgrIndex; + char* connectName = trx->localShm.name; + if (g_linkInfoMgr.linkMgrUnitStatus[ubrIndex] == UBR_MGR_UNIT_FREE) { + strncpy(g_linkInfoMgr.allLinkInfo[ubrIndex].connectName, + connectName, SHM_MAX_NAME_BUFF_LEN); + strncpy(g_linkInfoMgr.allLinkInfo[ubrIndex].listenerName, + listenerName, SHM_MAX_NAME_BUFF_LEN); + g_linkInfoMgr.linkMgrUnitStatus[ubrIndex] = UBR_MGR_UNIT_USED; + g_linkInfoMgr.linkNum++; + } +} + +void UBRingManager::ReleaseLinkInfoFromMgr(UbrTrx *trx) { + if (trx == NULL || g_linkInfoMgr.linkMgrUnitStatus == NULL) { + LOG(ERROR) << "LinkInfo release fail."; + return; + } + + if (g_linkInfoMgr.linkMgrUnitStatus[trx->trxMgrIndex] == UBR_MGR_UNIT_FREE) { + LOG(ERROR) << "Release linkInfo failed, trx is not in manager."; + return; + } + g_linkInfoMgr.linkMgrUnitStatus[trx->trxMgrIndex] = UBR_MGR_UNIT_FREE; + g_linkInfoMgr.linkNum--; +} + +int32_t UBRingManager::UbEventCallback(const char *shmName) +{ + if (UNLIKELY(shmName == NULL)) { + LOG(ERROR) << "Ub event callback failed, shm name is null."; + return UBRING_ERR; + } + if (UNLIKELY(g_ubrMgr.trxMgr == NULL)) { + LOG(ERROR) << "Ub event callback failed, trx mgr is null."; + return UBRING_ERR; + } + LOG(INFO) << "Ub event callback is processing. 
shm_name=" << shmName; + + for (uint32_t i = 0; i < g_ubrMgr.trxCap; ++i) { + if (g_ubrMgr.trxMgrUnitStatus[i] == UBR_MGR_UNIT_FREE) { + continue; + } + + if (strcmp(g_ubrMgr.trxMgr[i].localShm.name, shmName) == 0 || // 故障链路为该trx的本端shm + strcmp(g_ubrMgr.trxMgr[i].remoteShm.name, shmName) == 0) { // 故障链路为该trx的对端shm + ++g_ubEventCnt; + int fd = (int)g_ubrMgr.trxMgr[i].localShm.fd; + LOG(WARNING) << "Ub event callback, the fd of the faulty link is " << fd; + return UBRing::UbrPassiveClearTrx(&g_ubrMgr.trxMgr[i], fd, UBR_UB_EVENT); + } + } + return UBRING_ERR; +} +} +} diff --git a/src/brpc/ubshm/ub_ring_manager.h b/src/brpc/ubshm/ub_ring_manager.h new file mode 100644 index 0000000000..c901791565 --- /dev/null +++ b/src/brpc/ubshm/ub_ring_manager.h @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef BRPC_UB_RING_MANAGER_H +#define BRPC_UB_RING_MANAGER_H + +#include "brpc/ubshm/ubr_trx.h" +#include "brpc/ubshm/shm/shm_def.h" +#include "brpc/ubshm/common/common.h" + +namespace brpc { +namespace ubring { +typedef enum { + UBR_MGR_UNIT_FREE = 0, + UBR_MGR_UNIT_USED = 1 +} UbrMgrUnitStatus; + +typedef struct TagUbrMgr { + uint32_t trxNum; + uint32_t trxCap; + UbrTrx *trxMgr; + UbrMgrUnitStatus *trxMgrUnitStatus; +} UbrMgr; + +typedef struct TagUbrLinkInfo { + char connectName[SHM_MAX_NAME_BUFF_LEN]; + char listenerName[SHM_MAX_NAME_BUFF_LEN]; +} UbrLinkInfo; + +typedef struct TagUbrLinkInfoMgr { + uint32_t linkNum; + UbrLinkInfo* allLinkInfo; + UbrMgrUnitStatus *linkMgrUnitStatus; +} UbrLinkInfoMgr; + +class UBRingManager { +public: + ~UBRingManager(){ + UbrMgrFini(); + } + + static RETURN_CODE GetUbrDealMsgMaxCnt(const uint32_t capacity, uint32_t *dealMsgMaxCnt); + + static RETURN_CODE UbrMgrDefault(); + + static RETURN_CODE UbrMgrInit(); + + static void UbrMgrFini(); + + static RETURN_CODE AcquireUbrTrxFromMgr(UbrTrx **trx); + + static RETURN_CODE ReleaseUbrTrxFromMgr(UbrTrx *trx); + + static void LinkInfoInit(void); + static void LinkInfoFini(void); + static void AcquireLinkInfoToMgr(const char* listenerName, UbrTrx *trx); + static void ReleaseLinkInfoFromMgr(UbrTrx* trx); + static int32_t UbEventCallback(const char *shmName); + +private: + UBRingManager() { + } + + static UbrMgr g_ubrMgr; + static UbrLinkInfoMgr g_linkInfoMgr; + static pthread_mutex_t g_ubrTrxMgrMtx; + static pthread_mutex_t g_ubrListenerMgrMtx; + static pthread_mutex_t g_linkInfoMgrMtx; +}; +} +} + +#endif //BRPC_UB_RING_MANAGER_H \ No newline at end of file diff --git a/src/brpc/ubshm/ubr_msg.h b/src/brpc/ubshm/ubr_msg.h new file mode 100644 index 0000000000..8a19b6f6bc --- /dev/null +++ b/src/brpc/ubshm/ubr_msg.h @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
// See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef BRPC_UBR_MSG_H
#define BRPC_UBR_MSG_H

#include <stdint.h>  // uint8_t, uint32_t

// One message chunk is UBR_MSG_LEN bytes: payload followed by a small header.
#define UBR_MSG_HEADER_LEN 4
#define UBR_MSG_PAYLOAD_LEN 60
#define UBR_MSG_LEN (UBR_MSG_HEADER_LEN + UBR_MSG_PAYLOAD_LEN)

// Byte positions inside UbrMsgFormat::header.
#define UBR_MSG_FLAG_INDEX 0
#define UBR_MSG_LEN_INDEX 1
#define UBR_MSG_CUR_INDEX 2

namespace brpc {
namespace ubring {
// Values of the header flag byte.
typedef enum {
    UBR_MSG_CHUNK_NONE = 0,
    UBR_MSG_CHUNK_EXIST = 1,
    UBR_MSG_CHUNK_EOF = 2
} UbrMsgHdrFlag;

typedef struct TagUbrMsgPayload {
    uint8_t inner[UBR_MSG_PAYLOAD_LEN];
} UbrMsgPayload;

// 64-byte aligned chunk: payload first, header last.
typedef struct __attribute__((aligned(64))) TagUbrMsgFormat {
    UbrMsgPayload payload;

    uint8_t header[UBR_MSG_HEADER_LEN];
} UbrMsgFormat;

/**
 * Number of chunks needed to carry `bufLen` payload bytes, i.e.
 * ceil(bufLen / UBR_MSG_PAYLOAD_LEN).
 *
 * Computed as quotient plus "is there a remainder" instead of the usual
 * (bufLen + N - 1) / N, which wraps around for bufLen close to UINT32_MAX
 * and would then report zero chunks.
 *
 * @param bufLen  payload size in bytes; 0 yields 0.
 */
static inline uint32_t CalcUbrMsgChunkCnt(uint32_t bufLen)
{
    const uint32_t fullChunks = bufLen / UBR_MSG_PAYLOAD_LEN;
    const uint32_t hasPartial = (bufLen % UBR_MSG_PAYLOAD_LEN != 0) ? 1u : 0u;
    return fullChunks + hasPartial;
}
}
}
#endif //BRPC_UBR_MSG_H

// ---- patch metadata (kept verbatim) ----
// \ No newline at end of file
// diff --git a/src/brpc/ubshm/ubr_trx.h b/src/brpc/ubshm/ubr_trx.h
// new file mode 100644
// index 0000000000..af9c52ade7
// --- /dev/null
// +++ b/src/brpc/ubshm/ubr_trx.h
// @@ -0,0 +1,162 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.
// The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef BRPC_UBR_TRX_H
#define BRPC_UBR_TRX_H
// NOTE(review): the names of the next three system headers were lost when
// this patch was extracted (angle-bracket contents stripped). The
// declarations below need at least the fixed-width integer types and size_t;
// restore the exact headers from the original commit.
#include
#include
#include
#include "brpc/ubshm/shm/shm_def.h"
#include "brpc/ubshm/common/common.h"
#include "brpc/ubshm/common/thread_lock.h"
#include "brpc/ubshm/ubr_msg.h"

/* +----------------------------------------------------------------------------+
   │                               UbrTrx shm                                   │
   +-------------+-------------+-------------+---------------+------------------+
   │  TxEventQ   │  RxEventQ   │ DataStatusQ │ zero(44Bytes) |      DataQ       │
   +-------------+-------------+-------------+---------------+------------------+ */
// NOTE(review): the offset macros below place DataQ directly after
// DataStatusQ; the "zero(44Bytes)" cell of the diagram has no counterpart in
// the macros visible here — confirm which of the two is authoritative.

#define UBR_EVENTQ_LEN sizeof(UbrEventQMsg)
#define UBR_DATASTATUSQ_LEN sizeof(UbrDataStatusQMsg)

// Byte offsets of each region from the start of the shared-memory segment.
#define TX_EVENTQ_ADDR_OFFSET 0
#define RX_EVENTQ_ADDR_OFFSET UBR_EVENTQ_LEN
#define DATASTATUSQ_ADDR_OFFSET ((UBR_EVENTQ_LEN) << 1)   // two event queues
#define DATAQ_ADDR_OFFSET (DATASTATUSQ_ADDR_OFFSET + UBR_DATASTATUSQ_LEN)
#define MB_TO_BYTE (1024 * 1024)
#define MAX_CLOSE_COUNT 2

// Building blocks of shared-memory object names.
#define SHM_NAME_PREFIX "UBRING"
#define SERVER_SHM_NAME_SUFFIX "S"
#define CLIENT_SHM_NAME_SUFFIX "C"

namespace brpc {
namespace ubring {
// Hook function pointers taking an int argument, defined elsewhere.
// Presumably invoked around closing the TCP fd — confirm at the call sites.
extern RETURN_CODE(*g_BeforeTcpClose)(int);
extern RETURN_CODE(*g_AfterTcpClose)(int);

// Connection state, stored both in event-queue messages and in UbrTx/UbrRx.
typedef enum {
    UBR_STATE_NONE,
    UBR_STATE_CONNECTED,
    UBR_STATE_CLOSING,
    UBR_STATE_CLOSED
} EventQState;

typedef enum {
    UBR_SEND_CLOSE,
    UBR_CALL_BACK_CLOSE
} UbrCloseType;

typedef enum {
    UBR_CLOSE_FIRST,
    UBR_CLOSE_SECOND,
    UBR_CLOSE_END
} UbrCloseCount;

typedef enum {
    UDP_TRX,
    TCP_TRX,
    UBR_TRX
} UbrTrxType;

// Steps of the connect/accept/close task; UBR_TASK_STEP_NUM is the count.
typedef enum {
    UBR_TASK_CONNECT_MAP_FRONT,
    UBR_TASK_CONNECT_MAP_AFTER,
    UBR_TASK_ACCEPT_MAP_FRONT,
    UBR_TASK_ACCEPT_MAP_AFTER,
    UBR_TASK_CLOSE,
    UBR_TASK_STEP_NUM
} UbrTaskStep;

// Record stored in the DataStatusQ region (its size defines UBR_DATASTATUSQ_LEN).
typedef struct TagUbrDataStatusQMsg {
    uint32_t tail;
    uint32_t timeout;
    uint8_t heartBeat;
} UbrDataStatusQMsg;

// Record stored in each event-queue region (its size defines UBR_EVENTQ_LEN).
typedef struct TagUbrEventQMsg {
    uint64_t ioId;
    EventQState flag;
} UbrEventQMsg;

// A mapped region: start address and length in bytes.
typedef struct TagUbrAddrInfo {
    uint8_t *addr;
    size_t len;
} UbrAddrInfo;

// Sending half of a transceiver.
typedef struct TagUbrTx {
    UbrAddrInfo remoteDataQ;
    UbrAddrInfo remoteRxEventQ;
    UbrAddrInfo localDataStatusQ;
    UbrAddrInfo localTxEventQ;
    uint64_t outIoId;
    uint32_t writePos;
    uint32_t capacity;
    UbrMsgFormat localMsgSpace;
    uint32_t hbRetryCnt;
    uint32_t epLastCap;
    volatile EventQState trxState;
} UbrTx;

// Receiving half of a transceiver.
typedef struct TagUbrRx {
    UbrAddrInfo localDataQ;
    UbrAddrInfo localRxEventQ;
    UbrAddrInfo remoteDataStatusQ;
    UbrAddrInfo remoteTxEventQ;
    uint64_t inIoId;
    uint32_t readPos;
    uint32_t capacity;
    uint32_t dealMsgNum;
    uint32_t dealMsgMaxCnt;
    uint32_t epEofPos;
    volatile EventQState trxState;
} UbrRx;

// One transceiver: both halves, its shared-memory objects and timer fds.
typedef struct TagUbrTrx {
    UbrTx ubrTx;
    UbrRx ubrRx;
    uint64_t ubrId;
    uint32_t trxMgrIndex;   // slot index inside the manager tables
    UbrTrxType type;
    SHM localShm;
    SHM remoteShm;
    int timerFd;
    int hbTimerFd;
    int clearTimerFd;
    AtomicInt closeCnt;
    AtomicInt closeState;
} UbrTrx;

typedef struct TagFileLock {
    int lockFd;
    char* lockPath;
} FileLock;

typedef struct TagUbrLinkLock {
    int fileLockNum;
    FileLock* fileLock;
} UbrLinkLock;

// Reason passed along when a link is torn down passively.
typedef enum {
    UBR_UB_EVENT,
    UBR_HEARTBEAT,
}PASSIVE_DISC_TYPE;

}
}
#endif //BRPC_UBR_TRX_H

// ---- patch metadata (kept verbatim) ----
// \ No newline at end of file
// diff --git a/src/brpc/ubshm/ubs_mem/declare_shm_ubs.h b/src/brpc/ubshm/ubs_mem/declare_shm_ubs.h
// new file mode 100644
// index 0000000000..b09b2bf943
// --- /dev/null
// +++
// b/src/brpc/ubshm/ubs_mem/declare_shm_ubs.h
// @@ -0,0 +1,57 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

// X-macro list of the ubs-mem entry points. The includer defines
// UBRING_MK_UBSM(return_type, name, (parameters)) before including this
// file; both macros are undefined again at the end of the file.
#ifndef UBRING_MK_UBSM
#error Do not include this file unless you know what you are doing.
#endif

// Falls back to UBRING_MK_UBSM when the includer does not provide it.
// No entry in the list below uses the OPTIONAL variant.
#ifndef UBRING_MK_UBSM_OPTIONAL
#define UBRING_MK_UBSM_OPTIONAL UBRING_MK_UBSM
#endif

UBRING_MK_UBSM(int, ubsmem_init_attributes, (ubsmem_options_t *ubsm_shmem_opts));

UBRING_MK_UBSM(int, ubsmem_initialize, (const ubsmem_options_t *ubsm_shmem_opts));

UBRING_MK_UBSM(int, ubsmem_finalize, (void));

UBRING_MK_UBSM(int, ubsmem_set_logger_level, (int level));

UBRING_MK_UBSM(int, ubsmem_set_extern_logger, (void (*func)(int level, const char *msg)));

UBRING_MK_UBSM(int, ubsmem_lookup_regions, (ubsmem_regions_t* regions));

UBRING_MK_UBSM(int, ubsmem_create_region, (const char *region_name, size_t size, const ubsmem_region_attributes_t *reg_attr));

UBRING_MK_UBSM(int, ubsmem_destroy_region, (const char *region_name));

UBRING_MK_UBSM(int, ubsmem_shmem_allocate,(const char *region_name, const char *name, size_t size, mode_t mode,
    uint64_t flags));

UBRING_MK_UBSM(int, ubsmem_shmem_deallocate, (const char *name));

UBRING_MK_UBSM(int, ubsmem_shmem_map, (void *addr, size_t length, int prot, int flags, const char *name, off_t offset,
    void **local_ptr));

UBRING_MK_UBSM(int, ubsmem_shmem_unmap, (void *local_ptr, size_t length));

UBRING_MK_UBSM(int, ubsmem_shmem_faults_register, (shmem_faults_func registerFunc));

UBRING_MK_UBSM(int, ubsmem_local_nid_query, (uint32_t *nid));

#undef UBRING_MK_UBSM_OPTIONAL
#undef UBRING_MK_UBSM

// ---- patch metadata (kept verbatim) ----
// \ No newline at end of file
// diff --git a/src/brpc/ubshm/ubs_mem/ubs_mem.h b/src/brpc/ubshm/ubs_mem/ubs_mem.h
// new file mode 100644
// index 0000000000..66069c6e9c
// --- /dev/null
// +++ b/src/brpc/ubshm/ubs_mem/ubs_mem.h
// @@ -0,0 +1,210 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef BRPC_UBS_MEM_H
#define BRPC_UBS_MEM_H
#include "ubs_mem_def.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief Initialize the UBSMSHMEM attributes
 *
 * @param ubsm_shmem_opts - [out] shmem attributes
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_init_attributes(ubsmem_options_t *ubsm_shmem_opts);

/**
 * Initialize the UBSMSHMEM library.
 * Must be the first call a process makes into the UBSMSHMEM library.
 * @param ubsm_shmem_opts - options structure containing initialization choices
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_initialize(const ubsmem_options_t *ubsm_shmem_opts);

/**
 * Finalize the UBSMSHMEM library.
 * Once finalized, the process can continue to work, but it is disconnected from the UBSMSHMEM library functions.
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_finalize(void);

/**
 * @brief Set log level
 * @param level - level to be set, debug(0), info(1), warning(2), error(3), closed(4)
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_set_logger_level(int level);

/**
 * @brief Set an external log function. Inside the customized logger the
 *        caller can use its own logging utility, so that library messages
 *        end up in the same log file as the caller's. If no logger is set,
 *        log messages are printed to stdout.
 * @param func - [in] external logger function
 * @return 0 on success and other on failure
 */
SHMEM_API int ubsmem_set_extern_logger(void (*func)(int level, const char *msg));

/**
 * Look up regions in UBSMSHMEM associated with the local node.
 * @param regions - [out] The descriptor to the regions.
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_lookup_regions(ubsmem_regions_t* regions);

/**
 * Create a large region of UBSMSHMEM.
 * Regions are primarily used as large containers within which additional memory may be allocated and managed by
 * the program.
 * @param region_name - name of the region
 * @param size - size (in bytes) requested for the region, 930 no use, default 0.
 * Note that implementations may round up the size to implementation-dependent sizes,
 * and may impose system-wide (or user-dependent) limits on individual and total size allocated to a given user.
 * @param reg_attr - details of UBSMSHMEM region attributes
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_create_region(const char *region_name, size_t size, const ubsmem_region_attributes_t *reg_attr);

/**
 * Look up a region in UBSMSHMEM by name in the name service.
 * @param region_name - name of the region.
 * @param region_desc - [out] The descriptor to the region.
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_lookup_region(const char *region_name, ubsmem_region_desc_t *region_desc);

/**
 * Destroy a region, and all contents within the region. Note that this
 * method call will trigger a delayed free operation to permit other
 * instances currently using the region to finish.
 * @param region_name - name of the region.
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_destroy_region(const char *region_name);

/**
 * Allocate some named space within a region. Allocates an area of UBSMSHMEM within a region
 * @param region_name - name of the region.
 * @param name - name of the share memory object
 * @param size - size of the space to allocate in bytes.
 * @param mode - mode associated with this space.
 * @param flags - Special marking for this object, MXMEM_FLAG_WITH_LOCK etc.
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_shmem_allocate(const char *region_name, const char *name, size_t size, mode_t mode,
    uint64_t flags);

/**
 * Deallocate allocated space in memory
 * @param name - name of the share memory object
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_shmem_deallocate(const char *name);

/**
 * Map item in UBSMSHMEM to the local virtual address space, and return its pointer.
 * @param addr - The starting address for the new mapping is specified in addr, If addr is NULL, then
 * the kernel chooses the (page-aligned) address at which to create the mapping
 * @param length - The length argument specifies the length of the mapping (which must be greater than 0)
 * @param prot - same as mmap, describes the desired memory protection of the mapping (and must not conflict with
 * the open mode of the file).
 * @param flags - same as mmap
 * @param name - name of the share memory object which to be mapped, same as mmap's fd
 * @param offset - same as mmap, offset must be a multiple of the page size
 * @param local_ptr - [out] within the process virtual address space that can be used to directly access the
 * data item in UBSMSHMEM
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_shmem_map(void *addr, size_t length, int prot, int flags, const char *name, off_t offset,
    void **local_ptr);

/**
 * Unmap a data item in UBSMSHMEM from the local virtual address space.
 * @param local_ptr - pointer within the process virtual address space to be unmapped
 * @param length - the size to be unmapped
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_shmem_unmap(void *local_ptr, size_t length);

/**
 * Change permissions associated with a data item descriptor.
 * @param name - descriptor associated with some data item
 * @param start - presumably the start address of the affected range — TODO confirm
 * @param length - presumably the length of the affected range in bytes — TODO confirm
 * @param prot - new permissions for the data item
 * @return - 0 on success and other on failure,other return described in UBSM_SHMEM_RETURN.
 */
SHMEM_API int ubsmem_shmem_set_ownership(const char *name, void *start, size_t length, int prot);

/**
 * shmem lock - Set the lock, status, and data consistency of the shmem item
 * @param name - descriptor associated with share memory object
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_shmem_write_lock(const char *name);
SHMEM_API int ubsmem_shmem_read_lock(const char *name);
SHMEM_API int ubsmem_shmem_unlock(const char *name);

// NOTE(review): the four functions below are undocumented in this header.
SHMEM_API int ubsmem_shmem_list_lookup(const char *prefix, ubsmem_shmem_desc_t *shm_list, uint32_t *shm_cnt);
SHMEM_API int ubsmem_shmem_lookup(const char *name, ubsmem_shmem_info_t *shm_info);
SHMEM_API int ubsmem_shmem_attach(const char *name);
SHMEM_API int ubsmem_shmem_detach(const char *name);

/**
 * Alloc an area from the resource pool and use it only within the scope of the current process.
 * @param region_name - name of the region.
 * @param size - size of the space to allocate in bytes.
 * Note that implementations may round up the size to implementation-dependent sizes.
 * @param mem_distance - Describe the performance distance between memory resources and local nodes.
 * Note that described in perf_desc_distance
 * @param is_numa - is numa or fd malloc, true: numa, false: fd
 * @param local_ptr - [out] pointer within the process virtual address space that can be used to directly access.
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_lease_malloc(const char *region_name, size_t size, ubsmem_distance_t mem_distance, bool is_numa,
    void **local_ptr);

/**
 * Release the pointer.
 * @param local_ptr - The pointer returned by the malloc function.
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_lease_free(void *local_ptr);

SHMEM_API int ubsmem_lookup_cluster_statistic(ubsmem_cluster_info_t *info);

/**
 * Subscribes to shared memory UB Event.
 * @param registerFunc - Shared Memory UB Event Response Handling Function.
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_shmem_faults_register(shmem_faults_func registerFunc);

/**
 * Query the supernode ID of this node within the supernode domain.
 * @param nid - [out] The supernode ID of this node within the supernode domain.
 * @return - 0 on success and other on failure
 */
SHMEM_API int ubsmem_local_nid_query(uint32_t *nid);

#ifdef __cplusplus
} // end of extern "C"
#endif
#endif //BRPC_UBS_MEM_H

// ---- patch metadata (kept verbatim) ----
// \ No newline at end of file
// diff --git a/src/brpc/ubshm/ubs_mem/ubs_mem_def.h b/src/brpc/ubshm/ubs_mem/ubs_mem_def.h
// new file mode 100644
// index 0000000000..29646611f3
// --- /dev/null
// +++ b/src/brpc/ubshm/ubs_mem/ubs_mem_def.h
// @@ -0,0 +1,163 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef BRPC_UBS_MEM_DEF_H
#define BRPC_UBS_MEM_DEF_H
// NOTE(review): the names of the next four system headers were lost when this
// patch was extracted (angle-bracket contents stripped). The declarations in
// this header and in ubs_mem.h use bool, size_t, fixed-width integers and
// mode_t/off_t; restore the exact headers from the original commit.
#include
#include
#include
#include

#ifdef __cplusplus
extern "C" {
#endif

#ifndef SHMEM_API
#define SHMEM_API __attribute__((visibility("default")))
#endif

// Changed to 48 for now, to stay aligned with the old version.
// NOTE(review): the original note sits above MAX_HOST_NUM (16); it
// presumably refers to the 48-valued name-length limits below — confirm.
#define MAX_HOST_NUM 16
#define MAX_NUMA_NUM 32
#define MAX_NUMA_RESV_LEN 16

#define MAX_HOST_NAME_DESC_LENGTH 64
#define MAX_SHM_NAME_LENGTH 48
#define MAX_REGION_NAME_DESC_LENGTH 48
#define MAX_REGION_NODE_NUM 16
#define MAX_REGIONS_NUM 6
#define MAX_OBMM_SHMDEV_PATH_LEN 64

#define MAX_MEMID_NUM 2048
#define MAX_SHM_CNT 300

// Bit flags accepted by ubsmem_shmem_allocate().
#define UBSM_FLAG_CACHE 0x0UL
#define UBSM_FLAG_WITH_LOCK 0x1UL
#define UBSM_FLAG_NONCACHE 0x2UL // open O_SYNC
#define UBSM_FLAG_WR_DELAY_COMP 0x4UL // obmm import with wr_delay_comp
#define UBSM_FLAG_ONLY_IMPORT_NONCACHE 0x8UL // only import open O_SYNC
#define UBSM_FLAG_MEM_ANONYMOUS 0x10UL // auto cleanup when all references in domain drop to zero

// Return codes of the ubsmem_* functions.
typedef enum {
    UBSM_OK = 0,
    // common error
    UBSM_ERR_PARAM_INVALID = 6010,
    UBSM_ERR_NOPERM = 6011, // no permission
    UBSM_ERR_MEMORY = 6012, // memcpy or other mem func failed
    UBSM_ERR_UNIMPL = 6013, // not implemented
    UBSM_CHECK_RESOURCE_ERROR = 6014, // resource check failed.
    UBSM_ERR_MEMLIB = 6015, // mem lib failed
    UBSM_ERR_NO_NEEDED = 6016, // default region no need to create

    // resource error
    UBSM_ERR_NOT_FOUND = 6020,
    UBSM_ERR_ALREADY_EXIST = 6021,
    UBSM_ERR_MALLOC_FAIL = 6022,
    UBSM_ERR_RECORD = 6023,
    UBSM_ERR_IN_USING = 6024, // shm is in use (usrNum > 0)

    // net error
    UBSM_ERR_NET = 6040,

    // under api
    UBSM_ERR_UBSE = 6050,
    UBSM_ERR_OBMM = 6051,

    // cc lock error
    UBSM_ERR_LOCK_NOT_SUPPORTED = 6060,
    UBSM_ERR_LOCK_ALREADY_LOCKED = 6061,
    UBSM_ERR_DLOCK = 6062,

    UBSM_ERR_BUFF = 6099,
} ubsmshmem_ret_t;
/**
 * Memory distance, describes the physical memory resource distance relative to the current PE.
 */
typedef enum {
    /** direct connect node is provided, same as PerfLevel::L0 */
    DISTANCE_DIRECT_NODE = 0,
    /** one hop connect node is provided, same as PerfLevel::L1, not support 930 */
    DISTANCE_HOP_NODE = 1,
} ubsmem_distance_t;

// NOTE(review): an empty struct has size 0 as a GNU C extension but size 1 in
// C++; confirm this placeholder is never laid out or copied across a C/C++
// boundary.
typedef struct {
    // todo
} ubsmem_options_t;

typedef struct {
    char host_name[MAX_HOST_NAME_DESC_LENGTH]; // include '\0'
    bool affinity;
} ubsmem_region_node_desc_t;

typedef struct {
    int host_num;
    ubsmem_region_node_desc_t hosts[MAX_REGION_NODE_NUM];
} ubsmem_region_attributes_t;

typedef struct {
    int num;
    ubsmem_region_attributes_t region[MAX_REGIONS_NUM];
} ubsmem_regions_t;

typedef struct {
    char region_name[MAX_REGION_NAME_DESC_LENGTH];
    size_t size;
    ubsmem_region_attributes_t region_attr;
} ubsmem_region_desc_t;

typedef struct {
    uint32_t slot_id;          // unique node identifier; uses the slot id, consistent with lcne
    uint32_t socket_id;        // socket id
    uint32_t numa_id;          // NUMA id within the node
    uint32_t mem_lend_ratio;   // upper limit of the pooled-memory lend ratio
    uint64_t mem_total;        // total memory, in bytes
    uint64_t mem_free;         // free memory, in bytes
    uint64_t mem_borrow;       // borrowed memory, in bytes
    uint64_t mem_lend;         // lent memory, in bytes
    uint8_t resv[MAX_NUMA_RESV_LEN];
} ubsmem_numa_mem_t;

typedef struct {
    char host_name[MAX_HOST_NAME_DESC_LENGTH];
    int numa_num;
    ubsmem_numa_mem_t numa[MAX_NUMA_NUM];
} ubsmem_host_info_t;

typedef struct {
    int host_num; // number of available nodes in the cluster
    ubsmem_host_info_t host[MAX_HOST_NUM];
} ubsmem_cluster_info_t;

typedef struct {
    char name[MAX_SHM_NAME_LENGTH + 1];
    size_t size;
} ubsmem_shmem_desc_t;

typedef struct {
    char name[MAX_SHM_NAME_LENGTH + 1];
    size_t size;
    uint32_t mem_num;
    uint64_t mem_unit_size;
    uint64_t mem_id_list[MAX_MEMID_NUM];
} ubsmem_shmem_info_t;

// Callback type registered through ubsmem_shmem_faults_register(); receives
// the name of a shared-memory object.
typedef int32_t (*shmem_faults_func)(const char *shm_name);

#ifdef __cplusplus
}
#endif
#endif //BRPC_UBS_MEM_DEF_H

// ---- patch metadata (kept verbatim) ----
// \ No newline at end of file
// diff --git a/src/brpc/ubshm/ubs_mem/ubshmem_stub.cpp b/src/brpc/ubshm/ubs_mem/ubshmem_stub.cpp
// new file mode 100644
// index
0000000000..f0eaf29f8e --- /dev/null +++ b/src/brpc/ubshm/ubs_mem/ubshmem_stub.cpp @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ubs_mem.h" + +int ubsmem_init_attributes(ubsmem_options_t *ubsm_shmem_opts) +{ + return UBSM_OK; +} + +int ubsmem_initialize(const ubsmem_options_t *ubsm_shmem_opts) +{ + return UBSM_OK; +} + +int ubsmem_finalize(void) +{ + return UBSM_OK; +} + +int ubsmem_set_logger_level(int level) +{ + return UBSM_OK; +} + +int ubsmem_set_extern_logger(void (*func)(int level, const char *msg)) +{ + return UBSM_OK; +} + +int ubsmem_lookup_regions(ubsmem_regions_t* regions) +{ + regions->num = 1; + regions->region[0].host_num = 1; + regions->region[0].hosts[0].affinity = true; + regions->region[0].hosts[0].host_name[0] = 'h'; + regions->region[0].hosts[0].host_name[1] = '1'; + regions->region[0].hosts[0].host_name[2] = '\0'; // 2号位置使用\0 + return UBSM_OK; +} + +int ubsmem_create_region(const char *region_name, size_t size, const ubsmem_region_attributes_t *reg_attr) +{ + return UBSM_OK; +} + + +int ubsmem_destroy_region(const char *region_name) +{ + return UBSM_OK; 
+} + +int ubsmem_shmem_allocate(const char *region_name, const char *name, size_t size, mode_t mode, uint64_t flags) +{ + return UBSM_OK; +} + +int ubsmem_shmem_deallocate(const char *name) +{ + return UBSM_OK; +} + +int ubsmem_shmem_map(void *addr, size_t length, int prot, int flags, const char *name, off_t offset, + void **local_ptr) +{ + return UBSM_OK; +} + +int ubsmem_shmem_unmap(void *local_ptr, size_t length) +{ + return UBSM_OK; +} + +int ubsmem_shmem_faults_register(shmem_faults_func registerFunc) +{ + return UBSM_OK; +} + +int ubsmem_local_nid_query(uint32_t *nid) +{ + *nid = 1; // stub + return UBSM_OK; +} \ No newline at end of file diff --git a/src/brpc/ubshm_transport.cpp b/src/brpc/ubshm_transport.cpp new file mode 100644 index 0000000000..233850bf20 --- /dev/null +++ b/src/brpc/ubshm_transport.cpp @@ -0,0 +1,241 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#if BRPC_WITH_UBRING + +#include "brpc/ubshm_transport.h" +#include "brpc/tcp_transport.h" +#include "brpc/ubshm/ub_endpoint.h" +#include "brpc/ubshm/ub_helper.h" + +namespace brpc { +DECLARE_bool(usercode_in_coroutine); +DECLARE_bool(usercode_in_pthread); + +extern SocketVarsCollector *g_vars; + +void UBShmTransport::Init(Socket *socket, const SocketOptions &options) { + CHECK(_ub_ep == NULL); + if (options.socket_mode == SOCKET_MODE_UBRING) { + _ub_ep = new(std::nothrow)ubring::UBShmEndpoint(socket); + if (!_ub_ep) { + const int saved_errno = errno; + PLOG(ERROR) << "Fail to create UBShmEndpoint"; + socket->SetFailed( + saved_errno, "Fail to create UBShmEndpoint: %s", berror(saved_errno)); + } + _ub_state = UB_UNKNOWN; + } else { + _ub_state = UB_OFF; + socket->_socket_mode = SOCKET_MODE_TCP; + } + _socket = socket; + _default_connect = options.app_connect; + _on_edge_trigger = options.on_edge_triggered_events; + if (options.need_on_edge_trigger && _on_edge_trigger == NULL) { + _on_edge_trigger = ubring::UBShmEndpoint::OnNewDataFromTcp; + } + _tcp_transport = std::unique_ptr(new TcpTransport()); + _tcp_transport->Init(socket, options); +} + +void UBShmTransport::Release() { + if (_ub_ep) { + delete _ub_ep; + _ub_ep = NULL; + _ub_state = UB_UNKNOWN; + } +} + +int UBShmTransport::Reset(int32_t expected_nref) { + if (_ub_ep) { + _ub_ep->Reset(); + _ub_state = UB_UNKNOWN; + } + return 0; +} + +std::shared_ptr UBShmTransport::Connect() { + if (_default_connect == nullptr) { + return std::make_shared(); + } + return _default_connect; +} + +int UBShmTransport::CutFromIOBuf(butil::IOBuf *buf) { + if (_ub_ep && _ub_state != UB_OFF) { + butil::IOBuf *data_arr[1] = {buf}; + return _ub_ep->CutFromIOBufList(data_arr, 1); + } else { + return _tcp_transport->CutFromIOBuf(buf); + } +} + +ssize_t UBShmTransport::CutFromIOBufList(butil::IOBuf **buf, size_t ndata) { + if (_ub_ep && _ub_state != UB_OFF) { + return _ub_ep->CutFromIOBufList(buf, ndata); + } + return 
_tcp_transport->CutFromIOBufList(buf, ndata); +} + +int UBShmTransport::WaitEpollOut(butil::atomic *_epollout_butex, + bool pollin, const timespec duetime) { + // LOG(INFO) << "mwj pollin4=" << pollin << " duetime=" << butil::timespec_to_microseconds(duetime); + if (_ub_state == UB_ON) { + // LOG(INFO) << "mwj pollin1=" << pollin; + const int expected_val = _epollout_butex->load(butil::memory_order_acquire); + CHECK(_ub_ep != NULL); + if (!_ub_ep->IsWritable()) { + g_vars->nwaitepollout << 1; + _ub_ep->PollerRegisterEpollOut(pollin); + auto mwj_ret = bthread::butex_wait(_epollout_butex, expected_val, &duetime); + // LOG(INFO) << "mwj pollin2=" << pollin << " mwj_ret=" << mwj_ret; + if (mwj_ret < 0) { + if (errno != EAGAIN && errno != ETIMEDOUT) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to wait ub window of " << _socket; + _socket->SetFailed(saved_errno, + "Fail to wait ub window of %s: %s", + _socket->description().c_str(), + berror(saved_errno)); + } + if (_socket->Failed()) { + // NOTE: + // Different from TCP, we cannot find the UB channel + // failed by writing to it. Thus we must check if it + // is already failed here. 
+ return 1; + } + } + _ub_ep->PollerUnRegisterEpollOut(pollin); + } + } else { + return _tcp_transport->WaitEpollOut(_epollout_butex, pollin, duetime); + } + // LOG(INFO) << "mwj return 0"; + return 0; +} + +void UBShmTransport::ProcessEvent(bthread_attr_t attr) { + bthread_t tid; + if (FLAGS_usercode_in_coroutine) { + OnEdge(_socket); + } else if (ubring::FLAGS_ub_edisp_unsched == false) { + auto rc = bthread_start_background(&tid, &attr, OnEdge, _socket); + if (rc != 0) { + LOG(FATAL) << "Fail to start ProcessEvent"; + OnEdge(_socket); + } + } else if (bthread_start_urgent(&tid, &attr, OnEdge, _socket) != 0) { + LOG(FATAL) << "Fail to start ProcessEvent"; + OnEdge(_socket); + } +} + +void UBShmTransport::QueueMessage(InputMessageClosure& input_msg, + int* num_bthread_created, bool last_msg) { + if (last_msg) { + return; + } + InputMessageBase* to_run_msg = input_msg.release(); + if (!to_run_msg) { + return; + } + + if (ubring::FLAGS_ub_disable_bthread) { + ProcessInputMessage(to_run_msg); + return; + } + // Create bthread for last_msg. The bthread is not scheduled + // until bthread_flush() is called (in the worse case). + + // TODO(gejun): Join threads. + bthread_t th; + bthread_attr_t tmp = (FLAGS_usercode_in_pthread ? 
+ BTHREAD_ATTR_PTHREAD : + BTHREAD_ATTR_NORMAL) | BTHREAD_NOSIGNAL; + tmp.keytable_pool = _socket->keytable_pool(); + tmp.tag = bthread_self_tag(); + bthread_attr_set_name(&tmp, "ProcessInputMessage"); + + if (!FLAGS_usercode_in_coroutine && bthread_start_background( + &th, &tmp, ProcessInputMessage, to_run_msg) == 0) { + ++*num_bthread_created; + } else { + ProcessInputMessage(to_run_msg); + } +} + +void UBShmTransport::Debug(std::ostream &os) {} + +int UBShmTransport::ContextInitOrDie(bool serverOrNot, const void* _options) { + if (serverOrNot) { + if (!OptionsAvailableOverUB(static_cast(_options))) { + return -1; + } + ubring::GlobalUBInitializeOrDie(); + if (!ubring::InitPollingModeWithTag(static_cast(_options)->bthread_tag)) { + return -1; + } + } else { + if (!OptionsAvailableForUB(static_cast(_options))) { + return -1; + } + ubring::GlobalUBInitializeOrDie(); + if (!ubring::InitPollingModeWithTag(bthread_self_tag())) { + return -1; + } + return 0; + } + + return 0; +} + +bool UBShmTransport::OptionsAvailableForUB(const ChannelOptions* opt) { + if (opt->has_ssl_options()) { + LOG(WARNING) << "Cannot use SSL and UB at the same time"; + return false; + } + if (!ubring::SupportedByUB(opt->protocol.name())) { + LOG(WARNING) << "Cannot use " << opt->protocol.name() + << " over UB"; + return false; + } + return true; +} + +bool UBShmTransport::OptionsAvailableOverUB(const ServerOptions* opt) { + if (opt->rtmp_service) { + LOG(WARNING) << "RTMP is not supported by UB"; + return false; + } + if (opt->has_ssl_options()) { + LOG(WARNING) << "SSL is not supported by UB"; + return false; + } + if (opt->nshead_service) { + LOG(WARNING) << "NSHEAD is not supported by UB"; + return false; + } + if (opt->mongo_service_adaptor) { + LOG(WARNING) << "MONGO is not supported by UB"; + return false; + } + return true; +} +} // namespace brpc +#endif \ No newline at end of file diff --git a/src/brpc/ubshm_transport.h b/src/brpc/ubshm_transport.h new file mode 100644 index 
0000000000..7119a96ac5 --- /dev/null +++ b/src/brpc/ubshm_transport.h @@ -0,0 +1,64 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BRPC_UB_TRANSPORT_H +#define BRPC_UB_TRANSPORT_H +#if BRPC_WITH_UBRING +#include "brpc/socket.h" +#include "brpc/channel.h" +#include "brpc/transport.h" + +namespace brpc { + class UBShmTransport : public Transport { + friend class TransportFactory; + friend class ubring::UBShmEndpoint; + friend class ubring::UBConnect; + public: + void Init(Socket* socket, const SocketOptions& options) override; + void Release() override; + int Reset(int32_t expected_nref) override; + std::shared_ptr Connect() override; + int CutFromIOBuf(butil::IOBuf* buf) override; + ssize_t CutFromIOBufList(butil::IOBuf** buf, size_t ndata) override; + int WaitEpollOut(butil::atomic* _epollout_butex, bool pollin, const timespec duetime) override; + void ProcessEvent(bthread_attr_t attr) override; + void QueueMessage(InputMessageClosure& inputMsg, int* num_bthread_created, bool last_msg) override; + void Debug(std::ostream &os) override; + ubring::UBShmEndpoint* GetUBShmEp() { + CHECK(_ub_ep != NULL); + return _ub_ep; + } + static int ContextInitOrDie(bool serverOrNot, const void* _options); + private: + 
static bool OptionsAvailableForUB(const ChannelOptions* opt); + static bool OptionsAvailableOverUB(const ServerOptions* opt); + private: + // The on/off state of UB + enum UBState { + UB_ON, + UB_OFF, + UB_UNKNOWN + }; + // The UBShmEndpoint + ubring::UBShmEndpoint* _ub_ep = NULL; + // Should use UB or not + UBState _ub_state; + std::shared_ptr _tcp_transport; + }; +} // namespace brpc +#endif // BRPC_WITH_UBRING +#endif //BRPC_UB_TRANSPORT_H \ No newline at end of file