From e0ead4f57b4468c9d453622d8245d0166d27c74d Mon Sep 17 00:00:00 2001 From: Debian Date: Mon, 30 Mar 2026 04:30:49 +0000 Subject: [PATCH 1/4] cachedb_redis: fix safety issues in cluster redirect parsing Fix several correctness and safety issues in parse_moved_reply() and the MOVED redirect handler: - Add slot value overflow protection: return ERR_INVALID_SLOT when parsed slot exceeds 16383 during digit accumulation, preventing signed integer overflow on malformed MOVED replies. - Add port value overflow protection: return ERR_INVALID_PORT when parsed port exceeds 65535 during digit accumulation, complementing the existing post-loop range check and preventing signed integer overflow on malformed input. - Fix undefined behavior in the no-colon endpoint fallback path: replace comparison of potentially-NULL out->endpoint.s against end pointer with (p < end), which achieves the same logic using the scan position variable that is always valid. - Replace pkg_malloc heap allocation of redis_moved struct with stack allocation in the MOVED handler. The struct is small (~24 bytes) and never outlives the enclosing scope, making heap allocation unnecessary. This eliminates the OOM error path and two pkg_free() calls. 
--- modules/cachedb_redis/cachedb_redis_dbase.c | 38 +++++++++------------ modules/cachedb_redis/cachedb_redis_utils.c | 4 ++- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/modules/cachedb_redis/cachedb_redis_dbase.c b/modules/cachedb_redis/cachedb_redis_dbase.c index 5ef92c6a5a4..e53c24c226c 100644 --- a/modules/cachedb_redis/cachedb_redis_dbase.c +++ b/modules/cachedb_redis/cachedb_redis_dbase.c @@ -565,29 +565,24 @@ static int _redis_run_command(cachedb_con *connection, redisReply **rpl, str *ke node->context->errstr); if (match_prefix(reply->str, reply->len, MOVED_PREFIX, MOVED_PREFIX_LEN)) { - // It's a MOVED response - redis_moved *moved_info = pkg_malloc(sizeof(redis_moved)); - if (!moved_info) { - LM_ERR("cachedb_redis: Unable to allocate redis_moved struct, no more pkg memory\n"); - freeReplyObject(reply); - reply = NULL; - goto try_next_con; - } else { - if (parse_moved_reply(reply, moved_info) < 0) { - LM_ERR("cachedb_redis: Unable to parse MOVED reply\n"); - pkg_free(moved_info); - moved_info = NULL; - freeReplyObject(reply); - goto try_next_con; - } - - LM_DBG("cachedb_redis: MOVED slot: [%d] endpoint: [%.*s] port: [%d]\n", moved_info->slot, moved_info->endpoint.len, moved_info->endpoint.s, moved_info->port); - node = get_redis_connection_by_endpoint(con, moved_info); + /* MOVED response */ + redis_moved moved_info_s; + redis_moved *moved_info = &moved_info_s; - pkg_free(moved_info); - moved_info = NULL; + if (parse_moved_reply(reply, moved_info) < 0) { + LM_ERR("failed to parse MOVED reply\n"); freeReplyObject(reply); reply = NULL; + goto try_next_con; + } + + LM_DBG("MOVED slot=%d endpoint=%.*s:%d\n", + moved_info->slot, moved_info->endpoint.len, + moved_info->endpoint.s, moved_info->port); + node = get_redis_connection_by_endpoint(con, moved_info); + + freeReplyObject(reply); + reply = NULL; if (node == NULL) { LM_ERR("Unable to locate connection by endpoint\n"); @@ -603,9 +598,8 @@ static int _redis_run_command(cachedb_con 
*connection, redisReply **rpl, str *ke } } - i = QUERY_ATTEMPTS; // New node that is the target being MOVED to, should have the attempts reset + i = QUERY_ATTEMPTS; continue; - } } freeReplyObject(reply); diff --git a/modules/cachedb_redis/cachedb_redis_utils.c b/modules/cachedb_redis/cachedb_redis_utils.c index 6abac0882b5..413842455f6 100644 --- a/modules/cachedb_redis/cachedb_redis_utils.c +++ b/modules/cachedb_redis/cachedb_redis_utils.c @@ -394,6 +394,7 @@ int parse_moved_reply(redisReply *reply, redis_moved *out) { while (p < end && *p >= '0' && *p <= '9') { slot = slot * 10 + (*p - '0'); p++; + if (slot > 16383) return ERR_INVALID_SLOT; } if (slot == 0 && (p == reply->str + MOVED_PREFIX_LEN || *(p - 1) < '0' || *(p - 1) > '9')) return ERR_INVALID_SLOT; @@ -426,11 +427,12 @@ int parse_moved_reply(redisReply *reply, redis_moved *out) { while (p < end && *p >= '0' && *p <= '9') { port = port * 10 + (*p - '0'); p++; + if (port > 65535) return ERR_INVALID_PORT; } if (port < 0 || port > 65535 || port_start == p) return ERR_INVALID_PORT; } - } else if (out->endpoint.s < end) { + } else if (p < end) { out->endpoint.s = host_start; out->endpoint.len = end - host_start; } From 7b8fe71ffdeaa9f534b8680182c059044f079efe Mon Sep 17 00:00:00 2001 From: Debian Date: Mon, 30 Mar 2026 04:31:58 +0000 Subject: [PATCH 2/4] cachedb_redis: add dynamic cluster topology management and observability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the static cluster topology (built once at startup, never refreshed) with runtime discovery and automatic refresh: Topology discovery and refresh: - Probe CLUSTER SHARDS (Redis 7+) with fallback to CLUSTER SLOTS (Redis 3+) for backward compatibility - O(1) slot_table[16384] lookup replaces per-query linked-list scan - Automatic topology refresh on MOVED redirect, connection failure, or query targeting an unmapped slot (rate-limited to 1/sec) - Dynamic node creation when MOVED points to an 
unknown endpoint - Stale node pruning during refresh with safe connection cleanup - Cap redirect loop at 5 max redirects to prevent worker hang on pathological cluster state Cluster observability via MI commands: - redis_cluster_info: full topology dump including per-node connection status, slot assignments, query/error/moved/ask counters, and last activity timestamp - redis_cluster_refresh: trigger manual topology refresh (bypasses rate limit) - redis_ping_nodes: per-node PING with microsecond latency reporting - All MI commands support optional group filter parameter Statistics: - redis_queries, redis_queries_failed, redis_moved, redis_topology_refreshes (module-level stat counters) - Per-node query, error, moved, ask counters in redis_cluster_info Hash slot correctness: - Hash tag {…} extraction per Redis Cluster specification - CRC16 modulo 16384 replaces bitwise AND with slots_assigned ASK redirect handling: - Detect ASK responses alongside existing MOVED handling - Send ASKING command to target node before retrying original query - Do not update slot map (ASK is a temporary mid-migration redirect) - Refactor parse_moved_reply into parse_redirect_reply with prefix parameter; inline wrappers for backward compatibility Connection reliability: - TCP keepalive via redis_keepalive parameter (default 10s) - Stack allocation for redis_moved structs (eliminates OOM paths) - NULL guards on malformed CLUSTER SHARDS/SLOTS reply elements - Integer overflow protection in slot and port parsing - NULL guards in MI command handlers for group_name/initial_url Documentation: - New section: Redis Cluster Support (topology discovery, automatic refresh, MOVED/ASK handling, hash tags) - MI command reference: redis_cluster_info, redis_cluster_refresh, redis_ping_nodes - Authentication URL format documentation (classic, ACL, no-auth) - New parameter: redis_keepalive Test suite (186 tests): - C unit tests: hash slot calculation (37), MI counter helpers (41) - Integration: 
topology startup (12), ASK redirect (16), topology refresh (13), MI commands (50), edge cases (16) - Trap EXIT handlers for safe cluster state restoration - python3 preflight checks for JSON-dependent tests Depends on: #3815 (hash tag + modulo fix), #3852 (ASK redirect) --- modules/cachedb_redis/README | 324 +++++++- modules/cachedb_redis/cachedb_redis.c | 36 +- modules/cachedb_redis/cachedb_redis_dbase.c | 210 +++-- modules/cachedb_redis/cachedb_redis_dbase.h | 30 +- modules/cachedb_redis/cachedb_redis_mi.c | 465 +++++++++++ modules/cachedb_redis/cachedb_redis_mi.h | 43 + modules/cachedb_redis/cachedb_redis_utils.c | 608 +++++++++----- modules/cachedb_redis/cachedb_redis_utils.h | 8 +- .../cachedb_redis/doc/cachedb_redis_admin.xml | 502 ++++++++++- modules/cachedb_redis/test/.gitignore | 2 + modules/cachedb_redis/test/Makefile | 50 ++ modules/cachedb_redis/test/README.md | 99 +++ modules/cachedb_redis/test/hash_under_test.c | 211 +++++ modules/cachedb_redis/test/test_edge_cases.sh | 782 ++++++++++++++++++ modules/cachedb_redis/test/test_hash.c | 290 +++++++ modules/cachedb_redis/test/test_load.sh | 740 +++++++++++++++++ .../cachedb_redis/test/test_mi_commands.sh | 397 +++++++++ modules/cachedb_redis/test/test_mi_counters | Bin 0 -> 20528 bytes modules/cachedb_redis/test/test_mi_counters.c | 397 +++++++++ .../test/test_topology_refresh.sh | 454 ++++++++++ .../test/test_topology_startup.sh | 330 ++++++++ 21 files changed, 5638 insertions(+), 340 deletions(-) create mode 100644 modules/cachedb_redis/cachedb_redis_mi.c create mode 100644 modules/cachedb_redis/cachedb_redis_mi.h create mode 100644 modules/cachedb_redis/test/.gitignore create mode 100644 modules/cachedb_redis/test/Makefile create mode 100644 modules/cachedb_redis/test/README.md create mode 100644 modules/cachedb_redis/test/hash_under_test.c create mode 100755 modules/cachedb_redis/test/test_edge_cases.sh create mode 100644 modules/cachedb_redis/test/test_hash.c create mode 100755 
modules/cachedb_redis/test/test_load.sh create mode 100755 modules/cachedb_redis/test/test_mi_commands.sh create mode 100755 modules/cachedb_redis/test/test_mi_counters create mode 100644 modules/cachedb_redis/test/test_mi_counters.c create mode 100755 modules/cachedb_redis/test/test_topology_refresh.sh create mode 100755 modules/cachedb_redis/test/test_topology_startup.sh diff --git a/modules/cachedb_redis/README b/modules/cachedb_redis/README index 085c39484fb..b18866eea92 100644 --- a/modules/cachedb_redis/README +++ b/modules/cachedb_redis/README @@ -8,26 +8,47 @@ cachedb_redis Module 1.1. Overview 1.2. Advantages 1.3. Redis Stack Support - 1.4. Limitations - 1.5. Dependencies + 1.4. Redis Cluster Support (Topology) - 1.5.1. OpenSIPS Modules - 1.5.2. External Libraries or Applications + 1.4.1. Topology Discovery + 1.4.2. Automatic Topology Refresh + 1.4.3. MOVED Redirection + 1.4.4. Hash Tags - 1.6. Exported Parameters + 1.5. Limitations + 1.6. Dependencies - 1.6.1. cachedb_url (string) - 1.6.2. connect_timeout (integer) - 1.6.3. query_timeout (integer) - 1.6.4. shutdown_on_error (integer) - 1.6.5. use_tls (integer) - 1.6.6. ftsearch_index_name (string) - 1.6.7. ftsearch_json_prefix (string) - 1.6.8. ftsearch_max_results (integer) - 1.6.9. ftsearch_json_mset_expire (integer) + 1.6.1. OpenSIPS Modules + 1.6.2. External Libraries or Applications - 1.7. Exported Functions - 1.8. Raw Query Syntax + 1.7. Exported Parameters + + 1.7.1. cachedb_url (string) + 1.7.2. connect_timeout (integer) + 1.7.3. query_timeout (integer) + 1.7.4. shutdown_on_error (integer) + 1.7.5. use_tls (integer) + 1.7.6. ftsearch_index_name (string) + 1.7.7. ftsearch_json_prefix (string) + 1.7.8. ftsearch_max_results (integer) + 1.7.9. ftsearch_json_mset_expire (integer) + 1.7.10. redis_keepalive (integer) + + 1.8. Exported Functions + 1.9. Exported MI Functions + + 1.9.1. redis_cluster_info + 1.9.2. redis_cluster_refresh + 1.9.3. redis_ping_nodes + + 1.10. Exported Statistics + + 1.10.1. 
redis_queries + 1.10.2. redis_queries_failed + 1.10.3. redis_moved + 1.10.4. redis_topology_refreshes + + 1.11. Raw Query Syntax 2. Contributors @@ -59,7 +80,8 @@ cachedb_redis Module 1.8. Set the ftsearch_json_prefix parameter 1.9. Set the ftsearch_max_results parameter 1.10. Set the ftsearch_json_mset_expire parameter - 1.11. Redis Raw Query Examples + 1.11. Set the redis_keepalive parameter + 1.12. Redis Raw Query Examples Chapter 1. Admin Guide @@ -100,20 +122,82 @@ Chapter 1. Admin Guide OpenSIPS will auto-detect availability of the RedisJSON support when necessary and log the appropriate messages. -1.4. Limitations +1.4. Redis Cluster Support (Topology) + + When connecting to a Redis Cluster, the module automatically + detects cluster mode and manages the full slot-to-node topology + at runtime. No extra configuration is needed beyond the standard + cachedb_url parameter. + +1.4.1. Topology Discovery + + At startup, the module probes the Redis server using the + CLUSTER SHARDS command (available in Redis 7.0+). If the server + does not support this command, it falls back to CLUSTER SLOTS + (available in Redis 3.0+). If neither command succeeds, the + connection is treated as a single-instance (non-cluster) + connection. + + The discovered topology is stored internally in an O(1) slot + lookup table (16384 slots), mapping each slot directly to its + owning master node. + +1.4.2. Automatic Topology Refresh + + The module automatically refreshes the cluster topology at + runtime when any of the following events occur: + * A MOVED redirection is received from a cluster node + (indicating a permanent slot migration). + * A connection failure (NULL reply) occurs and the node cannot + be reconnected. + * A query targets a slot with no known owner, suggesting the + topology is stale. + * An operator triggers a manual refresh via the + redis_cluster_refresh MI command. 
+ + Automatic refreshes are rate-limited to at most once per second + to avoid excessive load on the cluster. The MI-triggered refresh + bypasses this rate limit. + +1.4.3. MOVED Redirection + + The module transparently handles Redis Cluster MOVED + redirections: + * MOVED -- indicates a permanent slot migration. The module + updates its slot map, redirects the query to the new node, + and triggers a topology refresh so all future queries go + directly to the correct node. + + If a redirection points to a node that is not yet known, the + module dynamically creates a new node entry, establishes a + connection, and retries the query. + +1.4.4. Hash Tags + + The module supports Redis Cluster hash tags, which allow related + keys to be co-located on the same cluster node. If a key + contains a {...} substring, only the content between the first + { and the next } is used for hash slot calculation. For example, + the keys {user1000}.profile and {user1000}.settings will always + land on the same node, enabling multi-key operations. + + If the braces are empty ({}) or there is no closing brace, the + entire key is hashed as usual. + +1.5. Limitations * keys (in key:value pairs) may not contain spaces or control characters -1.5. Dependencies +1.6. Dependencies -1.5.1. OpenSIPS Modules +1.6.1. OpenSIPS Modules The following modules must be loaded before this module: * If a use_tls is defined, the tls_mgm module will need to be loaded as well. -1.5.2. External Libraries or Applications +1.6.2. External Libraries or Applications The following libraries or applications must be installed before running OpenSIPS with this module loaded: @@ -127,9 +211,9 @@ Chapter 1. Admin Guide If TLS connections are enabled via the use_tls modparam, hiredis needs to be compiled with TLS support. -1.6. Exported Parameters +1.7. Exported Parameters -1.6.1. cachedb_url (string) +1.7.1. 
cachedb_url (string) The URLs of the server groups that OpenSIPS will connect to in order to use, from script, the cache_store(), cache_fetch(), @@ -157,7 +241,26 @@ cache_fetch("redis:cluster1", "key", $avp(10)); cache_remove("redis:cluster1", "key"); ... -1.6.2. connect_timeout (integer) +1.7.1.1. Authentication + + The module supports three authentication modes based on the URL + format: + + URL Format AUTH Command Use Case + redis:group://:password@host:port/ AUTH password Classic Redis (< 6.0) with requirepass + redis:group://username:password@host:port/ AUTH username password Redis 6+ ACL with per-user credentials + redis:group://host:port/ (none) Non-authenticated Redis + + Important: For classic password-only authentication, the URL must + include a colon before the password (":password@host"). Writing + "password@host" without the colon will place the credential in the + username field of the URL parser, and authentication will be + skipped. + + When connecting to a Redis Cluster with authentication, all + discovered cluster nodes use the same credentials from the URL. + +1.7.2. connect_timeout (integer) This parameter specifies how many milliseconds OpenSIPS should wait for connecting to a Redis node. @@ -170,20 +273,20 @@ cache_remove("redis:cluster1", "key"); modparam("cachedb_redis", "connect_timeout",1000) ... -1.6.3. query_timeout (integer) +1.7.3. query_timeout (integer) This parameter specifies how many milliseconds OpenSIPS should wait for a query response from a Redis node. Default value is “5000 ms”. - Example 1.4. Set connect_timeout parameter + Example 1.4. Set query_timeout parameter ... # wait 1 seconds for Redis queries modparam("cachedb_redis", "query_timeout",1000) ... -1.6.4. shutdown_on_error (integer) +1.7.4. shutdown_on_error (integer) By setting this parameter to 1, OpenSIPS will abort startup if the initial connection to Redis is not possible. 
Runtime @@ -198,7 +301,7 @@ modparam("cachedb_redis", "query_timeout",1000) modparam("cachedb_redis", "shutdown_on_error", 1) ... -1.6.5. use_tls (integer) +1.7.5. use_tls (integer) Setting this parameter will allow you to use TLS for Redis connections. In order to enable TLS for a specific connection, @@ -232,7 +335,7 @@ modparam("cachedb_redis", "cachedb_url","redis://localhost:6379/?tls_dom ain=redis") ... -1.6.6. ftsearch_index_name (string) +1.7.6. ftsearch_index_name (string) Only relevant with RedisJSON and RediSearch server-side support. @@ -247,8 +350,7 @@ ain=redis") modparam("cachedb_redis", "ftsearch_index_name", "ix::usrloc") - -1.6.7. ftsearch_json_prefix (string) +1.7.7. ftsearch_json_prefix (string) Only relevant with RedisJSON and RediSearch server-side support. @@ -262,8 +364,7 @@ modparam("cachedb_redis", "ftsearch_index_name", "ix::usrloc") modparam("cachedb_redis", "ftsearch_json_prefix", "userlocation:") - -1.6.8. ftsearch_max_results (integer) +1.7.8. ftsearch_max_results (integer) Only relevant with RedisJSON and RediSearch server-side support. @@ -277,8 +378,7 @@ modparam("cachedb_redis", "ftsearch_json_prefix", "userlocation:") modparam("cachedb_redis", "ftsearch_max_results", 100) - -1.6.9. ftsearch_json_mset_expire (integer) +1.7.9. ftsearch_json_mset_expire (integer) Only relevant with RedisJSON and RediSearch server-side support. @@ -293,13 +393,161 @@ modparam("cachedb_redis", "ftsearch_max_results", 100) modparam("cachedb_redis", "ftsearch_json_mset_expire", 7200) +1.7.10. redis_keepalive (integer) + + TCP keepalive interval in seconds for Redis connections. When set + to a positive value, the kernel sends TCP probes on idle + connections to detect dead peers (e.g. due to NAT/firewall idle + timeout or network partition). This allows the next query to fail + immediately instead of waiting for the full query timeout, + enabling faster recovery via the existing retry loop. + + Set to 0 to disable TCP keepalive. 
Recommended to keep enabled + for production deployments to prevent silent connection death. -1.7. Exported Functions + Default value is "10" (seconds). + + Example 1.11. Set the redis_keepalive parameter +... +# set TCP keepalive interval to 15 seconds +modparam("cachedb_redis", "redis_keepalive", 15) + +# disable TCP keepalive +modparam("cachedb_redis", "redis_keepalive", 0) +... + +1.8. Exported Functions The module does not export functions to be used in configuration script. -1.8. Raw Query Syntax +1.9. Exported MI Functions + +1.9.1. redis_cluster_info + + Displays detailed information about all Redis connections + managed by the module, including cluster topology, per-node + connection status, slot assignments, and per-node query + counters. + + Parameters: + * group (optional) - if specified, only connections belonging + to this group will be listed (e.g. "local" from a + "redis:local://..." URL). If omitted, all Redis connections + are listed. + + The response is a JSON array of connection objects. 
Each + connection object includes: + * group - the connection group name + * url - the original cachedb_url + * mode - "cluster" or "single" + * cluster_command (cluster mode only) - "SHARDS" or "SLOTS", + depending on which Redis command is used for topology + discovery + * topology_refreshes - number of topology refreshes performed + on this connection + * last_topology_refresh - UNIX timestamp of the last topology + refresh + * nodes - array of cluster node objects, each containing: ip, + port, status ("connected"/"disconnected"), slots_assigned + (cluster mode only), queries, errors, moved, + last_activity (seconds since last successful query, -1 if + never queried) + * total_slots_mapped (cluster mode only) - total number of + slots with an assigned node (should be 16384 for a healthy + cluster) + + MI FIFO Command Format: + +## list all Redis connections +opensips-cli -x mi redis_cluster_info + +## list only the "local" group +opensips-cli -x mi redis_cluster_info group=local + +1.9.2. redis_cluster_refresh + + Forces an immediate topology refresh on Redis Cluster + connections. This bypasses the normal once-per-second rate + limit and queries the cluster for its current slot-to-node + mapping. Useful after manual cluster rebalancing or node + additions/removals. + + For non-cluster (single instance) connections, the command + returns a "skipped (not cluster mode)" status. + + Parameters: + * group (optional) - if specified, only the connection + belonging to this group will be refreshed. If omitted, all + cluster connections are refreshed. + + The response is a JSON array of objects, one per connection, + each containing group and status ("ok", "error", or + "skipped (not cluster mode)"). + + MI FIFO Command Format: + +## refresh all cluster connections +opensips-cli -x mi redis_cluster_refresh + +## refresh only the "local" group +opensips-cli -x mi redis_cluster_refresh group=local + +1.9.3. 
redis_ping_nodes + + Sends a PING command to each Redis node and reports per-node + reachability status with round-trip latency. Useful for on-demand + health checks without waiting for the next query. + + Parameters: + * group (optional) - if specified, only nodes belonging to + this group will be pinged. If omitted, all Redis connections + are pinged. + + The response is a JSON array of connection objects. Each + connection object includes: + * group - the connection group name + * nodes - array of node objects, each containing: ip, port, + status ("reachable", "unreachable", or "disconnected"), + latency_us (round-trip time in microseconds, -1 if not + reachable) + + MI FIFO Command Format: + +## ping all Redis nodes +opensips-cli -x mi redis_ping_nodes + +## ping only the "local" group +opensips-cli -x mi redis_ping_nodes group=local + +1.10. Exported Statistics + +1.10.1. redis_queries + + Total number of successful Redis queries executed across all + connections and processes. + +1.10.2. redis_queries_failed + + Total number of failed Redis queries (NULL replies from hiredis + or Redis error responses other than MOVED). + +1.10.3. redis_moved + + Total number of MOVED redirections received from Redis Cluster + nodes. A MOVED response indicates a permanent slot migration - + the module updates its slot map and retries the query on the + correct node. + +1.10.4. redis_topology_refreshes + + Total number of cluster topology refreshes performed (via + CLUSTER SHARDS or CLUSTER SLOTS). This counter increments both + for automatic refreshes (triggered by MOVED responses or + unreachable nodes) and manual refreshes (triggered via the + redis_cluster_refresh MI command). + +1.11. Raw Query Syntax The cachedb_redis module allows to run RAW queries, thus taking full advantage of the capabilities of the back-end. 
The query @@ -307,7 +555,7 @@ modparam("cachedb_redis", "ftsearch_json_mset_expire", 7200) Here are a couple examples of running some Redis queries : - Example 1.11. Redis Raw Query Examples + Example 1.12. Redis Raw Query Examples ... $var(my_hash) = "my_hash_name"; $var(my_key) = "my_key_name"; diff --git a/modules/cachedb_redis/cachedb_redis.c b/modules/cachedb_redis/cachedb_redis.c index af11fb79e46..7ca77b8315e 100644 --- a/modules/cachedb_redis/cachedb_redis.c +++ b/modules/cachedb_redis/cachedb_redis.c @@ -39,12 +39,14 @@ #include "../../cachedb/cachedb.h" #include "cachedb_redis_dbase.h" +#include "cachedb_redis_mi.h" +#include "../../statistics.h" static int mod_init(void); static int child_init(int); static void destroy(void); -static str cache_mod_name = str_init("redis"); +str cache_mod_name = str_init("redis"); struct cachedb_url *redis_script_urls = NULL; int set_connection(unsigned int type, void *val) @@ -62,6 +64,7 @@ static const param_export_t params[]={ { "ftsearch_json_prefix", STR_PARAM, &fts_json_prefix.s}, { "ftsearch_max_results", INT_PARAM, &fts_max_results}, { "ftsearch_json_mset_expire", INT_PARAM, &fts_json_mset_expire}, + { "redis_keepalive", INT_PARAM, &redis_keepalive}, {0,0,0} }; @@ -92,6 +95,33 @@ static const dep_export_t deps = { }, }; +static const stat_export_t mod_stats[] = { + {"redis_queries", 0, &redis_stat_queries }, + {"redis_queries_failed", 0, &redis_stat_queries_failed }, + {"redis_moved", 0, &redis_stat_moved }, + {"redis_topology_refreshes", 0, &redis_stat_topology_refreshes}, + {0, 0, 0} +}; + +static const mi_export_t mi_cmds[] = { + { MI_REDIS_CLUSTER_INFO, 0, MI_NAMED_PARAMS_ONLY, 0, { + {mi_redis_cluster_info, {0}}, + {mi_redis_cluster_info_1, {"group", 0}}, + {EMPTY_MI_RECIPE}}, {0} + }, + { MI_REDIS_CLUSTER_REFRESH, 0, MI_NAMED_PARAMS_ONLY, 0, { + {mi_redis_cluster_refresh, {0}}, + {mi_redis_cluster_refresh_1, {"group", 0}}, + {EMPTY_MI_RECIPE}}, {0} + }, + { MI_REDIS_PING_NODES, 0, MI_NAMED_PARAMS_ONLY, 0, { 
+ {mi_redis_ping_nodes, {0}}, + {mi_redis_ping_nodes_1, {"group", 0}}, + {EMPTY_MI_RECIPE}}, {0} + }, + {EMPTY_MI_EXPORT} +}; + /** module exports */ struct module_exports exports= { "cachedb_redis", /* module name */ @@ -103,8 +133,8 @@ struct module_exports exports= { 0, /* exported functions */ 0, /* exported async functions */ params, /* exported parameters */ - 0, /* exported statistics */ - 0, /* exported MI functions */ + mod_stats, /* exported statistics */ + mi_cmds, /* exported MI functions */ 0, /* exported pseudo-variables */ 0, /* exported transformations */ 0, /* extra processes */ diff --git a/modules/cachedb_redis/cachedb_redis_dbase.c b/modules/cachedb_redis/cachedb_redis_dbase.c index e53c24c226c..716a826bd64 100644 --- a/modules/cachedb_redis/cachedb_redis_dbase.c +++ b/modules/cachedb_redis/cachedb_redis_dbase.c @@ -47,8 +47,15 @@ str fts_index_name = str_init("idx:usrloc"); str fts_json_prefix = str_init("usrloc:"); int fts_json_mset_expire = 3600; +int redis_keepalive = 10; /* TCP keepalive interval in seconds, 0=disabled */ + struct tls_mgm_binds tls_api; +stat_var *redis_stat_queries = 0; +stat_var *redis_stat_queries_failed = 0; +stat_var *redis_stat_moved = 0; +stat_var *redis_stat_topology_refreshes = 0; + static inline int is_redis_escaped_char(char c); static cdb_row_t *redis_mk_cdb_row(redisReply *rpl); static unsigned int redis_escape_string_json(char *dst, const str *src); @@ -58,7 +65,7 @@ static unsigned int redis_calc_escaped_len_json(str *s); redisContext *redis_get_ctx(char *ip, int port) { struct timeval tv; - static int warned = 0; + static char warned = 0; redisContext *ctx; if (!port) @@ -76,12 +83,7 @@ redisContext *redis_get_ctx(char *ip, int port) if (ctx && ctx->err != REDIS_OK) { LM_ERR("failed to open redis connection %s:%hu - %s\n",ip, (unsigned short)port,ctx->errstr); - return NULL; - } - - if (!ctx) { - LM_ERR("failed to connect to redis %s:%hu - out of memory\n", - ip, (unsigned short)port); + redisFree(ctx); 
return NULL; } @@ -90,9 +92,17 @@ redisContext *redis_get_ctx(char *ip, int port) tv.tv_usec = (redis_query_tout * 1000) % 1000000; if (redisSetTimeout(ctx, tv) != REDIS_OK) { LM_ERR("Cannot set query timeout to %dms\n", redis_query_tout); + redisFree(ctx); return NULL; } } + + if (redis_keepalive > 0) { + if (redisEnableKeepAliveWithInterval(ctx, redis_keepalive) != REDIS_OK) + LM_WARN("failed to enable TCP keepalive on redis connection " + "%s:%hu\n", ip, (unsigned short)port); + } + return ctx; } @@ -177,9 +187,17 @@ int redis_connect_node(redis_con *con,cluster_node *node) } #endif - if (con->id->password) { + if (con->id->username && *con->id->username && con->id->password) { + /* Redis 6+ ACL: AUTH username password */ + rpl = redisCommand(node->context,"AUTH %s %s", + con->id->username, con->id->password); + } else if (con->id->password) { rpl = redisCommand(node->context,"AUTH %s",con->id->password); - if (rpl == NULL || rpl->type == REDIS_REPLY_ERROR) { + } else { + rpl = NULL; + } + if (rpl != NULL) { + if (rpl->type == REDIS_REPLY_ERROR) { LM_ERR("failed to auth to redis - %.*s\n", rpl?(unsigned)rpl->len:7,rpl?rpl->str:"FAILURE"); freeReplyObject(rpl); @@ -231,8 +249,6 @@ int redis_connect(redis_con *con) { redisContext *ctx; redisReply *rpl; - cluster_node *it; - int len; struct tls_domain *tls_dom = NULL; /* connect to redis DB */ @@ -249,9 +265,17 @@ int redis_connect(redis_con *con) #endif /* auth using password, if any */ - if (con->id->password) { + if (con->id->username && *con->id->username && con->id->password) { + /* Redis 6+ ACL: AUTH username password */ + rpl = redisCommand(ctx,"AUTH %s %s", + con->id->username, con->id->password); + } else if (con->id->password) { rpl = redisCommand(ctx,"AUTH %s",con->id->password); - if (rpl == NULL || rpl->type == REDIS_REPLY_ERROR) { + } else { + rpl = NULL; + } + if (rpl != NULL) { + if (rpl->type == REDIS_REPLY_ERROR) { LM_ERR("failed to auth to redis - %.*s\n", 
rpl?(unsigned)rpl->len:7,rpl?rpl->str:"FAILURE"); if (rpl!=NULL) @@ -273,43 +297,34 @@ int redis_connect(redis_con *con) } freeReplyObject(rpl); - rpl = redisCommand(ctx,"CLUSTER NODES"); - if (rpl == NULL || rpl->type == REDIS_REPLY_ERROR) { - /* single instace mode */ + /* try CLUSTER SHARDS/SLOTS to detect cluster mode */ + if (probe_cluster_command(con, ctx) == 0) { + /* cluster mode — nodes and slot_table already populated */ + con->flags |= REDIS_CLUSTER_INSTANCE; + LM_DBG("cluster instance mode on %p\n", con); + } else { + /* single instance mode */ + str src, dst; con->flags |= REDIS_SINGLE_INSTANCE; - len = strlen(con->host); - con->nodes = pkg_malloc(sizeof(cluster_node) + len + 1); + con->nodes = pkg_malloc(sizeof(cluster_node)); if (con->nodes == NULL) { LM_ERR("no more pkg\n"); - if (rpl!=NULL) - freeReplyObject(rpl); goto error; } - - memset(con->nodes,0,sizeof(cluster_node) + len + 1); - con->nodes->ip = (char *)(con->nodes + 1); - - strcpy(con->nodes->ip,con->host); - con->nodes->port = con->port; - con->nodes->start_slot = 0; - con->nodes->end_slot = 4096; - con->nodes->context = NULL; - con->nodes->next = NULL; - LM_DBG("single instance mode\n"); - } else { - /* cluster instance mode */ - con->flags |= REDIS_CLUSTER_INSTANCE; - con->slots_assigned = 0; - LM_DBG("cluster instance mode on %p\n",con); - if (build_cluster_nodes(con,rpl->str,rpl->len) < 0) { - LM_ERR("failed to parse Redis cluster info\n"); - freeReplyObject(rpl); + memset(con->nodes, 0, sizeof(cluster_node)); + src.s = con->host; + src.len = strlen(con->host); + if (pkg_nt_str_dup(&dst, &src) != 0) { + LM_ERR("no more pkg\n"); + pkg_free(con->nodes); + con->nodes = NULL; goto error; } + con->nodes->ip = dst.s; + con->nodes->port = con->port; + LM_DBG("single instance mode\n"); } - if (rpl!=NULL) - freeReplyObject(rpl); redisFree(ctx); if (use_tls && tls_dom) @@ -317,13 +332,10 @@ int redis_connect(redis_con *con) con->flags |= REDIS_INIT_NODES; - for (it=con->nodes;it;it=it->next) { - 
- if (it->end_slot > con->slots_assigned ) - con->slots_assigned = it->end_slot; - - if (redis_connect_node(con,it) < 0) { - LM_ERR("failed to init connection \n"); + /* cluster nodes already connected by find_or_create_node() */ + if (con->flags & REDIS_SINGLE_INSTANCE) { + if (redis_connect_node(con, con->nodes) < 0) { + LM_ERR("failed to connect to single instance\n"); return -1; } } @@ -377,12 +389,14 @@ int redis_get_hostport(const str *hostport, char **host, unsigned short *port) in.len = hostport->s + hostport->len - (p + 1); if (in.len <= 0) { LM_ERR("bad/missing Redis port in URL\n"); + pkg_free(*host); return -1; } unsigned int out_port; if (str2int(&in, &out_port) != 0) { LM_ERR("failed to parse Redis port in URL\n"); + pkg_free(*host); return -1; } @@ -441,6 +455,7 @@ redis_con* redis_new_connection(struct cachedb_id* id) if (redis_get_hostport(&it->s, &con->host, port) != 0) { LM_ERR("no more pkg\n"); + pkg_free(con); goto out_err; } @@ -454,8 +469,11 @@ redis_con* redis_new_connection(struct cachedb_id* id) /* if doing failover Redises, only connect the 1st one for now! */ if (!cons && redis_connect(con) < 0) { LM_ERR("failed to connect to DB\n"); - if (shutdown_on_error) + if (shutdown_on_error) { + pkg_free(con->host); + pkg_free(con); goto out_err; + } } _add_last(con, cons, next_con); @@ -518,6 +536,7 @@ static int _redis_run_command(cachedb_con *connection, redisReply **rpl, str *ke cluster_node *node; redisReply *reply = NULL; int i, last_err = 0; + int max_redirects = 5; va_list aq; first = ((redis_con *)connection->data)->current; @@ -532,9 +551,17 @@ static int _redis_run_command(cachedb_con *connection, redisReply **rpl, str *ke node = get_redis_connection(con,key); if (node == NULL) { - LM_ERR("Bad cluster configuration\n"); - last_err = -10; - goto try_next_con; + /* slot has no owner — topology may be stale (e.g. a node + * was removed during a previous refresh and has since + * rejoined the cluster). Refresh and retry the lookup. 
*/ + LM_INFO("slot has no owner, refreshing topology\n"); + refresh_cluster_topology(con); + node = get_redis_connection(con,key); + if (node == NULL) { + LM_ERR("slot still has no owner after topology refresh\n"); + last_err = -1; + goto try_next_con; + } } if (node->context == NULL) { @@ -555,7 +582,10 @@ static int _redis_run_command(cachedb_con *connection, redisReply **rpl, str *ke if (reply == NULL) { LM_INFO("Redis query failed: reply: NULL node->context->err: %d, node->context->errstr: %s\n", node->context->err, node->context->errstr); + node->errors++; + update_stat(redis_stat_queries_failed, 1); if (node->context->err == REDIS_OK || redis_reconnect_node(con,node) < 0) { + refresh_cluster_topology(con); i = 0; break; } @@ -565,27 +595,33 @@ static int _redis_run_command(cachedb_con *connection, redisReply **rpl, str *ke node->context->errstr); if (match_prefix(reply->str, reply->len, MOVED_PREFIX, MOVED_PREFIX_LEN)) { - /* MOVED response */ + // It's a MOVED response redis_moved moved_info_s; redis_moved *moved_info = &moved_info_s; + if (parse_moved_reply(reply, moved_info) < 0) { + LM_ERR("cachedb_redis: Unable to parse MOVED reply\n"); + freeReplyObject(reply); + goto try_next_con; + } - if (parse_moved_reply(reply, moved_info) < 0) { - LM_ERR("failed to parse MOVED reply\n"); - freeReplyObject(reply); - reply = NULL; - goto try_next_con; - } + LM_DBG("cachedb_redis: MOVED slot: [%d] endpoint: [%.*s] port: [%d]\n", moved_info->slot, moved_info->endpoint.len, moved_info->endpoint.s, moved_info->port); + node->moved++; + update_stat(redis_stat_moved, 1); + node = get_redis_connection_by_endpoint(con, moved_info); - LM_DBG("MOVED slot=%d endpoint=%.*s:%d\n", - moved_info->slot, moved_info->endpoint.len, - moved_info->endpoint.s, moved_info->port); - node = get_redis_connection_by_endpoint(con, moved_info); + if (node == NULL) { + LM_DBG("cachedb_redis: MOVED endpoint unknown, creating new node %.*s:%d\n", + moved_info->endpoint.len, moved_info->endpoint.s, 
moved_info->port); + node = find_or_create_node(con, + moved_info->endpoint.s, moved_info->endpoint.len, + (unsigned short)moved_info->port); + } - freeReplyObject(reply); - reply = NULL; + freeReplyObject(reply); + reply = NULL; if (node == NULL) { - LM_ERR("Unable to locate connection by endpoint\n"); + LM_ERR("Unable to locate or create connection by endpoint\n"); last_err = -10; goto try_next_con; } @@ -598,14 +634,26 @@ static int _redis_run_command(cachedb_con *connection, redisReply **rpl, str *ke } } - i = QUERY_ATTEMPTS; + /* Refresh topology so future queries go direct */ + refresh_cluster_topology(con); + if (--max_redirects <= 0) { + LM_ERR("max redirects exceeded\n"); + freeReplyObject(reply); + reply = NULL; + last_err = -1; + goto try_next_con; + } + i = QUERY_ATTEMPTS; // New node that is the target being MOVED to, should have the attempts reset continue; } + node->errors++; + update_stat(redis_stat_queries_failed, 1); freeReplyObject(reply); reply = NULL; if (node->context->err == REDIS_OK || redis_reconnect_node(con,node) < 0) { + refresh_cluster_topology(con); i = 0; break; } @@ -624,6 +672,9 @@ static int _redis_run_command(cachedb_con *connection, redisReply **rpl, str *ke LM_INFO("successfully ran query after %d failed attempt(s)\n", QUERY_ATTEMPTS - i); + node->queries++; + update_stat(redis_stat_queries, 1); + node->last_activity = time(NULL); last_err = 0; break; @@ -672,7 +723,8 @@ int redis_get(cachedb_con *connection,str *attr,str *val) rc = redis_run_command(connection, &reply, attr, "GET %b", attr->s, (size_t)attr->len); if (rc != 0) - goto out_err; + goto out_err; /* rc is normalized to -1 below; callers + * (e.g. 
mi_cachefetch) only check for -1 */ if (reply->type == REDIS_REPLY_NIL) { LM_DBG("no such key - %.*s\n",attr->len,attr->s); @@ -704,9 +756,17 @@ int redis_get(cachedb_con *connection,str *attr,str *val) return 0; out_err: + val->s = NULL; + val->len = 0; if (reply) freeReplyObject(reply); - return rc; + /* Always return -1 (not the internal rc) — the cachedb API contract + * expects only 0 (success), -1 (error), or -2 (not found). Internal + * error codes like -10 from _redis_run_command must not propagate to + * callers such as mi_cachefetch, which only check "ret == -1" and + * would otherwise fall through to use an uninitialized value struct, + * causing a SIGSEGV in cJSON_strndup. */ + return -1; } int redis_set(cachedb_con *connection,str *attr,str *val,int expires) @@ -745,7 +805,7 @@ int redis_set(cachedb_con *connection,str *attr,str *val,int expires) out_err: freeReplyObject(reply); - return rc; + return -1; } /* returns 0 in case of successful remove @@ -777,7 +837,7 @@ int redis_remove(cachedb_con *connection,str *attr) out_err: freeReplyObject(reply); - return rc; + return -1; } /** @@ -845,7 +905,7 @@ int redis_add(cachedb_con *connection,str *attr,int val,int expires,int *new_val out_err: freeReplyObject(reply); - return rc; + return -1; } int redis_sub(cachedb_con *connection,str *attr,int val,int expires,int *new_val) @@ -883,7 +943,7 @@ int redis_sub(cachedb_con *connection,str *attr,int val,int expires,int *new_val out_err: freeReplyObject(reply); - return rc; + return -1; } static unsigned int redis_escape_string_json(char *dst, const str *src) @@ -1707,6 +1767,7 @@ int redis_get_counter(cachedb_con *connection,str *attr,int *val) if (reply->type == REDIS_REPLY_NIL || reply->str == NULL || reply->len == 0) { LM_DBG("no such key - %.*s\n",attr->len,attr->s); + freeReplyObject(reply); return -2; } @@ -1729,7 +1790,7 @@ int redis_get_counter(cachedb_con *connection,str *attr,int *val) out_err: freeReplyObject(reply); - return rc; + return -1; } int 
redis_raw_query_handle_reply(redisReply *reply,cdb_raw_entry ***ret, @@ -1957,6 +2018,7 @@ int redis_raw_query(cachedb_con *connection,str *attr,cdb_raw_entry ***rpl,int e case REDIS_REPLY_ERROR: LM_ERR("Error encountered when running Redis raw query [%.*s]\n", attr->len,attr->s); + freeReplyObject(reply); return -1; case REDIS_REPLY_NIL: LM_DBG("Redis raw query [%.*s] failed - no such key\n",attr->len,attr->s); diff --git a/modules/cachedb_redis/cachedb_redis_dbase.h b/modules/cachedb_redis/cachedb_redis_dbase.h index fc9d2f1e1c3..3090a69ef61 100644 --- a/modules/cachedb_redis/cachedb_redis_dbase.h +++ b/modules/cachedb_redis/cachedb_redis_dbase.h @@ -28,6 +28,7 @@ #include #include "../../cachedb/cachedb.h" +#include "../../statistics.h" #include "../tls_mgm/api.h" #ifdef HAVE_REDIS_SSL @@ -44,6 +45,14 @@ typedef struct cluster_nodes { redisContext *context; /* actual connection to this node */ struct tls_domain *tls_dom; + uint8_t seen; /* reconciliation flag for topology refresh */ + + /* per-node, per-process counters (pkg memory) */ + unsigned long queries; + unsigned long errors; + unsigned long moved; + + time_t last_activity; /* timestamp of last successful query */ struct cluster_nodes *next; } cluster_node; @@ -84,8 +93,15 @@ extern str fts_index_name; extern str fts_json_prefix; extern int fts_json_mset_expire; +extern int redis_keepalive; + extern struct tls_mgm_binds tls_api; +extern stat_var *redis_stat_queries; +extern stat_var *redis_stat_queries_failed; +extern stat_var *redis_stat_moved; +extern stat_var *redis_stat_topology_refreshes; + enum redis_flag { REDIS_SINGLE_INSTANCE = 1 << 0, REDIS_CLUSTER_INSTANCE = 1 << 1, @@ -96,6 +112,12 @@ enum redis_flag { REDIS_MULTIPLE_HOSTS = 1 << 4, }; +enum cluster_cmd { + CLUSTER_CMD_NONE, + CLUSTER_CMD_SHARDS, + CLUSTER_CMD_SLOTS +}; + typedef struct _redis_con { /* ------ Fixed conn header -------- */ struct cachedb_id *id; @@ -107,9 +129,12 @@ typedef struct _redis_con { unsigned short port; // host/port 
of this connection are extracted here enum redis_flag flags; - unsigned short slots_assigned; /* total slots for cluster */ cluster_node *nodes; /* one or more Redis nodes */ char *json_keyspace; /* currently, only one JSON keyspace per connection */ + cluster_node *slot_table[16384]; /* O(1) slot-to-node lookup */ + enum cluster_cmd cluster_cmd; /* probed once at startup */ + time_t last_topology_refresh; + unsigned int topology_refresh_count; /* circular list of Redis instances to be attempted in failover fashion */ struct _redis_con *next_con; @@ -117,6 +142,9 @@ typedef struct _redis_con { struct _redis_con *current; } redis_con; +int redis_connect_node(redis_con *con, cluster_node *node); +int redis_reconnect_node(redis_con *con, cluster_node *node); + cachedb_con* redis_init(str *url); void redis_destroy(cachedb_con *con); int redis_get(cachedb_con *con,str *attr,str *val); diff --git a/modules/cachedb_redis/cachedb_redis_mi.c b/modules/cachedb_redis/cachedb_redis_mi.c new file mode 100644 index 00000000000..993ff9696a1 --- /dev/null +++ b/modules/cachedb_redis/cachedb_redis_mi.c @@ -0,0 +1,465 @@ +/* + * Copyright (C) 2011 OpenSIPS Solutions + * + * This file is part of opensips, a free SIP server. + * + * opensips is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * opensips is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include "../../dprint.h" +#include "../../mem/mem.h" +#include "../../mi/mi.h" +#include "../../cachedb/cachedb_pool.h" +#include "cachedb_redis_dbase.h" +#include "cachedb_redis_utils.h" +#include "cachedb_redis_mi.h" + +#include +#include +#include + +extern str cache_mod_name; + +static int count_node_slots(redis_con *con, cluster_node *node) +{ + int i, count = 0; + + for (i = 0; i < 16384; i++) + if (con->slot_table[i] == node) + count++; + + return count; +} + +static int count_total_slots(redis_con *con) +{ + int i, count = 0; + + for (i = 0; i < 16384; i++) + if (con->slot_table[i] != NULL) + count++; + + return count; +} + +static int mi_add_redis_con(mi_item_t *arr, redis_con *con) +{ + mi_item_t *con_obj, *nodes_arr, *node_obj; + cluster_node *node; + int is_cluster; + + con_obj = add_mi_object(arr, NULL, 0); + if (!con_obj) + return -1; + + if (add_mi_string(con_obj, MI_SSTR("group"), + con->id->group_name ? con->id->group_name : "", con->id->group_name ? strlen(con->id->group_name) : 0) < 0) + return -1; + + if (add_mi_string(con_obj, MI_SSTR("url"), + con->id->initial_url ? con->id->initial_url : "", con->id->initial_url ? strlen(con->id->initial_url) : 0) < 0) + return -1; + + is_cluster = (con->flags & REDIS_CLUSTER_INSTANCE) ? 
1 : 0; + + if (is_cluster) { + if (add_mi_string(con_obj, MI_SSTR("mode"), + MI_SSTR("cluster")) < 0) + return -1; + } else { + if (add_mi_string(con_obj, MI_SSTR("mode"), + MI_SSTR("single")) < 0) + return -1; + } + + if (is_cluster) { + if (con->cluster_cmd == CLUSTER_CMD_SHARDS) { + if (add_mi_string(con_obj, MI_SSTR("cluster_command"), + MI_SSTR("SHARDS")) < 0) + return -1; + } else { + if (add_mi_string(con_obj, MI_SSTR("cluster_command"), + MI_SSTR("SLOTS")) < 0) + return -1; + } + } + + if (add_mi_number(con_obj, MI_SSTR("topology_refreshes"), + con->topology_refresh_count) < 0) + return -1; + + if (add_mi_number(con_obj, MI_SSTR("last_topology_refresh"), + (double)con->last_topology_refresh) < 0) + return -1; + + nodes_arr = add_mi_array(con_obj, MI_SSTR("nodes")); + if (!nodes_arr) + return -1; + + for (node = con->nodes; node; node = node->next) { + node_obj = add_mi_object(nodes_arr, NULL, 0); + if (!node_obj) + return -1; + + if (add_mi_string(node_obj, MI_SSTR("ip"), + node->ip, strlen(node->ip)) < 0) + return -1; + + if (add_mi_number(node_obj, MI_SSTR("port"), node->port) < 0) + return -1; + + if (node->context) { + if (add_mi_string(node_obj, MI_SSTR("status"), + MI_SSTR("connected")) < 0) + return -1; + } else { + if (add_mi_string(node_obj, MI_SSTR("status"), + MI_SSTR("disconnected")) < 0) + return -1; + } + + if (is_cluster) { + if (add_mi_number(node_obj, MI_SSTR("slots_assigned"), + count_node_slots(con, node)) < 0) + return -1; + } + + if (add_mi_number(node_obj, MI_SSTR("queries"), node->queries) < 0) + return -1; + if (add_mi_number(node_obj, MI_SSTR("errors"), node->errors) < 0) + return -1; + if (add_mi_number(node_obj, MI_SSTR("moved"), node->moved) < 0) + return -1; + + if (node->last_activity > 0) { + if (add_mi_number(node_obj, MI_SSTR("last_activity"), + (double)(time(NULL) - node->last_activity)) < 0) + return -1; + } else { + if (add_mi_number(node_obj, MI_SSTR("last_activity"), -1) < 0) + return -1; + } + } + + if (is_cluster) { + 
if (add_mi_number(con_obj, MI_SSTR("total_slots_mapped"), + count_total_slots(con)) < 0) + return -1; + } + + return 0; +} + +static mi_response_t *mi_cluster_info_impl(const char *group, int group_len) +{ + mi_response_t *resp; + mi_item_t *resp_arr; + cachedb_pool_con **cons; + redis_con *con; + int i, size = 0; + + cons = filter_pool_by_scheme(&cache_mod_name, &size); + if (!cons || size == 0) { + if (cons) + pkg_free(cons); + return init_mi_result_string(MI_SSTR("No redis connections")); + } + + resp = init_mi_result_array(&resp_arr); + if (!resp) { + pkg_free(cons); + return 0; + } + + for (i = 0; i < size; i++) { + con = (redis_con *)cons[i]; + + if (group && ((!con->id->group_name || strlen(con->id->group_name) != group_len) || + memcmp(con->id->group_name, group, group_len) != 0)) + continue; + + if (mi_add_redis_con(resp_arr, con) < 0) { + pkg_free(cons); + free_mi_response(resp); + return 0; + } + } + + pkg_free(cons); + return resp; +} + +mi_response_t *mi_redis_cluster_info(const mi_params_t *params, + struct mi_handler *async_hdl) +{ + return mi_cluster_info_impl(NULL, 0); +} + +mi_response_t *mi_redis_cluster_info_1(const mi_params_t *params, + struct mi_handler *async_hdl) +{ + char *group; + int group_len; + + if (get_mi_string_param(params, "group", &group, &group_len) < 0) + return init_mi_param_error(); + + return mi_cluster_info_impl(group, group_len); +} + +static mi_response_t *mi_cluster_refresh_impl(const char *group, int group_len) +{ + mi_response_t *resp; + mi_item_t *resp_arr, *con_obj; + cachedb_pool_con **cons; + redis_con *con; + int i, size = 0; + + cons = filter_pool_by_scheme(&cache_mod_name, &size); + if (!cons || size == 0) { + if (cons) + pkg_free(cons); + return init_mi_result_string(MI_SSTR("No redis connections")); + } + + resp = init_mi_result_array(&resp_arr); + if (!resp) { + pkg_free(cons); + return 0; + } + + for (i = 0; i < size; i++) { + con = (redis_con *)cons[i]; + + if (group && ((!con->id->group_name || 
strlen(con->id->group_name) != group_len) || + memcmp(con->id->group_name, group, group_len) != 0)) + continue; + + con_obj = add_mi_object(resp_arr, NULL, 0); + if (!con_obj) { + pkg_free(cons); + free_mi_response(resp); + return 0; + } + + if (add_mi_string(con_obj, MI_SSTR("group"), + con->id->group_name ? con->id->group_name : "", con->id->group_name ? strlen(con->id->group_name) : 0) < 0) { + pkg_free(cons); + free_mi_response(resp); + return 0; + } + + if (con->flags & REDIS_CLUSTER_INSTANCE) { + /* bypass rate limit */ + con->last_topology_refresh = 0; + if (refresh_cluster_topology(con) < 0) { + if (add_mi_string(con_obj, MI_SSTR("status"), + MI_SSTR("error")) < 0) { + pkg_free(cons); + free_mi_response(resp); + return 0; + } + } else { + if (add_mi_string(con_obj, MI_SSTR("status"), + MI_SSTR("ok")) < 0) { + pkg_free(cons); + free_mi_response(resp); + return 0; + } + } + } else { + if (add_mi_string(con_obj, MI_SSTR("status"), + MI_SSTR("skipped (not cluster mode)")) < 0) { + pkg_free(cons); + free_mi_response(resp); + return 0; + } + } + } + + pkg_free(cons); + return resp; +} + +mi_response_t *mi_redis_cluster_refresh(const mi_params_t *params, + struct mi_handler *async_hdl) +{ + return mi_cluster_refresh_impl(NULL, 0); +} + +mi_response_t *mi_redis_cluster_refresh_1(const mi_params_t *params, + struct mi_handler *async_hdl) +{ + char *group; + int group_len; + + if (get_mi_string_param(params, "group", &group, &group_len) < 0) + return init_mi_param_error(); + + return mi_cluster_refresh_impl(group, group_len); +} + +static mi_response_t *mi_ping_nodes_impl(const char *group, int group_len) +{ + mi_response_t *resp; + mi_item_t *resp_arr, *con_obj, *nodes_arr, *node_obj; + cachedb_pool_con **cons; + redis_con *con; + cluster_node *node; + redisReply *rpl; + struct timeval t_start, t_end; + long latency_us; + int i, size = 0; + + cons = filter_pool_by_scheme(&cache_mod_name, &size); + if (!cons || size == 0) { + if (cons) + pkg_free(cons); + return 
init_mi_result_string(MI_SSTR("No redis connections")); + } + + resp = init_mi_result_array(&resp_arr); + if (!resp) { + pkg_free(cons); + return 0; + } + + for (i = 0; i < size; i++) { + con = (redis_con *)cons[i]; + + if (group && ((!con->id->group_name || strlen(con->id->group_name) != group_len) || + memcmp(con->id->group_name, group, group_len) != 0)) + continue; + + con_obj = add_mi_object(resp_arr, NULL, 0); + if (!con_obj) { + pkg_free(cons); + free_mi_response(resp); + return 0; + } + + if (add_mi_string(con_obj, MI_SSTR("group"), + con->id->group_name ? con->id->group_name : "", con->id->group_name ? strlen(con->id->group_name) : 0) < 0) { + pkg_free(cons); + free_mi_response(resp); + return 0; + } + + nodes_arr = add_mi_array(con_obj, MI_SSTR("nodes")); + if (!nodes_arr) { + pkg_free(cons); + free_mi_response(resp); + return 0; + } + + for (node = con->nodes; node; node = node->next) { + node_obj = add_mi_object(nodes_arr, NULL, 0); + if (!node_obj) { + pkg_free(cons); + free_mi_response(resp); + return 0; + } + + if (add_mi_string(node_obj, MI_SSTR("ip"), + node->ip, strlen(node->ip)) < 0) { + pkg_free(cons); + free_mi_response(resp); + return 0; + } + + if (add_mi_number(node_obj, MI_SSTR("port"), node->port) < 0) { + pkg_free(cons); + free_mi_response(resp); + return 0; + } + + if (!node->context) { + if (add_mi_string(node_obj, MI_SSTR("status"), + MI_SSTR("disconnected")) < 0) { + pkg_free(cons); + free_mi_response(resp); + return 0; + } + if (add_mi_number(node_obj, MI_SSTR("latency_us"), -1) < 0) { + pkg_free(cons); + free_mi_response(resp); + return 0; + } + continue; + } + + gettimeofday(&t_start, NULL); + rpl = redisCommand(node->context, "PING"); + gettimeofday(&t_end, NULL); + + latency_us = (t_end.tv_sec - t_start.tv_sec) * 1000000 + + (t_end.tv_usec - t_start.tv_usec); + + if (rpl && rpl->type == REDIS_REPLY_STATUS) { + if (add_mi_string(node_obj, MI_SSTR("status"), + MI_SSTR("reachable")) < 0) { + freeReplyObject(rpl); + pkg_free(cons); + 
free_mi_response(resp); + return 0; + } + if (add_mi_number(node_obj, MI_SSTR("latency_us"), + latency_us) < 0) { + freeReplyObject(rpl); + pkg_free(cons); + free_mi_response(resp); + return 0; + } + } else { + if (add_mi_string(node_obj, MI_SSTR("status"), + MI_SSTR("unreachable")) < 0) { + if (rpl) freeReplyObject(rpl); + pkg_free(cons); + free_mi_response(resp); + return 0; + } + if (add_mi_number(node_obj, MI_SSTR("latency_us"), -1) < 0) { + if (rpl) freeReplyObject(rpl); + pkg_free(cons); + free_mi_response(resp); + return 0; + } + } + + if (rpl) freeReplyObject(rpl); + } + } + + pkg_free(cons); + return resp; +} + +mi_response_t *mi_redis_ping_nodes(const mi_params_t *params, + struct mi_handler *async_hdl) +{ + return mi_ping_nodes_impl(NULL, 0); +} + +mi_response_t *mi_redis_ping_nodes_1(const mi_params_t *params, + struct mi_handler *async_hdl) +{ + char *group; + int group_len; + + if (get_mi_string_param(params, "group", &group, &group_len) < 0) + return init_mi_param_error(); + + return mi_ping_nodes_impl(group, group_len); +} diff --git a/modules/cachedb_redis/cachedb_redis_mi.h b/modules/cachedb_redis/cachedb_redis_mi.h new file mode 100644 index 00000000000..b774e236915 --- /dev/null +++ b/modules/cachedb_redis/cachedb_redis_mi.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2011 OpenSIPS Solutions + * + * This file is part of opensips, a free SIP server. + * + * opensips is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * opensips is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef CACHEDB_REDIS_MI_H +#define CACHEDB_REDIS_MI_H + +#include "../../mi/mi.h" + +#define MI_REDIS_CLUSTER_INFO "redis_cluster_info" +#define MI_REDIS_CLUSTER_REFRESH "redis_cluster_refresh" +#define MI_REDIS_PING_NODES "redis_ping_nodes" + +mi_response_t *mi_redis_cluster_info(const mi_params_t *params, + struct mi_handler *async_hdl); +mi_response_t *mi_redis_cluster_info_1(const mi_params_t *params, + struct mi_handler *async_hdl); +mi_response_t *mi_redis_cluster_refresh(const mi_params_t *params, + struct mi_handler *async_hdl); +mi_response_t *mi_redis_cluster_refresh_1(const mi_params_t *params, + struct mi_handler *async_hdl); +mi_response_t *mi_redis_ping_nodes(const mi_params_t *params, + struct mi_handler *async_hdl); +mi_response_t *mi_redis_ping_nodes_1(const mi_params_t *params, + struct mi_handler *async_hdl); + +#endif diff --git a/modules/cachedb_redis/cachedb_redis_utils.c b/modules/cachedb_redis/cachedb_redis_utils.c index 413842455f6..9f09d056e36 100644 --- a/modules/cachedb_redis/cachedb_redis_utils.c +++ b/modules/cachedb_redis/cachedb_redis_utils.c @@ -32,6 +32,7 @@ #include #include +#include #include #define is_valid(p,end) ((p) && (p)<(end)) @@ -79,30 +80,71 @@ uint16_t crc16(const char *buf, int len) return crc; } -unsigned int redisHash(redis_con *con, str* key) +#define REDIS_CLUSTER_HASH_SLOTS 16384 + +/* + * Extract the hash tag from a key per the Redis Cluster specification: + * - Find the first '{'. If found, find the first '}' after it. + * - If the substring between them is non-empty, hash only that substring. + * - Otherwise, hash the entire key. 
+ */ +static void extract_hash_tag(const char *key, int key_len, + const char **tag, int *tag_len) +{ + int i, open = -1; + + if (!key || key_len <= 0) { + *tag = key; + *tag_len = key_len > 0 ? key_len : 0; + return; + } + + for (i = 0; i < key_len; i++) { + if (key[i] == '{') { + open = i; + break; + } + } + + if (open >= 0) { + for (i = open + 1; i < key_len; i++) { + if (key[i] == '}') { + if (i - open - 1 > 0) { + *tag = key + open + 1; + *tag_len = i - open - 1; + return; + } + break; + } + } + } + + *tag = key; + *tag_len = key_len; +} + +unsigned int redisHash(str *key) { - return crc16(key->s,key->len) & con->slots_assigned; + const char *tag; + int tag_len; + + extract_hash_tag(key->s, key->len, &tag, &tag_len); + return crc16(tag, tag_len) % REDIS_CLUSTER_HASH_SLOTS; } cluster_node *get_redis_connection(redis_con *con,str *key) { unsigned short hash_slot; - cluster_node *it; if (con->flags & REDIS_SINGLE_INSTANCE) { LM_DBG("Single redis connection, returning %p\n",con->nodes); return con->nodes; - } else { - hash_slot = redisHash(con, key); - for (it=con->nodes;it;it=it->next) { - - if (it->start_slot <= hash_slot && it->end_slot >= hash_slot) { - LM_DBG("Redis cluster connection, matched con %p for slot %u \n",it,hash_slot); - return it; - } - } - return NULL; } + + hash_slot = redisHash(key); + LM_DBG("Redis cluster connection, slot %u -> %p\n", + hash_slot, con->slot_table[hash_slot]); + return con->slot_table[hash_slot]; } cluster_node *get_redis_connection_by_endpoint(redis_con *con, redis_moved *redis_info) @@ -112,254 +154,382 @@ cluster_node *get_redis_connection_by_endpoint(redis_con *con, redis_moved *redi if (con->flags & REDIS_SINGLE_INSTANCE) { LM_DBG("Single redis connection, returning %p\n",con->nodes); return con->nodes; - } else { - for (it=con->nodes;it;it=it->next) { - if (match_prefix(redis_info->endpoint.s, redis_info->endpoint.len, it->ip, strlen(it->ip))) { - if (it->port == redis_info->port) { - // Removed slot comparison as it 
may be a little too aggressive of a match - // Code is still here in the event that it needs to be added back in - //if (it->start_slot <= redis_info->slot && it->end_slot >= redis_info->slot) { - LM_DBG("Redis cluster connection, matched con %p for endpoint: %.*s:%d slot: [%u] %u [%u] \n", it, redis_info->endpoint.len, redis_info->endpoint.s, redis_info->port, it->start_slot, redis_info->slot, it->end_slot); - return it; - //} - } - } + } + + for (it=con->nodes;it;it=it->next) { + str host_str = {it->ip, strlen(it->ip)}; + str ep_str = {(char *)redis_info->endpoint.s, redis_info->endpoint.len}; + if (str_match(&host_str, &ep_str) && it->port == redis_info->port) { + LM_DBG("Redis cluster connection, matched con %p for " + "endpoint: %.*s:%d\n", it, + redis_info->endpoint.len, redis_info->endpoint.s, + redis_info->port); + return it; } - LM_ERR("Redis cluster connection, No match found for endpoint: %.*s:%d slot %u\n", redis_info->endpoint.len, redis_info->endpoint.s, redis_info->port, redis_info->slot); - return NULL; } + + LM_ERR("Redis cluster connection, No match found for endpoint: " + "%.*s:%d slot %u\n", redis_info->endpoint.len, + redis_info->endpoint.s, redis_info->port, redis_info->slot); + return NULL; } void destroy_cluster_nodes(redis_con *con) { - cluster_node *new,*foo; + cluster_node *node, *next; LM_DBG("destroying cluster %p\n",con); - new = con->nodes; - while (new) { - foo = new->next; - redisFree(new->context); - new->context = NULL; - if (use_tls && new->tls_dom) - tls_api.release_domain(new->tls_dom); - pkg_free(new); - new = foo; + node = con->nodes; + while (node) { + next = node->next; + redisFree(node->context); + node->context = NULL; + if (use_tls && node->tls_dom) + tls_api.release_domain(node->tls_dom); + pkg_free(node->ip); + pkg_free(node); + node = next; } + con->nodes = NULL; + memset(con->slot_table, 0, sizeof(con->slot_table)); } -struct datavalues { - int count; - char **redisdata; -}; +cluster_node 
*find_or_create_node(redis_con *con, const char *ip, + int ip_len, unsigned short port) +{ + cluster_node *node; + str src, dst; + + /* walk existing node list, compare using str_match */ + for (node = con->nodes; node; node = node->next) { + if (node->port == port) { + str host_str = {node->ip, strlen(node->ip)}; + str ip_str = {(char *)ip, ip_len}; + if (str_match(&host_str, &ip_str)) { + node->seen = 1; + return node; + } + } + } -int chkmalloc1 (char *handle) { - if ( handle == NULL || handle == 0) { - LM_ERR("Error1 while parsing cluster redisdata \n"); - return -1; + /* not found — allocate new node */ + node = pkg_malloc(sizeof(cluster_node)); + if (!node) { + LM_ERR("pkg_malloc failed for cluster_node\n"); + return NULL; } - return 1; -} -int chkmalloc2 (struct datavalues *handle) { - if ( handle == NULL || handle == 0) { - LM_ERR("Error2 while parsing cluster redisdata \n"); - return -1; + memset(node, 0, sizeof(cluster_node)); + + /* duplicate IP using OpenSIPS safe string copy */ + src.s = (char *)ip; + src.len = ip_len; + if (pkg_nt_str_dup(&dst, &src) != 0) { + LM_ERR("pkg_nt_str_dup failed for node IP\n"); + pkg_free(node); + return NULL; + } + node->ip = dst.s; + node->port = port; + node->seen = 1; + + /* connect to the new node */ + if (redis_connect_node(con, node) < 0) { + LM_ERR("failed to connect to new node %.*s:%d\n", ip_len, ip, port); + /* keep the node in the list even if connect fails — + * it will be retried on next use via redis_reconnect_node() */ } - return 1; + + /* insert at head of node list */ + node->next = con->nodes; + con->nodes = node; + + LM_DBG("created new cluster node %s:%d (%p)\n", node->ip, node->port, node); + return node; } -int chkmalloc3 (struct datavalues **handle) { - if ( handle == NULL || handle == 0) { - LM_ERR("Error3 while parsing cluster redisdata \n"); +int parse_cluster_shards(redis_con *con, redisReply *reply) +{ + size_t i, j, k, n, s; + redisReply *shard, *key, *val, *slots_array, *nodes_array; + 
redisReply *node_map, *nk, *nv; + const char *master_ip, *role; + long long master_port; + long long start, end; + cluster_node *node; + + if (!reply || reply->type != REDIS_REPLY_ARRAY) return -1; + + for (i = 0; i < reply->elements; i++) { + shard = reply->element[i]; + if (!shard || shard->type != REDIS_REPLY_ARRAY) + continue; + + slots_array = NULL; + nodes_array = NULL; + + /* walk key-value pairs to find "slots" and "nodes" */ + for (j = 0; j + 1 < shard->elements; j += 2) { + key = shard->element[j]; + val = shard->element[j + 1]; + if (!key || !key->str || !val) + continue; + if (strcmp(key->str, "slots") == 0) + slots_array = val; + else if (strcmp(key->str, "nodes") == 0) + nodes_array = val; + } + + if (!slots_array || !nodes_array) + continue; + + /* find master node in nodes array */ + master_ip = NULL; + master_port = 0; + for (n = 0; n < nodes_array->elements; n++) { + node_map = nodes_array->element[n]; + if (!node_map || node_map->type != REDIS_REPLY_ARRAY) + continue; + + const char *ip = NULL; + long long port = 0; + role = NULL; + + for (k = 0; k + 1 < node_map->elements; k += 2) { + nk = node_map->element[k]; + nv = node_map->element[k + 1]; + if (!nk || !nk->str || !nv) + continue; + if (strcmp(nk->str, "ip") == 0 && nv->str) + ip = nv->str; + else if (strcmp(nk->str, "port") == 0) + port = nv->integer; + else if (strcmp(nk->str, "role") == 0 && nv->str) + role = nv->str; + } + + if (role && strcmp(role, "master") == 0) { + master_ip = ip; + master_port = port; + break; + } + } + + if (!master_ip || master_port < 1 || master_port > 65535) + continue; + + node = find_or_create_node(con, master_ip, strlen(master_ip), + (unsigned short)master_port); + if (!node) + continue; + + /* assign slot ranges — pairs of [start, end] integers */ + for (s = 0; s + 1 < slots_array->elements; s += 2) { + if (!slots_array->element[s] || !slots_array->element[s + 1]) + continue; + start = slots_array->element[s]->integer; + end = slots_array->element[s + 
1]->integer; + for (long long slot = start; slot <= end; slot++) { + if (slot >= 0 && slot < 16384) + con->slot_table[slot] = node; + } + } } - return 1; + + return 0; } -int chkmalloc4 (char **handle) { - if ( handle == NULL || handle == 0) { - LM_ERR("Error4 while parsing cluster redisdata \n"); +int parse_cluster_slots(redis_con *con, redisReply *reply) +{ + size_t i; + redisReply *entry, *master; + long long start, end; + const char *ip; + long long port; + cluster_node *node; + + if (!reply || reply->type != REDIS_REPLY_ARRAY) return -1; - } - return 1; -} -int explode(char *line, const char *delimeters, struct datavalues **newret) { + for (i = 0; i < reply->elements; i++) { + entry = reply->element[i]; + if (!entry || entry->type != REDIS_REPLY_ARRAY || entry->elements < 3) + continue; - int counter = 0; - char *result = NULL; - char *data = NULL; + if (!entry->element[0] || !entry->element[1] || !entry->element[2]) + continue; - data = pkg_malloc((strlen(line) * sizeof(char)) +1); - if (!chkmalloc1(data)) return 0; - strcpy(data,line); + start = entry->element[0]->integer; + end = entry->element[1]->integer; + master = entry->element[2]; - result = strtok(data, delimeters); - while (result != NULL ) { - newret[0]->redisdata[counter] = pkg_malloc((strlen(result) * sizeof(char) ) +1 ); - if (chkmalloc1(newret[0]->redisdata[counter])) { - strcpy(newret[0]->redisdata[counter],result); - counter++; - result = strtok(NULL, delimeters); - } else { return 0; } - } - newret[0]->count = counter-1; + if (!master || master->type != REDIS_REPLY_ARRAY || master->elements < 2) + continue; - pkg_free(data); + if (!master->element[0] || !master->element[1]) + continue; - return 1; + ip = master->element[0]->str; + port = master->element[1]->integer; -} + if (port < 1 || port > 65535) + continue; -int build_cluster_nodes(redis_con *con,char *info,int size) -{ + /* empty IP means "use the queried node's address" */ + if (!ip || strlen(ip) == 0) + ip = con->host; - cluster_node 
*new; - const char *delimeters = "\n"; - int i = 0, j = 0; - int masters = 1, count = 0; - char *ip, *block = NULL; - unsigned short port,start_slot,end_slot; - int len; - struct datavalues **newret1, **newret2, **newret3; - - // Define **pointers for new structures - newret1 = pkg_malloc(sizeof(struct datavalues *)); - if (!chkmalloc3(newret1)) goto error; - newret2 = pkg_malloc(sizeof(struct datavalues *)); - if (!chkmalloc3(newret2)) goto error; - newret3 = pkg_malloc(sizeof(struct datavalues *)); - if (!chkmalloc3(newret3)) goto error; - - // Allocate space for the structures - newret1[0] = pkg_malloc(sizeof(struct datavalues)); - if (!chkmalloc2(newret1[0])) goto error; - newret2[0] = pkg_malloc(sizeof(struct datavalues)); - if (!chkmalloc2(newret2[0])) goto error; - newret3[0] = pkg_malloc(sizeof(struct datavalues)); - if (!chkmalloc2(newret3[0])) goto error; - - // Allocate space for data item "redisdata" within the structures - newret1[0]->redisdata = pkg_malloc((strlen(info) * sizeof(char)) +1); - if (!chkmalloc4(newret1[0]->redisdata)) goto error; - newret2[0]->redisdata = pkg_malloc((strlen(info) * sizeof(char)) +1); - if (!chkmalloc4(newret2[0]->redisdata)) goto error; - newret3[0]->redisdata = pkg_malloc((strlen(info) * sizeof(char)) +1); - if (!chkmalloc4(newret3[0]->redisdata)) goto error; - - // Initialise the counter - newret1[0]->count = 0; - newret2[0]->count = 0; - newret3[0]->count = 0; - - - // Redis really only requires two connections ("myself,master" && one other master) || (at least two masters) - // but this will supply info for upto 1000 masters due to current Opensips design (hopefully representing the total hash slots) - // will always connect to myself,master - strstr(info,"myself,master")?(count = 999):(count = 1000); - - // Cluster data into Array - if (explode(info,delimeters,newret1)) { - for (i=0;i<=newret1[0]->count;i++) { - LM_DBG("Nodes : %s\n",newret1[0]->redisdata[i]); - - if ((strstr(newret1[0]->redisdata[i],"master") && 
(masters <= count)) || strstr(newret1[0]->redisdata[i],"myself,master")) { - - start_slot = end_slot = port = 0; - ip = NULL; - masters++; - - // Break up the row - if (explode(newret1[0]->redisdata[i]," ",newret2)) { - for (j=0 ; j <= newret2[0]->count ; j++ ) { - - if (strstr(newret1[0]->redisdata[i],"myself") && strstr(newret2[0]->redisdata[j],"myself")) { - //myself no ip - if (ip == NULL) { - ip = con->id->host; - port = con->id->port; - LM_DBG("Myself and no IP, set ip to main host %s\n",con->id->host); - if (i==0) masters--; - } else - LM_DBG("Master already discovered to not be myself, not going to main host \n"); - - } else { - //Get the ip and port of other master - if (strstr(newret2[0]->redisdata[j],":") && (strlen(newret2[0]->redisdata[j]) > 5)) { - - if (explode(newret2[0]->redisdata[j],":",newret3)) { - ip = (char *)newret3[0]->redisdata[0]; - port = atoi(newret3[0]->redisdata[1]); - } else { block = ":parsing ip/port"; goto error;} - } - } - //Get slots - if (strstr(newret2[0]->redisdata[j],"-") && (strlen(newret2[0]->redisdata[j]) > 2)) { - if (explode(newret2[0]->redisdata[j],"-",newret3)) { - start_slot = atoi(newret3[0]->redisdata[0]); - end_slot = atoi(newret3[0]->redisdata[1]); - } else {block = ":parsing slots"; goto error;} - - } - } - - } else { block = "row to array"; goto error;} - - if ( ip == NULL || !(port > 0) || (start_slot > end_slot) || !(end_slot > 0) ) {block = ":processing row"; goto error;} - - len = strlen(ip); - new = pkg_malloc(sizeof(cluster_node) + len + 1); - if (!new) { - LM_ERR("no more pkg\n"); - goto error; - } + node = find_or_create_node(con, ip, strlen(ip), (unsigned short)port); + if (!node) + continue; - memset(new,0,sizeof(cluster_node) + len + 1); + for (long long slot = start; slot <= end; slot++) { + if (slot >= 0 && slot < 16384) + con->slot_table[slot] = node; + } + } - new->ip = (char *)(new + 1); - strcpy(new->ip,ip); - new->port = port; - new->start_slot = start_slot; - new->end_slot = end_slot; + return 
0; +} - LM_DBG("Saving connection %p for ip %s port %hu start %hu end %hu\n",new,ip,port,start_slot,end_slot); +int probe_cluster_command(redis_con *con, redisContext *ctx) +{ + redisReply *reply; + + /* try CLUSTER SHARDS first (Redis 7.0+) */ + reply = redisCommand(ctx, "CLUSTER SHARDS"); + if (reply && reply->type == REDIS_REPLY_ARRAY) { + con->cluster_cmd = CLUSTER_CMD_SHARDS; + LM_DBG("using CLUSTER SHARDS for topology\n"); + if (parse_cluster_shards(con, reply) < 0) { + freeReplyObject(reply); + return -1; + } + freeReplyObject(reply); + return 0; + } + if (reply) + freeReplyObject(reply); + + /* fall back to CLUSTER SLOTS (Redis 3.0+) */ + reply = redisCommand(ctx, "CLUSTER SLOTS"); + if (reply && reply->type == REDIS_REPLY_ARRAY) { + con->cluster_cmd = CLUSTER_CMD_SLOTS; + LM_DBG("using CLUSTER SLOTS for topology\n"); + if (parse_cluster_slots(con, reply) < 0) { + freeReplyObject(reply); + return -1; + } + freeReplyObject(reply); + return 0; + } + if (reply) + freeReplyObject(reply); - if (con->nodes == NULL) - con->nodes = new; - else { - new->next = con->nodes; - con->nodes = new; - } - } + con->cluster_cmd = CLUSTER_CMD_NONE; + return -1; +} + +int refresh_cluster_topology(redis_con *con) +{ + cluster_node *node, *prev, *next; + redisReply *reply = NULL; + time_t now; + int s; + + if (!(con->flags & REDIS_CLUSTER_INSTANCE)) + return 0; + + /* rate-limit: at most once per second */ + now = time(NULL); + if ((now - con->last_topology_refresh) < 1) + return 0; + + /* query a reachable node using the cached command */ + for (node = con->nodes; node; node = node->next) { + if (!node->context) + continue; + if (con->cluster_cmd == CLUSTER_CMD_SHARDS) + reply = redisCommand(node->context, "CLUSTER SHARDS"); + else + reply = redisCommand(node->context, "CLUSTER SLOTS"); + if (reply && reply->type == REDIS_REPLY_ARRAY) + break; + if (reply) { + freeReplyObject(reply); + reply = NULL; } + } - } else { block = ":initial"; goto error;} + if (!reply) { + LM_ERR("all 
nodes unreachable, cannot refresh topology\n"); + return -1; + } - pkg_free(newret1); - pkg_free(newret2); - pkg_free(newret3); + /* mark all existing nodes as unseen */ + for (node = con->nodes; node; node = node->next) + node->seen = 0; + + /* clear slot table */ + memset(con->slot_table, 0, sizeof(con->slot_table)); + + /* parse — each parser calls find_or_create_node and fills slot_table */ + if (con->cluster_cmd == CLUSTER_CMD_SHARDS) + parse_cluster_shards(con, reply); + else + parse_cluster_slots(con, reply); + + freeReplyObject(reply); + + /* remove nodes no longer in the cluster */ + prev = NULL; + node = con->nodes; + while (node) { + next = node->next; + if (!node->seen) { + /* unlink from list */ + if (prev) + prev->next = next; + else + con->nodes = next; + /* defensive: clear any stale slot_table pointers */ + for (s = 0; s < 16384; s++) { + if (con->slot_table[s] == node) + con->slot_table[s] = NULL; + } + LM_DBG("removing stale node %s:%d\n", node->ip, node->port); + redisFree(node->context); + if (use_tls && node->tls_dom) + tls_api.release_domain(node->tls_dom); + pkg_free(node->ip); + pkg_free(node); + } else { + prev = node; + } + node = next; + } + con->last_topology_refresh = now; + con->topology_refresh_count++; + update_stat(redis_stat_topology_refreshes, 1); + LM_DBG("topology refresh #%u complete\n", con->topology_refresh_count); return 0; - -error: - LM_ERR("Error while parsing cluster nodes in %s\n",block); - destroy_cluster_nodes(con); - return -1; } /* When Redis is operating as a cluster, it is possible (very likely) that a MOVED redirection will be returned by the Redis nodes that received the request. The general format of the reply from Redis is: - MOVED slot [IP|FQDN]:port + PREFIX slot [IP|FQDN]:port - This routine will parse the Redis MOVED reply into its components. + This routine will parse the Redis redirect reply into its components. 
Note that the redisReply struct MUST be released outside of this routine to avoid a memory leak. The out->endpoint pointer must not be used after the redisReply has been released. The parsed data is stored into the following redis_moved struct: - + typedef struct { int slot; const_str endpoint; @@ -367,7 +537,8 @@ int build_cluster_nodes(redis_con *con,char *info,int size) } redis_moved; */ -int parse_moved_reply(redisReply *reply, redis_moved *out) { +static int parse_redirect_reply(redisReply *reply, redis_moved *out, + const char *prefix, size_t prefix_len) { int i; int slot = 0; const char *p; @@ -377,18 +548,18 @@ int parse_moved_reply(redisReply *reply, redis_moved *out) { const char *port_start; int port = REDIS_DF_PORT; // Default to Redis standard port - if (!reply || !reply->str || reply->len < MOVED_PREFIX_LEN || !out) + if (!reply || !reply->str || reply->len < prefix_len || !out) return ERR_INVALID_REPLY; p = reply->str; end = reply->str + reply->len; - for (i = 0; i < MOVED_PREFIX_LEN; ++i) { - if (p[i] != MOVED_PREFIX[i]) { + for (i = 0; i < prefix_len; ++i) { + if (p[i] != prefix[i]) { return ERR_INVALID_REPLY; } } - p += MOVED_PREFIX_LEN; + p += prefix_len; // Parse slot number while (p < end && *p >= '0' && *p <= '9') { @@ -396,7 +567,9 @@ int parse_moved_reply(redisReply *reply, redis_moved *out) { p++; if (slot > 16383) return ERR_INVALID_SLOT; } - if (slot == 0 && (p == reply->str + MOVED_PREFIX_LEN || *(p - 1) < '0' || *(p - 1) > '9')) + if (slot == 0 && (p == reply->str + prefix_len || *(p - 1) < '0' || *(p - 1) > '9')) + return ERR_INVALID_SLOT; + if (slot > 16383) return ERR_INVALID_SLOT; // Skip spaces @@ -443,3 +616,8 @@ int parse_moved_reply(redisReply *reply, redis_moved *out) { return 0; } + +int parse_moved_reply(redisReply *reply, redis_moved *out) { + return parse_redirect_reply(reply, out, MOVED_PREFIX, MOVED_PREFIX_LEN); +} + diff --git a/modules/cachedb_redis/cachedb_redis_utils.h b/modules/cachedb_redis/cachedb_redis_utils.h 
index d8213ba7b73..8278db604c3 100644 --- a/modules/cachedb_redis/cachedb_redis_utils.h +++ b/modules/cachedb_redis/cachedb_redis_utils.h @@ -31,17 +31,23 @@ #define MOVED_PREFIX "MOVED " #define MOVED_PREFIX_LEN (sizeof(MOVED_PREFIX) - 1) + #define ERR_INVALID_REPLY -1 #define ERR_INVALID_SLOT -2 #define ERR_INVALID_PORT -3 #include "cachedb_redis_dbase.h" -int build_cluster_nodes(redis_con *con,char *info,int size); cluster_node *get_redis_connection(redis_con *con,str *key); cluster_node *get_redis_connection_by_endpoint(redis_con *con, redis_moved *redis_info); void destroy_cluster_nodes(redis_con *con); int parse_moved_reply(redisReply *reply, redis_moved *out); +int probe_cluster_command(redis_con *con, redisContext *ctx); +int parse_cluster_shards(redis_con *con, redisReply *reply); +int parse_cluster_slots(redis_con *con, redisReply *reply); +cluster_node *find_or_create_node(redis_con *con, const char *ip, + int ip_len, unsigned short port); +int refresh_cluster_topology(redis_con *con); static inline int match_prefix(const char *buf, size_t len, const char *prefix, size_t prefix_len) { size_t i; diff --git a/modules/cachedb_redis/doc/cachedb_redis_admin.xml b/modules/cachedb_redis/doc/cachedb_redis_admin.xml index dd7edb2e631..258b0bff2fe 100644 --- a/modules/cachedb_redis/doc/cachedb_redis_admin.xml +++ b/modules/cachedb_redis/doc/cachedb_redis_admin.xml @@ -85,6 +85,107 @@ +
+ Redis Cluster Support (Topology) + + When connecting to a Redis Cluster, the module automatically detects + cluster mode and manages the full slot-to-node topology at runtime. + No extra configuration is needed beyond the standard + parameter. + + +
+ Topology Discovery + + At startup, the module probes the Redis server using the + CLUSTER SHARDS command (available in Redis 7.0+). + If the server does not support this command, it falls back to + CLUSTER SLOTS (available in Redis 3.0+). + If neither command succeeds, the connection is treated as a + single-instance (non-cluster) connection. + + + The discovered topology is stored internally in an O(1) slot lookup + table (16384 slots), mapping each slot directly to its owning master + node. + +
+ +
+ Automatic Topology Refresh + + The module automatically refreshes the cluster topology at runtime + when any of the following events occur: + + + + A MOVED redirection is received from a + cluster node (indicating a permanent slot migration). + + + A connection failure (NULL reply) occurs + and the node cannot be reconnected. + + + A query targets a slot with no known owner, + suggesting the topology is stale. + + + An operator triggers a manual refresh via the + MI command. + + + + Automatic refreshes are rate-limited to at most once per second to + avoid excessive load on the cluster. The MI-triggered refresh + bypasses this rate limit. + +
+ +
+ MOVED Redirection + + The module transparently handles Redis Cluster MOVED + redirections: + + + + MOVED — indicates a permanent slot + migration. The module updates its slot map, redirects the + query to the new node, and triggers a topology refresh so + all future queries go directly to the correct node. + + + + If a redirection points to a node that is not yet known, the module + dynamically creates a new node entry, establishes a connection, and + retries the query. + +
+ +
+ Hash Tags + + The module supports Redis Cluster + hash tags, which allow related keys to be + co-located on the same cluster node. If a key contains a + {...} substring, only the content between the + first { and the next } is + used for hash slot calculation. For example, the keys + {user1000}.profile and + {user1000}.settings will always land on the + same node, enabling multi-key operations. + + + If the braces are empty ({}) or there is no + closing brace, the entire key is hashed as usual. + +
+ + + +
+
Limitations @@ -112,7 +213,7 @@ - If a is defined, the tls_mgm module will need to be loaded as well. + If a is defined, the tls_mgm and tls_openssl modules will need to be loaded as well. @@ -185,6 +286,109 @@ cache_remove("redis:cluster1", "key"); ... + +
+ Authentication + + The module supports three authentication modes based on the URL format: + + URL Authentication Formats + + + + URL Format + AUTH Command + Use Case + + + + + redis:group://:password@host:port/ + AUTH password + Classic Redis (< 6.0) with requirepass + + + redis:group://username:password@host:port/ + AUTH username password + Redis 6+ ACL with per-user credentials + + + redis:group://host:port/ + (none) + Non-authenticated Redis + + + +
+ + Important: For classic password-only + authentication, the URL must include a colon before the password + (:password@host). Writing + password@host without the colon will place the + credential in the username field of the URL parser, and authentication + will be skipped. + + + When connecting to a Redis Cluster with authentication, all discovered + cluster nodes use the same credentials from the URL. + +
+ +
+ Unix Socket + + Starting with this version, the module supports connecting to a + local Redis instance via a Unix domain socket instead of TCP. + This can provide lower latency and avoid network overhead for + co-located Redis instances. + + + To use a Unix socket, add a socket= parameter + to the URL query string: + + +# basic Unix socket (no auth) +modparam("cachedb_redis", "cachedb_url", + "redis:local://localhost/?socket=/var/run/redis/redis.sock") + +# Unix socket with password auth +modparam("cachedb_redis", "cachedb_url", + "redis:local://:password@localhost/?socket=/var/run/redis/redis.sock") + +# Unix socket with ACL auth (Redis 6+) and database selection +modparam("cachedb_redis", "cachedb_url", + "redis:local://user:pass@localhost/2?socket=/var/run/redis/redis.sock") + + + Constraints: + + + + Unix socket connections are always treated as + single-instance mode (no Redis Cluster + support over Unix sockets). + + + Unix socket cannot be combined with multiple hosts (failover). + Specifying both will cause a startup error. + + + TLS is not applicable to Unix socket connections and will be + ignored with a warning if use_tls is enabled. + + + TCP keepalive is not applicable to Unix sockets and is + automatically skipped. + + + + The MI command will display + Unix socket connections with transport=unix and + the socket path. The command + works normally with Unix socket connections. + +
+
@@ -203,7 +407,7 @@ cache_remove("redis:cluster1", "key"); Set <varname>connect_timeout</varname> parameter ... -# wait 1 seconds for Redis to connect +# wait 1 second for Redis to connect modparam("cachedb_redis", "connect_timeout",1000) ... @@ -224,10 +428,10 @@ modparam("cachedb_redis", "connect_timeout",1000) - Set <varname>connect_timeout</varname> parameter + Set <varname>query_timeout</varname> parameter ... -# wait 1 seconds for Redis queries +# wait 1 second for Redis queries modparam("cachedb_redis", "query_timeout",1000) ... @@ -259,6 +463,40 @@ modparam("cachedb_redis", "shutdown_on_error", 1)
+
+ <varname>lazy_connect</varname> (integer) + + By setting this parameter to 1, &osips; will defer establishing + Redis connections until the first cache operation is actually + performed by each worker process. This prevents idle worker + processes (those that never use Redis) from holding open sockets, + which avoids sockets getting stuck in CLOSE_WAIT state when Redis + is restarted. + + + When this parameter is enabled, the + parameter has no effect, + since no connection is attempted at startup time. + + + + Default value is 0 (disabled — connect at + startup, preserving existing behavior). + + + + + Set the <varname>lazy_connect</varname> parameter + +... +# defer Redis connections until first use +modparam("cachedb_redis", "lazy_connect", 1) +... + + + +
+
<varname>use_tls</varname> (integer) @@ -271,7 +509,7 @@ modparam("cachedb_redis", "shutdown_on_error", 1) When using this parameter, you must also ensure that tls_mgm is loaded and properly configured. Refer to - the the module for additional info regarding TLS client domains. + the tls_mgm module for additional info regarding TLS client domains. Note that TLS is supported by Redis starting with version 6.0. Also, it is @@ -293,7 +531,7 @@ modparam("tls_mgm", "private_key", "[redis]/etc/pki/tls/private/redis.key") modparam("tls_mgm", "ca_list", "[redis]/etc/pki/tls/certs/ca.pem") ... modparam("cachedb_redis", "use_tls", 1) -modparam("cachedb_redis", "cachedb_url","redis://localhost:6379/?tls_domain=redis") +modparam("cachedb_redis", "cachedb_url","redis:tls_group://localhost:6379/?tls_domain=redis") ... @@ -368,6 +606,40 @@ modparam("cachedb_redis", "ftsearch_max_results", 100)
+
+ <varname>redis_keepalive</varname> (integer) + + TCP keepalive interval in seconds for Redis connections. When set + to a positive value, the kernel sends TCP probes on idle connections + to detect dead peers (e.g. due to NAT/firewall idle timeout or + network partition). This allows the next query to fail immediately + instead of waiting for the full query timeout, enabling faster + recovery via the existing retry loop. + + + Set to 0 to disable TCP keepalive. Recommended to keep enabled + for production deployments to prevent silent connection death. + + + + Default value is 10 (seconds). + + + + + Set <varname>redis_keepalive</varname> parameter + +... +# set TCP keepalive interval to 15 seconds +modparam("cachedb_redis", "redis_keepalive", 15) + +# disable TCP keepalive +modparam("cachedb_redis", "redis_keepalive", 0) +... + + +
+
<varname>ftsearch_json_mset_expire</varname> (integer) @@ -401,6 +673,220 @@ modparam("cachedb_redis", "ftsearch_json_mset_expire", 7200) in configuration script.
+
+ Exported MI Functions + +
+ + <function moreinfo="none">redis_cluster_info</function> + + + Displays detailed information about all Redis connections managed + by the module, including cluster topology, per-node connection status, + slot assignments, and per-node query counters. + + Parameters: + + + group (optional) - if specified, only + connections belonging to this group will be listed (e.g. + "local" from a + "redis:local://..." URL). If omitted, + all Redis connections are listed. + + + + The response is a JSON array of connection objects. Each connection + object includes: + + + + group - the connection group name + + + url - the original cachedb_url + + + mode - "cluster" + or "single" + + + cluster_command (cluster mode only) - + "SHARDS" or + "SLOTS", depending on which Redis + command is used for topology discovery + + + topology_refreshes - number of topology + refreshes performed on this connection + + + last_topology_refresh - UNIX timestamp + of the last topology refresh + + + nodes - array of cluster node objects, + each containing: + ip, port, + status + ("connected"/"disconnected"), + slots_assigned (cluster mode only), + queries, errors, + moved, + last_activity (seconds since last + successful query, -1 if never queried) + + + total_slots_mapped (cluster mode only) - + total number of slots with an assigned node (should be 16384 + for a healthy cluster) + + + + MI FIFO Command Format: + + +## list all Redis connections +opensips-cli -x mi redis_cluster_info + +## list only the "local" group +opensips-cli -x mi redis_cluster_info group=local + +
+ +
+ + <function moreinfo="none">redis_cluster_refresh</function> + + + Forces an immediate topology refresh on Redis Cluster connections. + This bypasses the normal once-per-second rate limit and queries the + cluster for its current slot-to-node mapping. Useful after manual + cluster rebalancing or node additions/removals. + + + For non-cluster (single instance) connections, the command returns + a "skipped (not cluster mode)" status. + + Parameters: + + + group (optional) - if specified, only + the connection belonging to this group will be refreshed. + If omitted, all cluster connections are refreshed. + + + + The response is a JSON array of objects, one per connection, each + containing group and status + ("ok", "error", or + "skipped (not cluster mode)"). + + + MI FIFO Command Format: + + +## refresh all cluster connections +opensips-cli -x mi redis_cluster_refresh + +## refresh only the "local" group +opensips-cli -x mi redis_cluster_refresh group=local + +
+ +
+ + <function moreinfo="none">redis_ping_nodes</function> + + + Sends a PING command to each Redis node and reports per-node + reachability status with round-trip latency. Useful for on-demand + health checks without waiting for the next query. + + Parameters: + + + group (optional) - if specified, only + nodes belonging to this group will be pinged. If omitted, + all Redis connections are pinged. + + + + The response is a JSON array of connection objects. Each connection + object includes: + + + + group - the connection group name + + + nodes - array of node objects, each + containing: + ip, port, + status + ("reachable", + "unreachable", or + "disconnected"), + latency_us (round-trip time in + microseconds, -1 if not reachable) + + + + MI FIFO Command Format: + + +## ping all Redis nodes +opensips-cli -x mi redis_ping_nodes + +## ping only the "local" group +opensips-cli -x mi redis_ping_nodes group=local + +
+ +
+ +
+ Exported Statistics + +
+ <varname>redis_queries</varname> + + Total number of successful Redis queries executed across all + connections and processes. + +
+ +
+ <varname>redis_queries_failed</varname> + + Total number of failed Redis queries (NULL replies from hiredis + or Redis error responses other than MOVED). + +
+ +
+ <varname>redis_moved</varname> + + Total number of MOVED redirections received from Redis Cluster + nodes. A MOVED response indicates a permanent slot migration - + the module updates its slot map and retries the query on the + correct node. + +
+ +
+ <varname>redis_topology_refreshes</varname> + + Total number of cluster topology refreshes performed (via + CLUSTER SHARDS or CLUSTER SLOTS). This counter increments both + for automatic refreshes (triggered by MOVED responses or + unreachable nodes) and manual refreshes (triggered via the + MI command). + +
+ +
+
Raw Query Syntax @@ -432,8 +918,8 @@ modparam("cachedb_redis", "ftsearch_json_mset_expire", 7200) cache_raw_query("redis","HGETALL $var(my_hash)","$avp(result)"); $var(it) = 0; - while ($(avp(result_final)[$var(it)]) != NULL) { - xlog("Multiple key reply: - we have fetched $(avp(result_final)[$var(it)]) \n"); + while ($(avp(result)[$var(it)]) != NULL) { + xlog("Multiple key reply: - we have fetched $(avp(result)[$var(it)]) \n"); $var(it) = $var(it) + 1; } ... diff --git a/modules/cachedb_redis/test/.gitignore b/modules/cachedb_redis/test/.gitignore new file mode 100644 index 00000000000..b35f1189c01 --- /dev/null +++ b/modules/cachedb_redis/test/.gitignore @@ -0,0 +1,2 @@ +test_hash +*.o diff --git a/modules/cachedb_redis/test/Makefile b/modules/cachedb_redis/test/Makefile new file mode 100644 index 00000000000..3eff91a2652 --- /dev/null +++ b/modules/cachedb_redis/test/Makefile @@ -0,0 +1,50 @@ +# Makefile for cachedb_redis test suite +# +# Usage: +# make - build all unit tests +# make test - build and run all unit tests +# make clean - remove built binaries and object files +# +# Requirements: +# - GCC or compatible C compiler +# - No external libraries required (tests are self-contained) + +CC ?= gcc +CFLAGS ?= -Wall -Wextra -Wno-unused-function -Wno-unused-variable -O2 + +UNIT_TESTS = test_hash test_mi_counters + +# Path to the real source (relative to this test directory) +UTILS_SRC = ../cachedb_redis_utils.c + +.PHONY: all test clean + +all: $(UNIT_TESTS) + +test: $(UNIT_TESTS) + @echo "=== Running unit tests ===" + @failed=0; \ + for t in $(UNIT_TESTS); do \ + echo "--- $$t ---"; \ + ./$$t; \ + if [ $$? -ne 0 ]; then failed=$$((failed + 1)); fi; \ + done; \ + echo "=== Done ($$failed failure(s)) ==="; \ + exit $$failed + +# hash_under_test.c #includes the real ../cachedb_redis_utils.c with +# OpenSIPS headers blocked and minimal type stubs provided. +# This compiles the actual crc16() and redisHash() functions. 
+hash_under_test.o: hash_under_test.c $(UTILS_SRC) + $(CC) $(CFLAGS) -c -o $@ hash_under_test.c + +# test_hash.c contains the test cases; links against real redisHash(). +test_hash: test_hash.c hash_under_test.o + $(CC) $(CFLAGS) -o $@ test_hash.c hash_under_test.o + +# test_mi_counters.c is self-contained (no external dependencies) +test_mi_counters: test_mi_counters.c + $(CC) $(CFLAGS) -o $@ test_mi_counters.c + +clean: + rm -f $(UNIT_TESTS) *.o diff --git a/modules/cachedb_redis/test/README.md b/modules/cachedb_redis/test/README.md new file mode 100644 index 00000000000..ab82d193c5b --- /dev/null +++ b/modules/cachedb_redis/test/README.md @@ -0,0 +1,99 @@ +# cachedb_redis Test Suite + +Tests for the OpenSIPS `cachedb_redis` module's Redis Cluster support, +organized by PR. + +## Directory Contents + +| File | Type | PR | Description | +|------|------|----|-------------| +| `Makefile` | Build | All | Builds C unit tests. Targets: `make`, `make test`, `make clean` | +| `hash_under_test.c` | Stub wrapper | PR 1 | Compiles the **real** `crc16()` and `redisHash()` from `../cachedb_redis_utils.c` by blocking OpenSIPS headers and providing minimal type stubs. | +| `test_hash.c` | Unit test | PR 1 | Tests the real `redisHash()` against `redis-cli CLUSTER KEYSLOT` reference values. Links against `hash_under_test.o`. **Fails before PR 1** (demonstrating both bugs), **passes after**. | +| `test_topology_refresh.sh` | Integration test | PR 3 | Verifies OpenSIPS adapts to topology changes (slot migrations, CLUSTER SHARDS/SLOTS probing). Migrates a slot, confirms OpenSIPS follows MOVED redirects, then restores. | +| `README.md` | Documentation | — | This file. | + +## How the Unit Test Links Against Real Code + +The test does **not** copy the hash function. Instead: + +1. `hash_under_test.c` pre-defines OpenSIPS include guards (`dprint_h`, + `_CACHEDB_H`, `mem_h`, etc.) and provides minimal type stubs (`str`, + `redis_con`, logging no-ops, `pkg_malloc` mapped to `malloc`). 
+2. It then `#include`s the real `../cachedb_redis_utils.c`, compiling the + actual `crc16()` and `redisHash()` into `hash_under_test.o`. +3. `test_hash.c` declares `extern` references to those functions and links + against `hash_under_test.o`. + +When `cachedb_redis_utils.c` is modified (by PR 1), rebuilding the test +automatically picks up the changes — no manual sync required. + +## Requirements + +### PR 1: Unit Test (`test_hash`) + +| Requirement | Notes | +|-------------|-------| +| C compiler (gcc or clang) | Any version supporting C99 | + +No external libraries are needed. Build and run: + +```bash +make test_hash +./test_hash +# or: make test (builds and runs all unit tests) +``` + +**Expected results before PR 1:** + +- Basic key tests (no hash tags, full cluster): PASS +- Hash tag tests (`{user}.name`, `{user}.email`): **FAIL** — bug 2, no extraction +- Partial cluster tests (`slots_assigned != 16383`): **FAIL** — bug 1, bitmask vs modulo + +**Expected results after PR 1:** + +- All tests: PASS (just run `make clean && make test` — no code changes needed in the test) + +### PR 2 & PR 3: Integration Tests + +| Requirement | Notes | +|-------------|-------| +| `redis-cli` | From the `redis-tools` package (Debian/Ubuntu) or `redis` package (RHEL/Fedora) | +| `curl` | For OpenSIPS MI HTTP interface | +| 3-node Redis Cluster | Default: `10.0.0.23`, `10.0.0.24`, `10.0.0.25` on port `6379` | +| Running OpenSIPS | With `mi_http` module loaded, listening on port `8888` | +| `cachedb_redis` module | Loaded in cluster mode, connected to the above cluster | + +Both scripts accept environment variables to override defaults: + +```bash +export REDIS_PASS="your_password" +export REDIS_NODE_1="10.0.0.23" +export REDIS_NODE_2="10.0.0.24" +export REDIS_NODE_3="10.0.0.25" +export REDIS_PORT="6379" +export MI_URL="http://127.0.0.1:8888/mi" +``` + +Run: + +```bash +./test_topology_refresh.sh +``` + +If OpenSIPS MI is not reachable, the integration tests will skip 
+OpenSIPS-specific assertions and only test direct Redis cluster operations. + +## Test Environment Warning + +The integration tests perform **live slot migrations** on the Redis Cluster. +They restore the original configuration afterward, but should only be run +in a **test or staging environment**, never in production. + +## Adding New Tests + +- **Unit tests** (C): Add the source file and a build target in the `Makefile`. + Append the binary name to the `UNIT_TESTS` variable so `make test` picks it up. +- **Integration tests** (shell): Follow the existing pattern — preflight checks, + assert helpers, cleanup, and environment variable overrides. Name the file + `test_.sh` and make it executable. diff --git a/modules/cachedb_redis/test/hash_under_test.c b/modules/cachedb_redis/test/hash_under_test.c new file mode 100644 index 00000000000..63894e618bd --- /dev/null +++ b/modules/cachedb_redis/test/hash_under_test.c @@ -0,0 +1,211 @@ +/* + * hash_under_test.c - Compilation wrapper for cachedb_redis_utils.c + * + * Pre-defines include guards and provides minimal type stubs, then + * #includes the real cachedb_redis_utils.c. This compiles the actual + * crc16() and redisHash() functions without the full OpenSIPS build tree. + * + * Functions that depend on OpenSIPS internals (build_cluster_nodes, etc.) + * compile with stub types/macros but should never be called from tests. 
+ */ + +#include +#include +#include +#include + +/* ================================================================== */ +/* Block ALL external headers by pre-defining their include guards */ +/* ================================================================== */ +#define dprint_h /* ../../dprint.h */ +#define ut_h /* ../../ut.h */ +#define _CACHEDB_H /* ../../cachedb/cachedb.h */ +#define mem_h /* ../../mem/mem.h */ +#define TLS_API_H /* ../tls_mgm/api.h */ +#define str_h /* ../../str.h */ +#define __HIREDIS_H /* */ +#define CACHEDBREDIS_DBASE_H /* cachedb_redis_dbase.h */ +#define CACHEDB_REDIS_UTILSH /* cachedb_redis_utils.h (self-include) */ +#define statistics_h /* ../../statistics.h */ + +/* ================================================================== */ +/* Minimal type stubs matching OpenSIPS definitions */ +/* ================================================================== */ + +/* str type — matches struct __str from opensips/str.h */ +struct __str { + char *s; + int len; +}; +typedef struct __str str; + +/* cachedb_id — matches cachedb/cachedb_id.h (fields accessed by build_cluster_nodes) */ +struct cachedb_id { + char *scheme; + char *group_name; + char *username; + char *password; + char *host; + unsigned short port; + char *database; + char *extra_options; + char *initial_url; + int flags; +}; + +/* Forward declarations for pointer-only types */ +struct cachedb_pool_con_t; +struct tls_domain; +struct tls_mgm_binds { void (*release_domain)(struct tls_domain *); }; + +/* hiredis types — used as pointers in cluster_node */ +typedef struct redisContext { int fd; } redisContext; +typedef struct redisReply { + int type; + long long integer; + size_t len; + char *str; + size_t elements; + struct redisReply **element; +} redisReply; + +/* hiredis constants */ +#define REDIS_REPLY_STRING 1 +#define REDIS_REPLY_ARRAY 2 +#define REDIS_REPLY_INTEGER 3 +#define REDIS_REPLY_NIL 4 +#define REDIS_REPLY_STATUS 5 +#define REDIS_REPLY_ERROR 6 + +/* hiredis 
function stubs */ +static inline void redisFree(redisContext *c) { (void)c; } +static inline void freeReplyObject(void *r) { (void)r; } +static inline void *redisCommand(redisContext *c, const char *fmt, ...) { + (void)c; (void)fmt; return NULL; +} + +/* cluster_node — matches cachedb_redis_dbase.h */ +typedef struct cluster_nodes { + char *ip; + unsigned short port; + unsigned short start_slot; + unsigned short end_slot; + redisContext *context; + struct tls_domain *tls_dom; + uint8_t seen; + /* per-node, per-process counters (pkg memory) */ + unsigned long queries; + unsigned long errors; + unsigned long moved; + struct cluster_nodes *next; +} cluster_node; + +typedef struct { + const char *s; + int len; +} const_str; + +typedef struct { + int slot; + const_str endpoint; + int port; +} redis_moved; + +enum redis_flag { + REDIS_SINGLE_INSTANCE = 1 << 0, + REDIS_CLUSTER_INSTANCE = 1 << 1, + REDIS_INIT_NODES = 1 << 2, + REDIS_JSON_SUPPORT = 1 << 3, + REDIS_MULTIPLE_HOSTS = 1 << 4, +}; + +enum cluster_cmd { + CLUSTER_CMD_NONE, + CLUSTER_CMD_SHARDS, + CLUSTER_CMD_SLOTS +}; + +/* redis_con — matches cachedb_redis_dbase.h */ +typedef struct _redis_con { + struct cachedb_id *id; + unsigned int ref; + struct cachedb_pool_con_t *next; + char *host; + unsigned short port; + enum redis_flag flags; + cluster_node *nodes; + char *json_keyspace; + cluster_node *slot_table[16384]; + enum cluster_cmd cluster_cmd; + time_t last_topology_refresh; + unsigned int topology_refresh_count; + struct _redis_con *next_con; + struct _redis_con *current; +} redis_con; + +/* ================================================================== */ +/* Stub macros/globals for code in utils.c we don't test */ +/* ================================================================== */ + +/* Logging — no-op */ +#define LM_DBG(...) +#define LM_ERR(...) +#define LM_WARN(...) +#define LM_INFO(...) 
+ +/* Memory — map to standard malloc/free */ +#define pkg_malloc malloc +#define pkg_free free + +/* str_match — compare two str values */ +static inline int str_match(const str *a, const str *b) { + return a->len == b->len && memcmp(a->s, b->s, a->len) == 0; +} + +/* pkg_nt_str_dup — null-terminated str dup */ +static inline int pkg_nt_str_dup(str *dst, const str *src) { + dst->s = (char *)malloc(src->len + 1); + if (!dst->s) return -1; + memcpy(dst->s, src->s, src->len); + dst->s[src->len] = '\0'; + dst->len = src->len; + return 0; +} + +/* redis_connect_node stub */ +static inline int redis_connect_node(void *con, cluster_node *node) { + (void)con; (void)node; return 0; +} + +/* Globals referenced by destroy_cluster_nodes */ +static int use_tls = 0; +static struct tls_mgm_binds tls_api; + +/* stat_var stubs — referenced by utils.c via update_stat() */ +typedef void stat_var; +#define update_stat(_var, _n) +static stat_var *redis_stat_topology_refreshes = NULL; + +/* Constants from cachedb_redis_utils.h */ +#define REDIS_DF_PORT 6379 +#define MOVED_PREFIX "MOVED " +#define MOVED_PREFIX_LEN (sizeof(MOVED_PREFIX) - 1) +#define ERR_INVALID_REPLY -1 +#define ERR_INVALID_SLOT -2 +#define ERR_INVALID_PORT -3 + +/* match_prefix from cachedb_redis_utils.h */ +static inline int match_prefix(const char *buf, size_t len, + const char *prefix, size_t prefix_len) { + size_t i; + if (len < prefix_len) return 0; + for (i = 0; i < prefix_len; ++i) { + if (buf[i] != prefix[i]) return 0; + } + return 1; +} + +/* ================================================================== */ +/* Include the REAL source — compiles actual crc16() and redisHash() */ +/* ================================================================== */ +#include "../cachedb_redis_utils.c" diff --git a/modules/cachedb_redis/test/test_edge_cases.sh b/modules/cachedb_redis/test/test_edge_cases.sh new file mode 100755 index 00000000000..87bba6eb9e8 --- /dev/null +++ 
b/modules/cachedb_redis/test/test_edge_cases.sh @@ -0,0 +1,782 @@ +#!/bin/bash +# +# test_edge_cases.sh - Edge case integration tests for cachedb_redis cluster +# +# Exercises two high-risk code paths uncovered by load testing analysis: +# Test 1: MOVED to an unknown endpoint — new node joins cluster +# Test 2: All nodes temporarily unreachable — complete outage + recovery +# +# Requirements: +# - redis-cli, curl, python3 +# - 3-node Redis Cluster (10.0.0.23-25:6379) +# - OpenSIPS with mi_http on port 8888 +# - SSH access to all 3 Redis nodes (10.0.0.23, 10.0.0.24, 10.0.0.25) +# +# Environment variables (override defaults): +# REDIS_PASS - Redis cluster password +# REDIS_NODE_1 - First cluster node (default: 10.0.0.23) +# REDIS_NODE_2 - Second cluster node (default: 10.0.0.24) +# REDIS_NODE_3 - Third cluster node (default: 10.0.0.25) +# REDIS_PORT - Redis port (default: 6379) +# MI_URL - OpenSIPS MI HTTP URL (default: http://127.0.0.1:8888/mi) +# LEAK_THRESHOLD - Max allowed memory growth in bytes (default: 51200 = 50KB) +# + +set -euo pipefail + +# --- Configuration --- +REDIS_PASS="${REDIS_PASS:-85feedc95d5fa7f16fefdb9c92d154179748f2b08df76dc0}" +REDIS_NODE_1="${REDIS_NODE_1:-10.0.0.23}" +REDIS_NODE_2="${REDIS_NODE_2:-10.0.0.24}" +REDIS_NODE_3="${REDIS_NODE_3:-10.0.0.25}" +REDIS_PORT="${REDIS_PORT:-6379}" +MI_URL="${MI_URL:-http://127.0.0.1:8888/mi}" +LEAK_THRESHOLD="${LEAK_THRESHOLD:-51200}" + +PASS=0 +FAIL=0 +TOTAL=0 +TOTAL_OPS=0 + +# --- Cleanup on exit --- +CLEANUP_SLOTS=() +cleanup() { + for slot in "${CLEANUP_SLOTS[@]}"; do + for node in "$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3"; do + redis_cmd "$node" CLUSTER SETSLOT "$slot" STABLE >/dev/null 2>&1 || true + done + done + # Delete any test keys + for node in "$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3"; do + redis_cmd "$node" -c KEYS "test:edge:*" 2>/dev/null | while read -r k; do + redis_cmd "$node" -c DEL "$k" >/dev/null 2>&1 || true + done + done +} +trap cleanup EXIT + +# --- Helpers (shared 
with test_load.sh) --- + +redis_cmd() { + local node="$1"; shift + redis-cli -h "$node" -p "$REDIS_PORT" -a "$REDIS_PASS" --no-auth-warning "$@" +} + +mi_cmd() { + local cmd="$1"; shift + local params="" + while [ $# -gt 0 ]; do + case "$1" in + -d) params="$2"; shift 2 ;; + *) shift ;; + esac + done + if [ -n "$params" ]; then + curl -s -m 10 -X POST "$MI_URL/$cmd" -H "Content-Type: application/json" \ + -d "{\"jsonrpc\":\"2.0\",\"method\":\"$cmd\",\"params\":$params,\"id\":1}" + else + curl -s -m 10 -X POST "$MI_URL/$cmd" -H "Content-Type: application/json" \ + -d "{\"jsonrpc\":\"2.0\",\"method\":\"$cmd\",\"id\":1}" + fi +} + +mi_fetch_value() { + local key="$1" + local result + result=$(mi_cmd "cache_fetch" -d "{\"system\":\"redis:cluster\",\"attr\":\"$key\"}" 2>/dev/null) + echo "$result" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("result",{}).get("value",""))' 2>/dev/null || echo "" +} + +mi_store() { + local key="$1" value="$2" + local result + result=$(mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$key\",\"value\":\"$value\"}" 2>/dev/null) || return 1 + echo "$result" | python3 -c 'import sys,json; r=json.load(sys.stdin); sys.exit(1 if "error" in r else 0)' 2>/dev/null +} + +mi_remove() { + local key="$1" + mi_cmd "cache_remove" -d "{\"system\":\"redis:cluster\",\"attr\":\"$key\"}" >/dev/null 2>&1 +} + +mi_add() { + local key="$1" value="$2" + mi_cmd "cache_add" -d "{\"system\":\"redis:cluster\",\"attr\":\"$key\",\"value\":$value}" >/dev/null 2>&1 +} + +assert_eq() { + local desc="$1" expected="$2" actual="$3" + TOTAL=$((TOTAL + 1)) + if [ "$expected" = "$actual" ]; then + echo " PASS: $desc" + PASS=$((PASS + 1)) + else + echo " FAIL: $desc (expected='$expected', got='$actual')" + FAIL=$((FAIL + 1)) + fi +} + +assert_not_empty() { + local desc="$1" actual="$2" + TOTAL=$((TOTAL + 1)) + if [ -n "$actual" ]; then + echo " PASS: $desc" + PASS=$((PASS + 1)) + else + echo " FAIL: $desc (value was empty)" + FAIL=$((FAIL + 1)) 
+ fi +} + +sample_memory() { + local result + result=$(mi_cmd "get_statistics" -d "{\"statistics\":[\"pkmem:\"]}" 2>/dev/null) + local used real frags + used=$(echo "$result" | python3 -c 'import sys,json; print(json.load(sys.stdin)["result"]["pkmem:1-used_size"])' 2>/dev/null) + real=$(echo "$result" | python3 -c 'import sys,json; print(json.load(sys.stdin)["result"]["pkmem:1-real_used_size"])' 2>/dev/null) + frags=$(echo "$result" | python3 -c 'import sys,json; print(json.load(sys.stdin)["result"]["pkmem:1-fragments"])' 2>/dev/null) + echo "$used $real $frags" +} + +mem_used() { + echo "$1" | awk '{print $1}' +} + +mem_real() { + echo "$1" | awk '{print $2}' +} + +mem_frags() { + echo "$1" | awk '{print $3}' +} + +check_leak() { + local desc="$1" baseline="$2" current="$3" threshold="$4" + local delta=$((current - baseline)) + + TOTAL=$((TOTAL + 1)) + if [ "$delta" -lt "$threshold" ]; then + echo " PASS: $desc (delta=$delta < threshold=$threshold)" + PASS=$((PASS + 1)) + return 0 + else + echo " FAIL: $desc (delta=$delta >= threshold=$threshold)" + FAIL=$((FAIL + 1)) + return 1 + fi +} + +run_cycle() { + local key="$1" value="$2" + local fetched + + mi_store "$key" "$value" || return 1 + fetched=$(mi_fetch_value "$key") + if [ "$fetched" != "$value" ]; then + return 1 + fi + mi_remove "$key" || return 1 + TOTAL_OPS=$((TOTAL_OPS + 3)) + return 0 +} + +get_node_id() { + local node="$1" + redis_cmd "$node" CLUSTER MYID | tr -d '\r' +} + +resolve_slot_owner() { + local slot="$1" + local dest_override="${2:-}" + + if [ "$slot" -le 5460 ]; then + SOURCE_IP="$REDIS_NODE_1" + if [ "$dest_override" = "node3" ]; then + DEST_IP="$REDIS_NODE_3" + else + DEST_IP="$REDIS_NODE_2" + fi + elif [ "$slot" -le 10922 ]; then + SOURCE_IP="$REDIS_NODE_2" + if [ "$dest_override" = "node3" ]; then + DEST_IP="$REDIS_NODE_3" + else + DEST_IP="$REDIS_NODE_1" + fi + else + SOURCE_IP="$REDIS_NODE_3" + if [ "$dest_override" = "node1" ]; then + DEST_IP="$REDIS_NODE_1" + else + 
DEST_IP="$REDIS_NODE_2" + fi + fi + + SOURCE_ID=$(get_node_id "$SOURCE_IP") + DEST_ID=$(get_node_id "$DEST_IP") +} + +begin_migration() { + local slot="$1" + redis_cmd "$DEST_IP" CLUSTER SETSLOT "$slot" IMPORTING "$SOURCE_ID" >/dev/null 2>&1 || true + redis_cmd "$SOURCE_IP" CLUSTER SETSLOT "$slot" MIGRATING "$DEST_ID" >/dev/null 2>&1 || true +} + +migrate_keys() { + local slot="$1" + local keys + keys=$(redis_cmd "$SOURCE_IP" CLUSTER GETKEYSINSLOT "$slot" 100 2>/dev/null | tr -d '\r') + if [ -n "$keys" ]; then + for k in $keys; do + redis_cmd "$SOURCE_IP" MIGRATE "$DEST_IP" "$REDIS_PORT" "$k" 0 5000 AUTH "$REDIS_PASS" >/dev/null 2>&1 || true + done + fi +} + +complete_migration() { + local slot="$1" + for node in "$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3"; do + redis_cmd "$node" CLUSTER SETSLOT "$slot" NODE "$DEST_ID" >/dev/null 2>&1 || true + done + sleep 1 +} + +restore_slot() { + local slot="$1" orig_ip="$2" curr_ip="$3" + local orig_id curr_id + + orig_id=$(get_node_id "$orig_ip") + curr_id=$(get_node_id "$curr_ip") + + redis_cmd "$orig_ip" CLUSTER SETSLOT "$slot" IMPORTING "$curr_id" >/dev/null 2>&1 || true + redis_cmd "$curr_ip" CLUSTER SETSLOT "$slot" MIGRATING "$orig_id" >/dev/null 2>&1 || true + + local keys + keys=$(redis_cmd "$curr_ip" CLUSTER GETKEYSINSLOT "$slot" 100 2>/dev/null | tr -d '\r') + if [ -n "$keys" ]; then + for k in $keys; do + redis_cmd "$curr_ip" MIGRATE "$orig_ip" "$REDIS_PORT" "$k" 0 5000 AUTH "$REDIS_PASS" >/dev/null 2>&1 || true + done + fi + + for node in "$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3"; do + redis_cmd "$node" CLUSTER SETSLOT "$slot" NODE "$orig_id" >/dev/null 2>&1 || true + done + sleep 1 +} + +# --- New helpers for edge case tests --- + +# Run redis-cli commands on node 4 (separate VM at 10.0.0.28). 
+node4_cmd() { + redis-cli -h "$NODE4_IP" -p "$NODE4_PORT" -a "$REDIS_PASS" --no-auth-warning "$@" +} + +wait_cluster_ok() { + local timeout="${1:-60}" + local start=$SECONDS + local state + while [ $((SECONDS - start)) -lt "$timeout" ]; do + state=$(redis_cmd "$REDIS_NODE_1" CLUSTER INFO 2>/dev/null | grep cluster_state | tr -d '\r' | cut -d: -f2) + if [ "$state" = "ok" ]; then + echo "$((SECONDS - start))" + return 0 + fi + sleep 1 + done + echo "$timeout" + return 1 +} + +# Hash tags for targeting specific nodes: +# {b} -> slot 3300 (node 1, slots 0-5460) +# {c} -> slot 7365 (node 2, slots 5461-10922) +# {a} -> slot 15495 (node 3, slots 10923-16383) +HASH_TAGS=("{b}" "{c}" "{a}") +NODE_IPS=("$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3") +MIGRATION_SLOTS=(3300 7365 15495) + +# ================================================================== # +# Preflight checks # +# ================================================================== # +echo "=== cachedb_redis Edge Case Tests ===" +echo "" +echo "Checking prerequisites..." + +if ! command -v redis-cli &>/dev/null; then + echo "ERROR: redis-cli not found. Install redis-tools." + exit 1 +fi + +if ! command -v curl &>/dev/null; then + echo "ERROR: curl not found." + exit 1 +fi + +if ! command -v python3 &>/dev/null; then + echo "ERROR: python3 not found." + exit 1 +fi + +# Verify cluster is healthy +CLUSTER_STATE=$(redis_cmd "$REDIS_NODE_1" CLUSTER INFO | grep cluster_state | tr -d '\r' | cut -d: -f2) +if [ "$CLUSTER_STATE" != "ok" ]; then + echo "ERROR: Redis Cluster state is '$CLUSTER_STATE', expected 'ok'." + exit 1 +fi +echo " Redis Cluster: ok" + +# Verify OpenSIPS MI is reachable +MI_RESPONSE=$(curl -s -m 5 -o /dev/null -w "%{http_code}" -X POST "$MI_URL/which" \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"which","id":1}' 2>/dev/null || true) +if [ "$MI_RESPONSE" != "200" ]; then + echo "ERROR: OpenSIPS MI not reachable at $MI_URL (HTTP $MI_RESPONSE)." 
+ exit 1 +fi +echo " OpenSIPS MI: ok" + +# Verify SSH access to all 3 redis nodes +SSH_OK=1 +for node in "$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3"; do + if ! ssh -o ConnectTimeout=5 -o BatchMode=yes "$node" "true" 2>/dev/null; then + echo "ERROR: SSH access to $node failed." + SSH_OK=0 + fi +done +if [ "$SSH_OK" -eq 0 ]; then + echo "ERROR: SSH access required to all 3 redis nodes." + echo " Deploy keys: see Prerequisites section in test plan." + exit 1 +fi +echo " SSH to redis nodes: ok" + +echo "" + +# ================================================================== # +# Test 1: MOVED to unknown endpoint # +# ================================================================== # +echo "--- Test 1: MOVED to unknown endpoint ---" + +REDIS_NODE_4="${REDIS_NODE_4:-10.0.0.28}" +NODE4_IP="$REDIS_NODE_4" +NODE4_PORT="$REDIS_PORT" + +# Phase 1a: Start 4th Redis, verify OpenSIPS doesn't know about it +echo " Phase 1a: Starting 4th Redis on ${NODE4_IP}:${NODE4_PORT}" + +# Ensure redis-4 is running with a clean cluster state (no prior cluster membership) +ssh -o ConnectTimeout=5 "$NODE4_IP" "sudo systemctl start redis-server 2>/dev/null; sleep 1; \ + redis-cli -a '$REDIS_PASS' --no-auth-warning FLUSHALL 2>/dev/null; \ + redis-cli -a '$REDIS_PASS' --no-auth-warning CLUSTER RESET HARD 2>/dev/null; \ + sudo systemctl stop redis-server 2>/dev/null; \ + sudo rm -f /var/lib/redis/nodes.conf; \ + sudo systemctl start redis-server" >/dev/null 2>&1 || true +sleep 2 + +# Join new node to cluster +redis_cmd "$REDIS_NODE_1" CLUSTER MEET "$NODE4_IP" "$NODE4_PORT" >/dev/null 2>&1 + +# Wait for cluster to recognize 4 nodes (poll CLUSTER NODES) +NODE4_JOINED=0 +for attempt in $(seq 1 30); do + NODE_COUNT=$(redis_cmd "$REDIS_NODE_1" CLUSTER NODES 2>/dev/null | grep -c "master\|slave" || true) + if [ "$NODE_COUNT" -ge 4 ]; then + NODE4_JOINED=1 + break + fi + sleep 1 +done + +if [ "$NODE4_JOINED" -eq 0 ]; then + echo " ERROR: 4th node did not join cluster within 30 seconds.
Skipping Test 1." + # Cleanup — fully reset to avoid stale state on next run + node4_cmd FLUSHALL >/dev/null 2>&1 || true + node4_cmd CLUSTER RESET HARD >/dev/null 2>&1 || true + ssh -o ConnectTimeout=5 "$NODE4_IP" "sudo systemctl stop redis-server; \ + sudo rm -f /var/lib/redis/nodes.conf" 2>/dev/null || true +else + +NODE4_ID=$(node4_cmd CLUSTER MYID 2>/dev/null | tr -d '\r') + +echo " Cluster: $NODE_COUNT nodes, state ok" + +# Store a key on slot 3300 (still on node 1) via OpenSIPS — baseline check +mi_store "test:edge:moved:{b}:before" "before_migration" || true +TOTAL_OPS=$((TOTAL_OPS + 1)) +PRE_FETCH=$(mi_fetch_value "test:edge:moved:{b}:before") +TOTAL_OPS=$((TOTAL_OPS + 1)) +assert_eq "Pre-migration fetch succeeds" "before_migration" "$PRE_FETCH" + +# Phase 1b: Migrate slot 3300 to unknown node 4, trigger MOVED +echo " Phase 1b: Migrate slot 3300 to unknown node" + +NODE1_ID=$(get_node_id "$REDIS_NODE_1") + +# Full migration: node 1 -> node 4 +# Set IMPORTING on destination (node 4) +CLEANUP_SLOTS+=(3300) +node4_cmd CLUSTER SETSLOT 3300 IMPORTING "$NODE1_ID" >/dev/null 2>&1 || true + +# Set MIGRATING on source (node 1) +redis_cmd "$REDIS_NODE_1" CLUSTER SETSLOT 3300 MIGRATING "$NODE4_ID" >/dev/null 2>&1 || true + +# Migrate existing keys from node 1 to node 4 +KEYS_IN_SLOT=$(redis_cmd "$REDIS_NODE_1" CLUSTER GETKEYSINSLOT 3300 100 2>/dev/null | tr -d '\r') +if [ -n "$KEYS_IN_SLOT" ]; then + for k in $KEYS_IN_SLOT; do + redis_cmd "$REDIS_NODE_1" MIGRATE "$NODE4_IP" "$NODE4_PORT" "$k" 0 5000 AUTH "$REDIS_PASS" >/dev/null 2>&1 || true + done +fi + +# Complete migration — notify all nodes (including node 4) +for node in "$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3"; do + redis_cmd "$node" CLUSTER SETSLOT 3300 NODE "$NODE4_ID" >/dev/null 2>&1 || true +done +node4_cmd CLUSTER SETSLOT 3300 NODE "$NODE4_ID" >/dev/null 2>&1 || true +sleep 1 + +# Now store via OpenSIPS — its slot table says 3300 -> node 1, but node 1 +# returns MOVED 3300 10.0.0.28:6379.
OpenSIPS has never seen that endpoint, +# so get_redis_connection_by_endpoint returns NULL. After failover exhausts, +# NULL slot lookup triggers refresh_cluster_topology(), discovers node 4 via +# CLUSTER SHARDS, and subsequent operations route correctly. +# +# The first attempt may fail (triggering topology refresh). Retry a few +# times to give OpenSIPS time to discover the new node. +MOVED_STORED=0 +for attempt in $(seq 1 5); do + if mi_store "test:edge:moved:{b}:after" "after_migration" 2>/dev/null; then + MOVED_STORED=1 + break + fi + sleep 2 +done +TOTAL_OPS=$((TOTAL_OPS + 1)) + +POST_FETCH=$(mi_fetch_value "test:edge:moved:{b}:after") +TOTAL_OPS=$((TOTAL_OPS + 1)) +assert_eq "Post-migration store+fetch via MOVED to new node" "after_migration" "$POST_FETCH" + +# Phase 1c: Sustained operations on new node (100 operations) +echo " Phase 1c: Sustained operations on new node (100 cycles)" + +MOVED_BASELINE_MEM=$(sample_memory) +MOVED_BASELINE_USED=$(mem_used "$MOVED_BASELINE_MEM") + +P2C_OK=0 +P2C_FAIL=0 +for i in $(seq 1 100); do + key="test:edge:moved:{b}:sustained_${i}" + value="moved_sustained_${i}" + if run_cycle "$key" "$value"; then + P2C_OK=$((P2C_OK + 1)) + else + P2C_FAIL=$((P2C_FAIL + 1)) + fi +done + +echo " 100 cycles: $P2C_OK ok, $P2C_FAIL errors" + +TOTAL=$((TOTAL + 1)) +if [ "$P2C_FAIL" -eq 0 ]; then + echo " PASS: All operations on new node succeeded" + PASS=$((PASS + 1)) +else + echo " FAIL: $P2C_FAIL operations on new node failed" + FAIL=$((FAIL + 1)) +fi + +MOVED_FINAL_MEM=$(sample_memory) +MOVED_FINAL_USED=$(mem_used "$MOVED_FINAL_MEM") +check_leak "No memory leak" "$MOVED_BASELINE_USED" "$MOVED_FINAL_USED" "$LEAK_THRESHOLD" || true + +# Phase 1d: Cleanup — migrate slot back, remove node 4 +echo " Phase 1d: Cleanup" + +# Migrate slot 3300 back: node 4 -> node 1 +redis_cmd "$REDIS_NODE_1" CLUSTER SETSLOT 3300 IMPORTING "$NODE4_ID" >/dev/null 2>&1 || true +node4_cmd CLUSTER SETSLOT 3300 MIGRATING "$NODE1_ID" >/dev/null 2>&1 || true + +# 
Migrate keys back +KEYS_IN_SLOT=$(node4_cmd CLUSTER GETKEYSINSLOT 3300 100 2>/dev/null | tr -d '\r') +if [ -n "$KEYS_IN_SLOT" ]; then + for k in $KEYS_IN_SLOT; do + node4_cmd MIGRATE "$REDIS_NODE_1" "$REDIS_PORT" "$k" 0 5000 AUTH "$REDIS_PASS" >/dev/null 2>&1 || true + done +fi + +# Complete migration back +for node in "$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3"; do + redis_cmd "$node" CLUSTER SETSLOT 3300 NODE "$NODE1_ID" >/dev/null 2>&1 || true +done +node4_cmd CLUSTER SETSLOT 3300 NODE "$NODE1_ID" >/dev/null 2>&1 || true +sleep 1 + +# Remove node 4 from cluster: CLUSTER FORGET on all 3 original nodes +for node in "$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3"; do + redis_cmd "$node" CLUSTER FORGET "$NODE4_ID" >/dev/null 2>&1 || true +done + +# Fully reset redis-4 so it doesn't auto-rejoin or retain stale state +node4_cmd FLUSHALL >/dev/null 2>&1 || true +node4_cmd CLUSTER RESET HARD >/dev/null 2>&1 || true +ssh -o ConnectTimeout=5 "$NODE4_IP" "sudo systemctl stop redis-server; \ + sudo rm -f /var/lib/redis/nodes.conf" 2>/dev/null || true +sleep 1 +# Slot 3300 is the only slot this test registers; reset (substitution would +# leave an empty element behind, making cleanup() run SETSLOT "" STABLE) +CLEANUP_SLOTS=() + +# Wait for cluster to stabilize (3 nodes, state ok) +CLEANUP_SECS=$(wait_cluster_ok 30) || true +echo " Slot restored, node removed, cluster stable" + +# Warm-up: first request on the {b} slot (3300) will hit the stale node 4 +# connection, triggering a topology refresh. Absorb this expected failure.
+mi_store "test:edge:moved:cleanup:{b}:warmup" "warmup" 2>/dev/null || true +sleep 2 + +# Verify OpenSIPS can route to all 3 original nodes +P2D_OK=0 +P2D_FAIL=0 +for tag_idx in 0 1 2; do + tag="${HASH_TAGS[$tag_idx]}" + key="test:edge:moved:cleanup:${tag}:verify" + value="cleanup_verify_${tag_idx}" + if run_cycle "$key" "$value"; then + P2D_OK=$((P2D_OK + 1)) + else + P2D_FAIL=$((P2D_FAIL + 1)) + fi +done + +TOTAL=$((TOTAL + 1)) +if [ "$P2D_OK" -eq 3 ]; then + echo " PASS: Post-cleanup operations on all original nodes" + PASS=$((PASS + 1)) +else + echo " FAIL: Post-cleanup operations failed ($P2D_OK/3 succeeded)" + FAIL=$((FAIL + 1)) +fi + +fi # end NODE4_JOINED check + +echo "" + +# ================================================================== # +# Test 2: All nodes temporarily unreachable # +# ================================================================== # +echo "--- Test 2: All nodes temporarily unreachable ---" + +# Phase 2a: Baseline — store 10 keys across all 3 nodes +echo " Phase 2a: Baseline (10 keys stored)" + +for i in $(seq 1 10); do + tag_idx=$(( (i - 1) % 3 )) + key="test:edge:outage:${HASH_TAGS[$tag_idx]}:baseline_${i}" + value="outage_baseline_${i}" + mi_store "$key" "$value" || true + TOTAL_OPS=$((TOTAL_OPS + 1)) +done + +# Verify all 10 baseline keys are readable +BASELINE_READS_OK=0 +for i in $(seq 1 10); do + tag_idx=$(( (i - 1) % 3 )) + key="test:edge:outage:${HASH_TAGS[$tag_idx]}:baseline_${i}" + fetched=$(mi_fetch_value "$key") + TOTAL_OPS=$((TOTAL_OPS + 1)) + if [ "$fetched" = "outage_baseline_${i}" ]; then + BASELINE_READS_OK=$((BASELINE_READS_OK + 1)) + fi +done + +OUTAGE_BASELINE_MEM=$(sample_memory) +OUTAGE_BASELINE_USED=$(mem_used "$OUTAGE_BASELINE_MEM") + +# Phase 2b: Stop all Redis nodes +echo " Phase 2b: Stopping all Redis nodes..." 
+ +ssh -o ConnectTimeout=5 "$REDIS_NODE_3" "sudo systemctl stop redis-server" 2>/dev/null || true +sleep 1 +ssh -o ConnectTimeout=5 "$REDIS_NODE_2" "sudo systemctl stop redis-server" 2>/dev/null || true +sleep 1 +ssh -o ConnectTimeout=5 "$REDIS_NODE_1" "sudo systemctl stop redis-server" 2>/dev/null || true +sleep 3 + +# Phase 2c: Operations during outage +echo " Phase 2c: Operations during outage" + +P3C_OK=0 +P3C_FAIL=0 +for i in $(seq 1 50); do + tag_idx=$(( (i - 1) % 3 )) + key="test:edge:outage:${HASH_TAGS[$tag_idx]}:during_${i}" + if mi_store "$key" "should_fail_${i}" 2>/dev/null; then + P3C_OK=$((P3C_OK + 1)) + else + P3C_FAIL=$((P3C_FAIL + 1)) + fi + TOTAL_OPS=$((TOTAL_OPS + 1)) +done + +echo " 50 operations: $P3C_OK ok, $P3C_FAIL expected failures" + +TOTAL=$((TOTAL + 1)) +if [ "$P3C_FAIL" -eq 50 ]; then + echo " PASS: All outage operations failed (expected)" + PASS=$((PASS + 1)) +else + echo " FAIL: Expected 50 failures, got $P3C_FAIL (ok=$P3C_OK)" + FAIL=$((FAIL + 1)) +fi + +# Verify MI is still responsive +MI_ALIVE=$(curl -s -m 5 -o /dev/null -w "%{http_code}" -X POST "$MI_URL/which" \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"which","id":1}' 2>/dev/null || echo "000") + +TOTAL=$((TOTAL + 1)) +if [ "$MI_ALIVE" = "200" ]; then + echo " PASS: MI still responsive" + PASS=$((PASS + 1)) +else + echo " FAIL: MI not responsive (HTTP $MI_ALIVE)" + FAIL=$((FAIL + 1)) +fi + +# Phase 2d: Restart all Redis nodes +echo " Phase 2d: Restarting all Redis nodes..." 
+ +ssh -o ConnectTimeout=5 "$REDIS_NODE_1" "sudo systemctl start redis-server" 2>/dev/null || true +sleep 1 +ssh -o ConnectTimeout=5 "$REDIS_NODE_2" "sudo systemctl start redis-server" 2>/dev/null || true +sleep 1 +ssh -o ConnectTimeout=5 "$REDIS_NODE_3" "sudo systemctl start redis-server" 2>/dev/null || true +sleep 2 + +# Verify each node is up (poll PING) +for node in "$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3"; do + for attempt in $(seq 1 15); do + if redis_cmd "$node" PING 2>/dev/null | grep -q "PONG"; then + break + fi + sleep 1 + done +done + +# CLUSTER MEET between nodes in case they need help re-joining +redis_cmd "$REDIS_NODE_1" CLUSTER MEET "$REDIS_NODE_2" "$REDIS_PORT" >/dev/null 2>&1 || true +redis_cmd "$REDIS_NODE_1" CLUSTER MEET "$REDIS_NODE_3" "$REDIS_PORT" >/dev/null 2>&1 || true + +# Wait for cluster state ok +RECOVERY_SECS=$(wait_cluster_ok 60) || true + +CLUSTER_STATE_AFTER=$(redis_cmd "$REDIS_NODE_1" CLUSTER INFO 2>/dev/null | grep cluster_state | tr -d '\r' | cut -d: -f2) + +echo " Cluster recovery: $CLUSTER_STATE_AFTER (took $RECOVERY_SECS seconds)" + +TOTAL=$((TOTAL + 1)) +if [ "$CLUSTER_STATE_AFTER" = "ok" ]; then + echo " PASS: Cluster recovered" + PASS=$((PASS + 1)) +else + echo " FAIL: Cluster state is '$CLUSTER_STATE_AFTER' (expected 'ok')" + FAIL=$((FAIL + 1)) +fi + +# Phase 2e: Recovery operations +echo " Phase 2e: Recovery operations" + +P3E_OK=0 +P3E_FAIL=0 +for i in $(seq 1 100); do + tag_idx=$(( (i - 1) % 3 )) + key="test:edge:outage:${HASH_TAGS[$tag_idx]}:recovery_${i}" + value="recovery_${i}" + if run_cycle "$key" "$value"; then + P3E_OK=$((P3E_OK + 1)) + else + P3E_FAIL=$((P3E_FAIL + 1)) + fi +done + +echo " 100 operations: $P3E_OK ok, $P3E_FAIL errors" + +TOTAL=$((TOTAL + 1)) +if [ "$P3E_OK" -ge 90 ]; then + echo " PASS: Recovery succeeded ($P3E_OK/100 >= 90 threshold)" + PASS=$((PASS + 1)) +else + echo " FAIL: Recovery insufficient ($P3E_OK/100 < 90 threshold)" + FAIL=$((FAIL + 1)) +fi + +# Verify pre-outage keys survived 
+SURVIVED=0 +for i in $(seq 1 10); do + tag_idx=$(( (i - 1) % 3 )) + key="test:edge:outage:${HASH_TAGS[$tag_idx]}:baseline_${i}" + fetched=$(mi_fetch_value "$key") + TOTAL_OPS=$((TOTAL_OPS + 1)) + if [ "$fetched" = "outage_baseline_${i}" ]; then + SURVIVED=$((SURVIVED + 1)) + fi +done + +TOTAL=$((TOTAL + 1)) +if [ "$SURVIVED" -eq 10 ]; then + echo " PASS: Pre-outage keys survived ($SURVIVED/10)" + PASS=$((PASS + 1)) +else + echo " FAIL: Pre-outage keys lost (only $SURVIVED/10 survived)" + FAIL=$((FAIL + 1)) +fi + +# Phase 2f: Stability check +echo " Phase 2f: Stability" + +OUTAGE_FINAL_MEM=$(sample_memory) +OUTAGE_FINAL_USED=$(mem_used "$OUTAGE_FINAL_MEM") +OUTAGE_DELTA=$((OUTAGE_FINAL_USED - OUTAGE_BASELINE_USED)) + +echo " Memory delta: $OUTAGE_DELTA bytes" +check_leak "No memory leak" "$OUTAGE_BASELINE_USED" "$OUTAGE_FINAL_USED" "$LEAK_THRESHOLD" || true + +# Final 50 operations to confirm full recovery +P3F_OK=0 +P3F_FAIL=0 +for i in $(seq 1 50); do + tag_idx=$(( (i - 1) % 3 )) + key="test:edge:outage:${HASH_TAGS[$tag_idx]}:final_${i}" + value="final_${i}" + if run_cycle "$key" "$value"; then + P3F_OK=$((P3F_OK + 1)) + else + P3F_FAIL=$((P3F_FAIL + 1)) + fi +done + +echo " 50 operations: $P3F_OK ok, $P3F_FAIL errors" + +# Clean up baseline keys +for i in $(seq 1 10); do + tag_idx=$(( (i - 1) % 3 )) + mi_remove "test:edge:outage:${HASH_TAGS[$tag_idx]}:baseline_${i}" 2>/dev/null || true +done + +echo "" + +# ================================================================== # +# Summary # +# ================================================================== # +echo "=== Summary ===" +echo " Assertions: $PASS passed, $FAIL failed, $TOTAL total" + +if [ "$FAIL" -eq 0 ]; then + echo " PASS: All edge case tests passed" +else + echo " FAIL: $FAIL assertion(s) failed" +fi + +echo "" +exit "$FAIL" diff --git a/modules/cachedb_redis/test/test_hash.c b/modules/cachedb_redis/test/test_hash.c new file mode 100644 index 00000000000..06fb1df1958 --- /dev/null +++ 
b/modules/cachedb_redis/test/test_hash.c @@ -0,0 +1,290 @@ +/* + * test_hash.c - Regression test for redisHash() from cachedb_redis_utils.c + * + * Links against the REAL crc16() and redisHash() compiled from + * ../cachedb_redis_utils.c (via hash_under_test.c stub wrapper). + * + * Prerequisites: + * - C compiler (gcc or clang) + * - Build: make test_hash + * - Run: ./test_hash + * + * Reference values: redis-cli CLUSTER KEYSLOT + */ + +#include +#include +#include +#include + +/* OpenSIPS str type — must match struct __str from opensips/str.h */ +struct __str { + char *s; + int len; +}; +typedef struct __str str; + +/* Declarations for the real functions from cachedb_redis_utils.c */ +extern uint16_t crc16(const char *buf, int len); +extern unsigned int redisHash(str *key); + +/* ================================================================== */ +/* Test framework */ +/* ================================================================== */ + +static int tests_run = 0; +static int tests_passed = 0; +static int tests_failed = 0; + +static void test_slot(const char *key, + unsigned int expected, const char *desc) +{ + str k; + k.s = (char *)key; + k.len = strlen(key); + + unsigned int actual = redisHash(&k); + tests_run++; + + if (actual == expected) { + tests_passed++; + printf(" PASS %-45s slot=%5u\n", desc ? desc : key, actual); + } else { + tests_failed++; + printf(" FAIL %-45s expected=%5u got=%5u\n", + desc ? 
desc : key, expected, actual); + } +} + +/* Test with explicit length (for keys with embedded special chars) */ +static void test_slot_len(const char *key, int len, + unsigned int expected, const char *desc) +{ + str k; + k.s = (char *)key; + k.len = len; + + unsigned int actual = redisHash(&k); + tests_run++; + + if (actual == expected) { + tests_passed++; + printf(" PASS %-45s slot=%5u\n", desc, actual); + } else { + tests_failed++; + printf(" FAIL %-45s expected=%5u got=%5u\n", + desc, expected, actual); + } +} + +int main(void) +{ + printf("=== redisHash() Regression Test ===\n"); + printf("Testing real redisHash() from cachedb_redis_utils.c\n"); + printf("Reference: redis-cli CLUSTER KEYSLOT \n\n"); + + /* ---------------------------------------------------------- */ + /* Basic keys (no hash tags) */ + /* ---------------------------------------------------------- */ + printf("--- Basic keys (no hash tags) ---\n"); + test_slot("testkey", 4757, "testkey"); + test_slot("foo", 12182, "foo"); + test_slot("user", 5474, "user"); + test_slot("world", 9059, "world"); + + /* ---------------------------------------------------------- */ + /* Edge case: empty and single-char keys */ + /* ---------------------------------------------------------- */ + printf("\n--- Edge cases: short keys ---\n"); + test_slot("", 0, "empty string"); + test_slot("a", 15495, "single char 'a'"); + test_slot("0", 13907, "single char '0'"); + + /* ---------------------------------------------------------- */ + /* Edge case: keys with special characters */ + /* ---------------------------------------------------------- */ + printf("\n--- Edge cases: special characters ---\n"); + test_slot("key with spaces", 13638, "key with spaces"); + test_slot("key:with:colons", 12379, "key:with:colons"); + test_slot("key.with.dots", 16282, "key.with.dots"); + test_slot("key/with/slashes", 3738, "key/with/slashes"); + test_slot("key\twith\ttabs", 7294, "key with literal tabs"); + + /* 
---------------------------------------------------------- */ + /* Edge case: long keys */ + /* ---------------------------------------------------------- */ + printf("\n--- Edge cases: long keys ---\n"); + { + char longkey[1025]; + unsigned int expected; + memset(longkey, 'x', 1024); + longkey[1024] = '\0'; + expected = crc16(longkey, 1024) % 16384; + test_slot(longkey, expected, "1024-byte key (all 'x')"); + } + + /* ---------------------------------------------------------- */ + /* Hash tag extraction */ + /* ---------------------------------------------------------- */ + printf("\n--- Hash tag extraction ---\n"); + printf("--- {user}.name and {user}.email should map to slot 5474 ---\n"); + printf("--- hello{world} should map to slot 9059 (same as \"world\") ---\n"); + test_slot("{user}.name", 5474, "{user}.name -> should be 5474"); + test_slot("{user}.email", 5474, "{user}.email -> should be 5474"); + test_slot("hello{world}", 9059, "hello{world} -> should be 9059"); + test_slot("{}bar", 6479, "{}bar (empty tag = full key)"); + test_slot("{foo}", 12182, "{foo} -> should be 12182"); + test_slot("a{foo}b", 12182, "a{foo}b -> should be 12182"); + test_slot("{}{foo}", 2263, "{}{foo} (empty first tag = full key)"); + + /* ---------------------------------------------------------- */ + /* Hash tag edge cases */ + /* ---------------------------------------------------------- */ + printf("\n--- Hash tag edge cases ---\n"); + /* Nested braces: first { to first } after it → hashes "{foo" */ + test_slot("{{foo}}", 13308, "{{foo}} -> hashes '{foo'"); + /* Only opening brace, no closing */ + test_slot("{unclosed", 470, "{unclosed (no close = full key)"); + /* Closing before opening */ + test_slot("}reversed{", 15992, "}reversed{ (close before open = full key)"); + /* Multiple hash tags: only first valid one counts → hashes "a" */ + test_slot("{a}{b}", 15495, "{a}{b} -> hashes 'a' (first tag)"); + /* Hash tag with just one char */ + test_slot("{x}.suffix", 16287, "{x}.suffix 
-> hashes 'x'"); + /* Hash tag at end */ + test_slot("prefix{tag}", 8338, "prefix{tag} -> hashes 'tag'"); + /* Empty first tag, then content with dot */ + test_slot("{}.{real}", 8956, "{}.{real} (empty first = full key)"); + /* Brace inside tag: {a{b}c → first { at 0, first } at 3 → hashes "a{b" */ + test_slot("{a{b}c", 13340, "{a{b}c -> hashes 'a{b'"); + + /* ---------------------------------------------------------- */ + /* Co-location verification */ + /* Keys with same hash tag must map to same slot */ + /* ---------------------------------------------------------- */ + printf("\n--- Co-location verification ---\n"); + { + str k1, k2; + unsigned int s1, s2; + + /* {session}.data and {session}.meta should co-locate */ + k1.s = "{session}.data"; k1.len = strlen(k1.s); + k2.s = "{session}.meta"; k2.len = strlen(k2.s); + s1 = redisHash(&k1); + s2 = redisHash(&k2); + tests_run++; + if (s1 == s2) { + tests_passed++; + printf(" PASS {session}.data == {session}.meta slot=%5u\n", s1); + } else { + tests_failed++; + printf(" FAIL {session}.data != {session}.meta %u != %u\n", s1, s2); + } + + /* {usrloc}.alice and {usrloc}.bob should co-locate */ + k1.s = "{usrloc}.alice"; k1.len = strlen(k1.s); + k2.s = "{usrloc}.bob"; k2.len = strlen(k2.s); + s1 = redisHash(&k1); + s2 = redisHash(&k2); + tests_run++; + if (s1 == s2) { + tests_passed++; + printf(" PASS {usrloc}.alice == {usrloc}.bob slot=%5u\n", s1); + } else { + tests_failed++; + printf(" FAIL {usrloc}.alice != {usrloc}.bob %u != %u\n", s1, s2); + } + + /* Different tags must (almost certainly) NOT co-locate */ + k1.s = "{alpha}.key"; k1.len = strlen(k1.s); + k2.s = "{beta}.key"; k2.len = strlen(k2.s); + s1 = redisHash(&k1); + s2 = redisHash(&k2); + tests_run++; + if (s1 != s2) { + tests_passed++; + printf(" PASS {alpha}.key != {beta}.key %u != %u\n", s1, s2); + } else { + tests_failed++; + printf(" FAIL {alpha}.key == {beta}.key (collision) slot=%u\n", s1); + } + } + + /* 
---------------------------------------------------------- */ + /* Slot range validation */ + /* All slots must be in [0, 16383] */ + /* ---------------------------------------------------------- */ + printf("\n--- Slot range validation ---\n"); + { + int range_pass = 1; + int i; + char buf[32]; + for (i = 0; i < 10000; i++) { + snprintf(buf, sizeof(buf), "key:%d", i); + str k; + k.s = buf; + k.len = strlen(buf); + unsigned int slot = redisHash(&k); + if (slot > 16383) { + printf(" FAIL key:%d produced slot %u (> 16383)\n", i, slot); + range_pass = 0; + tests_failed++; + tests_run++; + break; + } + } + if (range_pass) { + tests_run++; + tests_passed++; + printf(" PASS 10000 keys all in [0, 16383]\n"); + } + } + + /* ---------------------------------------------------------- */ + /* CRC16 direct verification against known values */ + /* ---------------------------------------------------------- */ + printf("\n--- CRC16 direct verification ---\n"); + { + uint16_t crc; + unsigned int slot; + + crc = crc16("test", 4); + slot = crc % 16384; + tests_run++; + if (slot == 6918) { + tests_passed++; + printf(" PASS crc16(\"test\") %% 16384 = %u\n", slot); + } else { + tests_failed++; + printf(" FAIL crc16(\"test\") %% 16384 = %u (expected 6918)\n", slot); + } + + /* Verify CRC16 of empty string is 0 */ + crc = crc16("", 0); + tests_run++; + if (crc == 0) { + tests_passed++; + printf(" PASS crc16(\"\", 0) = 0\n"); + } else { + tests_failed++; + printf(" FAIL crc16(\"\", 0) = %u (expected 0)\n", crc); + } + } + + /* ---------------------------------------------------------- */ + /* Partial cluster regression (slots_assigned=10922) */ + /* After fix: results are identical to full cluster — modulo */ + /* is constant (% 16384), not dependent on slots_assigned. */ + /* Retained for regression coverage. 
*/ + /* ---------------------------------------------------------- */ + printf("\n--- Partial cluster (slots_assigned=10922) ---\n"); + printf("--- After fix: results are identical to full cluster (modulo is constant) ---\n"); + test_slot("testkey", 4757, "testkey (partial cluster)"); + test_slot("foo", 12182, "foo (partial cluster)"); + test_slot("user", 5474, "user (partial cluster)"); + + printf("\n=== Results: %d passed, %d failed, %d total ===\n", + tests_passed, tests_failed, tests_run); + + return tests_failed > 0 ? 1 : 0; +} diff --git a/modules/cachedb_redis/test/test_load.sh b/modules/cachedb_redis/test/test_load.sh new file mode 100755 index 00000000000..511f742f9d3 --- /dev/null +++ b/modules/cachedb_redis/test/test_load.sh @@ -0,0 +1,740 @@ +#!/bin/bash +# +# test_load.sh - Load test for memory leak detection in cachedb_redis topology refresh +# +# Exercises the new CLUSTER SHARDS/SLOTS parser and dynamic topology refresh +# under sustained load with repeated topology changes, monitoring pkg memory +# for leaks. 
+# +# Phases: +# 0: Warmup + baseline (200 ops) +# 1: Sustained load without topology changes (500 ops) +# 2: Slot migration stress — 10 migration cycles (300 ops) +# 3: Node stop/start — failure + recovery path (170 ops) +# 4: Final soak (300 ops) +# +# Requirements: +# - redis-cli, curl, python3 +# - 3-node Redis Cluster (10.0.0.23-25:6379) +# - OpenSIPS with mi_http on port 8888 +# - SSH access to 10.0.0.25 (for node stop/start in Phase 3) +# - cachedb_redis module loaded with cluster mode +# +# Environment variables (override defaults): +# REDIS_PASS - Redis cluster password +# REDIS_NODE_1 - First cluster node (default: 10.0.0.23) +# REDIS_NODE_2 - Second cluster node (default: 10.0.0.24) +# REDIS_NODE_3 - Third cluster node (default: 10.0.0.25) +# REDIS_PORT - Redis port (default: 6379) +# MI_URL - OpenSIPS MI HTTP URL (default: http://127.0.0.1:8888/mi) +# LEAK_THRESHOLD - Max allowed memory growth in bytes (default: 51200 = 50KB) +# + +set -euo pipefail + +# --- Configuration --- +REDIS_PASS="${REDIS_PASS:-85feedc95d5fa7f16fefdb9c92d154179748f2b08df76dc0}" +REDIS_NODE_1="${REDIS_NODE_1:-10.0.0.23}" +REDIS_NODE_2="${REDIS_NODE_2:-10.0.0.24}" +REDIS_NODE_3="${REDIS_NODE_3:-10.0.0.25}" +REDIS_PORT="${REDIS_PORT:-6379}" +MI_URL="${MI_URL:-http://127.0.0.1:8888/mi}" +LEAK_THRESHOLD="${LEAK_THRESHOLD:-51200}" + +PASS=0 +FAIL=0 +TOTAL=0 +TOTAL_OPS=0 + +# --- Helpers (reused from test_topology_refresh.sh) --- + +redis_cmd() { + local node="$1"; shift + redis-cli -h "$node" -p "$REDIS_PORT" -a "$REDIS_PASS" --no-auth-warning "$@" +} + +mi_cmd() { + local cmd="$1"; shift + local params="" + while [ $# -gt 0 ]; do + case "$1" in + -d) params="$2"; shift 2 ;; + *) shift ;; + esac + done + if [ -n "$params" ]; then + curl -s -m 10 -X POST "$MI_URL/$cmd" -H "Content-Type: application/json" \ + -d "{\"jsonrpc\":\"2.0\",\"method\":\"$cmd\",\"params\":$params,\"id\":1}" + else + curl -s -m 10 -X POST "$MI_URL/$cmd" -H "Content-Type: application/json" \ + -d 
"{\"jsonrpc\":\"2.0\",\"method\":\"$cmd\",\"id\":1}" + fi +} + +mi_fetch_value() { + local key="$1" + local result + result=$(mi_cmd "cache_fetch" -d "{\"system\":\"redis:cluster\",\"attr\":\"$key\"}" 2>/dev/null) + echo "$result" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("result",{}).get("value",""))' 2>/dev/null || echo "" +} + +assert_eq() { + local desc="$1" expected="$2" actual="$3" + TOTAL=$((TOTAL + 1)) + if [ "$expected" = "$actual" ]; then + echo " PASS: $desc" + PASS=$((PASS + 1)) + else + echo " FAIL: $desc (expected='$expected', got='$actual')" + FAIL=$((FAIL + 1)) + fi +} + +assert_not_empty() { + local desc="$1" actual="$2" + TOTAL=$((TOTAL + 1)) + if [ -n "$actual" ]; then + echo " PASS: $desc" + PASS=$((PASS + 1)) + else + echo " FAIL: $desc (value was empty)" + FAIL=$((FAIL + 1)) + fi +} + +get_node_id() { + local node="$1" + redis_cmd "$node" CLUSTER MYID | tr -d '\r' +} + +resolve_slot_owner() { + local slot="$1" + local dest_override="${2:-}" + + if [ "$slot" -le 5460 ]; then + SOURCE_IP="$REDIS_NODE_1" + if [ "$dest_override" = "node3" ]; then + DEST_IP="$REDIS_NODE_3" + else + DEST_IP="$REDIS_NODE_2" + fi + elif [ "$slot" -le 10922 ]; then + SOURCE_IP="$REDIS_NODE_2" + if [ "$dest_override" = "node3" ]; then + DEST_IP="$REDIS_NODE_3" + else + DEST_IP="$REDIS_NODE_1" + fi + else + SOURCE_IP="$REDIS_NODE_3" + if [ "$dest_override" = "node1" ]; then + DEST_IP="$REDIS_NODE_1" + else + DEST_IP="$REDIS_NODE_2" + fi + fi + + SOURCE_ID=$(get_node_id "$SOURCE_IP") + DEST_ID=$(get_node_id "$DEST_IP") +} + +begin_migration() { + local slot="$1" + redis_cmd "$DEST_IP" CLUSTER SETSLOT "$slot" IMPORTING "$SOURCE_ID" >/dev/null 2>&1 || true + redis_cmd "$SOURCE_IP" CLUSTER SETSLOT "$slot" MIGRATING "$DEST_ID" >/dev/null 2>&1 || true +} + +migrate_keys() { + local slot="$1" + local keys + keys=$(redis_cmd "$SOURCE_IP" CLUSTER GETKEYSINSLOT "$slot" 100 2>/dev/null | tr -d '\r') + if [ -n "$keys" ]; then + for k in $keys; do + redis_cmd 
"$SOURCE_IP" MIGRATE "$DEST_IP" "$REDIS_PORT" "$k" 0 5000 AUTH "$REDIS_PASS" >/dev/null 2>&1 || true + done + fi +} + +complete_migration() { + local slot="$1" + for node in "$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3"; do + redis_cmd "$node" CLUSTER SETSLOT "$slot" NODE "$DEST_ID" >/dev/null 2>&1 || true + done + sleep 1 +} + +restore_slot() { + local slot="$1" orig_ip="$2" curr_ip="$3" + local orig_id curr_id + + orig_id=$(get_node_id "$orig_ip") + curr_id=$(get_node_id "$curr_ip") + + redis_cmd "$orig_ip" CLUSTER SETSLOT "$slot" IMPORTING "$curr_id" >/dev/null 2>&1 || true + redis_cmd "$curr_ip" CLUSTER SETSLOT "$slot" MIGRATING "$orig_id" >/dev/null 2>&1 || true + + local keys + keys=$(redis_cmd "$curr_ip" CLUSTER GETKEYSINSLOT "$slot" 100 2>/dev/null | tr -d '\r') + if [ -n "$keys" ]; then + for k in $keys; do + redis_cmd "$curr_ip" MIGRATE "$orig_ip" "$REDIS_PORT" "$k" 0 5000 AUTH "$REDIS_PASS" >/dev/null 2>&1 || true + done + fi + + for node in "$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3"; do + redis_cmd "$node" CLUSTER SETSLOT "$slot" NODE "$orig_id" >/dev/null 2>&1 || true + done + sleep 1 +} + +# --- New helpers for load test --- + +# Sample pkg memory for process 1 (HTTPD). Returns "used real fragments". 
+sample_memory() { + local result + result=$(mi_cmd "get_statistics" -d "{\"statistics\":[\"pkmem:\"]}" 2>/dev/null) + local used real frags + used=$(echo "$result" | python3 -c 'import sys,json; print(json.load(sys.stdin)["result"]["pkmem:1-used_size"])' 2>/dev/null) + real=$(echo "$result" | python3 -c 'import sys,json; print(json.load(sys.stdin)["result"]["pkmem:1-real_used_size"])' 2>/dev/null) + frags=$(echo "$result" | python3 -c 'import sys,json; print(json.load(sys.stdin)["result"]["pkmem:1-fragments"])' 2>/dev/null) + echo "$used $real $frags" +} + +# Extract just used_size from sample_memory output +mem_used() { + echo "$1" | awk '{print $1}' +} + +# Extract just real_used_size from sample_memory output +mem_real() { + echo "$1" | awk '{print $2}' +} + +# Extract just fragments from sample_memory output +mem_frags() { + echo "$1" | awk '{print $3}' +} + +# Store a key via MI +mi_store() { + local key="$1" value="$2" + mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$key\",\"value\":\"$value\"}" >/dev/null 2>&1 +} + +# Remove a key via MI +mi_remove() { + local key="$1" + mi_cmd "cache_remove" -d "{\"system\":\"redis:cluster\",\"attr\":\"$key\"}" >/dev/null 2>&1 +} + +# Check for memory leak. Args: description, baseline_used, current_used, threshold +# Returns 0 (pass) or 1 (fail). Prints result. +check_leak() { + local desc="$1" baseline="$2" current="$3" threshold="$4" + local delta=$((current - baseline)) + + TOTAL=$((TOTAL + 1)) + if [ "$delta" -lt "$threshold" ]; then + echo " PASS: $desc (delta=$delta < threshold=$threshold)" + PASS=$((PASS + 1)) + return 0 + else + echo " FAIL: $desc (delta=$delta >= threshold=$threshold)" + FAIL=$((FAIL + 1)) + return 1 + fi +} + +# Check that memory samples don't show monotonic growth. +# Args: description, space-separated list of used_size values +# A sequence is "monotonic" if every value >= the previous one AND +# the last value is > the first. 
+check_no_monotonic_growth() { + local desc="$1"; shift + local samples=("$@") + local n=${#samples[@]} + + if [ "$n" -lt 3 ]; then + TOTAL=$((TOTAL + 1)) + echo " PASS: $desc (too few samples to check monotonicity)" + PASS=$((PASS + 1)) + return 0 + fi + + local monotonic=1 + local i + for (( i=1; i<n; i++ )); do + if [ "${samples[$i]}" -lt "${samples[$((i-1))]}" ]; then + monotonic=0 + break + fi + done + + TOTAL=$((TOTAL + 1)) + if [ "$monotonic" -eq 1 ] && [ "${samples[$((n-1))]}" -gt "${samples[0]}" ]; then + echo " FAIL: $desc (monotonic growth: ${samples[0]} -> ${samples[$((n-1))]})" + FAIL=$((FAIL + 1)) + return 1 + else + echo " PASS: $desc (no monotonic growth)" + PASS=$((PASS + 1)) + return 0 + fi +} + + # Run a store/fetch/remove cycle. Returns 0 on success, 1 on any failure. +run_cycle() { + local key="$1" value="$2" + local fetched + + mi_store "$key" "$value" || return 1 + fetched=$(mi_fetch_value "$key") + if [ "$fetched" != "$value" ]; then + return 1 + fi + mi_remove "$key" || return 1 + TOTAL_OPS=$((TOTAL_OPS + 3)) + return 0 +} + + # Hash tags for targeting specific nodes: +# {b} -> slot 3300 (node 1, slots 0-5460) +# {c} -> slot 7365 (node 2, slots 5461-10922) +# {a} -> slot 15495 (node 3, slots 10923-16383) +HASH_TAGS=("{b}" "{c}" "{a}") +NODE_IPS=("$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3") + + # Slots used for migration cycles (one per node): +# slot 3300 (node 1), slot 7365 (node 2), slot 15495 (node 3) +MIGRATION_SLOTS=(3300 7365 15495) + + # --- Preflight checks --- +echo "=== cachedb_redis Load Test (Memory Leak Detection) ===" +echo "" +echo "Checking prerequisites..." + +if ! command -v redis-cli &>/dev/null; then + echo "ERROR: redis-cli not found. Install redis-tools." + exit 1 +fi + +if ! command -v curl &>/dev/null; then + echo "ERROR: curl not found." + exit 1 +fi + +if ! command -v python3 &>/dev/null; then + echo "ERROR: python3 not found." + exit 1 +fi + +# Verify cluster is healthy +CLUSTER_STATE=$(redis_cmd "$REDIS_NODE_1" CLUSTER INFO | grep cluster_state | tr -d '\r' | cut -d: -f2) +if [ "$CLUSTER_STATE" != "ok" ]; then + echo "ERROR: Redis Cluster state is '$CLUSTER_STATE', expected 'ok'." 
+ exit 1 +fi +echo " Redis Cluster: ok" + +# Verify OpenSIPS MI is reachable +MI_RESPONSE=$(curl -s -m 5 -o /dev/null -w "%{http_code}" -X POST "$MI_URL/which" \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"which","id":1}' 2>/dev/null || true) +if [ "$MI_RESPONSE" != "200" ]; then + echo "ERROR: OpenSIPS MI not reachable at $MI_URL (HTTP $MI_RESPONSE)." + exit 1 +fi +echo " OpenSIPS MI: ok" + +# Verify memory stats are available +TEST_MEM=$(sample_memory) +if [ -z "$(mem_used "$TEST_MEM")" ] || [ "$(mem_used "$TEST_MEM")" = "" ]; then + echo "ERROR: Cannot read pkg memory statistics." + exit 1 +fi +echo " Memory stats: ok (used=$(mem_used "$TEST_MEM"))" +echo " Leak threshold: ${LEAK_THRESHOLD} bytes" +echo "" + +# ================================================================== # +# Phase 0: Warmup + baseline # +# ================================================================== # +echo "--- Phase 0: Warmup ---" + +WARMUP_OK=0 +WARMUP_FAIL=0 +for i in $(seq 1 400); do + tag_idx=$(( (i - 1) % 3 )) + key="test:load:warmup:${HASH_TAGS[$tag_idx]}:${i}" + if run_cycle "$key" "warmup_${i}"; then + WARMUP_OK=$((WARMUP_OK + 1)) + else + WARMUP_FAIL=$((WARMUP_FAIL + 1)) + fi +done + +BASELINE_MEM=$(sample_memory) +BASELINE_USED=$(mem_used "$BASELINE_MEM") +BASELINE_REAL=$(mem_real "$BASELINE_MEM") +BASELINE_FRAGS=$(mem_frags "$BASELINE_MEM") + +echo " $((WARMUP_OK + WARMUP_FAIL)) operations completed ($WARMUP_OK ok, $WARMUP_FAIL errors)" +echo " Baseline memory: used=$BASELINE_USED real=$BASELINE_REAL fragments=$BASELINE_FRAGS" +echo "" + +# ================================================================== # +# Phase 1: Sustained load without topology changes # +# ================================================================== # +echo "--- Phase 1: Sustained load (no topology changes) ---" + +P1_OK=0 +P1_FAIL=0 +P1_SAMPLES=() + +for i in $(seq 1 1000); do + tag_idx=$(( (i - 1) % 3 )) + key="test:load:phase1:${HASH_TAGS[$tag_idx]}:${i}" + 
 if run_cycle "$key" "phase1_${i}"; then + P1_OK=$((P1_OK + 1)) + else + P1_FAIL=$((P1_FAIL + 1)) + fi + + # Sample every 200 operations + if [ $((i % 200)) -eq 0 ]; then + sample=$(sample_memory) + P1_SAMPLES+=("$(mem_used "$sample")") + fi +done + +P1_FINAL_MEM=$(sample_memory) +P1_FINAL_USED=$(mem_used "$P1_FINAL_MEM") +P1_FINAL_REAL=$(mem_real "$P1_FINAL_MEM") +P1_FINAL_FRAGS=$(mem_frags "$P1_FINAL_MEM") +P1_DELTA=$((P1_FINAL_USED - BASELINE_USED)) + +echo " 1000 operations completed, $P1_FAIL errors" +echo " Memory: used=$P1_FINAL_USED real=$P1_FINAL_REAL fragments=$P1_FINAL_FRAGS" +echo " Delta from baseline: ${P1_DELTA} bytes" +check_leak "No leak in sustained load" "$BASELINE_USED" "$P1_FINAL_USED" "$LEAK_THRESHOLD" || true +echo "" + +# ================================================================== # +# Phase 2: Slot migration stress (20 cycles) # +# ================================================================== # +echo "--- Phase 2: Slot migration stress (20 cycles) ---" + +P2_TOTAL_OPS=0 +P2_MIGRATIONS=0 +P2_SAMPLES=() + +for cycle in $(seq 1 20); do + # Rotate through nodes: cycle 1->node1's slot, 2->node2's slot, etc. 
+ slot_idx=$(( (cycle - 1) % 3 )) + slot=${MIGRATION_SLOTS[$slot_idx]} + tag="${HASH_TAGS[$slot_idx]}" + orig_ip="${NODE_IPS[$slot_idx]}" + + # Pick destination: next node in ring + dest_idx=$(( (slot_idx + 1) % 3 )) + dest_ip="${NODE_IPS[$dest_idx]}" + + # Set up SOURCE/DEST for migration helpers + SOURCE_IP="$orig_ip" + SOURCE_ID=$(get_node_id "$SOURCE_IP") + DEST_IP="$dest_ip" + DEST_ID=$(get_node_id "$DEST_IP") + + # Step 1: Store 40 keys in this slot + for k in $(seq 1 40); do + key="test:load:p2c${cycle}:${tag}:${k}" + mi_store "$key" "p2c${cycle}v${k}" + TOTAL_OPS=$((TOTAL_OPS + 1)) + P2_TOTAL_OPS=$((P2_TOTAL_OPS + 1)) + done + + # Step 2: Migrate the slot + begin_migration "$slot" + migrate_keys "$slot" + complete_migration "$slot" + P2_MIGRATIONS=$((P2_MIGRATIONS + 1)) + + # Step 3: Fetch all 40 keys via OpenSIPS (triggers MOVED -> refresh) + for k in $(seq 1 40); do + key="test:load:p2c${cycle}:${tag}:${k}" + fetched=$(mi_fetch_value "$key") + TOTAL_OPS=$((TOTAL_OPS + 1)) + P2_TOTAL_OPS=$((P2_TOTAL_OPS + 1)) + if [ "$fetched" != "p2c${cycle}v${k}" ]; then + echo " WARNING: cycle $cycle key $k mismatch (expected='p2c${cycle}v${k}', got='$fetched')" + fi + done + + # Step 4: Store 20 more keys (should go direct after refresh) + for k in $(seq 41 60); do + key="test:load:p2c${cycle}:${tag}:${k}" + mi_store "$key" "p2c${cycle}v${k}" + TOTAL_OPS=$((TOTAL_OPS + 1)) + P2_TOTAL_OPS=$((P2_TOTAL_OPS + 1)) + done + + # Step 5: Fetch all 60 keys, verify values + cycle_ok=0 + cycle_fail=0 + for k in $(seq 1 60); do + key="test:load:p2c${cycle}:${tag}:${k}" + fetched=$(mi_fetch_value "$key") + TOTAL_OPS=$((TOTAL_OPS + 1)) + P2_TOTAL_OPS=$((P2_TOTAL_OPS + 1)) + if [ "$fetched" = "p2c${cycle}v${k}" ]; then + cycle_ok=$((cycle_ok + 1)) + else + cycle_fail=$((cycle_fail + 1)) + fi + done + + # Step 6: Delete all keys + for k in $(seq 1 60); do + key="test:load:p2c${cycle}:${tag}:${k}" + mi_remove "$key" + TOTAL_OPS=$((TOTAL_OPS + 1)) + P2_TOTAL_OPS=$((P2_TOTAL_OPS + 1)) + 
done + + # Step 7: Restore slot to original owner + restore_slot "$slot" "$orig_ip" "$dest_ip" + + # Step 8: Sample memory + sample=$(sample_memory) + sample_used=$(mem_used "$sample") + P2_SAMPLES+=("$sample_used") + + echo " Cycle $cycle: slot $slot, migrate $orig_ip -> $dest_ip, ${cycle_ok}/$((cycle_ok + cycle_fail)) ok, mem=$sample_used" + + # Step 9: Let rate limiter expire + sleep 2 +done + +P2_FINAL_MEM=$(sample_memory) +P2_FINAL_USED=$(mem_used "$P2_FINAL_MEM") +P2_FINAL_REAL=$(mem_real "$P2_FINAL_MEM") +P2_FINAL_FRAGS=$(mem_frags "$P2_FINAL_MEM") +P2_DELTA=$((P2_FINAL_USED - BASELINE_USED)) + +echo " $P2_TOTAL_OPS operations, $P2_MIGRATIONS migrations" +echo " Memory: used=$P2_FINAL_USED real=$P2_FINAL_REAL fragments=$P2_FINAL_FRAGS" +echo " Delta from baseline: ${P2_DELTA} bytes" +check_leak "No leak after migrations" "$BASELINE_USED" "$P2_FINAL_USED" "$LEAK_THRESHOLD" || true +check_no_monotonic_growth "No monotonic growth across migration cycles" "${P2_SAMPLES[@]}" || true +echo "" + +# ================================================================== # +# Phase 3: Node stop/start (failure + recovery) # +# ================================================================== # +echo "--- Phase 3: Node failure and recovery ---" + +P3_START_MEM=$(sample_memory) +P3_START_USED=$(mem_used "$P3_START_MEM") + +# Stop Redis on node 3 +echo " Stopping redis on ${REDIS_NODE_3}..." +if ! ssh -o ConnectTimeout=5 "$REDIS_NODE_3" "sudo systemctl stop redis-server" 2>/dev/null; then + echo " WARNING: Could not stop redis on $REDIS_NODE_3 via SSH. Skipping Phase 3." + echo " (This phase requires SSH access to $REDIS_NODE_3)" + SKIP_PHASE3=1 +fi + +if [ "${SKIP_PHASE3:-0}" -eq 0 ]; then + # Wait for cluster to detect failure + sleep 5 + + # NOTE: With 3 masters and no replicas, stopping any node puts the + # entire cluster in CLUSTERDOWN state. ALL operations will fail, not + # just those targeting the stopped node's slots. This is expected. 
 + + # Run 40 operations targeting the stopped node's slots — all should fail. + # These trigger the reconnect failure -> refresh_cluster_topology path. + # {a} -> slot 15495 (node 3) + P3_FAIL_OK=0 + P3_FAIL_ERR=0 + for i in $(seq 1 40); do + key="test:load:phase3:{a}:fail_${i}" + mi_store "$key" "should_fail_${i}" 2>/dev/null || true + TOTAL_OPS=$((TOTAL_OPS + 1)) + P3_FAIL_ERR=$((P3_FAIL_ERR + 1)) + done + echo " 40 ops on stopped node's slots: $P3_FAIL_ERR expected failures" + + # Also try 40 ops on other nodes' slots — will also fail due to CLUSTERDOWN + P3_CD_OK=0 + P3_CD_ERR=0 + for i in $(seq 1 40); do + tag_idx=$(( (i - 1) % 2 )) + if [ "$tag_idx" -eq 0 ]; then tag="{b}"; else tag="{c}"; fi + key="test:load:phase3:${tag}:cd_${i}" + if mi_store "$key" "clusterdown_${i}" 2>/dev/null; then + fetched=$(mi_fetch_value "$key") + if [ "$fetched" = "clusterdown_${i}" ]; then + P3_CD_OK=$((P3_CD_OK + 1)) + else + P3_CD_ERR=$((P3_CD_ERR + 1)) + fi + else + P3_CD_ERR=$((P3_CD_ERR + 1)) + fi + TOTAL_OPS=$((TOTAL_OPS + 2)) + done + echo " 40 ops on other nodes (CLUSTERDOWN): $P3_CD_OK ok, $P3_CD_ERR errors" + + # Verify OpenSIPS MI is still alive after the failures + MI_ALIVE=$(curl -s -m 5 -o /dev/null -w "%{http_code}" -X POST "$MI_URL/which" \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"which","id":1}' 2>/dev/null || echo "000") + if [ "$MI_ALIVE" != "200" ]; then + echo " FAIL: OpenSIPS MI not responding after node failure (HTTP $MI_ALIVE)" + echo " OpenSIPS may have crashed. Aborting Phase 3." + # Try to restart redis before bailing + ssh -o ConnectTimeout=5 "$REDIS_NODE_3" "sudo systemctl start redis-server" 2>/dev/null || true + sleep 5 + redis_cmd "$REDIS_NODE_1" CLUSTER MEET "$REDIS_NODE_3" "$REDIS_PORT" >/dev/null 2>&1 || true + FAIL=$((FAIL + 1)) + TOTAL=$((TOTAL + 1)) + else + echo " OpenSIPS MI: still alive after failures" + + # Start Redis on node 3 + echo " Starting redis on ${REDIS_NODE_3}..." 
+ ssh -o ConnectTimeout=5 "$REDIS_NODE_3" "sudo systemctl start redis-server" 2>/dev/null || true + + # Wait for cluster convergence + sleep 5 + + # Re-add node if needed + redis_cmd "$REDIS_NODE_1" CLUSTER MEET "$REDIS_NODE_3" "$REDIS_PORT" >/dev/null 2>&1 || true + + # Poll for cluster state == ok (up to 30 seconds) + P3_RECOVERY_START=$SECONDS + P3_RECOVERED=0 + for attempt in $(seq 1 30); do + state=$(redis_cmd "$REDIS_NODE_1" CLUSTER INFO 2>/dev/null | grep cluster_state | tr -d '\r' | cut -d: -f2) + if [ "$state" = "ok" ]; then + P3_RECOVERED=1 + break + fi + sleep 1 + done + P3_RECOVERY_SECS=$((SECONDS - P3_RECOVERY_START)) + + if [ "$P3_RECOVERED" -eq 1 ]; then + echo " Cluster recovery: ok (took ${P3_RECOVERY_SECS} seconds)" + else + echo " WARNING: Cluster did not recover within 30 seconds" + fi + + # Run 50 store/fetch operations across all 3 nodes — verify recovery + # Topology refresh should pick up the recovered node + P3_RECOVERY_OK=0 + P3_RECOVERY_FAIL=0 + for i in $(seq 1 100); do + tag_idx=$(( (i - 1) % 3 )) + key="test:load:phase3:recovery:${HASH_TAGS[$tag_idx]}:${i}" + if run_cycle "$key" "recovery_${i}"; then + P3_RECOVERY_OK=$((P3_RECOVERY_OK + 1)) + else + P3_RECOVERY_FAIL=$((P3_RECOVERY_FAIL + 1)) + fi + done + echo " 100 ops across all nodes: $P3_RECOVERY_OK ok, $P3_RECOVERY_FAIL errors" + + P3_FINAL_MEM=$(sample_memory) + P3_FINAL_USED=$(mem_used "$P3_FINAL_MEM") + P3_FINAL_REAL=$(mem_real "$P3_FINAL_MEM") + P3_FINAL_FRAGS=$(mem_frags "$P3_FINAL_MEM") + P3_DELTA=$((P3_FINAL_USED - P3_START_USED)) + + echo " Memory: used=$P3_FINAL_USED real=$P3_FINAL_REAL fragments=$P3_FINAL_FRAGS" + echo " Delta from phase start: ${P3_DELTA} bytes" + check_leak "No leak after node failure/recovery" "$P3_START_USED" "$P3_FINAL_USED" "$LEAK_THRESHOLD" || true + fi + + # Clean up any leftover keys from failure tests (may exist if node came back) + for i in $(seq 1 40); do + mi_remove "test:load:phase3:{a}:fail_${i}" 2>/dev/null || true + mi_remove 
"test:load:phase3:{b}:cd_${i}" 2>/dev/null || true + mi_remove "test:load:phase3:{c}:cd_${i}" 2>/dev/null || true + done +fi + +echo "" + +# ================================================================== # +# Phase 4: Final soak # +# ================================================================== # +echo "--- Phase 4: Final soak ---" + +P4_OK=0 +P4_FAIL=0 +for i in $(seq 1 600); do + tag_idx=$(( (i - 1) % 3 )) + key="test:load:phase4:${HASH_TAGS[$tag_idx]}:${i}" + if run_cycle "$key" "phase4_${i}"; then + P4_OK=$((P4_OK + 1)) + else + P4_FAIL=$((P4_FAIL + 1)) + fi +done + +FINAL_MEM=$(sample_memory) +FINAL_USED=$(mem_used "$FINAL_MEM") +FINAL_REAL=$(mem_real "$FINAL_MEM") +FINAL_FRAGS=$(mem_frags "$FINAL_MEM") +FINAL_DELTA=$((FINAL_USED - BASELINE_USED)) + +echo " 600 operations completed, $P4_FAIL errors" +echo " Memory: used=$FINAL_USED real=$FINAL_REAL fragments=$FINAL_FRAGS" +echo " Overall delta from baseline: ${FINAL_DELTA} bytes" +check_leak "No memory leak (final vs baseline)" "$BASELINE_USED" "$FINAL_USED" "$LEAK_THRESHOLD" || true + +# Final cluster health check +FINAL_STATE=$(redis_cmd "$REDIS_NODE_1" CLUSTER INFO | grep cluster_state | tr -d '\r' | cut -d: -f2) +TOTAL=$((TOTAL + 1)) +if [ "$FINAL_STATE" = "ok" ]; then + echo " PASS: Cluster state is ok" + PASS=$((PASS + 1)) +else + echo " FAIL: Cluster state is '$FINAL_STATE' (expected 'ok')" + FAIL=$((FAIL + 1)) +fi + +echo "" + +# ================================================================== # +# Summary # +# ================================================================== # +echo "=== Summary ===" +if [ "$FINAL_DELTA" -ge 0 ]; then + DELTA_SIGN="+" +else + DELTA_SIGN="" +fi +DELTA_PCT=$(python3 -c "print(f'{abs($FINAL_DELTA)/$BASELINE_USED*100:.2f}')" 2>/dev/null || echo "?") +echo " Total operations: $TOTAL_OPS" +if [ "${SKIP_PHASE3:-0}" -eq 0 ]; then + echo " Topology refreshes: ${P2_MIGRATIONS}+ (migrations) + N (node failure)" +else + echo " Topology refreshes: ${P2_MIGRATIONS}+ 
(migrations), node failure skipped" +fi +echo " Memory baseline: $BASELINE_USED" +echo " Memory final: $FINAL_USED" +echo " Memory delta: ${DELTA_SIGN}${FINAL_DELTA} bytes (${DELTA_PCT}%)" +echo "" +echo " Assertions: $PASS passed, $FAIL failed, $TOTAL total" + +if [ "$FAIL" -eq 0 ]; then + echo " PASS: No memory leak detected" +else + echo " FAIL: $FAIL assertion(s) failed" +fi + +echo "" +exit "$FAIL" diff --git a/modules/cachedb_redis/test/test_mi_commands.sh b/modules/cachedb_redis/test/test_mi_commands.sh new file mode 100755 index 00000000000..a89c48f6732 --- /dev/null +++ b/modules/cachedb_redis/test/test_mi_commands.sh @@ -0,0 +1,397 @@ +#!/bin/bash +# +# test_mi_commands.sh - Integration test for MI commands and statistics (PR 5) +# +# Tests the redis_cluster_info and redis_cluster_refresh MI commands, +# as well as the shared-memory statistics counters. +# +# Requirements: +# - curl (for OpenSIPS MI HTTP interface) +# - jq (for JSON parsing) +# - A running OpenSIPS instance with mi_http on port 8888 +# - The cachedb_redis module loaded with at least one connection +# +# Environment variables (override defaults): +# MI_URL - OpenSIPS MI HTTP URL (default: http://127.0.0.1:8888/mi) +# +# Usage: +# ./test_mi_commands.sh +# + +set -euo pipefail + +# --- Configuration --- +MI_URL="${MI_URL:-http://127.0.0.1:8888/mi}" +# jq filter to select the "cluster" group from redis_cluster_info results +CLUSTER_JQ='[.result[] | select(.group=="cluster")][0]' +PING_CLUSTER_JQ='[.result[] | select(.group=="cluster")][0]' + + +PASS=0 +FAIL=0 +TOTAL=0 + +# --- Helpers --- +mi_cmd() { + local cmd="$1"; shift + local params="" + while [ $# -gt 0 ]; do + case "$1" in + -d) params="$2"; shift 2 ;; + *) shift ;; + esac + done + if [ -n "$params" ]; then + curl -s -m 10 -X POST "$MI_URL/$cmd" -H "Content-Type: application/json" \ + -d "{\"jsonrpc\":\"2.0\",\"method\":\"$cmd\",\"params\":$params,\"id\":1}" + else + curl -s -m 10 -X POST "$MI_URL/$cmd" -H "Content-Type: 
application/json" \ + -d "{\"jsonrpc\":\"2.0\",\"method\":\"$cmd\",\"id\":1}" + fi +} + +assert_ok() { + local desc="$1" + local result="$2" + TOTAL=$((TOTAL + 1)) + if [ -n "$result" ] && [ "$result" != "null" ]; then + PASS=$((PASS + 1)) + echo " PASS: $desc" + else + FAIL=$((FAIL + 1)) + echo " FAIL: $desc (empty or null result)" + fi +} + +assert_eq() { + local desc="$1" + local expected="$2" + local actual="$3" + TOTAL=$((TOTAL + 1)) + if [ "$actual" = "$expected" ]; then + PASS=$((PASS + 1)) + echo " PASS: $desc" + else + FAIL=$((FAIL + 1)) + echo " FAIL: $desc (expected='$expected', actual='$actual')" + fi +} + +assert_ge() { + local desc="$1" + local threshold="$2" + local actual="$3" + TOTAL=$((TOTAL + 1)) + if [ "$actual" -ge "$threshold" ] 2>/dev/null; then + PASS=$((PASS + 1)) + echo " PASS: $desc (value=$actual)" + else + FAIL=$((FAIL + 1)) + echo " FAIL: $desc (expected >= $threshold, actual='$actual')" + fi +} + +# ============================================================ +echo "=== Test 1: redis_cluster_info (no params) ===" +# ============================================================ + +RESULT=$(mi_cmd "redis_cluster_info" || echo "") +assert_ok "redis_cluster_info returns response" "$RESULT" + +# Check that result is a JSON array with at least one connection +CON_COUNT=$(echo "$RESULT" | jq -r '.result | length' 2>/dev/null || echo "0") +assert_ge "at least one connection returned" 1 "$CON_COUNT" + +# Check first connection has expected fields +GROUP=$(echo "$RESULT" | jq -r "${CLUSTER_JQ}.group" 2>/dev/null || echo "") +assert_ok "connection has group field" "$GROUP" + +MODE=$(echo "$RESULT" | jq -r "${CLUSTER_JQ}.mode" 2>/dev/null || echo "") +TOTAL=$((TOTAL + 1)) +if [ "$MODE" = "cluster" ] || [ "$MODE" = "single" ]; then + PASS=$((PASS + 1)) + echo " PASS: mode is cluster or single (mode=$MODE)" +else + FAIL=$((FAIL + 1)) + echo " FAIL: mode should be cluster or single (mode=$MODE)" +fi + +# Check nodes array exists +NODES_COUNT=$(echo 
"$RESULT" | jq -r "${CLUSTER_JQ}.nodes | length" 2>/dev/null || echo "0") +assert_ge "at least one node present" 1 "$NODES_COUNT" + +# Check node has ip and port +NODE_IP=$(echo "$RESULT" | jq -r "${CLUSTER_JQ}.nodes[0].ip" 2>/dev/null || echo "") +assert_ok "node has ip field" "$NODE_IP" + +NODE_PORT=$(echo "$RESULT" | jq -r "${CLUSTER_JQ}.nodes[0].port" 2>/dev/null || echo "") +assert_ok "node has port field" "$NODE_PORT" + +NODE_STATUS=$(echo "$RESULT" | jq -r "${CLUSTER_JQ}.nodes[0].status" 2>/dev/null || echo "") +TOTAL=$((TOTAL + 1)) +if [ "$NODE_STATUS" = "connected" ] || [ "$NODE_STATUS" = "disconnected" ]; then + PASS=$((PASS + 1)) + echo " PASS: node has valid status (status=$NODE_STATUS)" +else + FAIL=$((FAIL + 1)) + echo " FAIL: node status invalid (status=$NODE_STATUS)" +fi + +# Check per-node counters exist +NODE_QUERIES=$(echo "$RESULT" | jq -r "${CLUSTER_JQ}.nodes[0].queries" 2>/dev/null || echo "null") +assert_ok "node has queries counter" "$NODE_QUERIES" + +NODE_ERRORS=$(echo "$RESULT" | jq -r "${CLUSTER_JQ}.nodes[0].errors" 2>/dev/null || echo "null") +assert_ok "node has errors counter" "$NODE_ERRORS" + +NODE_MOVED=$(echo "$RESULT" | jq -r "${CLUSTER_JQ}.nodes[0].moved" 2>/dev/null || echo "null") +assert_ok "node has moved counter" "$NODE_MOVED" + + +NODE_LAST_ACTIVITY=$(echo "$RESULT" | jq -r "${CLUSTER_JQ}.nodes[0].last_activity" 2>/dev/null || echo "null") +assert_ok "node has last_activity field" "$NODE_LAST_ACTIVITY" + +# For cluster mode, check total_slots_mapped and cluster_command +if [ "$MODE" = "cluster" ]; then + SLOTS=$(echo "$RESULT" | jq -r "${CLUSTER_JQ}.total_slots_mapped" 2>/dev/null || echo "0") + assert_eq "total_slots_mapped is 16384" "16384" "$SLOTS" + + SLOTS_ASSIGNED=$(echo "$RESULT" | jq -r "${CLUSTER_JQ}.nodes[0].slots_assigned" 2>/dev/null || echo "null") + assert_ok "node has slots_assigned field" "$SLOTS_ASSIGNED" + + CLUSTER_CMD=$(echo "$RESULT" | jq -r "${CLUSTER_JQ}.cluster_command" 2>/dev/null || echo "null") + 
TOTAL=$((TOTAL + 1)) + if [ "$CLUSTER_CMD" = "SHARDS" ] || [ "$CLUSTER_CMD" = "SLOTS" ]; then + PASS=$((PASS + 1)) + echo " PASS: cluster_command is SHARDS or SLOTS (value=$CLUSTER_CMD)" + else + FAIL=$((FAIL + 1)) + echo " FAIL: cluster_command should be SHARDS or SLOTS (value=$CLUSTER_CMD)" + fi +fi + +# Check topology refresh fields +TOPO_REFRESHES=$(echo "$RESULT" | jq -r "${CLUSTER_JQ}.topology_refreshes" 2>/dev/null || echo "null") +assert_ok "connection has topology_refreshes field" "$TOPO_REFRESHES" + +LAST_REFRESH=$(echo "$RESULT" | jq -r "${CLUSTER_JQ}.last_topology_refresh" 2>/dev/null || echo "null") +assert_ok "connection has last_topology_refresh field" "$LAST_REFRESH" + +# ============================================================ +echo "" +echo "=== Test 2: redis_cluster_info with group filter ===" +# ============================================================ + +RESULT_FILTERED=$(mi_cmd "redis_cluster_info" -d "{\"group\":\"$GROUP\"}" || echo "") +assert_ok "filtered redis_cluster_info returns response" "$RESULT_FILTERED" + +FILTERED_COUNT=$(echo "$RESULT_FILTERED" | jq -r '.result | length' 2>/dev/null || echo "0") +assert_ge "at least one connection with matching group" 1 "$FILTERED_COUNT" + +FILTERED_GROUP=$(echo "$RESULT_FILTERED" | jq -r '.result[0].group' 2>/dev/null || echo "") +assert_eq "filtered result matches requested group" "$GROUP" "$FILTERED_GROUP" + +# Filter with non-existent group +RESULT_EMPTY=$(mi_cmd "redis_cluster_info" -d '{"group":"nonexistent_group_xyz"}' || echo "") +EMPTY_COUNT=$(echo "$RESULT_EMPTY" | jq -r '.result | length' 2>/dev/null || echo "0") +assert_eq "non-existent group returns empty array" "0" "$EMPTY_COUNT" + +# ============================================================ +echo "" +echo "=== Test 3: redis_cluster_refresh ===" +# ============================================================ + +# Get topology_refreshes before +REFRESH_BEFORE=$(echo "$RESULT" | jq -r "${CLUSTER_JQ}.topology_refreshes" 
2>/dev/null || echo "0") + +REFRESH_RESULT=$(mi_cmd "redis_cluster_refresh" || echo "") +assert_ok "redis_cluster_refresh returns response" "$REFRESH_RESULT" + +REFRESH_STATUS=$(echo "$REFRESH_RESULT" | jq -r "${CLUSTER_JQ}.status" 2>/dev/null || echo "") +TOTAL=$((TOTAL + 1)) +if [ "$REFRESH_STATUS" = "ok" ] || [ "$REFRESH_STATUS" = "skipped (not cluster mode)" ]; then + PASS=$((PASS + 1)) + echo " PASS: refresh status is ok or skipped (status=$REFRESH_STATUS)" +else + FAIL=$((FAIL + 1)) + echo " FAIL: unexpected refresh status (status=$REFRESH_STATUS)" +fi + +REFRESH_GROUP=$(echo "$REFRESH_RESULT" | jq -r "${CLUSTER_JQ}.group" 2>/dev/null || echo "") +assert_eq "refresh response includes group" "$GROUP" "$REFRESH_GROUP" + +# Verify topology_refreshes incremented (for cluster mode) +if [ "$MODE" = "cluster" ]; then + sleep 1 + RESULT_AFTER=$(mi_cmd "redis_cluster_info" || echo "") + REFRESH_AFTER=$(echo "$RESULT_AFTER" | jq -r "${CLUSTER_JQ}.topology_refreshes" 2>/dev/null || echo "0") + assert_ge "topology_refreshes incremented" "$((REFRESH_BEFORE + 1))" "$REFRESH_AFTER" +fi + +# Test refresh with group filter +REFRESH_FILTERED=$(mi_cmd "redis_cluster_refresh" -d "{\"group\":\"$GROUP\"}" || echo "") +REFRESH_FILTERED_STATUS=$(echo "$REFRESH_FILTERED" | jq -r '.result[0].status' 2>/dev/null || echo "") +TOTAL=$((TOTAL + 1)) +if [ "$REFRESH_FILTERED_STATUS" = "ok" ] || [ "$REFRESH_FILTERED_STATUS" = "skipped (not cluster mode)" ]; then + PASS=$((PASS + 1)) + echo " PASS: filtered refresh status is ok or skipped (status=$REFRESH_FILTERED_STATUS)" +else + FAIL=$((FAIL + 1)) + echo " FAIL: unexpected filtered refresh status (status=$REFRESH_FILTERED_STATUS)" +fi + +# ============================================================ +echo "" +echo "=== Test 4: Statistics counters ===" +# ============================================================ + +# Run cache operations to generate stats +mi_cmd "cache_store" -d 
'{"system":"redis:cluster","attr":"mi_test_key","value":"mi_test_val","expire":30}' >/dev/null 2>&1 || true +mi_cmd "cache_fetch" -d '{"system":"redis:cluster","attr":"mi_test_key"}' >/dev/null 2>&1 || true +mi_cmd "cache_remove" -d '{"system":"redis:cluster","attr":"mi_test_key"}' >/dev/null 2>&1 || true + +# Check statistics via get_statistics +STATS=$(mi_cmd "get_statistics" -d '{"statistics":["redis_queries","redis_queries_failed","redis_moved","redis_topology_refreshes"]}' 2>/dev/null || echo "") +assert_ok "get_statistics returns response" "$STATS" + +if [ -n "$STATS" ]; then + QUERY_STAT=$(echo "$STATS" | jq -r '.result["cachedb_redis:redis_queries"]' 2>/dev/null || echo "null") + assert_ge "redis_queries stat is positive" 1 "$QUERY_STAT" + + FAILED_STAT=$(echo "$STATS" | jq -r '.result["cachedb_redis:redis_queries_failed"]' 2>/dev/null || echo "null") + assert_ok "redis_queries_failed stat exists" "$FAILED_STAT" + + MOVED_STAT=$(echo "$STATS" | jq -r '.result["cachedb_redis:redis_moved"]' 2>/dev/null || echo "null") + assert_ok "redis_moved stat exists" "$MOVED_STAT" + + + TOPO_STAT=$(echo "$STATS" | jq -r '.result["cachedb_redis:redis_topology_refreshes"]' 2>/dev/null || echo "null") + assert_ge "redis_topology_refreshes stat is positive" 1 "$TOPO_STAT" + + # Verify per-node queries counter increased after cache operations + RESULT_POST_OPS=$(mi_cmd "redis_cluster_info" -d '{"group":"cluster"}' || echo "") + TOTAL_NODE_QUERIES=0 + for i in $(seq 0 $((NODES_COUNT - 1))); do + NQ=$(echo "$RESULT_POST_OPS" | jq -r "${CLUSTER_JQ}.nodes[$i].queries" 2>/dev/null || echo "0") + TOTAL_NODE_QUERIES=$((TOTAL_NODE_QUERIES + NQ)) + done + assert_ge "sum of per-node queries > 0 after cache ops" 1 "$TOTAL_NODE_QUERIES" +fi + +# ============================================================ +echo "" +echo "=== Test 5: Cluster health check ===" +# ============================================================ + +RESULT_FINAL=$(mi_cmd "redis_cluster_info" || echo "") 
+FINAL_NODES=$(echo "$RESULT_FINAL" | jq -r "${CLUSTER_JQ}.nodes | length" 2>/dev/null || echo "0") +assert_ge "cluster still has nodes after all tests" 1 "$FINAL_NODES" + +if [ "$MODE" = "cluster" ]; then + FINAL_SLOTS=$(echo "$RESULT_FINAL" | jq -r "${CLUSTER_JQ}.total_slots_mapped" 2>/dev/null || echo "0") + assert_eq "cluster still has 16384 slots mapped" "16384" "$FINAL_SLOTS" + + # Verify all nodes still connected + ALL_CONNECTED=1 + for i in $(seq 0 $((FINAL_NODES - 1))); do + STATUS=$(echo "$RESULT_FINAL" | jq -r "${CLUSTER_JQ}.nodes[$i].status" 2>/dev/null || echo "") + if [ "$STATUS" != "connected" ]; then + ALL_CONNECTED=0 + break + fi + done + TOTAL=$((TOTAL + 1)) + if [ "$ALL_CONNECTED" = "1" ]; then + PASS=$((PASS + 1)) + echo " PASS: all $FINAL_NODES nodes still connected" + else + FAIL=$((FAIL + 1)) + echo " FAIL: some nodes disconnected after tests" + fi +fi + +# ============================================================ +echo "" +echo "=== Test 6: redis_ping_nodes ===" +# ============================================================ + +PING_RESULT=$(mi_cmd "redis_ping_nodes" || echo "") +assert_ok "redis_ping_nodes returns response" "$PING_RESULT" + +# Check that result is a JSON array with at least one connection +PING_CON_COUNT=$(echo "$PING_RESULT" | jq -r '.result | length' 2>/dev/null || echo "0") +assert_ge "at least one connection in ping result" 1 "$PING_CON_COUNT" + +# Check first connection has group and nodes +PING_GROUP=$(echo "$PING_RESULT" | jq -r "${PING_CLUSTER_JQ}.group" 2>/dev/null || echo "") +assert_ok "ping result has group field" "$PING_GROUP" + +PING_NODES_COUNT=$(echo "$PING_RESULT" | jq -r "${PING_CLUSTER_JQ}.nodes | length" 2>/dev/null || echo "0") +assert_ge "at least one node in ping result" 1 "$PING_NODES_COUNT" + +# Check each node has ip, port, status, latency_us +PING_NODE_IP=$(echo "$PING_RESULT" | jq -r "${PING_CLUSTER_JQ}.nodes[0].ip" 2>/dev/null || echo "") +assert_ok "ping node has ip field" "$PING_NODE_IP" + 
+PING_NODE_PORT=$(echo "$PING_RESULT" | jq -r "${PING_CLUSTER_JQ}.nodes[0].port" 2>/dev/null || echo "") +assert_ok "ping node has port field" "$PING_NODE_PORT" + +PING_NODE_STATUS=$(echo "$PING_RESULT" | jq -r "${PING_CLUSTER_JQ}.nodes[0].status" 2>/dev/null || echo "") +TOTAL=$((TOTAL + 1)) +if [ "$PING_NODE_STATUS" = "reachable" ] || [ "$PING_NODE_STATUS" = "unreachable" ] || [ "$PING_NODE_STATUS" = "disconnected" ]; then + PASS=$((PASS + 1)) + echo " PASS: ping node has valid status (status=$PING_NODE_STATUS)" +else + FAIL=$((FAIL + 1)) + echo " FAIL: ping node status invalid (status=$PING_NODE_STATUS)" +fi + +# All nodes should be reachable in a healthy cluster +ALL_REACHABLE=1 +for i in $(seq 0 $((PING_NODES_COUNT - 1))); do + PSTATUS=$(echo "$PING_RESULT" | jq -r "${PING_CLUSTER_JQ}.nodes[$i].status" 2>/dev/null || echo "") + if [ "$PSTATUS" != "reachable" ]; then + ALL_REACHABLE=0 + break + fi +done +TOTAL=$((TOTAL + 1)) +if [ "$ALL_REACHABLE" = "1" ]; then + PASS=$((PASS + 1)) + echo " PASS: all $PING_NODES_COUNT nodes reachable" +else + FAIL=$((FAIL + 1)) + echo " FAIL: some nodes not reachable" +fi + +# Check latency_us > 0 for reachable nodes +PING_LATENCY=$(echo "$PING_RESULT" | jq -r "${PING_CLUSTER_JQ}.nodes[0].latency_us" 2>/dev/null || echo "-1") +if [ "$PING_NODE_STATUS" = "reachable" ]; then + assert_ge "ping latency_us > 0 for reachable node" 0 "$PING_LATENCY" +fi + +# Test filtered by group +PING_FILTERED=$(mi_cmd "redis_ping_nodes" -d "{\"group\":\"$PING_GROUP\"}" || echo "") +assert_ok "filtered redis_ping_nodes returns response" "$PING_FILTERED" + +PING_FILTERED_COUNT=$(echo "$PING_FILTERED" | jq -r '.result | length' 2>/dev/null || echo "0") +assert_ge "at least one connection with matching group" 1 "$PING_FILTERED_COUNT" + +PING_FILTERED_GROUP=$(echo "$PING_FILTERED" | jq -r '.result[0].group' 2>/dev/null || echo "") +assert_eq "filtered ping result matches requested group" "$PING_GROUP" "$PING_FILTERED_GROUP" + +# Non-existent group 
returns empty +PING_EMPTY=$(mi_cmd "redis_ping_nodes" -d '{"group":"nonexistent_group_xyz"}' || echo "") +PING_EMPTY_COUNT=$(echo "$PING_EMPTY" | jq -r '.result | length' 2>/dev/null || echo "0") +assert_eq "non-existent group returns empty array" "0" "$PING_EMPTY_COUNT" + +# ============================================================ +echo "" +echo "==============================" +echo "Results: $PASS/$TOTAL passed, $FAIL failed" +echo "==============================" + +if [ "$FAIL" -gt 0 ]; then + exit 1 +fi +exit 0 diff --git a/modules/cachedb_redis/test/test_mi_counters b/modules/cachedb_redis/test/test_mi_counters new file mode 100755 index 0000000000000000000000000000000000000000..235e155ea887c63a63c4ef65d04b1a4eaf6fa194 GIT binary patch literal 20528 zcmeHP4|H4AdB3(JaFZa}0a`aSqtC zG>OH+jq7Z2p*RcmDH5mXO$vajN>xQMtkifylidB9?t~xHd<@w_f+ROl*+~YWFcej) zPI4}~)paM_c!A<$$n?CvIz<-_c;ak@XK0rrQ$`vUePm5Byt!P}w`j<6R3*7lEjOy= z@=!ZsR73VB`Go%4v_GBD0F8iP|$yhmin7CZes)XCjMU%cwZiw=G2c=PvH1d$Xy}P z8R|@iEYX#S#H_F&W^&&mv8r|TvgQ8yzWKg}{QZK3!oPa`8h*5y~MB^hCDIpR_w|H|YY#i?A-kmYfeni=^JMgJO&`^MjV5g{oT z*Xh3UI>F<9fCdZ7VZ9D8zD?teHbrMV*E2ULJjI)0$Yt1u*F#v6Mr?RBu9U=K8(uGU zk}zt+vu;Vy_7JaLQY5<4hUd9Sgen_euFsNt@-pdxNe@hVVA2DV9+>pNqz5KF@W=9i znSP_vywCMG_M_(A8LK=uVy1^G2lMK34F^EV&3Ot}&+I1LQGE;1$3}A~bM{f4wwPlh zvc8Avw6z?|$ogHVhX+WYR9R>CHx3(6-aqSW*zad|K?bO!KxGH$zVD^_jM?9C-^YbW zyLOpnF7;pH}2SNq*W!9+Koo75O1a ze$+<(cS#;ls`wrf2_Pt`JUmI^* zzoI@<|0MXo3D(^CZ(b?HJ7J$^cCVm9^A)sa*`#YJfh8xb^D*6@1|v=P59LNZvwMhl zP($!nOa6wb1Zq#3edFfd&zW*)RdK@zyIRhO&a@md9#4o>!5AVCOl!+|I+MxGkY7VW`C=<%ItT|xDYFo z=go}SH!W;>Vh7Fs`@Q4R+7?Ow%`CFK#kPzIKiC+$uD8gp&4d+Ulrim6XueU)N!-lq89(|-VJ5KOlK<1zOECb{^@Y(jvlH6 z7s;ED-i|HgLZjaQ>Aa&e{Xx@HE7=md$t}4d%6by zE$n$=5XhUyHRe^q{MfU9@ZPZJy{x$;n|dnji9a6pBo9}HJtwnVnW8G&vyB_hN8fv2 zKuSovhL^xpm`Ik|?aPnRGuHpO(!YnaU@_SAF`75h8jOV4N2SMGy(LW>GdrHlUia%< z?rqO@u%un{$vQalNtpKt2Qn$0|?Ab@=E^ia_H#02F~k(xorpJh&|T z(_gWZ$G(V!k#-FcTI+;{r=-lSkp33#HjaG@{AwM+G=2gB>yOEbN1grGr}I!`rCI>S@l 
zG;S^#PK~Es^T;#|fEUzk8bQxroxt0+fnb|IvO)MCb7Ct(*)@!azdnq>G*Ju~J3`nI z&;A264xTrc>`j3;KFH$$GelL$)2M7e+$hjLPwxZhW7^fT$Yvy2XwzSg^@ZZkn)3RW z{XYE%l>R-W1rce(%qh&Hi$4tHjvAYTP}1np-SFW9ds z(Xp4_MOX_?lldqg-;1Cl`>)`j<+x1-nU?4uX1`5Ro{2(i_U-lPD68o9e*>Y$QfiN~ zO{YEH;}q{kve*`HbH`q}dcLo)+ZvwTgUASk`GduO`1J8NZ1G=cGy11u@%f z@yF@%Y2*KnjQ;`^e=p=I{s^iq{%@ZW|K|1J+KlEmP{!UQGxjbo9Z^%}UJRIh08;tk z@KW}xs3KX1Xh`ZP8`HK|L z`td5(t|7`+Lm1SXXzd`(8-zLP**`?<7Hl4r`C~t?TVRXJ3awkHY#-P8D?CN!|9To8 zFcS0GZk;{%P6 z)|U8r-n6TsU|P}Eq%g(p2A9Ml{&3Zq?VptYe&7D-M!A35iN`?X|GLxr?+o&R>}KK0G*0=)3B1n(!yDpy9XL6T>!;HcbdE_D?07l>Nw^ zYX3QkXvzwWKUDJjr%#lV|4g!0jlZHTlzsetApN}m?81km1^*p)OaGl>6m6Tb2al8g zzK2gvZRrQ@dq3NalK!Q9`hoM$;OX;E6XhSZ4;6N+$z9Scw6G! z0KJ~V<|jUgG(%Yc8&TSc$gs+d=wS@b?_fAK-zK^G8~^qkMW#U021#y0K6^}VMfZUb zQ$GTr)`YZc?L7E7yAgEApXVK$h9_h^`x92Hgn~~>0osSs$lus}+t}r*!3R`>HIklo zxnyHipp7l10}$j(#qcW+X-vCbyPUMW`34xChoVBbWWIY8rptV1pr^FrXOKK7-<<%= zJ_0G7?^^qQmfeG@+27(dXp6fE3~&k-Ph=6ScG$*ZG}FWF3Cb2vsO+YgqLN-mOigrY zqf3{k_VeBzFc2Kj@IiuV|5CIcK1}Ime>lqvMdHm~XVgXhl(vc&<)76*Wo&p(9W z=O4N6eVNy9EFQ_{D-p?;5gl2fu%d$bOXI-P`%#>LVg0+Tv@xaT@B7oQ){m#;Xrr+* z#VbugNWYEAi;-~YG&zS)F-*?kb6%I@X$tbkyO)w_2vis1bBy9M`~g-&TmE<+-g=h& z_bia|`I|8O{Qc~`$iF{l|2+qzkC6X<4*$st`45%t*}F5I-bZBey-r#vlkY?sj^fAp z(4<%>-6U@djGJ8w7_XoG209I+lQ~Mz+I#8-4MBze22<*Irpo7Cig!V!y?Rh zeOy3pejmpL6S(hxAbLL9>R){&)?7B>pkq#eE$CK zQJGC|0wY$CM=*5d3PQs=?fUCEaCP=eV1dPQU5QWkjvbIExDC^k$X1A`WjyWjOa40) z{}k`o&F@jbRudK}pR7c{>mXO3p_G3mL@DJjQ8ozG;2hat+BHKqHVd?|vsKD}0u+c% z(HhCZV_vfG#A|v|jeP~VA58R_TG6Lyh{0r;^uVMCCOz{g6Bs777RfJ|M}0CnqAB{0 zuMyfBiW$^FBZx2kCL--AD-w^%P8It@@^nTvCjycooQQWC(mMgeimSVJ788w)jmDbQ z#&YsuD3LTSHq218i|#hWB9`=U(g2h4iV!xpE4!rQRSZb#u23RAF9K2c9$#z@8tvVN z`f{^U>5LmX7KwEvLKL86-l`%$l#Xwxv&-sM{;=qK&sx^mzW)^eD?z&qx!xuhi3LMl zAzUzI;0YZCb`TAsVtxc8)nOUYKzBT48OZ=T-<|J?j-K*T5*{(GHrfNpkTG{lAjxhs z!jVukn5?CCDIjY@$yC%zE;Z%^F^-eT5QZmi!+}Ut-^yO8xYXDA=&xSroB2Wqn-nxG zTvRWel*GtyYhJgGM*dXA(QY_6RHJGRH5&Dc7A#&UigAnUm&|893s?^ekS=c7hSTs17Qn#*kh+=+9phXJYJOkI|;yVi=bZB=C# 
zOh3DFpzQ1_!sDZN(Sg~!$vz_7UJ$U{8z#D19)^3&F-pu%1w7ox?+`k_n%i*Tv0LAU9-kr)$FeP z)U?l_X?KO^Idhs?U@Ddn%gUMtaIBcTc*q;;gf#x{WpN8W^|6UAbx6HdL>5 z*O>0AW$wxqc+`}A^Fl`!2T8xc)8?t|WYC#Ij&{mF4(@MT$#4_c_;Ydo^>?-21-Fj+Vk&}v|`2&Xv5g< zjOL^L6BX8v!$#R3pd>4f0%d)Zmd7_$Wu5(u&&XxHRnzfpLs@6LW#j(~7ykXYQ}rS} z4pwS-qlV1Kzb9R_eECviZcC^=5{Mb~3-JTmx_R|WYV|F1p9;w@Q^Z?16)i60=Q43_ z{`cdCCev#NO7Y|T8YdNQk*uzHC2JLq)-^jd>p+`l;*Z^y}K(86JgKakoc zI^vyO(Gbp6eRZWd=(PtCe;|k3FX zU&YYG?^rm^dll$K4tZXBAA>&>3|Ijnd!ygnP()Y9o<%4)3?k~?46vm5MGgL7GVb3J zhz02-4y$hfS1=Our*QfQRWV%r?a8E;)$fqNiyzn#^hSu1uX!kb(?qd3uINwqg$8#JZ^B2Q3kb21r2Pbww>|5r+A2>%+0U@L8d4xYDozQ(U^;7uuo+%9bD+ z;hMIudD-fDGzVovYWyg9zK6a*XQTth#4XYTQz@7}xLEiwg6TXuKezvY8`_W39uCWf zEd1UWhK9y7;Qbxf@jbR8`7XUNW9Vf@4S79fbTe*{0`mGNdT$Z)6R*9jMg?D-r)5=^ z-C93G*3bK9r~T_7MSEnH=Y4b@s%GW8VMsU2^M020Kctf7d0)DJ=B`CCAx7#m~xO@)(6cd)`eLTYqXjE9gv;Tdd zk^C}ZLgD8EhNePGdGeW4{tLj7UHlxA&s#cw@$-gmCchu$`d>NZ4K2s8i5WH2t(Ue+PcIp^Ww)5CFR$xQXDfK0* +#include +#include +#include +#include + +/* ================================================================== */ +/* Minimal type stubs matching OpenSIPS definitions */ +/* ================================================================== */ + +struct __str { + char *s; + int len; +}; +typedef struct __str str; + +struct cachedb_id { + char *scheme; + char *group_name; + char *username; + char *password; + char *host; + unsigned short port; + char *database; + char *extra_options; + char *initial_url; + int flags; +}; + +struct cachedb_pool_con_t; +struct tls_domain; + +typedef struct redisContext { int fd; int err; char errstr[128]; } redisContext; +typedef struct redisReply { + int type; + long long integer; + size_t len; + char *str; + size_t elements; + struct redisReply **element; +} redisReply; + +/* cluster_node — must match cachedb_redis_dbase.h */ +typedef struct cluster_nodes { + char *ip; + unsigned short port; + unsigned short start_slot; + unsigned short end_slot; + redisContext *context; + struct tls_domain *tls_dom; + uint8_t 
seen; + /* per-node, per-process counters (pkg memory) */ + unsigned long queries; + unsigned long errors; + unsigned long moved; + struct cluster_nodes *next; +} cluster_node; + +enum redis_flag { + REDIS_SINGLE_INSTANCE = 1 << 0, + REDIS_CLUSTER_INSTANCE = 1 << 1, + REDIS_INIT_NODES = 1 << 2, + REDIS_JSON_SUPPORT = 1 << 3, + REDIS_MULTIPLE_HOSTS = 1 << 4, +}; + +enum cluster_cmd { + CLUSTER_CMD_NONE, + CLUSTER_CMD_SHARDS, + CLUSTER_CMD_SLOTS +}; + +typedef struct _redis_con { + struct cachedb_id *id; + unsigned int ref; + struct cachedb_pool_con_t *next; + char *host; + unsigned short port; + enum redis_flag flags; + cluster_node *nodes; + char *json_keyspace; + cluster_node *slot_table[16384]; + enum cluster_cmd cluster_cmd; + time_t last_topology_refresh; + unsigned int topology_refresh_count; + struct _redis_con *next_con; + struct _redis_con *current; +} redis_con; + +/* ================================================================== */ +/* Re-implement the static helpers from cachedb_redis_mi.c */ +/* (they are static so we duplicate them here for testing) */ +/* ================================================================== */ + +static int count_node_slots(redis_con *con, cluster_node *node) +{ + int i, count = 0; + for (i = 0; i < 16384; i++) + if (con->slot_table[i] == node) + count++; + return count; +} + +static int count_total_slots(redis_con *con) +{ + int i, count = 0; + for (i = 0; i < 16384; i++) + if (con->slot_table[i] != NULL) + count++; + return count; +} + +/* ================================================================== */ +/* Test framework */ +/* ================================================================== */ + +static int tests_run = 0; +static int tests_passed = 0; +static int tests_failed = 0; + +#define ASSERT_EQ(desc, expected, actual) do { \ + tests_run++; \ + if ((expected) == (actual)) { \ + tests_passed++; \ + printf(" PASS %s\n", desc); \ + } else { \ + tests_failed++; \ + printf(" FAIL %s (expected=%ld, 
got=%ld)\n", \ + desc, (long)(expected), (long)(actual)); \ + } \ +} while (0) + +/* ================================================================== */ +/* Tests */ +/* ================================================================== */ + +static void test_node_counters_zero_init(void) +{ + cluster_node node; + + printf("--- Test: cluster_node counters zero-initialized by memset ---\n"); + memset(&node, 0, sizeof(cluster_node)); + + ASSERT_EQ("queries starts at 0", 0UL, node.queries); + ASSERT_EQ("errors starts at 0", 0UL, node.errors); + ASSERT_EQ("moved starts at 0", 0UL, node.moved); +} + +static void test_node_counters_increment(void) +{ + cluster_node node; + + printf("\n--- Test: counter increments ---\n"); + memset(&node, 0, sizeof(cluster_node)); + + node.queries++; + node.queries++; + node.queries++; + ASSERT_EQ("queries after 3 increments", 3UL, node.queries); + + node.errors++; + ASSERT_EQ("errors after 1 increment", 1UL, node.errors); + + node.moved++; + node.moved++; + ASSERT_EQ("moved after 2 increments", 2UL, node.moved); + +} + +static void test_count_node_slots_empty(void) +{ + redis_con con; + cluster_node node; + + printf("\n--- Test: count_node_slots with empty slot table ---\n"); + memset(&con, 0, sizeof(redis_con)); + memset(&node, 0, sizeof(cluster_node)); + + ASSERT_EQ("no slots assigned", 0, count_node_slots(&con, &node)); +} + +static void test_count_node_slots_partial(void) +{ + redis_con con; + cluster_node node_a, node_b; + int i; + + printf("\n--- Test: count_node_slots with partial assignment ---\n"); + memset(&con, 0, sizeof(redis_con)); + memset(&node_a, 0, sizeof(cluster_node)); + memset(&node_b, 0, sizeof(cluster_node)); + node_a.ip = "10.0.0.1"; + node_b.ip = "10.0.0.2"; + + /* Assign slots 0-5460 to node_a, 5461-10921 to node_b */ + for (i = 0; i <= 5460; i++) + con.slot_table[i] = &node_a; + for (i = 5461; i <= 10921; i++) + con.slot_table[i] = &node_b; + /* slots 10922-16383 remain NULL */ + + ASSERT_EQ("node_a has 5461 
slots", 5461, count_node_slots(&con, &node_a)); + ASSERT_EQ("node_b has 5461 slots", 5461, count_node_slots(&con, &node_b)); +} + +static void test_count_total_slots_full(void) +{ + redis_con con; + cluster_node node; + int i; + + printf("\n--- Test: count_total_slots with full assignment ---\n"); + memset(&con, 0, sizeof(redis_con)); + memset(&node, 0, sizeof(cluster_node)); + + for (i = 0; i < 16384; i++) + con.slot_table[i] = &node; + + ASSERT_EQ("total slots = 16384", 16384, count_total_slots(&con)); +} + +static void test_count_total_slots_partial(void) +{ + redis_con con; + cluster_node node; + int i; + + printf("\n--- Test: count_total_slots with partial assignment ---\n"); + memset(&con, 0, sizeof(redis_con)); + memset(&node, 0, sizeof(cluster_node)); + + for (i = 0; i < 8192; i++) + con.slot_table[i] = &node; + + ASSERT_EQ("total slots = 8192", 8192, count_total_slots(&con)); +} + +static void test_count_total_slots_empty(void) +{ + redis_con con; + + printf("\n--- Test: count_total_slots with empty table ---\n"); + memset(&con, 0, sizeof(redis_con)); + + ASSERT_EQ("total slots = 0", 0, count_total_slots(&con)); +} + +static void test_independent_node_counters(void) +{ + cluster_node node_a, node_b, node_c; + + printf("\n--- Test: independent per-node counters ---\n"); + memset(&node_a, 0, sizeof(cluster_node)); + memset(&node_b, 0, sizeof(cluster_node)); + memset(&node_c, 0, sizeof(cluster_node)); + + /* Simulate traffic patterns */ + node_a.queries = 100; + node_a.errors = 2; + node_a.moved = 5; + + node_b.queries = 200; + node_b.errors = 10; + node_b.moved = 0; + + node_c.queries = 50; + node_c.errors = 0; + node_c.moved = 0; + + /* Verify they're independent */ + ASSERT_EQ("node_a.queries = 100", 100UL, node_a.queries); + ASSERT_EQ("node_b.queries = 200", 200UL, node_b.queries); + ASSERT_EQ("node_c.queries = 50", 50UL, node_c.queries); + + ASSERT_EQ("node_a.errors = 2", 2UL, node_a.errors); + ASSERT_EQ("node_b.errors = 10", 10UL, node_b.errors); + 
ASSERT_EQ("node_c.errors = 0", 0UL, node_c.errors); + + ASSERT_EQ("node_a.moved = 5", 5UL, node_a.moved); + ASSERT_EQ("node_b.moved = 0", 0UL, node_b.moved); + +} + +static void test_three_node_cluster_slots(void) +{ + redis_con con; + cluster_node node_a, node_b, node_c; + int i; + + printf("\n--- Test: 3-node cluster even slot distribution ---\n"); + memset(&con, 0, sizeof(redis_con)); + memset(&node_a, 0, sizeof(cluster_node)); + memset(&node_b, 0, sizeof(cluster_node)); + memset(&node_c, 0, sizeof(cluster_node)); + + node_a.ip = "10.0.0.1"; + node_b.ip = "10.0.0.2"; + node_c.ip = "10.0.0.3"; + + /* Standard 3-node distribution: 0-5460, 5461-10922, 10923-16383 */ + for (i = 0; i <= 5460; i++) + con.slot_table[i] = &node_a; + for (i = 5461; i <= 10922; i++) + con.slot_table[i] = &node_b; + for (i = 10923; i < 16384; i++) + con.slot_table[i] = &node_c; + + ASSERT_EQ("node_a slots = 5461", 5461, count_node_slots(&con, &node_a)); + ASSERT_EQ("node_b slots = 5462", 5462, count_node_slots(&con, &node_b)); + ASSERT_EQ("node_c slots = 5461", 5461, count_node_slots(&con, &node_c)); + ASSERT_EQ("total slots = 16384", 16384, count_total_slots(&con)); +} + +static void test_slot_migration(void) +{ + redis_con con; + cluster_node node_a, node_b; + int i; + + printf("\n--- Test: slot migration from node_a to node_b ---\n"); + memset(&con, 0, sizeof(redis_con)); + memset(&node_a, 0, sizeof(cluster_node)); + memset(&node_b, 0, sizeof(cluster_node)); + + /* Initially all slots on node_a */ + for (i = 0; i < 16384; i++) + con.slot_table[i] = &node_a; + + ASSERT_EQ("before: node_a has 16384", 16384, count_node_slots(&con, &node_a)); + ASSERT_EQ("before: node_b has 0", 0, count_node_slots(&con, &node_b)); + + /* Migrate slots 0-999 to node_b */ + for (i = 0; i < 1000; i++) + con.slot_table[i] = &node_b; + + ASSERT_EQ("after: node_a has 15384", 15384, count_node_slots(&con, &node_a)); + ASSERT_EQ("after: node_b has 1000", 1000, count_node_slots(&con, &node_b)); + ASSERT_EQ("total 
still 16384", 16384, count_total_slots(&con)); +} + +static void test_struct_size_includes_counters(void) +{ + printf("\n--- Test: struct layout sanity ---\n"); + + /* Verify cluster_node is large enough to include the 4 counter fields */ + ASSERT_EQ("cluster_node size > base (has counter fields)", 1, + sizeof(cluster_node) > sizeof(char *) + 4 * sizeof(unsigned short) + + sizeof(redisContext *) + sizeof(void *) + sizeof(uint8_t)); + + /* Verify offsetof-style check: 'next' pointer comes after counters */ + cluster_node n; + memset(&n, 0, sizeof(n)); + n.queries = 0xAAAA; + n.errors = 0xBBBB; + n.moved = 0xCCCC; + /* Verify the values are at distinct locations */ + ASSERT_EQ("queries != errors", 1, n.queries != n.errors); + ASSERT_EQ("queries set correctly", 0xAAAAUL, n.queries); + ASSERT_EQ("errors set correctly", 0xBBBBUL, n.errors); + ASSERT_EQ("moved set correctly", 0xCCCCUL, n.moved); +} + +int main(void) +{ + printf("=== MI Counters & Helpers Unit Tests ===\n\n"); + + test_node_counters_zero_init(); + test_node_counters_increment(); + test_count_node_slots_empty(); + test_count_node_slots_partial(); + test_count_total_slots_full(); + test_count_total_slots_partial(); + test_count_total_slots_empty(); + test_independent_node_counters(); + test_three_node_cluster_slots(); + test_slot_migration(); + test_struct_size_includes_counters(); + + printf("\n=== Results: %d passed, %d failed, %d total ===\n", + tests_passed, tests_failed, tests_run); + + return tests_failed > 0 ? 1 : 0; +} diff --git a/modules/cachedb_redis/test/test_topology_refresh.sh b/modules/cachedb_redis/test/test_topology_refresh.sh new file mode 100755 index 00000000000..e475fb93a55 --- /dev/null +++ b/modules/cachedb_redis/test/test_topology_refresh.sh @@ -0,0 +1,454 @@ +#!/bin/bash +# +# test_topology_refresh.sh - Integration test for PR 3b (dynamic topology refresh) +# +# Tests that OpenSIPS correctly refreshes the cluster topology after +# MOVED redirections from slot migrations. 
Verifies: +# - Baseline: keys can be stored/fetched across all nodes +# - After slot migration + MOVED: topology refreshes and data is accessible +# - New writes to migrated slots go direct (no MOVED) +# - Multiple consecutive migrations work correctly +# - Cluster health after all tests +# +# Requirements: +# - redis-cli (Redis CLI client) +# - curl (for OpenSIPS MI HTTP interface) +# - A running 3-node Redis Cluster (default: 10.0.0.23-25:6379) +# - A running OpenSIPS instance with mi_http on port 8888 +# - The cachedb_redis module loaded with cluster mode enabled +# +# Environment variables (override defaults): +# REDIS_PASS - Redis cluster password +# REDIS_NODE_1 - First cluster node (default: 10.0.0.23) +# REDIS_NODE_2 - Second cluster node (default: 10.0.0.24) +# REDIS_NODE_3 - Third cluster node (default: 10.0.0.25) +# REDIS_PORT - Redis port (default: 6379) +# MI_URL - OpenSIPS MI HTTP URL (default: http://127.0.0.1:8888/mi) +# + +set -euo pipefail + +# --- Configuration --- +REDIS_PASS="${REDIS_PASS:-85feedc95d5fa7f16fefdb9c92d154179748f2b08df76dc0}" +REDIS_NODE_1="${REDIS_NODE_1:-10.0.0.23}" +REDIS_NODE_2="${REDIS_NODE_2:-10.0.0.24}" +REDIS_NODE_3="${REDIS_NODE_3:-10.0.0.25}" +REDIS_PORT="${REDIS_PORT:-6379}" +MI_URL="${MI_URL:-http://127.0.0.1:8888/mi}" + +PASS=0 +FAIL=0 +TOTAL=0 + +# --- Cleanup on exit --- +CLEANUP_SLOTS=() +cleanup() { + for slot in "${CLEANUP_SLOTS[@]}"; do + for node in "$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3"; do + redis_cmd "$node" CLUSTER SETSLOT "$slot" STABLE >/dev/null 2>&1 || true + done + done + # Delete any test keys + for node in "$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3"; do + redis_cmd "$node" -c KEYS "test:pr3b:*" 2>/dev/null | while read -r k; do + redis_cmd "$node" -c DEL "$k" >/dev/null 2>&1 || true + done + done +} +trap cleanup EXIT + +# --- Helpers --- +redis_cmd() { + local node="$1"; shift + redis-cli -h "$node" -p "$REDIS_PORT" -a "$REDIS_PASS" --no-auth-warning "$@" +} + +mi_cmd() { + local 
cmd="$1"; shift + local params="" + while [ $# -gt 0 ]; do + case "$1" in + -d) params="$2"; shift 2 ;; + *) shift ;; + esac + done + if [ -n "$params" ]; then + curl -s -m 10 -X POST "$MI_URL/$cmd" -H "Content-Type: application/json" \ + -d "{\"jsonrpc\":\"2.0\",\"method\":\"$cmd\",\"params\":$params,\"id\":1}" + else + curl -s -m 10 -X POST "$MI_URL/$cmd" -H "Content-Type: application/json" \ + -d "{\"jsonrpc\":\"2.0\",\"method\":\"$cmd\",\"id\":1}" + fi +} + +mi_fetch_value() { + local key="$1" + local result + result=$(mi_cmd "cache_fetch" -d "{\"system\":\"redis:cluster\",\"attr\":\"$key\"}" 2>/dev/null) + echo "$result" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("result",{}).get("value",""))' 2>/dev/null || echo "" +} + +assert_eq() { + local desc="$1" expected="$2" actual="$3" + TOTAL=$((TOTAL + 1)) + if [ "$expected" = "$actual" ]; then + echo " PASS: $desc" + PASS=$((PASS + 1)) + else + echo " FAIL: $desc (expected='$expected', got='$actual')" + FAIL=$((FAIL + 1)) + fi +} + +assert_not_empty() { + local desc="$1" actual="$2" + TOTAL=$((TOTAL + 1)) + if [ -n "$actual" ]; then + echo " PASS: $desc" + PASS=$((PASS + 1)) + else + echo " FAIL: $desc (value was empty)" + FAIL=$((FAIL + 1)) + fi +} + +get_node_id() { + local node="$1" + redis_cmd "$node" CLUSTER MYID | tr -d '\r' +} + +# Determine which node owns a given slot and pick a destination +# Sets: SOURCE_IP, DEST_IP, SOURCE_ID, DEST_ID +resolve_slot_owner() { + local slot="$1" + local dest_override="${2:-}" + + if [ "$slot" -le 5460 ]; then + SOURCE_IP="$REDIS_NODE_1" + if [ "$dest_override" = "node3" ]; then + DEST_IP="$REDIS_NODE_3" + else + DEST_IP="$REDIS_NODE_2" + fi + elif [ "$slot" -le 10922 ]; then + SOURCE_IP="$REDIS_NODE_2" + if [ "$dest_override" = "node3" ]; then + DEST_IP="$REDIS_NODE_3" + else + DEST_IP="$REDIS_NODE_1" + fi + else + SOURCE_IP="$REDIS_NODE_3" + if [ "$dest_override" = "node1" ]; then + DEST_IP="$REDIS_NODE_1" + else + DEST_IP="$REDIS_NODE_2" + fi + fi + + 
SOURCE_ID=$(get_node_id "$SOURCE_IP") + DEST_ID=$(get_node_id "$DEST_IP") +} + +# Begin slot migration: mark MIGRATING on source, IMPORTING on dest +begin_migration() { + local slot="$1" + redis_cmd "$DEST_IP" CLUSTER SETSLOT "$slot" IMPORTING "$SOURCE_ID" >/dev/null 2>&1 || true + redis_cmd "$SOURCE_IP" CLUSTER SETSLOT "$slot" MIGRATING "$DEST_ID" >/dev/null 2>&1 || true +} + +# Migrate all keys in a slot from source to dest +migrate_keys() { + local slot="$1" + local keys + keys=$(redis_cmd "$SOURCE_IP" CLUSTER GETKEYSINSLOT "$slot" 100 2>/dev/null | tr -d '\r') + if [ -n "$keys" ]; then + for k in $keys; do + redis_cmd "$SOURCE_IP" MIGRATE "$DEST_IP" "$REDIS_PORT" "$k" 0 5000 AUTH "$REDIS_PASS" >/dev/null 2>&1 || true + done + fi +} + +# Complete migration: assign slot to dest on all nodes +complete_migration() { + local slot="$1" + for node in "$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3"; do + redis_cmd "$node" CLUSTER SETSLOT "$slot" NODE "$DEST_ID" >/dev/null 2>&1 || true + done + sleep 1 +} + +# Restore a slot back to its original owner +restore_slot() { + local slot="$1" orig_ip="$2" curr_ip="$3" + local orig_id curr_id + + orig_id=$(get_node_id "$orig_ip") + curr_id=$(get_node_id "$curr_ip") + + redis_cmd "$orig_ip" CLUSTER SETSLOT "$slot" IMPORTING "$curr_id" >/dev/null 2>&1 || true + redis_cmd "$curr_ip" CLUSTER SETSLOT "$slot" MIGRATING "$orig_id" >/dev/null 2>&1 || true + + local keys + keys=$(redis_cmd "$curr_ip" CLUSTER GETKEYSINSLOT "$slot" 100 2>/dev/null | tr -d '\r') + if [ -n "$keys" ]; then + for k in $keys; do + redis_cmd "$curr_ip" MIGRATE "$orig_ip" "$REDIS_PORT" "$k" 0 5000 AUTH "$REDIS_PASS" >/dev/null 2>&1 || true + done + fi + + for node in "$REDIS_NODE_1" "$REDIS_NODE_2" "$REDIS_NODE_3"; do + redis_cmd "$node" CLUSTER SETSLOT "$slot" NODE "$orig_id" >/dev/null 2>&1 || true + done + sleep 1 +} + +# --- Preflight checks --- +echo "=== PR 3b: Dynamic Topology Refresh Test ===" +echo "" +echo "Checking prerequisites..." + +if ! 
command -v redis-cli &>/dev/null; then + echo "ERROR: redis-cli not found. Install redis-tools." + exit 1 +fi + +if ! command -v curl &>/dev/null; then + echo "ERROR: curl not found." + exit 1 +fi + +if ! command -v python3 &>/dev/null; then + echo "ERROR: python3 required." + exit 1 +fi + +# Verify cluster is healthy +CLUSTER_STATE=$(redis_cmd "$REDIS_NODE_1" CLUSTER INFO | grep cluster_state | tr -d '\r' | cut -d: -f2) +if [ "$CLUSTER_STATE" != "ok" ]; then + echo "ERROR: Redis Cluster state is '$CLUSTER_STATE', expected 'ok'." + exit 1 +fi +echo " Redis Cluster: ok" + +# Verify OpenSIPS MI is reachable +MI_RESPONSE=$(curl -s -m 5 -o /dev/null -w "%{http_code}" -X POST "$MI_URL/which" \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"which","id":1}' 2>/dev/null || true) +if [ "$MI_RESPONSE" != "200" ]; then + echo "ERROR: OpenSIPS MI not reachable at $MI_URL (HTTP $MI_RESPONSE)." + exit 1 +fi +echo " OpenSIPS MI: ok" + +echo "" + +# ================================================================== # +# Test 1: Baseline — store and fetch keys across all nodes # +# ================================================================== # +echo "--- Test 1: Baseline — store and fetch across all nodes ---" + +KEY1="test:pr3b:{b}:node1" +KEY2="test:pr3b:{c}:node2" +KEY3="test:pr3b:{a}:node3" + +mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$KEY1\",\"value\":\"baseline1\"}" >/dev/null +mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$KEY2\",\"value\":\"baseline2\"}" >/dev/null +mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$KEY3\",\"value\":\"baseline3\"}" >/dev/null + +assert_eq "Fetch key1 baseline" "baseline1" "$(mi_fetch_value "$KEY1")" +assert_eq "Fetch key2 baseline" "baseline2" "$(mi_fetch_value "$KEY2")" +assert_eq "Fetch key3 baseline" "baseline3" "$(mi_fetch_value "$KEY3")" + +redis_cmd "$REDIS_NODE_1" -c DEL "$KEY1" >/dev/null 2>&1 || true +redis_cmd "$REDIS_NODE_1" -c DEL "$KEY2" 
>/dev/null 2>&1 || true +redis_cmd "$REDIS_NODE_1" -c DEL "$KEY3" >/dev/null 2>&1 || true + +echo "" + +# ================================================================== # +# Test 2: Migrate a slot, verify data still accessible # +# ================================================================== # +echo "--- Test 2: Migrate slot, verify data accessible after MOVED ---" + +TEST_KEY="test:pr3b:migrate2" +TEST_SLOT=$(redis_cmd "$REDIS_NODE_1" CLUSTER KEYSLOT "$TEST_KEY" | tr -d '\r') +echo " Key '$TEST_KEY' -> slot $TEST_SLOT" + +resolve_slot_owner "$TEST_SLOT" +ORIG_SOURCE="$SOURCE_IP" +echo " Source: $SOURCE_IP -> Dest: $DEST_IP" + +# Store key via OpenSIPS +mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$TEST_KEY\",\"value\":\"before_migrate\"}" >/dev/null + +# Complete migration (no mid-migration ASK — just complete it) +CLEANUP_SLOTS+=("$TEST_SLOT") +begin_migration "$TEST_SLOT" +migrate_keys "$TEST_SLOT" +complete_migration "$TEST_SLOT" +echo " Slot $TEST_SLOT migrated to $DEST_IP" + +# Fetch via OpenSIPS — triggers MOVED → topology refresh +FETCHED=$(mi_fetch_value "$TEST_KEY") +assert_eq "Fetch after migration (MOVED triggers refresh)" "before_migrate" "$FETCHED" + +# Store a NEW key to the same slot — should go direct after refresh +# (uses same hash tag to target the same slot) +NEW_KEY="test:pr3b:migrate2:new" +NEW_SLOT=$(redis_cmd "$REDIS_NODE_1" CLUSTER KEYSLOT "$NEW_KEY" | tr -d '\r') +# If the new key doesn't hash to the same slot, that's ok — just test it +mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$NEW_KEY\",\"value\":\"after_refresh\"}" >/dev/null +FETCHED_NEW=$(mi_fetch_value "$NEW_KEY") +assert_eq "New key stored after refresh" "after_refresh" "$FETCHED_NEW" + +# Restore +echo " Restoring slot $TEST_SLOT..." 
+restore_slot "$TEST_SLOT" "$ORIG_SOURCE" "$DEST_IP" +CLEANUP_SLOTS=("${CLEANUP_SLOTS[@]/$TEST_SLOT}") +redis_cmd "$REDIS_NODE_1" -c DEL "$TEST_KEY" "$NEW_KEY" >/dev/null 2>&1 || true + +echo "" + +# ================================================================== # +# Test 3: Migrate slot, write to migrated slot, verify # +# ================================================================== # +echo "--- Test 3: Write to migrated slot after complete migration ---" + +K1="test:pr3b:{migrate3}:k1" +K1_SLOT=$(redis_cmd "$REDIS_NODE_1" CLUSTER KEYSLOT "$K1" | tr -d '\r') +echo " Key '$K1' -> slot $K1_SLOT" + +resolve_slot_owner "$K1_SLOT" +ORIG_SOURCE="$SOURCE_IP" +echo " Source: $SOURCE_IP (A) -> Dest: $DEST_IP (B)" + +# Store K1 on original owner +mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$K1\",\"value\":\"value_k1\"}" >/dev/null + +CLEANUP_SLOTS+=("$K1_SLOT") +# Migrate S: A → B (complete) +begin_migration "$K1_SLOT" +migrate_keys "$K1_SLOT" +complete_migration "$K1_SLOT" +echo " Slot $K1_SLOT migrated A->B" + +# Wait for rate-limit to expire (refresh is rate-limited to 1/sec) +sleep 2 + +# Write K2 to same slot via OpenSIPS — uses same hash tag {migrate3} +# First fetch triggers MOVED → refresh, then write should go direct +K2="test:pr3b:{migrate3}:k2" +K2_SLOT=$(redis_cmd "$REDIS_NODE_1" CLUSTER KEYSLOT "$K2" | tr -d '\r') +echo " K2 '$K2' -> slot $K2_SLOT (same as K1)" + +mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$K2\",\"value\":\"value_k2\"}" >/dev/null + +# Verify K2 landed on node B (the new owner) via direct node query +K2_ON_B=$(redis_cmd "$DEST_IP" GET "$K2" | tr -d '\r') +assert_eq "K2 landed on new slot owner ($DEST_IP)" "value_k2" "$K2_ON_B" + +# Fetch both K1 and K2 via OpenSIPS +assert_eq "Fetch K1 via OpenSIPS" "value_k1" "$(mi_fetch_value "$K1")" +assert_eq "Fetch K2 via OpenSIPS" "value_k2" "$(mi_fetch_value "$K2")" + +# Restore +echo " Restoring slot $K1_SLOT..." 
+restore_slot "$K1_SLOT" "$ORIG_SOURCE" "$DEST_IP" +CLEANUP_SLOTS=("${CLEANUP_SLOTS[@]/$K1_SLOT}") +redis_cmd "$REDIS_NODE_1" -c DEL "$K1" "$K2" >/dev/null 2>&1 || true + +echo "" + +# ================================================================== # +# Test 4: Multiple migrations, verify data integrity # +# ================================================================== # +echo "--- Test 4: Multiple migrations A->B->C ---" + +MK="test:pr3b:multi" +MK_SLOT=$(redis_cmd "$REDIS_NODE_1" CLUSTER KEYSLOT "$MK" | tr -d '\r') +echo " Key '$MK' -> slot $MK_SLOT" + +# Determine A, B, C +if [ "$MK_SLOT" -le 5460 ]; then + NODE_A="$REDIS_NODE_1"; NODE_B="$REDIS_NODE_2"; NODE_C="$REDIS_NODE_3" +elif [ "$MK_SLOT" -le 10922 ]; then + NODE_A="$REDIS_NODE_2"; NODE_B="$REDIS_NODE_3"; NODE_C="$REDIS_NODE_1" +else + NODE_A="$REDIS_NODE_3"; NODE_B="$REDIS_NODE_1"; NODE_C="$REDIS_NODE_2" +fi + +NODE_A_ID=$(get_node_id "$NODE_A") +NODE_B_ID=$(get_node_id "$NODE_B") +NODE_C_ID=$(get_node_id "$NODE_C") + +echo " A=$NODE_A B=$NODE_B C=$NODE_C" + +# Store value on A +mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$MK\",\"value\":\"on_A\"}" >/dev/null + +# Migration 1: A -> B +SOURCE_IP="$NODE_A"; SOURCE_ID="$NODE_A_ID"; DEST_IP="$NODE_B"; DEST_ID="$NODE_B_ID" +CLEANUP_SLOTS+=("$MK_SLOT") +begin_migration "$MK_SLOT" +migrate_keys "$MK_SLOT" +complete_migration "$MK_SLOT" +echo " Migration 1: A->B complete" + +sleep 2 # Wait for rate-limit + +# Fetch (triggers MOVED → refresh) +assert_eq "Fetch after A->B migration" "on_A" "$(mi_fetch_value "$MK")" + +# Write again (should go to B now) +mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$MK\",\"value\":\"on_B\"}" >/dev/null + +# Migration 2: B -> C +SOURCE_IP="$NODE_B"; SOURCE_ID="$NODE_B_ID"; DEST_IP="$NODE_C"; DEST_ID="$NODE_C_ID" +begin_migration "$MK_SLOT" +migrate_keys "$MK_SLOT" +complete_migration "$MK_SLOT" +echo " Migration 2: B->C complete" + +sleep 2 # Wait for rate-limit + +# Fetch (triggers 
MOVED → refresh) +assert_eq "Fetch after B->C migration" "on_B" "$(mi_fetch_value "$MK")" + +# Verify value on C directly +DIRECT_C=$(redis_cmd "$NODE_C" GET "$MK" | tr -d '\r') +assert_eq "Value on node C directly" "on_B" "$DIRECT_C" + +# Restore C -> A +echo " Restoring slot $MK_SLOT to A..." +restore_slot "$MK_SLOT" "$NODE_A" "$NODE_C" +CLEANUP_SLOTS=("${CLEANUP_SLOTS[@]/$MK_SLOT}") +redis_cmd "$REDIS_NODE_1" -c DEL "$MK" >/dev/null 2>&1 || true + +echo "" + +# ================================================================== # +# Test 5: Cluster health + data integrity after all tests # +# ================================================================== # +echo "--- Test 5: Final cluster health + data integrity ---" + +FINAL_STATE=$(redis_cmd "$REDIS_NODE_1" CLUSTER INFO | grep cluster_state | tr -d '\r' | cut -d: -f2) +TOTAL=$((TOTAL + 1)) +if [ "$FINAL_STATE" = "ok" ]; then + echo " PASS: Cluster state is ok after all tests" + PASS=$((PASS + 1)) +else + echo " FAIL: Cluster state is '$FINAL_STATE' (expected 'ok')" + FAIL=$((FAIL + 1)) +fi + +# Store and fetch a final key to confirm module is healthy +FINAL_KEY="test:pr3b:final:$(date +%s)" +mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$FINAL_KEY\",\"value\":\"healthy\"}" >/dev/null +FINAL_VAL=$(mi_fetch_value "$FINAL_KEY") +assert_eq "Final health check store/fetch" "healthy" "$FINAL_VAL" +redis_cmd "$REDIS_NODE_1" -c DEL "$FINAL_KEY" >/dev/null 2>&1 || true + +echo "" +echo "=== Results: $PASS passed, $FAIL failed, $TOTAL total ===" +exit $FAIL diff --git a/modules/cachedb_redis/test/test_topology_startup.sh b/modules/cachedb_redis/test/test_topology_startup.sh new file mode 100755 index 00000000000..51920e6dcf8 --- /dev/null +++ b/modules/cachedb_redis/test/test_topology_startup.sh @@ -0,0 +1,330 @@ +#!/bin/bash +# +# test_topology_startup.sh - Integration test for PR 3a (topology parser replacement) +# +# Tests that the new CLUSTER SHARDS/SLOTS parser correctly builds the slot +# 
table and that keys are routed to the correct nodes. Verifies: +# - OpenSIPS connects to cluster and MI responds +# - Keys can be stored and fetched +# - Keys that hash to different nodes are routed correctly +# - Many random keys can be stored/fetched (slot table coverage) +# +# Requirements: +# - redis-cli (Redis CLI client) +# - curl (for OpenSIPS MI HTTP interface) +# - A running 3-node Redis Cluster (default: 10.0.0.23-25:6379) +# - A running OpenSIPS instance with mi_http on port 8888 +# - The cachedb_redis module loaded with cluster mode enabled +# +# Environment variables (override defaults): +# REDIS_PASS - Redis cluster password +# REDIS_NODE_1 - First cluster node (default: 10.0.0.23) +# REDIS_NODE_2 - Second cluster node (default: 10.0.0.24) +# REDIS_NODE_3 - Third cluster node (default: 10.0.0.25) +# REDIS_PORT - Redis port (default: 6379) +# MI_URL - OpenSIPS MI HTTP URL (default: http://127.0.0.1:8888/mi) +# + +set -euo pipefail + +# --- Configuration --- +REDIS_PASS="${REDIS_PASS:-85feedc95d5fa7f16fefdb9c92d154179748f2b08df76dc0}" +REDIS_NODE_1="${REDIS_NODE_1:-10.0.0.23}" +REDIS_NODE_2="${REDIS_NODE_2:-10.0.0.24}" +REDIS_NODE_3="${REDIS_NODE_3:-10.0.0.25}" +REDIS_PORT="${REDIS_PORT:-6379}" +MI_URL="${MI_URL:-http://127.0.0.1:8888/mi}" + +PASS=0 +FAIL=0 +TOTAL=0 + +# --- Helpers --- +redis_cmd() { + local node="$1"; shift + redis-cli -h "$node" -p "$REDIS_PORT" -a "$REDIS_PASS" --no-auth-warning "$@" +} + +mi_cmd() { + local cmd="$1"; shift + local params="" + while [ $# -gt 0 ]; do + case "$1" in + -d) params="$2"; shift 2 ;; + *) shift ;; + esac + done + if [ -n "$params" ]; then + curl -s -m 10 -X POST "$MI_URL/$cmd" -H "Content-Type: application/json" \ + -d "{\"jsonrpc\":\"2.0\",\"method\":\"$cmd\",\"params\":$params,\"id\":1}" + else + curl -s -m 10 -X POST "$MI_URL/$cmd" -H "Content-Type: application/json" \ + -d "{\"jsonrpc\":\"2.0\",\"method\":\"$cmd\",\"id\":1}" + fi +} + +assert_eq() { + local desc="$1" expected="$2" actual="$3" + 
TOTAL=$((TOTAL + 1)) + if [ "$expected" = "$actual" ]; then + echo " PASS: $desc" + PASS=$((PASS + 1)) + else + echo " FAIL: $desc (expected='$expected', got='$actual')" + FAIL=$((FAIL + 1)) + fi +} + +assert_not_empty() { + local desc="$1" actual="$2" + TOTAL=$((TOTAL + 1)) + if [ -n "$actual" ]; then + echo " PASS: $desc" + PASS=$((PASS + 1)) + else + echo " FAIL: $desc (value was empty)" + FAIL=$((FAIL + 1)) + fi +} + +# Determine which node owns a given slot +slot_owner() { + local slot="$1" + if [ "$slot" -le 5460 ]; then + echo "$REDIS_NODE_1" + elif [ "$slot" -le 10922 ]; then + echo "$REDIS_NODE_2" + else + echo "$REDIS_NODE_3" + fi +} + +# --- Preflight checks --- +echo "=== PR 3a: Topology Startup Parser Test ===" +echo "" +echo "Checking prerequisites..." + +if ! command -v redis-cli &>/dev/null; then + echo "ERROR: redis-cli not found. Install redis-tools." + exit 1 +fi + +if ! command -v curl &>/dev/null; then + echo "ERROR: curl not found." + exit 1 +fi + +if ! command -v python3 &>/dev/null; then + echo "ERROR: python3 required." + exit 1 +fi + +# Verify cluster is healthy +CLUSTER_STATE=$(redis_cmd "$REDIS_NODE_1" CLUSTER INFO | grep cluster_state | tr -d '\r' | cut -d: -f2) +if [ "$CLUSTER_STATE" != "ok" ]; then + echo "ERROR: Redis Cluster state is '$CLUSTER_STATE', expected 'ok'." + exit 1 +fi +echo " Redis Cluster: ok" + +# Verify OpenSIPS MI is reachable +MI_RESPONSE=$(curl -s -m 5 -o /dev/null -w "%{http_code}" -X POST "$MI_URL/which" \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"which","id":1}' 2>/dev/null || true) +if [ "$MI_RESPONSE" != "200" ]; then + echo "ERROR: OpenSIPS MI not reachable at $MI_URL (HTTP $MI_RESPONSE)." 
+ exit 1 +fi +echo " OpenSIPS MI: ok" + +echo "" + +# ================================================================== # +# Test 1: Basic store and fetch # +# ================================================================== # +echo "--- Test 1: Basic store and fetch ---" + +TEST_KEY="test:pr3a:basic:$(date +%s)" +TEST_VAL="topology_test_value" + +STORE_RESULT=$(mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$TEST_KEY\",\"value\":\"$TEST_VAL\"}") +assert_eq "Store returns OK" "OK" "$(echo "$STORE_RESULT" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("result",""))' 2>/dev/null || echo "")" + +FETCH_RESULT=$(mi_cmd "cache_fetch" -d "{\"system\":\"redis:cluster\",\"attr\":\"$TEST_KEY\"}") +FETCHED_VAL=$(echo "$FETCH_RESULT" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("result",{}).get("value",""))' 2>/dev/null || echo "") +assert_eq "Fetch returns stored value" "$TEST_VAL" "$FETCHED_VAL" + +# Verify via redis-cli too +DIRECT_VAL=$(redis_cmd "$REDIS_NODE_1" -c GET "$TEST_KEY" | tr -d '\r') +assert_eq "Key exists in cluster (redis-cli)" "$TEST_VAL" "$DIRECT_VAL" + +redis_cmd "$REDIS_NODE_1" -c DEL "$TEST_KEY" >/dev/null 2>&1 || true + +echo "" + +# ================================================================== # +# Test 2: Keys routed to correct nodes # +# ================================================================== # +echo "--- Test 2: Keys routed to correct nodes ---" + +# Find keys that hash to each node's slot range +# Node 1: slots 0-5460, Node 2: slots 5461-10922, Node 3: slots 10923-16383 +KEY1="test:pr3a:node1:$(date +%s)" +KEY2="test:pr3a:node2:$(date +%s)" +KEY3="test:pr3a:node3:$(date +%s)" + +# We need keys that hash to specific ranges. Use {hashtag} to control routing. 
+# {a} hashes to slot 15495 (node 3), {b} to slot 3300 (node 1), {c} to slot 7365 (node 2) +KEY1="test:pr3a:{b}:node1" +KEY2="test:pr3a:{c}:node2" +KEY3="test:pr3a:{a}:node3" + +SLOT1=$(redis_cmd "$REDIS_NODE_1" CLUSTER KEYSLOT "$KEY1" | tr -d '\r') +SLOT2=$(redis_cmd "$REDIS_NODE_1" CLUSTER KEYSLOT "$KEY2" | tr -d '\r') +SLOT3=$(redis_cmd "$REDIS_NODE_1" CLUSTER KEYSLOT "$KEY3" | tr -d '\r') + +OWNER1=$(slot_owner "$SLOT1") +OWNER2=$(slot_owner "$SLOT2") +OWNER3=$(slot_owner "$SLOT3") + +echo " $KEY1 -> slot $SLOT1 -> $OWNER1" +echo " $KEY2 -> slot $SLOT2 -> $OWNER2" +echo " $KEY3 -> slot $SLOT3 -> $OWNER3" + +# Store all 3 keys via OpenSIPS +mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$KEY1\",\"value\":\"val1\"}" >/dev/null +mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$KEY2\",\"value\":\"val2\"}" >/dev/null +mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$KEY3\",\"value\":\"val3\"}" >/dev/null + +# Verify each key landed on the correct node +DIRECT1=$(redis_cmd "$OWNER1" GET "$KEY1" | tr -d '\r') +assert_eq "Key1 on correct node ($OWNER1)" "val1" "$DIRECT1" + +DIRECT2=$(redis_cmd "$OWNER2" GET "$KEY2" | tr -d '\r') +assert_eq "Key2 on correct node ($OWNER2)" "val2" "$DIRECT2" + +DIRECT3=$(redis_cmd "$OWNER3" GET "$KEY3" | tr -d '\r') +assert_eq "Key3 on correct node ($OWNER3)" "val3" "$DIRECT3" + +# Fetch each key back via OpenSIPS +FETCH1=$(mi_cmd "cache_fetch" -d "{\"system\":\"redis:cluster\",\"attr\":\"$KEY1\"}") +FETCHED1=$(echo "$FETCH1" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("result",{}).get("value",""))' 2>/dev/null || echo "") +assert_eq "Fetch $KEY1 via OpenSIPS" "val1" "$FETCHED1" + +FETCH2=$(mi_cmd "cache_fetch" -d "{\"system\":\"redis:cluster\",\"attr\":\"$KEY2\"}") +FETCHED2=$(echo "$FETCH2" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("result",{}).get("value",""))' 2>/dev/null || echo "") +assert_eq "Fetch $KEY2 via OpenSIPS" "val2" "$FETCHED2" + 
+FETCH3=$(mi_cmd "cache_fetch" -d "{\"system\":\"redis:cluster\",\"attr\":\"$KEY3\"}") +FETCHED3=$(echo "$FETCH3" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("result",{}).get("value",""))' 2>/dev/null || echo "") +assert_eq "Fetch $KEY3 via OpenSIPS" "val3" "$FETCHED3" + +# Cleanup +redis_cmd "$REDIS_NODE_1" -c DEL "$KEY1" >/dev/null 2>&1 || true +redis_cmd "$REDIS_NODE_1" -c DEL "$KEY2" >/dev/null 2>&1 || true +redis_cmd "$REDIS_NODE_1" -c DEL "$KEY3" >/dev/null 2>&1 || true + +echo "" + +# ================================================================== # +# Test 3: Write and read 100 random keys # +# ================================================================== # +echo "--- Test 3: Write and read 100 random keys ---" + +TIMESTAMP=$(date +%s) +KEYS_OK=0 +KEYS_FAIL=0 + +for i in $(seq 1 100); do + KEY="test:pr3a:bulk:${TIMESTAMP}:${i}" + VAL="value_${i}" + + mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$KEY\",\"value\":\"$VAL\"}" >/dev/null 2>&1 + + FETCH=$(mi_cmd "cache_fetch" -d "{\"system\":\"redis:cluster\",\"attr\":\"$KEY\"}" 2>/dev/null) + FETCHED=$(echo "$FETCH" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("result",{}).get("value",""))' 2>/dev/null || echo "") + + if [ "$FETCHED" = "$VAL" ]; then + KEYS_OK=$((KEYS_OK + 1)) + else + KEYS_FAIL=$((KEYS_FAIL + 1)) + if [ "$KEYS_FAIL" -le 3 ]; then + echo " FAIL detail: key=$KEY expected=$VAL got=$FETCHED" + fi + fi + + # Cleanup + redis_cmd "$REDIS_NODE_1" -c DEL "$KEY" >/dev/null 2>&1 || true +done + +TOTAL=$((TOTAL + 1)) +if [ "$KEYS_OK" -eq 100 ]; then + echo " PASS: All 100 keys stored and fetched correctly" + PASS=$((PASS + 1)) +else + echo " FAIL: $KEYS_OK/100 keys correct, $KEYS_FAIL failed" + FAIL=$((FAIL + 1)) +fi + +echo "" + +# ================================================================== # +# Test 4: Verify slot table coverage across multiple slot ranges # +# ================================================================== # +echo 
"--- Test 4: Verify slot table coverage across slot ranges ---" + +# Store keys with various hash tags, verify they land on the right nodes +# and can be stored/fetched. This covers slots across all 3 nodes. +# We use 30 different keys with diverse hash tags to cover the slot space. +SLOTS_OK=0 +SLOTS_FAIL=0 +SLOTS_TOTAL=30 + +for i in $(seq 1 $SLOTS_TOTAL); do + KEY="test:pr3a:coverage:${i}:$(date +%s)" + VAL="coverage_${i}" + + mi_cmd "cache_store" -d "{\"system\":\"redis:cluster\",\"attr\":\"$KEY\",\"value\":\"$VAL\"}" >/dev/null 2>&1 + + FETCH=$(mi_cmd "cache_fetch" -d "{\"system\":\"redis:cluster\",\"attr\":\"$KEY\"}" 2>/dev/null) + FETCHED=$(echo "$FETCH" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("result",{}).get("value",""))' 2>/dev/null || echo "") + + if [ "$FETCHED" = "$VAL" ]; then + SLOTS_OK=$((SLOTS_OK + 1)) + else + SLOTS_FAIL=$((SLOTS_FAIL + 1)) + if [ "$SLOTS_FAIL" -le 3 ]; then + SLOT=$(redis_cmd "$REDIS_NODE_1" CLUSTER KEYSLOT "$KEY" 2>/dev/null | tr -d '\r') + echo " FAIL detail: key=$KEY slot=$SLOT expected=$VAL got=$FETCHED" + fi + fi + + redis_cmd "$REDIS_NODE_1" -c DEL "$KEY" >/dev/null 2>&1 || true +done + +TOTAL=$((TOTAL + 1)) +if [ "$SLOTS_FAIL" -eq 0 ]; then + echo " PASS: All $SLOTS_OK/$SLOTS_TOTAL coverage keys stored and fetched" + PASS=$((PASS + 1)) +else + echo " FAIL: $SLOTS_OK ok, $SLOTS_FAIL failed out of $SLOTS_TOTAL" + FAIL=$((FAIL + 1)) +fi + +echo "" + +# --- Final cluster health check --- +echo "--- Final cluster health check ---" +FINAL_STATE=$(redis_cmd "$REDIS_NODE_1" CLUSTER INFO | grep cluster_state | tr -d '\r' | cut -d: -f2) +TOTAL=$((TOTAL + 1)) +if [ "$FINAL_STATE" = "ok" ]; then + echo " PASS: Cluster state is ok after all tests" + PASS=$((PASS + 1)) +else + echo " FAIL: Cluster state is '$FINAL_STATE' (expected 'ok')" + FAIL=$((FAIL + 1)) +fi + +echo "" +echo "=== Results: $PASS passed, $FAIL failed, $TOTAL total ===" +exit $FAIL From 58741ce3a2de80351235d795a9ca579c1ae6f68a Mon Sep 17 00:00:00 
2001 From: Norm Brandinger Date: Wed, 1 Apr 2026 08:25:37 -0400 Subject: [PATCH 3/4] cachedb_redis: fix UNIT_TESTS build and remove committed binary Exclude standalone test binaries (test_hash, test_mi_counters, hash_under_test) from the UNIT_TESTS auto-discovery in Makefile.modules. These files have their own main() and are built via test/Makefile; pulling them into the module .so causes multiple-definition linker errors. Also remove the accidentally committed test/test_mi_counters ELF binary and add it to .gitignore alongside test_hash. Reported-by: dondetir --- modules/cachedb_redis/Makefile | 5 +++++ modules/cachedb_redis/test/.gitignore | 1 + modules/cachedb_redis/test/test_mi_counters | Bin 20528 -> 0 bytes 3 files changed, 6 insertions(+) delete mode 100755 modules/cachedb_redis/test/test_mi_counters diff --git a/modules/cachedb_redis/Makefile b/modules/cachedb_redis/Makefile index 8d3f6b96a47..080bfcf60b9 100644 --- a/modules/cachedb_redis/Makefile +++ b/modules/cachedb_redis/Makefile @@ -26,4 +26,9 @@ DEFS+=-I$(LOCALBASE)/include LIBS += -L$(LOCALBASE)/lib -lhiredis endif +# Standalone test binaries have their own main() and Makefile; +# exclude them from the UNIT_TESTS auto-discovery to avoid +# multiple-definition linker errors when built into the module .so +exclude_files=test/test_hash.c test/test_mi_counters.c test/hash_under_test.c + include ../../Makefile.modules diff --git a/modules/cachedb_redis/test/.gitignore b/modules/cachedb_redis/test/.gitignore index b35f1189c01..32aeb7765b6 100644 --- a/modules/cachedb_redis/test/.gitignore +++ b/modules/cachedb_redis/test/.gitignore @@ -1,2 +1,3 @@ test_hash +test_mi_counters *.o diff --git a/modules/cachedb_redis/test/test_mi_counters b/modules/cachedb_redis/test/test_mi_counters deleted file mode 100755 index 235e155ea887c63a63c4ef65d04b1a4eaf6fa194..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20528 zcmeHP4|H4AdB3(JaFZa}0a`aSqtC 
zG>OH+jq7Z2p*RcmDH5mXO$vajN>xQMtkifylidB9?t~xHd<@w_f+ROl*+~YWFcej) zPI4}~)paM_c!A<$$n?CvIz<-_c;ak@XK0rrQ$`vUePm5Byt!P}w`j<6R3*7lEjOy= z@=!ZsR73VB`Go%4v_GBD0F8iP|$yhmin7CZes)XCjMU%cwZiw=G2c=PvH1d$Xy}P z8R|@iEYX#S#H_F&W^&&mv8r|TvgQ8yzWKg}{QZK3!oPa`8h*5y~MB^hCDIpR_w|H|YY#i?A-kmYfeni=^JMgJO&`^MjV5g{oT z*Xh3UI>F<9fCdZ7VZ9D8zD?teHbrMV*E2ULJjI)0$Yt1u*F#v6Mr?RBu9U=K8(uGU zk}zt+vu;Vy_7JaLQY5<4hUd9Sgen_euFsNt@-pdxNe@hVVA2DV9+>pNqz5KF@W=9i znSP_vywCMG_M_(A8LK=uVy1^G2lMK34F^EV&3Ot}&+I1LQGE;1$3}A~bM{f4wwPlh zvc8Avw6z?|$ogHVhX+WYR9R>CHx3(6-aqSW*zad|K?bO!KxGH$zVD^_jM?9C-^YbW zyLOpnF7;pH}2SNq*W!9+Koo75O1a ze$+<(cS#;ls`wrf2_Pt`JUmI^* zzoI@<|0MXo3D(^CZ(b?HJ7J$^cCVm9^A)sa*`#YJfh8xb^D*6@1|v=P59LNZvwMhl zP($!nOa6wb1Zq#3edFfd&zW*)RdK@zyIRhO&a@md9#4o>!5AVCOl!+|I+MxGkY7VW`C=<%ItT|xDYFo z=go}SH!W;>Vh7Fs`@Q4R+7?Ow%`CFK#kPzIKiC+$uD8gp&4d+Ulrim6XueU)N!-lq89(|-VJ5KOlK<1zOECb{^@Y(jvlH6 z7s;ED-i|HgLZjaQ>Aa&e{Xx@HE7=md$t}4d%6by zE$n$=5XhUyHRe^q{MfU9@ZPZJy{x$;n|dnji9a6pBo9}HJtwnVnW8G&vyB_hN8fv2 zKuSovhL^xpm`Ik|?aPnRGuHpO(!YnaU@_SAF`75h8jOV4N2SMGy(LW>GdrHlUia%< z?rqO@u%un{$vQalNtpKt2Qn$0|?Ab@=E^ia_H#02F~k(xorpJh&|T z(_gWZ$G(V!k#-FcTI+;{r=-lSkp33#HjaG@{AwM+G=2gB>yOEbN1grGr}I!`rCI>S@l zG;S^#PK~Es^T;#|fEUzk8bQxroxt0+fnb|IvO)MCb7Ct(*)@!azdnq>G*Ju~J3`nI z&;A264xTrc>`j3;KFH$$GelL$)2M7e+$hjLPwxZhW7^fT$Yvy2XwzSg^@ZZkn)3RW z{XYE%l>R-W1rce(%qh&Hi$4tHjvAYTP}1np-SFW9ds z(Xp4_MOX_?lldqg-;1Cl`>)`j<+x1-nU?4uX1`5Ro{2(i_U-lPD68o9e*>Y$QfiN~ zO{YEH;}q{kve*`HbH`q}dcLo)+ZvwTgUASk`GduO`1J8NZ1G=cGy11u@%f z@yF@%Y2*KnjQ;`^e=p=I{s^iq{%@ZW|K|1J+KlEmP{!UQGxjbo9Z^%}UJRIh08;tk z@KW}xs3KX1Xh`ZP8`HK|L z`td5(t|7`+Lm1SXXzd`(8-zLP**`?<7Hl4r`C~t?TVRXJ3awkHY#-P8D?CN!|9To8 zFcS0GZk;{%P6 z)|U8r-n6TsU|P}Eq%g(p2A9Ml{&3Zq?VptYe&7D-M!A35iN`?X|GLxr?+o&R>}KK0G*0=)3B1n(!yDpy9XL6T>!;HcbdE_D?07l>Nw^ zYX3QkXvzwWKUDJjr%#lV|4g!0jlZHTlzsetApN}m?81km1^*p)OaGl>6m6Tb2al8g zzK2gvZRrQ@dq3NalK!Q9`hoM$;OX;E6XhSZ4;6N+$z9Scw6G! 
z0KJ~V<|jUgG(%Yc8&TSc$gs+d=wS@b?_fAK-zK^G8~^qkMW#U021#y0K6^}VMfZUb zQ$GTr)`YZc?L7E7yAgEApXVK$h9_h^`x92Hgn~~>0osSs$lus}+t}r*!3R`>HIklo zxnyHipp7l10}$j(#qcW+X-vCbyPUMW`34xChoVBbWWIY8rptV1pr^FrXOKK7-<<%= zJ_0G7?^^qQmfeG@+27(dXp6fE3~&k-Ph=6ScG$*ZG}FWF3Cb2vsO+YgqLN-mOigrY zqf3{k_VeBzFc2Kj@IiuV|5CIcK1}Ime>lqvMdHm~XVgXhl(vc&<)76*Wo&p(9W z=O4N6eVNy9EFQ_{D-p?;5gl2fu%d$bOXI-P`%#>LVg0+Tv@xaT@B7oQ){m#;Xrr+* z#VbugNWYEAi;-~YG&zS)F-*?kb6%I@X$tbkyO)w_2vis1bBy9M`~g-&TmE<+-g=h& z_bia|`I|8O{Qc~`$iF{l|2+qzkC6X<4*$st`45%t*}F5I-bZBey-r#vlkY?sj^fAp z(4<%>-6U@djGJ8w7_XoG209I+lQ~Mz+I#8-4MBze22<*Irpo7Cig!V!y?Rh zeOy3pejmpL6S(hxAbLL9>R){&)?7B>pkq#eE$CK zQJGC|0wY$CM=*5d3PQs=?fUCEaCP=eV1dPQU5QWkjvbIExDC^k$X1A`WjyWjOa40) z{}k`o&F@jbRudK}pR7c{>mXO3p_G3mL@DJjQ8ozG;2hat+BHKqHVd?|vsKD}0u+c% z(HhCZV_vfG#A|v|jeP~VA58R_TG6Lyh{0r;^uVMCCOz{g6Bs777RfJ|M}0CnqAB{0 zuMyfBiW$^FBZx2kCL--AD-w^%P8It@@^nTvCjycooQQWC(mMgeimSVJ788w)jmDbQ z#&YsuD3LTSHq218i|#hWB9`=U(g2h4iV!xpE4!rQRSZb#u23RAF9K2c9$#z@8tvVN z`f{^U>5LmX7KwEvLKL86-l`%$l#Xwxv&-sM{;=qK&sx^mzW)^eD?z&qx!xuhi3LMl zAzUzI;0YZCb`TAsVtxc8)nOUYKzBT48OZ=T-<|J?j-K*T5*{(GHrfNpkTG{lAjxhs z!jVukn5?CCDIjY@$yC%zE;Z%^F^-eT5QZmi!+}Ut-^yO8xYXDA=&xSroB2Wqn-nxG zTvRWel*GtyYhJgGM*dXA(QY_6RHJGRH5&Dc7A#&UigAnUm&|893s?^ekS=c7hSTs17Qn#*kh+=+9phXJYJOkI|;yVi=bZB=C# zOh3DFpzQ1_!sDZN(Sg~!$vz_7UJ$U{8z#D19)^3&F-pu%1w7ox?+`k_n%i*Tv0LAU9-kr)$FeP z)U?l_X?KO^Idhs?U@Ddn%gUMtaIBcTc*q;;gf#x{WpN8W^|6UAbx6HdL>5 z*O>0AW$wxqc+`}A^Fl`!2T8xc)8?t|WYC#Ij&{mF4(@MT$#4_c_;Ydo^>?-21-Fj+Vk&}v|`2&Xv5g< zjOL^L6BX8v!$#R3pd>4f0%d)Zmd7_$Wu5(u&&XxHRnzfpLs@6LW#j(~7ykXYQ}rS} z4pwS-qlV1Kzb9R_eECviZcC^=5{Mb~3-JTmx_R|WYV|F1p9;w@Q^Z?16)i60=Q43_ z{`cdCCev#NO7Y|T8YdNQk*uzHC2JLq)-^jd>p+`l;*Z^y}K(86JgKakoc zI^vyO(Gbp6eRZWd=(PtCe;|k3FX zU&YYG?^rm^dll$K4tZXBAA>&>3|Ijnd!ygnP()Y9o<%4)3?k~?46vm5MGgL7GVb3J zhz02-4y$hfS1=Our*QfQRWV%r?a8E;)$fqNiyzn#^hSu1uX!kb(?qd3uINwqg$8#JZ^B2Q3kb21r2Pbww>|5r+A2>%+0U@L8d4xYDozQ(U^;7uuo+%9bD+ z;hMIudD-fDGzVovYWyg9zK6a*XQTth#4XYTQz@7}xLEiwg6TXuKezvY8`_W39uCWf 
zEd1UWhK9y7;Qbxf@jbR8`7XUNW9Vf@4S79fbTe*{0`mGNdT$Z)6R*9jMg?D-r)5=^ z-C93G*3bK9r~T_7MSEnH=Y4b@s%GW8VMsU2^M020Kctf7d0)DJ=B`CCAx7#m~xO@)(6cd)`eLTYqXjE9gv;Tdd zk^C}ZLgD8EhNePGdGeW4{tLj7UHlxA&s#cw@$-gmCchu$`d>NZ4K2s8i5WH2t(Ue+PcIp^Ww)5CFR$xQXDfK0* Date: Wed, 1 Apr 2026 08:30:32 -0400 Subject: [PATCH 4/4] cachedb_redis: guard redisEnableKeepAliveWithInterval for hiredis < 1.0 redisEnableKeepAliveWithInterval() was added in hiredis 1.0.0. Ubuntu 20.04 ships hiredis 0.14, causing an implicit-function-declaration error with -Werror. Gate on HIREDIS_MAJOR >= 1, falling back to redisEnableKeepAlive() (no interval parameter) on older versions. --- modules/cachedb_redis/cachedb_redis_dbase.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/cachedb_redis/cachedb_redis_dbase.c b/modules/cachedb_redis/cachedb_redis_dbase.c index 716a826bd64..579ffc1d898 100644 --- a/modules/cachedb_redis/cachedb_redis_dbase.c +++ b/modules/cachedb_redis/cachedb_redis_dbase.c @@ -98,7 +98,11 @@ redisContext *redis_get_ctx(char *ip, int port) } if (redis_keepalive > 0) { +#if defined(HIREDIS_MAJOR) && HIREDIS_MAJOR >= 1 if (redisEnableKeepAliveWithInterval(ctx, redis_keepalive) != REDIS_OK) +#else + if (redisEnableKeepAlive(ctx) != REDIS_OK) +#endif LM_WARN("failed to enable TCP keepalive on redis connection " "%s:%hu\n", ip, (unsigned short)port); }