From f7b59364d1bb003e7bc0f24562ed6fef5bd33976 Mon Sep 17 00:00:00 2001 From: Denys Pozniak Date: Wed, 29 Apr 2026 13:47:01 +0200 Subject: [PATCH] mid_registrar, usrloc: replicate AoR with populated kv_storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the race in `full-sharing-cluster` mode where peers received REPL_URECORD_INSERT packets with an empty kv_storage. The serialization that 343f452c1 added shipped whatever was in r->kv_storage at replicate time, but mid_registrar's store_urecord_data() ran *after* ul.insert_urecord() — so peers saw an empty store and later failed to issue De-REGISTERs: ERROR:usrloc:store_deserialize: bad JSON input or oom ERROR:mid_registrar:unregister_record: 'from' key not found, skipping De-REGISTER ERROR:mid_registrar:mid_reg_aor_event: failed to unregister contact Mirror the existing ucontact pattern (ucontact_info_t.pre_replicate_cb): extend insert_urecord() with an optional pre-replication callback that the caller can use to attach record-level data between mem_insert and replicate_urecord_insert. mid_registrar wires mid_reg_store_aor_data into the throttle-AoR save path so the kv_storage is populated before the INSERT packet is built. Closes #3845 Co-Authored-By: Claude Opus 4.7 (1M context) --- modules/mid_registrar/save.c | 34 ++++++++++++++++++++++++++++++++-- modules/pua_reginfo/notify.c | 2 +- modules/registrar/save.c | 2 +- modules/registrar/test/test.c | 6 +++--- modules/usrloc/udomain.c | 8 +++++++- modules/usrloc/udomain.h | 9 ++++++++- modules/usrloc/ul_cluster.c | 6 +++--- modules/usrloc/ul_mi.c | 2 +- modules/usrloc/usrloc.h | 8 +++++++- 9 files changed, 63 insertions(+), 14 deletions(-) diff --git a/modules/mid_registrar/save.c b/modules/mid_registrar/save.c index 3ccbd941f5e..cbb40b199ee 100644 --- a/modules/mid_registrar/save.c +++ b/modules/mid_registrar/save.c @@ -236,6 +236,14 @@ struct mr_ct_data { int last_cseq; }; +struct mr_aor_data { + struct mid_reg_info *mri; + const str *ct_uri; + int expires_out; + int last_reg_ts; + int last_cseq; +}; + static int mid_reg_store_ct_data(ucontact_t *c, void *info) { struct mr_ct_data *data = (struct mr_ct_data *)info; @@ -249,6 +257,19 @@ static int mid_reg_store_ct_data(ucontact_t *c, void *info) return rc; } +static int mid_reg_store_aor_data(urecord_t *r, void *info) +{ + struct mr_aor_data *data = (struct mr_aor_data *)info; + int rc; + + rc = store_urecord_data(r, data->mri, data->ct_uri, data->expires_out, + data->last_reg_ts, data->last_cseq); + if (rc != 0) + LM_ERR("failed to attach urecord data - oom?\n"); + + return rc; +} + static int mid_reg_update_ct_data(ucontact_t *c, void *info) { struct mr_ct_data *data = (struct mr_ct_data *)info; @@ -288,7 +309,7 @@ static int overwrite_req_contacts(struct sip_msg *req, ul.lock_udomain(mri->dom, &mri->aor); ul.get_urecord(mri->dom, &mri->aor, &r); - if (!r && ul.insert_urecord(mri->dom, &mri->aor, &r, 0) < 0) { + if (!r && ul.insert_urecord(mri->dom, &mri->aor, &r, 0, NULL, NULL) < 0) { rerrno = R_UL_NEW_R; LM_ERR("failed to insert new record structure\n"); goto out_err; @@ -1574,7 +1595,16 @@ static inline int save_restore_req_contacts(struct sip_msg *req, if (!_c) goto out; - if (ul.insert_urecord(mri->dom, _a, &r, 0) < 0) { + /* populate kv_storage before cluster replication so peers receive + * a populated AoR INSERT packet (otherwise unregister_record() on + * peers fails to find the 'from' key when the AoR later expires) */ + struct mr_aor_data aor_data = { + mri, &_c->uri, e_out, + (int)(unsigned long)get_act_time(), cseq + }; + + if (ul.insert_urecord(mri->dom, _a, &r, 0, + mid_reg_store_aor_data, &aor_data) < 0) { rerrno = R_UL_NEW_R; LM_ERR("failed to insert new record structure\n"); goto out_err; diff --git a/modules/pua_reginfo/notify.c b/modules/pua_reginfo/notify.c index 1443822b8ee..23314d4c199 100644 --- a/modules/pua_reginfo/notify.c +++ b/modules/pua_reginfo/notify.c @@ -82,7 +82,7 @@ int process_contact(udomain_t *domain, urecord_t **ul_record, str aor, case EVENT_REFRESHED: /* In case, no record exists and new one should be created, * create a new entry for this user in the usrloc-DB */ - if(ul.insert_urecord(domain, &aor, ul_record, 0) < 0) { + if(ul.insert_urecord(domain, &aor, ul_record, 0, NULL, NULL) < 0) { LM_ERR("failed to insert new user-record\n"); ret = RESULT_ERROR; goto done; diff --git a/modules/registrar/save.c b/modules/registrar/save.c index fce0f28e944..1eed1f3bca9 100644 --- a/modules/registrar/save.c +++ b/modules/registrar/save.c @@ -260,7 +260,7 @@ static inline int insert_contacts(struct sip_msg* _m, contact_t* _c, } if (r==0) { - if (ul.insert_urecord(_d, _a, &r, 0) < 0) { + if (ul.insert_urecord(_d, _a, &r, 0, NULL, NULL) < 0) { rerrno = R_UL_NEW_R; LM_ERR("failed to insert new record structure\n"); goto error; diff --git a/modules/registrar/test/test.c b/modules/registrar/test/test.c index 7bad65aa134..186e9087053 100644 --- a/modules/registrar/test/test.c +++ b/modules/registrar/test/test.c @@ -77,7 +77,7 @@ static void test_lookup(void) ok(reg_lookup(&msg, d, NULL, NULL) == LOOKUP_NO_RESULTS, "lookup-1"); ul.lock_udomain(d, &aor); - ok(ul.insert_urecord(d, &aor, &r, 0) == 0, "create AoR"); + ok(ul.insert_urecord(d, &aor, &r, 0, NULL, NULL) == 0, "create AoR"); fill_ucontact_info(&ci); ci.methods = METHOD_UNDEF; @@ -124,11 +124,11 @@ static void test_lookup(void) struct msg_branch branch; ul.lock_udomain(d, &aor2); - ok(ul.insert_urecord(d, &aor2, &r, 0) == 0, "create AoR 2"); + ok(ul.insert_urecord(d, &aor2, &r, 0, NULL, NULL) == 0, "create AoR 2"); ul.unlock_udomain(d, &aor2); ul.lock_udomain(d, &aor3); - ok(ul.insert_urecord(d, &aor3, &r, 0) == 0, "create AoR 3"); + ok(ul.insert_urecord(d, &aor3, &r, 0, NULL, NULL) == 0, "create AoR 3"); fill_ucontact_info(&ci); ci.methods = METHOD_UNDEF; ok(ul.insert_ucontact(r, &ct2, &ci, NULL, 1, &c) == 0, "insert Contact for AoR 3"); diff --git a/modules/usrloc/udomain.c b/modules/usrloc/udomain.c index f9f6d0ce60e..0b5ddb10aeb 100644 --- a/modules/usrloc/udomain.c +++ b/modules/usrloc/udomain.c @@ -1356,7 +1356,9 @@ int cdb_update_urecord_metadata(const str *_aor, int unpublish) * Create and insert a new record */ int insert_urecord(udomain_t* _d, str* _aor, struct urecord** _r, - char skip_replication) + char skip_replication, + ur_insert_pre_repl_cb_f pre_replicate_cb, + void *pre_replicate_info) { if (have_mem_storage()) { if (mem_insert_urecord(_d, _aor, _r) < 0) { @@ -1373,6 +1375,10 @@ int insert_urecord(udomain_t* _d, str* _aor, struct urecord** _r, _aor->len, _aor->s); } + if (pre_replicate_cb + && pre_replicate_cb(*_r, pre_replicate_info) != 0) + LM_ERR("urecord pre-replication callback returned non-zero\n"); + if (location_cluster) replicate_urecord_insert(*_r); } diff --git a/modules/usrloc/udomain.h b/modules/usrloc/udomain.h index e979dddba1e..af01407e15c 100644 --- a/modules/usrloc/udomain.h +++ b/modules/usrloc/udomain.h @@ -151,11 +151,18 @@ void unlock_ulslot(udomain_t* _d, int slot); /* ===== module interface ======= */ +/* Optional callback invoked by @insert_urecord between memory insertion and + * cluster replication, allowing the caller to populate record-level data + * (e.g. mid_registrar's kv_storage keys) so it ships in the INSERT packet. */ +typedef int (*ur_insert_pre_repl_cb_f)(struct urecord *r, void *info); + /*! \brief * Create and insert a new record */ int insert_urecord(udomain_t* _d, str* _aor, struct urecord** _r, - char skip_replication); + char skip_replication, + ur_insert_pre_repl_cb_f pre_replicate_cb, + void *pre_replicate_info); /*! \brief * Obtain a urecord pointer if the urecord exists in domain diff --git a/modules/usrloc/ul_cluster.c b/modules/usrloc/ul_cluster.c index 9a4d0f100b6..f820913913e 100644 --- a/modules/usrloc/ul_cluster.c +++ b/modules/usrloc/ul_cluster.c @@ -449,7 +449,7 @@ static int receive_urecord_insert(bin_packet_t *packet) if (get_urecord(domain, &aor, &r) == 0) goto out; - if (insert_urecord(domain, &aor, &r, 1) != 0) { + if (insert_urecord(domain, &aor, &r, 1, NULL, NULL) != 0) { unlock_udomain(domain, &aor); goto out_err; } @@ -608,7 +608,7 @@ static int receive_ucontact_insert(bin_packet_t *packet) LM_INFO("failed to fetch local urecord - creating new one " "(ci: '%.*s') \n", callid.len, callid.s); - if (insert_urecord(domain, &aor, &record, 1) != 0) { + if (insert_urecord(domain, &aor, &record, 1, NULL, NULL) != 0) { LM_ERR("failed to insert new record\n"); unlock_udomain(domain, &aor); goto error; @@ -772,7 +772,7 @@ static int receive_ucontact_update(bin_packet_t *packet) LM_INFO("failed to fetch local urecord - create new record and contact" " (ci: '%.*s')\n", callid.len, callid.s); - if (insert_urecord(domain, &aor, &record, 1) != 0) { + if (insert_urecord(domain, &aor, &record, 1, NULL, NULL) != 0) { LM_ERR("failed to insert urecord\n"); unlock_udomain(domain, &aor); goto error; diff --git a/modules/usrloc/ul_mi.c b/modules/usrloc/ul_mi.c index 2b7f1d0eeb3..8c7955bf02e 100644 --- a/modules/usrloc/ul_mi.c +++ b/modules/usrloc/ul_mi.c @@ -520,7 +520,7 @@ mi_response_t *mi_usrloc_add(const mi_params_t *params, n = get_urecord( dom, &aor, &r); if ( n==1) { - if (insert_urecord( dom, &aor, &r, 0) < 0) + if (insert_urecord( dom, &aor, &r, 0, NULL, NULL) < 0) goto lock_error; c = 0; diff --git a/modules/usrloc/usrloc.h b/modules/usrloc/usrloc.h index 20d6a1feea5..1915ecfcbe9 100644 --- a/modules/usrloc/usrloc.h +++ b/modules/usrloc/usrloc.h @@ -128,11 +128,17 @@ typedef struct usrloc_api { * @r: will hold the newly created object * @skip_replication: set to true in order to avoid replicating an AoR * insertion event to neighboring cluster nodes + * @pre_replicate_cb: optional callback fired after the urecord lives in + * memory but before it is replicated; use it to attach + * record-level kv_storage that must reach peers + * @pre_replicate_info: opaque context handed to @pre_replicate_cb * * Return: 0 (success), negative otherwise */ int (*insert_urecord) (udomain_t *d, str *aor, struct urecord **r, - char skip_replication); + char skip_replication, + ur_insert_pre_repl_cb_f pre_replicate_cb, + void *pre_replicate_info); /** * Fetch a key from record-level storage.