Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions spp_api_v2/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,21 @@ Dependencies
Changelog
=========

19.0.2.0.1
~~~~~~~~~~

- Fix ``SerializationFailure`` race when multiple Odoo workers rebuild
their routing map simultaneously (e.g. after ``-u all``) and all try
to sync the same ``fastapi.endpoint`` rows
- Serialize concurrent sync attempts across workers using a
transaction-scoped Postgres advisory lock; workers that don't acquire
the lock skip the sync and pick up the freshly synced routes on the
next routing-map rebuild (via ``endpoint_route_version`` cache
invalidation)
- Log skipped syncs at INFO and lock-primitive failures at WARNING so
cold-start route-availability symptoms and broken-primitive
regressions are diagnosable without raising the global log level

19.0.2.0.0
~~~~~~~~~~

Expand Down
2 changes: 1 addition & 1 deletion spp_api_v2/__manifest__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "OpenSPP API V2",
"category": "OpenSPP/Integration",
"version": "19.0.2.0.0",
"version": "19.0.2.0.1",
"sequence": 1,
"author": "OpenSPP.org",
"website": "https://github.com/OpenSPP/OpenSPP2",
Expand Down
124 changes: 96 additions & 28 deletions spp_api_v2/models/ir_http_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
This workaround should be removed when Odoo core fixes the cache bug.
"""

import hashlib
import logging
import threading

Expand All @@ -22,6 +23,46 @@

_logger = logging.getLogger(__name__)

# Stable 64-bit signed key for the transaction-scoped advisory lock that
# serializes FastAPI endpoint sync attempts across workers. Derived from a
# SHA-256 of the qualified name so it is deterministic and unlikely to collide
# with other modules' advisory locks in the same database.
_FASTAPI_SYNC_ADVISORY_LOCK_KEY = int.from_bytes(
hashlib.sha256(b"spp_api_v2.fastapi_endpoint_sync").digest()[:8],
byteorder="big",
signed=True,
)


def _try_acquire_fastapi_sync_lock(cr):
"""Try to acquire the cross-worker FastAPI endpoint sync advisory lock.

Returns True if this transaction got the lock, False otherwise (either
because another backend holds it, or because the lock SQL itself failed —
e.g. exhausted shared-lock memory, permissions). The lock is transaction-
scoped (released automatically at COMMIT/ROLLBACK), so callers do not need
to release it explicitly.

Failing closed (returning False) is the safe default: callers will skip
the sync, and the next routing_map call will retry. Logging at WARNING so
a persistently broken lock primitive is visible — silently degrading to
every-worker-races behaviour would mask the regression this patch fixes.
"""
try:
cr.execute(
"SELECT pg_try_advisory_xact_lock(%s)",
(_FASTAPI_SYNC_ADVISORY_LOCK_KEY,),
)
(got_lock,) = cr.fetchone()
return got_lock
except Exception as e:
_logger.warning(
"FastAPI endpoint sync advisory-lock acquire failed (%s); "
"treating as 'lock held elsewhere' and skipping sync this round.",
e,
)
return False


class IrHttp(models.AbstractModel):
"""Patch ir.http to fix routing_map cache bug"""
Expand Down Expand Up @@ -80,37 +121,64 @@ def routing_map(self, key=None):
from odoo.api import Environment

with registry.cursor() as cr:
env = Environment(cr, SUPERUSER_ID, {})

# First check for endpoints with registry_sync=False (never synced)
unsynced_endpoints = env["fastapi.endpoint"].search([("registry_sync", "=", False)])

# Also check for endpoints that claim to be synced but have no routes
# This catches cases where routes were deleted or DB was reset
synced_endpoints = env["fastapi.endpoint"].search([("registry_sync", "=", True)])
if synced_endpoints and "endpoint.route" in env:
for endpoint in synced_endpoints:
route_exists = env["endpoint.route"].search_count(
[("endpoint_id", "=", endpoint.id)], limit=1
)
if not route_exists:
_logger.warning(
"Endpoint '%s' (id=%d) claims to be synced but has no routes - forcing re-sync",
endpoint.name,
endpoint.id,
)
# Reset flag to trigger re-sync
endpoint.registry_sync = False
unsynced_endpoints |= endpoint

if unsynced_endpoints:
unsynced_endpoints.action_sync_registry()
cr.commit()
# Serialize concurrent sync attempts across workers. After a
# registry reload (e.g. -u all) every worker's routing_map()
# races to update the same fastapi_endpoint rows; without
# this lock all but one fail with SerializationFailure.
if not _try_acquire_fastapi_sync_lock(cr):
# Skipping is safe: the worker that DID get the lock will
# bump endpoint_route_version when it commits action_sync_registry().
# That version is part of our routing-map cache key (line ~89),
# and we re-read it per call, so any degraded routing map this
# worker caches now is keyed at the old version and is naturally
# invalidated on the next call after the winner commits. Bad
# window is bounded by winner-commit latency (seconds at most).
#
# INFO (not DEBUG) so it's visible at default log level —
# otherwise diagnosing transient missing routes after a
# cold start has nothing to go on. Fires at most once
# per registry reload per worker, so not noisy.
_logger.info(
"Synced %d FastAPI endpoints for database %s",
len(unsynced_endpoints),
"FastAPI endpoint sync skipped for %s — another worker is syncing",
registry.db_name,
)
else:
env = Environment(cr, SUPERUSER_ID, {})

# First check for endpoints with registry_sync=False (never synced)
unsynced_endpoints = env["fastapi.endpoint"].search([("registry_sync", "=", False)])

# Also check for endpoints that claim to be synced but have no routes
# This catches cases where routes were deleted or DB was reset
synced_endpoints = env["fastapi.endpoint"].search([("registry_sync", "=", True)])
if synced_endpoints and "endpoint.route" in env:
for endpoint in synced_endpoints:
route_exists = env["endpoint.route"].search_count(
[("endpoint_id", "=", endpoint.id)], limit=1
)
if not route_exists:
_logger.warning(
"Endpoint '%s' (id=%d) claims to be synced but has no routes - forcing re-sync",
endpoint.name,
endpoint.id,
)
# Reset flag to trigger re-sync
endpoint.registry_sync = False
unsynced_endpoints |= endpoint

if unsynced_endpoints:
unsynced_endpoints.action_sync_registry()
_logger.info(
"Synced %d FastAPI endpoints for database %s",
len(unsynced_endpoints),
registry.db_name,
)
# cr.commit() ends the transaction and RELEASES the
# advisory lock acquired above. Do not add any sync
# work below this line — it would run unlocked and
# re-introduce the SerializationFailure race this
# patch exists to prevent.
cr.commit()
except Exception as e:
# If endpoint model doesn't exist or sync fails, continue anyway
_logger.debug("Could not sync FastAPI endpoints: %s", e)
Expand Down
6 changes: 6 additions & 0 deletions spp_api_v2/readme/HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
### 19.0.2.0.1

- Fix `SerializationFailure` race when multiple Odoo workers rebuild their routing map simultaneously (e.g. after `-u all`) and all try to sync the same `fastapi.endpoint` rows
- Serialize concurrent sync attempts across workers using a transaction-scoped Postgres advisory lock; workers that don't acquire the lock skip the sync and pick up the freshly synced routes on the next routing-map rebuild (via `endpoint_route_version` cache invalidation)
- Log skipped syncs at INFO and lock-primitive failures at WARNING so cold-start route-availability symptoms and broken-primitive regressions are diagnosable without raising the global log level

### 19.0.2.0.0

- Initial migration to OpenSPP2
29 changes: 23 additions & 6 deletions spp_api_v2/static/description/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -517,32 +517,49 @@ <h1>Dependencies</h1>
<div class="contents local topic" id="contents">
<ul class="simple">
<li><a class="reference internal" href="#changelog" id="toc-entry-1">Changelog</a><ul>
<li><a class="reference internal" href="#section-1" id="toc-entry-2">19.0.2.0.0</a></li>
<li><a class="reference internal" href="#section-1" id="toc-entry-2">19.0.2.0.1</a></li>
<li><a class="reference internal" href="#section-2" id="toc-entry-3">19.0.2.0.0</a></li>
</ul>
</li>
<li><a class="reference internal" href="#bug-tracker" id="toc-entry-3">Bug Tracker</a></li>
<li><a class="reference internal" href="#credits" id="toc-entry-4">Credits</a></li>
<li><a class="reference internal" href="#bug-tracker" id="toc-entry-4">Bug Tracker</a></li>
<li><a class="reference internal" href="#credits" id="toc-entry-5">Credits</a></li>
</ul>
</div>
<div class="section" id="changelog">
<h2><a class="toc-backref" href="#toc-entry-1">Changelog</a></h2>
<div class="section" id="section-1">
<h3><a class="toc-backref" href="#toc-entry-2">19.0.2.0.0</a></h3>
<h3><a class="toc-backref" href="#toc-entry-2">19.0.2.0.1</a></h3>
<ul class="simple">
<li>Fix <tt class="docutils literal">SerializationFailure</tt> race when multiple Odoo workers rebuild
their routing map simultaneously (e.g. after <tt class="docutils literal"><span class="pre">-u</span> all</tt>) and all try
to sync the same <tt class="docutils literal">fastapi.endpoint</tt> rows</li>
<li>Serialize concurrent sync attempts across workers using a
transaction-scoped Postgres advisory lock; workers that don’t acquire
the lock skip the sync and pick up the freshly synced routes on the
next routing-map rebuild (via <tt class="docutils literal">endpoint_route_version</tt> cache
invalidation)</li>
<li>Log skipped syncs at INFO and lock-primitive failures at WARNING so
cold-start route-availability symptoms and broken-primitive
regressions are diagnosable without raising the global log level</li>
</ul>
</div>
<div class="section" id="section-2">
<h3><a class="toc-backref" href="#toc-entry-3">19.0.2.0.0</a></h3>
<ul class="simple">
<li>Initial migration to OpenSPP2</li>
</ul>
</div>
</div>
<div class="section" id="bug-tracker">
<h2><a class="toc-backref" href="#toc-entry-3">Bug Tracker</a></h2>
<h2><a class="toc-backref" href="#toc-entry-4">Bug Tracker</a></h2>
<p>Bugs are tracked on <a class="reference external" href="https://github.com/OpenSPP/OpenSPP2/issues">GitHub Issues</a>.
In case of trouble, please check there if your issue has already been reported.
If you spotted it first, help us to smash it by providing a detailed and welcomed
<a class="reference external" href="https://github.com/OpenSPP/OpenSPP2/issues/new?body=module:%20spp_api_v2%0Aversion:%2019.0%0A%0A**Steps%20to%20reproduce**%0A-%20...%0A%0A**Current%20behavior**%0A%0A**Expected%20behavior**">feedback</a>.</p>
<p>Do not contact contributors directly about support or help with technical issues.</p>
</div>
<div class="section" id="credits">
<h2><a class="toc-backref" href="#toc-entry-4">Credits</a></h2>
<h2><a class="toc-backref" href="#toc-entry-5">Credits</a></h2>
</div>
</div>
<div class="section" id="authors">
Expand Down
1 change: 1 addition & 0 deletions spp_api_v2/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from . import test_group_service
from . import test_individual_api
from . import test_individual_service
from . import test_ir_http_patch
from . import test_jwt_secret_validation
from . import test_metadata
from . import test_oauth
Expand Down
Loading
Loading