Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,7 @@
## 2025-02-13 - API Route Prefix Consistency
**Learning:** Inconsistent application of `/api` prefixes between `main.py` router mounting and test suite request paths can lead to 404 errors during testing, even if the logic is correct. This is especially prevalent when multiple agents work on the same codebase with different assumptions about global prefixes.
**Action:** Always verify that `app.include_router` in `backend/main.py` uses `prefix="/api"` if the test suite (e.g., `tests/test_blockchain.py`) expects it. If a router is mounted without a prefix, ensure tests are updated or the prefix is added to `main.py` to maintain repository-wide consistency.

## 2026-02-14 - O(1) Blockchain Chaining & Cache Optimization
**Learning:** Implementing cryptographic chaining (blockchain) for high-frequency records like field officer visits can introduce latency if the previous hash is fetched via a database scan on every creation. While `order_by(id.desc()).first()` is an indexed lookup, maintaining a thread-safe cache for the last hash further eliminates DB roundtrips. However, the cache must be carefully implemented to avoid redundant "verification" queries that negate the performance gain.
**Action:** Use a `ThreadSafeCache` to store the last record's hash and ID. On creation, check the cache first. Only on cache miss (or initialization), perform a projected database lookup (`db.query(Model.id, Model.hash)`). Ensure the cache `max_size` accounts for all keys used (e.g., hash and ID) to prevent constant eviction.
1 change: 1 addition & 0 deletions backend/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,4 +175,5 @@ def invalidate(self):
user_upload_cache = ThreadSafeCache(ttl=3600, max_size=1000) # 1 hour TTL for upload limits
blockchain_last_hash_cache = ThreadSafeCache(ttl=3600, max_size=1)
grievance_last_hash_cache = ThreadSafeCache(ttl=3600, max_size=1)
visit_last_hash_cache = ThreadSafeCache(ttl=3600, max_size=2)
user_issues_cache = ThreadSafeCache(ttl=300, max_size=50) # 5 minutes TTL
12 changes: 8 additions & 4 deletions backend/geofencing_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,17 @@ def is_within_geofence(
return within_fence, distance


def generate_visit_hash(visit_data: dict) -> str:
def generate_visit_hash(visit_data: dict, prev_hash: str = "") -> str:
"""
Generate a tamper-resistant HMAC hash for visit data (blockchain-like integrity).

Uses HMAC-SHA256 with server secret to prevent forgery.
Normalizes datetime to ISO format for deterministic hashing.
Supports chaining by including the previous record's hash.

Args:
visit_data: Dictionary containing visit information
prev_hash: Hash of the previous visit record for chaining

Returns:
HMAC-SHA256 hash of visit data
Expand All @@ -111,14 +113,15 @@ def generate_visit_hash(visit_data: dict) -> str:
else:
check_in_time_str = str(check_in_time) if check_in_time else ""

# Create a deterministic string from visit data
# Create a deterministic string from visit data including the previous hash
data_string = (
f"{visit_data.get('issue_id')}"
f"{visit_data.get('officer_email')}"
f"{visit_data.get('check_in_latitude')}"
f"{visit_data.get('check_in_longitude')}"
Comment on lines 117 to 121
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

generate_visit_hash() constructs data_string by concatenating fields without any delimiter/structured encoding. This is ambiguous (different field combinations can produce the same string), which undermines the integrity guarantee even with HMAC. Use deterministic structured serialization (e.g., JSON with sorted keys) or add unambiguous separators and normalized numeric formatting before hashing.

Copilot uses AI. Check for mistakes.
f"{check_in_time_str}"
f"{visit_data.get('visit_notes', '')}"
f"{prev_hash}"
)

# Generate HMAC-SHA256 hash for tamper-resistance
Expand All @@ -137,19 +140,20 @@ def generate_visit_hash(visit_data: dict) -> str:
return ""


def verify_visit_integrity(visit_data: dict, stored_hash: str) -> bool:
def verify_visit_integrity(visit_data: dict, stored_hash: str, prev_hash: str = "") -> bool:
"""
Verify the integrity of visit data against stored hash.

Args:
visit_data: Dictionary containing visit information
stored_hash: Previously stored hash
prev_hash: Previous record hash for chained verification

Returns:
True if data is unmodified, False otherwise
"""
try:
computed_hash = generate_visit_hash(visit_data)
computed_hash = generate_visit_hash(visit_data, prev_hash=prev_hash)
is_valid = computed_hash == stored_hash

if not is_valid:
Expand Down
7 changes: 7 additions & 0 deletions backend/init_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,13 @@ def index_exists(table, index_name):
if not index_exists("field_officer_visits", "ix_field_officer_visits_check_in_time"):
conn.execute(text("CREATE INDEX IF NOT EXISTS ix_field_officer_visits_check_in_time ON field_officer_visits (check_in_time)"))

if not column_exists("field_officer_visits", "previous_visit_hash"):
conn.execute(text("ALTER TABLE field_officer_visits ADD COLUMN previous_visit_hash VARCHAR"))
logger.info("Added previous_visit_hash column to field_officer_visits")

if not index_exists("field_officer_visits", "ix_field_officer_visits_previous_visit_hash"):
conn.execute(text("CREATE INDEX IF NOT EXISTS ix_field_officer_visits_previous_visit_hash ON field_officer_visits (previous_visit_hash)"))
Comment on lines +202 to +207
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

This migration is not wired into the running app.

backend/main.py currently skips migrate_db() during startup, while backend/routers/field_officer.py already reads FieldOfficerVisit.previous_visit_hash on live requests. On an upgraded database, the column added here never gets created and the first check-in/verify call will fail with a missing-column error. The current tests will not catch this because they create the latest schema directly with Base.metadata.create_all().

Minimal startup fix outside this file
-        # await run_in_threadpool(migrate_db)
+        await run_in_threadpool(migrate_db)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@backend/init_db.py` around lines 202 - 207, The migration that adds
previous_visit_hash is not being run at startup, causing
FieldOfficerVisit.previous_visit_hash access in routers/field_officer.py to fail
on upgraded DBs; wire the migration runner into app startup by ensuring
migrate_db() (from backend/init_db.py) is invoked during application bootstrap
in backend/main.py (e.g., in the FastAPI startup event or where DB/init
functions are called) so the ALTER TABLE and index creation run before the app
serves requests that import/read FieldOfficerVisit.previous_visit_hash.


logger.info("Database migration check completed successfully.")

except Exception as e:
Expand Down
1 change: 1 addition & 0 deletions backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ class FieldOfficerVisit(Base):

# Immutability hash (blockchain-like integrity)
visit_hash = Column(String, nullable=True) # Hash of visit data for integrity verification
previous_visit_hash = Column(String, nullable=True, index=True) # Linked hash for O(1) verification

# Metadata
created_at = Column(DateTime, default=lambda: datetime.datetime.now(datetime.timezone.utc))
Expand Down
113 changes: 111 additions & 2 deletions backend/routers/field_officer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,12 @@
from backend.geofencing_service import (
is_within_geofence,
generate_visit_hash,
verify_visit_integrity,
calculate_visit_metrics,
get_geofencing_service
)
from backend.cache import visit_last_hash_cache
from backend.schemas import BlockchainVerificationResponse

Comment on lines +34 to 36
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicate import: BlockchainVerificationResponse is imported separately from backend.schemas even though this file already has a from backend.schemas import (...) block above. Please merge into a single import to avoid duplication.

Copilot uses AI. Check for mistakes.
logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -58,6 +61,7 @@ def officer_check_in(request: OfficerCheckInRequest, db: Session = Depends(get_d
- **geofence_radius_meters**: Acceptable distance from site (default: 100m)

**Geo-Fencing**: Automatically verifies if officer is within acceptable radius of issue location
**Blockchain Chaining**: Cryptographically links this visit to the previous one for immutability
"""
try:
# Validate issue exists
Expand Down Expand Up @@ -95,6 +99,22 @@ def officer_check_in(request: OfficerCheckInRequest, db: Session = Depends(get_d
# Create visit record
check_in_time = datetime.now(timezone.utc)

# Blockchain Chaining Logic
# Use thread-safe cache to eliminate database lookups for the previous hash
prev_hash = visit_last_hash_cache.get("last_hash")
Copy link
Copy Markdown
Contributor

@cubic-dev-ai cubic-dev-ai Bot Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: Race condition: the read → hash → commit → cache-update sequence is not atomic, so concurrent check-ins (or requests across multiple worker processes) will read the same prev_hash and fork the blockchain chain, silently breaking the tamper-evidence guarantee this feature is designed to provide.

To fix this properly, either use a database-level advisory lock / SELECT ... FOR UPDATE on the last row, or serialize check-ins through an application-level mutex (noting that an in-process lock only works with a single worker).

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At backend/routers/field_officer.py, line 104:

<comment>Race condition: the read → hash → commit → cache-update sequence is not atomic, so concurrent check-ins (or requests across multiple worker processes) will read the same `prev_hash` and fork the blockchain chain, silently breaking the tamper-evidence guarantee this feature is designed to provide.

To fix this properly, either use a database-level advisory lock / `SELECT ... FOR UPDATE` on the last row, or serialize check-ins through an application-level mutex (noting that an in-process lock only works with a single worker).</comment>

<file context>
@@ -95,6 +99,22 @@ def officer_check_in(request: OfficerCheckInRequest, db: Session = Depends(get_d
         
+        # Blockchain Chaining Logic
+        # Use thread-safe cache to eliminate database lookups for the previous hash
+        prev_hash = visit_last_hash_cache.get("last_hash")
+
+        if prev_hash is None:
</file context>
Fix with Cubic


if prev_hash is None:
# Cache miss: fetch only the last hash and ID from DB
# Optimization: Use column projection to avoid full model loading
last_visit = db.query(FieldOfficerVisit.id, FieldOfficerVisit.visit_hash).order_by(FieldOfficerVisit.id.desc()).first()
if last_visit:
prev_hash = last_visit[1] or ""
visit_last_hash_cache.set(data=prev_hash, key="last_hash")
visit_last_hash_cache.set(data=last_visit[0], key="last_id")
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

last_id is cached here but never read anywhere, so it’s currently dead state. Either remove it (and reduce cache max_size), or use it to validate cache freshness against the DB before chaining (see the pattern in backend/grievance_service.py where cached last_id/hash are compared to the DB tail to avoid chaining to stale values).

Suggested change
visit_last_hash_cache.set(data=last_visit[0], key="last_id")

Copilot uses AI. Check for mistakes.
else:
prev_hash = ""
visit_last_hash_cache.set(data=prev_hash, key="last_hash")
Comment on lines +103 to +116
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Chaining via a process-local cache isn’t atomic across concurrent requests: two check-ins can read the same prev_hash before either commits, producing a fork (multiple visits with the same previous_visit_hash). If a strictly linear chain is required, consider determining prev_hash from the DB within the same transaction as the insert (or using a DB-level lock/sequence) rather than relying on the in-memory cache alone.

Suggested change
# Use thread-safe cache to eliminate database lookups for the previous hash
prev_hash = visit_last_hash_cache.get("last_hash")
if prev_hash is None:
# Cache miss: fetch only the last hash and ID from DB
# Optimization: Use column projection to avoid full model loading
last_visit = db.query(FieldOfficerVisit.id, FieldOfficerVisit.visit_hash).order_by(FieldOfficerVisit.id.desc()).first()
if last_visit:
prev_hash = last_visit[1] or ""
visit_last_hash_cache.set(data=prev_hash, key="last_hash")
visit_last_hash_cache.set(data=last_visit[0], key="last_id")
else:
prev_hash = ""
visit_last_hash_cache.set(data=prev_hash, key="last_hash")
# Determine previous hash from the database within the same transaction
last_visit = (
db.query(FieldOfficerVisit)
.order_by(FieldOfficerVisit.id.desc())
.with_for_update()
.first()
)
if last_visit:
prev_hash = last_visit.visit_hash or ""
else:
prev_hash = ""

Copilot uses AI. Check for mistakes.

visit_data = {
'issue_id': request.issue_id,
'officer_email': request.officer_email,
Expand All @@ -104,8 +124,8 @@ def officer_check_in(request: OfficerCheckInRequest, db: Session = Depends(get_d
'visit_notes': request.visit_notes or ''
}

# Generate immutable hash
visit_hash = generate_visit_hash(visit_data)
# Generate immutable hash with chaining
visit_hash = generate_visit_hash(visit_data, prev_hash=prev_hash)

new_visit = FieldOfficerVisit(
issue_id=request.issue_id,
Expand All @@ -123,13 +143,18 @@ def officer_check_in(request: OfficerCheckInRequest, db: Session = Depends(get_d
visit_notes=request.visit_notes,
status='checked_in',
visit_hash=visit_hash,
previous_visit_hash=prev_hash,
is_public=True
)

db.add(new_visit)
db.commit()
db.refresh(new_visit)

# Update cache after successful commit
visit_last_hash_cache.set(data=visit_hash, key="last_hash")
visit_last_hash_cache.set(data=new_visit.id, key="last_id")

logger.info(
f"Officer {request.officer_name} checked in at issue {request.issue_id}. "
f"Distance: {distance:.2f}m, Within fence: {within_fence}"
Expand All @@ -155,6 +180,8 @@ def officer_check_in(request: OfficerCheckInRequest, db: Session = Depends(get_d
status=new_visit.status,
verified_by=new_visit.verified_by,
verified_at=new_visit.verified_at,
visit_hash=new_visit.visit_hash,
previous_visit_hash=new_visit.previous_visit_hash,
is_public=new_visit.is_public,
created_at=new_visit.created_at
)
Expand Down Expand Up @@ -230,6 +257,8 @@ def officer_check_out(request: OfficerCheckOutRequest, db: Session = Depends(get
status=visit.status,
verified_by=visit.verified_by,
verified_at=visit.verified_at,
visit_hash=visit.visit_hash,
previous_visit_hash=visit.previous_visit_hash,
is_public=visit.is_public,
created_at=visit.created_at
)
Expand Down Expand Up @@ -445,6 +474,86 @@ def get_visit_statistics(db: Session = Depends(get_db)):
raise HTTPException(status_code=500, detail="Failed to calculate statistics")


@router.get("/field-officer/visit/{visit_id}/blockchain-verify", response_model=BlockchainVerificationResponse)
def verify_visit_blockchain(visit_id: int, db: Session = Depends(get_db)):
    """
    Verify the cryptographic integrity of a field officer visit in O(1).

    Recomputes the HMAC hash over the stored visit fields, chained with the
    record's stored ``previous_visit_hash``, and compares it to the stored
    ``visit_hash``.

    NOTE(review): this confirms the record is sealed against the previous-hash
    value it recorded at creation time. It does NOT independently fetch the
    prior visit to confirm that ``previous_visit_hash`` equals that record's
    actual ``visit_hash`` — full chain-walk verification would require that
    extra lookup.

    Args:
        visit_id: Primary key of the visit to verify.
        db: Database session (injected by FastAPI dependency).

    Returns:
        BlockchainVerificationResponse with the validity flag, the stored
        hash, the recomputed hash, and a human-readable message.

    Raises:
        HTTPException: 404 if the visit does not exist, 500 on unexpected
            verification failure.
    """
    try:
        # Column projection: fetch only the fields needed for verification
        # instead of loading the full ORM model.
        visit = db.query(
            FieldOfficerVisit.id,
            FieldOfficerVisit.issue_id,
            FieldOfficerVisit.officer_email,
            FieldOfficerVisit.check_in_latitude,
            FieldOfficerVisit.check_in_longitude,
            FieldOfficerVisit.check_in_time,
            FieldOfficerVisit.visit_notes,
            FieldOfficerVisit.visit_hash,
            FieldOfficerVisit.previous_visit_hash
        ).filter(FieldOfficerVisit.id == visit_id).first()

        if not visit:
            raise HTTPException(status_code=404, detail=f"Visit {visit_id} not found")

        # Normalize check_in_time to the same ISO-format string used when the
        # hash was generated at check-in. SQLite returns naive datetimes, so
        # naive values are assumed UTC to keep the recomputed hash stable.
        check_in_time_str = ""
        if visit.check_in_time:
            if visit.check_in_time.tzinfo is None:
                # Naive datetime (SQLite) — assume UTC, matching creation.
                check_in_time_str = visit.check_in_time.replace(tzinfo=timezone.utc).isoformat()
            else:
                check_in_time_str = visit.check_in_time.isoformat()

        visit_data = {
            'issue_id': visit.issue_id,
            'officer_email': visit.officer_email,
            'check_in_latitude': visit.check_in_latitude,
            'check_in_longitude': visit.check_in_longitude,
            'check_in_time': check_in_time_str,
            'visit_notes': visit.visit_notes or ''
        }

        # Stored chain link — O(1), no lookup of the previous record needed.
        prev_hash = visit.previous_visit_hash or ""

        # Compute the HMAC exactly ONCE and reuse it for both the validity
        # check and the response payload. (Previously the data was hashed a
        # second time via generate_visit_hash() when building the response.)
        computed_hash = generate_visit_hash(visit_data, prev_hash=prev_hash)
        is_valid = computed_hash == (visit.visit_hash or "")

        if visit.visit_hash is None:
            message = "No integrity hash present for this visit record."
        else:
            message = (
                "Integrity verified. This visit record is cryptographically sealed and part of a secure chain."
                if is_valid
                else "Integrity check failed! The visit data or chain link has been tampered with."
            )

        return BlockchainVerificationResponse(
            is_valid=is_valid,
            current_hash=visit.visit_hash,
            computed_hash=computed_hash,
            message=message
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error verifying visit blockchain for {visit_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to verify visit integrity")


@router.post("/field-officer/visit/{visit_id}/verify")
def verify_visit(
visit_id: int,
Expand Down
2 changes: 2 additions & 0 deletions backend/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,8 @@ class FieldOfficerVisitResponse(BaseModel):
status: str = Field(..., description="Visit status")
verified_by: Optional[str] = Field(None, description="Verified by")
verified_at: Optional[datetime] = Field(None, description="Verification timestamp")
visit_hash: Optional[str] = Field(None, description="Integrity hash")
previous_visit_hash: Optional[str] = Field(None, description="Previous visit hash")
is_public: bool = Field(..., description="Public visibility")
created_at: datetime = Field(..., description="Creation timestamp")

Expand Down
Loading
Loading