Skip to content

Commit a8a9ea2

Browse files
authored
Merge pull request #26 from webstackdev/feature/topic-centroid-feedback-loop
Feature/topic centroid feedback loop
2 parents aaaec24 + 9164b45 commit a8a9ea2

23 files changed

Lines changed: 1691 additions & 25 deletions

core/admin.py

Lines changed: 213 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,13 @@
66
"""
77

88
import json
9+
from urllib.parse import urlencode
910

1011
from django import forms
1112
from django.contrib import admin, messages
12-
from django.db.models import Avg
13+
from django.db.models import Avg, Max, QuerySet
14+
from django.http import HttpRequest
15+
from django.urls import reverse
1316
from django.utils import timezone
1417
from django.utils.html import format_html
1518
from django.utils.safestring import mark_safe
@@ -35,6 +38,7 @@
3538
ReviewQueue,
3639
SkillResult,
3740
SourceConfig,
41+
TopicCentroidSnapshot,
3842
UserFeedback,
3943
)
4044
from core.plugins import get_plugin_for_source_config, validate_plugin_config
@@ -64,6 +68,94 @@ def _score_color(value) -> str:
6468
return "red"
6569

6670

71+
def _drift_card_color(value) -> str:
72+
"""Return an admin card severity for centroid drift percentages."""
73+
74+
if value is None:
75+
return "info"
76+
numeric_value = float(value)
77+
if numeric_value <= 0.15:
78+
return "success"
79+
if numeric_value <= 0.35:
80+
return "warning"
81+
return "danger"
82+
83+
84+
def _format_snapshot_freshness(computed_at) -> str:
85+
"""Return a compact human-readable age for the latest snapshot."""
86+
87+
if computed_at is None:
88+
return "-"
89+
age = timezone.now() - computed_at
90+
total_hours = max(0, int(age.total_seconds() // 3600))
91+
if total_hours < 24:
92+
return f"{total_hours}h ago"
93+
return f"{max(1, total_hours // 24)}d ago"
94+
95+
96+
def _freshness_card_color(computed_at) -> str:
97+
"""Return an admin card severity based on snapshot recency."""
98+
99+
if computed_at is None:
100+
return "warning"
101+
age = timezone.now() - computed_at
102+
age_hours = age.total_seconds() / 3600
103+
if age_hours <= 24:
104+
return "success"
105+
if age_hours <= 72:
106+
return "warning"
107+
return "danger"
108+
109+
110+
def _build_topic_centroid_project_drilldowns(queryset, changelist_url: str):
    """Summarise each project's newest snapshot as one drilldown row.

    Grouping is done in Python rather than with database-specific DISTINCT ON
    so the changelist widget gets the same project links on SQLite.

    Returns a list of dicts ordered case-insensitively by project name. Each row
    carries the project's id and name, its number of snapshots in ``queryset``,
    the newest snapshot's ``centroid_active`` / ``feedback_count`` values,
    formatted freshness and drift labels, and an ``href`` pointing at
    ``changelist_url`` filtered by ``project__id__exact``.
    """

    def _drift_label(drift) -> str:
        # Unset drift is shown as "n/a"; otherwise a one-decimal percentage.
        if drift is None:
            return "n/a"
        return f"{_score_to_percent(drift):.1f}%"

    def _as_row(latest, total):
        project_filter = urlencode({"project__id__exact": latest.project_id})
        return {
            "project_id": latest.project_id,
            "project_name": latest.project.name,
            "snapshot_count": total,
            "centroid_active": latest.centroid_active,
            "feedback_count": latest.feedback_count,
            "latest_snapshot": _format_snapshot_freshness(latest.computed_at),
            "drift_from_previous": _drift_label(latest.drift_from_previous),
            "drift_from_week_ago": _drift_label(latest.drift_from_week_ago),
            "href": f"{changelist_url}?{project_filter}",
        }

    newest_per_project = {}
    totals_per_project = {}
    rows = queryset.select_related("project").order_by("project_id", "-computed_at")
    for row in rows:
        key = row.project_id
        # Rows arrive newest-first within a project, so the first one seen is
        # that project's latest snapshot.
        if key not in newest_per_project:
            newest_per_project[key] = row
        totals_per_project[key] = totals_per_project.get(key, 0) + 1

    alphabetical = sorted(
        newest_per_project.values(),
        key=lambda row: row.project.name.lower(),
    )
    return [
        _as_row(latest, totals_per_project[latest.project_id])
        for latest in alphabetical
    ]
157+
158+
67159
class BlueskyCredentialsAdminForm(forms.ModelForm):
68160
"""Admin form that accepts a plaintext Bluesky app credential input."""
69161

@@ -223,6 +315,15 @@ class ProjectConfigAdmin(admin.ModelAdmin):
223315
"upvote_authority_weight",
224316
"downvote_authority_weight",
225317
"authority_decay_rate",
318+
"recompute_topic_centroid_on_feedback_save",
319+
)
320+
list_filter = ("recompute_topic_centroid_on_feedback_save",)
321+
fields = (
322+
"project",
323+
"upvote_authority_weight",
324+
"downvote_authority_weight",
325+
"authority_decay_rate",
326+
"recompute_topic_centroid_on_feedback_save",
226327
)
227328

228329

@@ -325,6 +426,112 @@ def display_components(self, obj):
325426
)
326427

327428

429+
@admin.register(TopicCentroidSnapshot)
class TopicCentroidSnapshotAdmin(admin.ModelAdmin):
    """Admin changelist for stored topic-centroid snapshots and their drift."""

    list_before_template = "admin/topic_centroid_snapshot_changelist_widget.html"
    list_display = (
        "project",
        "centroid_active",
        "feedback_count",
        "display_drift_from_previous",
        "display_drift_from_week_ago",
        "computed_at",
    )
    list_filter = (
        "centroid_active",
        ("project", admin.RelatedOnlyFieldListFilter),
        "computed_at",
    )
    search_fields = ("project__name",)
    autocomplete_fields = ("project",)

    @admin.display(description="Drift vs Previous", ordering="drift_from_previous")
    def display_drift_from_previous(self, obj):
        """Show ``drift_from_previous`` as a one-decimal percent, or "n/a" if unset."""

        drift = obj.drift_from_previous
        return "n/a" if drift is None else f"{_score_to_percent(drift):.1f}%"

    @admin.display(description="Drift vs 7d", ordering="drift_from_week_ago")
    def display_drift_from_week_ago(self, obj):
        """Show ``drift_from_week_ago`` as a one-decimal percent, or "n/a" if unset."""

        drift = obj.drift_from_week_ago
        return "n/a" if drift is None else f"{_score_to_percent(drift):.1f}%"

    def changelist_view(self, request, extra_context=None):
        """Add summary cards and per-project drilldown rows to the changelist.

        The cards are computed from ``self.get_queryset(request)`` and stored
        under ``dashboard_stats``. The drilldown rows are stored under
        ``centroid_project_drilldowns``. Both are merged into ``extra_context``
        before delegating to the parent implementation.
        """

        def _percent_or_dash(average) -> str:
            # Aggregates are None when no rows contribute a value.
            if average is None:
                return "-"
            return f"{_score_to_percent(average):.1f}%"

        snapshots = self.get_queryset(request)
        model_meta = self.model._meta
        changelist_url = reverse(
            f"{self.admin_site.name}:{model_meta.app_label}_{model_meta.model_name}_changelist"
        )

        totals = snapshots.aggregate(
            avg_drift_from_previous=Avg("drift_from_previous"),
            avg_drift_from_week_ago=Avg("drift_from_week_ago"),
            latest_snapshot_at=Max("computed_at"),
        )
        avg_previous = totals["avg_drift_from_previous"]
        avg_week_ago = totals["avg_drift_from_week_ago"]
        newest_at = totals["latest_snapshot_at"]

        project_count = snapshots.values("project_id").distinct().count()
        active_snapshots = snapshots.filter(centroid_active=True)
        active_project_count = active_snapshots.values("project_id").distinct().count()

        if project_count:
            active_value = f"{active_project_count} / {project_count}"
        else:
            active_value = "0 / 0"
        # Only "success" when there is at least one project and all are active.
        every_project_active = active_project_count == project_count and project_count
        active_color = "success" if every_project_active else "warning"

        extra_context = extra_context or {}
        extra_context["dashboard_stats"] = [
            {
                "title": "Active Centroids",
                "value": active_value,
                "icon": "hub",
                "color": active_color,
            },
            {
                "title": "Avg Drift vs Previous",
                "value": _percent_or_dash(avg_previous),
                "icon": "show_chart",
                "color": _drift_card_color(avg_previous),
            },
            {
                "title": "Avg Drift vs 7d",
                "value": _percent_or_dash(avg_week_ago),
                "icon": "timeline",
                "color": _drift_card_color(avg_week_ago),
            },
            {
                "title": "Latest Snapshot",
                "value": _format_snapshot_freshness(newest_at),
                "icon": "schedule",
                "color": _freshness_card_color(newest_at),
            },
        ]
        extra_context["centroid_project_drilldowns"] = (
            _build_topic_centroid_project_drilldowns(snapshots, changelist_url)
        )
        return super().changelist_view(request, extra_context=extra_context)
533+
534+
328535
@admin.register(EntityMention)
329536
class EntityMentionAdmin(admin.ModelAdmin):
330537
"""Admin view for extracted tracked-entity mentions."""
@@ -404,7 +611,11 @@ def reject_selected_candidates(self, request, queryset):
404611
)
405612

406613
@admin.action(description="Merge selected candidates into existing entities")
407-
def merge_into_existing_entities(self, request, queryset):
614+
def merge_into_existing_entities(
615+
self,
616+
request: HttpRequest,
617+
queryset: QuerySet[EntityCandidate],
618+
) -> None:
408619
"""Merge candidates when a same-name entity already exists in the project."""
409620

410621
merged_count = 0

core/api.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import logging
99
from typing import Any
1010

11-
from django.db.models import Count, Prefetch
11+
from django.db.models import Avg, Count, Prefetch, Q
1212
from drf_spectacular.utils import (
1313
OpenApiExample,
1414
OpenApiParameter,
@@ -40,6 +40,7 @@
4040
ReviewQueue,
4141
SkillResult,
4242
SourceConfig,
43+
TopicCentroidSnapshot,
4344
UserFeedback,
4445
)
4546
from core.serializers import (
@@ -55,6 +56,8 @@
5556
ReviewQueueSerializer,
5657
SkillResultSerializer,
5758
SourceConfigSerializer,
59+
TopicCentroidObservabilitySummarySerializer,
60+
TopicCentroidSnapshotSerializer,
5861
UserFeedbackSerializer,
5962
)
6063

@@ -1036,6 +1039,63 @@ class SourceConfigViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
10361039
queryset = SourceConfig.objects.select_related("project")
10371040

10381041

1042+
@document_project_owned_viewset(
    resource_plural="topic centroid snapshots",
    resource_singular="topic centroid snapshot",
    create_description="Topic centroid snapshots are pipeline-managed history rows and are exposed read-only for observability.",
    tag="Observability",
    action_overrides=build_crud_action_overrides(
        TopicCentroidSnapshotSerializer,
        resource_plural="topic centroid snapshots for the selected project",
        resource_singular="topic centroid snapshot",
    ),
)
class TopicCentroidSnapshotViewSet(
    ProjectOwnedQuerysetMixin, viewsets.ReadOnlyModelViewSet
):
    """Read-only access to stored centroid snapshots plus a drift summary action."""

    serializer_class = TopicCentroidSnapshotSerializer
    queryset = TopicCentroidSnapshot.objects.select_related("project")

    @extend_schema(
        summary="Get topic centroid summary",
        description=(
            "Return aggregate centroid observability metrics for the selected project, "
            "including average drift and the latest persisted snapshot."
        ),
        request=None,
        responses={
            200: TopicCentroidObservabilitySummarySerializer,
            403: AUTHENTICATION_REQUIRED_RESPONSE,
        },
        tags=["Observability"],
    )
    @action(detail=False, methods=["get"], url_path="summary")
    def summary(self, request, *args, **kwargs):
        """Respond with snapshot counts, average drift, and the newest snapshot.

        All figures are computed over ``self.get_queryset()``. The newest
        snapshot is the first row when ordered by ``computed_at`` descending,
        or ``None`` when the queryset is empty.
        """

        snapshots = self.get_queryset()
        aggregates = snapshots.aggregate(
            snapshot_count=Count("id"),
            active_snapshot_count=Count("id", filter=Q(centroid_active=True)),
            avg_drift_from_previous=Avg("drift_from_previous"),
            avg_drift_from_week_ago=Avg("drift_from_week_ago"),
        )
        # The aggregate keys already match the serializer's field names, so the
        # dict is unpacked between the project id and the newest snapshot.
        payload = {
            "project": self.get_project().id,
            **aggregates,
            "latest_snapshot": snapshots.order_by("-computed_at").first(),
        }
        summary_serializer = TopicCentroidObservabilitySummarySerializer(
            payload,
            context=self.get_serializer_context(),
        )
        return Response(summary_serializer.data)
1097+
1098+
10391099
@document_project_owned_viewset(
10401100
resource_plural="review queue entries",
10411101
resource_singular="review queue entry",

core/api_urls.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
ReviewQueueViewSet,
1212
SkillResultViewSet,
1313
SourceConfigViewSet,
14+
TopicCentroidSnapshotViewSet,
1415
UserFeedbackViewSet,
1516
)
1617

@@ -40,6 +41,11 @@
4041
project_router.register(
4142
r"source-configs", SourceConfigViewSet, basename="project-source-config"
4243
)
44+
# Expose the read-only topic-centroid snapshot viewset on the project router.
project_router.register(
    r"topic-centroid-snapshots",
    TopicCentroidSnapshotViewSet,
    basename="project-topic-centroid-snapshot",
)
4349
project_router.register(
4450
r"review-queue", ReviewQueueViewSet, basename="project-review-queue"
4551
)

0 commit comments

Comments
 (0)