Skip to content

Commit 965c563

Browse files
committed
Add source diversity analysis WP
1 parent c9e4bb5 commit 965c563

12 files changed

Lines changed: 1018 additions & 8 deletions

File tree

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
{# Changelist widget: summary cards, concentration alerts, and per-project drilldown links. #}
{# Renders nothing when all three context lists are empty or missing. #}
{% if dashboard_stats or source_diversity_alerts or source_diversity_project_drilldowns %}
  <div class="mb-6 flex flex-col gap-4">
    {# Summary cards: one per entry in dashboard_stats (title, value, icon). #}
    {% if dashboard_stats %}
      <div class="grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-4">
        {% for stat in dashboard_stats %}
          <div class="rounded-default border border-base-200 bg-white px-4 py-4 shadow-xs dark:border-base-800 dark:bg-base-900">
            <div class="flex items-center justify-between gap-3">
              <div>
                <p class="text-xs font-medium uppercase tracking-tight text-base-500 dark:text-base-400">{{ stat.title }}</p>
                <p class="mt-2 text-2xl font-semibold text-font-important-light dark:text-font-important-dark">{{ stat.value }}</p>
              </div>
              {# The icon is drawn from ligature text (e.g. "hub"); hide that text from assistive technology. #}
              <span class="material-symbols-outlined text-base-400 dark:text-base-500" aria-hidden="true">{{ stat.icon }}</span>
            </div>
          </div>
        {% endfor %}
      </div>
    {% endif %}
    {# Alert callouts: one box per alert message. #}
    {% if source_diversity_alerts %}
      <div class="rounded-default border border-base-200 bg-white px-4 py-4 shadow-xs dark:border-base-800 dark:bg-base-900">
        <h2 class="text-sm font-semibold text-font-important-light dark:text-font-important-dark">Concentration Alerts</h2>
        <div class="mt-4 grid grid-cols-1 gap-3 xl:grid-cols-2">
          {% for alert in source_diversity_alerts %}
            <div class="rounded-default border border-orange-200 bg-orange-50 px-4 py-3 text-sm text-orange-800 dark:border-orange-500/30 dark:bg-orange-500/10 dark:text-orange-300">
              {{ alert.message }}
            </div>
          {% endfor %}
        </div>
      </div>
    {% endif %}
    {# Drilldown links: each card links to drilldown.href and shows the project's latest metrics. #}
    {% if source_diversity_project_drilldowns %}
      <div class="rounded-default border border-base-200 bg-white px-4 py-4 shadow-xs dark:border-base-800 dark:bg-base-900">
        <div>
          <h2 class="text-sm font-semibold text-font-important-light dark:text-font-important-dark">Project Drilldown</h2>
          <p class="mt-1 text-sm text-base-500 dark:text-base-400">
            Jump into filtered source diversity history for a single project.
          </p>
        </div>
        <div class="mt-4 grid grid-cols-1 gap-3 xl:grid-cols-2">
          {% for drilldown in source_diversity_project_drilldowns %}
            <a href="{{ drilldown.href }}"
               class="rounded-default border border-base-200 px-4 py-3 transition hover:border-primary-600 hover:bg-base-50 dark:border-base-800 dark:hover:border-primary-500 dark:hover:bg-base-800">
              <div class="flex items-start justify-between gap-3">
                <div>
                  <p class="font-medium text-font-important-light dark:text-font-important-dark">{{ drilldown.project_name }}</p>
                  <p class="mt-1 text-sm text-base-500 dark:text-base-400">
                    {{ drilldown.snapshot_count }} snapshot{{ drilldown.snapshot_count|pluralize }} · {{ drilldown.latest_snapshot }}
                  </p>
                </div>
                <span class="rounded-full bg-base-100 px-2 py-1 text-xs font-medium text-base-700 dark:bg-base-800 dark:text-base-300">
                  {{ drilldown.alert_count }} alert{{ drilldown.alert_count|pluralize }}
                </span>
              </div>
              <div class="mt-3 flex flex-wrap gap-x-4 gap-y-1 text-sm text-base-600 dark:text-base-300">
                <span>Plugin {{ drilldown.plugin_entropy }}</span>
                <span>Source {{ drilldown.source_entropy }}</span>
                <span>Top plugin {{ drilldown.top_plugin_share }}</span>
              </div>
            </a>
          {% endfor %}
        </div>
      </div>
    {% endif %}
  </div>
{% endif %}

ingestion/tasks.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from celery import shared_task
66
from django.conf import settings
7-
from django.db.models import Q
7+
from django.db.models import Model, Q
88
from django.utils import timezone
99

1010
from content.deduplication import canonicalize_url
@@ -16,6 +16,17 @@
1616
logger = logging.getLogger(__name__)
1717

1818

19+
def _require_pk(instance: Model) -> int:
20+
"""Return a saved model primary key as an ``int``."""
21+
22+
instance_pk = instance.pk
23+
if instance_pk is None:
24+
raise ValueError(
25+
f"{instance.__class__.__name__} must be saved before task dispatch"
26+
)
27+
return int(instance_pk)
28+
29+
1930
@shared_task(name="core.tasks.run_ingestion")
2031
def run_ingestion(source_config_id: int):
2132
"""Fetch new content for one source config and record an ingestion run."""
@@ -74,7 +85,10 @@ def _ingest_source_config(source_config: SourceConfig) -> tuple[int, int]:
7485
for item in fetched_items:
7586
if _content_exists_for_item(source_config, item):
7687
continue
77-
source_metadata = getattr(item, "source_metadata", None) or {}
88+
source_metadata = {
89+
**(getattr(item, "source_metadata", None) or {}),
90+
"source_config_id": _require_pk(source_config),
91+
}
7892
content = Content.objects.create(
7993
project=source_config.project,
8094
entity=_match_entity_for_item(plugin, item),

trends/admin.py

Lines changed: 184 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from django.urls import reverse
99
from django.utils import timezone
1010

11-
from trends.models import TopicCentroidSnapshot
11+
from trends.models import SourceDiversitySnapshot, TopicCentroidSnapshot
1212

1313

1414
def _project_pk(snapshot: TopicCentroidSnapshot) -> int:
@@ -44,6 +44,32 @@ def _drift_card_color(value) -> str:
4444
return "danger"
4545

4646

47+
def _diversity_card_color(value) -> str:
48+
"""Return an admin card severity for normalized diversity scores."""
49+
50+
if value is None:
51+
return "info"
52+
numeric_value = float(value)
53+
if numeric_value >= 0.75:
54+
return "success"
55+
if numeric_value >= 0.4:
56+
return "warning"
57+
return "danger"
58+
59+
60+
def _share_card_color(value) -> str:
61+
"""Return an admin card severity for concentration share metrics."""
62+
63+
if value is None:
64+
return "info"
65+
numeric_value = float(value)
66+
if numeric_value <= 0.4:
67+
return "success"
68+
if numeric_value <= 0.7:
69+
return "warning"
70+
return "danger"
71+
72+
4773
def _format_snapshot_freshness(computed_at) -> str:
4874
"""Return a compact human-readable age for the latest snapshot."""
4975

@@ -115,6 +141,46 @@ def _build_topic_centroid_project_drilldowns(queryset, changelist_url: str):
115141
return project_drilldowns
116142

117143

144+
def _build_source_diversity_project_drilldowns(queryset, changelist_url: str):
145+
"""Build one filtered-history drilldown row per project."""
146+
147+
latest_by_project: dict[int, SourceDiversitySnapshot] = {}
148+
snapshot_counts: dict[int, int] = {}
149+
ordered_snapshots = queryset.select_related("project").order_by(
150+
"project_id", "-computed_at"
151+
)
152+
153+
for snapshot in ordered_snapshots:
154+
project_id = int(snapshot.project_id)
155+
snapshot_counts[project_id] = snapshot_counts.get(project_id, 0) + 1
156+
latest_by_project.setdefault(project_id, snapshot)
157+
158+
project_drilldowns = []
159+
for snapshot in sorted(
160+
latest_by_project.values(),
161+
key=lambda value: value.project.name.lower(),
162+
):
163+
project_id = int(snapshot.project_id)
164+
alerts = cast(
165+
list[dict[str, Any]], (snapshot.breakdown or {}).get("alerts", [])
166+
)
167+
project_drilldowns.append(
168+
{
169+
"project_id": project_id,
170+
"project_name": snapshot.project.name,
171+
"snapshot_count": snapshot_counts[project_id],
172+
"latest_snapshot": _format_snapshot_freshness(snapshot.computed_at),
173+
"plugin_entropy": f"{_score_to_percent(snapshot.plugin_entropy):.1f}%",
174+
"source_entropy": f"{_score_to_percent(snapshot.source_entropy):.1f}%",
175+
"top_plugin_share": f"{_score_to_percent(snapshot.top_plugin_share):.1f}%",
176+
"alert_count": len(alerts),
177+
"href": f"{changelist_url}?{urlencode({'project__id__exact': project_id})}",
178+
}
179+
)
180+
181+
return project_drilldowns
182+
183+
118184
@admin.register(TopicCentroidSnapshot)
119185
class TopicCentroidSnapshotAdmin(admin.ModelAdmin):
120186
"""Admin view for persisted topic-centroid history and drift."""
@@ -219,3 +285,120 @@ def changelist_view(self, request, extra_context=None):
219285
_build_topic_centroid_project_drilldowns(queryset, changelist_url)
220286
)
221287
return super().changelist_view(request, extra_context=extra_context)
288+
289+
290+
@admin.register(SourceDiversitySnapshot)
class SourceDiversitySnapshotAdmin(admin.ModelAdmin):
    """Changelist admin for stored source-diversity snapshots.

    On top of the regular columns and filters, the changelist context gets
    averaged diversity cards, the alerts found in snapshot breakdowns, and
    one drilldown row per project.
    """

    list_before_template = "admin/source_diversity_snapshot_changelist_widget.html"
    list_display = (
        "project",
        "display_plugin_entropy",
        "display_source_entropy",
        "display_author_entropy",
        "display_top_plugin_share",
        "computed_at",
    )
    list_filter = (
        "window_days",
        ("project", admin.RelatedOnlyFieldListFilter),
        "computed_at",
    )
    search_fields = ("project__name",)
    autocomplete_fields = ("project",)

    @admin.display(description="Plugin Diversity", ordering="plugin_entropy")
    def display_plugin_entropy(self, obj):
        """Show the plugin entropy score as a one-decimal percentage."""

        percent = _score_to_percent(obj.plugin_entropy)
        return f"{percent:.1f}%"

    @admin.display(description="Source Diversity", ordering="source_entropy")
    def display_source_entropy(self, obj):
        """Show the source entropy score as a one-decimal percentage."""

        percent = _score_to_percent(obj.source_entropy)
        return f"{percent:.1f}%"

    @admin.display(description="Author Diversity", ordering="author_entropy")
    def display_author_entropy(self, obj):
        """Show the author entropy score as a one-decimal percentage."""

        percent = _score_to_percent(obj.author_entropy)
        return f"{percent:.1f}%"

    @admin.display(description="Top Plugin Share", ordering="top_plugin_share")
    def display_top_plugin_share(self, obj):
        """Show the top plugin share as a one-decimal percentage."""

        percent = _score_to_percent(obj.top_plugin_share)
        return f"{percent:.1f}%"

    def changelist_view(self, request, extra_context=None):
        """Add summary cards, alerts, and project drilldowns to the changelist.

        The extra entries are stored under ``dashboard_stats``,
        ``source_diversity_alerts`` and ``source_diversity_project_drilldowns``
        before delegating to the parent changelist view.
        """

        queryset = self.get_queryset(request)
        opts = self.model._meta
        changelist_url = reverse(
            f"{self.admin_site.name}:{opts.app_label}_{opts.model_name}_changelist"
        )
        metrics = queryset.aggregate(
            avg_plugin_entropy=Avg("plugin_entropy"),
            avg_source_entropy=Avg("source_entropy"),
            avg_author_entropy=Avg("author_entropy"),
            avg_top_plugin_share=Avg("top_plugin_share"),
            latest_snapshot_at=Max("computed_at"),
        )

        # NOTE(review): alerts are gathered from every snapshot in the
        # queryset, not only the newest one per project - confirm this is the
        # intended scope for the alert panel.
        alerts = []
        for snapshot in queryset.order_by("project_id", "-computed_at"):
            alerts.extend(
                cast(
                    list[dict[str, Any]],
                    (snapshot.breakdown or {}).get("alerts", []),
                )
            )

        # (card title, aggregate key, icon name, severity function)
        card_specs = (
            ("Plugin Diversity", "avg_plugin_entropy", "hub", _diversity_card_color),
            ("Source Diversity", "avg_source_entropy", "lan", _diversity_card_color),
            ("Author Diversity", "avg_author_entropy", "group", _diversity_card_color),
            ("Top Plugin Share", "avg_top_plugin_share", "pie_chart", _share_card_color),
        )
        dashboard_stats = []
        for title, metric_key, icon, severity_for in card_specs:
            average = metrics[metric_key]
            if average is None:
                # No rows to average over: show a placeholder instead of a number.
                rendered = "-"
            else:
                rendered = f"{_score_to_percent(average):.1f}%"
            dashboard_stats.append(
                {
                    "title": title,
                    "value": rendered,
                    "icon": icon,
                    "color": severity_for(average),
                }
            )

        context = cast(dict[str, Any], extra_context or {})
        context["dashboard_stats"] = dashboard_stats
        context["source_diversity_alerts"] = alerts
        context["source_diversity_project_drilldowns"] = (
            _build_source_diversity_project_drilldowns(queryset, changelist_url)
        )
        return super().changelist_view(request, extra_context=context)

trends/api.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,16 @@
1919
from core.permissions import IsProjectContributor, IsProjectMember
2020
from trends.models import (
2121
ContentClusterMembership,
22+
SourceDiversitySnapshot,
2223
ThemeSuggestion,
2324
ThemeSuggestionStatus,
2425
TopicCentroidSnapshot,
2526
TopicCluster,
2627
TopicVelocitySnapshot,
2728
)
2829
from trends.serializers import (
30+
SourceDiversityObservabilitySummarySerializer,
31+
SourceDiversitySnapshotSerializer,
2932
ThemeSuggestionDismissSerializer,
3033
ThemeSuggestionSerializer,
3134
TopicClusterDetailSerializer,
@@ -334,3 +337,56 @@ def summary(self, request, *args, **kwargs):
334337
context=self.get_serializer_context(),
335338
)
336339
return Response(serializer.data)
340+
341+
342+
# NOTE(review): document_project_owned_viewset / build_crud_action_overrides are
# defined outside this view; presumably they attach API schema documentation
# for the standard actions - confirm against their definitions.
@document_project_owned_viewset(
    resource_plural="source diversity snapshots",
    resource_singular="source diversity snapshot",
    create_description="Source diversity snapshots are pipeline-managed observability rows and are exposed read-only for health analysis.",
    tag="Observability",
    action_overrides=build_crud_action_overrides(
        SourceDiversitySnapshotSerializer,
        resource_plural="source diversity snapshots for the selected project",
        resource_singular="source diversity snapshot",
    ),
)
class SourceDiversitySnapshotViewSet(
    ProjectOwnedQuerysetMixin, viewsets.ReadOnlyModelViewSet
):
    """Inspect persisted source-diversity history for a project."""

    # Read-only viewset: list/retrieve plus the custom ``summary`` action below.
    serializer_class = SourceDiversitySnapshotSerializer
    # select_related("project") loads each snapshot's project in the same query.
    queryset = SourceDiversitySnapshot.objects.select_related("project")

    def get_permissions(self):
        """Restrict source-diversity observability to project contributors."""

        # The same single permission is returned for every action.
        return [IsProjectContributor()]

    @extend_schema(
        summary="Get source diversity summary",
        description=(
            "Return the latest persisted source-diversity snapshot for the selected project "
            "along with the number of stored snapshots."
        ),
        request=None,
        responses={
            200: SourceDiversityObservabilitySummarySerializer,
            403: AUTHENTICATION_REQUIRED_RESPONSE,
        },
        tags=["Observability"],
    )
    @action(detail=False, methods=["get"], url_path="summary")
    def summary(self, request, *args, **kwargs):
        """Return source-diversity summary metrics for the current project."""

        # Both the count and the latest-snapshot lookup use the queryset
        # returned by get_queryset().
        queryset = self.get_queryset()
        serializer = SourceDiversityObservabilitySummarySerializer(
            {
                # Integer primary key of the project from self.get_project().
                "project": _require_pk(self.get_project()),
                "snapshot_count": queryset.count(),
                # Newest snapshot by computed_at; None when there are no snapshots.
                "latest_snapshot": queryset.order_by("-computed_at").first(),
            },
            context=self.get_serializer_context(),
        )
        return Response(serializer.data)

0 commit comments

Comments
 (0)