diff --git a/core/templates/admin/source_diversity_snapshot_changelist_widget.html b/core/templates/admin/source_diversity_snapshot_changelist_widget.html
new file mode 100644
index 00000000..962b00c9
--- /dev/null
+++ b/core/templates/admin/source_diversity_snapshot_changelist_widget.html
@@ -0,0 +1,64 @@
+{% if dashboard_stats or source_diversity_alerts or source_diversity_project_drilldowns %}
+
+ {% if dashboard_stats %}
+
+ {% for stat in dashboard_stats %}
+
+
+
+
{{ stat.title }}
+
{{ stat.value }}
+
+
{{ stat.icon }}
+
+
+ {% endfor %}
+
+ {% endif %}
+ {% if source_diversity_alerts %}
+
+
Concentration Alerts
+
+ {% for alert in source_diversity_alerts %}
+
+ {{ alert.message }}
+
+ {% endfor %}
+
+
+ {% endif %}
+ {% if source_diversity_project_drilldowns %}
+
+
+
Project Drilldown
+
+ Jump into filtered source diversity history for a single project.
+
+
+
+
+ {% endif %}
+
+{% endif %}
diff --git a/ingestion/tasks.py b/ingestion/tasks.py
index dfdeb198..c229ad4a 100644
--- a/ingestion/tasks.py
+++ b/ingestion/tasks.py
@@ -4,7 +4,7 @@
from celery import shared_task
from django.conf import settings
-from django.db.models import Q
+from django.db.models import Model, Q
from django.utils import timezone
from content.deduplication import canonicalize_url
@@ -16,6 +16,17 @@
logger = logging.getLogger(__name__)
+def _require_pk(instance: Model) -> int:
+ """Return a saved model primary key as an ``int``."""
+
+ instance_pk = instance.pk
+ if instance_pk is None:
+ raise ValueError(
+ f"{instance.__class__.__name__} must be saved before task dispatch"
+ )
+ return int(instance_pk)
+
+
@shared_task(name="core.tasks.run_ingestion")
def run_ingestion(source_config_id: int):
"""Fetch new content for one source config and record an ingestion run."""
@@ -74,7 +85,10 @@ def _ingest_source_config(source_config: SourceConfig) -> tuple[int, int]:
for item in fetched_items:
if _content_exists_for_item(source_config, item):
continue
- source_metadata = getattr(item, "source_metadata", None) or {}
+ source_metadata = {
+ **(getattr(item, "source_metadata", None) or {}),
+ "source_config_id": _require_pk(source_config),
+ }
content = Content.objects.create(
project=source_config.project,
entity=_match_entity_for_item(plugin, item),
diff --git a/trends/admin.py b/trends/admin.py
index afbd6357..2b3de58c 100644
--- a/trends/admin.py
+++ b/trends/admin.py
@@ -8,7 +8,7 @@
from django.urls import reverse
from django.utils import timezone
-from trends.models import TopicCentroidSnapshot
+from trends.models import SourceDiversitySnapshot, TopicCentroidSnapshot
def _project_pk(snapshot: TopicCentroidSnapshot) -> int:
@@ -44,6 +44,32 @@ def _drift_card_color(value) -> str:
return "danger"
+def _diversity_card_color(value) -> str:
+ """Return an admin card severity for normalized diversity scores."""
+
+ if value is None:
+ return "info"
+ numeric_value = float(value)
+ if numeric_value >= 0.75:
+ return "success"
+ if numeric_value >= 0.4:
+ return "warning"
+ return "danger"
+
+
+def _share_card_color(value) -> str:
+ """Return an admin card severity for concentration share metrics."""
+
+ if value is None:
+ return "info"
+ numeric_value = float(value)
+ if numeric_value <= 0.4:
+ return "success"
+ if numeric_value <= 0.7:
+ return "warning"
+ return "danger"
+
+
def _format_snapshot_freshness(computed_at) -> str:
"""Return a compact human-readable age for the latest snapshot."""
@@ -115,6 +141,46 @@ def _build_topic_centroid_project_drilldowns(queryset, changelist_url: str):
return project_drilldowns
+def _build_source_diversity_project_drilldowns(queryset, changelist_url: str):
+ """Build one filtered-history drilldown row per project."""
+
+ latest_by_project: dict[int, SourceDiversitySnapshot] = {}
+ snapshot_counts: dict[int, int] = {}
+ ordered_snapshots = queryset.select_related("project").order_by(
+ "project_id", "-computed_at"
+ )
+
+ for snapshot in ordered_snapshots:
+ project_id = int(snapshot.project_id)
+ snapshot_counts[project_id] = snapshot_counts.get(project_id, 0) + 1
+ latest_by_project.setdefault(project_id, snapshot)
+
+ project_drilldowns = []
+ for snapshot in sorted(
+ latest_by_project.values(),
+ key=lambda value: value.project.name.lower(),
+ ):
+ project_id = int(snapshot.project_id)
+ alerts = cast(
+ list[dict[str, Any]], (snapshot.breakdown or {}).get("alerts", [])
+ )
+ project_drilldowns.append(
+ {
+ "project_id": project_id,
+ "project_name": snapshot.project.name,
+ "snapshot_count": snapshot_counts[project_id],
+ "latest_snapshot": _format_snapshot_freshness(snapshot.computed_at),
+ "plugin_entropy": f"{_score_to_percent(snapshot.plugin_entropy):.1f}%",
+ "source_entropy": f"{_score_to_percent(snapshot.source_entropy):.1f}%",
+ "top_plugin_share": f"{_score_to_percent(snapshot.top_plugin_share):.1f}%",
+ "alert_count": len(alerts),
+ "href": f"{changelist_url}?{urlencode({'project__id__exact': project_id})}",
+ }
+ )
+
+ return project_drilldowns
+
+
@admin.register(TopicCentroidSnapshot)
class TopicCentroidSnapshotAdmin(admin.ModelAdmin):
"""Admin view for persisted topic-centroid history and drift."""
@@ -219,3 +285,120 @@ def changelist_view(self, request, extra_context=None):
_build_topic_centroid_project_drilldowns(queryset, changelist_url)
)
return super().changelist_view(request, extra_context=extra_context)
+
+
+@admin.register(SourceDiversitySnapshot)
+class SourceDiversitySnapshotAdmin(admin.ModelAdmin):
+ """Admin view for persisted source-diversity history and concentration alerts."""
+
+ list_before_template = "admin/source_diversity_snapshot_changelist_widget.html"
+ list_display = (
+ "project",
+ "display_plugin_entropy",
+ "display_source_entropy",
+ "display_author_entropy",
+ "display_top_plugin_share",
+ "computed_at",
+ )
+ list_filter = (
+ "window_days",
+ ("project", admin.RelatedOnlyFieldListFilter),
+ "computed_at",
+ )
+ search_fields = ("project__name",)
+ autocomplete_fields = ("project",)
+
+ @admin.display(description="Plugin Diversity", ordering="plugin_entropy")
+ def display_plugin_entropy(self, obj):
+ """Render normalized plugin diversity as a percentage."""
+
+ return f"{_score_to_percent(obj.plugin_entropy):.1f}%"
+
+ @admin.display(description="Source Diversity", ordering="source_entropy")
+ def display_source_entropy(self, obj):
+ """Render normalized source diversity as a percentage."""
+
+ return f"{_score_to_percent(obj.source_entropy):.1f}%"
+
+ @admin.display(description="Author Diversity", ordering="author_entropy")
+ def display_author_entropy(self, obj):
+ """Render normalized author diversity as a percentage."""
+
+ return f"{_score_to_percent(obj.author_entropy):.1f}%"
+
+ @admin.display(description="Top Plugin Share", ordering="top_plugin_share")
+ def display_top_plugin_share(self, obj):
+ """Render the largest plugin share as a percentage."""
+
+ return f"{_score_to_percent(obj.top_plugin_share):.1f}%"
+
+ def changelist_view(self, request, extra_context=None):
+ """Augment the changelist with diversity summaries and alert callouts."""
+
+ queryset = self.get_queryset(request)
+ changelist_url = reverse(
+ f"{self.admin_site.name}:{self.model._meta.app_label}_{self.model._meta.model_name}_changelist"
+ )
+ metrics = queryset.aggregate(
+ avg_plugin_entropy=Avg("plugin_entropy"),
+ avg_source_entropy=Avg("source_entropy"),
+ avg_author_entropy=Avg("author_entropy"),
+ avg_top_plugin_share=Avg("top_plugin_share"),
+ latest_snapshot_at=Max("computed_at"),
+ )
+ alerts = [
+ alert
+ for snapshot in queryset.order_by("project_id", "-computed_at")
+ for alert in cast(
+ list[dict[str, Any]], (snapshot.breakdown or {}).get("alerts", [])
+ )
+ ]
+
+ extra_context = cast(dict[str, Any], extra_context or {})
+ extra_context["dashboard_stats"] = [
+ {
+ "title": "Plugin Diversity",
+ "value": (
+ f"{_score_to_percent(metrics['avg_plugin_entropy']):.1f}%"
+ if metrics["avg_plugin_entropy"] is not None
+ else "-"
+ ),
+ "icon": "hub",
+ "color": _diversity_card_color(metrics["avg_plugin_entropy"]),
+ },
+ {
+ "title": "Source Diversity",
+ "value": (
+ f"{_score_to_percent(metrics['avg_source_entropy']):.1f}%"
+ if metrics["avg_source_entropy"] is not None
+ else "-"
+ ),
+ "icon": "lan",
+ "color": _diversity_card_color(metrics["avg_source_entropy"]),
+ },
+ {
+ "title": "Author Diversity",
+ "value": (
+ f"{_score_to_percent(metrics['avg_author_entropy']):.1f}%"
+ if metrics["avg_author_entropy"] is not None
+ else "-"
+ ),
+ "icon": "group",
+ "color": _diversity_card_color(metrics["avg_author_entropy"]),
+ },
+ {
+ "title": "Top Plugin Share",
+ "value": (
+ f"{_score_to_percent(metrics['avg_top_plugin_share']):.1f}%"
+ if metrics["avg_top_plugin_share"] is not None
+ else "-"
+ ),
+ "icon": "pie_chart",
+ "color": _share_card_color(metrics["avg_top_plugin_share"]),
+ },
+ ]
+ extra_context["source_diversity_alerts"] = alerts
+ extra_context["source_diversity_project_drilldowns"] = (
+ _build_source_diversity_project_drilldowns(queryset, changelist_url)
+ )
+ return super().changelist_view(request, extra_context=extra_context)
diff --git a/trends/api.py b/trends/api.py
index 31beadaa..d7aea4f1 100644
--- a/trends/api.py
+++ b/trends/api.py
@@ -19,6 +19,7 @@
from core.permissions import IsProjectContributor, IsProjectMember
from trends.models import (
ContentClusterMembership,
+ SourceDiversitySnapshot,
ThemeSuggestion,
ThemeSuggestionStatus,
TopicCentroidSnapshot,
@@ -26,6 +27,8 @@
TopicVelocitySnapshot,
)
from trends.serializers import (
+ SourceDiversityObservabilitySummarySerializer,
+ SourceDiversitySnapshotSerializer,
ThemeSuggestionDismissSerializer,
ThemeSuggestionSerializer,
TopicClusterDetailSerializer,
@@ -334,3 +337,56 @@ def summary(self, request, *args, **kwargs):
context=self.get_serializer_context(),
)
return Response(serializer.data)
+
+
+@document_project_owned_viewset(
+ resource_plural="source diversity snapshots",
+ resource_singular="source diversity snapshot",
+ create_description="Source diversity snapshots are pipeline-managed observability rows and are exposed read-only for health analysis.",
+ tag="Observability",
+ action_overrides=build_crud_action_overrides(
+ SourceDiversitySnapshotSerializer,
+ resource_plural="source diversity snapshots for the selected project",
+ resource_singular="source diversity snapshot",
+ ),
+)
+class SourceDiversitySnapshotViewSet(
+ ProjectOwnedQuerysetMixin, viewsets.ReadOnlyModelViewSet
+):
+ """Inspect persisted source-diversity history for a project."""
+
+ serializer_class = SourceDiversitySnapshotSerializer
+ queryset = SourceDiversitySnapshot.objects.select_related("project")
+
+ def get_permissions(self):
+ """Restrict source-diversity observability to project contributors."""
+
+ return [IsProjectContributor()]
+
+ @extend_schema(
+ summary="Get source diversity summary",
+ description=(
+ "Return the latest persisted source-diversity snapshot for the selected project "
+ "along with the number of stored snapshots."
+ ),
+ request=None,
+ responses={
+ 200: SourceDiversityObservabilitySummarySerializer,
+ 403: AUTHENTICATION_REQUIRED_RESPONSE,
+ },
+ tags=["Observability"],
+ )
+ @action(detail=False, methods=["get"], url_path="summary")
+ def summary(self, request, *args, **kwargs):
+ """Return source-diversity summary metrics for the current project."""
+
+ queryset = self.get_queryset()
+ serializer = SourceDiversityObservabilitySummarySerializer(
+ {
+ "project": _require_pk(self.get_project()),
+ "snapshot_count": queryset.count(),
+ "latest_snapshot": queryset.order_by("-computed_at").first(),
+ },
+ context=self.get_serializer_context(),
+ )
+ return Response(serializer.data)
diff --git a/trends/api_urls.py b/trends/api_urls.py
index 97d69cba..54f50b00 100644
--- a/trends/api_urls.py
+++ b/trends/api_urls.py
@@ -3,6 +3,7 @@
from rest_framework_nested.routers import NestedSimpleRouter
from trends.api import (
+ SourceDiversitySnapshotViewSet,
ThemeSuggestionViewSet,
TopicCentroidSnapshotViewSet,
TopicClusterViewSet,
@@ -27,3 +28,8 @@ def register_project_routes(project_router: NestedSimpleRouter) -> None:
TopicCentroidSnapshotViewSet,
basename="project-topic-centroid-snapshot",
)
+ project_router.register(
+ r"source-diversity-snapshots",
+ SourceDiversitySnapshotViewSet,
+ basename="project-source-diversity-snapshot",
+ )
diff --git a/trends/migrations/0004_source_diversity_snapshot.py b/trends/migrations/0004_source_diversity_snapshot.py
new file mode 100644
index 00000000..076b1a62
--- /dev/null
+++ b/trends/migrations/0004_source_diversity_snapshot.py
@@ -0,0 +1,53 @@
+import django.db.models.deletion
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("trends", "0003_theme_suggestion"),
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name="SourceDiversitySnapshot",
+ fields=[
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ ("computed_at", models.DateTimeField(auto_now_add=True)),
+ ("window_days", models.PositiveIntegerField(default=14)),
+ ("plugin_entropy", models.FloatField()),
+ ("source_entropy", models.FloatField()),
+ ("author_entropy", models.FloatField()),
+ ("cluster_entropy", models.FloatField()),
+ ("top_plugin_share", models.FloatField()),
+ ("top_source_share", models.FloatField()),
+ ("breakdown", models.JSONField(blank=True, default=dict)),
+ (
+ "project",
+ models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="source_diversity_snapshots",
+ to="projects.project",
+ ),
+ ),
+ ],
+ options={
+ "ordering": ["-computed_at", "id"],
+ "db_table": "core_sourcediversitysnapshot",
+ "indexes": [
+ models.Index(
+ fields=["project", "-computed_at"],
+ name="core_sourced_project_4bf5_idx",
+ ),
+ ],
+ },
+ ),
+ ]
diff --git a/trends/models.py b/trends/models.py
index 7f09b03c..95cdf45d 100644
--- a/trends/models.py
+++ b/trends/models.py
@@ -205,3 +205,35 @@ class Meta:
def __str__(self) -> str:
return self.title
+
+
+class SourceDiversitySnapshot(models.Model):
+ """Capture one project-level source diversity reading for a rolling window."""
+
+ project = models.ForeignKey(
+ "projects.Project",
+ on_delete=models.CASCADE,
+ related_name="source_diversity_snapshots",
+ )
+ computed_at = models.DateTimeField(auto_now_add=True)
+ window_days = models.PositiveIntegerField(default=14)
+ plugin_entropy = models.FloatField()
+ source_entropy = models.FloatField()
+ author_entropy = models.FloatField()
+ cluster_entropy = models.FloatField()
+ top_plugin_share = models.FloatField()
+ top_source_share = models.FloatField()
+ breakdown = models.JSONField(default=dict, blank=True)
+
+ class Meta:
+ ordering = ["-computed_at", "id"]
+ db_table = "core_sourcediversitysnapshot"
+ indexes = [
+ models.Index(
+ fields=["project", "-computed_at"],
+ name="core_sourced_project_4bf5_idx",
+ ),
+ ]
+
+ def __str__(self) -> str:
+ return f"Source diversity snapshot for {self.project.name}"
diff --git a/trends/serializers.py b/trends/serializers.py
index f55f72c3..36a1cf1f 100644
--- a/trends/serializers.py
+++ b/trends/serializers.py
@@ -6,6 +6,7 @@
from core.serializer_mixins import ProjectScopedSerializerMixin
from trends.models import (
ContentClusterMembership,
+ SourceDiversitySnapshot,
ThemeSuggestion,
ThemeSuggestionStatus,
TopicCentroidSnapshot,
@@ -213,3 +214,32 @@ class TopicCentroidObservabilitySummarySerializer(serializers.Serializer):
avg_drift_from_previous = serializers.FloatField(allow_null=True)
avg_drift_from_week_ago = serializers.FloatField(allow_null=True)
latest_snapshot = TopicCentroidSnapshotSerializer(allow_null=True)
+
+
+class SourceDiversitySnapshotSerializer(serializers.ModelSerializer):
+ """Serialize one persisted source-diversity snapshot for a project."""
+
+ class Meta:
+ model = SourceDiversitySnapshot
+ fields = [
+ "id",
+ "project",
+ "computed_at",
+ "window_days",
+ "plugin_entropy",
+ "source_entropy",
+ "author_entropy",
+ "cluster_entropy",
+ "top_plugin_share",
+ "top_source_share",
+ "breakdown",
+ ]
+ read_only_fields = fields
+
+
+class SourceDiversityObservabilitySummarySerializer(serializers.Serializer):
+ """Serialize project-level source-diversity observability metrics."""
+
+ project = serializers.IntegerField()
+ snapshot_count = serializers.IntegerField()
+ latest_snapshot = SourceDiversitySnapshotSerializer(allow_null=True)
diff --git a/trends/tasks.py b/trends/tasks.py
index f6c589ca..592e7ef7 100644
--- a/trends/tasks.py
+++ b/trends/tasks.py
@@ -21,11 +21,12 @@
)
from core.llm import build_skill_user_prompt, get_skill_definition, openrouter_chat_json
from content.models import Content, FeedbackType, UserFeedback
-from entities.models import Entity, EntityMention
-from projects.models import Project
+from entities.models import Entity, EntityMention, EntityMentionRole
+from projects.models import Project, SourceConfig
from .models import (
ContentClusterMembership,
+ SourceDiversitySnapshot,
ThemeSuggestion,
ThemeSuggestionStatus,
TopicCentroidSnapshot,
@@ -43,6 +44,9 @@
TOPIC_CLUSTER_MIN_MEMBERS = 3
TOPIC_VELOCITY_TRAILING_DAYS = 7
TOPIC_VELOCITY_EMA_ALPHA = 0.5
+SOURCE_DIVERSITY_WINDOW_DAYS = 14
+SOURCE_DIVERSITY_TOP_PLUGIN_ALERT_THRESHOLD = 0.7
+SOURCE_DIVERSITY_LOW_AUTHOR_ENTROPY_THRESHOLD = 0.4
THEME_DETECTION_SKILL_NAME = "theme_detection"
THEME_SUGGESTION_DAILY_CAP = 5
THEME_NOVELTY_LOOKBACK_DAYS = 30
@@ -357,8 +361,10 @@ def recompute_topic_velocity(project_id: int) -> dict[str, int]:
)
snapshot_count += 1
if settings.CELERY_TASK_ALWAYS_EAGER:
+ recompute_source_diversity(project_id)
generate_theme_suggestions(project_id)
else:
+ _enqueue_task(recompute_source_diversity, project_id)
_enqueue_task(generate_theme_suggestions, project_id)
return {
"project_id": project_id,
@@ -367,6 +373,144 @@ def recompute_topic_velocity(project_id: int) -> dict[str, int]:
}
+@shared_task(name="core.tasks.recompute_source_diversity")
+def recompute_source_diversity(project_id: int) -> dict[str, object]:
+ """Persist one fresh source-diversity snapshot for a project's recent content."""
+
+ computed_at = timezone.now()
+ window_start = computed_at - timedelta(days=SOURCE_DIVERSITY_WINDOW_DAYS)
+ recent_contents = list(
+ Content.objects.filter(
+ project_id=project_id,
+ is_active=True,
+ published_date__gte=window_start,
+ )
+ .select_related("entity")
+ .only(
+ "id",
+ "project_id",
+ "entity_id",
+ "source_plugin",
+ "source_metadata",
+ "published_date",
+ )
+ .order_by("published_date", "id")
+ )
+ content_ids = [_require_pk(content) for content in recent_contents]
+ author_entity_ids = _author_entities_for_contents(recent_contents)
+ cluster_labels = {
+ str(cluster_id): label
+ for cluster_id, label in TopicCluster.objects.filter(
+ memberships__content_id__in=content_ids,
+ project_id=project_id,
+ )
+ .distinct()
+ .values_list("id", "label")
+ }
+
+ plugin_counts: Counter[str] = Counter()
+ source_counts: Counter[str] = Counter()
+ author_counts: Counter[str] = Counter()
+ for content in recent_contents:
+ plugin_key = content.source_plugin or "unknown"
+ plugin_counts[plugin_key] += 1
+ source_counts[_source_bucket_key(content)] += 1
+ author_entity_id = author_entity_ids.get(_require_pk(content))
+ if author_entity_id is not None:
+ author_counts[str(author_entity_id)] += 1
+
+ cluster_counts: Counter[str] = Counter(
+ str(cluster_id)
+ for cluster_id in ContentClusterMembership.objects.filter(
+ project_id=project_id,
+ content_id__in=content_ids,
+ ).values_list("cluster_id", flat=True)
+ )
+
+ source_config_labels = {
+ f"source_config:{source_id}": f"{plugin_name} #{source_id}"
+ for source_id, plugin_name in SourceConfig.objects.filter(
+ pk__in=[
+ int(key.split(":", 1)[1])
+ for key in source_counts
+ if key.startswith("source_config:")
+ ]
+ ).values_list("id", "plugin_name")
+ }
+ author_labels = {
+ str(entity_id): name
+ for entity_id, name in Entity.objects.filter(
+ pk__in=[int(key) for key in author_counts]
+ ).values_list("id", "name")
+ }
+ cluster_label_map = {
+ cluster_id: label or f"Cluster {cluster_id}"
+ for cluster_id, label in cluster_labels.items()
+ }
+
+ top_plugin_item = _top_count_item(plugin_counts)
+ top_source_item = _top_count_item(source_counts)
+ plugin_entropy = _normalized_shannon_entropy(plugin_counts)
+ source_entropy = _normalized_shannon_entropy(source_counts)
+ author_entropy = _normalized_shannon_entropy(author_counts)
+ cluster_entropy = _normalized_shannon_entropy(cluster_counts)
+ top_plugin_share = _top_share(plugin_counts)
+ top_source_share = _top_share(source_counts)
+
+ alerts = _build_source_diversity_alerts(
+ top_plugin_item=top_plugin_item,
+ top_plugin_share=top_plugin_share,
+ author_counts=author_counts,
+ author_entropy=author_entropy,
+ )
+ breakdown = {
+ "total_content_count": len(recent_contents),
+ "plugin_counts": _serialize_breakdown_counts(
+ plugin_counts,
+ label_resolver=lambda key: key,
+ ),
+ "source_counts": _serialize_breakdown_counts(
+ source_counts,
+ label_resolver=lambda key: source_config_labels.get(
+ key,
+ _fallback_source_label(key),
+ ),
+ ),
+ "author_counts": _serialize_breakdown_counts(
+ author_counts,
+ label_resolver=lambda key: author_labels.get(key, f"Entity {key}"),
+ ),
+ "cluster_counts": _serialize_breakdown_counts(
+ cluster_counts,
+ label_resolver=lambda key: cluster_label_map.get(key, f"Cluster {key}"),
+ ),
+ "alerts": alerts,
+ }
+
+ snapshot = SourceDiversitySnapshot.objects.create(
+ project_id=project_id,
+ window_days=SOURCE_DIVERSITY_WINDOW_DAYS,
+ plugin_entropy=plugin_entropy,
+ source_entropy=source_entropy,
+ author_entropy=author_entropy,
+ cluster_entropy=cluster_entropy,
+ top_plugin_share=top_plugin_share,
+ top_source_share=top_source_share,
+ breakdown=breakdown,
+ )
+ if snapshot.computed_at != computed_at:
+ SourceDiversitySnapshot.objects.filter(pk=snapshot.pk).update(
+ computed_at=computed_at
+ )
+ snapshot.computed_at = computed_at
+ return {
+ "project_id": project_id,
+ "snapshot_id": _require_pk(snapshot),
+ "content_count": len(recent_contents),
+ "alert_count": len(alerts),
+ }
+
+
@shared_task(name="core.tasks.generate_theme_suggestions")
def generate_theme_suggestions(project_id: int) -> dict[str, int]:
"""Generate pending editor-facing theme suggestions for one project."""
@@ -929,6 +1073,164 @@ def dismiss_theme_suggestion(
return theme_suggestion
+def _author_entities_for_contents(contents: list[Content]) -> dict[int, int | None]:
+ """Resolve one best-effort author entity bucket per content row."""
+
+ content_ids = [_require_pk(content) for content in contents]
+ author_mentions: dict[int, int] = {}
+ for content_id, entity_id in (
+ EntityMention.objects.filter(
+ content_id__in=content_ids,
+ role=EntityMentionRole.AUTHOR,
+ )
+ .order_by("content_id", "entity_id")
+ .values_list("content_id", "entity_id")
+ ):
+ author_mentions.setdefault(int(content_id), int(entity_id))
+ return {
+ _require_pk(content): author_mentions.get(_require_pk(content))
+ or (_require_pk(content.entity) if content.entity is not None else None)
+ for content in contents
+ }
+
+
+def _source_bucket_key(content: Content) -> str:
+ """Return the source bucket key used for source diversity counts."""
+
+ source_metadata = content.source_metadata or {}
+ source_config_id = source_metadata.get("source_config_id")
+ if isinstance(source_config_id, bool):
+ source_config_id = None
+ if isinstance(source_config_id, (int, str)) and source_config_id != "":
+ try:
+ return f"source_config:{int(source_config_id)}"
+ except ValueError:
+ pass
+ sender_email = str(source_metadata.get("sender_email", "")).strip().lower()
+ if sender_email:
+ return f"sender_email:{sender_email}"
+ for metadata_key in (
+ "feed_url",
+ "subreddit",
+ "author_handle",
+ "account_acct",
+ "instance_url",
+ ):
+ metadata_value = str(source_metadata.get(metadata_key, "")).strip().lower()
+ if metadata_value:
+ return f"{metadata_key}:{metadata_value}"
+ if content.source_plugin:
+ return f"plugin:{content.source_plugin}"
+ return "unknown"
+
+
+def _fallback_source_label(source_key: str) -> str:
+ """Render a human-readable source label for non-SourceConfig buckets."""
+
+ prefix, _, value = source_key.partition(":")
+ if not value:
+ return source_key
+ if prefix == "sender_email":
+ return value
+ if prefix == "plugin":
+ return f"{value} (unattributed)"
+ return value
+
+
+def _serialize_breakdown_counts(
+ counts: Counter[str],
+ *,
+ label_resolver,
+) -> list[dict[str, object]]:
+ """Serialize one counter into a stable JSON-friendly breakdown list."""
+
+ total_count = sum(counts.values())
+ if total_count <= 0:
+ return []
+ return [
+ {
+ "key": key,
+ "label": label_resolver(key),
+ "count": count,
+ "share": count / total_count,
+ }
+ for key, count in sorted(counts.items(), key=lambda item: (-item[1], item[0]))
+ ]
+
+
+def _normalized_shannon_entropy(counts: Counter[str]) -> float:
+ """Return Shannon entropy normalized into the inclusive [0, 1] interval."""
+
+ positive_counts = [count for count in counts.values() if count > 0]
+ if len(positive_counts) <= 1:
+ return 0.0
+ total_count = sum(positive_counts)
+ entropy = -sum(
+ (count / total_count) * math.log2(count / total_count)
+ for count in positive_counts
+ )
+ max_entropy = math.log2(len(positive_counts))
+ if max_entropy <= 0:
+ return 0.0
+ return max(0.0, min(1.0, entropy / max_entropy))
+
+
+def _top_share(counts: Counter[str]) -> float:
+ """Return the share owned by the largest bucket in one distribution."""
+
+ total_count = sum(counts.values())
+ if total_count <= 0:
+ return 0.0
+ return max(counts.values()) / total_count
+
+
+def _top_count_item(counts: Counter[str]) -> tuple[str, int] | None:
+ """Return the largest bucket key/count pair using a stable tie-breaker."""
+
+ if not counts:
+ return None
+ return max(counts.items(), key=lambda item: (item[1], item[0]))
+
+
+def _build_source_diversity_alerts(
+ *,
+ top_plugin_item: tuple[str, int] | None,
+ top_plugin_share: float,
+ author_counts: Counter[str],
+ author_entropy: float,
+) -> list[dict[str, str]]:
+ """Build advisory cards for concentrated source diversity snapshots."""
+
+ alerts: list[dict[str, str]] = []
+ if (
+ top_plugin_item is not None
+ and top_plugin_share > SOURCE_DIVERSITY_TOP_PLUGIN_ALERT_THRESHOLD
+ ):
+ alerts.append(
+ {
+ "code": "top_plugin_share",
+ "severity": "warning",
+ "message": (
+ f"Your stream is {top_plugin_share:.0%} from {top_plugin_item[0]} this week."
+ ),
+ }
+ )
+ if author_counts and author_entropy < SOURCE_DIVERSITY_LOW_AUTHOR_ENTROPY_THRESHOLD:
+ author_bucket_count = len(author_counts)
+ alerts.append(
+ {
+ "code": "author_entropy",
+ "severity": "warning",
+ "message": (
+ "Three authors account for most of your content."
+ if author_bucket_count <= 3
+ else "A small set of authors accounts for most of your content."
+ ),
+ }
+ )
+ return alerts
+
+
def _resolve_dominant_entity(contents: list[Content]) -> Entity | None:
"""Return the most frequently referenced entity across clustered content."""
diff --git a/trends/tests/test_admin.py b/trends/tests/test_admin.py
index 18ce818f..1dee2be0 100644
--- a/trends/tests/test_admin.py
+++ b/trends/tests/test_admin.py
@@ -10,8 +10,8 @@
from django.utils import timezone
from projects.models import Project
-from trends.admin import TopicCentroidSnapshotAdmin
-from trends.models import TopicCentroidSnapshot
+from trends.admin import SourceDiversitySnapshotAdmin, TopicCentroidSnapshotAdmin
+from trends.models import SourceDiversitySnapshot, TopicCentroidSnapshot
pytestmark = pytest.mark.django_db
@@ -55,6 +55,21 @@ def _drilldowns(response: object) -> list[dict[str, Any]]:
return cast(list[dict[str, Any]], _context(response)["centroid_project_drilldowns"])
+def _source_diversity_alerts(response: object) -> list[dict[str, Any]]:
+ """Return typed source diversity alerts from a changelist payload."""
+
+ return cast(list[dict[str, Any]], _context(response)["source_diversity_alerts"])
+
+
+def _source_diversity_drilldowns(response: object) -> list[dict[str, Any]]:
+ """Return typed source diversity drilldowns from a changelist payload."""
+
+ return cast(
+ list[dict[str, Any]],
+ _context(response)["source_diversity_project_drilldowns"],
+ )
+
+
@pytest.fixture
def source_admin_context(django_user_model):
user = _create_user(
@@ -151,3 +166,82 @@ def test_topic_centroid_snapshot_admin_changelist_view_builds_dashboard_stats(
f"{_require_pk(source_admin_context.project)}"
)
assert centroid_project_drilldowns[0]["drift_from_previous"] == "10.0%"
+
+
+def test_source_diversity_snapshot_admin_changelist_view_builds_dashboard_stats(
+ source_admin_context, mocker
+):
+ second_project = Project.objects.create(
+ name="Second Diversity Project",
+ topic_description="Analytics",
+ )
+ first_snapshot = SourceDiversitySnapshot.objects.create(
+ project=source_admin_context.project,
+ window_days=14,
+ plugin_entropy=0.8,
+ source_entropy=0.7,
+ author_entropy=0.3,
+ cluster_entropy=0.6,
+ top_plugin_share=0.75,
+ top_source_share=0.7,
+ breakdown={
+ "alerts": [
+ {
+ "code": "top_plugin_share",
+ "severity": "warning",
+ "message": "Your stream is 75% from rss this week.",
+ }
+ ]
+ },
+ )
+ second_snapshot = SourceDiversitySnapshot.objects.create(
+ project=second_project,
+ window_days=14,
+ plugin_entropy=0.2,
+ source_entropy=0.4,
+ author_entropy=0.1,
+ cluster_entropy=0.3,
+ top_plugin_share=0.9,
+ top_source_share=0.85,
+ breakdown={"alerts": []},
+ )
+ admin_instance = SourceDiversitySnapshotAdmin(SourceDiversitySnapshot, AdminSite())
+ mocker.patch.object(
+ admin_instance,
+ "get_queryset",
+ return_value=SourceDiversitySnapshot.objects.all(),
+ )
+ super_changelist_view = mocker.patch(
+ "django.contrib.admin.options.ModelAdmin.changelist_view",
+ side_effect=lambda request, extra_context=None: extra_context,
+ )
+
+ response = admin_instance.changelist_view(request=_request())
+ dashboard_stats = _dashboard_stats(response)
+ alerts = _source_diversity_alerts(response)
+ drilldowns = _source_diversity_drilldowns(response)
+
+ super_changelist_view.assert_called_once()
+ assert (
+ admin_instance.list_before_template
+ == "admin/source_diversity_snapshot_changelist_widget.html"
+ )
+ assert dashboard_stats[0]["value"] == "50.0%"
+ assert dashboard_stats[0]["color"] == "warning"
+ assert dashboard_stats[1]["value"] == "55.0%"
+ assert dashboard_stats[2]["value"] == "20.0%"
+ assert dashboard_stats[2]["color"] == "danger"
+ assert dashboard_stats[3]["value"] == "82.5%"
+ assert dashboard_stats[3]["color"] == "danger"
+ assert alerts == [
+ {
+ "code": "top_plugin_share",
+ "severity": "warning",
+ "message": "Your stream is 75% from rss this week.",
+ }
+ ]
+ assert len(drilldowns) == 2
+ assert drilldowns[0]["project_name"] == "Admin Project"
+ assert drilldowns[0]["plugin_entropy"] == "80.0%"
+ assert drilldowns[0]["top_plugin_share"] == "75.0%"
+ assert drilldowns[0]["alert_count"] == 1
diff --git a/trends/tests/test_api.py b/trends/tests/test_api.py
index 531eaedd..e6f3a05a 100644
--- a/trends/tests/test_api.py
+++ b/trends/tests/test_api.py
@@ -12,6 +12,7 @@
from projects.models import Project, ProjectConfig, ProjectMembership, ProjectRole
from trends.models import (
ContentClusterMembership,
+ SourceDiversitySnapshot,
ThemeSuggestion,
ThemeSuggestionStatus,
TopicCentroidSnapshot,
@@ -392,3 +393,69 @@ def test_theme_suggestion_accept_and_dismiss_actions_update_workflow_fields(self
self.assertEqual(dismiss_suggestion.status, ThemeSuggestionStatus.DISMISSED)
self.assertEqual(dismiss_suggestion.dismissal_reason, "already covered")
self.assertEqual(dismiss_suggestion.decided_by, self.owner)
+
+ def test_source_diversity_snapshot_list_and_summary_are_scoped_to_project(self):
+ owner_snapshot = SourceDiversitySnapshot.objects.create(
+ project=self.owner_project,
+ window_days=14,
+ plugin_entropy=0.8,
+ source_entropy=0.75,
+ author_entropy=0.5,
+ cluster_entropy=0.6,
+ top_plugin_share=0.7,
+ top_source_share=0.65,
+ breakdown={
+ "total_content_count": 4,
+ "plugin_counts": [],
+ "source_counts": [],
+ "author_counts": [],
+ "cluster_counts": [],
+ "alerts": [],
+ },
+ )
+ other_snapshot = SourceDiversitySnapshot.objects.create(
+ project=self.other_project,
+ window_days=14,
+ plugin_entropy=0.2,
+ source_entropy=0.3,
+ author_entropy=0.1,
+ cluster_entropy=0.4,
+ top_plugin_share=0.95,
+ top_source_share=0.95,
+ breakdown={
+ "total_content_count": 8,
+ "plugin_counts": [],
+ "source_counts": [],
+ "author_counts": [],
+ "cluster_counts": [],
+ "alerts": [],
+ },
+ )
+
+ list_response = self.client.get(
+ reverse(
+ "v1:project-source-diversity-snapshot-list",
+ kwargs={"project_id": _require_pk(self.owner_project)},
+ )
+ )
+ summary_response = self.client.get(
+ reverse(
+ "v1:project-source-diversity-snapshot-summary",
+ kwargs={"project_id": _require_pk(self.owner_project)},
+ )
+ )
+
+ self.assertEqual(list_response.status_code, status.HTTP_200_OK)
+ self.assertEqual(len(list_response.json()), 1)
+ self.assertEqual(list_response.json()[0]["id"], _require_pk(owner_snapshot))
+ self.assertNotEqual(_require_pk(owner_snapshot), _require_pk(other_snapshot))
+
+ self.assertEqual(summary_response.status_code, status.HTTP_200_OK)
+ self.assertEqual(
+ summary_response.json()["project"], _require_pk(self.owner_project)
+ )
+ self.assertEqual(summary_response.json()["snapshot_count"], 1)
+ self.assertEqual(
+ summary_response.json()["latest_snapshot"]["id"],
+ _require_pk(owner_snapshot),
+ )
diff --git a/trends/tests/test_tasks.py b/trends/tests/test_tasks.py
index dbc77696..ee9f71ce 100644
--- a/trends/tests/test_tasks.py
+++ b/trends/tests/test_tasks.py
@@ -7,9 +7,10 @@
from content.models import Content, FeedbackType, UserFeedback
from entities.models import Entity, EntityMention, EntityMentionRole
from projects.model_support import SourcePluginName
-from projects.models import Project
+from projects.models import Project, SourceConfig
from trends.models import (
ContentClusterMembership,
+ SourceDiversitySnapshot,
ThemeSuggestion,
ThemeSuggestionStatus,
TopicCentroidSnapshot,
@@ -22,6 +23,7 @@
assign_content_to_topic_cluster,
generate_theme_suggestions,
queue_topic_centroid_recompute,
+ recompute_source_diversity,
recompute_topic_centroid,
recompute_topic_clusters,
recompute_topic_velocity,
@@ -280,6 +282,113 @@ def test_run_all_topic_cluster_recomputations_enqueues_all_projects(
assert delay_mock.call_count == 2
+def test_recompute_source_diversity_persists_entropy_breakdown_and_alerts(
+ source_plugin_context,
+):
+ project = source_plugin_context.project
+ rss_source = SourceConfig.objects.create(
+ project=project,
+ plugin_name=SourcePluginName.RSS,
+ config={"feed_url": "https://example.com/feed.xml"},
+ )
+ reddit_source = SourceConfig.objects.create(
+ project=project,
+ plugin_name=SourcePluginName.REDDIT,
+ config={"subreddit": "MachineLearning"},
+ )
+ first_cluster = TopicCluster.objects.create(
+ project=project,
+ first_seen_at="2026-04-20T00:00:00Z",
+ last_seen_at="2026-04-24T00:00:00Z",
+ is_active=True,
+ member_count=3,
+ dominant_entity=source_plugin_context.entity,
+ label="Platform Signals",
+ )
+ second_cluster = TopicCluster.objects.create(
+ project=project,
+ first_seen_at="2026-04-20T00:00:00Z",
+ last_seen_at="2026-04-24T00:00:00Z",
+ is_active=True,
+ member_count=1,
+ dominant_entity=source_plugin_context.entity,
+ label="Community Chatter",
+ )
+
+ contents = []
+ for index in range(3):
+ contents.append(
+ Content.objects.create(
+ project=project,
+ entity=source_plugin_context.entity,
+ url=f"https://example.com/rss-{index}",
+ title=f"RSS {index}",
+ author="Author",
+ source_plugin=SourcePluginName.RSS,
+ published_date="2026-04-24T12:00:00Z",
+ content_text="Manual content body",
+ source_metadata={"source_config_id": _require_pk(rss_source)},
+ )
+ )
+ contents.append(
+ Content.objects.create(
+ project=project,
+ entity=source_plugin_context.entity,
+ url="https://example.com/reddit-0",
+ title="Reddit 0",
+ author="Author",
+ source_plugin=SourcePluginName.REDDIT,
+ published_date="2026-04-24T12:00:00Z",
+ content_text="Manual content body",
+ source_metadata={"source_config_id": _require_pk(reddit_source)},
+ )
+ )
+ for content in contents[:3]:
+ ContentClusterMembership.objects.create(
+ content=content,
+ cluster=first_cluster,
+ project=project,
+ similarity=0.95,
+ )
+ ContentClusterMembership.objects.create(
+ content=contents[3],
+ cluster=second_cluster,
+ project=project,
+ similarity=0.91,
+ )
+
+ result = recompute_source_diversity(_require_pk(project))
+ snapshot = SourceDiversitySnapshot.objects.get(project=project)
+
+ assert result["project_id"] == _require_pk(project)
+ assert result["content_count"] == 4
+ assert snapshot.window_days == 14
+ assert snapshot.plugin_entropy == pytest.approx(0.811278, rel=1e-4)
+ assert snapshot.source_entropy == pytest.approx(0.811278, rel=1e-4)
+ assert snapshot.author_entropy == 0.0
+ assert snapshot.cluster_entropy == pytest.approx(0.811278, rel=1e-4)
+ assert snapshot.top_plugin_share == pytest.approx(0.75)
+ assert snapshot.top_source_share == pytest.approx(0.75)
+ assert snapshot.breakdown["plugin_counts"][0]["key"] == SourcePluginName.RSS
+ assert snapshot.breakdown["plugin_counts"][0]["count"] == 3
+ assert snapshot.breakdown["source_counts"][0]["label"] == (
+ f"rss #{_require_pk(rss_source)}"
+ )
+ assert snapshot.breakdown["cluster_counts"][0]["label"] == "Platform Signals"
+ assert snapshot.breakdown["alerts"] == [
+ {
+ "code": "top_plugin_share",
+ "severity": "warning",
+ "message": "Your stream is 75% from rss this week.",
+ },
+ {
+ "code": "author_entropy",
+ "severity": "warning",
+ "message": "Three authors account for most of your content.",
+ },
+ ]
+
+
def test_queue_topic_centroid_recompute_enqueues_background_task(
source_plugin_context, mocker
):