Provision
+'
- f"{formatted_json}"
- f""
- )
-
- def changelist_view(self, request, extra_context=None):
- """Augment the changelist with latency and failure-rate statistics."""
-
- qs = self.get_queryset(request)
- extra_context = extra_context or {}
- metrics = qs.aggregate(avg_lat=Avg("latency_ms"))
- avg_latency = metrics["avg_lat"] or 0
- failure_count = qs.filter(status="failed").count()
- total_count = qs.count() or 1
-
- extra_context["dashboard_stats"] = [
- {
- "title": "Avg Latency",
- "value": f"{avg_latency:.0f}ms",
- "icon": "timer",
- "color": "warning" if avg_latency > 2000 else "success",
- },
- {
- "title": "Failure Rate",
- "value": f"{(failure_count / total_count) * 100:.1f}%",
- "icon": "error",
- "color": "danger" if failure_count > 0 else "success",
- },
- ]
- return super().changelist_view(request, extra_context=extra_context)
-
-
-@admin.register(UserFeedback)
-class UserFeedbackAdmin(ModelAdmin):
- """Admin view for editorial feedback and agreement with AI scoring."""
-
- list_display = (
- "display_feedback",
- "get_content_title",
- "get_ai_score",
- "project",
- "user",
- "created_at",
- )
- list_filter = ("feedback_type", ("project", admin.RelatedOnlyFieldListFilter))
- search_fields = ("content__title", "user__email", "user__username")
-
- @admin.display(description="Type")
- def display_feedback(self, obj):
- """Render feedback as a thumbs-up or thumbs-down glyph."""
-
- if str(obj.feedback_type).lower() == "upvote":
- return format_html('{}', "1.2rem", "π")
- return format_html('{}', "1.2rem", "π")
-
- @admin.display(description="Content Title")
- def get_content_title(self, obj):
- """Return a shortened content title for list display."""
-
- return obj.content.title[:50] + "..."
-
- @admin.display(description="AI Score")
- def get_ai_score(self, obj):
- """Displays the original AI score to compare with user feedback."""
- score = obj.content.relevance_score
- if score is None:
- return "-"
- color = "green" if score > 75 else "red" if score < 40 else "orange"
- return format_html('{}%', color, score)
-
- def changelist_view(self, request, extra_context=None):
- """Augment the changelist with editorial approval statistics."""
-
- qs = self.get_queryset(request)
- extra_context = extra_context or {}
- upvotes = qs.filter(feedback_type="upvote").count()
- total = qs.count() or 1
- approval_rate = (upvotes / total) * 100
-
- extra_context["dashboard_stats"] = [
- {
- "title": "Approval Rate",
- "value": f"{approval_rate:.1f}%",
- "icon": "thumb_up",
- "color": "success" if approval_rate > 80 else "warning",
- },
- {
- "title": "Total Feedback",
- "value": total,
- "icon": "forum",
- },
- ]
- return super().changelist_view(request, extra_context=extra_context)
-
-
-@admin.register(IngestionRun)
-class IngestionRunAdmin(ModelAdmin):
- """Admin view for ingestion health, throughput, and timing."""
-
- list_display = (
- "plugin_name",
- "project",
- "display_status",
- "display_efficiency",
- "display_duration",
- "started_at",
- )
- list_filter = (
- "plugin_name",
- "status",
- ("project", admin.RelatedOnlyFieldListFilter),
- )
- search_fields = ("plugin_name", "error_message", "project__name")
- readonly_fields = ("display_duration", "started_at", "completed_at")
- fieldsets = (
- ("Run Info", {"fields": ("plugin_name", "project", "status")}),
- (
- "Data Metrics",
- {"fields": ("items_fetched", "items_ingested", "display_efficiency")},
- ),
- ("Timing", {"fields": ("started_at", "completed_at", "display_duration")}),
- ("Logs", {"fields": ("error_message",), "classes": ("collapse",)}),
- )
-
- @admin.display(description="Status")
- def display_status(self, obj):
- """Render ingestion status as an Unfold badge."""
-
- status_value = str(obj.status).lower()
- colors = {"success": "success", "failed": "danger", "running": "info"}
- return format_html(
- '{}',
- colors.get(status_value, "warning"),
- status_value.upper(),
- )
-
- @admin.display(description="Efficiency (Ingested/Fetched)")
- def display_efficiency(self, obj):
- """Show how much of the fetched content became stored content."""
-
- if obj.items_fetched == 0:
- return "0/0"
- percent = (obj.items_ingested / obj.items_fetched) * 100
- color = "green" if percent > 90 else "orange" if percent > 50 else "red"
- percent_label = f"({percent:.0f}%)"
- return format_html(
- '{} / {} {}',
- obj.items_ingested,
- obj.items_fetched,
- color,
- percent_label,
- )
-
- @admin.display(description="Duration")
- def display_duration(self, obj):
- """Return human-readable runtime for completed ingestion runs."""
-
- if not obj.completed_at:
- return "In Progress..."
- duration = obj.completed_at - obj.started_at
- seconds = duration.total_seconds()
- return f"{int(seconds // 60)}m {int(seconds % 60)}s"
-
- def changelist_view(self, request, extra_context=None):
- """Augment the changelist with ingestion success statistics."""
-
- qs = self.get_queryset(request)
- extra_context = extra_context or {}
- total_runs = qs.count()
- failed_runs = qs.filter(status="failed").count()
- total_ingested = sum(qs.values_list("items_ingested", flat=True))
-
- extra_context["dashboard_stats"] = [
- {
- "title": "Total Content Ingested",
- "value": f"{total_ingested:,}",
- "icon": "cloud_download",
- },
- {
- "title": "Success Rate",
- "value": f"{((total_runs - failed_runs) / (total_runs or 1)) * 100:.1f}%",
- "icon": "check_circle",
- "color": "success" if failed_runs == 0 else "warning",
- },
- ]
- return super().changelist_view(request, extra_context=extra_context)
-
-
-@admin.register(SourceConfig)
-class SourceConfigAdmin(ModelAdmin):
- """Admin view for source-plugin configuration and connectivity checks."""
-
- list_display = (
- "plugin_name",
- "project",
- "display_health",
- "is_active",
- "last_fetched_at",
- )
- list_filter = (
- "is_active",
- "plugin_name",
- ("project", admin.RelatedOnlyFieldListFilter),
- )
- list_editable = ("is_active",)
- search_fields = ("plugin_name", "project__name")
- actions = ["test_source_connection"]
- readonly_fields = ("last_fetched_at", "pretty_config")
- fieldsets = (
- ("Core Settings", {"fields": ("plugin_name", "project", "is_active")}),
- (
- "Configuration",
- {
- "fields": ("pretty_config", "config"),
- },
- ),
- (
- "Activity",
- {
- "fields": ("last_fetched_at",),
- },
- ),
- )
-
- @admin.display(description="Status")
- def display_health(self, obj):
- """Infer a human-friendly health state from activity timestamps."""
-
- if not obj.is_active:
- return format_html('{}', "gray", "β Paused")
-
- if obj.last_fetched_at:
- hours_since = (timezone.now() - obj.last_fetched_at).total_seconds() / 3600
- if hours_since > 24:
- return format_html(
- '{}', "red", "β Stale"
- )
- return format_html(
- '{}', "green", "β Healthy"
- )
-
- return format_html(
- '{}', "orange", "β Never Run"
- )
-
- @admin.display(description="Config Preview")
- def pretty_config(self, obj):
- """Displays the JSON config in a readable format."""
- if not obj.config:
- return "Empty"
- formatted_json = json.dumps(obj.config, indent=4)
- return mark_safe(
- f'{formatted_json}'
- )
-
- @admin.action(description="Test Source Connectivity")
- def test_source_connection(self, request, queryset):
- """
- Custom action to trigger a dry-run fetch for the selected sources.
- """
- healthy_sources = []
- failed_sources = []
-
- for source_config in queryset.select_related("project"):
- try:
- source_config.config = validate_plugin_config(
- source_config.plugin_name,
- source_config.config,
- )
- plugin = get_plugin_for_source_config(source_config)
- if not plugin.health_check():
- raise RuntimeError("Health check returned an unhealthy status.")
- except Exception as exc:
- failed_sources.append(f"{source_config}: {exc}")
- else:
- healthy_sources.append(str(source_config))
-
- if healthy_sources:
- self.message_user(
- request,
- f"Connectivity check passed for {len(healthy_sources)} source(s).",
- messages.SUCCESS,
- )
-
- if failed_sources:
- self.message_user(
- request,
- "Connectivity check failed for: " + "; ".join(failed_sources),
- messages.ERROR,
- )
-
- def changelist_view(self, request, extra_context=None):
- """Augment the changelist with source-count and diversity stats."""
-
- qs = self.get_queryset(request)
- extra_context = extra_context or {}
- active_count = qs.filter(is_active=True).count()
- total_count = qs.count() or 1
-
- extra_context["dashboard_stats"] = [
- {
- "title": "Active Sources",
- "value": f"{active_count} / {total_count}",
- "icon": "settings_input_component",
- "color": "success" if active_count == total_count else "warning",
- },
- {
- "title": "Plugin Variety",
- "value": qs.values("plugin_name").distinct().count(),
- "icon": "extension",
- },
- ]
- return super().changelist_view(request, extra_context=extra_context)
-
-
-@admin.register(ReviewQueue)
-class ReviewQueueAdmin(ModelAdmin):
- """Admin view for items waiting on editorial judgment."""
-
- list_display = (
- "get_content_title",
- "project",
- "reason",
- "display_confidence",
- "resolved",
- "resolution",
- "created_at",
- )
- list_filter = ("resolved", "reason", ("project", admin.RelatedOnlyFieldListFilter))
- list_editable = ("resolved", "resolution")
- actions = ["mark_as_approved", "mark_as_rejected"]
-
- @admin.display(description="Content")
- def get_content_title(self, obj):
- """Return a shortened content title for list display."""
-
- return obj.content.title[:50] + "..."
-
- @admin.display(description="Confidence")
- def display_confidence(self, obj):
- """Render confidence as a percentage with risk coloring."""
-
- color = (
- "red"
- if obj.confidence < 0.3
- else "orange" if obj.confidence < 0.6 else "green"
- )
- confidence_label = f"{obj.confidence * 100:.0f}%"
- return format_html('{}', color, confidence_label)
-
- @admin.action(description="Approve selected items")
- def mark_as_approved(self, request, queryset):
- """Resolve selected review items as approved."""
-
- queryset.update(resolved=True, resolution="APPROVED")
- self.message_user(request, "Selected items approved.", messages.SUCCESS)
-
- @admin.action(description="Reject selected items")
- def mark_as_rejected(self, request, queryset):
- """Resolve selected review items as rejected."""
-
- queryset.update(resolved=True, resolution="REJECTED")
- self.message_user(request, "Selected items rejected.", messages.WARNING)
-
- def changelist_view(self, request, extra_context=None):
- """Augment the changelist with pending-volume and confidence stats."""
-
- qs = self.get_queryset(request)
- extra_context = extra_context or {}
- pending_count = qs.filter(resolved=False).count()
- avg_conf = qs.aggregate(avg_confidence=Avg("confidence"))["avg_confidence"] or 0
-
- extra_context["dashboard_stats"] = [
- {
- "title": "Pending Review",
- "value": pending_count,
- "icon": "pending_actions",
- "color": "danger" if pending_count > 10 else "success",
- },
- {
- "title": "Avg Confidence",
- "value": f"{avg_conf * 100:.0f}%",
- "icon": "psychology",
- },
- ]
- return super().changelist_view(request, extra_context=extra_context)
+from pipeline.admin import ReviewQueueAdmin, SkillResultAdmin # noqa: E402,F401
diff --git a/core/api.py b/core/api.py
index 52ee8b73..b8093372 100644
--- a/core/api.py
+++ b/core/api.py
@@ -8,7 +8,6 @@
import logging
from typing import Any
-from django.db.models import Avg, Count, Prefetch, Q
from drf_spectacular.utils import (
OpenApiExample,
OpenApiParameter,
@@ -20,52 +19,32 @@
from rest_framework import serializers, status, viewsets
from rest_framework.decorators import action
from rest_framework.exceptions import NotFound
-from rest_framework.filters import OrderingFilter
from rest_framework.response import Response
-from core.entity_extraction import (
- accept_entity_candidate,
- merge_entity_candidate,
- reject_entity_candidate,
-)
from core.models import (
- BlueskyCredentials,
Content,
- Entity,
- EntityCandidate,
- EntityMention,
IngestionRun,
IntakeAllowlist,
NewsletterIntake,
- Project,
- ProjectConfig,
- ReviewQueue,
- SkillResult,
- SourceConfig,
- TopicCentroidSnapshot,
UserFeedback,
- generate_project_intake_token,
+)
+from core.permissions import (
+ IsProjectAdmin,
+ IsProjectContributor,
+ IsProjectFeedbackEditor,
+ IsProjectMember,
+ IsProjectMemberWritable,
+ get_visible_projects_queryset,
)
from core.serializers import (
- BlueskyCredentialsSerializer,
ContentSerializer,
- EntityAuthoritySnapshotSerializer,
- EntityCandidateMergeSerializer,
- EntityCandidateSerializer,
- EntityMentionSummarySerializer,
- EntitySerializer,
IngestionRunSerializer,
IntakeAllowlistSerializer,
NewsletterIntakeSerializer,
- ProjectConfigSerializer,
- ProjectSerializer,
- ReviewQueueSerializer,
SkillResultSerializer,
- SourceConfigSerializer,
- TopicCentroidObservabilitySummarySerializer,
- TopicCentroidSnapshotSerializer,
UserFeedbackSerializer,
)
+from projects.models import Project
CLASSIFICATION_SKILL_NAME = "content_classification"
RELEVANCE_SKILL_NAME = "relevance_scoring"
@@ -95,7 +74,6 @@
"Create Project Request",
value={
"name": "AI Weekly",
- "group": 3,
"topic_description": "Coverage of developer tools, model releases, and applied AI workflows.",
"content_retention_days": 180,
},
@@ -107,11 +85,11 @@
value={
"id": 1,
"name": "AI Weekly",
- "group": 3,
"topic_description": "Coverage of developer tools, model releases, and applied AI workflows.",
"content_retention_days": 180,
"intake_token": "project-token-123",
"intake_enabled": True,
+ "user_role": "admin",
"has_bluesky_credentials": True,
"bluesky_handle": "aiweekly.bsky.social",
"bluesky_is_active": True,
@@ -424,7 +402,7 @@ def document_group_access_viewset(
tag: str,
action_overrides: dict[str, dict] | None = None,
):
- """Decorate a viewset with schema metadata for group-access resources.
+ """Decorate a viewset with schema metadata for membership-scoped resources.
Args:
resource_plural: Human-readable plural label for the resource.
@@ -456,12 +434,12 @@ def schema(action: str, **kwargs):
list=schema(
"list",
summary=f"List {resource_plural}",
- description=f"Return all {resource_plural} available to the authenticated user through group membership.",
+ description=f"Return all {resource_plural} available to the authenticated user through project membership.",
),
retrieve=schema(
"retrieve",
summary=f"Get {resource_singular}",
- description=f"Return a single {resource_singular} available to the authenticated user through group membership.",
+ description=f"Return a single {resource_singular} available to the authenticated user through project membership.",
),
create=schema(
"create",
@@ -471,17 +449,17 @@ def schema(action: str, **kwargs):
update=schema(
"update",
summary=f"Replace {resource_singular}",
- description=f"Replace an existing {resource_singular} available to the authenticated user through group membership.",
+ description=f"Replace an existing {resource_singular} available to the authenticated user through project membership.",
),
partial_update=schema(
"partial_update",
summary=f"Update {resource_singular}",
- description=f"Update one or more fields on an existing {resource_singular} available to the authenticated user through group membership.",
+ description=f"Update one or more fields on an existing {resource_singular} available to the authenticated user through project membership.",
),
destroy=schema(
"destroy",
summary=f"Delete {resource_singular}",
- description=f"Delete an existing {resource_singular} available to the authenticated user through group membership.",
+ description=f"Delete an existing {resource_singular} available to the authenticated user through project membership.",
),
)
@@ -581,7 +559,7 @@ def get_project(self):
"project_id must be present in nested project-scoped routes"
)
try:
- return Project.objects.get(pk=project_id, group__user=self.request.user)
+ return get_visible_projects_queryset(self.request.user).get(pk=project_id)
except Project.DoesNotExist as exc:
raise NotFound("Project not found.") from exc
@@ -606,325 +584,6 @@ def perform_create(self, serializer):
serializer.save(project=self.get_project())
-@document_group_access_viewset(
- resource_plural="projects",
- resource_singular="project",
- create_description="Create a new project for one of the authenticated user's groups.",
- tag="Project Management",
- action_overrides=build_crud_action_overrides(
- ProjectSerializer,
- resource_plural="projects available to the authenticated user",
- resource_singular="project",
- create_examples=[PROJECT_CREATE_REQUEST_EXAMPLE, PROJECT_RESPONSE_EXAMPLE],
- create_response_examples=[PROJECT_RESPONSE_EXAMPLE],
- retrieve_examples=[PROJECT_RESPONSE_EXAMPLE],
- ),
-)
-class ProjectViewSet(viewsets.ModelViewSet):
- """Manage projects accessible through the current user's group memberships."""
-
- serializer_class = ProjectSerializer
- queryset = Project.objects.select_related("group", "bluesky_credentials")
- lookup_url_kwarg = "id"
-
- def get_queryset(self):
- """Limit projects to those visible through the authenticated user."""
-
- return self.queryset.filter(group__user=self.request.user).distinct()
-
- @extend_schema(
- summary="Rotate newsletter intake token",
- description=(
- "Generate a fresh project-specific newsletter intake token and return the "
- "updated project payload."
- ),
- tags=["Project Management"],
- request=None,
- responses={200: ProjectSerializer, 403: AUTHENTICATION_REQUIRED_RESPONSE},
- )
- @action(detail=True, methods=["post"], url_path="rotate-intake-token")
- def rotate_intake_token(self, request, *args, **kwargs):
- """Generate a fresh intake token for the selected project."""
-
- project = self.get_object()
- project.intake_token = generate_project_intake_token()
- project.save(update_fields=["intake_token"])
- serializer = self.get_serializer(project)
- return Response(serializer.data)
-
- @extend_schema(
- summary="Verify Bluesky credentials",
- description=(
- "Verify the selected project's stored Bluesky credentials by authenticating "
- "the account and checking the current session."
- ),
- tags=["Ingestion"],
- request=None,
- responses={
- 200: build_success_response(
- BLUESKY_CREDENTIALS_VERIFY_RESPONSE,
- "The project's Bluesky credentials were verified successfully.",
- ),
- 400: OpenApiResponse(
- response=inline_serializer(
- name="BlueskyCredentialsVerifyErrorResponse",
- fields={
- "type": serializers.CharField(),
- "errors": inline_serializer(
- name="BlueskyCredentialsVerifyError",
- fields={
- "code": serializers.CharField(),
- "detail": serializers.CharField(),
- "attr": serializers.CharField(allow_null=True),
- },
- many=True,
- ),
- },
- ),
- description="The project is missing Bluesky credentials or verification failed.",
- ),
- 403: AUTHENTICATION_REQUIRED_RESPONSE,
- },
- )
- @action(detail=True, methods=["post"], url_path="verify-bluesky-credentials")
- def verify_bluesky_credentials(self, request, *args, **kwargs):
- """Verify the Bluesky credentials stored for the selected project."""
-
- from core.plugins.bluesky import BlueskySourcePlugin
-
- project = self.get_object()
- try:
- credentials = project.bluesky_credentials
- except BlueskyCredentials.DoesNotExist as exc:
- raise serializers.ValidationError(
- {
- "bluesky_credentials": "No Bluesky credentials are configured for this project."
- }
- ) from exc
-
- try:
- BlueskySourcePlugin.verify_credentials(credentials)
- except Exception as exc:
- logger.exception(
- "Bluesky credential verification failed for project id=%s",
- project.id,
- )
- raise serializers.ValidationError(
- {
- "bluesky_credentials": (
- "Credential verification failed. Please re-check the credentials "
- "and try again."
- )
- }
- ) from exc
-
- credentials.refresh_from_db()
- return Response(
- {
- "status": "verified",
- "handle": credentials.handle,
- "last_verified_at": credentials.last_verified_at,
- "last_error": "",
- }
- )
-
-
-@document_project_owned_viewset(
- resource_plural="project configurations",
- resource_singular="project configuration",
- create_description="Create a new project configuration record for the selected project, including authority weighting and decay settings.",
- tag="Project Management",
- action_overrides=build_crud_action_overrides(
- ProjectConfigSerializer,
- resource_plural="project configurations for the selected project",
- resource_singular="project configuration",
- ),
-)
-class ProjectConfigViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
- """Manage per-project scoring and authority configuration."""
-
- serializer_class = ProjectConfigSerializer
- queryset = ProjectConfig.objects.select_related("project")
-
-
-@document_project_owned_viewset(
- resource_plural="entities",
- resource_singular="entity",
- create_description="Create a new tracked entity for the selected project, such as a company, person, or organization.",
- tag="Entity Catalog",
- action_overrides=build_crud_action_overrides(
- EntitySerializer,
- resource_plural="entities for the selected project",
- resource_singular="entity",
- ),
-)
-class EntityViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
- """Manage tracked entities associated with a project."""
-
- serializer_class = EntitySerializer
- filter_backends = [OrderingFilter]
- ordering_fields = ["authority_score", "created_at", "name"]
- ordering = ["name"]
- queryset = (
- Entity.objects.select_related("project")
- .annotate(mention_count=Count("mentions", distinct=True))
- .prefetch_related(
- Prefetch(
- "mentions",
- queryset=EntityMention.objects.select_related("content").order_by(
- "-created_at"
- ),
- to_attr="prefetched_mentions",
- )
- )
- )
-
- @extend_schema(
- summary="List entity mentions",
- description="Return the extracted mention history for one tracked entity inside the selected project.",
- request=None,
- responses={
- 200: EntityMentionSummarySerializer(many=True),
- 403: AUTHENTICATION_REQUIRED_RESPONSE,
- },
- tags=["Entity Catalog"],
- )
- @action(detail=True, methods=["get"], url_path="mentions")
- def mentions(self, request, *args, **kwargs):
- """Return the extracted mentions for the selected entity."""
-
- entity = self.get_object()
- mentions = entity.mentions.select_related("content").order_by("-created_at")
- serializer = EntityMentionSummarySerializer(mentions, many=True)
- return Response(serializer.data)
-
- @extend_schema(
- summary="List authority history",
- description=(
- "Return persisted authority-score snapshots for one tracked entity. "
- "Use the optional limit query parameter to cap the number of snapshots returned."
- ),
- parameters=[
- OpenApiParameter(
- name="limit",
- type=int,
- location=OpenApiParameter.QUERY,
- description="Maximum number of authority snapshots to return.",
- required=False,
- )
- ],
- request=None,
- responses={
- 200: EntityAuthoritySnapshotSerializer(many=True),
- 403: AUTHENTICATION_REQUIRED_RESPONSE,
- },
- tags=["Entity Catalog"],
- )
- @action(detail=True, methods=["get"], url_path="authority_history")
- def authority_history(self, request, *args, **kwargs):
- """Return recent authority snapshots for the selected entity."""
-
- entity = self.get_object()
- snapshots = entity.authority_snapshots.order_by("-computed_at")
- limit_param = request.query_params.get("limit")
- if limit_param:
- try:
- limit = max(1, min(int(limit_param), 100))
- except ValueError as exc:
- raise serializers.ValidationError(
- {"limit": "Limit must be an integer between 1 and 100."}
- ) from exc
- snapshots = snapshots[:limit]
- serializer = EntityAuthoritySnapshotSerializer(snapshots, many=True)
- return Response(serializer.data)
-
-
-@document_project_owned_viewset(
- resource_plural="entity candidates",
- resource_singular="entity candidate",
- create_description="Entity candidates are created by the pipeline and can be reviewed through dedicated actions.",
- tag="Entity Catalog",
- action_overrides=build_crud_action_overrides(
- EntityCandidateSerializer,
- resource_plural="entity candidates for the selected project",
- resource_singular="entity candidate",
- ),
-)
-class EntityCandidateViewSet(ProjectOwnedQuerysetMixin, viewsets.ReadOnlyModelViewSet):
- """Inspect and resolve entity candidates surfaced by entity extraction."""
-
- serializer_class = EntityCandidateSerializer
- queryset = EntityCandidate.objects.select_related(
- "project", "first_seen_in", "merged_into"
- )
-
- @extend_schema(
- summary="Accept entity candidate",
- description="Promote a pending entity candidate into a tracked entity and backfill recent mentions.",
- request=None,
- responses={
- 200: EntityCandidateSerializer,
- 403: AUTHENTICATION_REQUIRED_RESPONSE,
- },
- tags=["Entity Catalog"],
- )
- @action(detail=True, methods=["post"], url_path="accept")
- def accept(self, request, *args, **kwargs):
- """Accept an entity candidate and return its updated representation."""
-
- candidate = self.get_object()
- accept_entity_candidate(candidate)
- candidate.refresh_from_db()
- serializer = self.get_serializer(candidate)
- return Response(serializer.data)
-
- @extend_schema(
- summary="Reject entity candidate",
- description="Mark a pending entity candidate as rejected without creating a tracked entity.",
- request=None,
- responses={
- 200: EntityCandidateSerializer,
- 403: AUTHENTICATION_REQUIRED_RESPONSE,
- },
- tags=["Entity Catalog"],
- )
- @action(detail=True, methods=["post"], url_path="reject")
- def reject(self, request, *args, **kwargs):
- """Reject an entity candidate and return its updated representation."""
-
- candidate = self.get_object()
- reject_entity_candidate(candidate)
- candidate.refresh_from_db()
- serializer = self.get_serializer(candidate)
- return Response(serializer.data)
-
- @extend_schema(
- summary="Merge entity candidate",
- description="Merge a pending entity candidate into an existing tracked entity from the same project.",
- request=EntityCandidateMergeSerializer,
- responses={
- 200: EntityCandidateSerializer,
- 400: EntityCandidateMergeSerializer,
- 403: AUTHENTICATION_REQUIRED_RESPONSE,
- },
- tags=["Entity Catalog"],
- )
- @action(detail=True, methods=["post"], url_path="merge")
- def merge(self, request, *args, **kwargs):
- """Merge an entity candidate into an existing tracked entity."""
-
- candidate = self.get_object()
- serializer = EntityCandidateMergeSerializer(
- data=request.data,
- context=self.get_serializer_context(),
- )
- serializer.is_valid(raise_exception=True)
- merge_entity_candidate(candidate, serializer.validated_data["merged_into"])
- candidate.refresh_from_db()
- response_serializer = self.get_serializer(candidate)
- return Response(response_serializer.data)
-
-
@document_project_owned_viewset(
resource_plural="content items",
resource_singular="content item",
@@ -945,6 +604,17 @@ class ContentViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
serializer_class = ContentSerializer
queryset = Content.objects.select_related("project", "entity")
+ def get_permissions(self):
+ """Allow all members to read content, contributors to edit, and admins to delete."""
+
+ if self.action == "destroy":
+ permission_classes = [IsProjectAdmin]
+ elif self.action in {"create", "update", "partial_update", "run_skill"}:
+ permission_classes = [IsProjectMemberWritable]
+ else:
+ permission_classes = [IsProjectMember]
+ return [permission() for permission in permission_classes]
+
@extend_schema(
summary="Run content skill",
description=(
@@ -1003,25 +673,6 @@ def run_skill(self, request, *args, **kwargs):
return Response(serializer.data, status=status.HTTP_201_CREATED)
-@document_project_owned_viewset(
- resource_plural="skill results",
- resource_singular="skill result",
- create_description="Create a new skill result for project content. The referenced content must belong to the selected project.",
- tag="AI Processing",
- action_overrides=build_crud_action_overrides(
- SkillResultSerializer,
- resource_plural="skill results for the selected project",
- resource_singular="skill result",
- retrieve_examples=[SKILL_RESULT_RESPONSE_EXAMPLE],
- ),
-)
-class SkillResultViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
- """Inspect persisted AI skill outputs for project content."""
-
- serializer_class = SkillResultSerializer
- queryset = SkillResult.objects.select_related("content", "project", "superseded_by")
-
-
@document_project_owned_viewset(
resource_plural="user feedback entries",
resource_singular="user feedback entry",
@@ -1039,6 +690,11 @@ class UserFeedbackViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
serializer_class = UserFeedbackSerializer
queryset = UserFeedback.objects.select_related("content", "project", "user")
+ def get_permissions(self):
+ """Allow all members to read feedback and owners or admins to modify it."""
+
+ return [IsProjectFeedbackEditor()]
+
def perform_create(self, serializer):
"""Attach the authenticated user automatically to new feedback rows."""
@@ -1062,32 +718,14 @@ class IngestionRunViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
serializer_class = IngestionRunSerializer
queryset = IngestionRun.objects.select_related("project")
+ def get_permissions(self):
+ """Allow all members to read ingestion runs and contributors to manage them."""
-@document_project_owned_viewset(
- resource_plural="Bluesky credentials",
- resource_singular="Bluesky credentials",
- create_description=(
- "Create Bluesky credentials for the selected project. The app password is "
- "accepted write-only and is never returned in API responses."
- ),
- tag="Ingestion",
- action_overrides=build_crud_action_overrides(
- BlueskyCredentialsSerializer,
- resource_plural="Bluesky credentials for the selected project",
- resource_singular="Bluesky credentials",
- retrieve_examples=[BLUESKY_CREDENTIALS_RESPONSE_EXAMPLE],
- ),
-)
-class BlueskyCredentialsViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
- """Manage project-scoped Bluesky credentials."""
-
- serializer_class = BlueskyCredentialsSerializer
- queryset = BlueskyCredentials.objects.select_related("project")
-
- def get_queryset(self):
- """Restrict credentials to the selected project and current user."""
-
- return super().get_queryset().order_by("-updated_at")
+ if self.action in {"create", "update", "partial_update", "destroy"}:
+ permission_classes = [IsProjectMemberWritable]
+ else:
+ permission_classes = [IsProjectMember]
+ return [permission() for permission in permission_classes]
@document_project_owned_viewset(
@@ -1110,6 +748,11 @@ class IntakeAllowlistViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
serializer_class = IntakeAllowlistSerializer
queryset = IntakeAllowlist.objects.select_related("project")
+ def get_permissions(self):
+ """Restrict intake allowlist access to project contributors."""
+
+ return [IsProjectContributor()]
+
@document_project_owned_viewset(
resource_plural="newsletter intake entries",
@@ -1131,103 +774,7 @@ class NewsletterIntakeViewSet(ProjectOwnedQuerysetMixin, viewsets.ReadOnlyModelV
serializer_class = NewsletterIntakeSerializer
queryset = NewsletterIntake.objects.select_related("project")
+ def get_permissions(self):
+ """Allow any project member to inspect newsletter intake history."""
-@document_project_owned_viewset(
- resource_plural="source configurations",
- resource_singular="source configuration",
- create_description="Create a new source configuration for the selected project. Plugin-specific configuration is validated before the record is saved.",
- tag="Ingestion",
- action_overrides=build_crud_action_overrides(
- SourceConfigSerializer,
- resource_plural="source configurations for the selected project",
- resource_singular="source configuration",
- create_examples=[
- SOURCE_CONFIG_CREATE_REQUEST_EXAMPLE,
- SOURCE_CONFIG_REDDIT_REQUEST_EXAMPLE,
- SOURCE_CONFIG_BLUESKY_REQUEST_EXAMPLE,
- SOURCE_CONFIG_RESPONSE_EXAMPLE,
- ],
- create_response_examples=[SOURCE_CONFIG_RESPONSE_EXAMPLE],
- retrieve_examples=[SOURCE_CONFIG_RESPONSE_EXAMPLE],
- ),
-)
-class SourceConfigViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
- """Manage source-plugin configuration for a project."""
-
- serializer_class = SourceConfigSerializer
- queryset = SourceConfig.objects.select_related("project")
-
-
-@document_project_owned_viewset(
- resource_plural="topic centroid snapshots",
- resource_singular="topic centroid snapshot",
- create_description="Topic centroid snapshots are pipeline-managed history rows and are exposed read-only for observability.",
- tag="Observability",
- action_overrides=build_crud_action_overrides(
- TopicCentroidSnapshotSerializer,
- resource_plural="topic centroid snapshots for the selected project",
- resource_singular="topic centroid snapshot",
- ),
-)
-class TopicCentroidSnapshotViewSet(
- ProjectOwnedQuerysetMixin, viewsets.ReadOnlyModelViewSet
-):
- """Inspect persisted centroid history and aggregate drift for a project."""
-
- serializer_class = TopicCentroidSnapshotSerializer
- queryset = TopicCentroidSnapshot.objects.select_related("project")
-
- @extend_schema(
- summary="Get topic centroid summary",
- description=(
- "Return aggregate centroid observability metrics for the selected project, "
- "including average drift and the latest persisted snapshot."
- ),
- request=None,
- responses={
- 200: TopicCentroidObservabilitySummarySerializer,
- 403: AUTHENTICATION_REQUIRED_RESPONSE,
- },
- tags=["Observability"],
- )
- @action(detail=False, methods=["get"], url_path="summary")
- def summary(self, request, *args, **kwargs):
- """Return centroid observability summary metrics for the current project."""
-
- queryset = self.get_queryset()
- metrics = queryset.aggregate(
- snapshot_count=Count("id"),
- active_snapshot_count=Count("id", filter=Q(centroid_active=True)),
- avg_drift_from_previous=Avg("drift_from_previous"),
- avg_drift_from_week_ago=Avg("drift_from_week_ago"),
- )
- serializer = TopicCentroidObservabilitySummarySerializer(
- {
- "project": self.get_project().id,
- "snapshot_count": metrics["snapshot_count"],
- "active_snapshot_count": metrics["active_snapshot_count"],
- "avg_drift_from_previous": metrics["avg_drift_from_previous"],
- "avg_drift_from_week_ago": metrics["avg_drift_from_week_ago"],
- "latest_snapshot": queryset.order_by("-computed_at").first(),
- },
- context=self.get_serializer_context(),
- )
- return Response(serializer.data)
-
-
-@document_project_owned_viewset(
- resource_plural="review queue entries",
- resource_singular="review queue entry",
- create_description="Create a new review queue entry for the selected project. The referenced content must belong to the same project.",
- tag="Review Queue",
- action_overrides=build_crud_action_overrides(
- ReviewQueueSerializer,
- resource_plural="review queue entries for the selected project",
- resource_singular="review queue entry",
- ),
-)
-class ReviewQueueViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
- """Inspect and manage content awaiting manual review."""
-
- serializer_class = ReviewQueueSerializer
- queryset = ReviewQueue.objects.select_related("content", "project")
+ return [IsProjectMember()]
diff --git a/core/api_urls.py b/core/api_urls.py
index e1c70538..e4bc93ab 100644
--- a/core/api_urls.py
+++ b/core/api_urls.py
@@ -1,72 +1,42 @@
+"""Aggregate app-owned API route registrations under the public v1 surface."""
+
from rest_framework.routers import DefaultRouter
from rest_framework_nested.routers import NestedSimpleRouter
-from core.api import (
- BlueskyCredentialsViewSet,
- ContentViewSet,
- EntityCandidateViewSet,
- EntityViewSet,
- IngestionRunViewSet,
- IntakeAllowlistViewSet,
- NewsletterIntakeViewSet,
- ProjectConfigViewSet,
- ProjectViewSet,
- ReviewQueueViewSet,
- SkillResultViewSet,
- SourceConfigViewSet,
- TopicCentroidSnapshotViewSet,
- UserFeedbackViewSet,
+from content.api_urls import register_project_routes as register_content_project_routes
+from entities.api_urls import (
+ register_project_routes as register_entities_project_routes,
+)
+from ingestion.api_urls import (
+ register_project_routes as register_ingestion_project_routes,
+)
+from newsletters.api_urls import (
+ register_project_routes as register_newsletters_project_routes,
+)
+from pipeline.api_urls import (
+ register_project_routes as register_pipeline_project_routes,
)
+from projects.api_urls import (
+ register_project_routes as register_projects_project_routes,
+)
+from projects.api_urls import (
+ register_root_routes as register_projects_root_routes,
+)
+from trends.api_urls import register_project_routes as register_trends_project_routes
app_name = "api"
router = DefaultRouter()
-router.register("projects", ProjectViewSet, basename="project")
+register_projects_root_routes(router)
project_router = NestedSimpleRouter(router, r"projects", lookup="project")
-project_router.register(
- r"project-configs", ProjectConfigViewSet, basename="project-config"
-)
-project_router.register(r"entities", EntityViewSet, basename="project-entity")
-project_router.register(
- r"entity-candidates",
- EntityCandidateViewSet,
- basename="project-entity-candidate",
-)
-project_router.register(r"contents", ContentViewSet, basename="project-content")
-project_router.register(
- r"skill-results", SkillResultViewSet, basename="project-skill-result"
-)
-project_router.register(r"feedback", UserFeedbackViewSet, basename="project-feedback")
-project_router.register(
- r"ingestion-runs", IngestionRunViewSet, basename="project-ingestion-run"
-)
-project_router.register(
- r"bluesky-credentials",
- BlueskyCredentialsViewSet,
- basename="project-bluesky-credentials",
-)
-project_router.register(
- r"intake-allowlist",
- IntakeAllowlistViewSet,
- basename="project-intake-allowlist",
-)
-project_router.register(
- r"newsletter-intakes",
- NewsletterIntakeViewSet,
- basename="project-newsletter-intake",
-)
-project_router.register(
- r"source-configs", SourceConfigViewSet, basename="project-source-config"
-)
-project_router.register(
- r"topic-centroid-snapshots",
- TopicCentroidSnapshotViewSet,
- basename="project-topic-centroid-snapshot",
-)
-project_router.register(
- r"review-queue", ReviewQueueViewSet, basename="project-review-queue"
-)
+register_projects_project_routes(project_router)
+register_entities_project_routes(project_router)
+register_content_project_routes(project_router)
+register_pipeline_project_routes(project_router)
+register_ingestion_project_routes(project_router)
+register_newsletters_project_routes(project_router)
+register_trends_project_routes(project_router)
urlpatterns = [
*router.urls,
diff --git a/core/embeddings.py b/core/embeddings.py
index bbee9fd5..39fd198b 100644
--- a/core/embeddings.py
+++ b/core/embeddings.py
@@ -26,8 +26,9 @@
VectorParams,
)
-from core.models import Content, Entity
+from core.models import Content
from core.settings_types import CoreSettings
+from entities.models import Entity
SentenceTransformer = None
settings = cast(CoreSettings, django_settings)
diff --git a/core/entity_extraction.py b/core/entity_extraction.py
index 4e3935b7..80adee6b 100644
--- a/core/entity_extraction.py
+++ b/core/entity_extraction.py
@@ -15,6 +15,10 @@
from core.llm import build_skill_user_prompt, get_skill_definition, openrouter_chat_json
from core.models import (
Content,
+ SkillResult,
+ SkillStatus,
+)
+from entities.models import (
Entity,
EntityCandidate,
EntityCandidateStatus,
@@ -22,8 +26,6 @@
EntityMentionRole,
EntityMentionSentiment,
EntityType,
- SkillResult,
- SkillStatus,
)
ENTITY_EXTRACTION_SKILL_NAME = "entity_extraction"
@@ -121,7 +123,9 @@ def run_entity_extraction(content: Content) -> dict[str, Any]:
confidence = max((mention.confidence for mention in mentions), default=0.0)
return {
"mentions": [_serialize_mention(mention) for mention in mentions],
- "candidate_entities": [_serialize_candidate(candidate) for candidate in candidates],
+ "candidate_entities": [
+ _serialize_candidate(candidate) for candidate in candidates
+ ],
"primary_entity_id": primary_entity.id if primary_entity is not None else None,
"confidence": confidence,
"explanation": extraction.get(
@@ -211,7 +215,8 @@ def persist_entity_candidates(
if not created:
update_fields: list[str] = []
suggested_type = candidate_payload.get(
- "suggested_type", candidate.suggested_type or _guess_candidate_type(name)
+ "suggested_type",
+ candidate.suggested_type or _guess_candidate_type(name),
)
if candidate.suggested_type != suggested_type:
candidate.suggested_type = suggested_type
@@ -264,7 +269,9 @@ def reject_entity_candidate(candidate: EntityCandidate) -> None:
candidate.save(update_fields=["status", "updated_at"])
-def backfill_entity_mentions(entity: Entity, *, candidate_name: str | None = None) -> None:
+def backfill_entity_mentions(
+ entity: Entity, *, candidate_name: str | None = None
+) -> None:
"""Retroactively attach recent content rows to an accepted or merged entity."""
cutoff = timezone.now() - timedelta(days=RETROACTIVE_MENTION_WINDOW_DAYS)
@@ -312,7 +319,10 @@ def _run_entity_extraction_with_fallback(
"title": content.title,
"content_text": content.content_text[:5000],
"project_id": content.project_id,
- "tracked_entities": [_serialize_tracked_entity(entity) for entity in candidate_entities],
+ "tracked_entities": [
+ _serialize_tracked_entity(entity)
+ for entity in candidate_entities
+ ],
},
),
)
@@ -393,7 +403,11 @@ def _retrieve_candidate_entities(
ordered_ids.append(entity_id)
if not ordered_ids:
return tracked_entities
- return [entities_by_id[entity_id] for entity_id in ordered_ids if entity_id in entities_by_id]
+ return [
+ entities_by_id[entity_id]
+ for entity_id in ordered_ids
+ if entity_id in entities_by_id
+ ]
def _normalize_mentions(
@@ -423,7 +437,9 @@ def _normalize_mentions(
"role": _normalize_role(raw_mention.get("role")),
"sentiment": _normalize_sentiment(raw_mention.get("sentiment")),
"span": str(raw_mention.get("span", entity_name)).strip(),
- "confidence": _normalize_confidence(raw_mention.get("confidence", 0.75)),
+ "confidence": _normalize_confidence(
+ raw_mention.get("confidence", 0.75)
+ ),
}
)
return normalized_mentions, unresolved_names
@@ -447,7 +463,9 @@ def _normalize_candidates(
elif isinstance(raw_candidate, dict):
candidate_name = _clean_candidate_name(str(raw_candidate.get("name", "")))
suggested_type = str(
- raw_candidate.get("suggested_type", _guess_candidate_type(candidate_name))
+ raw_candidate.get(
+ "suggested_type", _guess_candidate_type(candidate_name)
+ )
)
else:
continue
@@ -477,7 +495,9 @@ def _discover_candidates(
discovered: list[dict[str, str]] = []
seen_names: set[str] = set()
candidate_text = "\n".join(
- part for part in [content.author, content.title, content.content_text[:2000]] if part
+ part
+ for part in [content.author, content.title, content.content_text[:2000]]
+ if part
)
for match in PROPER_NOUN_PATTERN.findall(candidate_text):
name = _clean_candidate_name(match)
@@ -490,9 +510,7 @@ def _discover_candidates(
):
continue
seen_names.add(normalized_name)
- discovered.append(
- {"name": name, "suggested_type": _guess_candidate_type(name)}
- )
+ discovered.append({"name": name, "suggested_type": _guess_candidate_type(name)})
return discovered
@@ -536,7 +554,9 @@ def _find_entity_span(
stripped_label = label.strip()
if not stripped_label:
continue
- pattern = re.compile(rf"(? str:
return EntityMentionRole.AUTHOR
if content.title and span_lower in content.title.lower():
return EntityMentionRole.SUBJECT
- if re.search(rf'"[^\n]{{0,120}}{re.escape(span)}[^\n]{{0,120}}"', content.content_text, re.IGNORECASE):
+ if re.search(
+ rf'"[^\n]{{0,120}}{re.escape(span)}[^\n]{{0,120}}"',
+ content.content_text,
+ re.IGNORECASE,
+ ):
return EntityMentionRole.QUOTED
return EntityMentionRole.MENTIONED
@@ -714,7 +738,9 @@ def _guess_candidate_type(name: str) -> str:
return EntityType.VENDOR
if any(token in ORGANIZATION_SUFFIXES for token in tokens):
return EntityType.ORGANIZATION
- title_case_tokens = [token for token in name.split() if token and token[:1].isupper()]
+ title_case_tokens = [
+ token for token in name.split() if token and token[:1].isupper()
+ ]
if 2 <= len(title_case_tokens) <= 3:
return EntityType.INDIVIDUAL
return EntityType.ORGANIZATION
diff --git a/core/management/commands/seed_demo.py b/core/management/commands/seed_demo.py
index 14247100..0d8fadb9 100644
--- a/core/management/commands/seed_demo.py
+++ b/core/management/commands/seed_demo.py
@@ -5,7 +5,6 @@
from django.conf import settings
from django.contrib.auth import get_user_model
-from django.contrib.auth.models import Group
from django.core.management.base import BaseCommand
from django.db import transaction
from django.utils import timezone
@@ -16,20 +15,14 @@
from core.embeddings import upsert_content_embedding
from core.models import (
Content,
- Entity,
- EntityType,
FeedbackType,
IngestionRun,
- Project,
- ProjectConfig,
ReviewQueue,
ReviewReason,
ReviewResolution,
RunStatus,
SkillResult,
SkillStatus,
- SourceConfig,
- SourcePluginName,
UserFeedback,
)
from core.pipeline import (
@@ -37,9 +30,17 @@
RELEVANCE_SKILL_NAME,
SUMMARIZATION_SKILL_NAME,
)
+from entities.models import Entity, EntityType
+from projects.model_support import SourcePluginName
+from projects.models import (
+ Project,
+ ProjectConfig,
+ ProjectMembership,
+ ProjectRole,
+ SourceConfig,
+)
DEMO_PROJECT_NAME = "Platform Engineering Weekly"
-DEMO_GROUP_NAME = "platform-engineering-editors"
DEMO_TOPIC_DESCRIPTION = (
"Platform engineering, DevOps, cloud infrastructure, reliability, and "
"developer experience."
@@ -537,17 +538,19 @@ def _ensure_demo_project(self) -> Project:
)
user.set_password("demo-password")
user.save(update_fields=["password"])
- group, _ = Group.objects.get_or_create(name=DEMO_GROUP_NAME)
- user.groups.add(group)
project, created = Project.objects.get_or_create(
- group=group,
name=DEMO_PROJECT_NAME,
defaults={"topic_description": DEMO_TOPIC_DESCRIPTION},
)
if not created and project.topic_description != DEMO_TOPIC_DESCRIPTION:
project.topic_description = DEMO_TOPIC_DESCRIPTION
project.save(update_fields=["topic_description"])
+ ProjectMembership.objects.get_or_create(
+ user=user,
+ project=project,
+ defaults={"role": ProjectRole.ADMIN},
+ )
ProjectConfig.objects.get_or_create(project=project)
return project
diff --git a/core/migrations/0010_move_project_models_to_projects.py b/core/migrations/0010_move_project_models_to_projects.py
new file mode 100644
index 00000000..dee95e58
--- /dev/null
+++ b/core/migrations/0010_move_project_models_to_projects.py
@@ -0,0 +1,159 @@
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+PROJECT_MODEL_NAMES = [
+ "project",
+ "projectconfig",
+ "sourceconfig",
+ "blueskycredentials",
+]
+
+
+def rename_project_content_types(apps, schema_editor):
+ """Retarget existing content types to the new projects app label."""
+
+ ContentType = apps.get_model("contenttypes", "ContentType")
+ ContentType.objects.using(schema_editor.connection.alias).filter(
+ app_label="core", model__in=PROJECT_MODEL_NAMES
+ ).update(app_label="projects")
+
+
+def rename_project_content_types_reverse(apps, schema_editor):
+ """Restore the historical core app label on rollback."""
+
+ ContentType = apps.get_model("contenttypes", "ContentType")
+ ContentType.objects.using(schema_editor.connection.alias).filter(
+ app_label="projects", model__in=PROJECT_MODEL_NAMES
+ ).update(app_label="core")
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("core", "0009_topiccentroidsnapshot"),
+ ("projects", "0001_initial"),
+ ]
+
+ operations = [
+ migrations.SeparateDatabaseAndState(
+ state_operations=[
+ migrations.AlterField(
+ model_name="entity",
+ name="project",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="entities",
+ to="projects.project",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="entityauthoritysnapshot",
+ name="project",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="entity_authority_snapshots",
+ to="projects.project",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="topiccentroidsnapshot",
+ name="project",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="topic_centroid_snapshots",
+ to="projects.project",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="content",
+ name="project",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="contents",
+ to="projects.project",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="entitymention",
+ name="project",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="entity_mentions",
+ to="projects.project",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="entitycandidate",
+ name="project",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="entity_candidates",
+ to="projects.project",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="intakeallowlist",
+ name="project",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="intake_allowlist",
+ to="projects.project",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="newsletterintake",
+ name="project",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="newsletter_intakes",
+ to="projects.project",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="skillresult",
+ name="project",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="skill_results",
+ to="projects.project",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="userfeedback",
+ name="project",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="feedback",
+ to="projects.project",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="ingestionrun",
+ name="project",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="ingestion_runs",
+ to="projects.project",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="reviewqueue",
+ name="project",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="review_queue_items",
+ to="projects.project",
+ ),
+ ),
+ migrations.DeleteModel(name="BlueskyCredentials"),
+ migrations.DeleteModel(name="ProjectConfig"),
+ migrations.DeleteModel(name="Project"),
+ migrations.DeleteModel(name="SourceConfig"),
+ ],
+ database_operations=[],
+ ),
+ migrations.RunPython(
+ rename_project_content_types,
+ rename_project_content_types_reverse,
+ ),
+ ]
diff --git a/core/migrations/0011_move_entity_models_to_entities.py b/core/migrations/0011_move_entity_models_to_entities.py
new file mode 100644
index 00000000..4208487c
--- /dev/null
+++ b/core/migrations/0011_move_entity_models_to_entities.py
@@ -0,0 +1,61 @@
+import django.db.models.deletion
+from django.db import migrations, models
+
+ENTITY_MODEL_NAMES = [
+ "entity",
+ "entityauthoritysnapshot",
+ "entitymention",
+ "entitycandidate",
+]
+
+
+def rename_entity_content_types(apps, schema_editor):
+ """Retarget existing content types to the new entities app label."""
+
+ ContentType = apps.get_model("contenttypes", "ContentType")
+ ContentType.objects.using(schema_editor.connection.alias).filter(
+ app_label="core", model__in=ENTITY_MODEL_NAMES
+ ).update(app_label="entities")
+
+
+def rename_entity_content_types_reverse(apps, schema_editor):
+ """Restore the historical core app label on rollback."""
+
+ ContentType = apps.get_model("contenttypes", "ContentType")
+ ContentType.objects.using(schema_editor.connection.alias).filter(
+ app_label="entities", model__in=ENTITY_MODEL_NAMES
+ ).update(app_label="core")
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("core", "0010_move_project_models_to_projects"),
+ ("entities", "0001_initial"),
+ ]
+
+ operations = [
+ migrations.SeparateDatabaseAndState(
+ state_operations=[
+ migrations.AlterField(
+ model_name="content",
+ name="entity",
+ field=models.ForeignKey(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.SET_NULL,
+ related_name="contents",
+ to="entities.entity",
+ ),
+ ),
+ migrations.DeleteModel(name="EntityAuthoritySnapshot"),
+ migrations.DeleteModel(name="EntityMention"),
+ migrations.DeleteModel(name="EntityCandidate"),
+ migrations.DeleteModel(name="Entity"),
+ ],
+ database_operations=[],
+ ),
+ migrations.RunPython(
+ rename_entity_content_types,
+ rename_entity_content_types_reverse,
+ ),
+ ]
diff --git a/core/migrations/0012_move_content_models_to_content_and_ingestion.py b/core/migrations/0012_move_content_models_to_content_and_ingestion.py
new file mode 100644
index 00000000..b4e5a393
--- /dev/null
+++ b/core/migrations/0012_move_content_models_to_content_and_ingestion.py
@@ -0,0 +1,74 @@
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+CONTENT_MODEL_NAMES = ["content", "userfeedback"]
+INGESTION_MODEL_NAMES = ["ingestionrun"]
+
+
+def rename_content_and_ingestion_content_types(apps, schema_editor):
+ """Retarget existing content types to the new owning app labels."""
+
+ ContentType = apps.get_model("contenttypes", "ContentType")
+ database_alias = schema_editor.connection.alias
+ ContentType.objects.using(database_alias).filter(
+ app_label="core", model__in=CONTENT_MODEL_NAMES
+ ).update(app_label="content")
+ ContentType.objects.using(database_alias).filter(
+ app_label="core", model__in=INGESTION_MODEL_NAMES
+ ).update(app_label="ingestion")
+
+
+def rename_content_and_ingestion_content_types_reverse(apps, schema_editor):
+ """Restore the historical core app label on rollback."""
+
+ ContentType = apps.get_model("contenttypes", "ContentType")
+ database_alias = schema_editor.connection.alias
+ ContentType.objects.using(database_alias).filter(
+ app_label="content", model__in=CONTENT_MODEL_NAMES
+ ).update(app_label="core")
+ ContentType.objects.using(database_alias).filter(
+ app_label="ingestion", model__in=INGESTION_MODEL_NAMES
+ ).update(app_label="core")
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("core", "0011_move_entity_models_to_entities"),
+ ("content", "0001_initial"),
+ ("ingestion", "0001_initial"),
+ ("entities", "0002_alter_entitycandidate_first_seen_in_and_more"),
+ ]
+
+ operations = [
+ migrations.SeparateDatabaseAndState(
+ state_operations=[
+ migrations.AlterField(
+ model_name="skillresult",
+ name="content",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="skill_results",
+ to="content.content",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="reviewqueue",
+ name="content",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="review_queue_items",
+ to="content.content",
+ ),
+ ),
+ migrations.DeleteModel(name="IngestionRun"),
+ migrations.DeleteModel(name="Content"),
+ migrations.DeleteModel(name="UserFeedback"),
+ ],
+ database_operations=[],
+ ),
+ migrations.RunPython(
+ rename_content_and_ingestion_content_types,
+ rename_content_and_ingestion_content_types_reverse,
+ ),
+ ]
diff --git a/core/migrations/0013_move_newsletter_models_to_newsletters.py b/core/migrations/0013_move_newsletter_models_to_newsletters.py
new file mode 100644
index 00000000..b629f090
--- /dev/null
+++ b/core/migrations/0013_move_newsletter_models_to_newsletters.py
@@ -0,0 +1,44 @@
+from django.db import migrations
+
+NEWSLETTER_MODEL_NAMES = ["intakeallowlist", "newsletterintake"]
+
+
+def rename_newsletter_content_types(apps, schema_editor):
+ """Retarget existing content types to the new newsletters app label."""
+
+ ContentType = apps.get_model("contenttypes", "ContentType")
+ database_alias = schema_editor.connection.alias
+ ContentType.objects.using(database_alias).filter(
+ app_label="core", model__in=NEWSLETTER_MODEL_NAMES
+ ).update(app_label="newsletters")
+
+
+def rename_newsletter_content_types_reverse(apps, schema_editor):
+ """Restore the historical core app label on rollback."""
+
+ ContentType = apps.get_model("contenttypes", "ContentType")
+ database_alias = schema_editor.connection.alias
+ ContentType.objects.using(database_alias).filter(
+ app_label="newsletters", model__in=NEWSLETTER_MODEL_NAMES
+ ).update(app_label="core")
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("core", "0012_move_content_models_to_content_and_ingestion"),
+ ("newsletters", "0001_initial"),
+ ]
+
+ operations = [
+ migrations.SeparateDatabaseAndState(
+ state_operations=[
+ migrations.DeleteModel(name="NewsletterIntake"),
+ migrations.DeleteModel(name="IntakeAllowlist"),
+ ],
+ database_operations=[],
+ ),
+ migrations.RunPython(
+ rename_newsletter_content_types,
+ rename_newsletter_content_types_reverse,
+ ),
+ ]
diff --git a/core/migrations/0014_move_pipeline_models_to_pipeline.py b/core/migrations/0014_move_pipeline_models_to_pipeline.py
new file mode 100644
index 00000000..71f2fc59
--- /dev/null
+++ b/core/migrations/0014_move_pipeline_models_to_pipeline.py
@@ -0,0 +1,44 @@
+from django.db import migrations
+
+PIPELINE_MODEL_NAMES = ["skillresult", "reviewqueue"]
+
+
+def rename_pipeline_content_types(apps, schema_editor):
+ """Retarget existing content types to the new pipeline app label."""
+
+ ContentType = apps.get_model("contenttypes", "ContentType")
+ database_alias = schema_editor.connection.alias
+ ContentType.objects.using(database_alias).filter(
+ app_label="core", model__in=PIPELINE_MODEL_NAMES
+ ).update(app_label="pipeline")
+
+
+def rename_pipeline_content_types_reverse(apps, schema_editor):
+ """Restore the historical core app label on rollback."""
+
+ ContentType = apps.get_model("contenttypes", "ContentType")
+ database_alias = schema_editor.connection.alias
+ ContentType.objects.using(database_alias).filter(
+ app_label="pipeline", model__in=PIPELINE_MODEL_NAMES
+ ).update(app_label="core")
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("core", "0013_move_newsletter_models_to_newsletters"),
+ ("pipeline", "0001_initial"),
+ ]
+
+ operations = [
+ migrations.SeparateDatabaseAndState(
+ state_operations=[
+ migrations.DeleteModel(name="SkillResult"),
+ migrations.DeleteModel(name="ReviewQueue"),
+ ],
+ database_operations=[],
+ ),
+ migrations.RunPython(
+ rename_pipeline_content_types,
+ rename_pipeline_content_types_reverse,
+ ),
+ ]
diff --git a/core/migrations/0015_move_topic_centroid_snapshot_to_trends.py b/core/migrations/0015_move_topic_centroid_snapshot_to_trends.py
new file mode 100644
index 00000000..b1a396d8
--- /dev/null
+++ b/core/migrations/0015_move_topic_centroid_snapshot_to_trends.py
@@ -0,0 +1,43 @@
+from django.db import migrations
+
+TRENDS_MODEL_NAMES = ["topiccentroidsnapshot"]
+
+
+def rename_trends_content_types(apps, schema_editor):
+ """Retarget existing content types to the new trends app label."""
+
+ ContentType = apps.get_model("contenttypes", "ContentType")
+ database_alias = schema_editor.connection.alias
+ ContentType.objects.using(database_alias).filter(
+ app_label="core", model__in=TRENDS_MODEL_NAMES
+ ).update(app_label="trends")
+
+
+def rename_trends_content_types_reverse(apps, schema_editor):
+ """Restore the historical core app label on rollback."""
+
+ ContentType = apps.get_model("contenttypes", "ContentType")
+ database_alias = schema_editor.connection.alias
+ ContentType.objects.using(database_alias).filter(
+ app_label="trends", model__in=TRENDS_MODEL_NAMES
+ ).update(app_label="core")
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("core", "0014_move_pipeline_models_to_pipeline"),
+ ("trends", "0001_initial"),
+ ]
+
+ operations = [
+ migrations.SeparateDatabaseAndState(
+ state_operations=[
+ migrations.DeleteModel(name="TopicCentroidSnapshot"),
+ ],
+ database_operations=[],
+ ),
+ migrations.RunPython(
+ rename_trends_content_types,
+ rename_trends_content_types_reverse,
+ ),
+ ]
diff --git a/core/models.py b/core/models.py
index cff292d8..2b1bd85a 100644
--- a/core/models.py
+++ b/core/models.py
@@ -1,19 +1,60 @@
-"""Core domain models for projects, ingestion, and editorial review.
+"""Core domain models plus compatibility re-exports during app decomposition.
The admin, API, Celery tasks, and AI pipeline all revolve around the models in this
module. Adding model-level docstrings here gives Django admindocs a useful summary
of the core entities new contributors interact with first.
"""
-import base64
-import hashlib
import secrets
-from urllib.parse import urlsplit, urlunsplit
-from cryptography.fernet import Fernet
-from django.conf import settings
-from django.contrib.auth.models import Group
-from django.db import models
+from content.models import Content, FeedbackType, UserFeedback
+from entities.models import (
+ Entity,
+ EntityAuthoritySnapshot,
+ EntityCandidate,
+ EntityCandidateStatus,
+ EntityMention,
+ EntityMentionRole,
+ EntityMentionSentiment,
+ EntityType,
+)
+from ingestion.models import IngestionRun, RunStatus
+from newsletters.models import IntakeAllowlist, NewsletterIntake, NewsletterIntakeStatus
+from pipeline.models import ReviewQueue as _ReviewQueue
+from pipeline.models import ReviewReason as _ReviewReason
+from pipeline.models import ReviewResolution as _ReviewResolution
+from pipeline.models import SkillResult as _SkillResult
+from pipeline.models import SkillStatus as _SkillStatus
+from projects.models import Project as _Project
+from trends.models import TopicCentroidSnapshot as _TopicCentroidSnapshot
+
+Project = _Project
+ReviewQueue = _ReviewQueue
+ReviewReason = _ReviewReason
+ReviewResolution = _ReviewResolution
+SkillResult = _SkillResult
+SkillStatus = _SkillStatus
+TopicCentroidSnapshot = _TopicCentroidSnapshot
+
+__all__ = [
+ "Content",
+ "Entity",
+ "EntityAuthoritySnapshot",
+ "EntityCandidate",
+ "EntityCandidateStatus",
+ "EntityMention",
+ "EntityMentionRole",
+ "EntityMentionSentiment",
+ "EntityType",
+ "FeedbackType",
+ "IngestionRun",
+ "IntakeAllowlist",
+ "NewsletterIntake",
+ "NewsletterIntakeStatus",
+ "Project",
+ "RunStatus",
+ "UserFeedback",
+]
def generate_project_intake_token() -> str:
@@ -24,7 +65,9 @@ def generate_project_intake_token() -> str:
``intake+Confirm this sender for newsletter ingestion.
" - f'' - ) - - message = EmailMultiAlternatives( - subject=subject, - body=text_body, - from_email=settings.DEFAULT_FROM_EMAIL, - to=[to_email], - ) - message.attach_alternative(html_body, "text/html") - message.send() - - -def build_confirmation_url(token: str) -> str: - """Build the absolute confirmation URL for an allowlist token.""" - - base_url = settings.NEWSLETTER_API_BASE_URL.rstrip("/") - return f"{base_url}{reverse('confirm-newsletter-sender', kwargs={'token': token})}" - - -def process_inbound_newsletter( - *, - recipients: Iterable[str], - sender_email: str, - subject: str, - raw_html: str, - raw_text: str, - message_id: str, -) -> dict[str, Any]: - """Persist and route one inbound newsletter message. - - Args: - recipients: Recipient addresses from the inbound email payload. - sender_email: Envelope sender or normalized message sender. - subject: Newsletter email subject. - raw_html: Raw HTML body captured from the provider webhook. - raw_text: Raw plain-text body captured from the provider webhook. - message_id: Provider message identifier used for deduplication. - - Returns: - A status payload describing whether the message was ignored, queued, or is - waiting for sender confirmation. 
- """ - - project = _find_intake_project(recipients) - if project is None: - return {"status": "ignored", "reason": "no_matching_project"} - - normalized_sender_email = normalize_sender_email(sender_email) - normalized_message_id = message_id.strip() - if not normalized_sender_email or not normalized_message_id: - return {"status": "ignored", "reason": "missing_sender_or_message_id"} - - defaults = { - "project": project, - "sender_email": normalized_sender_email, - "subject": subject[:512], - "raw_html": sanitize_newsletter_html(raw_html), - "raw_text": raw_text, - } - intake, created = NewsletterIntake.objects.get_or_create( - message_id=normalized_message_id, - defaults=defaults, - ) - if not created: - return {"id": intake.id, "status": intake.status, "duplicate": True} - - allowlist, allowlist_created = IntakeAllowlist.objects.get_or_create( - project=project, - sender_email=normalized_sender_email, - ) - - if allowlist.is_confirmed: - queue_newsletter_intake(intake.id) - return {"id": intake.id, "status": intake.status} - - if allowlist_created: - send_confirmation_email( - to_email=normalized_sender_email, - confirm_url=build_confirmation_url(allowlist.confirmation_token), - project_name=project.name, - ) - - return {"id": intake.id, "status": intake.status, "confirmation_required": True} - - -def queue_newsletter_intake(intake_id: int) -> None: - """Dispatch newsletter extraction for a stored intake row. - - Args: - intake_id: Primary key of the stored ``NewsletterIntake`` row. 
- """ - - process_newsletter_intake = current_app.tasks[ - "core.tasks.process_newsletter_intake" - ] - if settings.CELERY_TASK_ALWAYS_EAGER: - process_newsletter_intake.apply(args=(intake_id,), throw=True) - else: - process_newsletter_intake.delay(intake_id) - - -def _find_intake_project(recipients: Iterable[str]) -> Project | None: - """Resolve the first enabled project referenced by the recipient list.""" - - for recipient in recipients: - token = extract_project_token(recipient) - if token is None: - continue - project = Project.objects.filter( - intake_token=token, intake_enabled=True - ).first() - if project is not None: - return project - return None +"""Compatibility exports for newsletter intake helpers.""" + +from newsletters.intake import ( + build_confirmation_url, + extract_newsletter_items, + normalize_sender_email, + process_inbound_newsletter, + queue_newsletter_intake, + sanitize_newsletter_html, + send_confirmation_email, +) + +__all__ = [ + "build_confirmation_url", + "extract_newsletter_items", + "normalize_sender_email", + "process_inbound_newsletter", + "queue_newsletter_intake", + "sanitize_newsletter_html", + "send_confirmation_email", +] diff --git a/core/permissions.py b/core/permissions.py new file mode 100644 index 00000000..77757f13 --- /dev/null +++ b/core/permissions.py @@ -0,0 +1,158 @@ +"""Cross-cutting helpers and DRF permission classes for project roles.""" + +from __future__ import annotations + +from rest_framework import permissions + +from projects.models import Project, ProjectMembership, ProjectRole + + +def get_visible_projects_queryset(user): + """Return the projects visible to the given authenticated user.""" + + if not getattr(user, "is_authenticated", False): + return Project.objects.none() + return Project.objects.filter(memberships__user=user).distinct() + + +def get_user_role(user, project: Project) -> str | None: + """Return the user's membership role for the given project, if any.""" + + if not getattr(user, 
"is_authenticated", False): + return None + if getattr(user, "is_superuser", False): + return ProjectRole.ADMIN + return ( + ProjectMembership.objects.filter(user=user, project=project) + .values_list("role", flat=True) + .first() + ) + + +def _get_project_from_view(view) -> Project | None: + """Resolve the current nested project from a project-scoped view when present.""" + + get_project = getattr(view, "get_project", None) + if callable(get_project): + return get_project() + return None + + +def _resolve_project(obj) -> Project: + """Resolve the owning project for a project-scoped model instance.""" + + if isinstance(obj, Project): + return obj + return obj.project + + +class IsProjectMember(permissions.BasePermission): + """Allow authenticated project members to read project-scoped resources.""" + + def has_permission(self, request, view) -> bool: + user = request.user + if not user or not user.is_authenticated: + return False + + project = _get_project_from_view(view) + if project is None: + return True + + return get_user_role(user, project) is not None + + def has_object_permission(self, request, view, obj) -> bool: + return get_user_role(request.user, _resolve_project(obj)) is not None + + +class IsProjectContributor(permissions.BasePermission): + """Allow only admins and members to access contributor-only resources.""" + + allowed_roles = {ProjectRole.ADMIN, ProjectRole.MEMBER} + + def has_permission(self, request, view) -> bool: + user = request.user + if not user or not user.is_authenticated: + return False + + project = _get_project_from_view(view) + if project is None: + return True + + return get_user_role(user, project) in self.allowed_roles + + def has_object_permission(self, request, view, obj) -> bool: + return get_user_role(request.user, _resolve_project(obj)) in self.allowed_roles + + +class IsProjectMemberWritable(permissions.BasePermission): + """Allow all members to read, but reserve writes for admins and members.""" + + writable_roles = 
{ProjectRole.ADMIN, ProjectRole.MEMBER} + + def has_permission(self, request, view) -> bool: + user = request.user + if not user or not user.is_authenticated: + return False + + project = _get_project_from_view(view) + if project is None: + return True + + role = get_user_role(user, project) + if request.method in permissions.SAFE_METHODS: + return role is not None + return role in self.writable_roles + + def has_object_permission(self, request, view, obj) -> bool: + role = get_user_role(request.user, _resolve_project(obj)) + if request.method in permissions.SAFE_METHODS: + return role is not None + return role in self.writable_roles + + +class IsProjectAdmin(permissions.BasePermission): + """Restrict access to project admins.""" + + def has_permission(self, request, view) -> bool: + user = request.user + if not user or not user.is_authenticated: + return False + + project = _get_project_from_view(view) + if project is None: + return True + + return get_user_role(user, project) == ProjectRole.ADMIN + + def has_object_permission(self, request, view, obj) -> bool: + return get_user_role(request.user, _resolve_project(obj)) == ProjectRole.ADMIN + + +class IsProjectFeedbackEditor(permissions.BasePermission): + """Allow feedback reads to any member and writes by owners or project admins.""" + + contributor_roles = {ProjectRole.ADMIN, ProjectRole.MEMBER} + + def has_permission(self, request, view) -> bool: + user = request.user + if not user or not user.is_authenticated: + return False + + project = _get_project_from_view(view) + if project is None: + return True + + role = get_user_role(user, project) + if request.method in permissions.SAFE_METHODS: + return role is not None + return role in self.contributor_roles + + def has_object_permission(self, request, view, obj) -> bool: + role = get_user_role(request.user, _resolve_project(obj)) + if request.method in permissions.SAFE_METHODS: + return role is not None + if role == ProjectRole.ADMIN: + return True + if role != 
ProjectRole.MEMBER: + return False + return obj.user_id == request.user.id diff --git a/core/pipeline.py b/core/pipeline.py index 8c34d620..0d19cae7 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -18,6 +18,7 @@ from django.utils import timezone from langgraph.graph import END, StateGraph +from content.models import Content from core.deduplication import canonicalize_url from core.embeddings import ( build_content_embedding_text, @@ -28,7 +29,7 @@ ) from core.entity_extraction import run_entity_extraction from core.llm import build_skill_user_prompt, get_skill_definition, openrouter_chat_json -from core.models import Content, ReviewQueue, ReviewReason, SkillResult, SkillStatus +from pipeline.models import ReviewQueue, ReviewReason, SkillResult, SkillStatus logger = logging.getLogger(__name__) diff --git a/core/plugins/__init__.py b/core/plugins/__init__.py index 29184f7d..cd50803b 100644 --- a/core/plugins/__init__.py +++ b/core/plugins/__init__.py @@ -1,5 +1,5 @@ -"""Public plugin-registry helpers used by the rest of the application.""" +"""Compatibility wrappers for the ingestion plugin registry.""" -from core.plugins.registry import get_plugin_for_source_config, validate_plugin_config +from ingestion.plugins import get_plugin_for_source_config, validate_plugin_config __all__ = ["get_plugin_for_source_config", "validate_plugin_config"] diff --git a/core/plugins/base.py b/core/plugins/base.py index 8fe08f0d..00801b41 100644 --- a/core/plugins/base.py +++ b/core/plugins/base.py @@ -1,95 +1,3 @@ -"""Base types and shared behavior for ingestion source plugins.""" +"""Compatibility wrapper for the ingestion plugin base types.""" -from __future__ import annotations - -from abc import ABC, abstractmethod -from dataclasses import dataclass -from datetime import datetime -from urllib.parse import urlparse - - -@dataclass(slots=True) -class ContentItem: - """Normalized content payload returned by source plugins.""" - - url: str - title: str - author: str - 
published_date: datetime - content_text: str - source_plugin: str - source_metadata: dict[str, object] | None = None - - -class SourcePlugin(ABC): - """Abstract base class implemented by all ingestion source plugins.""" - - required_config_fields: tuple[str, ...] = () - - def __init__(self, source_config): - """Bind a plugin instance to the saved source configuration and project.""" - - self.source_config = source_config - self.project = source_config.project - - @classmethod - def validate_config(cls, config: object) -> dict: - """Validate and normalize raw JSON configuration for a plugin. - - Args: - config: Raw configuration object submitted through admin or API. - - Returns: - A normalized configuration dictionary. - - Raises: - ValueError: If the config is not a mapping or required fields are - missing. - """ - - if not isinstance(config, dict): - raise ValueError("Config must be a JSON object.") - normalized_config = dict(config) - for field_name in cls.required_config_fields: - if not normalized_config.get(field_name): - raise ValueError(f"Missing required config field: {field_name}") - return normalized_config - - @abstractmethod - def fetch_new_content(self, since: datetime | None) -> list[ContentItem]: - """Fetch content newer than the given timestamp.""" - - raise NotImplementedError - - @abstractmethod - def health_check(self) -> bool: - """Return whether the remote source is reachable and usable.""" - - raise NotImplementedError - - def match_entity_for_url(self, url: str): - """Match a fetched URL to a tracked entity based on hostname equality.""" - - target_hostname = self._normalize_hostname(url) - if not target_hostname: - return None - for entity in self.project.entities.exclude(website_url=""): - if self._normalize_hostname(entity.website_url) == target_hostname: - return entity - return None - - def match_entity_for_item(self, item: ContentItem): - """Match a fetched content item to an entity. 
- - The default implementation preserves the existing hostname-based behavior - by matching against the normalized item URL. - """ - - return self.match_entity_for_url(item.url) - - @staticmethod - def _normalize_hostname(url: str) -> str: - """Normalize a URL hostname for entity matching.""" - - hostname = urlparse(url).hostname or "" - return hostname.removeprefix("www.").lower() +from ingestion.plugins.base import * # noqa: F403 diff --git a/core/plugins/bluesky.py b/core/plugins/bluesky.py index 0f19c760..f402666b 100644 --- a/core/plugins/bluesky.py +++ b/core/plugins/bluesky.py @@ -1,247 +1,3 @@ -"""Bluesky source plugin used to ingest public feeds and author timelines.""" +"""Compatibility wrapper for the Bluesky source plugin.""" -from __future__ import annotations - -from datetime import datetime - -from atproto import Client -from django.utils import timezone -from django.utils.dateparse import parse_datetime - -from core.models import BlueskyCredentials, SourcePluginName -from core.plugins.base import ContentItem, SourcePlugin - -PUBLIC_APPVIEW_BASE_URL = "https://public.api.bsky.app" - - -class BlueskySourcePlugin(SourcePlugin): - """Fetch public Bluesky feed or author posts through AppView.""" - - @classmethod - def verify_credentials(cls, credentials: BlueskyCredentials) -> None: - """Authenticate a stored Bluesky account and confirm the session works.""" - - try: - client = cls._authenticated_client_for_credentials(credentials) - client.com.atproto.server.get_session() - except Exception as exc: - cls._record_credentials_status(credentials, error_message=str(exc)) - raise - cls._record_credentials_status(credentials, error_message="") - - @classmethod - def validate_config(cls, config: object) -> dict: - """Validate Bluesky feed or author configuration.""" - - normalized_config = super().validate_config(config) - feed_uri = normalized_config.get("feed_uri") - author_handle = normalized_config.get("author_handle") - if bool(feed_uri) == 
bool(author_handle): - raise ValueError("Provide exactly one of feed_uri or author_handle") - if feed_uri and ( - not isinstance(feed_uri, str) - or not feed_uri.startswith("at://") - or "/app.bsky.feed.generator/" not in feed_uri - ): - raise ValueError( - "feed_uri must be a Bluesky feed generator at:// URI" - ) - if author_handle: - normalized_handle = cls._normalize_handle(author_handle) - if not normalized_handle: - raise ValueError("author_handle must be a non-empty Bluesky handle") - normalized_config["author_handle"] = normalized_handle - - normalized_config["max_posts_per_fetch"] = int( - normalized_config.get("max_posts_per_fetch", 100) - ) - if normalized_config["max_posts_per_fetch"] <= 0: - raise ValueError("max_posts_per_fetch must be a positive integer") - - include_replies = normalized_config.get("include_replies", False) - if not isinstance(include_replies, bool): - raise ValueError("include_replies must be a boolean") - normalized_config["include_replies"] = include_replies - return normalized_config - - def fetch_new_content(self, since: datetime | None) -> list[ContentItem]: - """Fetch public Bluesky posts newer than ``since`` and normalize them.""" - - response = self._get_feed_response() - items: list[ContentItem] = [] - seen_post_uris: set[str] = set() - for feed_view in response.feed: - post = getattr(feed_view, "post", None) - if post is None or post.uri in seen_post_uris: - continue - seen_post_uris.add(post.uri) - if not self.source_config.config.get("include_replies", False) and getattr( - feed_view, "reply", None - ): - continue - published_date = self._published_date_for_post(post) - if since and published_date <= since: - continue - items.append(self._build_content_item(post, published_date)) - return items - - def health_check(self) -> bool: - """Treat the source as healthy when the AppView request succeeds.""" - - credentials = self._credentials() - try: - self._get_feed_response(limit=1) - except Exception as exc: - 
self._record_credentials_status(credentials, error_message=str(exc)) - raise - self._record_credentials_status(credentials, error_message="") - return True - - def match_entity_for_item(self, item: ContentItem): - """Match posts to entities using the author's Bluesky handle first.""" - - author_handle = self._normalize_handle( - str((item.source_metadata or {}).get("author_handle", "")) - ) - if author_handle: - for entity in self.project.entities.exclude(bluesky_handle=""): - if self._normalize_handle(entity.bluesky_handle) == author_handle: - return entity - return super().match_entity_for_item(item) - - def _get_feed_response(self, limit: int | None = None): - """Query the configured public feed endpoint.""" - - request_limit = limit or self.source_config.config.get("max_posts_per_fetch", 100) - client = self._client() - feed_uri = self.source_config.config.get("feed_uri") - if feed_uri: - return client.app.bsky.feed.get_feed( - {"feed": feed_uri, "limit": request_limit} - ) - return client.app.bsky.feed.get_author_feed( - { - "actor": self.source_config.config["author_handle"], - "include_pins": False, - "limit": request_limit, - } - ) - - def _build_content_item(self, post, published_date: datetime) -> ContentItem: - """Convert one AppView post into the shared plugin payload.""" - - author_handle = self._normalize_handle(self._nested_value(post, "author", "handle")) - external_url = self._nested_value(post, "embed", "external", "uri") - external_title = ( - self._nested_value(post, "embed", "external", "title") or "" - ).strip() - post_url = self._post_url(post) - record_text = (self._nested_value(post, "record", "text") or "").strip() - title = external_title or record_text.splitlines()[0].strip() or post_url - return ContentItem( - url=external_url or post_url, - title=title, - author=author_handle, - published_date=published_date, - content_text=record_text or external_title or post_url, - source_plugin=SourcePluginName.BLUESKY, - source_metadata={ - 
"author_did": self._nested_value(post, "author", "did") or "", - "author_handle": author_handle, - "embedded_url": external_url or "", - "post_uri": getattr(post, "uri", ""), - "reply_count": getattr(post, "reply_count", 0) or 0, - "repost_count": getattr(post, "repost_count", 0) or 0, - }, - ) - - @staticmethod - def _published_date_for_post(post) -> datetime: - """Choose the indexed or record timestamp for a Bluesky post.""" - - for value in ( - getattr(post, "indexed_at", None), - BlueskySourcePlugin._nested_value(post, "record", "created_at"), - ): - if value: - parsed_value = parse_datetime(value) - if parsed_value is not None: - return parsed_value - return timezone.now() - - @staticmethod - def _post_url(post) -> str: - """Build the public web URL for a Bluesky post when no card link exists.""" - - actor = ( - BlueskySourcePlugin._normalize_handle( - BlueskySourcePlugin._nested_value(post, "author", "handle") - ) - or BlueskySourcePlugin._nested_value(post, "author", "did") - or "" - ) - post_uri = getattr(post, "uri", "") - post_id = post_uri.rstrip("/").split("/")[-1] if post_uri else "" - if actor and post_id: - return f"https://bsky.app/profile/{actor}/post/{post_id}" - return post_uri - - @staticmethod - def _normalize_handle(handle: object) -> str: - """Normalize handles so matching stays case-insensitive.""" - - if not isinstance(handle, str): - return "" - return handle.strip().removeprefix("@").lower() - - @staticmethod - def _nested_value(value, *path: str): - """Read nested object or dict attributes without binding to model types.""" - - current_value = value - for path_part in path: - if current_value is None: - return None - if isinstance(current_value, dict): - current_value = current_value.get(path_part) - else: - current_value = getattr(current_value, path_part, None) - return current_value - - def _client(self) -> Client: - """Create a public or authenticated ATProto client for the project.""" - - credentials = self._credentials() - if 
credentials is None: - return Client(base_url=PUBLIC_APPVIEW_BASE_URL) - return self._authenticated_client_for_credentials(credentials) - - def _credentials(self) -> BlueskyCredentials | None: - """Return the active project-scoped Bluesky credentials, if configured.""" - - return BlueskyCredentials.objects.filter(project=self.project, is_active=True).first() - - @staticmethod - def _authenticated_client_for_credentials(credentials: BlueskyCredentials) -> Client: - """Build an authenticated client from a stored credential record.""" - - if not credentials.has_app_password(): - raise RuntimeError("Bluesky credentials are missing an app password.") - client = Client(base_url=credentials.client_base_url) - client.login(login=credentials.handle, password=credentials.get_app_password()) - return client - - @staticmethod - def _record_credentials_status( - credentials: BlueskyCredentials | None, *, error_message: str - ) -> None: - """Persist the latest credential verification result when credentials exist.""" - - if credentials is None: - return - update_fields = ["last_error", "updated_at"] - credentials.last_error = error_message - if not error_message: - credentials.last_verified_at = timezone.now() - update_fields.append("last_verified_at") - credentials.save(update_fields=update_fields) +from ingestion.plugins.bluesky import * # noqa: F403 diff --git a/core/plugins/reddit.py b/core/plugins/reddit.py index c91f5168..f9cd3f0b 100644 --- a/core/plugins/reddit.py +++ b/core/plugins/reddit.py @@ -1,95 +1,3 @@ -"""Reddit source plugin used for trend and discussion ingestion.""" +"""Compatibility wrapper for the Reddit source plugin.""" -from __future__ import annotations - -from datetime import UTC, datetime - -import praw -from django.conf import settings - -from core.models import SourcePluginName -from core.plugins.base import ContentItem, SourcePlugin - - -class RedditSourcePlugin(SourcePlugin): - """Fetch posts from a configured subreddit listing.""" - - 
required_config_fields = ("subreddit",) - - @classmethod - def validate_config(cls, config: object) -> dict: - """Validate Reddit-specific config such as listing and limit values.""" - - normalized_config = super().validate_config(config) - listing = normalized_config.get("listing", "both") - if listing not in {"new", "hot", "both"}: - raise ValueError("listing must be one of: new, hot, both") - normalized_config["listing"] = listing - normalized_config["limit"] = int(normalized_config.get("limit", 25)) - if normalized_config["limit"] <= 0: - raise ValueError("limit must be a positive integer") - return normalized_config - - def fetch_new_content(self, since: datetime | None) -> list[ContentItem]: - """Fetch subreddit submissions newer than ``since`` and normalize them.""" - - subreddit = self._client().subreddit(self.source_config.config["subreddit"]) - items: list[ContentItem] = [] - seen_submission_ids: set[str] = set() - for submission in self._iter_submissions(subreddit): - if submission.id in seen_submission_ids: - continue - seen_submission_ids.add(submission.id) - published_date = datetime.fromtimestamp(submission.created_utc, tz=UTC) - if since and published_date <= since: - continue - items.append( - ContentItem( - url=submission.url - or f"https://www.reddit.com{submission.permalink}", - title=submission.title.strip(), - author=str(submission.author) if submission.author else "", - published_date=published_date, - content_text=(submission.selftext or submission.title).strip(), - source_plugin=SourcePluginName.REDDIT, - ) - ) - return items - - def health_check(self) -> bool: - """Verify that the configured subreddit can be queried successfully.""" - - subreddit = self._client().subreddit(self.source_config.config["subreddit"]) - next(subreddit.new(limit=1), None) - return True - - def match_entity_for_url(self, url: str): - """Skip entity matching for Reddit because posts are not entity-driven.""" - - return None - - def _iter_submissions(self, 
subreddit): - """Yield submissions from the configured listing modes without duplicates.""" - - listing = self.source_config.config.get("listing", "both") - limit = self.source_config.config.get("limit", 25) - if listing in {"new", "both"}: - yield from subreddit.new(limit=limit) - if listing in {"hot", "both"}: - yield from subreddit.hot(limit=limit) - - @staticmethod - def _client(): - """Create the authenticated PRAW client from Django settings. - - Raises: - RuntimeError: If Reddit credentials are missing. - """ - - if not settings.REDDIT_CLIENT_ID or not settings.REDDIT_CLIENT_SECRET: - raise RuntimeError("Reddit credentials are not configured.") - return praw.Reddit( - client_id=settings.REDDIT_CLIENT_ID, - client_secret=settings.REDDIT_CLIENT_SECRET, - user_agent=settings.REDDIT_USER_AGENT, - ) +from ingestion.plugins.reddit import * # noqa: F403 diff --git a/core/plugins/registry.py b/core/plugins/registry.py index 7bd8ccc4..6b40bc04 100644 --- a/core/plugins/registry.py +++ b/core/plugins/registry.py @@ -1,43 +1,3 @@ -"""Registry helpers for resolving source-plugin implementations by name.""" +"""Compatibility wrapper for the ingestion plugin registry.""" -from typing import Any - -from core.models import SourcePluginName -from core.plugins.bluesky import BlueskySourcePlugin -from core.plugins.reddit import RedditSourcePlugin -from core.plugins.rss import RSSSourcePlugin - -PLUGIN_REGISTRY = { - SourcePluginName.RSS: RSSSourcePlugin, - SourcePluginName.REDDIT: RedditSourcePlugin, - SourcePluginName.BLUESKY: BlueskySourcePlugin, -} - - -def get_plugin_for_source_config(source_config): - """Instantiate the plugin configured for a saved source configuration.""" - - return _get_plugin_class(source_config.plugin_name)(source_config) - - -def validate_plugin_config( - plugin_name: SourcePluginName | str, config: object -) -> dict[str, Any]: - """Validate plugin config using the plugin class registered for the name.""" - - return 
_get_plugin_class(plugin_name).validate_config(config) - - -def _get_plugin_class(plugin_name: SourcePluginName | str): - """Resolve a plugin enum value or string into its registered class. - - Raises: - ValueError: If the plugin name is not supported. - """ - - try: - return PLUGIN_REGISTRY[SourcePluginName(plugin_name)] - except KeyError as exc: - raise ValueError(f"Unsupported source plugin: {plugin_name}") from exc - except ValueError as exc: - raise ValueError(f"Unsupported source plugin: {plugin_name}") from exc +from ingestion.plugins.registry import * # noqa: F403 diff --git a/core/plugins/rss.py b/core/plugins/rss.py index 75bd906b..ca0cdb64 100644 --- a/core/plugins/rss.py +++ b/core/plugins/rss.py @@ -1,78 +1,3 @@ -"""RSS source plugin used to ingest feed entries into project content.""" +"""Compatibility wrapper for the RSS source plugin.""" -from __future__ import annotations - -from datetime import UTC, datetime -from time import struct_time - -import feedparser -from django.utils import timezone - -from core.models import SourcePluginName -from core.plugins.base import ContentItem, SourcePlugin - - -class RSSSourcePlugin(SourcePlugin): - """Fetch content from a configured RSS or Atom feed.""" - - required_config_fields = ("feed_url",) - - def fetch_new_content(self, since: datetime | None) -> list[ContentItem]: - """Parse the feed and return entries newer than ``since``.""" - - parsed_feed = feedparser.parse(self.source_config.config["feed_url"]) - items: list[ContentItem] = [] - for entry in parsed_feed.entries: - published_date = self._published_date_for_entry(entry) - if since and published_date <= since: - continue - url = getattr(entry, "link", "") - title = (getattr(entry, "title", "") or "").strip() - if not url or not title: - continue - summary = ( - getattr(entry, "summary", "") - or getattr(entry, "description", "") - or title - ) - items.append( - ContentItem( - url=url, - title=title, - author=(getattr(entry, "author", "") or 
"").strip(), - published_date=published_date, - content_text=summary.strip(), - source_plugin=SourcePluginName.RSS, - ) - ) - return items - - def health_check(self) -> bool: - """Treat the feed as healthy when it returns at least one entry.""" - - parsed_feed = feedparser.parse(self.source_config.config["feed_url"]) - return bool(getattr(parsed_feed, "entries", [])) - - @staticmethod - def _published_date_for_entry(entry) -> datetime: - """Choose the best available published timestamp for a feed entry.""" - - for field_name in ("published_parsed", "updated_parsed", "created_parsed"): - parsed_value = getattr(entry, field_name, None) - if parsed_value: - return RSSSourcePlugin._struct_time_to_datetime(parsed_value) - return timezone.now() - - @staticmethod - def _struct_time_to_datetime(parsed_value: struct_time) -> datetime: - """Convert ``feedparser`` time tuples into timezone-aware datetimes.""" - - return datetime( - parsed_value.tm_year, - parsed_value.tm_mon, - parsed_value.tm_mday, - parsed_value.tm_hour, - parsed_value.tm_min, - parsed_value.tm_sec, - tzinfo=UTC, - ) +from ingestion.plugins.rss import * # noqa: F403 diff --git a/core/serializer_mixins.py b/core/serializer_mixins.py new file mode 100644 index 00000000..d3ab6317 --- /dev/null +++ b/core/serializer_mixins.py @@ -0,0 +1,55 @@ +"""Shared DRF serializer mixins used across app-owned serializer modules.""" + +from core.models import Content, SkillResult +from core.permissions import get_visible_projects_queryset +from entities.models import Entity + + +class ProjectScopedSerializerMixin: + """Limit serializer relationship fields to objects the current user can access.""" + + def _filter_related_queryset(self, request): + """Constrain related-field querysets using the request user and project context.""" + + user = request.user + project = self.context.get("project") + if "project" in self.fields: + self.fields["project"].queryset = get_visible_projects_queryset(user) + if "entity" in self.fields: + 
entity_queryset = ( + Entity.objects.filter(project=project) + if project + else Entity.objects.filter(project__memberships__user=user).distinct() + ) + self.fields["entity"].queryset = entity_queryset + if "merged_into" in self.fields: + merged_into_queryset = ( + Entity.objects.filter(project=project) + if project + else Entity.objects.filter(project__memberships__user=user).distinct() + ) + self.fields["merged_into"].queryset = merged_into_queryset + if "content" in self.fields: + content_queryset = ( + Content.objects.filter(project=project) + if project + else Content.objects.filter(project__memberships__user=user).distinct() + ) + self.fields["content"].queryset = content_queryset + if "superseded_by" in self.fields: + skill_result_queryset = ( + SkillResult.objects.filter(project=project) + if project + else SkillResult.objects.filter( + project__memberships__user=user + ).distinct() + ) + self.fields["superseded_by"].queryset = skill_result_queryset + + def __init__(self, *args, **kwargs): + """Initialize the serializer and scope relation fields when authenticated.""" + + super().__init__(*args, **kwargs) + request = self.context.get("request") + if request and request.user.is_authenticated: + self._filter_related_queryset(request) diff --git a/core/serializers.py b/core/serializers.py index 059cd3a4..87a91cdf 100644 --- a/core/serializers.py +++ b/core/serializers.py @@ -1,385 +1,88 @@ -"""DRF serializers for project-scoped core models. +"""DRF serializers for project-scoped core models and compatibility exports.""" -These serializers enforce the project's access rules at the API boundary. They do -more than simple field translation: several serializers limit related querysets to - the active project and validate that cross-project relationships cannot be posted. 
-""" +from importlib import import_module +from typing import TYPE_CHECKING, Any -from django.contrib.auth.models import Group from rest_framework import serializers from core.models import ( - BlueskyCredentials, Content, - Entity, - EntityAuthoritySnapshot, - EntityCandidate, - EntityMention, IngestionRun, IntakeAllowlist, NewsletterIntake, - Project, - ProjectConfig, - ReviewQueue, - SkillResult, - SourceConfig, - TopicCentroidSnapshot, UserFeedback, ) -from core.plugins import validate_plugin_config - - -class ProjectScopedSerializerMixin: - """Limit serializer relationship fields to objects the current user can access.""" - - def _filter_related_queryset(self, request): - """Constrain related-field querysets using the request user and project context.""" - - user = request.user - project = self.context.get("project") - if "group" in self.fields: - self.fields["group"].queryset = Group.objects.filter(user=user) - if "project" in self.fields: - self.fields["project"].queryset = Project.objects.filter( - group__user=user - ).distinct() - if "entity" in self.fields: - entity_queryset = ( - Entity.objects.filter(project=project) - if project - else Entity.objects.filter(project__group__user=user) - ) - self.fields["entity"].queryset = entity_queryset - if "merged_into" in self.fields: - merged_into_queryset = ( - Entity.objects.filter(project=project) - if project - else Entity.objects.filter(project__group__user=user) - ) - self.fields["merged_into"].queryset = merged_into_queryset - if "content" in self.fields: - content_queryset = ( - Content.objects.filter(project=project) - if project - else Content.objects.filter(project__group__user=user) - ) - self.fields["content"].queryset = content_queryset - if "superseded_by" in self.fields: - skill_result_queryset = ( - SkillResult.objects.filter(project=project) - if project - else SkillResult.objects.filter(project__group__user=user) - ) - self.fields["superseded_by"].queryset = skill_result_queryset - - def 
__init__(self, *args, **kwargs): - """Initialize the serializer and scope relation fields when authenticated.""" - - super().__init__(*args, **kwargs) - request = self.context.get("request") - if request and request.user.is_authenticated: - self._filter_related_queryset(request) - - -class ProjectSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): - """Serialize top-level project records.""" - - has_bluesky_credentials = serializers.SerializerMethodField() - bluesky_handle = serializers.SerializerMethodField() - bluesky_is_active = serializers.SerializerMethodField() - bluesky_last_verified_at = serializers.SerializerMethodField() - bluesky_last_error = serializers.SerializerMethodField() - - class Meta: - model = Project - fields = [ - "id", - "name", - "group", - "topic_description", - "content_retention_days", - "intake_token", - "intake_enabled", - "has_bluesky_credentials", - "bluesky_handle", - "bluesky_is_active", - "bluesky_last_verified_at", - "bluesky_last_error", - "created_at", - ] - read_only_fields = ["id", "created_at"] - - def _get_bluesky_credentials(self, obj: Project): - """Return the project's stored Bluesky credentials, if configured.""" - - try: - return obj.bluesky_credentials - except Project.bluesky_credentials.RelatedObjectDoesNotExist: - return None - - def get_has_bluesky_credentials(self, obj: Project) -> bool: - """Return whether the project has stored Bluesky credentials.""" - - return self._get_bluesky_credentials(obj) is not None - - def get_bluesky_handle(self, obj: Project) -> str: - """Return the stored Bluesky handle, or an empty string.""" - - credentials = self._get_bluesky_credentials(obj) - return credentials.handle if credentials else "" - - def get_bluesky_is_active(self, obj: Project) -> bool: - """Return whether the stored Bluesky credentials are currently active.""" - - credentials = self._get_bluesky_credentials(obj) - return credentials.is_active if credentials else False - - def 
get_bluesky_last_verified_at(self, obj: Project): - """Return the last successful verification timestamp, if available.""" - - credentials = self._get_bluesky_credentials(obj) - return credentials.last_verified_at if credentials else None - - def get_bluesky_last_error(self, obj: Project) -> str: - """Return the latest Bluesky verification error, or an empty string.""" - - credentials = self._get_bluesky_credentials(obj) - return credentials.last_error if credentials else "" - - -class ProjectConfigSerializer( - ProjectScopedSerializerMixin, serializers.ModelSerializer -): - """Serialize per-project authority and scoring settings.""" - - class Meta: - model = ProjectConfig - fields = [ - "id", - "project", - "upvote_authority_weight", - "downvote_authority_weight", - "authority_decay_rate", - ] - read_only_fields = ["id", "project"] - - -class BlueskyCredentialsSerializer( - ProjectScopedSerializerMixin, serializers.ModelSerializer -): - """Serialize project-scoped Bluesky credentials without exposing secrets.""" - - app_password = serializers.CharField( - write_only=True, - required=False, - allow_blank=True, - trim_whitespace=False, +from core.serializer_mixins import ProjectScopedSerializerMixin + +if TYPE_CHECKING: + from entities.serializers import ( + EntityAuthoritySnapshotSerializer, + EntityCandidateMergeSerializer, + EntityCandidateSerializer, + EntityMentionSummarySerializer, + EntitySerializer, ) - has_stored_credential = serializers.SerializerMethodField() - - class Meta: - model = BlueskyCredentials - fields = [ - "id", - "project", - "handle", - "pds_url", - "is_active", - "has_stored_credential", - "app_password", - "last_verified_at", - "last_error", - "created_at", - "updated_at", - ] - read_only_fields = [ - "id", - "project", - "has_stored_credential", - "last_verified_at", - "last_error", - "created_at", - "updated_at", - ] - - def get_has_stored_credential(self, obj: BlueskyCredentials) -> bool: - """Return whether the project has an encrypted 
Bluesky credential stored.""" - - return obj.has_stored_credential() - - def validate(self, attrs): - """Require an app password when creating a credential record.""" - - attrs = super().validate(attrs) - app_password = attrs.get("app_password", "") - if self.instance is None and not app_password: - raise serializers.ValidationError( - {"app_password": "A Bluesky app credential is required."} - ) - return attrs - - def create(self, validated_data): - """Encrypt the submitted Bluesky app password before saving the record.""" - - app_password = validated_data.pop("app_password", "") - instance = super().create(validated_data) - if app_password: - instance.set_app_password(app_password) - instance.save(update_fields=["app_password_encrypted", "updated_at"]) - return instance - - def update(self, instance, validated_data): - """Keep the stored credential unless a replacement app password is submitted.""" - - app_password = validated_data.pop("app_password", "") - instance = super().update(instance, validated_data) - if app_password: - instance.set_app_password(app_password) - instance.save(update_fields=["app_password_encrypted", "updated_at"]) - return instance - - -class EntitySerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): - """Serialize tracked entities for a project.""" - - mention_count = serializers.IntegerField(read_only=True) - latest_mentions = serializers.SerializerMethodField() - - class Meta: - model = Entity - fields = [ - "id", - "project", - "name", - "type", - "description", - "authority_score", - "website_url", - "github_url", - "linkedin_url", - "bluesky_handle", - "mastodon_handle", - "twitter_handle", - "mention_count", - "latest_mentions", - "created_at", - ] - read_only_fields = ["id", "project", "created_at"] - - def get_latest_mentions(self, obj): - """Return a compact summary of the most recent mentions for an entity.""" - - mentions = getattr(obj, "prefetched_mentions", None) - if mentions is None: - mentions = 
obj.mentions.select_related("content").order_by("-created_at") - return EntityMentionSummarySerializer(mentions[:3], many=True).data - - -class EntityAuthoritySnapshotSerializer(serializers.ModelSerializer): - """Serialize one persisted authority recomputation for an entity.""" - - class Meta: - model = EntityAuthoritySnapshot - fields = [ - "id", - "entity", - "project", - "computed_at", - "mention_component", - "feedback_component", - "duplicate_component", - "decayed_prior", - "final_score", - ] - read_only_fields = fields - - -class TopicCentroidSnapshotSerializer(serializers.ModelSerializer): - """Serialize one persisted topic-centroid recomputation for a project.""" - - class Meta: - model = TopicCentroidSnapshot - fields = [ - "id", - "project", - "computed_at", - "centroid_active", - "feedback_count", - "upvote_count", - "downvote_count", - "drift_from_previous", - "drift_from_week_ago", - ] - read_only_fields = fields - - -class TopicCentroidObservabilitySummarySerializer(serializers.Serializer): - """Serialize project-level centroid observability summary metrics.""" - - project = serializers.IntegerField() - snapshot_count = serializers.IntegerField() - active_snapshot_count = serializers.IntegerField() - avg_drift_from_previous = serializers.FloatField(allow_null=True) - avg_drift_from_week_ago = serializers.FloatField(allow_null=True) - latest_snapshot = TopicCentroidSnapshotSerializer(allow_null=True) - - -class EntityMentionSummarySerializer(serializers.ModelSerializer): - """Serialize a compact entity-mention summary for frontend display.""" - - content_id = serializers.IntegerField(read_only=True) - content_title = serializers.CharField(source="content.title", read_only=True) - - class Meta: - model = EntityMention - fields = [ - "id", - "content_id", - "content_title", - "role", - "sentiment", - "span", - "confidence", - "created_at", - ] - read_only_fields = fields - - -class EntityCandidateSerializer( - ProjectScopedSerializerMixin, 
serializers.ModelSerializer -): - """Serialize extracted entity candidates awaiting editorial review.""" - - first_seen_title = serializers.CharField( - source="first_seen_in.title", read_only=True + from pipeline.serializers import ReviewQueueSerializer, SkillResultSerializer + from trends.serializers import ( + TopicCentroidObservabilitySummarySerializer, + TopicCentroidSnapshotSerializer, ) - merged_into_name = serializers.CharField(source="merged_into.name", read_only=True) - - class Meta: - model = EntityCandidate - fields = [ - "id", - "project", - "name", - "suggested_type", - "first_seen_in", - "first_seen_title", - "occurrence_count", - "status", - "merged_into", - "merged_into_name", - "created_at", - "updated_at", - ] - read_only_fields = fields - -class EntityCandidateMergeSerializer( - ProjectScopedSerializerMixin, serializers.Serializer -): - """Validate merge requests for entity candidates.""" - - merged_into = serializers.PrimaryKeyRelatedField(queryset=Entity.objects.none()) +_COMPAT_SERIALIZER_EXPORTS = { + "EntityAuthoritySnapshotSerializer": ( + "entities.serializers", + "EntityAuthoritySnapshotSerializer", + ), + "EntityCandidateMergeSerializer": ( + "entities.serializers", + "EntityCandidateMergeSerializer", + ), + "EntityCandidateSerializer": ( + "entities.serializers", + "EntityCandidateSerializer", + ), + "EntityMentionSummarySerializer": ( + "entities.serializers", + "EntityMentionSummarySerializer", + ), + "EntitySerializer": ("entities.serializers", "EntitySerializer"), + "ReviewQueueSerializer": ("pipeline.serializers", "ReviewQueueSerializer"), + "SkillResultSerializer": ("pipeline.serializers", "SkillResultSerializer"), + "TopicCentroidObservabilitySummarySerializer": ( + "trends.serializers", + "TopicCentroidObservabilitySummarySerializer", + ), + "TopicCentroidSnapshotSerializer": ( + "trends.serializers", + "TopicCentroidSnapshotSerializer", + ), +} + +__all__ = [ + "ProjectScopedSerializerMixin", + 
"EntityAuthoritySnapshotSerializer", + "EntityCandidateMergeSerializer", + "EntityCandidateSerializer", + "EntityMentionSummarySerializer", + "EntitySerializer", + "ReviewQueueSerializer", + "SkillResultSerializer", + "TopicCentroidObservabilitySummarySerializer", + "TopicCentroidSnapshotSerializer", +] + + +def __getattr__(name: str) -> Any: + """Resolve compatibility serializer re-exports lazily.""" + + try: + module_name, attribute_name = _COMPAT_SERIALIZER_EXPORTS[name] + except KeyError as exc: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") from exc + + value = getattr(import_module(module_name), attribute_name) + globals()[name] = value + return value class ContentSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): @@ -436,43 +139,6 @@ def validate(self, attrs): return attrs -class SkillResultSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): - """Serialize persisted AI skill executions for content.""" - - class Meta: - model = SkillResult - fields = [ - "id", - "content", - "project", - "skill_name", - "status", - "result_data", - "error_message", - "model_used", - "latency_ms", - "confidence", - "created_at", - "superseded_by", - ] - read_only_fields = ["id", "project", "created_at"] - - def validate(self, attrs): - """Reject skill results whose content does not belong to the active project.""" - - project = ( - self.context.get("project") - or attrs.get("project") - or getattr(self.instance, "project", None) - ) - content = attrs.get("content") or getattr(self.instance, "content", None) - if project and content and content.project_id != project.id: - raise serializers.ValidationError( - {"content": "Content must belong to the selected project."} - ) - return attrs - - class UserFeedbackSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): """Serialize editor feedback attached to a content item.""" @@ -518,69 +184,6 @@ class Meta: read_only_fields = ["id", "project", 
"started_at"] -class SourceConfigSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): - """Serialize source-plugin configuration and normalize provider settings.""" - - class Meta: - model = SourceConfig - fields = [ - "id", - "project", - "plugin_name", - "config", - "is_active", - "last_fetched_at", - ] - read_only_fields = ["id", "project", "last_fetched_at"] - - def validate(self, attrs): - """Validate plugin-specific configuration with the plugin registry.""" - - plugin_name = attrs.get("plugin_name") or getattr( - self.instance, "plugin_name", None - ) - config = attrs.get("config") or getattr(self.instance, "config", {}) - if plugin_name: - try: - attrs["config"] = validate_plugin_config(plugin_name, config) - except ValueError as exc: - raise serializers.ValidationError({"config": str(exc)}) from exc - return attrs - - -class ReviewQueueSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): - """Serialize manual-review queue items for project content.""" - - class Meta: - model = ReviewQueue - fields = [ - "id", - "project", - "content", - "reason", - "confidence", - "created_at", - "resolved", - "resolution", - ] - read_only_fields = ["id", "project", "created_at"] - - def validate(self, attrs): - """Reject review items whose content does not belong to the active project.""" - - project = ( - self.context.get("project") - or attrs.get("project") - or getattr(self.instance, "project", None) - ) - content = attrs.get("content") or getattr(self.instance, "content", None) - if project and content and content.project_id != project.id: - raise serializers.ValidationError( - {"content": "Content must belong to the selected project."} - ) - return attrs - - class IntakeAllowlistSerializer( ProjectScopedSerializerMixin, serializers.ModelSerializer ): diff --git a/core/signals.py b/core/signals.py index 4cb25fa0..4ab7e4b6 100644 --- a/core/signals.py +++ b/core/signals.py @@ -1,64 +1,31 @@ -"""Signal handlers that adapt Anymail 
inbound events to project intake logic.""" +"""Signal handlers for cross-cutting core behaviors.""" from __future__ import annotations from typing import Any -from anymail.signals import inbound from django.db.models.signals import post_save from django.dispatch import receiver -from core.models import ProjectConfig, UserFeedback -from core.newsletters import process_inbound_newsletter -from core.tasks import queue_topic_centroid_recompute +from core.models import UserFeedback +from newsletters.signals import handle_anymail_inbound as _handle_anymail_inbound +from projects.models import ProjectConfig +from trends.tasks import queue_topic_centroid_recompute -def _address_to_string(address) -> str: - """Normalize an Anymail address object or string into plain text.""" - - if address is None: - return "" - addr_spec = getattr(address, "addr_spec", None) - if isinstance(addr_spec, str): - return addr_spec.strip() - return str(address).strip() - - -@receiver(inbound) def handle_anymail_inbound( sender: Any, event: Any, esp_name: str, **kwargs: Any, ) -> None: - """Translate an inbound Anymail event into the internal intake payload. - - Args: - sender: Signal sender supplied by Anymail. - event: Normalized inbound event object. - esp_name: Name of the email service provider that generated the event. - **kwargs: Additional Anymail signal metadata. 
- """ - - message = event.message - - recipients: list[str] = [] - if message.envelope_recipient: - recipients.append(message.envelope_recipient) - recipients.extend( - address.addr_spec - for address in getattr(message, "to", []) - if getattr(address, "addr_spec", "") - ) + """Preserve the legacy core.signals import path for inbound handling.""" - process_inbound_newsletter( - recipients=recipients, - sender_email=message.envelope_sender - or _address_to_string(getattr(message, "from_email", None)), - subject=message.subject or "", - raw_html=message.html or "", - raw_text=message.text or "", - message_id=str(message.get("Message-ID", "") or event.event_id or ""), + _handle_anymail_inbound( + sender=sender, + event=event, + esp_name=esp_name, + **kwargs, ) diff --git a/core/tasks.py b/core/tasks.py index 4805580d..3be63815 100644 --- a/core/tasks.py +++ b/core/tasks.py @@ -4,41 +4,23 @@ import math from collections import defaultdict from datetime import timedelta +from importlib import import_module +from typing import TYPE_CHECKING, Any, Protocol, cast from celery import shared_task from django.conf import settings -from django.core.cache import cache from django.db import transaction -from django.db.models import Count, Q +from django.db.models import Count, Model from django.utils import timezone -from core.deduplication import canonicalize_url from core.embeddings import ( - build_content_embedding_text, - delete_topic_centroid, - embed_text, upsert_content_embedding, - upsert_topic_centroid, ) from core.models import ( Content, - Entity, - EntityAuthoritySnapshot, - EntityMention, - EntityMentionRole, FeedbackType, - IngestionRun, - IntakeAllowlist, - NewsletterIntake, - NewsletterIntakeStatus, - Project, - ProjectConfig, - RunStatus, - SourceConfig, - TopicCentroidSnapshot, UserFeedback, ) -from core.newsletter_extraction import extract_newsletter_items from core.pipeline import ( RELEVANCE_SKILL_NAME, SUMMARIZATION_SKILL_NAME, @@ -46,7 +28,13 @@ 
execute_background_skill_result, process_content_pipeline, ) -from core.plugins import get_plugin_for_source_config +from entities.models import ( + Entity, + EntityAuthoritySnapshot, + EntityMention, + EntityMentionRole, +) +from projects.models import Project, ProjectConfig logger = logging.getLogger(__name__) @@ -55,71 +43,95 @@ EntityMentionRole.AUTHOR, EntityMentionRole.SUBJECT, ) -TOPIC_CENTROID_LOOKBACK_DAYS = 90 -TOPIC_CENTROID_MIN_UPVOTES = 10 -TOPIC_CENTROID_DOWNVOTE_WEIGHT = 0.25 -TOPIC_CENTROID_DEBOUNCE_SECONDS = 60 * 5 -TOPIC_CENTROID_DECAY_TAU_DAYS = 45 +if TYPE_CHECKING: + from ingestion.tasks import run_all_ingestions, run_ingestion + from newsletters.tasks import process_newsletter_intake + from trends.tasks import ( + TOPIC_CENTROID_MIN_UPVOTES, + queue_topic_centroid_recompute, + recompute_topic_centroid, + run_all_topic_centroid_recomputations, + ) -@shared_task(name="core.tasks.run_ingestion") -def run_ingestion(source_config_id: int): - """Fetch new content for one source config and record an ingestion run. 
+_COMPAT_TASK_EXPORTS = { + "process_newsletter_intake": ( + "newsletters.tasks", + "process_newsletter_intake", + ), + "run_all_ingestions": ("ingestion.tasks", "run_all_ingestions"), + "run_ingestion": ("ingestion.tasks", "run_ingestion"), + "TOPIC_CENTROID_MIN_UPVOTES": ( + "trends.tasks", + "TOPIC_CENTROID_MIN_UPVOTES", + ), + "queue_topic_centroid_recompute": ( + "trends.tasks", + "queue_topic_centroid_recompute", + ), + "recompute_topic_centroid": ("trends.tasks", "recompute_topic_centroid"), + "run_all_topic_centroid_recomputations": ( + "trends.tasks", + "run_all_topic_centroid_recomputations", + ), +} + +__all__ = [ + "process_newsletter_intake", + "run_all_ingestions", + "run_ingestion", + "TOPIC_CENTROID_MIN_UPVOTES", + "queue_topic_centroid_recompute", + "recompute_authority_scores", + "recompute_topic_centroid", + "run_all_authority_recomputations", + "run_all_topic_centroid_recomputations", + "run_relevance_scoring_skill", + "run_summarization_skill", + "queue_content_skill", + "process_content", + "upsert_content_embedding", +] + + +def __getattr__(name: str) -> Any: + """Resolve compatibility task re-exports lazily.""" - Args: - source_config_id: Primary key of the source configuration to ingest. + try: + module_name, attribute_name = _COMPAT_TASK_EXPORTS[name] + except KeyError as exc: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") from exc - Returns: - A summary containing fetched and ingested item counts. 
- """ + value = getattr(import_module(module_name), attribute_name) + globals()[name] = value + return value - source_config = SourceConfig.objects.select_related("project").get( - pk=source_config_id - ) - ingestion_run = IngestionRun.objects.create( - project=source_config.project, - plugin_name=source_config.plugin_name, - status=RunStatus.RUNNING, - ) - try: - items_fetched, items_ingested = _ingest_source_config(source_config) - except Exception as exc: - ingestion_run.status = RunStatus.FAILED - ingestion_run.completed_at = timezone.now() - ingestion_run.error_message = str(exc) - ingestion_run.save(update_fields=["status", "completed_at", "error_message"]) - logger.exception( - "Source ingestion failed", extra={"source_config_id": source_config_id} - ) - raise - - ingestion_run.status = RunStatus.SUCCESS - ingestion_run.completed_at = timezone.now() - ingestion_run.items_fetched = items_fetched - ingestion_run.items_ingested = items_ingested - ingestion_run.save( - update_fields=["status", "completed_at", "items_fetched", "items_ingested"] - ) - return {"items_fetched": items_fetched, "items_ingested": items_ingested} +class DelayedTask(Protocol): + """Protocol for Celery tasks that can run eagerly or via ``delay``.""" -@shared_task(name="core.tasks.run_all_ingestions") -def run_all_ingestions(): - """Queue ingestion for every active source configuration. + def __call__(self, *args: object, **kwargs: object) -> object: + pass - Returns: - The number of source configurations scheduled. 
- """ + def delay(self, *args: object, **kwargs: object) -> object: + pass + + +def _enqueue_task(task: object, *args: object) -> None: + """Dispatch a Celery task through a typed ``delay`` seam.""" + + cast(DelayedTask, task).delay(*args) - source_config_ids = list( - SourceConfig.objects.filter(is_active=True).values_list("id", flat=True) - ) - for source_config_id in source_config_ids: - if settings.CELERY_TASK_ALWAYS_EAGER: - run_ingestion(source_config_id) - else: - run_ingestion.delay(source_config_id) - return len(source_config_ids) + +def _require_pk(instance: Model) -> int: + """Return a saved model primary key as an ``int``.""" + + pk = instance.pk + if pk is None: + raise ValueError( + f"{instance.__class__.__name__} must be saved before task dispatch" + ) + return int(pk) @shared_task(name="core.tasks.run_all_authority_recomputations") @@ -135,20 +147,8 @@ def run_all_authority_recomputations(): if settings.CELERY_TASK_ALWAYS_EAGER: recompute_authority_scores(project_id) else: - recompute_authority_scores.delay(project_id) - return len(project_ids) + _enqueue_task(recompute_authority_scores, project_id) - -@shared_task(name="core.tasks.run_all_topic_centroid_recomputations") -def run_all_topic_centroid_recomputations(): - """Queue topic-centroid recomputation for every project.""" - - project_ids = list(Project.objects.values_list("id", flat=True)) - for project_id in project_ids: - if settings.CELERY_TASK_ALWAYS_EAGER: - recompute_topic_centroid(project_id) - else: - recompute_topic_centroid.delay(project_id) return len(project_ids) @@ -239,36 +239,37 @@ def recompute_authority_scores(project_id: int): (abs(value) for value in feedback_totals.values()), default=0.0 ) + entity_rows = [(_require_pk(entity), entity) for entity in entities] entity_updates = [] snapshots = [] snapshot_history = { - entity.id: list( + entity_pk: list( EntityAuthoritySnapshot.objects.filter(entity=entity) .order_by("-computed_at") .only("computed_at", "final_score") ) - for 
entity in entities + for entity_pk, entity in entity_rows } with transaction.atomic(): - for entity in entities: + for entity_pk, entity in entity_rows: mention_component = _normalize_log_scaled_component( - mention_counts.get(entity.id, 0), + mention_counts.get(entity_pk, 0), max_mention_count, ) feedback_component = _normalize_signed_component( - feedback_totals.get(entity.id, 0.0), + feedback_totals.get(entity_pk, 0.0), max_abs_feedback, ) duplicate_component = _normalize_log_scaled_component( - duplicate_totals.get(entity.id, 0), + duplicate_totals.get(entity_pk, 0), max_duplicate_count, ) decayed_prior = _get_decayed_prior_score( entity=entity, month_start=month_start, authority_decay_rate=config.authority_decay_rate, - snapshot_history=snapshot_history.get(entity.id, []), + snapshot_history=snapshot_history.get(entity_pk, []), ) final_score = _clamp_unit_interval( ( @@ -300,140 +301,6 @@ def recompute_authority_scores(project_id: int): return {"project_id": project_id, "entities_updated": len(entity_updates)} -@shared_task(name="core.tasks.recompute_topic_centroid") -def recompute_topic_centroid(project_id: int): - """Rebuild the project's feedback centroid from recent editorial signals.""" - - now = timezone.now() - window_start = now - timedelta(days=TOPIC_CENTROID_LOOKBACK_DAYS) - feedback_rows = list( - UserFeedback.objects.filter(project_id=project_id, created_at__gte=window_start) - .select_related("content") - .order_by("created_at") - ) - upvote_count = sum( - 1 for row in feedback_rows if row.feedback_type == FeedbackType.UPVOTE - ) - downvote_count = sum( - 1 for row in feedback_rows if row.feedback_type == FeedbackType.DOWNVOTE - ) - - try: - if upvote_count < TOPIC_CENTROID_MIN_UPVOTES: - delete_topic_centroid(project_id) - _create_topic_centroid_snapshot( - project_id=project_id, - computed_at=now, - centroid_active=False, - centroid_vector=[], - feedback_count=len(feedback_rows), - upvote_count=upvote_count, - downvote_count=downvote_count, - ) 
- return { - "project_id": project_id, - "feedback_count": len(feedback_rows), - "upvote_count": upvote_count, - "downvote_count": downvote_count, - "centroid_active": False, - } - - vector_cache: dict[int, list[float]] = {} - upvote_vectors: list[tuple[list[float], float]] = [] - downvote_vectors: list[tuple[list[float], float]] = [] - - for feedback in feedback_rows: - vector = vector_cache.get(feedback.content_id) - if vector is None: - vector = embed_text(build_content_embedding_text(feedback.content)) - vector_cache[feedback.content_id] = vector - weight = _feedback_decay_weight(feedback.created_at, now) - if feedback.feedback_type == FeedbackType.UPVOTE: - upvote_vectors.append((vector, weight)) - else: - downvote_vectors.append((vector, weight)) - - upvote_mean, upvote_weight = _weighted_mean_vector(upvote_vectors) - if not upvote_mean or upvote_weight <= 0: - delete_topic_centroid(project_id) - _create_topic_centroid_snapshot( - project_id=project_id, - computed_at=now, - centroid_active=False, - centroid_vector=[], - feedback_count=len(feedback_rows), - upvote_count=upvote_count, - downvote_count=downvote_count, - ) - return { - "project_id": project_id, - "feedback_count": len(feedback_rows), - "upvote_count": upvote_count, - "downvote_count": downvote_count, - "centroid_active": False, - } - - downvote_mean, downvote_weight = _weighted_mean_vector(downvote_vectors) - downvote_scale = 0.0 - if downvote_mean and downvote_weight > 0: - downvote_scale = TOPIC_CENTROID_DOWNVOTE_WEIGHT * min( - 1.0, upvote_weight / downvote_weight - ) - - centroid_vector = [ - upvote_value - downvote_scale * downvote_value - for upvote_value, downvote_value in zip( - upvote_mean, - downvote_mean or [0.0] * len(upvote_mean), - ) - ] - normalized_centroid = _normalize_vector(centroid_vector) - if not normalized_centroid: - delete_topic_centroid(project_id) - _create_topic_centroid_snapshot( - project_id=project_id, - computed_at=now, - centroid_active=False, - 
centroid_vector=[], - feedback_count=len(feedback_rows), - upvote_count=upvote_count, - downvote_count=downvote_count, - ) - return { - "project_id": project_id, - "feedback_count": len(feedback_rows), - "upvote_count": upvote_count, - "downvote_count": downvote_count, - "centroid_active": False, - } - - upsert_topic_centroid( - project_id, - normalized_centroid, - upvote_count=upvote_count, - downvote_count=downvote_count, - feedback_count=len(feedback_rows), - ) - _create_topic_centroid_snapshot( - project_id=project_id, - computed_at=now, - centroid_active=True, - centroid_vector=normalized_centroid, - feedback_count=len(feedback_rows), - upvote_count=upvote_count, - downvote_count=downvote_count, - ) - return { - "project_id": project_id, - "feedback_count": len(feedback_rows), - "upvote_count": upvote_count, - "downvote_count": downvote_count, - "centroid_active": True, - } - finally: - cache.delete(_topic_centroid_debounce_key(project_id)) - - @shared_task(name="core.tasks.run_relevance_scoring_skill", ignore_result=True) def run_relevance_scoring_skill(skill_result_id: int): """Execute a pending ad hoc relevance skill result in the background.""" @@ -463,15 +330,17 @@ def queue_content_skill(content: Content, skill_name: str): skill_result = create_pending_skill_result(content, skill_name) if skill_name == RELEVANCE_SKILL_NAME: + skill_result_pk = _require_pk(skill_result) if settings.CELERY_TASK_ALWAYS_EAGER: - run_relevance_scoring_skill(skill_result.id) + run_relevance_scoring_skill(skill_result_pk) else: - run_relevance_scoring_skill.delay(skill_result.id) + _enqueue_task(run_relevance_scoring_skill, skill_result_pk) elif skill_name == SUMMARIZATION_SKILL_NAME: + skill_result_pk = _require_pk(skill_result) if settings.CELERY_TASK_ALWAYS_EAGER: - run_summarization_skill(skill_result.id) + run_summarization_skill(skill_result_pk) else: - run_summarization_skill.delay(skill_result.id) + _enqueue_task(run_summarization_skill, skill_result_pk) else: raise 
ValueError(f"Unsupported async skill name: {skill_name}") @@ -479,23 +348,6 @@ def queue_content_skill(content: Content, skill_name: str): return skill_result -def queue_topic_centroid_recompute(project_id: int) -> bool: - """Debounce and queue topic-centroid recomputation for one project.""" - - if not cache.add( - _topic_centroid_debounce_key(project_id), - timezone.now().isoformat(), - timeout=TOPIC_CENTROID_DEBOUNCE_SECONDS, - ): - return False - - if settings.CELERY_TASK_ALWAYS_EAGER: - recompute_topic_centroid(project_id) - else: - recompute_topic_centroid.delay(project_id) - return True - - def _normalize_log_scaled_component(value: int, max_value: int) -> float: """Normalize a non-negative count into the authority component range [0.5, 1].""" @@ -512,119 +364,6 @@ def _normalize_signed_component(value: float, max_abs_value: float) -> float: return _clamp_unit_interval(0.5 + 0.5 * (value / max_abs_value)) -def _feedback_decay_weight(created_at, now) -> float: - """Return the EMA-style decay weight for one feedback event.""" - - age_days = max(0.0, (now - created_at).total_seconds() / 86400) - return math.exp(-age_days / TOPIC_CENTROID_DECAY_TAU_DAYS) - - -def _create_topic_centroid_snapshot( - *, - project_id: int, - computed_at, - centroid_active: bool, - centroid_vector: list[float], - feedback_count: int, - upvote_count: int, - downvote_count: int, -) -> TopicCentroidSnapshot: - """Persist one centroid snapshot and derived drift metrics.""" - - previous_active_snapshot = ( - TopicCentroidSnapshot.objects.filter( - project_id=project_id, centroid_active=True - ) - .order_by("-computed_at") - .only("centroid_vector", "computed_at") - .first() - ) - week_ago_snapshot = ( - TopicCentroidSnapshot.objects.filter( - project_id=project_id, - centroid_active=True, - computed_at__lte=computed_at - timedelta(days=7), - ) - .order_by("-computed_at") - .only("centroid_vector", "computed_at") - .first() - ) - - snapshot = TopicCentroidSnapshot.objects.create( - 
project_id=project_id, - centroid_active=centroid_active, - centroid_vector=centroid_vector, - feedback_count=feedback_count, - upvote_count=upvote_count, - downvote_count=downvote_count, - drift_from_previous=( - _cosine_distance(centroid_vector, previous_active_snapshot.centroid_vector) - if centroid_active and previous_active_snapshot is not None - else None - ), - drift_from_week_ago=( - _cosine_distance(centroid_vector, week_ago_snapshot.centroid_vector) - if centroid_active and week_ago_snapshot is not None - else None - ), - ) - if snapshot.computed_at != computed_at: - TopicCentroidSnapshot.objects.filter(pk=snapshot.pk).update( - computed_at=computed_at - ) - snapshot.computed_at = computed_at - return snapshot - - -def _cosine_distance(left: list[float], right: list[float]) -> float | None: - """Return cosine distance between two vectors when both are usable.""" - - if not left or not right or len(left) != len(right): - return None - left_norm = math.sqrt(sum(value * value for value in left)) - right_norm = math.sqrt(sum(value * value for value in right)) - if left_norm <= 0 or right_norm <= 0: - return None - cosine_similarity = sum( - left_value * right_value for left_value, right_value in zip(left, right) - ) / (left_norm * right_norm) - return max(0.0, min(2.0, 1.0 - max(-1.0, min(1.0, cosine_similarity)))) - - -def _weighted_mean_vector( - weighted_vectors: list[tuple[list[float], float]], -) -> tuple[list[float], float]: - """Compute the weighted mean vector and total contributing weight.""" - - if not weighted_vectors: - return [], 0.0 - dimension = len(weighted_vectors[0][0]) - totals = [0.0] * dimension - total_weight = 0.0 - for vector, weight in weighted_vectors: - total_weight += weight - for index, value in enumerate(vector): - totals[index] += float(value) * weight - if total_weight <= 0: - return [], 0.0 - return ([value / total_weight for value in totals], total_weight) - - -def _normalize_vector(vector: list[float]) -> list[float]: - 
"""Normalize a dense vector to unit length.""" - - magnitude = math.sqrt(sum(value * value for value in vector)) - if magnitude <= 0: - return [] - return [float(value) / magnitude for value in vector] - - -def _topic_centroid_debounce_key(project_id: int) -> str: - """Return the cache key used to debounce centroid recomputations.""" - - return f"topic-centroid-recompute:{project_id}" - - def _get_decayed_prior_score( *, entity: Entity, @@ -659,146 +398,12 @@ def _clamp_unit_interval(value: float) -> float: return max(0.0, min(1.0, float(value))) -def _ingest_source_config(source_config: SourceConfig) -> tuple[int, int]: - """Fetch items from a configured source and create new content rows.""" - - plugin = get_plugin_for_source_config(source_config) - fetched_items = plugin.fetch_new_content(source_config.last_fetched_at) - ingested_count = 0 - for item in fetched_items: - if _content_exists_for_item(source_config, item): - continue - source_metadata = getattr(item, "source_metadata", None) or {} - content = Content.objects.create( - project=source_config.project, - entity=_match_entity_for_item(plugin, item), - url=item.url, - canonical_url=canonicalize_url(item.url), - title=item.title[:512], - author=item.author[:255], - source_plugin=item.source_plugin, - published_date=item.published_date, - content_text=item.content_text, - source_metadata=source_metadata, - ) - _schedule_content_processing(content) - ingested_count += 1 - source_config.last_fetched_at = timezone.now() - source_config.save(update_fields=["last_fetched_at"]) - return len(fetched_items), ingested_count - - -def _content_exists_for_item(source_config: SourceConfig, item) -> bool: - """Check whether a fetched item already exists for the project.""" - - post_uri = (getattr(item, "source_metadata", None) or {}).get("post_uri") - if post_uri: - return Content.objects.filter( - project=source_config.project, - source_plugin=item.source_plugin, - source_metadata__post_uri=post_uri, - ).exists() - 
canonical_url = canonicalize_url(item.url) - return ( - Content.objects.filter( - project=source_config.project, - source_plugin=item.source_plugin, - ) - .filter(Q(canonical_url=canonical_url) | Q(url=item.url)) - .exists() - ) - - -def _match_entity_for_item(plugin, item): - """Resolve the entity for an item while preserving older plugin mocks.""" - - if callable(getattr(type(plugin), "match_entity_for_item", None)): - return plugin.match_entity_for_item(item) - return plugin.match_entity_for_url(item.url) - - -@shared_task(name="core.tasks.process_newsletter_intake") -def process_newsletter_intake(intake_id: int): - """Convert a stored newsletter email into content rows. - - Args: - intake_id: Primary key of the ``NewsletterIntake`` row to process. - - Returns: - A summary containing the final intake status and ingested item count. - """ - - intake = NewsletterIntake.objects.select_related("project").get(pk=intake_id) - - allowlist = IntakeAllowlist.objects.filter( - project=intake.project, - sender_email=intake.sender_email, - confirmed_at__isnull=False, - ).first() - if allowlist is None: - intake.status = NewsletterIntakeStatus.PENDING - intake.error_message = "Sender has not confirmed newsletter intake." 
- intake.save(update_fields=["status", "error_message"]) - return {"status": intake.status, "items_ingested": 0} - - extracted_items = extract_newsletter_items( - subject=intake.subject, - raw_html=intake.raw_html, - raw_text=intake.raw_text, - ) - ingested_count = 0 - for item in extracted_items: - canonical_url = canonicalize_url(item.url) - if ( - Content.objects.filter( - project=intake.project, - source_plugin="newsletter", - ) - .filter(Q(canonical_url=canonical_url) | Q(url=item.url)) - .exists() - ): - continue - content = Content.objects.create( - project=intake.project, - url=item.url, - canonical_url=canonical_url, - title=item.title[:512], - author=intake.sender_email[:255], - source_plugin="newsletter", - published_date=timezone.now(), - content_text=item.excerpt or intake.raw_text, - source_metadata={ - "newsletter_intake_id": intake.id, - "sender_email": intake.sender_email, - "position": item.position, - }, - ) - _schedule_content_processing(content) - ingested_count += 1 - - intake.status = NewsletterIntakeStatus.EXTRACTED - intake.error_message = "" - intake.extraction_result = { - "method": "heuristic", - "items": [ - { - "url": item.url, - "title": item.title, - "excerpt": item.excerpt, - "position": item.position, - } - for item in extracted_items - ], - } - intake.save(update_fields=["status", "error_message", "extraction_result"]) - return {"status": intake.status, "items_ingested": ingested_count} - - def _schedule_content_processing(content: Content) -> None: """Ensure a content row is embedded before it enters the AI pipeline.""" upsert_content_embedding(content) + content_pk = _require_pk(content) if settings.CELERY_TASK_ALWAYS_EAGER: - process_content(content.id) + process_content(content_pk) else: - process_content.delay(content.id) + _enqueue_task(process_content, content_pk) diff --git a/core/tests/test_admin.py b/core/tests/test_admin.py index e7717181..71f96ad9 100644 --- a/core/tests/test_admin.py +++ b/core/tests/test_admin.py @@ 
-5,28 +5,17 @@ import pytest from django.contrib import messages from django.contrib.admin.sites import AdminSite -from django.contrib.auth.models import Group from django.utils import timezone +from content.admin import ContentAdmin, UserFeedbackAdmin from core.admin import ( - BlueskyCredentialsAdmin, - BlueskyCredentialsAdminForm, - ContentAdmin, DuplicateStateFilter, - EntityAdmin, - EntityAuthoritySnapshotAdmin, - EntityCandidateAdmin, HighValueFilter, - IngestionRunAdmin, - ProjectConfigAdmin, ReviewQueueAdmin, SkillResultAdmin, - SourceConfigAdmin, TopicCentroidSnapshotAdmin, - UserFeedbackAdmin, ) from core.models import ( - BlueskyCredentials, Content, Entity, EntityAuthoritySnapshot, @@ -34,17 +23,27 @@ EntityCandidateStatus, EntityMention, IngestionRun, - Project, - ProjectConfig, ReviewQueue, ReviewReason, RunStatus, SkillResult, - SourceConfig, - SourcePluginName, TopicCentroidSnapshot, UserFeedback, ) +from entities.admin import ( + EntityAdmin, + EntityAuthoritySnapshotAdmin, + EntityCandidateAdmin, +) +from ingestion.admin import IngestionRunAdmin +from projects.admin import ( + BlueskyCredentialsAdmin, + BlueskyCredentialsAdminForm, + ProjectConfigAdmin, + SourceConfigAdmin, +) +from projects.model_support import SourcePluginName +from projects.models import BlueskyCredentials, Project, ProjectConfig, SourceConfig pytestmark = pytest.mark.django_db @@ -54,12 +53,8 @@ def source_admin_context(django_user_model): user = django_user_model.objects.create_user( username="admin-owner", password="testpass123" ) - group = Group.objects.create(name="admin-team") - user.groups.add(group) - project = Project.objects.create( - name="Admin Project", group=group, topic_description="Infra" - ) - return SimpleNamespace(user=user, group=group, project=project) + project = Project.objects.create(name="Admin Project", topic_description="Infra") + return SimpleNamespace(user=user, project=project) def test_test_source_connection_reports_success(source_admin_context, 
mocker): @@ -71,11 +66,11 @@ def test_test_source_connection_reports_success(source_admin_context, mocker): plugin = mocker.Mock() plugin.health_check.return_value = True validate_mock = mocker.patch( - "core.admin.validate_plugin_config", + "projects.admin.validate_plugin_config", return_value={"feed_url": "https://example.com/feed.xml"}, ) get_plugin_mock = mocker.patch( - "core.admin.get_plugin_for_source_config", return_value=plugin + "projects.admin.get_plugin_for_source_config", return_value=plugin ) admin_instance = SourceConfigAdmin(SourceConfig, AdminSite()) admin_instance.message_user = mocker.Mock() @@ -130,7 +125,6 @@ def test_topic_centroid_snapshot_admin_changelist_view_builds_dashboard_stats( ): second_project = Project.objects.create( name="Second Admin Project", - group=source_admin_context.group, topic_description="Analytics", ) fixed_now = timezone.now() @@ -168,7 +162,7 @@ def test_topic_centroid_snapshot_admin_changelist_view_builds_dashboard_stats( "django.contrib.admin.options.ModelAdmin.changelist_view", side_effect=lambda request, extra_context=None: extra_context, ) - mocker.patch("core.admin.timezone.now", return_value=fixed_now) + mocker.patch("trends.admin.timezone.now", return_value=fixed_now) response = admin_instance.changelist_view(request=SimpleNamespace()) @@ -188,7 +182,7 @@ def test_topic_centroid_snapshot_admin_changelist_view_builds_dashboard_stats( assert len(response["centroid_project_drilldowns"]) == 2 assert response["centroid_project_drilldowns"][0]["project_name"] == "Admin Project" assert response["centroid_project_drilldowns"][0]["href"] == ( - "/admin/core/topiccentroidsnapshot/?project__id__exact=" + "/admin/trends/topiccentroidsnapshot/?project__id__exact=" f"{source_admin_context.project.id}" ) assert response["centroid_project_drilldowns"][0]["drift_from_previous"] == "10.0%" @@ -201,7 +195,7 @@ def test_test_source_connection_reports_failures(source_admin_context, mocker): config={"feed_url": 
"https://example.com/feed.xml"}, ) mocker.patch( - "core.admin.validate_plugin_config", + "projects.admin.validate_plugin_config", side_effect=ValueError("Missing required config field: feed_url"), ) admin_instance = SourceConfigAdmin(SourceConfig, AdminSite()) diff --git a/core/tests/test_api.py b/core/tests/test_api.py index 65fca82f..c78f4a80 100644 --- a/core/tests/test_api.py +++ b/core/tests/test_api.py @@ -2,13 +2,11 @@ from unittest.mock import patch from django.contrib.auth import get_user_model -from django.contrib.auth.models import Group from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase from core.models import ( - BlueskyCredentials, Content, Entity, EntityAuthoritySnapshot, @@ -20,18 +18,23 @@ IntakeAllowlist, NewsletterIntake, NewsletterIntakeStatus, - Project, - ProjectConfig, ReviewQueue, ReviewReason, RunStatus, SkillResult, SkillStatus, - SourceConfig, - SourcePluginName, TopicCentroidSnapshot, UserFeedback, ) +from projects.model_support import SourcePluginName +from projects.models import ( + BlueskyCredentials, + Project, + ProjectConfig, + ProjectMembership, + ProjectRole, + SourceConfig, +) class ProjectScopedApiTests(APITestCase): @@ -43,20 +46,24 @@ def setUp(self): self.other_user = user_model.objects.create_user( username="other", password="testpass123" ) - self.owner_group = Group.objects.create(name="owner-team") - self.owner.groups.add(self.owner_group) - self.other_group = Group.objects.create(name="other-team") - self.other_user.groups.add(self.other_group) self.owner_project = Project.objects.create( name="Owner Project", - group=self.owner_group, topic_description="Platform engineering", ) self.other_project = Project.objects.create( name="Other Project", - group=self.other_group, topic_description="Frontend", ) + ProjectMembership.objects.create( + user=self.owner, + project=self.owner_project, + role=ProjectRole.ADMIN, + ) + ProjectMembership.objects.create( + 
user=self.other_user, + project=self.other_project, + role=ProjectRole.ADMIN, + ) self.owner_entity = Entity.objects.create( project=self.owner_project, name="Owner Entity", @@ -172,7 +179,7 @@ def test_project_list_requires_authentication(self): }, ) - def test_project_list_is_scoped_to_request_user_groups(self): + def test_project_list_is_scoped_to_request_user_memberships(self): BlueskyCredentials.objects.create( project=self.owner_project, handle="owner-project.bsky.social", @@ -185,6 +192,7 @@ def test_project_list_is_scoped_to_request_user_groups(self): self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(len(response.json()), 1) self.assertEqual(response.json()[0]["id"], self.owner_project.id) + self.assertEqual(response.json()[0]["user_role"], ProjectRole.ADMIN) self.assertEqual( response.json()[0]["intake_token"], self.owner_project.intake_token ) diff --git a/core/tests/test_bluesky.py b/core/tests/test_bluesky.py index 45910176..80443a72 100644 --- a/core/tests/test_bluesky.py +++ b/core/tests/test_bluesky.py @@ -2,30 +2,20 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import Group from core.models import ( - BlueskyCredentials, Entity, - Project, - SourceConfig, - SourcePluginName, ) from core.plugins.bluesky import BlueskySourcePlugin +from projects.model_support import SourcePluginName +from projects.models import BlueskyCredentials, Project, SourceConfig pytestmark = pytest.mark.django_db @pytest.fixture -def bluesky_context(django_user_model): - user = django_user_model.objects.create_user( - username="bluesky-owner", password="testpass123" - ) - group = Group.objects.create(name="bluesky-team") - user.groups.add(group) - project = Project.objects.create( - name="Bluesky Project", group=group, topic_description="Infra" - ) +def bluesky_context(): + project = Project.objects.create(name="Bluesky Project", topic_description="Infra") entity = Entity.objects.create( project=project, 
name="Alice", @@ -42,7 +32,9 @@ def bluesky_context(django_user_model): def test_bluesky_validate_config_normalizes_defaults_and_rejects_invalid_values(): - assert BlueskySourcePlugin.validate_config({"author_handle": "@Alice.BSKY.social"}) == { + assert BlueskySourcePlugin.validate_config( + {"author_handle": "@Alice.BSKY.social"} + ) == { "author_handle": "alice.bsky.social", "include_replies": False, "max_posts_per_fetch": 100, @@ -74,7 +66,9 @@ def test_bluesky_validate_config_normalizes_defaults_and_rejects_invalid_values( with pytest.raises(ValueError, match="feed_uri must be a Bluesky feed generator"): BlueskySourcePlugin.validate_config({"feed_uri": "https://example.com/feed"}) - with pytest.raises(ValueError, match="max_posts_per_fetch must be a positive integer"): + with pytest.raises( + ValueError, match="max_posts_per_fetch must be a positive integer" + ): BlueskySourcePlugin.validate_config( {"author_handle": "alice.bsky.social", "max_posts_per_fetch": 0} ) @@ -214,10 +208,10 @@ def test_bluesky_credentials_encrypt_password_and_normalize_pds_url(bluesky_cont assert credentials.get_app_password() == "app-password" -def test_bluesky_client_uses_authenticated_project_credentials( - bluesky_context, mocker -): - credentials = BlueskyCredentials(project=bluesky_context.project, handle="alice.bsky.social") +def test_bluesky_client_uses_authenticated_project_credentials(bluesky_context, mocker): + credentials = BlueskyCredentials( + project=bluesky_context.project, handle="alice.bsky.social" + ) credentials.set_app_password("app-password") credentials.save() client = mocker.Mock() @@ -233,7 +227,9 @@ def test_bluesky_client_uses_authenticated_project_credentials( def test_bluesky_health_check_records_credential_errors(bluesky_context, mocker): - credentials = BlueskyCredentials(project=bluesky_context.project, handle="alice.bsky.social") + credentials = BlueskyCredentials( + project=bluesky_context.project, handle="alice.bsky.social" + ) 
credentials.set_app_password("app-password") credentials.save() plugin = BlueskySourcePlugin(bluesky_context.source_config) @@ -252,7 +248,9 @@ def test_bluesky_health_check_records_credential_errors(bluesky_context, mocker) def test_bluesky_verify_credentials_uses_authenticated_session_check( bluesky_context, mocker ): - credentials = BlueskyCredentials(project=bluesky_context.project, handle="alice.bsky.social") + credentials = BlueskyCredentials( + project=bluesky_context.project, handle="alice.bsky.social" + ) credentials.set_app_password("app-password") credentials.save() client = mocker.Mock() diff --git a/core/tests/test_embeddings.py b/core/tests/test_embeddings.py index 9b07e0be..c79990b0 100644 --- a/core/tests/test_embeddings.py +++ b/core/tests/test_embeddings.py @@ -4,7 +4,6 @@ import httpx import pytest -from django.contrib.auth.models import Group from django.core.management import CommandError, call_command from django.db.models import Count from qdrant_client.http.exceptions import ResponseHandlingException @@ -27,11 +26,8 @@ Content, Entity, IngestionRun, - Project, ReviewQueue, SkillResult, - SourceConfig, - SourcePluginName, UserFeedback, ) from core.pipeline import ( @@ -39,6 +35,8 @@ RELEVANCE_SKILL_NAME, SUMMARIZATION_SKILL_NAME, ) +from projects.model_support import SourcePluginName +from projects.models import Project, SourceConfig pytestmark = pytest.mark.django_db @@ -55,10 +53,8 @@ def embedding_context(django_user_model): user = django_user_model.objects.create_user( username="embed-owner", password="testpass123" ) - group = Group.objects.create(name="embedding-team") - user.groups.add(group) project = Project.objects.create( - name="Embedding Project", group=group, topic_description="Infra" + name="Embedding Project", topic_description="Infra" ) content = Content.objects.create( project=project, @@ -69,7 +65,7 @@ def embedding_context(django_user_model): published_date="2026-04-20T12:00:00Z", content_text="This article covers platform 
engineering practices.", ) - return SimpleNamespace(user=user, group=group, project=project, content=content) + return SimpleNamespace(user=user, project=project, content=content) def test_upsert_content_embedding_persists_embedding_id_and_payload( @@ -487,13 +483,8 @@ def test_sync_embeddings_scopes_to_requested_content_id(embedding_context, mocke def test_sync_embeddings_filters_project_and_references_only( embedding_context, django_user_model, mocker ): - other_user = django_user_model.objects.create_user( - username="embed-owner-2", password="testpass123" - ) - other_group = Group.objects.create(name="embedding-team-2") - other_user.groups.add(other_group) other_project = Project.objects.create( - name="Other Embedding Project", group=other_group, topic_description="Other" + name="Other Embedding Project", topic_description="Other" ) same_project_reference = Content.objects.create( project=embedding_context.project, diff --git a/core/tests/test_invitations.py b/core/tests/test_invitations.py new file mode 100644 index 00000000..855f7633 --- /dev/null +++ b/core/tests/test_invitations.py @@ -0,0 +1,249 @@ +from django.core import mail +from django.test import override_settings +from django.urls import reverse +from rest_framework import status +from rest_framework.test import APITestCase + +from projects.models import Project, ProjectMembership, ProjectRole +from users.models import MembershipInvitation + + +@override_settings( + EMAIL_BACKEND="django.core.mail.backends.locmem.EmailBackend", + FRONTEND_BASE_URL="http://localhost:3000", +) +class ProjectMembershipAndInvitationApiTests(APITestCase): + def setUp(self): + user_model = self.get_user_model() + self.admin_user = user_model.objects.create_user( + username="project-admin", + email="admin@example.com", + password="testpass123", + display_name="Project Admin", + ) + self.second_admin = user_model.objects.create_user( + username="second-admin", + email="second-admin@example.com", + password="testpass123", + 
) + self.member_user = user_model.objects.create_user( + username="project-member", + email="member@example.com", + password="testpass123", + ) + self.reader_user = user_model.objects.create_user( + username="project-reader", + email="reader@example.com", + password="testpass123", + ) + self.invited_user = user_model.objects.create_user( + username="invited-user", + email="invitee@example.com", + password="testpass123", + ) + + self.project = Project.objects.create( + name="Membership Project", + topic_description="Platform engineering", + ) + self.admin_membership = ProjectMembership.objects.create( + user=self.admin_user, + project=self.project, + role=ProjectRole.ADMIN, + ) + self.second_admin_membership = ProjectMembership.objects.create( + user=self.second_admin, + project=self.project, + role=ProjectRole.ADMIN, + ) + self.member_membership = ProjectMembership.objects.create( + user=self.member_user, + project=self.project, + role=ProjectRole.MEMBER, + ) + self.reader_membership = ProjectMembership.objects.create( + user=self.reader_user, + project=self.project, + role=ProjectRole.READER, + ) + + @staticmethod + def get_user_model(): + from django.contrib.auth import get_user_model + + return get_user_model() + + def test_project_create_assigns_creator_as_admin_membership(self): + creator = self.get_user_model().objects.create_user( + username="creator", + email="creator@example.com", + password="testpass123", + ) + self.client.force_authenticate(creator) + + response = self.client.post( + reverse("v1:project-list"), + { + "name": "Creator Project", + "topic_description": "Creator-owned project", + "content_retention_days": 90, + }, + format="json", + ) + + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + project = Project.objects.get(name="Creator Project") + membership = ProjectMembership.objects.get(project=project, user=creator) + self.assertEqual(membership.role, ProjectRole.ADMIN) + self.assertEqual(response.json()["user_role"], 
ProjectRole.ADMIN) + + def test_project_admin_can_list_update_and_remove_memberships(self): + self.client.force_authenticate(self.admin_user) + + list_response = self.client.get( + reverse( + "v1:project-membership-list", + kwargs={"project_id": self.project.id}, + ) + ) + self.assertEqual(list_response.status_code, status.HTTP_200_OK) + self.assertEqual(len(list_response.json()), 4) + + update_response = self.client.patch( + reverse( + "v1:project-membership-detail", + kwargs={ + "project_id": self.project.id, + "pk": self.member_membership.id, + }, + ), + {"role": ProjectRole.READER}, + format="json", + ) + self.assertEqual(update_response.status_code, status.HTTP_200_OK) + self.member_membership.refresh_from_db() + self.assertEqual(self.member_membership.role, ProjectRole.READER) + + delete_response = self.client.delete( + reverse( + "v1:project-membership-detail", + kwargs={ + "project_id": self.project.id, + "pk": self.reader_membership.id, + }, + ) + ) + self.assertEqual(delete_response.status_code, status.HTTP_204_NO_CONTENT) + self.assertFalse( + ProjectMembership.objects.filter(pk=self.reader_membership.id).exists() + ) + + def test_last_admin_cannot_be_demoted_or_removed(self): + self.second_admin_membership.delete() + self.client.force_authenticate(self.admin_user) + + demote_response = self.client.patch( + reverse( + "v1:project-membership-detail", + kwargs={ + "project_id": self.project.id, + "pk": self.admin_membership.id, + }, + ), + {"role": ProjectRole.MEMBER}, + format="json", + ) + self.assertEqual(demote_response.status_code, status.HTTP_400_BAD_REQUEST) + + delete_response = self.client.delete( + reverse( + "v1:project-membership-detail", + kwargs={ + "project_id": self.project.id, + "pk": self.admin_membership.id, + }, + ) + ) + self.assertEqual(delete_response.status_code, status.HTTP_400_BAD_REQUEST) + + def test_project_admin_can_create_and_revoke_invitation(self): + self.client.force_authenticate(self.admin_user) + + create_response = 
self.client.post( + reverse( + "v1:project-invitation-list", + kwargs={"project_id": self.project.id}, + ), + {"email": "invitee@example.com", "role": ProjectRole.MEMBER}, + format="json", + ) + + self.assertEqual(create_response.status_code, status.HTTP_201_CREATED) + invitation = MembershipInvitation.objects.get(project=self.project) + self.assertEqual(invitation.invited_by, self.admin_user) + self.assertEqual(len(mail.outbox), 1) + self.assertIn(invitation.token, mail.outbox[0].body) + + revoke_response = self.client.delete( + reverse( + "v1:project-invitation-detail", + kwargs={"project_id": self.project.id, "pk": invitation.id}, + ) + ) + self.assertEqual(revoke_response.status_code, status.HTTP_204_NO_CONTENT) + invitation.refresh_from_db() + self.assertIsNotNone(invitation.revoked_at) + + def test_invited_user_can_view_and_accept_invitation_token(self): + invitation = MembershipInvitation.objects.create( + project=self.project, + email=self.invited_user.email, + role=ProjectRole.READER, + invited_by=self.admin_user, + ) + + public_response = self.client.get( + reverse( + "membership-invitation-token", + kwargs={"token": invitation.token}, + ) + ) + self.assertEqual(public_response.status_code, status.HTTP_200_OK) + self.assertEqual(public_response.json()["project_name"], self.project.name) + + self.client.force_authenticate(self.invited_user) + accept_response = self.client.post( + reverse( + "membership-invitation-token", + kwargs={"token": invitation.token}, + ), + format="json", + ) + self.assertEqual(accept_response.status_code, status.HTTP_200_OK) + + invitation.refresh_from_db() + membership = ProjectMembership.objects.get( + project=self.project, + user=self.invited_user, + ) + self.assertEqual(membership.role, ProjectRole.READER) + self.assertIsNotNone(invitation.accepted_at) + + def test_accept_requires_matching_email(self): + invitation = MembershipInvitation.objects.create( + project=self.project, + email="expected@example.com", + 
role=ProjectRole.MEMBER, + invited_by=self.admin_user, + ) + self.client.force_authenticate(self.member_user) + + response = self.client.post( + reverse( + "membership-invitation-token", + kwargs={"token": invitation.token}, + ), + format="json", + ) + + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) diff --git a/core/tests/test_membership.py b/core/tests/test_membership.py new file mode 100644 index 00000000..fd4cc701 --- /dev/null +++ b/core/tests/test_membership.py @@ -0,0 +1,77 @@ +import pytest + +from core.permissions import get_user_role, get_visible_projects_queryset +from projects.models import Project, ProjectMembership, ProjectRole + +pytestmark = pytest.mark.django_db + + +def test_visible_projects_queryset_uses_memberships(django_user_model): + user = django_user_model.objects.create_user( + username="member-user", + password="testpass123", + ) + outsider = django_user_model.objects.create_user( + username="outsider-user", + password="testpass123", + ) + project = Project.objects.create( + name="Membership Project", + topic_description="Platform engineering", + ) + other_project = Project.objects.create( + name="Outsider Project", + topic_description="Frontend", + ) + ProjectMembership.objects.create( + user=user, + project=project, + role=ProjectRole.ADMIN, + ) + ProjectMembership.objects.create( + user=outsider, + project=other_project, + role=ProjectRole.ADMIN, + ) + + assert list(get_visible_projects_queryset(user)) == [project] + + +def test_get_user_role_returns_membership_role(django_user_model): + user = django_user_model.objects.create_user( + username="role-user", + password="testpass123", + ) + project = Project.objects.create( + name="Role Project", + topic_description="Platform engineering", + ) + ProjectMembership.objects.create( + user=user, + project=project, + role=ProjectRole.READER, + ) + + assert get_user_role(user, project) == ProjectRole.READER + + +def 
test_removing_membership_removes_project_visibility(django_user_model): + user = django_user_model.objects.create_user( + username="remove-user", + password="testpass123", + ) + project = Project.objects.create( + name="Removal Project", + topic_description="Platform engineering", + ) + membership = ProjectMembership.objects.create( + user=user, + project=project, + role=ProjectRole.MEMBER, + ) + + assert list(get_visible_projects_queryset(user)) == [project] + + membership.delete() + + assert list(get_visible_projects_queryset(user)) == [] diff --git a/core/tests/test_newsletters.py b/core/tests/test_newsletters.py index 54a0466a..d5a14a9b 100644 --- a/core/tests/test_newsletters.py +++ b/core/tests/test_newsletters.py @@ -4,7 +4,6 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import Group from django.core import mail from django.urls import reverse from svix.webhooks import Webhook @@ -28,10 +27,8 @@ @pytest.fixture def project(): - group = Group.objects.create(name="newsletter-team") return Project.objects.create( name="Newsletter Project", - group=group, topic_description="Platform engineering", intake_enabled=True, ) diff --git a/core/tests/test_permissions.py b/core/tests/test_permissions.py new file mode 100644 index 00000000..39a543d6 --- /dev/null +++ b/core/tests/test_permissions.py @@ -0,0 +1,355 @@ +from unittest.mock import patch + +from django.contrib.auth import get_user_model +from django.urls import reverse +from rest_framework import status +from rest_framework.test import APITestCase + +from core.models import ( + Content, + Entity, + EntityCandidate, + FeedbackType, + ReviewQueue, + ReviewReason, + TopicCentroidSnapshot, + UserFeedback, +) +from projects.model_support import SourcePluginName +from projects.models import BlueskyCredentials, Project, ProjectMembership, ProjectRole + + +class ProjectRolePermissionTests(APITestCase): + def setUp(self): + queue_centroid_patcher = 
patch("core.signals.queue_topic_centroid_recompute") + queue_centroid_patcher.start() + self.addCleanup(queue_centroid_patcher.stop) + + user_model = get_user_model() + self.admin_user = user_model.objects.create_user( + username="project-admin", + password="testpass123", + ) + self.member_user = user_model.objects.create_user( + username="project-member", + password="testpass123", + ) + self.reader_user = user_model.objects.create_user( + username="project-reader", + password="testpass123", + ) + self.outsider_user = user_model.objects.create_user( + username="outsider", + password="testpass123", + ) + + self.project = Project.objects.create( + name="Permissions Project", + topic_description="Platform engineering", + ) + + ProjectMembership.objects.bulk_create( + [ + ProjectMembership( + user=self.admin_user, + project=self.project, + role=ProjectRole.ADMIN, + ), + ProjectMembership( + user=self.member_user, + project=self.project, + role=ProjectRole.MEMBER, + ), + ProjectMembership( + user=self.reader_user, + project=self.project, + role=ProjectRole.READER, + ), + ] + ) + + self.entity = Entity.objects.create( + project=self.project, + name="Permissions Entity", + type="vendor", + ) + self.content = Content.objects.create( + project=self.project, + url="https://example.com/permissions-content", + title="Permissions Content", + author="Author", + entity=self.entity, + source_plugin=SourcePluginName.RSS, + published_date="2026-04-29T00:00:00Z", + content_text="Permissions content body.", + ) + self.entity_candidate = EntityCandidate.objects.create( + project=self.project, + name="Candidate Vendor", + suggested_type="vendor", + first_seen_in=self.content, + ) + self.review_queue_item = ReviewQueue.objects.create( + project=self.project, + content=self.content, + reason=ReviewReason.BORDERLINE_RELEVANCE, + confidence=0.55, + ) + self.topic_centroid_snapshot = TopicCentroidSnapshot.objects.create( + project=self.project, + centroid_active=True, + centroid_vector=[1.0, 
0.0], + feedback_count=3, + upvote_count=2, + downvote_count=1, + drift_from_previous=0.1, + drift_from_week_ago=0.2, + ) + self.member_feedback = UserFeedback.objects.create( + project=self.project, + content=self.content, + user=self.member_user, + feedback_type=FeedbackType.UPVOTE, + ) + self.admin_feedback = UserFeedback.objects.create( + project=self.project, + content=self.content, + user=self.admin_user, + feedback_type=FeedbackType.DOWNVOTE, + ) + self.bluesky_credentials = BlueskyCredentials.objects.create( + project=self.project, + handle="permissions-project.bsky.social", + ) + + def test_project_list_includes_resolved_reader_role(self): + self.client.force_authenticate(self.reader_user) + + response = self.client.get(reverse("v1:project-list")) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.json()[0]["user_role"], ProjectRole.READER) + + def test_reader_is_denied_contributor_and_admin_endpoints(self): + self.client.force_authenticate(self.reader_user) + + cases = [ + ( + "patch", + reverse("v1:project-detail", kwargs={"id": self.project.id}), + {"name": "Reader Update"}, + ), + ( + "post", + reverse( + "v1:project-rotate-intake-token", + kwargs={"id": self.project.id}, + ), + None, + ), + ( + "get", + reverse( + "v1:project-review-queue-list", + kwargs={"project_id": self.project.id}, + ), + None, + ), + ( + "post", + reverse( + "v1:project-source-config-list", + kwargs={"project_id": self.project.id}, + ), + { + "plugin_name": SourcePluginName.RSS, + "config": {"feed_url": "https://example.com/feed.xml"}, + "is_active": True, + }, + ), + ( + "get", + reverse( + "v1:project-bluesky-credentials-list", + kwargs={"project_id": self.project.id}, + ), + None, + ), + ( + "post", + reverse( + "v1:project-feedback-list", + kwargs={"project_id": self.project.id}, + ), + { + "content": self.content.id, + "feedback_type": FeedbackType.UPVOTE, + }, + ), + ( + "post", + reverse( + "v1:project-entity-candidate-accept", + 
kwargs={ + "project_id": self.project.id, + "pk": self.entity_candidate.id, + }, + ), + None, + ), + ( + "get", + reverse( + "v1:project-topic-centroid-snapshot-summary", + kwargs={"project_id": self.project.id}, + ), + None, + ), + ] + + for method, url, payload in cases: + with self.subTest(method=method, url=url): + response = getattr(self.client, method)(url, payload, format="json") + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + def test_member_can_use_contributor_endpoints_but_not_admin_only_ones(self): + self.client.force_authenticate(self.member_user) + + review_queue_response = self.client.get( + reverse( + "v1:project-review-queue-list", + kwargs={"project_id": self.project.id}, + ) + ) + self.assertEqual(review_queue_response.status_code, status.HTTP_200_OK) + + source_config_response = self.client.post( + reverse( + "v1:project-source-config-list", + kwargs={"project_id": self.project.id}, + ), + { + "plugin_name": SourcePluginName.RSS, + "config": {"feed_url": "https://example.com/feed.xml"}, + "is_active": True, + }, + format="json", + ) + self.assertEqual(source_config_response.status_code, status.HTTP_201_CREATED) + + topic_summary_response = self.client.get( + reverse( + "v1:project-topic-centroid-snapshot-summary", + kwargs={"project_id": self.project.id}, + ) + ) + self.assertEqual(topic_summary_response.status_code, status.HTTP_200_OK) + + accept_candidate_response = self.client.post( + reverse( + "v1:project-entity-candidate-accept", + kwargs={ + "project_id": self.project.id, + "pk": self.entity_candidate.id, + }, + ), + format="json", + ) + self.assertEqual(accept_candidate_response.status_code, status.HTTP_200_OK) + + delete_own_feedback_response = self.client.delete( + reverse( + "v1:project-feedback-detail", + kwargs={ + "project_id": self.project.id, + "pk": self.member_feedback.id, + }, + ) + ) + self.assertEqual( + delete_own_feedback_response.status_code, status.HTTP_204_NO_CONTENT + ) + + 
update_project_response = self.client.patch( + reverse("v1:project-detail", kwargs={"id": self.project.id}), + {"name": "Member Update"}, + format="json", + ) + self.assertEqual(update_project_response.status_code, status.HTTP_403_FORBIDDEN) + + list_credentials_response = self.client.get( + reverse( + "v1:project-bluesky-credentials-list", + kwargs={"project_id": self.project.id}, + ) + ) + self.assertEqual( + list_credentials_response.status_code, status.HTTP_403_FORBIDDEN + ) + + rotate_token_response = self.client.post( + reverse("v1:project-rotate-intake-token", kwargs={"id": self.project.id}), + format="json", + ) + self.assertEqual(rotate_token_response.status_code, status.HTTP_403_FORBIDDEN) + + def test_member_cannot_delete_other_users_feedback(self): + self.client.force_authenticate(self.member_user) + + response = self.client.delete( + reverse( + "v1:project-feedback-detail", + kwargs={ + "project_id": self.project.id, + "pk": self.admin_feedback.id, + }, + ) + ) + + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + def test_admin_can_access_admin_endpoints_and_delete_other_feedback(self): + self.client.force_authenticate(self.admin_user) + + update_project_response = self.client.patch( + reverse("v1:project-detail", kwargs={"id": self.project.id}), + {"name": "Admin Updated Project"}, + format="json", + ) + self.assertEqual(update_project_response.status_code, status.HTTP_200_OK) + + list_credentials_response = self.client.get( + reverse( + "v1:project-bluesky-credentials-list", + kwargs={"project_id": self.project.id}, + ) + ) + self.assertEqual(list_credentials_response.status_code, status.HTTP_200_OK) + + rotate_token_response = self.client.post( + reverse("v1:project-rotate-intake-token", kwargs={"id": self.project.id}), + format="json", + ) + self.assertEqual(rotate_token_response.status_code, status.HTTP_200_OK) + + delete_feedback_response = self.client.delete( + reverse( + "v1:project-feedback-detail", + kwargs={ + 
"project_id": self.project.id, + "pk": self.member_feedback.id, + }, + ) + ) + self.assertEqual( + delete_feedback_response.status_code, status.HTTP_204_NO_CONTENT + ) + + def test_outsider_cannot_access_project_resources(self): + self.client.force_authenticate(self.outsider_user) + + response = self.client.get( + reverse("v1:project-detail", kwargs={"id": self.project.id}) + ) + + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) diff --git a/core/tests/test_pipeline.py b/core/tests/test_pipeline.py index b498f728..0fc461ae 100644 --- a/core/tests/test_pipeline.py +++ b/core/tests/test_pipeline.py @@ -1,7 +1,6 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import Group from core.deduplication import canonicalize_url from core.models import ( @@ -50,10 +49,8 @@ def pipeline_context(django_user_model): user = django_user_model.objects.create_user( username="pipeline-owner", password="testpass123" ) - group = Group.objects.create(name="pipeline-team") - user.groups.add(group) project = Project.objects.create( - name="Pipeline Project", group=group, topic_description="Platform engineering" + name="Pipeline Project", topic_description="Platform engineering" ) content = Content.objects.create( project=project, @@ -65,7 +62,7 @@ def pipeline_context(django_user_model): content_text="This article covers a new Kubernetes release and what changed for platform teams.", embedding_id="emb_123", ) - return SimpleNamespace(user=user, group=group, project=project, content=content) + return SimpleNamespace(user=user, project=project, content=content) def test_process_content_runs_full_pipeline_for_relevant_content( diff --git a/core/tests/test_plugin_base.py b/core/tests/test_plugin_base.py index dbefc37c..f425c097 100644 --- a/core/tests/test_plugin_base.py +++ b/core/tests/test_plugin_base.py @@ -2,7 +2,6 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import Group from core.models import 
Entity, Project from core.plugins.base import ContentItem, SourcePlugin @@ -30,15 +29,8 @@ def health_check(self) -> bool: @pytest.fixture -def plugin_context(django_user_model): - user = django_user_model.objects.create_user( - username="plugin-owner", password="testpass123" - ) - group = Group.objects.create(name="plugin-team") - user.groups.add(group) - project = Project.objects.create( - name="Plugin Project", group=group, topic_description="Infra" - ) +def plugin_context(): + project = Project.objects.create(name="Plugin Project", topic_description="Infra") source_config = SimpleNamespace(project=project, config={"api_key": "secret"}) return SimpleNamespace(project=project, source_config=source_config) diff --git a/core/tests/test_reddit.py b/core/tests/test_reddit.py index c6243629..7fe509c1 100644 --- a/core/tests/test_reddit.py +++ b/core/tests/test_reddit.py @@ -2,25 +2,18 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import Group -from core.models import Project, SourceConfig, SourcePluginName from core.plugins.reddit import RedditSourcePlugin from core.plugins.registry import validate_plugin_config +from projects.model_support import SourcePluginName +from projects.models import Project, SourceConfig pytestmark = pytest.mark.django_db @pytest.fixture -def reddit_context(django_user_model): - user = django_user_model.objects.create_user( - username="reddit-owner", password="testpass123" - ) - group = Group.objects.create(name="reddit-team") - user.groups.add(group) - project = Project.objects.create( - name="Reddit Project", group=group, topic_description="Infra" - ) +def reddit_context(): + project = Project.objects.create(name="Reddit Project", topic_description="Infra") source_config = SourceConfig.objects.create( project=project, plugin_name=SourcePluginName.REDDIT, diff --git a/core/tests/test_rss.py b/core/tests/test_rss.py index 6a291977..01fdab3a 100644 --- a/core/tests/test_rss.py +++ b/core/tests/test_rss.py 
@@ -3,24 +3,17 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import Group -from core.models import Project, SourceConfig, SourcePluginName from core.plugins.rss import RSSSourcePlugin +from projects.model_support import SourcePluginName +from projects.models import Project, SourceConfig pytestmark = pytest.mark.django_db @pytest.fixture -def rss_context(django_user_model): - user = django_user_model.objects.create_user( - username="rss-owner", password="testpass123" - ) - group = Group.objects.create(name="rss-team") - user.groups.add(group) - project = Project.objects.create( - name="RSS Project", group=group, topic_description="Infra" - ) +def rss_context(): + project = Project.objects.create(name="RSS Project", topic_description="Infra") source_config = SourceConfig.objects.create( project=project, plugin_name=SourcePluginName.RSS, diff --git a/core/tests/test_serializers.py b/core/tests/test_serializers.py index d896c3c5..c627116e 100644 --- a/core/tests/test_serializers.py +++ b/core/tests/test_serializers.py @@ -1,27 +1,25 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import AnonymousUser, Group +from django.contrib.auth.models import AnonymousUser from core.models import ( Content, Entity, - Project, ReviewReason, SkillResult, - SourceConfig, - SourcePluginName, ) from core.serializers import ( ContentSerializer, EntitySerializer, IngestionRunSerializer, - ProjectSerializer, ReviewQueueSerializer, SkillResultSerializer, - SourceConfigSerializer, UserFeedbackSerializer, ) +from projects.model_support import SourcePluginName +from projects.models import Project, ProjectMembership, ProjectRole, SourceConfig +from projects.serializers import ProjectSerializer, SourceConfigSerializer pytestmark = pytest.mark.django_db @@ -34,15 +32,17 @@ def serializer_context(django_user_model): other_user = django_user_model.objects.create_user( username="serializer-other", password="testpass123" ) - 
group = Group.objects.create(name="serializer-team") - other_group = Group.objects.create(name="serializer-other-team") - user.groups.add(group) - other_user.groups.add(other_group) project = Project.objects.create( - name="Serializer Project", group=group, topic_description="Infra" + name="Serializer Project", topic_description="Infra" ) other_project = Project.objects.create( - name="Other Serializer Project", group=other_group, topic_description="Data" + name="Other Serializer Project", topic_description="Data" + ) + ProjectMembership.objects.create(user=user, project=project, role=ProjectRole.ADMIN) + ProjectMembership.objects.create( + user=other_user, + project=other_project, + role=ProjectRole.ADMIN, ) entity = Entity.objects.create( project=project, name="Serializer Entity", type="vendor" @@ -133,7 +133,7 @@ def test_project_scoped_serializer_filters_related_querysets_without_project_con def test_project_scoped_serializer_skips_filtering_for_anonymous_user(): serializer = ProjectSerializer(context={"request": _request_for(AnonymousUser())}) - assert serializer.fields["group"].queryset.count() == Group.objects.count() + assert "project" not in serializer.fields def test_content_serializer_rejects_cross_project_entity(serializer_context): @@ -167,7 +167,9 @@ def test_content_serializer_exposes_duplicate_state_as_read_only_fields( ) serializer_context.content.duplicate_signal_count = 1 serializer_context.content.canonical_url = "https://example.com/serializer-content" - serializer_context.content.save(update_fields=["duplicate_signal_count", "canonical_url"]) + serializer_context.content.save( + update_fields=["duplicate_signal_count", "canonical_url"] + ) serializer = ContentSerializer(instance=duplicate) @@ -267,7 +269,7 @@ def test_source_config_serializer_surfaces_plugin_validation_errors(serializer_c ) assert serializer.is_valid() is False - assert serializer.errors == {"config": ["Missing required config field: feed_url"]} + assert serializer.errors == 
{"config": ["Invalid source configuration."]} def test_source_config_serializer_normalizes_bluesky_author_handle_config( diff --git a/core/tests/test_tasks.py b/core/tests/test_tasks.py index dced3e38..4a026d22 100644 --- a/core/tests/test_tasks.py +++ b/core/tests/test_tasks.py @@ -2,7 +2,6 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import Group from core.models import ( Content, @@ -12,30 +11,28 @@ EntityMentionRole, FeedbackType, IngestionRun, - Project, - ProjectConfig, RunStatus, SkillStatus, - SourceConfig, - SourcePluginName, TopicCentroidSnapshot, UserFeedback, ) from core.pipeline import RELEVANCE_SKILL_NAME, SUMMARIZATION_SKILL_NAME from core.tasks import ( - TOPIC_CENTROID_MIN_UPVOTES, - _ingest_source_config, queue_content_skill, - queue_topic_centroid_recompute, recompute_authority_scores, - recompute_topic_centroid, run_all_authority_recomputations, - run_all_ingestions, - run_all_topic_centroid_recomputations, - run_ingestion, run_relevance_scoring_skill, run_summarization_skill, ) +from ingestion.tasks import _ingest_source_config, run_all_ingestions, run_ingestion +from projects.model_support import SourcePluginName +from projects.models import Project, ProjectConfig, SourceConfig +from trends.tasks import ( + TOPIC_CENTROID_MIN_UPVOTES, + queue_topic_centroid_recompute, + recompute_topic_centroid, + run_all_topic_centroid_recomputations, +) pytestmark = pytest.mark.django_db @@ -45,18 +42,14 @@ def source_plugin_context(django_user_model): user = django_user_model.objects.create_user( username="plugin-owner", password="testpass123" ) - group = Group.objects.create(name="plugin-team") - user.groups.add(group) - project = Project.objects.create( - name="Plugin Project", group=group, topic_description="Infra" - ) + project = Project.objects.create(name="Plugin Project", topic_description="Infra") entity = Entity.objects.create( project=project, name="Example", type="vendor", website_url="https://example.com", 
) - return SimpleNamespace(user=user, group=group, project=project, entity=entity) + return SimpleNamespace(user=user, project=project, entity=entity) def test_run_ingestion_creates_content_from_rss_entries(source_plugin_context, mocker): @@ -175,7 +168,7 @@ def test_ingest_source_config_allows_cross_plugin_duplicate_urls_for_pipeline_de ], match_entity_for_url=lambda url: None, ) - mocker.patch("core.tasks.get_plugin_for_source_config", return_value=plugin) + mocker.patch("ingestion.tasks.get_plugin_for_source_config", return_value=plugin) items_fetched, items_ingested = _ingest_source_config(source_config) @@ -258,7 +251,7 @@ def test_ingest_source_config_deduplicates_bluesky_posts_by_post_uri( ], match_entity_for_item=lambda item: source_plugin_context.entity, ) - mocker.patch("core.tasks.get_plugin_for_source_config", return_value=plugin) + mocker.patch("ingestion.tasks.get_plugin_for_source_config", return_value=plugin) items_fetched, items_ingested = _ingest_source_config(source_config) @@ -272,7 +265,7 @@ def test_ingest_source_config_deduplicates_bluesky_posts_by_post_uri( def test_run_all_ingestions_enqueues_active_source_configs( source_plugin_context, mocker ): - delay_mock = mocker.patch("core.tasks.run_ingestion.delay") + delay_mock = mocker.patch("ingestion.tasks.run_ingestion.delay") active_one = SourceConfig.objects.create( project=source_plugin_context.project, plugin_name=SourcePluginName.RSS, @@ -302,8 +295,8 @@ def test_run_all_ingestions_executes_inline_when_eager( source_plugin_context, settings, mocker ): settings.CELERY_TASK_ALWAYS_EAGER = True - run_ingestion_mock = mocker.patch("core.tasks.run_ingestion") - delay_mock = mocker.patch("core.tasks.run_ingestion.delay") + run_ingestion_mock = mocker.patch("ingestion.tasks.run_ingestion") + delay_mock = mocker.patch("ingestion.tasks.run_ingestion.delay") active_one = SourceConfig.objects.create( project=source_plugin_context.project, plugin_name=SourcePluginName.RSS, @@ -328,10 +321,8 @@ def 
test_run_all_authority_recomputations_enqueues_all_projects( source_plugin_context, mocker ): delay_mock = mocker.patch("core.tasks.recompute_authority_scores.delay") - other_group = Group.objects.create(name="second-authority-team") other_project = Project.objects.create( name="Other Project", - group=other_group, topic_description="Security", ) @@ -349,10 +340,8 @@ def test_run_all_authority_recomputations_executes_inline_when_eager( settings.CELERY_TASK_ALWAYS_EAGER = True recompute_mock = mocker.patch("core.tasks.recompute_authority_scores") delay_mock = mocker.patch("core.tasks.recompute_authority_scores.delay") - other_group = Group.objects.create(name="inline-authority-team") other_project = Project.objects.create( name="Inline Project", - group=other_group, topic_description="Platform", ) @@ -368,11 +357,9 @@ def test_run_all_authority_recomputations_executes_inline_when_eager( def test_run_all_topic_centroid_recomputations_enqueues_all_projects( source_plugin_context, mocker ): - delay_mock = mocker.patch("core.tasks.recompute_topic_centroid.delay") - other_group = Group.objects.create(name="second-centroid-team") + delay_mock = mocker.patch("trends.tasks.recompute_topic_centroid.delay") other_project = Project.objects.create( name="Other Centroid Project", - group=other_group, topic_description="Security", ) @@ -388,12 +375,10 @@ def test_run_all_topic_centroid_recomputations_executes_inline_when_eager( source_plugin_context, settings, mocker ): settings.CELERY_TASK_ALWAYS_EAGER = True - recompute_mock = mocker.patch("core.tasks.recompute_topic_centroid") - delay_mock = mocker.patch("core.tasks.recompute_topic_centroid.delay") - other_group = Group.objects.create(name="inline-centroid-team") + recompute_mock = mocker.patch("trends.tasks.recompute_topic_centroid") + delay_mock = mocker.patch("trends.tasks.recompute_topic_centroid.delay") other_project = Project.objects.create( name="Inline Centroid Project", - group=other_group, 
topic_description="Platform", ) @@ -495,8 +480,8 @@ def test_recompute_topic_centroid_upserts_weighted_normalized_centroid( ): project = source_plugin_context.project mocker.patch("core.signals.queue_topic_centroid_recompute") - upsert_mock = mocker.patch("core.tasks.upsert_topic_centroid") - delete_mock = mocker.patch("core.tasks.delete_topic_centroid") + upsert_mock = mocker.patch("trends.tasks.upsert_topic_centroid") + delete_mock = mocker.patch("trends.tasks.delete_topic_centroid") vector_lookup = { **{ f"Upvote {index}": [1.0, 0.0] for index in range(TOPIC_CENTROID_MIN_UPVOTES) @@ -504,7 +489,7 @@ def test_recompute_topic_centroid_upserts_weighted_normalized_centroid( "Downvote": [0.0, 1.0], } mocker.patch( - "core.tasks.embed_text", + "trends.tasks.embed_text", side_effect=lambda text: vector_lookup[text.split("\n\n", 1)[0]], ) @@ -572,9 +557,9 @@ def test_recompute_topic_centroid_persists_drift_from_previous_and_week_old_snap ): project = source_plugin_context.project mocker.patch("core.signals.queue_topic_centroid_recompute") - upsert_mock = mocker.patch("core.tasks.upsert_topic_centroid") - delete_mock = mocker.patch("core.tasks.delete_topic_centroid") - mocker.patch("core.tasks.embed_text", return_value=[1.0, 0.0]) + upsert_mock = mocker.patch("trends.tasks.upsert_topic_centroid") + delete_mock = mocker.patch("trends.tasks.delete_topic_centroid") + mocker.patch("trends.tasks.embed_text", return_value=[1.0, 0.0]) recent_snapshot = TopicCentroidSnapshot.objects.create( project=project, @@ -635,8 +620,8 @@ def test_recompute_topic_centroid_disables_centroid_below_minimum_upvotes( ): project = source_plugin_context.project mocker.patch("core.signals.queue_topic_centroid_recompute") - upsert_mock = mocker.patch("core.tasks.upsert_topic_centroid") - delete_mock = mocker.patch("core.tasks.delete_topic_centroid") + upsert_mock = mocker.patch("trends.tasks.upsert_topic_centroid") + delete_mock = mocker.patch("trends.tasks.delete_topic_centroid") for index in 
range(TOPIC_CENTROID_MIN_UPVOTES - 1): content = Content.objects.create( project=project, @@ -757,8 +742,8 @@ def test_queue_content_skill_executes_summary_inline_when_eager( def test_queue_topic_centroid_recompute_enqueues_background_task( source_plugin_context, mocker ): - cache_add_mock = mocker.patch("core.tasks.cache.add", return_value=True) - delay_mock = mocker.patch("core.tasks.recompute_topic_centroid.delay") + cache_add_mock = mocker.patch("trends.tasks.cache.add", return_value=True) + delay_mock = mocker.patch("trends.tasks.recompute_topic_centroid.delay") queued = queue_topic_centroid_recompute(source_plugin_context.project.id) @@ -770,8 +755,8 @@ def test_queue_topic_centroid_recompute_enqueues_background_task( def test_queue_topic_centroid_recompute_skips_duplicate_queue_attempts( source_plugin_context, mocker ): - mocker.patch("core.tasks.cache.add", return_value=False) - delay_mock = mocker.patch("core.tasks.recompute_topic_centroid.delay") + mocker.patch("trends.tasks.cache.add", return_value=False) + delay_mock = mocker.patch("trends.tasks.recompute_topic_centroid.delay") queued = queue_topic_centroid_recompute(source_plugin_context.project.id) @@ -949,7 +934,7 @@ def test_ingest_source_config_truncates_fields_and_processes_inline( plugin_name=SourcePluginName.RSS, config={"feed_url": "https://example.com/feed.xml"}, ) - mocker.patch("core.tasks.get_plugin_for_source_config", return_value=plugin) + mocker.patch("ingestion.tasks.get_plugin_for_source_config", return_value=plugin) upsert_mock = mocker.patch("core.tasks.upsert_content_embedding") process_mock = mocker.patch("core.tasks.process_content") delay_mock = mocker.patch("core.tasks.process_content.delay") diff --git a/core/tests/test_utils.py b/core/tests/test_utils.py index bbc12168..7f76e50d 100644 --- a/core/tests/test_utils.py +++ b/core/tests/test_utils.py @@ -1,8 +1,7 @@ import pytest -from django.contrib.auth.models import Group -from core.models import Project, ProjectConfig from 
core.utils import dashboard_callback +from projects.models import Project, ProjectConfig pytestmark = pytest.mark.django_db @@ -16,17 +15,12 @@ def test_dashboard_callback_uses_zero_when_no_project_configs(): assert result["avg_authority_weight"] == 0 -def test_dashboard_callback_rounds_average_authority_weight(django_user_model): - user = django_user_model.objects.create_user( - username="utils-owner", password="testpass123" - ) - group = Group.objects.create(name="utils-team") - user.groups.add(group) +def test_dashboard_callback_rounds_average_authority_weight(): project_one = Project.objects.create( - name="Utils Project 1", group=group, topic_description="Infra" + name="Utils Project 1", topic_description="Infra" ) project_two = Project.objects.create( - name="Utils Project 2", group=group, topic_description="Data" + name="Utils Project 2", topic_description="Data" ) ProjectConfig.objects.create(project=project_one, upvote_authority_weight=0.1234) ProjectConfig.objects.create(project=project_two, upvote_authority_weight=0.5678) diff --git a/core/urls.py b/core/urls.py index 5744a8fa..77888129 100644 --- a/core/urls.py +++ b/core/urls.py @@ -1,6 +1,7 @@ from django.urls import path -from core.views import confirm_newsletter_sender_view, healthz_view, readyz_view +from core.views import healthz_view, readyz_view +from newsletters.views import confirm_newsletter_sender_view urlpatterns = [ path("healthz/", healthz_view, name="healthz"), diff --git a/core/utils.py b/core/utils.py index 0a15ff70..9abbadb2 100644 --- a/core/utils.py +++ b/core/utils.py @@ -1,6 +1,6 @@ from django.db.models import Avg -from .models import ProjectConfig +from projects.models import ProjectConfig def dashboard_callback(request, context): diff --git a/core/views.py b/core/views.py index 33e68c10..3055ef59 100644 --- a/core/views.py +++ b/core/views.py @@ -1,18 +1,13 @@ -"""Operational and newsletter-intake views used outside the REST API.""" +"""Operational views used outside the REST 
API.""" from http import HTTPStatus from typing import cast from django.conf import settings as django_settings from django.db import connection -from django.http import HttpRequest, JsonResponse -from django.shortcuts import get_object_or_404 -from django.utils import timezone -from django.views.decorators.http import require_GET +from django.http import JsonResponse from qdrant_client import QdrantClient -from core.models import IntakeAllowlist, NewsletterIntake, NewsletterIntakeStatus -from core.newsletters import queue_newsletter_intake from core.settings_types import CoreSettings settings = cast(CoreSettings, django_settings) @@ -64,34 +59,3 @@ def _check_qdrant() -> bool: except Exception: return False return True - - -@require_GET -def confirm_newsletter_sender_view(request: HttpRequest, token: str): - """Confirm a sender and queue any pending newsletter intake rows. - - Args: - request: Incoming confirmation request. - token: Confirmation token stored on the allowlist entry. - - Returns: - A JSON response showing that the sender was confirmed and how many pending - intake rows were queued for processing. 
- """ - - allowlist = get_object_or_404(IntakeAllowlist, confirmation_token=token) - if allowlist.confirmed_at is None: - allowlist.confirmed_at = timezone.now() - allowlist.save(update_fields=["confirmed_at"]) - - pending_intake_ids = list( - NewsletterIntake.objects.filter( - project=allowlist.project, - sender_email=allowlist.sender_email, - status=NewsletterIntakeStatus.PENDING, - ).values_list("id", flat=True) - ) - for intake_id in pending_intake_ids: - queue_newsletter_intake(intake_id) - - return JsonResponse({"status": "confirmed", "queued": len(pending_intake_ids)}) diff --git a/entities/__init__.py b/entities/__init__.py new file mode 100644 index 00000000..f30b9d91 --- /dev/null +++ b/entities/__init__.py @@ -0,0 +1 @@ +"""Entities app package.""" diff --git a/entities/admin.py b/entities/admin.py new file mode 100644 index 00000000..32b7fb4f --- /dev/null +++ b/entities/admin.py @@ -0,0 +1,256 @@ +"""Admin configuration for entity-domain models.""" + +from django.contrib import admin, messages +from django.db.models import QuerySet +from django.http import HttpRequest +from django.utils.html import format_html + +from core.entity_extraction import ( + accept_entity_candidate, + merge_entity_candidate, + reject_entity_candidate, +) +from entities.models import ( + Entity, + EntityAuthoritySnapshot, + EntityCandidate, + EntityCandidateStatus, + EntityMention, +) + + +def _score_to_percent(value): + """Normalize score-like values for display as percentages.""" + + if value is None: + return None + numeric_value = float(value) + if -1.0 <= numeric_value <= 1.0: + return numeric_value * 100 + return numeric_value + + +def _score_color(value) -> str: + """Return the admin display color for a score-like value.""" + + percent_value = _score_to_percent(value) + if percent_value is None: + return "inherit" + if percent_value >= 75: + return "green" + if percent_value >= 40: + return "orange" + return "red" + + +@admin.register(Entity) +class 
EntityAdmin(admin.ModelAdmin): + """Admin configuration for tracked people, vendors, and organizations.""" + + list_display = ( + "name", + "project", + "type", + "colored_score", + "latest_snapshot_summary", + "created_at", + ) + search_fields = ("name", "project__name") + + @admin.display(description="Authority Score", ordering="authority_score") + def colored_score(self, obj): + """Render the authority score with a traffic-light color cue.""" + + percent_value = _score_to_percent(obj.authority_score) + color = _score_color(obj.authority_score) + + return format_html( + '{}', + color, + f"{percent_value:.1f}%", + ) + + @admin.display(description="Latest Snapshot") + def latest_snapshot_summary(self, obj): + """Show the latest authority component breakdown for an entity.""" + + latest_snapshot = obj.authority_snapshots.order_by("-computed_at").first() + if latest_snapshot is None: + return "-" + mention_value = f"{_score_to_percent(latest_snapshot.mention_component):.1f}%" + feedback_value = f"{_score_to_percent(latest_snapshot.feedback_component):.1f}%" + duplicate_value = ( + f"{_score_to_percent(latest_snapshot.duplicate_component):.1f}%" + ) + decayed_value = f"{_score_to_percent(latest_snapshot.decayed_prior):.1f}%" + return format_html( + ( + '' + "M {} | F {} | D {} | Carry {}" + ), + mention_value, + feedback_value, + duplicate_value, + decayed_value, + mention_value, + feedback_value, + duplicate_value, + decayed_value, + ) + + +@admin.register(EntityAuthoritySnapshot) +class EntityAuthoritySnapshotAdmin(admin.ModelAdmin): + """Admin view for persisted authority-score history.""" + + list_display = ( + "entity", + "project", + "display_final_score", + "display_components", + "computed_at", + ) + list_filter = (("project", admin.RelatedOnlyFieldListFilter), "computed_at") + search_fields = ("entity__name", "project__name") + autocomplete_fields = ("entity", "project") + + @admin.display(description="Final Score", ordering="final_score") + def 
display_final_score(self, obj): + """Render the recomputed final authority score as a percentage.""" + + percent_value = _score_to_percent(obj.final_score) + return format_html( + '{}', + _score_color(obj.final_score), + f"{percent_value:.1f}%", + ) + + @admin.display(description="Components") + def display_components(self, obj): + """Render the stored authority components in a compact summary.""" + + mention_value = f"{_score_to_percent(obj.mention_component):.1f}%" + feedback_value = f"{_score_to_percent(obj.feedback_component):.1f}%" + duplicate_value = f"{_score_to_percent(obj.duplicate_component):.1f}%" + decayed_value = f"{_score_to_percent(obj.decayed_prior):.1f}%" + return format_html( + "M {} | F {} | D {} | Carry {}", + mention_value, + feedback_value, + duplicate_value, + decayed_value, + ) + + +@admin.register(EntityMention) +class EntityMentionAdmin(admin.ModelAdmin): + """Admin view for extracted tracked-entity mentions.""" + + list_display = ( + "entity", + "project", + "content", + "role", + "sentiment", + "confidence", + "created_at", + ) + list_filter = ("role", "sentiment", ("project", admin.RelatedOnlyFieldListFilter)) + search_fields = ("entity__name", "content__title", "span") + autocomplete_fields = ("entity", "content", "project") + + +@admin.register(EntityCandidate) +class EntityCandidateAdmin(admin.ModelAdmin): + """Admin view for candidate entities awaiting human review.""" + + actions = [ + "accept_selected_candidates", + "reject_selected_candidates", + "merge_into_existing_entities", + ] + list_display = ( + "name", + "project", + "suggested_type", + "occurrence_count", + "status", + "merged_into", + "first_seen_in", + "created_at", + ) + list_filter = ( + "status", + "suggested_type", + ("project", admin.RelatedOnlyFieldListFilter), + ) + search_fields = ("name", "project__name", "merged_into__name") + autocomplete_fields = ("project", "first_seen_in", "merged_into") + ordering = ("-occurrence_count", "name") + + 
@admin.action(description="Accept selected candidates") + def accept_selected_candidates(self, request, queryset): + """Promote selected candidates into tracked entities.""" + + accepted_count = 0 + for candidate in queryset.select_related("project"): + if candidate.status == EntityCandidateStatus.ACCEPTED: + continue + accept_entity_candidate(candidate) + accepted_count += 1 + self.message_user( + request, + f"Accepted {accepted_count} entity candidate(s).", + messages.SUCCESS, + ) + + @admin.action(description="Reject selected candidates") + def reject_selected_candidates(self, request, queryset): + """Mark selected candidates as rejected.""" + + rejected_count = 0 + for candidate in queryset: + if candidate.status == EntityCandidateStatus.REJECTED: + continue + reject_entity_candidate(candidate) + rejected_count += 1 + self.message_user( + request, + f"Rejected {rejected_count} entity candidate(s).", + messages.SUCCESS, + ) + + @admin.action(description="Merge selected candidates into existing entities") + def merge_into_existing_entities( + self, + request: HttpRequest, + queryset: QuerySet[EntityCandidate], + ) -> None: + """Merge candidates when a same-name entity already exists in the project.""" + + merged_count = 0 + unresolved_names: list[str] = [] + for candidate in queryset.select_related("project"): + matching_entities = Entity.objects.filter( + project=candidate.project, + name__iexact=candidate.name, + ) + if matching_entities.count() != 1: + unresolved_names.append(candidate.name) + continue + merge_entity_candidate(candidate, matching_entities.get()) + merged_count += 1 + + if merged_count: + self.message_user( + request, + f"Merged {merged_count} entity candidate(s) into existing entities.", + messages.SUCCESS, + ) + if unresolved_names: + self.message_user( + request, + "No unique same-name entity match was available for: " + + ", ".join(sorted(unresolved_names)), + messages.WARNING, + ) diff --git a/entities/api.py b/entities/api.py new file 
mode 100644 index 00000000..c54c1d8f --- /dev/null +++ b/entities/api.py @@ -0,0 +1,232 @@ +"""Entity-domain API viewsets kept under the existing nested project routes.""" + +from django.db.models import Count, Prefetch +from drf_spectacular.utils import OpenApiParameter, extend_schema +from rest_framework import serializers, viewsets +from rest_framework.decorators import action +from rest_framework.filters import OrderingFilter +from rest_framework.response import Response + +from core.api import ( + AUTHENTICATION_REQUIRED_RESPONSE, + ProjectOwnedQuerysetMixin, + build_crud_action_overrides, + document_project_owned_viewset, +) +from core.entity_extraction import ( + accept_entity_candidate, + merge_entity_candidate, + reject_entity_candidate, +) +from core.permissions import ( + IsProjectAdmin, + IsProjectContributor, + IsProjectMember, + IsProjectMemberWritable, +) +from entities.models import Entity, EntityCandidate, EntityMention +from entities.serializers import ( + EntityAuthoritySnapshotSerializer, + EntityCandidateMergeSerializer, + EntityCandidateSerializer, + EntityMentionSummarySerializer, + EntitySerializer, +) + + +@document_project_owned_viewset( + resource_plural="entities", + resource_singular="entity", + create_description="Create a new tracked entity for the selected project, such as a company, person, or organization.", + tag="Entity Catalog", + action_overrides=build_crud_action_overrides( + EntitySerializer, + resource_plural="entities for the selected project", + resource_singular="entity", + ), +) +class EntityViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet): + """Manage tracked entities associated with a project.""" + + serializer_class = EntitySerializer + filter_backends = [OrderingFilter] + ordering_fields = ["authority_score", "created_at", "name"] + ordering = ["name"] + queryset = ( + Entity.objects.select_related("project") + .annotate(mention_count=Count("mentions", distinct=True)) + .prefetch_related( + Prefetch( + 
"mentions", + queryset=EntityMention.objects.select_related("content").order_by( + "-created_at" + ), + to_attr="prefetched_mentions", + ) + ) + ) + + def get_permissions(self): + """Apply read, contributor-write, and admin-delete permissions for entities.""" + + if self.action == "destroy": + permission_classes = [IsProjectAdmin] + elif self.action in {"create", "update", "partial_update"}: + permission_classes = [IsProjectMemberWritable] + else: + permission_classes = [IsProjectMember] + return [permission() for permission in permission_classes] + + @extend_schema( + summary="List entity mentions", + description="Return the extracted mention history for one tracked entity inside the selected project.", + request=None, + responses={ + 200: EntityMentionSummarySerializer(many=True), + 403: AUTHENTICATION_REQUIRED_RESPONSE, + }, + tags=["Entity Catalog"], + ) + @action(detail=True, methods=["get"], url_path="mentions") + def mentions(self, request, *args, **kwargs): + """Return the extracted mentions for the selected entity.""" + + entity = self.get_object() + mentions = entity.mentions.select_related("content").order_by("-created_at") + serializer = EntityMentionSummarySerializer(mentions, many=True) + return Response(serializer.data) + + @extend_schema( + summary="List authority history", + description=( + "Return persisted authority-score snapshots for one tracked entity. " + "Use the optional limit query parameter to cap the number of snapshots returned." 
+ ), + parameters=[ + OpenApiParameter( + name="limit", + type=int, + location=OpenApiParameter.QUERY, + description="Maximum number of authority snapshots to return.", + required=False, + ) + ], + request=None, + responses={ + 200: EntityAuthoritySnapshotSerializer(many=True), + 403: AUTHENTICATION_REQUIRED_RESPONSE, + }, + tags=["Entity Catalog"], + ) + @action(detail=True, methods=["get"], url_path="authority_history") + def authority_history(self, request, *args, **kwargs): + """Return recent authority snapshots for the selected entity.""" + + entity = self.get_object() + snapshots = entity.authority_snapshots.order_by("-computed_at") + limit_param = request.query_params.get("limit") + if limit_param: + try: + limit = max(1, min(int(limit_param), 100)) + except ValueError as exc: + raise serializers.ValidationError( + {"limit": "Limit must be an integer between 1 and 100."} + ) from exc + snapshots = snapshots[:limit] + serializer = EntityAuthoritySnapshotSerializer(snapshots, many=True) + return Response(serializer.data) + + +@document_project_owned_viewset( + resource_plural="entity candidates", + resource_singular="entity candidate", + create_description="Entity candidates are created by the pipeline and can be reviewed through dedicated actions.", + tag="Entity Catalog", + action_overrides=build_crud_action_overrides( + EntityCandidateSerializer, + resource_plural="entity candidates for the selected project", + resource_singular="entity candidate", + ), +) +class EntityCandidateViewSet(ProjectOwnedQuerysetMixin, viewsets.ReadOnlyModelViewSet): + """Inspect and resolve entity candidates surfaced by entity extraction.""" + + serializer_class = EntityCandidateSerializer + queryset = EntityCandidate.objects.select_related( + "project", "first_seen_in", "merged_into" + ) + + def get_permissions(self): + """Allow all members to read candidates and contributors to resolve them.""" + + if self.action in {"accept", "reject", "merge"}: + permission_classes = 
[IsProjectContributor] + else: + permission_classes = [IsProjectMember] + return [permission() for permission in permission_classes] + + @extend_schema( + summary="Accept entity candidate", + description="Promote a pending entity candidate into a tracked entity and backfill recent mentions.", + request=None, + responses={ + 200: EntityCandidateSerializer, + 403: AUTHENTICATION_REQUIRED_RESPONSE, + }, + tags=["Entity Catalog"], + ) + @action(detail=True, methods=["post"], url_path="accept") + def accept(self, request, *args, **kwargs): + """Accept an entity candidate and return its updated representation.""" + + candidate = self.get_object() + accept_entity_candidate(candidate) + candidate.refresh_from_db() + serializer = self.get_serializer(candidate) + return Response(serializer.data) + + @extend_schema( + summary="Reject entity candidate", + description="Mark a pending entity candidate as rejected without creating a tracked entity.", + request=None, + responses={ + 200: EntityCandidateSerializer, + 403: AUTHENTICATION_REQUIRED_RESPONSE, + }, + tags=["Entity Catalog"], + ) + @action(detail=True, methods=["post"], url_path="reject") + def reject(self, request, *args, **kwargs): + """Reject an entity candidate and return its updated representation.""" + + candidate = self.get_object() + reject_entity_candidate(candidate) + candidate.refresh_from_db() + serializer = self.get_serializer(candidate) + return Response(serializer.data) + + @extend_schema( + summary="Merge entity candidate", + description="Merge a pending entity candidate into an existing tracked entity from the same project.", + request=EntityCandidateMergeSerializer, + responses={ + 200: EntityCandidateSerializer, + 400: EntityCandidateMergeSerializer, + 403: AUTHENTICATION_REQUIRED_RESPONSE, + }, + tags=["Entity Catalog"], + ) + @action(detail=True, methods=["post"], url_path="merge") + def merge(self, request, *args, **kwargs): + """Merge an entity candidate into an existing tracked entity.""" + + 
candidate = self.get_object() + serializer = EntityCandidateMergeSerializer( + data=request.data, + context=self.get_serializer_context(), + ) + serializer.is_valid(raise_exception=True) + merge_entity_candidate(candidate, serializer.validated_data["merged_into"]) + candidate.refresh_from_db() + response_serializer = self.get_serializer(candidate) + return Response(response_serializer.data) diff --git a/entities/api_urls.py b/entities/api_urls.py new file mode 100644 index 00000000..49e6a244 --- /dev/null +++ b/entities/api_urls.py @@ -0,0 +1,16 @@ +"""API route registration for entity resources.""" + +from rest_framework_nested.routers import NestedSimpleRouter + +from entities.api import EntityCandidateViewSet, EntityViewSet + + +def register_project_routes(project_router: NestedSimpleRouter) -> None: + """Register nested entity endpoints.""" + + project_router.register(r"entities", EntityViewSet, basename="project-entity") + project_router.register( + r"entity-candidates", + EntityCandidateViewSet, + basename="project-entity-candidate", + ) diff --git a/entities/apps.py b/entities/apps.py new file mode 100644 index 00000000..cf7ba8e8 --- /dev/null +++ b/entities/apps.py @@ -0,0 +1,7 @@ +from django.apps import AppConfig + + +class EntitiesConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "entities" + verbose_name = "Entities" diff --git a/entities/migrations/0001_initial.py b/entities/migrations/0001_initial.py new file mode 100644 index 00000000..fec1dd5c --- /dev/null +++ b/entities/migrations/0001_initial.py @@ -0,0 +1,303 @@ +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + initial = True + + dependencies = [ + ("projects", "0003_remove_project_group"), + ("core", "0010_move_project_models_to_projects"), + ] + + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.CreateModel( + name="Entity", + fields=[ + ( + "id", + 
models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("name", models.CharField(max_length=255)), + ( + "type", + models.CharField( + choices=[ + ("individual", "Individual"), + ("vendor", "Vendor"), + ("organization", "Organization"), + ], + max_length=32, + ), + ), + ("description", models.TextField(blank=True)), + ("authority_score", models.FloatField(default=0.5)), + ("website_url", models.URLField(blank=True)), + ("github_url", models.URLField(blank=True)), + ("linkedin_url", models.URLField(blank=True)), + ( + "bluesky_handle", + models.CharField(blank=True, max_length=255), + ), + ( + "mastodon_handle", + models.CharField(blank=True, max_length=255), + ), + ( + "twitter_handle", + models.CharField(blank=True, max_length=255), + ), + ("created_at", models.DateTimeField(auto_now_add=True)), + ( + "project", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="entities", + to="projects.project", + ), + ), + ], + options={ + "ordering": ["name"], + "db_table": "core_entity", + "constraints": [ + models.UniqueConstraint( + fields=("project", "name"), + name="core_entity_unique_project_name", + ) + ], + }, + ), + migrations.CreateModel( + name="EntityAuthoritySnapshot", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("computed_at", models.DateTimeField(auto_now_add=True)), + ("mention_component", models.FloatField()), + ("feedback_component", models.FloatField()), + ("duplicate_component", models.FloatField()), + ("decayed_prior", models.FloatField()), + ("final_score", models.FloatField()), + ( + "entity", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="authority_snapshots", + to="entities.entity", + ), + ), + ( + "project", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="entity_authority_snapshots", + 
to="projects.project", + ), + ), + ], + options={ + "ordering": ["-computed_at"], + "db_table": "core_entityauthoritysnapshot", + "indexes": [ + models.Index( + fields=["entity", "-computed_at"], + name="core_entity_entity__9fe820_idx", + ), + models.Index( + fields=["project", "-computed_at"], + name="core_entity_project_a31e41_idx", + ), + ], + }, + ), + migrations.CreateModel( + name="EntityMention", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "role", + models.CharField( + choices=[ + ("author", "Author"), + ("subject", "Subject"), + ("quoted", "Quoted"), + ("mentioned", "Mentioned"), + ], + max_length=16, + ), + ), + ( + "sentiment", + models.CharField( + blank=True, + choices=[ + ("positive", "Positive"), + ("neutral", "Neutral"), + ("negative", "Negative"), + ], + default="", + max_length=16, + ), + ), + ("span", models.TextField(blank=True)), + ("confidence", models.FloatField(default=0.0)), + ("created_at", models.DateTimeField(auto_now_add=True)), + ( + "content", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="entity_mentions", + to="core.content", + ), + ), + ( + "entity", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="mentions", + to="entities.entity", + ), + ), + ( + "project", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="entity_mentions", + to="projects.project", + ), + ), + ], + options={ + "ordering": ["-created_at"], + "db_table": "core_entitymention", + "indexes": [ + models.Index( + fields=["entity", "created_at"], + name="core_entity_entity__8ba01e_idx", + ), + models.Index( + fields=["project", "created_at"], + name="core_entity_project_dabde7_idx", + ), + ], + "constraints": [ + models.UniqueConstraint( + fields=("content", "entity", "role"), + name="core_entitymention_unique_content_entity_role", + ) + ], + }, + ), + 
migrations.CreateModel( + name="EntityCandidate", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("name", models.CharField(max_length=255)), + ( + "suggested_type", + models.CharField( + choices=[ + ("individual", "Individual"), + ("vendor", "Vendor"), + ("organization", "Organization"), + ], + max_length=32, + ), + ), + ("occurrence_count", models.IntegerField(default=1)), + ( + "status", + models.CharField( + choices=[ + ("pending", "Pending"), + ("accepted", "Accepted"), + ("rejected", "Rejected"), + ("merged", "Merged"), + ], + default="pending", + max_length=16, + ), + ), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ( + "first_seen_in", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="entity_candidates", + to="core.content", + ), + ), + ( + "merged_into", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="merged_entity_candidates", + to="entities.entity", + ), + ), + ( + "project", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="entity_candidates", + to="projects.project", + ), + ), + ], + options={ + "ordering": ["-occurrence_count", "name"], + "db_table": "core_entitycandidate", + "indexes": [ + models.Index( + fields=["project", "status", "occurrence_count"], + name="core_entity_project_4c32ec_idx", + ) + ], + "constraints": [ + models.UniqueConstraint( + fields=("project", "name"), + name="core_entitycandidate_unique_project_name", + ) + ], + }, + ), + ], + database_operations=[], + ) + ] diff --git a/entities/migrations/0002_alter_entitycandidate_first_seen_in_and_more.py b/entities/migrations/0002_alter_entitycandidate_first_seen_in_and_more.py new file mode 100644 index 00000000..64ba7afd --- /dev/null +++ 
b/entities/migrations/0002_alter_entitycandidate_first_seen_in_and_more.py @@ -0,0 +1,37 @@ +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("content", "0001_initial"), + ("entities", "0001_initial"), + ] + + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.AlterField( + model_name="entitycandidate", + name="first_seen_in", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="entity_candidates", + to="content.content", + ), + ), + migrations.AlterField( + model_name="entitymention", + name="content", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="entity_mentions", + to="content.content", + ), + ), + ], + database_operations=[], + ) + ] diff --git a/entities/migrations/__init__.py b/entities/migrations/__init__.py new file mode 100644 index 00000000..b1beed35 --- /dev/null +++ b/entities/migrations/__init__.py @@ -0,0 +1 @@ +"""Entities app migrations package.""" diff --git a/entities/models.py b/entities/models.py new file mode 100644 index 00000000..479c5781 --- /dev/null +++ b/entities/models.py @@ -0,0 +1,204 @@ +"""Entity-domain models split out from the historical core app.""" + +from __future__ import annotations + +from django.db import models + + +class EntityType(models.TextChoices): + """Supported types of tracked entities within a project.""" + + INDIVIDUAL = "individual", "Individual" + VENDOR = "vendor", "Vendor" + ORGANIZATION = "organization", "Organization" + + +class EntityMentionRole(models.TextChoices): + """Supported roles for how an entity appears inside content.""" + + AUTHOR = "author", "Author" + SUBJECT = "subject", "Subject" + QUOTED = "quoted", "Quoted" + MENTIONED = "mentioned", "Mentioned" + + +class EntityMentionSentiment(models.TextChoices): + """Supported editorial sentiment labels for entity 
mentions.""" + + POSITIVE = "positive", "Positive" + NEUTRAL = "neutral", "Neutral" + NEGATIVE = "negative", "Negative" + + +class EntityCandidateStatus(models.TextChoices): + """Review workflow states for extracted entity candidates.""" + + PENDING = "pending", "Pending" + ACCEPTED = "accepted", "Accepted" + REJECTED = "rejected", "Rejected" + MERGED = "merged", "Merged" + + +class Entity(models.Model): + """Represents a person, vendor, or organization tracked inside a project.""" + + project = models.ForeignKey( + "projects.Project", on_delete=models.CASCADE, related_name="entities" + ) + name = models.CharField(max_length=255) + type = models.CharField(max_length=32, choices=EntityType.choices) + description = models.TextField(blank=True) + authority_score = models.FloatField(default=0.5) + website_url = models.URLField(blank=True) + github_url = models.URLField(blank=True) + linkedin_url = models.URLField(blank=True) + bluesky_handle = models.CharField(max_length=255, blank=True) + mastodon_handle = models.CharField(max_length=255, blank=True) + twitter_handle = models.CharField(max_length=255, blank=True) + created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + ordering = ["name"] + db_table = "core_entity" + constraints = [ + models.UniqueConstraint( + fields=["project", "name"], name="core_entity_unique_project_name" + ), + ] + + def __str__(self) -> str: + return self.name + + +class EntityAuthoritySnapshot(models.Model): + """Captures one authority-score recomputation for a tracked entity.""" + + entity = models.ForeignKey( + Entity, on_delete=models.CASCADE, related_name="authority_snapshots" + ) + project = models.ForeignKey( + "projects.Project", + on_delete=models.CASCADE, + related_name="entity_authority_snapshots", + ) + computed_at = models.DateTimeField(auto_now_add=True) + mention_component = models.FloatField() + feedback_component = models.FloatField() + duplicate_component = models.FloatField() + decayed_prior = 
models.FloatField() + final_score = models.FloatField() + + class Meta: + ordering = ["-computed_at"] + db_table = "core_entityauthoritysnapshot" + indexes = [ + models.Index( + fields=["entity", "-computed_at"], + name="core_entity_entity__9fe820_idx", + ), + models.Index( + fields=["project", "-computed_at"], + name="core_entity_project_a31e41_idx", + ), + ] + + def __str__(self) -> str: + return f"Authority snapshot for {self.entity.name}" + + +class EntityMention(models.Model): + """Represents one tracked-entity mention detected in a content item.""" + + content = models.ForeignKey( + "content.Content", on_delete=models.CASCADE, related_name="entity_mentions" + ) + entity = models.ForeignKey( + Entity, on_delete=models.CASCADE, related_name="mentions" + ) + project = models.ForeignKey( + "projects.Project", on_delete=models.CASCADE, related_name="entity_mentions" + ) + role = models.CharField(max_length=16, choices=EntityMentionRole.choices) + sentiment = models.CharField( + max_length=16, + choices=EntityMentionSentiment.choices, + blank=True, + default="", + ) + span = models.TextField(blank=True) + confidence = models.FloatField(default=0.0) + created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + ordering = ["-created_at"] + db_table = "core_entitymention" + constraints = [ + models.UniqueConstraint( + fields=["content", "entity", "role"], + name="core_entitymention_unique_content_entity_role", + ) + ] + indexes = [ + models.Index( + fields=["entity", "created_at"], + name="core_entity_entity__8ba01e_idx", + ), + models.Index( + fields=["project", "created_at"], + name="core_entity_project_dabde7_idx", + ), + ] + + def __str__(self) -> str: + return f"{self.entity.name} in {self.content.title}" + + +class EntityCandidate(models.Model): + """Stores an extracted named entity awaiting human confirmation.""" + + project = models.ForeignKey( + "projects.Project", on_delete=models.CASCADE, related_name="entity_candidates" + ) + name = 
models.CharField(max_length=255) + suggested_type = models.CharField(max_length=32, choices=EntityType.choices) + first_seen_in = models.ForeignKey( + "content.Content", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="entity_candidates", + ) + occurrence_count = models.IntegerField(default=1) + status = models.CharField( + max_length=16, + choices=EntityCandidateStatus.choices, + default=EntityCandidateStatus.PENDING, + ) + merged_into = models.ForeignKey( + Entity, + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="merged_entity_candidates", + ) + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + ordering = ["-occurrence_count", "name"] + db_table = "core_entitycandidate" + constraints = [ + models.UniqueConstraint( + fields=["project", "name"], + name="core_entitycandidate_unique_project_name", + ) + ] + indexes = [ + models.Index( + fields=["project", "status", "occurrence_count"], + name="core_entity_project_4c32ec_idx", + ), + ] + + def __str__(self) -> str: + return self.name diff --git a/entities/serializers.py b/entities/serializers.py new file mode 100644 index 00000000..33e6bf03 --- /dev/null +++ b/entities/serializers.py @@ -0,0 +1,124 @@ +"""DRF serializers for entity-domain models.""" + +from rest_framework import serializers + +from core.serializer_mixins import ProjectScopedSerializerMixin +from entities.models import ( + Entity, + EntityAuthoritySnapshot, + EntityCandidate, + EntityMention, +) + + +class EntitySerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): + """Serialize tracked entities for a project.""" + + mention_count = serializers.IntegerField(read_only=True) + latest_mentions = serializers.SerializerMethodField() + + class Meta: + model = Entity + fields = [ + "id", + "project", + "name", + "type", + "description", + "authority_score", + "website_url", + "github_url", + "linkedin_url", + 
"bluesky_handle", + "mastodon_handle", + "twitter_handle", + "mention_count", + "latest_mentions", + "created_at", + ] + read_only_fields = ["id", "project", "created_at"] + + def get_latest_mentions(self, obj): + """Return a compact summary of the most recent mentions for an entity.""" + + mentions = getattr(obj, "prefetched_mentions", None) + if mentions is None: + mentions = obj.mentions.select_related("content").order_by("-created_at") + return EntityMentionSummarySerializer(mentions[:3], many=True).data + + +class EntityAuthoritySnapshotSerializer(serializers.ModelSerializer): + """Serialize one persisted authority recomputation for an entity.""" + + class Meta: + model = EntityAuthoritySnapshot + fields = [ + "id", + "entity", + "project", + "computed_at", + "mention_component", + "feedback_component", + "duplicate_component", + "decayed_prior", + "final_score", + ] + read_only_fields = fields + + +class EntityMentionSummarySerializer(serializers.ModelSerializer): + """Serialize a compact entity-mention summary for frontend display.""" + + content_id = serializers.IntegerField(read_only=True) + content_title = serializers.CharField(source="content.title", read_only=True) + + class Meta: + model = EntityMention + fields = [ + "id", + "content_id", + "content_title", + "role", + "sentiment", + "span", + "confidence", + "created_at", + ] + read_only_fields = fields + + +class EntityCandidateSerializer( + ProjectScopedSerializerMixin, serializers.ModelSerializer +): + """Serialize extracted entity candidates awaiting editorial review.""" + + first_seen_title = serializers.CharField( + source="first_seen_in.title", read_only=True + ) + merged_into_name = serializers.CharField(source="merged_into.name", read_only=True) + + class Meta: + model = EntityCandidate + fields = [ + "id", + "project", + "name", + "suggested_type", + "first_seen_in", + "first_seen_title", + "occurrence_count", + "status", + "merged_into", + "merged_into_name", + "created_at", + 
"updated_at", + ] + read_only_fields = fields + + +class EntityCandidateMergeSerializer( + ProjectScopedSerializerMixin, serializers.Serializer +): + """Validate merge requests for entity candidates.""" + + merged_into = serializers.PrimaryKeyRelatedField(queryset=Entity.objects.none()) diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 4e2683a3..bd9a06dd 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -25,6 +25,7 @@ "prettier": "^3.8.3", "react": "19.2.5", "react-dom": "19.2.5", + "react-dropzone": "^15.0.0", "react-hook-form": "^7.74.0", "tailwindcss": "^4.2.4", "typescript-eslint": "^8.59.1" @@ -3316,6 +3317,15 @@ "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", "license": "MIT" }, + "node_modules/attr-accept": { + "version": "2.2.5", + "resolved": "https://registry.npmjs.org/attr-accept/-/attr-accept-2.2.5.tgz", + "integrity": "sha512-0bDNnY/u6pPwHDMoF0FieU354oBi0a8rD9FcsLwzcGWbc8KS8KPIi7y+s13OlVY+gMWc/9xEMUgNE6Qm8ZllYQ==", + "license": "MIT", + "engines": { + "node": ">=4" + } + }, "node_modules/available-typed-arrays": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz", @@ -4729,6 +4739,18 @@ "node": ">=16.0.0" } }, + "node_modules/file-selector": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/file-selector/-/file-selector-2.1.2.tgz", + "integrity": "sha512-QgXo+mXTe8ljeqUFaX3QVHc5osSItJ/Km+xpocx0aSqWGMSCf6qYs/VnzZgS864Pjn5iceMRFigeAV7AfTlaig==", + "license": "MIT", + "dependencies": { + "tslib": "^2.7.0" + }, + "engines": { + "node": ">= 12" + } + }, "node_modules/fill-range": { "version": "7.1.1", "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", @@ -6990,6 +7012,23 @@ "react": "^19.2.5" } }, + "node_modules/react-dropzone": { + "version": "15.0.0", + "resolved": 
"https://registry.npmjs.org/react-dropzone/-/react-dropzone-15.0.0.tgz", + "integrity": "sha512-lGjYV/EoqEjEWPnmiSvH4v5IoIAwQM2W4Z1C0Q/Pw2xD0eVzKPS359BQTUMum+1fa0kH2nrKjuavmTPOGhpLPg==", + "license": "MIT", + "dependencies": { + "attr-accept": "^2.2.4", + "file-selector": "^2.1.0", + "prop-types": "^15.8.1" + }, + "engines": { + "node": ">= 10.13" + }, + "peerDependencies": { + "react": ">= 16.8 || 18.0.0" + } + }, "node_modules/react-hook-form": { "version": "7.74.0", "resolved": "https://registry.npmjs.org/react-hook-form/-/react-hook-form-7.74.0.tgz", diff --git a/frontend/package.json b/frontend/package.json index fa5f07d3..5aa4253e 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -33,6 +33,7 @@ "prettier": "^3.8.3", "react": "19.2.5", "react-dom": "19.2.5", + "react-dropzone": "^15.0.0", "react-hook-form": "^7.74.0", "tailwindcss": "^4.2.4", "typescript-eslint": "^8.59.1" diff --git a/frontend/src/app/__tests__/page.test.tsx b/frontend/src/app/__tests__/page.test.tsx index 08e1fe6e..e32fe613 100644 --- a/frontend/src/app/__tests__/page.test.tsx +++ b/frontend/src/app/__tests__/page.test.tsx @@ -91,9 +91,9 @@ function createProject(overrides: PartialProvision
++ Newsletter Maker +
+Project
+Invited email
+{invitation.email}
+Role
+{invitation.role}
++ Sign in as {invitation.email} to accept this invitation. +
+ + Sign in to continue + +Access
+Roster
+Current team
++ {membership.display_name || membership.username} +
+{membership.email}
++ Joined {formatDate(membership.joined_at)} +
+Pending access
+{invitation.email}
++ {invitation.role} β’ Invited by {invitation.invited_by_email || "system"} +
+Upload
++ {isUploading ? "Uploading avatar..." : "Drop an image here or click to browse."} +
++ PNG, JPEG, and WebP files up to 2 MB. +
+Avatar
++ Drop in a square image to personalize the editor cockpit and header menu. +
+ {profile.avatar_url ? ( + + ) : null} +Identity
+{account.name}
+{accountName}
- {account.email || "Signed in account"} + {accountEmail || "Signed in account"}
Confirm this sender for newsletter ingestion.
" + f'' + ) + + message = EmailMultiAlternatives( + subject=subject, + body=text_body, + from_email=settings.DEFAULT_FROM_EMAIL, + to=[to_email], + ) + message.attach_alternative(html_body, "text/html") + message.send() + + +def build_confirmation_url(token: str) -> str: + """Build the absolute confirmation URL for an allowlist token.""" + + base_url = settings.NEWSLETTER_API_BASE_URL.rstrip("/") + return f"{base_url}{reverse('confirm-newsletter-sender', kwargs={'token': token})}" + + +def process_inbound_newsletter( + *, + recipients: Iterable[str], + sender_email: str, + subject: str, + raw_html: str, + raw_text: str, + message_id: str, +) -> dict[str, Any]: + """Persist and route one inbound newsletter message.""" + + from core import newsletters as core_newsletters + + project = _find_intake_project(recipients) + if project is None: + return {"status": "ignored", "reason": "no_matching_project"} + + normalized_sender_email = normalize_sender_email(sender_email) + normalized_message_id = message_id.strip() + if not normalized_sender_email or not normalized_message_id: + return {"status": "ignored", "reason": "missing_sender_or_message_id"} + + defaults = { + "project": project, + "sender_email": normalized_sender_email, + "subject": subject[:512], + "raw_html": sanitize_newsletter_html(raw_html), + "raw_text": raw_text, + } + intake, created = NewsletterIntake.objects.get_or_create( + message_id=normalized_message_id, + defaults=defaults, + ) + if not created: + return {"id": intake.id, "status": intake.status, "duplicate": True} + + allowlist, allowlist_created = IntakeAllowlist.objects.get_or_create( + project=project, + sender_email=normalized_sender_email, + ) + + if allowlist.is_confirmed: + core_newsletters.queue_newsletter_intake(intake.id) + return {"id": intake.id, "status": intake.status} + + if allowlist_created: + core_newsletters.send_confirmation_email( + to_email=normalized_sender_email, + 
confirm_url=build_confirmation_url(allowlist.confirmation_token), + project_name=project.name, + ) + + return {"id": intake.id, "status": intake.status, "confirmation_required": True} + + +def queue_newsletter_intake(intake_id: int) -> None: + """Dispatch newsletter extraction for a stored intake row.""" + + process_newsletter_intake = current_app.tasks[ + "core.tasks.process_newsletter_intake" + ] + if settings.CELERY_TASK_ALWAYS_EAGER: + process_newsletter_intake.apply(args=(intake_id,), throw=True) + else: + process_newsletter_intake.delay(intake_id) + + +def _find_intake_project(recipients: Iterable[str]) -> Project | None: + """Resolve the first enabled project referenced by the recipient list.""" + + for recipient in recipients: + token = extract_project_token(recipient) + if token is None: + continue + project = Project.objects.filter( + intake_token=token, intake_enabled=True + ).first() + if project is not None: + return project + return None diff --git a/newsletters/migrations/0001_initial.py b/newsletters/migrations/0001_initial.py new file mode 100644 index 00000000..c153117d --- /dev/null +++ b/newsletters/migrations/0001_initial.py @@ -0,0 +1,117 @@ +import django.db.models.deletion +from django.db import migrations, models + +import newsletters.models + + +class Migration(migrations.Migration): + initial = True + + dependencies = [ + ("projects", "0003_remove_project_group"), + ("core", "0012_move_content_models_to_content_and_ingestion"), + ] + + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.CreateModel( + name="IntakeAllowlist", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("sender_email", models.EmailField(max_length=254)), + ("confirmed_at", models.DateTimeField(blank=True, null=True)), + ( + "confirmation_token", + models.CharField( + default=newsletters.models.generate_confirmation_token, + max_length=64, + 
unique=True, + ), + ), + ("created_at", models.DateTimeField(auto_now_add=True)), + ( + "project", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="intake_allowlist", + to="projects.project", + ), + ), + ], + options={ + "ordering": ["sender_email"], + "db_table": "core_intakeallowlist", + "constraints": [ + models.UniqueConstraint( + fields=("project", "sender_email"), + name="core_allowlist_unique_project_sender", + ) + ], + }, + ), + migrations.CreateModel( + name="NewsletterIntake", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("sender_email", models.EmailField(max_length=254)), + ("subject", models.CharField(max_length=512)), + ("received_at", models.DateTimeField(auto_now_add=True)), + ("raw_html", models.TextField(blank=True)), + ("raw_text", models.TextField(blank=True)), + ("message_id", models.CharField(max_length=255, unique=True)), + ( + "status", + models.CharField( + choices=[ + ("pending", "Pending"), + ("extracted", "Extracted"), + ("failed", "Failed"), + ("rejected", "Rejected"), + ], + default="pending", + max_length=16, + ), + ), + ("extraction_result", models.JSONField(blank=True, null=True)), + ("error_message", models.TextField(blank=True)), + ( + "project", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="newsletter_intakes", + to="projects.project", + ), + ), + ], + options={ + "ordering": ["-received_at"], + "db_table": "core_newsletterintake", + "indexes": [ + models.Index( + fields=["project", "sender_email", "status"], + name="core_newsle_project_eee7a4_idx", + ) + ], + }, + ), + ], + database_operations=[], + ) + ] diff --git a/newsletters/migrations/__init__.py b/newsletters/migrations/__init__.py new file mode 100644 index 00000000..45dcf979 --- /dev/null +++ b/newsletters/migrations/__init__.py @@ -0,0 +1 @@ +"""Migrations for the newsletters app.""" diff --git 
a/newsletters/models.py b/newsletters/models.py new file mode 100644 index 00000000..5ae7a2a9 --- /dev/null +++ b/newsletters/models.py @@ -0,0 +1,89 @@ +"""Newsletter-domain models split out from the historical core app.""" + +import secrets + +from django.db import models + + +def generate_confirmation_token() -> str: + """Generate a one-time token for newsletter sender confirmation links.""" + + return secrets.token_urlsafe(24) + + +class NewsletterIntakeStatus(models.TextChoices): + """Lifecycle states for a raw inbound newsletter email.""" + + PENDING = "pending", "Pending" + EXTRACTED = "extracted", "Extracted" + FAILED = "failed", "Failed" + REJECTED = "rejected", "Rejected" + + +class IntakeAllowlist(models.Model): + """Tracks who is allowed to send newsletters into a project inbox.""" + + project = models.ForeignKey( + "projects.Project", on_delete=models.CASCADE, related_name="intake_allowlist" + ) + sender_email = models.EmailField() + confirmed_at = models.DateTimeField(null=True, blank=True) + confirmation_token = models.CharField( + max_length=64, unique=True, default=generate_confirmation_token + ) + created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + ordering = ["sender_email"] + db_table = "core_intakeallowlist" + constraints = [ + models.UniqueConstraint( + fields=["project", "sender_email"], + name="core_allowlist_unique_project_sender", + ), + ] + + def __str__(self) -> str: + return f"{self.sender_email} for {self.project.name}" + + @property + def is_confirmed(self) -> bool: + """Return whether the sender has confirmed newsletter intake access.""" + + return self.confirmed_at is not None + + +class NewsletterIntake(models.Model): + """Stores a raw inbound newsletter email before extraction.""" + + project = models.ForeignKey( + "projects.Project", + on_delete=models.CASCADE, + related_name="newsletter_intakes", + ) + sender_email = models.EmailField() + subject = models.CharField(max_length=512) + received_at = 
models.DateTimeField(auto_now_add=True) + raw_html = models.TextField(blank=True) + raw_text = models.TextField(blank=True) + message_id = models.CharField(max_length=255, unique=True) + status = models.CharField( + max_length=16, + choices=NewsletterIntakeStatus.choices, + default=NewsletterIntakeStatus.PENDING, + ) + extraction_result = models.JSONField(null=True, blank=True) + error_message = models.TextField(blank=True) + + class Meta: + ordering = ["-received_at"] + db_table = "core_newsletterintake" + indexes = [ + models.Index( + fields=["project", "sender_email", "status"], + name="core_newsle_project_eee7a4_idx", + ) + ] + + def __str__(self) -> str: + return f"{self.subject or self.message_id}" diff --git a/newsletters/serializers.py b/newsletters/serializers.py new file mode 100644 index 00000000..ed6addf6 --- /dev/null +++ b/newsletters/serializers.py @@ -0,0 +1,57 @@ +"""DRF serializers for newsletter-domain models.""" + +from rest_framework import serializers + +from core.serializer_mixins import ProjectScopedSerializerMixin +from newsletters.models import IntakeAllowlist, NewsletterIntake + + +class IntakeAllowlistSerializer( + ProjectScopedSerializerMixin, serializers.ModelSerializer +): + """Serialize confirmed and pending newsletter sender allowlist entries.""" + + is_confirmed = serializers.BooleanField(read_only=True) + + class Meta: + model = IntakeAllowlist + fields = [ + "id", + "project", + "sender_email", + "is_confirmed", + "confirmed_at", + "confirmation_token", + "created_at", + ] + read_only_fields = ["id", "project", "confirmation_token", "created_at"] + + +class NewsletterIntakeSerializer( + ProjectScopedSerializerMixin, serializers.ModelSerializer +): + """Serialize raw inbound newsletter messages captured for a project.""" + + class Meta: + model = NewsletterIntake + fields = [ + "id", + "project", + "sender_email", + "subject", + "received_at", + "raw_html", + "raw_text", + "message_id", + "status", + "extraction_result", + 
"error_message", + ] + read_only_fields = [ + "id", + "project", + "received_at", + "status", + "extraction_result", + "error_message", + ] diff --git a/newsletters/signals.py b/newsletters/signals.py new file mode 100644 index 00000000..fbd44ba3 --- /dev/null +++ b/newsletters/signals.py @@ -0,0 +1,52 @@ +"""Signal handlers that adapt inbound email events to newsletter intake logic.""" + +from __future__ import annotations + +from typing import Any + +from anymail.signals import inbound +from django.dispatch import receiver + +from core.newsletters import process_inbound_newsletter + + +def _address_to_string(address) -> str: + """Normalize an Anymail address object or string into plain text.""" + + if address is None: + return "" + addr_spec = getattr(address, "addr_spec", None) + if isinstance(addr_spec, str): + return addr_spec.strip() + return str(address).strip() + + +@receiver(inbound) +def handle_anymail_inbound( + sender: Any, + event: Any, + esp_name: str, + **kwargs: Any, +) -> None: + """Translate an inbound Anymail event into the internal intake payload.""" + + message = event.message + + recipients: list[str] = [] + if message.envelope_recipient: + recipients.append(message.envelope_recipient) + recipients.extend( + address.addr_spec + for address in getattr(message, "to", []) + if getattr(address, "addr_spec", "") + ) + + process_inbound_newsletter( + recipients=recipients, + sender_email=message.envelope_sender + or _address_to_string(getattr(message, "from_email", None)), + subject=message.subject or "", + raw_html=message.html or "", + raw_text=message.text or "", + message_id=str(message.get("Message-ID", "") or event.event_id or ""), + ) diff --git a/newsletters/tasks.py b/newsletters/tasks.py new file mode 100644 index 00000000..2e2f4609 --- /dev/null +++ b/newsletters/tasks.py @@ -0,0 +1,94 @@ +"""Celery tasks and helpers for newsletter intake processing.""" + +from celery import shared_task +from django.conf import settings +from 
django.db.models import Q +from django.utils import timezone + +from content.models import Content +from core.deduplication import canonicalize_url +from newsletters.models import IntakeAllowlist, NewsletterIntake, NewsletterIntakeStatus + + +@shared_task(name="core.tasks.process_newsletter_intake") +def process_newsletter_intake(intake_id: int): + """Convert a stored newsletter email into content rows.""" + + from core.newsletters import extract_newsletter_items + + intake = NewsletterIntake.objects.select_related("project").get(pk=intake_id) + + allowlist = IntakeAllowlist.objects.filter( + project=intake.project, + sender_email=intake.sender_email, + confirmed_at__isnull=False, + ).first() + if allowlist is None: + intake.status = NewsletterIntakeStatus.PENDING + intake.error_message = "Sender has not confirmed newsletter intake." + intake.save(update_fields=["status", "error_message"]) + return {"status": intake.status, "items_ingested": 0} + + extracted_items = extract_newsletter_items( + subject=intake.subject, + raw_html=intake.raw_html, + raw_text=intake.raw_text, + ) + ingested_count = 0 + for item in extracted_items: + canonical_url = canonicalize_url(item.url) + if ( + Content.objects.filter( + project=intake.project, + source_plugin="newsletter", + ) + .filter(Q(canonical_url=canonical_url) | Q(url=item.url)) + .exists() + ): + continue + content = Content.objects.create( + project=intake.project, + url=item.url, + canonical_url=canonical_url, + title=item.title[:512], + author=intake.sender_email[:255], + source_plugin="newsletter", + published_date=timezone.now(), + content_text=item.excerpt or intake.raw_text, + source_metadata={ + "newsletter_intake_id": intake.id, + "sender_email": intake.sender_email, + "position": item.position, + }, + ) + _schedule_content_processing(content) + ingested_count += 1 + + intake.status = NewsletterIntakeStatus.EXTRACTED + intake.error_message = "" + intake.extraction_result = { + "method": "heuristic", + "items": [ 
+ { + "url": item.url, + "title": item.title, + "excerpt": item.excerpt, + "position": item.position, + } + for item in extracted_items + ], + } + intake.save(update_fields=["status", "error_message", "extraction_result"]) + return {"status": intake.status, "items_ingested": ingested_count} + + +def _schedule_content_processing(content: Content) -> None: + """Ensure a content row is embedded before it enters the AI pipeline.""" + + from core.tasks import process_content, upsert_content_embedding + + upsert_content_embedding(content) + if settings.CELERY_TASK_ALWAYS_EAGER: + process_content(content.id) + else: + process_content.delay(content.id) diff --git a/newsletters/views.py b/newsletters/views.py new file mode 100644 index 00000000..97c48ff1 --- /dev/null +++ b/newsletters/views.py @@ -0,0 +1,31 @@ +"""Newsletter-intake views used outside the REST API.""" + +from django.http import HttpRequest, JsonResponse +from django.shortcuts import get_object_or_404 +from django.utils import timezone +from django.views.decorators.http import require_GET + +from core.newsletters import queue_newsletter_intake +from newsletters.models import IntakeAllowlist, NewsletterIntake, NewsletterIntakeStatus + + +@require_GET +def confirm_newsletter_sender_view(request: HttpRequest, token: str): + """Confirm a sender and queue any pending newsletter intake rows.""" + + allowlist = get_object_or_404(IntakeAllowlist, confirmation_token=token) + if allowlist.confirmed_at is None: + allowlist.confirmed_at = timezone.now() + allowlist.save(update_fields=["confirmed_at"]) + + pending_intake_ids = list( + NewsletterIntake.objects.filter( + project=allowlist.project, + sender_email=allowlist.sender_email, + status=NewsletterIntakeStatus.PENDING, + ).values_list("id", flat=True) + ) + for intake_id in pending_intake_ids: + queue_newsletter_intake(intake_id) + + return JsonResponse({"status": "confirmed", "queued": len(pending_intake_ids)}) diff --git a/pipeline/__init__.py 
b/pipeline/__init__.py new file mode 100644 index 00000000..ecf5629a --- /dev/null +++ b/pipeline/__init__.py @@ -0,0 +1 @@ +"""Pipeline-domain models, admin, and API ownership.""" diff --git a/pipeline/admin.py b/pipeline/admin.py new file mode 100644 index 00000000..b5f1dcbd --- /dev/null +++ b/pipeline/admin.py @@ -0,0 +1,222 @@ +"""Admin configuration for pipeline-domain models.""" + +import json + +from django.contrib import admin, messages +from django.db.models import Avg +from django.utils.html import format_html +from django.utils.safestring import mark_safe +from unfold.admin import ModelAdmin + +from pipeline.models import ReviewQueue, SkillResult + + +@admin.register(SkillResult) +class SkillResultAdmin(ModelAdmin): + """Admin view for AI skill history, retries, and result inspection.""" + + list_display = ( + "skill_name", + "get_content_link", + "display_status", + "display_performance", + "preview_json", + "is_current", + "model_used", + "created_at", + ) + list_filter = ("status", "skill_name", "project", "model_used") + search_fields = ("skill_name", "content__title", "model_used", "error_message") + actions = ["retry_selected_skills"] + readonly_fields = ( + "pretty_result_data", + "latency_ms", + "created_at", + "superseded_by", + ) + fieldsets = ( + ( + "Execution Details", + {"fields": ("skill_name", "content", "project", "status", "model_used")}, + ), + ( + "AI Output", + { + "fields": ("pretty_result_data", "error_message"), + }, + ), + ( + "Performance Metrics", + {"fields": ("latency_ms", "confidence", "created_at", "superseded_by")}, + ), + ) + + @admin.action(description="Retry Selected Skills") + def retry_selected_skills(self, request, queryset): + """Reset status to pending and clear errors for retry by the worker.""" + + updated = queryset.update(status="pending", error_message="") + self.message_user( + request, + f"Successfully reset {updated} skills to PENDING for retry.", + messages.SUCCESS, + ) + + 
@admin.display(description="Result Preview") + def preview_json(self, obj): + """Link that triggers Unfold's detail view.""" + + if not obj.result_data: + return "-" + return format_html( + 'π Preview', + f"{obj.pk}/change/", + ) + + @admin.display(description="Content") + def get_content_link(self, obj): + """Return a compact content title for the table view.""" + + return obj.content.title[:30] + "..." if obj.content.title else "Untitled" + + @admin.display(description="Status") + def display_status(self, obj): + """Render the skill status as a colored dot plus label.""" + + status_value = str(obj.status).lower() + colors = {"completed": "green", "failed": "red", "pending": "orange"} + color = colors.get(status_value, "gray") + return format_html( + 'β {}', + color, + status_value.upper(), + ) + + @admin.display(description="Perf / Conf") + def display_performance(self, obj): + """Show latency and confidence together in a compact cell.""" + + latency = f"{obj.latency_ms}ms" if obj.latency_ms else "-" + conf = f"{int(obj.confidence * 100)}%" if obj.confidence is not None else "-" + return f"{latency} / {conf}" + + @admin.display(description="Current", boolean=True) + def is_current(self, obj): + """Return whether this row is the most recent non-superseded result.""" + + return obj.superseded_by is None + + @admin.display(description="Result Data JSON") + def pretty_result_data(self, obj): + """Render result JSON in a readable preformatted block.""" + + if not obj.result_data: + return "No data available" + formatted_json = json.dumps(obj.result_data, indent=4) + return mark_safe( + f''
+ f"{formatted_json}"
+ f""
+ )
+
+    def changelist_view(self, request, extra_context=None):
+        """Add average-latency and failure-rate cards to the changelist page.
+
+        The figures are computed over ``self.get_queryset(request)`` and
+        stored under ``extra_context["dashboard_stats"]`` before the parent
+        ``changelist_view`` is called with that context.
+        """
+
+        if not extra_context:
+            extra_context = {}
+
+        results = self.get_queryset(request)
+        mean_latency = results.aggregate(avg_latency=Avg("latency_ms"))["avg_latency"]
+        n_total = results.count()
+        n_failed = results.filter(status__iexact="failed").count()
+
+        # The aggregate can be None; in that case the card shows a dash.
+        if mean_latency is None:
+            latency_text = "-"
+        else:
+            latency_text = f"{mean_latency:.0f}ms"
+        if mean_latency and mean_latency > 3000:
+            latency_color = "warning"
+        else:
+            latency_color = "success"
+
+        # Guard the division: with no rows the rate is reported as a flat 0.0%.
+        if n_total:
+            failure_text = f"{(n_failed / n_total) * 100:.1f}%"
+        else:
+            failure_text = "0.0%"
+        failure_color = "danger" if n_failed > 0 else "success"
+
+        latency_card = {
+            "title": "Avg Latency",
+            "value": latency_text,
+            "icon": "timer",
+            "color": latency_color,
+        }
+        failure_card = {
+            "title": "Failure Rate",
+            "value": failure_text,
+            "icon": "error",
+            "color": failure_color,
+        }
+        extra_context["dashboard_stats"] = [latency_card, failure_card]
+        return super().changelist_view(request, extra_context=extra_context)
+
+
+@admin.register(ReviewQueue)
+class ReviewQueueAdmin(ModelAdmin):
+    """Admin view for items waiting on editorial judgment.
+
+    Provides inline editing of ``resolved``/``resolution``, bulk approve and
+    reject actions, and a changelist header with pending-volume and
+    average-confidence figures.
+    """
+
+    # Values written by the bulk actions. They must equal the stored values of
+    # ``ReviewResolution`` in ``pipeline.models`` ("human_approved" /
+    # "human_rejected"): ``QuerySet.update()`` performs no field validation,
+    # so any other string would be persisted even though it is not a valid
+    # choice for the ``resolution`` field.
+    RESOLUTION_APPROVED = "human_approved"
+    RESOLUTION_REJECTED = "human_rejected"
+    # Titles longer than this are cut and marked with an ellipsis.
+    TITLE_PREVIEW_LENGTH = 50
+
+    list_display = (
+        "get_content_title",
+        "project",
+        "reason",
+        "display_confidence",
+        "resolved",
+        "resolution",
+        "created_at",
+    )
+    list_filter = ("resolved", "reason", ("project", admin.RelatedOnlyFieldListFilter))
+    list_editable = ("resolved", "resolution")
+    actions = ["mark_as_approved", "mark_as_rejected"]
+
+    @admin.display(description="Content")
+    def get_content_title(self, obj):
+        """Return the content title, shortened for list display.
+
+        An ellipsis is appended only when the title was actually truncated.
+        An empty title is shown as "Untitled", matching the fallback used by
+        ``SkillResultAdmin.get_content_link``.
+        """
+
+        title = obj.content.title
+        if not title:
+            return "Untitled"
+        if len(title) <= self.TITLE_PREVIEW_LENGTH:
+            return title
+        return title[: self.TITLE_PREVIEW_LENGTH] + "..."
+
+    @admin.display(description="Confidence")
+    def display_confidence(self, obj):
+        """Render confidence as a colored percentage.
+
+        Values below 0.3 are red, values below 0.6 are orange, and everything
+        else is green.
+        """
+
+        if obj.confidence < 0.3:
+            color = "red"
+        elif obj.confidence < 0.6:
+            color = "orange"
+        else:
+            color = "green"
+        confidence_label = f"{obj.confidence * 100:.0f}%"
+        # One placeholder per argument: the first fills the CSS color, the
+        # second the visible text. With a single "{}" only the color name
+        # would be rendered and the percentage would be dropped.
+        return format_html(
+            '<span style="color: {};">{}</span>', color, confidence_label
+        )
+
+    @admin.action(description="Approve selected items")
+    def mark_as_approved(self, request, queryset):
+        """Resolve the selected review items as approved by a human."""
+
+        queryset.update(resolved=True, resolution=self.RESOLUTION_APPROVED)
+        self.message_user(request, "Selected items approved.", messages.SUCCESS)
+
+    @admin.action(description="Reject selected items")
+    def mark_as_rejected(self, request, queryset):
+        """Resolve the selected review items as rejected by a human."""
+
+        queryset.update(resolved=True, resolution=self.RESOLUTION_REJECTED)
+        self.message_user(request, "Selected items rejected.", messages.WARNING)
+
+    def changelist_view(self, request, extra_context=None):
+        """Augment the changelist with pending-volume and confidence stats.
+
+        Both figures are computed over ``self.get_queryset(request)`` and
+        passed to the parent view as ``extra_context["dashboard_stats"]``.
+        """
+
+        qs = self.get_queryset(request)
+        extra_context = extra_context or {}
+        pending_count = qs.filter(resolved=False).count()
+        # The aggregate is None when there are no rows; report 0% then.
+        avg_conf = qs.aggregate(avg_confidence=Avg("confidence"))["avg_confidence"] or 0
+
+        extra_context["dashboard_stats"] = [
+            {
+                "title": "Pending Review",
+                "value": pending_count,
+                "icon": "pending_actions",
+                "color": "danger" if pending_count > 10 else "success",
+            },
+            {
+                "title": "Avg Confidence",
+                "value": f"{avg_conf * 100:.0f}%",
+                "icon": "psychology",
+            },
+        ]
+        return super().changelist_view(request, extra_context=extra_context)
diff --git a/pipeline/api.py b/pipeline/api.py
new file mode 100644
index 00000000..c5946c37
--- /dev/null
+++ b/pipeline/api.py
@@ -0,0 +1,64 @@
+"""Pipeline-domain API viewsets kept under the existing nested project routes."""
+
+from rest_framework import viewsets
+
+from core.api import (
+ SKILL_RESULT_RESPONSE_EXAMPLE,
+ ProjectOwnedQuerysetMixin,
+ build_crud_action_overrides,
+ document_project_owned_viewset,
+)
+from core.permissions import IsProjectContributor, IsProjectMember, IsProjectMemberWritable
+from pipeline.models import ReviewQueue, SkillResult
+from pipeline.serializers import ReviewQueueSerializer, SkillResultSerializer
+
+
+@document_project_owned_viewset(
+    tag="AI Processing",
+    resource_singular="skill result",
+    resource_plural="skill results",
+    create_description="Create a new skill result for project content. The referenced content must belong to the selected project.",
+    action_overrides=build_crud_action_overrides(
+        SkillResultSerializer,
+        retrieve_examples=[SKILL_RESULT_RESPONSE_EXAMPLE],
+        resource_singular="skill result",
+        resource_plural="skill results for the selected project",
+    ),
+)
+class SkillResultViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
+    """CRUD endpoints over the AI skill outputs stored for a project's content."""
+
+    # Fetch content, project and superseded_by in the same query as each result.
+    queryset = SkillResult.objects.select_related("content", "project", "superseded_by")
+    serializer_class = SkillResultSerializer
+
+    def get_permissions(self):
+        """Use IsProjectMemberWritable for mutating actions, IsProjectMember otherwise."""
+
+        mutating = self.action in {"create", "update", "partial_update", "destroy"}
+        # Exactly one permission instance is returned, chosen by the action kind.
+        gate = IsProjectMemberWritable if mutating else IsProjectMember
+        return [gate()]
+
+
+@document_project_owned_viewset(
+    tag="Review Queue",
+    resource_singular="review queue entry",
+    resource_plural="review queue entries",
+    create_description="Create a new review queue entry for the selected project. The referenced content must belong to the same project.",
+    action_overrides=build_crud_action_overrides(
+        ReviewQueueSerializer,
+        resource_singular="review queue entry",
+        resource_plural="review queue entries for the selected project",
+    ),
+)
+class ReviewQueueViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
+    """CRUD endpoints for content items that are waiting on a manual review."""
+
+    queryset = ReviewQueue.objects.select_related("content", "project")
+    serializer_class = ReviewQueueSerializer
+
+    def get_permissions(self):
+        """Require IsProjectContributor for every action on this viewset."""
+        contributor_gate = IsProjectContributor()
+        return [contributor_gate]
diff --git a/pipeline/api_urls.py b/pipeline/api_urls.py
new file mode 100644
index 00000000..8048e5ec
--- /dev/null
+++ b/pipeline/api_urls.py
@@ -0,0 +1,16 @@
+"""API route registration for pipeline resources."""
+
+from rest_framework_nested.routers import NestedSimpleRouter
+
+from pipeline.api import ReviewQueueViewSet, SkillResultViewSet
+
+
+def register_project_routes(project_router: NestedSimpleRouter) -> None:
+    """Attach the pipeline viewsets to the nested per-project router."""
+    routes = (
+        (r"skill-results", SkillResultViewSet, "project-skill-result"),
+        (r"review-queue", ReviewQueueViewSet, "project-review-queue"),
+    )
+    # Registration order is kept: skill results first, then the review queue.
+    for prefix, viewset, basename in routes:
+        project_router.register(prefix, viewset, basename=basename)
diff --git a/pipeline/apps.py b/pipeline/apps.py
new file mode 100644
index 00000000..53ff1f2a
--- /dev/null
+++ b/pipeline/apps.py
@@ -0,0 +1,10 @@
+"""Django app configuration for the pipeline domain."""
+
+from django.apps import AppConfig
+
+
+class PipelineConfig(AppConfig):
+    """App registration for ``pipeline``; models default to BigAutoField keys."""
+
+    name = "pipeline"
+    default_auto_field = "django.db.models.BigAutoField"
diff --git a/pipeline/migrations/0001_initial.py b/pipeline/migrations/0001_initial.py
new file mode 100644
index 00000000..bad9c178
--- /dev/null
+++ b/pipeline/migrations/0001_initial.py
@@ -0,0 +1,154 @@
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Register SkillResult and ReviewQueue in the pipeline app's migration state.
+
+ Both CreateModel operations are wrapped in SeparateDatabaseAndState with an
+ empty ``database_operations`` list, so applying this migration updates
+ Django's model state without issuing schema changes.
+ """
+
+ # NOTE(review): presumably the "core_*" tables named below already exist
+ # from the core app's earlier migrations - confirm against core 0013.
+ initial = True
+
+ # Runs after the projects, content and core migrations listed here.
+ dependencies = [
+ ("projects", "0003_remove_project_group"),
+ ("content", "0001_initial"),
+ ("core", "0013_move_newsletter_models_to_newsletters"),
+ ]
+
+ operations = [
+ # State-only change: every CreateModel below lives in state_operations.
+ migrations.SeparateDatabaseAndState(
+ state_operations=[
+ migrations.CreateModel(
+ name="SkillResult",
+ fields=[
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ ("skill_name", models.CharField(max_length=64)),
+ (
+ "status",
+ models.CharField(
+ choices=[
+ ("pending", "Pending"),
+ ("running", "Running"),
+ ("completed", "Completed"),
+ ("failed", "Failed"),
+ ],
+ max_length=16,
+ ),
+ ),
+ ("result_data", models.JSONField(blank=True, null=True)),
+ ("error_message", models.TextField(blank=True)),
+ ("model_used", models.CharField(blank=True, max_length=64)),
+ ("latency_ms", models.IntegerField(blank=True, null=True)),
+ ("confidence", models.FloatField(blank=True, null=True)),
+ ("created_at", models.DateTimeField(auto_now_add=True)),
+ (
+ "content",
+ models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="skill_results",
+ to="content.content",
+ ),
+ ),
+ (
+ "project",
+ models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="skill_results",
+ to="projects.project",
+ ),
+ ),
+ # Self-referential link to the SkillResult that supersedes this
+ # row; set to NULL if that row is deleted.
+ (
+ "superseded_by",
+ models.ForeignKey(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.SET_NULL,
+ related_name="supersedes",
+ to="pipeline.skillresult",
+ ),
+ ),
+ ],
+ options={
+ "ordering": ["-created_at"],
+ # Explicit table name with the "core_" prefix instead of the
+ # pipeline app's default table name.
+ "db_table": "core_skillresult",
+ "indexes": [
+ models.Index(
+ fields=["content", "skill_name"],
+ name="core_skillr_content_0d49f9_idx",
+ ),
+ models.Index(
+ fields=["project", "created_at"],
+ name="core_skillr_project_60360b_idx",
+ ),
+ ],
+ },
+ ),
+ migrations.CreateModel(
+ name="ReviewQueue",
+ fields=[
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ (
+ "reason",
+ models.CharField(
+ choices=[
+ (
+ "low_confidence_classification",
+ "Low Confidence Classification",
+ ),
+ ("borderline_relevance", "Borderline Relevance"),
+ ],
+ max_length=64,
+ ),
+ ),
+ ("confidence", models.FloatField()),
+ ("created_at", models.DateTimeField(auto_now_add=True)),
+ ("resolved", models.BooleanField(default=False)),
+ # Blank is allowed, so an item can exist without a resolution.
+ (
+ "resolution",
+ models.CharField(
+ blank=True,
+ choices=[
+ ("human_approved", "Human Approved"),
+ ("human_rejected", "Human Rejected"),
+ ],
+ max_length=64,
+ ),
+ ),
+ (
+ "content",
+ models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="review_queue_items",
+ to="content.content",
+ ),
+ ),
+ (
+ "project",
+ models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="review_queue_items",
+ to="projects.project",
+ ),
+ ),
+ ],
+ options={
+ "ordering": ["resolved", "-created_at"],
+ # Explicit table name with the "core_" prefix, as above.
+ "db_table": "core_reviewqueue",
+ },
+ ),
+ ],
+ # No database operations: this migration issues no schema changes.
+ database_operations=[],
+ )
+ ]
diff --git a/pipeline/migrations/__init__.py b/pipeline/migrations/__init__.py
new file mode 100644
index 00000000..fff661a5
--- /dev/null
+++ b/pipeline/migrations/__init__.py
@@ -0,0 +1 @@
+"""Migration package for the pipeline app."""
diff --git a/pipeline/models.py b/pipeline/models.py
new file mode 100644
index 00000000..bf063d53
--- /dev/null
+++ b/pipeline/models.py
@@ -0,0 +1,100 @@
+"""Pipeline-domain models for persisted AI workflow state."""
+
+from django.db import models
+
+from content.models import Content
+from projects.models import Project
+
+
+class SkillStatus(models.TextChoices):
+ """Execution states recorded for AI skill runs.
+
+ Each member pairs the stored string with its display label; the set is
+ used as the ``choices`` of ``SkillResult.status``.
+ """
+
+ PENDING = "pending", "Pending"
+ RUNNING = "running", "Running"
+ COMPLETED = "completed", "Completed"
+ FAILED = "failed", "Failed"
+
+
+class ReviewReason(models.TextChoices):
+ """Reasons content is pushed to the manual review queue.
+
+ Each member pairs the stored string with its display label; the set is
+ used as the ``choices`` of ``ReviewQueue.reason``.
+ """
+
+ LOW_CONFIDENCE_CLASSIFICATION = (
+ "low_confidence_classification",
+ "Low Confidence Classification",
+ )
+ BORDERLINE_RELEVANCE = "borderline_relevance", "Borderline Relevance"
+
+
+class ReviewResolution(models.TextChoices):
+ """Human outcomes for review queue items.
+
+ The stored values are the lowercase strings below; the set is used as the
+ ``choices`` of ``ReviewQueue.resolution``, which may also be left blank.
+ """
+
+ HUMAN_APPROVED = "human_approved", "Human Approved"
+ HUMAN_REJECTED = "human_rejected", "Human Rejected"
+
+
+class SkillResult(models.Model):
+ """Persist the output of one AI skill execution for a content item.
+
+ Rows are returned newest first and live in the ``core_skillresult`` table.
+ """
+
+ # Deleting the content or the project deletes its skill results (CASCADE).
+ content = models.ForeignKey(
+ Content, on_delete=models.CASCADE, related_name="skill_results"
+ )
+ project = models.ForeignKey(
+ Project, on_delete=models.CASCADE, related_name="skill_results"
+ )
+ skill_name = models.CharField(max_length=64)
+ # One of the SkillStatus values; no default, so callers must supply it.
+ status = models.CharField(max_length=16, choices=SkillStatus.choices)
+ # Optional JSON payload; NULL when no result was stored.
+ result_data = models.JSONField(null=True, blank=True)
+ error_message = models.TextField(blank=True)
+ model_used = models.CharField(max_length=64, blank=True)
+ # Presumably milliseconds, per the field name - TODO confirm with the writer.
+ latency_ms = models.IntegerField(null=True, blank=True)
+ # NOTE(review): assumed to be a 0-1 fraction - confirm against the producer.
+ confidence = models.FloatField(null=True, blank=True)
+ created_at = models.DateTimeField(auto_now_add=True)
+ # Self-referential link to the result that supersedes this one; reset to
+ # NULL if that result is deleted. The reverse accessor is ``supersedes``.
+ superseded_by = models.ForeignKey(
+ "self",
+ null=True,
+ blank=True,
+ on_delete=models.SET_NULL,
+ related_name="supersedes",
+ )
+
+ class Meta:
+ ordering = ["-created_at"]
+ # Explicit table name with the "core_" prefix rather than the default
+ # derived from this app's label.
+ db_table = "core_skillresult"
+ # Index names are pinned explicitly and also carry the "core_" prefix.
+ indexes = [
+ models.Index(
+ fields=["content", "skill_name"],
+ name="core_skillr_content_0d49f9_idx",
+ ),
+ models.Index(
+ fields=["project", "created_at"],
+ name="core_skillr_project_60360b_idx",
+ ),
+ ]
+
+ def __str__(self) -> str:
+ # Reads the related content's title, which loads the content row if it
+ # is not already cached on this instance.
+ return f"{self.skill_name} for {self.content.title}"
+
+
+class ReviewQueue(models.Model):
+ """Track content items that require a human decision.
+
+ Rows are ordered by ``resolved`` and then newest first, and live in the
+ ``core_reviewqueue`` table.
+ """
+
+ # Deleting the project or the content deletes its review items (CASCADE).
+ project = models.ForeignKey(
+ Project, on_delete=models.CASCADE, related_name="review_queue_items"
+ )
+ content = models.ForeignKey(
+ Content, on_delete=models.CASCADE, related_name="review_queue_items"
+ )
+ # One of the ReviewReason values.
+ reason = models.CharField(max_length=64, choices=ReviewReason.choices)
+ # Required (not nullable). NOTE(review): assumed to be a 0-1 fraction -
+ # confirm against the code that enqueues items.
+ confidence = models.FloatField()
+ created_at = models.DateTimeField(auto_now_add=True)
+ resolved = models.BooleanField(default=False)
+ # One of the ReviewResolution values, or blank when no outcome is recorded.
+ resolution = models.CharField(
+ max_length=64, choices=ReviewResolution.choices, blank=True
+ )
+
+ class Meta:
+ ordering = ["resolved", "-created_at"]
+ # Explicit table name with the "core_" prefix rather than the default
+ # derived from this app's label.
+ db_table = "core_reviewqueue"
+
+ def __str__(self) -> str:
+ # Reads the related content's title, which loads the content row if it
+ # is not already cached on this instance.
+ return f"{self.reason} for {self.content.title}"
diff --git a/pipeline/serializers.py b/pipeline/serializers.py
new file mode 100644
index 00000000..21c9052e
--- /dev/null
+++ b/pipeline/serializers.py
@@ -0,0 +1,76 @@
+"""DRF serializers for pipeline-domain models."""
+
+from rest_framework import serializers
+
+from core.serializer_mixins import ProjectScopedSerializerMixin
+from pipeline.models import ReviewQueue, SkillResult
+
+
+class SkillResultSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer):
+    """Expose stored AI skill runs and keep their content inside one project."""
+
+    class Meta:
+        model = SkillResult
+        fields = [
+            "id",
+            "content",
+            "project",
+            "skill_name",
+            "status",
+            "result_data",
+            "error_message",
+            "model_used",
+            "latency_ms",
+            "confidence",
+            "created_at",
+            "superseded_by",
+        ]
+        read_only_fields = ["id", "project", "created_at"]
+
+    def validate(self, attrs):
+        """Ensure the referenced content is owned by the project in effect.
+
+        The project is taken from the serializer context, then from the
+        incoming attributes, then from the instance being updated. Content
+        falls back from the incoming attributes to the instance. When either
+        one cannot be determined the check is skipped.
+
+        Raises:
+            serializers.ValidationError: keyed on ``content`` when the content
+                belongs to a different project.
+        """
+
+        instance = self.instance
+
+        project = self.context.get("project") or attrs.get("project")
+        if not project:
+            project = getattr(instance, "project", None)
+
+        content = attrs.get("content")
+        if not content:
+            content = getattr(instance, "content", None)
+
+        # Nothing to compare unless both sides are known.
+        if not (project and content):
+            return attrs
+        if content.project_id == project.id:
+            return attrs
+        raise serializers.ValidationError(
+            {"content": "Content must belong to the selected project."}
+        )
+
+
+class ReviewQueueSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer):
+    """Expose manual-review queue rows and keep their content inside one project."""
+
+    class Meta:
+        model = ReviewQueue
+        fields = [
+            "id",
+            "project",
+            "content",
+            "reason",
+            "confidence",
+            "created_at",
+            "resolved",
+            "resolution",
+        ]
+        read_only_fields = ["id", "project", "created_at"]
+
+    def validate(self, attrs):
+        """Check that the queued content is owned by the project in effect.
+
+        Project lookup order: serializer context, incoming attributes, then
+        the instance being updated. Content lookup order: incoming attributes,
+        then the instance. The comparison only runs when both are available.
+
+        Raises:
+            serializers.ValidationError: keyed on ``content`` when the content
+                belongs to a different project.
+        """
+
+        project = self.context.get("project")
+        if not project:
+            project = attrs.get("project")
+        if not project:
+            project = getattr(self.instance, "project", None)
+
+        content = attrs.get("content")
+        if not content:
+            content = getattr(self.instance, "content", None)
+
+        belongs_elsewhere = bool(
+            project and content and content.project_id != project.id
+        )
+        if belongs_elsewhere:
+            raise serializers.ValidationError(
+                {"content": "Content must belong to the selected project."}
+            )
+        return attrs
diff --git a/projects/__init__.py b/projects/__init__.py
new file mode 100644
index 00000000..0c9b9aad
--- /dev/null
+++ b/projects/__init__.py
@@ -0,0 +1 @@
+"""Projects app package."""
diff --git a/projects/admin.py b/projects/admin.py
new file mode 100644
index 00000000..3976d199
--- /dev/null
+++ b/projects/admin.py
@@ -0,0 +1,308 @@
+"""Admin configuration for project-owned models."""
+
+import json
+
+from django import forms
+from django.contrib import admin, messages
+from django.utils import timezone
+from django.utils.html import format_html
+from django.utils.safestring import mark_safe
+from import_export.admin import ExportActionMixin
+from unfold.admin import ModelAdmin
+
+from core.plugins import get_plugin_for_source_config, validate_plugin_config
+from projects.models import (
+ BlueskyCredentials,
+ Project,
+ ProjectConfig,
+ ProjectMembership,
+ SourceConfig,
+)
+
+
+class ProjectMembershipInline(admin.TabularInline):
+ """Edit project memberships inline from the project admin."""
+
+ model = ProjectMembership
+ # Do not render blank extra rows by default.
+ extra = 0
+ # Use autocomplete widgets for the "user" and "invited_by" relations.
+ autocomplete_fields = ("user", "invited_by")
+
+
+class BlueskyCredentialsAdminForm(forms.ModelForm):
+    """Model form that adds a write-only input for the Bluesky app credential."""
+
+    # Extra, non-model field: never pre-filled (render_value=False) and never
+    # stripped, so the value reaches the model exactly as typed.
+    credential_input = forms.CharField(
+        label="Bluesky app credential",
+        help_text="Leave blank to keep the existing stored credential.",
+        widget=forms.PasswordInput(render_value=False),
+        required=False,
+        strip=False,
+    )
+
+    class Meta:
+        model = BlueskyCredentials
+        fields = ["project", "handle", "pds_url", "is_active"]
+
+    def clean(self):
+        """Flag a missing credential when the instance has none stored yet."""
+
+        data = super().clean()
+        supplied = data.get("credential_input", "")
+        if not (self.instance.has_stored_credential() or supplied):
+            self.add_error("credential_input", "A Bluesky app credential is required.")
+        return data
+
+    def save(self, commit=True):
+        """Store a newly entered credential on the instance, then save if asked.
+
+        A blank input leaves whatever credential is already stored untouched.
+        """
+
+        instance = super().save(commit=False)
+        new_credential = self.cleaned_data.get("credential_input", "")
+        if new_credential:
+            instance.set_stored_credential(new_credential)
+        if not commit:
+            return instance
+        instance.save()
+        return instance
+
+
+@admin.register(Project)
+class ProjectAdmin(ExportActionMixin, admin.ModelAdmin):
+ """Admin configuration for top-level project workspaces."""
+
+ list_display = ("name", "content_retention_days", "created_at")
+ date_hierarchy = "created_at"
+ list_filter = ("created_at",)
+ # Other admins in this module reference Project through autocomplete_fields,
+ # which relies on search_fields being defined here.
+ search_fields = ("name",)
+ # Retention can be edited directly from the changelist rows.
+ list_editable = ("content_retention_days",)
+ inlines = (ProjectMembershipInline,)
+
+
+@admin.register(BlueskyCredentials)
+class BlueskyCredentialsAdmin(ModelAdmin):
+    """Admin screens for the Bluesky account attached to each project."""
+
+    form = BlueskyCredentialsAdminForm
+    autocomplete_fields = ("project",)
+    search_fields = ("project__name", "handle", "pds_url")
+    list_filter = ("is_active", ("project", admin.RelatedOnlyFieldListFilter))
+    list_display = (
+        "project",
+        "handle",
+        "display_pds_host",
+        "has_stored_credential",
+        "is_active",
+        "last_verified_at",
+    )
+    actions = ["verify_selected_credentials"]
+    # Display-only status columns; the same tuple feeds the "Verification"
+    # fieldset below.
+    readonly_fields = (
+        "has_stored_credential",
+        "last_verified_at",
+        "last_error",
+        "created_at",
+        "updated_at",
+    )
+    fieldsets = (
+        (
+            "Account",
+            {"fields": ("project", "handle", "credential_input", "is_active")},
+        ),
+        (
+            "PDS Override",
+            {
+                "fields": ("pds_url",),
+                "description": "Leave blank to use the default Bluesky-hosted account flow.",
+            },
+        ),
+        ("Verification", {"fields": readonly_fields}),
+    )
+
+    @admin.display(description="PDS")
+    def display_pds_host(self, obj):
+        """Return the custom PDS URL, or a fixed label when none is set."""
+
+        if obj.pds_url:
+            return obj.pds_url
+        return "Bluesky hosted default"
+
+    @admin.display(boolean=True, description="Stored Credential")
+    def has_stored_credential(self, obj):
+        """Report whether the record already holds a stored credential."""
+
+        return obj.has_stored_credential()
+
+    @admin.action(description="Verify Selected Credentials")
+    def verify_selected_credentials(self, request, queryset):
+        """Verify each selected account and summarise the results as messages.
+
+        One success message reports how many accounts passed; one error
+        message lists every account that failed together with its exception.
+        """
+
+        from core.plugins.bluesky import BlueskySourcePlugin
+
+        verified_credentials, failed_credentials = [], []
+
+        for credentials in queryset.select_related("project"):
+            try:
+                BlueskySourcePlugin.verify_credentials(credentials)
+            except Exception as exc:
+                # Keep going so one bad account does not hide the others.
+                failed_credentials.append(f"{credentials}: {exc}")
+                continue
+            verified_credentials.append(str(credentials))
+
+        if verified_credentials:
+            self.message_user(
+                request,
+                f"Credential verification passed for {len(verified_credentials)} account(s).",
+                messages.SUCCESS,
+            )
+
+        if failed_credentials:
+            self.message_user(
+                request,
+                "Credential verification failed for: " + "; ".join(failed_credentials),
+                messages.ERROR,
+            )
+
+
+@admin.register(ProjectConfig)
+class ProjectConfigAdmin(admin.ModelAdmin):
+ """Admin configuration for per-project scoring settings."""
+
+ # Changelist columns: the owning project plus every tunable setting.
+ list_display = (
+ "project",
+ "upvote_authority_weight",
+ "downvote_authority_weight",
+ "authority_decay_rate",
+ "recompute_topic_centroid_on_feedback_save",
+ )
+ list_filter = ("recompute_topic_centroid_on_feedback_save",)
+ # The edit form exposes the same fields, in the same order, as the changelist.
+ fields = (
+ "project",
+ "upvote_authority_weight",
+ "downvote_authority_weight",
+ "authority_decay_rate",
+ "recompute_topic_centroid_on_feedback_save",
+ )
+
+
+@admin.register(SourceConfig)
+class SourceConfigAdmin(ModelAdmin):
+    """Admin view for source-plugin configuration and connectivity checks."""
+
+    list_display = (
+        "plugin_name",
+        "project",
+        "display_health",
+        "is_active",
+        "last_fetched_at",
+    )
+    list_filter = (
+        "is_active",
+        "plugin_name",
+        ("project", admin.RelatedOnlyFieldListFilter),
+    )
+    list_editable = ("is_active",)
+    search_fields = ("plugin_name", "project__name")
+    actions = ["test_source_connection"]
+    readonly_fields = ("last_fetched_at", "pretty_config")
+    fieldsets = (
+        ("Core Settings", {"fields": ("plugin_name", "project", "is_active")}),
+        ("Configuration", {"fields": ("pretty_config", "config")}),
+        ("Activity", {"fields": ("last_fetched_at",)}),
+    )
+
+    @staticmethod
+    def _status_badge(color, label):
+        """Wrap *label* in a span tinted with the CSS *color*, escaping both."""
+
+        # Two placeholders for two arguments: with a single placeholder the
+        # colour name would be rendered and the label silently dropped.
+        return format_html('<span style="color: {};">{}</span>', color, label)
+
+    @admin.display(description="Status")
+    def display_health(self, obj):
+        """Infer a human-friendly health state from activity timestamps.
+
+        Inactive sources are reported as paused and sources without a fetch
+        timestamp as never run. Active sources count as stale once the last
+        fetch is more than 24 hours old, and as healthy otherwise.
+        """
+
+        if not obj.is_active:
+            return self._status_badge("gray", "β Paused")
+        if not obj.last_fetched_at:
+            return self._status_badge("orange", "β Never Run")
+
+        hours_since = (timezone.now() - obj.last_fetched_at).total_seconds() / 3600
+        if hours_since > 24:
+            return self._status_badge("red", "β Stale")
+        return self._status_badge("green", "β Healthy")
+
+    @admin.display(description="Config Preview")
+    def pretty_config(self, obj):
+        """Display the JSON config as escaped, indented, preformatted text.
+
+        Returns the plain string ``"Empty"`` when no config is stored.
+        """
+
+        if not obj.config:
+            return "Empty"
+        formatted_json = json.dumps(obj.config, indent=4)
+        # The config holds arbitrary user-editable strings, so it is escaped
+        # by format_html instead of being marked safe verbatim.
+        return format_html("<pre>{}</pre>", formatted_json)
+
+    @admin.action(description="Test Source Connectivity")
+    def test_source_connection(self, request, queryset):
+        """Validate and health-check each selected source, then report results.
+
+        The validated config is assigned to the in-memory object only; this
+        action never saves the source config. One success message reports how
+        many sources passed; one error message lists each failing source with
+        its exception.
+        """
+
+        healthy_sources = []
+        failed_sources = []
+
+        for source_config in queryset.select_related("project"):
+            try:
+                source_config.config = validate_plugin_config(
+                    source_config.plugin_name,
+                    source_config.config,
+                )
+                plugin = get_plugin_for_source_config(source_config)
+                if not plugin.health_check():
+                    raise RuntimeError("Health check returned an unhealthy status.")
+            except Exception as exc:
+                # Broad on purpose: any failure is reported to the operator and
+                # must not stop the remaining sources from being checked.
+                failed_sources.append(f"{source_config}: {exc}")
+            else:
+                healthy_sources.append(str(source_config))
+
+        if healthy_sources:
+            self.message_user(
+                request,
+                f"Connectivity check passed for {len(healthy_sources)} source(s).",
+                messages.SUCCESS,
+            )
+
+        if failed_sources:
+            self.message_user(
+                request,
+                "Connectivity check failed for: " + "; ".join(failed_sources),
+                messages.ERROR,
+            )
+
+    def changelist_view(self, request, extra_context=None):
+        """Augment the changelist with source-count and diversity stats."""
+
+        qs = self.get_queryset(request)
+        extra_context = extra_context or {}
+        active_count = qs.filter(is_active=True).count()
+        # No division happens here, so the real total is shown; substituting 1
+        # for an empty list would display a misleading "0 / 1".
+        total_count = qs.count()
+
+        extra_context["dashboard_stats"] = [
+            {
+                "title": "Active Sources",
+                "value": f"{active_count} / {total_count}",
+                "icon": "settings_input_component",
+                "color": "success" if active_count == total_count else "warning",
+            },
+            {
+                "title": "Plugin Variety",
+                "value": qs.values("plugin_name").distinct().count(),
+                "icon": "extension",
+            },
+        ]
+        return super().changelist_view(request, extra_context=extra_context)
diff --git a/projects/api.py b/projects/api.py
new file mode 100644
index 00000000..fde7247e
--- /dev/null
+++ b/projects/api.py
@@ -0,0 +1,417 @@
+"""REST API viewsets for project-owned models."""
+
+from django.conf import settings
+from django.core.mail import send_mail
+from django.utils import timezone
+from drf_spectacular.utils import OpenApiResponse, extend_schema, inline_serializer
+from rest_framework import mixins, serializers, status, viewsets
+from rest_framework.decorators import action
+from rest_framework.response import Response
+
+from core.api import (
+ AUTHENTICATION_REQUIRED_RESPONSE,
+ BLUESKY_CREDENTIALS_RESPONSE_EXAMPLE,
+ BLUESKY_CREDENTIALS_VERIFY_RESPONSE,
+ PROJECT_CREATE_REQUEST_EXAMPLE,
+ PROJECT_RESPONSE_EXAMPLE,
+ ProjectOwnedQuerysetMixin,
+ SOURCE_CONFIG_BLUESKY_REQUEST_EXAMPLE,
+ SOURCE_CONFIG_CREATE_REQUEST_EXAMPLE,
+ SOURCE_CONFIG_REDDIT_REQUEST_EXAMPLE,
+ SOURCE_CONFIG_RESPONSE_EXAMPLE,
+ build_crud_action_overrides,
+ build_success_response,
+ document_group_access_viewset,
+ document_project_owned_viewset,
+ logger,
+)
+from core.permissions import (
+ IsProjectAdmin,
+ IsProjectMember,
+ IsProjectMemberWritable,
+ get_visible_projects_queryset,
+)
+from core.plugins.bluesky import BlueskySourcePlugin
+from projects.models import (
+ BlueskyCredentials,
+ Project,
+ ProjectConfig,
+ ProjectMembership,
+ ProjectRole,
+ SourceConfig,
+ generate_project_intake_token,
+)
+from projects.serializers import (
+ BlueskyCredentialsSerializer,
+ ProjectConfigSerializer,
+ ProjectMembershipSerializer,
+ ProjectSerializer,
+ SourceConfigSerializer,
+)
+from users.models import MembershipInvitation
+from users.serializers import MembershipInvitationSerializer
+
+
+def _assert_project_keeps_admin(
+    project: Project,
+    membership: ProjectMembership,
+    *,
+    next_role: str | None = None,
+) -> None:
+    """Raise if the pending change would strip the project of its last admin.
+
+    Args:
+        project: Project whose memberships are inspected.
+        membership: Membership about to be changed or removed.
+        next_role: Role the membership would hold afterwards. ``None`` is
+            treated as "not an admin", which is what callers in this module
+            pass when removing a membership.
+
+    Raises:
+        serializers.ValidationError: keyed on ``role`` when ``membership`` is
+            currently an admin, would stop being one, and no other admin
+            membership exists on the project.
+    """
+
+    currently_admin = membership.role == ProjectRole.ADMIN
+    remains_admin = next_role == ProjectRole.ADMIN
+    if remains_admin or not currently_admin:
+        return
+
+    other_admins = project.memberships.filter(role=ProjectRole.ADMIN).exclude(
+        pk=membership.pk
+    )
+    if other_admins.exists():
+        return
+    raise serializers.ValidationError(
+        {"role": "Projects must keep at least one admin."}
+    )
+
+
+def _send_membership_invitation_email(invitation: MembershipInvitation) -> None:
+    """Email the invitee a link built from the frontend base URL and the token.
+
+    Delivery errors are not suppressed (``fail_silently=False``), so they
+    propagate to the caller.
+    """
+
+    invite_url = f"{settings.FRONTEND_BASE_URL.rstrip('/')}/invite/{invitation.token}"
+    subject = f"You're invited to join {invitation.project.name}"
+    body = (
+        f"You have been invited to join {invitation.project.name} as a "
+        f"{invitation.role}.\n\nOpen this link to accept the invitation:\n{invite_url}"
+    )
+    send_mail(
+        subject=subject,
+        message=body,
+        from_email=settings.DEFAULT_FROM_EMAIL,
+        recipient_list=[invitation.email],
+        fail_silently=False,
+    )
+
+
+@document_group_access_viewset(
+    resource_plural="projects",
+    resource_singular="project",
+    create_description="Create a new project for the authenticated user.",
+    tag="Project Management",
+    action_overrides=build_crud_action_overrides(
+        ProjectSerializer,
+        resource_plural="projects available to the authenticated user",
+        resource_singular="project",
+        create_examples=[PROJECT_CREATE_REQUEST_EXAMPLE, PROJECT_RESPONSE_EXAMPLE],
+        create_response_examples=[PROJECT_RESPONSE_EXAMPLE],
+        retrieve_examples=[PROJECT_RESPONSE_EXAMPLE],
+    ),
+)
+class ProjectViewSet(viewsets.ModelViewSet):
+    """Manage projects accessible through the current user's project memberships."""
+
+    serializer_class = ProjectSerializer
+    queryset = Project.objects.select_related("bluesky_credentials")
+    lookup_url_kwarg = "id"
+
+    def get_permissions(self):
+        """Apply role-aware permissions by action for project-level operations.
+
+        Mutating actions and the two custom actions require a project admin,
+        list/retrieve require membership, and every other action (including
+        create) falls back to the view's configured ``permission_classes``.
+        """
+
+        if self.action in {
+            "update",
+            "partial_update",
+            "destroy",
+            "rotate_intake_token",
+            "verify_bluesky_credentials",
+        }:
+            permission_classes = [IsProjectAdmin]
+        elif self.action in {"list", "retrieve"}:
+            permission_classes = [IsProjectMember]
+        else:
+            permission_classes = self.permission_classes
+        return [permission() for permission in permission_classes]
+
+    def get_queryset(self):
+        """Limit projects to those visible through the authenticated user."""
+
+        return get_visible_projects_queryset(self.request.user).select_related(
+            "bluesky_credentials"
+        )
+
+    def perform_create(self, serializer):
+        """Create the project and make the creator its first admin.
+
+        Both rows are written inside one transaction so that a failure while
+        creating the membership rolls the project back instead of leaving a
+        project that has no admin.
+        """
+
+        from django.db import transaction
+
+        with transaction.atomic():
+            project = serializer.save()
+            ProjectMembership.objects.create(
+                user=self.request.user,
+                project=project,
+                role=ProjectRole.ADMIN,
+            )
+
+    @extend_schema(
+        summary="Rotate newsletter intake token",
+        description=(
+            "Generate a fresh project-specific newsletter intake token and return the "
+            "updated project payload."
+        ),
+        tags=["Project Management"],
+        request=None,
+        responses={200: ProjectSerializer, 403: AUTHENTICATION_REQUIRED_RESPONSE},
+    )
+    @action(detail=True, methods=["post"], url_path="rotate-intake-token")
+    def rotate_intake_token(self, request, *args, **kwargs):
+        """Generate a fresh intake token for the selected project.
+
+        Only the ``intake_token`` column is written; the response is the
+        serialized project.
+        """
+
+        project = self.get_object()
+        project.intake_token = generate_project_intake_token()
+        project.save(update_fields=["intake_token"])
+        serializer = self.get_serializer(project)
+        return Response(serializer.data)
+
+    @extend_schema(
+        summary="Verify Bluesky credentials",
+        description=(
+            "Verify the selected project's stored Bluesky credentials by authenticating "
+            "the account and checking the current session."
+        ),
+        tags=["Ingestion"],
+        request=None,
+        responses={
+            200: build_success_response(
+                BLUESKY_CREDENTIALS_VERIFY_RESPONSE,
+                "The project's Bluesky credentials were verified successfully.",
+            ),
+            400: OpenApiResponse(
+                response=inline_serializer(
+                    name="BlueskyCredentialsVerifyErrorResponse",
+                    fields={
+                        "type": serializers.CharField(),
+                        "errors": inline_serializer(
+                            name="BlueskyCredentialsVerifyError",
+                            fields={
+                                "code": serializers.CharField(),
+                                "detail": serializers.CharField(),
+                                "attr": serializers.CharField(allow_null=True),
+                            },
+                            many=True,
+                        ),
+                    },
+                ),
+                description="The project is missing Bluesky credentials or verification failed.",
+            ),
+            403: AUTHENTICATION_REQUIRED_RESPONSE,
+        },
+    )
+    @action(detail=True, methods=["post"], url_path="verify-bluesky-credentials")
+    def verify_bluesky_credentials(self, request, *args, **kwargs):
+        """Verify the Bluesky credentials stored for the selected project.
+
+        Raises:
+            serializers.ValidationError: when the project has no credentials
+                row, or when verification raises. In the latter case the
+                underlying exception is logged and a generic message is
+                returned to the client.
+        """
+
+        project = self.get_object()
+        try:
+            credentials = project.bluesky_credentials
+        except BlueskyCredentials.DoesNotExist as exc:
+            raise serializers.ValidationError(
+                {
+                    "bluesky_credentials": "No Bluesky credentials are configured for this project."
+                }
+            ) from exc
+
+        try:
+            BlueskySourcePlugin.verify_credentials(credentials)
+        except Exception as exc:
+            # Log the full traceback server-side; the response deliberately
+            # carries only a generic message.
+            logger.exception(
+                "Bluesky credential verification failed for project id=%s",
+                project.id,
+            )
+            raise serializers.ValidationError(
+                {
+                    "bluesky_credentials": (
+                        "Credential verification failed. Please re-check the credentials "
+                        "and try again."
+                    )
+                }
+            ) from exc
+
+        # Reload so the response reflects the values currently in the database.
+        credentials.refresh_from_db()
+        return Response(
+            {
+                "status": "verified",
+                "handle": credentials.handle,
+                "last_verified_at": credentials.last_verified_at,
+                "last_error": "",
+            }
+        )
+
+
+@document_project_owned_viewset(
+    resource_plural="project configurations",
+    resource_singular="project configuration",
+    create_description="Create a new project configuration record for the selected project, including authority weighting and decay settings.",
+    tag="Project Management",
+    action_overrides=build_crud_action_overrides(
+        ProjectConfigSerializer,
+        resource_plural="project configurations for the selected project",
+        resource_singular="project configuration",
+    ),
+)
+class ProjectConfigViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
+    """CRUD endpoints for a project's scoring and authority settings."""
+
+    serializer_class = ProjectConfigSerializer
+    queryset = ProjectConfig.objects.select_related("project")
+
+    def get_permissions(self):
+        """Require a project admin for writes and plain membership otherwise."""
+
+        write_actions = {"create", "update", "partial_update", "destroy"}
+        permission_class = (
+            IsProjectAdmin if self.action in write_actions else IsProjectMember
+        )
+        return [permission_class()]
+
+
+@document_project_owned_viewset(
+    resource_plural="Bluesky credentials",
+    resource_singular="Bluesky credentials",
+    create_description=(
+        "Create Bluesky credentials for the selected project. The app password is "
+        "accepted write-only and is never returned in API responses."
+    ),
+    tag="Ingestion",
+    action_overrides=build_crud_action_overrides(
+        BlueskyCredentialsSerializer,
+        resource_plural="Bluesky credentials for the selected project",
+        resource_singular="Bluesky credentials",
+        retrieve_examples=[BLUESKY_CREDENTIALS_RESPONSE_EXAMPLE],
+    ),
+)
+class BlueskyCredentialsViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
+    """CRUD endpoints for the Bluesky credentials attached to a project."""
+
+    serializer_class = BlueskyCredentialsSerializer
+    queryset = BlueskyCredentials.objects.select_related("project")
+
+    def get_permissions(self):
+        """Require a project admin for every action on this viewset."""
+
+        admin_only = IsProjectAdmin()
+        return [admin_only]
+
+    def get_queryset(self):
+        """Return the inherited queryset ordered by most recent update first.
+
+        This method only adds the ordering; any project scoping comes from the
+        parent ``get_queryset`` (presumably ProjectOwnedQuerysetMixin — confirm
+        there).
+        """
+
+        scoped = super().get_queryset()
+        return scoped.order_by("-updated_at")
+
+
+@document_project_owned_viewset(
+    resource_plural="source configurations",
+    resource_singular="source configuration",
+    create_description="Create a new source configuration for the selected project. Plugin-specific configuration is validated before the record is saved.",
+    tag="Ingestion",
+    action_overrides=build_crud_action_overrides(
+        SourceConfigSerializer,
+        resource_plural="source configurations for the selected project",
+        resource_singular="source configuration",
+        create_examples=[
+            SOURCE_CONFIG_CREATE_REQUEST_EXAMPLE,
+            SOURCE_CONFIG_REDDIT_REQUEST_EXAMPLE,
+            SOURCE_CONFIG_BLUESKY_REQUEST_EXAMPLE,
+            SOURCE_CONFIG_RESPONSE_EXAMPLE,
+        ],
+        create_response_examples=[SOURCE_CONFIG_RESPONSE_EXAMPLE],
+        retrieve_examples=[SOURCE_CONFIG_RESPONSE_EXAMPLE],
+    ),
+)
+class SourceConfigViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
+    """CRUD endpoints for a project's source-plugin configuration."""
+
+    serializer_class = SourceConfigSerializer
+    queryset = SourceConfig.objects.select_related("project")
+
+    def get_permissions(self):
+        """Require write-capable membership for mutations, membership for reads.
+
+        Create, update, partial update and destroy all map to the same
+        permission class; every other action only needs project membership.
+        """
+
+        mutating_actions = {"create", "update", "partial_update", "destroy"}
+        if self.action in mutating_actions:
+            return [IsProjectMemberWritable()]
+        return [IsProjectMember()]
+
+
+class ProjectMembershipViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
+    """CRUD endpoints for the member roster of a single project."""
+
+    serializer_class = ProjectMembershipSerializer
+    queryset = ProjectMembership.objects.select_related("project", "user", "invited_by")
+
+    def get_permissions(self):
+        """Require a project admin for every roster action."""
+
+        admin_only = IsProjectAdmin()
+        return [admin_only]
+
+    def get_queryset(self):
+        """Return the inherited queryset ordered by join time, then username."""
+
+        roster = super().get_queryset()
+        return roster.order_by("joined_at", "user__username")
+
+    def perform_update(self, serializer):
+        """Save a membership edit unless it would demote the project's only admin.
+
+        When the payload carries no role, the current role is used for the
+        check.
+        """
+
+        current = self.get_object()
+        requested_role = serializer.validated_data.get("role", current.role)
+        _assert_project_keeps_admin(current.project, current, next_role=requested_role)
+        serializer.save()
+
+    def destroy(self, request, *args, **kwargs):
+        """Delete a membership unless it belongs to the project's only admin."""
+
+        target = self.get_object()
+        # next_role=None tells the guard that no admin role remains afterwards.
+        _assert_project_keeps_admin(target.project, target, next_role=None)
+        target.delete()
+        return Response(status=status.HTTP_204_NO_CONTENT)
+
+
+class ProjectInvitationViewSet(
+    ProjectOwnedQuerysetMixin,
+    mixins.CreateModelMixin,
+    mixins.DestroyModelMixin,
+    mixins.ListModelMixin,
+    viewsets.GenericViewSet,
+):
+    """Endpoints to issue, list, and revoke invitations for one project."""
+
+    serializer_class = MembershipInvitationSerializer
+    queryset = MembershipInvitation.objects.select_related("project", "invited_by")
+
+    def get_permissions(self):
+        """Require a project admin for every invitation action."""
+
+        admin_only = IsProjectAdmin()
+        return [admin_only]
+
+    def get_queryset(self):
+        """Return the inherited queryset with the newest invitations first."""
+
+        invitations = super().get_queryset()
+        return invitations.order_by("-created_at")
+
+    def get_serializer_context(self):
+        """Extend the default serializer context with the selected project."""
+
+        return {**super().get_serializer_context(), "project": self.get_project()}
+
+    def perform_create(self, serializer):
+        """Save the invitation for this project and inviter, then email it."""
+
+        project = self.get_project()
+        inviter = self.request.user
+        _send_membership_invitation_email(
+            serializer.save(project=project, invited_by=inviter)
+        )
+
+    def destroy(self, request, *args, **kwargs):
+        """Revoke an invitation by stamping ``revoked_at``; the row is kept."""
+
+        target = self.get_object()
+        target.revoked_at = timezone.now()
+        # Write only the column that changed.
+        target.save(update_fields=["revoked_at"])
+        return Response(status=status.HTTP_204_NO_CONTENT)
diff --git a/projects/api_urls.py b/projects/api_urls.py
new file mode 100644
index 00000000..75bacb9c
--- /dev/null
+++ b/projects/api_urls.py
@@ -0,0 +1,41 @@
+"""API route registration for project-owned resources."""
+
+from rest_framework.routers import DefaultRouter
+from rest_framework_nested.routers import NestedSimpleRouter
+
+from projects.api import (
+ BlueskyCredentialsViewSet,
+ ProjectConfigViewSet,
+ ProjectInvitationViewSet,
+ ProjectMembershipViewSet,
+ ProjectViewSet,
+ SourceConfigViewSet,
+)
+
+
+def register_root_routes(router: DefaultRouter) -> None:
+    """Attach the top-level ``projects`` endpoint to *router*."""
+
+    router.register(prefix="projects", viewset=ProjectViewSet, basename="project")
+
+
+def register_project_routes(project_router: NestedSimpleRouter) -> None:
+    """Attach every project-scoped viewset to the nested project router."""
+
+    # (URL prefix, viewset, basename), registered in this order.
+    nested_routes = (
+        (r"project-configs", ProjectConfigViewSet, "project-config"),
+        (r"memberships", ProjectMembershipViewSet, "project-membership"),
+        (r"invitations", ProjectInvitationViewSet, "project-invitation"),
+        (
+            r"bluesky-credentials",
+            BlueskyCredentialsViewSet,
+            "project-bluesky-credentials",
+        ),
+        (r"source-configs", SourceConfigViewSet, "project-source-config"),
+    )
+    for prefix, viewset, basename in nested_routes:
+        project_router.register(prefix, viewset, basename=basename)
diff --git a/projects/apps.py b/projects/apps.py
new file mode 100644
index 00000000..c992f4a7
--- /dev/null
+++ b/projects/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class ProjectsConfig(AppConfig):
+ """Django application configuration for the ``projects`` app."""
+
+ # Models without an explicit primary key get a BigAutoField.
+ default_auto_field = "django.db.models.BigAutoField"
+ name = "projects"
diff --git a/projects/migrations/0001_initial.py b/projects/migrations/0001_initial.py
new file mode 100644
index 00000000..e4f0101f
--- /dev/null
+++ b/projects/migrations/0001_initial.py
@@ -0,0 +1,176 @@
+import django.db.models.deletion
+from django.db import migrations, models
+
+import projects.model_support
+
+
+class Migration(migrations.Migration):
+ """Register Project, BlueskyCredentials, ProjectConfig and SourceConfig in state only.
+
+ Every model is declared inside ``SeparateDatabaseAndState`` with an empty
+ ``database_operations`` list, so this migration changes the migration state
+ without running any schema operations. All four models keep ``core_``
+ prefixed table names; presumably those tables were created by the ``core``
+ app's earlier migrations — confirm against that app before editing.
+ """
+
+ initial = True
+
+ # Must run after the auth app's 0012 migration and core's 0009 migration.
+ dependencies = [
+ ("auth", "0012_alter_user_first_name_max_length"),
+ ("core", "0009_topiccentroidsnapshot"),
+ ]
+
+ operations = [
+ migrations.SeparateDatabaseAndState(
+ # State side: the model definitions Django should track for this app.
+ state_operations=[
+ migrations.CreateModel(
+ name="Project",
+ fields=[
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ ("name", models.CharField(max_length=255)),
+ ("topic_description", models.TextField()),
+ ("content_retention_days", models.PositiveIntegerField(default=365)),
+ (
+ "intake_token",
+ models.CharField(
+ default=projects.model_support.generate_project_intake_token,
+ editable=False,
+ max_length=64,
+ unique=True,
+ ),
+ ),
+ ("intake_enabled", models.BooleanField(default=False)),
+ ("created_at", models.DateTimeField(auto_now_add=True)),
+ (
+ "group",
+ models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="projects",
+ to="auth.group",
+ ),
+ ),
+ ],
+ options={
+ "ordering": ["name"],
+ "db_table": "core_project",
+ },
+ ),
+ migrations.CreateModel(
+ name="BlueskyCredentials",
+ fields=[
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ ("handle", models.CharField(max_length=255)),
+ ("app_password_encrypted", models.TextField(blank=True)),
+ ("pds_url", models.URLField(blank=True)),
+ ("is_active", models.BooleanField(default=True)),
+ ("last_verified_at", models.DateTimeField(blank=True, null=True)),
+ ("last_error", models.TextField(blank=True)),
+ ("created_at", models.DateTimeField(auto_now_add=True)),
+ ("updated_at", models.DateTimeField(auto_now=True)),
+ (
+ "project",
+ models.OneToOneField(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="bluesky_credentials",
+ to="projects.project",
+ ),
+ ),
+ ],
+ options={
+ "ordering": ["project__name"],
+ "verbose_name_plural": "Bluesky credentials",
+ "db_table": "core_blueskycredentials",
+ },
+ ),
+ migrations.CreateModel(
+ name="ProjectConfig",
+ fields=[
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ ("upvote_authority_weight", models.FloatField(default=0.1)),
+ ("downvote_authority_weight", models.FloatField(default=-0.05)),
+ ("authority_decay_rate", models.FloatField(default=0.95)),
+ (
+ "recompute_topic_centroid_on_feedback_save",
+ models.BooleanField(default=True),
+ ),
+ (
+ "project",
+ models.OneToOneField(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="config",
+ to="projects.project",
+ ),
+ ),
+ ],
+ options={
+ "verbose_name": "Project config",
+ "verbose_name_plural": "Project configs",
+ "db_table": "core_projectconfig",
+ },
+ ),
+ migrations.CreateModel(
+ name="SourceConfig",
+ fields=[
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ (
+ "plugin_name",
+ models.CharField(
+ choices=[
+ ("rss", "RSS"),
+ ("reddit", "Reddit"),
+ ("bluesky", "Bluesky"),
+ ],
+ max_length=64,
+ ),
+ ),
+ ("config", models.JSONField(default=dict)),
+ ("is_active", models.BooleanField(default=True)),
+ ("last_fetched_at", models.DateTimeField(blank=True, null=True)),
+ (
+ "project",
+ models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="source_configs",
+ to="projects.project",
+ ),
+ ),
+ ],
+ options={
+ "ordering": ["plugin_name", "id"],
+ "db_table": "core_sourceconfig",
+ "indexes": [
+ models.Index(
+ fields=["project", "plugin_name", "is_active"],
+ name="core_source_project_f1abc6_idx",
+ )
+ ],
+ },
+ ),
+ ],
+ # Database side is intentionally empty: no schema operations are run.
+ database_operations=[],
+ )
+ ]
diff --git a/projects/migrations/0002_alter_project_group_projectmembership_and_more.py b/projects/migrations/0002_alter_project_group_projectmembership_and_more.py
new file mode 100644
index 00000000..3b4c387d
--- /dev/null
+++ b/projects/migrations/0002_alter_project_group_projectmembership_and_more.py
@@ -0,0 +1,135 @@
+# Generated by Django 6.0.4 on 2026-04-30 01:46
+
+import django.db.models.deletion
+from django.conf import settings
+from django.db import migrations, models
+
+
+def backfill_project_memberships(apps, schema_editor):
+    """Seed ProjectMembership rows from each project's legacy auth group.
+
+    Projects without a group are skipped. Group users are visited in ascending
+    id order: the first gets the "admin" role, the rest "member". get_or_create
+    leaves an already-existing (user, project) membership untouched.
+    """
+    project_model = apps.get_model("projects", "Project")
+    membership_model = apps.get_model("projects", "ProjectMembership")
+
+    for project in project_model.objects.select_related("group").all():
+        if project.group_id is not None:
+            for position, member in enumerate(project.group.user_set.order_by("id")):
+                membership_model.objects.get_or_create(
+                    user=member,
+                    project=project,
+                    defaults={"role": "member" if position else "admin"},
+                )
+
+
+class Migration(migrations.Migration):
+ """Make Project.group nullable, add ProjectMembership and Project.members, then backfill memberships from groups."""
+ dependencies = [
+ ("auth", "0012_alter_user_first_name_max_length"),
+ ("projects", "0001_initial"),
+ migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name="project",
+ name="group",
+ field=models.ForeignKey(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.SET_NULL,
+ related_name="projects",
+ to="auth.group",
+ ),
+ ),
+ migrations.CreateModel(
+ name="ProjectMembership",
+ fields=[
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ (
+ "role",
+ models.CharField(
+ choices=[
+ ("admin", "Project Admin"),
+ ("member", "Project Member"),
+ ("reader", "Project Reader"),
+ ],
+ default="member",
+ max_length=16,
+ ),
+ ),
+ ("joined_at", models.DateTimeField(auto_now_add=True)),
+ (
+ "invited_by",
+ models.ForeignKey(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.SET_NULL,
+ related_name="memberships_invited",
+ to=settings.AUTH_USER_MODEL,
+ ),
+ ),
+ (
+ "project",
+ models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="memberships",
+ to="projects.project",
+ ),
+ ),
+ (
+ "user",
+ models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="project_memberships",
+ to=settings.AUTH_USER_MODEL,
+ ),
+ ),
+ ],
+ options={
+ "ordering": ["project__name", "user__username"],
+ },
+ ),
+ migrations.AddField(
+ model_name="project",
+ name="members",
+ field=models.ManyToManyField(
+ blank=True,
+ related_name="projects",
+ through="projects.ProjectMembership",
+ through_fields=("project", "user"),
+ to=settings.AUTH_USER_MODEL,
+ ),
+ ),
+ migrations.AddIndex(
+ model_name="projectmembership",
+ index=models.Index(
+ fields=["project", "role"], name="projects_pr_project_33946c_idx"
+ ),
+ ),
+ migrations.AddIndex(
+ model_name="projectmembership",
+ index=models.Index(
+ fields=["user", "project"], name="projects_pr_user_id_116009_idx"
+ ),
+ ),
+ migrations.AddConstraint(
+ model_name="projectmembership",
+ constraint=models.UniqueConstraint(
+ fields=("user", "project"),
+ name="projects_membership_unique_user_project",
+ ),
+ ),
+ migrations.RunPython(backfill_project_memberships, migrations.RunPython.noop),
+ ]
diff --git a/projects/migrations/0003_remove_project_group.py b/projects/migrations/0003_remove_project_group.py
new file mode 100644
index 00000000..1721d6c5
--- /dev/null
+++ b/projects/migrations/0003_remove_project_group.py
@@ -0,0 +1,14 @@
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("projects", "0002_alter_project_group_projectmembership_and_more"),
+ ]
+ # Drops Project.group; the 0002 dependency above is where memberships were backfilled from groups.
+ operations = [
+ migrations.RemoveField(
+ model_name="project",
+ name="group",
+ )
+ ]
diff --git a/projects/migrations/__init__.py b/projects/migrations/__init__.py
new file mode 100644
index 00000000..e6ea5e35
--- /dev/null
+++ b/projects/migrations/__init__.py
@@ -0,0 +1 @@
+"""Projects app migrations package."""
diff --git a/projects/model_support.py b/projects/model_support.py
new file mode 100644
index 00000000..cb420a6d
--- /dev/null
+++ b/projects/model_support.py
@@ -0,0 +1,64 @@
+"""Support types and helpers for project-owned models."""
+
+import base64
+import hashlib
+import secrets
+from urllib.parse import urlsplit, urlunsplit
+
+from cryptography.fernet import Fernet
+from django.conf import settings
+from django.db import models
+
+
+def generate_project_intake_token() -> str:
+    """Return a fresh random token (32 lowercase hex characters) for a project's intake email alias."""
+    # 16 bytes from the secrets CSPRNG, hex-encoded: the same value shape secrets.token_hex(16) yields.
+    return secrets.token_bytes(16).hex()
+
+
+def normalize_bluesky_handle(handle: str) -> str:
+    """Canonicalize a Bluesky handle: trim whitespace, drop one leading "@", then lowercase."""
+    trimmed = handle.strip()
+    return trimmed.removeprefix("@").lower()
+
+
+def normalize_bluesky_pds_url(pds_url: str) -> str:
+    """Reduce a user-supplied PDS URL to its base form.
+
+    Surrounding whitespace, trailing slashes, and a trailing "/xrpc" path
+    suffix are removed; scheme, host, query, and fragment are kept as given.
+    A blank or slash-only input yields "".
+    """
+    candidate = pds_url.strip().rstrip("/")
+    if candidate == "":
+        return ""
+
+    parts = urlsplit(candidate)
+    # Trim the path's own trailing slashes first so ".../xrpc/?q=1" still loses the suffix.
+    base_path = parts.path.rstrip("/")
+    base_path = base_path.removesuffix("/xrpc")
+
+    # Re-assemble with only the path swapped, then drop any slash left at the very end.
+    rebuilt = urlunsplit(parts._replace(path=base_path))
+    return rebuilt.rstrip("/")
+
+
+def bluesky_credentials_fernet() -> Fernet:
+    """Return the Fernet cipher used to encrypt and decrypt stored Bluesky app passwords.
+
+    Key material: settings.BLUESKY_CREDENTIALS_ENCRYPTION_KEY if set and non-empty, else
+    settings.SECRET_KEY. Its SHA-256 digest, urlsafe-base64 encoded, is the Fernet key.
+    """
+    secret = getattr(settings, "BLUESKY_CREDENTIALS_ENCRYPTION_KEY", "")
+    if not secret:
+        secret = settings.SECRET_KEY
+    digest = hashlib.sha256(secret.encode("utf-8")).digest()
+    return Fernet(base64.urlsafe_b64encode(digest))
+
+
+class SourcePluginName(models.TextChoices):
+ """Built-in ingestion plugins that can populate project content."""
+ # Each member is written as (stored value, human-readable label).
+ RSS = "rss", "RSS"
+ REDDIT = "reddit", "Reddit"
+ BLUESKY = "bluesky", "Bluesky"
diff --git a/projects/models.py b/projects/models.py
new file mode 100644
index 00000000..57b83139
--- /dev/null
+++ b/projects/models.py
@@ -0,0 +1,227 @@
+"""Project-owned models split out from the historical core app."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from django.conf import settings
+from django.db import models
+
+from projects.model_support import (
+ SourcePluginName,
+ bluesky_credentials_fernet,
+ generate_project_intake_token,
+ normalize_bluesky_handle,
+ normalize_bluesky_pds_url,
+)
+
+if TYPE_CHECKING:
+ from users.models import AppUser
+
+
+class ProjectRole(models.TextChoices):
+ """Role assigned to a user's membership within one project."""
+ # Each member is written as (stored value, human-readable label).
+ ADMIN = "admin", "Project Admin"
+ MEMBER = "member", "Project Member"
+ READER = "reader", "Project Reader"
+
+
+class Project(models.Model):
+ """Represents a newsletter workspace owned through project memberships."""
+ # Stored in the historical "core_project" table (Meta.db_table); default ordering is by name.
+ name = models.CharField(max_length=255)
+ members: models.ManyToManyField[AppUser, ProjectMembership] = (
+ models.ManyToManyField(
+ settings.AUTH_USER_MODEL,
+ through="ProjectMembership",
+ through_fields=("project", "user"),
+ related_name="projects",
+ blank=True,
+ )
+ )
+ topic_description = models.TextField()
+ content_retention_days = models.PositiveIntegerField(default=365)
+ intake_token = models.CharField(
+ max_length=64,
+ unique=True,
+ default=generate_project_intake_token,
+ editable=False,
+ )
+ intake_enabled = models.BooleanField(default=False)
+ created_at = models.DateTimeField(auto_now_add=True)
+
+ class Meta:
+ ordering = ["name"]
+ db_table = "core_project"
+
+ def __str__(self) -> str:
+ return self.name
+
+
+class ProjectMembership(models.Model):
+ """Associate one user with one project and an authorization role."""
+ # (user, project) pairs are unique (Meta.constraints); invited_by is set to NULL, not cascaded, when the inviter is deleted.
+ user = models.ForeignKey(
+ settings.AUTH_USER_MODEL,
+ on_delete=models.CASCADE,
+ related_name="project_memberships",
+ )
+ project = models.ForeignKey(
+ Project,
+ on_delete=models.CASCADE,
+ related_name="memberships",
+ )
+ role = models.CharField(
+ max_length=16,
+ choices=ProjectRole.choices,
+ default=ProjectRole.MEMBER,
+ )
+ invited_by = models.ForeignKey(
+ settings.AUTH_USER_MODEL,
+ on_delete=models.SET_NULL,
+ null=True,
+ blank=True,
+ related_name="memberships_invited",
+ )
+ joined_at = models.DateTimeField(auto_now_add=True)
+
+ class Meta:
+ ordering = ["project__name", "user__username"]
+ constraints = [
+ models.UniqueConstraint(
+ fields=["user", "project"],
+ name="projects_membership_unique_user_project",
+ )
+ ]
+ indexes = [
+ models.Index(fields=["project", "role"]),
+ models.Index(fields=["user", "project"]),
+ ]
+
+ def __str__(self) -> str:
+ return f"{self.user} in {self.project} ({self.role})"
+
+
+class BlueskyCredentials(models.Model):
+ """Stores the authenticated Bluesky account used by one project."""
+ # The app password is kept only as Fernet ciphertext in app_password_encrypted; save() normalizes handle and pds_url.
+ project = models.OneToOneField(
+ Project, on_delete=models.CASCADE, related_name="bluesky_credentials"
+ )
+ handle = models.CharField(max_length=255)
+ app_password_encrypted = models.TextField(blank=True)
+ pds_url = models.URLField(blank=True)
+ is_active = models.BooleanField(default=True)
+ last_verified_at = models.DateTimeField(null=True, blank=True)
+ last_error = models.TextField(blank=True)
+ created_at = models.DateTimeField(auto_now_add=True)
+ updated_at = models.DateTimeField(auto_now=True)
+
+ class Meta:
+ ordering = ["project__name"]
+ verbose_name_plural = "Bluesky credentials"
+ db_table = "core_blueskycredentials"
+
+ def __str__(self) -> str:
+ return f"Bluesky credentials for {self.project.name}"
+
+ @property
+ def client_base_url(self) -> str:
+ """Return the effective base URL used by the ATProto client."""
+ # With no custom PDS URL stored, fall back to the bsky.social endpoint.
+ if not self.pds_url:
+ return "https://bsky.social/xrpc"
+ return f"{self.pds_url.rstrip('/')}/xrpc"
+
+ def has_app_password(self) -> bool:
+ """Return whether an encrypted app password has been stored."""
+
+ return bool(self.app_password_encrypted)
+
+ def has_stored_credential(self) -> bool:
+ """Return whether an encrypted Bluesky credential has been stored."""
+
+ return self.has_app_password()
+
+ def set_app_password(self, app_password: str) -> None:
+ """Encrypt and store the given Bluesky app password."""
+ # An empty password clears the stored ciphertext rather than encrypting an empty string; nothing is saved here.
+ if not app_password:
+ self.app_password_encrypted = ""
+ return
+ self.app_password_encrypted = (
+ bluesky_credentials_fernet()
+ .encrypt(app_password.encode("utf-8"))
+ .decode("utf-8")
+ )
+
+ def set_stored_credential(self, credential_value: str) -> None:
+ """Encrypt and store the given Bluesky credential value."""
+
+ self.set_app_password(credential_value)
+
+ def get_app_password(self) -> str:
+ """Decrypt and return the stored Bluesky app password."""
+ # Returns "" when nothing is stored; decrypt errors from Fernet are not caught here.
+ if not self.app_password_encrypted:
+ return ""
+ return (
+ bluesky_credentials_fernet()
+ .decrypt(self.app_password_encrypted.encode("utf-8"))
+ .decode("utf-8")
+ )
+
+ def get_stored_credential(self) -> str:
+ """Decrypt and return the stored Bluesky credential value."""
+
+ return self.get_app_password()
+
+ def save(self, *args, **kwargs):
+ """Normalize stored account fields before persisting the credentials."""
+ # Normalization runs on every save() call, before delegating to the default save.
+ self.handle = normalize_bluesky_handle(self.handle)
+ self.pds_url = normalize_bluesky_pds_url(self.pds_url)
+ super().save(*args, **kwargs)
+
+
+class ProjectConfig(models.Model):
+ """Stores tunable scoring parameters for a single project."""
+ # At most one row per project (OneToOneField), reachable from a project as project.config.
+ project = models.OneToOneField(
+ Project, on_delete=models.CASCADE, related_name="config"
+ )
+ upvote_authority_weight = models.FloatField(default=0.1)
+ downvote_authority_weight = models.FloatField(default=-0.05)
+ authority_decay_rate = models.FloatField(default=0.95)
+ recompute_topic_centroid_on_feedback_save = models.BooleanField(default=True)
+
+ class Meta:
+ verbose_name = "Project config"
+ verbose_name_plural = "Project configs"
+ db_table = "core_projectconfig"
+
+ def __str__(self) -> str:
+ return f"Config for {self.project.name}"
+
+
+class SourceConfig(models.Model):
+ """Configures one ingestion source for a project."""
+ # plugin_name is limited to the SourcePluginName choices; config is a JSON field defaulting to an empty dict.
+ project = models.ForeignKey(
+ Project, on_delete=models.CASCADE, related_name="source_configs"
+ )
+ plugin_name = models.CharField(max_length=64, choices=SourcePluginName.choices)
+ config = models.JSONField(default=dict)
+ is_active = models.BooleanField(default=True)
+ last_fetched_at = models.DateTimeField(null=True, blank=True)
+
+ class Meta:
+ ordering = ["plugin_name", "id"]
+ indexes = [
+ models.Index(fields=["project", "plugin_name", "is_active"]),
+ ]
+ db_table = "core_sourceconfig"
+
+ def __str__(self) -> str:
+ return f"{self.plugin_name} source for {self.project.name}"
diff --git a/projects/serializers.py b/projects/serializers.py
new file mode 100644
index 00000000..3f97b539
--- /dev/null
+++ b/projects/serializers.py
@@ -0,0 +1,262 @@
+"""DRF serializers for project-owned models."""
+
+import logging
+
+from rest_framework import serializers
+
+from core.plugins import validate_plugin_config
+from core.permissions import get_user_role
+from core.serializer_mixins import ProjectScopedSerializerMixin
+from projects.models import (
+ BlueskyCredentials,
+ Project,
+ ProjectConfig,
+ ProjectMembership,
+ ProjectRole,
+ SourceConfig,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class ProjectSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer):
+ """Serialize top-level project records."""
+ # Adds computed read-only fields: the requester's role and a bluesky_* summary of the linked credentials (never the secret).
+ user_role = serializers.SerializerMethodField()
+ has_bluesky_credentials = serializers.SerializerMethodField()
+ bluesky_handle = serializers.SerializerMethodField()
+ bluesky_is_active = serializers.SerializerMethodField()
+ bluesky_last_verified_at = serializers.SerializerMethodField()
+ bluesky_last_error = serializers.SerializerMethodField()
+
+ class Meta:
+ model = Project
+ fields = [
+ "id",
+ "name",
+ "topic_description",
+ "content_retention_days",
+ "intake_token",
+ "intake_enabled",
+ "user_role",
+ "has_bluesky_credentials",
+ "bluesky_handle",
+ "bluesky_is_active",
+ "bluesky_last_verified_at",
+ "bluesky_last_error",
+ "created_at",
+ ]
+ read_only_fields = ["id", "created_at"]
+
+ def get_user_role(self, obj: Project) -> str | None:
+ """Return the current request user's role for this project."""
+ # None when the serializer has no request in its context or the user is anonymous.
+ request = self.context.get("request")
+ if not request or not request.user.is_authenticated:
+ return None
+ return get_user_role(request.user, obj)
+
+ def _get_bluesky_credentials(self, obj: Project):
+ """Return the project's stored Bluesky credentials, if configured."""
+ # Accessing the reverse one-to-one raises when no row exists; translate that into None.
+ try:
+ return obj.bluesky_credentials
+ except Project.bluesky_credentials.RelatedObjectDoesNotExist:
+ return None
+
+ def get_has_bluesky_credentials(self, obj: Project) -> bool:
+ """Return whether the project has stored Bluesky credentials."""
+
+ return self._get_bluesky_credentials(obj) is not None
+
+ def get_bluesky_handle(self, obj: Project) -> str:
+ """Return the stored Bluesky handle, or an empty string."""
+
+ credentials = self._get_bluesky_credentials(obj)
+ return credentials.handle if credentials else ""
+
+ def get_bluesky_is_active(self, obj: Project) -> bool:
+ """Return whether the stored Bluesky credentials are currently active."""
+
+ credentials = self._get_bluesky_credentials(obj)
+ return credentials.is_active if credentials else False
+
+ def get_bluesky_last_verified_at(self, obj: Project):
+ """Return the last successful verification timestamp, if available."""
+
+ credentials = self._get_bluesky_credentials(obj)
+ return credentials.last_verified_at if credentials else None
+
+ def get_bluesky_last_error(self, obj: Project) -> str:
+ """Return the latest Bluesky verification error, or an empty string."""
+
+ credentials = self._get_bluesky_credentials(obj)
+ return credentials.last_error if credentials else ""
+
+
+class ProjectConfigSerializer(
+ ProjectScopedSerializerMixin, serializers.ModelSerializer
+):
+ """Serialize per-project authority and scoring settings."""
+ # NOTE(review): ProjectConfig.recompute_topic_centroid_on_feedback_save is not listed in fields below — confirm that is intended.
+ class Meta:
+ model = ProjectConfig
+ fields = [
+ "id",
+ "project",
+ "upvote_authority_weight",
+ "downvote_authority_weight",
+ "authority_decay_rate",
+ ]
+ read_only_fields = ["id", "project"]
+
+
+class BlueskyCredentialsSerializer(
+ ProjectScopedSerializerMixin, serializers.ModelSerializer
+):
+ """Serialize project-scoped Bluesky credentials without exposing secrets."""
+ # app_password is write-only input; output carries only the has_stored_credential boolean.
+ app_password = serializers.CharField(
+ write_only=True,
+ required=False,
+ allow_blank=True,
+ trim_whitespace=False,
+ )
+ has_stored_credential = serializers.SerializerMethodField()
+
+ class Meta:
+ model = BlueskyCredentials
+ fields = [
+ "id",
+ "project",
+ "handle",
+ "pds_url",
+ "is_active",
+ "has_stored_credential",
+ "app_password",
+ "last_verified_at",
+ "last_error",
+ "created_at",
+ "updated_at",
+ ]
+ read_only_fields = [
+ "id",
+ "project",
+ "has_stored_credential",
+ "last_verified_at",
+ "last_error",
+ "created_at",
+ "updated_at",
+ ]
+
+ def get_has_stored_credential(self, obj: BlueskyCredentials) -> bool:
+ """Return whether the project has an encrypted Bluesky credential stored."""
+
+ return obj.has_stored_credential()
+
+ def validate(self, attrs):
+ """Require an app password when creating a credential record."""
+ # self.instance is None only on create; updates may omit the password.
+ attrs = super().validate(attrs)
+ app_password = attrs.get("app_password", "")
+ if self.instance is None and not app_password:
+ raise serializers.ValidationError(
+ {"app_password": "A Bluesky app credential is required."}
+ )
+ return attrs
+
+ def create(self, validated_data):
+ """Encrypt the submitted Bluesky app password before saving the record."""
+ # The row is created without the password first; the ciphertext is then written in a second, field-limited save.
+ app_password = validated_data.pop("app_password", "")
+ instance = super().create(validated_data)
+ if app_password:
+ instance.set_app_password(app_password)
+ instance.save(update_fields=["app_password_encrypted", "updated_at"])
+ return instance
+
+ def update(self, instance, validated_data):
+ """Keep the stored credential unless a replacement app password is submitted."""
+ # A missing or blank app_password leaves the stored ciphertext untouched.
+ app_password = validated_data.pop("app_password", "")
+ instance = super().update(instance, validated_data)
+ if app_password:
+ instance.set_app_password(app_password)
+ instance.save(update_fields=["app_password_encrypted", "updated_at"])
+ return instance
+
+
+class SourceConfigSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer):
+    """Serialize source-plugin configuration and normalize provider settings."""
+
+    class Meta:
+        model = SourceConfig
+        fields = [
+            "id",
+            "project",
+            "plugin_name",
+            "config",
+            "is_active",
+            "last_fetched_at",
+        ]
+        read_only_fields = ["id", "project", "last_fetched_at"]
+
+    def validate(self, attrs):
+        """Validate the effective config through the plugin registry, storing its result in attrs["config"]."""
+        stored = self.instance
+        plugin_name = attrs.get("plugin_name") or getattr(stored, "plugin_name", None)
+        # Test key presence, not truthiness: a submitted empty config must be validated, not swapped for the saved one.
+        config = attrs["config"] if "config" in attrs else getattr(stored, "config", {})
+        if not plugin_name:
+            return attrs
+        try:
+            attrs["config"] = validate_plugin_config(plugin_name, config)
+        except ValueError as exc:
+            logger.warning(
+                "Rejected invalid source config",
+                extra={"plugin_name": str(plugin_name)},
+                exc_info=exc,
+            )
+            raise serializers.ValidationError(
+                {"config": "Invalid source configuration."}
+            ) from exc
+        return attrs
+
+
+class ProjectMembershipSerializer(serializers.ModelSerializer):
+ """Serialize project-member roster entries for admin workflows."""
+ # Only role is writable; every other listed field is read-only, including the user.* passthrough fields.
+ username = serializers.CharField(source="user.username", read_only=True)
+ email = serializers.CharField(source="user.email", read_only=True)
+ display_name = serializers.CharField(source="user.display_name", read_only=True)
+
+ class Meta:
+ model = ProjectMembership
+ fields = [
+ "id",
+ "project",
+ "user",
+ "username",
+ "email",
+ "display_name",
+ "role",
+ "invited_by",
+ "joined_at",
+ ]
+ read_only_fields = [
+ "id",
+ "project",
+ "user",
+ "username",
+ "email",
+ "display_name",
+ "invited_by",
+ "joined_at",
+ ]
+
+ def validate_role(self, value: str) -> str:
+ """Restrict role updates to the supported project-role values."""
+ # Rejects anything that is not one of the ProjectRole stored values.
+ if value not in ProjectRole.values:
+ raise serializers.ValidationError("Select a valid project role.")
+ return value
diff --git a/requirements.txt b/requirements.txt
index 14e6a7ee..69bfbfcc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,6 +13,7 @@ django-allauth==65.16.1
django-anymail==15.0
django-cors-headers==4.9.0
django-import-export==4.4.0
+django-storages==1.14.6
django-stubs-ext==6.0.3
django-stubs==6.0.3
django-unfold==0.91.0
diff --git a/trends/__init__.py b/trends/__init__.py
new file mode 100644
index 00000000..1c6c5363
--- /dev/null
+++ b/trends/__init__.py
@@ -0,0 +1 @@
+"""Trends-domain models, admin, and API ownership."""
diff --git a/trends/admin.py b/trends/admin.py
new file mode 100644
index 00000000..3709eef5
--- /dev/null
+++ b/trends/admin.py
@@ -0,0 +1,210 @@
+"""Admin configuration for trends-domain models."""
+
+from urllib.parse import urlencode
+
+from django.contrib import admin
+from django.db.models import Avg, Max
+from django.urls import reverse
+from django.utils import timezone
+
+from trends.models import TopicCentroidSnapshot
+
+
+def _score_to_percent(value):
+    """Convert a score-like value to a percentage float for display.
+
+    None passes through; floats within [-1.0, 1.0] are scaled by 100, others returned as-is.
+    """
+    if value is None:
+        return None
+    number = float(value)
+    return number * (100 if abs(number) <= 1.0 else 1)
+
+
+def _drift_card_color(value) -> str:
+    """Map a drift value to an admin card severity, comparing the value exactly as passed.
+
+    None -> "info"; up to 0.15 -> "success"; up to 0.35 -> "warning"; above -> "danger".
+    """
+    if value is None:
+        return "info"
+    drift = float(value)
+    if drift <= 0.15:
+        return "success"
+    return "warning" if drift <= 0.35 else "danger"
+
+
+def _format_snapshot_freshness(computed_at) -> str:
+    """Describe the age of computed_at: "-" for None, "<N>h ago" under 24 hours, otherwise "<N>d ago"."""
+    if computed_at is None:
+        return "-"
+    elapsed_seconds = (timezone.now() - computed_at).total_seconds()
+    # Whole hours, floored; a timestamp in the future is clamped to 0 hours.
+    whole_hours = max(0, int(elapsed_seconds // 3600))
+    if whole_hours >= 24:
+        return f"{max(1, whole_hours // 24)}d ago"
+    return f"{whole_hours}h ago"
+
+
+def _freshness_card_color(computed_at) -> str:
+    """Grade snapshot recency as an admin card severity.
+
+    None -> "warning"; age up to 24h -> "success"; up to 72h -> "warning"; older -> "danger".
+    """
+    if computed_at is None:
+        return "warning"
+    # Fractional hours elapsed since the snapshot was computed.
+    hours_old = (timezone.now() - computed_at).total_seconds() / 3600
+    if hours_old <= 24:
+        return "success"
+    return "warning" if hours_old <= 72 else "danger"
+
+
+def _build_topic_centroid_project_drilldowns(queryset, changelist_url: str):
+    """Build one drilldown row per project from that project's most recent snapshot.
+
+    Args:
+        queryset: TopicCentroidSnapshot queryset to summarize.
+        changelist_url: Changelist URL that each row's filtered "href" extends.
+
+    Returns:
+        A list of dicts, one per project, sorted by lower-cased project name.
+    """
+    latest_by_project: dict[int, TopicCentroidSnapshot] = {}
+    snapshot_counts: dict[int, int] = {}
+    # centroid_vector is never read below, so defer it instead of loading the JSON for every row.
+    ordered_snapshots = (
+        queryset.select_related("project")
+        .defer("centroid_vector")
+        .order_by("project_id", "-computed_at")
+    )
+    for snapshot in ordered_snapshots:
+        project_id = snapshot.project_id
+        snapshot_counts[project_id] = snapshot_counts.get(project_id, 0) + 1
+        # Rows arrive newest-first within a project, so the first one seen is that project's latest.
+        latest_by_project.setdefault(project_id, snapshot)
+    def _drift_label(drift) -> str:
+        return "n/a" if drift is None else f"{_score_to_percent(drift):.1f}%"
+
+    return [
+        {
+            "project_id": snapshot.project_id,
+            "project_name": snapshot.project.name,
+            "snapshot_count": snapshot_counts[snapshot.project_id],
+            "centroid_active": snapshot.centroid_active,
+            "feedback_count": snapshot.feedback_count,
+            "latest_snapshot": _format_snapshot_freshness(snapshot.computed_at),
+            "drift_from_previous": _drift_label(snapshot.drift_from_previous),
+            "drift_from_week_ago": _drift_label(snapshot.drift_from_week_ago),
+            "href": f"{changelist_url}?{urlencode({'project__id__exact': snapshot.project_id})}",
+        }
+        for snapshot in sorted(
+            latest_by_project.values(), key=lambda value: value.project.name.lower()
+        )
+    ]
+
+
+@admin.register(TopicCentroidSnapshot)
+class TopicCentroidSnapshotAdmin(admin.ModelAdmin):
+ """Admin view for persisted topic-centroid history and drift."""
+ # NOTE(review): list_before_template looks like a django-unfold option, but this class extends admin.ModelAdmin — confirm it is honored.
+ list_before_template = "admin/topic_centroid_snapshot_changelist_widget.html"
+ list_display = (
+ "project",
+ "centroid_active",
+ "feedback_count",
+ "display_drift_from_previous",
+ "display_drift_from_week_ago",
+ "computed_at",
+ )
+ list_filter = (
+ "centroid_active",
+ ("project", admin.RelatedOnlyFieldListFilter),
+ "computed_at",
+ )
+ search_fields = ("project__name",)
+ autocomplete_fields = ("project",)
+
+ @admin.display(description="Drift vs Previous", ordering="drift_from_previous")
+ def display_drift_from_previous(self, obj):
+ """Render cosine-distance drift from the previous active snapshot."""
+
+ if obj.drift_from_previous is None:
+ return "n/a"
+ return f"{_score_to_percent(obj.drift_from_previous):.1f}%"
+
+ @admin.display(description="Drift vs 7d", ordering="drift_from_week_ago")
+ def display_drift_from_week_ago(self, obj):
+ """Render cosine-distance drift from the nearest week-old snapshot."""
+
+ if obj.drift_from_week_ago is None:
+ return "n/a"
+ return f"{_score_to_percent(obj.drift_from_week_ago):.1f}%"
+
+ def changelist_view(self, request, extra_context=None):
+ """Augment the changelist with centroid freshness and drift summary cards."""
+ # Stats come from get_queryset(request); presumably not narrowed by the active list filters — verify if that matters.
+ queryset = self.get_queryset(request)
+ changelist_url = reverse(
+ f"{self.admin_site.name}:{self.model._meta.app_label}_{self.model._meta.model_name}_changelist"
+ )
+ metrics = queryset.aggregate(
+ avg_drift_from_previous=Avg("drift_from_previous"),
+ avg_drift_from_week_ago=Avg("drift_from_week_ago"),
+ latest_snapshot_at=Max("computed_at"),
+ )
+ project_count = queryset.values("project_id").distinct().count()
+ active_project_count = (
+ queryset.filter(centroid_active=True)
+ .values("project_id")
+ .distinct()
+ .count()
+ )
+
+ extra_context = extra_context or {}
+ extra_context["dashboard_stats"] = [
+ {
+ "title": "Active Centroids",
+ "value": (
+ f"{active_project_count} / {project_count}"
+ if project_count
+ else "0 / 0"
+ ),
+ "icon": "hub",
+ "color": (
+ "success"
+ if active_project_count == project_count and project_count
+ else "warning"
+ ),
+ },
+ {
+ "title": "Avg Drift vs Previous",
+ "value": (
+ f"{_score_to_percent(metrics['avg_drift_from_previous']):.1f}%"
+ if metrics["avg_drift_from_previous"] is not None
+ else "-"
+ ),
+ "icon": "show_chart",
+ "color": _drift_card_color(metrics["avg_drift_from_previous"]),
+ },
+ {
+ "title": "Avg Drift vs 7d",
+ "value": (
+ f"{_score_to_percent(metrics['avg_drift_from_week_ago']):.1f}%"
+ if metrics["avg_drift_from_week_ago"] is not None
+ else "-"
+ ),
+ "icon": "timeline",
+ "color": _drift_card_color(metrics["avg_drift_from_week_ago"]),
+ },
+ {
+ "title": "Latest Snapshot",
+ "value": _format_snapshot_freshness(metrics["latest_snapshot_at"]),
+ "icon": "schedule",
+ "color": _freshness_card_color(metrics["latest_snapshot_at"]),
+ },
+ ]
+ extra_context["centroid_project_drilldowns"] = (
+ _build_topic_centroid_project_drilldowns(queryset, changelist_url)
+ )
+ return super().changelist_view(request, extra_context=extra_context)
diff --git a/trends/api.py b/trends/api.py
new file mode 100644
index 00000000..dd287d02
--- /dev/null
+++ b/trends/api.py
@@ -0,0 +1,82 @@
+"""Trends-domain API viewsets kept under the existing nested project routes."""
+
+from django.db.models import Avg, Count, Q
+from drf_spectacular.utils import extend_schema
+from rest_framework import viewsets
+from rest_framework.decorators import action
+from rest_framework.response import Response
+
+from core.api import (
+ AUTHENTICATION_REQUIRED_RESPONSE,
+ ProjectOwnedQuerysetMixin,
+ build_crud_action_overrides,
+ document_project_owned_viewset,
+)
+from core.permissions import IsProjectContributor
+from trends.models import TopicCentroidSnapshot
+from trends.serializers import (
+ TopicCentroidObservabilitySummarySerializer,
+ TopicCentroidSnapshotSerializer,
+)
+
+
+@document_project_owned_viewset(
+ resource_plural="topic centroid snapshots",
+ resource_singular="topic centroid snapshot",
+ create_description="Topic centroid snapshots are pipeline-managed history rows and are exposed read-only for observability.",
+ tag="Observability",
+ action_overrides=build_crud_action_overrides(
+ TopicCentroidSnapshotSerializer,
+ resource_plural="topic centroid snapshots for the selected project",
+ resource_singular="topic centroid snapshot",
+ ),
+)
+class TopicCentroidSnapshotViewSet(
+ ProjectOwnedQuerysetMixin, viewsets.ReadOnlyModelViewSet
+):
+ """Inspect persisted centroid history and aggregate drift for a project."""
+ # Read-only viewset (list/retrieve) plus the extra "summary" list-level action defined below.
+ serializer_class = TopicCentroidSnapshotSerializer
+ queryset = TopicCentroidSnapshot.objects.select_related("project")
+
+ def get_permissions(self):
+ """Restrict centroid observability to project contributors."""
+
+ return [IsProjectContributor()]
+
+ @extend_schema(
+ summary="Get topic centroid summary",
+ description=(
+ "Return aggregate centroid observability metrics for the selected project, "
+ "including average drift and the latest persisted snapshot."
+ ),
+ request=None,
+ responses={
+ 200: TopicCentroidObservabilitySummarySerializer,
+ 403: AUTHENTICATION_REQUIRED_RESPONSE,
+ },
+ tags=["Observability"],
+ )
+ @action(detail=False, methods=["get"], url_path="summary")
+ def summary(self, request, *args, **kwargs):
+ """Return centroid observability summary metrics for the current project."""
+ # Counts and averages come from a single aggregate() call; the latest snapshot is fetched by a separate query.
+ queryset = self.get_queryset()
+ metrics = queryset.aggregate(
+ snapshot_count=Count("id"),
+ active_snapshot_count=Count("id", filter=Q(centroid_active=True)),
+ avg_drift_from_previous=Avg("drift_from_previous"),
+ avg_drift_from_week_ago=Avg("drift_from_week_ago"),
+ )
+ serializer = TopicCentroidObservabilitySummarySerializer(
+ {
+ "project": self.get_project().id,
+ "snapshot_count": metrics["snapshot_count"],
+ "active_snapshot_count": metrics["active_snapshot_count"],
+ "avg_drift_from_previous": metrics["avg_drift_from_previous"],
+ "avg_drift_from_week_ago": metrics["avg_drift_from_week_ago"],
+ "latest_snapshot": queryset.order_by("-computed_at").first(),
+ },
+ context=self.get_serializer_context(),
+ )
+ return Response(serializer.data)
diff --git a/trends/api_urls.py b/trends/api_urls.py
new file mode 100644
index 00000000..8dc3309a
--- /dev/null
+++ b/trends/api_urls.py
@@ -0,0 +1,15 @@
+"""API route registration for trends resources."""
+
+from rest_framework_nested.routers import NestedSimpleRouter
+
+from trends.api import TopicCentroidSnapshotViewSet
+
+
+def register_project_routes(project_router: NestedSimpleRouter) -> None:
+    """Attach the topic-centroid snapshot viewset to the nested project router."""
+    # Prefix and basename are kept as named locals so the register call reads at a glance.
+    route_prefix = r"topic-centroid-snapshots"
+    route_basename = "project-topic-centroid-snapshot"
+    project_router.register(
+        route_prefix, TopicCentroidSnapshotViewSet, basename=route_basename
+    )
diff --git a/trends/apps.py b/trends/apps.py
new file mode 100644
index 00000000..38713bc9
--- /dev/null
+++ b/trends/apps.py
@@ -0,0 +1,10 @@
+"""Django app configuration for the trends domain."""
+
+from django.apps import AppConfig
+
+
+class TrendsConfig(AppConfig):
+ """Configure the trends app."""
+ # default_auto_field selects BigAutoField for models in this app that do not declare a primary key.
+ default_auto_field = "django.db.models.BigAutoField"
+ name = "trends"
diff --git a/trends/migrations/0001_initial.py b/trends/migrations/0001_initial.py
new file mode 100644
index 00000000..cffffcbc
--- /dev/null
+++ b/trends/migrations/0001_initial.py
@@ -0,0 +1,69 @@
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ initial = True
+ # State-only migration: database_operations is empty, and the model points at the existing "core_topiccentroidsnapshot" table name.
+ dependencies = [
+ ("projects", "0003_remove_project_group"),
+ ("core", "0014_move_pipeline_models_to_pipeline"),
+ ]
+
+ operations = [
+ migrations.SeparateDatabaseAndState(
+ state_operations=[
+ migrations.CreateModel(
+ name="TopicCentroidSnapshot",
+ fields=[
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ ("computed_at", models.DateTimeField(auto_now_add=True)),
+ ("centroid_active", models.BooleanField(default=False)),
+ ("centroid_vector", models.JSONField(blank=True, default=list)),
+ ("feedback_count", models.PositiveIntegerField(default=0)),
+ ("upvote_count", models.PositiveIntegerField(default=0)),
+ ("downvote_count", models.PositiveIntegerField(default=0)),
+ (
+ "drift_from_previous",
+ models.FloatField(blank=True, null=True),
+ ),
+ (
+ "drift_from_week_ago",
+ models.FloatField(blank=True, null=True),
+ ),
+ (
+ "project",
+ models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="topic_centroid_snapshots",
+ to="projects.project",
+ ),
+ ),
+ ],
+ options={
+ "ordering": ["-computed_at"],
+ "db_table": "core_topiccentroidsnapshot",
+ "indexes": [
+ models.Index(
+ fields=["project", "-computed_at"],
+ name="core_topicc_project_2e2c18_idx",
+ ),
+ models.Index(
+ fields=["project", "centroid_active", "-computed_at"],
+ name="core_topicc_project_6b2dd8_idx",
+ ),
+ ],
+ },
+ ),
+ ],
+ database_operations=[],
+ )
+ ]
diff --git a/trends/migrations/__init__.py b/trends/migrations/__init__.py
new file mode 100644
index 00000000..ec213ec3
--- /dev/null
+++ b/trends/migrations/__init__.py
@@ -0,0 +1 @@
+"""Migration package for the trends app."""
diff --git a/trends/models.py b/trends/models.py
new file mode 100644
index 00000000..0c169049
--- /dev/null
+++ b/trends/models.py
@@ -0,0 +1,38 @@
+"""Trends-domain models for project observability and discovery workflows."""
+
+from django.db import models
+
+
+class TopicCentroidSnapshot(models.Model):
+ """Capture one recomputed topic-centroid state for a project.
+
+ Each row stores the feedback counts that went into one recomputation,
+ whether a centroid was active afterwards, the centroid vector itself, and
+ two optional drift values.
+ """
+
+ # Snapshots are deleted together with their project (CASCADE).
+ project = models.ForeignKey(
+ "projects.Project",
+ on_delete=models.CASCADE,
+ related_name="topic_centroid_snapshots",
+ )
+ # Stamped automatically when the row is first inserted.
+ computed_at = models.DateTimeField(auto_now_add=True)
+ centroid_active = models.BooleanField(default=False)
+ # Stored as JSON; defaults to an empty list.
+ centroid_vector = models.JSONField(default=list, blank=True)
+ feedback_count = models.PositiveIntegerField(default=0)
+ upvote_count = models.PositiveIntegerField(default=0)
+ downvote_count = models.PositiveIntegerField(default=0)
+ # Nullable: left empty when no drift value is available for the snapshot.
+ drift_from_previous = models.FloatField(null=True, blank=True)
+ drift_from_week_ago = models.FloatField(null=True, blank=True)
+
+ class Meta:
+ # Newest snapshots first by default.
+ ordering = ["-computed_at"]
+ # Explicit table and index names carrying a "core_" prefix rather than
+ # the defaults Django would derive from this app's label.
+ db_table = "core_topiccentroidsnapshot"
+ indexes = [
+ models.Index(
+ fields=["project", "-computed_at"],
+ name="core_topicc_project_2e2c18_idx",
+ ),
+ models.Index(
+ fields=["project", "centroid_active", "-computed_at"],
+ name="core_topicc_project_6b2dd8_idx",
+ ),
+ ]
+
+ def __str__(self) -> str:
+ # Reads the related project, which may cost a query if it is not loaded.
+ return f"Topic centroid snapshot for {self.project.name}"
diff --git a/trends/serializers.py b/trends/serializers.py
new file mode 100644
index 00000000..4e441488
--- /dev/null
+++ b/trends/serializers.py
@@ -0,0 +1,35 @@
+"""DRF serializers for trends-domain models."""
+
+from rest_framework import serializers
+
+from trends.models import TopicCentroidSnapshot
+
+
+class TopicCentroidSnapshotSerializer(serializers.ModelSerializer):
+ """Serialize one persisted topic-centroid recomputation for a project.
+
+ Every exposed field is read-only, and ``centroid_vector`` is not part of
+ the serialized output.
+ """
+
+ class Meta:
+ model = TopicCentroidSnapshot
+ fields = [
+ "id",
+ "project",
+ "computed_at",
+ "centroid_active",
+ "feedback_count",
+ "upvote_count",
+ "downvote_count",
+ "drift_from_previous",
+ "drift_from_week_ago",
+ ]
+ # Reuse the same list so that every exposed field is read-only.
+ read_only_fields = fields
+
+
+class TopicCentroidObservabilitySummarySerializer(serializers.Serializer):
+ """Serialize project-level centroid observability summary metrics.
+
+ A plain (non-model) serializer: the caller supplies the aggregate values.
+ """
+
+ project = serializers.IntegerField()
+ snapshot_count = serializers.IntegerField()
+ active_snapshot_count = serializers.IntegerField()
+ # The two averages and the latest snapshot may each be null.
+ avg_drift_from_previous = serializers.FloatField(allow_null=True)
+ avg_drift_from_week_ago = serializers.FloatField(allow_null=True)
+ latest_snapshot = TopicCentroidSnapshotSerializer(allow_null=True)
diff --git a/trends/tasks.py b/trends/tasks.py
new file mode 100644
index 00000000..ae111569
--- /dev/null
+++ b/trends/tasks.py
@@ -0,0 +1,333 @@
+"""Celery tasks and helpers for trends-domain centroid recomputation."""
+
+import math
+from datetime import datetime, timedelta
+from typing import Protocol, cast
+
+from celery import shared_task
+from django.conf import settings
+from django.core.cache import cache
+from django.db.models import Model
+from django.utils import timezone
+
+from core.embeddings import (
+ build_content_embedding_text,
+ delete_topic_centroid,
+ embed_text,
+ upsert_topic_centroid,
+)
+from core.models import FeedbackType, UserFeedback
+from projects.models import Project
+
+from .models import TopicCentroidSnapshot
+
+# Only feedback created within this many days is loaded for a recomputation.
+TOPIC_CENTROID_LOOKBACK_DAYS = 90
+# With fewer upvotes than this in the window, the centroid is deleted and an
+# inactive snapshot is recorded instead.
+TOPIC_CENTROID_MIN_UPVOTES = 10
+# Upper bound on the factor applied to the downvote mean before it is
+# subtracted from the upvote mean.
+TOPIC_CENTROID_DOWNVOTE_WEIGHT = 0.25
+# Lifetime (seconds) of the cache key that debounces recompute requests.
+TOPIC_CENTROID_DEBOUNCE_SECONDS = 60 * 5
+# Time constant (days) of the exponential decay applied to feedback age.
+TOPIC_CENTROID_DECAY_TAU_DAYS = 45
+
+
+class DelayedTask(Protocol):
+    """Structural type for a task that is callable and also exposes ``delay``."""
+
+    def __call__(self, *args: object, **kwargs: object) -> object: ...
+
+    def delay(self, *args: object, **kwargs: object) -> object: ...
+
+
+def _enqueue_task(task: object, *args: object) -> None:
+    """Hand ``task`` to Celery by calling its ``delay`` method with ``args``."""
+
+    delayed_task = cast(DelayedTask, task)
+    delayed_task.delay(*args)
+
+
+def _require_pk(instance: Model) -> int:
+    """Return ``instance.pk`` coerced to ``int``.
+
+    Raises:
+        ValueError: if the instance has no primary key yet (``pk`` is ``None``).
+    """
+
+    primary_key = instance.pk
+    if primary_key is not None:
+        return int(primary_key)
+    model_name = instance.__class__.__name__
+    raise ValueError(f"{model_name} must be saved before task dispatch")
+
+
+@shared_task(name="core.tasks.run_all_topic_centroid_recomputations")
+def run_all_topic_centroid_recomputations() -> int:
+    """Trigger a centroid recomputation for each project.
+
+    Runs the recomputation inline when ``CELERY_TASK_ALWAYS_EAGER`` is set and
+    dispatches it through ``delay`` otherwise. Returns the number of projects
+    handled.
+    """
+
+    handled = 0
+    for project_id in list(Project.objects.values_list("id", flat=True)):
+        if not settings.CELERY_TASK_ALWAYS_EAGER:
+            _enqueue_task(recompute_topic_centroid, project_id)
+        else:
+            recompute_topic_centroid(project_id)
+        handled += 1
+    return handled
+
+
+@shared_task(name="core.tasks.recompute_topic_centroid")
+def recompute_topic_centroid(project_id: int):
+    """Rebuild the project's feedback centroid from recent editorial signals.
+
+    Feedback from the last ``TOPIC_CENTROID_LOOKBACK_DAYS`` days is loaded and
+    each row's content is embedded (once per content item). Upvotes and
+    downvotes are averaged separately with an age-based decay weight, and the
+    scaled downvote mean is subtracted from the upvote mean. The normalized
+    result is stored as the project's centroid and recorded in a snapshot.
+
+    If there are too few upvotes, or no usable vector can be produced, the
+    stored centroid is deleted and an inactive snapshot is recorded instead.
+
+    Args:
+        project_id: primary key of the project to recompute.
+
+    Returns:
+        A dict with ``project_id``, ``feedback_count``, ``upvote_count``,
+        ``downvote_count`` and ``centroid_active``.
+    """
+
+    now = timezone.now()
+    window_start = now - timedelta(days=TOPIC_CENTROID_LOOKBACK_DAYS)
+    feedback_rows = list(
+        UserFeedback.objects.filter(project_id=project_id, created_at__gte=window_start)
+        .select_related("content")
+        .order_by("created_at")
+    )
+    upvote_count = sum(
+        1 for row in feedback_rows if row.feedback_type == FeedbackType.UPVOTE
+    )
+    downvote_count = sum(
+        1 for row in feedback_rows if row.feedback_type == FeedbackType.DOWNVOTE
+    )
+    counts = {
+        "feedback_count": len(feedback_rows),
+        "upvote_count": upvote_count,
+        "downvote_count": downvote_count,
+    }
+
+    try:
+        # Check the cheap count threshold before paying for any embeddings.
+        if upvote_count < TOPIC_CENTROID_MIN_UPVOTES:
+            return _deactivate_topic_centroid(project_id, now, counts)
+
+        vector_cache: dict[int, list[float]] = {}
+        upvote_vectors: list[tuple[list[float], float]] = []
+        downvote_vectors: list[tuple[list[float], float]] = []
+
+        for feedback in feedback_rows:
+            # Bucket explicitly so the vectors agree with the counts above; a
+            # row that is neither an upvote nor a downvote must not be
+            # treated as a downvote.
+            if feedback.feedback_type == FeedbackType.UPVOTE:
+                bucket = upvote_vectors
+            elif feedback.feedback_type == FeedbackType.DOWNVOTE:
+                bucket = downvote_vectors
+            else:
+                continue
+            content_pk = _require_pk(feedback.content)
+            vector = vector_cache.get(content_pk)
+            if vector is None:
+                # Embed each content item once, even with several feedback rows.
+                vector = embed_text(build_content_embedding_text(feedback.content))
+                vector_cache[content_pk] = vector
+            weight = _feedback_decay_weight(feedback.created_at, now)
+            bucket.append((vector, weight))
+
+        upvote_mean, upvote_weight = _weighted_mean_vector(upvote_vectors)
+        if not upvote_mean or upvote_weight <= 0:
+            return _deactivate_topic_centroid(project_id, now, counts)
+
+        downvote_mean, downvote_weight = _weighted_mean_vector(downvote_vectors)
+        downvote_scale = 0.0
+        if downvote_mean and downvote_weight > 0:
+            # Cap the downvote influence when downvotes outweigh upvotes.
+            downvote_scale = TOPIC_CENTROID_DOWNVOTE_WEIGHT * min(
+                1.0, upvote_weight / downvote_weight
+            )
+
+        centroid_vector = [
+            upvote_value - downvote_scale * downvote_value
+            for upvote_value, downvote_value in zip(
+                upvote_mean,
+                downvote_mean or [0.0] * len(upvote_mean),
+            )
+        ]
+        normalized_centroid = _normalize_vector(centroid_vector)
+        if not normalized_centroid:
+            return _deactivate_topic_centroid(project_id, now, counts)
+
+        upsert_topic_centroid(
+            project_id,
+            normalized_centroid,
+            upvote_count=upvote_count,
+            downvote_count=downvote_count,
+            feedback_count=len(feedback_rows),
+        )
+        _create_topic_centroid_snapshot(
+            project_id=project_id,
+            computed_at=now,
+            centroid_active=True,
+            centroid_vector=normalized_centroid,
+            **counts,
+        )
+        return {"project_id": project_id, **counts, "centroid_active": True}
+    finally:
+        # Always release the debounce key so a later request can queue again.
+        cache.delete(_topic_centroid_debounce_key(project_id))
+
+
+def _deactivate_topic_centroid(
+    project_id: int, computed_at: datetime, counts: dict[str, int]
+) -> dict[str, object]:
+    """Delete the stored centroid, record an inactive snapshot, return the result."""
+
+    delete_topic_centroid(project_id)
+    _create_topic_centroid_snapshot(
+        project_id=project_id,
+        computed_at=computed_at,
+        centroid_active=False,
+        centroid_vector=[],
+        **counts,
+    )
+    return {"project_id": project_id, **counts, "centroid_active": False}
+
+
+def queue_topic_centroid_recompute(project_id: int) -> bool:
+    """Debounce and queue topic-centroid recomputation for one project.
+
+    A cache key is claimed with ``cache.add``; if it is already present a
+    recomputation is considered pending and nothing is queued.
+
+    Args:
+        project_id: primary key of the project to recompute.
+
+    Returns:
+        ``True`` when a recomputation was run or dispatched, ``False`` when
+        the request was debounced.
+
+    Raises:
+        Exception: whatever the recomputation or the dispatch raises is
+            propagated unchanged.
+    """
+
+    debounce_key = _topic_centroid_debounce_key(project_id)
+    if not cache.add(
+        debounce_key,
+        timezone.now().isoformat(),
+        timeout=TOPIC_CENTROID_DEBOUNCE_SECONDS,
+    ):
+        return False
+
+    if settings.CELERY_TASK_ALWAYS_EAGER:
+        # The task clears the debounce key itself in its ``finally`` block.
+        recompute_topic_centroid(project_id)
+        return True
+
+    try:
+        _enqueue_task(recompute_topic_centroid, project_id)
+    except Exception:
+        # Nothing was queued, so no task will ever clear the key; release it
+        # now instead of silently dropping requests until the timeout expires.
+        cache.delete(debounce_key)
+        raise
+    return True
+
+
+def _feedback_decay_weight(created_at: datetime, now: datetime) -> float:
+    """Return ``exp(-age / tau)`` for feedback created at ``created_at``.
+
+    The age is measured in days relative to ``now`` and is clamped to zero
+    for timestamps that lie in the future.
+    """
+
+    elapsed_days = (now - created_at).total_seconds() / 86400
+    age_days = elapsed_days if elapsed_days > 0.0 else 0.0
+    return math.exp(-age_days / TOPIC_CENTROID_DECAY_TAU_DAYS)
+
+
+def _create_topic_centroid_snapshot(
+    *,
+    project_id: int,
+    computed_at: datetime,
+    centroid_active: bool,
+    centroid_vector: list[float],
+    feedback_count: int,
+    upvote_count: int,
+    downvote_count: int,
+) -> TopicCentroidSnapshot:
+    """Store a snapshot row for one recomputation, including drift values.
+
+    Drift is the cosine distance to the most recent active snapshot and to
+    the most recent active snapshot that is at least seven days older than
+    ``computed_at``. Both are left as ``None`` for inactive snapshots or when
+    no reference snapshot exists.
+    """
+
+    active_history = (
+        TopicCentroidSnapshot.objects.filter(
+            project_id=project_id, centroid_active=True
+        )
+        .order_by("-computed_at")
+        .only("centroid_vector", "computed_at")
+    )
+    latest_active = active_history.first()
+    week_old_active = active_history.filter(
+        computed_at__lte=computed_at - timedelta(days=7)
+    ).first()
+
+    drift_from_previous = None
+    drift_from_week_ago = None
+    if centroid_active:
+        if latest_active is not None:
+            drift_from_previous = _cosine_distance(
+                centroid_vector, latest_active.centroid_vector
+            )
+        if week_old_active is not None:
+            drift_from_week_ago = _cosine_distance(
+                centroid_vector, week_old_active.centroid_vector
+            )
+
+    snapshot = TopicCentroidSnapshot.objects.create(
+        project_id=project_id,
+        centroid_active=centroid_active,
+        centroid_vector=centroid_vector,
+        feedback_count=feedback_count,
+        upvote_count=upvote_count,
+        downvote_count=downvote_count,
+        drift_from_previous=drift_from_previous,
+        drift_from_week_ago=drift_from_week_ago,
+    )
+    # ``computed_at`` is filled automatically on insert, so write the caller's
+    # timestamp back with an UPDATE when the two differ.
+    if snapshot.computed_at != computed_at:
+        TopicCentroidSnapshot.objects.filter(pk=snapshot.pk).update(
+            computed_at=computed_at
+        )
+        snapshot.computed_at = computed_at
+    return snapshot
+
+
+def _cosine_distance(left: list[float], right: list[float]) -> float | None:
+    """Return ``1 - cosine_similarity`` of two vectors, clamped to ``[0, 2]``.
+
+    Returns ``None`` when either vector is empty, the lengths differ, or
+    either vector has zero magnitude.
+    """
+
+    if not (left and right) or len(left) != len(right):
+        return None
+
+    left_norm = math.sqrt(sum(component * component for component in left))
+    right_norm = math.sqrt(sum(component * component for component in right))
+    if left_norm <= 0 or right_norm <= 0:
+        return None
+
+    dot_product = sum(a * b for a, b in zip(left, right))
+    similarity = dot_product / (left_norm * right_norm)
+    # Floating-point error can push the similarity slightly outside [-1, 1].
+    clamped_similarity = max(-1.0, min(1.0, similarity))
+    distance = 1.0 - clamped_similarity
+    return max(0.0, min(2.0, distance))
+
+
+def _weighted_mean_vector(
+    weighted_vectors: list[tuple[list[float], float]],
+) -> tuple[list[float], float]:
+    """Average ``(vector, weight)`` pairs component-wise.
+
+    Returns the weighted mean together with the sum of the weights, or
+    ``([], 0.0)`` when there is no input or the weights do not sum to a
+    positive value. The output dimension is taken from the first vector.
+    """
+
+    if not weighted_vectors:
+        return [], 0.0
+
+    first_vector, _ = weighted_vectors[0]
+    sums = [0.0] * len(first_vector)
+    weight_sum = 0.0
+    for components, weight in weighted_vectors:
+        weight_sum += weight
+        for position, component in enumerate(components):
+            sums[position] += float(component) * weight
+
+    if weight_sum <= 0:
+        return [], 0.0
+    mean = [component_sum / weight_sum for component_sum in sums]
+    return mean, weight_sum
+
+
+def _normalize_vector(vector: list[float]) -> list[float]:
+    """Scale ``vector`` to unit length; return ``[]`` for a zero-magnitude input."""
+
+    squared_length = sum(component * component for component in vector)
+    length = math.sqrt(squared_length)
+    if length <= 0:
+        return []
+    return [float(component) / length for component in vector]
+
+
+def _topic_centroid_debounce_key(project_id: int) -> str:
+    """Build the per-project cache key that debounces centroid recomputation."""
+
+    return "topic-centroid-recompute:{}".format(project_id)
diff --git a/users/__init__.py b/users/__init__.py
new file mode 100644
index 00000000..81d0e904
--- /dev/null
+++ b/users/__init__.py
@@ -0,0 +1 @@
+"""Users app package."""
diff --git a/users/admin.py b/users/admin.py
new file mode 100644
index 00000000..cb349749
--- /dev/null
+++ b/users/admin.py
@@ -0,0 +1,47 @@
+"""Django admin registration for the custom application user model."""
+
+from django.contrib import admin
+from django.contrib.auth.admin import UserAdmin
+
+from users.forms import AppUserChangeForm, AppUserCreationForm
+from users.models import AppUser
+
+
+@admin.register(AppUser)
+class AppUserAdmin(UserAdmin):
+ """Admin configuration for the custom application user model.
+
+ Builds on Django's stock ``UserAdmin`` and appends a "Profile" section to
+ both the change form and the add form.
+ """
+
+ # Separate forms for creating a user and for editing an existing one.
+ add_form = AppUserCreationForm
+ form = AppUserChangeForm
+ model = AppUser
+ # Columns shown in the user changelist.
+ list_display = (
+ "username",
+ "email",
+ "display_name",
+ "is_staff",
+ "is_active",
+ )
+ # Change form: the stock UserAdmin sections followed by profile fields.
+ fieldsets = UserAdmin.fieldsets + (
+ (
+ "Profile",
+ {
+ "fields": (
+ "display_name",
+ "avatar",
+ "bio",
+ "timezone",
+ )
+ },
+ ),
+ )
+ # Add form: the stock UserAdmin sections plus display name and email.
+ add_fieldsets = UserAdmin.add_fieldsets + (
+ (
+ "Profile",
+ {
+ "fields": (
+ "display_name",
+ "email",
+ )
+ },
+ ),
+ )
diff --git a/users/api.py b/users/api.py
new file mode 100644
index 00000000..a839cd73
--- /dev/null
+++ b/users/api.py
@@ -0,0 +1,159 @@
+"""Current-user profile API endpoints for the users app."""
+
+from __future__ import annotations
+
+from django.conf import settings
+from django.utils import timezone
+from drf_spectacular.utils import extend_schema
+from rest_framework import permissions, serializers, status
+from rest_framework.exceptions import PermissionDenied
+from rest_framework.parsers import FormParser, MultiPartParser
+from rest_framework.response import Response
+from rest_framework.views import APIView
+
+from projects.models import ProjectMembership
+from users.models import AppUser, MembershipInvitation, avatar_thumbnail_path
+from users.serializers import (
+ AvatarUploadSerializer,
+ ProfileSerializer,
+ PublicMembershipInvitationSerializer,
+)
+from users.tasks import generate_avatar_thumbnail
+
+
+def _delete_avatar_assets(user: AppUser) -> None:
+    """Remove the user's avatar file and its thumbnail from storage.
+
+    Does nothing when the user has no avatar. Each file is deleted only if
+    the storage backend reports that it exists.
+    """
+
+    if not user.avatar:
+        return
+
+    storage = user.avatar.storage
+    avatar_name = user.avatar.name
+    thumbnail_name = avatar_thumbnail_path(user)
+
+    if avatar_name and storage.exists(avatar_name):
+        storage.delete(avatar_name)
+    if storage.exists(thumbnail_name):
+        storage.delete(thumbnail_name)
+
+
+@extend_schema(tags=["Users"])
+class ProfileView(APIView):
+    """Read and partially update the profile of the authenticated user."""
+
+    permission_classes = [permissions.IsAuthenticated]
+
+    @extend_schema(responses={200: ProfileSerializer})
+    def get(self, request):
+        """Serialize ``request.user`` and return the result."""
+
+        return Response(ProfileSerializer(request.user).data)
+
+    @extend_schema(request=ProfileSerializer, responses={200: ProfileSerializer})
+    def patch(self, request):
+        """Apply a validated partial update and return the refreshed profile."""
+
+        current_user = request.user
+        update = ProfileSerializer(current_user, data=request.data, partial=True)
+        update.is_valid(raise_exception=True)
+        update.save()
+        # Serialize again from the saved user rather than reusing ``update``.
+        return Response(ProfileSerializer(current_user).data)
+
+
+@extend_schema(tags=["Users"])
+class ProfileAvatarView(APIView):
+    """Upload or remove the avatar image of the authenticated user."""
+
+    permission_classes = [permissions.IsAuthenticated]
+    parser_classes = [MultiPartParser, FormParser]
+
+    @extend_schema(request=AvatarUploadSerializer, responses={200: ProfileSerializer})
+    def post(self, request):
+        """Replace the user's avatar and trigger thumbnail generation."""
+
+        upload = AvatarUploadSerializer(data=request.data)
+        upload.is_valid(raise_exception=True)
+
+        account = request.user
+        # Remove the previous avatar and thumbnail before storing the new file.
+        _delete_avatar_assets(account)
+        account.avatar = upload.validated_data["avatar"]
+        account.save(update_fields=["avatar"])
+
+        if not settings.CELERY_TASK_ALWAYS_EAGER:
+            generate_avatar_thumbnail.delay(account.id)
+        else:
+            generate_avatar_thumbnail(account.id)
+
+        payload = ProfileSerializer(account).data
+        return Response(payload, status=status.HTTP_200_OK)
+
+    @extend_schema(responses={200: ProfileSerializer})
+    def delete(self, request):
+        """Delete the stored avatar files and clear the avatar field."""
+
+        account = request.user
+        _delete_avatar_assets(account)
+        account.avatar = None
+        account.save(update_fields=["avatar"])
+        return Response(ProfileSerializer(account).data)
+
+
+@extend_schema(tags=["Users"])
+class MembershipInvitationTokenView(APIView):
+    """Expose and redeem one invitation token."""
+
+    def get_permissions(self):
+        """Allow anyone to inspect an invitation, but require auth to accept it."""
+
+        if self.request.method == "GET":
+            permission_classes = [permissions.AllowAny]
+        else:
+            permission_classes = [permissions.IsAuthenticated]
+        return [permission() for permission in permission_classes]
+
+    def _get_invitation(self, token: str) -> MembershipInvitation:
+        """Load one invitation row by token or raise a 404.
+
+        A bare ``.get()`` would raise ``DoesNotExist`` for an unknown token,
+        which reaches the client as a 500 instead of a 404.
+        """
+
+        # Imported locally so this block does not depend on the module's
+        # top-level import list.
+        from rest_framework.generics import get_object_or_404
+
+        return get_object_or_404(
+            MembershipInvitation.objects.select_related("project", "invited_by"),
+            token=token,
+        )
+
+    @extend_schema(responses={200: PublicMembershipInvitationSerializer})
+    def get(self, request, token: str):
+        """Return public invite details for one token."""
+
+        invitation = self._get_invitation(token)
+        return Response(PublicMembershipInvitationSerializer(invitation).data)
+
+    @extend_schema(responses={200: PublicMembershipInvitationSerializer})
+    def post(self, request, token: str):
+        """Accept a project invitation for the authenticated user.
+
+        Rejects revoked or already-accepted invitations with a validation
+        error, and rejects users whose email does not match the invitation
+        with a permission error. On success the user's project membership is
+        created or updated with the invited role, and the invitation is
+        stamped as accepted.
+        """
+
+        invitation = self._get_invitation(token)
+        if invitation.revoked_at is not None:
+            raise serializers.ValidationError(
+                {"token": "This invitation has been revoked."}
+            )
+        if invitation.accepted_at is not None:
+            raise serializers.ValidationError(
+                {"token": "This invitation has already been accepted."}
+            )
+
+        # Compare emails case-insensitively and ignoring surrounding spaces.
+        user_email = (request.user.email or "").strip().lower()
+        invitation_email = invitation.email.strip().lower()
+        if not user_email or user_email != invitation_email:
+            raise PermissionDenied(
+                f"Sign in as {invitation.email} to accept this invite."
+            )
+
+        membership, created = ProjectMembership.objects.update_or_create(
+            user=request.user,
+            project=invitation.project,
+            defaults={
+                "role": invitation.role,
+                "invited_by": invitation.invited_by,
+            },
+        )
+        if created and membership.joined_at is None:
+            # Assign the timestamp before saving; saving the field without
+            # setting it first would just write ``None`` back.
+            membership.joined_at = timezone.now()
+            membership.save(update_fields=["joined_at"])
+
+        invitation.accepted_at = timezone.now()
+        invitation.save(update_fields=["accepted_at"])
+
+        return Response(PublicMembershipInvitationSerializer(invitation).data)
diff --git a/users/api_urls.py b/users/api_urls.py
new file mode 100644
index 00000000..e0e053aa
--- /dev/null
+++ b/users/api_urls.py
@@ -0,0 +1,19 @@
+"""URL routes for current-user profile endpoints."""
+
+from django.urls import path
+
+from users.api import (
+ MembershipInvitationTokenView,
+ ProfileAvatarView,
+ ProfileView,
+)
+
+urlpatterns = [
+ path("profile/", ProfileView.as_view(), name="profile"),
+ path("profile/avatar/", ProfileAvatarView.as_view(), name="profile-avatar"),
+ path(
+ "invitations/