diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 1ae4fcfb..2c047dab 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -4,9 +4,9 @@ You are working in Newsletter Maker, a Django + DRF + Celery + Qdrant backend wi ## Repository Shape -- Backend runtime code lives in `core/`. +- Backend runtime code is split across `core/`, `projects/`, `content/`, `entities/`, `ingestion/`, `newsletters/`, `pipeline/`, `trends/`, and `users/`. - Django project settings and top-level URLs live in `newsletter_maker/`. -- Backend tests live primarily in `core/tests/` and `tests/`. +- Backend tests live primarily in `core/tests/`, app-local `tests/` packages, and `tests/`. - Frontend application code lives in `frontend/src/app/`, shared UI in `frontend/src/components/`, and shared API/types/helpers in `frontend/src/lib/`. - Operational and architecture docs live in `docs/`. diff --git a/.vscode/settings.json b/.vscode/settings.json index 2c51afc6..a3669184 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -37,6 +37,7 @@ "pyyaml", "QDRANT", "readyz", + "recomputations", "Referer", "scaffolder", "simplejwt", diff --git a/SESSION.md b/SESSION.md index 20701140..e5657c34 100644 --- a/SESSION.md +++ b/SESSION.md @@ -1,13 +1,16 @@ # Session Restore Point -## Useful Commands From Today - -```bash -docker run --rm newsletter-maker-app:dev python -c "import drf_standardized_errors; print('ok')" -docker compose exec django python -c "import drf_standardized_errors; print('ok')" -docker compose exec django pip show drf-standardized-errors -docker inspect newsletter-maker-django-1 --format '{{.Id}} {{.Image}} {{.Config.Image}}' -docker inspect newsletter-maker-django-1 --format '{{json .Mounts}}' -pytest core/tests/test_embeddings.py -q -ruff check core/management/commands/seed_demo.py core/tests/test_embeddings.py -``` +I’m checking how this repo wires the database for just migrate so I can tell you whether 
reset-volumes will actually drop the database you’re about to migrate. + +Searched for database configuration in settings files +Not necessarily. + +just reset-volumes runs docker compose down -v --remove-orphans, so it deletes the Docker named volumes from justfile:119 and justfile:159. In this repo that includes the Postgres volume from docker-compose.yml:35 and docker-compose.yml:79, but also Redis, Qdrant, and the frontend node_modules volume. + +The important detail is that just migrate is a local python3 manage.py migrate command from justfile:123, and the repo default .env.example points DATABASE_URL at SQLite in .env.example:5. So: + +If your current .env still uses sqlite:///db.sqlite3, just reset-volumes will not drop the database you are migrating. You would need to remove db.sqlite3 instead, then run just migrate. +If your current .env points at the Docker Postgres service, then yes, just reset-volumes will wipe that Postgres database. After that, bring services back up so Postgres exists again, then run just migrate. +So the short answer is: only use just reset-volumes if the database you want to drop is the Dockerized Postgres one. If you want, I can help you verify which DB your current .env is targeting before you do anything destructive. 
+ +DATABASE_URL=sqlite:///db.sqlite3 diff --git a/content/__init__.py b/content/__init__.py new file mode 100644 index 00000000..fbe6fe7c --- /dev/null +++ b/content/__init__.py @@ -0,0 +1 @@ +"""Content-domain app owning editorial content records and feedback.""" diff --git a/content/admin.py b/content/admin.py new file mode 100644 index 00000000..20d617b4 --- /dev/null +++ b/content/admin.py @@ -0,0 +1,322 @@ +"""Admin configuration for content-domain models.""" + +from django.contrib import admin, messages +from django.db.models import Avg +from django.utils.html import format_html +from unfold.admin import ModelAdmin + +from content.models import Content, UserFeedback +from core.admin import ( + DuplicateStateFilter, + HighValueFilter, + _score_color, + _score_to_percent, +) + + +@admin.register(Content) +class ContentAdmin(admin.ModelAdmin): + """Admin view for curated content plus trace and score context.""" + + list_display = ( + "display_relevance", + "display_authority_adjusted_score", + "duplicate_badge", + "duplicate_parent", + "is_active", + "is_reference", + "preview_content", + "source_plugin", + "project", + "title", + "view_trace", + ) + list_editable = ("is_reference", "is_active") + list_filter = ( + HighValueFilter, + DuplicateStateFilter, + ("project", admin.RelatedOnlyFieldListFilter), + "source_plugin", + "is_active", + ) + search_fields = ("title", "author", "url") + actions = ["generate_newsletter_ideas"] + + @admin.display(description="Preview") + def preview_content(self, obj): + """Add a quick preview based on the stored content text.""" + + preview_text = (obj.content_text or "").strip() + if not preview_text: + return "-" + return format_html( + 'πŸ” View', + preview_text[:500], + ) + + @admin.display(description="AI Trace") + def view_trace(self, obj): + """Link to the latest external trace or fall back to stored skill history.""" + + from urllib.parse import urlencode + + from django.conf import settings + from django.urls import 
reverse + + latest_skill_result = ( + obj.skill_results.filter(superseded_by__isnull=True) + .order_by("-created_at") + .first() + ) + if latest_skill_result is None: + return "-" + + result_data = latest_skill_result.result_data or {} + trace_sections = [result_data] + for section_name in ( + "trace", + "langsmith", + "langfuse", + "observability", + "telemetry", + ): + section = result_data.get(section_name) + if isinstance(section, dict): + trace_sections.append(section) + + trace_url = "" + trace_id = "" + for section in trace_sections: + for key in ( + "trace_url", + "traceUrl", + "langsmith_run_url", + "langfuse_trace_url", + ): + value = section.get(key) + if isinstance(value, str) and value: + trace_url = value + break + if trace_url: + break + for key in ( + "trace_id", + "traceId", + "run_id", + "runId", + "langsmith_run_id", + "langfuse_trace_id", + ): + value = section.get(key) + if isinstance(value, str) and value: + trace_id = value + break + + if ( + not trace_url + and trace_id + and getattr(settings, "AI_TRACE_URL_TEMPLATE", "") + ): + trace_url = settings.AI_TRACE_URL_TEMPLATE.format( + content_id=obj.id, + run_id=trace_id, + skill_name=latest_skill_result.skill_name, + skill_result_id=latest_skill_result.id, + project_id=obj.project_id, + trace_id=trace_id, + ) + + if trace_url: + link_label = "πŸ“ˆ Trace" + link_title = f"Open external trace for {latest_skill_result.skill_name}" + else: + trace_url = "{}?{}".format( + reverse( + "admin:{}_{}_changelist".format( + latest_skill_result._meta.app_label, + latest_skill_result._meta.model_name, + ) + ), + urlencode({"content__id__exact": obj.id}), + ) + link_label = "🧠 Skill runs" + link_title = f"Open persisted skill runs for {obj.title}" + + return format_html( + '{}', + trace_url, + link_title, + link_label, + ) + + @admin.display(description="Base Score") + def display_relevance(self, obj): + """Render the relevance score with a coarse color-coded severity band.""" + + if obj.relevance_score is 
None: + return "-" + percent_value = _score_to_percent(obj.relevance_score) + return format_html( + '{}', + _score_color(obj.relevance_score), + f"{percent_value:.1f}%", + ) + + @admin.display(description="Adjusted") + def display_authority_adjusted_score(self, obj): + """Render the authority-adjusted relevance score when available.""" + + if obj.authority_adjusted_score is None: + return "-" + percent_value = _score_to_percent(obj.authority_adjusted_score) + return format_html( + '{}', + _score_color(obj.authority_adjusted_score), + f"{percent_value:.1f}%", + ) + + @admin.display(description="Duplicates", ordering="duplicate_signal_count") + def duplicate_badge(self, obj): + """Show how many duplicate sightings point at this content row.""" + + if obj.duplicate_signal_count <= 0: + return "-" + return format_html( + 'Also seen in {} source(s)', + obj.duplicate_signal_count, + ) + + @admin.display(description="Duplicate Of", ordering="duplicate_of") + def duplicate_parent(self, obj): + """Show the retained canonical content row when this item is a duplicate.""" + + if obj.duplicate_of is None: + return "-" + return obj.duplicate_of.title + + def changelist_view(self, request, extra_context=None): + """Augment the changelist with content dashboard statistics.""" + + queryset = self.get_queryset(request) + metrics = queryset.aggregate( + avg_score=Avg("relevance_score"), + avg_adjusted_score=Avg("authority_adjusted_score"), + ) + + extra_context = extra_context or {} + extra_context["dashboard_stats"] = [ + { + "title": "Avg Base Score", + "value": ( + f"{_score_to_percent(metrics['avg_score']):.1f}%" + if metrics["avg_score"] is not None + else "-" + ), + "icon": "insights", + "color": ( + "success" + if _score_color(metrics["avg_score"]) == "green" + else "warning" + ), + }, + { + "title": "Avg Adjusted Score", + "value": ( + f"{_score_to_percent(metrics['avg_adjusted_score']):.1f}%" + if metrics["avg_adjusted_score"] is not None + else "-" + ), + "icon": 
"auto_graph", + "color": ( + "success" + if _score_color(metrics["avg_adjusted_score"]) == "green" + else "warning" + ), + }, + { + "title": "Total Filtered", + "value": queryset.count(), + "icon": "inventory_2", + }, + ] + + return super().changelist_view(request, extra_context=extra_context) + + @admin.action(description="Generate Ideas for Newsletter") + def generate_newsletter_ideas(self, request, queryset): + """Queue pipeline processing for the selected content items.""" + + from core.tasks import process_content + + content_ids = list(queryset.values_list("id", flat=True)) + for content_id in content_ids: + process_content.delay(content_id) + self.message_user( + request, + f"Successfully queued the pipeline for {len(content_ids)} items.", + messages.SUCCESS, + ) + + +@admin.register(UserFeedback) +class UserFeedbackAdmin(ModelAdmin): + """Admin view for editorial feedback and agreement with AI scoring.""" + + list_display = ( + "display_feedback", + "get_content_title", + "get_ai_score", + "project", + "user", + "created_at", + ) + list_filter = ("feedback_type", ("project", admin.RelatedOnlyFieldListFilter)) + search_fields = ("content__title", "user__email", "user__username") + + @admin.display(description="Type") + def display_feedback(self, obj): + """Render feedback as a thumbs-up or thumbs-down glyph.""" + + if str(obj.feedback_type).lower() == "upvote": + return format_html('{}', "1.2rem", "πŸ‘") + return format_html('{}', "1.2rem", "πŸ‘Ž") + + @admin.display(description="Content Title") + def get_content_title(self, obj): + """Return a shortened content title for list display.""" + + return obj.content.title[:50] + "..." 
+ + @admin.display(description="AI Score") + def get_ai_score(self, obj): + """Display the original AI score to compare with user feedback.""" + + score = obj.content.relevance_score + if score is None: + return "-" + color = "green" if score > 75 else "red" if score < 40 else "orange" + return format_html('{}%', color, score) + + def changelist_view(self, request, extra_context=None): + """Augment the changelist with editorial approval statistics.""" + + qs = self.get_queryset(request) + extra_context = extra_context or {} + upvotes = qs.filter(feedback_type="upvote").count() + total = qs.count() or 1 + approval_rate = (upvotes / total) * 100 + + extra_context["dashboard_stats"] = [ + { + "title": "Approval Rate", + "value": f"{approval_rate:.1f}%", + "icon": "thumb_up", + "color": "success" if approval_rate > 80 else "warning", + }, + { + "title": "Total Feedback", + "value": total, + "icon": "forum", + }, + ] + return super().changelist_view(request, extra_context=extra_context) diff --git a/content/api.py b/content/api.py new file mode 100644 index 00000000..49f1d091 --- /dev/null +++ b/content/api.py @@ -0,0 +1,143 @@ +"""Content-domain API viewsets kept under the existing nested project routes.""" + +from drf_spectacular.utils import extend_schema +from rest_framework import serializers, status, viewsets +from rest_framework.decorators import action +from rest_framework.response import Response + +from content.models import Content, UserFeedback +from content.serializers import ContentSerializer, UserFeedbackSerializer +from core.api import ( + AUTHENTICATION_REQUIRED_RESPONSE, + CONTENT_CREATE_REQUEST_EXAMPLE, + CONTENT_RESPONSE_EXAMPLE, + PROJECT_ID_PARAMETER, + ProjectOwnedQuerysetMixin, + SKILL_NAME_PARAMETER, + build_crud_action_overrides, + document_project_owned_viewset, +) +from core.permissions import ( + IsProjectAdmin, + IsProjectFeedbackEditor, + IsProjectMember, + IsProjectMemberWritable, +) +from pipeline.serializers import SkillResultSerializer 
+ +CLASSIFICATION_SKILL_NAME = "content_classification" +RELEVANCE_SKILL_NAME = "relevance_scoring" +SUMMARIZATION_SKILL_NAME = "summarization" +RELATED_CONTENT_SKILL_NAME = "find_related" + + +@document_project_owned_viewset( + resource_plural="content items", + resource_singular="content item", + create_description="Create a new content item for the selected project. Any related entity must belong to the same project.", + tag="Content Library", + action_overrides=build_crud_action_overrides( + ContentSerializer, + resource_plural="content items for the selected project", + resource_singular="content item", + create_examples=[CONTENT_CREATE_REQUEST_EXAMPLE, CONTENT_RESPONSE_EXAMPLE], + create_response_examples=[CONTENT_RESPONSE_EXAMPLE], + retrieve_examples=[CONTENT_RESPONSE_EXAMPLE], + ), +) +class ContentViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet): + """Browse project content and trigger ad hoc AI processing for it.""" + + serializer_class = ContentSerializer + queryset = Content.objects.select_related("project", "entity") + + def get_permissions(self): + """Allow all members to read content, contributors to edit, and admins to delete.""" + + if self.action == "destroy": + permission_classes = [IsProjectAdmin] + elif self.action in {"create", "update", "partial_update", "run_skill"}: + permission_classes = [IsProjectMemberWritable] + else: + permission_classes = [IsProjectMember] + return [permission() for permission in permission_classes] + + @extend_schema( + summary="Run content skill", + description=( + "Run one ad hoc skill for the selected content item and persist the outcome as a SkillResult. " + "Supported skill names are content_classification, relevance_scoring, summarization, and find_related." 
+ ), + tags=["AI Processing"], + parameters=[PROJECT_ID_PARAMETER, SKILL_NAME_PARAMETER], + request=None, + responses={ + 201: SkillResultSerializer, + 202: SkillResultSerializer, + 403: AUTHENTICATION_REQUIRED_RESPONSE, + }, + ) + @action(detail=True, methods=["post"], url_path=r"skills/(?P[^/.]+)") + def run_skill(self, request, *args, **kwargs): + """Execute one supported ad hoc skill for a content item.""" + + from core.pipeline import execute_ad_hoc_skill + from core.tasks import queue_content_skill + + skill_name = str(kwargs["skill_name"]) + if skill_name not in { + CLASSIFICATION_SKILL_NAME, + RELEVANCE_SKILL_NAME, + SUMMARIZATION_SKILL_NAME, + RELATED_CONTENT_SKILL_NAME, + }: + raise serializers.ValidationError( + { + "skill_name": ( + "Unsupported skill. Choose one of: content_classification, relevance_scoring, " + "summarization, find_related." + ) + } + ) + + content = self.get_object() + if skill_name in {RELEVANCE_SKILL_NAME, SUMMARIZATION_SKILL_NAME}: + skill_result = queue_content_skill(content, skill_name) + serializer = SkillResultSerializer( + skill_result, context=self.get_serializer_context() + ) + return Response(serializer.data, status=status.HTTP_202_ACCEPTED) + + skill_result = execute_ad_hoc_skill(content, skill_name) + serializer = SkillResultSerializer( + skill_result, context=self.get_serializer_context() + ) + return Response(serializer.data, status=status.HTTP_201_CREATED) + + +@document_project_owned_viewset( + resource_plural="user feedback entries", + resource_singular="user feedback entry", + create_description="Create a new feedback entry for content in the selected project. 
The authenticated user is recorded automatically.", + tag="Feedback", + action_overrides=build_crud_action_overrides( + UserFeedbackSerializer, + resource_plural="user feedback entries for the selected project", + resource_singular="user feedback entry", + ), +) +class UserFeedbackViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet): + """Capture editor feedback on project content items.""" + + serializer_class = UserFeedbackSerializer + queryset = UserFeedback.objects.select_related("content", "project", "user") + + def get_permissions(self): + """Allow all members to read feedback and owners or admins to modify it.""" + + return [IsProjectFeedbackEditor()] + + def perform_create(self, serializer): + """Attach the authenticated user automatically to new feedback rows.""" + + serializer.save(project=self.get_project(), user=self.request.user) diff --git a/content/api_urls.py b/content/api_urls.py new file mode 100644 index 00000000..78111e6b --- /dev/null +++ b/content/api_urls.py @@ -0,0 +1,14 @@ +"""API route registration for content resources.""" + +from rest_framework_nested.routers import NestedSimpleRouter + +from content.api import ContentViewSet, UserFeedbackViewSet + + +def register_project_routes(project_router: NestedSimpleRouter) -> None: + """Register nested content and feedback endpoints.""" + + project_router.register(r"contents", ContentViewSet, basename="project-content") + project_router.register( + r"feedback", UserFeedbackViewSet, basename="project-feedback" + ) diff --git a/content/apps.py b/content/apps.py new file mode 100644 index 00000000..a3c0dbb6 --- /dev/null +++ b/content/apps.py @@ -0,0 +1,10 @@ +"""Django app configuration for the content domain.""" + +from django.apps import AppConfig + + +class ContentConfig(AppConfig): + """Configure the content app.""" + + default_auto_field = "django.db.models.BigAutoField" + name = "content" diff --git a/content/migrations/0001_initial.py b/content/migrations/0001_initial.py new file mode 
100644 index 00000000..c04f9a10 --- /dev/null +++ b/content/migrations/0001_initial.py @@ -0,0 +1,174 @@ +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + initial = True + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ("projects", "0003_remove_project_group"), + ("entities", "0001_initial"), + ("core", "0011_move_entity_models_to_entities"), + ] + + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.CreateModel( + name="Content", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("url", models.URLField()), + ("title", models.CharField(max_length=512)), + ("author", models.CharField(blank=True, max_length=255)), + ( + "source_plugin", + models.CharField(max_length=64), + ), + ("content_type", models.CharField(blank=True, max_length=64)), + ( + "canonical_url", + models.URLField(blank=True, db_index=True, default=""), + ), + ("published_date", models.DateTimeField()), + ("ingested_at", models.DateTimeField(auto_now_add=True)), + ("content_text", models.TextField()), + ( + "relevance_score", + models.FloatField(blank=True, null=True), + ), + ( + "authority_adjusted_score", + models.FloatField(blank=True, null=True), + ), + ("embedding_id", models.CharField(blank=True, max_length=64)), + ("source_metadata", models.JSONField(blank=True, default=dict)), + ("duplicate_signal_count", models.IntegerField(default=0)), + ("is_reference", models.BooleanField(default=False)), + ("is_active", models.BooleanField(default=True)), + ( + "duplicate_of", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="duplicates", + to="content.content", + ), + ), + ( + "entity", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, 
+ related_name="contents", + to="entities.entity", + ), + ), + ( + "project", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="contents", + to="projects.project", + ), + ), + ], + options={ + "ordering": ["-published_date"], + "db_table": "core_content", + "indexes": [ + models.Index( + fields=["project", "-published_date"], + name="core_conten_project_6662d0_idx", + ), + models.Index( + fields=["project", "-relevance_score"], + name="core_conten_project_127912_idx", + ), + models.Index( + fields=["project", "-authority_adjusted_score"], + name="core_conten_project_44fd9d_idx", + ), + models.Index( + fields=["project", "is_reference"], + name="core_conten_project_c689be_idx", + ), + models.Index( + fields=["url"], + name="core_conten_url_4d8416_idx", + ), + ], + }, + ), + migrations.CreateModel( + name="UserFeedback", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "feedback_type", + models.CharField( + choices=[("upvote", "Upvote"), ("downvote", "Downvote")], + max_length=16, + ), + ), + ("created_at", models.DateTimeField(auto_now_add=True)), + ( + "content", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="feedback", + to="content.content", + ), + ), + ( + "project", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="feedback", + to="projects.project", + ), + ), + ( + "user", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="content_feedback", + to=settings.AUTH_USER_MODEL, + ), + ), + ], + options={ + "ordering": ["-created_at"], + "db_table": "core_userfeedback", + "constraints": [ + models.UniqueConstraint( + fields=("content", "user"), + name="core_feedback_unique_content_user", + ) + ], + }, + ), + ], + database_operations=[], + ) + ] diff --git a/content/migrations/__init__.py b/content/migrations/__init__.py new file mode 
100644 index 00000000..4395b4eb --- /dev/null +++ b/content/migrations/__init__.py @@ -0,0 +1 @@ +"""Migrations for the content app.""" diff --git a/content/models.py b/content/models.py new file mode 100644 index 00000000..34873a5c --- /dev/null +++ b/content/models.py @@ -0,0 +1,94 @@ +"""Content-domain models split out from the historical core app.""" + +from django.conf import settings +from django.db import models + + +class FeedbackType(models.TextChoices): + """Editorial feedback signals that tune authority and ranking.""" + + UPVOTE = "upvote", "Upvote" + DOWNVOTE = "downvote", "Downvote" + + +class Content(models.Model): + """Stores an ingested content item that may appear in a newsletter.""" + + project = models.ForeignKey( + "projects.Project", on_delete=models.CASCADE, related_name="contents" + ) + url = models.URLField() + title = models.CharField(max_length=512) + author = models.CharField(max_length=255, blank=True) + entity = models.ForeignKey( + "entities.Entity", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="contents", + ) + source_plugin = models.CharField(max_length=64) + content_type = models.CharField(max_length=64, blank=True) + canonical_url = models.URLField(blank=True, default="", db_index=True) + published_date = models.DateTimeField() + ingested_at = models.DateTimeField(auto_now_add=True) + content_text = models.TextField() + relevance_score = models.FloatField(null=True, blank=True) + authority_adjusted_score = models.FloatField(null=True, blank=True) + embedding_id = models.CharField(max_length=64, blank=True) + source_metadata = models.JSONField(default=dict, blank=True) + duplicate_of = models.ForeignKey( + "self", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="duplicates", + ) + duplicate_signal_count = models.IntegerField(default=0) + is_reference = models.BooleanField(default=False) + is_active = models.BooleanField(default=True) + + class Meta: + ordering = ["-published_date"] + 
db_table = "core_content" + indexes = [ + models.Index(fields=["project", "-published_date"]), + models.Index(fields=["project", "-relevance_score"]), + models.Index(fields=["project", "-authority_adjusted_score"]), + models.Index(fields=["project", "is_reference"]), + models.Index(fields=["url"]), + ] + + def __str__(self) -> str: + return self.title + + +class UserFeedback(models.Model): + """Records an editor's feedback on a specific content item.""" + + content = models.ForeignKey( + Content, on_delete=models.CASCADE, related_name="feedback" + ) + project = models.ForeignKey( + "projects.Project", on_delete=models.CASCADE, related_name="feedback" + ) + user = models.ForeignKey( + settings.AUTH_USER_MODEL, + on_delete=models.CASCADE, + related_name="content_feedback", + ) + feedback_type = models.CharField(max_length=16, choices=FeedbackType.choices) + created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + ordering = ["-created_at"] + db_table = "core_userfeedback" + constraints = [ + models.UniqueConstraint( + fields=["content", "user"], + name="core_feedback_unique_content_user", + ), + ] + + def __str__(self) -> str: + return f"{self.feedback_type} by {self.user}" diff --git a/content/serializers.py b/content/serializers.py new file mode 100644 index 00000000..d80b3d2a --- /dev/null +++ b/content/serializers.py @@ -0,0 +1,86 @@ +"""DRF serializers for content-domain models.""" + +from rest_framework import serializers + +from content.models import Content, UserFeedback +from core.serializer_mixins import ProjectScopedSerializerMixin + + +class ContentSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): + """Serialize ingested content items and enforce project/entity consistency.""" + + class Meta: + model = Content + fields = [ + "id", + "project", + "url", + "title", + "author", + "entity", + "source_plugin", + "content_type", + "canonical_url", + "published_date", + "ingested_at", + "content_text", + "relevance_score", + 
"authority_adjusted_score", + "embedding_id", + "source_metadata", + "duplicate_of", + "duplicate_signal_count", + "is_reference", + "is_active", + ] + read_only_fields = [ + "id", + "project", + "canonical_url", + "ingested_at", + "authority_adjusted_score", + "embedding_id", + "duplicate_of", + "duplicate_signal_count", + ] + + def validate(self, attrs): + """Reject entity assignments that point at a different project.""" + + project = ( + self.context.get("project") + or attrs.get("project") + or getattr(self.instance, "project", None) + ) + entity = attrs.get("entity") or getattr(self.instance, "entity", None) + if project and entity and entity.project_id != project.id: + raise serializers.ValidationError( + {"entity": "Entity must belong to the selected project."} + ) + return attrs + + +class UserFeedbackSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): + """Serialize editor feedback attached to a content item.""" + + user = serializers.PrimaryKeyRelatedField(read_only=True) + + class Meta: + model = UserFeedback + fields = ["id", "content", "project", "user", "feedback_type", "created_at"] + read_only_fields = ["id", "project", "user", "created_at"] + + def validate(self, attrs): + """Reject feedback that targets content outside the active project.""" + + project = ( + self.context.get("project") + or attrs.get("project") + or getattr(self.instance, "project", None) + ) + content = attrs.get("content") or getattr(self.instance, "content", None) + if project and content and content.project_id != project.id: + raise serializers.ValidationError( + {"content": "Content must belong to the selected project."} + ) + return attrs diff --git a/core/admin.py b/core/admin.py index e3185536..ade491bb 100644 --- a/core/admin.py +++ b/core/admin.py @@ -1,47 +1,9 @@ -"""Django admin configuration for the core editorial workflow. 
+"""Django admin configuration for the remaining core cross-cutting workflow.""" -These admin classes are intentionally richer than default CRUD screens. They expose -the health, traceability, and review information editors and operators need while -running ingestion and AI-assisted content curation. -""" +from django.contrib import admin +from unfold.admin import ModelAdmin as UnfoldModelAdmin -import json -from urllib.parse import urlencode - -from django import forms -from django.contrib import admin, messages -from django.db.models import Avg, Max, QuerySet -from django.http import HttpRequest -from django.urls import reverse -from django.utils import timezone -from django.utils.html import format_html -from django.utils.safestring import mark_safe -from import_export.admin import ExportActionMixin -from unfold.admin import ModelAdmin - -from core.entity_extraction import ( - accept_entity_candidate, - merge_entity_candidate, - reject_entity_candidate, -) -from core.models import ( - BlueskyCredentials, - Content, - Entity, - EntityAuthoritySnapshot, - EntityCandidate, - EntityCandidateStatus, - EntityMention, - IngestionRun, - Project, - ProjectConfig, - ReviewQueue, - SkillResult, - SourceConfig, - TopicCentroidSnapshot, - UserFeedback, -) -from core.plugins import get_plugin_for_source_config, validate_plugin_config +ModelAdmin = UnfoldModelAdmin def _score_to_percent(value): @@ -68,582 +30,7 @@ def _score_color(value) -> str: return "red" -def _drift_card_color(value) -> str: - """Return an admin card severity for centroid drift percentages.""" - - if value is None: - return "info" - numeric_value = float(value) - if numeric_value <= 0.15: - return "success" - if numeric_value <= 0.35: - return "warning" - return "danger" - - -def _format_snapshot_freshness(computed_at) -> str: - """Return a compact human-readable age for the latest snapshot.""" - - if computed_at is None: - return "-" - age = timezone.now() - computed_at - total_hours = max(0, 
int(age.total_seconds() // 3600)) - if total_hours < 24: - return f"{total_hours}h ago" - return f"{max(1, total_hours // 24)}d ago" - - -def _freshness_card_color(computed_at) -> str: - """Return an admin card severity based on snapshot recency.""" - - if computed_at is None: - return "warning" - age = timezone.now() - computed_at - age_hours = age.total_seconds() / 3600 - if age_hours <= 24: - return "success" - if age_hours <= 72: - return "warning" - return "danger" - - -def _build_topic_centroid_project_drilldowns(queryset, changelist_url: str): - """Build one filtered-history drilldown row per project. - - The changelist widget needs stable project links even on SQLite, so this keeps - the grouping logic in Python instead of relying on database-specific distinct-on - behavior. - """ - - latest_by_project: dict[int, TopicCentroidSnapshot] = {} - snapshot_counts: dict[int, int] = {} - ordered_snapshots = queryset.select_related("project").order_by( - "project_id", "-computed_at" - ) - - for snapshot in ordered_snapshots: - project_id = snapshot.project_id - snapshot_counts[project_id] = snapshot_counts.get(project_id, 0) + 1 - latest_by_project.setdefault(project_id, snapshot) - - project_drilldowns = [] - for snapshot in sorted( - latest_by_project.values(), - key=lambda value: value.project.name.lower(), - ): - project_drilldowns.append( - { - "project_id": snapshot.project_id, - "project_name": snapshot.project.name, - "snapshot_count": snapshot_counts[snapshot.project_id], - "centroid_active": snapshot.centroid_active, - "feedback_count": snapshot.feedback_count, - "latest_snapshot": _format_snapshot_freshness(snapshot.computed_at), - "drift_from_previous": ( - f"{_score_to_percent(snapshot.drift_from_previous):.1f}%" - if snapshot.drift_from_previous is not None - else "n/a" - ), - "drift_from_week_ago": ( - f"{_score_to_percent(snapshot.drift_from_week_ago):.1f}%" - if snapshot.drift_from_week_ago is not None - else "n/a" - ), - "href": 
f"{changelist_url}?{urlencode({'project__id__exact': snapshot.project_id})}", - } - ) - - return project_drilldowns - - -class BlueskyCredentialsAdminForm(forms.ModelForm): - """Admin form that accepts a plaintext Bluesky app credential input.""" - - credential_input = forms.CharField( - required=False, - strip=False, - widget=forms.PasswordInput(render_value=False), - help_text="Leave blank to keep the existing stored credential.", - label="Bluesky app credential", - ) - - class Meta: - model = BlueskyCredentials - fields = ["project", "handle", "pds_url", "is_active"] - - def clean(self): - """Require a credential when creating the record for the first time.""" - - cleaned_data = super().clean() - credential_input = cleaned_data.get("credential_input", "") - if not self.instance.has_stored_credential() and not credential_input: - self.add_error("credential_input", "A Bluesky app credential is required.") - return cleaned_data - - def save(self, commit=True): - """Encrypt a new credential value before saving the model instance.""" - - instance = super().save(commit=False) - credential_input = self.cleaned_data.get("credential_input", "") - if credential_input: - instance.set_stored_credential(credential_input) - if commit: - instance.save() - return instance - - -@admin.register(Project) -class ProjectAdmin(ExportActionMixin, admin.ModelAdmin): - """Admin configuration for top-level project workspaces.""" - - list_display = ("name", "group", "content_retention_days", "created_at") - - # Better navigation - date_hierarchy = "created_at" - list_filter = ("created_at",) - - # Faster searching - search_fields = ("name", "group__name") - - # Performance for large user lists - autocomplete_fields = ("group",) - - # Quick editing - list_editable = ("content_retention_days",) - - -@admin.register(BlueskyCredentials) -class BlueskyCredentialsAdmin(ModelAdmin): - """Admin view for project-scoped Bluesky authentication settings.""" - - form = BlueskyCredentialsAdminForm - 
actions = ["verify_selected_credentials"] - list_display = ( - "project", - "handle", - "display_pds_host", - "has_stored_credential", - "is_active", - "last_verified_at", - ) - list_filter = ("is_active", ("project", admin.RelatedOnlyFieldListFilter)) - search_fields = ("project__name", "handle", "pds_url") - autocomplete_fields = ("project",) - readonly_fields = ( - "has_stored_credential", - "last_verified_at", - "last_error", - "created_at", - "updated_at", - ) - fieldsets = ( - ( - "Account", - {"fields": ("project", "handle", "credential_input", "is_active")}, - ), - ( - "PDS Override", - { - "fields": ("pds_url",), - "description": "Leave blank to use the default Bluesky-hosted account flow.", - }, - ), - ( - "Verification", - { - "fields": ( - "has_stored_credential", - "last_verified_at", - "last_error", - "created_at", - "updated_at", - ) - }, - ), - ) - - @admin.display(description="PDS") - def display_pds_host(self, obj): - """Show whether the credentials use the hosted default or a custom PDS.""" - - return obj.pds_url or "Bluesky hosted default" - - @admin.display(boolean=True, description="Stored Credential") - def has_stored_credential(self, obj): - """Return whether an encrypted Bluesky credential has been configured.""" - - return obj.has_stored_credential() - - @admin.action(description="Verify Selected Credentials") - def verify_selected_credentials(self, request, queryset): - """Authenticate the selected Bluesky accounts and report the outcome.""" - - from core.plugins.bluesky import BlueskySourcePlugin - - verified_credentials = [] - failed_credentials = [] - - for credentials in queryset.select_related("project"): - try: - BlueskySourcePlugin.verify_credentials(credentials) - except Exception as exc: - failed_credentials.append(f"{credentials}: {exc}") - else: - verified_credentials.append(str(credentials)) - - if verified_credentials: - self.message_user( - request, - f"Credential verification passed for {len(verified_credentials)} 
account(s).", - messages.SUCCESS, - ) - - if failed_credentials: - self.message_user( - request, - "Credential verification failed for: " + "; ".join(failed_credentials), - messages.ERROR, - ) - - -@admin.register(ProjectConfig) -class ProjectConfigAdmin(admin.ModelAdmin): - """Admin configuration for per-project scoring settings.""" - - list_display = ( - "project", - "upvote_authority_weight", - "downvote_authority_weight", - "authority_decay_rate", - "recompute_topic_centroid_on_feedback_save", - ) - list_filter = ("recompute_topic_centroid_on_feedback_save",) - fields = ( - "project", - "upvote_authority_weight", - "downvote_authority_weight", - "authority_decay_rate", - "recompute_topic_centroid_on_feedback_save", - ) - - -@admin.register(Entity) -class EntityAdmin(admin.ModelAdmin): - """Admin configuration for tracked people, vendors, and organizations.""" - - list_display = ( - "name", - "project", - "type", - "colored_score", - "latest_snapshot_summary", - "created_at", - ) - search_fields = ("name", "project__name") - - @admin.display(description="Authority Score", ordering="authority_score") - def colored_score(self, obj): - """Render the authority score with a traffic-light color cue.""" - - percent_value = _score_to_percent(obj.authority_score) - color = _score_color(obj.authority_score) - - return format_html( - '{}', - color, - f"{percent_value:.1f}%", - ) - - @admin.display(description="Latest Snapshot") - def latest_snapshot_summary(self, obj): - """Show the latest authority component breakdown for an entity.""" - - latest_snapshot = obj.authority_snapshots.order_by("-computed_at").first() - if latest_snapshot is None: - return "-" - mention_value = f"{_score_to_percent(latest_snapshot.mention_component):.1f}%" - feedback_value = f"{_score_to_percent(latest_snapshot.feedback_component):.1f}%" - duplicate_value = ( - f"{_score_to_percent(latest_snapshot.duplicate_component):.1f}%" - ) - decayed_value = 
f"{_score_to_percent(latest_snapshot.decayed_prior):.1f}%" - return format_html( - ( - '' - "M {} | F {} | D {} | Carry {}" - ), - mention_value, - feedback_value, - duplicate_value, - decayed_value, - mention_value, - feedback_value, - duplicate_value, - decayed_value, - ) - - -@admin.register(EntityAuthoritySnapshot) -class EntityAuthoritySnapshotAdmin(admin.ModelAdmin): - """Admin view for persisted authority-score history.""" - - list_display = ( - "entity", - "project", - "display_final_score", - "display_components", - "computed_at", - ) - list_filter = (("project", admin.RelatedOnlyFieldListFilter), "computed_at") - search_fields = ("entity__name", "project__name") - autocomplete_fields = ("entity", "project") - - @admin.display(description="Final Score", ordering="final_score") - def display_final_score(self, obj): - """Render the recomputed final authority score as a percentage.""" - - percent_value = _score_to_percent(obj.final_score) - return format_html( - '{}', - _score_color(obj.final_score), - f"{percent_value:.1f}%", - ) - - @admin.display(description="Components") - def display_components(self, obj): - """Render the stored authority components in a compact summary.""" - - mention_value = f"{_score_to_percent(obj.mention_component):.1f}%" - feedback_value = f"{_score_to_percent(obj.feedback_component):.1f}%" - duplicate_value = f"{_score_to_percent(obj.duplicate_component):.1f}%" - decayed_value = f"{_score_to_percent(obj.decayed_prior):.1f}%" - return format_html( - "M {} | F {} | D {} | Carry {}", - mention_value, - feedback_value, - duplicate_value, - decayed_value, - ) - - -@admin.register(TopicCentroidSnapshot) -class TopicCentroidSnapshotAdmin(admin.ModelAdmin): - """Admin view for persisted topic-centroid history and drift.""" - - list_before_template = "admin/topic_centroid_snapshot_changelist_widget.html" - list_display = ( - "project", - "centroid_active", - "feedback_count", - "display_drift_from_previous", - 
"display_drift_from_week_ago", - "computed_at", - ) - list_filter = ( - "centroid_active", - ("project", admin.RelatedOnlyFieldListFilter), - "computed_at", - ) - search_fields = ("project__name",) - autocomplete_fields = ("project",) - - @admin.display(description="Drift vs Previous", ordering="drift_from_previous") - def display_drift_from_previous(self, obj): - """Render cosine-distance drift from the previous active snapshot.""" - - if obj.drift_from_previous is None: - return "n/a" - return f"{_score_to_percent(obj.drift_from_previous):.1f}%" - - @admin.display(description="Drift vs 7d", ordering="drift_from_week_ago") - def display_drift_from_week_ago(self, obj): - """Render cosine-distance drift from the nearest week-old snapshot.""" - - if obj.drift_from_week_ago is None: - return "n/a" - return f"{_score_to_percent(obj.drift_from_week_ago):.1f}%" - - def changelist_view(self, request, extra_context=None): - """Augment the changelist with centroid freshness and drift summary cards.""" - - queryset = self.get_queryset(request) - changelist_url = reverse( - f"{self.admin_site.name}:{self.model._meta.app_label}_{self.model._meta.model_name}_changelist" - ) - metrics = queryset.aggregate( - avg_drift_from_previous=Avg("drift_from_previous"), - avg_drift_from_week_ago=Avg("drift_from_week_ago"), - latest_snapshot_at=Max("computed_at"), - ) - project_count = queryset.values("project_id").distinct().count() - active_project_count = ( - queryset.filter(centroid_active=True) - .values("project_id") - .distinct() - .count() - ) - - extra_context = extra_context or {} - extra_context["dashboard_stats"] = [ - { - "title": "Active Centroids", - "value": ( - f"{active_project_count} / {project_count}" - if project_count - else "0 / 0" - ), - "icon": "hub", - "color": ( - "success" - if active_project_count == project_count and project_count - else "warning" - ), - }, - { - "title": "Avg Drift vs Previous", - "value": ( - 
f"{_score_to_percent(metrics['avg_drift_from_previous']):.1f}%" - if metrics["avg_drift_from_previous"] is not None - else "-" - ), - "icon": "show_chart", - "color": _drift_card_color(metrics["avg_drift_from_previous"]), - }, - { - "title": "Avg Drift vs 7d", - "value": ( - f"{_score_to_percent(metrics['avg_drift_from_week_ago']):.1f}%" - if metrics["avg_drift_from_week_ago"] is not None - else "-" - ), - "icon": "timeline", - "color": _drift_card_color(metrics["avg_drift_from_week_ago"]), - }, - { - "title": "Latest Snapshot", - "value": _format_snapshot_freshness(metrics["latest_snapshot_at"]), - "icon": "schedule", - "color": _freshness_card_color(metrics["latest_snapshot_at"]), - }, - ] - extra_context["centroid_project_drilldowns"] = ( - _build_topic_centroid_project_drilldowns(queryset, changelist_url) - ) - return super().changelist_view(request, extra_context=extra_context) - - -@admin.register(EntityMention) -class EntityMentionAdmin(admin.ModelAdmin): - """Admin view for extracted tracked-entity mentions.""" - - list_display = ( - "entity", - "project", - "content", - "role", - "sentiment", - "confidence", - "created_at", - ) - list_filter = ("role", "sentiment", ("project", admin.RelatedOnlyFieldListFilter)) - search_fields = ("entity__name", "content__title", "span") - autocomplete_fields = ("entity", "content", "project") - - -@admin.register(EntityCandidate) -class EntityCandidateAdmin(admin.ModelAdmin): - """Admin view for candidate entities awaiting human review.""" - - actions = [ - "accept_selected_candidates", - "reject_selected_candidates", - "merge_into_existing_entities", - ] - list_display = ( - "name", - "project", - "suggested_type", - "occurrence_count", - "status", - "merged_into", - "first_seen_in", - "created_at", - ) - list_filter = ( - "status", - "suggested_type", - ("project", admin.RelatedOnlyFieldListFilter), - ) - search_fields = ("name", "project__name", "merged_into__name") - autocomplete_fields = ("project", "first_seen_in", 
"merged_into") - ordering = ("-occurrence_count", "name") - - @admin.action(description="Accept selected candidates") - def accept_selected_candidates(self, request, queryset): - """Promote selected candidates into tracked entities.""" - - accepted_count = 0 - for candidate in queryset.select_related("project"): - if candidate.status == EntityCandidateStatus.ACCEPTED: - continue - accept_entity_candidate(candidate) - accepted_count += 1 - self.message_user( - request, - f"Accepted {accepted_count} entity candidate(s).", - messages.SUCCESS, - ) - - @admin.action(description="Reject selected candidates") - def reject_selected_candidates(self, request, queryset): - """Mark selected candidates as rejected.""" - - rejected_count = 0 - for candidate in queryset: - if candidate.status == EntityCandidateStatus.REJECTED: - continue - reject_entity_candidate(candidate) - rejected_count += 1 - self.message_user( - request, - f"Rejected {rejected_count} entity candidate(s).", - messages.SUCCESS, - ) - - @admin.action(description="Merge selected candidates into existing entities") - def merge_into_existing_entities( - self, - request: HttpRequest, - queryset: QuerySet[EntityCandidate], - ) -> None: - """Merge candidates when a same-name entity already exists in the project.""" - - merged_count = 0 - unresolved_names: list[str] = [] - for candidate in queryset.select_related("project"): - matching_entities = Entity.objects.filter( - project=candidate.project, - name__iexact=candidate.name, - ) - if matching_entities.count() != 1: - unresolved_names.append(candidate.name) - continue - merge_entity_candidate(candidate, matching_entities.get()) - merged_count += 1 - - if merged_count: - self.message_user( - request, - f"Merged {merged_count} entity candidate(s) into existing entities.", - messages.SUCCESS, - ) - if unresolved_names: - self.message_user( - request, - "No unique same-name entity match was available for: " - + ", ".join(sorted(unresolved_names)), - messages.WARNING, - 
) +from trends.admin import TopicCentroidSnapshotAdmin # noqa: E402,F401 class HighValueFilter(admin.SimpleListFilter): @@ -689,731 +76,4 @@ def queryset(self, request, queryset): return queryset -@admin.register(Content) -class ContentAdmin(admin.ModelAdmin): - """Admin view for curated content plus trace and score context.""" - - list_display = ( - "display_relevance", - "display_authority_adjusted_score", - "duplicate_badge", - "duplicate_parent", - "is_active", - "is_reference", - "preview_content", - "source_plugin", - "project", - "title", - "view_trace", - ) - list_editable = ("is_reference", "is_active") - list_filter = ( - HighValueFilter, - DuplicateStateFilter, - ("project", admin.RelatedOnlyFieldListFilter), - "source_plugin", - "is_active", - ) - search_fields = ("title", "author", "url") - actions = ["generate_newsletter_ideas"] - - @admin.display(description="Preview") - def preview_content(self, obj): - """Adds a quick preview based on the stored content text.""" - preview_text = (obj.content_text or "").strip() - if not preview_text: - return "-" - return format_html( - 'πŸ” View', - preview_text[:500], - ) - - @admin.display(description="AI Trace") - def view_trace(self, obj): - """Link to the latest external trace or fall back to stored skill history.""" - from urllib.parse import urlencode - - from django.conf import settings - from django.urls import reverse - - latest_skill_result = ( - obj.skill_results.filter( - superseded_by__isnull=True, - ) - .order_by("-created_at") - .first() - ) - if latest_skill_result is None: - return "-" - - result_data = latest_skill_result.result_data or {} - trace_sections = [result_data] - for section_name in ( - "trace", - "langsmith", - "langfuse", - "observability", - "telemetry", - ): - section = result_data.get(section_name) - if isinstance(section, dict): - trace_sections.append(section) - - trace_url = "" - trace_id = "" - for section in trace_sections: - for key in ( - "trace_url", - "traceUrl", - 
"langsmith_run_url", - "langfuse_trace_url", - ): - value = section.get(key) - if isinstance(value, str) and value: - trace_url = value - break - if trace_url: - break - for key in ( - "trace_id", - "traceId", - "run_id", - "runId", - "langsmith_run_id", - "langfuse_trace_id", - ): - value = section.get(key) - if isinstance(value, str) and value: - trace_id = value - break - - if ( - not trace_url - and trace_id - and getattr(settings, "AI_TRACE_URL_TEMPLATE", "") - ): - trace_url = settings.AI_TRACE_URL_TEMPLATE.format( - content_id=obj.id, - run_id=trace_id, - skill_name=latest_skill_result.skill_name, - skill_result_id=latest_skill_result.id, - project_id=obj.project_id, - trace_id=trace_id, - ) - - if trace_url: - link_label = "πŸ“ˆ Trace" - link_title = f"Open external trace for {latest_skill_result.skill_name}" - else: - trace_url = "{}?{}".format( - reverse("admin:core_skillresult_changelist"), - urlencode({"content__id__exact": obj.id}), - ) - link_label = "🧠 Skill runs" - link_title = f"Open persisted skill runs for {obj.title}" - - return format_html( - '{}', - trace_url, - link_title, - link_label, - ) - - @admin.display(description="Base Score") - def display_relevance(self, obj): - """Render the relevance score with a coarse color-coded severity band.""" - - if obj.relevance_score is None: - return "-" - percent_value = _score_to_percent(obj.relevance_score) - return format_html( - '{}', - _score_color(obj.relevance_score), - f"{percent_value:.1f}%", - ) - - @admin.display(description="Adjusted") - def display_authority_adjusted_score(self, obj): - """Render the authority-adjusted relevance score when available.""" - - if obj.authority_adjusted_score is None: - return "-" - percent_value = _score_to_percent(obj.authority_adjusted_score) - return format_html( - '{}', - _score_color(obj.authority_adjusted_score), - f"{percent_value:.1f}%", - ) - - @admin.display(description="Duplicates", ordering="duplicate_signal_count") - def duplicate_badge(self, 
obj): - """Show how many duplicate sightings point at this content row.""" - - if obj.duplicate_signal_count <= 0: - return "-" - return format_html( - 'Also seen in {} source(s)', - obj.duplicate_signal_count, - ) - - @admin.display(description="Duplicate Of", ordering="duplicate_of") - def duplicate_parent(self, obj): - """Show the retained canonical content row when this item is a duplicate.""" - - if obj.duplicate_of is None: - return "-" - return obj.duplicate_of.title - - def changelist_view(self, request, extra_context=None): - """Augment the changelist with content dashboard statistics.""" - - queryset = self.get_queryset(request) - metrics = queryset.aggregate( - avg_score=Avg("relevance_score"), - avg_adjusted_score=Avg("authority_adjusted_score"), - ) - - extra_context = extra_context or {} - extra_context["dashboard_stats"] = [ - { - "title": "Avg Base Score", - "value": ( - f"{_score_to_percent(metrics['avg_score']):.1f}%" - if metrics["avg_score"] is not None - else "-" - ), - "icon": "insights", - "color": ( - "success" - if _score_color(metrics["avg_score"]) == "green" - else "warning" - ), - }, - { - "title": "Avg Adjusted Score", - "value": ( - f"{_score_to_percent(metrics['avg_adjusted_score']):.1f}%" - if metrics["avg_adjusted_score"] is not None - else "-" - ), - "icon": "auto_graph", - "color": ( - "success" - if _score_color(metrics["avg_adjusted_score"]) == "green" - else "warning" - ), - }, - { - "title": "Total Filtered", - "value": queryset.count(), - "icon": "inventory_2", - }, - ] - - return super().changelist_view(request, extra_context=extra_context) - - @admin.action(description="Generate Ideas for Newsletter") - def generate_newsletter_ideas(self, request, queryset): - """Queue pipeline processing for the selected content items.""" - - from core.tasks import process_content - - content_ids = list(queryset.values_list("id", flat=True)) - for content_id in content_ids: - process_content.delay(content_id) - self.message_user( - 
request, - f"Successfully queued the pipeline for {len(content_ids)} items.", - messages.SUCCESS, - ) - - -@admin.register(SkillResult) -class SkillResultAdmin(ModelAdmin): - """Admin view for AI skill history, retries, and result inspection.""" - - list_display = ( - "skill_name", - "get_content_link", - "display_status", - "display_performance", - "preview_json", - "is_current", - "model_used", - "created_at", - ) - list_filter = ("status", "skill_name", "project", "model_used") - search_fields = ("skill_name", "content__title", "model_used", "error_message") - actions = ["retry_selected_skills"] - readonly_fields = ( - "pretty_result_data", - "latency_ms", - "created_at", - "superseded_by", - ) - fieldsets = ( - ( - "Execution Details", - {"fields": ("skill_name", "content", "project", "status", "model_used")}, - ), - ( - "AI Output", - { - "fields": ("pretty_result_data", "error_message"), - }, - ), - ( - "Performance Metrics", - { - "fields": ("latency_ms", "confidence", "created_at", "superseded_by"), - }, - ), - ) - - @admin.action(description="Retry Selected Skills") - def retry_selected_skills(self, request, queryset): - """Resets status to PENDING and clears errors for retry by the worker.""" - updated = queryset.update(status="pending", error_message="") - self.message_user( - request, - f"Successfully reset {updated} skills to PENDING for retry.", - messages.SUCCESS, - ) - - @admin.display(description="Result Preview") - def preview_json(self, obj): - """Link that triggers Unfold's detail view (can be opened in side-panel).""" - if not obj.result_data: - return "-" - return format_html( - 'πŸ” Preview', - f"{obj.pk}/change/", - ) - - @admin.display(description="Content") - def get_content_link(self, obj): - """Return a compact content title for the table view.""" - - return obj.content.title[:30] + "..." 
if obj.content.title else "Untitled" - - @admin.display(description="Status") - def display_status(self, obj): - """Render the skill status as a colored dot plus label.""" - - status_value = str(obj.status).lower() - colors = {"completed": "green", "failed": "red", "pending": "orange"} - color = colors.get(status_value, "gray") - return format_html( - '● {}', - color, - status_value.upper(), - ) - - @admin.display(description="Perf / Conf") - def display_performance(self, obj): - """Show latency and confidence together in a compact cell.""" - - latency = f"{obj.latency_ms}ms" if obj.latency_ms else "-" - conf = f"{int(obj.confidence * 100)}%" if obj.confidence is not None else "-" - return f"{latency} / {conf}" - - @admin.display(description="Current", boolean=True) - def is_current(self, obj): - """Return whether this row is the most recent non-superseded result.""" - - return obj.superseded_by is None - - @admin.display(description="Result Data JSON") - def pretty_result_data(self, obj): - """Render result JSON in a readable preformatted block.""" - - if not obj.result_data: - return "No data available" - formatted_json = json.dumps(obj.result_data, indent=4) - return mark_safe( - f'
'
-            f"{formatted_json}"
-            f"
" - ) - - def changelist_view(self, request, extra_context=None): - """Augment the changelist with latency and failure-rate statistics.""" - - qs = self.get_queryset(request) - extra_context = extra_context or {} - metrics = qs.aggregate(avg_lat=Avg("latency_ms")) - avg_latency = metrics["avg_lat"] or 0 - failure_count = qs.filter(status="failed").count() - total_count = qs.count() or 1 - - extra_context["dashboard_stats"] = [ - { - "title": "Avg Latency", - "value": f"{avg_latency:.0f}ms", - "icon": "timer", - "color": "warning" if avg_latency > 2000 else "success", - }, - { - "title": "Failure Rate", - "value": f"{(failure_count / total_count) * 100:.1f}%", - "icon": "error", - "color": "danger" if failure_count > 0 else "success", - }, - ] - return super().changelist_view(request, extra_context=extra_context) - - -@admin.register(UserFeedback) -class UserFeedbackAdmin(ModelAdmin): - """Admin view for editorial feedback and agreement with AI scoring.""" - - list_display = ( - "display_feedback", - "get_content_title", - "get_ai_score", - "project", - "user", - "created_at", - ) - list_filter = ("feedback_type", ("project", admin.RelatedOnlyFieldListFilter)) - search_fields = ("content__title", "user__email", "user__username") - - @admin.display(description="Type") - def display_feedback(self, obj): - """Render feedback as a thumbs-up or thumbs-down glyph.""" - - if str(obj.feedback_type).lower() == "upvote": - return format_html('{}', "1.2rem", "πŸ‘") - return format_html('{}', "1.2rem", "πŸ‘Ž") - - @admin.display(description="Content Title") - def get_content_title(self, obj): - """Return a shortened content title for list display.""" - - return obj.content.title[:50] + "..." 
- - @admin.display(description="AI Score") - def get_ai_score(self, obj): - """Displays the original AI score to compare with user feedback.""" - score = obj.content.relevance_score - if score is None: - return "-" - color = "green" if score > 75 else "red" if score < 40 else "orange" - return format_html('{}%', color, score) - - def changelist_view(self, request, extra_context=None): - """Augment the changelist with editorial approval statistics.""" - - qs = self.get_queryset(request) - extra_context = extra_context or {} - upvotes = qs.filter(feedback_type="upvote").count() - total = qs.count() or 1 - approval_rate = (upvotes / total) * 100 - - extra_context["dashboard_stats"] = [ - { - "title": "Approval Rate", - "value": f"{approval_rate:.1f}%", - "icon": "thumb_up", - "color": "success" if approval_rate > 80 else "warning", - }, - { - "title": "Total Feedback", - "value": total, - "icon": "forum", - }, - ] - return super().changelist_view(request, extra_context=extra_context) - - -@admin.register(IngestionRun) -class IngestionRunAdmin(ModelAdmin): - """Admin view for ingestion health, throughput, and timing.""" - - list_display = ( - "plugin_name", - "project", - "display_status", - "display_efficiency", - "display_duration", - "started_at", - ) - list_filter = ( - "plugin_name", - "status", - ("project", admin.RelatedOnlyFieldListFilter), - ) - search_fields = ("plugin_name", "error_message", "project__name") - readonly_fields = ("display_duration", "started_at", "completed_at") - fieldsets = ( - ("Run Info", {"fields": ("plugin_name", "project", "status")}), - ( - "Data Metrics", - {"fields": ("items_fetched", "items_ingested", "display_efficiency")}, - ), - ("Timing", {"fields": ("started_at", "completed_at", "display_duration")}), - ("Logs", {"fields": ("error_message",), "classes": ("collapse",)}), - ) - - @admin.display(description="Status") - def display_status(self, obj): - """Render ingestion status as an Unfold badge.""" - - status_value = 
str(obj.status).lower() - colors = {"success": "success", "failed": "danger", "running": "info"} - return format_html( - '{}', - colors.get(status_value, "warning"), - status_value.upper(), - ) - - @admin.display(description="Efficiency (Ingested/Fetched)") - def display_efficiency(self, obj): - """Show how much of the fetched content became stored content.""" - - if obj.items_fetched == 0: - return "0/0" - percent = (obj.items_ingested / obj.items_fetched) * 100 - color = "green" if percent > 90 else "orange" if percent > 50 else "red" - percent_label = f"({percent:.0f}%)" - return format_html( - '{} / {} {}', - obj.items_ingested, - obj.items_fetched, - color, - percent_label, - ) - - @admin.display(description="Duration") - def display_duration(self, obj): - """Return human-readable runtime for completed ingestion runs.""" - - if not obj.completed_at: - return "In Progress..." - duration = obj.completed_at - obj.started_at - seconds = duration.total_seconds() - return f"{int(seconds // 60)}m {int(seconds % 60)}s" - - def changelist_view(self, request, extra_context=None): - """Augment the changelist with ingestion success statistics.""" - - qs = self.get_queryset(request) - extra_context = extra_context or {} - total_runs = qs.count() - failed_runs = qs.filter(status="failed").count() - total_ingested = sum(qs.values_list("items_ingested", flat=True)) - - extra_context["dashboard_stats"] = [ - { - "title": "Total Content Ingested", - "value": f"{total_ingested:,}", - "icon": "cloud_download", - }, - { - "title": "Success Rate", - "value": f"{((total_runs - failed_runs) / (total_runs or 1)) * 100:.1f}%", - "icon": "check_circle", - "color": "success" if failed_runs == 0 else "warning", - }, - ] - return super().changelist_view(request, extra_context=extra_context) - - -@admin.register(SourceConfig) -class SourceConfigAdmin(ModelAdmin): - """Admin view for source-plugin configuration and connectivity checks.""" - - list_display = ( - "plugin_name", - "project", - 
"display_health", - "is_active", - "last_fetched_at", - ) - list_filter = ( - "is_active", - "plugin_name", - ("project", admin.RelatedOnlyFieldListFilter), - ) - list_editable = ("is_active",) - search_fields = ("plugin_name", "project__name") - actions = ["test_source_connection"] - readonly_fields = ("last_fetched_at", "pretty_config") - fieldsets = ( - ("Core Settings", {"fields": ("plugin_name", "project", "is_active")}), - ( - "Configuration", - { - "fields": ("pretty_config", "config"), - }, - ), - ( - "Activity", - { - "fields": ("last_fetched_at",), - }, - ), - ) - - @admin.display(description="Status") - def display_health(self, obj): - """Infer a human-friendly health state from activity timestamps.""" - - if not obj.is_active: - return format_html('{}', "gray", "● Paused") - - if obj.last_fetched_at: - hours_since = (timezone.now() - obj.last_fetched_at).total_seconds() / 3600 - if hours_since > 24: - return format_html( - '{}', "red", "● Stale" - ) - return format_html( - '{}', "green", "● Healthy" - ) - - return format_html( - '{}', "orange", "● Never Run" - ) - - @admin.display(description="Config Preview") - def pretty_config(self, obj): - """Displays the JSON config in a readable format.""" - if not obj.config: - return "Empty" - formatted_json = json.dumps(obj.config, indent=4) - return mark_safe( - f'
{formatted_json}
' - ) - - @admin.action(description="Test Source Connectivity") - def test_source_connection(self, request, queryset): - """ - Custom action to trigger a dry-run fetch for the selected sources. - """ - healthy_sources = [] - failed_sources = [] - - for source_config in queryset.select_related("project"): - try: - source_config.config = validate_plugin_config( - source_config.plugin_name, - source_config.config, - ) - plugin = get_plugin_for_source_config(source_config) - if not plugin.health_check(): - raise RuntimeError("Health check returned an unhealthy status.") - except Exception as exc: - failed_sources.append(f"{source_config}: {exc}") - else: - healthy_sources.append(str(source_config)) - - if healthy_sources: - self.message_user( - request, - f"Connectivity check passed for {len(healthy_sources)} source(s).", - messages.SUCCESS, - ) - - if failed_sources: - self.message_user( - request, - "Connectivity check failed for: " + "; ".join(failed_sources), - messages.ERROR, - ) - - def changelist_view(self, request, extra_context=None): - """Augment the changelist with source-count and diversity stats.""" - - qs = self.get_queryset(request) - extra_context = extra_context or {} - active_count = qs.filter(is_active=True).count() - total_count = qs.count() or 1 - - extra_context["dashboard_stats"] = [ - { - "title": "Active Sources", - "value": f"{active_count} / {total_count}", - "icon": "settings_input_component", - "color": "success" if active_count == total_count else "warning", - }, - { - "title": "Plugin Variety", - "value": qs.values("plugin_name").distinct().count(), - "icon": "extension", - }, - ] - return super().changelist_view(request, extra_context=extra_context) - - -@admin.register(ReviewQueue) -class ReviewQueueAdmin(ModelAdmin): - """Admin view for items waiting on editorial judgment.""" - - list_display = ( - "get_content_title", - "project", - "reason", - "display_confidence", - "resolved", - "resolution", - "created_at", - ) - list_filter = 
("resolved", "reason", ("project", admin.RelatedOnlyFieldListFilter)) - list_editable = ("resolved", "resolution") - actions = ["mark_as_approved", "mark_as_rejected"] - - @admin.display(description="Content") - def get_content_title(self, obj): - """Return a shortened content title for list display.""" - - return obj.content.title[:50] + "..." - - @admin.display(description="Confidence") - def display_confidence(self, obj): - """Render confidence as a percentage with risk coloring.""" - - color = ( - "red" - if obj.confidence < 0.3 - else "orange" if obj.confidence < 0.6 else "green" - ) - confidence_label = f"{obj.confidence * 100:.0f}%" - return format_html('{}', color, confidence_label) - - @admin.action(description="Approve selected items") - def mark_as_approved(self, request, queryset): - """Resolve selected review items as approved.""" - - queryset.update(resolved=True, resolution="APPROVED") - self.message_user(request, "Selected items approved.", messages.SUCCESS) - - @admin.action(description="Reject selected items") - def mark_as_rejected(self, request, queryset): - """Resolve selected review items as rejected.""" - - queryset.update(resolved=True, resolution="REJECTED") - self.message_user(request, "Selected items rejected.", messages.WARNING) - - def changelist_view(self, request, extra_context=None): - """Augment the changelist with pending-volume and confidence stats.""" - - qs = self.get_queryset(request) - extra_context = extra_context or {} - pending_count = qs.filter(resolved=False).count() - avg_conf = qs.aggregate(avg_confidence=Avg("confidence"))["avg_confidence"] or 0 - - extra_context["dashboard_stats"] = [ - { - "title": "Pending Review", - "value": pending_count, - "icon": "pending_actions", - "color": "danger" if pending_count > 10 else "success", - }, - { - "title": "Avg Confidence", - "value": f"{avg_conf * 100:.0f}%", - "icon": "psychology", - }, - ] - return super().changelist_view(request, extra_context=extra_context) +from 
pipeline.admin import ReviewQueueAdmin, SkillResultAdmin # noqa: E402,F401 diff --git a/core/api.py b/core/api.py index 52ee8b73..b8093372 100644 --- a/core/api.py +++ b/core/api.py @@ -8,7 +8,6 @@ import logging from typing import Any -from django.db.models import Avg, Count, Prefetch, Q from drf_spectacular.utils import ( OpenApiExample, OpenApiParameter, @@ -20,52 +19,32 @@ from rest_framework import serializers, status, viewsets from rest_framework.decorators import action from rest_framework.exceptions import NotFound -from rest_framework.filters import OrderingFilter from rest_framework.response import Response -from core.entity_extraction import ( - accept_entity_candidate, - merge_entity_candidate, - reject_entity_candidate, -) from core.models import ( - BlueskyCredentials, Content, - Entity, - EntityCandidate, - EntityMention, IngestionRun, IntakeAllowlist, NewsletterIntake, - Project, - ProjectConfig, - ReviewQueue, - SkillResult, - SourceConfig, - TopicCentroidSnapshot, UserFeedback, - generate_project_intake_token, +) +from core.permissions import ( + IsProjectAdmin, + IsProjectContributor, + IsProjectFeedbackEditor, + IsProjectMember, + IsProjectMemberWritable, + get_visible_projects_queryset, ) from core.serializers import ( - BlueskyCredentialsSerializer, ContentSerializer, - EntityAuthoritySnapshotSerializer, - EntityCandidateMergeSerializer, - EntityCandidateSerializer, - EntityMentionSummarySerializer, - EntitySerializer, IngestionRunSerializer, IntakeAllowlistSerializer, NewsletterIntakeSerializer, - ProjectConfigSerializer, - ProjectSerializer, - ReviewQueueSerializer, SkillResultSerializer, - SourceConfigSerializer, - TopicCentroidObservabilitySummarySerializer, - TopicCentroidSnapshotSerializer, UserFeedbackSerializer, ) +from projects.models import Project CLASSIFICATION_SKILL_NAME = "content_classification" RELEVANCE_SKILL_NAME = "relevance_scoring" @@ -95,7 +74,6 @@ "Create Project Request", value={ "name": "AI Weekly", - "group": 3, 
"topic_description": "Coverage of developer tools, model releases, and applied AI workflows.", "content_retention_days": 180, }, @@ -107,11 +85,11 @@ value={ "id": 1, "name": "AI Weekly", - "group": 3, "topic_description": "Coverage of developer tools, model releases, and applied AI workflows.", "content_retention_days": 180, "intake_token": "project-token-123", "intake_enabled": True, + "user_role": "admin", "has_bluesky_credentials": True, "bluesky_handle": "aiweekly.bsky.social", "bluesky_is_active": True, @@ -424,7 +402,7 @@ def document_group_access_viewset( tag: str, action_overrides: dict[str, dict] | None = None, ): - """Decorate a viewset with schema metadata for group-access resources. + """Decorate a viewset with schema metadata for membership-scoped resources. Args: resource_plural: Human-readable plural label for the resource. @@ -456,12 +434,12 @@ def schema(action: str, **kwargs): list=schema( "list", summary=f"List {resource_plural}", - description=f"Return all {resource_plural} available to the authenticated user through group membership.", + description=f"Return all {resource_plural} available to the authenticated user through project membership.", ), retrieve=schema( "retrieve", summary=f"Get {resource_singular}", - description=f"Return a single {resource_singular} available to the authenticated user through group membership.", + description=f"Return a single {resource_singular} available to the authenticated user through project membership.", ), create=schema( "create", @@ -471,17 +449,17 @@ def schema(action: str, **kwargs): update=schema( "update", summary=f"Replace {resource_singular}", - description=f"Replace an existing {resource_singular} available to the authenticated user through group membership.", + description=f"Replace an existing {resource_singular} available to the authenticated user through project membership.", ), partial_update=schema( "partial_update", summary=f"Update {resource_singular}", - description=f"Update one or more 
fields on an existing {resource_singular} available to the authenticated user through group membership.", + description=f"Update one or more fields on an existing {resource_singular} available to the authenticated user through project membership.", ), destroy=schema( "destroy", summary=f"Delete {resource_singular}", - description=f"Delete an existing {resource_singular} available to the authenticated user through group membership.", + description=f"Delete an existing {resource_singular} available to the authenticated user through project membership.", ), ) @@ -581,7 +559,7 @@ def get_project(self): "project_id must be present in nested project-scoped routes" ) try: - return Project.objects.get(pk=project_id, group__user=self.request.user) + return get_visible_projects_queryset(self.request.user).get(pk=project_id) except Project.DoesNotExist as exc: raise NotFound("Project not found.") from exc @@ -606,325 +584,6 @@ def perform_create(self, serializer): serializer.save(project=self.get_project()) -@document_group_access_viewset( - resource_plural="projects", - resource_singular="project", - create_description="Create a new project for one of the authenticated user's groups.", - tag="Project Management", - action_overrides=build_crud_action_overrides( - ProjectSerializer, - resource_plural="projects available to the authenticated user", - resource_singular="project", - create_examples=[PROJECT_CREATE_REQUEST_EXAMPLE, PROJECT_RESPONSE_EXAMPLE], - create_response_examples=[PROJECT_RESPONSE_EXAMPLE], - retrieve_examples=[PROJECT_RESPONSE_EXAMPLE], - ), -) -class ProjectViewSet(viewsets.ModelViewSet): - """Manage projects accessible through the current user's group memberships.""" - - serializer_class = ProjectSerializer - queryset = Project.objects.select_related("group", "bluesky_credentials") - lookup_url_kwarg = "id" - - def get_queryset(self): - """Limit projects to those visible through the authenticated user.""" - - return 
self.queryset.filter(group__user=self.request.user).distinct() - - @extend_schema( - summary="Rotate newsletter intake token", - description=( - "Generate a fresh project-specific newsletter intake token and return the " - "updated project payload." - ), - tags=["Project Management"], - request=None, - responses={200: ProjectSerializer, 403: AUTHENTICATION_REQUIRED_RESPONSE}, - ) - @action(detail=True, methods=["post"], url_path="rotate-intake-token") - def rotate_intake_token(self, request, *args, **kwargs): - """Generate a fresh intake token for the selected project.""" - - project = self.get_object() - project.intake_token = generate_project_intake_token() - project.save(update_fields=["intake_token"]) - serializer = self.get_serializer(project) - return Response(serializer.data) - - @extend_schema( - summary="Verify Bluesky credentials", - description=( - "Verify the selected project's stored Bluesky credentials by authenticating " - "the account and checking the current session." - ), - tags=["Ingestion"], - request=None, - responses={ - 200: build_success_response( - BLUESKY_CREDENTIALS_VERIFY_RESPONSE, - "The project's Bluesky credentials were verified successfully.", - ), - 400: OpenApiResponse( - response=inline_serializer( - name="BlueskyCredentialsVerifyErrorResponse", - fields={ - "type": serializers.CharField(), - "errors": inline_serializer( - name="BlueskyCredentialsVerifyError", - fields={ - "code": serializers.CharField(), - "detail": serializers.CharField(), - "attr": serializers.CharField(allow_null=True), - }, - many=True, - ), - }, - ), - description="The project is missing Bluesky credentials or verification failed.", - ), - 403: AUTHENTICATION_REQUIRED_RESPONSE, - }, - ) - @action(detail=True, methods=["post"], url_path="verify-bluesky-credentials") - def verify_bluesky_credentials(self, request, *args, **kwargs): - """Verify the Bluesky credentials stored for the selected project.""" - - from core.plugins.bluesky import BlueskySourcePlugin - 
- project = self.get_object() - try: - credentials = project.bluesky_credentials - except BlueskyCredentials.DoesNotExist as exc: - raise serializers.ValidationError( - { - "bluesky_credentials": "No Bluesky credentials are configured for this project." - } - ) from exc - - try: - BlueskySourcePlugin.verify_credentials(credentials) - except Exception as exc: - logger.exception( - "Bluesky credential verification failed for project id=%s", - project.id, - ) - raise serializers.ValidationError( - { - "bluesky_credentials": ( - "Credential verification failed. Please re-check the credentials " - "and try again." - ) - } - ) from exc - - credentials.refresh_from_db() - return Response( - { - "status": "verified", - "handle": credentials.handle, - "last_verified_at": credentials.last_verified_at, - "last_error": "", - } - ) - - -@document_project_owned_viewset( - resource_plural="project configurations", - resource_singular="project configuration", - create_description="Create a new project configuration record for the selected project, including authority weighting and decay settings.", - tag="Project Management", - action_overrides=build_crud_action_overrides( - ProjectConfigSerializer, - resource_plural="project configurations for the selected project", - resource_singular="project configuration", - ), -) -class ProjectConfigViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet): - """Manage per-project scoring and authority configuration.""" - - serializer_class = ProjectConfigSerializer - queryset = ProjectConfig.objects.select_related("project") - - -@document_project_owned_viewset( - resource_plural="entities", - resource_singular="entity", - create_description="Create a new tracked entity for the selected project, such as a company, person, or organization.", - tag="Entity Catalog", - action_overrides=build_crud_action_overrides( - EntitySerializer, - resource_plural="entities for the selected project", - resource_singular="entity", - ), -) -class 
EntityViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet): - """Manage tracked entities associated with a project.""" - - serializer_class = EntitySerializer - filter_backends = [OrderingFilter] - ordering_fields = ["authority_score", "created_at", "name"] - ordering = ["name"] - queryset = ( - Entity.objects.select_related("project") - .annotate(mention_count=Count("mentions", distinct=True)) - .prefetch_related( - Prefetch( - "mentions", - queryset=EntityMention.objects.select_related("content").order_by( - "-created_at" - ), - to_attr="prefetched_mentions", - ) - ) - ) - - @extend_schema( - summary="List entity mentions", - description="Return the extracted mention history for one tracked entity inside the selected project.", - request=None, - responses={ - 200: EntityMentionSummarySerializer(many=True), - 403: AUTHENTICATION_REQUIRED_RESPONSE, - }, - tags=["Entity Catalog"], - ) - @action(detail=True, methods=["get"], url_path="mentions") - def mentions(self, request, *args, **kwargs): - """Return the extracted mentions for the selected entity.""" - - entity = self.get_object() - mentions = entity.mentions.select_related("content").order_by("-created_at") - serializer = EntityMentionSummarySerializer(mentions, many=True) - return Response(serializer.data) - - @extend_schema( - summary="List authority history", - description=( - "Return persisted authority-score snapshots for one tracked entity. " - "Use the optional limit query parameter to cap the number of snapshots returned." 
- ), - parameters=[ - OpenApiParameter( - name="limit", - type=int, - location=OpenApiParameter.QUERY, - description="Maximum number of authority snapshots to return.", - required=False, - ) - ], - request=None, - responses={ - 200: EntityAuthoritySnapshotSerializer(many=True), - 403: AUTHENTICATION_REQUIRED_RESPONSE, - }, - tags=["Entity Catalog"], - ) - @action(detail=True, methods=["get"], url_path="authority_history") - def authority_history(self, request, *args, **kwargs): - """Return recent authority snapshots for the selected entity.""" - - entity = self.get_object() - snapshots = entity.authority_snapshots.order_by("-computed_at") - limit_param = request.query_params.get("limit") - if limit_param: - try: - limit = max(1, min(int(limit_param), 100)) - except ValueError as exc: - raise serializers.ValidationError( - {"limit": "Limit must be an integer between 1 and 100."} - ) from exc - snapshots = snapshots[:limit] - serializer = EntityAuthoritySnapshotSerializer(snapshots, many=True) - return Response(serializer.data) - - -@document_project_owned_viewset( - resource_plural="entity candidates", - resource_singular="entity candidate", - create_description="Entity candidates are created by the pipeline and can be reviewed through dedicated actions.", - tag="Entity Catalog", - action_overrides=build_crud_action_overrides( - EntityCandidateSerializer, - resource_plural="entity candidates for the selected project", - resource_singular="entity candidate", - ), -) -class EntityCandidateViewSet(ProjectOwnedQuerysetMixin, viewsets.ReadOnlyModelViewSet): - """Inspect and resolve entity candidates surfaced by entity extraction.""" - - serializer_class = EntityCandidateSerializer - queryset = EntityCandidate.objects.select_related( - "project", "first_seen_in", "merged_into" - ) - - @extend_schema( - summary="Accept entity candidate", - description="Promote a pending entity candidate into a tracked entity and backfill recent mentions.", - request=None, - responses={ - 
200: EntityCandidateSerializer, - 403: AUTHENTICATION_REQUIRED_RESPONSE, - }, - tags=["Entity Catalog"], - ) - @action(detail=True, methods=["post"], url_path="accept") - def accept(self, request, *args, **kwargs): - """Accept an entity candidate and return its updated representation.""" - - candidate = self.get_object() - accept_entity_candidate(candidate) - candidate.refresh_from_db() - serializer = self.get_serializer(candidate) - return Response(serializer.data) - - @extend_schema( - summary="Reject entity candidate", - description="Mark a pending entity candidate as rejected without creating a tracked entity.", - request=None, - responses={ - 200: EntityCandidateSerializer, - 403: AUTHENTICATION_REQUIRED_RESPONSE, - }, - tags=["Entity Catalog"], - ) - @action(detail=True, methods=["post"], url_path="reject") - def reject(self, request, *args, **kwargs): - """Reject an entity candidate and return its updated representation.""" - - candidate = self.get_object() - reject_entity_candidate(candidate) - candidate.refresh_from_db() - serializer = self.get_serializer(candidate) - return Response(serializer.data) - - @extend_schema( - summary="Merge entity candidate", - description="Merge a pending entity candidate into an existing tracked entity from the same project.", - request=EntityCandidateMergeSerializer, - responses={ - 200: EntityCandidateSerializer, - 400: EntityCandidateMergeSerializer, - 403: AUTHENTICATION_REQUIRED_RESPONSE, - }, - tags=["Entity Catalog"], - ) - @action(detail=True, methods=["post"], url_path="merge") - def merge(self, request, *args, **kwargs): - """Merge an entity candidate into an existing tracked entity.""" - - candidate = self.get_object() - serializer = EntityCandidateMergeSerializer( - data=request.data, - context=self.get_serializer_context(), - ) - serializer.is_valid(raise_exception=True) - merge_entity_candidate(candidate, serializer.validated_data["merged_into"]) - candidate.refresh_from_db() - response_serializer = 
self.get_serializer(candidate) - return Response(response_serializer.data) - - @document_project_owned_viewset( resource_plural="content items", resource_singular="content item", @@ -945,6 +604,17 @@ class ContentViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet): serializer_class = ContentSerializer queryset = Content.objects.select_related("project", "entity") + def get_permissions(self): + """Allow all members to read content, contributors to edit, and admins to delete.""" + + if self.action == "destroy": + permission_classes = [IsProjectAdmin] + elif self.action in {"create", "update", "partial_update", "run_skill"}: + permission_classes = [IsProjectMemberWritable] + else: + permission_classes = [IsProjectMember] + return [permission() for permission in permission_classes] + @extend_schema( summary="Run content skill", description=( @@ -1003,25 +673,6 @@ def run_skill(self, request, *args, **kwargs): return Response(serializer.data, status=status.HTTP_201_CREATED) -@document_project_owned_viewset( - resource_plural="skill results", - resource_singular="skill result", - create_description="Create a new skill result for project content. 
The referenced content must belong to the selected project.", - tag="AI Processing", - action_overrides=build_crud_action_overrides( - SkillResultSerializer, - resource_plural="skill results for the selected project", - resource_singular="skill result", - retrieve_examples=[SKILL_RESULT_RESPONSE_EXAMPLE], - ), -) -class SkillResultViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet): - """Inspect persisted AI skill outputs for project content.""" - - serializer_class = SkillResultSerializer - queryset = SkillResult.objects.select_related("content", "project", "superseded_by") - - @document_project_owned_viewset( resource_plural="user feedback entries", resource_singular="user feedback entry", @@ -1039,6 +690,11 @@ class UserFeedbackViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet): serializer_class = UserFeedbackSerializer queryset = UserFeedback.objects.select_related("content", "project", "user") + def get_permissions(self): + """Allow all members to read feedback and owners or admins to modify it.""" + + return [IsProjectFeedbackEditor()] + def perform_create(self, serializer): """Attach the authenticated user automatically to new feedback rows.""" @@ -1062,32 +718,14 @@ class IngestionRunViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet): serializer_class = IngestionRunSerializer queryset = IngestionRun.objects.select_related("project") + def get_permissions(self): + """Allow all members to read ingestion runs and contributors to manage them.""" -@document_project_owned_viewset( - resource_plural="Bluesky credentials", - resource_singular="Bluesky credentials", - create_description=( - "Create Bluesky credentials for the selected project. The app password is " - "accepted write-only and is never returned in API responses." 
- ), - tag="Ingestion", - action_overrides=build_crud_action_overrides( - BlueskyCredentialsSerializer, - resource_plural="Bluesky credentials for the selected project", - resource_singular="Bluesky credentials", - retrieve_examples=[BLUESKY_CREDENTIALS_RESPONSE_EXAMPLE], - ), -) -class BlueskyCredentialsViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet): - """Manage project-scoped Bluesky credentials.""" - - serializer_class = BlueskyCredentialsSerializer - queryset = BlueskyCredentials.objects.select_related("project") - - def get_queryset(self): - """Restrict credentials to the selected project and current user.""" - - return super().get_queryset().order_by("-updated_at") + if self.action in {"create", "update", "partial_update", "destroy"}: + permission_classes = [IsProjectMemberWritable] + else: + permission_classes = [IsProjectMember] + return [permission() for permission in permission_classes] @document_project_owned_viewset( @@ -1110,6 +748,11 @@ class IntakeAllowlistViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet): serializer_class = IntakeAllowlistSerializer queryset = IntakeAllowlist.objects.select_related("project") + def get_permissions(self): + """Restrict intake allowlist access to project contributors.""" + + return [IsProjectContributor()] + @document_project_owned_viewset( resource_plural="newsletter intake entries", @@ -1131,103 +774,7 @@ class NewsletterIntakeViewSet(ProjectOwnedQuerysetMixin, viewsets.ReadOnlyModelV serializer_class = NewsletterIntakeSerializer queryset = NewsletterIntake.objects.select_related("project") + def get_permissions(self): + """Allow any project member to inspect newsletter intake history.""" -@document_project_owned_viewset( - resource_plural="source configurations", - resource_singular="source configuration", - create_description="Create a new source configuration for the selected project. 
Plugin-specific configuration is validated before the record is saved.", - tag="Ingestion", - action_overrides=build_crud_action_overrides( - SourceConfigSerializer, - resource_plural="source configurations for the selected project", - resource_singular="source configuration", - create_examples=[ - SOURCE_CONFIG_CREATE_REQUEST_EXAMPLE, - SOURCE_CONFIG_REDDIT_REQUEST_EXAMPLE, - SOURCE_CONFIG_BLUESKY_REQUEST_EXAMPLE, - SOURCE_CONFIG_RESPONSE_EXAMPLE, - ], - create_response_examples=[SOURCE_CONFIG_RESPONSE_EXAMPLE], - retrieve_examples=[SOURCE_CONFIG_RESPONSE_EXAMPLE], - ), -) -class SourceConfigViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet): - """Manage source-plugin configuration for a project.""" - - serializer_class = SourceConfigSerializer - queryset = SourceConfig.objects.select_related("project") - - -@document_project_owned_viewset( - resource_plural="topic centroid snapshots", - resource_singular="topic centroid snapshot", - create_description="Topic centroid snapshots are pipeline-managed history rows and are exposed read-only for observability.", - tag="Observability", - action_overrides=build_crud_action_overrides( - TopicCentroidSnapshotSerializer, - resource_plural="topic centroid snapshots for the selected project", - resource_singular="topic centroid snapshot", - ), -) -class TopicCentroidSnapshotViewSet( - ProjectOwnedQuerysetMixin, viewsets.ReadOnlyModelViewSet -): - """Inspect persisted centroid history and aggregate drift for a project.""" - - serializer_class = TopicCentroidSnapshotSerializer - queryset = TopicCentroidSnapshot.objects.select_related("project") - - @extend_schema( - summary="Get topic centroid summary", - description=( - "Return aggregate centroid observability metrics for the selected project, " - "including average drift and the latest persisted snapshot." 
- ), - request=None, - responses={ - 200: TopicCentroidObservabilitySummarySerializer, - 403: AUTHENTICATION_REQUIRED_RESPONSE, - }, - tags=["Observability"], - ) - @action(detail=False, methods=["get"], url_path="summary") - def summary(self, request, *args, **kwargs): - """Return centroid observability summary metrics for the current project.""" - - queryset = self.get_queryset() - metrics = queryset.aggregate( - snapshot_count=Count("id"), - active_snapshot_count=Count("id", filter=Q(centroid_active=True)), - avg_drift_from_previous=Avg("drift_from_previous"), - avg_drift_from_week_ago=Avg("drift_from_week_ago"), - ) - serializer = TopicCentroidObservabilitySummarySerializer( - { - "project": self.get_project().id, - "snapshot_count": metrics["snapshot_count"], - "active_snapshot_count": metrics["active_snapshot_count"], - "avg_drift_from_previous": metrics["avg_drift_from_previous"], - "avg_drift_from_week_ago": metrics["avg_drift_from_week_ago"], - "latest_snapshot": queryset.order_by("-computed_at").first(), - }, - context=self.get_serializer_context(), - ) - return Response(serializer.data) - - -@document_project_owned_viewset( - resource_plural="review queue entries", - resource_singular="review queue entry", - create_description="Create a new review queue entry for the selected project. 
The referenced content must belong to the same project.", - tag="Review Queue", - action_overrides=build_crud_action_overrides( - ReviewQueueSerializer, - resource_plural="review queue entries for the selected project", - resource_singular="review queue entry", - ), -) -class ReviewQueueViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet): - """Inspect and manage content awaiting manual review.""" - - serializer_class = ReviewQueueSerializer - queryset = ReviewQueue.objects.select_related("content", "project") + return [IsProjectMember()] diff --git a/core/api_urls.py b/core/api_urls.py index e1c70538..e4bc93ab 100644 --- a/core/api_urls.py +++ b/core/api_urls.py @@ -1,72 +1,42 @@ +"""Aggregate app-owned API route registrations under the public v1 surface.""" + from rest_framework.routers import DefaultRouter from rest_framework_nested.routers import NestedSimpleRouter -from core.api import ( - BlueskyCredentialsViewSet, - ContentViewSet, - EntityCandidateViewSet, - EntityViewSet, - IngestionRunViewSet, - IntakeAllowlistViewSet, - NewsletterIntakeViewSet, - ProjectConfigViewSet, - ProjectViewSet, - ReviewQueueViewSet, - SkillResultViewSet, - SourceConfigViewSet, - TopicCentroidSnapshotViewSet, - UserFeedbackViewSet, +from content.api_urls import register_project_routes as register_content_project_routes +from entities.api_urls import ( + register_project_routes as register_entities_project_routes, +) +from ingestion.api_urls import ( + register_project_routes as register_ingestion_project_routes, +) +from newsletters.api_urls import ( + register_project_routes as register_newsletters_project_routes, +) +from pipeline.api_urls import ( + register_project_routes as register_pipeline_project_routes, ) +from projects.api_urls import ( + register_project_routes as register_projects_project_routes, +) +from projects.api_urls import ( + register_root_routes as register_projects_root_routes, +) +from trends.api_urls import register_project_routes as 
register_trends_project_routes app_name = "api" router = DefaultRouter() -router.register("projects", ProjectViewSet, basename="project") +register_projects_root_routes(router) project_router = NestedSimpleRouter(router, r"projects", lookup="project") -project_router.register( - r"project-configs", ProjectConfigViewSet, basename="project-config" -) -project_router.register(r"entities", EntityViewSet, basename="project-entity") -project_router.register( - r"entity-candidates", - EntityCandidateViewSet, - basename="project-entity-candidate", -) -project_router.register(r"contents", ContentViewSet, basename="project-content") -project_router.register( - r"skill-results", SkillResultViewSet, basename="project-skill-result" -) -project_router.register(r"feedback", UserFeedbackViewSet, basename="project-feedback") -project_router.register( - r"ingestion-runs", IngestionRunViewSet, basename="project-ingestion-run" -) -project_router.register( - r"bluesky-credentials", - BlueskyCredentialsViewSet, - basename="project-bluesky-credentials", -) -project_router.register( - r"intake-allowlist", - IntakeAllowlistViewSet, - basename="project-intake-allowlist", -) -project_router.register( - r"newsletter-intakes", - NewsletterIntakeViewSet, - basename="project-newsletter-intake", -) -project_router.register( - r"source-configs", SourceConfigViewSet, basename="project-source-config" -) -project_router.register( - r"topic-centroid-snapshots", - TopicCentroidSnapshotViewSet, - basename="project-topic-centroid-snapshot", -) -project_router.register( - r"review-queue", ReviewQueueViewSet, basename="project-review-queue" -) +register_projects_project_routes(project_router) +register_entities_project_routes(project_router) +register_content_project_routes(project_router) +register_pipeline_project_routes(project_router) +register_ingestion_project_routes(project_router) +register_newsletters_project_routes(project_router) +register_trends_project_routes(project_router) urlpatterns = [ 
*router.urls, diff --git a/core/embeddings.py b/core/embeddings.py index bbee9fd5..39fd198b 100644 --- a/core/embeddings.py +++ b/core/embeddings.py @@ -26,8 +26,9 @@ VectorParams, ) -from core.models import Content, Entity +from core.models import Content from core.settings_types import CoreSettings +from entities.models import Entity SentenceTransformer = None settings = cast(CoreSettings, django_settings) diff --git a/core/entity_extraction.py b/core/entity_extraction.py index 4e3935b7..80adee6b 100644 --- a/core/entity_extraction.py +++ b/core/entity_extraction.py @@ -15,6 +15,10 @@ from core.llm import build_skill_user_prompt, get_skill_definition, openrouter_chat_json from core.models import ( Content, + SkillResult, + SkillStatus, +) +from entities.models import ( Entity, EntityCandidate, EntityCandidateStatus, @@ -22,8 +26,6 @@ EntityMentionRole, EntityMentionSentiment, EntityType, - SkillResult, - SkillStatus, ) ENTITY_EXTRACTION_SKILL_NAME = "entity_extraction" @@ -121,7 +123,9 @@ def run_entity_extraction(content: Content) -> dict[str, Any]: confidence = max((mention.confidence for mention in mentions), default=0.0) return { "mentions": [_serialize_mention(mention) for mention in mentions], - "candidate_entities": [_serialize_candidate(candidate) for candidate in candidates], + "candidate_entities": [ + _serialize_candidate(candidate) for candidate in candidates + ], "primary_entity_id": primary_entity.id if primary_entity is not None else None, "confidence": confidence, "explanation": extraction.get( @@ -211,7 +215,8 @@ def persist_entity_candidates( if not created: update_fields: list[str] = [] suggested_type = candidate_payload.get( - "suggested_type", candidate.suggested_type or _guess_candidate_type(name) + "suggested_type", + candidate.suggested_type or _guess_candidate_type(name), ) if candidate.suggested_type != suggested_type: candidate.suggested_type = suggested_type @@ -264,7 +269,9 @@ def reject_entity_candidate(candidate: EntityCandidate) -> 
None: candidate.save(update_fields=["status", "updated_at"]) -def backfill_entity_mentions(entity: Entity, *, candidate_name: str | None = None) -> None: +def backfill_entity_mentions( + entity: Entity, *, candidate_name: str | None = None +) -> None: """Retroactively attach recent content rows to an accepted or merged entity.""" cutoff = timezone.now() - timedelta(days=RETROACTIVE_MENTION_WINDOW_DAYS) @@ -312,7 +319,10 @@ def _run_entity_extraction_with_fallback( "title": content.title, "content_text": content.content_text[:5000], "project_id": content.project_id, - "tracked_entities": [_serialize_tracked_entity(entity) for entity in candidate_entities], + "tracked_entities": [ + _serialize_tracked_entity(entity) + for entity in candidate_entities + ], }, ), ) @@ -393,7 +403,11 @@ def _retrieve_candidate_entities( ordered_ids.append(entity_id) if not ordered_ids: return tracked_entities - return [entities_by_id[entity_id] for entity_id in ordered_ids if entity_id in entities_by_id] + return [ + entities_by_id[entity_id] + for entity_id in ordered_ids + if entity_id in entities_by_id + ] def _normalize_mentions( @@ -423,7 +437,9 @@ def _normalize_mentions( "role": _normalize_role(raw_mention.get("role")), "sentiment": _normalize_sentiment(raw_mention.get("sentiment")), "span": str(raw_mention.get("span", entity_name)).strip(), - "confidence": _normalize_confidence(raw_mention.get("confidence", 0.75)), + "confidence": _normalize_confidence( + raw_mention.get("confidence", 0.75) + ), } ) return normalized_mentions, unresolved_names @@ -447,7 +463,9 @@ def _normalize_candidates( elif isinstance(raw_candidate, dict): candidate_name = _clean_candidate_name(str(raw_candidate.get("name", ""))) suggested_type = str( - raw_candidate.get("suggested_type", _guess_candidate_type(candidate_name)) + raw_candidate.get( + "suggested_type", _guess_candidate_type(candidate_name) + ) ) else: continue @@ -477,7 +495,9 @@ def _discover_candidates( discovered: list[dict[str, str]] = [] 
seen_names: set[str] = set() candidate_text = "\n".join( - part for part in [content.author, content.title, content.content_text[:2000]] if part + part + for part in [content.author, content.title, content.content_text[:2000]] + if part ) for match in PROPER_NOUN_PATTERN.findall(candidate_text): name = _clean_candidate_name(match) @@ -490,9 +510,7 @@ def _discover_candidates( ): continue seen_names.add(normalized_name) - discovered.append( - {"name": name, "suggested_type": _guess_candidate_type(name)} - ) + discovered.append({"name": name, "suggested_type": _guess_candidate_type(name)}) return discovered @@ -536,7 +554,9 @@ def _find_entity_span( stripped_label = label.strip() if not stripped_label: continue - pattern = re.compile(rf"(? str: return EntityMentionRole.AUTHOR if content.title and span_lower in content.title.lower(): return EntityMentionRole.SUBJECT - if re.search(rf'"[^\n]{{0,120}}{re.escape(span)}[^\n]{{0,120}}"', content.content_text, re.IGNORECASE): + if re.search( + rf'"[^\n]{{0,120}}{re.escape(span)}[^\n]{{0,120}}"', + content.content_text, + re.IGNORECASE, + ): return EntityMentionRole.QUOTED return EntityMentionRole.MENTIONED @@ -714,7 +738,9 @@ def _guess_candidate_type(name: str) -> str: return EntityType.VENDOR if any(token in ORGANIZATION_SUFFIXES for token in tokens): return EntityType.ORGANIZATION - title_case_tokens = [token for token in name.split() if token and token[:1].isupper()] + title_case_tokens = [ + token for token in name.split() if token and token[:1].isupper() + ] if 2 <= len(title_case_tokens) <= 3: return EntityType.INDIVIDUAL return EntityType.ORGANIZATION diff --git a/core/management/commands/seed_demo.py b/core/management/commands/seed_demo.py index 14247100..0d8fadb9 100644 --- a/core/management/commands/seed_demo.py +++ b/core/management/commands/seed_demo.py @@ -5,7 +5,6 @@ from django.conf import settings from django.contrib.auth import get_user_model -from django.contrib.auth.models import Group from 
django.core.management.base import BaseCommand from django.db import transaction from django.utils import timezone @@ -16,20 +15,14 @@ from core.embeddings import upsert_content_embedding from core.models import ( Content, - Entity, - EntityType, FeedbackType, IngestionRun, - Project, - ProjectConfig, ReviewQueue, ReviewReason, ReviewResolution, RunStatus, SkillResult, SkillStatus, - SourceConfig, - SourcePluginName, UserFeedback, ) from core.pipeline import ( @@ -37,9 +30,17 @@ RELEVANCE_SKILL_NAME, SUMMARIZATION_SKILL_NAME, ) +from entities.models import Entity, EntityType +from projects.model_support import SourcePluginName +from projects.models import ( + Project, + ProjectConfig, + ProjectMembership, + ProjectRole, + SourceConfig, +) DEMO_PROJECT_NAME = "Platform Engineering Weekly" -DEMO_GROUP_NAME = "platform-engineering-editors" DEMO_TOPIC_DESCRIPTION = ( "Platform engineering, DevOps, cloud infrastructure, reliability, and " "developer experience." @@ -537,17 +538,19 @@ def _ensure_demo_project(self) -> Project: ) user.set_password("demo-password") user.save(update_fields=["password"]) - group, _ = Group.objects.get_or_create(name=DEMO_GROUP_NAME) - user.groups.add(group) project, created = Project.objects.get_or_create( - group=group, name=DEMO_PROJECT_NAME, defaults={"topic_description": DEMO_TOPIC_DESCRIPTION}, ) if not created and project.topic_description != DEMO_TOPIC_DESCRIPTION: project.topic_description = DEMO_TOPIC_DESCRIPTION project.save(update_fields=["topic_description"]) + ProjectMembership.objects.get_or_create( + user=user, + project=project, + defaults={"role": ProjectRole.ADMIN}, + ) ProjectConfig.objects.get_or_create(project=project) return project diff --git a/core/migrations/0010_move_project_models_to_projects.py b/core/migrations/0010_move_project_models_to_projects.py new file mode 100644 index 00000000..dee95e58 --- /dev/null +++ b/core/migrations/0010_move_project_models_to_projects.py @@ -0,0 +1,159 @@ +import 
django.db.models.deletion +from django.db import migrations, models + + +PROJECT_MODEL_NAMES = [ + "project", + "projectconfig", + "sourceconfig", + "blueskycredentials", +] + + +def rename_project_content_types(apps, schema_editor): + """Retarget existing content types to the new projects app label.""" + + ContentType = apps.get_model("contenttypes", "ContentType") + ContentType.objects.using(schema_editor.connection.alias).filter( + app_label="core", model__in=PROJECT_MODEL_NAMES + ).update(app_label="projects") + + +def rename_project_content_types_reverse(apps, schema_editor): + """Restore the historical core app label on rollback.""" + + ContentType = apps.get_model("contenttypes", "ContentType") + ContentType.objects.using(schema_editor.connection.alias).filter( + app_label="projects", model__in=PROJECT_MODEL_NAMES + ).update(app_label="core") + + +class Migration(migrations.Migration): + dependencies = [ + ("core", "0009_topiccentroidsnapshot"), + ("projects", "0001_initial"), + ] + + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.AlterField( + model_name="entity", + name="project", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="entities", + to="projects.project", + ), + ), + migrations.AlterField( + model_name="entityauthoritysnapshot", + name="project", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="entity_authority_snapshots", + to="projects.project", + ), + ), + migrations.AlterField( + model_name="topiccentroidsnapshot", + name="project", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="topic_centroid_snapshots", + to="projects.project", + ), + ), + migrations.AlterField( + model_name="content", + name="project", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="contents", + to="projects.project", + ), + ), + migrations.AlterField( + 
model_name="entitymention", + name="project", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="entity_mentions", + to="projects.project", + ), + ), + migrations.AlterField( + model_name="entitycandidate", + name="project", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="entity_candidates", + to="projects.project", + ), + ), + migrations.AlterField( + model_name="intakeallowlist", + name="project", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="intake_allowlist", + to="projects.project", + ), + ), + migrations.AlterField( + model_name="newsletterintake", + name="project", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="newsletter_intakes", + to="projects.project", + ), + ), + migrations.AlterField( + model_name="skillresult", + name="project", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="skill_results", + to="projects.project", + ), + ), + migrations.AlterField( + model_name="userfeedback", + name="project", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="feedback", + to="projects.project", + ), + ), + migrations.AlterField( + model_name="ingestionrun", + name="project", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="ingestion_runs", + to="projects.project", + ), + ), + migrations.AlterField( + model_name="reviewqueue", + name="project", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="review_queue_items", + to="projects.project", + ), + ), + migrations.DeleteModel(name="BlueskyCredentials"), + migrations.DeleteModel(name="ProjectConfig"), + migrations.DeleteModel(name="Project"), + migrations.DeleteModel(name="SourceConfig"), + ], + database_operations=[], + ), + migrations.RunPython( + rename_project_content_types, + 
rename_project_content_types_reverse, + ), + ] diff --git a/core/migrations/0011_move_entity_models_to_entities.py b/core/migrations/0011_move_entity_models_to_entities.py new file mode 100644 index 00000000..4208487c --- /dev/null +++ b/core/migrations/0011_move_entity_models_to_entities.py @@ -0,0 +1,61 @@ +import django.db.models.deletion +from django.db import migrations, models + +ENTITY_MODEL_NAMES = [ + "entity", + "entityauthoritysnapshot", + "entitymention", + "entitycandidate", +] + + +def rename_entity_content_types(apps, schema_editor): + """Retarget existing content types to the new entities app label.""" + + ContentType = apps.get_model("contenttypes", "ContentType") + ContentType.objects.using(schema_editor.connection.alias).filter( + app_label="core", model__in=ENTITY_MODEL_NAMES + ).update(app_label="entities") + + +def rename_entity_content_types_reverse(apps, schema_editor): + """Restore the historical core app label on rollback.""" + + ContentType = apps.get_model("contenttypes", "ContentType") + ContentType.objects.using(schema_editor.connection.alias).filter( + app_label="entities", model__in=ENTITY_MODEL_NAMES + ).update(app_label="core") + + +class Migration(migrations.Migration): + dependencies = [ + ("core", "0010_move_project_models_to_projects"), + ("entities", "0001_initial"), + ] + + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.AlterField( + model_name="content", + name="entity", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="contents", + to="entities.entity", + ), + ), + migrations.DeleteModel(name="EntityAuthoritySnapshot"), + migrations.DeleteModel(name="EntityMention"), + migrations.DeleteModel(name="EntityCandidate"), + migrations.DeleteModel(name="Entity"), + ], + database_operations=[], + ), + migrations.RunPython( + rename_entity_content_types, + rename_entity_content_types_reverse, + ), + ] diff --git 
a/core/migrations/0012_move_content_models_to_content_and_ingestion.py b/core/migrations/0012_move_content_models_to_content_and_ingestion.py new file mode 100644 index 00000000..b4e5a393 --- /dev/null +++ b/core/migrations/0012_move_content_models_to_content_and_ingestion.py @@ -0,0 +1,74 @@ +import django.db.models.deletion +from django.db import migrations, models + + +CONTENT_MODEL_NAMES = ["content", "userfeedback"] +INGESTION_MODEL_NAMES = ["ingestionrun"] + + +def rename_content_and_ingestion_content_types(apps, schema_editor): + """Retarget existing content types to the new owning app labels.""" + + ContentType = apps.get_model("contenttypes", "ContentType") + database_alias = schema_editor.connection.alias + ContentType.objects.using(database_alias).filter( + app_label="core", model__in=CONTENT_MODEL_NAMES + ).update(app_label="content") + ContentType.objects.using(database_alias).filter( + app_label="core", model__in=INGESTION_MODEL_NAMES + ).update(app_label="ingestion") + + +def rename_content_and_ingestion_content_types_reverse(apps, schema_editor): + """Restore the historical core app label on rollback.""" + + ContentType = apps.get_model("contenttypes", "ContentType") + database_alias = schema_editor.connection.alias + ContentType.objects.using(database_alias).filter( + app_label="content", model__in=CONTENT_MODEL_NAMES + ).update(app_label="core") + ContentType.objects.using(database_alias).filter( + app_label="ingestion", model__in=INGESTION_MODEL_NAMES + ).update(app_label="core") + + +class Migration(migrations.Migration): + dependencies = [ + ("core", "0011_move_entity_models_to_entities"), + ("content", "0001_initial"), + ("ingestion", "0001_initial"), + ("entities", "0002_alter_entitycandidate_first_seen_in_and_more"), + ] + + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.AlterField( + model_name="skillresult", + name="content", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, 
+ related_name="skill_results", + to="content.content", + ), + ), + migrations.AlterField( + model_name="reviewqueue", + name="content", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="review_queue_items", + to="content.content", + ), + ), + migrations.DeleteModel(name="IngestionRun"), + migrations.DeleteModel(name="Content"), + migrations.DeleteModel(name="UserFeedback"), + ], + database_operations=[], + ), + migrations.RunPython( + rename_content_and_ingestion_content_types, + rename_content_and_ingestion_content_types_reverse, + ), + ] diff --git a/core/migrations/0013_move_newsletter_models_to_newsletters.py b/core/migrations/0013_move_newsletter_models_to_newsletters.py new file mode 100644 index 00000000..b629f090 --- /dev/null +++ b/core/migrations/0013_move_newsletter_models_to_newsletters.py @@ -0,0 +1,44 @@ +from django.db import migrations + +NEWSLETTER_MODEL_NAMES = ["intakeallowlist", "newsletterintake"] + + +def rename_newsletter_content_types(apps, schema_editor): + """Retarget existing content types to the new newsletters app label.""" + + ContentType = apps.get_model("contenttypes", "ContentType") + database_alias = schema_editor.connection.alias + ContentType.objects.using(database_alias).filter( + app_label="core", model__in=NEWSLETTER_MODEL_NAMES + ).update(app_label="newsletters") + + +def rename_newsletter_content_types_reverse(apps, schema_editor): + """Restore the historical core app label on rollback.""" + + ContentType = apps.get_model("contenttypes", "ContentType") + database_alias = schema_editor.connection.alias + ContentType.objects.using(database_alias).filter( + app_label="newsletters", model__in=NEWSLETTER_MODEL_NAMES + ).update(app_label="core") + + +class Migration(migrations.Migration): + dependencies = [ + ("core", "0012_move_content_models_to_content_and_ingestion"), + ("newsletters", "0001_initial"), + ] + + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + 
migrations.DeleteModel(name="NewsletterIntake"), + migrations.DeleteModel(name="IntakeAllowlist"), + ], + database_operations=[], + ), + migrations.RunPython( + rename_newsletter_content_types, + rename_newsletter_content_types_reverse, + ), + ] diff --git a/core/migrations/0014_move_pipeline_models_to_pipeline.py b/core/migrations/0014_move_pipeline_models_to_pipeline.py new file mode 100644 index 00000000..71f2fc59 --- /dev/null +++ b/core/migrations/0014_move_pipeline_models_to_pipeline.py @@ -0,0 +1,44 @@ +from django.db import migrations + +PIPELINE_MODEL_NAMES = ["skillresult", "reviewqueue"] + + +def rename_pipeline_content_types(apps, schema_editor): + """Retarget existing content types to the new pipeline app label.""" + + ContentType = apps.get_model("contenttypes", "ContentType") + database_alias = schema_editor.connection.alias + ContentType.objects.using(database_alias).filter( + app_label="core", model__in=PIPELINE_MODEL_NAMES + ).update(app_label="pipeline") + + +def rename_pipeline_content_types_reverse(apps, schema_editor): + """Restore the historical core app label on rollback.""" + + ContentType = apps.get_model("contenttypes", "ContentType") + database_alias = schema_editor.connection.alias + ContentType.objects.using(database_alias).filter( + app_label="pipeline", model__in=PIPELINE_MODEL_NAMES + ).update(app_label="core") + + +class Migration(migrations.Migration): + dependencies = [ + ("core", "0013_move_newsletter_models_to_newsletters"), + ("pipeline", "0001_initial"), + ] + + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.DeleteModel(name="SkillResult"), + migrations.DeleteModel(name="ReviewQueue"), + ], + database_operations=[], + ), + migrations.RunPython( + rename_pipeline_content_types, + rename_pipeline_content_types_reverse, + ), + ] diff --git a/core/migrations/0015_move_topic_centroid_snapshot_to_trends.py b/core/migrations/0015_move_topic_centroid_snapshot_to_trends.py new file mode 100644 
index 00000000..b1a396d8 --- /dev/null +++ b/core/migrations/0015_move_topic_centroid_snapshot_to_trends.py @@ -0,0 +1,43 @@ +from django.db import migrations + +TRENDS_MODEL_NAMES = ["topiccentroidsnapshot"] + + +def rename_trends_content_types(apps, schema_editor): + """Retarget existing content types to the new trends app label.""" + + ContentType = apps.get_model("contenttypes", "ContentType") + database_alias = schema_editor.connection.alias + ContentType.objects.using(database_alias).filter( + app_label="core", model__in=TRENDS_MODEL_NAMES + ).update(app_label="trends") + + +def rename_trends_content_types_reverse(apps, schema_editor): + """Restore the historical core app label on rollback.""" + + ContentType = apps.get_model("contenttypes", "ContentType") + database_alias = schema_editor.connection.alias + ContentType.objects.using(database_alias).filter( + app_label="trends", model__in=TRENDS_MODEL_NAMES + ).update(app_label="core") + + +class Migration(migrations.Migration): + dependencies = [ + ("core", "0014_move_pipeline_models_to_pipeline"), + ("trends", "0001_initial"), + ] + + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.DeleteModel(name="TopicCentroidSnapshot"), + ], + database_operations=[], + ), + migrations.RunPython( + rename_trends_content_types, + rename_trends_content_types_reverse, + ), + ] diff --git a/core/models.py b/core/models.py index cff292d8..2b1bd85a 100644 --- a/core/models.py +++ b/core/models.py @@ -1,19 +1,60 @@ -"""Core domain models for projects, ingestion, and editorial review. +"""Core domain models plus compatibility re-exports during app decomposition. The admin, API, Celery tasks, and AI pipeline all revolve around the models in this module. Adding model-level docstrings here gives Django admindocs a useful summary of the core entities new contributors interact with first. 
""" -import base64 -import hashlib import secrets -from urllib.parse import urlsplit, urlunsplit -from cryptography.fernet import Fernet -from django.conf import settings -from django.contrib.auth.models import Group -from django.db import models +from content.models import Content, FeedbackType, UserFeedback +from entities.models import ( + Entity, + EntityAuthoritySnapshot, + EntityCandidate, + EntityCandidateStatus, + EntityMention, + EntityMentionRole, + EntityMentionSentiment, + EntityType, +) +from ingestion.models import IngestionRun, RunStatus +from newsletters.models import IntakeAllowlist, NewsletterIntake, NewsletterIntakeStatus +from pipeline.models import ReviewQueue as _ReviewQueue +from pipeline.models import ReviewReason as _ReviewReason +from pipeline.models import ReviewResolution as _ReviewResolution +from pipeline.models import SkillResult as _SkillResult +from pipeline.models import SkillStatus as _SkillStatus +from projects.models import Project as _Project +from trends.models import TopicCentroidSnapshot as _TopicCentroidSnapshot + +Project = _Project +ReviewQueue = _ReviewQueue +ReviewReason = _ReviewReason +ReviewResolution = _ReviewResolution +SkillResult = _SkillResult +SkillStatus = _SkillStatus +TopicCentroidSnapshot = _TopicCentroidSnapshot + +__all__ = [ + "Content", + "Entity", + "EntityAuthoritySnapshot", + "EntityCandidate", + "EntityCandidateStatus", + "EntityMention", + "EntityMentionRole", + "EntityMentionSentiment", + "EntityType", + "FeedbackType", + "IngestionRun", + "IntakeAllowlist", + "NewsletterIntake", + "NewsletterIntakeStatus", + "Project", + "RunStatus", + "UserFeedback", +] def generate_project_intake_token() -> str: @@ -24,7 +65,9 @@ def generate_project_intake_token() -> str: ``intake+@...`` to route inbound newsletters to a project. 
""" - return secrets.token_hex(16) + from projects.model_support import generate_project_intake_token as _generate_token + + return _generate_token() def generate_confirmation_token() -> str: @@ -36,741 +79,3 @@ def generate_confirmation_token() -> str: """ return secrets.token_urlsafe(24) - - -def normalize_bluesky_handle(handle: str) -> str: - """Normalize Bluesky handles so stored account references stay consistent.""" - - return handle.strip().removeprefix("@").lower() - - -def normalize_bluesky_pds_url(pds_url: str) -> str: - """Normalize a user-provided PDS URL to its base host form.""" - - stripped_url = pds_url.strip().rstrip("/") - if not stripped_url: - return "" - parsed_url = urlsplit(stripped_url) - path = parsed_url.path.rstrip("/") - if path.endswith("/xrpc"): - path = path[: -len("/xrpc")] - return urlunsplit( - ( - parsed_url.scheme, - parsed_url.netloc, - path, - parsed_url.query, - parsed_url.fragment, - ) - ).rstrip("/") - - -def _bluesky_credentials_fernet() -> Fernet: - """Build the symmetric cipher used for Bluesky app-password storage.""" - - key_material = ( - getattr(settings, "BLUESKY_CREDENTIALS_ENCRYPTION_KEY", "") - or settings.SECRET_KEY - ) - derived_key = base64.urlsafe_b64encode( - hashlib.sha256(key_material.encode("utf-8")).digest() - ) - return Fernet(derived_key) - - -class EntityType(models.TextChoices): - """Supported types of tracked entities within a project.""" - - INDIVIDUAL = "individual", "Individual" - VENDOR = "vendor", "Vendor" - ORGANIZATION = "organization", "Organization" - - -class EntityMentionRole(models.TextChoices): - """Supported roles for how an entity appears inside content.""" - - AUTHOR = "author", "Author" - SUBJECT = "subject", "Subject" - QUOTED = "quoted", "Quoted" - MENTIONED = "mentioned", "Mentioned" - - -class EntityMentionSentiment(models.TextChoices): - """Supported editorial sentiment labels for entity mentions.""" - - POSITIVE = "positive", "Positive" - NEUTRAL = "neutral", "Neutral" - 
NEGATIVE = "negative", "Negative" - - -class EntityCandidateStatus(models.TextChoices): - """Review workflow states for extracted entity candidates.""" - - PENDING = "pending", "Pending" - ACCEPTED = "accepted", "Accepted" - REJECTED = "rejected", "Rejected" - MERGED = "merged", "Merged" - - -class SkillStatus(models.TextChoices): - """Execution states recorded for AI skill runs.""" - - PENDING = "pending", "Pending" - RUNNING = "running", "Running" - COMPLETED = "completed", "Completed" - FAILED = "failed", "Failed" - - -class FeedbackType(models.TextChoices): - """Editorial feedback signals that tune authority and ranking.""" - - UPVOTE = "upvote", "Upvote" - DOWNVOTE = "downvote", "Downvote" - - -class SourcePluginName(models.TextChoices): - """Built-in ingestion plugins that can populate project content.""" - - RSS = "rss", "RSS" - REDDIT = "reddit", "Reddit" - BLUESKY = "bluesky", "Bluesky" - - -class NewsletterIntakeStatus(models.TextChoices): - """Lifecycle states for a raw inbound newsletter email.""" - - PENDING = "pending", "Pending" - EXTRACTED = "extracted", "Extracted" - FAILED = "failed", "Failed" - REJECTED = "rejected", "Rejected" - - -class RunStatus(models.TextChoices): - """Outcome states for ingestion runs.""" - - RUNNING = "running", "Running" - SUCCESS = "success", "Success" - FAILED = "failed", "Failed" - - -class ReviewReason(models.TextChoices): - """Reasons content is pushed to the manual review queue.""" - - LOW_CONFIDENCE_CLASSIFICATION = ( - "low_confidence_classification", - "Low Confidence Classification", - ) - BORDERLINE_RELEVANCE = "borderline_relevance", "Borderline Relevance" - - -class ReviewResolution(models.TextChoices): - """Human outcomes for review queue items.""" - - HUMAN_APPROVED = "human_approved", "Human Approved" - HUMAN_REJECTED = "human_rejected", "Human Rejected" - - -class Project(models.Model): - """Represents a newsletter workspace owned by a Django auth group. 
- - A project defines the editorial topic, retention policy, and email-intake - identity used by all downstream ingestion, relevance scoring, and review flows. - Most other core models are scoped to a single project. - """ - - name = models.CharField(max_length=255) - group = models.ForeignKey(Group, on_delete=models.CASCADE, related_name="projects") - topic_description = models.TextField() - content_retention_days = models.PositiveIntegerField(default=365) - intake_token = models.CharField( - max_length=64, - unique=True, - default=generate_project_intake_token, - editable=False, - ) - intake_enabled = models.BooleanField(default=False) - created_at = models.DateTimeField(auto_now_add=True) - - class Meta: - ordering = ["name"] - - def __str__(self) -> str: - return self.name - - -class BlueskyCredentials(models.Model): - """Stores the authenticated Bluesky account used by one project. - - The plugin can read public content through AppView without credentials, but a - stored account enables authenticated reads and self-hosted PDS support. 
- """ - - project = models.OneToOneField( - Project, on_delete=models.CASCADE, related_name="bluesky_credentials" - ) - handle = models.CharField(max_length=255) - app_password_encrypted = models.TextField(blank=True) - pds_url = models.URLField(blank=True) - is_active = models.BooleanField(default=True) - last_verified_at = models.DateTimeField(null=True, blank=True) - last_error = models.TextField(blank=True) - created_at = models.DateTimeField(auto_now_add=True) - updated_at = models.DateTimeField(auto_now=True) - - class Meta: - ordering = ["project__name"] - verbose_name_plural = "Bluesky credentials" - - def __str__(self) -> str: - return f"Bluesky credentials for {self.project.name}" - - @property - def client_base_url(self) -> str: - """Return the effective base URL used by the ATProto client.""" - - if not self.pds_url: - return "https://bsky.social/xrpc" - return f"{self.pds_url.rstrip('/')}/xrpc" - - def has_app_password(self) -> bool: - """Return whether an encrypted app password has been stored.""" - - return bool(self.app_password_encrypted) - - def has_stored_credential(self) -> bool: - """Return whether an encrypted Bluesky credential has been stored.""" - - return self.has_app_password() - - def set_app_password(self, app_password: str) -> None: - """Encrypt and store the given Bluesky app password.""" - - if not app_password: - self.app_password_encrypted = "" - return - self.app_password_encrypted = ( - _bluesky_credentials_fernet() - .encrypt(app_password.encode("utf-8")) - .decode("utf-8") - ) - - def set_stored_credential(self, credential_value: str) -> None: - """Encrypt and store the given Bluesky credential value.""" - - self.set_app_password(credential_value) - - def get_app_password(self) -> str: - """Decrypt and return the stored Bluesky app password.""" - - if not self.app_password_encrypted: - return "" - return ( - _bluesky_credentials_fernet() - .decrypt(self.app_password_encrypted.encode("utf-8")) - .decode("utf-8") - ) - - def 
get_stored_credential(self) -> str: - """Decrypt and return the stored Bluesky credential value.""" - - return self.get_app_password() - - def save(self, *args, **kwargs): - """Normalize stored account fields before persisting the credentials.""" - - self.handle = normalize_bluesky_handle(self.handle) - self.pds_url = normalize_bluesky_pds_url(self.pds_url) - super().save(*args, **kwargs) - - -class ProjectConfig(models.Model): - """Stores tunable scoring parameters for a single project. - - These values let the application adjust how strongly upvotes, downvotes, and - score decay influence entity authority over time without changing code. - They also control whether feedback saves should immediately refresh the - project's topic centroid. - """ - - project = models.OneToOneField( - Project, on_delete=models.CASCADE, related_name="config" - ) - upvote_authority_weight = models.FloatField(default=0.1) - downvote_authority_weight = models.FloatField(default=-0.05) - authority_decay_rate = models.FloatField(default=0.95) - recompute_topic_centroid_on_feedback_save = models.BooleanField(default=True) - - class Meta: - verbose_name = "Project config" - verbose_name_plural = "Project configs" - - def __str__(self) -> str: - return f"Config for {self.project.name}" - - -class Entity(models.Model): - """Represents a person, vendor, or organization tracked inside a project. - - Content can optionally link to an entity so authority signals and editorial - curation can accumulate around a known subject instead of isolated articles. 
- """ - - project = models.ForeignKey( - Project, on_delete=models.CASCADE, related_name="entities" - ) - name = models.CharField(max_length=255) - type = models.CharField(max_length=32, choices=EntityType.choices) - description = models.TextField(blank=True) - authority_score = models.FloatField(default=0.5) - website_url = models.URLField(blank=True) - github_url = models.URLField(blank=True) - linkedin_url = models.URLField(blank=True) - bluesky_handle = models.CharField(max_length=255, blank=True) - mastodon_handle = models.CharField(max_length=255, blank=True) - twitter_handle = models.CharField(max_length=255, blank=True) - created_at = models.DateTimeField(auto_now_add=True) - - class Meta: - ordering = ["name"] - constraints = [ - models.UniqueConstraint( - fields=["project", "name"], name="core_entity_unique_project_name" - ), - ] - - def __str__(self) -> str: - return self.name - - -class EntityAuthoritySnapshot(models.Model): - """Captures one authority-score recomputation for a tracked entity. - - Snapshot rows make the score explainable over time by storing the normalized - component values and final score produced by the recomputation task. 
- """ - - entity = models.ForeignKey( - Entity, on_delete=models.CASCADE, related_name="authority_snapshots" - ) - project = models.ForeignKey( - Project, on_delete=models.CASCADE, related_name="entity_authority_snapshots" - ) - computed_at = models.DateTimeField(auto_now_add=True) - mention_component = models.FloatField() - feedback_component = models.FloatField() - duplicate_component = models.FloatField() - decayed_prior = models.FloatField() - final_score = models.FloatField() - - class Meta: - ordering = ["-computed_at"] - indexes = [ - models.Index(fields=["entity", "-computed_at"]), - models.Index(fields=["project", "-computed_at"]), - ] - - def __str__(self) -> str: - return f"Authority snapshot for {self.entity.name}" - - -class TopicCentroidSnapshot(models.Model): - """Captures one recomputed topic-centroid state for a project. - - Snapshot rows preserve the normalized centroid vector and enough derived drift - metadata to support future admin widgets without querying historical vectors - back out of Qdrant. 
- """ - - project = models.ForeignKey( - Project, on_delete=models.CASCADE, related_name="topic_centroid_snapshots" - ) - computed_at = models.DateTimeField(auto_now_add=True) - centroid_active = models.BooleanField(default=False) - centroid_vector = models.JSONField(default=list, blank=True) - feedback_count = models.PositiveIntegerField(default=0) - upvote_count = models.PositiveIntegerField(default=0) - downvote_count = models.PositiveIntegerField(default=0) - drift_from_previous = models.FloatField(null=True, blank=True) - drift_from_week_ago = models.FloatField(null=True, blank=True) - - class Meta: - ordering = ["-computed_at"] - indexes = [ - models.Index(fields=["project", "-computed_at"]), - models.Index(fields=["project", "centroid_active", "-computed_at"]), - ] - - def __str__(self) -> str: - return f"Topic centroid snapshot for {self.project.name}" - - -class Content(models.Model): - """Stores an ingested content item that may appear in a newsletter. - - A content row is the canonical record for fetched articles, newsletter links, - or other source items. It keeps the raw text used for embedding, skill output, - editorial review, duplicate tracking, and it also links the row to its Qdrant - vector via ``embedding_id``. 
- """ - - project = models.ForeignKey( - Project, on_delete=models.CASCADE, related_name="contents" - ) - url = models.URLField() - title = models.CharField(max_length=512) - author = models.CharField(max_length=255, blank=True) - entity = models.ForeignKey( - Entity, - null=True, - blank=True, - on_delete=models.SET_NULL, - related_name="contents", - ) - source_plugin = models.CharField(max_length=64) - content_type = models.CharField(max_length=64, blank=True) - canonical_url = models.URLField(blank=True, default="", db_index=True) - published_date = models.DateTimeField() - ingested_at = models.DateTimeField(auto_now_add=True) - content_text = models.TextField() - relevance_score = models.FloatField(null=True, blank=True) - authority_adjusted_score = models.FloatField(null=True, blank=True) - embedding_id = models.CharField(max_length=64, blank=True) - source_metadata = models.JSONField(default=dict, blank=True) - duplicate_of = models.ForeignKey( - "self", - null=True, - blank=True, - on_delete=models.SET_NULL, - related_name="duplicates", - ) - duplicate_signal_count = models.IntegerField(default=0) - is_reference = models.BooleanField(default=False) - is_active = models.BooleanField(default=True) - - class Meta: - ordering = ["-published_date"] - indexes = [ - models.Index(fields=["project", "-published_date"]), - models.Index(fields=["project", "-relevance_score"]), - models.Index(fields=["project", "-authority_adjusted_score"]), - models.Index(fields=["project", "is_reference"]), - models.Index(fields=["url"]), - ] - - def __str__(self) -> str: - return self.title - - -class EntityMention(models.Model): - """Represents one tracked-entity mention detected in a content item.""" - - content = models.ForeignKey( - Content, on_delete=models.CASCADE, related_name="entity_mentions" - ) - entity = models.ForeignKey( - Entity, on_delete=models.CASCADE, related_name="mentions" - ) - project = models.ForeignKey( - Project, on_delete=models.CASCADE, 
related_name="entity_mentions" - ) - role = models.CharField(max_length=16, choices=EntityMentionRole.choices) - sentiment = models.CharField( - max_length=16, - choices=EntityMentionSentiment.choices, - blank=True, - default="", - ) - span = models.TextField(blank=True) - confidence = models.FloatField(default=0.0) - created_at = models.DateTimeField(auto_now_add=True) - - class Meta: - ordering = ["-created_at"] - constraints = [ - models.UniqueConstraint( - fields=["content", "entity", "role"], - name="core_entitymention_unique_content_entity_role", - ) - ] - indexes = [ - models.Index(fields=["entity", "created_at"]), - models.Index(fields=["project", "created_at"]), - ] - - def __str__(self) -> str: - return f"{self.entity.name} in {self.content.title}" - - -class EntityCandidate(models.Model): - """Stores an extracted named entity awaiting human confirmation.""" - - project = models.ForeignKey( - Project, on_delete=models.CASCADE, related_name="entity_candidates" - ) - name = models.CharField(max_length=255) - suggested_type = models.CharField(max_length=32, choices=EntityType.choices) - first_seen_in = models.ForeignKey( - Content, - null=True, - blank=True, - on_delete=models.SET_NULL, - related_name="entity_candidates", - ) - occurrence_count = models.IntegerField(default=1) - status = models.CharField( - max_length=16, - choices=EntityCandidateStatus.choices, - default=EntityCandidateStatus.PENDING, - ) - merged_into = models.ForeignKey( - Entity, - null=True, - blank=True, - on_delete=models.SET_NULL, - related_name="merged_entity_candidates", - ) - created_at = models.DateTimeField(auto_now_add=True) - updated_at = models.DateTimeField(auto_now=True) - - class Meta: - ordering = ["-occurrence_count", "name"] - constraints = [ - models.UniqueConstraint( - fields=["project", "name"], - name="core_entitycandidate_unique_project_name", - ) - ] - indexes = [ - models.Index(fields=["project", "status", "occurrence_count"]), - ] - - def __str__(self) -> str: - 
return self.name - - -class IntakeAllowlist(models.Model): - """Tracks who is allowed to send newsletters into a project inbox. - - When the first message arrives from a sender, the system creates an allowlist - entry and emails a confirmation link. After confirmation, future inbound - messages from the same sender can be processed automatically. - """ - - project = models.ForeignKey( - Project, on_delete=models.CASCADE, related_name="intake_allowlist" - ) - sender_email = models.EmailField() - confirmed_at = models.DateTimeField(null=True, blank=True) - confirmation_token = models.CharField( - max_length=64, unique=True, default=generate_confirmation_token - ) - created_at = models.DateTimeField(auto_now_add=True) - - class Meta: - ordering = ["sender_email"] - constraints = [ - models.UniqueConstraint( - fields=["project", "sender_email"], - name="core_allowlist_unique_project_sender", - ), - ] - - def __str__(self) -> str: - return f"{self.sender_email} for {self.project.name}" - - @property - def is_confirmed(self) -> bool: - """Return whether the sender has confirmed newsletter intake access.""" - - return self.confirmed_at is not None - - -class NewsletterIntake(models.Model): - """Stores a raw inbound newsletter email before extraction. - - Intake rows preserve the original email payload, deduplicate by message ID, - and record whether extraction succeeded so the system can reprocess or audit - inbound newsletter handling later. 
- """ - - project = models.ForeignKey( - Project, on_delete=models.CASCADE, related_name="newsletter_intakes" - ) - sender_email = models.EmailField() - subject = models.CharField(max_length=512) - received_at = models.DateTimeField(auto_now_add=True) - raw_html = models.TextField(blank=True) - raw_text = models.TextField(blank=True) - message_id = models.CharField(max_length=255, unique=True) - status = models.CharField( - max_length=16, - choices=NewsletterIntakeStatus.choices, - default=NewsletterIntakeStatus.PENDING, - ) - extraction_result = models.JSONField(null=True, blank=True) - error_message = models.TextField(blank=True) - - class Meta: - ordering = ["-received_at"] - indexes = [ - models.Index(fields=["project", "sender_email", "status"]), - ] - - def __str__(self) -> str: - return f"{self.subject or self.message_id}" - - -class SkillResult(models.Model): - """Persists the output of one AI skill execution for a content item. - - Skill results provide an auditable history of classifications, relevance - scores, summaries, and related-content lookups, including model metadata, - latency, and any superseded reruns. 
- """ - - content = models.ForeignKey( - Content, on_delete=models.CASCADE, related_name="skill_results" - ) - project = models.ForeignKey( - Project, on_delete=models.CASCADE, related_name="skill_results" - ) - skill_name = models.CharField(max_length=64) - status = models.CharField(max_length=16, choices=SkillStatus.choices) - result_data = models.JSONField(null=True, blank=True) - error_message = models.TextField(blank=True) - model_used = models.CharField(max_length=64, blank=True) - latency_ms = models.IntegerField(null=True, blank=True) - confidence = models.FloatField(null=True, blank=True) - created_at = models.DateTimeField(auto_now_add=True) - superseded_by = models.ForeignKey( - "self", - null=True, - blank=True, - on_delete=models.SET_NULL, - related_name="supersedes", - ) - - class Meta: - ordering = ["-created_at"] - indexes = [ - models.Index(fields=["content", "skill_name"]), - models.Index(fields=["project", "created_at"]), - ] - - def __str__(self) -> str: - return f"{self.skill_name} for {self.content.title}" - - -class UserFeedback(models.Model): - """Records an editor's feedback on a specific content item. - - Feedback is stored separately from the content row so the application can use - it as an explicit human signal when adjusting ranking and authority logic. 
- """ - - content = models.ForeignKey( - Content, on_delete=models.CASCADE, related_name="feedback" - ) - project = models.ForeignKey( - Project, on_delete=models.CASCADE, related_name="feedback" - ) - user = models.ForeignKey( - settings.AUTH_USER_MODEL, - on_delete=models.CASCADE, - related_name="content_feedback", - ) - feedback_type = models.CharField(max_length=16, choices=FeedbackType.choices) - created_at = models.DateTimeField(auto_now_add=True) - - class Meta: - ordering = ["-created_at"] - constraints = [ - models.UniqueConstraint( - fields=["content", "user"], name="core_feedback_unique_content_user" - ), - ] - - def __str__(self) -> str: - return f"{self.feedback_type} by {self.user}" - - -class SourceConfig(models.Model): - """Configures one ingestion source for a project. - - Each source config selects a plugin, stores its provider-specific settings, - and records the last successful fetch time used for incremental ingestion. - """ - - project = models.ForeignKey( - Project, on_delete=models.CASCADE, related_name="source_configs" - ) - plugin_name = models.CharField(max_length=64, choices=SourcePluginName.choices) - config = models.JSONField(default=dict) - is_active = models.BooleanField(default=True) - last_fetched_at = models.DateTimeField(null=True, blank=True) - - class Meta: - ordering = ["plugin_name", "id"] - indexes = [ - models.Index(fields=["project", "plugin_name", "is_active"]), - ] - - def __str__(self) -> str: - return f"{self.plugin_name} source for {self.project.name}" - - -class IngestionRun(models.Model): - """Captures the outcome of one source-ingestion execution. - - Run rows make ingestion observable in the admin by recording the source, - timestamps, item counts, and any error that stopped the fetch. 
- """ - - project = models.ForeignKey( - Project, on_delete=models.CASCADE, related_name="ingestion_runs" - ) - plugin_name = models.CharField(max_length=64) - started_at = models.DateTimeField(auto_now_add=True) - completed_at = models.DateTimeField(null=True, blank=True) - status = models.CharField(max_length=16, choices=RunStatus.choices) - items_fetched = models.IntegerField(default=0) - items_ingested = models.IntegerField(default=0) - error_message = models.TextField(blank=True) - - class Meta: - ordering = ["-started_at"] - indexes = [ - models.Index(fields=["project", "plugin_name", "-started_at"]), - ] - - def __str__(self) -> str: - return f"{self.plugin_name} for {self.project.name}" - - -class ReviewQueue(models.Model): - """Tracks content items that require a human decision. - - The AI pipeline adds rows here when classification confidence is low or the - relevance score is borderline. Review outcomes are stored on the queue item so - editors can see why an article was escalated and how it was resolved. - """ - - project = models.ForeignKey( - Project, on_delete=models.CASCADE, related_name="review_queue_items" - ) - content = models.ForeignKey( - Content, on_delete=models.CASCADE, related_name="review_queue_items" - ) - reason = models.CharField(max_length=64, choices=ReviewReason.choices) - confidence = models.FloatField() - created_at = models.DateTimeField(auto_now_add=True) - resolved = models.BooleanField(default=False) - resolution = models.CharField( - max_length=64, choices=ReviewResolution.choices, blank=True - ) - - class Meta: - ordering = ["resolved", "-created_at"] - - def __str__(self) -> str: - return f"{self.reason} for {self.content.title}" diff --git a/core/newsletters.py b/core/newsletters.py index c7fb4665..813222b2 100644 --- a/core/newsletters.py +++ b/core/newsletters.py @@ -1,332 +1,21 @@ -"""Newsletter intake helpers for inbound email processing. 
- -This module normalizes inbound sender data, sanitizes HTML before storage, -deduplicates raw email messages, and hands confirmed messages off to the Celery -task that extracts content items from a newsletter email. -""" - -from __future__ import annotations - -from email.utils import parseaddr -from html import escape -from html.parser import HTMLParser -from typing import Any, Iterable, cast - -from celery import current_app -from django.conf import settings as django_settings -from django.core.mail import EmailMultiAlternatives -from django.urls import reverse - -from core.models import IntakeAllowlist, NewsletterIntake, Project -from core.newsletter_extraction import extract_newsletter_items -from core.settings_types import CoreSettings - -settings = cast(CoreSettings, django_settings) - -__all__ = ["extract_newsletter_items"] - - -def normalize_sender_email(value: str) -> str: - """Normalize a sender header into a lowercase bare email address.""" - - _, email_address = parseaddr(value) - return email_address.strip().lower() - - -def sanitize_newsletter_html(raw_html: str) -> str: - """Remove script content and inline event handlers from newsletter HTML. - - Args: - raw_html: Raw HTML body captured from the inbound message. - - Returns: - A sanitized HTML fragment safe to persist and render in the admin. 
- """ - - without_scripts = _strip_script_blocks(raw_html) - parser = _InlineHandlerStrippingParser() - parser.feed(without_scripts) - parser.close() - return parser.get_html() - - -def _strip_script_blocks(raw_html: str) -> str: - """Remove complete ```` boundary for a previously found script tag.""" - - search_index = start_index - lower_html = raw_html.lower() - while True: - candidate = lower_html.find("": - search_index = candidate + 1 - continue - closing_tag_end = _find_tag_end(raw_html, candidate + 1) - if closing_tag_end == -1: - return len(raw_html) - return closing_tag_end + 1 - - -def _find_tag_end(raw_html: str, start_index: int) -> int: - """Find the closing ``>`` for a tag while respecting quoted attributes.""" - - quote_char: str | None = None - for index in range(start_index, len(raw_html)): - current_char = raw_html[index] - if quote_char is not None: - if current_char == quote_char: - quote_char = None - continue - if current_char in {'"', "'"}: - quote_char = current_char - continue - if current_char == ">": - return index - return -1 - - -class _InlineHandlerStrippingParser(HTMLParser): - """HTML parser that rebuilds markup without inline JavaScript handlers.""" - - def __init__(self) -> None: - super().__init__(convert_charrefs=False) - self._parts: list[str] = [] - - def get_html(self) -> str: - """Return the reconstructed sanitized HTML string.""" - - return "".join(self._parts) - - def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: - self._parts.append(self._render_tag(tag, attrs)) - - def handle_startendtag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: - rendered = self._render_tag(tag, attrs) - if rendered.endswith(">"): - rendered = f"{rendered[:-1]} />" - self._parts.append(rendered) - - def handle_endtag(self, tag: str) -> None: - self._parts.append(f"") - - def handle_data(self, data: str) -> None: - self._parts.append(data) - - def handle_entityref(self, name: str) -> None: - 
self._parts.append(f"&{name};") - - def handle_charref(self, name: str) -> None: - self._parts.append(f"&#{name};") - - def handle_comment(self, data: str) -> None: - self._parts.append(f"") - - def handle_decl(self, decl: str) -> None: - self._parts.append(f"") - - def unknown_decl(self, data: str) -> None: - self._parts.append(f"") - - @staticmethod - def _render_tag(tag: str, attrs: list[tuple[str, str | None]]) -> str: - """Render a tag while omitting attributes like ``onclick``.""" - - rendered_attrs: list[str] = [] - for name, value in attrs: - if name.lower().startswith("on"): - continue - if value is None: - rendered_attrs.append(name) - continue - rendered_attrs.append(f'{name}="{escape(value, quote=True)}"') - attr_suffix = f" {' '.join(rendered_attrs)}" if rendered_attrs else "" - return f"<{tag}{attr_suffix}>" - - -def extract_project_token(recipient: str) -> str | None: - """Extract the project intake token from an inbound recipient address. - - Args: - recipient: Email recipient such as ``intake+@example.com``. - - Returns: - The embedded project token, or ``None`` when the address does not match the - intake alias format. - """ - - _, email_address = parseaddr(recipient) - local_part = email_address.partition("@")[0] - prefix, separator, token = local_part.partition("+") - if prefix != "intake" or separator != "+" or not token: - return None - return token - - -def send_confirmation_email( - *, to_email: str, confirm_url: str, project_name: str -) -> None: - """Send the confirmation email required for new newsletter senders.""" - - subject = f"Confirm newsletter intake for {project_name}" - text_body = ( - "Confirm this sender for newsletter ingestion.\n\n" - f"Confirm sender: {confirm_url}" - ) - html_body = ( - "

Confirm this sender for newsletter ingestion.

" - f'

Confirm sender

' - ) - - message = EmailMultiAlternatives( - subject=subject, - body=text_body, - from_email=settings.DEFAULT_FROM_EMAIL, - to=[to_email], - ) - message.attach_alternative(html_body, "text/html") - message.send() - - -def build_confirmation_url(token: str) -> str: - """Build the absolute confirmation URL for an allowlist token.""" - - base_url = settings.NEWSLETTER_API_BASE_URL.rstrip("/") - return f"{base_url}{reverse('confirm-newsletter-sender', kwargs={'token': token})}" - - -def process_inbound_newsletter( - *, - recipients: Iterable[str], - sender_email: str, - subject: str, - raw_html: str, - raw_text: str, - message_id: str, -) -> dict[str, Any]: - """Persist and route one inbound newsletter message. - - Args: - recipients: Recipient addresses from the inbound email payload. - sender_email: Envelope sender or normalized message sender. - subject: Newsletter email subject. - raw_html: Raw HTML body captured from the provider webhook. - raw_text: Raw plain-text body captured from the provider webhook. - message_id: Provider message identifier used for deduplication. - - Returns: - A status payload describing whether the message was ignored, queued, or is - waiting for sender confirmation. 
- """ - - project = _find_intake_project(recipients) - if project is None: - return {"status": "ignored", "reason": "no_matching_project"} - - normalized_sender_email = normalize_sender_email(sender_email) - normalized_message_id = message_id.strip() - if not normalized_sender_email or not normalized_message_id: - return {"status": "ignored", "reason": "missing_sender_or_message_id"} - - defaults = { - "project": project, - "sender_email": normalized_sender_email, - "subject": subject[:512], - "raw_html": sanitize_newsletter_html(raw_html), - "raw_text": raw_text, - } - intake, created = NewsletterIntake.objects.get_or_create( - message_id=normalized_message_id, - defaults=defaults, - ) - if not created: - return {"id": intake.id, "status": intake.status, "duplicate": True} - - allowlist, allowlist_created = IntakeAllowlist.objects.get_or_create( - project=project, - sender_email=normalized_sender_email, - ) - - if allowlist.is_confirmed: - queue_newsletter_intake(intake.id) - return {"id": intake.id, "status": intake.status} - - if allowlist_created: - send_confirmation_email( - to_email=normalized_sender_email, - confirm_url=build_confirmation_url(allowlist.confirmation_token), - project_name=project.name, - ) - - return {"id": intake.id, "status": intake.status, "confirmation_required": True} - - -def queue_newsletter_intake(intake_id: int) -> None: - """Dispatch newsletter extraction for a stored intake row. - - Args: - intake_id: Primary key of the stored ``NewsletterIntake`` row. 
- """ - - process_newsletter_intake = current_app.tasks[ - "core.tasks.process_newsletter_intake" - ] - if settings.CELERY_TASK_ALWAYS_EAGER: - process_newsletter_intake.apply(args=(intake_id,), throw=True) - else: - process_newsletter_intake.delay(intake_id) - - -def _find_intake_project(recipients: Iterable[str]) -> Project | None: - """Resolve the first enabled project referenced by the recipient list.""" - - for recipient in recipients: - token = extract_project_token(recipient) - if token is None: - continue - project = Project.objects.filter( - intake_token=token, intake_enabled=True - ).first() - if project is not None: - return project - return None +"""Compatibility exports for newsletter intake helpers.""" + +from newsletters.intake import ( + build_confirmation_url, + extract_newsletter_items, + normalize_sender_email, + process_inbound_newsletter, + queue_newsletter_intake, + sanitize_newsletter_html, + send_confirmation_email, +) + +__all__ = [ + "build_confirmation_url", + "extract_newsletter_items", + "normalize_sender_email", + "process_inbound_newsletter", + "queue_newsletter_intake", + "sanitize_newsletter_html", + "send_confirmation_email", +] diff --git a/core/permissions.py b/core/permissions.py new file mode 100644 index 00000000..77757f13 --- /dev/null +++ b/core/permissions.py @@ -0,0 +1,158 @@ +"""Cross-cutting helpers and DRF permission classes for project roles.""" + +from __future__ import annotations + +from rest_framework import permissions + +from projects.models import Project, ProjectMembership, ProjectRole + + +def get_visible_projects_queryset(user): + """Return the projects visible to the given authenticated user.""" + + if not getattr(user, "is_authenticated", False): + return Project.objects.none() + return Project.objects.filter(memberships__user=user).distinct() + + +def get_user_role(user, project: Project) -> str | None: + """Return the user's membership role for the given project, if any.""" + + if not getattr(user, 
"is_authenticated", False): + return None + if getattr(user, "is_superuser", False): + return ProjectRole.ADMIN + return ( + ProjectMembership.objects.filter(user=user, project=project) + .values_list("role", flat=True) + .first() + ) + + +def _get_project_from_view(view) -> Project | None: + """Resolve the current nested project from a project-scoped view when present.""" + + get_project = getattr(view, "get_project", None) + if callable(get_project): + return get_project() + return None + + +def _resolve_project(obj) -> Project: + """Resolve the owning project for a project-scoped model instance.""" + + if isinstance(obj, Project): + return obj + return obj.project + + +class IsProjectMember(permissions.BasePermission): + """Allow authenticated project members to read project-scoped resources.""" + + def has_permission(self, request, view) -> bool: + user = request.user + if not user or not user.is_authenticated: + return False + + project = _get_project_from_view(view) + if project is None: + return True + + return get_user_role(user, project) is not None + + def has_object_permission(self, request, view, obj) -> bool: + return get_user_role(request.user, _resolve_project(obj)) is not None + + +class IsProjectContributor(permissions.BasePermission): + """Allow only admins and members to access contributor-only resources.""" + + allowed_roles = {ProjectRole.ADMIN, ProjectRole.MEMBER} + + def has_permission(self, request, view) -> bool: + user = request.user + if not user or not user.is_authenticated: + return False + + project = _get_project_from_view(view) + if project is None: + return True + + return get_user_role(user, project) in self.allowed_roles + + def has_object_permission(self, request, view, obj) -> bool: + return get_user_role(request.user, _resolve_project(obj)) in self.allowed_roles + + +class IsProjectMemberWritable(permissions.BasePermission): + """Allow all members to read, but reserve writes for admins and members.""" + + writable_roles = 
{ProjectRole.ADMIN, ProjectRole.MEMBER} + + def has_permission(self, request, view) -> bool: + user = request.user + if not user or not user.is_authenticated: + return False + + project = _get_project_from_view(view) + if project is None: + return True + + role = get_user_role(user, project) + if request.method in permissions.SAFE_METHODS: + return role is not None + return role in self.writable_roles + + def has_object_permission(self, request, view, obj) -> bool: + role = get_user_role(request.user, _resolve_project(obj)) + if request.method in permissions.SAFE_METHODS: + return role is not None + return role in self.writable_roles + + +class IsProjectAdmin(permissions.BasePermission): + """Restrict access to project admins.""" + + def has_permission(self, request, view) -> bool: + user = request.user + if not user or not user.is_authenticated: + return False + + project = _get_project_from_view(view) + if project is None: + return True + + return get_user_role(user, project) == ProjectRole.ADMIN + + def has_object_permission(self, request, view, obj) -> bool: + return get_user_role(request.user, _resolve_project(obj)) == ProjectRole.ADMIN + + +class IsProjectFeedbackEditor(permissions.BasePermission): + """Allow feedback reads to any member and writes by owners or project admins.""" + + contributor_roles = {ProjectRole.ADMIN, ProjectRole.MEMBER} + + def has_permission(self, request, view) -> bool: + user = request.user + if not user or not user.is_authenticated: + return False + + project = _get_project_from_view(view) + if project is None: + return True + + role = get_user_role(user, project) + if request.method in permissions.SAFE_METHODS: + return role is not None + return role in self.contributor_roles + + def has_object_permission(self, request, view, obj) -> bool: + role = get_user_role(request.user, _resolve_project(obj)) + if request.method in permissions.SAFE_METHODS: + return role is not None + if role == ProjectRole.ADMIN: + return True + if role != 
ProjectRole.MEMBER: + return False + return obj.user_id == request.user.id diff --git a/core/pipeline.py b/core/pipeline.py index 8c34d620..0d19cae7 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -18,6 +18,7 @@ from django.utils import timezone from langgraph.graph import END, StateGraph +from content.models import Content from core.deduplication import canonicalize_url from core.embeddings import ( build_content_embedding_text, @@ -28,7 +29,7 @@ ) from core.entity_extraction import run_entity_extraction from core.llm import build_skill_user_prompt, get_skill_definition, openrouter_chat_json -from core.models import Content, ReviewQueue, ReviewReason, SkillResult, SkillStatus +from pipeline.models import ReviewQueue, ReviewReason, SkillResult, SkillStatus logger = logging.getLogger(__name__) diff --git a/core/plugins/__init__.py b/core/plugins/__init__.py index 29184f7d..cd50803b 100644 --- a/core/plugins/__init__.py +++ b/core/plugins/__init__.py @@ -1,5 +1,5 @@ -"""Public plugin-registry helpers used by the rest of the application.""" +"""Compatibility wrappers for the ingestion plugin registry.""" -from core.plugins.registry import get_plugin_for_source_config, validate_plugin_config +from ingestion.plugins import get_plugin_for_source_config, validate_plugin_config __all__ = ["get_plugin_for_source_config", "validate_plugin_config"] diff --git a/core/plugins/base.py b/core/plugins/base.py index 8fe08f0d..00801b41 100644 --- a/core/plugins/base.py +++ b/core/plugins/base.py @@ -1,95 +1,3 @@ -"""Base types and shared behavior for ingestion source plugins.""" +"""Compatibility wrapper for the ingestion plugin base types.""" -from __future__ import annotations - -from abc import ABC, abstractmethod -from dataclasses import dataclass -from datetime import datetime -from urllib.parse import urlparse - - -@dataclass(slots=True) -class ContentItem: - """Normalized content payload returned by source plugins.""" - - url: str - title: str - author: str - 
published_date: datetime - content_text: str - source_plugin: str - source_metadata: dict[str, object] | None = None - - -class SourcePlugin(ABC): - """Abstract base class implemented by all ingestion source plugins.""" - - required_config_fields: tuple[str, ...] = () - - def __init__(self, source_config): - """Bind a plugin instance to the saved source configuration and project.""" - - self.source_config = source_config - self.project = source_config.project - - @classmethod - def validate_config(cls, config: object) -> dict: - """Validate and normalize raw JSON configuration for a plugin. - - Args: - config: Raw configuration object submitted through admin or API. - - Returns: - A normalized configuration dictionary. - - Raises: - ValueError: If the config is not a mapping or required fields are - missing. - """ - - if not isinstance(config, dict): - raise ValueError("Config must be a JSON object.") - normalized_config = dict(config) - for field_name in cls.required_config_fields: - if not normalized_config.get(field_name): - raise ValueError(f"Missing required config field: {field_name}") - return normalized_config - - @abstractmethod - def fetch_new_content(self, since: datetime | None) -> list[ContentItem]: - """Fetch content newer than the given timestamp.""" - - raise NotImplementedError - - @abstractmethod - def health_check(self) -> bool: - """Return whether the remote source is reachable and usable.""" - - raise NotImplementedError - - def match_entity_for_url(self, url: str): - """Match a fetched URL to a tracked entity based on hostname equality.""" - - target_hostname = self._normalize_hostname(url) - if not target_hostname: - return None - for entity in self.project.entities.exclude(website_url=""): - if self._normalize_hostname(entity.website_url) == target_hostname: - return entity - return None - - def match_entity_for_item(self, item: ContentItem): - """Match a fetched content item to an entity. 
- - The default implementation preserves the existing hostname-based behavior - by matching against the normalized item URL. - """ - - return self.match_entity_for_url(item.url) - - @staticmethod - def _normalize_hostname(url: str) -> str: - """Normalize a URL hostname for entity matching.""" - - hostname = urlparse(url).hostname or "" - return hostname.removeprefix("www.").lower() +from ingestion.plugins.base import * # noqa: F403 diff --git a/core/plugins/bluesky.py b/core/plugins/bluesky.py index 0f19c760..f402666b 100644 --- a/core/plugins/bluesky.py +++ b/core/plugins/bluesky.py @@ -1,247 +1,3 @@ -"""Bluesky source plugin used to ingest public feeds and author timelines.""" +"""Compatibility wrapper for the Bluesky source plugin.""" -from __future__ import annotations - -from datetime import datetime - -from atproto import Client -from django.utils import timezone -from django.utils.dateparse import parse_datetime - -from core.models import BlueskyCredentials, SourcePluginName -from core.plugins.base import ContentItem, SourcePlugin - -PUBLIC_APPVIEW_BASE_URL = "https://public.api.bsky.app" - - -class BlueskySourcePlugin(SourcePlugin): - """Fetch public Bluesky feed or author posts through AppView.""" - - @classmethod - def verify_credentials(cls, credentials: BlueskyCredentials) -> None: - """Authenticate a stored Bluesky account and confirm the session works.""" - - try: - client = cls._authenticated_client_for_credentials(credentials) - client.com.atproto.server.get_session() - except Exception as exc: - cls._record_credentials_status(credentials, error_message=str(exc)) - raise - cls._record_credentials_status(credentials, error_message="") - - @classmethod - def validate_config(cls, config: object) -> dict: - """Validate Bluesky feed or author configuration.""" - - normalized_config = super().validate_config(config) - feed_uri = normalized_config.get("feed_uri") - author_handle = normalized_config.get("author_handle") - if bool(feed_uri) == 
bool(author_handle): - raise ValueError("Provide exactly one of feed_uri or author_handle") - if feed_uri and ( - not isinstance(feed_uri, str) - or not feed_uri.startswith("at://") - or "/app.bsky.feed.generator/" not in feed_uri - ): - raise ValueError( - "feed_uri must be a Bluesky feed generator at:// URI" - ) - if author_handle: - normalized_handle = cls._normalize_handle(author_handle) - if not normalized_handle: - raise ValueError("author_handle must be a non-empty Bluesky handle") - normalized_config["author_handle"] = normalized_handle - - normalized_config["max_posts_per_fetch"] = int( - normalized_config.get("max_posts_per_fetch", 100) - ) - if normalized_config["max_posts_per_fetch"] <= 0: - raise ValueError("max_posts_per_fetch must be a positive integer") - - include_replies = normalized_config.get("include_replies", False) - if not isinstance(include_replies, bool): - raise ValueError("include_replies must be a boolean") - normalized_config["include_replies"] = include_replies - return normalized_config - - def fetch_new_content(self, since: datetime | None) -> list[ContentItem]: - """Fetch public Bluesky posts newer than ``since`` and normalize them.""" - - response = self._get_feed_response() - items: list[ContentItem] = [] - seen_post_uris: set[str] = set() - for feed_view in response.feed: - post = getattr(feed_view, "post", None) - if post is None or post.uri in seen_post_uris: - continue - seen_post_uris.add(post.uri) - if not self.source_config.config.get("include_replies", False) and getattr( - feed_view, "reply", None - ): - continue - published_date = self._published_date_for_post(post) - if since and published_date <= since: - continue - items.append(self._build_content_item(post, published_date)) - return items - - def health_check(self) -> bool: - """Treat the source as healthy when the AppView request succeeds.""" - - credentials = self._credentials() - try: - self._get_feed_response(limit=1) - except Exception as exc: - 
self._record_credentials_status(credentials, error_message=str(exc)) - raise - self._record_credentials_status(credentials, error_message="") - return True - - def match_entity_for_item(self, item: ContentItem): - """Match posts to entities using the author's Bluesky handle first.""" - - author_handle = self._normalize_handle( - str((item.source_metadata or {}).get("author_handle", "")) - ) - if author_handle: - for entity in self.project.entities.exclude(bluesky_handle=""): - if self._normalize_handle(entity.bluesky_handle) == author_handle: - return entity - return super().match_entity_for_item(item) - - def _get_feed_response(self, limit: int | None = None): - """Query the configured public feed endpoint.""" - - request_limit = limit or self.source_config.config.get("max_posts_per_fetch", 100) - client = self._client() - feed_uri = self.source_config.config.get("feed_uri") - if feed_uri: - return client.app.bsky.feed.get_feed( - {"feed": feed_uri, "limit": request_limit} - ) - return client.app.bsky.feed.get_author_feed( - { - "actor": self.source_config.config["author_handle"], - "include_pins": False, - "limit": request_limit, - } - ) - - def _build_content_item(self, post, published_date: datetime) -> ContentItem: - """Convert one AppView post into the shared plugin payload.""" - - author_handle = self._normalize_handle(self._nested_value(post, "author", "handle")) - external_url = self._nested_value(post, "embed", "external", "uri") - external_title = ( - self._nested_value(post, "embed", "external", "title") or "" - ).strip() - post_url = self._post_url(post) - record_text = (self._nested_value(post, "record", "text") or "").strip() - title = external_title or record_text.splitlines()[0].strip() or post_url - return ContentItem( - url=external_url or post_url, - title=title, - author=author_handle, - published_date=published_date, - content_text=record_text or external_title or post_url, - source_plugin=SourcePluginName.BLUESKY, - source_metadata={ - 
"author_did": self._nested_value(post, "author", "did") or "", - "author_handle": author_handle, - "embedded_url": external_url or "", - "post_uri": getattr(post, "uri", ""), - "reply_count": getattr(post, "reply_count", 0) or 0, - "repost_count": getattr(post, "repost_count", 0) or 0, - }, - ) - - @staticmethod - def _published_date_for_post(post) -> datetime: - """Choose the indexed or record timestamp for a Bluesky post.""" - - for value in ( - getattr(post, "indexed_at", None), - BlueskySourcePlugin._nested_value(post, "record", "created_at"), - ): - if value: - parsed_value = parse_datetime(value) - if parsed_value is not None: - return parsed_value - return timezone.now() - - @staticmethod - def _post_url(post) -> str: - """Build the public web URL for a Bluesky post when no card link exists.""" - - actor = ( - BlueskySourcePlugin._normalize_handle( - BlueskySourcePlugin._nested_value(post, "author", "handle") - ) - or BlueskySourcePlugin._nested_value(post, "author", "did") - or "" - ) - post_uri = getattr(post, "uri", "") - post_id = post_uri.rstrip("/").split("/")[-1] if post_uri else "" - if actor and post_id: - return f"https://bsky.app/profile/{actor}/post/{post_id}" - return post_uri - - @staticmethod - def _normalize_handle(handle: object) -> str: - """Normalize handles so matching stays case-insensitive.""" - - if not isinstance(handle, str): - return "" - return handle.strip().removeprefix("@").lower() - - @staticmethod - def _nested_value(value, *path: str): - """Read nested object or dict attributes without binding to model types.""" - - current_value = value - for path_part in path: - if current_value is None: - return None - if isinstance(current_value, dict): - current_value = current_value.get(path_part) - else: - current_value = getattr(current_value, path_part, None) - return current_value - - def _client(self) -> Client: - """Create a public or authenticated ATProto client for the project.""" - - credentials = self._credentials() - if 
credentials is None: - return Client(base_url=PUBLIC_APPVIEW_BASE_URL) - return self._authenticated_client_for_credentials(credentials) - - def _credentials(self) -> BlueskyCredentials | None: - """Return the active project-scoped Bluesky credentials, if configured.""" - - return BlueskyCredentials.objects.filter(project=self.project, is_active=True).first() - - @staticmethod - def _authenticated_client_for_credentials(credentials: BlueskyCredentials) -> Client: - """Build an authenticated client from a stored credential record.""" - - if not credentials.has_app_password(): - raise RuntimeError("Bluesky credentials are missing an app password.") - client = Client(base_url=credentials.client_base_url) - client.login(login=credentials.handle, password=credentials.get_app_password()) - return client - - @staticmethod - def _record_credentials_status( - credentials: BlueskyCredentials | None, *, error_message: str - ) -> None: - """Persist the latest credential verification result when credentials exist.""" - - if credentials is None: - return - update_fields = ["last_error", "updated_at"] - credentials.last_error = error_message - if not error_message: - credentials.last_verified_at = timezone.now() - update_fields.append("last_verified_at") - credentials.save(update_fields=update_fields) +from ingestion.plugins.bluesky import * # noqa: F403 diff --git a/core/plugins/reddit.py b/core/plugins/reddit.py index c91f5168..f9cd3f0b 100644 --- a/core/plugins/reddit.py +++ b/core/plugins/reddit.py @@ -1,95 +1,3 @@ -"""Reddit source plugin used for trend and discussion ingestion.""" +"""Compatibility wrapper for the Reddit source plugin.""" -from __future__ import annotations - -from datetime import UTC, datetime - -import praw -from django.conf import settings - -from core.models import SourcePluginName -from core.plugins.base import ContentItem, SourcePlugin - - -class RedditSourcePlugin(SourcePlugin): - """Fetch posts from a configured subreddit listing.""" - - 
required_config_fields = ("subreddit",) - - @classmethod - def validate_config(cls, config: object) -> dict: - """Validate Reddit-specific config such as listing and limit values.""" - - normalized_config = super().validate_config(config) - listing = normalized_config.get("listing", "both") - if listing not in {"new", "hot", "both"}: - raise ValueError("listing must be one of: new, hot, both") - normalized_config["listing"] = listing - normalized_config["limit"] = int(normalized_config.get("limit", 25)) - if normalized_config["limit"] <= 0: - raise ValueError("limit must be a positive integer") - return normalized_config - - def fetch_new_content(self, since: datetime | None) -> list[ContentItem]: - """Fetch subreddit submissions newer than ``since`` and normalize them.""" - - subreddit = self._client().subreddit(self.source_config.config["subreddit"]) - items: list[ContentItem] = [] - seen_submission_ids: set[str] = set() - for submission in self._iter_submissions(subreddit): - if submission.id in seen_submission_ids: - continue - seen_submission_ids.add(submission.id) - published_date = datetime.fromtimestamp(submission.created_utc, tz=UTC) - if since and published_date <= since: - continue - items.append( - ContentItem( - url=submission.url - or f"https://www.reddit.com{submission.permalink}", - title=submission.title.strip(), - author=str(submission.author) if submission.author else "", - published_date=published_date, - content_text=(submission.selftext or submission.title).strip(), - source_plugin=SourcePluginName.REDDIT, - ) - ) - return items - - def health_check(self) -> bool: - """Verify that the configured subreddit can be queried successfully.""" - - subreddit = self._client().subreddit(self.source_config.config["subreddit"]) - next(subreddit.new(limit=1), None) - return True - - def match_entity_for_url(self, url: str): - """Skip entity matching for Reddit because posts are not entity-driven.""" - - return None - - def _iter_submissions(self, 
subreddit): - """Yield submissions from the configured listing modes without duplicates.""" - - listing = self.source_config.config.get("listing", "both") - limit = self.source_config.config.get("limit", 25) - if listing in {"new", "both"}: - yield from subreddit.new(limit=limit) - if listing in {"hot", "both"}: - yield from subreddit.hot(limit=limit) - - @staticmethod - def _client(): - """Create the authenticated PRAW client from Django settings. - - Raises: - RuntimeError: If Reddit credentials are missing. - """ - - if not settings.REDDIT_CLIENT_ID or not settings.REDDIT_CLIENT_SECRET: - raise RuntimeError("Reddit credentials are not configured.") - return praw.Reddit( - client_id=settings.REDDIT_CLIENT_ID, - client_secret=settings.REDDIT_CLIENT_SECRET, - user_agent=settings.REDDIT_USER_AGENT, - ) +from ingestion.plugins.reddit import * # noqa: F403 diff --git a/core/plugins/registry.py b/core/plugins/registry.py index 7bd8ccc4..6b40bc04 100644 --- a/core/plugins/registry.py +++ b/core/plugins/registry.py @@ -1,43 +1,3 @@ -"""Registry helpers for resolving source-plugin implementations by name.""" +"""Compatibility wrapper for the ingestion plugin registry.""" -from typing import Any - -from core.models import SourcePluginName -from core.plugins.bluesky import BlueskySourcePlugin -from core.plugins.reddit import RedditSourcePlugin -from core.plugins.rss import RSSSourcePlugin - -PLUGIN_REGISTRY = { - SourcePluginName.RSS: RSSSourcePlugin, - SourcePluginName.REDDIT: RedditSourcePlugin, - SourcePluginName.BLUESKY: BlueskySourcePlugin, -} - - -def get_plugin_for_source_config(source_config): - """Instantiate the plugin configured for a saved source configuration.""" - - return _get_plugin_class(source_config.plugin_name)(source_config) - - -def validate_plugin_config( - plugin_name: SourcePluginName | str, config: object -) -> dict[str, Any]: - """Validate plugin config using the plugin class registered for the name.""" - - return 
_get_plugin_class(plugin_name).validate_config(config) - - -def _get_plugin_class(plugin_name: SourcePluginName | str): - """Resolve a plugin enum value or string into its registered class. - - Raises: - ValueError: If the plugin name is not supported. - """ - - try: - return PLUGIN_REGISTRY[SourcePluginName(plugin_name)] - except KeyError as exc: - raise ValueError(f"Unsupported source plugin: {plugin_name}") from exc - except ValueError as exc: - raise ValueError(f"Unsupported source plugin: {plugin_name}") from exc +from ingestion.plugins.registry import * # noqa: F403 diff --git a/core/plugins/rss.py b/core/plugins/rss.py index 75bd906b..ca0cdb64 100644 --- a/core/plugins/rss.py +++ b/core/plugins/rss.py @@ -1,78 +1,3 @@ -"""RSS source plugin used to ingest feed entries into project content.""" +"""Compatibility wrapper for the RSS source plugin.""" -from __future__ import annotations - -from datetime import UTC, datetime -from time import struct_time - -import feedparser -from django.utils import timezone - -from core.models import SourcePluginName -from core.plugins.base import ContentItem, SourcePlugin - - -class RSSSourcePlugin(SourcePlugin): - """Fetch content from a configured RSS or Atom feed.""" - - required_config_fields = ("feed_url",) - - def fetch_new_content(self, since: datetime | None) -> list[ContentItem]: - """Parse the feed and return entries newer than ``since``.""" - - parsed_feed = feedparser.parse(self.source_config.config["feed_url"]) - items: list[ContentItem] = [] - for entry in parsed_feed.entries: - published_date = self._published_date_for_entry(entry) - if since and published_date <= since: - continue - url = getattr(entry, "link", "") - title = (getattr(entry, "title", "") or "").strip() - if not url or not title: - continue - summary = ( - getattr(entry, "summary", "") - or getattr(entry, "description", "") - or title - ) - items.append( - ContentItem( - url=url, - title=title, - author=(getattr(entry, "author", "") or 
"").strip(), - published_date=published_date, - content_text=summary.strip(), - source_plugin=SourcePluginName.RSS, - ) - ) - return items - - def health_check(self) -> bool: - """Treat the feed as healthy when it returns at least one entry.""" - - parsed_feed = feedparser.parse(self.source_config.config["feed_url"]) - return bool(getattr(parsed_feed, "entries", [])) - - @staticmethod - def _published_date_for_entry(entry) -> datetime: - """Choose the best available published timestamp for a feed entry.""" - - for field_name in ("published_parsed", "updated_parsed", "created_parsed"): - parsed_value = getattr(entry, field_name, None) - if parsed_value: - return RSSSourcePlugin._struct_time_to_datetime(parsed_value) - return timezone.now() - - @staticmethod - def _struct_time_to_datetime(parsed_value: struct_time) -> datetime: - """Convert ``feedparser`` time tuples into timezone-aware datetimes.""" - - return datetime( - parsed_value.tm_year, - parsed_value.tm_mon, - parsed_value.tm_mday, - parsed_value.tm_hour, - parsed_value.tm_min, - parsed_value.tm_sec, - tzinfo=UTC, - ) +from ingestion.plugins.rss import * # noqa: F403 diff --git a/core/serializer_mixins.py b/core/serializer_mixins.py new file mode 100644 index 00000000..d3ab6317 --- /dev/null +++ b/core/serializer_mixins.py @@ -0,0 +1,55 @@ +"""Shared DRF serializer mixins used across app-owned serializer modules.""" + +from core.models import Content, SkillResult +from core.permissions import get_visible_projects_queryset +from entities.models import Entity + + +class ProjectScopedSerializerMixin: + """Limit serializer relationship fields to objects the current user can access.""" + + def _filter_related_queryset(self, request): + """Constrain related-field querysets using the request user and project context.""" + + user = request.user + project = self.context.get("project") + if "project" in self.fields: + self.fields["project"].queryset = get_visible_projects_queryset(user) + if "entity" in self.fields: + 
entity_queryset = ( + Entity.objects.filter(project=project) + if project + else Entity.objects.filter(project__memberships__user=user).distinct() + ) + self.fields["entity"].queryset = entity_queryset + if "merged_into" in self.fields: + merged_into_queryset = ( + Entity.objects.filter(project=project) + if project + else Entity.objects.filter(project__memberships__user=user).distinct() + ) + self.fields["merged_into"].queryset = merged_into_queryset + if "content" in self.fields: + content_queryset = ( + Content.objects.filter(project=project) + if project + else Content.objects.filter(project__memberships__user=user).distinct() + ) + self.fields["content"].queryset = content_queryset + if "superseded_by" in self.fields: + skill_result_queryset = ( + SkillResult.objects.filter(project=project) + if project + else SkillResult.objects.filter( + project__memberships__user=user + ).distinct() + ) + self.fields["superseded_by"].queryset = skill_result_queryset + + def __init__(self, *args, **kwargs): + """Initialize the serializer and scope relation fields when authenticated.""" + + super().__init__(*args, **kwargs) + request = self.context.get("request") + if request and request.user.is_authenticated: + self._filter_related_queryset(request) diff --git a/core/serializers.py b/core/serializers.py index 059cd3a4..87a91cdf 100644 --- a/core/serializers.py +++ b/core/serializers.py @@ -1,385 +1,88 @@ -"""DRF serializers for project-scoped core models. +"""DRF serializers for project-scoped core models and compatibility exports.""" -These serializers enforce the project's access rules at the API boundary. They do -more than simple field translation: several serializers limit related querysets to - the active project and validate that cross-project relationships cannot be posted. 
-""" +from importlib import import_module +from typing import TYPE_CHECKING, Any -from django.contrib.auth.models import Group from rest_framework import serializers from core.models import ( - BlueskyCredentials, Content, - Entity, - EntityAuthoritySnapshot, - EntityCandidate, - EntityMention, IngestionRun, IntakeAllowlist, NewsletterIntake, - Project, - ProjectConfig, - ReviewQueue, - SkillResult, - SourceConfig, - TopicCentroidSnapshot, UserFeedback, ) -from core.plugins import validate_plugin_config - - -class ProjectScopedSerializerMixin: - """Limit serializer relationship fields to objects the current user can access.""" - - def _filter_related_queryset(self, request): - """Constrain related-field querysets using the request user and project context.""" - - user = request.user - project = self.context.get("project") - if "group" in self.fields: - self.fields["group"].queryset = Group.objects.filter(user=user) - if "project" in self.fields: - self.fields["project"].queryset = Project.objects.filter( - group__user=user - ).distinct() - if "entity" in self.fields: - entity_queryset = ( - Entity.objects.filter(project=project) - if project - else Entity.objects.filter(project__group__user=user) - ) - self.fields["entity"].queryset = entity_queryset - if "merged_into" in self.fields: - merged_into_queryset = ( - Entity.objects.filter(project=project) - if project - else Entity.objects.filter(project__group__user=user) - ) - self.fields["merged_into"].queryset = merged_into_queryset - if "content" in self.fields: - content_queryset = ( - Content.objects.filter(project=project) - if project - else Content.objects.filter(project__group__user=user) - ) - self.fields["content"].queryset = content_queryset - if "superseded_by" in self.fields: - skill_result_queryset = ( - SkillResult.objects.filter(project=project) - if project - else SkillResult.objects.filter(project__group__user=user) - ) - self.fields["superseded_by"].queryset = skill_result_queryset - - def 
__init__(self, *args, **kwargs): - """Initialize the serializer and scope relation fields when authenticated.""" - - super().__init__(*args, **kwargs) - request = self.context.get("request") - if request and request.user.is_authenticated: - self._filter_related_queryset(request) - - -class ProjectSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): - """Serialize top-level project records.""" - - has_bluesky_credentials = serializers.SerializerMethodField() - bluesky_handle = serializers.SerializerMethodField() - bluesky_is_active = serializers.SerializerMethodField() - bluesky_last_verified_at = serializers.SerializerMethodField() - bluesky_last_error = serializers.SerializerMethodField() - - class Meta: - model = Project - fields = [ - "id", - "name", - "group", - "topic_description", - "content_retention_days", - "intake_token", - "intake_enabled", - "has_bluesky_credentials", - "bluesky_handle", - "bluesky_is_active", - "bluesky_last_verified_at", - "bluesky_last_error", - "created_at", - ] - read_only_fields = ["id", "created_at"] - - def _get_bluesky_credentials(self, obj: Project): - """Return the project's stored Bluesky credentials, if configured.""" - - try: - return obj.bluesky_credentials - except Project.bluesky_credentials.RelatedObjectDoesNotExist: - return None - - def get_has_bluesky_credentials(self, obj: Project) -> bool: - """Return whether the project has stored Bluesky credentials.""" - - return self._get_bluesky_credentials(obj) is not None - - def get_bluesky_handle(self, obj: Project) -> str: - """Return the stored Bluesky handle, or an empty string.""" - - credentials = self._get_bluesky_credentials(obj) - return credentials.handle if credentials else "" - - def get_bluesky_is_active(self, obj: Project) -> bool: - """Return whether the stored Bluesky credentials are currently active.""" - - credentials = self._get_bluesky_credentials(obj) - return credentials.is_active if credentials else False - - def 
get_bluesky_last_verified_at(self, obj: Project): - """Return the last successful verification timestamp, if available.""" - - credentials = self._get_bluesky_credentials(obj) - return credentials.last_verified_at if credentials else None - - def get_bluesky_last_error(self, obj: Project) -> str: - """Return the latest Bluesky verification error, or an empty string.""" - - credentials = self._get_bluesky_credentials(obj) - return credentials.last_error if credentials else "" - - -class ProjectConfigSerializer( - ProjectScopedSerializerMixin, serializers.ModelSerializer -): - """Serialize per-project authority and scoring settings.""" - - class Meta: - model = ProjectConfig - fields = [ - "id", - "project", - "upvote_authority_weight", - "downvote_authority_weight", - "authority_decay_rate", - ] - read_only_fields = ["id", "project"] - - -class BlueskyCredentialsSerializer( - ProjectScopedSerializerMixin, serializers.ModelSerializer -): - """Serialize project-scoped Bluesky credentials without exposing secrets.""" - - app_password = serializers.CharField( - write_only=True, - required=False, - allow_blank=True, - trim_whitespace=False, +from core.serializer_mixins import ProjectScopedSerializerMixin + +if TYPE_CHECKING: + from entities.serializers import ( + EntityAuthoritySnapshotSerializer, + EntityCandidateMergeSerializer, + EntityCandidateSerializer, + EntityMentionSummarySerializer, + EntitySerializer, ) - has_stored_credential = serializers.SerializerMethodField() - - class Meta: - model = BlueskyCredentials - fields = [ - "id", - "project", - "handle", - "pds_url", - "is_active", - "has_stored_credential", - "app_password", - "last_verified_at", - "last_error", - "created_at", - "updated_at", - ] - read_only_fields = [ - "id", - "project", - "has_stored_credential", - "last_verified_at", - "last_error", - "created_at", - "updated_at", - ] - - def get_has_stored_credential(self, obj: BlueskyCredentials) -> bool: - """Return whether the project has an encrypted 
Bluesky credential stored.""" - - return obj.has_stored_credential() - - def validate(self, attrs): - """Require an app password when creating a credential record.""" - - attrs = super().validate(attrs) - app_password = attrs.get("app_password", "") - if self.instance is None and not app_password: - raise serializers.ValidationError( - {"app_password": "A Bluesky app credential is required."} - ) - return attrs - - def create(self, validated_data): - """Encrypt the submitted Bluesky app password before saving the record.""" - - app_password = validated_data.pop("app_password", "") - instance = super().create(validated_data) - if app_password: - instance.set_app_password(app_password) - instance.save(update_fields=["app_password_encrypted", "updated_at"]) - return instance - - def update(self, instance, validated_data): - """Keep the stored credential unless a replacement app password is submitted.""" - - app_password = validated_data.pop("app_password", "") - instance = super().update(instance, validated_data) - if app_password: - instance.set_app_password(app_password) - instance.save(update_fields=["app_password_encrypted", "updated_at"]) - return instance - - -class EntitySerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): - """Serialize tracked entities for a project.""" - - mention_count = serializers.IntegerField(read_only=True) - latest_mentions = serializers.SerializerMethodField() - - class Meta: - model = Entity - fields = [ - "id", - "project", - "name", - "type", - "description", - "authority_score", - "website_url", - "github_url", - "linkedin_url", - "bluesky_handle", - "mastodon_handle", - "twitter_handle", - "mention_count", - "latest_mentions", - "created_at", - ] - read_only_fields = ["id", "project", "created_at"] - - def get_latest_mentions(self, obj): - """Return a compact summary of the most recent mentions for an entity.""" - - mentions = getattr(obj, "prefetched_mentions", None) - if mentions is None: - mentions = 
obj.mentions.select_related("content").order_by("-created_at") - return EntityMentionSummarySerializer(mentions[:3], many=True).data - - -class EntityAuthoritySnapshotSerializer(serializers.ModelSerializer): - """Serialize one persisted authority recomputation for an entity.""" - - class Meta: - model = EntityAuthoritySnapshot - fields = [ - "id", - "entity", - "project", - "computed_at", - "mention_component", - "feedback_component", - "duplicate_component", - "decayed_prior", - "final_score", - ] - read_only_fields = fields - - -class TopicCentroidSnapshotSerializer(serializers.ModelSerializer): - """Serialize one persisted topic-centroid recomputation for a project.""" - - class Meta: - model = TopicCentroidSnapshot - fields = [ - "id", - "project", - "computed_at", - "centroid_active", - "feedback_count", - "upvote_count", - "downvote_count", - "drift_from_previous", - "drift_from_week_ago", - ] - read_only_fields = fields - - -class TopicCentroidObservabilitySummarySerializer(serializers.Serializer): - """Serialize project-level centroid observability summary metrics.""" - - project = serializers.IntegerField() - snapshot_count = serializers.IntegerField() - active_snapshot_count = serializers.IntegerField() - avg_drift_from_previous = serializers.FloatField(allow_null=True) - avg_drift_from_week_ago = serializers.FloatField(allow_null=True) - latest_snapshot = TopicCentroidSnapshotSerializer(allow_null=True) - - -class EntityMentionSummarySerializer(serializers.ModelSerializer): - """Serialize a compact entity-mention summary for frontend display.""" - - content_id = serializers.IntegerField(read_only=True) - content_title = serializers.CharField(source="content.title", read_only=True) - - class Meta: - model = EntityMention - fields = [ - "id", - "content_id", - "content_title", - "role", - "sentiment", - "span", - "confidence", - "created_at", - ] - read_only_fields = fields - - -class EntityCandidateSerializer( - ProjectScopedSerializerMixin, 
serializers.ModelSerializer -): - """Serialize extracted entity candidates awaiting editorial review.""" - - first_seen_title = serializers.CharField( - source="first_seen_in.title", read_only=True + from pipeline.serializers import ReviewQueueSerializer, SkillResultSerializer + from trends.serializers import ( + TopicCentroidObservabilitySummarySerializer, + TopicCentroidSnapshotSerializer, ) - merged_into_name = serializers.CharField(source="merged_into.name", read_only=True) - - class Meta: - model = EntityCandidate - fields = [ - "id", - "project", - "name", - "suggested_type", - "first_seen_in", - "first_seen_title", - "occurrence_count", - "status", - "merged_into", - "merged_into_name", - "created_at", - "updated_at", - ] - read_only_fields = fields - -class EntityCandidateMergeSerializer( - ProjectScopedSerializerMixin, serializers.Serializer -): - """Validate merge requests for entity candidates.""" - - merged_into = serializers.PrimaryKeyRelatedField(queryset=Entity.objects.none()) +_COMPAT_SERIALIZER_EXPORTS = { + "EntityAuthoritySnapshotSerializer": ( + "entities.serializers", + "EntityAuthoritySnapshotSerializer", + ), + "EntityCandidateMergeSerializer": ( + "entities.serializers", + "EntityCandidateMergeSerializer", + ), + "EntityCandidateSerializer": ( + "entities.serializers", + "EntityCandidateSerializer", + ), + "EntityMentionSummarySerializer": ( + "entities.serializers", + "EntityMentionSummarySerializer", + ), + "EntitySerializer": ("entities.serializers", "EntitySerializer"), + "ReviewQueueSerializer": ("pipeline.serializers", "ReviewQueueSerializer"), + "SkillResultSerializer": ("pipeline.serializers", "SkillResultSerializer"), + "TopicCentroidObservabilitySummarySerializer": ( + "trends.serializers", + "TopicCentroidObservabilitySummarySerializer", + ), + "TopicCentroidSnapshotSerializer": ( + "trends.serializers", + "TopicCentroidSnapshotSerializer", + ), +} + +__all__ = [ + "ProjectScopedSerializerMixin", + 
"EntityAuthoritySnapshotSerializer", + "EntityCandidateMergeSerializer", + "EntityCandidateSerializer", + "EntityMentionSummarySerializer", + "EntitySerializer", + "ReviewQueueSerializer", + "SkillResultSerializer", + "TopicCentroidObservabilitySummarySerializer", + "TopicCentroidSnapshotSerializer", +] + + +def __getattr__(name: str) -> Any: + """Resolve compatibility serializer re-exports lazily.""" + + try: + module_name, attribute_name = _COMPAT_SERIALIZER_EXPORTS[name] + except KeyError as exc: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") from exc + + value = getattr(import_module(module_name), attribute_name) + globals()[name] = value + return value class ContentSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): @@ -436,43 +139,6 @@ def validate(self, attrs): return attrs -class SkillResultSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): - """Serialize persisted AI skill executions for content.""" - - class Meta: - model = SkillResult - fields = [ - "id", - "content", - "project", - "skill_name", - "status", - "result_data", - "error_message", - "model_used", - "latency_ms", - "confidence", - "created_at", - "superseded_by", - ] - read_only_fields = ["id", "project", "created_at"] - - def validate(self, attrs): - """Reject skill results whose content does not belong to the active project.""" - - project = ( - self.context.get("project") - or attrs.get("project") - or getattr(self.instance, "project", None) - ) - content = attrs.get("content") or getattr(self.instance, "content", None) - if project and content and content.project_id != project.id: - raise serializers.ValidationError( - {"content": "Content must belong to the selected project."} - ) - return attrs - - class UserFeedbackSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): """Serialize editor feedback attached to a content item.""" @@ -518,69 +184,6 @@ class Meta: read_only_fields = ["id", "project", 
"started_at"] -class SourceConfigSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): - """Serialize source-plugin configuration and normalize provider settings.""" - - class Meta: - model = SourceConfig - fields = [ - "id", - "project", - "plugin_name", - "config", - "is_active", - "last_fetched_at", - ] - read_only_fields = ["id", "project", "last_fetched_at"] - - def validate(self, attrs): - """Validate plugin-specific configuration with the plugin registry.""" - - plugin_name = attrs.get("plugin_name") or getattr( - self.instance, "plugin_name", None - ) - config = attrs.get("config") or getattr(self.instance, "config", {}) - if plugin_name: - try: - attrs["config"] = validate_plugin_config(plugin_name, config) - except ValueError as exc: - raise serializers.ValidationError({"config": str(exc)}) from exc - return attrs - - -class ReviewQueueSerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer): - """Serialize manual-review queue items for project content.""" - - class Meta: - model = ReviewQueue - fields = [ - "id", - "project", - "content", - "reason", - "confidence", - "created_at", - "resolved", - "resolution", - ] - read_only_fields = ["id", "project", "created_at"] - - def validate(self, attrs): - """Reject review items whose content does not belong to the active project.""" - - project = ( - self.context.get("project") - or attrs.get("project") - or getattr(self.instance, "project", None) - ) - content = attrs.get("content") or getattr(self.instance, "content", None) - if project and content and content.project_id != project.id: - raise serializers.ValidationError( - {"content": "Content must belong to the selected project."} - ) - return attrs - - class IntakeAllowlistSerializer( ProjectScopedSerializerMixin, serializers.ModelSerializer ): diff --git a/core/signals.py b/core/signals.py index 4cb25fa0..4ab7e4b6 100644 --- a/core/signals.py +++ b/core/signals.py @@ -1,64 +1,31 @@ -"""Signal handlers that adapt Anymail 
inbound events to project intake logic.""" +"""Signal handlers for cross-cutting core behaviors.""" from __future__ import annotations from typing import Any -from anymail.signals import inbound from django.db.models.signals import post_save from django.dispatch import receiver -from core.models import ProjectConfig, UserFeedback -from core.newsletters import process_inbound_newsletter -from core.tasks import queue_topic_centroid_recompute +from core.models import UserFeedback +from newsletters.signals import handle_anymail_inbound as _handle_anymail_inbound +from projects.models import ProjectConfig +from trends.tasks import queue_topic_centroid_recompute -def _address_to_string(address) -> str: - """Normalize an Anymail address object or string into plain text.""" - - if address is None: - return "" - addr_spec = getattr(address, "addr_spec", None) - if isinstance(addr_spec, str): - return addr_spec.strip() - return str(address).strip() - - -@receiver(inbound) def handle_anymail_inbound( sender: Any, event: Any, esp_name: str, **kwargs: Any, ) -> None: - """Translate an inbound Anymail event into the internal intake payload. - - Args: - sender: Signal sender supplied by Anymail. - event: Normalized inbound event object. - esp_name: Name of the email service provider that generated the event. - **kwargs: Additional Anymail signal metadata. 
- """ - - message = event.message - - recipients: list[str] = [] - if message.envelope_recipient: - recipients.append(message.envelope_recipient) - recipients.extend( - address.addr_spec - for address in getattr(message, "to", []) - if getattr(address, "addr_spec", "") - ) + """Preserve the legacy core.signals import path for inbound handling.""" - process_inbound_newsletter( - recipients=recipients, - sender_email=message.envelope_sender - or _address_to_string(getattr(message, "from_email", None)), - subject=message.subject or "", - raw_html=message.html or "", - raw_text=message.text or "", - message_id=str(message.get("Message-ID", "") or event.event_id or ""), + _handle_anymail_inbound( + sender=sender, + event=event, + esp_name=esp_name, + **kwargs, ) diff --git a/core/tasks.py b/core/tasks.py index 4805580d..3be63815 100644 --- a/core/tasks.py +++ b/core/tasks.py @@ -4,41 +4,23 @@ import math from collections import defaultdict from datetime import timedelta +from importlib import import_module +from typing import TYPE_CHECKING, Any, Protocol, cast from celery import shared_task from django.conf import settings -from django.core.cache import cache from django.db import transaction -from django.db.models import Count, Q +from django.db.models import Count, Model from django.utils import timezone -from core.deduplication import canonicalize_url from core.embeddings import ( - build_content_embedding_text, - delete_topic_centroid, - embed_text, upsert_content_embedding, - upsert_topic_centroid, ) from core.models import ( Content, - Entity, - EntityAuthoritySnapshot, - EntityMention, - EntityMentionRole, FeedbackType, - IngestionRun, - IntakeAllowlist, - NewsletterIntake, - NewsletterIntakeStatus, - Project, - ProjectConfig, - RunStatus, - SourceConfig, - TopicCentroidSnapshot, UserFeedback, ) -from core.newsletter_extraction import extract_newsletter_items from core.pipeline import ( RELEVANCE_SKILL_NAME, SUMMARIZATION_SKILL_NAME, @@ -46,7 +28,13 @@ 
execute_background_skill_result, process_content_pipeline, ) -from core.plugins import get_plugin_for_source_config +from entities.models import ( + Entity, + EntityAuthoritySnapshot, + EntityMention, + EntityMentionRole, +) +from projects.models import Project, ProjectConfig logger = logging.getLogger(__name__) @@ -55,71 +43,95 @@ EntityMentionRole.AUTHOR, EntityMentionRole.SUBJECT, ) -TOPIC_CENTROID_LOOKBACK_DAYS = 90 -TOPIC_CENTROID_MIN_UPVOTES = 10 -TOPIC_CENTROID_DOWNVOTE_WEIGHT = 0.25 -TOPIC_CENTROID_DEBOUNCE_SECONDS = 60 * 5 -TOPIC_CENTROID_DECAY_TAU_DAYS = 45 +if TYPE_CHECKING: + from ingestion.tasks import run_all_ingestions, run_ingestion + from newsletters.tasks import process_newsletter_intake + from trends.tasks import ( + TOPIC_CENTROID_MIN_UPVOTES, + queue_topic_centroid_recompute, + recompute_topic_centroid, + run_all_topic_centroid_recomputations, + ) -@shared_task(name="core.tasks.run_ingestion") -def run_ingestion(source_config_id: int): - """Fetch new content for one source config and record an ingestion run. 
+_COMPAT_TASK_EXPORTS = { + "process_newsletter_intake": ( + "newsletters.tasks", + "process_newsletter_intake", + ), + "run_all_ingestions": ("ingestion.tasks", "run_all_ingestions"), + "run_ingestion": ("ingestion.tasks", "run_ingestion"), + "TOPIC_CENTROID_MIN_UPVOTES": ( + "trends.tasks", + "TOPIC_CENTROID_MIN_UPVOTES", + ), + "queue_topic_centroid_recompute": ( + "trends.tasks", + "queue_topic_centroid_recompute", + ), + "recompute_topic_centroid": ("trends.tasks", "recompute_topic_centroid"), + "run_all_topic_centroid_recomputations": ( + "trends.tasks", + "run_all_topic_centroid_recomputations", + ), +} + +__all__ = [ + "process_newsletter_intake", + "run_all_ingestions", + "run_ingestion", + "TOPIC_CENTROID_MIN_UPVOTES", + "queue_topic_centroid_recompute", + "recompute_authority_scores", + "recompute_topic_centroid", + "run_all_authority_recomputations", + "run_all_topic_centroid_recomputations", + "run_relevance_scoring_skill", + "run_summarization_skill", + "queue_content_skill", + "process_content", + "upsert_content_embedding", +] + + +def __getattr__(name: str) -> Any: + """Resolve compatibility task re-exports lazily.""" - Args: - source_config_id: Primary key of the source configuration to ingest. + try: + module_name, attribute_name = _COMPAT_TASK_EXPORTS[name] + except KeyError as exc: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") from exc - Returns: - A summary containing fetched and ingested item counts. 
- """ + value = getattr(import_module(module_name), attribute_name) + globals()[name] = value + return value - source_config = SourceConfig.objects.select_related("project").get( - pk=source_config_id - ) - ingestion_run = IngestionRun.objects.create( - project=source_config.project, - plugin_name=source_config.plugin_name, - status=RunStatus.RUNNING, - ) - try: - items_fetched, items_ingested = _ingest_source_config(source_config) - except Exception as exc: - ingestion_run.status = RunStatus.FAILED - ingestion_run.completed_at = timezone.now() - ingestion_run.error_message = str(exc) - ingestion_run.save(update_fields=["status", "completed_at", "error_message"]) - logger.exception( - "Source ingestion failed", extra={"source_config_id": source_config_id} - ) - raise - - ingestion_run.status = RunStatus.SUCCESS - ingestion_run.completed_at = timezone.now() - ingestion_run.items_fetched = items_fetched - ingestion_run.items_ingested = items_ingested - ingestion_run.save( - update_fields=["status", "completed_at", "items_fetched", "items_ingested"] - ) - return {"items_fetched": items_fetched, "items_ingested": items_ingested} +class DelayedTask(Protocol): + """Protocol for Celery tasks that can run eagerly or via ``delay``.""" -@shared_task(name="core.tasks.run_all_ingestions") -def run_all_ingestions(): - """Queue ingestion for every active source configuration. + def __call__(self, *args: object, **kwargs: object) -> object: + pass - Returns: - The number of source configurations scheduled. 
- """ + def delay(self, *args: object, **kwargs: object) -> object: + pass + + +def _enqueue_task(task: object, *args: object) -> None: + """Dispatch a Celery task through a typed ``delay`` seam.""" + + cast(DelayedTask, task).delay(*args) - source_config_ids = list( - SourceConfig.objects.filter(is_active=True).values_list("id", flat=True) - ) - for source_config_id in source_config_ids: - if settings.CELERY_TASK_ALWAYS_EAGER: - run_ingestion(source_config_id) - else: - run_ingestion.delay(source_config_id) - return len(source_config_ids) + +def _require_pk(instance: Model) -> int: + """Return a saved model primary key as an ``int``.""" + + pk = instance.pk + if pk is None: + raise ValueError( + f"{instance.__class__.__name__} must be saved before task dispatch" + ) + return int(pk) @shared_task(name="core.tasks.run_all_authority_recomputations") @@ -135,20 +147,8 @@ def run_all_authority_recomputations(): if settings.CELERY_TASK_ALWAYS_EAGER: recompute_authority_scores(project_id) else: - recompute_authority_scores.delay(project_id) - return len(project_ids) + _enqueue_task(recompute_authority_scores, project_id) - -@shared_task(name="core.tasks.run_all_topic_centroid_recomputations") -def run_all_topic_centroid_recomputations(): - """Queue topic-centroid recomputation for every project.""" - - project_ids = list(Project.objects.values_list("id", flat=True)) - for project_id in project_ids: - if settings.CELERY_TASK_ALWAYS_EAGER: - recompute_topic_centroid(project_id) - else: - recompute_topic_centroid.delay(project_id) return len(project_ids) @@ -239,36 +239,37 @@ def recompute_authority_scores(project_id: int): (abs(value) for value in feedback_totals.values()), default=0.0 ) + entity_rows = [(_require_pk(entity), entity) for entity in entities] entity_updates = [] snapshots = [] snapshot_history = { - entity.id: list( + entity_pk: list( EntityAuthoritySnapshot.objects.filter(entity=entity) .order_by("-computed_at") .only("computed_at", "final_score") ) - for 
entity in entities + for entity_pk, entity in entity_rows } with transaction.atomic(): - for entity in entities: + for entity_pk, entity in entity_rows: mention_component = _normalize_log_scaled_component( - mention_counts.get(entity.id, 0), + mention_counts.get(entity_pk, 0), max_mention_count, ) feedback_component = _normalize_signed_component( - feedback_totals.get(entity.id, 0.0), + feedback_totals.get(entity_pk, 0.0), max_abs_feedback, ) duplicate_component = _normalize_log_scaled_component( - duplicate_totals.get(entity.id, 0), + duplicate_totals.get(entity_pk, 0), max_duplicate_count, ) decayed_prior = _get_decayed_prior_score( entity=entity, month_start=month_start, authority_decay_rate=config.authority_decay_rate, - snapshot_history=snapshot_history.get(entity.id, []), + snapshot_history=snapshot_history.get(entity_pk, []), ) final_score = _clamp_unit_interval( ( @@ -300,140 +301,6 @@ def recompute_authority_scores(project_id: int): return {"project_id": project_id, "entities_updated": len(entity_updates)} -@shared_task(name="core.tasks.recompute_topic_centroid") -def recompute_topic_centroid(project_id: int): - """Rebuild the project's feedback centroid from recent editorial signals.""" - - now = timezone.now() - window_start = now - timedelta(days=TOPIC_CENTROID_LOOKBACK_DAYS) - feedback_rows = list( - UserFeedback.objects.filter(project_id=project_id, created_at__gte=window_start) - .select_related("content") - .order_by("created_at") - ) - upvote_count = sum( - 1 for row in feedback_rows if row.feedback_type == FeedbackType.UPVOTE - ) - downvote_count = sum( - 1 for row in feedback_rows if row.feedback_type == FeedbackType.DOWNVOTE - ) - - try: - if upvote_count < TOPIC_CENTROID_MIN_UPVOTES: - delete_topic_centroid(project_id) - _create_topic_centroid_snapshot( - project_id=project_id, - computed_at=now, - centroid_active=False, - centroid_vector=[], - feedback_count=len(feedback_rows), - upvote_count=upvote_count, - downvote_count=downvote_count, - ) 
- return { - "project_id": project_id, - "feedback_count": len(feedback_rows), - "upvote_count": upvote_count, - "downvote_count": downvote_count, - "centroid_active": False, - } - - vector_cache: dict[int, list[float]] = {} - upvote_vectors: list[tuple[list[float], float]] = [] - downvote_vectors: list[tuple[list[float], float]] = [] - - for feedback in feedback_rows: - vector = vector_cache.get(feedback.content_id) - if vector is None: - vector = embed_text(build_content_embedding_text(feedback.content)) - vector_cache[feedback.content_id] = vector - weight = _feedback_decay_weight(feedback.created_at, now) - if feedback.feedback_type == FeedbackType.UPVOTE: - upvote_vectors.append((vector, weight)) - else: - downvote_vectors.append((vector, weight)) - - upvote_mean, upvote_weight = _weighted_mean_vector(upvote_vectors) - if not upvote_mean or upvote_weight <= 0: - delete_topic_centroid(project_id) - _create_topic_centroid_snapshot( - project_id=project_id, - computed_at=now, - centroid_active=False, - centroid_vector=[], - feedback_count=len(feedback_rows), - upvote_count=upvote_count, - downvote_count=downvote_count, - ) - return { - "project_id": project_id, - "feedback_count": len(feedback_rows), - "upvote_count": upvote_count, - "downvote_count": downvote_count, - "centroid_active": False, - } - - downvote_mean, downvote_weight = _weighted_mean_vector(downvote_vectors) - downvote_scale = 0.0 - if downvote_mean and downvote_weight > 0: - downvote_scale = TOPIC_CENTROID_DOWNVOTE_WEIGHT * min( - 1.0, upvote_weight / downvote_weight - ) - - centroid_vector = [ - upvote_value - downvote_scale * downvote_value - for upvote_value, downvote_value in zip( - upvote_mean, - downvote_mean or [0.0] * len(upvote_mean), - ) - ] - normalized_centroid = _normalize_vector(centroid_vector) - if not normalized_centroid: - delete_topic_centroid(project_id) - _create_topic_centroid_snapshot( - project_id=project_id, - computed_at=now, - centroid_active=False, - 
centroid_vector=[], - feedback_count=len(feedback_rows), - upvote_count=upvote_count, - downvote_count=downvote_count, - ) - return { - "project_id": project_id, - "feedback_count": len(feedback_rows), - "upvote_count": upvote_count, - "downvote_count": downvote_count, - "centroid_active": False, - } - - upsert_topic_centroid( - project_id, - normalized_centroid, - upvote_count=upvote_count, - downvote_count=downvote_count, - feedback_count=len(feedback_rows), - ) - _create_topic_centroid_snapshot( - project_id=project_id, - computed_at=now, - centroid_active=True, - centroid_vector=normalized_centroid, - feedback_count=len(feedback_rows), - upvote_count=upvote_count, - downvote_count=downvote_count, - ) - return { - "project_id": project_id, - "feedback_count": len(feedback_rows), - "upvote_count": upvote_count, - "downvote_count": downvote_count, - "centroid_active": True, - } - finally: - cache.delete(_topic_centroid_debounce_key(project_id)) - - @shared_task(name="core.tasks.run_relevance_scoring_skill", ignore_result=True) def run_relevance_scoring_skill(skill_result_id: int): """Execute a pending ad hoc relevance skill result in the background.""" @@ -463,15 +330,17 @@ def queue_content_skill(content: Content, skill_name: str): skill_result = create_pending_skill_result(content, skill_name) if skill_name == RELEVANCE_SKILL_NAME: + skill_result_pk = _require_pk(skill_result) if settings.CELERY_TASK_ALWAYS_EAGER: - run_relevance_scoring_skill(skill_result.id) + run_relevance_scoring_skill(skill_result_pk) else: - run_relevance_scoring_skill.delay(skill_result.id) + _enqueue_task(run_relevance_scoring_skill, skill_result_pk) elif skill_name == SUMMARIZATION_SKILL_NAME: + skill_result_pk = _require_pk(skill_result) if settings.CELERY_TASK_ALWAYS_EAGER: - run_summarization_skill(skill_result.id) + run_summarization_skill(skill_result_pk) else: - run_summarization_skill.delay(skill_result.id) + _enqueue_task(run_summarization_skill, skill_result_pk) else: raise 
ValueError(f"Unsupported async skill name: {skill_name}") @@ -479,23 +348,6 @@ def queue_content_skill(content: Content, skill_name: str): return skill_result -def queue_topic_centroid_recompute(project_id: int) -> bool: - """Debounce and queue topic-centroid recomputation for one project.""" - - if not cache.add( - _topic_centroid_debounce_key(project_id), - timezone.now().isoformat(), - timeout=TOPIC_CENTROID_DEBOUNCE_SECONDS, - ): - return False - - if settings.CELERY_TASK_ALWAYS_EAGER: - recompute_topic_centroid(project_id) - else: - recompute_topic_centroid.delay(project_id) - return True - - def _normalize_log_scaled_component(value: int, max_value: int) -> float: """Normalize a non-negative count into the authority component range [0.5, 1].""" @@ -512,119 +364,6 @@ def _normalize_signed_component(value: float, max_abs_value: float) -> float: return _clamp_unit_interval(0.5 + 0.5 * (value / max_abs_value)) -def _feedback_decay_weight(created_at, now) -> float: - """Return the EMA-style decay weight for one feedback event.""" - - age_days = max(0.0, (now - created_at).total_seconds() / 86400) - return math.exp(-age_days / TOPIC_CENTROID_DECAY_TAU_DAYS) - - -def _create_topic_centroid_snapshot( - *, - project_id: int, - computed_at, - centroid_active: bool, - centroid_vector: list[float], - feedback_count: int, - upvote_count: int, - downvote_count: int, -) -> TopicCentroidSnapshot: - """Persist one centroid snapshot and derived drift metrics.""" - - previous_active_snapshot = ( - TopicCentroidSnapshot.objects.filter( - project_id=project_id, centroid_active=True - ) - .order_by("-computed_at") - .only("centroid_vector", "computed_at") - .first() - ) - week_ago_snapshot = ( - TopicCentroidSnapshot.objects.filter( - project_id=project_id, - centroid_active=True, - computed_at__lte=computed_at - timedelta(days=7), - ) - .order_by("-computed_at") - .only("centroid_vector", "computed_at") - .first() - ) - - snapshot = TopicCentroidSnapshot.objects.create( - 
project_id=project_id, - centroid_active=centroid_active, - centroid_vector=centroid_vector, - feedback_count=feedback_count, - upvote_count=upvote_count, - downvote_count=downvote_count, - drift_from_previous=( - _cosine_distance(centroid_vector, previous_active_snapshot.centroid_vector) - if centroid_active and previous_active_snapshot is not None - else None - ), - drift_from_week_ago=( - _cosine_distance(centroid_vector, week_ago_snapshot.centroid_vector) - if centroid_active and week_ago_snapshot is not None - else None - ), - ) - if snapshot.computed_at != computed_at: - TopicCentroidSnapshot.objects.filter(pk=snapshot.pk).update( - computed_at=computed_at - ) - snapshot.computed_at = computed_at - return snapshot - - -def _cosine_distance(left: list[float], right: list[float]) -> float | None: - """Return cosine distance between two vectors when both are usable.""" - - if not left or not right or len(left) != len(right): - return None - left_norm = math.sqrt(sum(value * value for value in left)) - right_norm = math.sqrt(sum(value * value for value in right)) - if left_norm <= 0 or right_norm <= 0: - return None - cosine_similarity = sum( - left_value * right_value for left_value, right_value in zip(left, right) - ) / (left_norm * right_norm) - return max(0.0, min(2.0, 1.0 - max(-1.0, min(1.0, cosine_similarity)))) - - -def _weighted_mean_vector( - weighted_vectors: list[tuple[list[float], float]], -) -> tuple[list[float], float]: - """Compute the weighted mean vector and total contributing weight.""" - - if not weighted_vectors: - return [], 0.0 - dimension = len(weighted_vectors[0][0]) - totals = [0.0] * dimension - total_weight = 0.0 - for vector, weight in weighted_vectors: - total_weight += weight - for index, value in enumerate(vector): - totals[index] += float(value) * weight - if total_weight <= 0: - return [], 0.0 - return ([value / total_weight for value in totals], total_weight) - - -def _normalize_vector(vector: list[float]) -> list[float]: - 
"""Normalize a dense vector to unit length.""" - - magnitude = math.sqrt(sum(value * value for value in vector)) - if magnitude <= 0: - return [] - return [float(value) / magnitude for value in vector] - - -def _topic_centroid_debounce_key(project_id: int) -> str: - """Return the cache key used to debounce centroid recomputations.""" - - return f"topic-centroid-recompute:{project_id}" - - def _get_decayed_prior_score( *, entity: Entity, @@ -659,146 +398,12 @@ def _clamp_unit_interval(value: float) -> float: return max(0.0, min(1.0, float(value))) -def _ingest_source_config(source_config: SourceConfig) -> tuple[int, int]: - """Fetch items from a configured source and create new content rows.""" - - plugin = get_plugin_for_source_config(source_config) - fetched_items = plugin.fetch_new_content(source_config.last_fetched_at) - ingested_count = 0 - for item in fetched_items: - if _content_exists_for_item(source_config, item): - continue - source_metadata = getattr(item, "source_metadata", None) or {} - content = Content.objects.create( - project=source_config.project, - entity=_match_entity_for_item(plugin, item), - url=item.url, - canonical_url=canonicalize_url(item.url), - title=item.title[:512], - author=item.author[:255], - source_plugin=item.source_plugin, - published_date=item.published_date, - content_text=item.content_text, - source_metadata=source_metadata, - ) - _schedule_content_processing(content) - ingested_count += 1 - source_config.last_fetched_at = timezone.now() - source_config.save(update_fields=["last_fetched_at"]) - return len(fetched_items), ingested_count - - -def _content_exists_for_item(source_config: SourceConfig, item) -> bool: - """Check whether a fetched item already exists for the project.""" - - post_uri = (getattr(item, "source_metadata", None) or {}).get("post_uri") - if post_uri: - return Content.objects.filter( - project=source_config.project, - source_plugin=item.source_plugin, - source_metadata__post_uri=post_uri, - ).exists() - 
canonical_url = canonicalize_url(item.url) - return ( - Content.objects.filter( - project=source_config.project, - source_plugin=item.source_plugin, - ) - .filter(Q(canonical_url=canonical_url) | Q(url=item.url)) - .exists() - ) - - -def _match_entity_for_item(plugin, item): - """Resolve the entity for an item while preserving older plugin mocks.""" - - if callable(getattr(type(plugin), "match_entity_for_item", None)): - return plugin.match_entity_for_item(item) - return plugin.match_entity_for_url(item.url) - - -@shared_task(name="core.tasks.process_newsletter_intake") -def process_newsletter_intake(intake_id: int): - """Convert a stored newsletter email into content rows. - - Args: - intake_id: Primary key of the ``NewsletterIntake`` row to process. - - Returns: - A summary containing the final intake status and ingested item count. - """ - - intake = NewsletterIntake.objects.select_related("project").get(pk=intake_id) - - allowlist = IntakeAllowlist.objects.filter( - project=intake.project, - sender_email=intake.sender_email, - confirmed_at__isnull=False, - ).first() - if allowlist is None: - intake.status = NewsletterIntakeStatus.PENDING - intake.error_message = "Sender has not confirmed newsletter intake." 
- intake.save(update_fields=["status", "error_message"]) - return {"status": intake.status, "items_ingested": 0} - - extracted_items = extract_newsletter_items( - subject=intake.subject, - raw_html=intake.raw_html, - raw_text=intake.raw_text, - ) - ingested_count = 0 - for item in extracted_items: - canonical_url = canonicalize_url(item.url) - if ( - Content.objects.filter( - project=intake.project, - source_plugin="newsletter", - ) - .filter(Q(canonical_url=canonical_url) | Q(url=item.url)) - .exists() - ): - continue - content = Content.objects.create( - project=intake.project, - url=item.url, - canonical_url=canonical_url, - title=item.title[:512], - author=intake.sender_email[:255], - source_plugin="newsletter", - published_date=timezone.now(), - content_text=item.excerpt or intake.raw_text, - source_metadata={ - "newsletter_intake_id": intake.id, - "sender_email": intake.sender_email, - "position": item.position, - }, - ) - _schedule_content_processing(content) - ingested_count += 1 - - intake.status = NewsletterIntakeStatus.EXTRACTED - intake.error_message = "" - intake.extraction_result = { - "method": "heuristic", - "items": [ - { - "url": item.url, - "title": item.title, - "excerpt": item.excerpt, - "position": item.position, - } - for item in extracted_items - ], - } - intake.save(update_fields=["status", "error_message", "extraction_result"]) - return {"status": intake.status, "items_ingested": ingested_count} - - def _schedule_content_processing(content: Content) -> None: """Ensure a content row is embedded before it enters the AI pipeline.""" upsert_content_embedding(content) + content_pk = _require_pk(content) if settings.CELERY_TASK_ALWAYS_EAGER: - process_content(content.id) + process_content(content_pk) else: - process_content.delay(content.id) + _enqueue_task(process_content, content_pk) diff --git a/core/tests/test_admin.py b/core/tests/test_admin.py index e7717181..71f96ad9 100644 --- a/core/tests/test_admin.py +++ b/core/tests/test_admin.py @@ 
-5,28 +5,17 @@ import pytest from django.contrib import messages from django.contrib.admin.sites import AdminSite -from django.contrib.auth.models import Group from django.utils import timezone +from content.admin import ContentAdmin, UserFeedbackAdmin from core.admin import ( - BlueskyCredentialsAdmin, - BlueskyCredentialsAdminForm, - ContentAdmin, DuplicateStateFilter, - EntityAdmin, - EntityAuthoritySnapshotAdmin, - EntityCandidateAdmin, HighValueFilter, - IngestionRunAdmin, - ProjectConfigAdmin, ReviewQueueAdmin, SkillResultAdmin, - SourceConfigAdmin, TopicCentroidSnapshotAdmin, - UserFeedbackAdmin, ) from core.models import ( - BlueskyCredentials, Content, Entity, EntityAuthoritySnapshot, @@ -34,17 +23,27 @@ EntityCandidateStatus, EntityMention, IngestionRun, - Project, - ProjectConfig, ReviewQueue, ReviewReason, RunStatus, SkillResult, - SourceConfig, - SourcePluginName, TopicCentroidSnapshot, UserFeedback, ) +from entities.admin import ( + EntityAdmin, + EntityAuthoritySnapshotAdmin, + EntityCandidateAdmin, +) +from ingestion.admin import IngestionRunAdmin +from projects.admin import ( + BlueskyCredentialsAdmin, + BlueskyCredentialsAdminForm, + ProjectConfigAdmin, + SourceConfigAdmin, +) +from projects.model_support import SourcePluginName +from projects.models import BlueskyCredentials, Project, ProjectConfig, SourceConfig pytestmark = pytest.mark.django_db @@ -54,12 +53,8 @@ def source_admin_context(django_user_model): user = django_user_model.objects.create_user( username="admin-owner", password="testpass123" ) - group = Group.objects.create(name="admin-team") - user.groups.add(group) - project = Project.objects.create( - name="Admin Project", group=group, topic_description="Infra" - ) - return SimpleNamespace(user=user, group=group, project=project) + project = Project.objects.create(name="Admin Project", topic_description="Infra") + return SimpleNamespace(user=user, project=project) def test_test_source_connection_reports_success(source_admin_context, 
mocker): @@ -71,11 +66,11 @@ def test_test_source_connection_reports_success(source_admin_context, mocker): plugin = mocker.Mock() plugin.health_check.return_value = True validate_mock = mocker.patch( - "core.admin.validate_plugin_config", + "projects.admin.validate_plugin_config", return_value={"feed_url": "https://example.com/feed.xml"}, ) get_plugin_mock = mocker.patch( - "core.admin.get_plugin_for_source_config", return_value=plugin + "projects.admin.get_plugin_for_source_config", return_value=plugin ) admin_instance = SourceConfigAdmin(SourceConfig, AdminSite()) admin_instance.message_user = mocker.Mock() @@ -130,7 +125,6 @@ def test_topic_centroid_snapshot_admin_changelist_view_builds_dashboard_stats( ): second_project = Project.objects.create( name="Second Admin Project", - group=source_admin_context.group, topic_description="Analytics", ) fixed_now = timezone.now() @@ -168,7 +162,7 @@ def test_topic_centroid_snapshot_admin_changelist_view_builds_dashboard_stats( "django.contrib.admin.options.ModelAdmin.changelist_view", side_effect=lambda request, extra_context=None: extra_context, ) - mocker.patch("core.admin.timezone.now", return_value=fixed_now) + mocker.patch("trends.admin.timezone.now", return_value=fixed_now) response = admin_instance.changelist_view(request=SimpleNamespace()) @@ -188,7 +182,7 @@ def test_topic_centroid_snapshot_admin_changelist_view_builds_dashboard_stats( assert len(response["centroid_project_drilldowns"]) == 2 assert response["centroid_project_drilldowns"][0]["project_name"] == "Admin Project" assert response["centroid_project_drilldowns"][0]["href"] == ( - "/admin/core/topiccentroidsnapshot/?project__id__exact=" + "/admin/trends/topiccentroidsnapshot/?project__id__exact=" f"{source_admin_context.project.id}" ) assert response["centroid_project_drilldowns"][0]["drift_from_previous"] == "10.0%" @@ -201,7 +195,7 @@ def test_test_source_connection_reports_failures(source_admin_context, mocker): config={"feed_url": 
"https://example.com/feed.xml"}, ) mocker.patch( - "core.admin.validate_plugin_config", + "projects.admin.validate_plugin_config", side_effect=ValueError("Missing required config field: feed_url"), ) admin_instance = SourceConfigAdmin(SourceConfig, AdminSite()) diff --git a/core/tests/test_api.py b/core/tests/test_api.py index 65fca82f..c78f4a80 100644 --- a/core/tests/test_api.py +++ b/core/tests/test_api.py @@ -2,13 +2,11 @@ from unittest.mock import patch from django.contrib.auth import get_user_model -from django.contrib.auth.models import Group from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase from core.models import ( - BlueskyCredentials, Content, Entity, EntityAuthoritySnapshot, @@ -20,18 +18,23 @@ IntakeAllowlist, NewsletterIntake, NewsletterIntakeStatus, - Project, - ProjectConfig, ReviewQueue, ReviewReason, RunStatus, SkillResult, SkillStatus, - SourceConfig, - SourcePluginName, TopicCentroidSnapshot, UserFeedback, ) +from projects.model_support import SourcePluginName +from projects.models import ( + BlueskyCredentials, + Project, + ProjectConfig, + ProjectMembership, + ProjectRole, + SourceConfig, +) class ProjectScopedApiTests(APITestCase): @@ -43,20 +46,24 @@ def setUp(self): self.other_user = user_model.objects.create_user( username="other", password="testpass123" ) - self.owner_group = Group.objects.create(name="owner-team") - self.owner.groups.add(self.owner_group) - self.other_group = Group.objects.create(name="other-team") - self.other_user.groups.add(self.other_group) self.owner_project = Project.objects.create( name="Owner Project", - group=self.owner_group, topic_description="Platform engineering", ) self.other_project = Project.objects.create( name="Other Project", - group=self.other_group, topic_description="Frontend", ) + ProjectMembership.objects.create( + user=self.owner, + project=self.owner_project, + role=ProjectRole.ADMIN, + ) + ProjectMembership.objects.create( + 
user=self.other_user, + project=self.other_project, + role=ProjectRole.ADMIN, + ) self.owner_entity = Entity.objects.create( project=self.owner_project, name="Owner Entity", @@ -172,7 +179,7 @@ def test_project_list_requires_authentication(self): }, ) - def test_project_list_is_scoped_to_request_user_groups(self): + def test_project_list_is_scoped_to_request_user_memberships(self): BlueskyCredentials.objects.create( project=self.owner_project, handle="owner-project.bsky.social", @@ -185,6 +192,7 @@ def test_project_list_is_scoped_to_request_user_groups(self): self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(len(response.json()), 1) self.assertEqual(response.json()[0]["id"], self.owner_project.id) + self.assertEqual(response.json()[0]["user_role"], ProjectRole.ADMIN) self.assertEqual( response.json()[0]["intake_token"], self.owner_project.intake_token ) diff --git a/core/tests/test_bluesky.py b/core/tests/test_bluesky.py index 45910176..80443a72 100644 --- a/core/tests/test_bluesky.py +++ b/core/tests/test_bluesky.py @@ -2,30 +2,20 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import Group from core.models import ( - BlueskyCredentials, Entity, - Project, - SourceConfig, - SourcePluginName, ) from core.plugins.bluesky import BlueskySourcePlugin +from projects.model_support import SourcePluginName +from projects.models import BlueskyCredentials, Project, SourceConfig pytestmark = pytest.mark.django_db @pytest.fixture -def bluesky_context(django_user_model): - user = django_user_model.objects.create_user( - username="bluesky-owner", password="testpass123" - ) - group = Group.objects.create(name="bluesky-team") - user.groups.add(group) - project = Project.objects.create( - name="Bluesky Project", group=group, topic_description="Infra" - ) +def bluesky_context(): + project = Project.objects.create(name="Bluesky Project", topic_description="Infra") entity = Entity.objects.create( project=project, 
name="Alice", @@ -42,7 +32,9 @@ def bluesky_context(django_user_model): def test_bluesky_validate_config_normalizes_defaults_and_rejects_invalid_values(): - assert BlueskySourcePlugin.validate_config({"author_handle": "@Alice.BSKY.social"}) == { + assert BlueskySourcePlugin.validate_config( + {"author_handle": "@Alice.BSKY.social"} + ) == { "author_handle": "alice.bsky.social", "include_replies": False, "max_posts_per_fetch": 100, @@ -74,7 +66,9 @@ def test_bluesky_validate_config_normalizes_defaults_and_rejects_invalid_values( with pytest.raises(ValueError, match="feed_uri must be a Bluesky feed generator"): BlueskySourcePlugin.validate_config({"feed_uri": "https://example.com/feed"}) - with pytest.raises(ValueError, match="max_posts_per_fetch must be a positive integer"): + with pytest.raises( + ValueError, match="max_posts_per_fetch must be a positive integer" + ): BlueskySourcePlugin.validate_config( {"author_handle": "alice.bsky.social", "max_posts_per_fetch": 0} ) @@ -214,10 +208,10 @@ def test_bluesky_credentials_encrypt_password_and_normalize_pds_url(bluesky_cont assert credentials.get_app_password() == "app-password" -def test_bluesky_client_uses_authenticated_project_credentials( - bluesky_context, mocker -): - credentials = BlueskyCredentials(project=bluesky_context.project, handle="alice.bsky.social") +def test_bluesky_client_uses_authenticated_project_credentials(bluesky_context, mocker): + credentials = BlueskyCredentials( + project=bluesky_context.project, handle="alice.bsky.social" + ) credentials.set_app_password("app-password") credentials.save() client = mocker.Mock() @@ -233,7 +227,9 @@ def test_bluesky_client_uses_authenticated_project_credentials( def test_bluesky_health_check_records_credential_errors(bluesky_context, mocker): - credentials = BlueskyCredentials(project=bluesky_context.project, handle="alice.bsky.social") + credentials = BlueskyCredentials( + project=bluesky_context.project, handle="alice.bsky.social" + ) 
credentials.set_app_password("app-password") credentials.save() plugin = BlueskySourcePlugin(bluesky_context.source_config) @@ -252,7 +248,9 @@ def test_bluesky_health_check_records_credential_errors(bluesky_context, mocker) def test_bluesky_verify_credentials_uses_authenticated_session_check( bluesky_context, mocker ): - credentials = BlueskyCredentials(project=bluesky_context.project, handle="alice.bsky.social") + credentials = BlueskyCredentials( + project=bluesky_context.project, handle="alice.bsky.social" + ) credentials.set_app_password("app-password") credentials.save() client = mocker.Mock() diff --git a/core/tests/test_embeddings.py b/core/tests/test_embeddings.py index 9b07e0be..c79990b0 100644 --- a/core/tests/test_embeddings.py +++ b/core/tests/test_embeddings.py @@ -4,7 +4,6 @@ import httpx import pytest -from django.contrib.auth.models import Group from django.core.management import CommandError, call_command from django.db.models import Count from qdrant_client.http.exceptions import ResponseHandlingException @@ -27,11 +26,8 @@ Content, Entity, IngestionRun, - Project, ReviewQueue, SkillResult, - SourceConfig, - SourcePluginName, UserFeedback, ) from core.pipeline import ( @@ -39,6 +35,8 @@ RELEVANCE_SKILL_NAME, SUMMARIZATION_SKILL_NAME, ) +from projects.model_support import SourcePluginName +from projects.models import Project, SourceConfig pytestmark = pytest.mark.django_db @@ -55,10 +53,8 @@ def embedding_context(django_user_model): user = django_user_model.objects.create_user( username="embed-owner", password="testpass123" ) - group = Group.objects.create(name="embedding-team") - user.groups.add(group) project = Project.objects.create( - name="Embedding Project", group=group, topic_description="Infra" + name="Embedding Project", topic_description="Infra" ) content = Content.objects.create( project=project, @@ -69,7 +65,7 @@ def embedding_context(django_user_model): published_date="2026-04-20T12:00:00Z", content_text="This article covers platform 
engineering practices.", ) - return SimpleNamespace(user=user, group=group, project=project, content=content) + return SimpleNamespace(user=user, project=project, content=content) def test_upsert_content_embedding_persists_embedding_id_and_payload( @@ -487,13 +483,8 @@ def test_sync_embeddings_scopes_to_requested_content_id(embedding_context, mocke def test_sync_embeddings_filters_project_and_references_only( embedding_context, django_user_model, mocker ): - other_user = django_user_model.objects.create_user( - username="embed-owner-2", password="testpass123" - ) - other_group = Group.objects.create(name="embedding-team-2") - other_user.groups.add(other_group) other_project = Project.objects.create( - name="Other Embedding Project", group=other_group, topic_description="Other" + name="Other Embedding Project", topic_description="Other" ) same_project_reference = Content.objects.create( project=embedding_context.project, diff --git a/core/tests/test_invitations.py b/core/tests/test_invitations.py new file mode 100644 index 00000000..855f7633 --- /dev/null +++ b/core/tests/test_invitations.py @@ -0,0 +1,249 @@ +from django.core import mail +from django.test import override_settings +from django.urls import reverse +from rest_framework import status +from rest_framework.test import APITestCase + +from projects.models import Project, ProjectMembership, ProjectRole +from users.models import MembershipInvitation + + +@override_settings( + EMAIL_BACKEND="django.core.mail.backends.locmem.EmailBackend", + FRONTEND_BASE_URL="http://localhost:3000", +) +class ProjectMembershipAndInvitationApiTests(APITestCase): + def setUp(self): + user_model = self.get_user_model() + self.admin_user = user_model.objects.create_user( + username="project-admin", + email="admin@example.com", + password="testpass123", + display_name="Project Admin", + ) + self.second_admin = user_model.objects.create_user( + username="second-admin", + email="second-admin@example.com", + password="testpass123", + 
) + self.member_user = user_model.objects.create_user( + username="project-member", + email="member@example.com", + password="testpass123", + ) + self.reader_user = user_model.objects.create_user( + username="project-reader", + email="reader@example.com", + password="testpass123", + ) + self.invited_user = user_model.objects.create_user( + username="invited-user", + email="invitee@example.com", + password="testpass123", + ) + + self.project = Project.objects.create( + name="Membership Project", + topic_description="Platform engineering", + ) + self.admin_membership = ProjectMembership.objects.create( + user=self.admin_user, + project=self.project, + role=ProjectRole.ADMIN, + ) + self.second_admin_membership = ProjectMembership.objects.create( + user=self.second_admin, + project=self.project, + role=ProjectRole.ADMIN, + ) + self.member_membership = ProjectMembership.objects.create( + user=self.member_user, + project=self.project, + role=ProjectRole.MEMBER, + ) + self.reader_membership = ProjectMembership.objects.create( + user=self.reader_user, + project=self.project, + role=ProjectRole.READER, + ) + + @staticmethod + def get_user_model(): + from django.contrib.auth import get_user_model + + return get_user_model() + + def test_project_create_assigns_creator_as_admin_membership(self): + creator = self.get_user_model().objects.create_user( + username="creator", + email="creator@example.com", + password="testpass123", + ) + self.client.force_authenticate(creator) + + response = self.client.post( + reverse("v1:project-list"), + { + "name": "Creator Project", + "topic_description": "Creator-owned project", + "content_retention_days": 90, + }, + format="json", + ) + + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + project = Project.objects.get(name="Creator Project") + membership = ProjectMembership.objects.get(project=project, user=creator) + self.assertEqual(membership.role, ProjectRole.ADMIN) + self.assertEqual(response.json()["user_role"], 
ProjectRole.ADMIN) + + def test_project_admin_can_list_update_and_remove_memberships(self): + self.client.force_authenticate(self.admin_user) + + list_response = self.client.get( + reverse( + "v1:project-membership-list", + kwargs={"project_id": self.project.id}, + ) + ) + self.assertEqual(list_response.status_code, status.HTTP_200_OK) + self.assertEqual(len(list_response.json()), 4) + + update_response = self.client.patch( + reverse( + "v1:project-membership-detail", + kwargs={ + "project_id": self.project.id, + "pk": self.member_membership.id, + }, + ), + {"role": ProjectRole.READER}, + format="json", + ) + self.assertEqual(update_response.status_code, status.HTTP_200_OK) + self.member_membership.refresh_from_db() + self.assertEqual(self.member_membership.role, ProjectRole.READER) + + delete_response = self.client.delete( + reverse( + "v1:project-membership-detail", + kwargs={ + "project_id": self.project.id, + "pk": self.reader_membership.id, + }, + ) + ) + self.assertEqual(delete_response.status_code, status.HTTP_204_NO_CONTENT) + self.assertFalse( + ProjectMembership.objects.filter(pk=self.reader_membership.id).exists() + ) + + def test_last_admin_cannot_be_demoted_or_removed(self): + self.second_admin_membership.delete() + self.client.force_authenticate(self.admin_user) + + demote_response = self.client.patch( + reverse( + "v1:project-membership-detail", + kwargs={ + "project_id": self.project.id, + "pk": self.admin_membership.id, + }, + ), + {"role": ProjectRole.MEMBER}, + format="json", + ) + self.assertEqual(demote_response.status_code, status.HTTP_400_BAD_REQUEST) + + delete_response = self.client.delete( + reverse( + "v1:project-membership-detail", + kwargs={ + "project_id": self.project.id, + "pk": self.admin_membership.id, + }, + ) + ) + self.assertEqual(delete_response.status_code, status.HTTP_400_BAD_REQUEST) + + def test_project_admin_can_create_and_revoke_invitation(self): + self.client.force_authenticate(self.admin_user) + + create_response = 
self.client.post( + reverse( + "v1:project-invitation-list", + kwargs={"project_id": self.project.id}, + ), + {"email": "invitee@example.com", "role": ProjectRole.MEMBER}, + format="json", + ) + + self.assertEqual(create_response.status_code, status.HTTP_201_CREATED) + invitation = MembershipInvitation.objects.get(project=self.project) + self.assertEqual(invitation.invited_by, self.admin_user) + self.assertEqual(len(mail.outbox), 1) + self.assertIn(invitation.token, mail.outbox[0].body) + + revoke_response = self.client.delete( + reverse( + "v1:project-invitation-detail", + kwargs={"project_id": self.project.id, "pk": invitation.id}, + ) + ) + self.assertEqual(revoke_response.status_code, status.HTTP_204_NO_CONTENT) + invitation.refresh_from_db() + self.assertIsNotNone(invitation.revoked_at) + + def test_invited_user_can_view_and_accept_invitation_token(self): + invitation = MembershipInvitation.objects.create( + project=self.project, + email=self.invited_user.email, + role=ProjectRole.READER, + invited_by=self.admin_user, + ) + + public_response = self.client.get( + reverse( + "membership-invitation-token", + kwargs={"token": invitation.token}, + ) + ) + self.assertEqual(public_response.status_code, status.HTTP_200_OK) + self.assertEqual(public_response.json()["project_name"], self.project.name) + + self.client.force_authenticate(self.invited_user) + accept_response = self.client.post( + reverse( + "membership-invitation-token", + kwargs={"token": invitation.token}, + ), + format="json", + ) + self.assertEqual(accept_response.status_code, status.HTTP_200_OK) + + invitation.refresh_from_db() + membership = ProjectMembership.objects.get( + project=self.project, + user=self.invited_user, + ) + self.assertEqual(membership.role, ProjectRole.READER) + self.assertIsNotNone(invitation.accepted_at) + + def test_accept_requires_matching_email(self): + invitation = MembershipInvitation.objects.create( + project=self.project, + email="expected@example.com", + 
role=ProjectRole.MEMBER, + invited_by=self.admin_user, + ) + self.client.force_authenticate(self.member_user) + + response = self.client.post( + reverse( + "membership-invitation-token", + kwargs={"token": invitation.token}, + ), + format="json", + ) + + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) diff --git a/core/tests/test_membership.py b/core/tests/test_membership.py new file mode 100644 index 00000000..fd4cc701 --- /dev/null +++ b/core/tests/test_membership.py @@ -0,0 +1,77 @@ +import pytest + +from core.permissions import get_user_role, get_visible_projects_queryset +from projects.models import Project, ProjectMembership, ProjectRole + +pytestmark = pytest.mark.django_db + + +def test_visible_projects_queryset_uses_memberships(django_user_model): + user = django_user_model.objects.create_user( + username="member-user", + password="testpass123", + ) + outsider = django_user_model.objects.create_user( + username="outsider-user", + password="testpass123", + ) + project = Project.objects.create( + name="Membership Project", + topic_description="Platform engineering", + ) + other_project = Project.objects.create( + name="Outsider Project", + topic_description="Frontend", + ) + ProjectMembership.objects.create( + user=user, + project=project, + role=ProjectRole.ADMIN, + ) + ProjectMembership.objects.create( + user=outsider, + project=other_project, + role=ProjectRole.ADMIN, + ) + + assert list(get_visible_projects_queryset(user)) == [project] + + +def test_get_user_role_returns_membership_role(django_user_model): + user = django_user_model.objects.create_user( + username="role-user", + password="testpass123", + ) + project = Project.objects.create( + name="Role Project", + topic_description="Platform engineering", + ) + ProjectMembership.objects.create( + user=user, + project=project, + role=ProjectRole.READER, + ) + + assert get_user_role(user, project) == ProjectRole.READER + + +def 
test_removing_membership_removes_project_visibility(django_user_model): + user = django_user_model.objects.create_user( + username="remove-user", + password="testpass123", + ) + project = Project.objects.create( + name="Removal Project", + topic_description="Platform engineering", + ) + membership = ProjectMembership.objects.create( + user=user, + project=project, + role=ProjectRole.MEMBER, + ) + + assert list(get_visible_projects_queryset(user)) == [project] + + membership.delete() + + assert list(get_visible_projects_queryset(user)) == [] diff --git a/core/tests/test_newsletters.py b/core/tests/test_newsletters.py index 54a0466a..d5a14a9b 100644 --- a/core/tests/test_newsletters.py +++ b/core/tests/test_newsletters.py @@ -4,7 +4,6 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import Group from django.core import mail from django.urls import reverse from svix.webhooks import Webhook @@ -28,10 +27,8 @@ @pytest.fixture def project(): - group = Group.objects.create(name="newsletter-team") return Project.objects.create( name="Newsletter Project", - group=group, topic_description="Platform engineering", intake_enabled=True, ) diff --git a/core/tests/test_permissions.py b/core/tests/test_permissions.py new file mode 100644 index 00000000..39a543d6 --- /dev/null +++ b/core/tests/test_permissions.py @@ -0,0 +1,355 @@ +from unittest.mock import patch + +from django.contrib.auth import get_user_model +from django.urls import reverse +from rest_framework import status +from rest_framework.test import APITestCase + +from core.models import ( + Content, + Entity, + EntityCandidate, + FeedbackType, + ReviewQueue, + ReviewReason, + TopicCentroidSnapshot, + UserFeedback, +) +from projects.model_support import SourcePluginName +from projects.models import BlueskyCredentials, Project, ProjectMembership, ProjectRole + + +class ProjectRolePermissionTests(APITestCase): + def setUp(self): + queue_centroid_patcher = 
patch("core.signals.queue_topic_centroid_recompute") + queue_centroid_patcher.start() + self.addCleanup(queue_centroid_patcher.stop) + + user_model = get_user_model() + self.admin_user = user_model.objects.create_user( + username="project-admin", + password="testpass123", + ) + self.member_user = user_model.objects.create_user( + username="project-member", + password="testpass123", + ) + self.reader_user = user_model.objects.create_user( + username="project-reader", + password="testpass123", + ) + self.outsider_user = user_model.objects.create_user( + username="outsider", + password="testpass123", + ) + + self.project = Project.objects.create( + name="Permissions Project", + topic_description="Platform engineering", + ) + + ProjectMembership.objects.bulk_create( + [ + ProjectMembership( + user=self.admin_user, + project=self.project, + role=ProjectRole.ADMIN, + ), + ProjectMembership( + user=self.member_user, + project=self.project, + role=ProjectRole.MEMBER, + ), + ProjectMembership( + user=self.reader_user, + project=self.project, + role=ProjectRole.READER, + ), + ] + ) + + self.entity = Entity.objects.create( + project=self.project, + name="Permissions Entity", + type="vendor", + ) + self.content = Content.objects.create( + project=self.project, + url="https://example.com/permissions-content", + title="Permissions Content", + author="Author", + entity=self.entity, + source_plugin=SourcePluginName.RSS, + published_date="2026-04-29T00:00:00Z", + content_text="Permissions content body.", + ) + self.entity_candidate = EntityCandidate.objects.create( + project=self.project, + name="Candidate Vendor", + suggested_type="vendor", + first_seen_in=self.content, + ) + self.review_queue_item = ReviewQueue.objects.create( + project=self.project, + content=self.content, + reason=ReviewReason.BORDERLINE_RELEVANCE, + confidence=0.55, + ) + self.topic_centroid_snapshot = TopicCentroidSnapshot.objects.create( + project=self.project, + centroid_active=True, + centroid_vector=[1.0, 
0.0], + feedback_count=3, + upvote_count=2, + downvote_count=1, + drift_from_previous=0.1, + drift_from_week_ago=0.2, + ) + self.member_feedback = UserFeedback.objects.create( + project=self.project, + content=self.content, + user=self.member_user, + feedback_type=FeedbackType.UPVOTE, + ) + self.admin_feedback = UserFeedback.objects.create( + project=self.project, + content=self.content, + user=self.admin_user, + feedback_type=FeedbackType.DOWNVOTE, + ) + self.bluesky_credentials = BlueskyCredentials.objects.create( + project=self.project, + handle="permissions-project.bsky.social", + ) + + def test_project_list_includes_resolved_reader_role(self): + self.client.force_authenticate(self.reader_user) + + response = self.client.get(reverse("v1:project-list")) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.json()[0]["user_role"], ProjectRole.READER) + + def test_reader_is_denied_contributor_and_admin_endpoints(self): + self.client.force_authenticate(self.reader_user) + + cases = [ + ( + "patch", + reverse("v1:project-detail", kwargs={"id": self.project.id}), + {"name": "Reader Update"}, + ), + ( + "post", + reverse( + "v1:project-rotate-intake-token", + kwargs={"id": self.project.id}, + ), + None, + ), + ( + "get", + reverse( + "v1:project-review-queue-list", + kwargs={"project_id": self.project.id}, + ), + None, + ), + ( + "post", + reverse( + "v1:project-source-config-list", + kwargs={"project_id": self.project.id}, + ), + { + "plugin_name": SourcePluginName.RSS, + "config": {"feed_url": "https://example.com/feed.xml"}, + "is_active": True, + }, + ), + ( + "get", + reverse( + "v1:project-bluesky-credentials-list", + kwargs={"project_id": self.project.id}, + ), + None, + ), + ( + "post", + reverse( + "v1:project-feedback-list", + kwargs={"project_id": self.project.id}, + ), + { + "content": self.content.id, + "feedback_type": FeedbackType.UPVOTE, + }, + ), + ( + "post", + reverse( + "v1:project-entity-candidate-accept", + 
kwargs={ + "project_id": self.project.id, + "pk": self.entity_candidate.id, + }, + ), + None, + ), + ( + "get", + reverse( + "v1:project-topic-centroid-snapshot-summary", + kwargs={"project_id": self.project.id}, + ), + None, + ), + ] + + for method, url, payload in cases: + with self.subTest(method=method, url=url): + response = getattr(self.client, method)(url, payload, format="json") + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + def test_member_can_use_contributor_endpoints_but_not_admin_only_ones(self): + self.client.force_authenticate(self.member_user) + + review_queue_response = self.client.get( + reverse( + "v1:project-review-queue-list", + kwargs={"project_id": self.project.id}, + ) + ) + self.assertEqual(review_queue_response.status_code, status.HTTP_200_OK) + + source_config_response = self.client.post( + reverse( + "v1:project-source-config-list", + kwargs={"project_id": self.project.id}, + ), + { + "plugin_name": SourcePluginName.RSS, + "config": {"feed_url": "https://example.com/feed.xml"}, + "is_active": True, + }, + format="json", + ) + self.assertEqual(source_config_response.status_code, status.HTTP_201_CREATED) + + topic_summary_response = self.client.get( + reverse( + "v1:project-topic-centroid-snapshot-summary", + kwargs={"project_id": self.project.id}, + ) + ) + self.assertEqual(topic_summary_response.status_code, status.HTTP_200_OK) + + accept_candidate_response = self.client.post( + reverse( + "v1:project-entity-candidate-accept", + kwargs={ + "project_id": self.project.id, + "pk": self.entity_candidate.id, + }, + ), + format="json", + ) + self.assertEqual(accept_candidate_response.status_code, status.HTTP_200_OK) + + delete_own_feedback_response = self.client.delete( + reverse( + "v1:project-feedback-detail", + kwargs={ + "project_id": self.project.id, + "pk": self.member_feedback.id, + }, + ) + ) + self.assertEqual( + delete_own_feedback_response.status_code, status.HTTP_204_NO_CONTENT + ) + + 
update_project_response = self.client.patch( + reverse("v1:project-detail", kwargs={"id": self.project.id}), + {"name": "Member Update"}, + format="json", + ) + self.assertEqual(update_project_response.status_code, status.HTTP_403_FORBIDDEN) + + list_credentials_response = self.client.get( + reverse( + "v1:project-bluesky-credentials-list", + kwargs={"project_id": self.project.id}, + ) + ) + self.assertEqual( + list_credentials_response.status_code, status.HTTP_403_FORBIDDEN + ) + + rotate_token_response = self.client.post( + reverse("v1:project-rotate-intake-token", kwargs={"id": self.project.id}), + format="json", + ) + self.assertEqual(rotate_token_response.status_code, status.HTTP_403_FORBIDDEN) + + def test_member_cannot_delete_other_users_feedback(self): + self.client.force_authenticate(self.member_user) + + response = self.client.delete( + reverse( + "v1:project-feedback-detail", + kwargs={ + "project_id": self.project.id, + "pk": self.admin_feedback.id, + }, + ) + ) + + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + def test_admin_can_access_admin_endpoints_and_delete_other_feedback(self): + self.client.force_authenticate(self.admin_user) + + update_project_response = self.client.patch( + reverse("v1:project-detail", kwargs={"id": self.project.id}), + {"name": "Admin Updated Project"}, + format="json", + ) + self.assertEqual(update_project_response.status_code, status.HTTP_200_OK) + + list_credentials_response = self.client.get( + reverse( + "v1:project-bluesky-credentials-list", + kwargs={"project_id": self.project.id}, + ) + ) + self.assertEqual(list_credentials_response.status_code, status.HTTP_200_OK) + + rotate_token_response = self.client.post( + reverse("v1:project-rotate-intake-token", kwargs={"id": self.project.id}), + format="json", + ) + self.assertEqual(rotate_token_response.status_code, status.HTTP_200_OK) + + delete_feedback_response = self.client.delete( + reverse( + "v1:project-feedback-detail", + kwargs={ + 
"project_id": self.project.id, + "pk": self.member_feedback.id, + }, + ) + ) + self.assertEqual( + delete_feedback_response.status_code, status.HTTP_204_NO_CONTENT + ) + + def test_outsider_cannot_access_project_resources(self): + self.client.force_authenticate(self.outsider_user) + + response = self.client.get( + reverse("v1:project-detail", kwargs={"id": self.project.id}) + ) + + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) diff --git a/core/tests/test_pipeline.py b/core/tests/test_pipeline.py index b498f728..0fc461ae 100644 --- a/core/tests/test_pipeline.py +++ b/core/tests/test_pipeline.py @@ -1,7 +1,6 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import Group from core.deduplication import canonicalize_url from core.models import ( @@ -50,10 +49,8 @@ def pipeline_context(django_user_model): user = django_user_model.objects.create_user( username="pipeline-owner", password="testpass123" ) - group = Group.objects.create(name="pipeline-team") - user.groups.add(group) project = Project.objects.create( - name="Pipeline Project", group=group, topic_description="Platform engineering" + name="Pipeline Project", topic_description="Platform engineering" ) content = Content.objects.create( project=project, @@ -65,7 +62,7 @@ def pipeline_context(django_user_model): content_text="This article covers a new Kubernetes release and what changed for platform teams.", embedding_id="emb_123", ) - return SimpleNamespace(user=user, group=group, project=project, content=content) + return SimpleNamespace(user=user, project=project, content=content) def test_process_content_runs_full_pipeline_for_relevant_content( diff --git a/core/tests/test_plugin_base.py b/core/tests/test_plugin_base.py index dbefc37c..f425c097 100644 --- a/core/tests/test_plugin_base.py +++ b/core/tests/test_plugin_base.py @@ -2,7 +2,6 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import Group from core.models import 
Entity, Project from core.plugins.base import ContentItem, SourcePlugin @@ -30,15 +29,8 @@ def health_check(self) -> bool: @pytest.fixture -def plugin_context(django_user_model): - user = django_user_model.objects.create_user( - username="plugin-owner", password="testpass123" - ) - group = Group.objects.create(name="plugin-team") - user.groups.add(group) - project = Project.objects.create( - name="Plugin Project", group=group, topic_description="Infra" - ) +def plugin_context(): + project = Project.objects.create(name="Plugin Project", topic_description="Infra") source_config = SimpleNamespace(project=project, config={"api_key": "secret"}) return SimpleNamespace(project=project, source_config=source_config) diff --git a/core/tests/test_reddit.py b/core/tests/test_reddit.py index c6243629..7fe509c1 100644 --- a/core/tests/test_reddit.py +++ b/core/tests/test_reddit.py @@ -2,25 +2,18 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import Group -from core.models import Project, SourceConfig, SourcePluginName from core.plugins.reddit import RedditSourcePlugin from core.plugins.registry import validate_plugin_config +from projects.model_support import SourcePluginName +from projects.models import Project, SourceConfig pytestmark = pytest.mark.django_db @pytest.fixture -def reddit_context(django_user_model): - user = django_user_model.objects.create_user( - username="reddit-owner", password="testpass123" - ) - group = Group.objects.create(name="reddit-team") - user.groups.add(group) - project = Project.objects.create( - name="Reddit Project", group=group, topic_description="Infra" - ) +def reddit_context(): + project = Project.objects.create(name="Reddit Project", topic_description="Infra") source_config = SourceConfig.objects.create( project=project, plugin_name=SourcePluginName.REDDIT, diff --git a/core/tests/test_rss.py b/core/tests/test_rss.py index 6a291977..01fdab3a 100644 --- a/core/tests/test_rss.py +++ b/core/tests/test_rss.py 
@@ -3,24 +3,17 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import Group -from core.models import Project, SourceConfig, SourcePluginName from core.plugins.rss import RSSSourcePlugin +from projects.model_support import SourcePluginName +from projects.models import Project, SourceConfig pytestmark = pytest.mark.django_db @pytest.fixture -def rss_context(django_user_model): - user = django_user_model.objects.create_user( - username="rss-owner", password="testpass123" - ) - group = Group.objects.create(name="rss-team") - user.groups.add(group) - project = Project.objects.create( - name="RSS Project", group=group, topic_description="Infra" - ) +def rss_context(): + project = Project.objects.create(name="RSS Project", topic_description="Infra") source_config = SourceConfig.objects.create( project=project, plugin_name=SourcePluginName.RSS, diff --git a/core/tests/test_serializers.py b/core/tests/test_serializers.py index d896c3c5..c627116e 100644 --- a/core/tests/test_serializers.py +++ b/core/tests/test_serializers.py @@ -1,27 +1,25 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import AnonymousUser, Group +from django.contrib.auth.models import AnonymousUser from core.models import ( Content, Entity, - Project, ReviewReason, SkillResult, - SourceConfig, - SourcePluginName, ) from core.serializers import ( ContentSerializer, EntitySerializer, IngestionRunSerializer, - ProjectSerializer, ReviewQueueSerializer, SkillResultSerializer, - SourceConfigSerializer, UserFeedbackSerializer, ) +from projects.model_support import SourcePluginName +from projects.models import Project, ProjectMembership, ProjectRole, SourceConfig +from projects.serializers import ProjectSerializer, SourceConfigSerializer pytestmark = pytest.mark.django_db @@ -34,15 +32,17 @@ def serializer_context(django_user_model): other_user = django_user_model.objects.create_user( username="serializer-other", password="testpass123" ) - 
group = Group.objects.create(name="serializer-team") - other_group = Group.objects.create(name="serializer-other-team") - user.groups.add(group) - other_user.groups.add(other_group) project = Project.objects.create( - name="Serializer Project", group=group, topic_description="Infra" + name="Serializer Project", topic_description="Infra" ) other_project = Project.objects.create( - name="Other Serializer Project", group=other_group, topic_description="Data" + name="Other Serializer Project", topic_description="Data" + ) + ProjectMembership.objects.create(user=user, project=project, role=ProjectRole.ADMIN) + ProjectMembership.objects.create( + user=other_user, + project=other_project, + role=ProjectRole.ADMIN, ) entity = Entity.objects.create( project=project, name="Serializer Entity", type="vendor" @@ -133,7 +133,7 @@ def test_project_scoped_serializer_filters_related_querysets_without_project_con def test_project_scoped_serializer_skips_filtering_for_anonymous_user(): serializer = ProjectSerializer(context={"request": _request_for(AnonymousUser())}) - assert serializer.fields["group"].queryset.count() == Group.objects.count() + assert "project" not in serializer.fields def test_content_serializer_rejects_cross_project_entity(serializer_context): @@ -167,7 +167,9 @@ def test_content_serializer_exposes_duplicate_state_as_read_only_fields( ) serializer_context.content.duplicate_signal_count = 1 serializer_context.content.canonical_url = "https://example.com/serializer-content" - serializer_context.content.save(update_fields=["duplicate_signal_count", "canonical_url"]) + serializer_context.content.save( + update_fields=["duplicate_signal_count", "canonical_url"] + ) serializer = ContentSerializer(instance=duplicate) @@ -267,7 +269,7 @@ def test_source_config_serializer_surfaces_plugin_validation_errors(serializer_c ) assert serializer.is_valid() is False - assert serializer.errors == {"config": ["Missing required config field: feed_url"]} + assert serializer.errors == 
{"config": ["Invalid source configuration."]} def test_source_config_serializer_normalizes_bluesky_author_handle_config( diff --git a/core/tests/test_tasks.py b/core/tests/test_tasks.py index dced3e38..4a026d22 100644 --- a/core/tests/test_tasks.py +++ b/core/tests/test_tasks.py @@ -2,7 +2,6 @@ from types import SimpleNamespace import pytest -from django.contrib.auth.models import Group from core.models import ( Content, @@ -12,30 +11,28 @@ EntityMentionRole, FeedbackType, IngestionRun, - Project, - ProjectConfig, RunStatus, SkillStatus, - SourceConfig, - SourcePluginName, TopicCentroidSnapshot, UserFeedback, ) from core.pipeline import RELEVANCE_SKILL_NAME, SUMMARIZATION_SKILL_NAME from core.tasks import ( - TOPIC_CENTROID_MIN_UPVOTES, - _ingest_source_config, queue_content_skill, - queue_topic_centroid_recompute, recompute_authority_scores, - recompute_topic_centroid, run_all_authority_recomputations, - run_all_ingestions, - run_all_topic_centroid_recomputations, - run_ingestion, run_relevance_scoring_skill, run_summarization_skill, ) +from ingestion.tasks import _ingest_source_config, run_all_ingestions, run_ingestion +from projects.model_support import SourcePluginName +from projects.models import Project, ProjectConfig, SourceConfig +from trends.tasks import ( + TOPIC_CENTROID_MIN_UPVOTES, + queue_topic_centroid_recompute, + recompute_topic_centroid, + run_all_topic_centroid_recomputations, +) pytestmark = pytest.mark.django_db @@ -45,18 +42,14 @@ def source_plugin_context(django_user_model): user = django_user_model.objects.create_user( username="plugin-owner", password="testpass123" ) - group = Group.objects.create(name="plugin-team") - user.groups.add(group) - project = Project.objects.create( - name="Plugin Project", group=group, topic_description="Infra" - ) + project = Project.objects.create(name="Plugin Project", topic_description="Infra") entity = Entity.objects.create( project=project, name="Example", type="vendor", website_url="https://example.com", 
) - return SimpleNamespace(user=user, group=group, project=project, entity=entity) + return SimpleNamespace(user=user, project=project, entity=entity) def test_run_ingestion_creates_content_from_rss_entries(source_plugin_context, mocker): @@ -175,7 +168,7 @@ def test_ingest_source_config_allows_cross_plugin_duplicate_urls_for_pipeline_de ], match_entity_for_url=lambda url: None, ) - mocker.patch("core.tasks.get_plugin_for_source_config", return_value=plugin) + mocker.patch("ingestion.tasks.get_plugin_for_source_config", return_value=plugin) items_fetched, items_ingested = _ingest_source_config(source_config) @@ -258,7 +251,7 @@ def test_ingest_source_config_deduplicates_bluesky_posts_by_post_uri( ], match_entity_for_item=lambda item: source_plugin_context.entity, ) - mocker.patch("core.tasks.get_plugin_for_source_config", return_value=plugin) + mocker.patch("ingestion.tasks.get_plugin_for_source_config", return_value=plugin) items_fetched, items_ingested = _ingest_source_config(source_config) @@ -272,7 +265,7 @@ def test_ingest_source_config_deduplicates_bluesky_posts_by_post_uri( def test_run_all_ingestions_enqueues_active_source_configs( source_plugin_context, mocker ): - delay_mock = mocker.patch("core.tasks.run_ingestion.delay") + delay_mock = mocker.patch("ingestion.tasks.run_ingestion.delay") active_one = SourceConfig.objects.create( project=source_plugin_context.project, plugin_name=SourcePluginName.RSS, @@ -302,8 +295,8 @@ def test_run_all_ingestions_executes_inline_when_eager( source_plugin_context, settings, mocker ): settings.CELERY_TASK_ALWAYS_EAGER = True - run_ingestion_mock = mocker.patch("core.tasks.run_ingestion") - delay_mock = mocker.patch("core.tasks.run_ingestion.delay") + run_ingestion_mock = mocker.patch("ingestion.tasks.run_ingestion") + delay_mock = mocker.patch("ingestion.tasks.run_ingestion.delay") active_one = SourceConfig.objects.create( project=source_plugin_context.project, plugin_name=SourcePluginName.RSS, @@ -328,10 +321,8 @@ def 
test_run_all_authority_recomputations_enqueues_all_projects( source_plugin_context, mocker ): delay_mock = mocker.patch("core.tasks.recompute_authority_scores.delay") - other_group = Group.objects.create(name="second-authority-team") other_project = Project.objects.create( name="Other Project", - group=other_group, topic_description="Security", ) @@ -349,10 +340,8 @@ def test_run_all_authority_recomputations_executes_inline_when_eager( settings.CELERY_TASK_ALWAYS_EAGER = True recompute_mock = mocker.patch("core.tasks.recompute_authority_scores") delay_mock = mocker.patch("core.tasks.recompute_authority_scores.delay") - other_group = Group.objects.create(name="inline-authority-team") other_project = Project.objects.create( name="Inline Project", - group=other_group, topic_description="Platform", ) @@ -368,11 +357,9 @@ def test_run_all_authority_recomputations_executes_inline_when_eager( def test_run_all_topic_centroid_recomputations_enqueues_all_projects( source_plugin_context, mocker ): - delay_mock = mocker.patch("core.tasks.recompute_topic_centroid.delay") - other_group = Group.objects.create(name="second-centroid-team") + delay_mock = mocker.patch("trends.tasks.recompute_topic_centroid.delay") other_project = Project.objects.create( name="Other Centroid Project", - group=other_group, topic_description="Security", ) @@ -388,12 +375,10 @@ def test_run_all_topic_centroid_recomputations_executes_inline_when_eager( source_plugin_context, settings, mocker ): settings.CELERY_TASK_ALWAYS_EAGER = True - recompute_mock = mocker.patch("core.tasks.recompute_topic_centroid") - delay_mock = mocker.patch("core.tasks.recompute_topic_centroid.delay") - other_group = Group.objects.create(name="inline-centroid-team") + recompute_mock = mocker.patch("trends.tasks.recompute_topic_centroid") + delay_mock = mocker.patch("trends.tasks.recompute_topic_centroid.delay") other_project = Project.objects.create( name="Inline Centroid Project", - group=other_group, 
topic_description="Platform", ) @@ -495,8 +480,8 @@ def test_recompute_topic_centroid_upserts_weighted_normalized_centroid( ): project = source_plugin_context.project mocker.patch("core.signals.queue_topic_centroid_recompute") - upsert_mock = mocker.patch("core.tasks.upsert_topic_centroid") - delete_mock = mocker.patch("core.tasks.delete_topic_centroid") + upsert_mock = mocker.patch("trends.tasks.upsert_topic_centroid") + delete_mock = mocker.patch("trends.tasks.delete_topic_centroid") vector_lookup = { **{ f"Upvote {index}": [1.0, 0.0] for index in range(TOPIC_CENTROID_MIN_UPVOTES) @@ -504,7 +489,7 @@ def test_recompute_topic_centroid_upserts_weighted_normalized_centroid( "Downvote": [0.0, 1.0], } mocker.patch( - "core.tasks.embed_text", + "trends.tasks.embed_text", side_effect=lambda text: vector_lookup[text.split("\n\n", 1)[0]], ) @@ -572,9 +557,9 @@ def test_recompute_topic_centroid_persists_drift_from_previous_and_week_old_snap ): project = source_plugin_context.project mocker.patch("core.signals.queue_topic_centroid_recompute") - upsert_mock = mocker.patch("core.tasks.upsert_topic_centroid") - delete_mock = mocker.patch("core.tasks.delete_topic_centroid") - mocker.patch("core.tasks.embed_text", return_value=[1.0, 0.0]) + upsert_mock = mocker.patch("trends.tasks.upsert_topic_centroid") + delete_mock = mocker.patch("trends.tasks.delete_topic_centroid") + mocker.patch("trends.tasks.embed_text", return_value=[1.0, 0.0]) recent_snapshot = TopicCentroidSnapshot.objects.create( project=project, @@ -635,8 +620,8 @@ def test_recompute_topic_centroid_disables_centroid_below_minimum_upvotes( ): project = source_plugin_context.project mocker.patch("core.signals.queue_topic_centroid_recompute") - upsert_mock = mocker.patch("core.tasks.upsert_topic_centroid") - delete_mock = mocker.patch("core.tasks.delete_topic_centroid") + upsert_mock = mocker.patch("trends.tasks.upsert_topic_centroid") + delete_mock = mocker.patch("trends.tasks.delete_topic_centroid") for index in 
range(TOPIC_CENTROID_MIN_UPVOTES - 1): content = Content.objects.create( project=project, @@ -757,8 +742,8 @@ def test_queue_content_skill_executes_summary_inline_when_eager( def test_queue_topic_centroid_recompute_enqueues_background_task( source_plugin_context, mocker ): - cache_add_mock = mocker.patch("core.tasks.cache.add", return_value=True) - delay_mock = mocker.patch("core.tasks.recompute_topic_centroid.delay") + cache_add_mock = mocker.patch("trends.tasks.cache.add", return_value=True) + delay_mock = mocker.patch("trends.tasks.recompute_topic_centroid.delay") queued = queue_topic_centroid_recompute(source_plugin_context.project.id) @@ -770,8 +755,8 @@ def test_queue_topic_centroid_recompute_enqueues_background_task( def test_queue_topic_centroid_recompute_skips_duplicate_queue_attempts( source_plugin_context, mocker ): - mocker.patch("core.tasks.cache.add", return_value=False) - delay_mock = mocker.patch("core.tasks.recompute_topic_centroid.delay") + mocker.patch("trends.tasks.cache.add", return_value=False) + delay_mock = mocker.patch("trends.tasks.recompute_topic_centroid.delay") queued = queue_topic_centroid_recompute(source_plugin_context.project.id) @@ -949,7 +934,7 @@ def test_ingest_source_config_truncates_fields_and_processes_inline( plugin_name=SourcePluginName.RSS, config={"feed_url": "https://example.com/feed.xml"}, ) - mocker.patch("core.tasks.get_plugin_for_source_config", return_value=plugin) + mocker.patch("ingestion.tasks.get_plugin_for_source_config", return_value=plugin) upsert_mock = mocker.patch("core.tasks.upsert_content_embedding") process_mock = mocker.patch("core.tasks.process_content") delay_mock = mocker.patch("core.tasks.process_content.delay") diff --git a/core/tests/test_utils.py b/core/tests/test_utils.py index bbc12168..7f76e50d 100644 --- a/core/tests/test_utils.py +++ b/core/tests/test_utils.py @@ -1,8 +1,7 @@ import pytest -from django.contrib.auth.models import Group -from core.models import Project, ProjectConfig from 
core.utils import dashboard_callback +from projects.models import Project, ProjectConfig pytestmark = pytest.mark.django_db @@ -16,17 +15,12 @@ def test_dashboard_callback_uses_zero_when_no_project_configs(): assert result["avg_authority_weight"] == 0 -def test_dashboard_callback_rounds_average_authority_weight(django_user_model): - user = django_user_model.objects.create_user( - username="utils-owner", password="testpass123" - ) - group = Group.objects.create(name="utils-team") - user.groups.add(group) +def test_dashboard_callback_rounds_average_authority_weight(): project_one = Project.objects.create( - name="Utils Project 1", group=group, topic_description="Infra" + name="Utils Project 1", topic_description="Infra" ) project_two = Project.objects.create( - name="Utils Project 2", group=group, topic_description="Data" + name="Utils Project 2", topic_description="Data" ) ProjectConfig.objects.create(project=project_one, upvote_authority_weight=0.1234) ProjectConfig.objects.create(project=project_two, upvote_authority_weight=0.5678) diff --git a/core/urls.py b/core/urls.py index 5744a8fa..77888129 100644 --- a/core/urls.py +++ b/core/urls.py @@ -1,6 +1,7 @@ from django.urls import path -from core.views import confirm_newsletter_sender_view, healthz_view, readyz_view +from core.views import healthz_view, readyz_view +from newsletters.views import confirm_newsletter_sender_view urlpatterns = [ path("healthz/", healthz_view, name="healthz"), diff --git a/core/utils.py b/core/utils.py index 0a15ff70..9abbadb2 100644 --- a/core/utils.py +++ b/core/utils.py @@ -1,6 +1,6 @@ from django.db.models import Avg -from .models import ProjectConfig +from projects.models import ProjectConfig def dashboard_callback(request, context): diff --git a/core/views.py b/core/views.py index 33e68c10..3055ef59 100644 --- a/core/views.py +++ b/core/views.py @@ -1,18 +1,13 @@ -"""Operational and newsletter-intake views used outside the REST API.""" +"""Operational views used outside the REST 
API.""" from http import HTTPStatus from typing import cast from django.conf import settings as django_settings from django.db import connection -from django.http import HttpRequest, JsonResponse -from django.shortcuts import get_object_or_404 -from django.utils import timezone -from django.views.decorators.http import require_GET +from django.http import JsonResponse from qdrant_client import QdrantClient -from core.models import IntakeAllowlist, NewsletterIntake, NewsletterIntakeStatus -from core.newsletters import queue_newsletter_intake from core.settings_types import CoreSettings settings = cast(CoreSettings, django_settings) @@ -64,34 +59,3 @@ def _check_qdrant() -> bool: except Exception: return False return True - - -@require_GET -def confirm_newsletter_sender_view(request: HttpRequest, token: str): - """Confirm a sender and queue any pending newsletter intake rows. - - Args: - request: Incoming confirmation request. - token: Confirmation token stored on the allowlist entry. - - Returns: - A JSON response showing that the sender was confirmed and how many pending - intake rows were queued for processing. 
- """ - - allowlist = get_object_or_404(IntakeAllowlist, confirmation_token=token) - if allowlist.confirmed_at is None: - allowlist.confirmed_at = timezone.now() - allowlist.save(update_fields=["confirmed_at"]) - - pending_intake_ids = list( - NewsletterIntake.objects.filter( - project=allowlist.project, - sender_email=allowlist.sender_email, - status=NewsletterIntakeStatus.PENDING, - ).values_list("id", flat=True) - ) - for intake_id in pending_intake_ids: - queue_newsletter_intake(intake_id) - - return JsonResponse({"status": "confirmed", "queued": len(pending_intake_ids)}) diff --git a/entities/__init__.py b/entities/__init__.py new file mode 100644 index 00000000..f30b9d91 --- /dev/null +++ b/entities/__init__.py @@ -0,0 +1 @@ +"""Entities app package.""" diff --git a/entities/admin.py b/entities/admin.py new file mode 100644 index 00000000..32b7fb4f --- /dev/null +++ b/entities/admin.py @@ -0,0 +1,256 @@ +"""Admin configuration for entity-domain models.""" + +from django.contrib import admin, messages +from django.db.models import QuerySet +from django.http import HttpRequest +from django.utils.html import format_html + +from core.entity_extraction import ( + accept_entity_candidate, + merge_entity_candidate, + reject_entity_candidate, +) +from entities.models import ( + Entity, + EntityAuthoritySnapshot, + EntityCandidate, + EntityCandidateStatus, + EntityMention, +) + + +def _score_to_percent(value): + """Normalize score-like values for display as percentages.""" + + if value is None: + return None + numeric_value = float(value) + if -1.0 <= numeric_value <= 1.0: + return numeric_value * 100 + return numeric_value + + +def _score_color(value) -> str: + """Return the admin display color for a score-like value.""" + + percent_value = _score_to_percent(value) + if percent_value is None: + return "inherit" + if percent_value >= 75: + return "green" + if percent_value >= 40: + return "orange" + return "red" + + +@admin.register(Entity) +class 
EntityAdmin(admin.ModelAdmin): + """Admin configuration for tracked people, vendors, and organizations.""" + + list_display = ( + "name", + "project", + "type", + "colored_score", + "latest_snapshot_summary", + "created_at", + ) + search_fields = ("name", "project__name") + + @admin.display(description="Authority Score", ordering="authority_score") + def colored_score(self, obj): + """Render the authority score with a traffic-light color cue.""" + + percent_value = _score_to_percent(obj.authority_score) + color = _score_color(obj.authority_score) + + return format_html( + '{}', + color, + f"{percent_value:.1f}%", + ) + + @admin.display(description="Latest Snapshot") + def latest_snapshot_summary(self, obj): + """Show the latest authority component breakdown for an entity.""" + + latest_snapshot = obj.authority_snapshots.order_by("-computed_at").first() + if latest_snapshot is None: + return "-" + mention_value = f"{_score_to_percent(latest_snapshot.mention_component):.1f}%" + feedback_value = f"{_score_to_percent(latest_snapshot.feedback_component):.1f}%" + duplicate_value = ( + f"{_score_to_percent(latest_snapshot.duplicate_component):.1f}%" + ) + decayed_value = f"{_score_to_percent(latest_snapshot.decayed_prior):.1f}%" + return format_html( + ( + '' + "M {} | F {} | D {} | Carry {}" + ), + mention_value, + feedback_value, + duplicate_value, + decayed_value, + mention_value, + feedback_value, + duplicate_value, + decayed_value, + ) + + +@admin.register(EntityAuthoritySnapshot) +class EntityAuthoritySnapshotAdmin(admin.ModelAdmin): + """Admin view for persisted authority-score history.""" + + list_display = ( + "entity", + "project", + "display_final_score", + "display_components", + "computed_at", + ) + list_filter = (("project", admin.RelatedOnlyFieldListFilter), "computed_at") + search_fields = ("entity__name", "project__name") + autocomplete_fields = ("entity", "project") + + @admin.display(description="Final Score", ordering="final_score") + def 
display_final_score(self, obj): + """Render the recomputed final authority score as a percentage.""" + + percent_value = _score_to_percent(obj.final_score) + return format_html( + '{}', + _score_color(obj.final_score), + f"{percent_value:.1f}%", + ) + + @admin.display(description="Components") + def display_components(self, obj): + """Render the stored authority components in a compact summary.""" + + mention_value = f"{_score_to_percent(obj.mention_component):.1f}%" + feedback_value = f"{_score_to_percent(obj.feedback_component):.1f}%" + duplicate_value = f"{_score_to_percent(obj.duplicate_component):.1f}%" + decayed_value = f"{_score_to_percent(obj.decayed_prior):.1f}%" + return format_html( + "M {} | F {} | D {} | Carry {}", + mention_value, + feedback_value, + duplicate_value, + decayed_value, + ) + + +@admin.register(EntityMention) +class EntityMentionAdmin(admin.ModelAdmin): + """Admin view for extracted tracked-entity mentions.""" + + list_display = ( + "entity", + "project", + "content", + "role", + "sentiment", + "confidence", + "created_at", + ) + list_filter = ("role", "sentiment", ("project", admin.RelatedOnlyFieldListFilter)) + search_fields = ("entity__name", "content__title", "span") + autocomplete_fields = ("entity", "content", "project") + + +@admin.register(EntityCandidate) +class EntityCandidateAdmin(admin.ModelAdmin): + """Admin view for candidate entities awaiting human review.""" + + actions = [ + "accept_selected_candidates", + "reject_selected_candidates", + "merge_into_existing_entities", + ] + list_display = ( + "name", + "project", + "suggested_type", + "occurrence_count", + "status", + "merged_into", + "first_seen_in", + "created_at", + ) + list_filter = ( + "status", + "suggested_type", + ("project", admin.RelatedOnlyFieldListFilter), + ) + search_fields = ("name", "project__name", "merged_into__name") + autocomplete_fields = ("project", "first_seen_in", "merged_into") + ordering = ("-occurrence_count", "name") + + 
@admin.action(description="Accept selected candidates") + def accept_selected_candidates(self, request, queryset): + """Promote selected candidates into tracked entities.""" + + accepted_count = 0 + for candidate in queryset.select_related("project"): + if candidate.status == EntityCandidateStatus.ACCEPTED: + continue + accept_entity_candidate(candidate) + accepted_count += 1 + self.message_user( + request, + f"Accepted {accepted_count} entity candidate(s).", + messages.SUCCESS, + ) + + @admin.action(description="Reject selected candidates") + def reject_selected_candidates(self, request, queryset): + """Mark selected candidates as rejected.""" + + rejected_count = 0 + for candidate in queryset: + if candidate.status == EntityCandidateStatus.REJECTED: + continue + reject_entity_candidate(candidate) + rejected_count += 1 + self.message_user( + request, + f"Rejected {rejected_count} entity candidate(s).", + messages.SUCCESS, + ) + + @admin.action(description="Merge selected candidates into existing entities") + def merge_into_existing_entities( + self, + request: HttpRequest, + queryset: QuerySet[EntityCandidate], + ) -> None: + """Merge candidates when a same-name entity already exists in the project.""" + + merged_count = 0 + unresolved_names: list[str] = [] + for candidate in queryset.select_related("project"): + matching_entities = Entity.objects.filter( + project=candidate.project, + name__iexact=candidate.name, + ) + if matching_entities.count() != 1: + unresolved_names.append(candidate.name) + continue + merge_entity_candidate(candidate, matching_entities.get()) + merged_count += 1 + + if merged_count: + self.message_user( + request, + f"Merged {merged_count} entity candidate(s) into existing entities.", + messages.SUCCESS, + ) + if unresolved_names: + self.message_user( + request, + "No unique same-name entity match was available for: " + + ", ".join(sorted(unresolved_names)), + messages.WARNING, + ) diff --git a/entities/api.py b/entities/api.py new file 
mode 100644 index 00000000..c54c1d8f --- /dev/null +++ b/entities/api.py @@ -0,0 +1,232 @@ +"""Entity-domain API viewsets kept under the existing nested project routes.""" + +from django.db.models import Count, Prefetch +from drf_spectacular.utils import OpenApiParameter, extend_schema +from rest_framework import serializers, viewsets +from rest_framework.decorators import action +from rest_framework.filters import OrderingFilter +from rest_framework.response import Response + +from core.api import ( + AUTHENTICATION_REQUIRED_RESPONSE, + ProjectOwnedQuerysetMixin, + build_crud_action_overrides, + document_project_owned_viewset, +) +from core.entity_extraction import ( + accept_entity_candidate, + merge_entity_candidate, + reject_entity_candidate, +) +from core.permissions import ( + IsProjectAdmin, + IsProjectContributor, + IsProjectMember, + IsProjectMemberWritable, +) +from entities.models import Entity, EntityCandidate, EntityMention +from entities.serializers import ( + EntityAuthoritySnapshotSerializer, + EntityCandidateMergeSerializer, + EntityCandidateSerializer, + EntityMentionSummarySerializer, + EntitySerializer, +) + + +@document_project_owned_viewset( + resource_plural="entities", + resource_singular="entity", + create_description="Create a new tracked entity for the selected project, such as a company, person, or organization.", + tag="Entity Catalog", + action_overrides=build_crud_action_overrides( + EntitySerializer, + resource_plural="entities for the selected project", + resource_singular="entity", + ), +) +class EntityViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet): + """Manage tracked entities associated with a project.""" + + serializer_class = EntitySerializer + filter_backends = [OrderingFilter] + ordering_fields = ["authority_score", "created_at", "name"] + ordering = ["name"] + queryset = ( + Entity.objects.select_related("project") + .annotate(mention_count=Count("mentions", distinct=True)) + .prefetch_related( + Prefetch( + 
"mentions", + queryset=EntityMention.objects.select_related("content").order_by( + "-created_at" + ), + to_attr="prefetched_mentions", + ) + ) + ) + + def get_permissions(self): + """Apply read, contributor-write, and admin-delete permissions for entities.""" + + if self.action == "destroy": + permission_classes = [IsProjectAdmin] + elif self.action in {"create", "update", "partial_update"}: + permission_classes = [IsProjectMemberWritable] + else: + permission_classes = [IsProjectMember] + return [permission() for permission in permission_classes] + + @extend_schema( + summary="List entity mentions", + description="Return the extracted mention history for one tracked entity inside the selected project.", + request=None, + responses={ + 200: EntityMentionSummarySerializer(many=True), + 403: AUTHENTICATION_REQUIRED_RESPONSE, + }, + tags=["Entity Catalog"], + ) + @action(detail=True, methods=["get"], url_path="mentions") + def mentions(self, request, *args, **kwargs): + """Return the extracted mentions for the selected entity.""" + + entity = self.get_object() + mentions = entity.mentions.select_related("content").order_by("-created_at") + serializer = EntityMentionSummarySerializer(mentions, many=True) + return Response(serializer.data) + + @extend_schema( + summary="List authority history", + description=( + "Return persisted authority-score snapshots for one tracked entity. " + "Use the optional limit query parameter to cap the number of snapshots returned." 
+ ), + parameters=[ + OpenApiParameter( + name="limit", + type=int, + location=OpenApiParameter.QUERY, + description="Maximum number of authority snapshots to return.", + required=False, + ) + ], + request=None, + responses={ + 200: EntityAuthoritySnapshotSerializer(many=True), + 403: AUTHENTICATION_REQUIRED_RESPONSE, + }, + tags=["Entity Catalog"], + ) + @action(detail=True, methods=["get"], url_path="authority_history") + def authority_history(self, request, *args, **kwargs): + """Return recent authority snapshots for the selected entity.""" + + entity = self.get_object() + snapshots = entity.authority_snapshots.order_by("-computed_at") + limit_param = request.query_params.get("limit") + if limit_param: + try: + limit = max(1, min(int(limit_param), 100)) + except ValueError as exc: + raise serializers.ValidationError( + {"limit": "Limit must be an integer between 1 and 100."} + ) from exc + snapshots = snapshots[:limit] + serializer = EntityAuthoritySnapshotSerializer(snapshots, many=True) + return Response(serializer.data) + + +@document_project_owned_viewset( + resource_plural="entity candidates", + resource_singular="entity candidate", + create_description="Entity candidates are created by the pipeline and can be reviewed through dedicated actions.", + tag="Entity Catalog", + action_overrides=build_crud_action_overrides( + EntityCandidateSerializer, + resource_plural="entity candidates for the selected project", + resource_singular="entity candidate", + ), +) +class EntityCandidateViewSet(ProjectOwnedQuerysetMixin, viewsets.ReadOnlyModelViewSet): + """Inspect and resolve entity candidates surfaced by entity extraction.""" + + serializer_class = EntityCandidateSerializer + queryset = EntityCandidate.objects.select_related( + "project", "first_seen_in", "merged_into" + ) + + def get_permissions(self): + """Allow all members to read candidates and contributors to resolve them.""" + + if self.action in {"accept", "reject", "merge"}: + permission_classes = 
[IsProjectContributor] + else: + permission_classes = [IsProjectMember] + return [permission() for permission in permission_classes] + + @extend_schema( + summary="Accept entity candidate", + description="Promote a pending entity candidate into a tracked entity and backfill recent mentions.", + request=None, + responses={ + 200: EntityCandidateSerializer, + 403: AUTHENTICATION_REQUIRED_RESPONSE, + }, + tags=["Entity Catalog"], + ) + @action(detail=True, methods=["post"], url_path="accept") + def accept(self, request, *args, **kwargs): + """Accept an entity candidate and return its updated representation.""" + + candidate = self.get_object() + accept_entity_candidate(candidate) + candidate.refresh_from_db() + serializer = self.get_serializer(candidate) + return Response(serializer.data) + + @extend_schema( + summary="Reject entity candidate", + description="Mark a pending entity candidate as rejected without creating a tracked entity.", + request=None, + responses={ + 200: EntityCandidateSerializer, + 403: AUTHENTICATION_REQUIRED_RESPONSE, + }, + tags=["Entity Catalog"], + ) + @action(detail=True, methods=["post"], url_path="reject") + def reject(self, request, *args, **kwargs): + """Reject an entity candidate and return its updated representation.""" + + candidate = self.get_object() + reject_entity_candidate(candidate) + candidate.refresh_from_db() + serializer = self.get_serializer(candidate) + return Response(serializer.data) + + @extend_schema( + summary="Merge entity candidate", + description="Merge a pending entity candidate into an existing tracked entity from the same project.", + request=EntityCandidateMergeSerializer, + responses={ + 200: EntityCandidateSerializer, + 400: EntityCandidateMergeSerializer, + 403: AUTHENTICATION_REQUIRED_RESPONSE, + }, + tags=["Entity Catalog"], + ) + @action(detail=True, methods=["post"], url_path="merge") + def merge(self, request, *args, **kwargs): + """Merge an entity candidate into an existing tracked entity.""" + + 
candidate = self.get_object() + serializer = EntityCandidateMergeSerializer( + data=request.data, + context=self.get_serializer_context(), + ) + serializer.is_valid(raise_exception=True) + merge_entity_candidate(candidate, serializer.validated_data["merged_into"]) + candidate.refresh_from_db() + response_serializer = self.get_serializer(candidate) + return Response(response_serializer.data) diff --git a/entities/api_urls.py b/entities/api_urls.py new file mode 100644 index 00000000..49e6a244 --- /dev/null +++ b/entities/api_urls.py @@ -0,0 +1,16 @@ +"""API route registration for entity resources.""" + +from rest_framework_nested.routers import NestedSimpleRouter + +from entities.api import EntityCandidateViewSet, EntityViewSet + + +def register_project_routes(project_router: NestedSimpleRouter) -> None: + """Register nested entity endpoints.""" + + project_router.register(r"entities", EntityViewSet, basename="project-entity") + project_router.register( + r"entity-candidates", + EntityCandidateViewSet, + basename="project-entity-candidate", + ) diff --git a/entities/apps.py b/entities/apps.py new file mode 100644 index 00000000..cf7ba8e8 --- /dev/null +++ b/entities/apps.py @@ -0,0 +1,7 @@ +from django.apps import AppConfig + + +class EntitiesConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "entities" + verbose_name = "Entities" diff --git a/entities/migrations/0001_initial.py b/entities/migrations/0001_initial.py new file mode 100644 index 00000000..fec1dd5c --- /dev/null +++ b/entities/migrations/0001_initial.py @@ -0,0 +1,303 @@ +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + initial = True + + dependencies = [ + ("projects", "0003_remove_project_group"), + ("core", "0010_move_project_models_to_projects"), + ] + + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.CreateModel( + name="Entity", + fields=[ + ( + "id", + 
models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("name", models.CharField(max_length=255)), + ( + "type", + models.CharField( + choices=[ + ("individual", "Individual"), + ("vendor", "Vendor"), + ("organization", "Organization"), + ], + max_length=32, + ), + ), + ("description", models.TextField(blank=True)), + ("authority_score", models.FloatField(default=0.5)), + ("website_url", models.URLField(blank=True)), + ("github_url", models.URLField(blank=True)), + ("linkedin_url", models.URLField(blank=True)), + ( + "bluesky_handle", + models.CharField(blank=True, max_length=255), + ), + ( + "mastodon_handle", + models.CharField(blank=True, max_length=255), + ), + ( + "twitter_handle", + models.CharField(blank=True, max_length=255), + ), + ("created_at", models.DateTimeField(auto_now_add=True)), + ( + "project", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="entities", + to="projects.project", + ), + ), + ], + options={ + "ordering": ["name"], + "db_table": "core_entity", + "constraints": [ + models.UniqueConstraint( + fields=("project", "name"), + name="core_entity_unique_project_name", + ) + ], + }, + ), + migrations.CreateModel( + name="EntityAuthoritySnapshot", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("computed_at", models.DateTimeField(auto_now_add=True)), + ("mention_component", models.FloatField()), + ("feedback_component", models.FloatField()), + ("duplicate_component", models.FloatField()), + ("decayed_prior", models.FloatField()), + ("final_score", models.FloatField()), + ( + "entity", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="authority_snapshots", + to="entities.entity", + ), + ), + ( + "project", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="entity_authority_snapshots", + 
to="projects.project", + ), + ), + ], + options={ + "ordering": ["-computed_at"], + "db_table": "core_entityauthoritysnapshot", + "indexes": [ + models.Index( + fields=["entity", "-computed_at"], + name="core_entity_entity__9fe820_idx", + ), + models.Index( + fields=["project", "-computed_at"], + name="core_entity_project_a31e41_idx", + ), + ], + }, + ), + migrations.CreateModel( + name="EntityMention", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "role", + models.CharField( + choices=[ + ("author", "Author"), + ("subject", "Subject"), + ("quoted", "Quoted"), + ("mentioned", "Mentioned"), + ], + max_length=16, + ), + ), + ( + "sentiment", + models.CharField( + blank=True, + choices=[ + ("positive", "Positive"), + ("neutral", "Neutral"), + ("negative", "Negative"), + ], + default="", + max_length=16, + ), + ), + ("span", models.TextField(blank=True)), + ("confidence", models.FloatField(default=0.0)), + ("created_at", models.DateTimeField(auto_now_add=True)), + ( + "content", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="entity_mentions", + to="core.content", + ), + ), + ( + "entity", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="mentions", + to="entities.entity", + ), + ), + ( + "project", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="entity_mentions", + to="projects.project", + ), + ), + ], + options={ + "ordering": ["-created_at"], + "db_table": "core_entitymention", + "indexes": [ + models.Index( + fields=["entity", "created_at"], + name="core_entity_entity__8ba01e_idx", + ), + models.Index( + fields=["project", "created_at"], + name="core_entity_project_dabde7_idx", + ), + ], + "constraints": [ + models.UniqueConstraint( + fields=("content", "entity", "role"), + name="core_entitymention_unique_content_entity_role", + ) + ], + }, + ), + 
migrations.CreateModel( + name="EntityCandidate", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("name", models.CharField(max_length=255)), + ( + "suggested_type", + models.CharField( + choices=[ + ("individual", "Individual"), + ("vendor", "Vendor"), + ("organization", "Organization"), + ], + max_length=32, + ), + ), + ("occurrence_count", models.IntegerField(default=1)), + ( + "status", + models.CharField( + choices=[ + ("pending", "Pending"), + ("accepted", "Accepted"), + ("rejected", "Rejected"), + ("merged", "Merged"), + ], + default="pending", + max_length=16, + ), + ), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ( + "first_seen_in", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="entity_candidates", + to="core.content", + ), + ), + ( + "merged_into", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="merged_entity_candidates", + to="entities.entity", + ), + ), + ( + "project", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="entity_candidates", + to="projects.project", + ), + ), + ], + options={ + "ordering": ["-occurrence_count", "name"], + "db_table": "core_entitycandidate", + "indexes": [ + models.Index( + fields=["project", "status", "occurrence_count"], + name="core_entity_project_4c32ec_idx", + ) + ], + "constraints": [ + models.UniqueConstraint( + fields=("project", "name"), + name="core_entitycandidate_unique_project_name", + ) + ], + }, + ), + ], + database_operations=[], + ) + ] diff --git a/entities/migrations/0002_alter_entitycandidate_first_seen_in_and_more.py b/entities/migrations/0002_alter_entitycandidate_first_seen_in_and_more.py new file mode 100644 index 00000000..64ba7afd --- /dev/null +++ 
# Readable rendering of the ``entities`` app modules carried by this patch.
# The three sections below are separate modules, shown in patch order.
from __future__ import annotations  # belongs to entities/models.py; hoisted because it must come first

# ---------------------------------------------------------------------------
# entities/migrations/0002_alter_entitycandidate_first_seen_in_and_more.py
# ---------------------------------------------------------------------------
import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
    """Point two content foreign keys at ``content.content`` in migration state.

    ``EntityCandidate.first_seen_in`` and ``EntityMention.content`` are altered
    inside ``SeparateDatabaseAndState`` with an empty ``database_operations``
    list, so only Django's recorded model state changes; no SQL is run.
    """

    dependencies = [
        ("content", "0001_initial"),
        ("entities", "0001_initial"),
    ]

    operations = [
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.AlterField(
                    model_name="entitycandidate",
                    name="first_seen_in",
                    field=models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name="entity_candidates",
                        to="content.content",
                    ),
                ),
                migrations.AlterField(
                    model_name="entitymention",
                    name="content",
                    field=models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="entity_mentions",
                        to="content.content",
                    ),
                ),
            ],
            # State-only change: nothing is executed against the database.
            database_operations=[],
        )
    ]


# ---------------------------------------------------------------------------
# entities/migrations/__init__.py
# ---------------------------------------------------------------------------
# """Entities app migrations package."""


# ---------------------------------------------------------------------------
# entities/models.py
# ---------------------------------------------------------------------------
# """Entity-domain models split out from the historical core app."""
#
# Every model below pins ``db_table`` (and its index/constraint names) to a
# ``core_``-prefixed name.

from django.db import models


class EntityType(models.TextChoices):
    """Supported types of tracked entities within a project."""

    INDIVIDUAL = "individual", "Individual"
    VENDOR = "vendor", "Vendor"
    ORGANIZATION = "organization", "Organization"


class EntityMentionRole(models.TextChoices):
    """Supported roles for how an entity appears inside content."""

    AUTHOR = "author", "Author"
    SUBJECT = "subject", "Subject"
    QUOTED = "quoted", "Quoted"
    MENTIONED = "mentioned", "Mentioned"


class EntityMentionSentiment(models.TextChoices):
    """Supported editorial sentiment labels for entity mentions."""

    POSITIVE = "positive", "Positive"
    NEUTRAL = "neutral", "Neutral"
    NEGATIVE = "negative", "Negative"


class EntityCandidateStatus(models.TextChoices):
    """Review workflow states for extracted entity candidates."""

    PENDING = "pending", "Pending"
    ACCEPTED = "accepted", "Accepted"
    REJECTED = "rejected", "Rejected"
    MERGED = "merged", "Merged"


class Entity(models.Model):
    """Represents a person, vendor, or organization tracked inside a project."""

    project = models.ForeignKey(
        "projects.Project", on_delete=models.CASCADE, related_name="entities"
    )
    name = models.CharField(max_length=255)
    type = models.CharField(max_length=32, choices=EntityType.choices)
    description = models.TextField(blank=True)
    authority_score = models.FloatField(default=0.5)
    website_url = models.URLField(blank=True)
    github_url = models.URLField(blank=True)
    linkedin_url = models.URLField(blank=True)
    bluesky_handle = models.CharField(max_length=255, blank=True)
    mastodon_handle = models.CharField(max_length=255, blank=True)
    twitter_handle = models.CharField(max_length=255, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        ordering = ["name"]
        db_table = "core_entity"
        constraints = [
            # An entity name may appear only once per project.
            models.UniqueConstraint(
                fields=["project", "name"], name="core_entity_unique_project_name"
            ),
        ]

    def __str__(self) -> str:
        return self.name


class EntityAuthoritySnapshot(models.Model):
    """Captures one authority-score recomputation for a tracked entity."""

    entity = models.ForeignKey(
        Entity, on_delete=models.CASCADE, related_name="authority_snapshots"
    )
    project = models.ForeignKey(
        "projects.Project",
        on_delete=models.CASCADE,
        related_name="entity_authority_snapshots",
    )
    computed_at = models.DateTimeField(auto_now_add=True)
    mention_component = models.FloatField()
    feedback_component = models.FloatField()
    duplicate_component = models.FloatField()
    decayed_prior = models.FloatField()
    final_score = models.FloatField()

    class Meta:
        # Newest snapshot first; both indexes use the same descending order.
        ordering = ["-computed_at"]
        db_table = "core_entityauthoritysnapshot"
        indexes = [
            models.Index(
                fields=["entity", "-computed_at"],
                name="core_entity_entity__9fe820_idx",
            ),
            models.Index(
                fields=["project", "-computed_at"],
                name="core_entity_project_a31e41_idx",
            ),
        ]

    def __str__(self) -> str:
        return f"Authority snapshot for {self.entity.name}"


class EntityMention(models.Model):
    """Represents one tracked-entity mention detected in a content item."""

    content = models.ForeignKey(
        "content.Content", on_delete=models.CASCADE, related_name="entity_mentions"
    )
    entity = models.ForeignKey(
        Entity, on_delete=models.CASCADE, related_name="mentions"
    )
    project = models.ForeignKey(
        "projects.Project", on_delete=models.CASCADE, related_name="entity_mentions"
    )
    role = models.CharField(max_length=16, choices=EntityMentionRole.choices)
    # Sentiment is optional: blank is allowed and the default is "".
    sentiment = models.CharField(
        max_length=16,
        choices=EntityMentionSentiment.choices,
        blank=True,
        default="",
    )
    span = models.TextField(blank=True)
    confidence = models.FloatField(default=0.0)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        ordering = ["-created_at"]
        db_table = "core_entitymention"
        constraints = [
            # One row per (content, entity, role) combination.
            models.UniqueConstraint(
                fields=["content", "entity", "role"],
                name="core_entitymention_unique_content_entity_role",
            )
        ]
        indexes = [
            models.Index(
                fields=["entity", "created_at"],
                name="core_entity_entity__8ba01e_idx",
            ),
            models.Index(
                fields=["project", "created_at"],
                name="core_entity_project_dabde7_idx",
            ),
        ]

    def __str__(self) -> str:
        return f"{self.entity.name} in {self.content.title}"


class EntityCandidate(models.Model):
    """Stores an extracted named entity awaiting human confirmation."""

    project = models.ForeignKey(
        "projects.Project", on_delete=models.CASCADE, related_name="entity_candidates"
    )
    name = models.CharField(max_length=255)
    suggested_type = models.CharField(max_length=32, choices=EntityType.choices)
    # Nullable: deleting the referenced content sets this to NULL (SET_NULL).
    first_seen_in = models.ForeignKey(
        "content.Content",
        null=True,
        blank=True,
        on_delete=models.SET_NULL,
        related_name="entity_candidates",
    )
    occurrence_count = models.IntegerField(default=1)
    status = models.CharField(
        max_length=16,
        choices=EntityCandidateStatus.choices,
        default=EntityCandidateStatus.PENDING,
    )
    # Nullable: deleting the target entity sets this to NULL (SET_NULL).
    merged_into = models.ForeignKey(
        Entity,
        null=True,
        blank=True,
        on_delete=models.SET_NULL,
        related_name="merged_entity_candidates",
    )
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        # Most frequently seen candidates first, ties broken by name.
        ordering = ["-occurrence_count", "name"]
        db_table = "core_entitycandidate"
        constraints = [
            models.UniqueConstraint(
                fields=["project", "name"],
                name="core_entitycandidate_unique_project_name",
            )
        ]
        indexes = [
            models.Index(
                fields=["project", "status", "occurrence_count"],
                name="core_entity_project_4c32ec_idx",
            ),
        ]

    def __str__(self) -> str:
        return self.name


# ---------------------------------------------------------------------------
# entities/serializers.py
# ---------------------------------------------------------------------------
# """DRF serializers for entity-domain models."""

from rest_framework import serializers

from core.serializer_mixins import ProjectScopedSerializerMixin
from entities.models import (
    Entity,
    EntityAuthoritySnapshot,
    EntityCandidate,
    EntityMention,
)


class EntitySerializer(ProjectScopedSerializerMixin, serializers.ModelSerializer):
    """Serialize tracked entities for a project."""

    # Not a model field on Entity; presumably supplied as a queryset
    # annotation by the caller -- confirm against the view.
    mention_count = serializers.IntegerField(read_only=True)
    latest_mentions = serializers.SerializerMethodField()

    class Meta:
        model = Entity
        fields = [
            "id",
            "project",
            "name",
            "type",
            "description",
            "authority_score",
            "website_url",
            "github_url",
            "linkedin_url",
            "bluesky_handle",
            "mastodon_handle",
            "twitter_handle",
            "mention_count",
            "latest_mentions",
            "created_at",
        ]
        read_only_fields = ["id", "project", "created_at"]

    def get_latest_mentions(self, obj):
        """Return a compact summary of the most recent mentions for an entity.

        Uses ``obj.prefetched_mentions`` when that attribute is present;
        otherwise queries ``obj.mentions`` newest-first with ``content``
        joined. At most three mentions are serialized.
        """

        mentions = getattr(obj, "prefetched_mentions", None)
        if mentions is None:
            mentions = obj.mentions.select_related("content").order_by("-created_at")
        return EntityMentionSummarySerializer(mentions[:3], many=True).data


class EntityAuthoritySnapshotSerializer(serializers.ModelSerializer):
    """Serialize one persisted authority recomputation for an entity."""

    class Meta:
        model = EntityAuthoritySnapshot
        fields = [
            "id",
            "entity",
            "project",
            "computed_at",
            "mention_component",
            "feedback_component",
            "duplicate_component",
            "decayed_prior",
            "final_score",
        ]
        # Every exposed field is read-only.
        read_only_fields = fields


class EntityMentionSummarySerializer(serializers.ModelSerializer):
    """Serialize a compact entity-mention summary for frontend display."""

    content_id = serializers.IntegerField(read_only=True)
    content_title = serializers.CharField(source="content.title", read_only=True)

    class Meta:
        model = EntityMention
        fields = [
            "id",
            "content_id",
            "content_title",
            "role",
            "sentiment",
            "span",
            "confidence",
            "created_at",
        ]
        read_only_fields = fields


class EntityCandidateSerializer(
    ProjectScopedSerializerMixin, serializers.ModelSerializer
):
    """Serialize extracted entity candidates awaiting editorial review."""

    # ``first_seen_in`` and ``merged_into`` are nullable foreign keys. Without
    # ``allow_null`` a dotted ``source`` through a NULL relation makes DRF skip
    # the field, so the key would vanish from the payload. With it, the key is
    # always present and is ``null`` when the relation is unset.
    first_seen_title = serializers.CharField(
        source="first_seen_in.title", read_only=True, allow_null=True
    )
    merged_into_name = serializers.CharField(
        source="merged_into.name", read_only=True, allow_null=True
    )

    class Meta:
        model = EntityCandidate
        fields = [
            "id",
            "project",
            "name",
            "suggested_type",
            "first_seen_in",
            "first_seen_title",
            "occurrence_count",
            "status",
            "merged_into",
            "merged_into_name",
            "created_at",
            "updated_at",
        ]
        read_only_fields = fields


class EntityCandidateMergeSerializer(
    ProjectScopedSerializerMixin, serializers.Serializer
):
    """Validate merge requests for entity candidates."""

    # Declared with an empty queryset; presumably narrowed to the current
    # project's entities by ProjectScopedSerializerMixin -- confirm there.
    merged_into = serializers.PrimaryKeyRelatedField(queryset=Entity.objects.none())
"https://registry.npmjs.org/react-dropzone/-/react-dropzone-15.0.0.tgz", + "integrity": "sha512-lGjYV/EoqEjEWPnmiSvH4v5IoIAwQM2W4Z1C0Q/Pw2xD0eVzKPS359BQTUMum+1fa0kH2nrKjuavmTPOGhpLPg==", + "license": "MIT", + "dependencies": { + "attr-accept": "^2.2.4", + "file-selector": "^2.1.0", + "prop-types": "^15.8.1" + }, + "engines": { + "node": ">= 10.13" + }, + "peerDependencies": { + "react": ">= 16.8 || 18.0.0" + } + }, "node_modules/react-hook-form": { "version": "7.74.0", "resolved": "https://registry.npmjs.org/react-hook-form/-/react-hook-form-7.74.0.tgz", diff --git a/frontend/package.json b/frontend/package.json index fa5f07d3..5aa4253e 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -33,6 +33,7 @@ "prettier": "^3.8.3", "react": "19.2.5", "react-dom": "19.2.5", + "react-dropzone": "^15.0.0", "react-hook-form": "^7.74.0", "tailwindcss": "^4.2.4", "typescript-eslint": "^8.59.1" diff --git a/frontend/src/app/__tests__/page.test.tsx b/frontend/src/app/__tests__/page.test.tsx index 08e1fe6e..e32fe613 100644 --- a/frontend/src/app/__tests__/page.test.tsx +++ b/frontend/src/app/__tests__/page.test.tsx @@ -91,9 +91,9 @@ function createProject(overrides: Partial = {}): Project { return { id: 1, name: "AI Weekly", - group: 10, topic_description: "AI news", content_retention_days: 30, + user_role: "admin", created_at: "2026-04-01T00:00:00Z", ...overrides, } diff --git a/frontend/src/app/admin/health/__tests__/page.test.tsx b/frontend/src/app/admin/health/__tests__/page.test.tsx index 7c030e23..f5926f9b 100644 --- a/frontend/src/app/admin/health/__tests__/page.test.tsx +++ b/frontend/src/app/admin/health/__tests__/page.test.tsx @@ -81,9 +81,9 @@ function createProject(overrides: Partial = {}): Project { return { id: 1, name: "AI Weekly", - group: 10, topic_description: "AI news", content_retention_days: 30, + user_role: "admin", created_at: "2026-04-01T00:00:00Z", ...overrides, } diff --git a/frontend/src/app/admin/projects/new/page.tsx 
b/frontend/src/app/admin/projects/new/page.tsx new file mode 100644 index 00000000..a724503a --- /dev/null +++ b/frontend/src/app/admin/projects/new/page.tsx @@ -0,0 +1,87 @@ +import { AppShell } from "@/components/app-shell" +import { getProjects } from "@/lib/api" +import { + getErrorMessage, + getSuccessMessage, + selectProject, +} from "@/lib/view-helpers" + +type NewProjectPageProps = { + searchParams: Promise> +} + +/** + * Render the self-service project creation page. + * + * @param props - Async server component props from the App Router. + * @param props.searchParams - Search params promise containing optional flash-message values. + * @returns The project creation workspace. + */ +export default async function NewProjectPage({ searchParams }: NewProjectPageProps) { + const resolvedSearchParams = await searchParams + const projects = await getProjects() + const selectedProject = selectProject(projects, resolvedSearchParams) + const errorMessage = getErrorMessage(resolvedSearchParams) + const successMessage = getSuccessMessage(resolvedSearchParams) + + return ( + + {errorMessage ? ( +
{errorMessage}
+ ) : null} + {successMessage ? ( +
{successMessage}
+ ) : null} + +
+
+

Provision

+

+ New project +

+
+ +
+ + +