diff --git a/.env.example b/.env.example index 9c6cfdbe..9df9fcb6 100644 --- a/.env.example +++ b/.env.example @@ -38,3 +38,7 @@ DJANGO_SUPERUSER_PASSWORD=adminpass NEWSLETTER_API_BASE_URL=http://127.0.0.1:8080 NEWSLETTER_API_USERNAME=admin NEWSLETTER_API_PASSWORD=adminpass + +DEBUG=True +ALLOWED_HOSTS=localhost,127.0.0.1,newslettermaker.tech +FRONTEND_URL=http://localhost:3000 diff --git a/.gitguardian.yaml b/.gitguardian.yaml new file mode 100644 index 00000000..f17af393 --- /dev/null +++ b/.gitguardian.yaml @@ -0,0 +1,4 @@ +version: 2 +secret: + ignored_paths: + - "frontend/src/lib/__tests__/api.test.ts" diff --git a/.gitignore b/.gitignore index 187dccf9..a8b32469 100644 --- a/.gitignore +++ b/.gitignore @@ -5,12 +5,15 @@ __pycache__/ .ruff_cache/ .venv/ venv/ +.coverage .env frontend/.env.local celerybeat-schedule* db.sqlite3 +htmlcov/ staticfiles/ frontend/.next/ +frontend/coverage/ frontend/node_modules/ docs/ diff --git a/.vscode/settings.json b/.vscode/settings.json index 55016634..d55b96fa 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,9 +2,13 @@ "cSpell.words": [ "ASGI", "buildx", + "cbranch", "cfgv", + "cstat", "djlint", "FAVICONS", + "Feedly", + "Fraunces", "gunicorn", "healthz", "HNSW", @@ -28,6 +32,9 @@ "QDRANT", "readyz", "Referer", + "simplejwt", + "Unparseable", + "unstub", "upserted", "upvote", "uritemplate", diff --git a/README.md b/README.md index c34487b0..d451cbd3 100644 --- a/README.md +++ b/README.md @@ -172,13 +172,6 @@ just changepassword your-username For the default local bootstrap, `.env` also seeds an `admin` superuser in the container database using `DJANGO_SUPERUSER_USERNAME`, `DJANGO_SUPERUSER_EMAIL`, and `DJANGO_SUPERUSER_PASSWORD`. 
-## Documentation - -- [PLANNING.md](docs/PLANNING.md) - Full architecture decisions, data model, and feedback loop design -- [VENDOR.md](docs/VENDOR.md) - Per-skill model selection, rationale, and API pricing -- [GENRES.md](docs/GENRES.md) - Newsletter format types and layout templates -- [IMPLEMENTATION.md](docs/IMPLEMENTATION.md) - Additional implementation notes - ## License This repository is licensed under the GNU Affero General Public License v3.0 or later. See [LICENSE](LICENSE). diff --git a/core/admin.py b/core/admin.py index 25b13188..5dbf3a2e 100644 --- a/core/admin.py +++ b/core/admin.py @@ -20,7 +20,6 @@ UserFeedback, ) from core.plugins import get_plugin_for_source_config, validate_plugin_config -from core.tasks import process_content @admin.register(Project) @@ -211,6 +210,8 @@ def changelist_view(self, request, extra_context=None): @admin.action(description="Generate Ideas for Newsletter") def generate_newsletter_ideas(self, request, queryset): + from core.tasks import process_content + content_ids = list(queryset.values_list("id", flat=True)) for content_id in content_ids: process_content.delay(content_id) @@ -252,7 +253,7 @@ class SkillResultAdmin(ModelAdmin): @admin.action(description="Retry Selected Skills") def retry_selected_skills(self, request, queryset): """Resets status to PENDING and clears errors for retry by the worker.""" - updated = queryset.update(status="PENDING", error_message="") + updated = queryset.update(status="pending", error_message="") self.message_user( request, f"Successfully reset {updated} skills to PENDING for retry.", @@ -275,11 +276,12 @@ def get_content_link(self, obj): @admin.display(description="Status") def display_status(self, obj): - colors = {"COMPLETED": "green", "FAILED": "red", "PENDING": "orange"} - color = colors.get(obj.status, "gray") + status_value = str(obj.status).lower() + colors = {"completed": "green", "failed": "red", "pending": "orange"} + color = colors.get(status_value, "gray") return 
format_html( '● {}', - color, obj.status + color, status_value.upper() ) @admin.display(description="Perf / Conf") @@ -308,7 +310,7 @@ def changelist_view(self, request, extra_context=None): extra_context = extra_context or {} metrics = qs.aggregate(avg_lat=Avg('latency_ms')) avg_latency = metrics['avg_lat'] or 0 - failure_count = qs.filter(status='FAILED').count() + failure_count = qs.filter(status='failed').count() total_count = qs.count() or 1 extra_context["dashboard_stats"] = [ @@ -343,7 +345,7 @@ class UserFeedbackAdmin(ModelAdmin): @admin.display(description="Type") def display_feedback(self, obj): - if obj.feedback_type == "UPVOTE": + if str(obj.feedback_type).lower() == "upvote": return format_html('{}', "1.2rem", "👍") return format_html('{}', "1.2rem", "👎") @@ -363,7 +365,7 @@ def get_ai_score(self, obj): def changelist_view(self, request, extra_context=None): qs = self.get_queryset(request) extra_context = extra_context or {} - upvotes = qs.filter(feedback_type="UPVOTE").count() + upvotes = qs.filter(feedback_type="upvote").count() total = qs.count() or 1 approval_rate = (upvotes / total) * 100 @@ -414,11 +416,12 @@ class IngestionRunAdmin(ModelAdmin): @admin.display(description="Status") def display_status(self, obj): - colors = {"COMPLETED": "success", "FAILED": "danger", "RUNNING": "info"} + status_value = str(obj.status).lower() + colors = {"success": "success", "failed": "danger", "running": "info"} return format_html( '{}', - colors.get(obj.status, "warning"), - obj.status + colors.get(status_value, "warning"), + status_value.upper() ) @admin.display(description="Efficiency (Ingested/Fetched)") @@ -445,7 +448,7 @@ def changelist_view(self, request, extra_context=None): qs = self.get_queryset(request) extra_context = extra_context or {} total_runs = qs.count() - failed_runs = qs.filter(status="FAILED").count() + failed_runs = qs.filter(status="failed").count() total_ingested = sum(qs.values_list('items_ingested', flat=True)) 
extra_context["dashboard_stats"] = [ diff --git a/core/api.py b/core/api.py index 4c4ccc4c..ebe03e1d 100644 --- a/core/api.py +++ b/core/api.py @@ -24,13 +24,6 @@ SourceConfig, UserFeedback, ) -from core.pipeline import ( - CLASSIFICATION_SKILL_NAME, - RELATED_CONTENT_SKILL_NAME, - RELEVANCE_SKILL_NAME, - SUMMARIZATION_SKILL_NAME, - execute_ad_hoc_skill, -) from core.serializers import ( ContentSerializer, EntitySerializer, @@ -42,7 +35,11 @@ SourceConfigSerializer, UserFeedbackSerializer, ) -from core.tasks import queue_content_skill + +CLASSIFICATION_SKILL_NAME = "content_classification" +RELEVANCE_SKILL_NAME = "relevance_scoring" +SUMMARIZATION_SKILL_NAME = "summarization" +RELATED_CONTENT_SKILL_NAME = "find_related" PROJECT_ID_PARAMETER = OpenApiParameter( name="project_id", @@ -532,6 +529,9 @@ class ContentViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet): ) @action(detail=True, methods=["post"], url_path=r"skills/(?P<skill_name>[^/.]+)") def run_skill(self, request, *args, **kwargs): + from core.pipeline import execute_ad_hoc_skill + from core.tasks import queue_content_skill + skill_name = str(kwargs["skill_name"]) if skill_name not in { CLASSIFICATION_SKILL_NAME, diff --git a/core/auth_views.py b/core/auth_views.py new file mode 100644 index 00000000..bceacbd8 --- /dev/null +++ b/core/auth_views.py @@ -0,0 +1,16 @@ +from allauth.socialaccount.providers.github.views import GitHubOAuth2Adapter +from allauth.socialaccount.providers.google.views import GoogleOAuth2Adapter +from dj_rest_auth.registration.views import SocialLoginView +from rest_framework.permissions import AllowAny + + +class BaseSocialLoginView(SocialLoginView): + permission_classes = [AllowAny] + + +class GitHubLoginView(BaseSocialLoginView): + adapter_class = GitHubOAuth2Adapter + + +class GoogleLoginView(BaseSocialLoginView): + adapter_class = GoogleOAuth2Adapter diff --git a/core/embeddings.py b/core/embeddings.py index 079c2b32..5bc5f444 100644 --- a/core/embeddings.py +++ b/core/embeddings.py
@@ -6,7 +6,7 @@ from uuid import uuid4 import httpx -from django.conf import settings +from django.conf import settings as django_settings from django.utils.dateparse import parse_datetime from qdrant_client import QdrantClient from qdrant_client.models import ( @@ -17,9 +17,23 @@ PointStruct, VectorParams, ) -from sentence_transformers import SentenceTransformer from core.models import Content +from core.settings_types import CoreSettings + +SentenceTransformer = None +settings = cast(CoreSettings, django_settings) + + +def get_sentence_transformer_class(): + global SentenceTransformer + + if SentenceTransformer is None: + from sentence_transformers import SentenceTransformer as sentence_transformer_class + + SentenceTransformer = sentence_transformer_class + + return SentenceTransformer class EmbeddingProvider(ABC): @@ -33,7 +47,8 @@ def get_embedding_dimension(self) -> int: class SentenceTransformerEmbeddingProvider(EmbeddingProvider): def __init__(self): - self.model = SentenceTransformer( + sentence_transformer_class = get_sentence_transformer_class() + self.model = sentence_transformer_class( settings.EMBEDDING_MODEL, trust_remote_code=settings.EMBEDDING_TRUST_REMOTE_CODE, ) @@ -97,7 +112,7 @@ def collection_name_for_project(project_id: int) -> str: @lru_cache(maxsize=1) def get_qdrant_client() -> QdrantClient: - return QdrantClient(url=settings.QDRANT_URL, timeout=10) + return QdrantClient(url=settings.QDRANT_URL, timeout=10, check_compatibility=False) @lru_cache(maxsize=1) diff --git a/core/settings_types.py b/core/settings_types.py new file mode 100644 index 00000000..b06ac4b6 --- /dev/null +++ b/core/settings_types.py @@ -0,0 +1,13 @@ +from typing import Protocol + + +class CoreSettings(Protocol): + QDRANT_URL: str + EMBEDDING_MODEL: str + EMBEDDING_PROVIDER: str + EMBEDDING_TRUST_REMOTE_CODE: bool + OLLAMA_URL: str + OPENROUTER_API_KEY: str + OPENROUTER_API_BASE: str + OPENROUTER_APP_URL: str + OPENROUTER_APP_NAME: str diff --git 
a/core/tests/test_admin.py b/core/tests/test_admin.py index d0c3e33d..a819fdd4 100644 --- a/core/tests/test_admin.py +++ b/core/tests/test_admin.py @@ -9,19 +9,26 @@ from core.admin import ( ContentAdmin, + EntityAdmin, + HighValueFilter, IngestionRunAdmin, ReviewQueueAdmin, + SkillResultAdmin, SourceConfigAdmin, + UserFeedbackAdmin, ) from core.models import ( Content, + Entity, IngestionRun, Project, ReviewQueue, ReviewReason, RunStatus, + SkillResult, SourceConfig, SourcePluginName, + UserFeedback, ) pytestmark = pytest.mark.django_db @@ -207,3 +214,689 @@ def test_content_preview_returns_dash_when_content_text_blank(source_admin_conte admin_instance = ContentAdmin(Content, AdminSite()) assert admin_instance.preview_content(content) == "-" + + +def test_content_view_trace_prefers_external_trace_url(source_admin_context): + content = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/admin-trace", + title="Admin Trace", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Trace content.", + ) + SkillResult.objects.create( + content=content, + project=source_admin_context.project, + skill_name="summarization", + status="COMPLETED", + result_data={"trace_url": "https://traces.example/run/123"}, + ) + admin_instance = ContentAdmin(Content, AdminSite()) + + rendered = admin_instance.view_trace(content) + + assert "https://traces.example/run/123" in rendered + assert "📈 Trace" in rendered + + +def test_content_view_trace_falls_back_to_skill_runs_changelist(source_admin_context): + content = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/admin-trace-fallback", + title="Admin Trace Fallback", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Trace fallback content.", + ) + SkillResult.objects.create( + content=content, + project=source_admin_context.project, + 
skill_name="relevance_scoring", + status="COMPLETED", + result_data={"relevance_score": 0.9}, + ) + admin_instance = ContentAdmin(Content, AdminSite()) + + rendered = admin_instance.view_trace(content) + + assert "🧠 Skill runs" in rendered + assert f"content__id__exact={content.id}" in rendered + + +def test_content_changelist_view_builds_dashboard_stats(source_admin_context, mocker): + Content.objects.create( + project=source_admin_context.project, + url="https://example.com/admin-dashboard-1", + title="Admin Dashboard 1", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Content one.", + relevance_score=80, + ) + Content.objects.create( + project=source_admin_context.project, + url="https://example.com/admin-dashboard-2", + title="Admin Dashboard 2", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Content two.", + relevance_score=40, + ) + admin_instance = ContentAdmin(Content, AdminSite()) + mocker.patch.object(admin_instance, "get_queryset", return_value=Content.objects.all()) + super_changelist_view = mocker.patch( + "django.contrib.admin.options.ModelAdmin.changelist_view", + side_effect=lambda request, extra_context=None: extra_context, + ) + + response = admin_instance.changelist_view(request=SimpleNamespace()) + + super_changelist_view.assert_called_once() + assert response["dashboard_stats"][0]["value"] == "60.0%" + assert response["dashboard_stats"][1]["value"] == 2 + + +def test_generate_newsletter_ideas_queues_selected_content(source_admin_context, mocker): + first_content = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/admin-queue-1", + title="Admin Queue 1", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Queue one.", + ) + second_content = Content.objects.create( + project=source_admin_context.project, + 
url="https://example.com/admin-queue-2", + title="Admin Queue 2", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Queue two.", + ) + delay_mock = mocker.patch("core.tasks.process_content.delay") + admin_instance = ContentAdmin(Content, AdminSite()) + admin_instance.message_user = mocker.Mock() + + admin_instance.generate_newsletter_ideas( + request=SimpleNamespace(), + queryset=Content.objects.filter(id__in=[first_content.id, second_content.id]).order_by("id"), + ) + + delay_mock.assert_any_call(first_content.id) + delay_mock.assert_any_call(second_content.id) + assert delay_mock.call_count == 2 + admin_instance.message_user.assert_called_once_with( + ANY, + "Successfully queued the pipeline for 2 items.", + messages.SUCCESS, + ) + + +@pytest.mark.parametrize( + ("authority_score", "expected_color"), + [ + (90, "green"), + (60, "orange"), + (20, "red"), + ], +) +def test_entity_colored_score_uses_expected_color(source_admin_context, authority_score, expected_color): + entity = Entity.objects.create( + project=source_admin_context.project, + name=f"Entity {authority_score}", + type="vendor", + authority_score=authority_score, + website_url=f"https://entity-{authority_score}.example.com", + ) + admin_instance = EntityAdmin(Entity, AdminSite()) + + rendered = admin_instance.colored_score(entity) + + assert expected_color in rendered + assert str(authority_score) in rendered + + +def test_high_value_filter_only_returns_high_value_reference_content(source_admin_context): + high_value = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/high-value", + title="High Value", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="High value content.", + relevance_score=81, + is_reference=True, + ) + Content.objects.create( + project=source_admin_context.project, + url="https://example.com/not-high-value", + title="Not High 
Value", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Not high value content.", + relevance_score=81, + is_reference=False, + ) + filter_instance = HighValueFilter( + request=SimpleNamespace(GET={}), + params={"value_tier": "high_value"}, + model=Content, + model_admin=ContentAdmin(Content, AdminSite()), + ) + filter_instance.value = lambda: "high_value" + + filtered = filter_instance.queryset(SimpleNamespace(), Content.objects.all()) + + assert list(filtered) == [high_value] + + +def test_content_view_trace_builds_template_trace_url(source_admin_context, settings): + settings.AI_TRACE_URL_TEMPLATE = ( + "https://trace.example/{project_id}/{skill_name}/{skill_result_id}/{trace_id}/{content_id}/{run_id}" + ) + content = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/admin-template-trace", + title="Admin Template Trace", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Template trace content.", + ) + skill_result = SkillResult.objects.create( + content=content, + project=source_admin_context.project, + skill_name="summarization", + status="COMPLETED", + result_data={"trace": {"trace_id": "trace-123"}}, + ) + admin_instance = ContentAdmin(Content, AdminSite()) + + rendered = admin_instance.view_trace(content) + + assert f"https://trace.example/{content.project_id}/summarization/{skill_result.id}/trace-123/{content.id}/trace-123" in rendered + + +@pytest.mark.parametrize( + ("score", "expected_color"), + [ + (None, None), + (80, "green"), + (50, "orange"), + (10, "red"), + ], +) +def test_content_display_relevance_uses_expected_output(source_admin_context, score, expected_color): + content = Content.objects.create( + project=source_admin_context.project, + url=f"https://example.com/relevance-{score}", + title="Relevance Display", + author="Editor", + source_plugin=SourcePluginName.RSS, + 
published_date=timezone.now(), + content_text="Content.", + relevance_score=score, + ) + admin_instance = ContentAdmin(Content, AdminSite()) + + rendered = admin_instance.display_relevance(content) + + if score is None: + assert rendered == "-" + else: + assert expected_color in rendered + assert str(score) in rendered + + +def test_skill_result_admin_helpers_and_dashboard_stats(source_admin_context, mocker): + content = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/skill-result", + title="Skill Result Title For Preview", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Skill result content.", + ) + current_result = SkillResult.objects.create( + content=content, + project=source_admin_context.project, + skill_name="summarization", + status="FAILED", + result_data={"summary": "Draft summary"}, + error_message="boom", + latency_ms=1250, + confidence=0.42, + ) + superseded_result = SkillResult.objects.create( + content=content, + project=source_admin_context.project, + skill_name="relevance_scoring", + status="COMPLETED", + result_data=None, + latency_ms=250, + confidence=0.91, + superseded_by=current_result, + ) + admin_instance = SkillResultAdmin(SkillResult, AdminSite()) + admin_instance.message_user = mocker.Mock() + super_changelist_view = mocker.patch( + "core.admin.ModelAdmin.changelist_view", + side_effect=lambda request, extra_context=None: extra_context, + ) + + admin_instance.retry_selected_skills(SimpleNamespace(), SkillResult.objects.filter(pk=current_result.pk)) + current_result.refresh_from_db() + response = admin_instance.changelist_view(SimpleNamespace()) + + assert current_result.status == "pending" + assert current_result.error_message == "" + admin_instance.message_user.assert_called_once_with( + ANY, + "Successfully reset 1 skills to PENDING for retry.", + messages.SUCCESS, + ) + assert admin_instance.preview_json(current_result) == f'🔍 Preview' + 
assert admin_instance.preview_json(superseded_result) == "-" + assert admin_instance.get_content_link(current_result).endswith("...") + assert "● PENDING" in admin_instance.display_status(current_result) + assert admin_instance.display_performance(current_result) == "1250ms / 42%" + assert admin_instance.is_current(current_result) is True + assert admin_instance.is_current(superseded_result) is False + assert "Draft summary" in admin_instance.pretty_result_data(current_result) + assert admin_instance.pretty_result_data(superseded_result) == "No data available" + super_changelist_view.assert_called_once() + assert response["dashboard_stats"][0]["value"] == "750ms" + assert response["dashboard_stats"][1]["value"] == "0.0%" + + +def test_user_feedback_admin_helpers_and_dashboard_stats(source_admin_context, django_user_model, mocker): + user = django_user_model.objects.create_user(username="feedback-user", password="testpass123") + content = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/feedback", + title="Feedback Title That Is Long Enough To Truncate", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Feedback content.", + relevance_score=85, + ) + upvote = UserFeedback.objects.create( + content=content, + project=source_admin_context.project, + user=user, + feedback_type="upvote", + ) + other_content = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/feedback-other", + title="Other Feedback Title", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Other feedback content.", + relevance_score=20, + ) + UserFeedback.objects.create( + content=other_content, + project=source_admin_context.project, + user=django_user_model.objects.create_user(username="feedback-user-2", password="testpass123"), + feedback_type="downvote", + ) + admin_instance = UserFeedbackAdmin(UserFeedback, 
AdminSite()) + super_changelist_view = mocker.patch( + "core.admin.ModelAdmin.changelist_view", + side_effect=lambda request, extra_context=None: extra_context, + ) + + response = admin_instance.changelist_view(SimpleNamespace()) + + assert "👍" in admin_instance.display_feedback(upvote) + assert admin_instance.get_content_title(upvote).endswith("...") + assert "green" in admin_instance.get_ai_score(upvote) + other_content.relevance_score = None + other_content.save(update_fields=["relevance_score"]) + downvote = UserFeedback.objects.get(content=other_content) + assert admin_instance.get_ai_score(downvote) == "-" + super_changelist_view.assert_called_once() + assert response["dashboard_stats"][0]["value"] == "50.0%" + assert response["dashboard_stats"][1]["value"] == 2 + + +def test_ingestion_run_display_duration_handles_running_and_completed(source_admin_context): + running_run = IngestionRun.objects.create( + project=source_admin_context.project, + plugin_name=SourcePluginName.RSS, + status=RunStatus.RUNNING, + items_fetched=0, + items_ingested=0, + ) + completed_run = IngestionRun.objects.create( + project=source_admin_context.project, + plugin_name=SourcePluginName.RSS, + status=RunStatus.SUCCESS, + items_fetched=10, + items_ingested=10, + ) + completed_run.started_at = timezone.now() - timezone.timedelta(minutes=3, seconds=5) + completed_run.completed_at = completed_run.started_at + timezone.timedelta(minutes=3, seconds=5) + completed_run.save(update_fields=["started_at", "completed_at"]) + admin_instance = IngestionRunAdmin(IngestionRun, AdminSite()) + + assert admin_instance.display_duration(running_run) == "In Progress..." 
+ assert admin_instance.display_duration(completed_run) == "3m 5s" + + +def test_review_queue_actions_update_resolution_and_emit_message(source_admin_context, mocker): + content = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/review-action", + title="Review Action", + author="Reviewer", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Review action content.", + ) + approve_item = ReviewQueue.objects.create( + project=source_admin_context.project, + content=content, + reason=ReviewReason.BORDERLINE_RELEVANCE, + confidence=0.5, + resolved=False, + ) + reject_item = ReviewQueue.objects.create( + project=source_admin_context.project, + content=content, + reason=ReviewReason.LOW_CONFIDENCE_CLASSIFICATION, + confidence=0.2, + resolved=False, + ) + admin_instance = ReviewQueueAdmin(ReviewQueue, AdminSite()) + admin_instance.message_user = mocker.Mock() + + admin_instance.mark_as_approved(SimpleNamespace(), ReviewQueue.objects.filter(pk=approve_item.pk)) + admin_instance.mark_as_rejected(SimpleNamespace(), ReviewQueue.objects.filter(pk=reject_item.pk)) + + approve_item.refresh_from_db() + reject_item.refresh_from_db() + assert approve_item.resolved is True + assert approve_item.resolution == "APPROVED" + assert reject_item.resolved is True + assert reject_item.resolution == "REJECTED" + assert admin_instance.message_user.call_count == 2 + + +def test_high_value_filter_lookups_and_noop_queryset(source_admin_context): + filter_instance = HighValueFilter( + request=SimpleNamespace(GET={}), + params={}, + model=Content, + model_admin=ContentAdmin(Content, AdminSite()), + ) + filter_instance.value = lambda: None + content = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/high-value-noop", + title="Noop", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="noop", + ) + + assert 
filter_instance.lookups(None, None) == (("high_value", "🔥 High Value (Score > 80 & Reference)"),) + assert list(filter_instance.queryset(SimpleNamespace(), Content.objects.all())) == [content] + + +def test_content_view_trace_returns_dash_when_no_skill_results(source_admin_context): + content = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/admin-no-trace", + title="No Trace", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="No trace content.", + ) + admin_instance = ContentAdmin(Content, AdminSite()) + + assert admin_instance.view_trace(content) == "-" + + +def test_skill_result_admin_handles_unknown_status_and_empty_performance(source_admin_context): + content = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/skill-result-empty", + title="", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Skill result content.", + ) + skill_result = SkillResult.objects.create( + content=content, + project=source_admin_context.project, + skill_name="summarization", + status="QUEUED", + result_data={"summary": "Queued summary"}, + latency_ms=None, + confidence=None, + ) + admin_instance = SkillResultAdmin(SkillResult, AdminSite()) + + assert admin_instance.get_content_link(skill_result) == "Untitled" + assert "gray" in admin_instance.display_status(skill_result) + assert admin_instance.display_performance(skill_result) == "- / -" + + +def test_skill_result_changelist_view_uses_warning_and_danger_colors(source_admin_context, mocker): + content = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/skill-result-slow", + title="Slow Skill Result", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Slow skill result content.", + ) + SkillResult.objects.create( + content=content, + 
project=source_admin_context.project, + skill_name="summarization", + status="failed", + latency_ms=3001, + ) + admin_instance = SkillResultAdmin(SkillResult, AdminSite()) + super_changelist_view = mocker.patch( + "core.admin.ModelAdmin.changelist_view", + side_effect=lambda request, extra_context=None: extra_context, + ) + + response = admin_instance.changelist_view(SimpleNamespace()) + + super_changelist_view.assert_called_once() + assert response["dashboard_stats"][0]["color"] == "warning" + assert response["dashboard_stats"][1]["color"] == "danger" + + +def test_user_feedback_admin_upvote_and_orange_score_branches(source_admin_context): + content = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/feedback-orange", + title="Orange Feedback Title", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Feedback content.", + relevance_score=60, + ) + feedback = UserFeedback.objects.create( + content=content, + project=source_admin_context.project, + user=source_admin_context.user, + feedback_type="upvote", + ) + admin_instance = UserFeedbackAdmin(UserFeedback, AdminSite()) + + assert "👍" in admin_instance.display_feedback(feedback) + assert "orange" in admin_instance.get_ai_score(feedback) + + +def test_user_feedback_changelist_view_uses_success_color_for_high_approval(source_admin_context, django_user_model, mocker): + first_content = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/feedback-success-1", + title="Feedback Success One", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Feedback content one.", + relevance_score=90, + ) + second_content = Content.objects.create( + project=source_admin_context.project, + url="https://example.com/feedback-success-2", + title="Feedback Success Two", + author="Editor", + source_plugin=SourcePluginName.RSS, + 
published_date=timezone.now(), + content_text="Feedback content two.", + relevance_score=90, + ) + UserFeedback.objects.create( + content=first_content, + project=source_admin_context.project, + user=source_admin_context.user, + feedback_type="upvote", + ) + UserFeedback.objects.create( + content=second_content, + project=source_admin_context.project, + user=django_user_model.objects.create_user(username="feedback-success-2", password="testpass123"), + feedback_type="upvote", + ) + admin_instance = UserFeedbackAdmin(UserFeedback, AdminSite()) + super_changelist_view = mocker.patch( + "core.admin.ModelAdmin.changelist_view", + side_effect=lambda request, extra_context=None: extra_context, + ) + + response = admin_instance.changelist_view(SimpleNamespace()) + + super_changelist_view.assert_called_once() + assert response["dashboard_stats"][0]["color"] == "success" + assert response["dashboard_stats"][0]["value"] == "100.0%" + + +def test_ingestion_run_admin_status_efficiency_and_dashboard_branches(source_admin_context, mocker): + IngestionRun.objects.create( + project=source_admin_context.project, + plugin_name=SourcePluginName.RSS, + status="failed", + items_fetched=0, + items_ingested=0, + ) + running_run = IngestionRun.objects.create( + project=source_admin_context.project, + plugin_name=SourcePluginName.RSS, + status=RunStatus.RUNNING, + items_fetched=5, + items_ingested=5, + ) + admin_instance = IngestionRunAdmin(IngestionRun, AdminSite()) + super_changelist_view = mocker.patch( + "core.admin.ModelAdmin.changelist_view", + side_effect=lambda request, extra_context=None: extra_context, + ) + + response = admin_instance.changelist_view(SimpleNamespace()) + + assert "danger" in admin_instance.display_status(IngestionRun.objects.filter(status="failed").first()) + assert admin_instance.display_efficiency(IngestionRun.objects.filter(status="failed").first()) == "0/0" + assert "info" in admin_instance.display_status(running_run) + 
super_changelist_view.assert_called_once() + assert response["dashboard_stats"][0]["value"] == "5" + assert response["dashboard_stats"][1]["color"] == "warning" + + +def test_source_config_admin_health_pretty_config_and_dashboard_branches(source_admin_context, mocker): + stale_config = SourceConfig.objects.create( + project=source_admin_context.project, + plugin_name=SourcePluginName.RSS, + config={"feed_url": "https://example.com/stale.xml"}, + is_active=True, + last_fetched_at=timezone.now() - timezone.timedelta(days=2), + ) + paused_config = SourceConfig.objects.create( + project=source_admin_context.project, + plugin_name=SourcePluginName.REDDIT, + config={}, + is_active=False, + ) + never_run_config = SourceConfig.objects.create( + project=source_admin_context.project, + plugin_name=SourcePluginName.RSS, + config={}, + is_active=True, + last_fetched_at=None, + ) + admin_instance = SourceConfigAdmin(SourceConfig, AdminSite()) + super_changelist_view = mocker.patch( + "core.admin.ModelAdmin.changelist_view", + side_effect=lambda request, extra_context=None: extra_context, + ) + + response = admin_instance.changelist_view(SimpleNamespace()) + + assert "Stale" in admin_instance.display_health(stale_config) + assert "Paused" in admin_instance.display_health(paused_config) + assert "Never Run" in admin_instance.display_health(never_run_config) + assert admin_instance.pretty_config(paused_config) == "Empty" + super_changelist_view.assert_called_once() + assert response["dashboard_stats"][0]["color"] == "warning" + assert response["dashboard_stats"][1]["value"] == 2 + + +@pytest.mark.parametrize( + ("confidence", "expected_color"), + [ + (0.2, "red"), + (0.9, "green"), + ], +) +def test_review_queue_display_confidence_remaining_color_branches( + source_admin_context, + confidence, + expected_color, +): + content = Content.objects.create( + project=source_admin_context.project, + url=f"https://example.com/review-confidence-{confidence}", + title="Review Confidence 
Remaining", + author="Reviewer", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Review queue content", + ) + review_item = ReviewQueue.objects.create( + project=source_admin_context.project, + content=content, + reason=ReviewReason.BORDERLINE_RELEVANCE, + confidence=confidence, + resolved=False, + ) + admin_instance = ReviewQueueAdmin(ReviewQueue, AdminSite()) + + rendered = admin_instance.display_confidence(review_item) + + assert expected_color in rendered diff --git a/core/tests/test_embeddings.py b/core/tests/test_embeddings.py index 1f003df8..af13a765 100644 --- a/core/tests/test_embeddings.py +++ b/core/tests/test_embeddings.py @@ -1,17 +1,23 @@ +from io import StringIO from types import SimpleNamespace +from unittest.mock import call import httpx import pytest from django.contrib.auth.models import Group -from django.core.management import call_command +from django.core.management import CommandError, call_command from qdrant_client.http.exceptions import ResponseHandlingException from core import embeddings from core.embeddings import ( + build_content_embedding_text, + build_search_filter, get_embedding_provider, get_reference_similarity, + normalize_text, search_similar, search_similar_content, + serialize_published_date, upsert_content_embedding, ) from core.models import ( @@ -169,6 +175,90 @@ def test_openrouter_embedding_provider_calls_embeddings_endpoint(settings, mocke assert post_mock.call_args.kwargs["headers"]["Authorization"] == "Bearer test-key" +def test_openrouter_embedding_provider_requires_api_key(settings): + settings.EMBEDDING_PROVIDER = "openrouter" + settings.OPENROUTER_API_KEY = "" + + with pytest.raises(RuntimeError, match="OPENROUTER_API_KEY must be set"): + embeddings.embed_text("api text") + + +def test_ollama_embedding_provider_falls_back_to_legacy_endpoint_on_404(settings, mocker): + settings.EMBEDDING_PROVIDER = "ollama" + settings.EMBEDDING_MODEL = "nomic-embed-text" + embed_response = 
SimpleNamespace(status_code=404) + legacy_response = SimpleNamespace( + json=lambda: {"embedding": [0.9, 0.8]}, + raise_for_status=lambda: None, + ) + post_mock = mocker.patch("core.embeddings.httpx.post", side_effect=[embed_response, legacy_response]) + + vector = embeddings.embed_text("legacy text") + + assert vector == [0.9, 0.8] + assert post_mock.call_args_list[1].args[0].endswith("/api/embeddings") + + +def test_get_embedding_provider_rejects_unsupported_backend(settings): + settings.EMBEDDING_PROVIDER = "unsupported" + + with pytest.raises(ValueError, match="Unsupported embedding provider"): + get_embedding_provider() + + +def test_ensure_project_collection_skips_create_when_collection_exists(embedding_context, mocker): + client_mock = mocker.patch("core.embeddings.get_qdrant_client") + exists_mock = mocker.patch("core.embeddings.project_collection_exists", return_value=True) + + embeddings.ensure_project_collection(embedding_context.project.id) + + exists_mock.assert_called_once_with(embedding_context.project.id) + client_mock.return_value.create_collection.assert_not_called() + + +def test_project_collection_exists_returns_false_when_lookup_raises(embedding_context, mocker): + client_mock = mocker.patch("core.embeddings.get_qdrant_client") + client_mock.return_value.get_collection.side_effect = RuntimeError("missing") + + assert embeddings.project_collection_exists(embedding_context.project.id) is False + + +def test_build_content_embedding_text_skips_blank_parts(embedding_context): + embedding_context.content.title = "" + + assert build_content_embedding_text(embedding_context.content) == "This article covers platform engineering practices." 
+ + +@pytest.mark.parametrize( + ("raw_text", "expected"), + [ + (" trimmed ", "trimmed"), + (" ", "empty content"), + ], +) +def test_normalize_text_handles_blank_and_trimmed_input(raw_text, expected): + assert normalize_text(raw_text) == expected + + +def test_serialize_published_date_handles_string_and_fallback_values(): + assert serialize_published_date("2026-04-20T12:00:00Z") == "2026-04-20T12:00:00+00:00" + assert serialize_published_date("not-a-date") == "not-a-date" + assert serialize_published_date(123) == "123" + + +def test_build_search_filter_returns_none_without_conditions(): + assert build_search_filter() is None + + +def test_build_search_filter_supports_reference_and_exclusion_conditions(): + filter_value = build_search_filter(is_reference=True, exclude_content_id=42) + + assert filter_value.must[0].key == "is_reference" + assert filter_value.must[0].match.value is True + assert filter_value.must_not[0].key == "content_id" + assert filter_value.must_not[0].match.value == 42 + + def test_embedding_smoke_command_prints_dimension(mocker, capsys): embed_text_mock = mocker.patch("core.management.commands.embedding_smoke.embed_text", return_value=[0.1, 0.2, 0.3]) @@ -242,3 +332,74 @@ def test_seed_demo_skips_embeddings_when_vector_stack_is_unavailable(mocker, cap combined_output = capsys.readouterr() assert "Skipping remaining embedding sync" in combined_output.err assert "Upserted embeddings for 0 seeded content item(s)." 
in combined_output.out + + +def test_sync_embeddings_scopes_to_requested_content_id(embedding_context, mocker): + sibling_content = Content.objects.create( + project=embedding_context.project, + url="https://example.com/embed-sibling", + title="Sibling Content", + author="Author", + source_plugin=SourcePluginName.RSS, + published_date="2026-04-21T12:00:00Z", + content_text="Sibling body.", + ) + upsert_mock = mocker.patch("core.management.commands.sync_embeddings.upsert_content_embedding") + stdout = StringIO() + + call_command("sync_embeddings", content_id=embedding_context.content.id, stdout=stdout) + + upsert_mock.assert_called_once_with(embedding_context.content) + assert sibling_content.id != embedding_context.content.id + assert "Synced embeddings for 1 content item(s)." in stdout.getvalue() + + +def test_sync_embeddings_filters_project_and_references_only(embedding_context, django_user_model, mocker): + other_user = django_user_model.objects.create_user(username="embed-owner-2", password="testpass123") + other_group = Group.objects.create(name="embedding-team-2") + other_user.groups.add(other_group) + other_project = Project.objects.create(name="Other Embedding Project", group=other_group, topic_description="Other") + same_project_reference = Content.objects.create( + project=embedding_context.project, + url="https://example.com/reference-item", + title="Reference Item", + author="Author", + source_plugin=SourcePluginName.RSS, + published_date="2026-04-22T12:00:00Z", + content_text="Reference body.", + is_reference=True, + ) + Content.objects.create( + project=embedding_context.project, + url="https://example.com/non-reference-item", + title="Non Reference Item", + author="Author", + source_plugin=SourcePluginName.RSS, + published_date="2026-04-23T12:00:00Z", + content_text="Non reference body.", + is_reference=False, + ) + Content.objects.create( + project=other_project, + url="https://example.com/other-project-reference", + title="Other Project Reference", 
+ author="Author", + source_plugin=SourcePluginName.RSS, + published_date="2026-04-24T12:00:00Z", + content_text="Other project reference body.", + is_reference=True, + ) + upsert_mock = mocker.patch("core.management.commands.sync_embeddings.upsert_content_embedding") + + call_command( + "sync_embeddings", + project_id=embedding_context.project.id, + references_only=True, + ) + + assert upsert_mock.call_args_list == [call(same_project_reference)] + + +def test_sync_embeddings_raises_command_error_when_scope_matches_no_content(): + with pytest.raises(CommandError, match="No content records matched the requested scope"): + call_command("sync_embeddings", project_id=999999) diff --git a/core/tests/test_entrypoints.py b/core/tests/test_entrypoints.py new file mode 100644 index 00000000..a2f9d313 --- /dev/null +++ b/core/tests/test_entrypoints.py @@ -0,0 +1,30 @@ +import importlib +import os +import sys + + +def _import_fresh(module_name: str): + sys.modules.pop(module_name, None) + return importlib.import_module(module_name) + + +def test_asgi_module_sets_default_settings_and_builds_application(mocker): + setdefault_mock = mocker.patch.object(os.environ, "setdefault") + get_app_mock = mocker.patch("django.core.asgi.get_asgi_application", return_value="asgi-app") + + module = _import_fresh("newsletter_maker.asgi") + + setdefault_mock.assert_called_once_with("DJANGO_SETTINGS_MODULE", "newsletter_maker.settings") + get_app_mock.assert_called_once_with() + assert module.application == "asgi-app" + + +def test_wsgi_module_sets_default_settings_and_builds_application(mocker): + setdefault_mock = mocker.patch.object(os.environ, "setdefault") + get_app_mock = mocker.patch("django.core.wsgi.get_wsgi_application", return_value="wsgi-app") + + module = _import_fresh("newsletter_maker.wsgi") + + setdefault_mock.assert_called_once_with("DJANGO_SETTINGS_MODULE", "newsletter_maker.settings") + get_app_mock.assert_called_once_with() + assert module.application == "wsgi-app" diff --git 
a/core/tests/test_health.py b/core/tests/test_health.py index 9c4d7e11..5c8714df 100644 --- a/core/tests/test_health.py +++ b/core/tests/test_health.py @@ -1,3 +1,8 @@ +from types import SimpleNamespace + +from core.views import _check_database, _check_qdrant + + def test_root_redirects_to_admin(client): response = client.get("/") @@ -30,3 +35,35 @@ def test_readyz_returns_service_unavailable_when_dependency_fails(client, mocker assert response.status_code == 503 assert response.json()["status"] == "degraded" + + +def test_check_database_returns_true_when_query_succeeds(mocker): + cursor = mocker.Mock() + cursor_cm = mocker.MagicMock() + cursor_cm.__enter__.return_value = cursor + mocker.patch("core.views.connection.cursor", return_value=cursor_cm) + + assert _check_database() is True + cursor.execute.assert_called_once_with("SELECT 1") + cursor.fetchone.assert_called_once_with() + + +def test_check_database_returns_false_when_query_raises(mocker): + mocker.patch("core.views.connection.cursor", side_effect=RuntimeError("db unavailable")) + + assert _check_database() is False + + +def test_check_qdrant_returns_true_when_client_can_list_collections(mocker, settings): + client_cls = mocker.patch("core.views.QdrantClient") + client_cls.return_value.get_collections.return_value = SimpleNamespace(collections=[]) + + assert _check_qdrant() is True + client_cls.assert_called_once_with(url=settings.QDRANT_URL, timeout=2, check_compatibility=False) + + +def test_check_qdrant_returns_false_when_client_errors(mocker): + client_cls = mocker.patch("core.views.QdrantClient") + client_cls.return_value.get_collections.side_effect = RuntimeError("qdrant unavailable") + + assert _check_qdrant() is False diff --git a/core/tests/test_llm.py b/core/tests/test_llm.py new file mode 100644 index 00000000..e999f8ee --- /dev/null +++ b/core/tests/test_llm.py @@ -0,0 +1,72 @@ +from types import SimpleNamespace + +import pytest + +from core.llm import _extract_json_object, openrouter_chat_json 
+ + +def test_openrouter_chat_json_requires_api_key(settings): + settings.OPENROUTER_API_KEY = "" + + with pytest.raises(RuntimeError, match="OPENROUTER_API_KEY must be configured"): + openrouter_chat_json(model="test-model", system_prompt="system", user_prompt="user") + + +def test_openrouter_chat_json_posts_expected_request(settings, mocker): + settings.OPENROUTER_API_KEY = "test-key" + settings.OPENROUTER_API_BASE = "https://openrouter.example/api/v1/" + settings.OPENROUTER_APP_URL = "https://newsletter-maker.example" + settings.OPENROUTER_APP_NAME = "newsletter-maker" + settings.AI_REQUEST_TIMEOUT_SECONDS = 12.5 + + response = SimpleNamespace( + raise_for_status=lambda: None, + json=lambda: {"choices": [{"message": {"content": '{"summary": "Hello"}'}}]}, + ) + post_mock = mocker.patch("core.llm.httpx.post", return_value=response) + mocker.patch("core.llm.time.perf_counter", side_effect=[1.0, 1.123]) + + result = openrouter_chat_json( + model="openrouter/test-model", + system_prompt="Return JSON.", + user_prompt="Summarize this.", + ) + + assert result.payload == {"summary": "Hello"} + assert result.model == "openrouter/test-model" + assert result.latency_ms == 123 + assert post_mock.call_args.args[0] == "https://openrouter.example/api/v1/chat/completions" + assert post_mock.call_args.kwargs["headers"] == { + "Authorization": "Bearer test-key", + "Content-Type": "application/json", + "HTTP-Referer": "https://newsletter-maker.example", + "X-OpenRouter-Title": "newsletter-maker", + } + assert post_mock.call_args.kwargs["json"] == { + "model": "openrouter/test-model", + "temperature": 0, + "response_format": {"type": "json_object"}, + "messages": [ + {"role": "system", "content": "Return JSON."}, + {"role": "user", "content": "Summarize this."}, + ], + } + assert post_mock.call_args.kwargs["timeout"] == 12.5 + + +def test_extract_json_object_accepts_direct_json_object(): + assert _extract_json_object('{"score": 0.7}') == {"score": 0.7} + + +def 
test_extract_json_object_extracts_embedded_json_object_from_text(): + assert _extract_json_object('Here is the result:\n```json\n{"score": 0.7}\n```') == {"score": 0.7} + + +def test_extract_json_object_rejects_missing_json_object(): + with pytest.raises(ValueError, match="did not contain a JSON object"): + _extract_json_object("No JSON here.") + + +def test_extract_json_object_rejects_non_object_json(): + with pytest.raises(ValueError, match="JSON must be an object"): + _extract_json_object('["not", "an", "object"]') diff --git a/core/tests/test_pipeline.py b/core/tests/test_pipeline.py index e081925e..fe81f49f 100644 --- a/core/tests/test_pipeline.py +++ b/core/tests/test_pipeline.py @@ -3,8 +3,26 @@ import pytest from django.contrib.auth.models import Group -from core.models import Content, Project, ReviewQueue, ReviewReason, SkillResult -from core.pipeline import CLASSIFICATION_SKILL_NAME, RELEVANCE_SKILL_NAME, SUMMARIZATION_SKILL_NAME +from core.models import Content, Project, ReviewQueue, ReviewReason, SkillResult, SkillStatus +from core.pipeline import ( + CLASSIFICATION_SKILL_NAME, + RELATED_CONTENT_SKILL_NAME, + RELEVANCE_SKILL_NAME, + SUMMARIZATION_SKILL_NAME, + _clamp_score, + _execute_with_retries, + _heuristic_summary, + _normalize_summary, + _run_ad_hoc_relevance, + _serialize_related_match, + create_pending_skill_result, + execute_ad_hoc_skill, + execute_background_skill_result, + route_by_relevance, + run_content_classification, + run_relevance_scoring, + run_summarization, +) from core.tasks import process_content pytestmark = pytest.mark.django_db @@ -175,3 +193,387 @@ def test_process_content_adds_review_item_for_low_confidence_classification(pipe reason=ReviewReason.LOW_CONFIDENCE_CLASSIFICATION, ) assert review_item.confidence == pytest.approx(0.3) + + +def test_run_content_classification_uses_openrouter_response_and_normalizes_values( + pipeline_context, + settings, + mocker, +): + settings.OPENROUTER_API_KEY = "test-key" + mocker.patch( + 
"core.pipeline.openrouter_chat_json", + return_value=SimpleNamespace( + payload={ + "content_type": "unexpected_type", + "confidence": "1.7", + "explanation": "Model decided this was novel.", + }, + model="openrouter/test-model", + latency_ms=123, + ), + ) + + result = run_content_classification(pipeline_context.content) + + assert result == { + "content_type": "other", + "confidence": 1.0, + "explanation": "Model decided this was novel.", + "model_used": "openrouter/test-model", + "latency_ms": 123, + } + + +def test_run_content_classification_falls_back_to_heuristic_when_openrouter_fails( + pipeline_context, + settings, + mocker, +): + settings.OPENROUTER_API_KEY = "test-key" + mocker.patch("core.pipeline.openrouter_chat_json", side_effect=RuntimeError("llm unavailable")) + + result = run_content_classification(pipeline_context.content) + + assert result["content_type"] == "technical_article" + assert result["model_used"] == "heuristic" + assert result["latency_ms"] == 0 + + +def test_run_relevance_scoring_uses_openrouter_for_borderline_similarity( + pipeline_context, + settings, + mocker, +): + settings.OPENROUTER_API_KEY = "test-key" + mocker.patch("core.pipeline.build_content_embedding_text", return_value="embedding text") + mocker.patch("core.pipeline.embed_text", return_value=[0.1, 0.2, 0.3]) + mocker.patch("core.pipeline.get_reference_similarity", return_value=0.6) + openrouter_mock = mocker.patch( + "core.pipeline.openrouter_chat_json", + return_value=SimpleNamespace( + payload={ + "relevance_score": "0.74", + "explanation": "LLM confirmed the borderline match.", + }, + model="openrouter/relevance-model", + latency_ms=87, + ), + ) + + result = run_relevance_scoring(pipeline_context.content) + + assert result == { + "relevance_score": 0.74, + "explanation": "LLM confirmed the borderline match.", + "used_llm": True, + "model_used": "openrouter/relevance-model", + "latency_ms": 87, + } + openrouter_mock.assert_called_once() + + +def 
test_run_relevance_scoring_skips_llm_for_high_similarity( + pipeline_context, + settings, + mocker, +): + mocker.patch("core.pipeline.build_content_embedding_text", return_value="embedding text") + mocker.patch("core.pipeline.embed_text", return_value=[0.1, 0.2, 0.3]) + mocker.patch("core.pipeline.get_reference_similarity", return_value=0.95) + openrouter_mock = mocker.patch("core.pipeline.openrouter_chat_json") + + result = run_relevance_scoring(pipeline_context.content) + + assert result == { + "relevance_score": 0.95, + "explanation": "Reference corpus similarity score is 0.95; no LLM adjudication was required.", + "used_llm": False, + "model_used": f"embedding:{settings.EMBEDDING_MODEL}", + "latency_ms": 0, + } + openrouter_mock.assert_not_called() + + +def test_run_relevance_scoring_falls_back_when_openrouter_fails( + pipeline_context, + settings, + mocker, +): + settings.OPENROUTER_API_KEY = "test-key" + mocker.patch("core.pipeline.build_content_embedding_text", return_value="embedding text") + mocker.patch("core.pipeline.embed_text", return_value=[0.1, 0.2, 0.3]) + mocker.patch("core.pipeline.get_reference_similarity", return_value=0.6) + mocker.patch("core.pipeline.openrouter_chat_json", side_effect=RuntimeError("llm unavailable")) + + result = run_relevance_scoring(pipeline_context.content) + + assert result["relevance_score"] == 0.6 + assert result["used_llm"] is False + assert "Borderline reference similarity" in result["explanation"] + + +def test_run_summarization_falls_back_to_heuristic_when_openrouter_fails( + pipeline_context, + settings, + mocker, +): + settings.OPENROUTER_API_KEY = "test-key" + mocker.patch("core.pipeline.openrouter_chat_json", side_effect=RuntimeError("model unavailable")) + + result = run_summarization(pipeline_context.content) + + assert result["model_used"] == "heuristic" + assert result["latency_ms"] == 0 + assert result["summary"] == ( + "This article covers a new Kubernetes release and what changed for platform teams." 
+ ) + + +def test_execute_ad_hoc_classification_supersedes_previous_result_and_updates_review_item( + pipeline_context, + mocker, +): + classification_mock = mocker.patch( + "core.pipeline.run_content_classification", + side_effect=[ + { + "content_type": "other", + "confidence": 0.3, + "explanation": "Very ambiguous.", + "model_used": "heuristic", + "latency_ms": 0, + }, + { + "content_type": "tutorial", + "confidence": 0.45, + "explanation": "Still low confidence, but improved.", + "model_used": "heuristic", + "latency_ms": 0, + }, + ], + ) + + first_result = execute_ad_hoc_skill(pipeline_context.content, CLASSIFICATION_SKILL_NAME) + second_result = execute_ad_hoc_skill(pipeline_context.content, CLASSIFICATION_SKILL_NAME) + + first_result.refresh_from_db() + pipeline_context.content.refresh_from_db() + review_item = ReviewQueue.objects.get( + content=pipeline_context.content, + reason=ReviewReason.LOW_CONFIDENCE_CLASSIFICATION, + resolved=False, + ) + + assert classification_mock.call_count == 2 + assert first_result.status == SkillStatus.COMPLETED + assert second_result.status == SkillStatus.COMPLETED + assert first_result.superseded_by_id == second_result.id + assert pipeline_context.content.content_type == "tutorial" + assert review_item.confidence == pytest.approx(0.45) + assert ReviewQueue.objects.filter( + content=pipeline_context.content, + reason=ReviewReason.LOW_CONFIDENCE_CLASSIFICATION, + ).count() == 1 + + +def test_execute_ad_hoc_relevance_creates_review_item_for_borderline_scores( + pipeline_context, + mocker, +): + mocker.patch( + "core.pipeline.run_relevance_scoring", + return_value={ + "relevance_score": 0.55, + "explanation": "Borderline relevance for manual review.", + "used_llm": False, + "model_used": "embedding:test", + "latency_ms": 0, + }, + ) + + result = execute_ad_hoc_skill(pipeline_context.content, RELEVANCE_SKILL_NAME) + + pipeline_context.content.refresh_from_db() + review_item = ReviewQueue.objects.get( + 
content=pipeline_context.content, + reason=ReviewReason.BORDERLINE_RELEVANCE, + resolved=False, + ) + + assert result.status == SkillStatus.COMPLETED + assert result.confidence == pytest.approx(0.55) + assert pipeline_context.content.relevance_score == pytest.approx(0.55) + assert pipeline_context.content.is_active is True + assert review_item.confidence == pytest.approx(0.55) + + +def test_execute_ad_hoc_summarization_returns_failed_result_when_relevance_is_too_low( + pipeline_context, +): + pipeline_context.content.relevance_score = 0.2 + pipeline_context.content.save(update_fields=["relevance_score"]) + + result = execute_ad_hoc_skill(pipeline_context.content, SUMMARIZATION_SKILL_NAME) + + assert result.status == SkillStatus.FAILED + assert "Summarization requires relevance_score" in result.error_message + + +def test_execute_ad_hoc_related_content_returns_failed_result_on_search_error( + pipeline_context, + mocker, +): + mocker.patch("core.pipeline.search_similar_content", side_effect=RuntimeError("vector index unavailable")) + + result = execute_ad_hoc_skill(pipeline_context.content, RELATED_CONTENT_SKILL_NAME) + + assert result.status == SkillStatus.FAILED + assert result.skill_name == RELATED_CONTENT_SKILL_NAME + assert result.error_message == "vector index unavailable" + + +def test_create_pending_skill_result_rejects_non_async_skill(pipeline_context): + with pytest.raises(ValueError, match="Unsupported async skill name"): + create_pending_skill_result(pipeline_context.content, CLASSIFICATION_SKILL_NAME) + + +def test_execute_background_skill_result_rejects_skill_name_mismatch(pipeline_context): + pending_result = create_pending_skill_result(pipeline_context.content, RELEVANCE_SKILL_NAME) + + with pytest.raises(ValueError, match="is for relevance_scoring, not summarization"): + execute_background_skill_result(pending_result.id, SUMMARIZATION_SKILL_NAME) + + +def test_execute_background_skill_result_completes_summary_when_requirements_are_met( + 
pipeline_context, + mocker, +): + pipeline_context.content.relevance_score = 0.9 + pipeline_context.content.save(update_fields=["relevance_score"]) + pending_result = create_pending_skill_result(pipeline_context.content, SUMMARIZATION_SKILL_NAME) + mocker.patch( + "core.pipeline.run_summarization", + return_value={ + "summary": "Manual summary output.", + "model_used": "heuristic", + "latency_ms": 0, + }, + ) + + result = execute_background_skill_result(pending_result.id, SUMMARIZATION_SKILL_NAME) + + pending_result.refresh_from_db() + assert result.status == SkillStatus.COMPLETED + assert pending_result.status == SkillStatus.COMPLETED + assert pending_result.result_data == {"summary": "Manual summary output.", "model_used": "heuristic", "latency_ms": 0} + + +def test_execute_background_skill_result_marks_relevance_failed_when_execution_errors( + pipeline_context, + mocker, +): + pending_result = create_pending_skill_result(pipeline_context.content, RELEVANCE_SKILL_NAME) + mocker.patch("core.pipeline.run_relevance_scoring", side_effect=RuntimeError("embedding unavailable")) + + result = execute_background_skill_result(pending_result.id, RELEVANCE_SKILL_NAME) + + pending_result.refresh_from_db() + assert result.status == SkillStatus.FAILED + assert pending_result.status == SkillStatus.FAILED + assert pending_result.error_message == "embedding unavailable" + + +def test_route_by_relevance_uses_threshold_boundaries(settings): + assert route_by_relevance({"relevance": {"relevance_score": settings.AI_RELEVANCE_SUMMARIZE_THRESHOLD}}) == "relevant" + assert route_by_relevance({"relevance": {"relevance_score": settings.AI_RELEVANCE_REVIEW_THRESHOLD - 0.01}}) == "irrelevant" + assert route_by_relevance({"relevance": {"relevance_score": settings.AI_RELEVANCE_REVIEW_THRESHOLD}}) == "borderline" + + +def test_run_ad_hoc_relevance_updates_existing_review_item(pipeline_context, mocker): + existing = ReviewQueue.objects.create( + project=pipeline_context.project, + 
content=pipeline_context.content, + reason=ReviewReason.BORDERLINE_RELEVANCE, + confidence=0.2, + resolved=False, + ) + mocker.patch( + "core.pipeline.run_relevance_scoring", + return_value={ + "relevance_score": 0.58, + "explanation": "Borderline relevance for manual review.", + "used_llm": False, + "model_used": "embedding:test", + "latency_ms": 0, + }, + ) + + relevance, relevance_score = _run_ad_hoc_relevance(pipeline_context.content) + + existing.refresh_from_db() + assert relevance_score == pytest.approx(0.58) + assert existing.confidence == pytest.approx(0.58) + assert ReviewQueue.objects.filter(content=pipeline_context.content, reason=ReviewReason.BORDERLINE_RELEVANCE).count() == 1 + + +def test_execute_with_retries_retries_until_success(settings): + attempts = {"count": 0} + + def flaky_call(): + attempts["count"] += 1 + if attempts["count"] < 3: + raise RuntimeError("temporary") + return {"ok": True} + + result = _execute_with_retries("relevance_scoring", flaky_call) + + assert result == {"ok": True} + assert attempts["count"] == 3 + + +def test_execute_with_retries_raises_last_exception(settings): + def always_fail(): + raise RuntimeError("permanent") + + with pytest.raises(RuntimeError, match="permanent"): + _execute_with_retries("summarization", always_fail) + + +def test_pipeline_helper_utilities_cover_serialization_and_summary_edges(pipeline_context): + empty_content = Content( + project=pipeline_context.project, + url="https://example.com/empty", + title="Empty Content", + author="Editor", + source_plugin="rss", + published_date="2026-04-26T00:00:00Z", + content_text=" ", + ) + long_sentence = "A" * 401 + "." 
+ long_content = Content( + project=pipeline_context.project, + url="https://example.com/long", + title="Long Content", + author="Editor", + source_plugin="rss", + published_date="2026-04-26T00:00:00Z", + content_text=f"{long_sentence} Second sentence.", + ) + + assert _serialize_related_match(SimpleNamespace(payload=None)) == { + "content_id": None, + "title": None, + "url": None, + "published_date": None, + "source_plugin": None, + "score": 0.0, + } + assert _heuristic_summary(empty_content) == "Empty Content: no summary was available from the source content." + assert _heuristic_summary(long_content).endswith("...") + assert _normalize_summary(" ", pipeline_context.content) == ( + "Kubernetes Release Notes: summary generation returned no content." + ) + assert _clamp_score("bad") == 0.0 + assert _clamp_score(2) == 1.0 + assert _clamp_score(-1) == 0.0 diff --git a/core/tests/test_plugin_base.py b/core/tests/test_plugin_base.py new file mode 100644 index 00000000..1c3c2240 --- /dev/null +++ b/core/tests/test_plugin_base.py @@ -0,0 +1,110 @@ +from datetime import UTC, datetime +from types import SimpleNamespace + +import pytest +from django.contrib.auth.models import Group + +from core.models import Entity, Project +from core.plugins.base import ContentItem, SourcePlugin + +pytestmark = pytest.mark.django_db + + +class DummySourcePlugin(SourcePlugin): + required_config_fields = ("api_key",) + + def fetch_new_content(self, since): + return [ + ContentItem( + url="https://example.com/item", + title="Example", + author="Author", + published_date=datetime(2026, 4, 28, 12, 0, tzinfo=UTC), + content_text="Body", + source_plugin="dummy", + ) + ] + + def health_check(self) -> bool: + return True + + +@pytest.fixture +def plugin_context(django_user_model): + user = django_user_model.objects.create_user(username="plugin-owner", password="testpass123") + group = Group.objects.create(name="plugin-team") + user.groups.add(group) + project = Project.objects.create(name="Plugin 
Project", group=group, topic_description="Infra") + source_config = SimpleNamespace(project=project, config={"api_key": "secret"}) + return SimpleNamespace(project=project, source_config=source_config) + + +def test_source_plugin_validate_config_requires_object_and_required_fields(): + assert DummySourcePlugin.validate_config({"api_key": "secret", "extra": True}) == { + "api_key": "secret", + "extra": True, + } + + with pytest.raises(ValueError, match="Config must be a JSON object"): + DummySourcePlugin.validate_config("not-an-object") + + with pytest.raises(ValueError, match="Missing required config field: api_key"): + DummySourcePlugin.validate_config({}) + + +def test_source_plugin_match_entity_for_url_matches_normalized_hostname(plugin_context): + matching_entity = Entity.objects.create( + project=plugin_context.project, + name="Matching Entity", + type="vendor", + website_url="https://www.example.com/company", + ) + Entity.objects.create( + project=plugin_context.project, + name="Blank Website", + type="vendor", + website_url="", + ) + plugin = DummySourcePlugin(plugin_context.source_config) + + result = plugin.match_entity_for_url("https://example.com/posts/123") + + assert result == matching_entity + + +def test_source_plugin_match_entity_for_url_returns_none_for_missing_hostname(plugin_context): + plugin = DummySourcePlugin(plugin_context.source_config) + + assert plugin.match_entity_for_url("not-a-valid-url") is None + assert DummySourcePlugin._normalize_hostname("https://www.EXAMPLE.com/path") == "example.com" + + +def test_source_plugin_match_entity_for_url_returns_none_when_no_entity_matches(plugin_context): + Entity.objects.create( + project=plugin_context.project, + name="Different Entity", + type="vendor", + website_url="https://other.example.com", + ) + plugin = DummySourcePlugin(plugin_context.source_config) + + assert plugin.match_entity_for_url("https://example.com/posts/123") is None + + +def 
test_source_plugin_abstract_methods_raise_not_implemented(plugin_context): + plugin = DummySourcePlugin(plugin_context.source_config) + + with pytest.raises(NotImplementedError): + SourcePlugin.fetch_new_content(plugin, since=None) + + with pytest.raises(NotImplementedError): + SourcePlugin.health_check(plugin) + + +def test_dummy_source_plugin_implements_abstract_contract(plugin_context): + plugin = DummySourcePlugin(plugin_context.source_config) + + items = plugin.fetch_new_content(since=None) + + assert plugin.health_check() is True + assert items[0].source_plugin == "dummy" diff --git a/core/tests/test_reddit.py b/core/tests/test_reddit.py new file mode 100644 index 00000000..bf0a7289 --- /dev/null +++ b/core/tests/test_reddit.py @@ -0,0 +1,125 @@ +from datetime import UTC, datetime, timedelta +from types import SimpleNamespace + +import pytest +from django.contrib.auth.models import Group + +from core.models import Project, SourceConfig, SourcePluginName +from core.plugins.reddit import RedditSourcePlugin +from core.plugins.registry import validate_plugin_config + +pytestmark = pytest.mark.django_db + + +@pytest.fixture +def reddit_context(django_user_model): + user = django_user_model.objects.create_user(username="reddit-owner", password="testpass123") + group = Group.objects.create(name="reddit-team") + user.groups.add(group) + project = Project.objects.create(name="Reddit Project", group=group, topic_description="Infra") + source_config = SourceConfig.objects.create( + project=project, + plugin_name=SourcePluginName.REDDIT, + config={"subreddit": "python", "listing": "both", "limit": 2}, + ) + return SimpleNamespace(project=project, source_config=source_config) + + +def test_reddit_validate_config_normalizes_defaults_and_rejects_invalid_values(): + assert RedditSourcePlugin.validate_config({"subreddit": "python"}) == { + "subreddit": "python", + "listing": "both", + "limit": 25, + } + + with pytest.raises(ValueError, match="listing must be one of"): + 
RedditSourcePlugin.validate_config({"subreddit": "python", "listing": "top"}) + + with pytest.raises(ValueError, match="limit must be a positive integer"): + RedditSourcePlugin.validate_config({"subreddit": "python", "limit": 0}) + + +def test_validate_plugin_config_rejects_unknown_plugin_name(): + with pytest.raises(ValueError, match="Unsupported source plugin"): + validate_plugin_config("unknown-plugin", {}) + + +def test_reddit_fetch_new_content_deduplicates_and_filters_by_since(reddit_context, mocker): + plugin = RedditSourcePlugin(reddit_context.source_config) + now = datetime.now(tz=UTC) + duplicate_id = "dup-1" + old_submission = SimpleNamespace( + id="old-1", + url="https://reddit.com/r/python/comments/old-1/test", + permalink="/r/python/comments/old-1/test", + title="Old post", + selftext="Old body", + author=None, + created_utc=(now - timedelta(days=2)).timestamp(), + ) + first_submission = SimpleNamespace( + id=duplicate_id, + url="", + permalink="/r/python/comments/dup-1/test", + title=" Duplicate title ", + selftext="", + author="redditor", + created_utc=now.timestamp(), + ) + duplicate_submission = SimpleNamespace( + id=duplicate_id, + url="https://reddit.com/r/python/comments/dup-1/test", + permalink="/r/python/comments/dup-1/test", + title="Duplicate title", + selftext="Ignored duplicate", + author="redditor", + created_utc=now.timestamp(), + ) + subreddit = SimpleNamespace( + new=lambda limit: iter([old_submission, first_submission]), + hot=lambda limit: iter([duplicate_submission]), + ) + client = SimpleNamespace(subreddit=lambda name: subreddit) + mocker.patch.object(RedditSourcePlugin, "_client", return_value=client) + + items = plugin.fetch_new_content(since=now - timedelta(hours=1)) + + assert len(items) == 1 + assert items[0].url == "https://www.reddit.com/r/python/comments/dup-1/test" + assert items[0].title == "Duplicate title" + assert items[0].author == "redditor" + assert items[0].content_text == "Duplicate title" + assert 
items[0].source_plugin == SourcePluginName.REDDIT + + +def test_reddit_health_check_returns_true(reddit_context, mocker): + plugin = RedditSourcePlugin(reddit_context.source_config) + subreddit = SimpleNamespace(new=lambda limit: iter([object()])) + client = SimpleNamespace(subreddit=lambda name: subreddit) + mocker.patch.object(RedditSourcePlugin, "_client", return_value=client) + + assert plugin.health_check() is True + + +def test_reddit_client_requires_credentials(settings): + settings.REDDIT_CLIENT_ID = "" + settings.REDDIT_CLIENT_SECRET = "" + + with pytest.raises(RuntimeError, match="Reddit credentials are not configured"): + RedditSourcePlugin._client() + + +def test_reddit_client_builds_praw_client(settings, mocker): + settings.REDDIT_CLIENT_ID = "client-id" + settings.REDDIT_CLIENT_SECRET = "client-secret" + settings.REDDIT_USER_AGENT = "newsletter-maker-test" + reddit_cls = mocker.patch("core.plugins.reddit.praw.Reddit", return_value="reddit-client") + + client = RedditSourcePlugin._client() + + assert client == "reddit-client" + reddit_cls.assert_called_once_with( + client_id="client-id", + client_secret="client-secret", + user_agent="newsletter-maker-test", + ) diff --git a/core/tests/test_rss.py b/core/tests/test_rss.py new file mode 100644 index 00000000..bc70c4ab --- /dev/null +++ b/core/tests/test_rss.py @@ -0,0 +1,90 @@ +from datetime import UTC, datetime +from time import struct_time +from types import SimpleNamespace + +import pytest +from django.contrib.auth.models import Group + +from core.models import Project, SourceConfig, SourcePluginName +from core.plugins.rss import RSSSourcePlugin + +pytestmark = pytest.mark.django_db + + +@pytest.fixture +def rss_context(django_user_model): + user = django_user_model.objects.create_user(username="rss-owner", password="testpass123") + group = Group.objects.create(name="rss-team") + user.groups.add(group) + project = Project.objects.create(name="RSS Project", group=group, topic_description="Infra") + 
source_config = SourceConfig.objects.create( + project=project, + plugin_name=SourcePluginName.RSS, + config={"feed_url": "https://example.com/feed.xml"}, + ) + return SimpleNamespace(project=project, source_config=source_config) + + +def test_rss_fetch_new_content_filters_invalid_and_old_entries(rss_context, mocker): + now = datetime(2026, 4, 28, 12, 0, tzinfo=UTC) + old_time = now.replace(day=27).timetuple() + fresh_time = now.timetuple() + parsed_feed = SimpleNamespace( + entries=[ + SimpleNamespace(link="", title="Missing link", published_parsed=fresh_time), + SimpleNamespace(link="https://example.com/no-title", title=" ", published_parsed=fresh_time), + SimpleNamespace(link="https://example.com/old", title="Old", published_parsed=old_time), + SimpleNamespace( + link="https://example.com/fresh", + title=" Fresh entry ", + author=" Author ", + description=" Feed description ", + updated_parsed=fresh_time, + ), + ] + ) + mocker.patch("core.plugins.rss.feedparser.parse", return_value=parsed_feed) + plugin = RSSSourcePlugin(rss_context.source_config) + + items = plugin.fetch_new_content(since=datetime(2026, 4, 28, 11, 0, tzinfo=UTC)) + + assert len(items) == 1 + assert items[0].url == "https://example.com/fresh" + assert items[0].title == "Fresh entry" + assert items[0].author == "Author" + assert items[0].content_text == "Feed description" + assert items[0].source_plugin == SourcePluginName.RSS + + +def test_rss_fetch_new_content_uses_title_when_summary_and_description_missing(rss_context, mocker): + parsed_feed = SimpleNamespace( + entries=[ + SimpleNamespace( + link="https://example.com/title-only", + title="Title Only", + created_parsed=datetime(2026, 4, 28, 12, 0, tzinfo=UTC).timetuple(), + ) + ] + ) + mocker.patch("core.plugins.rss.feedparser.parse", return_value=parsed_feed) + plugin = RSSSourcePlugin(rss_context.source_config) + + items = plugin.fetch_new_content(since=None) + + assert len(items) == 1 + assert items[0].content_text == "Title Only" + + +def 
test_rss_health_check_returns_false_for_empty_feed(rss_context, mocker): + mocker.patch("core.plugins.rss.feedparser.parse", return_value=SimpleNamespace(entries=[])) + plugin = RSSSourcePlugin(rss_context.source_config) + + assert plugin.health_check() is False + + +def test_struct_time_to_datetime_builds_utc_datetime(): + parsed_value = struct_time((2026, 4, 28, 12, 30, 45, 1, 118, -1)) + + result = RSSSourcePlugin._struct_time_to_datetime(parsed_value) + + assert result == datetime(2026, 4, 28, 12, 30, 45, tzinfo=UTC) diff --git a/core/tests/test_serializers.py b/core/tests/test_serializers.py new file mode 100644 index 00000000..65150745 --- /dev/null +++ b/core/tests/test_serializers.py @@ -0,0 +1,245 @@ +from types import SimpleNamespace + +import pytest +from django.contrib.auth.models import AnonymousUser, Group + +from core.models import Content, Entity, Project, ReviewReason, SkillResult, SourceConfig, SourcePluginName +from core.serializers import ( + ContentSerializer, + EntitySerializer, + IngestionRunSerializer, + ProjectSerializer, + ReviewQueueSerializer, + SkillResultSerializer, + SourceConfigSerializer, + UserFeedbackSerializer, +) + +pytestmark = pytest.mark.django_db + + +@pytest.fixture +def serializer_context(django_user_model): + user = django_user_model.objects.create_user(username="serializer-owner", password="testpass123") + other_user = django_user_model.objects.create_user(username="serializer-other", password="testpass123") + group = Group.objects.create(name="serializer-team") + other_group = Group.objects.create(name="serializer-other-team") + user.groups.add(group) + other_user.groups.add(other_group) + project = Project.objects.create(name="Serializer Project", group=group, topic_description="Infra") + other_project = Project.objects.create(name="Other Serializer Project", group=other_group, topic_description="Data") + entity = Entity.objects.create(project=project, name="Serializer Entity", type="vendor") + other_entity = 
Entity.objects.create(project=other_project, name="Other Entity", type="vendor") + content = Content.objects.create( + project=project, + url="https://example.com/serializer-content", + title="Serializer Content", + author="Author", + entity=entity, + source_plugin=SourcePluginName.RSS, + published_date="2026-04-28T00:00:00Z", + content_text="Serializer content body.", + ) + other_content = Content.objects.create( + project=other_project, + url="https://example.com/serializer-other-content", + title="Other Content", + author="Author", + entity=other_entity, + source_plugin=SourcePluginName.RSS, + published_date="2026-04-28T00:00:00Z", + content_text="Other serializer content body.", + ) + skill_result = SkillResult.objects.create( + project=project, + content=content, + skill_name="summarization", + status="completed", + result_data={"summary": "ok"}, + ) + other_skill_result = SkillResult.objects.create( + project=other_project, + content=other_content, + skill_name="summarization", + status="completed", + result_data={"summary": "other"}, + ) + return SimpleNamespace( + user=user, + other_user=other_user, + project=project, + other_project=other_project, + entity=entity, + other_entity=other_entity, + content=content, + other_content=other_content, + skill_result=skill_result, + other_skill_result=other_skill_result, + ) + + +def _request_for(user): + return SimpleNamespace(user=user) + + +def test_project_scoped_serializer_filters_related_querysets_with_project_context(serializer_context): + serializer = SkillResultSerializer( + context={ + "request": _request_for(serializer_context.user), + "project": serializer_context.project, + } + ) + + assert list(serializer.fields["content"].queryset) == [serializer_context.content] + assert list(serializer.fields["superseded_by"].queryset) == [serializer_context.skill_result] + assert list(serializer.fields["project"].queryset) == [serializer_context.project] + + +def 
test_project_scoped_serializer_filters_related_querysets_without_project_context(serializer_context): + serializer = ContentSerializer(context={"request": _request_for(serializer_context.user)}) + + assert list(serializer.fields["entity"].queryset) == [serializer_context.entity] + assert list(serializer.fields["project"].queryset) == [serializer_context.project] + + +def test_project_scoped_serializer_skips_filtering_for_anonymous_user(): + serializer = ProjectSerializer(context={"request": _request_for(AnonymousUser())}) + + assert serializer.fields["group"].queryset.count() == Group.objects.count() + + +def test_content_serializer_rejects_cross_project_entity(serializer_context): + serializer = ContentSerializer( + instance=serializer_context.content, + data={"entity": serializer_context.other_entity.id}, + partial=True, + context={"project": serializer_context.project}, + ) + + assert serializer.is_valid() is False + assert serializer.errors == {"entity": ["Entity must belong to the selected project."]} + + +def test_skill_result_serializer_rejects_cross_project_content(serializer_context): + serializer = SkillResultSerializer( + data={ + "content": serializer_context.other_content.id, + "skill_name": "summarization", + "status": "completed", + }, + context={ + "project": serializer_context.project, + }, + ) + + assert serializer.is_valid() is False + assert serializer.errors == {"content": ["Content must belong to the selected project."]} + + +def test_review_queue_serializer_rejects_cross_project_content(serializer_context): + serializer = ReviewQueueSerializer( + data={ + "content": serializer_context.other_content.id, + "reason": ReviewReason.BORDERLINE_RELEVANCE, + "confidence": 0.5, + }, + context={ + "project": serializer_context.project, + }, + ) + + assert serializer.is_valid() is False + assert serializer.errors == {"content": ["Content must belong to the selected project."]} + + +def 
test_source_config_serializer_normalizes_valid_config(serializer_context): + serializer = SourceConfigSerializer( + data={ + "plugin_name": SourcePluginName.RSS, + "config": {"feed_url": "https://example.com/feed.xml"}, + "is_active": True, + }, + context={ + "request": _request_for(serializer_context.user), + "project": serializer_context.project, + }, + ) + + assert serializer.is_valid(), serializer.errors + assert serializer.validated_data["config"] == {"feed_url": "https://example.com/feed.xml"} + + +def test_source_config_serializer_surfaces_plugin_validation_errors(serializer_context): + serializer = SourceConfigSerializer( + instance=SourceConfig( + project=serializer_context.project, + plugin_name=SourcePluginName.RSS, + config={"feed_url": "https://example.com/feed.xml"}, + ), + data={"config": {"feed_url": ""}}, + partial=True, + context={ + "request": _request_for(serializer_context.user), + "project": serializer_context.project, + }, + ) + + assert serializer.is_valid() is False + assert serializer.errors == {"config": ["Missing required config field: feed_url"]} + + +def test_entity_serializer_filters_project_queryset_to_request_user(serializer_context): + serializer = EntitySerializer(context={"request": _request_for(serializer_context.user)}) + + assert list(serializer.fields["project"].queryset) == [serializer_context.project] + + +def test_user_feedback_serializer_rejects_cross_project_content(serializer_context): + serializer = UserFeedbackSerializer( + data={ + "content": serializer_context.other_content.id, + "feedback_type": "upvote", + }, + context={ + "project": serializer_context.project, + }, + ) + + assert serializer.is_valid() is False + assert serializer.errors == {"content": ["Content must belong to the selected project."]} + + +def test_review_queue_serializer_accepts_same_project_content(serializer_context): + serializer = ReviewQueueSerializer( + data={ + "content": serializer_context.content.id, + "reason": 
ReviewReason.BORDERLINE_RELEVANCE, + "confidence": 0.5, + }, + context={ + "project": serializer_context.project, + }, + ) + + assert serializer.is_valid(), serializer.errors + assert serializer.validated_data["content"] == serializer_context.content + + +def test_source_config_serializer_skips_plugin_validation_when_plugin_name_missing(serializer_context): + serializer = SourceConfigSerializer( + instance=SourceConfig(project=serializer_context.project, plugin_name="", config={}), + data={"config": {}}, + partial=True, + context={ + "project": serializer_context.project, + }, + ) + + assert serializer.is_valid(), serializer.errors + assert serializer.validated_data["config"] == {} + + +def test_ingestion_run_serializer_filters_project_queryset(serializer_context): + serializer = IngestionRunSerializer(context={"request": _request_for(serializer_context.user)}) + + assert list(serializer.fields["project"].queryset) == [serializer_context.project] diff --git a/core/tests/test_tasks.py b/core/tests/test_tasks.py index d811fa8a..953c9008 100644 --- a/core/tests/test_tasks.py +++ b/core/tests/test_tasks.py @@ -16,6 +16,7 @@ ) from core.pipeline import RELEVANCE_SKILL_NAME, SUMMARIZATION_SKILL_NAME from core.tasks import ( + _ingest_source_config, queue_content_skill, run_all_ingestions, run_ingestion, @@ -171,6 +172,30 @@ def test_run_all_ingestions_enqueues_active_source_configs(source_plugin_context delay_mock.assert_any_call(active_two.id) assert delay_mock.call_count == 2 + +def test_run_all_ingestions_executes_inline_when_eager(source_plugin_context, settings, mocker): + settings.CELERY_TASK_ALWAYS_EAGER = True + run_ingestion_mock = mocker.patch("core.tasks.run_ingestion") + delay_mock = mocker.patch("core.tasks.run_ingestion.delay") + active_one = SourceConfig.objects.create( + project=source_plugin_context.project, + plugin_name=SourcePluginName.RSS, + config={"feed_url": "https://example.com/feed.xml"}, + ) + active_two = SourceConfig.objects.create( + 
project=source_plugin_context.project, + plugin_name=SourcePluginName.REDDIT, + config={"subreddit": "python"}, + ) + + enqueued_count = run_all_ingestions() + + assert enqueued_count == 2 + run_ingestion_mock.assert_any_call(active_one.id) + run_ingestion_mock.assert_any_call(active_two.id) + assert run_ingestion_mock.call_count == 2 + delay_mock.assert_not_called() + def test_run_ingestion_marks_failure_when_plugin_errors(source_plugin_context, mocker): parse_mock = mocker.patch("core.plugins.rss.feedparser.parse") source_config = SourceConfig.objects.create( @@ -207,6 +232,51 @@ def test_queue_content_skill_enqueues_relevance_task(source_plugin_context, mock delay_mock.assert_called_once_with(skill_result.id) +def test_queue_content_skill_executes_inline_when_eager(source_plugin_context, settings, mocker): + settings.CELERY_TASK_ALWAYS_EAGER = True + content = Content.objects.create( + project=source_plugin_context.project, + entity=source_plugin_context.entity, + url="https://example.com/manual-inline-content", + title="Manual Inline Content", + author="Author", + source_plugin=SourcePluginName.RSS, + published_date="2026-04-20T12:00:00Z", + content_text="Manual content body", + ) + task_mock = mocker.patch("core.tasks.run_relevance_scoring_skill") + delay_mock = mocker.patch("core.tasks.run_relevance_scoring_skill.delay") + + skill_result = queue_content_skill(content, RELEVANCE_SKILL_NAME) + + assert skill_result.status == SkillStatus.PENDING + task_mock.assert_called_once_with(skill_result.id) + delay_mock.assert_not_called() + + +def test_queue_content_skill_executes_summary_inline_when_eager(source_plugin_context, settings, mocker): + settings.CELERY_TASK_ALWAYS_EAGER = True + content = Content.objects.create( + project=source_plugin_context.project, + entity=source_plugin_context.entity, + url="https://example.com/manual-inline-summary", + title="Manual Inline Summary", + author="Author", + source_plugin=SourcePluginName.RSS, + 
published_date="2026-04-20T12:00:00Z", + content_text="Manual content body", + relevance_score=0.9, + ) + task_mock = mocker.patch("core.tasks.run_summarization_skill") + delay_mock = mocker.patch("core.tasks.run_summarization_skill.delay") + + skill_result = queue_content_skill(content, SUMMARIZATION_SKILL_NAME) + + assert skill_result.status == SkillStatus.PENDING + task_mock.assert_called_once_with(skill_result.id) + delay_mock.assert_not_called() + + def test_run_relevance_scoring_skill_updates_pending_result(source_plugin_context, mocker): content = Content.objects.create( project=source_plugin_context.project, @@ -266,3 +336,40 @@ def test_run_summarization_skill_marks_result_failed_when_relevance_is_too_low(s assert result.status == SkillStatus.FAILED assert pending_result.status == SkillStatus.FAILED assert "Summarization requires relevance_score" in pending_result.error_message + + +def test_ingest_source_config_truncates_fields_and_processes_inline(source_plugin_context, settings, mocker): + settings.CELERY_TASK_ALWAYS_EAGER = True + plugin = mocker.Mock() + plugin.fetch_new_content.return_value = [ + SimpleNamespace( + url="https://example.com/post-long", + title="T" * 600, + author="A" * 300, + source_plugin=SourcePluginName.RSS, + published_date=datetime(2026, 4, 20, 12, 0, tzinfo=timezone.utc), + content_text="Summary", + ) + ] + plugin.match_entity_for_url.return_value = source_plugin_context.entity + source_config = SourceConfig.objects.create( + project=source_plugin_context.project, + plugin_name=SourcePluginName.RSS, + config={"feed_url": "https://example.com/feed.xml"}, + ) + mocker.patch("core.tasks.get_plugin_for_source_config", return_value=plugin) + upsert_mock = mocker.patch("core.tasks.upsert_content_embedding") + process_mock = mocker.patch("core.tasks.process_content") + delay_mock = mocker.patch("core.tasks.process_content.delay") + + items_fetched, items_ingested = _ingest_source_config(source_config) + + created = 
Content.objects.get(url="https://example.com/post-long") + assert items_fetched == 1 + assert items_ingested == 1 + assert created.entity == source_plugin_context.entity + assert len(created.title) == 512 + assert len(created.author) == 255 + upsert_mock.assert_called_once_with(created) + process_mock.assert_called_once_with(created.id) + delay_mock.assert_not_called() diff --git a/core/tests/test_utils.py b/core/tests/test_utils.py new file mode 100644 index 00000000..a074b670 --- /dev/null +++ b/core/tests/test_utils.py @@ -0,0 +1,30 @@ +import pytest +from django.contrib.auth.models import Group + +from core.models import Project, ProjectConfig +from core.utils import dashboard_callback + +pytestmark = pytest.mark.django_db + + +def test_dashboard_callback_uses_zero_when_no_project_configs(): + context = {"existing": True} + + result = dashboard_callback(request=None, context=context) + + assert result["existing"] is True + assert result["avg_authority_weight"] == 0 + + +def test_dashboard_callback_rounds_average_authority_weight(django_user_model): + user = django_user_model.objects.create_user(username="utils-owner", password="testpass123") + group = Group.objects.create(name="utils-team") + user.groups.add(group) + project_one = Project.objects.create(name="Utils Project 1", group=group, topic_description="Infra") + project_two = Project.objects.create(name="Utils Project 2", group=group, topic_description="Data") + ProjectConfig.objects.create(project=project_one, upvote_authority_weight=0.1234) + ProjectConfig.objects.create(project=project_two, upvote_authority_weight=0.5678) + + result = dashboard_callback(request=None, context={}) + + assert result["avg_authority_weight"] == 0.35 diff --git a/core/views.py b/core/views.py index 73093a9c..afddb1ea 100644 --- a/core/views.py +++ b/core/views.py @@ -1,10 +1,15 @@ from http import HTTPStatus +from typing import cast -from django.conf import settings +from django.conf import settings as django_settings from 
django.db import connection from django.http import JsonResponse from qdrant_client import QdrantClient +from core.settings_types import CoreSettings + +settings = cast(CoreSettings, django_settings) + def healthz_view(request): return JsonResponse({"status": "ok", "service": "newsletter-maker"}, status=HTTPStatus.OK) @@ -35,7 +40,7 @@ def _check_database() -> bool: def _check_qdrant() -> bool: try: - client = QdrantClient(url=settings.QDRANT_URL, timeout=2) + client = QdrantClient(url=settings.QDRANT_URL, timeout=2, check_compatibility=False) client.get_collections() except Exception: return False diff --git a/frontend/.env.example b/frontend/.env.example index 47ec6d04..ba95b3b5 100644 --- a/frontend/.env.example +++ b/frontend/.env.example @@ -3,3 +3,13 @@ NEWSLETTER_API_BASE_URL=http://127.0.0.1:8080 NEWSLETTER_API_USERNAME=admin NEWSLETTER_API_PASSWORD=adminpass NEXT_TELEMETRY_DISABLED=1 + +# In production, change these to your live domain +NEXT_PUBLIC_API_URL=http://localhost:8000 +NEXTAUTH_URL=http://localhost:3000 + +# These come from your Dev/Prod portals +GITHUB_ID=your_id +GITHUB_SECRET=your_secret +GOOGLE_ID=your_id +GOOGLE_SECRET=your_secret diff --git a/frontend/app/entities/page.tsx b/frontend/app/entities/page.tsx deleted file mode 100644 index a66db94e..00000000 --- a/frontend/app/entities/page.tsx +++ /dev/null @@ -1,287 +0,0 @@ -import { AppShell } from "@/components/app-shell" -import { StatusBadge } from "@/components/status-badge" -import { getProjectEntities, getProjects } from "@/lib/api" -import { - formatDate, - getErrorMessage, - getSuccessMessage, - selectProject, -} from "@/lib/view-helpers" - -type EntitiesPageProps = { - searchParams: Promise> -} - -const panelClass = - "rounded-3xl border border-[#1f2b27]/12 bg-[rgba(255,250,244,0.86)] p-5 shadow-[0_24px_60px_rgba(35,30,22,0.12)] backdrop-blur-xl" -const eyebrowClass = "m-0 text-[0.78rem] uppercase tracking-[0.12em] opacity-70" -const emptyStateClass = - "rounded-[18px] 
bg-[#1f2b27]/6 px-4 py-4 text-sm leading-6 text-[#5d6d67]" -const errorBannerClass = - "rounded-[18px] bg-[#c55f4d]/14 px-4 py-4 text-sm leading-6 text-[#7c3023]" -const metaRowClass = "flex flex-wrap gap-2 text-sm text-[#5d6d67]" -const inputClass = - "w-full rounded-2xl border border-[#1f2b27]/12 bg-white/70 px-4 py-3 text-[#1f2b27] outline-none transition focus:border-[#156f68]/40 focus:ring-2 focus:ring-[#156f68]/15" -const labelClass = "grid gap-2" -const labelTextClass = "text-sm font-medium text-[#1f2b27]" -const primaryButtonClass = - "inline-flex min-h-11 items-center justify-center rounded-full bg-[linear-gradient(135deg,#156f68,#1d8d83)] px-4 py-3 text-sm font-medium text-white transition hover:brightness-105 disabled:cursor-not-allowed disabled:opacity-50" -const dangerButtonClass = - "inline-flex min-h-11 items-center justify-center rounded-full bg-[linear-gradient(135deg,#c55f4d,#da7a67)] px-4 py-3 text-sm font-medium text-white transition hover:brightness-105" - -export default async function EntitiesPage({ - searchParams, -}: EntitiesPageProps) { - const resolvedSearchParams = await searchParams - const projects = await getProjects() - const selectedProject = selectProject(projects, resolvedSearchParams) - - if (!selectedProject) { - return ( - -
- Create a project first in Django admin. -
-
- ) - } - - const entities = await getProjectEntities(selectedProject.id) - const errorMessage = getErrorMessage(resolvedSearchParams) - const successMessage = getSuccessMessage(resolvedSearchParams) - - return ( - - {errorMessage ? ( -
{errorMessage}
- ) : null} - {successMessage ? ( -
{successMessage}
- ) : null} - -
-
-

Create entity

-
- - -
- - -
-