diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index a078e038..805cf08a 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -39,6 +39,11 @@ jobs: deploy/helm/newsletter-maker/values-minikube.yaml > /tmp/newsletter-maker-chart.yaml + - name: Render staging overlay + run: helm template newsletter-maker-staging deploy/helm/newsletter-maker -f + deploy/helm/newsletter-maker/values-staging.yaml > + /tmp/newsletter-maker-staging-chart.yaml + build-frontend: name: Build frontend runs-on: ubuntu-latest diff --git a/content/migrations/0004_content_summary_text.py b/content/migrations/0004_content_summary_text.py new file mode 100644 index 00000000..1223ef88 --- /dev/null +++ b/content/migrations/0004_content_summary_text.py @@ -0,0 +1,15 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("content", "0003_content_pipeline_state"), + ] + + operations = [ + migrations.AddField( + model_name="content", + name="summary_text", + field=models.TextField(blank=True, default=""), + ), + ] diff --git a/content/models.py b/content/models.py index e05354c3..c3251247 100644 --- a/content/models.py +++ b/content/models.py @@ -44,6 +44,7 @@ class Content(models.Model): published_date = models.DateTimeField() ingested_at = models.DateTimeField(auto_now_add=True) content_text = models.TextField() + summary_text = models.TextField(blank=True, default="") relevance_score = models.FloatField(null=True, blank=True) authority_adjusted_score = models.FloatField(null=True, blank=True) embedding_id = models.CharField(max_length=64, blank=True) diff --git a/content/serializers.py b/content/serializers.py index b17cf7f4..ce3284ec 100644 --- a/content/serializers.py +++ b/content/serializers.py @@ -24,6 +24,7 @@ class Meta: "published_date", "ingested_at", "content_text", + "summary_text", "relevance_score", "authority_adjusted_score", "embedding_id", @@ -41,6 +42,7 @@ class Meta: "project", "canonical_url", "ingested_at", + "summary_text", "authority_adjusted_score", "embedding_id", "duplicate_of", diff --git a/content/tests/test_api.py b/content/tests/test_api.py index 184ca9f1..af2c225e 100644 --- a/content/tests/test_api.py +++ b/content/tests/test_api.py @@ -166,6 +166,25 @@ def test_content_create_uses_project_from_url(self): created_content = Content.objects.get(title="New Content") self.assertEqual(created_content.project, self.owner_project) + def test_content_detail_includes_summary_text(self): + self.owner_content.summary_text = "A concise summary ready for editors." + self.owner_content.save(update_fields=["summary_text"]) + + response = self.client.get( + reverse( + "v1:project-content-detail", + kwargs={ + "project_id": _require_pk(self.owner_project), + "pk": _require_pk(self.owner_content), + }, + ) + ) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual( + response.json()["summary_text"], "A concise summary ready for editors." + ) + @patch("core.tasks.run_relevance_scoring_skill.delay") def test_content_skill_action_queues_relevance_scoring( self, run_relevance_scoring_delay_mock diff --git a/core/api.py b/core/api.py index 39a50869..3e9ad698 100644 --- a/core/api.py +++ b/core/api.py @@ -250,6 +250,7 @@ "published_date": "2026-04-25T14:00:00Z", "ingested_at": "2026-04-26T12:05:00Z", "content_text": "A walkthrough of short-term and long-term memory patterns for production agents.", + "summary_text": "The piece explains practical patterns for combining short-term context with durable memory in production agents.", "relevance_score": 0.92, "authority_adjusted_score": 0.95, "embedding_id": "emb_01jabcxyz", diff --git a/core/management/commands/bootstrap_live_sources.py b/core/management/commands/bootstrap_live_sources.py new file mode 100644 index 00000000..a6847d98 --- /dev/null +++ b/core/management/commands/bootstrap_live_sources.py @@ -0,0 +1,229 @@ +"""Bootstrap real RSS and Reddit sources for local development.""" + +from __future__ import annotations + +from django.conf import settings +from django.core.management.base import BaseCommand, CommandError + +from ingestion.tasks import run_ingestion +from projects.model_support import SourcePluginName +from projects.models import Project, SourceConfig + + +class Command(BaseCommand): + help = ( + "Create or reactivate RSS and Reddit source configs for one project, " + "optionally queueing ingestion immediately." + ) + + def add_arguments(self, parser): + parser.add_argument( + "--project-id", + type=int, + help="Project ID that should own the bootstrapped source configs.", + ) + parser.add_argument( + "--project-name", + help="Project name that should own the bootstrapped source configs.", + ) + parser.add_argument( + "--rss-feed", + action="append", + default=[], + help=( + "RSS feed URL to add. Repeat the flag or pass a comma-separated list." + ), + ) + parser.add_argument( + "--subreddit", + action="append", + default=[], + help=("Subreddit to add. Repeat the flag or pass a comma-separated list."), + ) + parser.add_argument( + "--reddit-listing", + default="both", + choices=("new", "hot", "both"), + help="Listing mode to use for bootstrapped Reddit sources.", + ) + parser.add_argument( + "--reddit-limit", + type=int, + default=25, + help="Per-listing fetch limit for bootstrapped Reddit sources.", + ) + parser.add_argument( + "--run-now", + action="store_true", + help="Queue ingestion immediately for every source config touched.", + ) + + def handle(self, *args, **options): + project = self._get_project(options) + rss_feeds = self._split_values(options["rss_feed"]) + subreddits = self._split_values(options["subreddit"]) + reddit_limit = int(options["reddit_limit"]) + + if reddit_limit <= 0: + raise CommandError("--reddit-limit must be a positive integer.") + if not rss_feeds and not subreddits: + raise CommandError( + "Provide at least one --rss-feed or --subreddit value to bootstrap." + ) + + created_count = 0 + reactivated_count = 0 + updated_count = 0 + touched_source_ids: list[int] = [] + + for feed_url in rss_feeds: + source_config, outcome = self._upsert_rss_source(project, feed_url) + touched_source_ids.append(int(source_config.pk)) + if outcome == "created": + created_count += 1 + elif outcome == "reactivated": + reactivated_count += 1 + elif outcome == "updated": + updated_count += 1 + + for subreddit in subreddits: + source_config, outcome = self._upsert_reddit_source( + project, + subreddit, + listing=options["reddit_listing"], + limit=reddit_limit, + ) + touched_source_ids.append(int(source_config.pk)) + if outcome == "created": + created_count += 1 + elif outcome == "reactivated": + reactivated_count += 1 + elif outcome == "updated": + updated_count += 1 + + queued_count = 0 + if options["run_now"]: + for source_config_id in touched_source_ids: + if settings.CELERY_TASK_ALWAYS_EAGER: + run_ingestion(source_config_id) + else: + run_ingestion.delay(source_config_id) + queued_count += 1 + + self.stdout.write( + self.style.SUCCESS( + f"Bootstrapped {len(touched_source_ids)} source config(s) for project {project.name}." + ) + ) + self.stdout.write(f"Created: {created_count}") + self.stdout.write(f"Reactivated: {reactivated_count}") + self.stdout.write(f"Updated: {updated_count}") + if options["run_now"]: + self.stdout.write(f"Queued ingestions: {queued_count}") + + def _get_project(self, options) -> Project: + """Resolve the project from either ID or exact name.""" + + project_id = options.get("project_id") + project_name = (options.get("project_name") or "").strip() + if bool(project_id) == bool(project_name): + raise CommandError("Pass exactly one of --project-id or --project-name.") + + if project_id: + try: + return Project.objects.get(pk=project_id) + except Project.DoesNotExist as exc: + raise CommandError( + f"Project with id {project_id} does not exist." + ) from exc + + try: + return Project.objects.get(name=project_name) + except Project.DoesNotExist as exc: + raise CommandError( + f"Project named '{project_name}' does not exist." + ) from exc + + @staticmethod + def _split_values(raw_values: list[str]) -> list[str]: + """Split repeated or comma-delimited CLI values into a clean list.""" + + normalized_values: list[str] = [] + seen_values: set[str] = set() + for raw_value in raw_values: + for item in raw_value.split(","): + normalized_item = item.strip() + if not normalized_item or normalized_item in seen_values: + continue + seen_values.add(normalized_item) + normalized_values.append(normalized_item) + return normalized_values + + def _upsert_rss_source( + self, + project: Project, + feed_url: str, + ) -> tuple[SourceConfig, str]: + """Create or reactivate one RSS source config.""" + + source_config = SourceConfig.objects.filter( + project=project, + plugin_name=SourcePluginName.RSS, + config__feed_url=feed_url, + ).first() + if source_config is None: + source_config = SourceConfig.objects.create( + project=project, + plugin_name=SourcePluginName.RSS, + config={"feed_url": feed_url}, + is_active=True, + ) + return source_config, "created" + if not source_config.is_active: + source_config.is_active = True + source_config.save(update_fields=["is_active"]) + return source_config, "reactivated" + return source_config, "unchanged" + + def _upsert_reddit_source( + self, + project: Project, + subreddit: str, + *, + listing: str, + limit: int, + ) -> tuple[SourceConfig, str]: + """Create, reactivate, or refresh one Reddit source config.""" + + desired_config = { + "subreddit": subreddit, + "listing": listing, + "limit": limit, + } + source_config = SourceConfig.objects.filter( + project=project, + plugin_name=SourcePluginName.REDDIT, + config__subreddit=subreddit, + ).first() + if source_config is None: + source_config = SourceConfig.objects.create( + project=project, + plugin_name=SourcePluginName.REDDIT, + config=desired_config, + is_active=True, + ) + return source_config, "created" + + update_fields: list[str] = [] + outcome = "unchanged" + if source_config.config != desired_config: + source_config.config = desired_config + update_fields.append("config") + outcome = "updated" + if not source_config.is_active: + source_config.is_active = True + update_fields.append("is_active") + outcome = "reactivated" if outcome == "unchanged" else outcome + if update_fields: + source_config.save(update_fields=update_fields) + return source_config, outcome diff --git a/core/management/commands/seed_demo.py b/core/management/commands/seed_demo.py index a1df699c..11e4d83d 100644 --- a/core/management/commands/seed_demo.py +++ b/core/management/commands/seed_demo.py @@ -690,6 +690,7 @@ def _seed_pipeline_state( ) ) if relevance_score >= settings.AI_RELEVANCE_SUMMARIZE_THRESHOLD: + content.summary_text = self._summary_for_article(article) skill_results.append( SkillResult( content=content, @@ -697,7 +698,7 @@ def _seed_pipeline_state( skill_name=SUMMARIZATION_SKILL_NAME, status=SkillStatus.COMPLETED, result_data={ - "summary": self._summary_for_article(article), + "summary": content.summary_text, }, model_used=settings.AI_SUMMARIZATION_MODEL, latency_ms=640 + (index % 6) * 40, @@ -730,7 +731,7 @@ def _seed_pipeline_state( Content.objects.bulk_update( content_updates, - ["content_type", "relevance_score", "is_active"], + ["content_type", "relevance_score", "is_active", "summary_text"], ) SkillResult.objects.bulk_create(skill_results) ReviewQueue.objects.bulk_create(review_items) diff --git a/core/pipeline.py b/core/pipeline.py index d4719303..c8f35ebf 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -117,6 +117,21 @@ def _project_pk(content: Content) -> int: return _require_pk(content.project) +def _persist_content_summary( + content: Content, + summary: dict[str, Any], + *, + extra_update_fields: list[str] | None = None, +) -> None: + """Persist the latest summary text onto the content row.""" + + update_fields = ["summary_text"] + content.summary_text = str(summary.get("summary", "")).strip() + if extra_update_fields: + update_fields.extend(extra_update_fields) + content.save(update_fields=update_fields) + + def _content_id_from_state(state: PipelineState) -> int: """Extract a required content id from pipeline state.""" @@ -462,7 +477,11 @@ def summarize_node(state: PipelineState) -> PipelineState: "status": "awaiting_review", } content.pipeline_state = ContentPipelineState.COMPLETED - content.save(update_fields=["pipeline_state"]) + _persist_content_summary( + content, + summary, + extra_update_fields=["pipeline_state"], + ) return { "summary": summary, "retry_budget_remaining": retry_budget.remaining_retries, @@ -1155,6 +1174,7 @@ def execute_background_skill_result( ) if skill_name == SUMMARIZATION_SKILL_NAME: summary = _run_ad_hoc_summarization(skill_result.content) + _persist_content_summary(skill_result.content, summary) return _update_skill_result( skill_result, status=SkillStatus.COMPLETED, @@ -1230,6 +1250,7 @@ def _execute_ad_hoc_summarization(content: Content) -> SkillResult: try: summary = _run_ad_hoc_summarization(content) + _persist_content_summary(content, summary) return _create_skill_result( content, skill_name=SUMMARIZATION_SKILL_NAME, diff --git a/core/tests/test_bootstrap_live_sources.py b/core/tests/test_bootstrap_live_sources.py new file mode 100644 index 00000000..92692fdd --- /dev/null +++ b/core/tests/test_bootstrap_live_sources.py @@ -0,0 +1,101 @@ +from io import StringIO + +import pytest +from django.core.management import CommandError, call_command + +from projects.model_support import SourcePluginName +from projects.models import Project, SourceConfig + +pytestmark = pytest.mark.django_db + + +def test_bootstrap_live_sources_creates_requested_source_configs(): + project = Project.objects.create( + name="Bootstrap Project", + topic_description="Platform engineering", + ) + stdout = StringIO() + + call_command( + "bootstrap_live_sources", + project_id=project.id, + rss_feed=[ + "https://example.com/feed.xml,https://example.com/another.xml", + ], + subreddit=["devops,kubernetes"], + reddit_listing="hot", + reddit_limit=30, + stdout=stdout, + ) + + rss_configs = SourceConfig.objects.filter( + project=project, + plugin_name=SourcePluginName.RSS, + ).order_by("id") + reddit_configs = SourceConfig.objects.filter( + project=project, + plugin_name=SourcePluginName.REDDIT, + ).order_by("id") + + assert rss_configs.count() == 2 + assert reddit_configs.count() == 2 + assert list(rss_configs.values_list("config__feed_url", flat=True)) == [ + "https://example.com/feed.xml", + "https://example.com/another.xml", + ] + assert list(reddit_configs.values_list("config__subreddit", flat=True)) == [ + "devops", + "kubernetes", + ] + assert all(config.config["listing"] == "hot" for config in reddit_configs) + assert all(config.config["limit"] == 30 for config in reddit_configs) + assert "Bootstrapped 4 source config(s)" in stdout.getvalue() + + +def test_bootstrap_live_sources_reactivates_and_updates_existing_sources(): + project = Project.objects.create( + name="Bootstrap Project", + topic_description="Platform engineering", + ) + rss_source = SourceConfig.objects.create( + project=project, + plugin_name=SourcePluginName.RSS, + config={"feed_url": "https://example.com/feed.xml"}, + is_active=False, + ) + reddit_source = SourceConfig.objects.create( + project=project, + plugin_name=SourcePluginName.REDDIT, + config={"subreddit": "devops", "listing": "new", "limit": 10}, + is_active=False, + ) + + call_command( + "bootstrap_live_sources", + project_name=project.name, + rss_feed=["https://example.com/feed.xml"], + subreddit=["devops"], + reddit_listing="both", + reddit_limit=25, + ) + + rss_source.refresh_from_db() + reddit_source.refresh_from_db() + + assert rss_source.is_active is True + assert reddit_source.is_active is True + assert reddit_source.config == { + "subreddit": "devops", + "listing": "both", + "limit": 25, + } + assert SourceConfig.objects.filter(project=project).count() == 2 + + +def test_bootstrap_live_sources_requires_one_project_selector(): + with pytest.raises( + CommandError, match="exactly one of --project-id or --project-name" + ): + call_command( + "bootstrap_live_sources", rss_feed=["https://example.com/feed.xml"] + ) diff --git a/core/tests/test_embeddings.py b/core/tests/test_embeddings.py index d534586f..8d83c8ea 100644 --- a/core/tests/test_embeddings.py +++ b/core/tests/test_embeddings.py @@ -405,6 +405,7 @@ def test_seed_demo_creates_reference_corpus_and_embeds_demo_content(mocker, caps ).count() == 115 ) + assert Content.objects.filter(project=project, summary_text__gt="").count() == 115 assert ReviewQueue.objects.filter(project=project).exists() assert UserFeedback.objects.filter(project=project).count() == 45 assert IngestionRun.objects.filter(project=project).count() == 6 diff --git a/deploy/argocd/newsletter-maker-staging-application.yaml b/deploy/argocd/newsletter-maker-staging-application.yaml new file mode 100644 index 00000000..71b79a64 --- /dev/null +++ b/deploy/argocd/newsletter-maker-staging-application.yaml @@ -0,0 +1,23 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: newsletter-maker-staging + namespace: argocd +spec: + project: default + source: + repoURL: https://github.com/webstackdev/newsletter-maker.git + targetRevision: main + path: deploy/helm/newsletter-maker + helm: + valueFiles: + - values-staging.yaml + destination: + server: https://kubernetes.default.svc + namespace: newsletter-maker-staging + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true \ No newline at end of file diff --git a/deploy/helm/newsletter-maker/templates/_helpers.tpl b/deploy/helm/newsletter-maker/templates/_helpers.tpl index 92a43ead..ab1b98fa 100644 --- a/deploy/helm/newsletter-maker/templates/_helpers.tpl +++ b/deploy/helm/newsletter-maker/templates/_helpers.tpl @@ -10,6 +10,14 @@ {{- end -}} {{- end -}} +{{- define "newsletter-maker.secretName" -}} +{{- if .Values.secrets.existingSecretName -}} +{{- .Values.secrets.existingSecretName | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-secret" (include "newsletter-maker.fullname" .) | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + {{- define "newsletter-maker.labels" -}} app.kubernetes.io/name: {{ include "newsletter-maker.name" . }} helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} diff --git a/deploy/helm/newsletter-maker/templates/celery-beat-deployment.yaml b/deploy/helm/newsletter-maker/templates/celery-beat-deployment.yaml index 131f7f91..d83e082a 100644 --- a/deploy/helm/newsletter-maker/templates/celery-beat-deployment.yaml +++ b/deploy/helm/newsletter-maker/templates/celery-beat-deployment.yaml @@ -27,6 +27,6 @@ spec: - configMapRef: name: {{ include "newsletter-maker.fullname" . }}-env - secretRef: - name: {{ include "newsletter-maker.fullname" . }}-secret + name: {{ include "newsletter-maker.secretName" . }} resources: {{- toYaml .Values.celeryBeat.resources | nindent 12 }} diff --git a/deploy/helm/newsletter-maker/templates/celery-worker-deployment.yaml b/deploy/helm/newsletter-maker/templates/celery-worker-deployment.yaml index daca25fb..52411af3 100644 --- a/deploy/helm/newsletter-maker/templates/celery-worker-deployment.yaml +++ b/deploy/helm/newsletter-maker/templates/celery-worker-deployment.yaml @@ -27,6 +27,6 @@ spec: - configMapRef: name: {{ include "newsletter-maker.fullname" . }}-env - secretRef: - name: {{ include "newsletter-maker.fullname" . }}-secret + name: {{ include "newsletter-maker.secretName" . }} resources: {{- toYaml .Values.celeryWorker.resources | nindent 12 }} diff --git a/deploy/helm/newsletter-maker/templates/django-deployment.yaml b/deploy/helm/newsletter-maker/templates/django-deployment.yaml index 91cc4b37..0a938f51 100644 --- a/deploy/helm/newsletter-maker/templates/django-deployment.yaml +++ b/deploy/helm/newsletter-maker/templates/django-deployment.yaml @@ -27,7 +27,7 @@ spec: - configMapRef: name: {{ include "newsletter-maker.fullname" . }}-env - secretRef: - name: {{ include "newsletter-maker.fullname" . }}-secret + name: {{ include "newsletter-maker.secretName" . }} ports: - name: http containerPort: {{ .Values.django.port }} diff --git a/deploy/helm/newsletter-maker/templates/django-service.yaml b/deploy/helm/newsletter-maker/templates/django-service.yaml index 693e0cf8..32eaf84a 100644 --- a/deploy/helm/newsletter-maker/templates/django-service.yaml +++ b/deploy/helm/newsletter-maker/templates/django-service.yaml @@ -4,6 +4,7 @@ metadata: name: {{ include "newsletter-maker.djangoHost" . }} labels: {{- include "newsletter-maker.labels" . | nindent 4 }} + {{- include "newsletter-maker.componentLabels" (dict "Release" .Release "Values" .Values "Chart" .Chart "component" "django") | nindent 4 }} spec: ports: - name: http diff --git a/deploy/helm/newsletter-maker/templates/migrate-job.yaml b/deploy/helm/newsletter-maker/templates/migrate-job.yaml index 47dacdda..2a78cf7e 100644 --- a/deploy/helm/newsletter-maker/templates/migrate-job.yaml +++ b/deploy/helm/newsletter-maker/templates/migrate-job.yaml @@ -33,5 +33,5 @@ spec: - configMapRef: name: {{ include "newsletter-maker.fullname" . }}-env - secretRef: - name: {{ include "newsletter-maker.fullname" . }}-secret + name: {{ include "newsletter-maker.secretName" . }} {{- end }} diff --git a/deploy/helm/newsletter-maker/templates/postgres-statefulset.yaml b/deploy/helm/newsletter-maker/templates/postgres-statefulset.yaml index 2d0d2444..34cd6dec 100644 --- a/deploy/helm/newsletter-maker/templates/postgres-statefulset.yaml +++ b/deploy/helm/newsletter-maker/templates/postgres-statefulset.yaml @@ -28,7 +28,7 @@ spec: - name: POSTGRES_PASSWORD valueFrom: secretKeyRef: - name: {{ include "newsletter-maker.fullname" . }}-secret + name: {{ include "newsletter-maker.secretName" . }} key: POSTGRES_PASSWORD ports: - containerPort: {{ .Values.postgres.service.port }} diff --git a/deploy/helm/newsletter-maker/templates/secret.yaml b/deploy/helm/newsletter-maker/templates/secret.yaml index 39289b94..bb33233b 100644 --- a/deploy/helm/newsletter-maker/templates/secret.yaml +++ b/deploy/helm/newsletter-maker/templates/secret.yaml @@ -1,7 +1,8 @@ +{{- if not .Values.secrets.existingSecretName }} apiVersion: v1 kind: Secret metadata: - name: {{ include "newsletter-maker.fullname" . }}-secret + name: {{ include "newsletter-maker.secretName" . }} labels: {{- include "newsletter-maker.labels" . | nindent 4 }} type: Opaque @@ -20,3 +21,4 @@ stringData: DJANGO_SUPERUSER_USERNAME: {{ .Values.secrets.djangoSuperuserUsername | quote }} DJANGO_SUPERUSER_EMAIL: {{ .Values.secrets.djangoSuperuserEmail | quote }} DJANGO_SUPERUSER_PASSWORD: {{ .Values.secrets.djangoSuperuserPassword | quote }} +{{- end }} diff --git a/deploy/helm/newsletter-maker/templates/seed-demo-job.yaml b/deploy/helm/newsletter-maker/templates/seed-demo-job.yaml index 0c8d34cb..6c85ead2 100644 --- a/deploy/helm/newsletter-maker/templates/seed-demo-job.yaml +++ b/deploy/helm/newsletter-maker/templates/seed-demo-job.yaml @@ -27,5 +27,5 @@ spec: - configMapRef: name: {{ include "newsletter-maker.fullname" . }}-env - secretRef: - name: {{ include "newsletter-maker.fullname" . }}-secret + name: {{ include "newsletter-maker.secretName" . }} {{- end }} diff --git a/deploy/helm/newsletter-maker/templates/service-monitor.yaml b/deploy/helm/newsletter-maker/templates/service-monitor.yaml new file mode 100644 index 00000000..3dadc35e --- /dev/null +++ b/deploy/helm/newsletter-maker/templates/service-monitor.yaml @@ -0,0 +1,23 @@ +{{- if and .Values.serviceMonitor.enabled (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1") }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "newsletter-maker.djangoHost" . }} + labels: + {{- include "newsletter-maker.labels" . | nindent 4 }} + {{- with .Values.serviceMonitor.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + {{- include "newsletter-maker.componentLabels" (dict "Release" .Release "Values" .Values "Chart" .Chart "component" "django") | nindent 6 }} + endpoints: + - port: http + path: {{ .Values.serviceMonitor.path | quote }} + interval: {{ .Values.serviceMonitor.interval | quote }} + scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout | quote }} + bearerTokenSecret: + name: {{ include "newsletter-maker.secretName" . }} + key: METRICS_TOKEN +{{- end }} \ No newline at end of file diff --git a/deploy/helm/newsletter-maker/templates/sync-embeddings-job.yaml b/deploy/helm/newsletter-maker/templates/sync-embeddings-job.yaml index 70f49e1c..81f021cd 100644 --- a/deploy/helm/newsletter-maker/templates/sync-embeddings-job.yaml +++ b/deploy/helm/newsletter-maker/templates/sync-embeddings-job.yaml @@ -27,5 +27,5 @@ spec: - configMapRef: name: {{ include "newsletter-maker.fullname" . }}-env - secretRef: - name: {{ include "newsletter-maker.fullname" . }}-secret + name: {{ include "newsletter-maker.secretName" . }} {{- end }} diff --git a/deploy/helm/newsletter-maker/values-staging.yaml b/deploy/helm/newsletter-maker/values-staging.yaml new file mode 100644 index 00000000..595f4548 --- /dev/null +++ b/deploy/helm/newsletter-maker/values-staging.yaml @@ -0,0 +1,26 @@ +image: + repository: ghcr.io/webstackdev/newsletter-maker + tag: main + pullPolicy: IfNotPresent + +env: + debug: "false" + allowedHosts: "staging.newsletter-maker.example.com" + csrfTrustedOrigins: "https://staging.newsletter-maker.example.com" + newsletterApiBaseUrl: "https://staging.newsletter-maker.example.com" + logLevel: INFO + +secrets: + existingSecretName: newsletter-maker-staging-secrets + +jobs: + seedDemo: + enabled: false + +networkPolicy: + enabled: true + +serviceMonitor: + enabled: true + labels: + release: kube-prometheus-stack \ No newline at end of file diff --git a/deploy/helm/newsletter-maker/values.yaml b/deploy/helm/newsletter-maker/values.yaml index afefa7fd..2efac430 100644 --- a/deploy/helm/newsletter-maker/values.yaml +++ b/deploy/helm/newsletter-maker/values.yaml @@ -38,6 +38,7 @@ env: ollamaUrl: http://ollama:11434 secrets: + existingSecretName: "" secretKey: change-me blueskyCredentialsEncryptionKey: "" metricsToken: "" @@ -160,3 +161,10 @@ keda: networkPolicy: enabled: false + +serviceMonitor: + enabled: false + interval: 30s + scrapeTimeout: 10s + path: /metrics + labels: {} diff --git a/frontend/src/app/__tests__/page.test.tsx b/frontend/src/app/page.test.tsx similarity index 99% rename from frontend/src/app/__tests__/page.test.tsx rename to frontend/src/app/page.test.tsx index 505c0a15..b7464be4 100644 --- a/frontend/src/app/__tests__/page.test.tsx +++ b/frontend/src/app/page.test.tsx @@ -261,7 +261,7 @@ function createDashboardView(overrides: Record = {}) { } async function loadHomePageModule() { - return import("../page") + return import("./page") } async function renderHomePage( diff --git a/justfile b/justfile index af4cf498..de012def 100644 --- a/justfile +++ b/justfile @@ -209,6 +209,17 @@ seed: @{{backend_env}} @{{compose}} exec django python manage.py seed_demo +# Bootstrap RSS and Reddit source configs for one project in local development +bootstrap-live-sources project_id: + @{{backend_env}} + @python3 manage.py bootstrap_live_sources \ + --project-id {{project_id}} \ + ${RSS_FEEDS:+--rss-feed "$RSS_FEEDS"} \ + ${SUBREDDITS:+--subreddit "$SUBREDDITS"} \ + ${REDDIT_LISTING:+--reddit-listing "$REDDIT_LISTING"} \ + ${REDDIT_LIMIT:+--reddit-limit "$REDDIT_LIMIT"} \ + ${RUN_NOW:+--run-now} + # Sync embeddings for all eligible content embed-all: @{{backend_env}} diff --git a/newsletters/admin.py b/newsletters/admin.py new file mode 100644 index 00000000..fc54516a --- /dev/null +++ b/newsletters/admin.py @@ -0,0 +1,157 @@ +"""Admin configuration for newsletter intake and sender allowlists.""" + +import json +from typing import Any, cast + +from django.contrib import admin +from django.utils.html import format_html +from django.utils.safestring import mark_safe +from unfold.admin import ModelAdmin + +from newsletters.models import IntakeAllowlist, NewsletterIntake + + +@admin.register(IntakeAllowlist) +class IntakeAllowlistAdmin(ModelAdmin): + """Admin view for project newsletter sender allowlists.""" + + list_display = ( + "sender_email", + "project", + "confirmation_state", + "confirmed_at", + "created_at", + ) + list_filter = ( + ("project", admin.RelatedOnlyFieldListFilter), + "confirmed_at", + ) + search_fields = ("sender_email", "project__name") + readonly_fields = ("confirmation_token", "created_at") + + @admin.display(description="Confirmed") + def confirmation_state(self, obj): + """Render allowlist confirmation state as a compact badge.""" + + if obj.is_confirmed: + return format_html( + '{}', + "success", + "CONFIRMED", + ) + return format_html( + '{}', + "warning", + "PENDING", + ) + + +@admin.register(NewsletterIntake) +class NewsletterIntakeAdmin(ModelAdmin): + """Admin view for inbound newsletter audit and extraction results.""" + + list_display = ( + "subject", + "project", + "sender_email", + "display_status", + "received_at", + ) + list_filter = ( + "status", + ("project", admin.RelatedOnlyFieldListFilter), + ) + search_fields = ( + "subject", + "sender_email", + "message_id", + "error_message", + ) + readonly_fields = ( + "received_at", + "message_id", + "pretty_extraction_result", + ) + fieldsets = ( + ( + "Intake", + { + "fields": ( + "project", + "sender_email", + "subject", + "status", + "received_at", + "message_id", + ) + }, + ), + ( + "Payload", + {"fields": ("raw_html", "raw_text")}, + ), + ( + "Extraction", + {"fields": ("pretty_extraction_result", "error_message")}, + ), + ) + + @admin.display(description="Status") + def display_status(self, obj): + """Render intake status as an Unfold badge.""" + + status_value = str(obj.status).lower() + colors = { + "pending": "warning", + "extracted": "success", + "failed": "danger", + "rejected": "danger", + } + return format_html( + '{}', + colors.get(status_value, "info"), + status_value.upper(), + ) + + @admin.display(description="Extraction Result JSON") + def pretty_extraction_result(self, obj): + """Render extraction metadata in a readable preformatted block.""" + + if not obj.extraction_result: + return "No extraction result recorded" + formatted_json = json.dumps(obj.extraction_result, indent=4) + return mark_safe( + '
'
+            f"{formatted_json}"
+            "
" + ) + + def changelist_view(self, request, extra_context=None): + """Augment the changelist with intake status summary cards.""" + + queryset = self.get_queryset(request) + extra_context = cast(dict[str, Any], extra_context or {}) + total_count = queryset.count() + extracted_count = queryset.filter(status="extracted").count() + failed_count = queryset.filter(status__in=["failed", "rejected"]).count() + + extra_context["dashboard_stats"] = [ + { + "title": "Recorded Intakes", + "value": f"{total_count}", + "icon": "mail", + }, + { + "title": "Extracted", + "value": f"{extracted_count}", + "icon": "check_circle", + "color": "success", + }, + { + "title": "Failed or Rejected", + "value": f"{failed_count}", + "icon": "error", + "color": "danger" if failed_count else "success", + }, + ] + return super().changelist_view(request, extra_context=extra_context) diff --git a/newsletters/extraction.py b/newsletters/extraction.py index 51fec279..34e40445 100644 --- a/newsletters/extraction.py +++ b/newsletters/extraction.py @@ -1,12 +1,18 @@ -"""Heuristics for extracting article candidates from newsletter emails.""" +"""Newsletter extraction helpers with OpenRouter fallback to heuristics.""" from __future__ import annotations import re from dataclasses import dataclass from html.parser import HTMLParser +from typing import Any + +from django.conf import settings + +from core.llm import build_skill_user_prompt, get_skill_definition, openrouter_chat_json URL_PATTERN = re.compile(r"https?://[^\s<>'\"]+") +NEWSLETTER_EXTRACTION_SKILL_NAME = "newsletter_extraction" @dataclass(slots=True) @@ -19,6 +25,14 @@ class ExtractedNewsletterItem: position: int +@dataclass(slots=True) +class NewsletterExtractionResult: + """Structured extraction output plus operational metadata.""" + + items: list[ExtractedNewsletterItem] + metadata: dict[str, Any] + + class _NewsletterLinkParser(HTMLParser): """Collect anchor tags with HTTP(S) targets from newsletter HTML.""" @@ -59,6 +73,18 @@ def handle_endtag(self, tag: str) -> None: def extract_newsletter_items( *, subject: str, raw_html: str, raw_text: str ) -> list[ExtractedNewsletterItem]: + """Return extracted newsletter items while preserving older call sites.""" + + return extract_newsletter_payload( + subject=subject, + raw_html=raw_html, + raw_text=raw_text, + ).items + + +def extract_newsletter_payload( + *, subject: str, raw_html: str, raw_text: str +) -> NewsletterExtractionResult: """Extract ordered newsletter items from HTML anchors and plain-text URLs. Args: @@ -67,9 +93,82 @@ def extract_newsletter_items( raw_text: Plain-text body of the newsletter email. Returns: - A de-duplicated ordered list of extracted article candidates. + The extracted article candidates plus extraction metadata. """ + heuristic_items = _extract_newsletter_items_heuristically( + subject=subject, + raw_html=raw_html, + raw_text=raw_text, + ) + fallback_metadata = { + "method": "heuristic", + "model_used": "heuristic", + "latency_ms": 0, + "degraded": False, + "items_extracted": len(heuristic_items), + } + + if not settings.OPENROUTER_API_KEY: + return NewsletterExtractionResult( + items=heuristic_items, metadata=fallback_metadata + ) + + try: + response = openrouter_chat_json( + model=settings.AI_SUMMARIZATION_MODEL, + system_prompt=get_skill_definition( + NEWSLETTER_EXTRACTION_SKILL_NAME + ).instructions_markdown, + user_prompt=build_skill_user_prompt( + NEWSLETTER_EXTRACTION_SKILL_NAME, + { + "subject": subject, + "raw_html": raw_html[:12000], + "raw_text": raw_text[:12000], + }, + ), + ) + normalized_items = _normalize_llm_items( + response.payload.get("items", []), + subject=subject, + raw_text=raw_text, + ) + if not normalized_items: + return NewsletterExtractionResult( + items=heuristic_items, + metadata={ + **fallback_metadata, + "degraded": True, + "fallback_reason": "OpenRouter returned no valid newsletter items.", + }, + ) + return NewsletterExtractionResult( + items=normalized_items, + metadata={ + "method": "openrouter", + "model_used": response.model, + "latency_ms": response.latency_ms, + "degraded": False, + "items_extracted": len(normalized_items), + }, + ) + except Exception as exc: + return NewsletterExtractionResult( + items=heuristic_items, + metadata={ + **fallback_metadata, + "degraded": True, + "fallback_reason": str(exc), + }, + ) + + +def _extract_newsletter_items_heuristically( + *, subject: str, raw_html: str, raw_text: str +) -> list[ExtractedNewsletterItem]: + """Extract newsletter items from anchors and text without model calls.""" + parser = _NewsletterLinkParser() if raw_html: parser.feed(raw_html) @@ -105,3 +204,39 @@ def extract_newsletter_items( ) return extracted_items + + +def _normalize_llm_items( + raw_items: object, + *, + subject: str, + raw_text: str, +) -> list[ExtractedNewsletterItem]: + """Normalize OpenRouter extraction results into saved newsletter items.""" + + if not isinstance(raw_items, list): + return [] + + normalized_items: list[ExtractedNewsletterItem] = [] + seen_urls: set[str] = set() + fallback_excerpt = raw_text[:500].strip() + + for raw_item in raw_items: + if not isinstance(raw_item, dict): + continue + url = str(raw_item.get("url", "")).strip().rstrip(".,)") + if not url.startswith(("http://", "https://")) or url in seen_urls: + continue + seen_urls.add(url) + title = str(raw_item.get("title", "")).strip() or subject or url + excerpt = str(raw_item.get("excerpt", "")).strip() or fallback_excerpt + normalized_items.append( + ExtractedNewsletterItem( + url=url, + title=title, + excerpt=excerpt, + position=len(normalized_items) + 1, + ) + ) + + return normalized_items diff --git a/newsletters/tasks.py b/newsletters/tasks.py index dd29f538..f7fa3324 100644 --- a/newsletters/tasks.py +++ b/newsletters/tasks.py @@ -17,7 +17,7 @@ from newsletters.composition import ( regenerate_newsletter_draft_section as compose_newsletter_draft_section, ) -from newsletters.extraction import extract_newsletter_items +from newsletters.extraction import extract_newsletter_payload from newsletters.models import ( IntakeAllowlist, NewsletterDraft, @@ -66,11 +66,12 @@ def process_newsletter_intake(intake_id: int): intake.save(update_fields=["status", "error_message"]) return {"status": intake.status, "items_ingested": 0} - extracted_items = extract_newsletter_items( + extraction = extract_newsletter_payload( subject=intake.subject, raw_html=intake.raw_html, raw_text=intake.raw_text, ) + extracted_items = extraction.items ingested_count = 0 for item in extracted_items: canonical_url = canonicalize_url(item.url) @@ -104,7 +105,8 @@ def process_newsletter_intake(intake_id: int): intake.status = NewsletterIntakeStatus.EXTRACTED intake.error_message = "" intake.extraction_result = { - "method": "heuristic", + **extraction.metadata, + "items_ingested": ingested_count, "items": [ { "url": item.url, diff --git a/newsletters/tests/test_admin.py b/newsletters/tests/test_admin.py new file mode 100644 index 00000000..f8bb4d89 --- /dev/null +++ b/newsletters/tests/test_admin.py @@ -0,0 +1,93 @@ +from types import SimpleNamespace +from typing import Any, cast + +import pytest +from django.contrib.admin.sites import AdminSite +from django.test import RequestFactory + +from newsletters.admin import IntakeAllowlistAdmin, NewsletterIntakeAdmin +from newsletters.models import IntakeAllowlist, NewsletterIntake, NewsletterIntakeStatus +from projects.models import Project + +pytestmark = pytest.mark.django_db + + +def _create_user(user_model: Any, **kwargs: object): + """Create a user through the custom manager with a typed escape hatch.""" + + return cast(Any, user_model.objects).create_user(**kwargs) + + +def _request(): + """Build a typed request object for admin actions and filters.""" + + return RequestFactory().get("/admin/") + + +@pytest.fixture +def newsletter_admin_context(django_user_model): + user = _create_user( + django_user_model, username="newsletter-admin", password="testpass123" + ) + project = Project.objects.create(name="Newsletter Admin", topic_description="DX") + return SimpleNamespace(user=user, project=project) + + +def test_intake_allowlist_admin_renders_confirmation_state(newsletter_admin_context): + allowlist = IntakeAllowlist.objects.create( + project=newsletter_admin_context.project, + sender_email="sender@example.com", + ) + admin_instance = IntakeAllowlistAdmin(IntakeAllowlist, AdminSite()) + + assert "PENDING" in admin_instance.confirmation_state(allowlist) + allowlist.confirmed_at = "2026-05-01T00:00:00Z" + allowlist.save(update_fields=["confirmed_at"]) + + assert "CONFIRMED" in admin_instance.confirmation_state(allowlist) + + +def test_newsletter_intake_admin_pretty_result_and_dashboard( + newsletter_admin_context, mocker +): + NewsletterIntake.objects.create( + project=newsletter_admin_context.project, + sender_email="sender@example.com", + subject="Pending Digest", + raw_text="Pending body", + message_id="pending-msg", + status=NewsletterIntakeStatus.PENDING, + ) + intake = NewsletterIntake.objects.create( + project=newsletter_admin_context.project, + sender_email="sender@example.com", + subject="Extracted Digest", + raw_text="Extracted body", + message_id="extracted-msg", + status=NewsletterIntakeStatus.EXTRACTED, + extraction_result={"method": "openrouter", "items_extracted": 1}, + ) + NewsletterIntake.objects.create( + project=newsletter_admin_context.project, + sender_email="sender@example.com", + subject="Failed Digest", + raw_text="Failed body", + message_id="failed-msg", + status=NewsletterIntakeStatus.FAILED, + error_message="provider timeout", + ) + + admin_instance = NewsletterIntakeAdmin(NewsletterIntake, AdminSite()) + super_changelist_view = mocker.patch( + "newsletters.admin.ModelAdmin.changelist_view", + side_effect=lambda request, extra_context=None: extra_context, + ) + + rendered_json = admin_instance.pretty_extraction_result(intake) + response = admin_instance.changelist_view(_request()) + + assert "openrouter" in rendered_json + assert "EXTRACTED" in admin_instance.display_status(intake) + assert response["dashboard_stats"][0]["value"] == "3" + assert response["dashboard_stats"][2]["value"] == "1" + super_changelist_view.assert_called_once() diff --git a/pipeline/tests/test_pipeline.py b/pipeline/tests/test_pipeline.py index 03bb5943..c217a6fe 100644 --- a/pipeline/tests/test_pipeline.py +++ b/pipeline/tests/test_pipeline.py @@ -131,6 +131,7 @@ def test_process_content_runs_full_pipeline_for_relevant_content( assert pipeline_context.content.content_type == "release_notes" assert pipeline_context.content.relevance_score == pytest.approx(0.92) assert pipeline_context.content.authority_adjusted_score == pytest.approx(1.0) + assert pipeline_context.content.summary_text == "A concise summary for the editor." assert pipeline_context.content.is_active is True assert ( SkillResult.objects.filter( @@ -861,7 +862,12 @@ def test_execute_ad_hoc_summarization_allows_adjusted_score_to_pass_gate( result = execute_ad_hoc_skill(pipeline_context.content, SUMMARIZATION_SKILL_NAME) + pipeline_context.content.refresh_from_db() assert result.status == SkillStatus.COMPLETED + assert ( + pipeline_context.content.summary_text + == "Authority-adjusted content is now eligible." + ) assert result.result_data == { "summary": "Authority-adjusted content is now eligible.", "model_used": "heuristic", @@ -925,8 +931,10 @@ def test_execute_background_skill_result_completes_summary_when_requirements_are ) pending_result.refresh_from_db() + pipeline_context.content.refresh_from_db() assert result.status == SkillStatus.COMPLETED assert pending_result.status == SkillStatus.COMPLETED + assert pipeline_context.content.summary_text == "Manual summary output." assert pending_result.result_data == { "summary": "Manual summary output.", "model_used": "heuristic", diff --git a/skills/newsletter_extraction/SKILL.md b/skills/newsletter_extraction/SKILL.md new file mode 100644 index 00000000..5cec8b22 --- /dev/null +++ b/skills/newsletter_extraction/SKILL.md @@ -0,0 +1,22 @@ +--- +name: newsletter_extraction +description: Extract ordered article candidates from a forwarded newsletter email. +input: subject, raw_html, raw_text +output: items +--- + +Extract the editorial article candidates from one forwarded newsletter email. + +Return only JSON with an `items` array. Each item must contain: + +- `url`: the canonical article URL +- `title`: a concise article title +- `excerpt`: a short excerpt or why-it-matters style summary from the newsletter body + +Rules: + +- Keep the original newsletter order. +- Ignore unsubscribe links, privacy links, share links, logo links, and obvious navigation links. +- Prefer article destinations over tracking or redirect wrappers when the destination is visible in the email body. +- Use the newsletter subject only when no better title is available. +- Return an empty `items` array when the email contains no article candidates. \ No newline at end of file