Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/build-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ jobs:
deploy/helm/newsletter-maker/values-minikube.yaml >
/tmp/newsletter-maker-chart.yaml

- name: Render staging overlay
run: helm template newsletter-maker-staging deploy/helm/newsletter-maker -f
deploy/helm/newsletter-maker/values-staging.yaml >
/tmp/newsletter-maker-staging-chart.yaml

build-frontend:
name: Build frontend
runs-on: ubuntu-latest
Expand Down
15 changes: 15 additions & 0 deletions content/migrations/0004_content_summary_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from django.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
("content", "0003_content_pipeline_state"),
]

operations = [
migrations.AddField(
model_name="content",
name="summary_text",
field=models.TextField(blank=True, default=""),
),
]
1 change: 1 addition & 0 deletions content/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class Content(models.Model):
published_date = models.DateTimeField()
ingested_at = models.DateTimeField(auto_now_add=True)
content_text = models.TextField()
summary_text = models.TextField(blank=True, default="")
relevance_score = models.FloatField(null=True, blank=True)
authority_adjusted_score = models.FloatField(null=True, blank=True)
embedding_id = models.CharField(max_length=64, blank=True)
Expand Down
2 changes: 2 additions & 0 deletions content/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class Meta:
"published_date",
"ingested_at",
"content_text",
"summary_text",
"relevance_score",
"authority_adjusted_score",
"embedding_id",
Expand All @@ -41,6 +42,7 @@ class Meta:
"project",
"canonical_url",
"ingested_at",
"summary_text",
"authority_adjusted_score",
"embedding_id",
"duplicate_of",
Expand Down
19 changes: 19 additions & 0 deletions content/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,25 @@ def test_content_create_uses_project_from_url(self):
created_content = Content.objects.get(title="New Content")
self.assertEqual(created_content.project, self.owner_project)

def test_content_detail_includes_summary_text(self):
self.owner_content.summary_text = "A concise summary ready for editors."
self.owner_content.save(update_fields=["summary_text"])

response = self.client.get(
reverse(
"v1:project-content-detail",
kwargs={
"project_id": _require_pk(self.owner_project),
"pk": _require_pk(self.owner_content),
},
)
)

self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(
response.json()["summary_text"], "A concise summary ready for editors."
)

@patch("core.tasks.run_relevance_scoring_skill.delay")
def test_content_skill_action_queues_relevance_scoring(
self, run_relevance_scoring_delay_mock
Expand Down
1 change: 1 addition & 0 deletions core/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@
"published_date": "2026-04-25T14:00:00Z",
"ingested_at": "2026-04-26T12:05:00Z",
"content_text": "A walkthrough of short-term and long-term memory patterns for production agents.",
"summary_text": "The piece explains practical patterns for combining short-term context with durable memory in production agents.",
"relevance_score": 0.92,
"authority_adjusted_score": 0.95,
"embedding_id": "emb_01jabcxyz",
Expand Down
229 changes: 229 additions & 0 deletions core/management/commands/bootstrap_live_sources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
"""Bootstrap real RSS and Reddit sources for local development."""

from __future__ import annotations

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError

from ingestion.tasks import run_ingestion
from projects.model_support import SourcePluginName
from projects.models import Project, SourceConfig


class Command(BaseCommand):
help = (
"Create or reactivate RSS and Reddit source configs for one project, "
"optionally queueing ingestion immediately."
)

def add_arguments(self, parser):
parser.add_argument(
"--project-id",
type=int,
help="Project ID that should own the bootstrapped source configs.",
)
parser.add_argument(
"--project-name",
help="Project name that should own the bootstrapped source configs.",
)
parser.add_argument(
"--rss-feed",
action="append",
default=[],
help=(
"RSS feed URL to add. Repeat the flag or pass a comma-separated list."
),
)
parser.add_argument(
"--subreddit",
action="append",
default=[],
help=("Subreddit to add. Repeat the flag or pass a comma-separated list."),
)
parser.add_argument(
"--reddit-listing",
default="both",
choices=("new", "hot", "both"),
help="Listing mode to use for bootstrapped Reddit sources.",
)
parser.add_argument(
"--reddit-limit",
type=int,
default=25,
help="Per-listing fetch limit for bootstrapped Reddit sources.",
)
parser.add_argument(
"--run-now",
action="store_true",
help="Queue ingestion immediately for every source config touched.",
)

def handle(self, *args, **options):
project = self._get_project(options)
rss_feeds = self._split_values(options["rss_feed"])
subreddits = self._split_values(options["subreddit"])
reddit_limit = int(options["reddit_limit"])

if reddit_limit <= 0:
raise CommandError("--reddit-limit must be a positive integer.")
if not rss_feeds and not subreddits:
raise CommandError(
"Provide at least one --rss-feed or --subreddit value to bootstrap."
)

created_count = 0
reactivated_count = 0
updated_count = 0
touched_source_ids: list[int] = []

for feed_url in rss_feeds:
source_config, outcome = self._upsert_rss_source(project, feed_url)
touched_source_ids.append(int(source_config.pk))
if outcome == "created":
created_count += 1
elif outcome == "reactivated":
reactivated_count += 1
elif outcome == "updated":
updated_count += 1

for subreddit in subreddits:
source_config, outcome = self._upsert_reddit_source(
project,
subreddit,
listing=options["reddit_listing"],
limit=reddit_limit,
)
touched_source_ids.append(int(source_config.pk))
if outcome == "created":
created_count += 1
elif outcome == "reactivated":
reactivated_count += 1
elif outcome == "updated":
updated_count += 1

queued_count = 0
if options["run_now"]:
for source_config_id in touched_source_ids:
if settings.CELERY_TASK_ALWAYS_EAGER:
run_ingestion(source_config_id)
else:
run_ingestion.delay(source_config_id)
queued_count += 1

self.stdout.write(
self.style.SUCCESS(
f"Bootstrapped {len(touched_source_ids)} source config(s) for project {project.name}."
)
)
self.stdout.write(f"Created: {created_count}")
self.stdout.write(f"Reactivated: {reactivated_count}")
self.stdout.write(f"Updated: {updated_count}")
if options["run_now"]:
self.stdout.write(f"Queued ingestions: {queued_count}")

def _get_project(self, options) -> Project:
"""Resolve the project from either ID or exact name."""

project_id = options.get("project_id")
project_name = (options.get("project_name") or "").strip()
if bool(project_id) == bool(project_name):
raise CommandError("Pass exactly one of --project-id or --project-name.")

if project_id:
try:
return Project.objects.get(pk=project_id)
except Project.DoesNotExist as exc:
raise CommandError(
f"Project with id {project_id} does not exist."
) from exc

try:
return Project.objects.get(name=project_name)
except Project.DoesNotExist as exc:
raise CommandError(
f"Project named '{project_name}' does not exist."
) from exc

@staticmethod
def _split_values(raw_values: list[str]) -> list[str]:
"""Split repeated or comma-delimited CLI values into a clean list."""

normalized_values: list[str] = []
seen_values: set[str] = set()
for raw_value in raw_values:
for item in raw_value.split(","):
normalized_item = item.strip()
if not normalized_item or normalized_item in seen_values:
continue
seen_values.add(normalized_item)
normalized_values.append(normalized_item)
return normalized_values

def _upsert_rss_source(
self,
project: Project,
feed_url: str,
) -> tuple[SourceConfig, str]:
"""Create or reactivate one RSS source config."""

source_config = SourceConfig.objects.filter(
project=project,
plugin_name=SourcePluginName.RSS,
config__feed_url=feed_url,
).first()
if source_config is None:
source_config = SourceConfig.objects.create(
project=project,
plugin_name=SourcePluginName.RSS,
config={"feed_url": feed_url},
is_active=True,
)
return source_config, "created"
if not source_config.is_active:
source_config.is_active = True
source_config.save(update_fields=["is_active"])
return source_config, "reactivated"
return source_config, "unchanged"

def _upsert_reddit_source(
self,
project: Project,
subreddit: str,
*,
listing: str,
limit: int,
) -> tuple[SourceConfig, str]:
"""Create, reactivate, or refresh one Reddit source config."""

desired_config = {
"subreddit": subreddit,
"listing": listing,
"limit": limit,
}
source_config = SourceConfig.objects.filter(
project=project,
plugin_name=SourcePluginName.REDDIT,
config__subreddit=subreddit,
).first()
if source_config is None:
source_config = SourceConfig.objects.create(
project=project,
plugin_name=SourcePluginName.REDDIT,
config=desired_config,
is_active=True,
)
return source_config, "created"

update_fields: list[str] = []
outcome = "unchanged"
if source_config.config != desired_config:
source_config.config = desired_config
update_fields.append("config")
outcome = "updated"
if not source_config.is_active:
source_config.is_active = True
update_fields.append("is_active")
outcome = "reactivated" if outcome == "unchanged" else outcome
if update_fields:
source_config.save(update_fields=update_fields)
return source_config, outcome
5 changes: 3 additions & 2 deletions core/management/commands/seed_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,14 +690,15 @@ def _seed_pipeline_state(
)
)
if relevance_score >= settings.AI_RELEVANCE_SUMMARIZE_THRESHOLD:
content.summary_text = self._summary_for_article(article)
skill_results.append(
SkillResult(
content=content,
project=project,
skill_name=SUMMARIZATION_SKILL_NAME,
status=SkillStatus.COMPLETED,
result_data={
"summary": self._summary_for_article(article),
"summary": content.summary_text,
},
model_used=settings.AI_SUMMARIZATION_MODEL,
latency_ms=640 + (index % 6) * 40,
Expand Down Expand Up @@ -730,7 +731,7 @@ def _seed_pipeline_state(

Content.objects.bulk_update(
content_updates,
["content_type", "relevance_score", "is_active"],
["content_type", "relevance_score", "is_active", "summary_text"],
)
SkillResult.objects.bulk_create(skill_results)
ReviewQueue.objects.bulk_create(review_items)
Expand Down
23 changes: 22 additions & 1 deletion core/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,21 @@ def _project_pk(content: Content) -> int:
return _require_pk(content.project)


def _persist_content_summary(
content: Content,
summary: dict[str, Any],
*,
extra_update_fields: list[str] | None = None,
) -> None:
"""Persist the latest summary text onto the content row."""

update_fields = ["summary_text"]
content.summary_text = str(summary.get("summary", "")).strip()
if extra_update_fields:
update_fields.extend(extra_update_fields)
content.save(update_fields=update_fields)


def _content_id_from_state(state: PipelineState) -> int:
"""Extract a required content id from pipeline state."""

Expand Down Expand Up @@ -462,7 +477,11 @@ def summarize_node(state: PipelineState) -> PipelineState:
"status": "awaiting_review",
}
content.pipeline_state = ContentPipelineState.COMPLETED
content.save(update_fields=["pipeline_state"])
_persist_content_summary(
content,
summary,
extra_update_fields=["pipeline_state"],
)
return {
"summary": summary,
"retry_budget_remaining": retry_budget.remaining_retries,
Expand Down Expand Up @@ -1155,6 +1174,7 @@ def execute_background_skill_result(
)
if skill_name == SUMMARIZATION_SKILL_NAME:
summary = _run_ad_hoc_summarization(skill_result.content)
_persist_content_summary(skill_result.content, summary)
return _update_skill_result(
skill_result,
status=SkillStatus.COMPLETED,
Expand Down Expand Up @@ -1230,6 +1250,7 @@ def _execute_ad_hoc_summarization(content: Content) -> SkillResult:

try:
summary = _run_ad_hoc_summarization(content)
_persist_content_summary(content, summary)
return _create_skill_result(
content,
skill_name=SUMMARIZATION_SKILL_NAME,
Expand Down
Loading
Loading