From c111cb23c0f496a19cda55957b06cacb362f77de Mon Sep 17 00:00:00 2001 From: Kevin Brown Date: Mon, 27 Apr 2026 15:54:36 +0300 Subject: [PATCH 1/4] Seed fixtures setup --- SESSION.md | 168 +++++ celerybeat-schedule | Bin 12288 -> 12288 bytes core/management/commands/seed_demo.py | 976 +++++++++++++++++++++++++- core/tests/test_embeddings.py | 72 +- justfile | 2 +- 5 files changed, 1181 insertions(+), 37 deletions(-) create mode 100644 SESSION.md diff --git a/SESSION.md b/SESSION.md new file mode 100644 index 00000000..63b2621e --- /dev/null +++ b/SESSION.md @@ -0,0 +1,168 @@ +# Session Restore Point + +## Date + +- 2026-04-27 + +## What Was Completed + +- Implemented WP7 demo seeding in `core/management/commands/seed_demo.py`. +- `seed_demo` now creates deterministic offline demo data instead of depending on live Reddit/RSS/LLM calls. +- Seeded dataset shape: + - 1 demo tenant: `Platform Engineering Weekly` + - 15 entities + - 8 source configs + - 50 reference corpus items + - 200 demo content items + - 515 skill results total + - review queue items + - 45 feedback rows + - 6 ingestion runs +- Added resilience in `seed_demo` so embedding sync warns and stops cleanly if vector/embedding infrastructure is unavailable. +- Changed `justfile` so `seed` now runs inside the live Django container: + - `docker compose exec django python manage.py seed_demo` + +## Tests / Validation That Passed + +- `pytest core/tests/test_embeddings.py -q` + - Final status before stopping: `15 passed` +- `ruff check core/management/commands/seed_demo.py core/tests/test_embeddings.py` + - Passed +- `just --list` + - Passed after `justfile` changes + +## Current Blocker + +`just seed` still fails inside the running Django container because Django cannot import `drf_standardized_errors`. 
+ +Exact failure inside the live service container: + +```bash +docker compose exec django python -c "import drf_standardized_errors; print('ok')" +``` + +Result: + +```text +ModuleNotFoundError: No module named 'drf_standardized_errors' +``` + +## Important Verified Facts + +1. The dependency is declared in `requirements.txt`: + - `drf-standardized-errors==0.15.0` + +2. Django settings require it in `newsletter_maker/settings/base.py`: + - `"drf_standardized_errors"` is present in `INSTALLED_APPS` + +3. The freshly built local image is good. + This command succeeds: + +```bash +docker run --rm newsletter-maker-app:dev python -c "import sys; print(sys.executable); import drf_standardized_errors; print(drf_standardized_errors.__file__)" +``` + +Observed result: + +```text +/usr/local/bin/python +/usr/local/lib/python3.13/site-packages/drf_standardized_errors/__init__.py +``` + +4. The running Django service container claims to use the same image ID as the good image. + +Observed: + +```bash +docker inspect newsletter-maker-django-1 --format '{{.Id}} {{.Image}} {{.Config.Image}}' +``` + +Result (container ID omitted; the image ID and image name follow): + +```text + sha256:6e0170b468e9316f0dfdcb9e2a52d4d45c22e9f888ea752f35373f70c0579cf8 docker.io/library/newsletter-maker-app:dev +``` + +5. The running Django container still cannot see the package files or pip metadata. + +Observed inside the live container: + +```bash +docker compose exec django sh -lc "ls -d /usr/local/lib/python3.13/site-packages/drf_standardized_errors* 2>/dev/null || true; python -m pip show drf-standardized-errors || true" +``` + +Result: + +```text +WARNING: Package(s) not found: drf-standardized-errors +``` + +6. The live Django container mount set looks normal and only bind-mounts the repo at `/app`. 
+ +Observed: + +```bash +docker inspect newsletter-maker-django-1 --format '{{json .Mounts}}' +``` + +Result: + +```json +[{"Type":"bind","Source":"/home/kevin/Repos/newsletter-maker","Destination":"/app","Mode":"","RW":true,"Propagation":"rprivate"}] +``` + +## Best Current Hypothesis + +There is a runtime divergence between the fresh image and the live Compose service container, even though the service container reports the same backing image ID. + +Most likely remaining explanations: + +- the live Django container's writable layer has diverged and is hiding/removing files under site-packages, or +- there is some Compose/container lifecycle oddity involving the running service container that is not visible from static image inspection. + +## Next Steps To Resume Tomorrow + +1. Inspect the container filesystem diff for the live Django service container. + + Commands to run: + +```bash +docker diff newsletter-maker-django-1 | grep drf_standardized_errors || true +docker diff newsletter-maker-django-1 | head -200 +``` + + Note: these were about to be run when work stopped; the tool calls were cancelled by the user. + +2. If the diff shows site-packages deletions or unexpected mutations, remove and recreate the Django/Celery service containers again and re-test the import immediately. + +3. If the diff is clean, compare a full directory listing of `/usr/local/lib/python3.13/site-packages` between: + - `docker run --rm newsletter-maker-app:dev ...` + - `docker compose exec django ...` + +4. Once `docker compose exec django python -c "import drf_standardized_errors; print('ok')"` works, rerun: + +```bash +just seed +``` + +5. After `just seed` works, validate the seeded UI/admin state manually. 
+ +## Useful Commands From Today + +```bash +docker run --rm newsletter-maker-app:dev python -c "import drf_standardized_errors; print('ok')" +docker compose exec django python -c "import drf_standardized_errors; print('ok')" +docker compose exec django pip show drf-standardized-errors +docker inspect newsletter-maker-django-1 --format '{{.Id}} {{.Image}} {{.Config.Image}}' +docker inspect newsletter-maker-django-1 --format '{{json .Mounts}}' +pytest core/tests/test_embeddings.py -q +ruff check core/management/commands/seed_demo.py core/tests/test_embeddings.py +``` + +## Files Changed Today + +- `core/management/commands/seed_demo.py` +- `core/tests/test_embeddings.py` +- `justfile` +- `SESSION.md` + diff --git a/celerybeat-schedule b/celerybeat-schedule index 023c1aed6181eabbd05028b636997620c7c2a884..16237b18324b2dc114c24665eb702f416ea331e2 100644 GIT binary patch delta 97 zcmZojXh_&#CCAi~Fxf^d7QydSo6jN$mNl|8MF;j{Fvl3HjNpgH@USd*CY6??402{v=b^rhX delta 97 zcmZojXh_&#CCAj9J=sQ1nW;Hza-y8DY;z{FfI&%B1Itu)1_ Tenant: user_model = get_user_model() user, _ = user_model.objects.get_or_create( username="demo_editor", - defaults={ - "email": "demo@example.com", - }, + defaults={"email": "demo@example.com"}, ) user.set_password("demo-password") user.save(update_fields=["password"]) tenant, created = Tenant.objects.get_or_create( user=user, - name="Platform Engineering Weekly", - defaults={ - "topic_description": "Platform engineering, DevOps, cloud infrastructure, reliability, and developer experience.", - }, + name=DEMO_TENANT_NAME, + defaults={"topic_description": DEMO_TOPIC_DESCRIPTION}, ) - if not created and not tenant.topic_description: - tenant.topic_description = "Platform engineering, DevOps, cloud infrastructure, reliability, and developer experience." 
+ if not created and tenant.topic_description != DEMO_TOPIC_DESCRIPTION: + tenant.topic_description = DEMO_TOPIC_DESCRIPTION tenant.save(update_fields=["topic_description"]) TenantConfig.objects.get_or_create(tenant=tenant) return tenant - def _seed_articles(self, tenant: Tenant, articles: list[dict], *, is_reference: bool, source_plugin: str | None = None) -> int: - seeded_count = 0 + def _reset_demo_runtime_state(self, tenant: Tenant) -> None: + SkillResult.objects.filter(tenant=tenant).delete() + ReviewQueue.objects.filter(tenant=tenant).delete() + UserFeedback.objects.filter(tenant=tenant).delete() + IngestionRun.objects.filter(tenant=tenant).delete() + SourceConfig.objects.filter(tenant=tenant).delete() + + def _seed_entities(self, tenant: Tenant) -> dict[str, Entity]: + entities_by_name: dict[str, Entity] = {} + for spec in ENTITY_SPECS: + defaults = { + "type": spec["type"], + "description": spec["description"], + "authority_score": spec["authority_score"], + "website_url": spec.get("website_url", ""), + "github_url": spec.get("github_url", ""), + "linkedin_url": spec.get("linkedin_url", ""), + "bluesky_handle": spec.get("bluesky_handle", ""), + "mastodon_handle": spec.get("mastodon_handle", ""), + "twitter_handle": spec.get("twitter_handle", ""), + } + entity, _ = Entity.objects.update_or_create( + tenant=tenant, + name=spec["name"], + defaults=defaults, + ) + entities_by_name[entity.name] = entity + return entities_by_name + + def _seed_source_configs(self, tenant: Tenant) -> int: + now = timezone.now() + for spec in SOURCE_CONFIG_SPECS: + last_fetched_at = None + if spec["hours_ago"] is not None: + last_fetched_at = now - timedelta(hours=spec["hours_ago"]) + SourceConfig.objects.create( + tenant=tenant, + plugin_name=spec["plugin_name"], + config=spec["config"], + is_active=spec["is_active"], + last_fetched_at=last_fetched_at, + ) + return len(SOURCE_CONFIG_SPECS) + + def _seed_articles( + self, + tenant: Tenant, + articles: list[dict[str, Any]], + 
entities_by_name: dict[str, Entity], + *, + is_reference: bool, + source_plugin: str | None = None, + ) -> list[Content]: now = timezone.now() + seeded_contents: list[Content] = [] for article in articles: defaults = { "title": article["title"], "author": article["author"], + "entity": entities_by_name.get(article.get("entity_name", "")), "source_plugin": source_plugin or article["source_plugin"], "published_date": now - timedelta(days=article["days_ago"]), "content_text": article["content_text"], "is_reference": is_reference, "is_active": True, } - Content.objects.update_or_create( + content, _ = Content.objects.update_or_create( tenant=tenant, url=article["url"], defaults=defaults, ) - seeded_count += 1 - return seeded_count + seeded_contents.append(content) + return seeded_contents + + def _seed_pipeline_state( + self, + tenant: Tenant, + article_specs: list[dict[str, Any]], + contents: list[Content], + ) -> tuple[int, int]: + content_by_url = {content.url: content for content in contents} + content_updates: list[Content] = [] + skill_results: list[SkillResult] = [] + review_items: list[ReviewQueue] = [] + + for index, article in enumerate(article_specs): + content = content_by_url[article["url"]] + classification_confidence = float(article["classification_confidence"]) + relevance_score = float(article["relevance_score"]) + review_reason = self._review_reason_for_article( + classification_confidence, + relevance_score, + ) + content.content_type = article["content_type"] + content.relevance_score = relevance_score + content.is_active = relevance_score >= settings.AI_RELEVANCE_REVIEW_THRESHOLD + content_updates.append(content) + + skill_results.append( + SkillResult( + content=content, + tenant=tenant, + skill_name=CLASSIFICATION_SKILL_NAME, + status=SkillStatus.COMPLETED, + result_data={ + "content_type": article["content_type"], + "confidence": classification_confidence, + "explanation": self._classification_explanation(article), + }, + 
model_used=settings.AI_CLASSIFICATION_MODEL, + latency_ms=240 + (index % 5) * 35, + confidence=classification_confidence, + ) + ) + skill_results.append( + SkillResult( + content=content, + tenant=tenant, + skill_name=RELEVANCE_SKILL_NAME, + status=SkillStatus.COMPLETED, + result_data={ + "relevance_score": relevance_score, + "explanation": self._relevance_explanation(article), + "used_llm": bool(article.get("used_llm", False)), + }, + model_used=self._relevance_model_used(article), + latency_ms=( + 0 + if not article.get("used_llm", False) + else 900 + (index % 4) * 120 + ), + confidence=relevance_score, + ) + ) + if relevance_score >= settings.AI_RELEVANCE_SUMMARIZE_THRESHOLD: + skill_results.append( + SkillResult( + content=content, + tenant=tenant, + skill_name=SUMMARIZATION_SKILL_NAME, + status=SkillStatus.COMPLETED, + result_data={ + "summary": self._summary_for_article(article), + }, + model_used=settings.AI_SUMMARIZATION_MODEL, + latency_ms=640 + (index % 6) * 40, + ) + ) + if review_reason is not None: + resolved = index % 6 == 0 + resolution = "" + if resolved: + resolution = ( + ReviewResolution.HUMAN_APPROVED + if relevance_score >= settings.AI_RELEVANCE_REVIEW_THRESHOLD + else ReviewResolution.HUMAN_REJECTED + ) + confidence = ( + classification_confidence + if review_reason == ReviewReason.LOW_CONFIDENCE_CLASSIFICATION + else relevance_score + ) + review_items.append( + ReviewQueue( + tenant=tenant, + content=content, + reason=review_reason, + confidence=confidence, + resolved=resolved, + resolution=resolution, + ) + ) + + Content.objects.bulk_update( + content_updates, + ["content_type", "relevance_score", "is_active"], + ) + SkillResult.objects.bulk_create(skill_results) + ReviewQueue.objects.bulk_create(review_items) + return len(skill_results), len(review_items) + + def _seed_feedback(self, tenant: Tenant, contents: list[Content]) -> int: + user_model = get_user_model() + voters = [] + for index in range(1, 7): + user, _ = 
user_model.objects.get_or_create( + username=f"demo_reader_{index}", + defaults={"email": f"demo-reader-{index}@example.com"}, + ) + user.set_password("demo-password") + user.save(update_fields=["password"]) + voters.append(user) + + active_contents = sorted( + [content for content in contents if content.is_active], + key=lambda content: (content.relevance_score or 0.0, content.published_date), + reverse=True, + ) + feedback_count = 0 + + for index, content in enumerate(active_contents[:30]): + UserFeedback.objects.update_or_create( + content=content, + user=voters[index % len(voters)], + defaults={ + "tenant": tenant, + "feedback_type": FeedbackType.UPVOTE, + }, + ) + feedback_count += 1 + + for index, content in enumerate(active_contents[-15:]): + UserFeedback.objects.update_or_create( + content=content, + user=voters[(index + 2) % len(voters)], + defaults={ + "tenant": tenant, + "feedback_type": FeedbackType.DOWNVOTE, + }, + ) + feedback_count += 1 + + return feedback_count + + def _seed_ingestion_runs(self, tenant: Tenant) -> int: + run_specs = [ + { + "plugin_name": SourcePluginName.RSS, + "status": RunStatus.SUCCESS, + "items_fetched": 92, + "items_ingested": 57, + "error_message": "", + "started_hours_ago": 6, + "duration_minutes": 14, + }, + { + "plugin_name": SourcePluginName.REDDIT, + "status": RunStatus.SUCCESS, + "items_fetched": 28, + "items_ingested": 18, + "error_message": "", + "started_hours_ago": 4, + "duration_minutes": 6, + }, + { + "plugin_name": SourcePluginName.RSS, + "status": RunStatus.FAILED, + "items_fetched": 0, + "items_ingested": 0, + "error_message": "Timed out while polling one disabled feed mirror.", + "started_hours_ago": 30, + "duration_minutes": 5, + }, + { + "plugin_name": SourcePluginName.REDDIT, + "status": RunStatus.SUCCESS, + "items_fetched": 24, + "items_ingested": 16, + "error_message": "", + "started_hours_ago": 32, + "duration_minutes": 7, + }, + { + "plugin_name": SourcePluginName.RSS, + "status": RunStatus.SUCCESS, + 
"items_fetched": 88, + "items_ingested": 54, + "error_message": "", + "started_hours_ago": 54, + "duration_minutes": 13, + }, + { + "plugin_name": SourcePluginName.REDDIT, + "status": RunStatus.FAILED, + "items_fetched": 0, + "items_ingested": 0, + "error_message": "Community listing temporarily unavailable during sync.", + "started_hours_ago": 80, + "duration_minutes": 4, + }, + ] + now = timezone.now() + for spec in run_specs: + run = IngestionRun.objects.create( + tenant=tenant, + plugin_name=spec["plugin_name"], + status=spec["status"], + items_fetched=spec["items_fetched"], + items_ingested=spec["items_ingested"], + error_message=spec["error_message"], + ) + run.started_at = now - timedelta(hours=spec["started_hours_ago"]) + run.completed_at = run.started_at + timedelta(minutes=spec["duration_minutes"]) + run.save(update_fields=["started_at", "completed_at"]) + return len(run_specs) - def _sync_embeddings(self, tenant: Tenant) -> int: + def _sync_embeddings(self, contents: list[Content]) -> int: embedded_count = 0 - for content in Content.objects.filter(tenant=tenant).order_by("id"): - upsert_content_embedding(content) + for content in sorted(contents, key=lambda item: item.id): + try: + upsert_content_embedding(content) + except (HTTPError, ResponseHandlingException) as exc: + self.stderr.write( + self.style.WARNING( + "Skipping remaining embedding sync because the embedding or " + f"vector service is unavailable: {exc}" + ) + ) + break embedded_count += 1 return embedded_count + + def _build_reference_articles(self) -> list[dict[str, Any]]: + articles = list(LEGACY_REFERENCE_ARTICLES) + for round_index in range(5): + for topic_index, topic in enumerate(REFERENCE_TOPICS): + articles.append( + { + "url": f"https://example.com/reference/{topic['slug']}-{round_index + 1}", + "title": topic["title"], + "author": "Reference Corpus", + "content_text": topic["content"], + "days_ago": 32 + round_index * 7 + topic_index, + } + ) + return articles + + def 
_build_demo_content(self) -> list[dict[str, Any]]: + articles = list(LEGACY_SAMPLE_CONTENT) + articles.extend(self._build_generated_rss_content()) + articles.extend(self._build_generated_reddit_content()) + return articles + + def _build_generated_rss_content(self) -> list[dict[str, Any]]: + articles = [] + for index in range(147): + band = self._band_for_index(index, relevant_cutoff=87, borderline_cutoff=122) + publication = RSS_PUBLICATIONS[index % len(RSS_PUBLICATIONS)] + topic = RSS_TOPIC_BLUEPRINTS[index % len(RSS_TOPIC_BLUEPRINTS)] + relevance_score = self._relevance_score(index, band) + articles.append( + { + "url": ( + f"https://{publication['host']}/2026/04/" + f"{topic['slug']}-{index + 1:03d}" + ), + "title": self._rss_title(publication["label"], topic["headline"], band), + "author": f"{publication['label']} Editorial", + "source_plugin": SourcePluginName.RSS, + "content_text": self._rss_body(publication["label"], topic["body"], band), + "days_ago": 1 + (index % 30), + "content_type": self._content_type_for_band(topic["content_type"], band), + "classification_confidence": self._classification_confidence(index), + "relevance_score": relevance_score, + "entity_name": publication["entity_name"], + "used_llm": band == "borderline", + } + ) + return articles + + def _build_generated_reddit_content(self) -> list[dict[str, Any]]: + articles = [] + for index in range(49): + band = self._band_for_index(index, relevant_cutoff=24, borderline_cutoff=34) + subreddit = REDDIT_COMMUNITIES[index % len(REDDIT_COMMUNITIES)] + topic = REDDIT_TOPIC_BLUEPRINTS[index % len(REDDIT_TOPIC_BLUEPRINTS)] + articles.append( + { + "url": ( + f"https://www.reddit.com/r/{subreddit}/comments/" + f"demo{index + 1:03d}/{topic['slug']}/" + ), + "title": self._reddit_title(subreddit, topic["headline"], band), + "author": f"u/demo_{subreddit}_{index + 1:03d}", + "source_plugin": SourcePluginName.REDDIT, + "content_text": self._reddit_body(subreddit, topic["body"], band, index), + "days_ago": 
1 + ((index * 2) % 30), + "content_type": self._content_type_for_band(topic["content_type"], band), + "classification_confidence": self._classification_confidence(index + 200), + "relevance_score": self._relevance_score(index + 200, band), + "entity_name": None, + "used_llm": band == "borderline", + } + ) + return articles + + @staticmethod + def _band_for_index(index: int, *, relevant_cutoff: int, borderline_cutoff: int) -> str: + if index < relevant_cutoff: + return "relevant" + if index < borderline_cutoff: + return "borderline" + return "irrelevant" + + @staticmethod + def _classification_confidence(index: int) -> float: + if index % 11 == 0: + return 0.55 + return round(0.66 + (index % 8) * 0.03, 2) + + @staticmethod + def _relevance_score(index: int, band: str) -> float: + if band == "relevant": + return round(0.74 + (index % 18) * 0.012, 2) + if band == "borderline": + return round(0.44 + (index % 15) * 0.015, 2) + return round(0.12 + (index % 12) * 0.015, 2) + + @staticmethod + def _content_type_for_band(base_content_type: str, band: str) -> str: + if band == "irrelevant" and base_content_type == "technical_article": + return "other" + return base_content_type + + @staticmethod + def _rss_title(source_label: str, headline: str, band: str) -> str: + if band == "relevant": + return f"{headline}" + if band == "borderline": + return f"{headline} for teams still shaping their platform charter" + return f"{source_label}: {headline.lower().capitalize()} outside the core platform loop" + + @staticmethod + def _reddit_title(subreddit: str, headline: str, band: str) -> str: + if band == "relevant": + return f"r/{subreddit}: {headline}" + if band == "borderline": + return f"r/{subreddit}: {headline} and where teams disagree" + return f"r/{subreddit}: {headline} with limited editorial fit" + + @staticmethod + def _rss_body(source_label: str, body: str, band: str) -> str: + if band == "relevant": + return ( + f"{source_label} reports a concrete platform engineering 
practice. " + f"{body} The example is directly applicable to infrastructure, " + "developer experience, and reliability workflows." + ) + if band == "borderline": + return ( + f"{source_label} touches on a topic adjacent to platform engineering. " + f"{body} Editors would probably want to review whether the angle is " + "specific enough for the newsletter audience." + ) + return ( + f"{source_label} covers a topic that only partially overlaps with the " + f"newsletter focus. {body} It is still technical, but the connection to " + "platform engineering is weak compared with stronger candidates." + ) + + @staticmethod + def _reddit_body(subreddit: str, body: str, band: str, index: int) -> str: + score = 18 + (index % 35) + if band == "relevant": + return ( + f"A discussion in r/{subreddit} highlights platform operations trade-offs. " + f"{body} The post picked up roughly {score} upvotes and several replies " + "from practitioners sharing first-hand implementation details." + ) + if band == "borderline": + return ( + f"A thread in r/{subreddit} raises a useful but mixed operational question. " + f"{body} The discussion is practical, yet it needs editorial judgment to " + "decide whether it is specific enough for platform readers." + ) + return ( + f"A thread in r/{subreddit} is only loosely connected to the tenant topic. " + f"{body} The conversation is interesting, but it is more peripheral than the " + "other seeded stories." 
+ ) + + @staticmethod + def _review_reason_for_article( + classification_confidence: float, + relevance_score: float, + ) -> str | None: + if classification_confidence < settings.AI_CLASSIFICATION_REVIEW_THRESHOLD: + return ReviewReason.LOW_CONFIDENCE_CLASSIFICATION + if relevance_score < settings.AI_RELEVANCE_SUMMARIZE_THRESHOLD and ( + relevance_score >= settings.AI_RELEVANCE_REVIEW_THRESHOLD + ): + return ReviewReason.BORDERLINE_RELEVANCE + return None + + @staticmethod + def _classification_explanation(article: dict[str, Any]) -> str: + return ( + f"The seeded classifier maps this item to {article['content_type']} based " + "on its language, operating context, and editorial angle." + ) + + @staticmethod + def _relevance_explanation(article: dict[str, Any]) -> str: + relevance_score = float(article["relevance_score"]) + if article.get("used_llm", False): + return ( + f"Borderline similarity of {relevance_score:.2f} required editorial " + "adjudication, so the seeded result keeps this item in the review band." + ) + if relevance_score >= settings.AI_RELEVANCE_SUMMARIZE_THRESHOLD: + return ( + f"Reference corpus similarity is strong at {relevance_score:.2f}, so the " + "item is ready to surface without additional review." + ) + return ( + f"Reference corpus similarity is weak at {relevance_score:.2f}, so the " + "seed marks the item as archived rather than surfaced." + ) + + @staticmethod + def _relevance_model_used(article: dict[str, Any]) -> str: + if article.get("used_llm", False): + return settings.AI_RELEVANCE_MODEL + return f"embedding:{settings.EMBEDDING_MODEL}" + + @staticmethod + def _summary_for_article(article: dict[str, Any]) -> str: + source_name = article["source_plugin"].upper() + return ( + f"{article['title']} gives platform teams a concrete angle on delivery, " + f"reliability, or developer experience. The seeded summary keeps the focus " + f"on why this {source_name} item is worth surfacing in the newsletter." 
+ ) diff --git a/core/tests/test_embeddings.py b/core/tests/test_embeddings.py index 231980a9..a1fba3d6 100644 --- a/core/tests/test_embeddings.py +++ b/core/tests/test_embeddings.py @@ -1,7 +1,9 @@ from types import SimpleNamespace +import httpx import pytest from django.core.management import call_command +from qdrant_client.http.exceptions import ResponseHandlingException from core import embeddings from core.embeddings import ( @@ -11,7 +13,22 @@ search_similar_content, upsert_content_embedding, ) -from core.models import Content, SourcePluginName, Tenant +from core.models import ( + Content, + Entity, + IngestionRun, + ReviewQueue, + SkillResult, + SourceConfig, + SourcePluginName, + Tenant, + UserFeedback, +) +from core.pipeline import ( + CLASSIFICATION_SKILL_NAME, + RELEVANCE_SKILL_NAME, + SUMMARIZATION_SKILL_NAME, +) pytestmark = pytest.mark.django_db @@ -173,7 +190,52 @@ def test_seed_demo_creates_reference_corpus_and_embeds_demo_content(mocker, caps call_command("seed_demo") tenant = Tenant.objects.get(name="Platform Engineering Weekly") - assert Content.objects.filter(tenant=tenant, is_reference=True).exists() - assert Content.objects.filter(tenant=tenant, is_reference=False).exists() - assert upsert_mock.call_count == Content.objects.filter(tenant=tenant).count() - assert "Reference corpus items" in capsys.readouterr().out + assert Entity.objects.filter(tenant=tenant).count() == 15 + assert SourceConfig.objects.filter(tenant=tenant).count() == 8 + assert Content.objects.filter(tenant=tenant, is_reference=True).count() == 50 + assert Content.objects.filter(tenant=tenant, is_reference=False).count() == 200 + assert SkillResult.objects.filter(tenant=tenant, skill_name=CLASSIFICATION_SKILL_NAME).count() == 200 + assert SkillResult.objects.filter(tenant=tenant, skill_name=RELEVANCE_SKILL_NAME).count() == 200 + assert SkillResult.objects.filter(tenant=tenant, skill_name=SUMMARIZATION_SKILL_NAME).count() == 115 + assert 
ReviewQueue.objects.filter(tenant=tenant).exists() + assert UserFeedback.objects.filter(tenant=tenant).count() == 45 + assert IngestionRun.objects.filter(tenant=tenant).count() == 6 + assert upsert_mock.call_count == 250 + output = capsys.readouterr().out + assert "Reference corpus items: 50" in output + assert "Demo content items: 200" in output + + +def test_seed_demo_is_stable_on_rerun(mocker): + mocker.patch("core.management.commands.seed_demo.upsert_content_embedding") + + call_command("seed_demo") + call_command("seed_demo") + + tenant = Tenant.objects.get(name="Platform Engineering Weekly") + assert Entity.objects.filter(tenant=tenant).count() == 15 + assert SourceConfig.objects.filter(tenant=tenant).count() == 8 + assert Content.objects.filter(tenant=tenant, is_reference=True).count() == 50 + assert Content.objects.filter(tenant=tenant, is_reference=False).count() == 200 + assert SkillResult.objects.filter(tenant=tenant).count() == 515 + assert ReviewQueue.objects.filter(tenant=tenant).count() > 0 + assert UserFeedback.objects.filter(tenant=tenant).count() == 45 + assert IngestionRun.objects.filter(tenant=tenant).count() == 6 + + +def test_seed_demo_skips_embeddings_when_vector_stack_is_unavailable(mocker, capsys): + upsert_mock = mocker.patch( + "core.management.commands.seed_demo.upsert_content_embedding", + side_effect=ResponseHandlingException(httpx.ConnectError("connection refused")), + ) + + call_command("seed_demo") + + tenant = Tenant.objects.get(name="Platform Engineering Weekly") + assert Content.objects.filter(tenant=tenant, is_reference=True).count() == 50 + assert Content.objects.filter(tenant=tenant, is_reference=False).count() == 200 + assert SkillResult.objects.filter(tenant=tenant).count() == 515 + assert upsert_mock.call_count == 1 + combined_output = capsys.readouterr() + assert "Skipping remaining embedding sync" in combined_output.err + assert "Upserted embeddings for 0 seeded content item(s)." 
in combined_output.out diff --git a/justfile b/justfile index f78e2bc9..803c6cd8 100644 --- a/justfile +++ b/justfile @@ -135,7 +135,7 @@ migrate: seed: if [ ! -f .env ]; then cp .env.example .env; fi - python3 manage.py seed_demo + {{compose}} exec django python manage.py seed_demo embed-all: if [ ! -f .env ]; then cp .env.example .env; fi From 900236735b974a406fcc0d82b12308cee396bd65 Mon Sep 17 00:00:00 2001 From: Kevin Brown Date: Mon, 27 Apr 2026 16:02:18 +0300 Subject: [PATCH 2/4] Change admin preview renderer to use content_text for description, add regression test --- celerybeat-schedule | Bin 12288 -> 12288 bytes core/admin.py | 7 ++++--- core/tests/test_admin.py | 34 +++++++++++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/celerybeat-schedule b/celerybeat-schedule index 16237b18324b2dc114c24665eb702f416ea331e2..ea142b958ec0088caafb1290c183a48347b55189 100644 GIT binary patch delta 110 zcmZojXh_&#Eyq|n*+x!SsqTFdV z9(g4`p(m4*8sbVVUVU}V_%_}L&Of6=r5@1$hDlJKlPt8kA K%1KROssI3tavpmC delta 110 zcmZojXh_&#EytKJ*+x!rWP}$2rw%#m6jyOr{*Ol K<)o%C#RC9N%^g_) diff --git a/core/admin.py b/core/admin.py index 33b66f8a..bb84543c 100644 --- a/core/admin.py +++ b/core/admin.py @@ -107,12 +107,13 @@ class ContentAdmin(admin.ModelAdmin): @admin.display(description="Preview") def preview_content(self, obj): - """Adds a 'Quick Look' icon that shows the description in an Unfold modal.""" - if not obj.description: + """Adds a quick preview based on the stored content text.""" + preview_text = (obj.content_text or "").strip() + if not preview_text: return "-" return format_html( '🔍 View', - obj.description[:500] + preview_text[:500] ) @admin.display(description="AI Trace") diff --git a/core/tests/test_admin.py b/core/tests/test_admin.py index 31756aec..09db286a 100644 --- a/core/tests/test_admin.py +++ b/core/tests/test_admin.py @@ -6,7 +6,7 @@ from django.contrib.admin.sites import AdminSite from django.utils import timezone -from core.admin 
import ReviewQueueAdmin, SourceConfigAdmin +from core.admin import ContentAdmin, ReviewQueueAdmin, SourceConfigAdmin from core.models import Content, ReviewQueue, ReviewReason, SourceConfig, SourcePluginName, Tenant pytestmark = pytest.mark.django_db @@ -104,3 +104,35 @@ def test_review_queue_changelist_view_builds_dashboard_stats(source_admin_contex super_changelist_view.assert_called_once() assert response["dashboard_stats"][0]["value"] == 1 assert response["dashboard_stats"][1]["value"] == "42%" + + +def test_content_preview_uses_content_text(source_admin_context): + content = Content.objects.create( + tenant=source_admin_context.tenant, + url="https://example.com/admin-preview", + title="Admin Preview", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="A short preview from the content body.", + ) + admin_instance = ContentAdmin(Content, AdminSite()) + + preview = admin_instance.preview_content(content) + + assert 'title="A short preview from the content body."' in preview + + +def test_content_preview_returns_dash_when_content_text_blank(source_admin_context): + content = Content.objects.create( + tenant=source_admin_context.tenant, + url="https://example.com/admin-preview-empty", + title="Admin Preview Empty", + author="Editor", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text=" ", + ) + admin_instance = ContentAdmin(Content, AdminSite()) + + assert admin_instance.preview_content(content) == "-" From 7b3ef58f25fd7ae304d1bc4d81c7c5debb14ae98 Mon Sep 17 00:00:00 2001 From: Kevin Brown Date: Mon, 27 Apr 2026 16:07:46 +0300 Subject: [PATCH 3/4] Fix admin display helpers to pass explicit arguments to format_html for Django 6, add regression test --- SESSION.md | 152 --------------------------------------- celerybeat-schedule | Bin 12288 -> 12288 bytes core/admin.py | 12 ++-- core/tests/test_admin.py | 15 ++++ 4 files changed, 21 insertions(+), 158 deletions(-) diff --git a/SESSION.md
b/SESSION.md index 63b2621e..e3b5508e 100644 --- a/SESSION.md +++ b/SESSION.md @@ -1,151 +1,6 @@ # Session Restore Point -## Date -- 2026-04-27 - -## What Was Completed - -- Implemented WP7 demo seeding in `core/management/commands/seed_demo.py`. -- `seed_demo` now creates deterministic offline demo data instead of depending on live Reddit/RSS/LLM calls. -- Seeded dataset shape: - - 1 demo tenant: `Platform Engineering Weekly` - - 15 entities - - 8 source configs - - 50 reference corpus items - - 200 demo content items - - 515 skill results total - - review queue items - - 45 feedback rows - - 6 ingestion runs -- Added resilience in `seed_demo` so embedding sync warns and stops cleanly if vector/embedding infrastructure is unavailable. -- Changed `justfile` so `seed` now runs inside the live Django container: - - `docker compose exec django python manage.py seed_demo` - -## Tests / Validation That Passed - -- `pytest core/tests/test_embeddings.py -q` - - Final status before stopping: `15 passed` -- `ruff check core/management/commands/seed_demo.py core/tests/test_embeddings.py` - - Passed -- `just --list` - - Passed after `justfile` changes - -## Current Blocker - -`just seed` still fails inside the running Django container because Django cannot import `drf_standardized_errors`. - -Exact failure inside the live service container: - -```bash -docker compose exec django python -c "import drf_standardized_errors; print('ok')" -``` - -Result: - -```text -ModuleNotFoundError: No module named 'drf_standardized_errors' -``` - -## Important Verified Facts - -1. The dependency is declared in `requirements.txt`: - - `drf-standardized-errors==0.15.0` - -2. Django settings require it in `newsletter_maker/settings/base.py`: - - `"drf_standardized_errors"` is present in `INSTALLED_APPS` - -3. The freshly built local image is good. 
- This command succeeds: - -```bash -docker run --rm newsletter-maker-app:dev python -c "import sys; print(sys.executable); import drf_standardized_errors; print(drf_standardized_errors.__file__)" -``` - -Observed result: - -```text -/usr/local/bin/python -/usr/local/lib/python3.13/site-packages/drf_standardized_errors/__init__.py -``` - -4. The running Django service container claims to use the same image ID as the good image. - -Observed: - -```bash -docker inspect newsletter-maker-django-1 --format '{{.Id}} {{.Image}} {{.Config.Image}}' -``` - -Result was: - -```text - sha256:6e0170b468e9316f0dfdcb9e2a52d4d45c22e9f888ea752f35373f70c0579cf8 docker.io/library/newsletter-maker-app:dev -``` - -5. The running Django container still cannot see the package files or pip metadata. - -Observed inside live container: - -```bash -docker compose exec django sh -lc "ls -d /usr/local/lib/python3.13/site-packages/drf_standardized_errors* 2>/dev/null || true; python -m pip show drf-standardized-errors || true" -``` - -Result: - -```text -WARNING: Package(s) not found: drf-standardized-errors -``` - -6. The live Django container mount set looks normal and only bind-mounts the repo at `/app`. - -Observed: - -```bash -docker inspect newsletter-maker-django-1 --format '{{json .Mounts}}' -``` - -Result: - -```json -[{"Type":"bind","Source":"/home/kevin/Repos/newsletter-maker","Destination":"/app","Mode":"","RW":true,"Propagation":"rprivate"}] -``` - -## Best Current Hypothesis - -There is a runtime divergence between the fresh image and the live Compose service container, even though the service container reports the same backing image ID. - -Most likely remaining explanation: - -- the live Django container writable layer has diverged and is hiding/removing files under site-packages, or -- there is some Compose/container lifecycle oddity involving the running service container that is not visible from the static image inspection. - -## Next Steps To Resume Tomorrow - -1. 
Inspect container filesystem diff for the live Django service container. - - Commands to run: - -```bash -docker diff newsletter-maker-django-1 | grep drf_standardized_errors || true -docker diff newsletter-maker-django-1 | head -200 -``` - - Note: these were about to be run when work stopped; the tool calls were cancelled by the user. - -2. If the diff shows site-packages deletions or unexpected mutations, remove and recreate the Django/celery service containers again and re-test import immediately. - -3. If the diff is clean, compare a full directory listing of `/usr/local/lib/python3.13/site-packages` between: - - `docker run --rm newsletter-maker-app:dev ...` - - `docker compose exec django ...` - -4. Once `docker compose exec django python -c "import drf_standardized_errors; print('ok')"` works, rerun: - -```bash -just seed -``` - -5. After `just seed` works, validate the seeded UI/admin state manually. ## Useful Commands From Today @@ -159,10 +14,3 @@ pytest core/tests/test_embeddings.py -q ruff check core/management/commands/seed_demo.py core/tests/test_embeddings.py ``` -## Files Changed Today - -- `core/management/commands/seed_demo.py` -- `core/tests/test_embeddings.py` -- `justfile` -- `SESSION.md` - diff --git a/celerybeat-schedule b/celerybeat-schedule index ea142b958ec0088caafb1290c183a48347b55189..572c8ad98da6104a759476fa45e1c7a990cdb3ab 100644 GIT binary patch delta 96 zcmZojXh_&#CCAjtIoU=|nW>dya-y8DY%4pnfI&%B1Itu)1_|gYF}|Qch|LQwIRJ!Wp^% delta 96 zcmZojXh_&#CCAiKIoU=|nW?2>a-y8DY)d(_fI&%B1Itu)1_shOa(rrDVp2|O3R49DFFzYL diff --git a/core/admin.py b/core/admin.py index bb84543c..04a8cad6 100644 --- a/core/admin.py +++ b/core/admin.py @@ -344,8 +344,8 @@ class UserFeedbackAdmin(ModelAdmin): @admin.display(description="Type") def display_feedback(self, obj): if obj.feedback_type == "UPVOTE": - return format_html('👍') - return format_html('👎') + return format_html('{}', "1.2rem", "👍") + return format_html('{}', "1.2rem", "👎") 
@admin.display(description="Content Title") def get_content_title(self, obj): @@ -492,15 +492,15 @@ class SourceConfigAdmin(ModelAdmin): @admin.display(description="Status") def display_health(self, obj): if not obj.is_active: - return format_html('● Paused') + return format_html('{}', "gray", "● Paused") if obj.last_fetched_at: hours_since = (timezone.now() - obj.last_fetched_at).total_seconds() / 3600 if hours_since > 24: - return format_html('● Stale') - return format_html('● Healthy') + return format_html('{}', "red", "● Stale") + return format_html('{}', "green", "● Healthy") - return format_html('● Never Run') + return format_html('{}', "orange", "● Never Run") @admin.display(description="Config Preview") def pretty_config(self, obj): diff --git a/core/tests/test_admin.py b/core/tests/test_admin.py index 09db286a..3d13069c 100644 --- a/core/tests/test_admin.py +++ b/core/tests/test_admin.py @@ -75,6 +75,21 @@ def test_test_source_connection_reports_failures(source_admin_context, mocker): ) +def test_source_config_display_health_renders_without_django6_format_html_error(source_admin_context): + source_config = SourceConfig.objects.create( + tenant=source_admin_context.tenant, + plugin_name=SourcePluginName.RSS, + config={"feed_url": "https://example.com/feed.xml"}, + is_active=True, + last_fetched_at=timezone.now(), + ) + admin_instance = SourceConfigAdmin(SourceConfig, AdminSite()) + + rendered = admin_instance.display_health(source_config) + + assert "Healthy" in rendered + + def test_review_queue_changelist_view_builds_dashboard_stats(source_admin_context, mocker): content = Content.objects.create( tenant=source_admin_context.tenant, From c9d4eea592f76fb0a55babd20ef1460bbec1e816 Mon Sep 17 00:00:00 2001 From: Kevin Brown Date: Mon, 27 Apr 2026 16:10:24 +0300 Subject: [PATCH 4/4] Fix issue with numeric format specifiers in admin view --- celerybeat-schedule | Bin 12288 -> 12288 bytes core/admin.py | 8 +++++--- core/tests/test_admin.py | 43 
+++++++++++++++++++++++++++++++++++++-- 3 files changed, 46 insertions(+), 5 deletions(-) diff --git a/celerybeat-schedule b/celerybeat-schedule index 572c8ad98da6104a759476fa45e1c7a990cdb3ab..6c433076edbbca8a7201bb87f6274f334507c500 100644 GIT binary patch delta 93 zcmZojXh_&#EypN1*+x!6a0j82Frs+J)QcS6NB}JL3#Z0pVn3b4HOOoSL^AeMCQd5{_002ei7^?sP delta 93 zcmZojXh_&#Eyu_?*+x!shOa(rrDVp2|O3R4FFE~XeO diff --git a/core/admin.py b/core/admin.py index 04a8cad6..9b16c9fb 100644 --- a/core/admin.py +++ b/core/admin.py @@ -427,9 +427,10 @@ def display_efficiency(self, obj): return "0/0" percent = (obj.items_ingested / obj.items_fetched) * 100 color = "green" if percent > 90 else "orange" if percent > 50 else "red" + percent_label = f"({percent:.0f}%)" return format_html( - '{} / {} ({:.0f}%)', - obj.items_ingested, obj.items_fetched, color, percent + '{} / {} {}', + obj.items_ingested, obj.items_fetched, color, percent_label ) @admin.display(description="Duration") @@ -590,7 +591,8 @@ def get_content_title(self, obj): @admin.display(description="Confidence") def display_confidence(self, obj): color = "red" if obj.confidence < 0.3 else "orange" if obj.confidence < 0.6 else "green" - return format_html('{:.0f}%', color, obj.confidence * 100) + confidence_label = f"{obj.confidence * 100:.0f}%" + return format_html('{}', color, confidence_label) @admin.action(description="Approve selected items") def mark_as_approved(self, request, queryset): diff --git a/core/tests/test_admin.py b/core/tests/test_admin.py index 3d13069c..0aede5b5 100644 --- a/core/tests/test_admin.py +++ b/core/tests/test_admin.py @@ -6,8 +6,8 @@ from django.contrib.admin.sites import AdminSite from django.utils import timezone -from core.admin import ContentAdmin, ReviewQueueAdmin, SourceConfigAdmin -from core.models import Content, ReviewQueue, ReviewReason, SourceConfig, SourcePluginName, Tenant +from core.admin import ContentAdmin, IngestionRunAdmin, ReviewQueueAdmin, SourceConfigAdmin +from core.models 
import Content, IngestionRun, ReviewQueue, ReviewReason, RunStatus, SourceConfig, SourcePluginName, Tenant pytestmark = pytest.mark.django_db @@ -121,6 +121,45 @@ def test_review_queue_changelist_view_builds_dashboard_stats(source_admin_contex assert response["dashboard_stats"][1]["value"] == "42%" +def test_review_queue_display_confidence_renders_without_django6_format_error(source_admin_context): + content = Content.objects.create( + tenant=source_admin_context.tenant, + url="https://example.com/review-confidence", + title="Review Confidence", + author="Reviewer", + source_plugin=SourcePluginName.RSS, + published_date=timezone.now(), + content_text="Review queue content", + ) + review_item = ReviewQueue.objects.create( + tenant=source_admin_context.tenant, + content=content, + reason=ReviewReason.BORDERLINE_RELEVANCE, + confidence=0.42, + resolved=False, + ) + admin_instance = ReviewQueueAdmin(ReviewQueue, AdminSite()) + + rendered = admin_instance.display_confidence(review_item) + + assert "42%" in rendered + + +def test_ingestion_run_display_efficiency_renders_without_django6_format_error(source_admin_context): + run = IngestionRun.objects.create( + tenant=source_admin_context.tenant, + plugin_name=SourcePluginName.RSS, + status=RunStatus.SUCCESS, + items_fetched=12, + items_ingested=9, + ) + admin_instance = IngestionRunAdmin(IngestionRun, AdminSite()) + + rendered = admin_instance.display_efficiency(run) + + assert "75%" in rendered + + def test_content_preview_uses_content_text(source_admin_context): content = Content.objects.create( tenant=source_admin_context.tenant,