Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .env.test
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ OLLAMA_URL=http://ollama:11434
REDDIT_CLIENT_ID=client
REDDIT_CLIENT_SECRET=secret
REDDIT_USER_AGENT=newsletter-maker/test
CELERY_BROKER_URL=memory://
CELERY_RESULT_BACKEND=cache+memory://
6 changes: 6 additions & 0 deletions .github/copilot-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ You are working in Newsletter Maker, a Django + DRF + Celery + Qdrant backend wi
- TypeScript and React code should use JSDoc for exported utilities, hooks, route handlers, and non-trivial components when behavior is not obvious from the type signature alone.
- If architecture or workflow behavior changes, update the most relevant docs in `docs/`, especially `docs/DEVELOPER_GUIDE.md`, `docs/IMPLEMENTATION_OVERVIEW.md`, `docs/MODELS.md`, `docs/RELEVANCE_SCORING.md`, or `docs/LOGGING.md`.

## Prompt Skill Conventions

- Application prompt skills live under `skills/<skill_name>/SKILL.md` and are loaded by `core/llm.py` using the folder name rather than the frontmatter `name`.
- When adding or editing one of these prompt skills, always include a short frontmatter `description` so VS Code does not report incomplete skill metadata.
- If a frontmatter `name` is present, prefer lowercase letters, numbers, and hyphens in it to satisfy the Copilot markdown validator, even when the runtime skill key elsewhere in the app still uses underscores.

## Testing And Validation

- Backend tests use `pytest`.
Expand Down
15 changes: 15 additions & 0 deletions .github/instructions/prompt-skills.instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
name: "Prompt Skill Markdown Guidelines"
description: "Use when editing application prompt skills in skills/**/SKILL.md. Covers custom frontmatter, runtime lookup behavior, and keeping VS Code's skill validator quiet."
applyTo:
- "skills/**/SKILL.md"
---

# Prompt Skill Markdown Guidelines

- Files under `skills/**/SKILL.md` are application prompt specs loaded by `core/llm.py`, not Copilot repo skills under `.github/skills/`.
- The runtime skill key is the folder name, so keep folder names and code constants aligned with the app's expected key.
- Always include a short frontmatter `description` to avoid VS Code skill-schema warnings.
- If a frontmatter `name` is present, prefer lowercase letters, numbers, and hyphens so the Copilot validator stays quiet, even when the runtime key elsewhere in the app uses underscores.
- Preserve the repo's `input` and `output` frontmatter fields because `core/llm.py` reads them to build prompts.
- Keep the body concise and instruction-focused. Return-shape requirements should stay explicit in the markdown body.
27 changes: 27 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,28 +1,45 @@
{
"cSpell.words": [
"abangser",
"ASGI",
"botocore",
"bsky",
"buildx",
"cbranch",
"cfgv",
"cstat",
"dabde",
"dateutil",
"djlint",
"dnspython",
"doseq",
"exif",
"falsey",
"Farcic",
"FAVICONS",
"favourites",
"fbclid",
"Feedly",
"Fraunces",
"gclid",
"gunicorn",
"Hashimoto",
"healthz",
"HNSW",
"httpx",
"Hykes",
"isready",
"jsbeautifier",
"LANCZOS",
"libipld",
"libpq",
"librt",
"lnkd",
"mday",
"membe",
"mipsytipsy",
"mitchellh",
"newsle",
"nodeenv",
"noinput",
"nomic",
Expand All @@ -39,14 +56,24 @@
"readyz",
"recomputations",
"Referer",
"repost",
"reposts",
"rollup",
"scaffolder",
"simplejwt",
"skillr",
"solomonstre",
"svix",
"topicv",
"Unparseable",
"unstub",
"upserted",
"upserts",
"upvote",
"uritemplate",
"Vercel",
"vfarcic",
"Viktor",
"xrpc",
"xxhash"
]
Expand Down
41 changes: 41 additions & 0 deletions content/migrations/0002_content_newsletter_promotion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models


class Migration(migrations.Migration):
    """Add three nullable newsletter-promotion fields to the ``content`` model.

    The fields are ``newsletter_promotion_at`` (a datetime),
    ``newsletter_promotion_by`` (a foreign key to the configured user model),
    and ``newsletter_promotion_theme`` (a foreign key to
    ``trends.themesuggestion``). Every field allows NULL and blank values.
    """

    # The foreign keys below point at the swappable user model and at a model
    # in the ``trends`` app, so both are declared as dependencies alongside
    # the initial ``content`` migration.
    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ("content", "0001_initial"),
        ("trends", "0003_theme_suggestion"),
    ]

    operations = [
        # Optional datetime column.
        migrations.AddField(
            model_name="content",
            name="newsletter_promotion_at",
            field=models.DateTimeField(blank=True, null=True),
        ),
        # Optional link to the user model; SET_NULL clears the reference
        # instead of deleting the content row when the user is deleted.
        migrations.AddField(
            model_name="content",
            name="newsletter_promotion_by",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                related_name="newsletter_promoted_content",
                to=settings.AUTH_USER_MODEL,
            ),
        ),
        # Optional link to a theme suggestion; also nulled out when the
        # referenced row is deleted.
        migrations.AddField(
            model_name="content",
            name="newsletter_promotion_theme",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                related_name="promoted_contents",
                to="trends.themesuggestion",
            ),
        ),
    ]
15 changes: 15 additions & 0 deletions content/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,21 @@ class Content(models.Model):
duplicate_signal_count = models.IntegerField(default=0)
is_reference = models.BooleanField(default=False)
is_active = models.BooleanField(default=True)
newsletter_promotion_at = models.DateTimeField(null=True, blank=True)
newsletter_promotion_by = models.ForeignKey(
settings.AUTH_USER_MODEL,
null=True,
blank=True,
on_delete=models.SET_NULL,
related_name="newsletter_promoted_content",
)
newsletter_promotion_theme = models.ForeignKey(
"trends.ThemeSuggestion",
null=True,
blank=True,
on_delete=models.SET_NULL,
related_name="promoted_contents",
)

class Meta:
ordering = ["-published_date"]
Expand Down
6 changes: 6 additions & 0 deletions content/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ class Meta:
"duplicate_signal_count",
"is_reference",
"is_active",
"newsletter_promotion_at",
"newsletter_promotion_by",
"newsletter_promotion_theme",
]
read_only_fields = [
"id",
Expand All @@ -42,6 +45,9 @@ class Meta:
"embedding_id",
"duplicate_of",
"duplicate_signal_count",
"newsletter_promotion_at",
"newsletter_promotion_by",
"newsletter_promotion_theme",
]

def validate(self, attrs):
Expand Down
20 changes: 16 additions & 4 deletions core/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"""

import logging
from typing import Any
from typing import Any, cast

from drf_spectacular.utils import (
OpenApiExample,
Expand Down Expand Up @@ -589,6 +589,16 @@ class ProjectOwnedQuerysetMixin:

queryset: Any = None

def _kwargs(self) -> dict[str, Any]:
    """Expose this object's ``kwargs`` attribute typed as a string-keyed dict.

    The attribute is read with ``getattr`` rather than declared on the mixin,
    and the ``cast`` only informs the type checker; nothing is validated or
    copied at runtime.

    Raises:
        AttributeError: If the object has no ``kwargs`` attribute.
    """

    route_kwargs = getattr(self, "kwargs")
    return cast(dict[str, Any], route_kwargs)

def _request(self) -> Any:
    """Expose this object's ``request`` attribute for typed access checks.

    The attribute is looked up dynamically with ``getattr`` and returned
    unchanged.

    Raises:
        AttributeError: If the object has no ``request`` attribute.
    """

    current_request = getattr(self, "request")
    return current_request

def get_project(self):
"""Return the project referenced by ``project_id`` after access checks.

Expand All @@ -597,13 +607,15 @@ def get_project(self):
NotFound: If the project does not exist or the user lacks access.
"""

project_id = self.kwargs.get("project_id")
project_id = self._kwargs().get("project_id")
if project_id is None:
raise AssertionError(
"project_id must be present in nested project-scoped routes"
)
try:
return get_visible_projects_queryset(self.request.user).get(pk=project_id)
return get_visible_projects_queryset(self._request().user).get(
pk=project_id
)
except Project.DoesNotExist as exc:
raise NotFound("Project not found.") from exc

Expand All @@ -618,7 +630,7 @@ def get_queryset(self):
def get_serializer_context(self):
"""Inject the resolved project into serializer context."""

context = super().get_serializer_context()
context = cast(dict[str, Any], cast(Any, super()).get_serializer_context())
context["project"] = self.get_project()
return context

Expand Down
46 changes: 32 additions & 14 deletions core/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import httpx
from django.conf import settings as django_settings
from django.db.models import Model
from django.utils.dateparse import parse_datetime
from qdrant_client import QdrantClient
from qdrant_client.models import (
Expand All @@ -34,6 +35,15 @@
settings = cast(CoreSettings, django_settings)


def _require_pk(instance: Model) -> int:
    """Return ``instance.pk`` coerced to ``int``, insisting that it is set.

    Args:
        instance: The model instance whose primary key is needed.

    Returns:
        The primary key passed through ``int()``.

    Raises:
        ValueError: If ``instance.pk`` is ``None``; the message names the
            instance's class.
    """

    pk_value = instance.pk
    if pk_value is not None:
        return int(pk_value)
    model_name = instance.__class__.__name__
    raise ValueError(f"{model_name} must be saved first.")


def get_sentence_transformer_class():
"""Lazily import and cache the sentence-transformer class.

Expand Down Expand Up @@ -90,7 +100,10 @@ def embed_text(self, text: str) -> list[float]:
def get_embedding_dimension(self) -> int:
"""Return the model's native embedding dimension without probing text."""

return int(self.model.get_sentence_embedding_dimension())
dimension = self.model.get_sentence_embedding_dimension()
if dimension is None:
raise RuntimeError("Embedding model did not report a vector dimension.")
return int(dimension)


class OllamaEmbeddingProvider(EmbeddingProvider):
Expand Down Expand Up @@ -222,18 +235,20 @@ def upsert_content_embedding(content: Content) -> str:
"""

client = get_qdrant_client()
ensure_project_collection(content.project_id)
project_id = _require_pk(content.project)
content_id = _require_pk(content)
ensure_project_collection(project_id)
embedding_id = content.embedding_id or str(uuid4())
vector = embed_text(build_content_embedding_text(content))
client.upsert(
collection_name=collection_name_for_project(content.project_id),
collection_name=collection_name_for_project(project_id),
points=[
PointStruct(
id=embedding_id,
vector=vector,
payload={
"content_id": content.id,
"project_id": content.project_id,
"content_id": content_id,
"project_id": project_id,
"url": content.url,
"title": content.title,
"published_date": serialize_published_date(content.published_date),
Expand All @@ -254,18 +269,20 @@ def upsert_entity_embedding(entity: Entity) -> str:
"""Write or update an entity embedding in the project's entity collection."""

client = get_qdrant_client()
ensure_project_entity_collection(entity.project_id)
project_id = _require_pk(entity.project)
entity_id = _require_pk(entity)
ensure_project_entity_collection(project_id)
vector = embed_text(build_entity_embedding_text(entity))
embedding_id = f"entity-{entity.id}"
embedding_id = f"entity-{entity_id}"
client.upsert(
collection_name=entity_collection_name_for_project(entity.project_id),
collection_name=entity_collection_name_for_project(project_id),
points=[
PointStruct(
id=embedding_id,
vector=vector,
payload={
"entity_id": entity.id,
"project_id": entity.project_id,
"entity_id": entity_id,
"project_id": project_id,
"name": entity.name,
"type": entity.type,
},
Expand Down Expand Up @@ -343,11 +360,11 @@ def search_similar_content(
"""Find content similar to an existing content row within the same project."""

return search_similar(
content.project_id,
_require_pk(content.project),
embed_text(build_content_embedding_text(content)),
limit=limit,
is_reference=is_reference,
exclude_content_id=content.id,
exclude_content_id=_require_pk(content),
)


Expand All @@ -370,9 +387,10 @@ def search_similar_entities(
def search_similar_entities_for_content(content: Content, limit: int = 8):
"""Find tracked entities whose embeddings are close to a content item."""

sync_project_entity_embeddings(content.project_id)
project_id = _require_pk(content.project)
sync_project_entity_embeddings(project_id)
return search_similar_entities(
content.project_id,
project_id,
embed_text(build_content_embedding_text(content)),
limit=limit,
)
Expand Down
Loading
Loading