Skip to content

Commit 4abcd01

Browse files
authored
Merge pull request #32 from webstackdev/feature/theme-suggestions
Feature/theme suggestions
2 parents 437dd95 + 1f991b3 commit 4abcd01

57 files changed

Lines changed: 2553 additions & 592 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

.env.test

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@ OLLAMA_URL=http://ollama:11434
55
REDDIT_CLIENT_ID=client
66
REDDIT_CLIENT_SECRET=secret
77
REDDIT_USER_AGENT=newsletter-maker/test
8+
CELERY_BROKER_URL=memory://
9+
CELERY_RESULT_BACKEND=cache+memory://

.github/copilot-instructions.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ You are working in Newsletter Maker, a Django + DRF + Celery + Qdrant backend wi
4545
- TypeScript and React code should use JSDoc for exported utilities, hooks, route handlers, and non-trivial components when behavior is not obvious from the type signature alone.
4646
- If architecture or workflow behavior changes, update the most relevant docs in `docs/`, especially `docs/DEVELOPER_GUIDE.md`, `docs/IMPLEMENTATION_OVERVIEW.md`, `docs/MODELS.md`, `docs/RELEVANCE_SCORING.md`, or `docs/LOGGING.md`.
4747

48+
## Prompt Skill Conventions
49+
50+
- Application prompt skills live under `skills/<skill_name>/SKILL.md` and are loaded by `core/llm.py` using the folder name rather than the frontmatter `name`.
51+
- When adding or editing one of these prompt skills, always include a short frontmatter `description` so VS Code does not report incomplete skill metadata.
52+
- If a frontmatter `name` is present, prefer lowercase letters, numbers, and hyphens there to satisfy the Copilot markdown validator, even when the runtime skill key elsewhere in the app still uses underscores.
53+
4854
## Testing And Validation
4955

5056
- Backend tests use `pytest`.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
---
2+
name: "Prompt Skill Markdown Guidelines"
3+
description: "Use when editing application prompt skills in skills/**/SKILL.md. Covers custom frontmatter, runtime lookup behavior, and keeping VS Code's skill validator quiet."
4+
applyTo:
5+
- "skills/**/SKILL.md"
6+
---
7+
8+
# Prompt Skill Markdown Guidelines
9+
10+
- Files under `skills/**/SKILL.md` are application prompt specs loaded by `core/llm.py`, not Copilot repo skills under `.github/skills/`.
11+
- The runtime skill key is the folder name, so keep folder names and code constants aligned with the app's expected key.
12+
- Always include a short frontmatter `description` to avoid VS Code skill-schema warnings.
13+
- If a frontmatter `name` is present, prefer lowercase letters, numbers, and hyphens so the Copilot validator stays quiet, even when the runtime key elsewhere in the app uses underscores.
14+
- Preserve the repo's `input` and `output` frontmatter fields because `core/llm.py` reads them to build prompts.
15+
- Keep the body concise and instruction-focused. Return-shape requirements should stay explicit in the markdown body.

.vscode/settings.json

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,45 @@
11
{
22
"cSpell.words": [
3+
"abangser",
34
"ASGI",
45
"botocore",
6+
"bsky",
57
"buildx",
68
"cbranch",
79
"cfgv",
810
"cstat",
11+
"dabde",
912
"dateutil",
1013
"djlint",
1114
"dnspython",
15+
"doseq",
16+
"exif",
1217
"falsey",
18+
"Farcic",
1319
"FAVICONS",
20+
"favourites",
21+
"fbclid",
1422
"Feedly",
1523
"Fraunces",
24+
"gclid",
1625
"gunicorn",
26+
"Hashimoto",
1727
"healthz",
1828
"HNSW",
1929
"httpx",
30+
"Hykes",
2031
"isready",
2132
"jsbeautifier",
33+
"LANCZOS",
2234
"libipld",
2335
"libpq",
2436
"librt",
37+
"lnkd",
2538
"mday",
39+
"membe",
40+
"mipsytipsy",
41+
"mitchellh",
42+
"newsle",
2643
"nodeenv",
2744
"noinput",
2845
"nomic",
@@ -39,14 +56,24 @@
3956
"readyz",
4057
"recomputations",
4158
"Referer",
59+
"repost",
60+
"reposts",
61+
"rollup",
4262
"scaffolder",
4363
"simplejwt",
64+
"skillr",
65+
"solomonstre",
4466
"svix",
67+
"topicv",
4568
"Unparseable",
4669
"unstub",
4770
"upserted",
71+
"upserts",
4872
"upvote",
4973
"uritemplate",
74+
"Vercel",
75+
"vfarcic",
76+
"Viktor",
5077
"xrpc",
5178
"xxhash"
5279
]
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import django.db.models.deletion
2+
from django.conf import settings
3+
from django.db import migrations, models
4+
5+
6+
def _optional_fk(*, to, related_name):
    """Build a nullable, blank-able foreign key that is set to NULL on target deletion."""

    return models.ForeignKey(
        blank=True,
        null=True,
        on_delete=django.db.models.deletion.SET_NULL,
        related_name=related_name,
        to=to,
    )


class Migration(migrations.Migration):
    """Add the three optional ``newsletter_promotion_*`` fields to ``content``.

    The fields record when the promotion happened (``DateTimeField``), which
    user it is attributed to (FK to the swappable user model), and which theme
    suggestion it is linked to (FK to ``trends.themesuggestion``). All three
    are nullable, so existing rows need no default.
    """

    # The user model is swappable; the trends dependency guarantees the
    # ThemeSuggestion table exists before the foreign key is created.
    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ("content", "0001_initial"),
        ("trends", "0003_theme_suggestion"),
    ]

    operations = [
        migrations.AddField(
            model_name="content",
            name="newsletter_promotion_at",
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="content",
            name="newsletter_promotion_by",
            field=_optional_fk(
                to=settings.AUTH_USER_MODEL,
                related_name="newsletter_promoted_content",
            ),
        ),
        migrations.AddField(
            model_name="content",
            name="newsletter_promotion_theme",
            field=_optional_fk(
                to="trends.themesuggestion",
                related_name="promoted_contents",
            ),
        ),
    ]

content/models.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,21 @@ class Content(models.Model):
4747
duplicate_signal_count = models.IntegerField(default=0)
4848
is_reference = models.BooleanField(default=False)
4949
is_active = models.BooleanField(default=True)
50+
newsletter_promotion_at = models.DateTimeField(null=True, blank=True)
51+
newsletter_promotion_by = models.ForeignKey(
52+
settings.AUTH_USER_MODEL,
53+
null=True,
54+
blank=True,
55+
on_delete=models.SET_NULL,
56+
related_name="newsletter_promoted_content",
57+
)
58+
newsletter_promotion_theme = models.ForeignKey(
59+
"trends.ThemeSuggestion",
60+
null=True,
61+
blank=True,
62+
on_delete=models.SET_NULL,
63+
related_name="promoted_contents",
64+
)
5065

5166
class Meta:
5267
ordering = ["-published_date"]

content/serializers.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ class Meta:
3232
"duplicate_signal_count",
3333
"is_reference",
3434
"is_active",
35+
"newsletter_promotion_at",
36+
"newsletter_promotion_by",
37+
"newsletter_promotion_theme",
3538
]
3639
read_only_fields = [
3740
"id",
@@ -42,6 +45,9 @@ class Meta:
4245
"embedding_id",
4346
"duplicate_of",
4447
"duplicate_signal_count",
48+
"newsletter_promotion_at",
49+
"newsletter_promotion_by",
50+
"newsletter_promotion_theme",
4551
]
4652

4753
def validate(self, attrs):

core/api.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"""
77

88
import logging
9-
from typing import Any
9+
from typing import Any, cast
1010

1111
from drf_spectacular.utils import (
1212
OpenApiExample,
@@ -589,6 +589,16 @@ class ProjectOwnedQuerysetMixin:
589589

590590
queryset: Any = None
591591

592+
def _kwargs(self) -> dict[str, Any]:
593+
"""Return the DRF route kwargs for typed nested-project lookups."""
594+
595+
return cast(dict[str, Any], getattr(self, "kwargs"))
596+
597+
def _request(self) -> Any:
598+
"""Return the DRF request object for typed access checks."""
599+
600+
return getattr(self, "request")
601+
592602
def get_project(self):
593603
"""Return the project referenced by ``project_id`` after access checks.
594604
@@ -597,13 +607,15 @@ def get_project(self):
597607
NotFound: If the project does not exist or the user lacks access.
598608
"""
599609

600-
project_id = self.kwargs.get("project_id")
610+
project_id = self._kwargs().get("project_id")
601611
if project_id is None:
602612
raise AssertionError(
603613
"project_id must be present in nested project-scoped routes"
604614
)
605615
try:
606-
return get_visible_projects_queryset(self.request.user).get(pk=project_id)
616+
return get_visible_projects_queryset(self._request().user).get(
617+
pk=project_id
618+
)
607619
except Project.DoesNotExist as exc:
608620
raise NotFound("Project not found.") from exc
609621

@@ -618,7 +630,7 @@ def get_queryset(self):
618630
def get_serializer_context(self):
    """Extend the parent serializer context with the resolved project.

    The parent's context is fetched first, then ``self.get_project()`` is
    stored under the ``"project"`` key of that same mapping.
    """

    # ``super()`` is cast to ``Any`` because the mixin's static base does not
    # declare ``get_serializer_context``; the result is cast back to a dict.
    parent = cast(Any, super())
    serializer_context = cast(dict[str, Any], parent.get_serializer_context())
    serializer_context["project"] = self.get_project()
    return serializer_context
624636

core/embeddings.py

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import httpx
1717
from django.conf import settings as django_settings
18+
from django.db.models import Model
1819
from django.utils.dateparse import parse_datetime
1920
from qdrant_client import QdrantClient
2021
from qdrant_client.models import (
@@ -34,6 +35,15 @@
3435
settings = cast(CoreSettings, django_settings)
3536

3637

38+
def _require_pk(instance: Model) -> int:
39+
"""Return a saved model primary key for typed Qdrant payload construction."""
40+
41+
instance_pk = instance.pk
42+
if instance_pk is None:
43+
raise ValueError(f"{instance.__class__.__name__} must be saved first.")
44+
return int(instance_pk)
45+
46+
3747
def get_sentence_transformer_class():
3848
"""Lazily import and cache the sentence-transformer class.
3949
@@ -90,7 +100,10 @@ def embed_text(self, text: str) -> list[float]:
90100
def get_embedding_dimension(self) -> int:
    """Report the vector size advertised by the loaded model.

    The value comes from ``self.model.get_sentence_embedding_dimension()``;
    no text is embedded to discover it.

    Raises:
        RuntimeError: If the model reports ``None`` instead of a dimension.
    """

    reported = self.model.get_sentence_embedding_dimension()
    if reported is not None:
        return int(reported)
    raise RuntimeError("Embedding model did not report a vector dimension.")
94107

95108

96109
class OllamaEmbeddingProvider(EmbeddingProvider):
@@ -222,18 +235,20 @@ def upsert_content_embedding(content: Content) -> str:
222235
"""
223236

224237
client = get_qdrant_client()
225-
ensure_project_collection(content.project_id)
238+
project_id = _require_pk(content.project)
239+
content_id = _require_pk(content)
240+
ensure_project_collection(project_id)
226241
embedding_id = content.embedding_id or str(uuid4())
227242
vector = embed_text(build_content_embedding_text(content))
228243
client.upsert(
229-
collection_name=collection_name_for_project(content.project_id),
244+
collection_name=collection_name_for_project(project_id),
230245
points=[
231246
PointStruct(
232247
id=embedding_id,
233248
vector=vector,
234249
payload={
235-
"content_id": content.id,
236-
"project_id": content.project_id,
250+
"content_id": content_id,
251+
"project_id": project_id,
237252
"url": content.url,
238253
"title": content.title,
239254
"published_date": serialize_published_date(content.published_date),
@@ -254,18 +269,20 @@ def upsert_entity_embedding(entity: Entity) -> str:
254269
"""Write or update an entity embedding in the project's entity collection."""
255270

256271
client = get_qdrant_client()
257-
ensure_project_entity_collection(entity.project_id)
272+
project_id = _require_pk(entity.project)
273+
entity_id = _require_pk(entity)
274+
ensure_project_entity_collection(project_id)
258275
vector = embed_text(build_entity_embedding_text(entity))
259-
embedding_id = f"entity-{entity.id}"
276+
embedding_id = f"entity-{entity_id}"
260277
client.upsert(
261-
collection_name=entity_collection_name_for_project(entity.project_id),
278+
collection_name=entity_collection_name_for_project(project_id),
262279
points=[
263280
PointStruct(
264281
id=embedding_id,
265282
vector=vector,
266283
payload={
267-
"entity_id": entity.id,
268-
"project_id": entity.project_id,
284+
"entity_id": entity_id,
285+
"project_id": project_id,
269286
"name": entity.name,
270287
"type": entity.type,
271288
},
@@ -343,11 +360,11 @@ def search_similar_content(
343360
"""Find content similar to an existing content row within the same project."""
344361

345362
return search_similar(
346-
content.project_id,
363+
_require_pk(content.project),
347364
embed_text(build_content_embedding_text(content)),
348365
limit=limit,
349366
is_reference=is_reference,
350-
exclude_content_id=content.id,
367+
exclude_content_id=_require_pk(content),
351368
)
352369

353370

@@ -370,9 +387,10 @@ def search_similar_entities(
370387
def search_similar_entities_for_content(content: Content, limit: int = 8):
371388
"""Find tracked entities whose embeddings are close to a content item."""
372389

373-
sync_project_entity_embeddings(content.project_id)
390+
project_id = _require_pk(content.project)
391+
sync_project_entity_embeddings(project_id)
374392
return search_similar_entities(
375-
content.project_id,
393+
project_id,
376394
embed_text(build_content_embedding_text(content)),
377395
limit=limit,
378396
)

0 commit comments

Comments
 (0)