Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
411 changes: 392 additions & 19 deletions core/tasks.py

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions core/tests/test_entrypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,16 @@ def test_wsgi_module_sets_default_settings_and_builds_application(mocker):

def test_celery_app_redirects_worker_stdout_at_info_level():
    """Check that the app config sets ``worker_redirect_stdouts_level`` to ``"INFO"``."""
    redirect_level = app.conf.worker_redirect_stdouts_level
    assert redirect_level == "INFO"


def test_celery_app_schedules_source_quality_before_authority_recompute():
    """Check that both nightly recompute beat entries point at their tasks.

    Only the ``task`` value of each beat-schedule entry is asserted; the
    relative run times of the two entries are not compared here.
    """
    schedule = app.conf.beat_schedule
    expected_tasks = {
        "run-all-source-quality-recomputations-nightly": (
            "core.tasks.run_all_source_quality_recomputations"
        ),
        "run-all-authority-recomputations-nightly": (
            "core.tasks.run_all_authority_recomputations"
        ),
    }

    for entry_name, task_path in expected_tasks.items():
        assert schedule[entry_name]["task"] == task_path
124 changes: 123 additions & 1 deletion core/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
queue_content_skill,
recompute_authority_scores,
run_all_authority_recomputations,
run_all_source_quality_recomputations,
run_relevance_scoring_skill,
run_summarization_skill,
)
Expand Down Expand Up @@ -86,6 +87,43 @@ def test_run_all_authority_recomputations_executes_inline_when_eager(
delay_mock.assert_not_called()


def test_run_all_source_quality_recomputations_enqueues_all_projects(
    source_plugin_context, mocker
):
    """Expect one ``delay`` call per project.

    Two projects are in play: the one carried by ``source_plugin_context``
    and a second one created inside this test.
    """
    enqueue = mocker.patch("core.tasks.recompute_source_quality.delay")
    second_project = Project.objects.create(
        name="Source Quality Project",
        topic_description="Signals",
    )

    result = run_all_source_quality_recomputations()

    assert result == 2
    for project_id in (
        source_plugin_context.project.id,
        _require_pk(second_project),
    ):
        enqueue.assert_any_call(project_id)
    assert enqueue.call_count == 2


def test_run_all_source_quality_recomputations_executes_inline_when_eager(
    source_plugin_context, settings, mocker
):
    """With ``CELERY_TASK_ALWAYS_EAGER`` set, the task is called directly.

    The per-project task is expected to be invoked once per project while
    its ``delay`` method is never used.
    """
    settings.CELERY_TASK_ALWAYS_EAGER = True
    # Patch the task itself first, then ``delay``; the second target is
    # looked up on whatever ``core.tasks.recompute_source_quality`` is at
    # that moment, so the order of these two patches must be kept.
    inline_call = mocker.patch("core.tasks.recompute_source_quality")
    enqueue = mocker.patch("core.tasks.recompute_source_quality.delay")
    second_project = Project.objects.create(
        name="Inline Source Quality Project",
        topic_description="Signals",
    )

    result = run_all_source_quality_recomputations()

    assert result == 2
    for project_id in (
        source_plugin_context.project.id,
        _require_pk(second_project),
    ):
        inline_call.assert_any_call(project_id)
    assert inline_call.call_count == 2
    enqueue.assert_not_called()


def test_recompute_authority_scores_updates_entities_and_creates_snapshots(
source_plugin_context, mocker
):
Expand All @@ -108,23 +146,41 @@ def test_recompute_authority_scores_updates_entities_and_creates_snapshots(
project=project,
entity=primary_entity,
url="https://example.com/authority-primary",
canonical_url="https://example.com/authority-primary",
title="Authority Primary",
author="Reporter",
source_plugin=SourcePluginName.RSS,
source_plugin="linkedin",
published_date="2026-04-28T12:00:00Z",
content_text="Primary authority content.",
duplicate_signal_count=3,
source_metadata={"like_count": 9, "comment_count": 3, "share_count": 2},
)
secondary_content = Content.objects.create(
project=project,
entity=secondary_entity,
url="https://example.com/authority-secondary",
canonical_url="https://example.com/authority-secondary",
title="Authority Secondary",
author="Reporter",
source_plugin=SourcePluginName.RSS,
published_date="2026-04-28T13:00:00Z",
content_text="Secondary authority content.",
)
corroborating_project = Project.objects.create(
name="Corroborating Project",
topic_description="Newsletter coverage",
)
Content.objects.create(
project=corroborating_project,
url="https://example.com/newsletter-primary",
canonical_url=primary_content.canonical_url,
title="Authority Primary Mention",
author="newsletter@example.com",
source_plugin="newsletter",
published_date="2026-04-28T14:00:00Z",
content_text="Authority primary newsletter mention.",
source_metadata={"sender_email": "newsletter@example.com"},
)
EntityMention.objects.create(
project=project,
content=primary_content,
Expand Down Expand Up @@ -163,11 +219,77 @@ def test_recompute_authority_scores_updates_entities_and_creates_snapshots(
assert secondary_snapshot.final_score == pytest.approx(
secondary_entity.authority_score
)
assert (
primary_snapshot.engagement_component > secondary_snapshot.engagement_component
)
assert primary_snapshot.recency_component > 0.8
assert primary_snapshot.source_quality_component > 0.0
assert (
primary_snapshot.cross_newsletter_component
> secondary_snapshot.cross_newsletter_component
)
assert primary_snapshot.feedback_component > 0.5
assert primary_snapshot.duplicate_component > secondary_snapshot.duplicate_component
assert primary_snapshot.decayed_prior == pytest.approx(
config.authority_decay_rate * 0.5
)
assert primary_snapshot.weights_at_compute["engagement"] == pytest.approx(0.15)


def test_recompute_authority_scores_uses_bluesky_and_reddit_engagement_metadata(
    source_plugin_context, mocker
):
    """Recompute authority for an entity with Bluesky and Reddit content.

    One Bluesky item and one Reddit item, each carrying engagement counts
    in ``source_metadata``, are attached to the fixture entity as author
    mentions. After the recompute, the entity's snapshot is expected to
    report an engagement component of approximately 1.0.
    """
    mocker.patch("content.signals.queue_topic_centroid_recompute")
    project = source_plugin_context.project
    ProjectConfig.objects.create(project=project)
    entity = source_plugin_context.entity

    bluesky_url = "https://bsky.app/profile/example/post/abc"
    bluesky_post = Content.objects.create(
        project=project,
        entity=entity,
        url=bluesky_url,
        canonical_url=bluesky_url,
        title="Bluesky authority content",
        author="example.bsky.social",
        source_plugin=SourcePluginName.BLUESKY,
        published_date="2026-04-28T12:00:00Z",
        content_text="Bluesky authority content.",
        source_metadata={"like_count": 4, "reply_count": 1, "repost_count": 2},
    )

    reddit_url = "https://www.reddit.com/r/python/comments/abc123/test"
    reddit_post = Content.objects.create(
        project=project,
        entity=entity,
        url=reddit_url,
        canonical_url=reddit_url,
        title="Reddit authority content",
        author="redditor",
        source_plugin=SourcePluginName.REDDIT,
        published_date="2026-04-28T13:00:00Z",
        content_text="Reddit authority content.",
        source_metadata={"score": 10, "comment_count": 3},
    )

    # Same author mention for both items; only the sentiment differs.
    for content, sentiment in (
        (bluesky_post, "positive"),
        (reddit_post, "neutral"),
    ):
        EntityMention.objects.create(
            project=project,
            content=content,
            entity=entity,
            role=EntityMentionRole.AUTHOR,
            sentiment=sentiment,
            span="Example",
            confidence=0.9,
        )

    recompute_authority_scores(project.id)

    latest = EntityAuthoritySnapshot.objects.get(entity=entity)
    assert latest.engagement_component == pytest.approx(1.0)


def test_queue_content_skill_enqueues_relevance_task(source_plugin_context, mocker):
Expand Down
35 changes: 32 additions & 3 deletions entities/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,21 +78,40 @@ def latest_snapshot_summary(self, obj):
if latest_snapshot is None:
return "-"
mention_value = f"{_score_to_percent(latest_snapshot.mention_component):.1f}%"
engagement_value = (
f"{_score_to_percent(latest_snapshot.engagement_component):.1f}%"
)
recency_value = f"{_score_to_percent(latest_snapshot.recency_component):.1f}%"
source_quality_value = (
f"{_score_to_percent(latest_snapshot.source_quality_component):.1f}%"
)
cross_newsletter_value = (
f"{_score_to_percent(latest_snapshot.cross_newsletter_component):.1f}%"
)
feedback_value = f"{_score_to_percent(latest_snapshot.feedback_component):.1f}%"
duplicate_value = (
f"{_score_to_percent(latest_snapshot.duplicate_component):.1f}%"
)
decayed_value = f"{_score_to_percent(latest_snapshot.decayed_prior):.1f}%"
return format_html(
(
'<span title="Mention {}, Feedback {}, Duplicate {}, Carry {}">'
"M {} | F {} | D {} | Carry {}</span>"
'<span title="Mention {}, Engagement {}, Recency {}, Source {}, '
'Cross-newsletter {}, Feedback {}, Duplicate {}, Carry {}">'
"M {} | E {} | R {} | S {} | X {} | F {} | D {} | Carry {}</span>"
),
mention_value,
engagement_value,
recency_value,
source_quality_value,
cross_newsletter_value,
feedback_value,
duplicate_value,
decayed_value,
mention_value,
engagement_value,
recency_value,
source_quality_value,
cross_newsletter_value,
feedback_value,
duplicate_value,
decayed_value,
Expand Down Expand Up @@ -130,12 +149,22 @@ def display_components(self, obj):
"""Render the stored authority components in a compact summary."""

mention_value = f"{_score_to_percent(obj.mention_component):.1f}%"
engagement_value = f"{_score_to_percent(obj.engagement_component):.1f}%"
recency_value = f"{_score_to_percent(obj.recency_component):.1f}%"
source_quality_value = f"{_score_to_percent(obj.source_quality_component):.1f}%"
cross_newsletter_value = (
f"{_score_to_percent(obj.cross_newsletter_component):.1f}%"
)
feedback_value = f"{_score_to_percent(obj.feedback_component):.1f}%"
duplicate_value = f"{_score_to_percent(obj.duplicate_component):.1f}%"
decayed_value = f"{_score_to_percent(obj.decayed_prior):.1f}%"
return format_html(
"M {} | F {} | D {} | Carry {}",
"M {} | E {} | R {} | S {} | X {} | F {} | D {} | Carry {}",
mention_value,
engagement_value,
recency_value,
source_quality_value,
cross_newsletter_value,
feedback_value,
duplicate_value,
decayed_value,
Expand Down
25 changes: 25 additions & 0 deletions entities/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema
from rest_framework import serializers, viewsets
from rest_framework.decorators import action
from rest_framework.exceptions import NotFound
from rest_framework.filters import OrderingFilter
from rest_framework.response import Response

Expand Down Expand Up @@ -97,6 +98,30 @@ def mentions(self, request, *args, **kwargs):
serializer = EntityMentionSummarySerializer(mentions, many=True)
return Response(serializer.data)

@extend_schema(
    summary="Get current authority components",
    description=(
        "Return the latest persisted authority-score component breakdown for one "
        "tracked entity inside the selected project."
    ),
    request=None,
    responses={
        200: EntityAuthoritySnapshotSerializer,
        403: AUTHENTICATION_REQUIRED_RESPONSE,
        # The handler raises NotFound when no snapshot exists, so the
        # schema has to advertise the 404 as well.
        404: OpenApiResponse(
            description="No authority snapshots exist for this entity yet."
        ),
    },
    tags=["Entity Catalog"],
)
@action(detail=True, methods=["get"], url_path="authority_components")
def authority_components(self, request, *args, **kwargs):
    """Return the latest authority snapshot for the selected entity.

    The snapshot with the most recent ``computed_at`` is serialized with
    ``EntityAuthoritySnapshotSerializer``.

    Raises:
        NotFound: If the entity has no authority snapshots.
    """

    entity = self.get_object()
    snapshot = entity.authority_snapshots.order_by("-computed_at").first()
    if snapshot is None:
        raise NotFound("No authority snapshots exist for this entity yet.")
    serializer = EntityAuthoritySnapshotSerializer(snapshot)
    return Response(serializer.data)

@extend_schema(
summary="List authority history",
description=(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by Django 6.0.4 on 2026-05-02 17:04

from django.db import migrations, models


# Schema migration for the ``entities`` app: adds five fields to the
# ``entityauthoritysnapshot`` model (four float components and one JSON field).
class Migration(migrations.Migration):

# Applied on top of the preceding ``entities`` migration named below.
dependencies = [
("entities", "0003_entitycandidate_auto_promotion_blocked_reason_and_more"),
]

operations = [
# Four new score components, each a FloatField with default 0.0.
migrations.AddField(
model_name="entityauthoritysnapshot",
name="cross_newsletter_component",
field=models.FloatField(default=0.0),
),
migrations.AddField(
model_name="entityauthoritysnapshot",
name="engagement_component",
field=models.FloatField(default=0.0),
),
migrations.AddField(
model_name="entityauthoritysnapshot",
name="recency_component",
field=models.FloatField(default=0.0),
),
migrations.AddField(
model_name="entityauthoritysnapshot",
name="source_quality_component",
field=models.FloatField(default=0.0),
),
# JSON field defaulting to an empty dict; presumably holds the component
# weights in effect when the snapshot was computed — confirm against the
# code that writes it.
migrations.AddField(
model_name="entityauthoritysnapshot",
name="weights_at_compute",
field=models.JSONField(blank=True, default=dict),
),
]
5 changes: 5 additions & 0 deletions entities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,14 @@ class EntityAuthoritySnapshot(models.Model):
)
computed_at = models.DateTimeField(auto_now_add=True)
mention_component = models.FloatField()
engagement_component = models.FloatField(default=0.0)
recency_component = models.FloatField(default=0.0)
source_quality_component = models.FloatField(default=0.0)
cross_newsletter_component = models.FloatField(default=0.0)
feedback_component = models.FloatField()
duplicate_component = models.FloatField()
decayed_prior = models.FloatField()
weights_at_compute = models.JSONField(default=dict, blank=True)
final_score = models.FloatField()

class Meta:
Expand Down
5 changes: 5 additions & 0 deletions entities/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,14 @@ class Meta:
"project",
"computed_at",
"mention_component",
"engagement_component",
"recency_component",
"source_quality_component",
"cross_newsletter_component",
"feedback_component",
"duplicate_component",
"decayed_prior",
"weights_at_compute",
"final_score",
]
read_only_fields = fields
Expand Down
Loading
Loading