Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions SESSION.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,23 @@
# Session Restore Point

## 2026-04-29 End Of Day

- WP4 entity extraction is implemented end-to-end.
- Backend added `EntityMention` and `EntityCandidate`, migration `core/migrations/0006_entitycandidate_entitymention.py`, new `core/entity_extraction.py`, and an `extract_entities` pipeline node between classification and relevance.
- Admin now supports reviewing entity candidates and mentions; candidate accept/reject/merge actions are wired in `core/admin.py`.
- API/frontend work is in place for entity mention summaries, project-scoped entity candidate review actions, and the new entity detail page at `frontend/src/app/entities/[id]/page.tsx`.
- `/entities` now shows pending candidates plus recent mention summaries, and links into the entity detail page.
- Focused validation that passed today:
  - `pytest core/tests/test_pipeline.py core/tests/test_admin.py -q`
  - `pytest core/tests/test_api.py -q`
  - `python manage.py check`
  - `python manage.py makemigrations --check --dry-run`
  - `python3 -m mypy core/pipeline.py core/entity_extraction.py core/embeddings.py core/models.py core/admin.py core/tests/test_pipeline.py core/tests/test_admin.py`
  - `cd frontend && npm run typecheck`
  - `cd frontend && npm run lint`
  - `cd frontend && npx vitest run src/app/entities/__tests__/page.test.tsx src/app/api/entity-candidates/[id]/__tests__/route.test.ts src/app/entities/[id]/__tests__/page.test.tsx`
- Repo-wide `just lint` was rerun after fixing `frontend/src/lib/api.ts` import ordering; backend lint fully passed and direct frontend lint now passes with `FRONTEND_LINT_OK`.



## Useful Commands From Today
Expand Down
119 changes: 119 additions & 0 deletions core/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,18 @@
from import_export.admin import ExportActionMixin
from unfold.admin import ModelAdmin

from core.entity_extraction import (
accept_entity_candidate,
merge_entity_candidate,
reject_entity_candidate,
)
from core.models import (
BlueskyCredentials,
Content,
Entity,
EntityCandidate,
EntityCandidateStatus,
EntityMention,
IngestionRun,
Project,
ProjectConfig,
Expand Down Expand Up @@ -199,6 +207,7 @@ class EntityAdmin(admin.ModelAdmin):

# Show the `colored_score` display method in place of the raw `authority_score` field.
list_display = ("name", "project", "type", "colored_score", "created_at")
search_fields = ("name", "project__name")

@admin.display(description="Authority Score", ordering="authority_score")
def colored_score(self, obj):
Expand All @@ -219,6 +228,116 @@ def colored_score(self, obj):
)


@admin.register(EntityMention)
class EntityMentionAdmin(admin.ModelAdmin):
    """Admin changelist for entity mentions recorded by entity extraction.

    Purely declarative: it configures the changelist columns, the sidebar
    filters, the search box, and the widgets used for foreign keys.
    """

    # Foreign keys are chosen through autocomplete widgets instead of
    # full <select> dropdowns.
    autocomplete_fields = ("entity", "content", "project")

    # Free-text search covers the entity name, the content title and the
    # stored mention span.
    search_fields = ("entity__name", "content__title", "span")

    # RelatedOnlyFieldListFilter restricts the project filter choices to
    # projects that are actually referenced by at least one mention.
    list_filter = (
        "role",
        "sentiment",
        ("project", admin.RelatedOnlyFieldListFilter),
    )

    # Changelist columns, left to right.
    list_display = (
        "entity",
        "project",
        "content",
        "role",
        "sentiment",
        "confidence",
        "created_at",
    )


@admin.register(EntityCandidate)
class EntityCandidateAdmin(admin.ModelAdmin):
    """Admin view for candidate entities awaiting human review.

    Besides the declarative changelist configuration, this admin exposes
    three bulk actions (accept, reject, merge). Each action delegates the
    actual state change to the matching helper imported from
    ``core.entity_extraction`` and reports a summary via ``message_user``.
    """

    actions = [
        "accept_selected_candidates",
        "reject_selected_candidates",
        "merge_into_existing_entities",
    ]
    list_display = (
        "name",
        "project",
        "suggested_type",
        "occurrence_count",
        "status",
        "merged_into",
        "first_seen_in",
        "created_at",
    )
    list_filter = (
        "status",
        "suggested_type",
        # Only offer projects that actually have candidates.
        ("project", admin.RelatedOnlyFieldListFilter),
    )
    search_fields = ("name", "project__name", "merged_into__name")
    autocomplete_fields = ("project", "first_seen_in", "merged_into")
    # Most frequently seen candidates first; ties broken alphabetically.
    ordering = ("-occurrence_count", "name")

    @admin.action(description="Accept selected candidates")
    def accept_selected_candidates(self, request, queryset):
        """Promote selected candidates into tracked entities.

        Candidates whose status is already ``ACCEPTED`` are skipped and are
        not included in the reported count.

        Args:
            request: The current admin request, used for user messaging.
            queryset: The candidates selected in the changelist.
        """

        accepted_count = 0
        for candidate in queryset.select_related("project"):
            if candidate.status == EntityCandidateStatus.ACCEPTED:
                continue
            accept_entity_candidate(candidate)
            accepted_count += 1
        self.message_user(
            request,
            f"Accepted {accepted_count} entity candidate(s).",
            messages.SUCCESS,
        )

    @admin.action(description="Reject selected candidates")
    def reject_selected_candidates(self, request, queryset):
        """Mark selected candidates as rejected.

        Candidates whose status is already ``REJECTED`` are skipped and are
        not included in the reported count.

        Args:
            request: The current admin request, used for user messaging.
            queryset: The candidates selected in the changelist.
        """

        rejected_count = 0
        for candidate in queryset:
            if candidate.status == EntityCandidateStatus.REJECTED:
                continue
            reject_entity_candidate(candidate)
            rejected_count += 1
        self.message_user(
            request,
            f"Rejected {rejected_count} entity candidate(s).",
            messages.SUCCESS,
        )

    @admin.action(description="Merge selected candidates into existing entities")
    def merge_into_existing_entities(self, request, queryset):
        """Merge candidates when a same-name entity already exists in the project.

        A candidate is merged only when exactly one ``Entity`` in the same
        project has a case-insensitively equal name. Candidates with zero or
        several matches are left untouched and listed in a warning message.

        Args:
            request: The current admin request, used for user messaging.
            queryset: The candidates selected in the changelist.
        """

        merged_count = 0
        unresolved_names: list[str] = []
        for candidate in queryset.select_related("project"):
            # Fetch at most two rows in a single query: that is enough to
            # distinguish "exactly one match" from "none" or "ambiguous", and
            # it avoids a separate count() followed by get(), which could
            # raise if matching rows changed between the two queries.
            matching_entities = list(
                Entity.objects.filter(
                    project=candidate.project,
                    name__iexact=candidate.name,
                )[:2]
            )
            if len(matching_entities) != 1:
                unresolved_names.append(candidate.name)
                continue
            merge_entity_candidate(candidate, matching_entities[0])
            merged_count += 1

        if merged_count:
            self.message_user(
                request,
                f"Merged {merged_count} entity candidate(s) into existing entities.",
                messages.SUCCESS,
            )
        if unresolved_names:
            self.message_user(
                request,
                "No unique same-name entity match was available for: "
                + ", ".join(sorted(unresolved_names)),
                messages.WARNING,
            )


class HighValueFilter(admin.SimpleListFilter):
"""Filter content down to high-value reference items."""

Expand Down
117 changes: 116 additions & 1 deletion core/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import logging
from typing import Any

from django.db.models import Count, Prefetch
from drf_spectacular.utils import (
OpenApiExample,
OpenApiParameter,
Expand All @@ -21,10 +22,17 @@
from rest_framework.exceptions import NotFound
from rest_framework.response import Response

from core.entity_extraction import (
accept_entity_candidate,
merge_entity_candidate,
reject_entity_candidate,
)
from core.models import (
BlueskyCredentials,
Content,
Entity,
EntityCandidate,
EntityMention,
IngestionRun,
Project,
ProjectConfig,
Expand All @@ -35,6 +43,9 @@
)
from core.serializers import (
ContentSerializer,
EntityCandidateMergeSerializer,
EntityCandidateSerializer,
EntityMentionSummarySerializer,
EntitySerializer,
IngestionRunSerializer,
ProjectConfigSerializer,
Expand Down Expand Up @@ -693,7 +704,111 @@ class EntityViewSet(ProjectOwnedQuerysetMixin, viewsets.ModelViewSet):
"""Manage tracked entities associated with a project."""

serializer_class = EntitySerializer
queryset = Entity.objects.select_related("project")
queryset = (
Entity.objects.select_related("project")
.annotate(mention_count=Count("mentions", distinct=True))
.prefetch_related(
Prefetch(
"mentions",
queryset=EntityMention.objects.select_related("content").order_by(
"-created_at"
),
to_attr="prefetched_mentions",
)
)
)

@extend_schema(
    summary="List entity mentions",
    description="Return the extracted mention history for one tracked entity inside the selected project.",
    request=None,
    responses={
        200: EntityMentionSummarySerializer(many=True),
        403: AUTHENTICATION_REQUIRED_RESPONSE,
    },
    tags=["Entity Catalog"],
)
@action(detail=True, methods=["get"], url_path="mentions")
def mentions(self, request, *args, **kwargs):
    """Return the extracted mentions for the selected entity."""

    # Resolve the entity through the viewset's own lookup, then load its
    # mentions newest-first with the related content joined in.
    mention_history = (
        self.get_object()
        .mentions.select_related("content")
        .order_by("-created_at")
    )
    payload = EntityMentionSummarySerializer(mention_history, many=True).data
    return Response(payload)


@document_project_owned_viewset(
    resource_plural="entity candidates",
    resource_singular="entity candidate",
    create_description="Entity candidates are created by the pipeline and can be reviewed through dedicated actions.",
    tag="Entity Catalog",
    action_overrides=build_crud_action_overrides(
        EntityCandidateSerializer,
        resource_plural="entity candidates for the selected project",
        resource_singular="entity candidate",
    ),
)
class EntityCandidateViewSet(ProjectOwnedQuerysetMixin, viewsets.ReadOnlyModelViewSet):
    """Inspect and resolve entity candidates surfaced by entity extraction."""

    # Read-only list/retrieve plus three POST review actions
    # (accept / reject / merge) defined below.
    serializer_class = EntityCandidateSerializer
    queryset = EntityCandidate.objects.select_related(
        "project",
        "first_seen_in",
        "merged_into",
    )

    def _refreshed_response(self, candidate):
        """Reload ``candidate`` from the database and return it serialized."""

        # The review helpers change the stored row, so re-read it before
        # building the response payload.
        candidate.refresh_from_db()
        return Response(self.get_serializer(candidate).data)

    @extend_schema(
        summary="Accept entity candidate",
        description="Promote a pending entity candidate into a tracked entity and backfill recent mentions.",
        request=None,
        responses={
            200: EntityCandidateSerializer,
            403: AUTHENTICATION_REQUIRED_RESPONSE,
        },
        tags=["Entity Catalog"],
    )
    @action(detail=True, methods=["post"], url_path="accept")
    def accept(self, request, *args, **kwargs):
        """Accept an entity candidate and return its updated representation."""

        target = self.get_object()
        accept_entity_candidate(target)
        return self._refreshed_response(target)

    @extend_schema(
        summary="Reject entity candidate",
        description="Mark a pending entity candidate as rejected without creating a tracked entity.",
        request=None,
        responses={
            200: EntityCandidateSerializer,
            403: AUTHENTICATION_REQUIRED_RESPONSE,
        },
        tags=["Entity Catalog"],
    )
    @action(detail=True, methods=["post"], url_path="reject")
    def reject(self, request, *args, **kwargs):
        """Reject an entity candidate and return its updated representation."""

        target = self.get_object()
        reject_entity_candidate(target)
        return self._refreshed_response(target)

    @extend_schema(
        summary="Merge entity candidate",
        description="Merge a pending entity candidate into an existing tracked entity from the same project.",
        request=EntityCandidateMergeSerializer,
        responses={
            200: EntityCandidateSerializer,
            400: EntityCandidateMergeSerializer,
            403: AUTHENTICATION_REQUIRED_RESPONSE,
        },
        tags=["Entity Catalog"],
    )
    @action(detail=True, methods=["post"], url_path="merge")
    def merge(self, request, *args, **kwargs):
        """Merge an entity candidate into an existing tracked entity."""

        target = self.get_object()
        # Validate the request body with the dedicated merge serializer,
        # giving it the same context the viewset's own serializer receives.
        merge_input = EntityCandidateMergeSerializer(
            data=request.data,
            context=self.get_serializer_context(),
        )
        merge_input.is_valid(raise_exception=True)
        destination = merge_input.validated_data["merged_into"]
        merge_entity_candidate(target, destination)
        return self._refreshed_response(target)


@document_project_owned_viewset(
Expand Down
6 changes: 6 additions & 0 deletions core/api_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from core.api import (
ContentViewSet,
EntityCandidateViewSet,
EntityViewSet,
IngestionRunViewSet,
ProjectConfigViewSet,
Expand All @@ -23,6 +24,11 @@
r"project-configs", ProjectConfigViewSet, basename="project-config"
)
project_router.register(r"entities", EntityViewSet, basename="project-entity")
project_router.register(
r"entity-candidates",
EntityCandidateViewSet,
basename="project-entity-candidate",
)
project_router.register(r"contents", ContentViewSet, basename="project-content")
project_router.register(
r"skill-results", SkillResultViewSet, basename="project-skill-result"
Expand Down
Loading
Loading