Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions institution/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import logging
import sys

from django.db.models import Q

from config import celery_app
from tracker.models import UnexpectedEvent

logger = logging.getLogger(__name__)

RAW_ORG_FIELDS = [
"raw_text",
"raw_institution_name",
"raw_country_name",
"raw_country_code",
"raw_state_name",
"raw_state_acron",
"raw_city_name",
]


def _build_raw_data_filter():
"""Build a Q filter matching instances where any RawOrganizationMixin field is filled."""
q_filter = Q()
for field in RAW_ORG_FIELDS:
q_filter |= Q(**{f"{field}__isnull": False}) & ~Q(**{field: ""})
return q_filter


@celery_app.task(bind=True, name="task_delete_unlinked_institutions_and_locations")
def task_delete_unlinked_institutions_and_locations(self):
"""
Task to clean up unlinked Institution and Location objects.

1. For all *History instances with RawOrganizationMixin data filled,
set institution = None
2. Delete Institution instances not linked to any *History
(clear FK/M2M fields first)
3. Delete all Location instances (clear FK/M2M fields first)
"""
from institution.models import Institution
from journal.models import (
CopyrightHolderHistory,
OwnerHistory,
PublisherHistory,
SponsorHistory,
)
from location.models import Location

try:
history_classes = [
OwnerHistory,
PublisherHistory,
SponsorHistory,
CopyrightHolderHistory,
]

# Step 1: For *History instances with any RawOrganizationMixin field
# filled, set institution = None
raw_data_q = _build_raw_data_filter()
for history_class in history_classes:
updated = history_class.objects.filter(
raw_data_q,
institution__isnull=False,
).update(institution=None)
logger.info(
"Set institution=None for %d %s instances",
updated,
history_class.__name__,
)

# Step 2: Delete Institution instances not linked to any *History
all_institution_ids = set(
Institution.objects.values_list("id", flat=True)
)
linked_institution_ids = set()
for history_class in history_classes:
linked_institution_ids.update(
history_class.objects.filter(
institution__institution__isnull=False,
).values_list("institution__institution_id", flat=True)
)
unlinked_ids = all_institution_ids - linked_institution_ids

# Clear M2M fields for unlinked institutions
for inst in Institution.objects.filter(id__in=unlinked_ids):
inst.institution_type_scielo.clear()
Comment on lines +85 to +87

# Clear FK fields and delete
Institution.objects.filter(id__in=unlinked_ids).update(
institution_identification=None,
location=None,
)
deleted_institutions = Institution.objects.filter(
id__in=unlinked_ids
).count()
Institution.objects.filter(id__in=unlinked_ids).delete()
Comment on lines +73 to +97

logger.info(
"Deleted %d unlinked Institution instances", deleted_institutions
)

# Step 3: Delete all Location instances
deleted_locations = Location.objects.count()
Location.objects.update(city=None, state=None, country=None)
Location.objects.all().delete()

Comment on lines +103 to +107
logger.info("Deleted %d Location instances", deleted_locations)

return {
"deleted_institutions": deleted_institutions,
"deleted_locations": deleted_locations,
}

except Exception as e:
exc_type, exc_value, exc_traceback = sys.exc_info()
UnexpectedEvent.create(
exception=e,
exc_traceback=exc_traceback,
detail={
"task": "task_delete_unlinked_institutions_and_locations",
},
)
raise
214 changes: 213 additions & 1 deletion institution/tests.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,215 @@
from django.test import TestCase

# Create your tests here.
from core.users.models import User
from institution.models import (
CopyrightHolder,
Institution,
InstitutionIdentification,
Owner,
Publisher,
Sponsor,
)
from institution.tasks import task_delete_unlinked_institutions_and_locations
from journal.models import (
CopyrightHolderHistory,
Journal,
OwnerHistory,
PublisherHistory,
SponsorHistory,
)
from location.models import City, Country, Location, State


class TaskDeleteUnlinkedInstitutionsAndLocationsTest(TestCase):
"""Tests for task_delete_unlinked_institutions_and_locations"""

def setUp(self):
self.user = User.objects.create_user(username="testuser")
self.journal = Journal.objects.create(title="Test Journal")

# Create Location with related objects
self.country = Country.objects.create(
name="Brazil", acronym="BR", acron3="BRA"
)
self.state = State.objects.create(name="São Paulo", acronym="SP")
self.city = City.objects.create(name="Campinas")
self.location = Location.objects.create(
creator=self.user,
country=self.country,
state=self.state,
city=self.city,
)

# Create InstitutionIdentification
self.inst_id = InstitutionIdentification.objects.create(
creator=self.user, name="Test Institution"
)

# Create Institution
self.institution = Institution.objects.create(
creator=self.user,
institution_identification=self.inst_id,
location=self.location,
)

def _create_linked_history(self, history_class, base_institution_class, **kwargs):
"""Helper to create a *History instance linked to an Institution."""
base_inst = base_institution_class.objects.create(
creator=self.user,
institution=self.institution,
)
history = history_class.objects.create(
journal=self.journal,
institution=base_inst,
creator=self.user,
**kwargs,
)
return history, base_inst

def test_history_with_raw_data_has_institution_set_to_none(self):
"""History instances with filled raw data should have institution set to None."""
history, _ = self._create_linked_history(
PublisherHistory,
Publisher,
raw_institution_name="Publisher Name",
)
self.assertIsNotNone(history.institution)

task_delete_unlinked_institutions_and_locations()

history.refresh_from_db()
self.assertIsNone(history.institution)

def test_history_without_raw_data_keeps_institution(self):
"""History instances without raw data should keep their institution link."""
history, _ = self._create_linked_history(
OwnerHistory,
Owner,
)
self.assertIsNotNone(history.institution)

task_delete_unlinked_institutions_and_locations()

history.refresh_from_db()
self.assertIsNotNone(history.institution)

def test_institution_linked_to_history_not_deleted(self):
"""Institution linked to a *History (without raw data) should not be deleted."""
self._create_linked_history(OwnerHistory, Owner)

task_delete_unlinked_institutions_and_locations()

self.assertTrue(
Institution.objects.filter(pk=self.institution.pk).exists()
)

def test_institution_not_linked_to_history_is_deleted(self):
"""Institution not linked to any *History should be deleted."""
task_delete_unlinked_institutions_and_locations()

self.assertFalse(
Institution.objects.filter(pk=self.institution.pk).exists()
)

def test_institution_fk_m2m_cleared_before_deletion(self):
"""Institution FK/M2M fields should be cleared before deletion."""
# Create a second institution to survive and verify the first is deleted
inst_id2 = InstitutionIdentification.objects.create(
creator=self.user, name="Surviving Institution"
)
institution2 = Institution.objects.create(
creator=self.user,
institution_identification=inst_id2,
)
# Link institution2 to a history without raw data so it survives
owner = Owner.objects.create(
creator=self.user, institution=institution2
)
OwnerHistory.objects.create(
journal=self.journal, institution=owner, creator=self.user
)

task_delete_unlinked_institutions_and_locations()

# The first institution (unlinked) should be deleted
self.assertFalse(
Institution.objects.filter(pk=self.institution.pk).exists()
)
# The InstitutionIdentification should still exist (FK was cleared)
self.assertTrue(
InstitutionIdentification.objects.filter(pk=self.inst_id.pk).exists()
)

def test_institution_unlinked_after_raw_data_filled(self):
"""Institution becomes unlinked when all its *History instances have raw data."""
self._create_linked_history(
PublisherHistory,
Publisher,
raw_text="Some raw text",
)

task_delete_unlinked_institutions_and_locations()

# After step 1, the history's institution is set to None,
# so in step 2 the institution is no longer linked
self.assertFalse(
Institution.objects.filter(pk=self.institution.pk).exists()
)

def test_all_locations_deleted(self):
"""All Location instances should be deleted."""
task_delete_unlinked_institutions_and_locations()

self.assertEqual(Location.objects.count(), 0)

def test_location_fk_cleared_before_deletion(self):
"""Location FK fields are cleared before deletion."""
task_delete_unlinked_institutions_and_locations()

# Location is deleted
self.assertEqual(Location.objects.count(), 0)
# Country, State, City still exist (FK was cleared before deletion)
self.assertTrue(Country.objects.filter(pk=self.country.pk).exists())
self.assertTrue(State.objects.filter(pk=self.state.pk).exists())
self.assertTrue(City.objects.filter(pk=self.city.pk).exists())

def test_multiple_history_classes(self):
"""Test with multiple history classes simultaneously."""
# Publisher with raw data
pub_history, _ = self._create_linked_history(
PublisherHistory,
Publisher,
raw_country_code="BR",
)
# Sponsor without raw data (linked)
spn_history, _ = self._create_linked_history(
SponsorHistory,
Sponsor,
)
# CopyrightHolder with raw data
ch_history, _ = self._create_linked_history(
CopyrightHolderHistory,
CopyrightHolder,
raw_city_name="São Paulo",
)

task_delete_unlinked_institutions_and_locations()

pub_history.refresh_from_db()
spn_history.refresh_from_db()
ch_history.refresh_from_db()

# Publisher and CopyrightHolder had raw data → institution = None
self.assertIsNone(pub_history.institution)
self.assertIsNone(ch_history.institution)
# Sponsor had no raw data → institution kept
self.assertIsNotNone(spn_history.institution)

def test_task_returns_counts(self):
"""Task should return counts of deleted objects."""
result = task_delete_unlinked_institutions_and_locations()

self.assertIn("deleted_institutions", result)
self.assertIn("deleted_locations", result)
self.assertEqual(result["deleted_institutions"], 1)
self.assertEqual(result["deleted_locations"], 1)
Loading