From dddfe5fddeed831b904f5968a8cb3b6f154006ab Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 15 Mar 2026 15:57:18 +0000 Subject: [PATCH 1/3] Initial plan From 641d4b5d055b4e2cea8e70217a569b3252236519 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 15 Mar 2026 16:29:52 +0000 Subject: [PATCH 2/3] Add task to delete unlinked Institution and Location objects Create institution/tasks.py with task_delete_unlinked_institutions_and_locations: - Step 1: Set institution=None on *History instances with RawOrganizationMixin data filled - Step 2: Delete Institution instances not linked to any *History (clear FK/M2M first) - Step 3: Delete all Location instances (clear FK/M2M first) Add comprehensive tests in institution/tests.py Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- institution/tasks.py | 124 +++++++++++++++++++++++++ institution/tests.py | 214 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 337 insertions(+), 1 deletion(-) create mode 100644 institution/tasks.py diff --git a/institution/tasks.py b/institution/tasks.py new file mode 100644 index 000000000..cf92e2d3b --- /dev/null +++ b/institution/tasks.py @@ -0,0 +1,124 @@ +import logging +import sys + +from django.db.models import Q + +from config import celery_app +from tracker.models import UnexpectedEvent + +logger = logging.getLogger(__name__) + +RAW_ORG_FIELDS = [ + "raw_text", + "raw_institution_name", + "raw_country_name", + "raw_country_code", + "raw_state_name", + "raw_state_acron", + "raw_city_name", +] + + +def _has_raw_data_filter(): + """Build a Q filter matching instances where any RawOrganizationMixin field is filled.""" + q_filter = Q() + for field in RAW_ORG_FIELDS: + q_filter |= Q(**{f"{field}__isnull": False}) & ~Q(**{field: ""}) + return q_filter + + +@celery_app.task(bind=True, name="task_delete_unlinked_institutions_and_locations") +def task_delete_unlinked_institutions_and_locations(self): + """ + Task to clean up unlinked Institution and Location objects. + + 1. For all *History instances with RawOrganizationMixin data filled, + set institution = None + 2. Delete Institution instances not linked to any *History + (clear FK/M2M fields first) + 3. Delete all Location instances (clear FK/M2M fields first) + """ + from institution.models import Institution + from journal.models import ( + CopyrightHolderHistory, + OwnerHistory, + PublisherHistory, + SponsorHistory, + ) + from location.models import Location + + try: + history_classes = [ + OwnerHistory, + PublisherHistory, + SponsorHistory, + CopyrightHolderHistory, + ] + + # Step 1: For *History instances with any RawOrganizationMixin field + # filled, set institution = None + raw_data_q = _has_raw_data_filter() + for history_class in history_classes: + updated = history_class.objects.filter( + raw_data_q, + institution__isnull=False, + ).update(institution=None) + logger.info( + "Set institution=None for %d %s instances", + updated, + history_class.__name__, + ) + + # Step 2: Delete Institution instances not linked to any *History + all_institution_ids = set( + Institution.objects.values_list("id", flat=True) + ) + linked_institution_ids = set() + for history_class in history_classes: + linked_institution_ids.update( + history_class.objects.filter( + institution__institution__isnull=False, + ).values_list("institution__institution_id", flat=True) + ) + unlinked_ids = all_institution_ids - linked_institution_ids + + # Clear M2M fields for unlinked institutions + for inst in Institution.objects.filter(id__in=unlinked_ids): + inst.institution_type_scielo.clear() + + # Clear FK fields and delete + Institution.objects.filter(id__in=unlinked_ids).update( + institution_identification=None, + location=None, + ) + deleted_institutions = Institution.objects.filter( + id__in=unlinked_ids + ).count() + Institution.objects.filter(id__in=unlinked_ids).delete() + + logger.info( + "Deleted %d unlinked Institution instances", deleted_institutions + ) + + # Step 3: Delete all Location instances + deleted_locations = Location.objects.count() + Location.objects.update(city=None, state=None, country=None) + Location.objects.all().delete() + + logger.info("Deleted %d Location instances", deleted_locations) + + return { + "deleted_institutions": deleted_institutions, + "deleted_locations": deleted_locations, + } + + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=e, + exc_traceback=exc_traceback, + detail={ + "task": "task_delete_unlinked_institutions_and_locations", + }, + ) + raise diff --git a/institution/tests.py b/institution/tests.py index 7ce503c2d..f589f9a9f 100755 --- a/institution/tests.py +++ b/institution/tests.py @@ -1,3 +1,215 @@ from django.test import TestCase -# Create your tests here. +from core.users.models import User +from institution.models import ( + CopyrightHolder, + Institution, + InstitutionIdentification, + Owner, + Publisher, + Sponsor, +) +from institution.tasks import task_delete_unlinked_institutions_and_locations +from journal.models import ( + CopyrightHolderHistory, + Journal, + OwnerHistory, + PublisherHistory, + SponsorHistory, +) +from location.models import City, Country, Location, State + + +class TaskDeleteUnlinkedInstitutionsAndLocationsTest(TestCase): + """Tests for task_delete_unlinked_institutions_and_locations""" + + def setUp(self): + self.user = User.objects.create_user(username="testuser") + self.journal = Journal.objects.create(title="Test Journal") + + # Create Location with related objects + self.country = Country.objects.create( + name="Brazil", acronym="BR", acron3="BRA" + ) + self.state = State.objects.create(name="São Paulo", acronym="SP") + self.city = City.objects.create(name="Campinas") + self.location = Location.objects.create( + creator=self.user, + country=self.country, + state=self.state, + city=self.city, + ) + + # Create InstitutionIdentification + self.inst_id = InstitutionIdentification.objects.create( + creator=self.user, name="Test Institution" + ) + + # Create Institution + self.institution = Institution.objects.create( + creator=self.user, + institution_identification=self.inst_id, + location=self.location, + ) + + def _create_linked_history(self, history_class, base_institution_class, **kwargs): + """Helper to create a *History instance linked to an Institution.""" + base_inst = base_institution_class.objects.create( + creator=self.user, + institution=self.institution, + ) + history = history_class.objects.create( + journal=self.journal, + institution=base_inst, + creator=self.user, + **kwargs, + ) + return history, base_inst + + def test_history_with_raw_data_has_institution_set_to_none(self): + """History instances with filled raw data should have institution set to None.""" + history, _ = self._create_linked_history( + PublisherHistory, + Publisher, + raw_institution_name="Publisher Name", + ) + self.assertIsNotNone(history.institution) + + task_delete_unlinked_institutions_and_locations() + + history.refresh_from_db() + self.assertIsNone(history.institution) + + def test_history_without_raw_data_keeps_institution(self): + """History instances without raw data should keep their institution link.""" + history, _ = self._create_linked_history( + OwnerHistory, + Owner, + ) + self.assertIsNotNone(history.institution) + + task_delete_unlinked_institutions_and_locations() + + history.refresh_from_db() + self.assertIsNotNone(history.institution) + + def test_institution_linked_to_history_not_deleted(self): + """Institution linked to a *History (without raw data) should not be deleted.""" + self._create_linked_history(OwnerHistory, Owner) + + task_delete_unlinked_institutions_and_locations() + + self.assertTrue( + Institution.objects.filter(pk=self.institution.pk).exists() + ) + + def test_institution_not_linked_to_history_is_deleted(self): + """Institution not linked to any *History should be deleted.""" + task_delete_unlinked_institutions_and_locations() + + self.assertFalse( + Institution.objects.filter(pk=self.institution.pk).exists() + ) + + def test_institution_fk_m2m_cleared_before_deletion(self): + """Institution FK/M2M fields should be cleared before deletion.""" + # Create a second institution to survive and verify the first is deleted + inst_id2 = InstitutionIdentification.objects.create( + creator=self.user, name="Surviving Institution" + ) + institution2 = Institution.objects.create( + creator=self.user, + institution_identification=inst_id2, + ) + # Link institution2 to a history without raw data so it survives + owner = Owner.objects.create( + creator=self.user, institution=institution2 + ) + OwnerHistory.objects.create( + journal=self.journal, institution=owner, creator=self.user + ) + + task_delete_unlinked_institutions_and_locations() + + # The first institution (unlinked) should be deleted + self.assertFalse( + Institution.objects.filter(pk=self.institution.pk).exists() + ) + # The InstitutionIdentification should still exist (FK was cleared) + self.assertTrue( + InstitutionIdentification.objects.filter(pk=self.inst_id.pk).exists() + ) + + def test_institution_unlinked_after_raw_data_filled(self): + """Institution becomes unlinked when all its *History instances have raw data.""" + self._create_linked_history( + PublisherHistory, + Publisher, + raw_text="Some raw text", + ) + + task_delete_unlinked_institutions_and_locations() + + # After step 1, the history's institution is set to None, + # so in step 2 the institution is no longer linked + self.assertFalse( + Institution.objects.filter(pk=self.institution.pk).exists() + ) + + def test_all_locations_deleted(self): + """All Location instances should be deleted.""" + task_delete_unlinked_institutions_and_locations() + + self.assertEqual(Location.objects.count(), 0) + + def test_location_fk_cleared_before_deletion(self): + """Location FK fields are cleared before deletion.""" + task_delete_unlinked_institutions_and_locations() + + # Location is deleted + self.assertEqual(Location.objects.count(), 0) + # Country, State, City still exist (FK was cleared before deletion) + self.assertTrue(Country.objects.filter(pk=self.country.pk).exists()) + self.assertTrue(State.objects.filter(pk=self.state.pk).exists()) + self.assertTrue(City.objects.filter(pk=self.city.pk).exists()) + + def test_multiple_history_classes(self): + """Test with multiple history classes simultaneously.""" + # Publisher with raw data + pub_history, _ = self._create_linked_history( + PublisherHistory, + Publisher, + raw_country_code="BR", + ) + # Sponsor without raw data (linked) + spn_history, _ = self._create_linked_history( + SponsorHistory, + Sponsor, + ) + # CopyrightHolder with raw data + ch_history, _ = self._create_linked_history( + CopyrightHolderHistory, + CopyrightHolder, + raw_city_name="São Paulo", + ) + + task_delete_unlinked_institutions_and_locations() + + pub_history.refresh_from_db() + spn_history.refresh_from_db() + ch_history.refresh_from_db() + + # Publisher and CopyrightHolder had raw data → institution = None + self.assertIsNone(pub_history.institution) + self.assertIsNone(ch_history.institution) + # Sponsor had no raw data → institution kept + self.assertIsNotNone(spn_history.institution) + + def test_task_returns_counts(self): + """Task should return counts of deleted objects.""" + result = task_delete_unlinked_institutions_and_locations() + + self.assertIn("deleted_institutions", result) + self.assertIn("deleted_locations", result) + self.assertEqual(result["deleted_institutions"], 1) + self.assertEqual(result["deleted_locations"], 1) From b935a44787ac973fc90906567c7fd7e4eac983b2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 15 Mar 2026 16:30:34 +0000 Subject: [PATCH 3/3] Rename _has_raw_data_filter to _build_raw_data_filter for clarity Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- institution/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/institution/tasks.py b/institution/tasks.py index cf92e2d3b..164b82242 100644 --- a/institution/tasks.py +++ b/institution/tasks.py @@ -19,7 +19,7 @@ ] -def _has_raw_data_filter(): +def _build_raw_data_filter(): """Build a Q filter matching instances where any RawOrganizationMixin field is filled.""" q_filter = Q() for field in RAW_ORG_FIELDS: @@ -57,7 +57,7 @@ def task_delete_unlinked_institutions_and_locations(self): # Step 1: For *History instances with any RawOrganizationMixin field # filled, set institution = None - raw_data_q = _has_raw_data_filter() + raw_data_q = _build_raw_data_filter() for history_class in history_classes: updated = history_class.objects.filter( raw_data_q,