Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added article/management/__init__.py
Empty file.
Empty file.
144 changes: 144 additions & 0 deletions article/management/commands/delete_article_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from django.core.management.base import BaseCommand
from django.db import transaction

from article import models as article_models
from education_directory.models import EducationDirectory
from event_directory.models import EventDirectory
from infrastructure_directory.models import InfrastructureDirectory
from policy_directory.models import PolicyDirectory
from scholarly_articles.models import ScholarlyArticles
from usefulmodels.models import ThematicArea


class Command(BaseCommand):
    """Delete all Article data together with the models that hang off it.

    Articles, SourceArticles, Contributors, Programs, Affiliations and
    Journals are removed unconditionally. Licenses are kept when a
    ScholarlyArticles row still points at them. Concepts are removed, and the
    ThematicAreas that were attached to them are removed only when no
    directory model (education, infrastructure, event, policy) uses them.
    """

    help = "Remove all Article data and related models data."

    def handle(self, *args, **options):
        """Run every deletion step inside one database transaction.

        The steps are wrapped in ``transaction.atomic()`` so that an error in
        a later step rolls back the earlier deletions instead of leaving the
        database half-wiped. Progress lines already written to stdout are not
        undone by a rollback.
        """
        # Program is deleted before Affiliation because Program rows carry an
        # ``affiliation`` reference; removing the referencing rows first keeps
        # the reported Program count accurate and avoids depending on that
        # relation's on_delete behaviour.
        unconditional_steps = (
            (article_models.Article, "Article(s)"),
            (article_models.SourceArticle, "SourceArticle(s)"),
            (article_models.Contributor, "Contributor(s)"),
            (article_models.Program, "Program(s)"),
            (article_models.Affiliation, "Affiliation(s)"),
            (article_models.Journal, "Journal(s)"),
        )

        with transaction.atomic():
            for model, label in unconditional_steps:
                self._delete_all(model, label)

            # Licenses still used by ScholarlyArticles are preserved.
            self._delete_licenses()

            # Concepts go away; their ThematicAreas only if no directory
            # model still uses them.
            self._delete_concepts_and_thematic_areas()

    def _delete_all(self, model, label):
        """Delete every row of ``model`` and report how many were removed.

        The number is taken from the per-model breakdown returned by
        ``QuerySet.delete()`` instead of a separate ``count()`` query, so it
        reflects the rows actually deleted and does not include rows of other
        models removed by cascade.

        Args:
            model: The model class whose rows are deleted.
            label: Text placed after the number in the progress message.
        """
        _, per_model = model.objects.all().delete()
        deleted = per_model.get(model._meta.label, 0)
        self.stdout.write(self.style.SUCCESS(f"Deleted {deleted} {label}"))

    def _delete_licenses(self):
        """Delete Licenses that no ScholarlyArticles row references.

        Referenced licenses are left in place and a warning is written for
        each of them; a summary line with both totals is written at the end.
        """
        # Collect the referenced IDs once so the loop does a set lookup
        # instead of one query per license.
        scholarly_license_ids = set(
            ScholarlyArticles.objects.filter(
                license__isnull=False
            ).values_list("license_id", flat=True)
        )

        deleted_count = 0
        skipped_count = 0

        for license_obj in article_models.License.objects.all():
            if license_obj.pk in scholarly_license_ids:
                self.stdout.write(
                    self.style.WARNING(
                        f"License '{license_obj}' is referenced by ScholarlyArticles. Skipping."
                    )
                )
                skipped_count += 1
                continue
            license_obj.delete()
            deleted_count += 1

        self.stdout.write(
            self.style.SUCCESS(
                f"Deleted {deleted_count} License(s), skipped {skipped_count}"
            )
        )

    def _delete_concepts_and_thematic_areas(self):
        """Delete all Concepts, then the ThematicAreas only they were using.

        ThematicAreas that were never attached to a Concept are not touched.
        Among those that were, any still attached to one of the directory
        models is kept and a warning is written for it.
        """
        # The IDs must be captured before the Concepts are deleted; afterwards
        # the relation rows that tell us which areas were involved are gone.
        thematic_area_ids = set(
            article_models.Concepts.objects.filter(
                thematic_areas__isnull=False
            ).values_list("thematic_areas__id", flat=True)
        )

        self._delete_all(article_models.Concepts, "Concepts")

        # Of the candidate areas, find the ones a directory model still uses.
        directory_models = (
            EducationDirectory,
            InfrastructureDirectory,
            EventDirectory,
            PolicyDirectory,
        )
        directory_ta_ids = set()
        for directory_model in directory_models:
            directory_ta_ids.update(
                directory_model.objects.filter(
                    thematic_areas__id__in=thematic_area_ids
                ).values_list("thematic_areas__id", flat=True)
            )

        deleted_count = 0
        skipped_count = 0

        for ta in ThematicArea.objects.filter(id__in=thematic_area_ids):
            if ta.pk in directory_ta_ids:
                self.stdout.write(
                    self.style.WARNING(
                        f"ThematicArea '{ta}' is referenced by directory models. Skipping."
                    )
                )
                skipped_count += 1
                continue
            ta.delete()
            deleted_count += 1

        self.stdout.write(
            self.style.SUCCESS(
                f"Deleted {deleted_count} ThematicArea(s), skipped {skipped_count}"
            )
        )
219 changes: 219 additions & 0 deletions article/tests_delete_article_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
import pytest
from django.core.management import call_command
from io import StringIO

from article.models import (
Affiliation,
Article,
Concepts,
Contributor,
Journal,
License,
Program,
SourceArticle,
)
from core.users.tests.factories import UserFactory
from education_directory.models import EducationDirectory
from usefulmodels.models import ThematicArea


@pytest.fixture
def user(db):
    """Provide a user built by ``UserFactory`` for use as a record creator."""
    created_user = UserFactory()
    return created_user


@pytest.fixture
def thematic_area(db, user):
    """Create a ThematicArea that is not attached to any directory model."""
    levels = {
        "level0": "Ciências Exatas e da Terra",
        "level1": "Ciência da Computação",
        "level2": "Teoria da Computação",
    }
    return ThematicArea.objects.create(creator=user, **levels)


@pytest.fixture
def thematic_area_in_directory(db, user):
    """Create a ThematicArea and attach it to a new EducationDirectory.

    Returns the ThematicArea; the directory entry exists only to hold the
    reference.
    """
    levels = {
        "level0": "Ciências Humanas",
        "level1": "Educação",
        "level2": "Ensino-Aprendizagem",
    }
    area = ThematicArea.objects.create(creator=user, **levels)
    directory_entry = EducationDirectory.objects.create(creator=user)
    directory_entry.thematic_areas.add(area)
    return area


@pytest.fixture
def article_license(db):
    """Create the CC-BY-4.0 License record used by the article fixture."""
    license_fields = {
        "name": "CC-BY-4.0",
        "url": "https://creativecommons.org/licenses/by/4.0/",
    }
    return License.objects.create(**license_fields)


@pytest.fixture
def journal(db):
    """Create a Journal named "Test Journal" with ISSN-L 1234-5678."""
    test_journal = Journal.objects.create(
        journal_name="Test Journal",
        journal_issn_l="1234-5678",
    )
    return test_journal


@pytest.fixture
def affiliation(db):
    """Create a single Affiliation named "Test University"."""
    institution = Affiliation.objects.create(name="Test University")
    return institution


@pytest.fixture
def program(db, affiliation):
    """Create a Program linked to the ``affiliation`` fixture."""
    test_program = Program.objects.create(
        name="Test Program",
        affiliation=affiliation,
    )
    return test_program


@pytest.fixture
def contributor(db, affiliation):
    """Create the Contributor "John Doe" and attach the affiliation to it."""
    person = Contributor.objects.create(family="Doe", given="John")
    person.affiliations.add(affiliation)
    return person


@pytest.fixture
def concept(db, thematic_area):
    """Create a Concepts row tagged with the ``thematic_area`` fixture."""
    ml_concept = Concepts.objects.create(
        specific_id="C123",
        name="Machine Learning",
        level=1,
    )
    ml_concept.thematic_areas.add(thematic_area)
    return ml_concept


@pytest.fixture
def source_article(db):
    """Create a standalone SourceArticle record."""
    fields = {
        "specific_id": "SA001",
        "doi": "10.1234/test",
        "title": "Test Source Article",
    }
    return SourceArticle.objects.create(**fields)


@pytest.fixture
def article(db, article_license, journal, contributor, concept, program, user):
    """Create an Article wired to every related fixture.

    The license, journal and creator are set at creation time; the
    contributor, concept and program are attached afterwards through their
    respective relation managers.
    """
    test_article = Article.objects.create(
        title="Test Article",
        doi="10.1234/article",
        year="2023",
        license=article_license,
        journal=journal,
        creator=user,
    )
    test_article.contributors.add(contributor)
    test_article.concepts.add(concept)
    test_article.programs.add(program)
    return test_article


@pytest.mark.django_db
class TestDeleteArticleDataCommand:
    """Checks for the ``delete_article_data`` management command.

    Each test runs the command against the rows created by its fixtures and
    verifies both the resulting table contents and the progress messages.
    """

    @staticmethod
    def _run_command():
        """Invoke the command and return everything it wrote to stdout."""
        buffer = StringIO()
        call_command("delete_article_data", stdout=buffer)
        return buffer.getvalue()

    def test_deletes_articles(self, article):
        assert Article.objects.count() == 1
        output = self._run_command()
        assert Article.objects.count() == 0
        assert "Deleted 1 Article(s)" in output

    def test_deletes_source_articles(self, source_article):
        assert SourceArticle.objects.count() == 1
        output = self._run_command()
        assert SourceArticle.objects.count() == 0
        assert "Deleted 1 SourceArticle(s)" in output

    def test_deletes_contributors(self, contributor):
        assert Contributor.objects.count() == 1
        output = self._run_command()
        assert Contributor.objects.count() == 0
        assert "Deleted 1 Contributor(s)" in output

    def test_deletes_affiliations(self, affiliation):
        assert Affiliation.objects.count() == 1
        output = self._run_command()
        assert Affiliation.objects.count() == 0
        assert "Deleted 1 Affiliation(s)" in output

    def test_deletes_journals(self, journal):
        assert Journal.objects.count() == 1
        output = self._run_command()
        assert Journal.objects.count() == 0
        assert "Deleted 1 Journal(s)" in output

    def test_deletes_programs(self, program):
        assert Program.objects.count() == 1
        output = self._run_command()
        assert Program.objects.count() == 0
        assert "Deleted 1 Program(s)" in output

    def test_deletes_licenses(self, article_license):
        assert License.objects.count() == 1
        output = self._run_command()
        assert License.objects.count() == 0
        assert "Deleted 1 License(s), skipped 0" in output

    def test_deletes_concepts(self, concept):
        assert Concepts.objects.count() == 1
        output = self._run_command()
        assert Concepts.objects.count() == 0
        assert "Deleted 1 Concepts" in output

    def test_deletes_thematic_area_not_in_directory(self, concept, thematic_area):
        assert ThematicArea.objects.count() == 1
        output = self._run_command()
        assert ThematicArea.objects.count() == 0
        assert "Deleted 1 ThematicArea(s), skipped 0" in output

    def test_skips_thematic_area_in_directory(
        self, concept, thematic_area, thematic_area_in_directory
    ):
        # thematic_area is attached only to the concept, so it is removed;
        # thematic_area_in_directory is also attached to an
        # EducationDirectory, so it has to survive.
        concept.thematic_areas.add(thematic_area_in_directory)
        assert ThematicArea.objects.count() == 2

        output = self._run_command()

        # Only the directory-backed area is left.
        assert ThematicArea.objects.count() == 1
        surviving = ThematicArea.objects.filter(id=thematic_area_in_directory.id)
        assert surviving.exists()
        assert "Deleted 1 ThematicArea(s), skipped 1" in output
        assert "is referenced by directory models. Skipping." in output

    def test_deletes_all_related_data(
        self,
        article,
        source_article,
        thematic_area,
    ):
        self._run_command()

        emptied_models = (
            Article,
            SourceArticle,
            Contributor,
            Affiliation,
            Journal,
            Program,
            License,
            Concepts,
        )
        for model in emptied_models:
            assert model.objects.count() == 0

        # The area is not used by any directory, so it is removed as well.
        assert ThematicArea.objects.filter(id=thematic_area.id).exists() is False

    def test_empty_database(self, db):
        output = self._run_command()

        expected_messages = (
            "Deleted 0 Article(s)",
            "Deleted 0 SourceArticle(s)",
            "Deleted 0 Contributor(s)",
            "Deleted 0 Affiliation(s)",
            "Deleted 0 Journal(s)",
            "Deleted 0 Program(s)",
            "Deleted 0 License(s), skipped 0",
            "Deleted 0 Concepts",
            "Deleted 0 ThematicArea(s), skipped 0",
        )
        for message in expected_messages:
            assert message in output