Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 56 additions & 3 deletions core/home/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
from wagtailcaptcha.models import WagtailCaptchaEmailForm

from collection.models import Collection
from core.home.utils.export_journals import (
generate_csv_response,
generate_xls_response,
get_scielo_journals_data,
)
from core.home.utils.get_social_networks import get_social_networks
from journal.choices import STUDY_AREA
from journal.models import OwnerHistory, SciELOJournal
Expand Down Expand Up @@ -79,6 +84,23 @@ def _default_context(context):
context["page_about"] = get_page_about()


class JournalDownloadMixin:
def get_export_filters(self, request):
return None

@re_path(r"^download-csv/$", name="download_csv")
def download_csv(self, request):
filters = self.get_export_filters(request)
journals_data = get_scielo_journals_data(filters)
return generate_csv_response(journals_data)
Comment on lines +91 to +95
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The key new behavior is “export respects the same filters as the listing page” via get_export_filters(request) + routable endpoints. There are good unit tests for the export utility, but there’s no coverage shown for the routable download routes applying real request query params (e.g., search_term, start_with_letter, tab) end-to-end. Add an integration-style test that requests the page’s download-csv/ (and/or download-xls/) with query parameters and asserts the response contains only the filtered journal(s).

Copilot uses AI. Check for mistakes.

@re_path(r"^download-xls/$", name="download_xls")
def download_xls(self, request):
filters = self.get_export_filters(request)
journals_data = get_scielo_journals_data(filters)
return generate_xls_response(journals_data)


def get_page_about():
try:
locale = _get_current_locale()
Expand Down Expand Up @@ -221,7 +243,15 @@ def get_context(self, request, *args, **kwargs):
return context


class ListPageJournal(Page):
class ListPageJournal(JournalDownloadMixin, RoutablePageMixin, Page):
def get_export_filters(self, request):
search_term = request.GET.get("search_term", "")
starts_with_letter = request.GET.get("start_with_letter", "")
active_or_discontinued = list(request.GET.get("tab", ""))
return default_journal_filter(
search_term, starts_with_letter, active_or_discontinued
)
Comment on lines +247 to +253
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

list(request.GET.get("tab", "")) turns a tab value like "active" into a list of characters, which will make Q(status__in=active_or_discontinued) effectively never match. Use request.GET.getlist("tab") (if the UI can send multiple values) or normalize a single tab value into a one-element list (e.g., [tab] if tab else []) before passing to default_journal_filter.

Copilot uses AI. Check for mistakes.

def get_context(self, request, *args, **kwargs):
context = super().get_context(request, *args, **kwargs)
search_term = request.GET.get("search_term", "")
Expand All @@ -237,7 +267,22 @@ def get_context(self, request, *args, **kwargs):
return context


class ListPageJournalByPublisher(Page):
class ListPageJournalByPublisher(JournalDownloadMixin, RoutablePageMixin, Page):
def get_export_filters(self, request):
search_term = request.GET.get("search_term", "")
starts_with_letter = request.GET.get("start_with_letter", "")
active_or_discontinued = list(request.GET.get("tab", ""))
filters = Q(status__in=SCIELO_STATUS_CHOICES)
if search_term:
filters &= Q(journal__title__icontains=search_term) | Q(
journal__owner_history__institution__institution__institution_identification__name__icontains=search_term
)
if starts_with_letter:
filters &= Q(journal__title__istartswith=starts_with_letter)
if active_or_discontinued:
filters &= Q(status__in=active_or_discontinued)
return filters
Comment on lines +271 to +284
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue as above: list(request.GET.get("tab", "")) produces a character list and breaks the status__in filter. Switch to request.GET.getlist("tab") or wrap the single tab string into a list; otherwise the export for the “tab” filter will be incorrect.

Copilot uses AI. Check for mistakes.

def get_context(self, request, *args, **kwargs):
context = super().get_context(request, *args, **kwargs)
search_term = request.GET.get("search_term", "")
Expand Down Expand Up @@ -283,7 +328,15 @@ def get_context(self, request, *args, **kwargs):
return context


class ListPageJournalByCategory(RoutablePageMixin, Page):
class ListPageJournalByCategory(JournalDownloadMixin, RoutablePageMixin, Page):
def get_export_filters(self, request):
search_term = request.GET.get("search_term", "")
starts_with_letter = request.GET.get("start_with_letter", "")
active_or_discontinued = list(request.GET.get("tab", ""))
return default_journal_filter(
search_term, starts_with_letter, active_or_discontinued
)

def get_context(self, request, *args, **kwargs):
context = super().get_context(request, *args, **kwargs)

Expand Down
90 changes: 85 additions & 5 deletions core/home/tests.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
from unittest.mock import patch

from django.db.models import Q
from django.test import TestCase

from collection.models import Collection
from core.users.models import User
from journal.models import Journal, SciELOJournal

from core.home.views import _get_scielo_journals_data
from core.home.utils.export_journals import (
generate_csv_response,
generate_xls_response,
get_scielo_journals_data,
)


class TestGetScieloJournalsData(TestCase):
Expand All @@ -30,15 +35,15 @@ def setUp(self):

def test_scielo_url_does_not_have_double_http_prefix(self):
"""URL must not contain 'http://http://' when domain already has http://"""
data = _get_scielo_journals_data()
data = get_scielo_journals_data()
self.assertTrue(len(data) > 0)
for item in data:
self.assertNotIn("http://http://", item["scielo_url"])
self.assertNotIn("http://https://", item["scielo_url"])

def test_scielo_url_is_well_formed(self):
"""URL must be a valid scielo.php URL with the correct domain"""
data = _get_scielo_journals_data()
data = get_scielo_journals_data()
self.assertEqual(len(data), 1)
expected_url = (
"http://www.scielo.org.pe/scielo.php?script=sci_serial"
Expand All @@ -50,15 +55,90 @@ def test_scielo_url_strips_trailing_slash_from_domain(self):
"""Trailing slash in domain must not produce double slash in URL"""
self.collection.domain = "http://www.scielo.org.pe/"
self.collection.save()
data = _get_scielo_journals_data()
data = get_scielo_journals_data()
self.assertEqual(len(data), 1)
self.assertNotIn("//scielo.php", data[0]["scielo_url"])

def test_scielo_url_with_https_domain(self):
"""URL must be correct when domain uses https://"""
self.collection.domain = "https://www.scielo.br"
self.collection.save()
data = _get_scielo_journals_data()
data = get_scielo_journals_data()
self.assertEqual(len(data), 1)
self.assertTrue(data[0]["scielo_url"].startswith("https://www.scielo.br/"))
self.assertNotIn("https://https://", data[0]["scielo_url"])

def test_get_scielo_journals_data_with_title_filter(self):
"""Filters should be applied to the queryset"""
other_journal = Journal.objects.create(
creator=self.user,
title="Other Journal",
)
SciELOJournal.objects.create(
issn_scielo="1111-1111",
collection=self.collection,
journal=other_journal,
journal_acron="other",
)
filters = Q(journal__title__icontains="Peru")
data = get_scielo_journals_data(filters)
self.assertEqual(len(data), 1)
self.assertEqual(data[0]["title"], "Test Journal Peru")

def test_get_scielo_journals_data_without_filters_returns_all(self):
"""Without filters, all journals should be returned"""
other_journal = Journal.objects.create(
creator=self.user,
title="Other Journal",
)
SciELOJournal.objects.create(
issn_scielo="1111-1111",
collection=self.collection,
journal=other_journal,
journal_acron="other",
)
data = get_scielo_journals_data()
self.assertEqual(len(data), 2)


class TestGenerateCsvResponse(TestCase):
def test_csv_response_content_type(self):
response = generate_csv_response([])
self.assertEqual(response["Content-Type"], "text/csv")

def test_csv_response_has_attachment_header(self):
response = generate_csv_response([])
self.assertIn("attachment", response["Content-Disposition"])
self.assertIn(".csv", response["Content-Disposition"])

def test_csv_response_contains_headers(self):
response = generate_csv_response([])
content = response.content.decode("utf-8")
self.assertIn("journals", content)
self.assertIn("scielo_url", content)
self.assertIn("publisher", content)

def test_csv_response_contains_data(self):
data = [
{
"title": "Test Journal",
"scielo_url": "http://example.com/journal",
"owner": "Test Publisher",
}
]
response = generate_csv_response(data)
content = response.content.decode("utf-8")
self.assertIn("Test Journal", content)
self.assertIn("http://example.com/journal", content)
self.assertIn("Test Publisher", content)


class TestGenerateXlsResponse(TestCase):
def test_xls_response_content_type(self):
response = generate_xls_response([])
self.assertEqual(response["Content-Type"], "application/vnd.ms-excel")

def test_xls_response_has_attachment_header(self):
response = generate_xls_response([])
self.assertIn("attachment", response["Content-Disposition"])
self.assertIn(".xls", response["Content-Disposition"])
Empty file added core/home/utils/__init__.py
Empty file.
90 changes: 90 additions & 0 deletions core/home/utils/export_journals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import csv
import logging

import xlwt
from django.http import HttpResponse
from django.utils import timezone

from journal.models import SciELOJournal

logger = logging.getLogger(__name__)

HEADERS = ["journals", "scielo_url", "publisher"]
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The exported column headers are journals/publisher, but the internal data keys are title/owner. This mismatch makes the utility harder to reuse correctly and increases the chance of future mistakes. Consider aligning the dict keys to the export contract (e.g., use journals and publisher everywhere) and then read those same keys in both generate_csv_response and generate_xls_response.

Copilot uses AI. Check for mistakes.


def get_scielo_journals_data(filters=None):
try:
qs = SciELOJournal.objects.all()
if filters is not None:
qs = qs.filter(filters)
scielo_journals = qs.values(
"journal__title",
"collection__domain",
"journal__owner_history__institution__institution__institution_identification__name",
"issn_scielo",
)
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Export rows can be duplicated if the joins (notably journal__owner_history...) produce multiple rows per SciELOJournal. This will lead to repeated lines in CSV/XLS exports. A concrete mitigation is applying distinct() to the values() queryset (e.g., qs.values(...).distinct()), or otherwise selecting a single “current” owner record before exporting.

Suggested change
)
).distinct()

Copilot uses AI. Check for mistakes.

formatted_data = []
for journal in scielo_journals:
title = journal.get("journal__title", "")
issn_scielo = journal.get("issn_scielo", "")
domain = journal.get("collection__domain", "")
owner = journal.get(
"journal__owner_history__institution__institution__institution_identification__name",
"",
)
scielo_url = (
f"{domain.rstrip('/')}/scielo.php?script=sci_serial&pid={issn_scielo}&lng=en"
)
formatted_data.append(
{
"title": title,
"scielo_url": scielo_url,
"owner": owner,
}
Comment on lines +40 to +44
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The exported column headers are journals/publisher, but the internal data keys are title/owner. This mismatch makes the utility harder to reuse correctly and increases the chance of future mistakes. Consider aligning the dict keys to the export contract (e.g., use journals and publisher everywhere) and then read those same keys in both generate_csv_response and generate_xls_response.

Copilot uses AI. Check for mistakes.
)
return formatted_data
except Exception as e:
logger.error(f"Error fetching scielo journals data: {e}")
return []
Comment on lines +47 to +49
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Logging the exception message without a traceback makes diagnosing production issues harder. Prefer logger.exception("Error fetching scielo journals data") (or logger.error(..., exc_info=True)) so the traceback is captured.

Copilot uses AI. Check for mistakes.


def generate_csv_response(journals_data):
date = timezone.now().strftime("%Y-%m-%d")
filename = f"journals_{date}.csv"
response = HttpResponse(content_type="text/csv")
response["Content-Disposition"] = f'attachment; filename="{filename}"'
try:
writer = csv.writer(response)
writer.writerow(HEADERS)
for journal in journals_data:
writer.writerow(
[journal.get("title"), journal.get("scielo_url"), journal.get("owner")]
)
Comment on lines +61 to +63
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The exported column headers are journals/publisher, but the internal data keys are title/owner. This mismatch makes the utility harder to reuse correctly and increases the chance of future mistakes. Consider aligning the dict keys to the export contract (e.g., use journals and publisher everywhere) and then read those same keys in both generate_csv_response and generate_xls_response.

Copilot uses AI. Check for mistakes.
logger.info(f"Generated CSV file with: {len(journals_data)} journals")
except Exception as e:
logger.error(f"Error generating CSV file: {e}")
response = HttpResponse("Error generating CSV file", status=500)
return response


def generate_xls_response(journals_data):
date = timezone.now().strftime("%Y-%m-%d")
filename = f"journals_{date}.xls"
response = HttpResponse(content_type="application/vnd.ms-excel")
response["Content-Disposition"] = f'attachment; filename="{filename}"'
try:
wb = xlwt.Workbook(encoding="utf-8")
ws = wb.add_sheet("journals")
for col, header in enumerate(HEADERS):
ws.write(0, col, header)
for row, journal in enumerate(journals_data, start=1):
ws.write(row, 0, journal.get("title"))
ws.write(row, 1, journal.get("scielo_url"))
ws.write(row, 2, journal.get("owner"))
wb.save(response)
logger.info(f"Generated XLS file with: {len(journals_data)} journals")
except Exception as e:
logger.error(f"Error generating XLS file: {e}")
response = HttpResponse("Error generating XLS file", status=500)
return response
Loading
Loading