diging · Girik1105 · May 11, 2026 · May 12, 2026 · May 19, 2026 · May 20, 2026
diff --git a/hospexplorer/ask/admin.py b/hospexplorer/ask/admin.py
@@ -16,7 +16,18 @@
 from django.shortcuts import render
 from django.urls import path, reverse
 
-from ask.models import Conversation, TermsAcceptance, QARecord, SimWorkflow, WebsiteResource, PDFResource
+from ask.models import (
+    Conversation,
+    TermsAcceptance,
+    QARecord,
+    SimWorkflow,
+    WebsiteResource,
+    PDFResource,
+    DocumentType,
+    DocumentAuthorInstitution,
+    InstitutionType,
+)
+from ask.admin_csv import import_names_csv, normalize_partial_date
 from ask.kb_connector import delete_kb_document
 from ask.tasks import run_kb_resource_upload
 
@@ -205,16 +216,98 @@ def delete_queryset(self, request, queryset):
                 return
 
 
+class LookupCSVImportMixin:
+    """Adds an Import CSV button + upload view to a lookup ModelAdmin.
+
+    CSV is single-column name. Duplicates are skipped, header row optional.
+
+    The mixin registers one extra admin URL, ``import-csv/``, named
+    ``<app_label>_<model_name>_import_csv``. It must be combined with a
+    ``ModelAdmin`` (it relies on ``admin_site``, ``model``, ``message_user``
+    and ``has_add_permission``).
+    """
+
+    change_list_template = "admin/ask/lookup_change_list.html"
+
+    def get_urls(self):
+        """Return the admin URLs with the ``import-csv/`` route placed first."""
+        opts = self.model._meta
+        import_route = path(
+            "import-csv/",
+            # admin_view() only verifies access to the admin site itself;
+            # the model-level permission is enforced inside the view.
+            self.admin_site.admin_view(self.import_csv_view),
+            name=f"{opts.app_label}_{opts.model_name}_import_csv",
+        )
+        return [import_route] + super().get_urls()
+
+    def import_csv_view(self, request):
+        """Render the upload form (GET) or import the uploaded CSV (POST).
+
+        Every POST outcome, success or failure, is reported through the admin
+        message framework and followed by a redirect to the changelist.
+
+        Raises:
+            PermissionDenied: If the user may not add rows of this model.
+        """
+        # local import so the module-level import block stays untouched
+        from django.core.exceptions import PermissionDenied
+
+        # importing creates rows, so require the same permission as "Add"
+        if not self.has_add_permission(request):
+            raise PermissionDenied
+
+        opts = self.model._meta
+        changelist_url = reverse(f"admin:{opts.app_label}_{opts.model_name}_changelist")
+
+        if request.method == "POST":
+            file_obj = request.FILES.get("csv_file")
+            if file_obj is None:
+                self.message_user(request, "No file provided.", level="error")
+            elif not file_obj.name.lower().endswith(".csv"):
+                self.message_user(request, "File must have a .csv extension.", level="error")
+            else:
+                try:
+                    created, skipped = import_names_csv(self.model, file_obj)
+                except Exception as e:
+                    # boundary handler: log the traceback, show the admin a
+                    # short reason instead of a 500 page
+                    logger.exception("CSV import failed for %s", self.model.__name__)
+                    self.message_user(request, f"Import failed: {e}", level="error")
+                else:
+                    self.message_user(
+                        request,
+                        f"Imported {created} new {opts.verbose_name_plural} "
+                        f"(skipped {skipped} duplicate or empty rows).",
+                    )
+            return HttpResponseRedirect(changelist_url)
+
+        context = {
+            **self.admin_site.each_context(request),
+            "title": f"Import {opts.verbose_name_plural} from CSV",
+            "opts": opts,
+            "changelist_url": changelist_url,
+        }
+        return render(request, "admin/ask/lookup_csv_import.html", context)
+
+
+@admin.register(DocumentType)
+class DocumentTypeAdmin(LookupCSVImportMixin, admin.ModelAdmin):
+    """Admin for the ``DocumentType`` lookup; CSV import comes from the mixin."""
+
+    # the lookup is shown and searched by its ``name`` column only
+    list_display = ("name",)
+    search_fields = ("name",)
+
+
+@admin.register(DocumentAuthorInstitution)
+class DocumentAuthorInstitutionAdmin(LookupCSVImportMixin, admin.ModelAdmin):
+    """Admin for the ``DocumentAuthorInstitution`` lookup; CSV import comes from the mixin."""
+
+    # the lookup is shown and searched by its ``name`` column only
+    list_display = ("name",)
+    search_fields = ("name",)
+
+
+@admin.register(InstitutionType)
+class InstitutionTypeAdmin(LookupCSVImportMixin, admin.ModelAdmin):
+    """Admin for the ``InstitutionType`` lookup; CSV import comes from the mixin."""
+
+    # the lookup is shown and searched by its ``name`` column only
+    list_display = ("name",)
+    search_fields = ("name",)
+
+
 @admin.register(WebsiteResource)
 class WebsiteResourceAdmin(KBDeleteAdminMixin, admin.ModelAdmin):
     list_display = ("title", "url", "creator", "status", "modified_at")
     list_filter = ("status",)
     search_fields = ("title", "url")
     readonly_fields = ("created_at", "modified_at", "creator", "modifier", "mcp_kb_document_id", "status", "status_message")
+    fieldsets = (
+        (None, {"fields": ("title", "description", "url")}),
+        ("Metadata", {"fields": (
+            "date_published",
+            "document_type", "document_author_institution", "institution_type",
+        )}),
+        ("Status", {"fields": (
+            "status", "status_message", "mcp_kb_document_id",
+            "created_at", "modified_at", "creator", "modifier",
+        )}),
+    )
     help_texts = {
         "title": "A short name to identify this website resource.",
         "description": "Optional details about what this website covers.",
         "url": "The URL the LLM will use as context when answering questions.",
+        "date_published": "Partial ISO date: YYYY, YYYY-MM, or YYYY-MM-DD. Leave blank if unknown.",
     }
 
     def get_form(self, request, obj=None, **kwargs):
@@ -248,16 +341,70 @@ def save_model(self, request, obj, form, change):
         )
 
 
+# Optional controlled-list metadata columns the zip-CSV importer reads onto
+# each PDFResource. Each key is both the CSV column name and the resource
+# attribute that receives the lookup row; the value is the lookup model.
+# Controlled-list values create the matching lookup row the first time they
+# appear, so the available options grow from what the imports actually use.
+# The free-text ``date_published`` column is not listed here; it is handled
+# separately in _apply_zip_csv_metadata.
+ZIP_CSV_LOOKUP_COLUMNS = {
+    "document_type": DocumentType,
+    "document_author_institution": DocumentAuthorInstitution,
+    "institution_type": InstitutionType,
+}
+
+
+def _apply_zip_csv_metadata(obj, row):
+    """Copy the optional metadata columns of one zip-CSV row onto ``obj``.
+
+    ``date_published`` is validated with ``normalize_partial_date``; every
+    column listed in ``ZIP_CSV_LOOKUP_COLUMNS`` is resolved to a lookup row,
+    which is created on first use. A value that cannot be applied never
+    aborts the row: the attribute is simply not assigned and a message
+    describing the problem is collected instead.
+
+    Returns the list of collected human-readable warnings (empty when every
+    supplied value was applied).
+    """
+    def cell(key):
+        # a missing column and an empty cell both normalise to ""
+        return (row.get(key) or "").strip()
+
+    problems = []
+
+    published = cell("date_published")
+    if published:
+        try:
+            obj.date_published = normalize_partial_date(published)
+        except ValueError:
+            problems.append(
+                f"invalid date_published '{published}' "
+                "(use YYYY, YYYY-MM or YYYY-MM-DD); left blank"
+            )
+
+    for field_name, lookup_model in ZIP_CSV_LOOKUP_COLUMNS.items():
+        text = cell(field_name)
+        if text and len(text) > 255:
+            problems.append(f"{field_name} value exceeds 255 characters; left blank")
+        elif text:
+            instance, _created = lookup_model.objects.get_or_create(name=text)
+            setattr(obj, field_name, instance)
+
+    return problems
+
+
 @admin.register(PDFResource)
 class PDFResourceAdmin(KBDeleteAdminMixin, admin.ModelAdmin):
     list_display = ("title", "file", "creator", "status", "modified_at")
     list_filter = ("status",)
     search_fields = ("title",)
     readonly_fields = ("created_at", "modified_at", "creator", "modifier", "mcp_kb_document_id", "status", "status_message")
+    fieldsets = (
+        (None, {"fields": ("title", "description", "file")}),
+        ("Metadata", {"fields": (
+            "date_published",
+            "document_type", "document_author_institution", "institution_type",
+        )}),
+        ("Status", {"fields": (
+            "status", "status_message", "mcp_kb_document_id",
+            "created_at", "modified_at", "creator", "modifier",
+        )}),
+    )
     help_texts = {
         "title": "A short name to identify this PDF resource.",
         "description": "Optional details about what this PDF covers.",
         "file": "The PDF file the LLM will use as context when answering questions.",
+        "date_published": "Partial ISO date: YYYY, YYYY-MM, or YYYY-MM-DD. Leave blank if unknown.",
     }
 
     # Column names the bulk-import CSV must define (first = zip member, second = resource title)
@@ -329,6 +476,9 @@ def zip_upload_view(self, request):
                 messages.error(request, "Please select a zip file to upload.")
                 return HttpResponseRedirect(request.path)
 
+            update_file = bool(request.POST.get("update_file"))
+            update_metadata = bool(request.POST.get("update_metadata"))
+
             try:
                 archive = zipfile.ZipFile(zip_file)
             except zipfile.BadZipFile:
@@ -356,7 +506,12 @@ def _is_real(name):
 
                 csv_text = archive.read(csv_names[0]).decode("utf-8-sig")
                 reader = csv.DictReader(io.StringIO(csv_text))
-                csv_columns = {(name or "").strip() for name in (reader.fieldnames or [])}
+                # strip header names so the column check and per-row lookups use
+                # the same keys; otherwise a header like "filename, title" leaves
+                # stray spaces and every row reads as missing its required fields
+                if reader.fieldnames:
+                    reader.fieldnames = [(name or "").strip() for name in reader.fieldnames]
+                csv_columns = set(reader.fieldnames or [])
                 if not required_columns.issubset(csv_columns):
                     missing = ", ".join(sorted(required_columns - csv_columns))
                     messages.error(request, f"CSV is missing required columns: {missing}.")
@@ -375,7 +530,9 @@ def _is_real(name):
 
                 total = 0
                 saved = 0
-                queued_ids = []
+                updated = 0
+                queued_new_ids = []
+                queued_replace_ids = []
                 for row in reader:
                     total += 1
                     filename = (row.get(filename_col) or "").strip()
@@ -388,19 +545,45 @@ def _is_real(name):
                         continue
 
                     basename = os.path.basename(filename)
-                    if (basename, title) in existing_pdfs:
-                        messages.warning(request, f"Row {total}: '{filename}' already exists; skipped.")
-                        continue
+                    is_update = (basename, title) in existing_pdfs
 
-                    member = zip_members.get(filename) or zip_members.get(basename)
-                    if not member:
-                        messages.warning(request, f"Row {total}: '{filename}' not in zip; skipped.")
+                    if is_update and not (update_file or update_metadata):
+                        messages.warning(request, f"Row {total}: '{filename}' already exists; skipped.")
                         continue
 
-                    try:
-                        pdf_bytes = archive.read(member)
-                    except KeyError:
-                        messages.warning(request, f"Row {total}: could not read '{filename}'; skipped.")
+                    # only read PDF bytes when we'll actually use them: new rows
+                    # always need them; existing rows only when update_file is set
+                    pdf_bytes = None
+                    if (not is_update) or update_file:
+                        member = zip_members.get(filename) or zip_members.get(basename)
+                        if not member:
+                            messages.warning(request, f"Row {total}: '{filename}' not in zip; skipped.")
+                            continue
+                        try:
+                            pdf_bytes = archive.read(member)
+                        except KeyError:
+                            messages.warning(request, f"Row {total}: could not read '{filename}'; skipped.")
+                            continue
+
+                    if is_update:
+                        match = PDFResource.objects.filter(
+                            original_filename=basename, title=title
+                        ).first()
+                        if match is None:
+                            messages.warning(request, f"Row {total}: '{filename}' lookup failed; skipped.")
+                            continue
+                        if update_metadata:
+                            for warning in _apply_zip_csv_metadata(match, row):
+                                messages.warning(request, f"Row {total}: {warning}")
+                        if update_file:
+                            match.file.delete(save=False)
+                            match.file.save(basename, ContentFile(pdf_bytes), save=False)
+                        match.modifier = request.user
+                        match.status = PDFResource.Status.PROCESSING
+                        match.status_message = "Queued for Knowledge Base re-upload."
+                        match.save()
+                        updated += 1
+                        queued_replace_ids.append(match.pk)
                         continue
 
                     obj = PDFResource(
@@ -411,26 +594,39 @@ def _is_real(name):
                         status=PDFResource.Status.PROCESSING,
                         status_message="Queued for Knowledge Base upload.",
                     )
+                    for warning in _apply_zip_csv_metadata(obj, row):
+                        messages.warning(request, f"Row {total}: {warning}")
                     obj.file.save(basename, ContentFile(pdf_bytes), save=True)
                     saved += 1
                     existing_pdfs.add((basename, title))
-                    queued_ids.append(obj.pk)
+                    queued_new_ids.append(obj.pk)
 
                 # fire KB uploads after the request transaction commits so background
                 # threads see the just-saved rows
-                def _start_uploads(ids=tuple(queued_ids)):
-                    for pk in ids:
+                def _start_uploads(
+                    new_ids=tuple(queued_new_ids),
+                    replace_ids=tuple(queued_replace_ids),
+                ):
+                    for pk in new_ids:
+                        threading.Thread(
+                            target=run_kb_resource_upload,
+                            args=("pdf", pk),
+                            daemon=True,
+                        ).start()
+                    for pk in replace_ids:
                         threading.Thread(
                             target=run_kb_resource_upload,
                             args=("pdf", pk),
+                            kwargs={"replace": True},
                             daemon=True,
                         ).start()
                 transaction.on_commit(_start_uploads)
 
                 messages.success(
                     request,
-                    f"Imported {saved} of {total} PDFs. Knowledge Base uploads are running in the "
-                    "background — refresh the list to see each row's final status.",
+                    f"Imported {saved} new and updated {updated} of {total} PDF rows. "
+                    "Knowledge Base uploads are running in the background — "
+                    "refresh the list to see each row's final status.",
                 )
                 return HttpResponseRedirect(changelist_url)
 

diff --git a/hospexplorer/ask/admin_csv.py b/hospexplorer/ask/admin_csv.py
@@ -0,0 +1,49 @@
+import csv
+import datetime
+import io
+import re
+
+
+def normalize_partial_date(value):
+    """Validate a partial ISO 8601 date string and return its trimmed form.
+
+    Accepts exactly ``YYYY``, ``YYYY-MM`` or ``YYYY-MM-DD`` (zero-padded
+    ASCII digits). The shape is checked explicitly rather than by length
+    alone, because ``datetime.date.fromisoformat`` on Python 3.11+ also
+    parses other ISO 8601 spellings such as the 10-character week date
+    ``2021-W01-1``. Calendar correctness is then delegated to
+    ``fromisoformat`` by padding the missing components with ``-01``.
+
+    Args:
+        value: Raw date text; ``None`` is treated like an empty string.
+
+    Returns:
+        The stripped input, or ``""`` for empty / whitespace-only input.
+
+    Raises:
+        ValueError: If the value is not one of the three accepted shapes or
+            names an impossible calendar date (e.g. ``2023-02-29``).
+    """
+    s = (value or "").strip()
+    if not s:
+        return ""
+    if not re.fullmatch(r"[0-9]{4}(?:-[0-9]{2}){0,2}", s):
+        raise ValueError(f"not a partial ISO date: {value!r}")
+    # the regex guarantees a length of 4, 7 or 10
+    padding = {4: "-01-01", 7: "-01", 10: ""}[len(s)]
+    datetime.date.fromisoformat(s + padding)
+    return s
+
+
+def import_names_csv(model, file_obj):
+    """Import a one-column CSV into a model with a ``name`` field.
+
+    Only the first column of each row is read, with surrounding whitespace
+    stripped. A header is optional: the first non-blank row is skipped when
+    its value is ``name`` (case-insensitive). A later row with that value is
+    imported like any other name.
+
+    Args:
+        model: Model class whose ``objects.get_or_create(name=...)`` is used
+            to insert rows.
+        file_obj: Binary file-like object; its content is decoded as UTF-8
+            (a BOM is dropped, undecodable bytes are replaced).
+
+    Returns:
+        ``(created, skipped)``. Blank rows, the leading header row, and rows
+        whose name already exists in the table are all counted as skipped.
+    """
+    text = file_obj.read().decode("utf-8-sig", errors="replace")
+
+    created = 0
+    skipped = 0
+    header_possible = True  # only the first non-blank row can be a header
+    for row in csv.reader(io.StringIO(text)):
+        name = row[0].strip() if row else ""
+        if not name:
+            skipped += 1
+            continue
+        if header_possible:
+            header_possible = False
+            if name.lower() == "name":
+                skipped += 1
+                continue
+        _, was_created = model.objects.get_or_create(name=name)
+        if was_created:
+            created += 1
+        else:
+            skipped += 1
+    return created, skipped