From b831b72396425184a0f59e5f87bfa3e1c0af54d4 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Tue, 21 Apr 2026 10:10:00 -0400 Subject: [PATCH 1/4] Required controlledVocabulary metadata marked as valid while empty --- .../iq/dataverse/DatasetFieldValidator.java | 20 ++++++++++++-- .../edu/harvard/iq/dataverse/DatasetPage.java | 4 +-- .../api/imports/ImportServiceBean.java | 26 +++++++------------ .../command/impl/AbstractDatasetCommand.java | 6 ++++- 4 files changed, 34 insertions(+), 22 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java index 6d3fda2812d..96378e87795 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java @@ -59,10 +59,26 @@ public boolean isValid(DatasetField value, ConstraintValidatorContext context) { } // if value is not primitive or not empty - if (!dsfType.isPrimitive() || !StringUtils.isBlank(value.getValue())) { + // For controlled vocabulary fields, check that actual CV values are selected, + // not just that datasetFieldValues contains something (which might be an invalid N/A placeholder) + // See https://github.com/IQSS/dataverse/issues/11900 + if (!dsfType.isPrimitive()) { return true; } - + + if (dsfType.isControlledVocabulary()) { + // For CV fields, check if there are actual controlled vocabulary values selected + if (value.getControlledVocabularyValues() != null && !value.getControlledVocabularyValues().isEmpty()) { + return true; + } + // If no CV values, fall through to required field check below + } else { + // For non-CV primitive fields, check if value is not blank + if (!StringUtils.isBlank(value.getValue())) { + return true; + } + } + if (value.isRequired()) { String errorMessage = null; DatasetFieldCompoundValue parent = value.getParentDatasetFieldCompoundValue(); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 5738ff8cfa9..f5f11d0eaba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -4039,8 +4039,8 @@ public String save() { dataset.setOwner(ownerId != null ? dataverseService.find(ownerId) : null); } // Validate - Set constraintViolations = workingVersion.validate(); - if (!constraintViolations.isEmpty()) { + workingVersion.validate(); // add validation messages to dataset fields + if (!workingVersion.isValid()) { FacesContext.getCurrentInstance().validationFailed(); return ""; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index 0b5fae8ee31..ecaeaed4536 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -7,20 +7,7 @@ import com.google.gson.Gson; import com.google.gson.GsonBuilder; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetField; -import edu.harvard.iq.dataverse.DatasetFieldConstant; -import edu.harvard.iq.dataverse.DatasetFieldServiceBean; -import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.DatasetFieldValue; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DataverseContact; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.EjbDataverseEngine; -import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.MetadataBlockServiceBean; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.imports.ImportUtil.ImportType; import edu.harvard.iq.dataverse.dataset.DatasetTypeServiceBean; @@ -762,16 +749,21 @@ private void validateVersionMetadata(DatasetVersion version, PrintWriter log) th */ private boolean validateVersionMetadata(DatasetVersion version, boolean sanitize, PrintWriter cleanupLog) throws ImportException { boolean fixed = false; + boolean allowHarvestingMissingCVV = version.getDataset().getHarvestedFrom() != null ? version.getDataset().getHarvestedFrom().getAllowHarvestingMissingCVV() : false; Set invalidViolations = version.validate(); if (!invalidViolations.isEmpty()) { for (ConstraintViolation v : invalidViolations) { Object invalid = v.getRootBean(); String msg = ""; if (invalid instanceof DatasetField) { - DatasetField f = (DatasetField) invalid; - - msg += "Missing required field: " + f.getDatasetFieldType().getDisplayName() + ";"; + DatasetField f = (DatasetField) invalid; + + msg += "Missing required field: " + f.getDatasetFieldType().getDisplayName() + ";"; if (sanitize) { + if (allowHarvestingMissingCVV && f.getDatasetFieldType().isControlledVocabulary()) { + ControlledVocabularyValue ccv = new ControlledVocabularyValue(null, DatasetField.NA_VALUE, f.getDatasetFieldType()); + f.setControlledVocabularyValues(List.of(ccv)); + } msg += " populated with '" + DatasetField.NA_VALUE + "'"; f.setSingleValue(DatasetField.NA_VALUE); fixed = true; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java index 2d09b22925f..096238f5637 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java @@ -110,10 +110,14 @@ protected void validateOrDie(DatasetVersion dsv, Boolean lenient) throws Command Set constraintViolations = dsv.validate(); if (!constraintViolations.isEmpty()) { if (lenient) { - // populate invalid fields with N/A + // populate invalid primitive fields with N/A + // Note: controlled vocabulary fields should NOT get N/A values in datasetfieldvalue, + // as this creates an inconsistent state where the CV field appears valid but is empty. + // See https://github.com/IQSS/dataverse/issues/11900 constraintViolations.stream() .filter(cv -> cv.getRootBean() instanceof DatasetField) .map(cv -> ((DatasetField) cv.getRootBean())) + .filter(f -> !f.getDatasetFieldType().isControlledVocabulary()) .forEach(f -> f.setSingleValue(DatasetField.NA_VALUE)); } else { From 7aa0debc1e9f74057e3167b5e8ec4325cf445eaf Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 28 Apr 2026 14:24:49 +0200 Subject: [PATCH 2/4] Remove unused import for ConstraintViolation in DatasetPage.java --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index f5f11d0eaba..85d39abd77a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -111,7 +111,6 @@ import org.primefaces.event.FileUploadEvent; import org.primefaces.model.file.UploadedFile; -import jakarta.validation.ConstraintViolation; import java.util.Arrays; import java.util.HashSet; import jakarta.faces.model.SelectItem; From ccbf5f081562fcd05ffa91d172235389bd104bbd Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 28 Apr 2026 16:28:57 +0200 Subject: [PATCH 3/4] Refactor CVV validation logic in validateVersionMetadata method --- .../harvard/iq/dataverse/api/imports/ImportServiceBean.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index ecaeaed4536..bf050da8854 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -749,7 +749,6 @@ private void validateVersionMetadata(DatasetVersion version, PrintWriter log) th */ private boolean validateVersionMetadata(DatasetVersion version, boolean sanitize, PrintWriter cleanupLog) throws ImportException { boolean fixed = false; - boolean allowHarvestingMissingCVV = version.getDataset().getHarvestedFrom() != null ? version.getDataset().getHarvestedFrom().getAllowHarvestingMissingCVV() : false; Set invalidViolations = version.validate(); if (!invalidViolations.isEmpty()) { for (ConstraintViolation v : invalidViolations) { @@ -760,12 +759,13 @@ private boolean validateVersionMetadata(DatasetVersion version, boolean sanitize msg += "Missing required field: " + f.getDatasetFieldType().getDisplayName() + ";"; if (sanitize) { - if (allowHarvestingMissingCVV && f.getDatasetFieldType().isControlledVocabulary()) { + if (f.getDatasetFieldType().isControlledVocabulary()) { ControlledVocabularyValue ccv = new ControlledVocabularyValue(null, DatasetField.NA_VALUE, f.getDatasetFieldType()); f.setControlledVocabularyValues(List.of(ccv)); + } else { + f.setSingleValue(DatasetField.NA_VALUE); } msg += " populated with '" + DatasetField.NA_VALUE + "'"; - f.setSingleValue(DatasetField.NA_VALUE); fixed = true; } } else if (invalid instanceof DatasetFieldValue) { From 6542a4aada2ba90cf8a10d14ae179af5734fd847 Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Fri, 22 May 2026 11:29:04 +0200 Subject: [PATCH 4/4] Fixed N/A cvoc values DB pollution --- .../iq/dataverse/api/imports/ImportServiceBean.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index bf050da8854..bbd8260e3fe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -760,13 +760,17 @@ private boolean validateVersionMetadata(DatasetVersion version, boolean sanitize msg += "Missing required field: " + f.getDatasetFieldType().getDisplayName() + ";"; if (sanitize) { if (f.getDatasetFieldType().isControlledVocabulary()) { - ControlledVocabularyValue ccv = new ControlledVocabularyValue(null, DatasetField.NA_VALUE, f.getDatasetFieldType()); - f.setControlledVocabularyValues(List.of(ccv)); + ControlledVocabularyValue naValue = datasetfieldService.findNAControlledVocabularyValue(); + if (naValue != null) { + f.setControlledVocabularyValues(List.of(naValue)); + msg += " populated with '" + DatasetField.NA_VALUE + "'"; + fixed = true; + } } else { f.setSingleValue(DatasetField.NA_VALUE); + msg += " populated with '" + DatasetField.NA_VALUE + "'"; + fixed = true; } - msg += " populated with '" + DatasetField.NA_VALUE + "'"; - fixed = true; } } else if (invalid instanceof DatasetFieldValue) { DatasetFieldValue fv = (DatasetFieldValue) invalid;