From f5c7288ba05271d9bd8f8b4d6a4de4d284533a55 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 21 Apr 2026 11:01:35 -0400 Subject: [PATCH 1/8] add language entry to DataCite xml --- .../pidproviders/doi/XmlMetadataTemplate.java | 55 +++++++++++-------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 1d14b89e11a..4e593c5408d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -24,25 +24,11 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; +import edu.harvard.iq.dataverse.*; import org.apache.commons.lang3.StringUtils; import org.apache.commons.text.StringEscapeUtils; import org.ocpsoft.common.util.Strings; -import edu.harvard.iq.dataverse.AlternativePersistentIdentifier; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetAuthor; -import edu.harvard.iq.dataverse.DatasetField; -import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; -import edu.harvard.iq.dataverse.DatasetFieldConstant; -import edu.harvard.iq.dataverse.DatasetFieldServiceBean; -import edu.harvard.iq.dataverse.DatasetRelPublication; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.ExternalIdentifier; -import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.api.Util; import edu.harvard.iq.dataverse.dataset.DatasetType; import edu.harvard.iq.dataverse.dataset.DatasetUtil; @@ -67,9 +53,9 @@ public class XmlMetadataTemplate { private static final Logger logger = 
Logger.getLogger(XmlMetadataTemplate.class.getName()); public static final String XML_NAMESPACE = "http://datacite.org/schema/kernel-4"; - public static final String XML_SCHEMA_LOCATION = "http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.5/metadata.xsd"; + public static final String XML_SCHEMA_LOCATION = "http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.7/metadata.xsd"; public static final String XML_XSI = "http://www.w3.org/2001/XMLSchema-instance"; - public static final String XML_SCHEMA_VERSION = "4.5"; + public static final String XML_SCHEMA_VERSION = "4.7"; private DoiMetadata doiMetadata; @@ -425,7 +411,8 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt * 7, Contributor (with optional given name, family name, name identifier and * affiliation sub-properties) * - * @see #writeContributorElement(javax.xml.stream.XMLStreamWriter, + * @see #writeEntityElements(javax.xml.stream.XMLStreamWriter, + * java.lang.String, java.lang.String, jakarta.json.JsonObject, * java.lang.String, java.lang.String, java.lang.String) * * @param xmlw @@ -570,7 +557,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X //List from https://schema.datacite.org/meta/kernel-4/include/datacite-contributorType-v4.xsd private Set contributorTypes = new HashSet<>(Arrays.asList("ContactPerson", "DataCollector", "DataCurator", "DataManager", "Distributor", "Editor", "HostingInstitution", "Other", "Producer", "ProjectLeader", "ProjectManager", "ProjectMember", "RegistrationAgency", "RegistrationAuthority", - "RelatedPerson", "ResearchGroup", "RightsHolder", "Researcher", "Sponsor", "Supervisor", "WorkPackageLeader")); + "RelatedPerson", "ResearchGroup", "RightsHolder", "Researcher", "Sponsor", "Supervisor", "Translator", "WorkPackageLeader")); private String getCanonicalContributorType(String contributorType) { if(StringUtils.isBlank(contributorType) || 
!contributorTypes.contains(contributorType)) { @@ -824,10 +811,34 @@ private String cleanUpDate(String date) { // 9, Language (MA), language private void writeLanguage(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { - // Currently not supported. Spec indicates one 'primary' language. Could send - // the first entry in DatasetFieldConstant.language or send iff there is only - // one entry, and/or default to the machine's default lang, or the dataverse metadatalang? + // Spec indicates one 'primary' language. Sending a language iff there is only + // one citation mdb language entry (Could send first entry if there are several and/or default to the machine's default lang, or use the dataset's metadatalang?) + if (dvObject.isInstanceofDataFile()) { + dvObject = dvObject.getOwner(); + } + if (!(dvObject instanceof Dataset dataset)) { + return; + } + + DatasetVersion dv = dataset.getLatestVersionForCopy(); + if (dv == null) { + return; + } + Optional dsf = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.language)).findFirst(); + if (dsf.isPresent()) { + String languageIdentifier = null; + List controlledVocabularyValues = dsf.get().getControlledVocabularyValues(); + if (controlledVocabularyValues != null && controlledVocabularyValues.size() == 1) { + ControlledVocabularyValue cvv = controlledVocabularyValues.get(0); + languageIdentifier = cvv.getIdentifier(); + } + // 'Not applicable' has no identifier - we want to skip it. 
+ if (StringUtils.isNotBlank(languageIdentifier)) { + XmlWriterUtil.writeFullElement(xmlw, "language", StringEscapeUtils.escapeXml10(languageIdentifier)); + } + } return; + } // 10, ResourceType (with mandatory general type From 5c9778fc2f16fa361c78fafd7f406f9691b8d1e1 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 21 Apr 2026 11:58:19 -0400 Subject: [PATCH 2/8] release note, tests --- .../12346-DataCite-xml-updates.md | 1 + .../doi/datacite/XmlMetadataTemplateTest.java | 50 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 doc/release-notes/12346-DataCite-xml-updates.md diff --git a/doc/release-notes/12346-DataCite-xml-updates.md b/doc/release-notes/12346-DataCite-xml-updates.md new file mode 100644 index 00000000000..48c7b5cb4c7 --- /dev/null +++ b/doc/release-notes/12346-DataCite-xml-updates.md @@ -0,0 +1 @@ +This release updates the DataCite XML metadata format to indicate compliance with the version 4.7 schema, adds support for specifying a 'Translator' contributor, and adds a language element when a dataset has one language defined in its Citation block metadata. 
\ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java index 14d8dfb13a8..9c82f908eee 100644 --- a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java @@ -166,6 +166,53 @@ public void testDataCiteXMLCreation() throws IOException { testDatasetField.setSingleValue("First Title"); List fields = new ArrayList<>(); fields.add(testDatasetField); + + DatasetFieldType contributorTypeFieldType = new DatasetFieldType(DatasetFieldConstant.contributor, + DatasetFieldType.FieldType.TEXT, false); + DatasetFieldType contributorNameFieldType = new DatasetFieldType(DatasetFieldConstant.contributorName, + DatasetFieldType.FieldType.TEXT, false); + DatasetFieldType contributorRoleFieldType = new DatasetFieldType(DatasetFieldConstant.contributorType, + DatasetFieldType.FieldType.TEXT, false); + + DatasetField translatorField = new DatasetField(); + translatorField.setDatasetVersion(dv); + translatorField.setDatasetFieldType(contributorTypeFieldType); + DatasetFieldCompoundValue translatorValue = new DatasetFieldCompoundValue(); + + DatasetField translatorName = new DatasetField(); + translatorName.setDatasetVersion(dv); + translatorName.setDatasetFieldType(contributorNameFieldType); + translatorName.setSingleValue("Translator Name"); + + DatasetField translatorRole = new DatasetField(); + translatorRole.setDatasetVersion(dv); + translatorRole.setDatasetFieldType(contributorRoleFieldType); + translatorRole.setSingleValue("Translator"); + + List translatorChildren = new ArrayList<>(); + translatorChildren.add(translatorName); + translatorChildren.add(translatorRole); + translatorValue.setChildDatasetFields(translatorChildren); + + List translatorValues = new ArrayList<>(); + 
translatorValues.add(translatorValue); + translatorField.setDatasetFieldCompoundValues(translatorValues); + fields.add(translatorField); + + DatasetFieldType languageFieldType = new DatasetFieldType(DatasetFieldConstant.language, + DatasetFieldType.FieldType.TEXT, false); + DatasetField languageField = new DatasetField(); + languageField.setDatasetVersion(dv); + languageField.setDatasetFieldType(languageFieldType); + languageField.setSingleValue("en"); + ControlledVocabularyValue languageCvv = new ControlledVocabularyValue(); + languageCvv.setId(1L); + languageCvv.setIdentifier("en"); + languageCvv.setStrValue("English"); + languageCvv.setDatasetFieldType(languageFieldType); + languageField.setControlledVocabularyValues(List.of(languageCvv)); + fields.add(languageField); + dv.setDatasetFields(fields); ArrayList dsvs = new ArrayList<>(); dsvs.add(0, dv); @@ -200,6 +247,9 @@ public void testDataCiteXMLCreation() throws IOException { assertEquals("ROR", XmlPath.from(xml).getString("resource.creators.creator[3].nameIdentifier.@nameIdentifierScheme")); assertEquals("https://ror.org", XmlPath.from(xml).getString("resource.creators.creator[3].nameIdentifier.@schemeURI")); assertEquals("Dataverse", XmlPath.from(xml).getString("resource.publisher")); + assertEquals("Translator", XmlPath.from(xml).getString("resource.contributors.contributor[0].@contributorType")); + assertEquals("Translator Name", XmlPath.from(xml).getString("resource.contributors.contributor[0].contributorName")); + assertEquals("en", XmlPath.from(xml).getString("resource.language")); dv.setVersionNumber(1L); dv.setMinorVersionNumber(0l); From 0a1aca0b709372a2889b1758d590e60763e516d3 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 21 Apr 2026 12:14:29 -0400 Subject: [PATCH 3/8] add translator to citation block, update release note --- doc/release-notes/12346-DataCite-xml-updates.md | 4 +++- scripts/api/data/metadatablocks/citation.tsv | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff 
--git a/doc/release-notes/12346-DataCite-xml-updates.md b/doc/release-notes/12346-DataCite-xml-updates.md index 48c7b5cb4c7..4c325b00d0f 100644 --- a/doc/release-notes/12346-DataCite-xml-updates.md +++ b/doc/release-notes/12346-DataCite-xml-updates.md @@ -1 +1,3 @@ -This release updates the DataCite XML metadata format to indicate compliance with the version 4.7 schema, adds support for specifying a 'Translator' contributor, and adds a language element when a dataset has one language defined in its Citation block metadata. \ No newline at end of file +This release updates the DataCite XML metadata format to indicate compliance with the version 4.7 schema, adds support for specifying a 'Translator' contributor, and adds a language element when a dataset has one language defined in its Citation block metadata. + +As it adds Translator to the contributorTypes allowed in the citation block, people would have to reload the block to get the new option. \ No newline at end of file diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index b6bed2b9c5b..b0d2fefef37 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -129,8 +129,9 @@ contributorType Research Group 11 contributorType Rights Holder 12 contributorType Sponsor 13 contributorType Supervisor 14 - contributorType Work Package Leader 15 - contributorType Other 16 + contributorType Translator 15 + contributorType Work Package Leader 16 + contributorType Other 17 authorIdentifierScheme ORCID 0 authorIdentifierScheme ROR 1 From 2200cb5753441f6c96ac80f9450df865e7f14053 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 21 Apr 2026 12:36:04 -0400 Subject: [PATCH 4/8] add property for translator --- src/main/java/propertyFiles/citation.properties | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/propertyFiles/citation.properties b/src/main/java/propertyFiles/citation.properties index 
00f25740a19..5e6c3dd56f4 100644 --- a/src/main/java/propertyFiles/citation.properties +++ b/src/main/java/propertyFiles/citation.properties @@ -295,6 +295,7 @@ controlledvocabulary.contributorType.research_group=Research Group controlledvocabulary.contributorType.rights_holder=Rights Holder controlledvocabulary.contributorType.sponsor=Sponsor controlledvocabulary.contributorType.supervisor=Supervisor +controlledvocabulary.contributorType.translator=Translator controlledvocabulary.contributorType.work_package_leader=Work Package Leader controlledvocabulary.contributorType.other=Other controlledvocabulary.authorIdentifierScheme.orcid=ORCID From 2c97f8a1faed4765f79b2c8d5fe788a238d36008 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 27 Apr 2026 14:07:16 -0400 Subject: [PATCH 5/8] add valueURI from keyword to subject element --- .../pidproviders/doi/XmlMetadataTemplate.java | 7 ++++ .../doi/datacite/XmlMetadataTemplateTest.java | 35 ++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 4e593c5408d..e4e6db0bbaf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -345,6 +345,7 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt String keyword = null; String scheme = null; String schemeUri = null; + String valueUri = null; for (DatasetField subField : keywordFieldValue.getChildDatasetFields()) { switch (subField.getDatasetFieldType().getName()) { @@ -357,6 +358,9 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt case DatasetFieldConstant.keywordVocabURI: schemeUri = subField.getValue(); break; + case DatasetFieldConstant.keywordTermURI: + valueUri = subField.getValue(); + break; 
} } if (StringUtils.isNotBlank(keyword)) { @@ -367,6 +371,9 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt if (StringUtils.isNotBlank(schemeUri)) { attributesMap.put("schemeURI", schemeUri); } + if (StringUtils.isNotBlank(valueUri)) { + attributesMap.put("valueURI", valueUri); + } subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "subject", attributesMap, StringEscapeUtils.escapeXml10(keyword)); } diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java index 9c82f908eee..1855553c1b6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java @@ -164,8 +164,38 @@ public void testDataCiteXMLCreation() throws IOException { testDatasetField.setDatasetVersion(dv); testDatasetField.setDatasetFieldType(primitiveDSFType); testDatasetField.setSingleValue("First Title"); + + DatasetFieldType keywordType = new DatasetFieldType(DatasetFieldConstant.keyword, FieldType.NONE, true); + DatasetFieldType keywordValueType = new DatasetFieldType(DatasetFieldConstant.keywordValue, FieldType.TEXT, false); + DatasetFieldType keywordTermURIType = new DatasetFieldType(DatasetFieldConstant.keywordTermURI, FieldType.URL, false); + + DatasetField keywordField = new DatasetField(); + keywordField.setDatasetVersion(dv); + keywordField.setDatasetFieldType(keywordType); + + DatasetFieldCompoundValue compoundValue = new DatasetFieldCompoundValue(); + compoundValue.setParentDatasetField(keywordField); + + DatasetField valField = new DatasetField(); + valField.setDatasetFieldType(keywordValueType); + DatasetFieldValue val = new DatasetFieldValue(); + val.setDatasetField(valField); + 
val.setValue("Keyword1"); + valField.setDatasetFieldValues(new ArrayList<>(List.of(val))); + + DatasetField uriField = new DatasetField(); + uriField.setDatasetFieldType(keywordTermURIType); + DatasetFieldValue uriVal = new DatasetFieldValue(); + uriVal.setDatasetField(uriField); + uriVal.setValue("https://example.com/keyword1"); + uriField.setDatasetFieldValues(new ArrayList<>(List.of(uriVal))); + + compoundValue.setChildDatasetFields(new ArrayList<>(List.of(valField, uriField))); + keywordField.setDatasetFieldCompoundValues(new ArrayList<>(List.of(compoundValue))); + List fields = new ArrayList<>(); fields.add(testDatasetField); + fields.add(keywordField); DatasetFieldType contributorTypeFieldType = new DatasetFieldType(DatasetFieldConstant.contributor, DatasetFieldType.FieldType.TEXT, false); @@ -247,6 +277,8 @@ public void testDataCiteXMLCreation() throws IOException { assertEquals("ROR", XmlPath.from(xml).getString("resource.creators.creator[3].nameIdentifier.@nameIdentifierScheme")); assertEquals("https://ror.org", XmlPath.from(xml).getString("resource.creators.creator[3].nameIdentifier.@schemeURI")); assertEquals("Dataverse", XmlPath.from(xml).getString("resource.publisher")); + assertEquals("Keyword1", XmlPath.from(xml).getString("resource.subjects.subject")); + assertEquals("https://example.com/keyword1", XmlPath.from(xml).getString("resource.subjects.subject.@valueURI")); assertEquals("Translator", XmlPath.from(xml).getString("resource.contributors.contributor[0].@contributorType")); assertEquals("Translator Name", XmlPath.from(xml).getString("resource.contributors.contributor[0].contributorName")); assertEquals("en", XmlPath.from(xml).getString("resource.language")); @@ -310,7 +342,8 @@ public void testDataCiteXMLCreationAllFields() throws IOException { d.setDatasetType(dType); String xml = DOIDataCiteRegisterService.getMetadataFromDvObject(dv.getDataset().getGlobalId().asString(), new DataCitation(dv).getDataCiteMetadata(), dv.getDataset()); - 
System.out.println("Output from dataset-all-defaults is " + xml); + assertTrue(xml.contains("valueURI=\"http://keywordTermURI1.org\"")); + assertTrue(xml.contains("valueURI=\"http://keywordTermURI2.org\"")); try { StreamSource source = new StreamSource(new StringReader(xml)); source.setSystemId("DataCite XML for test dataset"); From 425029876bb7c54556043c0089f37669e4ae81d1 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 27 Apr 2026 15:58:35 -0400 Subject: [PATCH 6/8] rel note update --- doc/release-notes/12346-DataCite-xml-updates.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/12346-DataCite-xml-updates.md b/doc/release-notes/12346-DataCite-xml-updates.md index 4c325b00d0f..0dedfd97036 100644 --- a/doc/release-notes/12346-DataCite-xml-updates.md +++ b/doc/release-notes/12346-DataCite-xml-updates.md @@ -1,3 +1,3 @@ -This release updates the DataCite XML metadata format to indicate compliance with the version 4.7 schema, adds support for specifying a 'Translator' contributor, and adds a language element when a dataset has one language defined in its Citation block metadata. +This release updates the DataCite XML metadata format to indicate compliance with the version 4.7 schema, adds support for specifying a 'Translator' contributor, adds a valueURI attribute to a subject element when a value exists in the keywordTermURI field, and adds a language element when a dataset has one language defined in its Citation block metadata. As it adds Translator to the contributorTypes allowed in the citation block, people would have to reload the block to get the new option. 
\ No newline at end of file From e3328931712cf9a94d5fadb4b0446c2bff7946be Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 Apr 2026 14:57:33 -0400 Subject: [PATCH 7/8] allow yyyy-MM or yyyy as well --- .../pidproviders/doi/XmlMetadataTemplate.java | 69 ++++++++++++------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index e4e6db0bbaf..b10690abab1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -7,8 +7,10 @@ import java.net.URI; import java.net.URISyntaxException; import java.net.URL; -import java.text.ParseException; -import java.text.SimpleDateFormat; +import java.time.LocalDate; +import java.time.Year; +import java.time.YearMonth; +import java.time.format.DateTimeParseException; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; @@ -34,7 +36,6 @@ import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.license.License; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; -import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; @@ -752,17 +753,17 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea for (DatasetField subField : collectionDateFieldValue.getChildDatasetFields()) { switch (subField.getDatasetFieldType().getName()) { case DatasetFieldConstant.dateOfCollectionStart: - startDate = subField.getValue(); + startDate = StringUtils.trim(subField.getValue()); break; case DatasetFieldConstant.dateOfCollectionEnd: - endDate = subField.getValue(); + endDate = 
StringUtils.trim(subField.getValue()); break; } } - // Minimal clean-up - useful? Parse/format would remove unused chars, and an - // exception would clear the date so we don't send nonsense - startDate = cleanUpDate(startDate); - endDate = cleanUpDate(endDate); + // Keep only yyyy, yyyy-MM or yyyy-MM-dd values; anything else (null included) becomes "" + + startDate = isValidYearMonthOrDay(startDate) ? startDate:""; + endDate = isValidYearMonthOrDay(endDate) ? endDate:""; if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Collected"); @@ -778,17 +779,16 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea for (DatasetField subField : timePeriodFieldValue.getChildDatasetFields()) { switch (subField.getDatasetFieldType().getName()) { case DatasetFieldConstant.timePeriodCoveredStart: - startDate = subField.getValue(); + startDate = StringUtils.trim(subField.getValue()); break; case DatasetFieldConstant.timePeriodCoveredEnd: - endDate = subField.getValue(); + endDate = StringUtils.trim(subField.getValue()); break; } } - // Minimal clean-up - useful? Parse/format would remove unused chars, and an - // exception would clear the date so we don't send nonsense - startDate = cleanUpDate(startDate); - endDate = cleanUpDate(endDate); + // Keep only yyyy, yyyy-MM or yyyy-MM-dd values; anything else (null included) becomes "" + startDate = isValidYearMonthOrDay(startDate) ? startDate:""; + endDate = isValidYearMonthOrDay(endDate) ? 
endDate:""; if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Other"); @@ -802,18 +802,35 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } } - private String cleanUpDate(String date) { - String newDate = null; - if (!StringUtils.isBlank(date)) { - try { - SimpleDateFormat sdf = Util.getDateFormat(); - Date start = sdf.parse(date); - newDate = sdf.format(start); - } catch (ParseException e) { - logger.warning("Could not parse date: " + date); + /** Checks for yyyy, yyyy-MM, or yyyy-MM-dd format + * @param value + * @return true if valid date format, false otherwise + */ + private boolean isValidYearMonthOrDay(String value) { + if (StringUtils.isBlank(value)) { + return false; + } + + try { + if (value.matches("\\d{4}")) { + Year.parse(value); + return true; } + + if (value.matches("\\d{4}-\\d{2}")) { + YearMonth.parse(value); + return true; + } + + if (value.matches("\\d{4}-\\d{2}-\\d{2}")) { + LocalDate.parse(value); + return true; + } + } catch (DateTimeParseException e) { + return false; } - return newDate; + + return false; } // 9, Language (MA), language @@ -1587,7 +1604,7 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr funder = jo.getString("termName"); } } - + xmlw.writeStartElement("fundingReference"); // XmlWriterUtil.writeFullElement(xmlw, "funderName", StringEscapeUtils.escapeXml10(funder)); if (isROR) { From 1ff845bda5d633e677abd641c03b74b7ce3ff811 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 Apr 2026 15:28:38 -0400 Subject: [PATCH 8/8] update relnote --- doc/release-notes/12346-DataCite-xml-updates.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/12346-DataCite-xml-updates.md b/doc/release-notes/12346-DataCite-xml-updates.md index 0dedfd97036..4ad541925ee 100644 --- 
a/doc/release-notes/12346-DataCite-xml-updates.md +++ b/doc/release-notes/12346-DataCite-xml-updates.md @@ -1,3 +1,9 @@ -This release updates the DataCite XML metadata format to indicate compliance with the version 4.7 schema, adds support for specifying a 'Translator' contributor, adds a valueURI attribute to a subject element when a value exists in the keywordTermURI field, and adds a language element when a dataset has one language defined in its Citation block metadata. +This release updates the DataCite XML metadata format to: +- indicate compliance with the version 4.7 schema, +- add support for specifying a 'Translator' contributor, +- add a valueURI attribute to a subject element when a value exists in the keywordTermURI field, +- add a language element when a dataset has one language defined in its Citation block metadata, +- accept dates of the form YYYY or YYYY-MM (in addition to YYYY-MM-DD) in the timePeriodCovered and dateOfCollection fields, and +- avoid sending the word 'null' as part of a date range when the start or end date is unspecified. As it adds Translator to the contributorTypes allowed in the citation block, people would have to reload the block to get the new option. \ No newline at end of file