Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions modules/dataverse-parent/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@

<!-- NEW gdcc XOAI library implementation -->
<gdcc.xoai.version>5.3.0</gdcc.xoai.version>
<gdcc.spi.version>2.2.0-SNAPSHOT</gdcc.spi.version>

<!-- Testing dependencies -->
<testcontainers.version>2.0.2</testcontainers.version>
Expand Down Expand Up @@ -425,20 +426,20 @@
</repository>
<!-- Uncomment when using snapshot releases from Maven Central -->
<!-- Note the new - as of Sept. 2025 sonatype url -->
<!--
<repository>
<id>central-portal-snapshots</id>
<name>Central Portal Snapshots</name>
<url>
https://central.sonatype.com/repository/maven-snapshots/
</url>
<releases>
<enabled>false</enabled>
</releases>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
<!--
<repository>
<id>s01-oss-sonatype</id>
<name>s01-oss-sonatype</name>
Expand Down
11 changes: 9 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@
<dependency>
<groupId>io.gdcc</groupId>
<artifactId>dataverse-spi</artifactId>
<version>2.0.0</version>
<version>${gdcc.spi.version}</version>
</dependency>
<dependency>
<groupId>javax.cache</groupId>
Expand Down Expand Up @@ -878,13 +878,20 @@
<release>${target.java.version}</release>
<!-- for use with `mvn -DcompilerArgument=-Xlint:unchecked compile` -->
<compilerArgument>${compilerArgument}</compilerArgument>
<!-- Ensure the annotation processor for @AutoService is picked up. Especially important as of JDK 23 -->
<!-- As of JDK 23, annotation processors are opt-in. -->
<annotationProcessorPaths>
<!-- Ensure the annotation processor for @AutoService is picked up. (older plugins) -->
<path>
<groupId>com.google.auto.service</groupId>
<artifactId>auto-service</artifactId>
<version>${auto-service.version}</version>
</path>
<!-- Ensure the annotation processor for Dataverse Plugins is picked up. -->
<path>
<groupId>io.gdcc</groupId>
<artifactId>dataverse-spi</artifactId>
<version>${gdcc.spi.version}</version>
</path>
</annotationProcessorPaths>
</configuration>
</plugin>
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package edu.harvard.iq.dataverse.api;

import java.util.List;

public final class ApiConstants {

private ApiConstants() {
Expand All @@ -17,6 +19,10 @@ private ApiConstants() {
public static final String DS_VERSION_LATEST = ":latest";
public static final String DS_VERSION_DRAFT = ":draft";
public static final String DS_VERSION_LATEST_PUBLISHED = ":latest-published";
public static final String DS_VERSION_IDENTIFIER_REGEX = "^(:latest|:draft|:latest-published|\\d+(?:\\.\\d+)?)$";
public static final List<String> DS_VERSION_RESERVED_IDENTIFIERS = List.of(DS_VERSION_DRAFT, DS_VERSION_LATEST_PUBLISHED, DS_VERSION_LATEST);
// TODO: should be replaced by a bundle reference
public static final String DS_VERSION_IDENTIFIER_MESSAGE = "version must be one of :latest, :latest-published, :draft, or a numeric version like 1.0";

// addFiles call
public static final String API_ADD_FILES_COUNT_PROCESSED = "Total number of files";
Expand Down
171 changes: 145 additions & 26 deletions src/main/java/edu/harvard/iq/dataverse/api/Datasets.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package edu.harvard.iq.dataverse.api.dto;

import edu.harvard.iq.dataverse.api.ApiConstants;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import jakarta.validation.constraints.Pattern;
import jakarta.validation.constraints.Size;

import java.util.List;

/**
 * Request payload for exporting multiple datasets with a single exporter.
 *
 * <p>Each request names the exporter to use and lists the dataset selections to export.
 * Every dataset selection couples a persistent identifier with a version
 * identifier so the server can resolve the exact dataset version to export.
 *
 * <p>This request model is intended for JSON request bodies sent to a bulk or
 * multi-dataset export endpoint.
 *
 * @param exporter the name or identifier of the exporter to use; must not be blank
 * @param datasets the datasets to export; constrained to be non-{@code null} and to contain at
 *                 least one element. A non-{@code null} list is defensively copied into an
 *                 unmodifiable snapshot; {@code null} is kept as is so the {@code @NotNull}
 *                 constraint can report it.
 */
public record MultiDatasetExportRequest(
        @NotBlank String exporter,
        // NOTE(review): the constraints declared on ExportItem are only evaluated if validation
        // cascades into the list elements or the caller validates each item - confirm at the call site.
        @NotNull @Size(min = 1) List<ExportItem> datasets
) {
    /**
     * Creates a new multi-dataset export request. A supplied dataset list is copied to preserve
     * immutability and prevent later external modification of the request contents.
     *
     * <p>A {@code null} list does <em>not</em> throw here: it is passed through untouched so that
     * bean validation can catch it and complain later.
     *
     * @throws NullPointerException if {@code datasets} is non-{@code null} but contains a
     *                              {@code null} element (raised by {@link List#copyOf})
     */
    public MultiDatasetExportRequest {
        if (datasets != null) {
            datasets = List.copyOf(datasets);
        }
    }

    /**
     * A single dataset selection within a {@link MultiDatasetExportRequest}.
     *
     * <p>Each item identifies one dataset to export by its persistent identifier and the specific version to resolve.
     * The version may be one of the symbolic dataset version identifiers supported by the API, or a numeric version
     * identifier, as validated by {@link ApiConstants#DS_VERSION_IDENTIFIER_REGEX}.
     *
     * @param persistentId the persistent identifier of the dataset to export; must not be blank
     * @param version      the dataset version identifier to export; when omitted ({@code null}) or blank it
     *                     defaults to {@link ApiConstants#DS_VERSION_LATEST_PUBLISHED}, otherwise it must
     *                     match the API-supported dataset version syntax
     */
    public record ExportItem(
            @NotBlank String persistentId,
            @NotBlank
            @Pattern(
                    regexp = ApiConstants.DS_VERSION_IDENTIFIER_REGEX,
                    // TODO: replace message with bundle reference
                    message = ApiConstants.DS_VERSION_IDENTIFIER_MESSAGE
            )
            String version
    ) {
        /**
         * Creates a dataset selection. If the version is omitted or blank, a default version to
         * look up is applied (aligned with GET endpoint behavior), so the stored value is never
         * {@code null} or blank.
         */
        public ExportItem {
            if (version == null || version.isBlank()) {
                version = ApiConstants.DS_VERSION_LATEST_PUBLISHED;
            }
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -229,10 +229,9 @@ public JsonResponseBuilder log(Logger logger, Level level, Optional<Throwable> e
metadata.deleteCharAt(metadata.length()-1);

if (ex.isPresent()) {
ex.get().printStackTrace();
metadata.append("|");
logger.log(level, metadata.toString(), ex);
if(includeStackTrace) {
if (includeStackTrace) {
logger.log(level, ExceptionUtils.getStackTrace(ex.get()));
}
} else {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,31 @@
package edu.harvard.iq.dataverse.export;

import java.io.InputStream;
import java.util.Optional;

import jakarta.json.Json;
import jakarta.json.JsonArray;
import jakarta.json.JsonArrayBuilder;
import jakarta.json.JsonObject;
import jakarta.json.JsonObjectBuilder;
import edu.harvard.iq.dataverse.DataCitation;
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.FileMetadata;
import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DOIDataCiteRegisterService;
import io.gdcc.spi.export.ExportDataProvider;
import edu.harvard.iq.dataverse.util.bagit.OREMap;
import edu.harvard.iq.dataverse.util.json.JsonPrinter;
import edu.harvard.iq.dataverse.util.json.JsonUtil;
import io.gdcc.spi.export.DatasetExportQuery;
import io.gdcc.spi.export.ExportDataProvider;
import io.gdcc.spi.export.FileExportQuery;
import io.gdcc.spi.export.PageRequest;
import jakarta.json.Json;
import jakarta.json.JsonArray;
import jakarta.json.JsonArrayBuilder;
import jakarta.json.JsonObject;
import jakarta.json.JsonObjectBuilder;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.InputStream;
import java.io.StringReader;
import java.util.Optional;
import java.util.stream.Stream;

/**
* Provides all data necessary to create an export
Expand All @@ -39,6 +48,11 @@
this.is=is;
}

/**
 * Returns the dataset JSON for an export query.
 *
 * <p>The supplied query is currently not consulted; this simply delegates to the
 * no-argument {@link #getDatasetJson()}.
 *
 * @param datasetExportQuery the export query; ignored by this implementation
 * @return whatever {@link #getDatasetJson()} returns
 */
@Override
public JsonObject getDatasetJson(DatasetExportQuery datasetExportQuery) {
return getDatasetJson();
}

@Override
public JsonObject getDatasetJson() {
if (jsonRepresentation == null) {
Expand All @@ -47,7 +61,15 @@
}
return jsonRepresentation;
}


/**
 * Returns the Schema.org representation for an export query.
 *
 * <p>Needs a better implementation, as it should replace the deprecated method.
 * The supplied query is currently not consulted; this simply delegates to the
 * no-argument {@link #getDatasetSchemaDotOrg()}.
 *
 * @param datasetExportQuery the export query; ignored by this implementation
 * @return whatever {@link #getDatasetSchemaDotOrg()} returns
 */
@Override
public JsonObject getDatasetSchemaDotOrg(DatasetExportQuery datasetExportQuery) {
return getDatasetSchemaDotOrg();
}

@Override
public JsonObject getDatasetSchemaDotOrg() {
if (schemaDotOrgRepresentation == null) {
Expand All @@ -56,21 +78,79 @@
}
return schemaDotOrgRepresentation;
}


/**
 * Returns the ORE map for an export query.
 *
 * <p>Needs a better implementation, as it should replace the deprecated method.
 * The supplied query is currently not consulted; this simply delegates to the
 * no-argument {@link #getDatasetORE()}.
 *
 * @param datasetExportQuery the export query; ignored by this implementation
 * @return whatever {@link #getDatasetORE()} returns
 */
@Override
public JsonObject getDatasetORE(DatasetExportQuery datasetExportQuery) {
return getDatasetORE();
}

/**
 * Returns the ORE map of the dataset version, building it on first use.
 *
 * <p>The map is created from {@code dv} via {@link OREMap} the first time this method is
 * called and the same object is handed out on every later call.
 *
 * @return the cached ORE map
 */
@Override
public JsonObject getDatasetORE() {
    // Already built on an earlier call: reuse it.
    if (oreRepresentation != null) {
        return oreRepresentation;
    }
    oreRepresentation = new OREMap(dv).getOREMap();
    return oreRepresentation;
}


/**
* Needs a better implementation, as it should replace the deprecated method.
*/
@Override
public Document getDataCiteXml(DatasetExportQuery datasetExportQuery) {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
try
{

Check failure on line 105 in src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java

View workflow job for this annotation

GitHub Actions / Checkstyle job

[reviewdog] reported by reviewdog 🐶 '{' at column 9 should be on the previous line. Raw Output: /github/workspace/./src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java:105:9: error: '{' at column 9 should be on the previous line. (com.puppycrawl.tools.checkstyle.checks.blocks.LeftCurlyCheck)
String dataCiteXml = getDataCiteXml();
DocumentBuilder builder = factory.newDocumentBuilder();
return builder.parse(new InputSource(new StringReader(dataCiteXml)));
// TODO: remove this anti-pattern of catcha-all
} catch (Exception e) {
e.printStackTrace();
}
return null;
}

/**
 * Returns the DataCite XML of the dataset version as a string.
 *
 * <p>The XML is produced by {@code DOIDataCiteRegisterService.getMetadataFromDvObject} from the
 * dataset's global id, the DataCite metadata of the version's citation, and the dataset itself.
 *
 * @return the DataCite XML string
 */
@Override
public String getDataCiteXml() {
    var dataset = dv.getDataset();
    var globalId = dataset.getGlobalId().asString();
    var dataCiteMetadata = new DataCitation(dv).getDataCiteMetadata();
    return DOIDataCiteRegisterService.getMetadataFromDvObject(globalId, dataCiteMetadata, dataset);
}

/**
 * Streams the JSON details of every file in the dataset version.
 *
 * <p>Needs a better implementation, as it should replace the deprecated method.
 * This will trigger all sorts of N+1 query expansions; it would be much better to do the
 * lookup in a factory method instead of on demand when the exporter requests it.
 * The supplied query is not consulted, so nothing is filtered.
 *
 * @param fileExportQuery the file export query; ignored by this implementation
 * @return one JSON object per file metadata entry of the dataset version
 */
@Override
public Stream<JsonObject> getDatasetFileDetails(FileExportQuery fileExportQuery) {
    return dv.getFileMetadatas().stream().map(
            fileMetadata -> JsonPrinter.json(fileMetadata.getDataFile(), fileMetadata, true).build());
}

/**
 * Streams the JSON details of one page of files in the dataset version.
 *
 * <p>Needs a better implementation, as it should replace the deprecated method.
 * This will trigger all sorts of N+1 query expansions; it would be much better to do the
 * lookup in a factory method instead of on demand when the exporter requests it.
 * The supplied query is not consulted, so nothing is filtered; only paging is applied.
 *
 * <p>The page is cut with {@code skip}/{@code limit} rather than {@code subList}, so a page that
 * reaches past the end of the file list (typically the last one) yields the remaining entries,
 * and an offset beyond the end yields an empty stream, instead of failing with an
 * {@link IndexOutOfBoundsException}.
 *
 * @param fileExportQuery the file export query; ignored by this implementation
 * @param pageRequest     supplies the offset of the first entry and the maximum number of entries
 * @return at most {@code pageRequest.getLimit()} JSON objects, starting at {@code pageRequest.getOffset()}
 * @throws IllegalArgumentException if the offset or the limit is negative
 */
@Override
public Stream<JsonObject> getDatasetFileDetails(FileExportQuery fileExportQuery, PageRequest pageRequest) {
    return dv.getFileMetadatas()
            .stream()
            .skip(pageRequest.getOffset())
            .limit(pageRequest.getLimit())
            .map(fileMetadata -> {
                DataFile dataFile = fileMetadata.getDataFile();
                return JsonPrinter.json(dataFile, fileMetadata, true).build();
            });
}

@Override
public JsonArray getDatasetFileDetails() {
JsonArrayBuilder jab = Json.createArrayBuilder();
Expand Down
Loading
Loading