From e534680a0926e249c10a1e499a0038633acffad4 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 11:39:33 -0400 Subject: [PATCH 01/12] feat(S3AccessIT): scrub minio from S3AccessIT test --- .../harvard/iq/dataverse/api/S3AccessIT.java | 43 +++---------------- 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 48a64490796..1d15f87b131 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -46,7 +46,7 @@ import org.junit.jupiter.api.Test; /** - * This test requires LocalStack and Minio to be running. Developers can use our + * This test requires LocalStack to be running. Developers can use our * docker-compose file, which has all the necessary configuration. */ public class S3AccessIT { @@ -55,7 +55,6 @@ public class S3AccessIT { static final String BUCKET_NAME = "mybucket"; static S3Client s3localstack = null; - static S3Client s3minio = null; @BeforeAll public static void setUp() { @@ -71,45 +70,21 @@ public static void setUp() { .region(Region.US_EAST_2) .build(); - String accessKeyMinio = "4cc355_k3y"; - String secretKeyMinio = "s3cr3t_4cc355_k3y"; - s3minio = S3Client.builder() - .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create(accessKeyMinio, secretKeyMinio))) - .endpointOverride(URI.create("http://localhost:9000")) - .region(Region.US_EAST_1) - .forcePathStyle(true) - .build(); - // create bucket if it doesn't exist try { s3localstack.headBucket(HeadBucketRequest.builder().bucket(BUCKET_NAME).build()); } catch (NoSuchBucketException ex) { s3localstack.createBucket(CreateBucketRequest.builder().bucket(BUCKET_NAME).build()); } - - try { - s3minio.headBucket(HeadBucketRequest.builder().bucket(BUCKET_NAME).build()); - } catch (NoSuchBucketException ex) { - try { - CreateBucketResponse 
createBucketResponse = s3minio.createBucket(CreateBucketRequest.builder().bucket(BUCKET_NAME).build()); - if (createBucketResponse.sdkHttpResponse().isSuccessful()) { - System.out.println("Bucket created successfully"); - } else { - System.err.println("Failed to create bucket: " + createBucketResponse.sdkHttpResponse().statusCode()); - } - } catch (S3Exception e) { - System.err.println("Error creating bucket: " + e.getMessage()); - } - } } /** - * We're using MinIO for testing non-direct upload. + * We're using Localstack for testing non-direct upload. */ @Test public void testNonDirectUpload() { - String driverId = "minio1"; - String driverLabel = "MinIO"; + String driverId = "localstack1"; + String driverLabel = "LocalStack"; Response createSuperuser = UtilIT.createRandomUser(); createSuperuser.then().assertThat().statusCode(200); @@ -124,7 +99,6 @@ public void testNonDirectUpload() { "status": "OK", "data": { "LocalStack": "localstack1", - "MinIO": "minio1", "Local": "local", "Filesystem": "file1" } @@ -191,7 +165,7 @@ public void testNonDirectUpload() { String keyInS3 = datasetStorageIdentifier + "/" + keyInDataverse; String s3Object = null; try { - ResponseInputStream s3ObjectResponse = s3minio.getObject(GetObjectRequest.builder() + ResponseInputStream s3ObjectResponse = s3localstack.getObject(GetObjectRequest.builder() .bucket(BUCKET_NAME) .key(keyInS3) .build()); @@ -220,7 +194,7 @@ public void testNonDirectUpload() { S3Exception expectedException = null; try { - ResponseInputStream s3ObjectResponse = s3minio.getObject(GetObjectRequest.builder() + ResponseInputStream s3ObjectResponse = s3localstack.getObject(GetObjectRequest.builder() .bucket(BUCKET_NAME) .key(keyInS3) .build()); @@ -258,7 +232,6 @@ public void testDirectUpload() { "status": "OK", "data": { "LocalStack": "localstack1", - "MinIO": "minio1", "Local": "local", "Filesystem": "file1" } @@ -441,7 +414,7 @@ public void testDirectUpload() { S3Exception expectedException = null; try { - 
ResponseInputStream s3ObjectResponse = s3minio.getObject(GetObjectRequest.builder() + ResponseInputStream s3ObjectResponse = s3localstack.getObject(GetObjectRequest.builder() .bucket(BUCKET_NAME) .key(keyInS3) .build()); @@ -476,7 +449,6 @@ public void testDirectUploadDetectStataFile() { "status": "OK", "data": { "LocalStack": "localstack1", - "MinIO": "minio1", "Local": "local", "Filesystem": "file1" } @@ -663,7 +635,6 @@ public void testDirectUploadWithFileCountLimit() throws JsonParseException { "status": "OK", "data": { "LocalStack": "localstack1", - "MinIO": "minio1", "Local": "local", "Filesystem": "file1" } From c5c4d58887cf32e75cc72082692296543e411442 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 11:42:41 -0400 Subject: [PATCH 02/12] feat(docker-compose-dev): scrub minio from docker-compose-dev.yml --- docker-compose-dev.yml | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index bbaefeffd65..d1b0bfe2032 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -44,16 +44,6 @@ services: -Ddataverse.files.localstack1.download-redirect=true -Ddataverse.files.localstack1.access-key=default -Ddataverse.files.localstack1.secret-key=default - -Ddataverse.files.minio1.type=s3 - -Ddataverse.files.minio1.label=MinIO - -Ddataverse.files.minio1.custom-endpoint-url=http://minio:9000 - -Ddataverse.files.minio1.custom-endpoint-region=us-east-1 - -Ddataverse.files.minio1.bucket-name=mybucket - -Ddataverse.files.minio1.path-style-access=true - -Ddataverse.files.minio1.upload-redirect=false - -Ddataverse.files.minio1.download-redirect=false - -Ddataverse.files.minio1.access-key=4cc355_k3y - -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k3y -Ddataverse.pid.providers=fake -Ddataverse.pid.default-provider=fake -Ddataverse.pid.fake.type=FAKE @@ -248,23 +238,6 @@ services: tmpfs: - /localstack:mode=770,size=128M,uid=1000,gid=1000 - dev_minio: - container_name: 
"dev_minio" - hostname: "minio" - image: minio/minio - restart: on-failure - ports: - - "9000:9000" - - "9001:9001" - networks: - - dataverse - volumes: - - ./docker-dev-volumes/minio_storage:/data - environment: - MINIO_ROOT_USER: 4cc355_k3y - MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k3y - command: server /data - previewers-provider: container_name: previewers-provider hostname: previewers-provider From 8fcb0b936139c8454a1ae834ac1bf49ed6861de7 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 11:43:20 -0400 Subject: [PATCH 03/12] feat(conf/keycloak/docker-compose-dev): scrub minio from conf/keycloak/docker-compose-dev.yml --- conf/keycloak/docker-compose-dev.yml | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/conf/keycloak/docker-compose-dev.yml b/conf/keycloak/docker-compose-dev.yml index 7356161ec47..b12aa6adbb6 100644 --- a/conf/keycloak/docker-compose-dev.yml +++ b/conf/keycloak/docker-compose-dev.yml @@ -53,16 +53,6 @@ services: -Ddataverse.files.localstack1.download-redirect=true -Ddataverse.files.localstack1.access-key=default -Ddataverse.files.localstack1.secret-key=default - -Ddataverse.files.minio1.type=s3 - -Ddataverse.files.minio1.label=MinIO - -Ddataverse.files.minio1.custom-endpoint-url=http://minio:9000 - -Ddataverse.files.minio1.custom-endpoint-region=us-east-1 - -Ddataverse.files.minio1.bucket-name=mybucket - -Ddataverse.files.minio1.path-style-access=true - -Ddataverse.files.minio1.upload-redirect=false - -Ddataverse.files.minio1.download-redirect=false - -Ddataverse.files.minio1.access-key=4cc355_k3y - -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k3y -Ddataverse.pid.providers=fake -Ddataverse.pid.default-provider=fake -Ddataverse.pid.fake.type=FAKE @@ -260,23 +250,6 @@ services: tmpfs: - /localstack:mode=770,size=128M,uid=1000,gid=1000 - dev_minio: - container_name: "dev_minio" - hostname: "minio" - image: minio/minio - restart: on-failure - ports: - - "9000:9000" - - "9001:9001" - 
networks: - - dataverse - volumes: - - ./docker-dev-volumes/minio_storage:/data - environment: - MINIO_ROOT_USER: 4cc355_k3y - MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k3y - command: server /data - previewers-provider: container_name: previewers-provider hostname: previewers-provider From 8ff89de347e69e84f420a36608c7eaffc2833843 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 12:04:54 -0400 Subject: [PATCH 04/12] feat(docker-compose-dev): add localstack_noredirect storage driver config --- docker-compose-dev.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index d1b0bfe2032..b3c187a60d3 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -44,6 +44,16 @@ services: -Ddataverse.files.localstack1.download-redirect=true -Ddataverse.files.localstack1.access-key=default -Ddataverse.files.localstack1.secret-key=default + -Ddataverse.files.localstack_noredirect.type=s3 + -Ddataverse.files.localstack_noredirect.label=LocalStackNoRedirect + -Ddataverse.files.localstack_noredirect.custom-endpoint-url=http://localstack:4566 + -Ddataverse.files.localstack_noredirect.custom-endpoint-region=us-east-2 + -Ddataverse.files.localstack_noredirect.bucket-name=mybucket + -Ddataverse.files.localstack_noredirect.path-style-access=true + -Ddataverse.files.localstack_noredirect.upload-redirect=false + -Ddataverse.files.localstack_noredirect.download-redirect=false + -Ddataverse.files.localstack_noredirect.access-key=default + -Ddataverse.files.localstack_noredirect.secret-key=default -Ddataverse.pid.providers=fake -Ddataverse.pid.default-provider=fake -Ddataverse.pid.fake.type=FAKE From 98d1398229b9efdf5c91f834fe836a571d995da6 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 12:05:13 -0400 Subject: [PATCH 05/12] feat(keycloak/docker-compose-dev): add localstack_noredirect storage driver config --- conf/keycloak/docker-compose-dev.yml | 10 ++++++++++ 1 file 
changed, 10 insertions(+) diff --git a/conf/keycloak/docker-compose-dev.yml b/conf/keycloak/docker-compose-dev.yml index b12aa6adbb6..81e3878c7b1 100644 --- a/conf/keycloak/docker-compose-dev.yml +++ b/conf/keycloak/docker-compose-dev.yml @@ -53,6 +53,16 @@ services: -Ddataverse.files.localstack1.download-redirect=true -Ddataverse.files.localstack1.access-key=default -Ddataverse.files.localstack1.secret-key=default + -Ddataverse.files.localstack_noredirect.type=s3 + -Ddataverse.files.localstack_noredirect.label=LocalStackNoRedirect + -Ddataverse.files.localstack_noredirect.custom-endpoint-url=http://localstack:4566 + -Ddataverse.files.localstack_noredirect.custom-endpoint-region=us-east-2 + -Ddataverse.files.localstack_noredirect.bucket-name=mybucket + -Ddataverse.files.localstack_noredirect.path-style-access=true + -Ddataverse.files.localstack_noredirect.upload-redirect=false + -Ddataverse.files.localstack_noredirect.download-redirect=false + -Ddataverse.files.localstack_noredirect.access-key=default + -Ddataverse.files.localstack_noredirect.secret-key=default -Ddataverse.pid.providers=fake -Ddataverse.pid.default-provider=fake -Ddataverse.pid.fake.type=FAKE From 8903aaff961b8a73e53bf542f59455976b969d50 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 12:07:13 -0400 Subject: [PATCH 06/12] test(S3AccessIT): use localstack_noredirect driver in testNonDirectUpload Switch testNonDirectUpload from localstack1 (upload-redirect=true, download-redirect=true) to the new localstack_noredirect driver (both redirects disabled), so the test genuinely exercises the non-redirect proxy-through-Dataverse code path. Also replace the plain downloadFile call with downloadFileNoRedirect and assert statusCode(200). This makes the assertion self-documenting: a 303 response would now cause an explicit test failure instead of being silently followed by RestAssured. 
--- .../edu/harvard/iq/dataverse/api/S3AccessIT.java | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 1d15f87b131..f0834ceebbf 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -79,12 +79,16 @@ public static void setUp() { } /** - * We're using Localstack for testing non-direct upload. + * We're using LocalStack (with redirects disabled) for testing non-direct + * upload. Using localstack_noredirect ensures the non-redirect + * (proxy-through-Dataverse) code path is actually exercised. If localstack1 + * (redirect-enabled) were used, RestAssured would silently follow the 303 + * redirect and the proxy path would never be tested. */ @Test public void testNonDirectUpload() { - String driverId = "localstack1"; - String driverLabel = "LocalStack"; + String driverId = "localstack_noredirect"; + String driverLabel = "LocalStackNoRedirect"; Response createSuperuser = UtilIT.createRandomUser(); createSuperuser.then().assertThat().statusCode(200); @@ -99,6 +103,7 @@ public void testNonDirectUpload() { "status": "OK", "data": { "LocalStack": "localstack1", + "LocalStackNoRedirect": "localstack_noredirect", "Local": "local", "Filesystem": "file1" } @@ -181,8 +186,11 @@ public void testNonDirectUpload() { fail("Failed to read S3 object content: " + ex.getMessage()); } + // Use downloadFileNoRedirect to verify Dataverse serves the content directly + // (status 200). If the driver were misconfigured with download-redirect=true, + // this would return 303 instead, causing the test to fail explicitly. 
System.out.println("non-direct download..."); - Response downloadFile = UtilIT.downloadFile(Integer.valueOf(fileId), apiToken); + Response downloadFile = UtilIT.downloadFileNoRedirect(Integer.valueOf(fileId), apiToken); downloadFile.then().assertThat().statusCode(200); String contentsOfDownloadedFile = downloadFile.getBody().asString(); From 8ccc7144215f0c3dafe08b1e7b6d354ee74415da Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 12:16:29 -0400 Subject: [PATCH 07/12] test(S3AccessIT): update drivers doc strings to include localstack_noredirect --- src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index f0834ceebbf..5beebc2f177 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -240,6 +240,7 @@ public void testDirectUpload() { "status": "OK", "data": { "LocalStack": "localstack1", + "LocalStackNoRedirect": "localstack_noredirect", "Local": "local", "Filesystem": "file1" } @@ -457,6 +458,7 @@ public void testDirectUploadDetectStataFile() { "status": "OK", "data": { "LocalStack": "localstack1", + "LocalStackNoRedirect": "localstack_noredirect", "Local": "local", "Filesystem": "file1" } @@ -643,6 +645,7 @@ public void testDirectUploadWithFileCountLimit() throws JsonParseException { "status": "OK", "data": { "LocalStack": "localstack1", + "LocalStackNoRedirect": "localstack_noredirect", "Local": "local", "Filesystem": "file1" } From d87393e3a9630e1e0822b059e2622c124416340e Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 14:10:48 -0400 Subject: [PATCH 08/12] fix(S3AccessIT): use distinct bucket name mybucket-noredirect for localstack_noredirect Using the same bucket name as localstack1 would cause a collision in the test environment when 
tasks/localstack_create_bucket.yml runs aws s3 mb on each bucket entry. Use mybucket-noredirect to avoid this. Update driver configs in both docker-compose files and switch S3AccessIT.testNonDirectUpload to use the new BUCKET_NAME_NOREDIRECT constant. --- conf/keycloak/docker-compose-dev.yml | 2 +- docker-compose-dev.yml | 2 +- src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 7 ++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/conf/keycloak/docker-compose-dev.yml b/conf/keycloak/docker-compose-dev.yml index 81e3878c7b1..7e57cd7d83c 100644 --- a/conf/keycloak/docker-compose-dev.yml +++ b/conf/keycloak/docker-compose-dev.yml @@ -57,7 +57,7 @@ services: -Ddataverse.files.localstack_noredirect.label=LocalStackNoRedirect -Ddataverse.files.localstack_noredirect.custom-endpoint-url=http://localstack:4566 -Ddataverse.files.localstack_noredirect.custom-endpoint-region=us-east-2 - -Ddataverse.files.localstack_noredirect.bucket-name=mybucket + -Ddataverse.files.localstack_noredirect.bucket-name=mybucket-noredirect -Ddataverse.files.localstack_noredirect.path-style-access=true -Ddataverse.files.localstack_noredirect.upload-redirect=false -Ddataverse.files.localstack_noredirect.download-redirect=false diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index b3c187a60d3..e16bf1f5e59 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -48,7 +48,7 @@ services: -Ddataverse.files.localstack_noredirect.label=LocalStackNoRedirect -Ddataverse.files.localstack_noredirect.custom-endpoint-url=http://localstack:4566 -Ddataverse.files.localstack_noredirect.custom-endpoint-region=us-east-2 - -Ddataverse.files.localstack_noredirect.bucket-name=mybucket + -Ddataverse.files.localstack_noredirect.bucket-name=mybucket-noredirect -Ddataverse.files.localstack_noredirect.path-style-access=true -Ddataverse.files.localstack_noredirect.upload-redirect=false -Ddataverse.files.localstack_noredirect.download-redirect=false diff --git 
a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 5beebc2f177..ed800ede727 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -54,6 +54,7 @@ public class S3AccessIT { private static final Logger logger = Logger.getLogger(S3AccessIT.class.getCanonicalName()); static final String BUCKET_NAME = "mybucket"; + static final String BUCKET_NAME_NOREDIRECT = "mybucket-noredirect"; static S3Client s3localstack = null; @BeforeAll @@ -165,13 +166,13 @@ public void testNonDirectUpload() { String storageIdentifier = JsonPath.from(addFileResponse.body().asString()).getString("data.files[0].dataFile.storageIdentifier"); String keyInDataverse = storageIdentifier.split(":")[2]; - Assertions.assertEquals(driverId + "://" + BUCKET_NAME + ":" + keyInDataverse, storageIdentifier); + Assertions.assertEquals(driverId + "://" + BUCKET_NAME_NOREDIRECT + ":" + keyInDataverse, storageIdentifier); String keyInS3 = datasetStorageIdentifier + "/" + keyInDataverse; String s3Object = null; try { ResponseInputStream s3ObjectResponse = s3localstack.getObject(GetObjectRequest.builder() - .bucket(BUCKET_NAME) + .bucket(BUCKET_NAME_NOREDIRECT) .key(keyInS3) .build()); // Read the content of the object into a string @@ -203,7 +204,7 @@ public void testNonDirectUpload() { S3Exception expectedException = null; try { ResponseInputStream s3ObjectResponse = s3localstack.getObject(GetObjectRequest.builder() - .bucket(BUCKET_NAME) + .bucket(BUCKET_NAME_NOREDIRECT) .key(keyInS3) .build()); // Read the content of the object into a string From 267d8e008c012acbdd22ace98602ff5d3663efee Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Fri, 15 May 2026 12:30:15 -0400 Subject: [PATCH 09/12] docs(big-data-support): remove MinIO references --- .../source/installation/big-data-support.rst | 9 +-------- 1 file changed, 1 insertion(+), 8 
deletions(-) diff --git a/doc/sphinx-guides/source/installation/big-data-support.rst b/doc/sphinx-guides/source/installation/big-data-support.rst index 45b94f71a9f..411bd6b54d8 100644 --- a/doc/sphinx-guides/source/installation/big-data-support.rst +++ b/doc/sphinx-guides/source/installation/big-data-support.rst @@ -68,7 +68,6 @@ If the bucket allows the wildcard ``*`` but the Dataverse application only allow Detailed information for the most common S3 admin tools around CORS: - `AWS `_ -- `Minio mc `_ - `s3cmd `_ Get Current CORS Policy on Bucket @@ -80,9 +79,6 @@ If you'd like to check the CORS configuration on your bucket before making chang .. group-tab:: AWS CLI :code:`aws s3api get-bucket-cors --bucket ` - .. group-tab:: Minio Client (mc) - :code:`mc cors get /` - Set CORS Policy on Bucket +++++++++++++++++++++++++ @@ -107,9 +103,6 @@ Both JSON and XML format are explained in detail in `AWS Docs ` as follows: - .. literalinclude:: /_static/installation/cors/cors.xml :name: xml-cors :language: xml @@ -124,7 +117,7 @@ Both JSON and XML format are explained in detail in `AWS Docs Date: Fri, 15 May 2026 12:32:43 -0400 Subject: [PATCH 10/12] docs(config): remove MinIO references --- doc/sphinx-guides/source/installation/config.rst | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f2a6fdfa324..2517e635006 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1218,7 +1218,7 @@ You can configure this redirect properly in your cloud environment to generate a Amazon S3 Storage (or Compatible) +++++++++++++++++++++++++++++++++ -The Dataverse Software supports Amazon S3 storage as well as other S3-compatible stores (like Minio, Ceph RADOS S3 Gateway and many more) for files uploaded to your Dataverse installation. 
+The Dataverse Software supports Amazon S3 storage as well as other S3-compatible stores (like Ceph RADOS S3 Gateway and many more) for files uploaded to your Dataverse installation. The Dataverse Software S3 driver supports multi-part upload for large files (over 1 GB by default - see the min-part-size option in the table below to change this). @@ -1264,7 +1264,7 @@ Please make note of the following details: - **Endpoint URL** - consult the documentation of your service on how to find it. - * Example: https://play.minio.io:9000 + * Example: http://localhost.localstack.cloud:4566 - **Region:** Optional, but some services might use it. Consult your service documentation. @@ -1461,11 +1461,6 @@ You may provide the values for these via any `supported MicroProfile Config API Reported Working S3-Compatible Storage ###################################### -`Minio v2018-09-12 `_ - Set ``dataverse.files..path-style-access=true``, as Minio works path-based. Works pretty smooth, easy to setup. - **Can be used for quick testing, too:** just use the example values above. Uses the public (read: unsecure and - possibly slow) https://play.minio.io:9000 service. - `StorJ Object Store `_ StorJ is a distributed object store that can be configured with an S3 gateway. Per the S3 Storage instructions above, you'll first set up the StorJ S3 store by defining the id, type, and label. After following the general installation, set the following configuration to use a StorJ object store: ``dataverse.files..chunked-encoding=false``. 
For step-by-step instructions see https://docs.storj.io/dcs/how-tos/dataverse-integration-guide/ From 4360a58e41c437af01c0c3882bea7c5e91ed7c35 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Fri, 15 May 2026 12:33:41 -0400 Subject: [PATCH 11/12] docs(S3AccessIO): remove MinIO references --- .../iq/dataverse/dataaccess/S3AccessIO.java | 115 +++++++++--------- 1 file changed, 58 insertions(+), 57 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 6d3fe205639..f8eb9f91bdf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -210,7 +210,7 @@ public void open(DataAccessOption... options) throws IOException { + ") is not associated with a bucket."); } } // else we're OK (assumes bucket name in storageidentifier matches the driver's - // bucketname) + // bucketname) } else { if (!storageIdentifier.contains(":")) { // No driver id or bucket @@ -307,8 +307,8 @@ public InputStream getInputStream() throws IOException { try { responseInputStream = s3.getObject(GetObjectRequest.builder().bucket(bucketName).key(key).build(), AsyncResponseTransformer.toBlockingInputStream()).get(); // Since s3 is an S3AsyncClient, we - // need to call .get() to wait for the - // result + // need to call .get() to wait for the + // result setInputStream(responseInputStream); } catch (InterruptedException | ExecutionException e) { // TODO Auto-generated catch block @@ -443,7 +443,7 @@ public void delete() throws IOException { try { DeleteObjectRequest deleteObjRequest = DeleteObjectRequest.builder().bucket(bucketName).key(key).build(); s3.deleteObject(deleteObjRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in 
S3AccessIO.delete(): " + e.getMessage()); throw new IOException("Failed to delete storage location " + getStorageLocation(), e); @@ -480,7 +480,7 @@ public boolean isAuxObjectCached(String auxItemTag) throws IOException { .build(); s3.headObject(headObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result return true; } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -499,20 +499,20 @@ public long getAuxObjectSize(String auxItemTag) throws IOException { try { HeadObjectResponse headObjectResponse = s3 .headObject(HeadObjectRequest.builder().bucket(bucketName).key(destinationKey).build()).get(); // Since - // s3 - // is - // an - // S3AsyncClient, - // we - // need - // to - // call - // .get() - // to - // wait - // for - // the - // result + // s3 + // is + // an + // S3AsyncClient, + // we + // need + // to + // call + // .get() + // to + // wait + // for + // the + // result return headObjectResponse.contentLength(); } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -539,7 +539,7 @@ public void backupAsAux(String auxItemTag) throws IOException { .destinationBucket(bucketName).destinationKey(destinationKey).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.backupAsAux: " + e.getMessage()); throw new IOException("S3AccessIO: Unable to backup original auxiliary object", e); @@ -554,7 +554,7 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { .sourceKey(destinationKey).destinationBucket(bucketName).destinationKey(key).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result 
deleteAuxObject(auxItemTag); } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.revertBackupAsAux: " + e.getMessage()); @@ -573,7 +573,7 @@ public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOExcep .build(); AsyncRequestBody asyncRequestBody = AsyncRequestBody.fromFile(fileSystemPath); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.savePathAsAux(): " + e.getMessage()); throw new IOException("S3AccessIO: Failed to save path as an auxiliary object.", e); @@ -597,7 +597,7 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon executorService); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { String failureMsg = e.getMessage(); @@ -610,21 +610,22 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon } /** - * Implements the StorageIO saveInputStreamAsAux() method. This implementation - * is problematic, because S3 cannot save an object of an unknown length. This - * effectively nullifies any benefits of streaming; as we cannot start saving - * until we have read the entire stream. One way of solving this would be to - * buffer the entire stream as byte[], in memory, then save it... Which of - * course would be limited by the amount of memory available, and thus would not - * work for streams larger than that. So we have eventually decided to save save - * the stream to a temp file, then save to S3. This is slower, but guaranteed to - * work on any size stream. 
An alternative we may want to consider is to not - * implement this method in the S3 driver, and make it throw the - * UnsupportedDataAccessOperationException, similarly to how we handle attempts - * to open OutputStreams, in this and the Swift driver. - * + * Implements the StorageIO saveInputStreamAsAux() method. This + * implementation is problematic, because S3 cannot save an object of an + * unknown length. This effectively nullifies any benefits of streaming; as + * we cannot start saving until we have read the entire stream. One way of + * solving this would be to buffer the entire stream as byte[], in memory, + * then save it... Which of course would be limited by the amount of memory + * available, and thus would not work for streams larger than that. So we + * have eventually decided to save the stream to a temp file, then save + * to S3. This is slower, but guaranteed to work on any size stream. An + * alternative we may want to consider is to not implement this method in + * the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle + * attempts to open OutputStreams, in this and the Swift driver. + * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. 
*/ @Override @@ -759,7 +760,7 @@ public void deleteAuxObject(String auxItemTag) throws IOException { .key(destinationKey).build(); s3.deleteObject(deleteObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait - // for the result + // for the result } catch (InterruptedException | ExecutionException e) { logger.warning("S3AccessIO: Unable to delete object: " + e.getMessage()); throw new IOException("Failed to delete auxiliary object", e); @@ -910,13 +911,13 @@ String getDestinationKey(String auxItemTag) throws IOException { } /** - * TODO: this function is not side effect free (sets instance variables key and - * bucketName). Is this good or bad? Need to ask @landreev + * TODO: this function is not side effect free (sets instance variables key + * and bucketName). Is this good or bad? Need to ask @landreev * * Extract the file key from a file stored on S3. Follows template: "owner - * authority name"/"owner identifier"/"storage identifier without bucketname and - * protocol" - * + * authority name"/"owner identifier"/"storage identifier without bucketname + * and protocol" + * * @return Main File Key * @throws IOException */ @@ -979,12 +980,12 @@ public boolean downloadRedirectEnabled(String auxObjectTag) { /** * Generates a temporary URL for a direct S3 download; either for the main * physical file, or (optionally) for an auxiliary. - * - * @param auxiliaryTag (optional) - * @param auxiliaryType (optional) - aux. mime type, if different from the - * main type - * @param auxiliaryFileName (optional) - file name, if different from the main - * file label. + * + * @param auxiliaryTag (optional) + * @param auxiliaryType (optional) - aux. mime type, if different from the + * main type + * @param auxiliaryFileName (optional) - file name, if different from the + * main file label. * @return redirect url * @throws IOException. 
*/ @@ -1003,9 +1004,9 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary GetObjectPresignRequest presignRequest = GetObjectPresignRequest.builder() .signatureDuration(expirationDuration) .getObjectRequest(req -> req.bucket(bucketName).key(key) - .responseContentDisposition("attachment; filename*=UTF-8''" - + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) - .responseContentType(contentType)) + .responseContentDisposition("attachment; filename*=UTF-8''" + + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) + .responseContentType(contentType)) .build(); PresignedGetObjectRequest presignedRequest; @@ -1270,7 +1271,7 @@ private static S3Presigner getPresigner(String driverId) { } } - + private static AwsCredentialsProvider getCredentialsProvider(String driverId) { if (driverCredentialsProviderMap.containsKey(driverId)) { return driverCredentialsProviderMap.get(driverId); @@ -1331,8 +1332,8 @@ public void removeTempTag() throws IOException { if (e.getCause() instanceof S3Exception) { S3Exception s3e = (S3Exception) e.getCause(); if (s3e.statusCode() == 501) { - // In this case, it's likely that tags are not implemented at all (e.g. 
by - // Minio) so no tag was set either and it's just something to be aware of + // In this case, it's likely that tags are not implemented at all, + // so no tag was set either and it's just something to be aware of logger.warning("Temp tag not deleted: Object tags not supported by storage: " + driverId); } else { // In this case, the assumption is that adding tags has worked, so not removing @@ -1521,12 +1522,12 @@ private void deleteFile(String fileName) throws IOException { throw new IOException("Failed to delete file", e); } } - + @Override public void closeInputStream() { try { ResponseInputStream responseInputStream = (ResponseInputStream) getInputStream(); - if(responseInputStream!= null && responseInputStream.available()>0) { + if (responseInputStream != null && responseInputStream.available() > 0) { responseInputStream.abort(); } } catch (IOException e) { From 1aaa2cb2e429cf1dbef084b14ecc68e931626abe Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Fri, 15 May 2026 12:34:47 -0400 Subject: [PATCH 12/12] docs(S3AccessIT): remove MinIO references --- .../harvard/iq/dataverse/api/S3AccessIT.java | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index ed800ede727..2597e77474b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -319,18 +319,6 @@ public void testDirectUpload() { InputStream inputStream = new ByteArrayInputStream(contentsOfFile.getBytes(StandardCharsets.UTF_8)); Response uploadFileDirect = UtilIT.uploadFileDirect(localhostUrl, inputStream); uploadFileDirect.prettyPrint(); - /* - Direct upload to MinIO is failing with errors like this: - - SignatureDoesNotMatch - The request signature we calculated does not match the signature you provided. Check your key and signing method. 
- 10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5 - mybucket - /mybucket/10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5 - 1793915CCC5BC95C - dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8 - - */ uploadFileDirect.then().assertThat().statusCode(200); // TODO: Use MD5 or whatever Dataverse is configured for and @@ -533,18 +521,6 @@ public void testDirectUploadDetectStataFile() { } Response uploadFileDirect = UtilIT.uploadFileDirect(localhostUrl, inputStream); uploadFileDirect.prettyPrint(); - /* - Direct upload to MinIO is failing with errors like this: - - SignatureDoesNotMatch - The request signature we calculated does not match the signature you provided. Check your key and signing method. - 10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5 - mybucket - /mybucket/10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5 - 1793915CCC5BC95C - dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8 - - */ uploadFileDirect.then().assertThat().statusCode(200); // TODO: Use MD5 or whatever Dataverse is configured for and