Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
2d657c9
feat(migrate): add extra migrations and flyway maven plugin to upgrad…
poikilotherm Jul 10, 2024
eccc0fe
feat(migrate): add Dataverse database migration module with (optional…
poikilotherm May 22, 2025
a08bc15
feat(migrate): migrate extra SQL table scripts to dedicated module
poikilotherm May 22, 2025
b72a3b9
feat(migrate): remove FZJ specific SQL migration scripts for URI fiel…
poikilotherm May 22, 2025
02a6c36
feat(migrate): add database migration scripts for new tables
poikilotherm May 22, 2025
b6065b7
feat(migrate,ct): update dependency versions and relocate configuration
poikilotherm May 22, 2025
9c6fbba
refactor: remove Flyway Maven plugin from pom.xml
poikilotherm May 22, 2025
bdd637f
chore(migrate): add migration for keywordTermUri and placeholder conf…
poikilotherm May 23, 2025
7c286d8
refactor(migrate): refine keyword migration logic with row count and …
poikilotherm May 27, 2025
64ec444
feat(migrate): add migration to clean up saved searches and linked ob…
poikilotherm May 27, 2025
c405bb2
chore(migrate): add SQL script to detect issues with file access flags
poikilotherm May 27, 2025
ad9dfa2
feat(migrate): add migration script to delete orphan templates
poikilotherm May 27, 2025
ce251e9
chore(migrate): remove obsolete EJB timer table with SQL migration
poikilotherm May 27, 2025
ff1bf17
feat(migrate): add migration script to identify datasets with access …
poikilotherm May 27, 2025
868a9eb
chore(migrate): update migration script for keywordTermUri handling
poikilotherm Jun 2, 2025
2c82cae
fix(migrate): update pg_dump command options
poikilotherm Jun 2, 2025
6f49209
style(migrate): correct volume file name in PostgreSQL config
poikilotherm Jun 2, 2025
f888def
chore(migrate): align naming and types as well as references
poikilotherm Jul 30, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Remove the obsolete EJB timer table.
--
-- PostgreSQL folds unquoted identifiers to lower case, so an unquoted EJB__TIMER__TBL only matches a table
-- that is literally named ejb__timer__tbl; combined with IF EXISTS a miss goes unnoticed.
-- NOTE(review): the application server's timer DDL presumably creates the table with a quoted, upper-case
-- name - confirm against a deployed schema. Both spellings are dropped; each statement is a no-op when the
-- respective table does not exist.
DROP TABLE IF EXISTS "EJB__TIMER__TBL";
DROP TABLE IF EXISTS ejb__timer__tbl;
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Workaround: the DDL statements for this table are missing in migration V5.1.1.2,
-- so the table and its index are created here unless they already exist.

create table if not exists externaltooltype
(
    id              serial,
    type            varchar(255) not null,
    externaltool_id bigint       not null,
    primary key (id),
    constraint fk_externaltooltype_externaltool_id
        foreign key (externaltool_id) references externaltool (id)
);

create index if not exists index_externaltooltype_externaltool_id
    on externaltooltype (externaltool_id);
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
-- Deletes orphan templates, i.e. templates whose dataverse_id is NULL (see also issue #8600).
--
-- Nothing is touched when no orphan template exists. Otherwise the dependent rows are removed by hand in an
-- order that satisfies the references between them, because the JPA cascading that the Admin API would
-- provide is not available in a plain SQL migration.

DO
$$
DECLARE
    n_orphan_templates    INTEGER;
    n_affected_dataverses INTEGER;
    n_rows                INTEGER;
BEGIN
    -- Number of templates without an owning dataverse
    SELECT COUNT(t.id)
    INTO n_orphan_templates
    FROM template AS t
    WHERE t.dataverse_id IS NULL;

    -- Number of dataverse collections whose default template is such an orphan
    SELECT COUNT(*)
    INTO n_affected_dataverses
    FROM dataverse AS d
    WHERE d.defaulttemplate_id IN (
        SELECT t.id
        FROM template AS t
        WHERE t.dataverse_id IS NULL
    );

    -- Everything below only runs when there is something to clean up
    IF n_orphan_templates > 0 THEN
        RAISE NOTICE '--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---';
        RAISE NOTICE 'Found % orphan templates (templates with no associated dataverse)', n_orphan_templates;
        RAISE NOTICE 'Found % dataverses using orphan templates as their default template', n_affected_dataverses;

        -- Step 1: detach orphan templates from collections that use them as default template
        UPDATE dataverse
        SET defaulttemplate_id = NULL
        WHERE defaulttemplate_id IN (
            SELECT t.id
            FROM template AS t
            WHERE t.dataverse_id IS NULL
        );

        GET DIAGNOSTICS n_rows = ROW_COUNT;
        RAISE NOTICE 'Updated % collections to remove orphan templates set as default', n_rows;

        -- Step 2: remember which datasetfields belong to orphan templates ...
        CREATE TEMP TABLE temp_datasetfields_to_delete AS
            SELECT id
            FROM datasetfield
            WHERE template_id IN (
                SELECT t.id
                FROM template AS t
                WHERE t.dataverse_id IS NULL
            );

        -- ... and which compound values hang below those datasetfields
        CREATE TEMP TABLE temp_compoundvalues_to_delete AS
            SELECT cv.id
            FROM datasetfieldcompoundvalue AS cv
            WHERE cv.parentdatasetfield_id IN (
                SELECT id
                FROM temp_datasetfields_to_delete
            );

        -- Step 3: remove the controlled vocabulary mappings of the remembered datasetfields
        DELETE
        FROM datasetfield_controlledvocabularyvalue
        WHERE datasetfield_id IN (
            SELECT id
            FROM temp_datasetfields_to_delete
        );

        GET DIAGNOSTICS n_rows = ROW_COUNT;
        RAISE NOTICE 'Deleted % vocabulary mappings associated with orphan templates', n_rows;

        -- Step 4: remove the plain values of the remembered datasetfields
        DELETE
        FROM datasetfieldvalue
        WHERE datasetfield_id IN (
            SELECT id
            FROM temp_datasetfields_to_delete
        );

        GET DIAGNOSTICS n_rows = ROW_COUNT;
        RAISE NOTICE 'Deleted % datasetfieldvalues associated with orphan templates', n_rows;

        -- Step 5: datasetfields and compound values reference each other; cut the link from the
        -- datasetfield side first so that the compound values can be deleted.
        -- NOTE(review): datasetfields detached here are only removed later if they are also listed in
        -- temp_datasetfields_to_delete - verify that no child fields (and their values) are left behind.
        UPDATE datasetfield
        SET parentdatasetfieldcompoundvalue_id = NULL
        WHERE parentdatasetfieldcompoundvalue_id IN (
            SELECT id
            FROM temp_compoundvalues_to_delete
        );

        GET DIAGNOSTICS n_rows = ROW_COUNT;
        RAISE NOTICE 'Updated % datasetfields to remove references to compound values', n_rows;

        -- Step 6: the compound values are no longer referenced and can go
        DELETE
        FROM datasetfieldcompoundvalue
        WHERE id IN (
            SELECT id
            FROM temp_compoundvalues_to_delete
        );

        GET DIAGNOSTICS n_rows = ROW_COUNT;
        RAISE NOTICE 'Deleted % datasetfieldcompoundvalues associated with orphan templates', n_rows;

        -- Step 7: remove the remembered datasetfields themselves
        DELETE
        FROM datasetfield
        WHERE id IN (
            SELECT id
            FROM temp_datasetfields_to_delete
        );

        GET DIAGNOSTICS n_rows = ROW_COUNT;
        RAISE NOTICE 'Deleted % datasetfields referencing orphan templates', n_rows;

        -- The bookkeeping tables are not needed any longer
        DROP TABLE temp_datasetfields_to_delete;
        DROP TABLE temp_compoundvalues_to_delete;

        -- Step 8: finally remove the orphan templates
        DELETE
        FROM template
        WHERE dataverse_id IS NULL;

        GET DIAGNOSTICS n_rows = ROW_COUNT;
        RAISE NOTICE 'Deleted % orphan templates', n_rows;
        RAISE NOTICE '--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---';
    END IF;
END
$$;
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
-- Reports saved searches, dataset links and collection links affected by issue #7398 and, when opted in,
-- deletes them.
--
-- The deletion only happens when the Flyway placeholder V5_4_cleanup_searches_and_links resolves to "true"
-- (case-insensitive); otherwise only NOTICE messages are emitted.
--
-- This block deliberately contains no COMMIT: a DO block that is executed inside a transaction block must
-- not run transaction control statements (PostgreSQL raises "invalid transaction termination"), and
-- migrations are normally executed inside a transaction. All changes are committed or rolled back together
-- with the surrounding migration transaction.
--
-- NOTE(review): PostgreSQL does not guarantee the evaluation order of WHERE predicates, so the ::integer
-- cast on the tail of filterquery might be evaluated for rows that do not start with 'subtreePaths' -
-- verify that this cannot raise a cast error on real data.
DO
$$
DECLARE
    should_delete BOOLEAN := CASE WHEN UPPER('${V5_4_cleanup_searches_and_links}') = 'TRUE' THEN TRUE ELSE FALSE END;
    affected_searches_count INTEGER;
    affected_linked_datasets_count INTEGER;
    affected_linked_collections_count INTEGER;
    row_count INTEGER;
BEGIN
    -- Saved searches with query '*' whose subtreePaths filter points at a dataverse that is linked into the
    -- saved search's definition point
    SELECT COUNT(*)
    INTO affected_searches_count
    FROM savedsearch AS ss
    INNER JOIN savedsearchfilterquery AS ssfq
        ON ssfq.savedsearch_id = ss.id
    INNER JOIN dataverselinkingdataverse AS dld
        ON dld.linkingdataverse_id = ss.definitionpoint_id
    WHERE dld.dataverse_id = rtrim(reverse(split_part(reverse(ssfq.filterquery), '/', 1)), '"')::integer
      AND ss.query = '*'
      AND ssfq.filterquery LIKE 'subtreePaths%';

    -- Dataset links whose dataset is owned by a dataverse that is linked into the same linking dataverse
    SELECT COUNT(*)
    INTO affected_linked_datasets_count
    FROM datasetlinkingdataverse AS dld
    INNER JOIN dvobject AS dvo
        ON dvo.id = dld.dataset_id
    INNER JOIN dataverselinkingdataverse AS dvld
        ON dvld.linkingdataverse_id = dld.linkingdataverse_id
       AND dvld.dataverse_id = dvo.owner_id;

    -- Collection links whose dataverse is owned by a dataverse that is linked into the same linking dataverse
    SELECT COUNT(*)
    INTO affected_linked_collections_count
    FROM dataverselinkingdataverse AS dld
    INNER JOIN dvobject AS dvo
        ON dvo.id = dld.dataverse_id
    INNER JOIN dataverselinkingdataverse AS dvld
        ON dvld.linkingdataverse_id = dld.linkingdataverse_id
       AND dvld.dataverse_id = dvo.owner_id;

    -- Only show hint if any count is greater than 0
    IF affected_searches_count > 0 OR affected_linked_datasets_count > 0 OR affected_linked_collections_count > 0 THEN
        RAISE NOTICE '--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---';
        RAISE NOTICE 'Found % saved searches affected by issue #7398', affected_searches_count;
        RAISE NOTICE 'Found % dataset links affected by issue #7398', affected_linked_datasets_count;
        RAISE NOTICE 'Found % collections links affected by issue #7398', affected_linked_collections_count;

        IF should_delete THEN
            RAISE NOTICE 'Cleaning up Saved Searches and Linked Datasets as requested by -Dmigrate.cleanupSavedSearches';

            -- Collect the ids of the affected saved searches first: the filter queries have to be deleted
            -- before the saved searches, but they are also needed to identify them.
            -- The table disappears when the surrounding transaction commits.
            CREATE TEMPORARY TABLE delete_ss ON COMMIT DROP AS (
                SELECT ss.id
                FROM savedsearch AS ss
                INNER JOIN savedsearchfilterquery AS ssfq
                    ON ssfq.savedsearch_id = ss.id
                INNER JOIN dataverselinkingdataverse AS dld
                    ON dld.linkingdataverse_id = ss.definitionpoint_id
                WHERE dld.dataverse_id = rtrim(reverse(split_part(reverse(ssfq.filterquery), '/', 1)), '"')::integer
                  AND ss.query = '*'
                  AND ssfq.filterquery LIKE 'subtreePaths%'
            );

            -- The reported number is the number of rows collected above
            GET DIAGNOSTICS row_count = ROW_COUNT;

            DELETE FROM savedsearchfilterquery WHERE savedsearch_id IN (SELECT id FROM delete_ss);
            DELETE FROM savedsearch WHERE id IN (SELECT id FROM delete_ss);

            RAISE NOTICE 'Deleted % saved searches', row_count;

            -- Delete the affected dataset links
            DELETE FROM datasetlinkingdataverse WHERE id IN (
                SELECT dld.id
                FROM datasetlinkingdataverse AS dld
                INNER JOIN dvobject AS dvo
                    ON dvo.id = dld.dataset_id
                INNER JOIN dataverselinkingdataverse AS dvld
                    ON dvld.linkingdataverse_id = dld.linkingdataverse_id
                   AND dvld.dataverse_id = dvo.owner_id
            );

            GET DIAGNOSTICS row_count = ROW_COUNT;
            RAISE NOTICE 'Deleted % linked datasets', row_count;

            -- Delete the affected collection links
            DELETE FROM dataverselinkingdataverse WHERE id IN (
                SELECT dld.id
                FROM dataverselinkingdataverse AS dld
                INNER JOIN dvobject AS dvo
                    ON dvo.id = dld.dataverse_id
                INNER JOIN dataverselinkingdataverse AS dvld
                    ON dvld.linkingdataverse_id = dld.linkingdataverse_id
                   AND dvld.dataverse_id = dvo.owner_id
            );

            GET DIAGNOSTICS row_count = ROW_COUNT;
            RAISE NOTICE 'Deleted % linked collections', row_count;

        ELSE
            RAISE NOTICE 'Auto-migrate these using `mvn -Dmigrate.cleanupSavedSearches ...` (re-execute migrations)';
        END IF;
        RAISE NOTICE '--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---';
    END IF;
END
$$;
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
-- Detection only: counts datasets where a superuser has run the Curate command and the update included a
-- change to the fileaccessrequest flag, so that the file access request update is not reflected in the
-- published version (issue #7687). Nothing is modified; a hint is printed when matches exist.
DO
$$
DECLARE
    -- No opt-in switch yet; one would be declared here once an automatic fix exists.
    mismatch_total INTEGER;
BEGIN
    -- A dataset matches when its own fileaccessrequest flag differs from the flag stored in the terms of
    -- its RELEASED version carrying the dataset's latest release time.
    SELECT COUNT(*)
    INTO mismatch_total
    FROM dataset AS da
    INNER JOIN datasetversion AS dv
        ON dv.dataset_id = da.id
    INNER JOIN termsofuseandaccess AS ta
        ON ta.id = dv.termsofuseandaccess_id
    WHERE ta.fileaccessrequest != da.fileaccessrequest
      AND dv.versionstate = 'RELEASED'
      AND dv.releasetime IN (
          SELECT MAX(latest.releasetime)
          FROM datasetversion AS latest
          WHERE latest.dataset_id = da.id
      );

    -- Stay silent unless something was found
    IF mismatch_total > 0 THEN
        RAISE NOTICE '--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---';
        RAISE NOTICE 'Found % datasets affected by issue #7687', mismatch_total;
        RAISE NOTICE 'For now, please fix these manually. See Dataverse v5.4 release notes about #7687.';
        RAISE NOTICE '--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---';
    END IF;

    -- TODO: an opt-in migration to fix them all would be nice!
END;
$$;
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Workaround: the auxiliaryfile table is missing in migration V5.4.1.1, so it is created here unless it
-- already exists.

CREATE TABLE IF NOT EXISTS auxiliaryfile
(
    id            SERIAL,
    checksum      VARCHAR(255),
    contenttype   VARCHAR(255),
    filesize      BIGINT,
    formattag     VARCHAR(255),
    formatversion VARCHAR(255),
    ispublic      BOOLEAN,
    origin        VARCHAR(255),
    type          VARCHAR(255),
    datafile_id   BIGINT NOT NULL,
    PRIMARY KEY (id),
    -- No column list: the reference targets the primary key of dvobject
    CONSTRAINT fk_auxiliaryfile_datafile_id
        FOREIGN KEY (datafile_id) REFERENCES dvobject
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Workaround: the license table is missing in migration V5.9.0.1, so the table and its sort index are
-- created here unless they already exist.

CREATE TABLE IF NOT EXISTS license
(
    id               SERIAL,
    active           BOOLEAN NOT NULL,
    iconurl          TEXT,
    isdefault        BOOLEAN NOT NULL,
    name             TEXT,
    shortdescription TEXT,
    sortorder        BIGINT  NOT NULL DEFAULT 0,
    uri              TEXT,
    PRIMARY KEY (id),
    CONSTRAINT unq_license_0 UNIQUE (name),
    CONSTRAINT unq_license_1 UNIQUE (uri)
);

CREATE INDEX IF NOT EXISTS license_sortorder_id
    ON license (sortorder, id);
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Workaround: the embargo table is missing in migration V6.0.0.2
CREATE TABLE IF NOT EXISTS embargo
(
    id            SERIAL,
    dateavailable DATE NOT NULL,
    reason        TEXT,
    PRIMARY KEY (id)
);

-- Workaround: the storageuse table is missing in migration V6.0.0.5
CREATE TABLE IF NOT EXISTS storageuse
(
    id                   SERIAL,
    sizeinbytes          BIGINT,
    dvobjectcontainer_id BIGINT NOT NULL,
    PRIMARY KEY (id),
    -- No column list: the reference targets the primary key of dvobject
    CONSTRAINT fk_storageuse_dvobjectcontainer_id
        FOREIGN KEY (dvobjectcontainer_id) REFERENCES dvobject
);

CREATE INDEX IF NOT EXISTS index_storageuse_dvobjectcontainer_id
    ON storageuse (dvobjectcontainer_id);

Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- Workaround: the externalvocabularyvalue table is missing in migration V6.1.0.3, so the table and its
-- uri index are created here unless they already exist.
CREATE TABLE IF NOT EXISTS externalvocabularyvalue
(
    id             SERIAL,
    lastupdatedate TIMESTAMP,
    uri            TEXT,
    value          TEXT,
    PRIMARY KEY (id),
    -- The constraint name is kept exactly as spelled in the original statement
    CONSTRAINT externalvocabularvalue_uri_key UNIQUE (uri)
);

CREATE INDEX IF NOT EXISTS index_externalvocabularyvalue_uri
    ON externalvocabularyvalue (uri);


Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
-- Detection only: counts keywordValue metadata values that start with "http" and points to the
-- Dataverse 6.3 release notes for migrating them to the keywordTermURI field. Nothing is modified.
DO
$$
DECLARE
    -- Opt-in switch, true when the Flyway placeholder resolves to "true" (case-insensitive).
    -- Currently only referenced by the disabled migration sketch further down.
    migrate_requested BOOLEAN := UPPER('${V6_3_migrate_keywordTermUri}') = 'TRUE';
    http_keyword_total INTEGER;
BEGIN
    -- Count the values of keywordValue fields that look like a URL
    SELECT COUNT(*)
    INTO http_keyword_total
    FROM datasetfield AS df
    INNER JOIN datasetfieldvalue AS dfv
        ON dfv.datasetfield_id = df.id
    WHERE dfv.value ILIKE 'http%'
      AND df.datasetfieldtype_id = (
          SELECT id
          FROM datasetfieldtype
          WHERE name = 'keywordValue'
      );

    -- Stay silent unless something was found
    IF http_keyword_total > 0 THEN
        RAISE NOTICE '--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---';
        RAISE NOTICE 'Found % keywordValue metadata fields starting with "http"', http_keyword_total;
        RAISE NOTICE 'See release notes of Dataverse 6.3 to learn more about migrating these to the keywordTermURI field.';

        /*
        Disabled on purpose: executing this migration requires the keywordTermURI metadata field to be
        loaded, and metadata fields have never been added by a migration so far. It may be fine to keep
        this a manual task for now.

        IF migrate_requested THEN
            RAISE NOTICE 'Migrating keywordValue fields with http... to keywordTermUri as requested by -Dmigrate.keywordTermUri';
            UPDATE datasetfield df
            SET datasetfieldtype_id = (SELECT id FROM datasetfieldtype WHERE name = 'keywordTermURI')
            FROM datasetfieldvalue dfv
            WHERE dfv.datasetfield_id = df.id
              AND df.datasetfieldtype_id = (SELECT id FROM datasetfieldtype WHERE name = 'keywordValue')
              AND dfv.value ILIKE 'http%';

            GET DIAGNOSTICS http_keyword_total = ROW_COUNT;
            RAISE NOTICE 'Updated % rows', http_keyword_total;

        ELSE
            RAISE NOTICE 'Auto-migrate these into keywordTermUri using `mvn -Dmigrate.keywordTermUri ...` (re-execute migrations)';
        END IF;
        */

        RAISE NOTICE '--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---';
    END IF;
END
$$;
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
-- Workaround: the dataset type tables are missing in migration V6.3.0.3, so they are created here unless
-- they already exist.

-- The dataset types themselves
CREATE TABLE IF NOT EXISTS datasettype
(
    id   SERIAL,
    name VARCHAR(255) NOT NULL,
    PRIMARY KEY (id),
    CONSTRAINT unq_datasettype_0 UNIQUE (name)
);

-- Join table between dataset types and licenses
CREATE TABLE IF NOT EXISTS datasettype_licenses
(
    datasettype_id BIGINT NOT NULL,
    licenses_id    BIGINT NOT NULL,
    PRIMARY KEY (datasettype_id, licenses_id),
    CONSTRAINT fk_datasettype_license_datasettype_id
        FOREIGN KEY (datasettype_id) REFERENCES datasettype,
    CONSTRAINT fk_datasettype_license_licenses_id
        FOREIGN KEY (licenses_id) REFERENCES license
);

-- Join table between dataset types and metadata blocks
CREATE TABLE IF NOT EXISTS datasettype_metadatablocks
(
    datasettype_id    BIGINT NOT NULL,
    metadatablocks_id BIGINT NOT NULL,
    PRIMARY KEY (datasettype_id, metadatablocks_id),
    CONSTRAINT fk_datasettype_metadatablock_datasettype_id
        FOREIGN KEY (datasettype_id) REFERENCES datasettype,
    CONSTRAINT fk_datasettype_metadatablock_metadatablocks_id
        FOREIGN KEY (metadatablocks_id) REFERENCES public.metadatablock
);
Loading