From c1626139cb4494dd9fbe5f9e325559d7fd5958e8 Mon Sep 17 00:00:00 2001 From: XingY Date: Mon, 23 Feb 2026 14:15:29 -0800 Subject: [PATCH] Simplify query for runs to export for samples and data --- .../labkey/api/exp/api/ExperimentService.java | 6 ++ .../experiment/api/ExperimentServiceImpl.java | 102 ++++++++++++++++++ .../samples/DataClassFolderWriter.java | 33 ++---- .../samples/SampleTypeFolderWriter.java | 47 +++----- 4 files changed, 128 insertions(+), 60 deletions(-) diff --git a/api/src/org/labkey/api/exp/api/ExperimentService.java b/api/src/org/labkey/api/exp/api/ExperimentService.java index f776ea533b3..be904efff7c 100644 --- a/api/src/org/labkey/api/exp/api/ExperimentService.java +++ b/api/src/org/labkey/api/exp/api/ExperimentService.java @@ -804,6 +804,12 @@ static void validateParentAlias(Map aliasMap, Set reserv List getRunsUsingDataClasses(Collection dataClasses); + /** Get derivation run IDs for a data class — runs with SAMPLE_DERIVATION_PROTOCOL that have no material inputs/outputs */ + List getDerivationRunIdsForDataClassExport(long dataClassRowId); + + /** Get derivation/aliquot run IDs for sample types — filtered by protocol and optionally excluding runs with data inputs/outputs */ + List getDerivationRunIdsForSampleTypesExport(Collection sampleTypeLsids, Container c, boolean includeRunsWithDataIO); + /** * @return the subset of these runs which are supposed to be deleted when one of their inputs is deleted. */ diff --git a/experiment/src/org/labkey/experiment/api/ExperimentServiceImpl.java b/experiment/src/org/labkey/experiment/api/ExperimentServiceImpl.java index b34a95cc8ef..f45a77a50ba 100644 --- a/experiment/src/org/labkey/experiment/api/ExperimentServiceImpl.java +++ b/experiment/src/org/labkey/experiment/api/ExperimentServiceImpl.java @@ -5799,6 +5799,108 @@ public List> getRunsAndRolesUsingMaterial(ExpMaterial mater return getRunsAndRolesUsingInput(material, getTinfoMaterialInput(), "MaterialId", () -> getRunsUsingMaterials(material.getRowId())); } + @Override + public List getDerivationRunIdsForDataClassExport(long dataClassRowId) + { + SQLFragment sql = new SQLFragment(""" + SELECT DISTINCT er.RowId + FROM exp.ExperimentRun er + WHERE er.ProtocolLSID = ? + AND er.RowId IN ( + SELECT pa.RunId FROM exp.ProtocolApplication pa + INNER JOIN exp.DataInput di ON di.TargetApplicationId = pa.RowId + INNER JOIN exp.Data d ON di.DataId = d.RowId + WHERE d.classId = ? + UNION + SELECT pa.RunId FROM exp.ProtocolApplication pa + INNER JOIN exp.Data d ON d.SourceApplicationId = pa.RowId + WHERE d.classId = ? + ) + AND NOT EXISTS ( + SELECT 1 FROM exp.ProtocolApplication pa2 + INNER JOIN exp.MaterialInput mi ON mi.TargetApplicationId = pa2.RowId + WHERE pa2.RunId = er.RowId + ) + AND NOT EXISTS ( + SELECT 1 FROM exp.Material m + INNER JOIN exp.ProtocolApplication pa3 ON m.SourceApplicationId = pa3.RowId + WHERE pa3.RunId = er.RowId + ) + """, + SAMPLE_DERIVATION_PROTOCOL_LSID, + dataClassRowId, + dataClassRowId + ); + + return new SqlSelector(getExpSchema(), sql).getArrayList(Long.class); + } + + @Override + public List getDerivationRunIdsForSampleTypesExport(Collection sampleTypeLsids, Container c, boolean includeRunsWithDataIO) + { + if (sampleTypeLsids.isEmpty()) + return Collections.emptyList(); + + SQLFragment inClause = getExpSchema().getSqlDialect().appendInClauseSql(new SQLFragment(), sampleTypeLsids); + + // Subquery to find runs that use materials from the given sample types and container + SQLFragment materialsSubquery = new SQLFragment(""" + SELECT pa.RunId FROM exp.ProtocolApplication pa + INNER JOIN exp.MaterialInput mi ON mi.TargetApplicationId = pa.RowId + INNER JOIN exp.Material m ON mi.MaterialId = m.RowId + WHERE m.cpasType """); + materialsSubquery.append(inClause); + materialsSubquery.append(" AND m.Container = ?\n"); + materialsSubquery.add(c); + materialsSubquery.append(""" + UNION + SELECT pa.RunId FROM exp.ProtocolApplication pa + INNER JOIN exp.Material m ON m.SourceApplicationId = pa.RowId + WHERE m.cpasType """); + materialsSubquery.append(inClause); + materialsSubquery.append(" AND m.Container = ?"); + materialsSubquery.add(c); + + SQLFragment sql = new SQLFragment(); + sql.append("SELECT DISTINCT er.RowId\nFROM exp.ExperimentRun er\nWHERE er.RowId IN (\n"); + sql.append(materialsSubquery); + sql.append(")\n"); + + if (includeRunsWithDataIO) + { + // Include all derivation and aliquot runs + sql.append("AND er.ProtocolLSID IN (?, ?)\n"); + sql.add(SAMPLE_DERIVATION_PROTOCOL_LSID); + sql.add(SAMPLE_ALIQUOT_PROTOCOL_LSID); + } + else + { + // Aliquot runs are always included; derivation runs only if they have no data inputs/outputs + sql.append("AND (\n"); + sql.append(" er.ProtocolLSID = ?\n"); + sql.add(SAMPLE_ALIQUOT_PROTOCOL_LSID); + sql.append(" OR (\n"); + sql.append(" er.ProtocolLSID = ?\n"); + sql.add(SAMPLE_DERIVATION_PROTOCOL_LSID); + sql.append(""" + AND NOT EXISTS ( + SELECT 1 FROM exp.ProtocolApplication pa2 + INNER JOIN exp.DataInput di ON di.TargetApplicationId = pa2.RowId + WHERE pa2.RunId = er.RowId + ) + AND NOT EXISTS ( + SELECT 1 FROM exp.Data d + INNER JOIN exp.ProtocolApplication pa3 ON d.SourceApplicationId = pa3.RowId + WHERE pa3.RunId = er.RowId + ) + ) + ) + """); + } + + return new SqlSelector(getExpSchema(), sql).getArrayList(Long.class); + } + @Override public List runsDeletedWithInput(List runs) { diff --git a/experiment/src/org/labkey/experiment/samples/DataClassFolderWriter.java b/experiment/src/org/labkey/experiment/samples/DataClassFolderWriter.java index d98a678487f..bb8b9d14592 100644 --- a/experiment/src/org/labkey/experiment/samples/DataClassFolderWriter.java +++ b/experiment/src/org/labkey/experiment/samples/DataClassFolderWriter.java @@ -11,10 +11,7 @@ import org.labkey.api.data.SimpleFilter; import org.labkey.api.data.Sort; import org.labkey.api.data.TableInfo; -import org.labkey.api.exp.api.ExpData; import org.labkey.api.exp.api.ExpDataClass; -import org.labkey.api.exp.api.ExpObject; -import org.labkey.api.exp.api.ExpRun; import org.labkey.api.exp.api.ExperimentService; import org.labkey.api.exp.query.ExpSchema; import org.labkey.api.query.FieldKey; @@ -29,9 +26,6 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; - -import static java.util.stream.Collectors.toList; public abstract class DataClassFolderWriter extends AbstractExpFolderWriter { @@ -78,19 +72,14 @@ public void write(Container c, FolderExportContext ctx, VirtualFile vf) throws E typesSelection.addDataClass(dataClass); exportTypes = true; - // get the list of runs with the data we expect to export, these will be the sample derivation - // protocol runs to track the lineage + // get the list of derivation runs for this data class — only sample derivation protocol runs + // with no material inputs/outputs (those are handled by the sample type writer) + // Sample derivation protocols involving data classes can be either to/from another data + // class or also to/from a sample type. If it's the latter, we will let the sample writer handle it + // since on import, data classes run before sample types. if (exportDataClassData) { - List dataIdsToExport = dataClass.getDatas().stream().map(ExpData::getRowId).collect(toList()); - - // only want the sample derivation runs; other runs will get included in the experiment xar. - List exportedRunIds = ExperimentService.get().getRunsUsingDataIds(dataIdsToExport).stream().filter(run -> { - String lsid = run.getProtocol().getLSID(); - return lsid.equals(ExperimentService.SAMPLE_DERIVATION_PROTOCOL_LSID) && isValidRunType(ctx, run); - }) - .collect(Collectors.toSet()) - .stream().map(ExpObject::getRowId).toList(); + List exportedRunIds = ExperimentService.get().getDerivationRunIdsForDataClassExport(dataClass.getRowId()); if (!exportedRunIds.isEmpty()) { @@ -129,16 +118,6 @@ public void write(Container c, FolderExportContext ctx, VirtualFile vf) throws E exportContext.setDataClassXarCreated(true); } - /** - * Sample derivation protocols involving data classes can be either to/from another data - * class or also to/from a sample type. If it's the latter, we will let the sample writer handle it - * since on import, data classes run before sample types. - */ - private boolean isValidRunType(FolderExportContext ctx, ExpRun run) - { - return run.getMaterialOutputs().isEmpty() && run.getMaterialInputs().isEmpty(); - } - private void writeDataClassDataFiles(Set dataClasses, FolderExportContext ctx, VirtualFile dir) throws Exception { // write out the DataClass rows diff --git a/experiment/src/org/labkey/experiment/samples/SampleTypeFolderWriter.java b/experiment/src/org/labkey/experiment/samples/SampleTypeFolderWriter.java index 9d445f31a41..b61bfbcc144 100644 --- a/experiment/src/org/labkey/experiment/samples/SampleTypeFolderWriter.java +++ b/experiment/src/org/labkey/experiment/samples/SampleTypeFolderWriter.java @@ -16,7 +16,6 @@ import org.labkey.api.data.WrappedColumnInfo; import org.labkey.api.exp.Lsid; import org.labkey.api.exp.api.ExpMaterial; -import org.labkey.api.exp.api.ExpRun; import org.labkey.api.exp.api.ExpSampleType; import org.labkey.api.exp.api.ExperimentService; import org.labkey.api.exp.api.SampleTypeService; @@ -40,7 +39,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; public abstract class SampleTypeFolderWriter extends AbstractExpFolderWriter { @@ -62,7 +60,6 @@ public void write(Container c, FolderExportContext ctx, VirtualFile vf) throws E XarExportSelection typesSelection = new XarExportSelection(); XarExportSelection runsSelection = new XarExportSelection(); Set sampleTypes = new HashSet<>(); - List materialsToExport = new ArrayList<>(); _exportPhiLevel = ctx.getPhiLevel(); boolean exportTypes = false; boolean exportRuns = false; @@ -80,6 +77,7 @@ public void write(Container c, FolderExportContext ctx, VirtualFile vf) throws E return; Lsid sampleTypeLsid = new Lsid(ExperimentService.get().generateLSID(c, ExpSampleType.class, "export")); + Set sampleTypeLsids = new HashSet<>(); for (ExpSampleType sampleType : SampleTypeService.get().getSampleTypes(c, true)) { // ignore the magic sample type that is used for the specimen repository, it is managed by the specimen importer @@ -98,30 +96,27 @@ public void write(Container c, FolderExportContext ctx, VirtualFile vf) throws E { sampleTypes.add(sampleType); typesSelection.addSampleType(sampleType); - materialsToExport.addAll(sampleType.getSamples(c)); + sampleTypeLsids.add(sampleType.getLSID()); exportTypes = true; } } // get the list of runs with the materials or data we expect to export, these will be the sample derivation // protocol runs to track the lineage - Set exportedRuns = new HashSet<>(); - if (!materialsToExport.isEmpty() && exportSampleTypeData) - exportedRuns.addAll(ExperimentService.get().getRunsUsingMaterials(materialsToExport)); - // only want the sample derivation runs; other runs will get included in the experiment xar. - exportedRuns = exportedRuns.stream().filter(run -> { - String lsid = run.getProtocol().getLSID(); - if (lsid.equals(ExperimentService.SAMPLE_DERIVATION_PROTOCOL_LSID)) - return isValidRunType(ctx, run); - else - return lsid.equals(ExperimentService.SAMPLE_ALIQUOT_PROTOCOL_LSID); - }).collect(Collectors.toSet()); - - if (!exportedRuns.isEmpty()) + // Sample derivation protocols involving samples can be either to/from another sample + // or to/from a data class. If it's the latter, don't include the run if data class data + // is not included in the archive. + if (exportSampleTypeData && !sampleTypes.isEmpty()) { - runsSelection.addRuns(exportedRuns); - exportRuns = true; + boolean includeRunsWithDataIO = ctx.getDataTypes().contains(FolderArchiveDataTypes.DATA_CLASS_DATA); + List exportedRunIds = ExperimentService.get().getDerivationRunIdsForSampleTypesExport(sampleTypeLsids, c, includeRunsWithDataIO); + + if (!exportedRunIds.isEmpty()) + { + runsSelection.addRunIds(exportedRunIds); + exportRuns = true; + } } // UNDONE: The other exporters use FOLDER_RELATIVE, but it wants to use ${AutoFileLSID} replacements for DataClass LSIDs when exporting the TSV data.. see comment in ExportLsidDataColumn @@ -153,20 +148,6 @@ public void write(Container c, FolderExportContext ctx, VirtualFile vf) throws E exportContext.setSampleXarCreated(true); } - /** - * Sample derivation protocols involving samples can be either to/from another sample - * or to/from a data class. If it's the latter, don't include the run if data class data - * is not included in the archive. - */ - private boolean isValidRunType(FolderExportContext ctx, ExpRun run) - { - if (!run.getDataInputs().isEmpty() || !run.getDataOutputs().isEmpty()) - { - return ctx.getDataTypes().contains(FolderArchiveDataTypes.DATA_CLASS_DATA); - } - return true; - } - private void writeSampleTypeDataFiles(Set sampleTypes, FolderExportContext ctx, VirtualFile dir) throws Exception { // write out the sample rows