From 62a94dbba05b660426a9c4c99cfa55a53088e1e4 Mon Sep 17 00:00:00 2001 From: mathysgrapotte Date: Mon, 5 May 2025 12:46:09 +0200 Subject: [PATCH 1/2] now compares all-against-all and outputs raw comparison instead of comparing only within transforms. --- .../local/stimulus/compare_tensors/main.nf | 12 +---- subworkflows/local/evaluation/main.nf | 53 +++++++++++++------ 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/modules/local/stimulus/compare_tensors/main.nf b/modules/local/stimulus/compare_tensors/main.nf index d2c1fff..7966c7b 100644 --- a/modules/local/stimulus/compare_tensors/main.nf +++ b/modules/local/stimulus/compare_tensors/main.nf @@ -18,19 +18,9 @@ process STIMULUS_COMPARE_TENSORS { """ stimulus compare-tensors \ ${tensors} \ - -s scores.csv \ + -o "${prefix}_scores.csv" \ ${args} - # Extract first row of scores.csv - header_scores=\$(head -n 1 scores.csv) - - # Add metadata info to output file - echo "${header},\$header_scores" > "${prefix}_scores.csv" - - # Add values - scores=\$(awk 'NR==2 {sub(/[[:space:]]+\$/, "")} NR==2' scores.csv | tr -s '[:blank:]' ',') - echo "${values},\$scores" >> "${prefix}_scores.csv" - cat <<-END_VERSIONS > versions.yml "${task.process}": stimulus: \$(stimulus -v | cut -d ' ' -f 3) diff --git a/subworkflows/local/evaluation/main.nf b/subworkflows/local/evaluation/main.nf index bd0b364..50c7b03 100644 --- a/subworkflows/local/evaluation/main.nf +++ b/subworkflows/local/evaluation/main.nf @@ -41,30 +41,51 @@ workflow EVALUATION_WF { // and the same number of trials, we can estimate the noise across replicates // This is done by comparing the predictions of the alternative models between each other // and then calculatin a summary metric over them (e.g. mean, median, std, etc.) 
- - replicate_predictions = predictions.map{ - meta, prediction -> - [["id": meta.id, - "split_id": meta.split_id, - "transform_id": meta.transform_id, - "n_trials": meta.n_trials ], meta, prediction] - }.groupTuple(by:0) - .map{ - merging_meta, metas, predictions -> - [merging_meta, predictions] + pairs = predictions + .collate(2) + .collect() + .map { items -> + def pairs = [] + // Create all unique combinations using index comparison + (0.. + (i+1.. + def meta1 = items[i][0] + def meta2 = items[j][0] + def files = [items[i][1], items[j][1]] + // Only compare different transforms OR different replicates + if(meta1.transform_id != meta2.transform_id || meta1.replicate != meta2.replicate) { + pairs << [ + [ + "id1": meta1.id, + "id2": meta2.id, + "split_id1": meta1.split_id, + "split_id2": meta2.split_id, + "transform_id1": meta1.transform_id, + "transform_id2": meta2.transform_id, + "replicate1": meta1.replicate, + "replicate2": meta2.replicate + ], + // Create unique filenames using both transforms and replicates + files + ] + } + } } + pairs + } + .flatMap { it } + + //pairs.dump(tag: "pairs") - // check if the predictions are at least 2, meta,predictions - replicate_predictions.filter{ - it[1].size() > 1 - }.set{ replicate_predictions } STIMULUS_COMPARE_TENSORS_COSINE( - replicate_predictions + pairs ) cosine_scores = STIMULUS_COMPARE_TENSORS_COSINE.out.csv + cosine_scores.dump(tag: "cosine_scores") + cosine_scores .map { meta, csv -> csv From 755eca613d32a98af97e54b017a5f6b8712589fe Mon Sep 17 00:00:00 2001 From: mathysgrapotte Date: Mon, 5 May 2025 16:41:27 +0200 Subject: [PATCH 2/2] compare_tensors now compares all against all. 
--- modules/local/stimulus/compare_tensors/main.nf | 12 +++++++++++- subworkflows/local/evaluation/main.nf | 5 ----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/modules/local/stimulus/compare_tensors/main.nf b/modules/local/stimulus/compare_tensors/main.nf index 7966c7b..072f6fe 100644 --- a/modules/local/stimulus/compare_tensors/main.nf +++ b/modules/local/stimulus/compare_tensors/main.nf @@ -18,9 +18,19 @@ process STIMULUS_COMPARE_TENSORS { """ stimulus compare-tensors \ ${tensors} \ - -o "${prefix}_scores.csv" \ + -o scores.csv \ ${args} + # Extract first row of scores.csv + header_scores=\$(head -n 1 scores.csv) + + # Add metadata info to output file + echo "${header},\$header_scores" > "${prefix}_scores.csv" + + # Add values + scores=\$(awk 'NR==2 {sub(/[[:space:]]+\$/, "")} NR==2' scores.csv | tr -s '[:blank:]' ',') + echo "${values},\$scores" >> "${prefix}_scores.csv" + cat <<-END_VERSIONS > versions.yml "${task.process}": stimulus: \$(stimulus -v | cut -d ' ' -f 3) diff --git a/subworkflows/local/evaluation/main.nf b/subworkflows/local/evaluation/main.nf index 50c7b03..5e13a82 100644 --- a/subworkflows/local/evaluation/main.nf +++ b/subworkflows/local/evaluation/main.nf @@ -75,17 +75,12 @@ workflow EVALUATION_WF { } .flatMap { it } - //pairs.dump(tag: "pairs") - - STIMULUS_COMPARE_TENSORS_COSINE( pairs ) cosine_scores = STIMULUS_COMPARE_TENSORS_COSINE.out.csv - cosine_scores.dump(tag: "cosine_scores") - cosine_scores .map { meta, csv -> csv