diff --git a/METASOFT/Snakefile b/METASOFT/Snakefile index ee1713d..616eff9 100644 --- a/METASOFT/Snakefile +++ b/METASOFT/Snakefile @@ -9,21 +9,33 @@ # dataCode: "test" import os datasets = config["datasets"] +chromosomes = config["chrom"] -prefix = "-".join(datasets) +#prefix = "-".join(datasets) +prefix = "merged_cohorts" outFolder = config["outFolder"] dataset_string = os.path.join(outFolder, prefix) print(dataset_string) -chromosomes = list(range(1,23)) +#chromosomes = config["chrom"] +print(chromosomes) + +if chromosomes == "all": + chromosomes = list(range(1,23)) + +print("chromosomes = ", chromosomes) +#chromosomes = list(range(1,23)) vcf = config["vcf"] rule all: input: - expand(dataset_string + ".{tool}.{filetype}.qval.tsv.gz", tool = ["metasoft", "re2c"], filetype = ["top_assoc"] ), - expand(dataset_string + ".{tool}.{filetype}.tabixed.tsv.gz.tbi", tool = ["metasoft", "re2c"], filetype = "full_assoc" ) + expand(dataset_string + ".metasoft.{filetype}.qval.tsv.gz", filetype = ["top_assoc"] ), + expand(dataset_string + ".metasoft.{filetype}.tabixed.tsv.gz.tbi", filetype = "full_assoc" ) + #expand(dataset_string + ".{tool}.{filetype}.qval.tsv.gz", tool = ["metasoft", "re2c"], filetype = ["top_assoc"] ), + #expand(dataset_string + ".{tool}.{filetype}.tabixed.tsv.gz.tbi", tool = ["metasoft", "re2c"], filetype = "full_assoc" ) + rule createInputFiles: output: diff --git a/METASOFT/scripts/createMetaInputByChr.R b/METASOFT/scripts/createMetaInputByChr.R index 447568e..79836f2 100644 --- a/METASOFT/scripts/createMetaInputByChr.R +++ b/METASOFT/scripts/createMetaInputByChr.R @@ -152,7 +152,8 @@ library(tidyverse) qtl_list <- purrr::map(data_list, pullData, type = "QTL" ) -prefix <- paste( data_list, collapse = "-" ) +#prefix <- paste( data_list, collapse = "-" ) +prefix <- "merged_cohorts" outFile_final <- file.path(outFolder, paste0( prefix, ".input.tsv" ) ) # if chromosome is specified then just read in from that chromosome @@ -172,8 +173,9 @@ for( chr in chrs){ outFile <- file.path( outFolder, paste0(prefix, ".", chr, ".input.tsv" ) ) res <- purrr::map( qtl_list, ~{ extractTargetQTL(.x, chr = chr) - }) %>% - reduce( inner_join, by = "snp_gene" ) + }) %>% + reduce( full_join, by = "snp_gene" ) + #reduce( inner_join, by = "snp_gene" ) stopifnot( nrow(res) > 0 ) print(head(res) ) diff --git a/METASOFT/scripts/formatMETASOFT.R b/METASOFT/scripts/formatMETASOFT.R index 87be358..c4cffb9 100644 --- a/METASOFT/scripts/formatMETASOFT.R +++ b/METASOFT/scripts/formatMETASOFT.R @@ -43,11 +43,12 @@ df[, c("variant_id", "phenotype_id") := tstrsplit(RSID, "-", fixed=TRUE)] # For METASOFT, correct fixed and random effect Ps per gene by BH # for each phenotype calculate and add adjusted P values if( "PVALUE_RE2" %in% names(df) ){ - df[, c("PADJ_RE2", "PADJ_FE") := .(p.adjust(.SD$PVALUE_RE2, method = "FDR"), p.adjust(.SD$PVALUE_FE, method = 'FDR') ), by = phenotype_id] + # df[, c("PADJ_RE2", "PADJ_FE") := .(p.adjust(.SD$PVALUE_RE2, method = "FDR"), p.adjust(.SD$PVALUE_FE, method = 'FDR') ), by = phenotype_id] + df[, c("PADJ_RE2", "PADJ_FE") := .(p.adjust(.SD$PVALUE_RE2, method = "fdr"), p.adjust(.SD$PVALUE_FE, method = 'fdr') ), by = phenotype_id] } if( "RE2Cp" %in% names(df) ){ - df[, "PADJ_RE2C" := p.adjust(.SD$RE2Cp, method = "FDR") , by = phenotype_id] - + # df[, "PADJ_RE2C" := p.adjust(.SD$RE2Cp, method = "FDR") , by = phenotype_id] + df[, "PADJ_RE2C" := p.adjust(.SD$RE2Cp, method = "fdr") , by = phenotype_id] } # output top association per gene diff --git a/METASOFT/scripts/qvalueMETASOFT.R b/METASOFT/scripts/qvalueMETASOFT.R new file mode 100644 index 0000000..34bd5e6 --- /dev/null +++ b/METASOFT/scripts/qvalueMETASOFT.R @@ -0,0 +1,38 @@ +# qvalue meta-analysis results + +# multiple testing per feature +library(optparse) +library(qvalue) +library(readr) + + + +option_list <- list( + make_option(c('-o', '--output'), help = "the output file", default = ""), + make_option(c('-i', '--input'), help = "the input metasoft file", default = "") +) + +option.parser <- OptionParser(option_list=option_list) +opt <- parse_args(option.parser) + +output <- opt$output +input <- opt$input + +stopifnot(file.exists(input) ) +# input - top_assoc file +# output - top_assoc with qvalue column + + +df <- read_tsv(input) + +if("PADJ_RE2" %in% names(df) ){ + df$qvalue <- qvalue(df$PADJ_RE2)$qvalue +} + + +if("PADJ_RE2C" %in% names(df) ){ + df$qvalue <- qvalue(df$PADJ_RE2C)$qvalue +} + +write_tsv(df, output) + diff --git a/METASOFT/test_config.yaml b/METASOFT/test_config.yaml new file mode 100644 index 0000000..801d900 --- /dev/null +++ b/METASOFT/test_config.yaml @@ -0,0 +1,12 @@ +datasets: + - AnswerALS_expression + - ROSMAP_DLPFC_expression_EUR + - ROSMAP_Head_of_Caudate_Nucleus_expression_EUR + - ROSMAP_PCC_expression_EUR + +chrom: [21] # list in brackets which chromosomes you want to include. for chrs, type "all" + +outFolder: "outfolder/" +dataCode: "bigbrain_first_pass_5-20-22" + +vcf: "/sc/arion/projects/bigbrain/downstream-QTL/METASOFT/dbSNP_hg38_common_chr21.vcf.gz"