diff --git a/COLOC/Snakefile b/COLOC/Snakefile
index a503f63..b6387fe 100644
--- a/COLOC/Snakefile
+++ b/COLOC/Snakefile
@@ -5,24 +5,31 @@ gwas_data = config["gwas"]
 qtl_data = config["qtl"]
 outFolder = config["outFolder"]
 geneMeta = config["geneMeta"]
-
+sig_threshold = config["sig_threshold"]
 # sanity check - can the GWAS and QTL data be found in the database?
-gwas_df = pd.read_excel("/sc/arion/projects/ad-omics/data/references//GWAS/GWAS-QTL_data_dictionary.xlsx", sheet_name = 2)
-qtl_df = pd.read_excel("/sc/arion/projects/ad-omics/data/references//GWAS/GWAS-QTL_data_dictionary.xlsx", sheet_name = 1)
+gwas_df = pd.read_excel("/sc/arion/projects/ad-omics/data/references//GWAS/GWAS-QTL_data_dictionary.xlsx", sheet_name = 1)
+qtl_df = pd.read_excel("/sc/arion/projects/ad-omics/data/references//GWAS/GWAS-QTL_data_dictionary.xlsx", sheet_name = 2)
 
 gwas_check = all(i in gwas_df["dataset"].tolist() for i in gwas_data )
 qtl_check = all(i in qtl_df["dataset"].tolist() for i in qtl_data )
 
-if not all([gwas_check, qtl_check]):
-    print(" * QTL and/or GWAS cannot be found in database")
+if not all([gwas_check]):
+    print(" * GWAS cannot be found in database")
+    sys.exit()
+
+if not all([qtl_check]):
+    print(" * QTL cannot be found in database")
+    print(qtl_check)
     sys.exit()
 
+
 shell.prefix("export PS1=""; ml anaconda3; CONDA_BASE=$(conda info --base); source $CONDA_BASE/etc/profile.d/conda.sh; ml purge; conda activate snakemake;")
 rule all:
     input:
         #expand(outFolder + "{GWAS}/{QTL}/{QTL}_{GWAS}_coloc_results.snp.1kgp3_ld.tsv.gz", GWAS = gwas_data, QTL = qtl_data)
         outFolder + "all_COLOC_results_merged_H4_0_no_LD.tsv.gz",
         outFolder + "all_COLOC_results_merged_H4_0.5_with_LD.tsv.gz",
+        #outFolder + "pairwise_QTL_results.tsv.gz"
         #expand( outFolder + "{GWAS}/{QTL}_{GWAS}_COLOC.tsv", GWAS = gwas_data, QTL = qtl_data )
         #outFolder + "all_COLOC_summary_results.tsv.gz"
 
@@ -34,8 +41,8 @@ rule run_COLOC:
         script = "scripts/run_COLOC.R"
     shell:
         "mkdir -p {outFolder}{wildcards.GWAS};"
-        "ml R/4.0.3;" #ml arrow;"
-        "Rscript {params.script} -g {wildcards.GWAS} -q {wildcards.QTL} -o {outFolder}{wildcards.GWAS}/ "
+        "ml R;" #ml arrow;"
+        "Rscript {params.script} -g {wildcards.GWAS} -q {wildcards.QTL} -o {outFolder}{wildcards.GWAS}/ -s 1e-5 --verbose --lowmem"
    
 rule merge_COLOC_snp_level:
     input:
@@ -46,7 +53,7 @@ rule merge_COLOC_snp_level:
     params:
         script = "scripts/merge_COLOC_results.R"
     shell:
-        "ml R/4.0.3;"
+        "ml R;" # LDLINKR only installed on 3.6
         "Rscript {params.script} --threshold 0 --inFolder {outFolder} --geneMeta {geneMeta};" 
         "Rscript {params.script} --threshold 0.5 --ld --inFolder {outFolder} --geneMeta {geneMeta} "
 
@@ -61,3 +68,25 @@ rule create_LD_tables:
     shell:
        "conda activate echoR;"
        "Rscript {params.script} -g {wildcards.GWAS} -q {wildcards.QTL} --inFile {input} --outFolder {params.out}" 
+
+rule prepare_QTL_COLOC:
+    input:
+        outFolder + "all_COLOC_results_merged_H4_0.5_with_LD.tsv.gz"
+    output:
+        outFolder + "pairwise_QTL_COLOC_targets.tsv.gz"
+    params:
+        script = "scripts/prepare_pairwise_qtl_coloc.R"
+    shell:
+        "ml R/4.2.0;"
+        "Rscript {params.script} -i {input} -o {output} -m across -p 0.8"
+
+rule run_QTL_COLOC:
+    input:
+        outFolder + "pairwise_QTL_COLOC_targets.tsv.gz"
+    output:
+        outFolder + "pairwise_QTL_results.tsv.gz"
+    params:
+        script = "scripts/run_COLOC.R"
+    shell:
+        "ml R/4.2.0;"
+        "Rscript {params.script} -t {input} -o {output}"
diff --git a/COLOC/cluster.yaml b/COLOC/cluster.yaml
index 337150f..bccd9de 100644
--- a/COLOC/cluster.yaml
+++ b/COLOC/cluster.yaml
@@ -1,10 +1,13 @@
 __default__:
-  #partition: chimera
   queue: premium
   cores: 1
-  mem: 24000
+  mem: 48000
   time: '120'
   name: $(basename $(pwd)):{rule}:{wildcards}
   output: logs/{rule}:{wildcards}.stdout
   error: logs/{rule}:{wildcards}.stderr
   himem: ""
+pairwise_QTL_COLOC:
+  cores: 4
+  mem: 16000
+  time: 8640
diff --git a/COLOC/pairwise_qtl_coloc.smk b/COLOC/pairwise_qtl_coloc.smk
new file mode 100644
index 0000000..a79653f
--- /dev/null
+++ b/COLOC/pairwise_qtl_coloc.smk
@@ -0,0 +1,31 @@
+import sys
+import pandas as pd
+import glob
+import re
+import os
+
+# Pairwise QTL COLOC pipeline
+# Jack Humphrey
+outFolder = config["outFolder"]
+inFolder = config["inFolder"]
+MAF_file = config["MAF_file"]
+
+target_files = [ x for x in glob.glob(inFolder + "*genewide*pairs*tsv.gz" )]
+target_ids = [os.path.basename(re.sub('.tsv.gz', '', x, count=1)) for x in target_files ]
+
+
+shell.prefix("export PS1=""; ml anaconda3; CONDA_BASE=$(conda info --base); source $CONDA_BASE/etc/profile.d/conda.sh; ml purge; conda activate snakemake;")
+rule all:
+    input:
+        expand( outFolder + "{target}_COLOC.tsv.gz", target = target_ids)
+
+rule pairwise_QTL_COLOC:
+    input:
+        inFolder + "{target}.tsv.gz"
+    output:
+        outFolder + "{target}_COLOC.tsv.gz"
+    params:
+        script = "scripts/run_COLOC.R"
+    shell:
+        "ml R;" 
+        "Rscript {params.script} --targets {input} -o {outFolder} --lowmem --threads 1 --MAF {MAF_file}"
diff --git a/COLOC/scripts/prepare_pairwise_qtl_coloc.R b/COLOC/scripts/prepare_pairwise_qtl_coloc.R
new file mode 100644
index 0000000..1c5028e
--- /dev/null
+++ b/COLOC/scripts/prepare_pairwise_qtl_coloc.R
@@ -0,0 +1,73 @@
+library(optparse)
+
+option_list <- list(
+        make_option(c('-o', '--outFile'), help='the path to the output file', default = ""),
+        make_option(c('-i', '--inFile'), help='the path to the input file', default = ""),
+        make_option(c('-m', '--mode'), help='mode - across or within?', default = "across"),
+        make_option(c('-p', '--pp'), help='min PP4', default = 0.5),
+        make_option(c('--debug'), help = "load all files and then save RData without running COLOC", action = "store_true", default = FALSE)
+)
+
+option.parser <- OptionParser(option_list=option_list)
+opt <- parse_args(option.parser)
+
+inFile <- opt$inFile
+outFile <- opt$outFile
+mode <- opt$mode
+PP4 <- opt$pp
+library(tidyverse)
+
+#all_coloc <- read_tsv("../all_COLOC_results_merged_H4_0.5_with_LD.tsv.gz")
+all_coloc <- read_tsv(inFile)
+
+all_coloc$distance <- abs(all_coloc$GWAS_pos - all_coloc$QTL_pos)
+all_coloc$distance_filter <- (!is.na(all_coloc$LD) & all_coloc$LD > 0.1) | all_coloc$distance < 5e5
+
+qtl_coloc_input <- all_coloc %>%
+  filter(distance_filter == TRUE & PP.H4.abf > PP4) %>%
+  select(GWAS, QTL, locus, feature) %>%
+  distinct()
+
+message(" * ", nrow(qtl_coloc_input), " phenotypes colocalize at PP4 > ", PP4)
+stopifnot( nrow(qtl_coloc_input) > 0)
+
+
+shared_loci <- select(qtl_coloc_input, GWAS, locus, QTL) %>%
+  distinct() %>%
+  group_by(GWAS, locus) %>%
+  tally() %>%
+  filter(n > 1)
+message(" * ", nrow(shared_loci), " loci are shared between QTL types")
+stopifnot( nrow(shared_loci) > 0)
+
+qtl_coloc_input  <- filter(qtl_coloc_input, locus %in% shared_loci$locus)
+
+# get all pairwise comparisons required
+# make optional - do you want to test features within a dataset or just across?
+qtl_coloc_split <- qtl_coloc_input %>%
+  split(paste0(.$GWAS, ":", .$locus)) %>%
+  map_df( ~{
+    d <- select(.x, GWAS,locus, QTL, feature) %>% distinct() 
+    qtl_pairs <- combn(unique(paste(d$QTL, d$feature) ), 2)
+    locus_df <- select(d, GWAS, locus)
+    pair_df <- 
+      data.frame(qtl_1 = qtl_pairs[1,], qtl_2 = qtl_pairs[2,] ) %>%
+      tidyr::separate(qtl_1, into = c("qtl_1", "feature_1"), sep = " ") %>%
+      tidyr::separate(qtl_2, into = c("qtl_2", "feature_2"), sep = " ") %>%
+      mutate(GWAS = unique(d$GWAS), locus = unique(d$locus)) %>%
+      select(GWAS, locus, everything() )
+
+  }) %>%
+  distinct()
+
+if( mode == "across"){
+
+qtl_coloc_split <- qtl_coloc_split %>%
+ mutate( within = qtl_1 == qtl_2) %>% filter(within == FALSE)
+}
+
+write_tsv(qtl_coloc_split, outFile) #"test_eQTL_edQTL_pairwise_input.tsv")
+
+
+
+ 
diff --git a/COLOC/scripts/run_COLOC.R b/COLOC/scripts/run_COLOC.R
index 69bc401..1758c26 100644
--- a/COLOC/scripts/run_COLOC.R
+++ b/COLOC/scripts/run_COLOC.R
@@ -26,9 +26,24 @@ suppressPackageStartupMessages(library(optparse))
 suppressPackageStartupMessages(library(GenomicRanges))
 suppressPackageStartupMessages(library(rtracklayer))
 
+options(future.globals.maxSize = 8000 * 1024^2)
 
 #library(arrow)
 
+message_ <- function(text){
+    message(Sys.time(), " * ", text)   
+}
+## verbose messaging
+message_verbose <- function(input){
+    if(verbose){
+        if( "data.frame" %in% class(input)){
+            print(input)
+        }
+        if( class(input) == "character"){
+            message(Sys.time(), "     * ", input)
+        }
+    }
+}
 # flank coordinates by set number of bases (default 1MB)
 # work on either a coordinate string or a dataframe containing chr start and end columns
 joinCoords <- function(input, flank = 0){
@@ -36,6 +51,7 @@ joinCoords <- function(input, flank = 0){
         coord <- splitCoords(input)
         coord$start <- coord$start - flank
         coord$end <- coord$end + flank
+        if( coord$start < 0){coord$start <- 0}
         coord <- paste0(coord$chr, ":", coord$start, "-", coord$end)    
         return(coord)
     }
@@ -43,10 +59,15 @@ joinCoords <- function(input, flank = 0){
         stopifnot( all(c("chr", "start", "end") %in% names(input) ) | all(c("chr","pos") %in% names(input) )  )
         stopifnot( flank >= 0)
         if( all( c("chr","start","end") %in% names(input) ) ){
-            coords <- paste0(input$chr, ":", input$start - flank, "-",input$end + flank) 
+            # deal with flanks that go negative
+            starts <- input$start - flank
+            starts[starts < 0 ] <- 1
+            coords <- paste0(input$chr, ":", starts, "-",input$end + flank) 
         }
         if( all( c("chr", "pos") %in% names(input) ) ){
-            coords <- paste0(input$chr, ":", input$pos - (flank + 1), "-", input$pos + flank)
+            starts <- input$pos - (flank + 1)
+            starts[starts < 0 ] <- 1
+            coords <- paste0(input$chr, ":", starts, "-", input$pos + flank)
         }
         return(coords)
     }
@@ -61,10 +82,10 @@ splitCoords <- function(coords){
 }
 
 pullData <- function(dataset, type = "GWAS"){
-    message(Sys.time()," * selected dataset: ", dataset)
+    message_verbose(paste0("selected dataset: ", dataset))
     db_path <- "/sc/arion/projects/ad-omics/data/references/GWAS/GWAS-QTL_data_dictionary.xlsx"
 
-    message(Sys.time()," * reading GWAS database from ", db_path)
+    message_verbose(paste0("reading GWAS database from ", db_path))
     stopifnot( file.exists(db_path) )
 
     gwas_db <- suppressMessages(readxl::read_excel(db_path, sheet = type, na= c("", "-","NA")))
@@ -132,9 +153,9 @@ extractLoci <- function(gwas){
 # already filtered from ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20100804/supporting/EUR.2of4intersection_allele_freq.20100804.sites.vcf.gz
 # all sites with an RSID and 2+ alleles 
 loadMAF <- function(path){
-    message(" * loading in MAF from 1000 Genomes")
+    #message_("loading in MAF from 1000 Genomes")
     maf <- data.table::fread(path, nThread = 4)
-    names(maf) <- c("chr", "pos", "snp", "MAF")
+    names(maf)[1:4] <- c("chr", "pos", "snp", "MAF")
     maf$chr <- as.character(maf$chr)
     data.table::setkey(maf, "chr")
     maf$MAF <- suppressWarnings(as.numeric(maf$MAF))
@@ -153,13 +174,14 @@ matchMAF <- function(data, maf){
     # subset maf by chromosome
     maf <- maf[chr] 
     # match on RSID
-    message("   * before joining MAF: ", nrow(data), " SNPs")
+    message_verbose(paste0("before joining MAF: ", nrow(data), " SNPs"))
     data$MAF <- maf$MAF[match(data$snp, maf$snp)]
-    matched <- data[ !is.na(data$MAF) ,]
-    message("   * after joining MAF: ", nrow(matched), " SNPs")
+    matched <- data[ !is.na(data$MAF) & data$MAF >0 & data$MAF < 1,]
+    message_verbose(paste0("after joining MAF: ", nrow(matched), " SNPs"))
     if(nrow(matched) == 0 ){
-        print(head(data) )
-    stop("no SNPs match on RSID. Inspect data")
+        #message_verbose(head(data) )
+        message_verbose("no SNPs match on RSID. Inspect data")
+        return(NULL)
     }
     return(matched)
 }
@@ -198,11 +220,11 @@ extractGWAS <- function(gwas, coord, refFolder = "/sc/arion/projects/ad-omics/da
         }
     }
      
-    cmd <- paste( "ml bcftools; tabix -h ", gwas_path, coord ) 
-    message(" * running command: ", cmd)
+    cmd <- paste( "ml bcftools/1.9; tabix -h ", gwas_path, coord ) 
+    message_verbose(paste0("running command: ", cmd) )
  
     result <- as.data.frame(data.table::fread(cmd = cmd, nThread = 4) )
-    message(" * GWAS extracted!")
+    message_verbose("GWAS extracted!")
     stopifnot( nrow(result) > 0 )
     
      
@@ -222,20 +244,18 @@ extractGWAS <- function(gwas, coord, refFolder = "/sc/arion/projects/ad-omics/da
     #return(result)
     
     # deal with MAF if missing
-    if( debug == TRUE){ result$MAF <- NA }else{
         if( is.na(mafCol) | force_maf == TRUE ){
-            message("       * MAF not present - using 1000 Genomes MAF")
+            message_verbose("MAF not present - using 1000 Genomes MAF")
             # how to get this in to the function? it can't find maf_1000g
             result <- matchMAF(result, maf = maf_1000g)
         }else{
-            message("       * using supplied MAF")
+            message_verbose("using supplied MAF")
             names(result)[names(col_dict) == mafCol] <- "MAF" 
         }
-    }
     # convert standard error to the variance
     # se is standard error - not standard deviation!
     result$varbeta <- result$varbeta^2
-    print(head(result))
+    #print(head(result))
     #save.image("debug.RData") 
     result <- dplyr::select( result, snp, pvalues, beta, varbeta, MAF, chr, pos, A1, A2)
     
@@ -276,9 +296,9 @@ liftOverCoord <- function(coord_string, from = "hg19", to = "hg38"){
         )
     # lift over using rtracklayer
     # watch out for duplicate entries
-    message(" * lifting over....")
+    message_verbose("lifting over....")
     lifted_over <- rtracklayer::liftOver(coord_gr, chain )
-    message(" * lifted!" )
+    message_verbose("lifted!" )
     #return(lifted_over)
     
     stopifnot(length(lifted_over) == length(coord_gr)  )
@@ -308,7 +328,7 @@ extractQTL_parquet <- function(qtl, coord, sig_level = 0.05){
     perm_res <- dplyr::bind_cols(perm_res, splitCoords(perm_res$variant_id) )
     sig <- dplyr::filter(perm_res, qval < sig_level & chr ==  coord_split$chr & start >= coord_split$start & start <= coord_split$end )
     
-    message( paste0( nrow(sig), " significant genes or splicing events at this locus" ) )
+    message_verbose( paste0( nrow(sig), " significant genes or splicing events at this locus" ) )
     if( nrow(sig) == 0 ){ return(NULL) }
     
     # read in nominal QTL associations
@@ -332,23 +352,23 @@ extractQTL_tabix <- function(qtl, coord){
         }
     }
     stopifnot( file.exists(qtl$full_path) )
-    cmd <- paste( "ml bcftools; tabix -h ", qtl$full_path, coord )
-    message("       * running command: ", cmd)
+    cmd <- paste( "ml bcftools/1.9; tabix -h ", qtl$full_path, coord )
+    message_verbose(paste0("running command: ", cmd))
     result <- as.data.frame(data.table::fread(cmd = cmd, nThread = 4) )
     # deal with regions of no QTL association
     if( nrow(result) == 0){ 
         return(NULL) 
     }
     #stopifnot( nrow(result) > 0 )
-    message("       * QTL extracted!")
+    message_verbose("QTL extracted!")
     return(result)
 }
 
 
 # extract all nominal QTL P-values overlapping the flanked GWAS hit
 # split by Gene being tested
-# sig_level doesn't do anything yet
-extractQTL <- function(qtl, coord, sig_level = 0.05, force_maf = FALSE){
+# now implemented sig_level filtering for minimum P-value for each phenotype
+extractQTL <- function(qtl, coord, sig_level = NULL, force_maf = FALSE, targets = NULL){
     # variables stored in qtl
     # if qtl type is parquet then read in parquet files
     # if qtl type is tabix then query the coordinates through tabix 
@@ -379,7 +399,7 @@ extractQTL <- function(qtl, coord, sig_level = 0.05, force_maf = FALSE){
     columns <- colnames(data.table::fread( cmd = cmd ))
     col_dict <- setNames(1:length(columns), columns)
     
-    print(col_dict)
+    #print(col_dict)
     #stopifnot( ncol(result) == length(col_dict) )
     col_dict <- col_dict[1:ncol(result)] # hacky workaround for now
     names(result)[names(col_dict) == phenoCol] <- "gene"
@@ -392,42 +412,56 @@ extractQTL <- function(qtl, coord, sig_level = 0.05, force_maf = FALSE){
         names(result)[names(col_dict) == seCol]    <- "varbeta"
         # don't forget to square the standard error to get the variance             
         result$varbeta <- result$varbeta^2
- 
+        message_verbose(" remove associations with 0 variance")
+        message_verbose(paste0(" before: ", nrow(result), " associations"))
         # remove rows with variance of 0 - this will corrupt COLOC
         result <- dplyr::filter(result, varbeta != 0) 
+        message_verbose(paste0(" after: ", nrow(result), " associations"))
     }
     names(result)[names(col_dict) == snpCol]   <- "snp"
     # Young microglia use log10P - convert
     if( pvalCol == "log10_p"){
         result$pvalues <- 10^result$pvalues
     }
-
+    # P-value thresholding - methylation QTLs, there are way too many sites to test all of them
+    if( !is.null(sig_level) ){
+        message_verbose(paste0(" currently: ", length(unique(result$gene) ), " phenotypes in window" ) )
+        message_verbose(paste0(" removing phenotypes with P < ", sig_level))
+        pheno_p <- result %>% dplyr::group_by(gene) %>% dplyr::slice_min(pvalues, n = 1, with_ties = FALSE) %>% dplyr::filter(pvalues < as.numeric(sig_level) )
+        result <- dplyr::filter(result, gene %in% pheno_p$gene)
+        if(nrow(result) == 0){
+            message_verbose(" 0 phenotypes remaining after P thresholding")
+            return(NULL)
+        }
+        message_verbose(paste0("retaining ", nrow(result), " associations from ", nrow(pheno_p), " phenotypes" ))
+    }
     # deal with MAF - meta-analysis outputs won't have it
     # this requires "chr" to be present in QTL data
-    if( debug == TRUE){ result$MAF <- NA }else{
+    #if( debug == TRUE){ result$MAF <- NA }else{ # why do we do this? breaks it when debug is TRUE
     if( is.na(mafCol) | force_maf == TRUE ){
-        message("       * MAF not present - using 1000 Genomes MAF")
+        message_verbose("MAF not present - using 1000 Genomes MAF")
         #stopifnot( "chr" %in% names(col_dict) )
         names(result)[names(col_dict) == "chr"] <- "chr"
         result <- matchMAF(result, maf = maf_1000g)
-        print(head(result) )
+        if( all(is.na(result$MAF) ) | is.null(result) ){ return(NULL) }
+        #print(head(result) )
     }else{
-        message("       * using supplied MAF")
+        message_verbose("using supplied MAF")
         names(result)[names(col_dict) == mafCol] <- "MAF"
-        print(col_dict)
-        print(head(result) )
-    }
+        #print(col_dict)
+        #print(head(result) )
     }
+    #}
     # deal with edge cases - 1 gene and the gene id is NA
     if( all( is.na(result$gene) ) ){
-        message(" * no genes in QTL result")
+        message_verbose("no genes in QTL result")
         return(NULL)
     }else{
         result <- dplyr::filter(result, !is.na(gene) )
     }
     # if MAF needs matching then chrCol will be "chr" - change it back to "QTL_chr"
-    print( names(col_dict) )
-    print( names(col_dict) == chrCol )
+    #print( names(col_dict) )
+    #print( names(col_dict) == chrCol )
     names(result)[which(names(col_dict) == chrCol) ]   <- "QTL_chr"
     names(result)[which(names(col_dict) == posCol) ]   <- "QTL_pos"
 
@@ -439,10 +473,28 @@ extractQTL <- function(qtl, coord, sig_level = 0.05, force_maf = FALSE){
     }else{
         res_subset <- dplyr::select(result, gene, snp, pvalues, MAF, QTL_chr, QTL_pos)
     }
-    print(head(res_subset) ) 
-    print("passed this point")
+    # jan 2025: another edge case - NA pvalues
+    message_verbose("removing NA P-values")
+    res_subset <- filter(res_subset, !is.na(pvalues) )
+    message_verbose(paste0("after: ", nrow(res_subset) ))
+    # edge case - multiallelic SNPs lead to two entries per SNP-gene - remove
+    message_verbose("removing multi-allelic SNPs")
+    res_subset <- dplyr::distinct(res_subset)
+    message_verbose(paste0("after: ", nrow(res_subset) ))
+    #print(head(res_subset) ) 
+    message_verbose("passed this point")
+    #save(list = ls(all.names = TRUE), file = "image.RData", envir = 
+    #environment())
     #dplyr::filter( pos >= coord_split$start & pos <= coord_split$end)
-       
+      
+    if( !is.null(targets) ){
+        res_subset <- dplyr::filter(res_subset, gene == targets)
+        message_verbose(paste0(length(unique(res_subset$gene)), " target features remain" ) ) 
+        message_verbose(paste0(nrow(res_subset), " associations remain"))
+        if( nrow(res_subset) == 0 ){
+            return(NULL)
+        }
+    }
     # split by gene, convert to list object
     res_split <- 
         split(res_subset, res_subset$gene) %>%
@@ -455,65 +507,8 @@ extractQTL <- function(qtl, coord, sig_level = 0.05, force_maf = FALSE){
     return(res_split)
 }
 
-       
-
-
-# running COLOC between a GWAS locus and all eQTLs within 1MB either side
-# flank coordinates
-# extract GWAS nominal SNPs from region
-# find GWAS hit SNP for recording
-# extract QTLs within range - at set significance level
-# join together and run COLOC
-# output a list of objects:
-## COLOC results - this should include the GWAS locus, the gene of interest, the top QTL variant and the top QTL p-value
-## object - this should be a table combining the two input datasets, inner joined on "snp", to be used for plotting.
-runCOLOC <- function(gwas, qtl, hit){
-    message(" * Analysing LOCUS: ", hit$locus )
-    # hit is a dataframe containing "snp", "chr", "pos", "locus"
-    
-    hit_coord <- joinCoords(hit, flank = 0)
-    hit_range <- joinCoords(hit, flank = 1e6)
- 
-    # extract GWAS and QTL summary for given coord
-    message("       * extracting GWAS")
-    g <- extractGWAS(gwas, hit_range)
-    # get hit info from GWAS
-    hit_snp <- hit$snp
-    # this assumes that the hit SNP is within the g object
-    # if the top hit doesn't have an RSID or cannot be MAF matched then this is violated
-    # I will have to use the hit object instead and accept that not all top_loci lists include the MAF, P value etc.
-    #if( hit_snp == hit$snp ){
-    #hit_info <- as.data.frame(g, stringsAsFactors = FALSE) %>% filter(snp == hit_snp) %>% 
-    #            select(GWAS_SNP = snp, GWAS_P = pvalues, GWAS_effect_size = beta, GWAS_MAF = MAF) %>%
-    #            mutate(locus = hit$locus) %>% select(locus, everything() )
-    #}else{
-    # now just use top loci
-   
-    # problem when no p column in hit!
-
-    hit_info <- hit %>%
-            select(locus, GWAS_SNP = snp, GWAS_P = p)
-    #}
- 
-    # if GWAS is hg19 then lift over to hg38
-    # use the lifted over position in hit_info
-    if(gwas$build != qtl$build){
-        qtl_coord <- liftOverCoord(hit_coord, from = gwas$build, to = qtl$build)
-    }else{
-        qtl_coord <- hit_coord
-    }
-    # record the position of the GWAS top SNP
-    hit_info$GWAS_chr <- splitCoords(qtl_coord)$chr
-    hit_info$GWAS_pos <- splitCoords(qtl_coord)$end
-
-
-    qtl_range <- joinCoords(qtl_coord, flank = 1e6)
-    
-    message("       * extracting QTLs")
-    q <- extractQTL(qtl, qtl_range)
-    print(names(q[[1]]))
-    if( is.null(q) ){ return(NULL) }
-    # for each gene extract top QTL SNP   
+# for each gene extract top QTL SNP   
+getQTLinfo <- function(q, hit_info){  
     if( all( c("beta", "varbeta") %in% names(q[[1]]) ) ){
         qtl_info <- q %>% 
         map_df( ~{ 
@@ -538,7 +533,7 @@ runCOLOC <- function(gwas, qtl, hit){
             }
         }) 
         qtl_info <- bind_cols(qtl_info, gwas_info)
-        print(head(qtl_info) )
+        message_verbose(head(qtl_info) )
     }else{
         qtl_info <- q %>% 
         map_df( ~{ 
@@ -548,14 +543,91 @@ runCOLOC <- function(gwas, qtl, hit){
         }) 
 
     }
-    # actually run COLOC
-    message("      * running COLOC")
+    return(qtl_info)
+}      
+
+
+# running COLOC between a GWAS locus and all eQTLs within 1MB either side
+# flank coordinates
+# extract GWAS nominal SNPs from region
+# find GWAS hit SNP for recording
+# extract QTLs within range - at set significance level
+# join together and run COLOC
+# output a list of objects:
+## COLOC results - this should include the GWAS locus, the gene of interest, the top QTL variant and the top QTL p-value
+## object - this should be a table combining the two input datasets, inner joined on "snp", to be used for plotting.
+## sig.level now filters out QTLs with P > sig.level
+## bare - if TRUE then only return COLOC summary, not full object (saves memory)
+runCOLOC <- function(gwas, qtl, qtl2 = NULL, hit, sig.level = NULL, target.file = NULL, bare = FALSE){
+    message_(paste0("Analysing locus: ", hit$locus ))
+    # hit is a dataframe containing "snp", "chr", "pos", "locus"
     
-    # run COLOC on each QTL gene 
+    hit_coord <- joinCoords(hit, flank = 0)
+    hit_range <- joinCoords(hit, flank = 1e6)
+ 
+    # extract GWAS and QTL summary for given coord
+    if( is.null(qtl2) ){
+        message_verbose("extracting GWAS")
+        g <- extractGWAS(gwas, hit_range)
+    }
+    # get hit info from GWAS
+    hit_snp <- hit$snp
+  
+    # problem when no p column in hit!
+    if(is.null(gwas) ){
+        hit_info <- hit %>%
+            select(locus) %>% mutate(GWAS_SNP = NA, GWAS_P = NA)
+        qtl_range <- hit_range
+    }else{
+        hit_info <- hit %>%
+            select(locus, GWAS_SNP = snp, GWAS_P = p)
+        # if GWAS is hg19 then lift over to hg38
+        if(gwas$build != qtl$build){
+            qtl_coord <- liftOverCoord(hit_coord, from = gwas$build, to = qtl$build)
+        }else{
+            qtl_coord <- hit_coord
+        }
+        # record the position of the GWAS top SNP
+        hit_info$GWAS_chr <- splitCoords(qtl_coord)$chr
+        hit_info$GWAS_pos <- splitCoords(qtl_coord)$end
+        qtl_range <- joinCoords(qtl_coord, flank = 1e6)
+    }
+    message_verbose("extracting QTLs")
+    q <- extractQTL(qtl, qtl_range, targets = target.file[1], sig_level = sig.level)
+    #print(names(q[[1]]))
+    if( is.null(q) ){ return(NULL) }
+    qtl_info <- getQTLinfo(q, hit_info)
+     # if sig filtering requested
+    if( !is.null(sig.level ) ){
+        qtl_info <- filter(qtl_info, QTL_P < sig.level)
+        q <- q[ qtl_info$gene ] 
+         message_verbose(paste0(length(q), " features in QTL"))
+    }
+
+    if( !is.null(qtl2) ){
+        # if second QTL dataset requested
+        q2 <- extractQTL(qtl2, qtl_range, targets = target.file[2])
+        if( is.null(q2) ){ return(NULL) }
+        # get QTL betas for lead SNP in QTL 1
+        hit_info_qtl1 <- data.frame(locus = hit_info$locus, GWAS_SNP = qtl_info$QTL_SNP, GWAS_P = qtl_info$QTL_P)
+        qtl2_info <- getQTLinfo(q2, hit_info_qtl1)
+        # get QTL betas for lead SNP in QTL 2
+        hit_info_qtl2 <- data.frame(locus = hit_info$locus, GWAS_SNP = qtl2_info$QTL_SNP, GWAS_P = qtl2_info$QTL_P)
+        qtl_info <- getQTLinfo(q, hit_info_qtl2)
+        if( !is.null(sig.level ) ){
+            qtl2_info <- filter(qtl2_info, QTL_P < sig.level)
+            q2 <- q2[ qtl2_info$gene ]
+        }
+        message_verbose( paste0(length(q2), " features in QTL 2"))
+    }
+    # actually run COLOC
+    message_("running COLOC")
     # return the results table and an object containing g and q
+    # GWAS-QTL COLOC
+    if( is.null(qtl2) ){
+    message_("GWAS-QTL COLOC")
     coloc_res <- 
         purrr::map( q, ~{
-            # if QTL has no overlapping SNPs with GWAS summary then return NULL
             if( length(intersect(g$snp, .x$snp) ) == 0 ){ 
                 message("Hold up! GWAS and QTL have no SNPs in common")
                 return(NULL) 
@@ -564,25 +636,58 @@ runCOLOC <- function(gwas, qtl, hit){
             # add in g and q to coloc object
             gq <- dplyr::inner_join(as.data.frame(g),as.data.frame(.x), by = "snp", suffix = c(".gwas", ".qtl") )
             coloc_object$results <- dplyr::inner_join(coloc_object$results, gq, by = "snp" )
-            # pull out coloc summary as data.frame
             coloc_df <- as.data.frame(t(coloc_object$summary), stringsAsFactors = FALSE)
             return( list(df = coloc_df, object = coloc_object) )
         })
-    if( is.null(coloc_res) ){ return(NULL) }
+    }else{
+    message_("QTL-QTL COLOC")
+    # remove features with lead SNPs > 1e-5
+    # test all pairs of QTL features in the locus
+    qtl_combos <- expand.grid(1:length(q), 1:length(q2) )
+    message_(paste0("testing ", nrow(qtl_combos), " pairs of QTLs"))
+    # qtl_info is matched on to COLOC results using gene names - but for QTL-QTL there are two genes, so instead use the index of the pair
+    qtl_info <- 
+        full_join( 
+            qtl_info[qtl_combos$Var1,] %>% mutate(index = 1:nrow(qtl_combos) ), 
+            qtl2_info[qtl_combos$Var2,] %>% mutate(index = 1:nrow(qtl_combos) ), 
+            by = "index", 
+            suffix = c(".qtl1", ".qtl2") ) %>%
+            dplyr::mutate(gene = as.character(index)) %>%
+            select(-index)
 
-    message("       * COLOC finished")
+    coloc_res <- 
+        purrr::map( 1:nrow(qtl_combos), ~{
+            q_1 = q[[ qtl_combos[.x, "Var1"] ]]
+            q_2 = q2[[ qtl_combos[.x, "Var2"] ]]
+            message_( paste0( .x, ": ", unique(q_1$gene), " vs ",  unique(q_2$gene) ) )
+            if( length(intersect(q_1$snp, q_2$snp) ) == 0 ){
+                message_("Hold up! GWAS and QTL have no SNPs in common")
+                return(NULL)
+            }
+            coloc_object <- coloc.abf(dataset1 = q_1, dataset2 = q_2)
+            qq <- dplyr::inner_join(as.data.frame(q_1),as.data.frame(q_2), by = "snp", suffix = c(".qtl1", ".qtl2") )
+            coloc_object$results <- dplyr::inner_join(coloc_object$results, qq, by = "snp" )
+            coloc_df <- as.data.frame(t(coloc_object$summary), stringsAsFactors = FALSE)
+            return( list(df = coloc_df, object = coloc_object) )
+        })
+    }
+    if( is.null(unlist(coloc_res)) ){ return(NULL) }
+
+    message_("COLOC finished")
     coloc_df <- map_df(coloc_res, "df", .id = "gene") %>% 
         dplyr::mutate(locus = hit$locus ) %>%
         dplyr::select(locus, gene, everything() )
-    message("       * COLOC res created")
+    message_("COLOC res created")
     # bind coloc_res to other tables
     full_res <- full_join(qtl_info, coloc_df, by = "gene")
     full_res <- full_join(hit_info, full_res, by = "locus")
-
-    # add pairwise LD between GWAS and QTL SNP
-    #full_res <- calc_LD(full_res)
-
-    return(list(full_res = full_res, coloc_object = coloc_res) ) 
+    # remove rows with no coloc run
+    full_res <- filter(full_res, !is.na(PP.H4.abf) )
+    if(bare == TRUE){
+        return(list(full_res = full_res) )
+    }else{
+        return(list(full_res = full_res, coloc_object = coloc_res) ) 
+    }
 }
 
 # for each COLOC result per locus
@@ -592,7 +697,7 @@ calc_LD <- function( coloc_res ){
     # get LDlink token from .Renviron (only Jack has this, for access go to https://ldlink.nci.nih.gov/?tab=apiaccess)
     token <- Sys.getenv("LDLINK_TOKEN")
     if( token == ""){ 
-        warning(" * LDlink token not found!" )
+        warning(Sys.time(), " * LDlink token not found!" )
         return(NA)
     }
     snps <- unique( c( unique(coloc_res$GWAS_SNP), unique( coloc_res$QTL_SNP) ) )
@@ -615,80 +720,143 @@ calc_LD <- function( coloc_res ){
 
 
 option_list <- list(
-        make_option(c('-o', '--outFolder'), help='the path to the output file', default = ""),
-        make_option(c('--gwas', '-g'), help= "the dataset ID for a GWAS in the GWAS/QTL database" ),
+        make_option(c('-o', '--outFolder'), help='the path to the output file', default = "."),
+        make_option(c('--gwas', '-g'), help= "the dataset ID for a GWAS in the GWAS/QTL database", default = NULL ),
         make_option(c('--qtl', '-q'), help = "the dataset ID for a QTL dataset in the GWAS/QTL database"),
-        make_option(c('--debug'), help = "load all files and then save RData without running COLOC", action = "store_true", default = FALSE)
+        make_option(c('--qtl2', '-r'), help = "a second QTL dataset - using this will trigger QTL-QTL COLOC", default = NULL),
+        make_option(c('--sig', '-s'), help = "the minimum p-value a QTL should have for testing", default = NULL),
+        make_option(c('--loci', '-l'), help = "text file of loci names", default = NULL),
+        make_option(c('--targets', '-t'), help = "TSV file of features, loci and GWAS for targeted analysis", default = NULL),
+        make_option(c('--debug'), help = "load all files and then save RData without running COLOC", action = "store_true", default = FALSE),
+        make_option(c('--lowmem', '-b'), help = "do not save COLOC objects, just the summaries", action = "store_true", default = FALSE),
+        make_option(c('--threads', '-c'), help = "how many threads to use", default = 1),
+        make_option(c('--MAF', '-m'), help = "path to custom MAF file", default = NULL),
+        make_option(c('--verbose', '-v'), help = "if selected then output more information", action = "store_true", default = FALSE)
 )
 
 option.parser <- OptionParser(option_list=option_list)
 opt <- parse_args(option.parser)
 
-
 outFolder <- opt$outFolder
 gwas_dataset <- opt$gwas
 qtl_dataset <- opt$qtl
+qtl_dataset_2 <- opt$qtl2
 debug <- opt$debug
-#gwas_prefix <- "/sc/arion/projects/als-omics/ALS_GWAS/Nicolas_2018/processed/Nicolas_2018_processed_"
-#qtl_prefix <- "/sc/arion/projects/als-omics/QTL/NYGC_Freeze02_European_Feb2020/QTL-mapping-pipeline/results/LumbarSpinalCord_expression/peer30/LumbarSpinalCord_expression_peer30"
-#qtl_prefix <- "/sc/arion/projects/als-omics/QTL/NYGC_Freeze02_European_Feb2020/QTL-mapping-pipeline/results/LumbarSpinalCord_splicing/peer20/LumbarSpinalCord_splicing_peer20"
-
-#hits_file <- "/sc/arion/projects/als-omics/QTL/NYGC_Freeze02_European_Feb2020/downstream-QTL/COLOC/Nicolas_2018/Nicolas_2018_hits_1e-7.tsv"
-#outFile <- "test_coloc_results.tsv"
-
-#options(echo = TRUE)
+sig <- opt$sig
+loci <- opt$loci
+target_file <- opt$targets
+low_mem <- opt$lowmem
+threads <- opt$threads
+verbose <- opt$verbose
+MAF <- opt$MAF
+
+# default MAF is 1000g phase 3v5
+# but users can provide their own
+# must have columns chr, pos, snp, MAF
+if( is.null(MAF) ){
+    #maf_1000gp1 <- "/sc/arion/projects/ad-omics/data/references/1KGP1/1000G_EUR_MAF.bed.gz"
+    MAF <- "/sc/arion/projects/ad-omics/data/references/1KGPp3v5/EUR_MAF/EUR.all.phase3_MAF.bed.gz"
+}
 
-# load in data
-# extract top GWAS loci
-# for each locus run COLOC
-maf_1000gp1 <- "/sc/arion/projects/ad-omics/data/references/1KGP1/1000G_EUR_MAF.bed.gz"
-maf_1000gp3 <- "/sc/arion/projects/ad-omics/data/references/1KGPp3v5/EUR_MAF/EUR.all.phase3_MAF.bed.gz"
+if(verbose){print(opt)}
 
 # load in MAF table
-if( !exists("maf_1000g")){
-
-        maf_1000g <- loadMAF(maf_1000gp3)
-        # load in liftover chain
-        chain_hg19_hg38 <- import.chain("/sc/arion/projects/ad-omics/data/references/liftOver/hg19ToHg38.over.chain")    
-    }
+if( !exists("maf_1000g") ){
+  message(paste0("loading MAF from ", MAF ) )
+  maf_1000g <- loadMAF(MAF)
+  # load in liftover chain
+  chain_hg19_hg38 <- import.chain("/sc/arion/projects/ad-omics/data/references/liftOver/hg19ToHg38.over.chain")    
+}
 
 main <- function(){
-    
-        #gwas_dataset <- "Kunkle_2019"
-    #gwas_dataset <- "Nalls23andMe_2019"
-    #gwas_dataset <- "Marioni_2018"
-    #qtl_dataset <- "Microglia_THA"
-    
-    gwas <- pullData(gwas_dataset, "GWAS")
-    qtl <- pullData(qtl_dataset, "QTL")
-    
-    top_loci <- extractLoci(gwas)
-    
-    # for testing
-    #top_loci <- top_loci[2,]
+    message_("starting COLOC pipeline")
     if( debug == TRUE){ save.image("debug.RData") }
- 
-     
-    all_coloc <- purrr::map(
+    # regular GWAS-QTL COLOC
+    if( is.null(target_file) ){ 
+        message_("GWAS - QTL COLOC")
+        gwas <- pullData(gwas_dataset, "GWAS")
+        qtl <- pullData(qtl_dataset, "QTL")
+    
+        if( !is.null(qtl_dataset_2) ){
+            qtl2 <- pullData(qtl_dataset_2, "QTL")
+        }else{ qtl2 <- NULL}
+        top_loci <- extractLoci(gwas)
+        message_(paste0(" testing ", nrow(top_loci), " loci ") )
+
+        all_coloc <- purrr::map(
         1:nrow(top_loci), ~{
-            res <- runCOLOC(gwas, qtl, hit = top_loci[.x,])
+            res <- runCOLOC(gwas, qtl, qtl2, hit = top_loci[.x,], sig.level = sig)
             if(is.null(res) ){return(NULL) }
             return(res)
+        })
+    }
+    # QTL-QTL COLOC with pairwise targets
+    if(!is.null(target_file) ){
+        message_("QTL-QTL COLOC")
+        targets <- read_tsv(target_file)
+        # for TESTING
+        #targets <- head(targets, 3)
+        message_( paste0("testing ", nrow(targets), " pairs of QTLs" ) )
+        # doesn't work properly - furrr loads the whole MAF object into memory for each parallel run, 100MB per run which is crazy.
+        # for parallel to work I would have to transition to querying the MAF database each time like we do with the sumstats. 
+        if(threads > 1){
+            suppressPackageStartupMessages(library(furrr))
+            plan(multisession, workers = threads)
+            map <- future_map
         }
-    )
-    all_res <- map_df(all_coloc, "full_res")
-    all_obj <- map(all_coloc, "coloc_object")    
-    names(all_obj) <- top_loci$locus
-    
-    
-    # arrange by H4
-    all_res <- arrange(all_res, desc(PP.H4.abf)  )
+        all_coloc <- map(
+        1:nrow(targets), ~{
+            message_verbose( paste0("Pair ", .x , " of ", nrow(targets) ) )
+            pair_df <- targets[.x,]
+            if( "GWAS" %in% names(pair_df) ){
+                gwas <- pullData(pair_df$GWAS, "GWAS") 
+                locus <- extractLoci(gwas) %>% filter(locus == pair_df$locus)
+            }else{
+                message_verbose("no GWAS specified - using chr and pos from targets file")
+                locus <- pair_df
+                gwas <- NULL
+            }
+            qtl1 <- pullData(pair_df$qtl_1, "QTL")
+            qtl2 <- pullData(pair_df$qtl_2, "QTL")
+            features <- c(pair_df$feature_1, pair_df$feature_2)
+            res <- runCOLOC(gwas, qtl1, qtl2, hit = locus, sig.level = sig, target.file = features, bare = low_mem)
+            gc()
+            if(is.null(res) ){return(NULL) }
+            return(res)
+        }
+        )
+    }
     
-    outFile <- paste0(outFolder, qtl_dataset, "_", gwas_dataset, "_COLOC.tsv")
-    readr::write_tsv(all_res, path = outFile)
+    all_res <- purrr::map_df(all_coloc, "full_res")
+    if(!is.null(target_file) ){
+        outFile <- file.path(outFolder, gsub(".tsv", "_COLOC.tsv", basename(target_file) ) )
+        # here if any of the COLOCs did not proceed then the target df will be larger than the all_res results        
+
+        #all_res <- dplyr::bind_cols(targets, all_res %>% select(-locus) )
+    }else{
+        outFile <- paste0(outFolder,"/", qtl_dataset, "_", gwas_dataset, "_COLOC.tsv")
+        names(all_obj) <- top_loci$locus
+    }
+    dir.create(outFolder,  showWarnings = FALSE)
+    # in case all COLOCs are null for some reason
+    if(nrow(all_res) > 0){
+        all_res <- dplyr::arrange(all_res, desc(PP.H4.abf)  )
+    }
+    message("finished COLOC pipeline")
+    message_verbose(paste0("writing output to ", outFile))
+    readr::write_tsv(all_res, file = outFile)
     # save COLOC objects for plotting
-    save(all_obj, file = gsub("tsv", "RData", outFile))
+    if( !low_mem ){
+        all_obj <- purrr::map(all_coloc, "coloc_object")
+        if( is.null(targets) ){
+            names(all_obj) <- top_loci$locus
+        }
+        outData <- gsub("tsv.gz", "RData", outFile)
+        outData <- gsub("tsv", "RData", outFile)
+        save(all_obj, file = outData)
+    }
 }
 
 main()
 
+
diff --git a/GWAS/processGWAS.R b/GWAS/processGWAS.R
index 12a1835..97fc82a 100644
--- a/GWAS/processGWAS.R
+++ b/GWAS/processGWAS.R
@@ -81,7 +81,7 @@ get_standard_error <- function(OR, P){
 # write out sorted file to reference folder
 # tabix index the file
 # Ripke has loci with chr23 for X - remove
-sortGWAS <- function(gwas, out_folder = "./"){
+sortGWAS <- function(gwas, out_folder = ""){
     dataset <- gwas$dataset
     
     col_chr <- gwas$full_chrom
@@ -127,7 +127,7 @@ sortGWAS <- function(gwas, out_folder = "./"){
     # remove duplicate rows
     gwas_sorted <- dplyr::distinct(gwas_sorted)
     
-    out_path <- paste0(out_folder, dataset, ".processed.tsv")
+    out_path <- file.path(out_folder, paste0(dataset, ".processed.tsv") )
     message(Sys.time()," * writing sorted GWAS file to ", out_path)
     readr::write_tsv(gwas_sorted, out_path)
     stopifnot(file.exists(out_path) )
diff --git a/qvalue_sharing/Snakefile b/qvalue_sharing/Snakefile
index f6c9c85..4b086d5 100644
--- a/qvalue_sharing/Snakefile
+++ b/qvalue_sharing/Snakefile
@@ -18,7 +18,7 @@ rule run_qvalue:
     output:
         outFolder + "{source}:{target}:{threshold}.qvalue.tsv"
     shell:
-        "ml R/3.6.0;"
+        "ml R;"
         "Rscript {params.script} -s {params.source} -t {params.target} -q {threshold} -o {outFolder}"
 
 rule merge_qvalue:
@@ -29,5 +29,5 @@ rule merge_qvalue:
     output:
         outFolder + "all_qvalue_merged." + threshold + ".tsv"
     shell:
-        "ml R/3.6.0;"
+        "ml R;"
         "Rscript {params.script} -o {outFolder} -q {threshold}"
diff --git a/qvalue_sharing/run_qvalue.R b/qvalue_sharing/run_qvalue.R
index 50a372b..38c6511 100644
--- a/qvalue_sharing/run_qvalue.R
+++ b/qvalue_sharing/run_qvalue.R
@@ -11,10 +11,10 @@ pullData <- function(dataset, type = "GWAS"){
     message(Sys.time()," * reading GWAS database from ", db_path)
     stopifnot( file.exists(db_path) )
     if( type == "GWAS"){
-        n_sheet <- 3
+        n_sheet <- 2
     }
     if( type == "QTL"){
-        n_sheet <- 2
+        n_sheet <- 3
     }
     gwas_db <- suppressMessages(readxl::read_excel(db_path, sheet = n_sheet,na= c("", "-","NA")))
     
@@ -78,7 +78,7 @@ extractTargetQTL <- function(qtl, chr, source_qtls){
     stopifnot( file.exists(qtl$full_path) )
      
     # read in QTL
-    cmd <- paste0("ml bcftools; tabix ", qtl$full_path, " ", chr)
+    cmd <- paste0("ml tabix; tabix ", qtl$full_path, " ", chr)
     message( " * ", cmd) 
     result <- data.table::fread( cmd = cmd, nThread = 8 )
     if( nrow(result) == 0){