-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscript.py
More file actions
executable file
·89 lines (81 loc) · 3.91 KB
/
script.py
File metadata and controls
executable file
·89 lines (81 loc) · 3.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/python
import os.path
from subprocess import call
import subprocess
import csv
import shutil
#Get reference sequences
if not os.path.isfile("/home/ec2-user/ref/hs37d5.fa.gz"):
command = "aws s3 cp s3://takomaticsdata/reference_genomes/hg19/ /home/ec2-user/ref --recursive --exclude \"*\" --include \"hs37d5*\""
print(command)
call(command, shell = True)
command = "docker pull wengkhong/speedseq"
call(command, shell = True)
command = "docker pull wengkhong/vcflib"
call(command, shell = True)
call("sudo pip install boto3", shell = True)
import boto3
#Get sample list
call("aws s3 cp s3://takomaticsdata/SarcomaPanel/Sarc_Samples.txt . ", shell = True)
call("aws s3 cp s3://takomaticsdata/Sarcoma.bed . ", shell = True)
with open('Sarc_Samples.txt','r') as tsv:
sample_list = [line.strip().split('\t') for line in tsv]
print("Length", len(sample_list))
def checkFileInS3(command):
proc = subprocess.Popen(command, stdout = subprocess.PIPE, shell = True)
(out,err) = proc.communicate()
#print out
if out:
return True;
if not out:
return False;
for line in sample_list:
file1 = str(line[0])
file2 = str(line[1])
sample_name = str(line[2])
if file1 == "File1":
continue
#Check if output for this sample already exists. Skip if so
command = "aws s3 ls s3://takomaticsdata/SarcomaPanel/" + sample_name + "/" + sample_name + ".vcf"
if(checkFileInS3(command)):
print "Sample " + sample_name + " already done. Skipping"
continue
############################################################
print "Currently processing " + sample_name
#Create folder
if not os.path.isdir(sample_name):
os.mkdir(sample_name)
#Goto folder
os.chdir(sample_name)
#Get fastq files
print "Getting fastq files"
command = "aws s3 cp s3://takomaticsdata/SarcomaPanel/" + file1 + " ."
call(command, shell = True)
command = "aws s3 cp s3://takomaticsdata/SarcomaPanel/" + file2 + " ."
call(command, shell = True)
#Align and call variants
print "Aligning for " + sample_name
command = "docker run --rm=true -v /home/ec2-user:/home wengkhong/speedseq code/speedseq/bin/speedseq align -o /home/" + sample_name + "/" + sample_name + " -R \"@RG\\tID:" + sample_name + "\\tSM:" + sample_name + "\\tLB:lib1\" -t16 -T /home/" + sample_name + "/" + sample_name + "_temp -M 10 /home/ref/hs37d5.fa /home/" + sample_name + "/" + file1 + " /home/" + sample_name + "/" + file2
print command
call(command, shell = True)
print "Calling variants for " + sample_name
command = "docker run --rm=true -v /home/ec2-user:/home wengkhong/speedseq /code/speedseq/bin/freebayes -f /home/ref/hs37d5.fa -b /home/" + sample_name + "/" + sample_name + ".bam -v /home/" + sample_name + "/" + sample_name + ".vcf"
print command
call(command, shell = True)
print "Filtering variants for " + sample_name
command = "docker run -it --rm=true -v /home/ec2-user/:/home wengkhong/vcflib vcflib/bin/vcffilter -f 'DP > 100 & QUAL > 30' /home/" + sample_name + "/" + sample_name + ".vcf > " + sample_name + ".filtered.vcf"
print command
call(command, shell = True)
print "Getting variants in target region"
command = "docker run --rm=true -v /home/ec2-user:/home wengkhong/vcflib bedtools intersect -a /home/" + sample_name + "/" + sample_name + ".filtered.vcf -b /home/Sarcoma.bed > " + sample_name + ".filtered.target.vcf"
print command
call(command, shell = True)
#Upload results
print "Uploading results for " + sample_name
command = " aws s3 cp . s3://takomaticsdata/SarcomaPanel/" + sample_name + " --recursive --exclude \"*discordants*\" --exclude \"*splitters*\" --exclude \"*fq.gz\""
call(command, shell = True)
#Delete folder
os.chdir('..')
print "Cleaning up folder for " + sample_name
shutil.rmtree(sample_name)
#break