demitri · sabrsyed · Oct 30, 2017 · Oct 30, 2017
diff --git a/motif_search.py b/motif_search.py
@@ -4,36 +4,59 @@
 import subprocess
 import sys
 import Bio.Seq
+import argparse
 
 from Bio import motifs
 from Bio import SeqIO
 from Bio.Seq import Seq
 
+parser = argparse.ArgumentParser(description="FastafromBed & Motif search")
+
+parser.add_argument("-f", "--fastagenome",
+					help="fasta genome", required=True)
+parser.add_argument("-b", "--bed",
+					help="bedfile", required=True)
+parser.add_argument("-o", "--fastaoutput",
+					help="output from fastaFromBed in FASTA format", required=True)
+parser.add_argument("-m", "--motifoutput",
+					help="output from motif search", required=True)	
+parser.add_argument("-s", "--motifsequence",
+					help="motif sequence", required=True)	
+
+
+
+args=parser.parse_args()
+
 file1 = sys.argv[1] #fasta file
 file2 = sys.argv[2] #bed file
 
-output = subprocess.run(['fastaFromBed', '-fi', file1, '-bed', file2], stdout=subprocess.PIPE )
+fasta_write = open(args.fastaoutput, "w")
+
+output = subprocess.run(['fastaFromBed', '-fi', args.fastagenome, '-bed', args.bed], stdout=subprocess.PIPE )
 bytes = output.stdout
 stdout=bytes.decode('utf-8')
 #print(stdout)
 
-fasta_write = open ("TAIR_fasta_output", "w")
 fasta_write.write(stdout)
 fasta_write.close()
 
-#fasta_frombed = open ("TAIR_fasta_output", "r")
-
-instances = [Seq("TACAA")]
+outputfile = open(args.motifoutput, "w")
+ 
+instances = [Seq(args.motifsequence)]	
 m=motifs.create(instances)
-
-myfile= "TAIR_fasta_output"
+myfile= args.fastaoutput
 
 for line in SeqIO.parse(myfile, "fasta"):
-	#print(str(line.seq))
-	motif=Seq(str(line.seq))
-	for pos in m.instances.search(motif):
-		print(line.id, pos[0])
-
+ 	#print(str(line.seq))
+ 	motif=Seq(str(line.seq))
+ 	for pos in m.instances.search(motif):
+ 		print(line.id, pos[0])
+ 		outputfile.write("{0}\t{1}\n".format(line.id, pos[0]))
+
+outputfile.close()
+
+
+
 # 	test_seq=Seq(line, m.alphabet)
 # 	# 
 # instances = [Seq("TACAA"), Seq("AATGC")]

diff --git a/peak_parser.py b/peak_parser.py
@@ -1,7 +1,9 @@
 #! /usr/bin/env python3
 
+import re
+
 #object will be path defined by user when this method is called:
-#path = sys.argv[1]
+#path = sys.argv[1] or argparse
 #newfile = Bedfile(path=path)
 class Bedfile(object):
 	'''
@@ -23,14 +25,14 @@ def __init__(self, path=None): #standard idiom when defining classes
 				if chr_name in self.chromosomes:
 					chromosome =  self.chromosomes[chr_name] #chromosome_name is associated with the chromosome value, which in the else statement below we turn into a Chromosome object list
 				else:
-					chromosome = Chromosome(name=chr_name)
-					self.chromosomes[chr_name] = chromosome
+					chromosome = Chromosome(name=chr_name) # we creating a new object in our dictionary, chromosome, and that object is of the Class Chromosome and its name is chr_name
+					self.chromosomes[chr_name.lower] = chromosome
 
 				# create new peak from row
 				new_peak = Peak() #create new object with Peak class method
 				new_peak.start = int(fields[1]) #convert the string to an int
 				new_peak.end = int(fields[2])
-				new_peak.signal = float(fields[6])
+				new_peak.signal = float(fields[6]) #signal = peak height
 				peak_average = str(fields[3]).split(":")
 				new_peak.mean = peak_average[1]
 				#append the list of gene properties to the chromosome dictionary
@@ -64,6 +66,9 @@ def width(self):
 
 	def intersects_with(self, other_peak):
 		pass # return True or False
+
+	def __repr__(self): #Default override. unix returns the print function from this. If we define it here, then it won't return the default object location
+		return "<Peak {0}-{1}>".format(self.start, self.end)
 
 
 class Gff_file(object):
@@ -97,11 +102,63 @@ def __init__(self, path=None):
 					gene.start = int(fields[3]) #start is tss for positive strand -1000 for peak analysis
 					gene.stop = int(fields[4]) #stop is tss for negative strange +1000 for peak analysis
 					gene.strand = fields[6]
-					geneid = fields[8].split(";")
-					gene.ID = geneid[0]
+					gene_id = re.split(";|=", fields[8])
+					gene.ID = gene_id[1]
 					#append the list of gene properties to the chromosome dictionary
-					chromosome.genes.append(gene)
+					chromosome.genes.append(gene) #this populates the above chromosome dictionary.
+
+class IntersectBed(object):
+	'''
+	Class to read a bedfile type file.
+	'''
+	def __init__(self, path=None): #standard idiom when defining classes
+
+		self.chromosomesA = dict() # key=chromosome name, value=Chromosome object
+		self.chromosomesB = dict()
+		self.filepath = path
 
+		with open(path) as intersectbed:
+			for line in intersectbed:
+				line = line.rstrip()
+				fields = line.split("\t")
+				chr_nameA = fields[0]
+				chr_nameB = fields[10]
+
+				# get existing or create new chromosome				
+				if chr_nameA in self.chromosomesA:
+					chromosomeA =  self.chromosomesA[chr_nameA] #chromosome_name is associated with the chromosome value, which in the else statement below we turn into a Chromosome object list
+				else:
+					chromosomeA = Chromosome(name=chr_nameA) # we creating a new object in our dictionary, chromosome, and that object is of the Class Chromosome and its name is chr_name
+					self.chromosomesA[chr_nameA] = chromosomeA
+
+				if chr_nameB in self.chromosomesB:
+					chromosomeB =  self.chromosomesB[chr_nameB] #chromosome_name is associated with the chromosome value, which in the else statement below we turn into a Chromosome object list
+				else:
+					chromosomeB = Chromosome(name=chr_nameB) # we creating a new object in our dictionary, chromosome, and that object is of the Class Chromosome and its name is chr_name
+					self.chromosomesB[chr_nameB] = chromosomeB
+
+				# create new peak from row
+				new_peakB = Peak() #create new object with Peak class method
+				new_peakB.start = int(fields[11]) #convert the string to an int
+				new_peakB.end = int(fields[12])
+				new_peakB.signal = float(fields[16]) #signal = peak height
+				peak_averageB = str(fields[13]).split(":")
+				new_peakB.mean = peak_averageB[1]
+				#append the list of gene properties to the chromosome dictionary
+				chromosomeB.peaks.append(new_peakB) #chromosome is an object containing a list of objects
+
+				# create new peak from row
+				new_peakA = Peak() #create new object with Peak class method
+				new_peakA.start = int(fields[1]) #convert the string to an int
+				new_peakA.end = int(fields[2])
+				new_peakA.signal = float(fields[6]) #signal = peak height
+				peak_averageA = str(fields[3]).split(":")
+				new_peakA.mean = peak_averageA[1]
+				#append the list of gene properties to the chromosome dictionary
+				chromosomeA.peaks.append(new_peakA) #chromosome is an object containing a list of objects
+			#print(chromosome.peaks)
+
+
 class Gene(object):
 	def __init__(self):
 
@@ -115,26 +172,30 @@ def __init__(self):
 class DE_genes(object):
 	def __init__(self, path=None):
 		self.expression_values_dict = dict()
+		self.filepath=path
 
 		with open(path) as defile:
 			for line in defile:
 				line = line.rstrip()
 				fields = line.split('\t')
-				gene_name = fields[2]
+				gene_n = re.split("_",fields[2])
+				gene_name = gene_n[-1]
+
+				if "gene" in self.expression_values_dict: #giving all the values the same key "gene" so it is easier to iterate over in execution script
+					chromosome = self.expression_values_dict["gene"]
+				else:
+					chromosome = Chromosome(name="gene") #calling Chromosome class here to populate list of objects later					
+					self.expression_values_dict["gene"] = chromosome
 
-				if gene_name not in self.expression_values_dict:
-					gene_value_list_object = Chromosome(name = gene_name) #calling Chromosome class here to populate list of objects later					
-					self.expression_values_dict[gene_name] = gene_value_list_object
-					gene = DEexpression()
-					gene.cluster = fields[0]
-					gene.ID = fields[2]
-					gene.foldchange = fields[3:len(fields)-1]
-					gene.annotation = fields[-1]
-
-				gene_value_list_object.degenes.append(gene)
-
+				gene = DEexpression()
+				gene.cluster = fields[0]
+				gene.ID = gene_name
+				gene.foldchange = fields[3:len(fields)-1]
+				gene.annotation = fields[-1]
+
+				chromosome.degenes.append(gene)
+
 
-
 class DEexpression(object):
 	def __init__(self):
 

diff --git a/presentation/wombats_3_Ying_ying.pptx b/presentation/wombats_3_Ying_ying.pptx