Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 35 additions & 12 deletions motif_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,59 @@
import subprocess
import sys
import Bio.Seq
import argparse

from Bio import motifs
from Bio import SeqIO
from Bio.Seq import Seq

parser = argparse.ArgumentParser(description="FastafromBed & Motif search")

parser.add_argument("-f", "--fastagenome",
help="fasta genome", required=True)
parser.add_argument("-b", "--bed",
help="bedfile", required=True)
parser.add_argument("-o", "--fastaoutput",
help="output from fastaFromBed in FASTA format", required=True)
parser.add_argument("-m", "--motifoutput",
help="output from motif search", required=True)
parser.add_argument("-s", "--motifsequence",
help="motif sequence", required=True)



args=parser.parse_args()

file1 = sys.argv[1] #fasta file
file2 = sys.argv[2] #bed file

output = subprocess.run(['fastaFromBed', '-fi', file1, '-bed', file2], stdout=subprocess.PIPE )
fasta_write = open(args.fastaoutput, "w")

output = subprocess.run(['fastaFromBed', '-fi', args.fastagenome, '-bed', args.bed], stdout=subprocess.PIPE )
bytes = output.stdout
stdout=bytes.decode('utf-8')
#print(stdout)

fasta_write = open ("TAIR_fasta_output", "w")
fasta_write.write(stdout)
fasta_write.close()

#fasta_frombed = open ("TAIR_fasta_output", "r")

instances = [Seq("TACAA")]
outputfile = open(args.motifoutput, "w")
instances = [Seq(args.motifsequence)]
m=motifs.create(instances)

myfile= "TAIR_fasta_output"
myfile= args.fastaoutput

for line in SeqIO.parse(myfile, "fasta"):
#print(str(line.seq))
motif=Seq(str(line.seq))
for pos in m.instances.search(motif):
print(line.id, pos[0])

#print(str(line.seq))
motif=Seq(str(line.seq))
for pos in m.instances.search(motif):
print(line.id, pos[0])
outputfile.write("{0}\t{1}\n".format(line.id, pos[0]))

outputfile.close()



# test_seq=Seq(line, m.alphabet)
# #
# instances = [Seq("TACAA"), Seq("AATGC")]
Expand Down
101 changes: 81 additions & 20 deletions peak_parser.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#! /usr/bin/env python3

import re

#object will be path defined by user when this method is called:
#path = sys.argv[1]
#path = sys.argv[1] or argparse
#newfile = Bedfile(path=path)
class Bedfile(object):
'''
Expand All @@ -23,14 +25,14 @@ def __init__(self, path=None): #standard idiom when defining classes
if chr_name in self.chromosomes:
chromosome = self.chromosomes[chr_name] #chromosome_name is associated with the chromosome value, which in the else statement below we turn into a Chromosome object list
else:
chromosome = Chromosome(name=chr_name)
self.chromosomes[chr_name] = chromosome
chromosome = Chromosome(name=chr_name) # we creating a new object in our dictionary, chromosome, and that object is of the Class Chromosome and its name is chr_name
self.chromosomes[chr_name.lower] = chromosome

# create new peak from row
new_peak = Peak() #create new object with Peak class method
new_peak.start = int(fields[1]) #convert the string to an int
new_peak.end = int(fields[2])
new_peak.signal = float(fields[6])
new_peak.signal = float(fields[6]) #signal = peak height
peak_average = str(fields[3]).split(":")
new_peak.mean = peak_average[1]
#append the list of gene properties to the chromosome dictionary
Expand Down Expand Up @@ -64,6 +66,9 @@ def width(self):

def intersects_with(self, other_peak):
pass # return True or False

def __repr__(self): #Default override. unix returns the print function from this. If we define it here, then it won't return the default object location
return "<Peak {0}-{1}>".format(self.start, self.end)


class Gff_file(object):
Expand Down Expand Up @@ -97,11 +102,63 @@ def __init__(self, path=None):
gene.start = int(fields[3]) #start is tss for positive strand -1000 for peak analysis
gene.stop = int(fields[4]) #stop is tss for negative strange +1000 for peak analysis
gene.strand = fields[6]
geneid = fields[8].split(";")
gene.ID = geneid[0]
gene_id = re.split(";|=", fields[8])
gene.ID = gene_id[1]
#append the list of gene properties to the chromosome dictionary
chromosome.genes.append(gene)
chromosome.genes.append(gene) #this populates the above chromosome dictionary.

class IntersectBed(object):
'''
Class to read a bedfile type file.
'''
def __init__(self, path=None): #standard idiom when defining classes

self.chromosomesA = dict() # key=chromosome name, value=Chromosome object
self.chromosomesB = dict()
self.filepath = path

with open(path) as intersectbed:
for line in intersectbed:
line = line.rstrip()
fields = line.split("\t")
chr_nameA = fields[0]
chr_nameB = fields[10]

# get existing or create new chromosome
if chr_nameA in self.chromosomesA:
chromosomeA = self.chromosomesA[chr_nameA] #chromosome_name is associated with the chromosome value, which in the else statement below we turn into a Chromosome object list
else:
chromosomeA = Chromosome(name=chr_nameA) # we creating a new object in our dictionary, chromosome, and that object is of the Class Chromosome and its name is chr_name
self.chromosomesA[chr_nameA] = chromosomeA

if chr_nameB in self.chromosomesB:
chromosomeB = self.chromosomesB[chr_nameB] #chromosome_name is associated with the chromosome value, which in the else statement below we turn into a Chromosome object list
else:
chromosomeB = Chromosome(name=chr_nameB) # we creating a new object in our dictionary, chromosome, and that object is of the Class Chromosome and its name is chr_name
self.chromosomesB[chr_nameB] = chromosomeB

# create new peak from row
new_peakB = Peak() #create new object with Peak class method
new_peakB.start = int(fields[11]) #convert the string to an int
new_peakB.end = int(fields[12])
new_peakB.signal = float(fields[16]) #signal = peak height
peak_averageB = str(fields[13]).split(":")
new_peakB.mean = peak_averageB[1]
#append the list of gene properties to the chromosome dictionary
chromosomeB.peaks.append(new_peakB) #chromosome is an object containing a list of objects

# create new peak from row
new_peakA = Peak() #create new object with Peak class method
new_peakA.start = int(fields[1]) #convert the string to an int
new_peakA.end = int(fields[2])
new_peakA.signal = float(fields[6]) #signal = peak height
peak_averageA = str(fields[3]).split(":")
new_peakA.mean = peak_averageA[1]
#append the list of gene properties to the chromosome dictionary
chromosomeA.peaks.append(new_peakA) #chromosome is an object containing a list of objects
#print(chromosome.peaks)


class Gene(object):
def __init__(self):

Expand All @@ -115,26 +172,30 @@ def __init__(self):
class DE_genes(object):
def __init__(self, path=None):
self.expression_values_dict = dict()
self.filepath=path

with open(path) as defile:
for line in defile:
line = line.rstrip()
fields = line.split('\t')
gene_name = fields[2]
gene_n = re.split("_",fields[2])
gene_name = gene_n[-1]

if "gene" in self.expression_values_dict: #giving all the values the same key "gene" so it is easier to iterate over in execution script
chromosome = self.expression_values_dict["gene"]
else:
chromosome = Chromosome(name="gene") #calling Chromosome class here to populate list of objects later
self.expression_values_dict["gene"] = chromosome

if gene_name not in self.expression_values_dict:
gene_value_list_object = Chromosome(name = gene_name) #calling Chromosome class here to populate list of objects later
self.expression_values_dict[gene_name] = gene_value_list_object
gene = DEexpression()
gene.cluster = fields[0]
gene.ID = fields[2]
gene.foldchange = fields[3:len(fields)-1]
gene.annotation = fields[-1]

gene_value_list_object.degenes.append(gene)

gene = DEexpression()
gene.cluster = fields[0]
gene.ID = gene_name
gene.foldchange = fields[3:len(fields)-1]
gene.annotation = fields[-1]

chromosome.degenes.append(gene)



class DEexpression(object):
def __init__(self):

Expand Down
Binary file added presentation/wombats_3_Ying_ying.pptx
Binary file not shown.