-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGFF_line.py
More file actions
executable file
·35 lines (30 loc) · 1.63 KB
/
GFF_line.py
File metadata and controls
executable file
·35 lines (30 loc) · 1.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import sys, re
from collections import OrderedDict
############################################################################################################################################
class GFF_line:
    """Parse a single tab-separated GFF line and expose its nine columns.

    The columns are stored as the attributes ``seqid``, ``source``, ``type``,
    ``start``, ``end``, ``score``, ``strand``, ``phase`` and ``attribs``.
    ``start`` and ``end`` are converted to integers; the other columns are
    kept as the raw strings found in the line.  The ninth column is also
    parsed into ``attributes``, an ``OrderedDict`` of key -> value.  The
    unmodified input line is kept in ``line``.
    """

    def __init__(self, l, info_delimiter=";", info_field_delimiter='='):
        """Split *l* on tabs and populate the instance.

        Args:
            l: One GFF record; it must contain exactly nine tab-separated
                columns.
            info_delimiter: Separator between entries of the attributes
                column (used as a regular expression by ``re.split``).
            info_field_delimiter: Separator between the key and the value of
                one entry (also a regular expression).

        Raises:
            ValueError: If the line does not have nine columns, or if the
                start/end columns are not integers.
        """
        fields = l.split('\t')
        if len(fields) != 9:
            # Same exception type as the bare tuple unpacking would raise,
            # but with a message that points at the offending line.
            raise ValueError(
                "expected 9 tab-separated GFF columns, got {0}: {1!r}".format(len(fields), l)
            )
        (self.seqid, self.source, self.type, self.start, self.end,
         self.score, self.strand, self.phase, self.attribs) = fields
        self.attributes = self.attribute_dict(self.attribs, info_delimiter, info_field_delimiter)
        self.start = int(self.start)
        self.end = int(self.end)
        self.line = l

    def attribute_dict(self, attributes, info_delimiter=";", info_field_delimiter='='):
        """Turn the attributes column into an ``OrderedDict``.

        Entries are separated by *info_delimiter*; each entry is split into
        key and value at the FIRST match of *info_field_delimiter*, so the
        value itself may contain the delimiter (e.g. ``Note=a=b``).  Double
        quotes surrounding a value are removed.  An entry without a
        key/value delimiter is stored with an empty-string value.  Empty
        entries (e.g. from a trailing delimiter or a trailing newline) are
        skipped.  Both delimiters are interpreted as regular expressions.

        Returns:
            OrderedDict mapping attribute name -> value, in file order.
        """
        d = OrderedDict()
        # str.strip treats its argument as a set of characters to remove from
        # both ends; with the default ";" this drops leading/trailing ";".
        attributes = attributes.strip(info_delimiter)
        for entry in re.split(info_delimiter, attributes):
            entry = entry.strip()
            if not entry:
                continue
            # maxsplit=1 keeps any further delimiter occurrences in the value.
            parts = re.split(info_field_delimiter, entry, maxsplit=1)
            value = parts[1] if len(parts) > 1 else ""
            d[parts[0]] = value.strip('"')
        return d

    def retrieve_sequence(self, ref_dict, reverse_complement=False):
        """Return the sequence covered by this feature as a string.

        Args:
            ref_dict: Mapping from ``seqid`` to an object whose ``.seq``
                attribute supports slicing (presumably Biopython
                ``SeqRecord`` objects -- confirm against callers).
            reverse_complement: If true, return the reverse complement of
                the extracted slice.

        The slice ``[start - 1:end]`` treats ``start``/``end`` as 1-based,
        inclusive coordinates.
        """
        seq = str(ref_dict[self.seqid].seq[self.start - 1: self.end])
        if reverse_complement:
            # The boolean parameter shadows the module-level function of the
            # same name, so the function is reached through a private alias.
            seq = _reverse_complement(seq)
        return seq


# Complement of each supported nucleotide / IUPAC ambiguity symbol, built once.
# Lower-case symbols map to their lower-case complements.
_COMPLEMENT = dict(zip('ACGTNRYKMWS?X.-BDHV', 'TGCANYRMKWS?X.-VHDB'))
_COMPLEMENT.update(dict(zip('acgtnrykmwsxbdhv', 'tgcanyrmkwsxvhdb')))


def reverse_complement(sequence):
    """Return the reverse complement of *sequence* as a string.

    Supports ``ACGTN``, the IUPAC ambiguity codes ``RYKMWSBDHV``, the symbols
    ``? X . -`` and the lower-case forms of the letters.

    Raises:
        KeyError: If *sequence* contains any other symbol.
    """
    return "".join([_COMPLEMENT[base] for base in sequence])[::-1]


# Alias used by GFF_line.retrieve_sequence, whose ``reverse_complement``
# parameter hides the function name inside the method.
_reverse_complement = reverse_complement