Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,4 @@ docs/_build/

# JetBrains
.idea
.DS_Store
33 changes: 32 additions & 1 deletion genomediff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,35 @@ def __len__(self):
return len(self.mutations) + len(self.evidence) + len(self.validation)

def __iter__(self):
    """Iterate over every record: mutations, then evidence, then validation."""
    # A single chained iterator; the three underlying lists are not copied.
    return itertools.chain(self.mutations, self.evidence, self.validation)

def __str__(self):
    '''
    Return a plain-text listing of the document.

    Output: the headers "MUTATIONS:", "EVIDENCE:" and "VALIDATION:", each
    followed by the str() of every record in that section, all joined by
    newlines. An empty section contributes an empty line after its header.
    '''
    sections = (("MUTATIONS:", self.mutations),
                ("EVIDENCE:", self.evidence),
                ("VALIDATION:", self.validation))
    lines = []
    for header, records in sections:
        lines.append(header)
        # str() every entry, validation included: joining the raw objects
        # raises TypeError for anything that is not already a string.
        lines.append('\n'.join(str(x) for x in records))
    return '\n'.join(lines)


def remove(self, *args, mut_type=None):
    '''
    Remove mutations that satisfy the given conditions. Implementation of
    gdtools REMOVE for genomediff objects.

    Input: a variable number of conditions, e.g. 'gene_name==rrlA','frequency>=0.9'.
           If mut_type is specified, only that mutation type will be removed.
    Output: self.mutations is updated, with mutations satisfying the conditions
            having been removed. Nothing is returned.

    A record is dropped when its type matches mut_type (or mut_type is None)
    and rec.satisfies(*args) is true; every other record is kept, in order.
    '''
    # rec.satisfies() is only evaluated when the type filter already matches,
    # thanks to the short-circuiting `and`.
    self.mutations = [
        rec for rec in self.mutations
        if not ((mut_type is None or mut_type == rec.type)
                and rec.satisfies(*args))
    ]
2 changes: 1 addition & 1 deletion genomediff/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,4 @@ def __iter__(self):

yield Record(type, id, self._document, parent_ids, **extra_dct)
else:
raise Exception('Could not parse line #{}: {}'.format(i, line))
raise Exception('Could not parse line #{}: {}'.format(i, line))
71 changes: 66 additions & 5 deletions genomediff/records.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

class Metadata(object):
def __init__(self, name, value):
self.name = name
Expand Down Expand Up @@ -32,12 +34,71 @@ def __getattr__(self, item):
raise AttributeError


def __repr__(self):
    """Return a constructor-like description of the record.

    The text has the form ``Record('<type>', <id>, <parent_ids>, k=v, ...)``
    where the trailing ``k=v`` pairs come from ``self.attributes`` and each
    value is rendered with ``repr()``.
    """
    extras = ', '.join('{}={}'.format(k, repr(v))
                       for k, v in self.attributes.items())
    return "Record('{}', {}, {}, {})".format(self.type,
                                             self.id,
                                             self.parent_ids,
                                             extras)

def __str__(self):
    """Return the same text as ``__repr__`` so printing a record shows its fields."""
    return self.__repr__()

def __eq__(self, other):
    ''' this definition allows identical mutations in different genome diffs
    to be equal: only the record type and its attributes are compared.

    Returns NotImplemented when `other` does not expose `type` and
    `attributes`, so Python falls back to its default comparison instead of
    raising AttributeError.
    '''
    # NOTE(review): defining __eq__ without __hash__ makes instances
    # unhashable in Python 3 -- confirm that is intended.
    try:
        return self.type == other.type and self.attributes == other.attributes
    except AttributeError:
        return NotImplemented

def __ne__(self, other):
    ''' Inverse of __eq__; NotImplemented is passed through unchanged. '''
    outcome = self.__eq__(other)
    if outcome is NotImplemented:
        # Negating NotImplemented would silently turn it into a boolean.
        return outcome
    return not outcome
def satisfies(self, *args):
    '''
    Input: a variable number of conditions, e.g. 'gene_name==rrlA','frequency>=0.9'.
    Output: return True if all conditions are true (i.e. correspond to key-values
            in attributes), False as soon as one condition evaluates to false.

    Each condition is `<key><op><value>` with op one of ==, !=, <, <=, >, >=.
    Both the attribute and the condition value are compared as numbers when
    they parse as numbers, otherwise as they are (usually strings).

    A condition whose key is not present in self.attributes is skipped, so it
    never makes the result False.
    NOTE(review): this means a record lacking the key "satisfies" the
    condition -- confirm that is the intended REMOVE semantics.

    Raises: TypeError if a condition is not a string, ValueError if it cannot
            be parsed. Ordering comparisons between a number and a
            non-number raise TypeError (as Python's own operators do).
    '''
    import operator  # local import: the method needs nothing else from it

    # Dispatch table used instead of eval(): attribute values containing
    # quotes or other Python syntax can no longer break (or inject into)
    # the comparison.
    comparators = {
        '==': operator.eq,
        '!=': operator.ne,
        '<=': operator.le,
        '>=': operator.ge,
        '<': operator.lt,
        '>': operator.gt,
    }

    # Compiled once per call rather than once per condition. Two-character
    # operators are listed first so they match without backtracking.
    condition_pattern = re.compile(r'^(?P<key>[_a-z]+)'
                                   r'(?P<comp>==|!=|<=|>=|<|>)'
                                   r'(?P<val>[-_a-zA-Z0-9\.]+)')

    ## helper function: turn numeric strings into numbers, leave the rest alone.
    def coerce(value):
        if isinstance(value, str):
            for cast in (int, float):
                try:
                    return cast(value)
                except ValueError:
                    pass
        return value

    for c in args:
        if not isinstance(c, str):
            raise TypeError("error: supplied condition is not a string.")
        condition_match = condition_pattern.match(c)
        if condition_match is None:
            raise ValueError("the supplied condition\n"+c+"\n could not be parsed.")
        cond_key = condition_match.group('key')
        compare = comparators[condition_match.group('comp')]
        cond_val = condition_match.group('val')

        try: ## in case the given condition is not in the attributes.
            attribute_val = self.attributes[cond_key]
        except KeyError:
            continue

        if not compare(coerce(attribute_val), coerce(cond_val)):
            return False
    return True
61 changes: 60 additions & 1 deletion tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,65 @@ def test_resolve(self):
document = GenomeDiff.read(file)
self.assertEqual(document[1].parents, [document[2]])

class RecordComparisonTestCase(TestCase):
# Checks that the mutation lists of two separately parsed GenomeDiff
# documents compare equal with assertEqual.
# NOTE(review): confirm the field separators inside the fixture literals
# below match what the parser expects (whitespace may have been altered).

# Two documents whose '#=' header lines differ and whose SNP lines differ
# only in the third column (34 vs 12) -- presumably the parent evidence id;
# verify against the parser. Their mutation lists are expected to be equal.
def test_cmp1(self):
file1 = StringIO("""
#=GENOME_DIFF 1.0
#=CREATED 20:02:17 23 Jan 2019
#=PROGRAM breseq 0.33.2
#=COMMAND breseq -r LCA.gff3 sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R1.fastq.gz sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R2.fastq.gz sequence-data/ZDBp889_reads.fastq -o consensus/ZDBp889
#=REFSEQ LCA.gff3
#=READSEQ sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R1.fastq.gz
#=READSEQ sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R2.fastq.gz
#=READSEQ sequence-data/ZDBp889_reads.fastq
#=CONVERTED-BASES 644779377
#=CONVERTED-READS 14448149
#=INPUT-BASES 645034321
#=INPUT-READS 14455411
#=MAPPED-BASES 602854657
#=MAPPED-READS 13788351
SNP 1 34 REL606 72313 C
""".strip())

document1 = GenomeDiff.read(file1)

file2 = StringIO("""
#=GENOME_DIFF 1.0
#=CREATED 16:49:49 23 Jan 2019
#=PROGRAM breseq 0.33.2
#=COMMAND breseq -r LCA.gff3 sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R1.fastq.gz sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R2.fastq.gz -o consensus/ZDB67
#=REFSEQ LCA.gff3
#=READSEQ sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R1.fastq.gz
#=READSEQ sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R2.fastq.gz
#=CONVERTED-BASES 114566968
#=CONVERTED-READS 419781
#=INPUT-BASES 114567554
#=INPUT-READS 419783
#=MAPPED-BASES 92472620
#=MAPPED-READS 339813
SNP 1 12 REL606 72313 C
""".strip())

document2 = GenomeDiff.read(file2)
# The comparison is on the .mutations lists only, not on whole documents.
self.assertEqual(document1.mutations,document2.mutations)


# Same idea with annotated SNP lines: the trailing key=value fields are
# identical in both fixtures and only the third column differs (12 vs 34).
def test_cmp2(self):
file1 = StringIO("""
#=GENOME_DIFF 1.0
SNP 1 12 REL606 72313 C aa_new_seq=G aa_position=92 aa_ref_seq=D codon_new_seq=GGC codon_number=92 codon_position=2 codon_ref_seq=GAC gene_name=araA gene_position=275 gene_product=L-arabinose isomerase gene_strand=< genes_overlapping=araA locus_tag=ECB_00064 locus_tags_overlapping=ECB_00064 mutation_category=snp_nonsynonymous position_end=72313 position_start=72313 snp_type=nonsynonymous transl_table=11
""".strip())

document1 = GenomeDiff.read(file1)

file2 = StringIO("""
#=GENOME_DIFF 1.0
SNP 1 34 REL606 72313 C aa_new_seq=G aa_position=92 aa_ref_seq=D codon_new_seq=GGC codon_number=92 codon_position=2 codon_ref_seq=GAC gene_name=araA gene_position=275 gene_product=L-arabinose isomerase gene_strand=< genes_overlapping=araA locus_tag=ECB_00064 locus_tags_overlapping=ECB_00064 mutation_category=snp_nonsynonymous position_end=72313 position_start=72313 snp_type=nonsynonymous transl_table=11
""".strip())

document2 = GenomeDiff.read(file2)
self.assertEqual(document1.mutations,document2.mutations)


# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    main()