biosustain · rohanmaddamsetti · Jan 11, 2019 · Jan 11, 2019 · Jan 25, 2019 · Jan 25, 2019
diff --git a/.gitignore b/.gitignore
@@ -54,3 +54,4 @@ docs/_build/
 
 # JetBrains
 .idea
+.DS_Store
diff --git a/genomediff/__init__.py b/genomediff/__init__.py
@@ -39,4 +39,35 @@ def __len__(self):
         return len(self.mutations) + len(self.evidence) + len(self.validation)
 
     def __iter__(self):
-        return itertools.chain(self.mutations, self.evidence, self.validation)
+        return itertools.chain(self.mutations, self.evidence, self.validation)
+
+<<<<<<< HEAD
+    def __str__(self):
+        return '\n'.join(["MUTATIONS:",'\n'.join([str(x) for x in self.mutations]),
+                          "EVIDENCE:",'\n'.join([str(x) for x in self.evidence]),
+                          "VALIDATION:",'\n'.join(self.validation)])
+
+
+    def remove(self,*args, mut_type=None):
+        ''' 
+        Remove mutations that satisfy the given conditions. Implementation of
+        gdtools REMOVE for genomediff objects.
+
+        Input: a variable number of conditions, e.g. 'gene_name==rrlA','frequency>=0.9'.
+               If mut_type is specified, only that mutation type will be removed.
+        Output: self.mutations is updated, with mutations satifying the conditions
+                having been removed.
+        '''
+        updated_mutations = []
+        for rec in self.mutations:
+            if (mut_type is None or mut_type == rec.type) and rec.satisfies(*args):
+                continue       
+            else:
+                updated_mutations.append(rec)
+
+        self.mutations = updated_mutations
+=======
+    #def __str__(self):
+    #    return '\n'.join([self.mutations,self.evidence,self.validation])
+
+>>>>>>> d98f95e89b46227d188c372219531e73daa8b852
diff --git a/genomediff/parser.py b/genomediff/parser.py
@@ -87,4 +87,4 @@ def __iter__(self):
 
                     yield Record(type, id, self._document, parent_ids, **extra_dct)
                 else:
-                    raise Exception('Could not parse line #{}: {}'.format(i, line))
+                    raise Exception('Could not parse line #{}: {}'.format(i, line))
diff --git a/genomediff/records.py b/genomediff/records.py
@@ -1,3 +1,5 @@
+import re
+
 class Metadata(object):
     def __init__(self, name, value):
         self.name = name
@@ -32,12 +34,71 @@ def __getattr__(self, item):
             raise AttributeError
 
 
-def __repr__(self):
-    return "Record('{}', {}, {}, {})".format(self.type,
+    def __repr__(self):
+        return "Record('{}', {}, {}, {})".format(self.type,
                                              self.id,
                                              self.parent_ids,
-                                             ', '.join('{}={}'.format(k, repr(v)) for k, v in self._extra.items()))
+                                             ', '.join('{}={}'.format(k, repr(v)) for k, v in self.attributes.items()))
+
+<<<<<<< HEAD
+    def __str__(self):
+        return self.__repr__()
+
+=======
+>>>>>>> d98f95e89b46227d188c372219531e73daa8b852
+
+    def __eq__(self, other):
+        ''' this definition allows identical mutations in different genome diffs
+            to be equal.'''
+        return self.type == other.type and self.attributes == other.attributes
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+<<<<<<< HEAD
+
+    def satisfies(self, *args):
+        '''
+        Input: a variable number of conditions, e.g. 'gene_name==rrlA','frequency>=0.9'.
+        Output: return true if all conditions are true (i.e. correspond to key-values in attributes.
+
+        Find a condition that evaluates to false, otherwise return True.
+        '''
+
+        ## helper function to check if values are numbers
+        def is_number(s):
+            try:
+                float(s)
+                return True
+            except ValueError:
+                return False
+
+        for c in args:
+            assert type(c) == str, "error: supplied condition is not a string."
+            condition_pattern = re.compile(r'^(?P<key>[_a-z]+)'
+                                            '(?P<comp>==|!=|<|<=|>|>=)'
+                                            '(?P<val>[-_a-zA-Z0-9\.]+)')
+            condition_match = condition_pattern.match(c)
+            assert condition_match, "the supplied condition\n"+c+"\n could not be parsed."
+            cond_key = condition_match.group('key')
+            cond_comp = condition_match.group('comp')
+            cond_val = condition_match.group('val')
 
+            try: ## in case the given condition is not in the attributes.
+                attribute_val = self.attributes[cond_key]
+            except:
+                continue
+
+            ## add quote marks around strings before eval. can leave numbers alone.
+            if not is_number(cond_val):
+                cond_val = "\'"+cond_val+"\'"
 
-def __eq__(self, other):
-    return self.__dict__ == other.__dict__
+            if not is_number(attribute_val):
+                attribute_val = "\'"+attribute_val+"\'"
+            else: ## attribute_val is a number in this record-- convert to str for eval.
+                attribute_val = str(attribute_val)
+            expr = attribute_val+cond_comp+cond_val
+            if not eval(expr):
+                return False
+        return True
+=======
+>>>>>>> d98f95e89b46227d188c372219531e73daa8b852
diff --git a/tests.py b/tests.py
@@ -88,6 +88,65 @@ def test_resolve(self):
         document = GenomeDiff.read(file)
         self.assertEqual(document[1].parents, [document[2]])
 
+class RecordComparisonTestCase(TestCase):
+    def test_cmp1(self):
+        file1 = StringIO("""
+#=GENOME_DIFF	1.0
+#=CREATED	20:02:17 23 Jan 2019
+#=PROGRAM	breseq 0.33.2 
+#=COMMAND	breseq -r LCA.gff3 sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R1.fastq.gz sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R2.fastq.gz sequence-data/ZDBp889_reads.fastq -o consensus/ZDBp889
+#=REFSEQ	LCA.gff3
+#=READSEQ	sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R1.fastq.gz
+#=READSEQ	sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R2.fastq.gz
+#=READSEQ	sequence-data/ZDBp889_reads.fastq
+#=CONVERTED-BASES	644779377
+#=CONVERTED-READS	14448149
+#=INPUT-BASES	645034321
+#=INPUT-READS	14455411
+#=MAPPED-BASES	602854657
+#=MAPPED-READS	13788351
+SNP	1	34	REL606	72313	C
+        """.strip())
+
+        document1 = GenomeDiff.read(file1)
+
+        file2 = StringIO("""
+#=GENOME_DIFF	1.0
+#=CREATED	16:49:49 23 Jan 2019
+#=PROGRAM	breseq 0.33.2 
+#=COMMAND	breseq -r LCA.gff3 sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R1.fastq.gz sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R2.fastq.gz -o consensus/ZDB67
+#=REFSEQ	LCA.gff3
+#=READSEQ	sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R1.fastq.gz
+#=READSEQ	sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R2.fastq.gz
+#=CONVERTED-BASES	114566968
+#=CONVERTED-READS	419781
+#=INPUT-BASES	114567554
+#=INPUT-READS	419783
+#=MAPPED-BASES	92472620
+#=MAPPED-READS	339813
+SNP	1	12	REL606	72313	C
+        """.strip())
+
+        document2 = GenomeDiff.read(file2)
+        self.assertEqual(document1.mutations,document2.mutations)
+
+
+    def test_cmp2(self):
+        file1 = StringIO("""
+#=GENOME_DIFF	1.0
+SNP	1	12	REL606	72313	C	aa_new_seq=G	aa_position=92	aa_ref_seq=D	codon_new_seq=GGC	codon_number=92	codon_position=2	codon_ref_seq=GAC	gene_name=araA	gene_position=275	gene_product=L-arabinose isomerase	gene_strand=<	genes_overlapping=araA	locus_tag=ECB_00064	locus_tags_overlapping=ECB_00064	mutation_category=snp_nonsynonymous	position_end=72313	position_start=72313	snp_type=nonsynonymous	transl_table=11
+        """.strip())
+
+        document1 = GenomeDiff.read(file1)
+
+        file2 = StringIO("""
+#=GENOME_DIFF	1.0
+SNP	1	34	REL606	72313	C	aa_new_seq=G	aa_position=92	aa_ref_seq=D	codon_new_seq=GGC	codon_number=92	codon_position=2	codon_ref_seq=GAC	gene_name=araA	gene_position=275	gene_product=L-arabinose isomerase	gene_strand=<	genes_overlapping=araA	locus_tag=ECB_00064	locus_tags_overlapping=ECB_00064	mutation_category=snp_nonsynonymous	position_end=72313	position_start=72313	snp_type=nonsynonymous	transl_table=11
+        """.strip())
+
+        document2 = GenomeDiff.read(file2)
+        self.assertEqual(document1.mutations,document2.mutations)
 
+
 if __name__ == '__main__':
-    main()
+    main()
Original file line number	Diff line number	Diff line change
Expand Up		@@ -54,3 +54,4 @@ docs/_build/

		# JetBrains
		.idea
		.DS_Store