From 480f0dbb12e803aca5fa0011bb6b35080d632cf8 Mon Sep 17 00:00:00 2001
From: Rohan Maddamsetti <rohan.maddamsetti@gmail.com>
Date: Fri, 11 Jan 2019 15:44:21 -0500
Subject: [PATCH 1/6] fixed indent bug and incorrect variable name in Record
 class definition

---
 .DS_Store             | Bin 0 -> 8196 bytes
 genomediff/records.py |  11 +++++------
 2 files changed, 5 insertions(+), 6 deletions(-)
 create mode 100644 .DS_Store

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5af569dcd3b135aa5a8f7ea1a3c714603d0846cc
GIT binary patch
literal 8196
zcmeHLO>h)N6n<|LV15Yc5JEOU%xVY~BuOO@NHE0O{6L65MP&0&a5K9zNv52gS$1X<
zLJYMf%1H$e9z@HFTIKHvJz3nLN-tKbl=9%kQV(9V^y10a)3X#T$-#rA1=Cy8-^_dc
z^~`(y&2+s50PN4}9RN`Pkm%sZRO;4g+|Q5uS|qTP6C&{eGGKrXCd|<<Y+(k%41^g7
zGZ1DV%)tLK1GH!JCT+0q3vbwm83;4*P-cLCA7XTHj0ac{=zlt>^Gg6iUIO@y&Z!RY
zIk5oa0Tu-MRGL#%4;WA}C^1mb$sgmx3F8441PVH!paTYX#-Kt$|L(+#`NIL@0>eJc
zK$wC1Gr(UT1?H)4x52r)eormA*_7p^$bT!9wM^G*Z~qn}W#yYTZ;>QLQYw_Qi3MjV
z;b~sp?bmW=_|=H!Eu@0;QOB|FoNK9Z(^yKf9RrT-X{K%De6pz<G-PJNv~_2x-^ts$
z>(7%A4oQ+FImx0cD@VH8TVfp@-K#CJl`ekX*?DwzRhDX-I(mm^Gs_oNUR=HSmWU67
zZv<49#CvU{U@doXKm;4f6odV)7_6+QTaVW$LrJ!MkzzQN^-RZh$Bmpzm}HeM#XX#M
z?2(+o{Yh4p)~s~i(mbQjvJ&P6!<|SwRz739lb&(j<CbaH$$105G09loHBK8e-dr>W
ziRDh3G#wwtP?kQa*tt8lcYoWV?qhvx+bUU=QmxcXESj#FvW&4MQ}-68G&gP7x@pf(
zQ{HUPoHLD_T!k(2sI6s;wW><Ctv0Hvllhc+E^m6vtWlE0T=i;$%)XFjbh_`ej?S67
z?@{Yj8gz!+2XY4W!mR95n`Nd+qb@z3nBsuaDl=CeeSsEHP}`M*64T`|OG_D+FdbI9
zB=)pCu_)3ejMYkyBu}QDOokrX6}BhUxWsNs&+{8Uqq*LxtYOo=c?kXWsyfB(eY$Sw
zlGGS)77Hvl#@mCXmY@#WXwMjh8PaqXF2SqtI$VLP@D6+cpTTvw0XN|md<9>_H}F0D
z2tUEk@C*C~zr!E!Cq_`ktyqC|_!u@~6Yj$U*olX+3%juwPv8&^<5M_-Q#g$?coq$`
z(ZMV(<4br6U%|`xCccXw;z#%~UdPYHQ$>n`JQ+M?0C<B=)_wa(kipv*IlZnR2WylQ
z|B)bXS8x?>uijC!vte&@OUIF(_?<N@S+;v*b+?v$`3QaZx+&^H=(RRlw`=zvMI9qW
z8RttiUerF@!WAy9@zJqIRh4nwh)cOiRY=vc@7v!*zDVF{Z6i+^30U|J9#T}ciL@$s
zUEPYp%1O+^d#sndn@P69+qZ|jAd}kuiSFKmPvJ|_+#1}5A4qS%lGgr0z|F`oio38L
zAHfD}#>cS*Td|Flb`+n$9vt-5b`qb%1dic2PT(x2P$$LBk>bvwiyr3jA{Kn*y^OEn
z6}*aX;5B@^M0G<Ys(XJU)h#A+w&mFK<S#1rUTS#tAZ4IT1QX@;|M89g|9_A|4r2*3
z5N4nx16V$o80;tGMjw4%YscuArGqy<t{~8-P{)UGyI+nI2IaVKiv_-TfCYg*l{h|?
X(5Dg#>c93!K)C;h`#%@pL*D)d$p6R%

literal 0
HcmV?d00001

diff --git a/genomediff/records.py b/genomediff/records.py
index 5cd9927..999eb91 100644
--- a/genomediff/records.py
+++ b/genomediff/records.py
@@ -32,12 +32,11 @@ def __getattr__(self, item):
             raise AttributeError
 
 
-def __repr__(self):
-    return "Record('{}', {}, {}, {})".format(self.type,
+    def __repr__(self):
+        return "Record('{}', {}, {}, {})".format(self.type,
                                              self.id,
                                              self.parent_ids,
-                                             ', '.join('{}={}'.format(k, repr(v)) for k, v in self._extra.items()))
-
+                                             ', '.join('{}={}'.format(k, repr(v)) for k, v in self.attributes.items()))
 
-def __eq__(self, other):
-    return self.__dict__ == other.__dict__
+    def __eq__(self, other):
+        return self.__dict__ == other.__dict__

From 584bc6cb55646fae47bcbfe90f4f312492fb8e72 Mon Sep 17 00:00:00 2001
From: Rohan Maddamsetti <rohan.maddamsetti@gmail.com>
Date: Fri, 11 Jan 2019 15:45:30 -0500
Subject: [PATCH 2/6] .DS_Store banished!

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index d35019c..e6b08e1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,3 +54,4 @@ docs/_build/
 
 # JetBrains
 .idea
+.DS_Store

From ff96a6b1ba877c39d0a97481e823c98427ce6563 Mon Sep 17 00:00:00 2001
From: Rohan Maddamsetti <rohan.maddamsetti@gmail.com>
Date: Thu, 24 Jan 2019 23:49:09 -0500
Subject: [PATCH 3/6] compare mutations across diffs

---
 .gitignore             | 1 +
 genomediff/__init__.py | 6 +++++-
 genomediff/parser.py   | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index d35019c..e6b08e1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,3 +54,4 @@ docs/_build/
 
 # JetBrains
 .idea
+.DS_Store
diff --git a/genomediff/__init__.py b/genomediff/__init__.py
index 4b12a87..508951c 100644
--- a/genomediff/__init__.py
+++ b/genomediff/__init__.py
@@ -39,4 +39,8 @@ def __len__(self):
         return len(self.mutations) + len(self.evidence) + len(self.validation)
 
     def __iter__(self):
-        return itertools.chain(self.mutations, self.evidence, self.validation)
\ No newline at end of file
+        return itertools.chain(self.mutations, self.evidence, self.validation)
+
+    #def __str__(self):
+    #    return '\n'.join([self.mutations,self.evidence,self.validation])
+
diff --git a/genomediff/parser.py b/genomediff/parser.py
index 595ef5a..3fa061a 100644
--- a/genomediff/parser.py
+++ b/genomediff/parser.py
@@ -87,4 +87,4 @@ def __iter__(self):
 
                     yield Record(type, id, self._document, parent_ids, **extra_dct)
                 else:
-                    raise Exception('Could not parse line #{}: {}'.format(i, line))
\ No newline at end of file
+                    raise Exception('Could not parse line #{}: {}'.format(i, line))

From 902b2c8966b90eb93c5cc55da11a89d8d7833576 Mon Sep 17 00:00:00 2001
From: rohanmaddamsetti <rohan.maddamsetti@gmail.com>
Date: Thu, 24 Jan 2019 23:50:32 -0500
Subject: [PATCH 4/6] Delete .DS_Store

---
 .DS_Store | Bin 8196 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 .DS_Store

diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index 5af569dcd3b135aa5a8f7ea1a3c714603d0846cc..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 8196
zcmeHLO>h)N6n<|LV15Yc5JEOU%xVY~BuOO@NHE0O{6L65MP&0&a5K9zNv52gS$1X<
zLJYMf%1H$e9z@HFTIKHvJz3nLN-tKbl=9%kQV(9V^y10a)3X#T$-#rA1=Cy8-^_dc
z^~`(y&2+s50PN4}9RN`Pkm%sZRO;4g+|Q5uS|qTP6C&{eGGKrXCd|<<Y+(k%41^g7
zGZ1DV%)tLK1GH!JCT+0q3vbwm83;4*P-cLCA7XTHj0ac{=zlt>^Gg6iUIO@y&Z!RY
zIk5oa0Tu-MRGL#%4;WA}C^1mb$sgmx3F8441PVH!paTYX#-Kt$|L(+#`NIL@0>eJc
zK$wC1Gr(UT1?H)4x52r)eormA*_7p^$bT!9wM^G*Z~qn}W#yYTZ;>QLQYw_Qi3MjV
z;b~sp?bmW=_|=H!Eu@0;QOB|FoNK9Z(^yKf9RrT-X{K%De6pz<G-PJNv~_2x-^ts$
z>(7%A4oQ+FImx0cD@VH8TVfp@-K#CJl`ekX*?DwzRhDX-I(mm^Gs_oNUR=HSmWU67
zZv<49#CvU{U@doXKm;4f6odV)7_6+QTaVW$LrJ!MkzzQN^-RZh$Bmpzm}HeM#XX#M
z?2(+o{Yh4p)~s~i(mbQjvJ&P6!<|SwRz739lb&(j<CbaH$$105G09loHBK8e-dr>W
ziRDh3G#wwtP?kQa*tt8lcYoWV?qhvx+bUU=QmxcXESj#FvW&4MQ}-68G&gP7x@pf(
zQ{HUPoHLD_T!k(2sI6s;wW><Ctv0Hvllhc+E^m6vtWlE0T=i;$%)XFjbh_`ej?S67
z?@{Yj8gz!+2XY4W!mR95n`Nd+qb@z3nBsuaDl=CeeSsEHP}`M*64T`|OG_D+FdbI9
zB=)pCu_)3ejMYkyBu}QDOokrX6}BhUxWsNs&+{8Uqq*LxtYOo=c?kXWsyfB(eY$Sw
zlGGS)77Hvl#@mCXmY@#WXwMjh8PaqXF2SqtI$VLP@D6+cpTTvw0XN|md<9>_H}F0D
z2tUEk@C*C~zr!E!Cq_`ktyqC|_!u@~6Yj$U*olX+3%juwPv8&^<5M_-Q#g$?coq$`
z(ZMV(<4br6U%|`xCccXw;z#%~UdPYHQ$>n`JQ+M?0C<B=)_wa(kipv*IlZnR2WylQ
z|B)bXS8x?>uijC!vte&@OUIF(_?<N@S+;v*b+?v$`3QaZx+&^H=(RRlw`=zvMI9qW
z8RttiUerF@!WAy9@zJqIRh4nwh)cOiRY=vc@7v!*zDVF{Z6i+^30U|J9#T}ciL@$s
zUEPYp%1O+^d#sndn@P69+qZ|jAd}kuiSFKmPvJ|_+#1}5A4qS%lGgr0z|F`oio38L
zAHfD}#>cS*Td|Flb`+n$9vt-5b`qb%1dic2PT(x2P$$LBk>bvwiyr3jA{Kn*y^OEn
z6}*aX;5B@^M0G<Ys(XJU)h#A+w&mFK<S#1rUTS#tAZ4IT1QX@;|M89g|9_A|4r2*3
z5N4nx16V$o80;tGMjw4%YscuArGqy<t{~8-P{)UGyI+nI2IaVKiv_-TfCYg*l{h|?
X(5Dg#>c93!K)C;h`#%@pL*D)d$p6R%


From d98f95e89b46227d188c372219531e73daa8b852 Mon Sep 17 00:00:00 2001
From: Rohan Maddamsetti <rohan.maddamsetti@gmail.com>
Date: Thu, 24 Jan 2019 23:53:18 -0500
Subject: [PATCH 5/6] compare muts across diffs

---
 genomediff/records.py |  8 +++++-
 tests.py              | 61 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/genomediff/records.py b/genomediff/records.py
index 999eb91..b53077d 100644
--- a/genomediff/records.py
+++ b/genomediff/records.py
@@ -38,5 +38,11 @@ def __repr__(self):
                                              self.parent_ids,
                                              ', '.join('{}={}'.format(k, repr(v)) for k, v in self.attributes.items()))
 
+    
     def __eq__(self, other):
-        return self.__dict__ == other.__dict__
+        ''' this definition allows identical mutations in different genome diffs
+            to be equal; an object that is not a Record never compares equal.'''
+        return isinstance(other, Record) and self.type == other.type and self.attributes == other.attributes
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
diff --git a/tests.py b/tests.py
index bcf84bc..95f5ca0 100644
--- a/tests.py
+++ b/tests.py
@@ -88,6 +88,65 @@ def test_resolve(self):
         document = GenomeDiff.read(file)
         self.assertEqual(document[1].parents, [document[2]])
 
+class RecordComparisonTestCase(TestCase):
+    def test_cmp1(self):
+        file1 = StringIO("""
+#=GENOME_DIFF	1.0
+#=CREATED	20:02:17 23 Jan 2019
+#=PROGRAM	breseq 0.33.2 
+#=COMMAND	breseq -r LCA.gff3 sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R1.fastq.gz sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R2.fastq.gz sequence-data/ZDBp889_reads.fastq -o consensus/ZDBp889
+#=REFSEQ	LCA.gff3
+#=READSEQ	sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R1.fastq.gz
+#=READSEQ	sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R2.fastq.gz
+#=READSEQ	sequence-data/ZDBp889_reads.fastq
+#=CONVERTED-BASES	644779377
+#=CONVERTED-READS	14448149
+#=INPUT-BASES	645034321
+#=INPUT-READS	14455411
+#=MAPPED-BASES	602854657
+#=MAPPED-READS	13788351
+SNP	1	34	REL606	72313	C
+        """.strip())
+
+        document1 = GenomeDiff.read(file1)
+        
+        file2 = StringIO("""
+#=GENOME_DIFF	1.0
+#=CREATED	16:49:49 23 Jan 2019
+#=PROGRAM	breseq 0.33.2 
+#=COMMAND	breseq -r LCA.gff3 sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R1.fastq.gz sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R2.fastq.gz -o consensus/ZDB67
+#=REFSEQ	LCA.gff3
+#=READSEQ	sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R1.fastq.gz
+#=READSEQ	sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R2.fastq.gz
+#=CONVERTED-BASES	114566968
+#=CONVERTED-READS	419781
+#=INPUT-BASES	114567554
+#=INPUT-READS	419783
+#=MAPPED-BASES	92472620
+#=MAPPED-READS	339813
+SNP	1	12	REL606	72313	C
+        """.strip())
+
+        document2 = GenomeDiff.read(file2)
+        self.assertEqual(document1.mutations,document2.mutations)
+
+
+    def test_cmp2(self):
+        file1 = StringIO("""
+#=GENOME_DIFF	1.0
+SNP	1	12	REL606	72313	C	aa_new_seq=G	aa_position=92	aa_ref_seq=D	codon_new_seq=GGC	codon_number=92	codon_position=2	codon_ref_seq=GAC	gene_name=araA	gene_position=275	gene_product=L-arabinose isomerase	gene_strand=<	genes_overlapping=araA	locus_tag=ECB_00064	locus_tags_overlapping=ECB_00064	mutation_category=snp_nonsynonymous	position_end=72313	position_start=72313	snp_type=nonsynonymous	transl_table=11
+        """.strip())
+
+        document1 = GenomeDiff.read(file1)
+        
+        file2 = StringIO("""
+#=GENOME_DIFF	1.0
+SNP	1	34	REL606	72313	C	aa_new_seq=G	aa_position=92	aa_ref_seq=D	codon_new_seq=GGC	codon_number=92	codon_position=2	codon_ref_seq=GAC	gene_name=araA	gene_position=275	gene_product=L-arabinose isomerase	gene_strand=<	genes_overlapping=araA	locus_tag=ECB_00064	locus_tags_overlapping=ECB_00064	mutation_category=snp_nonsynonymous	position_end=72313	position_start=72313	snp_type=nonsynonymous	transl_table=11
+        """.strip())
+
+        document2 = GenomeDiff.read(file2)
+        self.assertEqual(document1.mutations,document2.mutations)
 
+        
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()

From 139ca6cd016baa42c71009133daa503da54359af Mon Sep 17 00:00:00 2001
From: Rohan Maddamsetti <rohan.maddamsetti@gmail.com>
Date: Fri, 11 Oct 2019 16:50:59 -0400
Subject: [PATCH 6/6] updated code

---
 genomediff/__init__.py | 27 ++++++++++++++++++++
 genomediff/records.py  | 56 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

diff --git a/genomediff/__init__.py b/genomediff/__init__.py
index 508951c..012bc0c 100644
--- a/genomediff/__init__.py
+++ b/genomediff/__init__.py
@@ -41,6 +41,33 @@ def __len__(self):
     def __iter__(self):
         return itertools.chain(self.mutations, self.evidence, self.validation)
 
+    def __str__(self):
+        '''Return every record as text, grouped under section headings.'''
+        # str() each record first: str.join raises TypeError on non-string items.
+        return '\n'.join(["MUTATIONS:",'\n'.join([str(x) for x in self.mutations]),
+                          "EVIDENCE:",'\n'.join([str(x) for x in self.evidence]),
+                          "VALIDATION:",'\n'.join([str(x) for x in self.validation])])
+
+
+    def remove(self,*args, mut_type=None):
+        '''
+        Remove mutations that satisfy the given conditions. Implementation of
+        gdtools REMOVE for genomediff objects.
+
+        Input: a variable number of conditions, e.g. 'gene_name==rrlA','frequency>=0.9'.
+               If mut_type is specified, only that mutation type will be removed.
+        Output: self.mutations is updated, with mutations satisfying the conditions
+                having been removed.
+        '''
+        kept = []
+        for rec in self.mutations:
+            type_selected = mut_type is None or mut_type == rec.type
+            # 'and' short-circuits: conditions are only checked for selected types.
+            if not (type_selected and rec.satisfies(*args)):
+                kept.append(rec)
+        self.mutations = kept
+
     #def __str__(self):
     #    return '\n'.join([self.mutations,self.evidence,self.validation])
 
+
diff --git a/genomediff/records.py b/genomediff/records.py
index b53077d..72982a5 100644
--- a/genomediff/records.py
+++ b/genomediff/records.py
@@ -1,3 +1,5 @@
+import re
+
 class Metadata(object):
     def __init__(self, name, value):
         self.name = name
@@ -38,6 +40,12 @@ def __repr__(self):
                                              self.parent_ids,
                                              ', '.join('{}={}'.format(k, repr(v)) for k, v in self.attributes.items()))
 
+    def __str__(self):
+        '''Return the same text as __repr__, so that str(record) and
+        repr(record) agree.'''
+        return self.__repr__()
+
+
     
     def __eq__(self, other):
         ''' this definition allows identical mutations in different genome diffs
@@ -46,3 +54,51 @@ def __eq__(self, other):
 
     def __ne__(self, other):
         return not self.__eq__(other)
+
+    def satisfies(self, *args):
+        '''
+        Check this record against conditions such as 'gene_name==rrlA' or
+        'frequency>=0.9'.
+
+        Input: a variable number of condition strings of the form
+               <key><comparison><value>, where comparison is one of
+               ==, !=, <, <=, >, >=.
+        Output: False as soon as one condition evaluates to false, otherwise
+                True. A condition whose key is not in self.attributes is
+                skipped, so it can never make the result False.
+        Raises: AssertionError if a condition is not a string or cannot be
+                parsed.
+        '''
+        import operator  ## local import: the module header only imports re.
+
+        comparisons = {'==': operator.eq, '!=': operator.ne,
+                       '<': operator.lt, '<=': operator.le,
+                       '>': operator.gt, '>=': operator.ge}
+
+        ## helper function: numbers and numeric strings become floats so that
+        ## they compare numerically; every other value is compared as it is.
+        def as_operand(value):
+            try:
+                return float(value)
+            except (TypeError, ValueError):
+                return value
+
+        ## raw strings throughout, so that '\.' reaches the regex engine intact.
+        condition_pattern = re.compile(r'^(?P<key>[_a-z]+)'
+                                       r'(?P<comp>==|!=|<|<=|>|>=)'
+                                       r'(?P<val>[-_a-zA-Z0-9\.]+)')
+        for c in args:
+            assert isinstance(c, str), "error: supplied condition is not a string."
+            condition_match = condition_pattern.match(c)
+            assert condition_match, "the supplied condition\n"+c+"\n could not be parsed."
+            cond_key, cond_comp, cond_val = condition_match.group('key', 'comp', 'val')
+            try: ## in case the given condition is not in the attributes.
+                attribute_val = self.attributes[cond_key]
+            except KeyError:
+                continue
+
+            ## compare the values themselves rather than eval()-ing generated source,
+            ## which failed on (and could execute) attribute text containing a quote.
+            if not comparisons[cond_comp](as_operand(attribute_val), as_operand(cond_val)):
+                return False
+        return True