From 480f0dbb12e803aca5fa0011bb6b35080d632cf8 Mon Sep 17 00:00:00 2001 From: Rohan Maddamsetti Date: Fri, 11 Jan 2019 15:44:21 -0500 Subject: [PATCH 1/6] fixed indent bug and incorrect variable name in Record class definition --- .DS_Store | Bin 0 -> 8196 bytes genomediff/records.py | 11 +++++------ 2 files changed, 5 insertions(+), 6 deletions(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5af569dcd3b135aa5a8f7ea1a3c714603d0846cc GIT binary patch literal 8196 zcmeHLO>h)N6n<|LV15Yc5JEOU%xVY~BuOO@NHE0O{6L65MP&0&a5K9zNv52gS$1X< zLJYMf%1H$e9z@HFTIKHvJz3nLN-tKbl=9%kQV(9V^y10a)3X#T$-#rA1=Cy8-^_dc z^~`(y&2+s50PN4}9RN`Pkm%sZRO;4g+|Q5uS|qTP6C&{eGGKrXCd|<^Gg6iUIO@y&Z!RY zIk5oa0Tu-MRGL#%4;WA}C^1mb$sgmx3F8441PVH!paTYX#-Kt$|L(+#`NIL@0>eJc zK$wC1Gr(UT1?H)4x52r)eormA*_7p^$bT!9wM^G*Z~qn}W#yYTZ;>QLQYw_Qi3MjV z;b~sp?bmW=_|=H!Eu@0;QOB|FoNK9Z(^yKf9RrT-X{K%De6pz(7%A4oQ+FImx0cD@VH8TVfp@-K#CJl`ekX*?DwzRhDX-I(mm^Gs_oNUR=HSmWU67 zZv<49#CvU{U@doXKm;4f6odV)7_6+QTaVW$LrJ!MkzzQN^-RZh$Bmpzm}HeM#XX#M z?2(+o{Yh4p)~s~i(mbQjvJ&P6!<|SwRz739lb&(jW ziRDh3G#wwtP?kQa*tt8lcYoWV?qhvx+bUU=QmxcXESj#FvW&4MQ}-68G&gP7x@pf( zQ{HUPoHLD_T!k(2sI6s;wW>_H}F0D z2tUEk@C*C~zr!E!Cq_`ktyqC|_!u@~6Yj$U*olX+3%juwPv8&^<5M_-Q#g$?coq$` z(ZMV(<4br6U%|`xCccXw;z#%~UdPYHQ$>n`JQ+M?0CuijC!vte&@OUIF(_?cS*Td|Flb`+n$9vt-5b`qb%1dic2PT(x2P$$LBk>bvwiyr3jA{Kn*y^OEn z6}*aX;5B@^M0Gc93!K)C;h`#%@pL*D)d$p6R% literal 0 HcmV?d00001 diff --git a/genomediff/records.py b/genomediff/records.py index 5cd9927..999eb91 100644 --- a/genomediff/records.py +++ b/genomediff/records.py @@ -32,12 +32,11 @@ def __getattr__(self, item): raise AttributeError -def __repr__(self): - return "Record('{}', {}, {}, {})".format(self.type, + def __repr__(self): + return "Record('{}', {}, {}, {})".format(self.type, self.id, self.parent_ids, - ', '.join('{}={}'.format(k, repr(v)) for k, v in self._extra.items())) - + ', '.join('{}={}'.format(k, repr(v)) for k, v in self.attributes.items())) -def __eq__(self, other): - return self.__dict__ == other.__dict__ + def __eq__(self, other): + return self.__dict__ == other.__dict__ From 584bc6cb55646fae47bcbfe90f4f312492fb8e72 Mon Sep 17 00:00:00 2001 From: Rohan Maddamsetti Date: Fri, 11 Jan 2019 15:45:30 -0500 Subject: [PATCH 2/6] .DS_Store banished! --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d35019c..e6b08e1 100644 --- a/.gitignore +++ b/.gitignore @@ -54,3 +54,4 @@ docs/_build/ # JetBrains .idea +.DS_Store From ff96a6b1ba877c39d0a97481e823c98427ce6563 Mon Sep 17 00:00:00 2001 From: Rohan Maddamsetti Date: Thu, 24 Jan 2019 23:49:09 -0500 Subject: [PATCH 3/6] compare mutations across diffs --- .gitignore | 1 + genomediff/__init__.py | 6 +++++- genomediff/parser.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index d35019c..e6b08e1 100644 --- a/.gitignore +++ b/.gitignore @@ -54,3 +54,4 @@ docs/_build/ # JetBrains .idea +.DS_Store diff --git a/genomediff/__init__.py b/genomediff/__init__.py index 4b12a87..508951c 100644 --- a/genomediff/__init__.py +++ b/genomediff/__init__.py @@ -39,4 +39,8 @@ def __len__(self): return len(self.mutations) + len(self.evidence) + len(self.validation) def __iter__(self): - return itertools.chain(self.mutations, self.evidence, self.validation) \ No newline at end of file + return itertools.chain(self.mutations, self.evidence, self.validation) + + #def __str__(self): + # return '\n'.join([self.mutations,self.evidence,self.validation]) + diff --git a/genomediff/parser.py b/genomediff/parser.py index 595ef5a..3fa061a 100644 --- a/genomediff/parser.py +++ b/genomediff/parser.py @@ -87,4 +87,4 @@ def __iter__(self): yield Record(type, id, self._document, parent_ids, **extra_dct) else: - raise Exception('Could not parse line #{}: {}'.format(i, line)) \ No newline at end of file + raise Exception('Could not parse line #{}: {}'.format(i, line)) From 902b2c8966b90eb93c5cc55da11a89d8d7833576 Mon Sep 17 00:00:00 2001 From: rohanmaddamsetti Date: Thu, 24 Jan 2019 23:50:32 -0500 Subject: [PATCH 4/6] Delete .DS_Store --- .DS_Store | Bin 8196 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 5af569dcd3b135aa5a8f7ea1a3c714603d0846cc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8196 zcmeHLO>h)N6n<|LV15Yc5JEOU%xVY~BuOO@NHE0O{6L65MP&0&a5K9zNv52gS$1X< zLJYMf%1H$e9z@HFTIKHvJz3nLN-tKbl=9%kQV(9V^y10a)3X#T$-#rA1=Cy8-^_dc z^~`(y&2+s50PN4}9RN`Pkm%sZRO;4g+|Q5uS|qTP6C&{eGGKrXCd|<^Gg6iUIO@y&Z!RY zIk5oa0Tu-MRGL#%4;WA}C^1mb$sgmx3F8441PVH!paTYX#-Kt$|L(+#`NIL@0>eJc zK$wC1Gr(UT1?H)4x52r)eormA*_7p^$bT!9wM^G*Z~qn}W#yYTZ;>QLQYw_Qi3MjV z;b~sp?bmW=_|=H!Eu@0;QOB|FoNK9Z(^yKf9RrT-X{K%De6pz(7%A4oQ+FImx0cD@VH8TVfp@-K#CJl`ekX*?DwzRhDX-I(mm^Gs_oNUR=HSmWU67 zZv<49#CvU{U@doXKm;4f6odV)7_6+QTaVW$LrJ!MkzzQN^-RZh$Bmpzm}HeM#XX#M z?2(+o{Yh4p)~s~i(mbQjvJ&P6!<|SwRz739lb&(jW ziRDh3G#wwtP?kQa*tt8lcYoWV?qhvx+bUU=QmxcXESj#FvW&4MQ}-68G&gP7x@pf( zQ{HUPoHLD_T!k(2sI6s;wW>_H}F0D z2tUEk@C*C~zr!E!Cq_`ktyqC|_!u@~6Yj$U*olX+3%juwPv8&^<5M_-Q#g$?coq$` z(ZMV(<4br6U%|`xCccXw;z#%~UdPYHQ$>n`JQ+M?0CuijC!vte&@OUIF(_?cS*Td|Flb`+n$9vt-5b`qb%1dic2PT(x2P$$LBk>bvwiyr3jA{Kn*y^OEn z6}*aX;5B@^M0Gc93!K)C;h`#%@pL*D)d$p6R% From d98f95e89b46227d188c372219531e73daa8b852 Mon Sep 17 00:00:00 2001 From: Rohan Maddamsetti Date: Thu, 24 Jan 2019 23:53:18 -0500 Subject: [PATCH 5/6] compare muts across diffs --- genomediff/records.py | 8 +++++- tests.py | 61 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/genomediff/records.py b/genomediff/records.py index 999eb91..b53077d 100644 --- a/genomediff/records.py +++ b/genomediff/records.py @@ -38,5 +38,11 @@ def __repr__(self): self.parent_ids, ', '.join('{}={}'.format(k, repr(v)) for k, v in self.attributes.items())) + def __eq__(self, other): - return self.__dict__ == other.__dict__ + ''' this definition allows identical mutations in different genome diffs + to be equal.''' + return self.type == other.type and self.attributes == other.attributes + + def __ne__(self, other): + return not self.__eq__(other) diff --git a/tests.py b/tests.py index bcf84bc..95f5ca0 100644 --- a/tests.py +++ b/tests.py @@ -88,6 +88,65 @@ def test_resolve(self): document = GenomeDiff.read(file) self.assertEqual(document[1].parents, [document[2]]) +class RecordComparisonTestCase(TestCase): + def test_cmp1(self): + file1 = StringIO(""" +#=GENOME_DIFF 1.0 +#=CREATED 20:02:17 23 Jan 2019 +#=PROGRAM breseq 0.33.2 +#=COMMAND breseq -r LCA.gff3 sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R1.fastq.gz sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R2.fastq.gz sequence-data/ZDBp889_reads.fastq -o consensus/ZDBp889 +#=REFSEQ LCA.gff3 +#=READSEQ sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R1.fastq.gz +#=READSEQ sequence-data/DM0 evolved re-runs (Rohan)/ZDBp889_R2.fastq.gz +#=READSEQ sequence-data/ZDBp889_reads.fastq +#=CONVERTED-BASES 644779377 +#=CONVERTED-READS 14448149 +#=INPUT-BASES 645034321 +#=INPUT-READS 14455411 +#=MAPPED-BASES 602854657 +#=MAPPED-READS 13788351 +SNP 1 34 REL606 72313 C + """.strip()) + + document1 = GenomeDiff.read(file1) + + file2 = StringIO(""" +#=GENOME_DIFF 1.0 +#=CREATED 16:49:49 23 Jan 2019 +#=PROGRAM breseq 0.33.2 +#=COMMAND breseq -r LCA.gff3 sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R1.fastq.gz sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R2.fastq.gz -o consensus/ZDB67 +#=REFSEQ LCA.gff3 +#=READSEQ sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R1.fastq.gz +#=READSEQ sequence-data/DM0 evolved re-runs (Rohan)/ZDB67_R2.fastq.gz +#=CONVERTED-BASES 114566968 +#=CONVERTED-READS 419781 +#=INPUT-BASES 114567554 +#=INPUT-READS 419783 +#=MAPPED-BASES 92472620 +#=MAPPED-READS 339813 +SNP 1 12 REL606 72313 C + """.strip()) + + document2 = GenomeDiff.read(file2) + self.assertEqual(document1.mutations,document2.mutations) + + + def test_cmp2(self): + file1 = StringIO(""" +#=GENOME_DIFF 1.0 +SNP 1 12 REL606 72313 C aa_new_seq=G aa_position=92 aa_ref_seq=D codon_new_seq=GGC codon_number=92 codon_position=2 codon_ref_seq=GAC gene_name=araA gene_position=275 gene_product=L-arabinose isomerase gene_strand=< genes_overlapping=araA locus_tag=ECB_00064 locus_tags_overlapping=ECB_00064 mutation_category=snp_nonsynonymous position_end=72313 position_start=72313 snp_type=nonsynonymous transl_table=11 + """.strip()) + + document1 = GenomeDiff.read(file1) + + file2 = StringIO(""" +#=GENOME_DIFF 1.0 +SNP 1 34 REL606 72313 C aa_new_seq=G aa_position=92 aa_ref_seq=D codon_new_seq=GGC codon_number=92 codon_position=2 codon_ref_seq=GAC gene_name=araA gene_position=275 gene_product=L-arabinose isomerase gene_strand=< genes_overlapping=araA locus_tag=ECB_00064 locus_tags_overlapping=ECB_00064 mutation_category=snp_nonsynonymous position_end=72313 position_start=72313 snp_type=nonsynonymous transl_table=11 + """.strip()) + + document2 = GenomeDiff.read(file2) + self.assertEqual(document1.mutations,document2.mutations) + if __name__ == '__main__': - main() \ No newline at end of file + main() From 139ca6cd016baa42c71009133daa503da54359af Mon Sep 17 00:00:00 2001 From: Rohan Maddamsetti Date: Fri, 11 Oct 2019 16:50:59 -0400 Subject: [PATCH 6/6] updated code --- genomediff/__init__.py | 27 ++++++++++++++++++++ genomediff/records.py | 56 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/genomediff/__init__.py b/genomediff/__init__.py index 508951c..012bc0c 100644 --- a/genomediff/__init__.py +++ b/genomediff/__init__.py @@ -41,6 +41,33 @@ def __len__(self): def __iter__(self): return itertools.chain(self.mutations, self.evidence, self.validation) +<<<<<<< HEAD + def __str__(self): + return '\n'.join(["MUTATIONS:",'\n'.join([str(x) for x in self.mutations]), + "EVIDENCE:",'\n'.join([str(x) for x in self.evidence]), + "VALIDATION:",'\n'.join(self.validation)]) + + + def remove(self,*args, mut_type=None): + ''' + Remove mutations that satisfy the given conditions. Implementation of + gdtools REMOVE for genomediff objects. + + Input: a variable number of conditions, e.g. 'gene_name==rrlA','frequency>=0.9'. + If mut_type is specified, only that mutation type will be removed. + Output: self.mutations is updated, with mutations satifying the conditions + having been removed. + ''' + updated_mutations = [] + for rec in self.mutations: + if (mut_type is None or mut_type == rec.type) and rec.satisfies(*args): + continue + else: + updated_mutations.append(rec) + + self.mutations = updated_mutations +======= #def __str__(self): # return '\n'.join([self.mutations,self.evidence,self.validation]) +>>>>>>> d98f95e89b46227d188c372219531e73daa8b852 diff --git a/genomediff/records.py b/genomediff/records.py index b53077d..72982a5 100644 --- a/genomediff/records.py +++ b/genomediff/records.py @@ -1,3 +1,5 @@ +import re + class Metadata(object): def __init__(self, name, value): self.name = name @@ -38,6 +40,12 @@ def __repr__(self): self.parent_ids, ', '.join('{}={}'.format(k, repr(v)) for k, v in self.attributes.items())) +<<<<<<< HEAD + def __str__(self): + return self.__repr__() + +======= +>>>>>>> d98f95e89b46227d188c372219531e73daa8b852 def __eq__(self, other): ''' this definition allows identical mutations in different genome diffs @@ -46,3 +54,51 @@ def __eq__(self, other): def __ne__(self, other): return not self.__eq__(other) +<<<<<<< HEAD + + def satisfies(self, *args): + ''' + Input: a variable number of conditions, e.g. 'gene_name==rrlA','frequency>=0.9'. + Output: return true if all conditions are true (i.e. correspond to key-values in attributes. + + Find a condition that evaluates to false, otherwise return True. + ''' + + ## helper function to check if values are numbers + def is_number(s): + try: + float(s) + return True + except ValueError: + return False + + for c in args: + assert type(c) == str, "error: supplied condition is not a string." + condition_pattern = re.compile(r'^(?P[_a-z]+)' + '(?P==|!=|<|<=|>|>=)' + '(?P[-_a-zA-Z0-9\.]+)') + condition_match = condition_pattern.match(c) + assert condition_match, "the supplied condition\n"+c+"\n could not be parsed." + cond_key = condition_match.group('key') + cond_comp = condition_match.group('comp') + cond_val = condition_match.group('val') + + try: ## in case the given condition is not in the attributes. + attribute_val = self.attributes[cond_key] + except: + continue + + ## add quote marks around strings before eval. can leave numbers alone. + if not is_number(cond_val): + cond_val = "\'"+cond_val+"\'" + + if not is_number(attribute_val): + attribute_val = "\'"+attribute_val+"\'" + else: ## attribute_val is a number in this record-- convert to str for eval. + attribute_val = str(attribute_val) + expr = attribute_val+cond_comp+cond_val + if not eval(expr): + return False + return True +======= +>>>>>>> d98f95e89b46227d188c372219531e73daa8b852