diff --git a/data/flu/HA/ha_h10_h10n7/CY136094/unreleased/dataset.zip b/data/flu/HA/ha_h10_h10n7/CY136094/unreleased/dataset.zip new file mode 100644 index 0000000..e1bb227 Binary files /dev/null and b/data/flu/HA/ha_h10_h10n7/CY136094/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h10_h10n7/CY136094/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h10_h10n7/CY136094/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..5ea22c2 --- /dev/null +++ b/data/flu/HA/ha_h10_h10n7/CY136094/unreleased/genome_annotation.gff3 @@ -0,0 +1,12 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY136094.1 1 1703 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1228423 +CY136094.1 Genbank region 1 1703 . + . ID=CY136094.1:1..1703;Dbxref=taxon:1228423;Name=4;bio-material=CEIRS#9BM11387#;country=USA: Alaska;gbkey=Src;lab-host=R0 passage(s);mol_type=viral cRNA;nat-host=northern pintail%3B gender F%3B age hatch year;note=Sample provided by University of Alaska Fairbanks;segment=4;serotype=H10N7;strain=A/northern pintail/Interior Alaska/9BM11387R0/2009 +CY136094.1 Genbank sequence_feature 1 1703 . + . ID=id-CY136094.1:1..1703;Dbxref=IRD:NIGSP_CEIRS_CIP055_AK2_00216.HA;gbkey=misc_feature +CY136094.1 Genbank gene 8 1693 . + . ID=gene-HA;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding +CY136094.1 Genbank CDS 8 1693 . + 0 ID=cds-AGG26198.1;Parent=gene-HA;Dbxref=NCBI_GP:AGG26198.1;Name=HA;gbkey=CDS;product=hemagglutinin;protein_id=AGG26198.1 +CY136094.1 Genbank signal_peptide_region_of_CDS 8 55 . + . ID=id-AGG26198.1:1..16;Parent=cds-AGG26198.1;gbkey=Prot +CY136094.1 Genbank mature_protein_region_of_CDS 56 1027 . + . ID=id-AGG26198.1:17..340;Parent=cds-AGG26198.1;gbkey=Prot;product=HA1 +CY136094.1 Genbank mature_protein_region_of_CDS 1028 1690 . + . 
ID=id-AGG26198.1:341..561;Parent=cds-AGG26198.1;gbkey=Prot;product=HA2 diff --git a/data/flu/HA/ha_h10_h10n7/CY136094/unreleased/pathogen.json b/data/flu/HA/ha_h10_h10n7/CY136094/unreleased/pathogen.json new file mode 100644 index 0000000..fa34a31 --- /dev/null +++ b/data/flu/HA/ha_h10_h10n7/CY136094/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h10_h10n7", + "reference name": "Influenza A virus (A/northern pintail/Interior Alaska/9BM11387R0/2009(H10N7)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY136094" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h10_h10n7/CY136094/unreleased/reference.fasta b/data/flu/HA/ha_h10_h10n7/CY136094/unreleased/reference.fasta new file mode 100644 index 0000000..e0ac257 --- /dev/null +++ b/data/flu/HA/ha_h10_h10n7/CY136094/unreleased/reference.fasta @@ -0,0 +1,27 @@ +>CY136094.1 Influenza A virus (A/northern pintail/Interior Alaska/9BM11387R0/2009(H10N7)) hemagglutinin (HA) gene, complete cds +GGTCACAATGTACAAAATAGTACTAGTACTTGCGCTCCTTGGAGCGGTGCATGGTCTTGACAAAATATGC +CTTGGACATCATGCAGTCTCCAATGGCACCATTGTAAAGACTCTCACAAACGAAAAGGAAGAGGTGACCA +ATGCTACTGAAACAGTGGAGAGCAAAAGCCTGGACAAACTTTGCATGAAAAGTCGGAATTACAAGGACTT +AGGTAATTGCCACCCAATAGGGATGATAATAGGGACTCCTGCTTGTGACCTACACCTCACCGGAACATGG +GACACTTTAATAGAGAGAGACAATTCTATTGCCTACTGTTACCCGGGTGCCACTGTGAATGAAGAAGCAT +TAAGGCAGAAAATTATGGAAAGTGGAGGGATTGACAAGATAAGCACCGGGTTTACATATGGATCATCCAT +CAATTCAGCTGGAACCACTAAAGCATGCATGAGAAATGGGGGAAATAGTTTCTATGCGGAGCTAAAGTGG +CTAGTGTCGAAGAGCAAAGGACAAAACTTCCCACAAACAACAAACACATACAGAAATACAGATTCAGCAG +AACACCTTATAATCTGGGGAATTCATCACCCTTCAAGCACACAAGAAAAGAATGATCTGTATGGAACACA +ATCACTTTCCATTTCAGTAGGGAGTTCTACTTATCAAAACAACTTTGTGCCTGTGGTGGGAGCAAGACCA +CAAGTGAATGGCCAAAGTGGGCGGATTGTTTTCCATTGGGCGATGGTACAGCCAGGTGATAACATCACTT 
+TTTCGCATAATGGTGGATTGATAGCACCTAGCAGAGTGAGTAAACTAAAGGGAAGAGGCCTTGGCATCCA +ATCAGGAGCTTCAGTAGACAATGACTGTGAATCAAAATGTTTCTGGAAAGGTGGATCCATCAACACCAAA +CTCCCTTTTCAGAATCTTTCCCCAAGAACTGTGGGTCAATGCCCCAAGTATGTGAACAAAAAGAGCCTGT +TGCTTGCTACTGGAATGAGGAATGTGCCAGAGGTTGTCCAAGGAAGAGGCCTGTTTGGAGCAATAGCTGG +ATTCATAGAGAATGGATGGGAAGGAATGGTAGATGGTTGGTATGGTTTCCGACACCAAAATGCCCAAGGC +ACTGGCCAGGCCGCGGATTATAAAAGTACTCAGGCAGCTATAGATCAAATAACCGGGAAATTGAACAGAC +TGATAGAGAAGACAAACACAGAGTTCGAATCCATAGAATCTGAGTTCAGTGAAATAGAACATCAAATTGG +CAATGTAATAAACTGGACTAAGGATTCTATAACAGACATCTGGACGTACCAAGCTGAATTGCTGGTAGCA +ATGGAAAATCAGCATACAATTGACATGGCTGATTCAGAAATGCTGAATCTATATGAGAGGGTGAGGAAGC +AACTAAGGCAAAATGCAGAAGAAGATGGGAAAGGGTGCTTTGAAATATATCACAAATGCGATAACAATTG +TATGGAAAGCATCAGAAACAACACCTATGACCATACACAATACAGAGAAGAAGCACTCTTGAACAGACTC +AACATTAATCCGGTGAAACTCTCTTCTGGGTACAAAGATGTTATACTGTGGTTTAGCTTCGGGGCGTCAT +GCTTTGTACTTTTGGCTGTCATCATGGGGCTTGTTTTCTTCTGCTTGAAAAATGGAAACATGCGATGCAC +AATCTGTATTTAGTTAAAAACAC + diff --git a/data/flu/HA/ha_h11_h11n9/CY130070/unreleased/dataset.zip b/data/flu/HA/ha_h11_h11n9/CY130070/unreleased/dataset.zip new file mode 100644 index 0000000..586b2b5 Binary files /dev/null and b/data/flu/HA/ha_h11_h11n9/CY130070/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h11_h11n9/CY130070/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h11_h11n9/CY130070/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..4d04d00 --- /dev/null +++ b/data/flu/HA/ha_h11_h11n9/CY130070/unreleased/genome_annotation.gff3 @@ -0,0 +1,12 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY130070.1 1 1735 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=402474 +CY130070.1 Genbank region 1 1735 . + . 
ID=CY130070.1:1..1735;Dbxref=taxon:402474;Name=4;bio-material=CEIRS#162855#;collection-date=1974;country=USA: Memphis;gbkey=Src;mol_type=viral cRNA;nat-host=duck;segment=4;serotype=H11N9;strain=A/duck/Memphis/546/1974 +CY130070.1 Genbank sequence_feature 1 1735 . + . ID=id-CY130070.1:1..1735;Dbxref=IRD:NIGSP_CEIRS_SJC001_WEB_00013.HA;gbkey=misc_feature +CY130070.1 Genbank gene 21 1718 . + . ID=gene-HA;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding +CY130070.1 Genbank CDS 21 1718 . + 0 ID=cds-AGB50960.1;Parent=gene-HA;Dbxref=NCBI_GP:AGB50960.1;Name=HA;gbkey=CDS;product=hemagglutinin;protein_id=AGB50960.1 +CY130070.1 Genbank signal_peptide_region_of_CDS 21 68 . + . ID=id-AGB50960.1:1..16;Parent=cds-AGB50960.1;gbkey=Prot +CY130070.1 Genbank mature_protein_region_of_CDS 69 1046 . + . ID=id-AGB50960.1:17..342;Parent=cds-AGB50960.1;gbkey=Prot;product=HA1 +CY130070.1 Genbank mature_protein_region_of_CDS 1047 1715 . + . ID=id-AGB50960.1:343..565;Parent=cds-AGB50960.1;gbkey=Prot;product=HA2 diff --git a/data/flu/HA/ha_h11_h11n9/CY130070/unreleased/pathogen.json b/data/flu/HA/ha_h11_h11n9/CY130070/unreleased/pathogen.json new file mode 100644 index 0000000..3c705e3 --- /dev/null +++ b/data/flu/HA/ha_h11_h11n9/CY130070/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h11_h11n9", + "reference name": "Influenza A virus (A/duck/Memphis/546/1974(H11N9)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY130070" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h11_h11n9/CY130070/unreleased/reference.fasta b/data/flu/HA/ha_h11_h11n9/CY130070/unreleased/reference.fasta new file mode 100644 index 0000000..ca13c8b --- /dev/null +++ b/data/flu/HA/ha_h11_h11n9/CY130070/unreleased/reference.fasta @@ -0,0 +1,27 @@ +>CY130070.1 Influenza A 
virus (A/duck/Memphis/546/1974(H11N9)) hemagglutinin (HA) gene, complete cds +GGAAATATCTAGAAATCAAAATGAAGAAAGTACTGCTTTTTGCAGCAATCATCATCTGTATTCGAGCAGA +CGAAATCTGCATTGGATACCTGAGCAACAACTCAACAGAGAAAGTGGACACAATAATTGAGAGTAATGTC +ACGGTTACTAGCTCGGTTGAACTGGTTGAAAATGAGTACACTGGATCATTCTGCTCAATCGATGGGAAAG +CACCAATAAGTCTTGGTGATTGCTCCTTTGCTGGGTGGATTCTTGGGAACCCAATGTGTGATGATTTGAT +TGGGAAAACATCATGGTCTTACATAGTAGAGAAACCGAATCCCATTAATGGCATATGCTACCCTGGTACT +CTAGAGAATGAAGAGGAATTGAGACTGAAGTTTAGTGGGGTCCTCGAATTCAACAAATTTGAAGCCTTCA +CTTCAAACGGATGGGGATCAGTGAATTCTGGTGCTGGTGTGACCGCAGCCTGCAAATTTGGAAGCAGTAA +CTCTTTTTTCAGAAACATGGTATGGTTGATACACCAATCAGGGACATATCCTGTGATACGGAGGACATTC +AACAACACCAAAGGGAGAGATGTATTAATGGTATGGGGAGTTCACCATCCTGCAACTCTAAAAGAACACC +AAGACTTGTACAAAAAGGACAACTCCTATGTAGCAGTGGGTTCAGAGAGTTATAACAGGAGGTTCACCCC +TGAGATCAGCACAAGGCCTAAAGTAAATGGTCAGGCTGGAAGAATGACCTTCTACTGGACCATAGTGAAG +CCTGAAGAGGCAATAACATTTGAGTCAAATGGTGCATTTCTCGCTCCTCGGTACGCTTTTGAGTTGGTGT +CCTTAGGGAATGGAAAATTGTTCAGAAGTGACTTAAATATTGAATCTTGCTCAACTAAATGCCAGTCTGA +AATTGGATGGATCAACACTAATAGAAGCTTCCACAGTGTCCATAGAAACACAATAGGAGACTGCCCCAAA +TATGTGAATGTTAAATCTTTAAAGCTTGCTACCGGACTCAGAAATGTCCCTGCGATTGCTGCAAGAGGCC +TGTTTGGTGCAATAGCTGGTTTCATAGAAGGTGGTTGGCCAGGTTTAATCAATGGTTGGTATGGATTCCA +ACATAGGAATGAAGAAGGTACAGGGATTGCTGCAGACAAAGAATCAACCCAGACAGCAATAGACCAGATA +ACCTCTAAAGTCAATAACATCGTTGATCGGATGAACACAAACTTTGAGTCTGTTCAACATGAATTCAGTG +AAATTGAAGAGAGAATAAATCAACTGTCAAAGCATGTGGATGATTCTGTTATTGACATATGGTCATACAA +TGCACAGCTCCTTGTTCTATTGGAAAATGAAAAGACACTAGATCTCCATGACTCTAATGTTCGAAACCTC +CATGAAAAAGTCAGACGAATGCTGAAGGATAATGCTAAAGATGAAGGGAATGGCTGTTTTACTTTCTATC +ACAAGTGTGATAACGAGTGCATTGAAAAAGTTAGGAATGGAACATATGACCACAAGGAATTTGAGGAGGA +GTCCAGACTAAACAGACAAGAAATTGAAGGAGTGAAACTGGATTCCAGTGGCAACGTCTACAAAATACTA +TCAATTTACAGCTGCATTGCAAGCAGTCTTGTGTTAGCAGCAATCATTATGGGGTTCATCTTTTGGGCGT +GTAGCAATGGATCATGTAGATGTACCATTTGCATTTAGAATTGCAGTAAAAACAC + diff --git a/data/flu/HA/ha_h12_h12n5/CY130078/unreleased/dataset.zip b/data/flu/HA/ha_h12_h12n5/CY130078/unreleased/dataset.zip new 
file mode 100644 index 0000000..2f24035 Binary files /dev/null and b/data/flu/HA/ha_h12_h12n5/CY130078/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h12_h12n5/CY130078/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h12_h12n5/CY130078/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..c2e3df4 --- /dev/null +++ b/data/flu/HA/ha_h12_h12n5/CY130078/unreleased/genome_annotation.gff3 @@ -0,0 +1,12 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY130078.1 1 1712 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=385582 +CY130078.1 Genbank region 1 1712 . + . ID=CY130078.1:1..1712;Dbxref=taxon:385582;Name=4;bio-material=CEIRS#14739#;collection-date=1976;country=Canada: Alberta;gbkey=Src;mol_type=viral cRNA;nat-host=duck;segment=4;serotype=H12N5;strain=A/duck/Alberta/60/1976 +CY130078.1 Genbank sequence_feature 1 1712 . + . ID=id-CY130078.1:1..1712;Dbxref=IRD:NIGSP_CEIRS_SJC001_WEB_00014.HA;gbkey=misc_feature +CY130078.1 Genbank gene 8 1702 . + . ID=gene-HA;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding +CY130078.1 Genbank CDS 8 1702 . + 0 ID=cds-AGB50971.1;Parent=gene-HA;Dbxref=NCBI_GP:AGB50971.1;Name=HA;gbkey=CDS;product=hemagglutinin;protein_id=AGB50971.1 +CY130078.1 Genbank signal_peptide_region_of_CDS 8 58 . + . ID=id-AGB50971.1:1..17;Parent=cds-AGB50971.1;gbkey=Prot +CY130078.1 Genbank mature_protein_region_of_CDS 59 1033 . + . ID=id-AGB50971.1:18..342;Parent=cds-AGB50971.1;gbkey=Prot;product=HA1 +CY130078.1 Genbank mature_protein_region_of_CDS 1034 1699 . + . 
ID=id-AGB50971.1:343..564;Parent=cds-AGB50971.1;gbkey=Prot;product=HA2 diff --git a/data/flu/HA/ha_h12_h12n5/CY130078/unreleased/pathogen.json b/data/flu/HA/ha_h12_h12n5/CY130078/unreleased/pathogen.json new file mode 100644 index 0000000..81525e0 --- /dev/null +++ b/data/flu/HA/ha_h12_h12n5/CY130078/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h12_h12n5", + "reference name": "Influenza A virus (A/duck/Alberta/60/1976(H12N5)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY130078" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h12_h12n5/CY130078/unreleased/reference.fasta b/data/flu/HA/ha_h12_h12n5/CY130078/unreleased/reference.fasta new file mode 100644 index 0000000..3fc3b5f --- /dev/null +++ b/data/flu/HA/ha_h12_h12n5/CY130078/unreleased/reference.fasta @@ -0,0 +1,27 @@ +>CY130078.1 Influenza A virus (A/duck/Alberta/60/1976(H12N5)) hemagglutinin (HA) gene, complete cds +GGTCACAATGGAAAAATTCATCATTTTGAGTACTGTCTTGGCAGCAAGCTTTGCATATGACAAAATTTGC +ATTGGATACCAAACAAACAACTCGACTGAAACGGTAAACACACTAAGTGAACAAAACGTTCCGGTGACGC +AGGTGGAAGAACTTGTACATGGTGGGATTGATCCGATCCTGTGTGGAACGGAACTAGGATCACCACTAGT +GCTTGATGACTGTTCATTAGAGGGTCTAATCCTAGGCAATCCCAAATGTGATCTTTATTTGAATGGCAGG +GAATGGTCATACATAGTAGAGAGGCCCAAAGAGATGGAAGGAGTTTGCTATCCAGGGTCAATTGAAAACC +AGGAAGAGCTAAGATCTCTGTTTTCTTCCATCAAAAAATATGAAAGAGTGAAGATGTTTGATTTCACCAA +ATGGAATGTCACATACACTGGGACCAGCAAGGCCTGCAATAATACATCAAACCAAGGCTCATTCTATAGG +AGCATGAGATGGTTGACCTTAAAATCAGGACAATTTCCAGTCCAAACAGATGAGTACAAGAACACCAGAG +ATTCAGACATTGTATTCACCTGGGCCATTCACCACCCACCAACATCTGATGAACAAGTAAAATTATACAA +AAATCCTGATACTCTCTCTTCAGTCACCACCGATGAAATCAATAGGAGCTTCAAGCCTAATATAGGGCCA +AGACCACTCGTGAGAGGACAACAAGGGAGAATGGATTACTACTGGGCTGTTCTTAAACCTGGACAAACAG 
+TCAAAATACAAACCAATGGTAATCTTATTGCACCTGAATATGGTCACTTAATCACAGGGAAATCACATGG +CAGGATACTCAAGAATAATTTGCCCATGGGACAGTGTGTGACTGAATGTCAATTGAACGAGGGTGTAATG +AACACAAGCAAACCTTTCCAGAACACTAGTAAGCACTATATTGGGAAATGCCCCAAATACATACCATCAG +GGAGTTTAAAATTGGCAATAGGGCTCAGGAATGTCCCACAAGTTCAAGATCGGGGGCTCTTTGGAGCAAT +TGCAGGTTTCATAGAAGGCGGATGGCCAGGGCTAGTGGCTGGTTGGTACGGATTTCAGCATCAAAATGCG +GAGGGGACAGGCATAGCTGCAGACAGAGACAGCACCCAAAGGGCAATAGACAATATGCAAAACAAACTCA +ACAATGTCATCGACAAAATGAATAAACAATTTGAAGTGGTGAATCATGAGTTTTCAGAAGTGGAAAGCAG +AATAAACATGATTAATTCCAAAATTGATGATCAGATAACTGACATATGGGCATACAATGCTGAATTGCTT +GTCCTATTGGAAAATCAGAAGACATTAGATGAGCATGACGCTAATGTAAGGAATCTACATGATCGGGTCA +GAAGAGTCCTGAGGGAAAATGCAATTGACACAGGAGACGGCTGCTTTGAGATTTTACATAAATGTGACAA +CAATTGTATGGACACGATTAGAAACGGGACATACAATCACAAAGAGTATGAGGAAGAAAGCAAAATCGAA +CGACAGAAAGTCAATGGTGTGAAACTTGAGGAGAATTCTACATATAAAATTCTGAGCATCTACAGCAGTG +TTGCCTCAAGCTTAGTTCTACTGCTCATGATTATTGGGGGTTTCATTTTCGGGTGTCAAAATGGAAATGT +TCGTTGTACTTTCTGTATTTAATTAAAAACAC + diff --git a/data/flu/HA/ha_h13_h13n6/CY130086/unreleased/dataset.zip b/data/flu/HA/ha_h13_h13n6/CY130086/unreleased/dataset.zip new file mode 100644 index 0000000..c29f540 Binary files /dev/null and b/data/flu/HA/ha_h13_h13n6/CY130086/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h13_h13n6/CY130086/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h13_h13n6/CY130086/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..533bbd4 --- /dev/null +++ b/data/flu/HA/ha_h13_h13n6/CY130086/unreleased/genome_annotation.gff3 @@ -0,0 +1,12 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY130086.1 1 1743 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=384499 +CY130086.1 Genbank region 1 1743 . + . 
ID=CY130086.1:1..1743;Dbxref=taxon:384499;Name=4;bio-material=CEIRS#80894#;collection-date=1977;country=USA: Maryland;gbkey=Src;mol_type=viral cRNA;nat-host=gull;segment=4;serotype=H13N6;strain=A/gull/Maryland/704/1977 +CY130086.1 Genbank sequence_feature 1 1743 . + . ID=id-CY130086.1:1..1743;Dbxref=IRD:NIGSP_CEIRS_SJC001_WEB_00015.HA;gbkey=misc_feature +CY130086.1 Genbank gene 30 1730 . + . ID=gene-HA;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding +CY130086.1 Genbank CDS 30 1730 . + 0 ID=cds-AGB51312.1;Parent=gene-HA;Dbxref=NCBI_GP:AGB51312.1;Name=HA;gbkey=CDS;product=hemagglutinin;protein_id=AGB51312.1 +CY130086.1 Genbank signal_peptide_region_of_CDS 30 83 . + . ID=id-AGB51312.1:1..18;Parent=cds-AGB51312.1;gbkey=Prot +CY130086.1 Genbank mature_protein_region_of_CDS 84 1058 . + . ID=id-AGB51312.1:19..343;Parent=cds-AGB51312.1;gbkey=Prot;product=HA1 +CY130086.1 Genbank mature_protein_region_of_CDS 1059 1727 . + . ID=id-AGB51312.1:344..566;Parent=cds-AGB51312.1;gbkey=Prot;product=HA2 diff --git a/data/flu/HA/ha_h13_h13n6/CY130086/unreleased/pathogen.json b/data/flu/HA/ha_h13_h13n6/CY130086/unreleased/pathogen.json new file mode 100644 index 0000000..ebcc669 --- /dev/null +++ b/data/flu/HA/ha_h13_h13n6/CY130086/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h13_h13n6", + "reference name": "Influenza A virus (A/gull/Maryland/704/1977(H13N6)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY130086" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h13_h13n6/CY130086/unreleased/reference.fasta b/data/flu/HA/ha_h13_h13n6/CY130086/unreleased/reference.fasta new file mode 100644 index 0000000..1726319 --- /dev/null +++ b/data/flu/HA/ha_h13_h13n6/CY130086/unreleased/reference.fasta @@ -0,0 +1,27 @@ +>CY130086.1 Influenza A 
virus (A/gull/Maryland/704/1977(H13N6)) hemagglutinin (HA) gene, complete cds +GGAAATCTTAACAATCAGAAACAAACAAGATGGCTCTAAATGTCATTGCAACTTTGACACTTATAAGTGT +ATGTGTACATGCAGACAGAATATGCGTGGGGTATCTGAGCACCAATTCATCAGAAAGGGTCGACACGCTC +CTTGAAAATGGGGTCCCAGTCACCAGCTCCATTGATCTGATTGAGACAAACCACACAGGAACATACTGTT +CTCTAAATGGAGTCAGTCCAGTGCATTTGGGAGATTGCAGCTTTGAAGGATGGATTGTAGGAAACCCAGC +CTGCACCAGCAACTTTGGGATCAGAGAGTGGTCATACCTGATTGAGGACCCCGCGGCCCCTCATGGGCTT +TGCTACCCTGGAGAATTAAACAACAATGGTGAACTCAGACACTTGTTCAGTGGAATCAGGTCATTCAGTA +GAACGGAATTGATCCCACCTACCTCCTGGGGGGAAGTACTTGACGGTACAACATCTGCTTGCAGAGATAA +CACGGGAACCAACAGCTTCTATCGAAATTTAGTTTGGTTTATAAAGAAGAATAATAGATATCCAGTTATC +AGTAAGACCTACAACAATACAACGGGAAGGGATGTTTTAGTTTTATGGGGAATACATCACCCAGTGTCTG +TGGATGAGACAAAGACTCTGTATGTCAATAGTGATCCATACACACTGGTTTCCACCAAGTCTTGGAGCGA +GAAATATAAACTAGAAACGGGAGTCCGACCTGGCTATAATGGACAGAGGAGCTGGATGAAAATTTATTGG +TCTTTGATACATCCAGGGGAGATGATTACTTTCGAGAGTAATGGTGGATTTTTAGCCCCAAGATATGGGT +ACATAATTGAAGAATATGGAAAAGGAAGGATTTTCCAGAGTCGCATCAGAATGTCTAGGTGCAACACCAA +GTGCCAGACTTCGGTTGGAGGGATAAACACAAACAGAACGTTCCAAAACATCGATAAGAATGCTCTTGGT +GACTGTCCCAAATACATAAAGTCTGGCCAACTCAAGCTAGCCACTGGACTCAGAAATGTGCCAGCTATAT +CGAATAGAGGATTGTTCGGAGCAATTGCAGGGTTCATAGAAGGAGGCTGGCCAGGTTTAATCAATGGTTG +GTACGGTTTTCAGCATCAAAATGAACAGGGAACAGGAATAGCTGCAGACAAAGAATCAACACAGAAAGCT +ATAGACCAGATAACAACCAAAATAAATAACATTATTGATAAAATGAATGGGAACTATGATTCAATTAGGG +GTGAATTCAATCAAGTTGAGAAGCGTATAAACATGCTTGCAGACAGAATAGATGATGCCGTGACGGACAT +TTGGTCATACAATGCCAAACTTCTTGTATTGCTGGAAAATGATAAAACTTTAGATATGCATGATGCTAAT +GTAAAGAATTTACATGAGCAAGTACGAAGAGAATTGAAGGACAATGCAATTGACGAAGGAAATGGCTGTT +TTGAACTCCTTCATAAATGCAATGACTCCTGCATGGAAACTATAAGAAATGGAACGTATGACCACACTGA +GTATGCAGAGGAGTCAAAGTTAAAGAGGCAAGAAATCGATGGGATCAAACTCAAATCAGAAGACAACGTT +TACAAAGCATTATCAATATACAGTTGCATTGCAAGTAGTGTTGTACTAGTAGGACTCATACTCTCTTTCA +TCATGTGGGCCTGTAGTAGTGGGAATTGCCGATTCAATGTTTGTATATAAGTAGAAAAAACAC + diff --git a/data/flu/HA/ha_h14_h14n5/JN696314/unreleased/dataset.zip 
b/data/flu/HA/ha_h14_h14n5/JN696314/unreleased/dataset.zip new file mode 100644 index 0000000..a265444 Binary files /dev/null and b/data/flu/HA/ha_h14_h14n5/JN696314/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h14_h14n5/JN696314/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h14_h14n5/JN696314/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..7205644 --- /dev/null +++ b/data/flu/HA/ha_h14_h14n5/JN696314/unreleased/genome_annotation.gff3 @@ -0,0 +1,8 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region JN696314.2 1 1749 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1088701 +JN696314.2 Genbank region 1 1749 . + . ID=JN696314.2:1..1749;Dbxref=taxon:1088701;Name=4;collected-by=Chad Courtney;collection-date=16-Nov-2010;country=USA;gbkey=Src;genome=genomic;mol_type=viral cRNA;nat-host=Clangula hyemalis (long-tailed duck);segment=4;serotype=H14N6;strain=A/long-tailed duck/Wisconsin/10OS3912/2010 +JN696314.2 Genbank gene 18 1724 . + . ID=gene-HA;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding +JN696314.2 Genbank CDS 18 1724 . 
+ 0 ID=cds-AEP68847.2;Parent=gene-HA;Dbxref=NCBI_GP:AEP68847.2;Name=HA;gbkey=CDS;product=hemagglutinin;protein_id=AEP68847.2 diff --git a/data/flu/HA/ha_h14_h14n5/JN696314/unreleased/pathogen.json b/data/flu/HA/ha_h14_h14n5/JN696314/unreleased/pathogen.json new file mode 100644 index 0000000..d4945d8 --- /dev/null +++ b/data/flu/HA/ha_h14_h14n5/JN696314/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h14_h14n5", + "reference name": "Influenza A virus (A/long-tailed duck/Wisconsin/10OS3912/2010(H14N6)) segment 4 hemagglutinin (HA) gene, complete cds", + "reference accession": "JN696314" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h14_h14n5/JN696314/unreleased/reference.fasta b/data/flu/HA/ha_h14_h14n5/JN696314/unreleased/reference.fasta new file mode 100644 index 0000000..f2f0ae6 --- /dev/null +++ b/data/flu/HA/ha_h14_h14n5/JN696314/unreleased/reference.fasta @@ -0,0 +1,27 @@ +>JN696314.2 Influenza A virus (A/long-tailed duck/Wisconsin/10OS3912/2010(H14N6)) segment 4 hemagglutinin (HA) gene, complete cds +AGCAAAAGCAGGGGAAAATGATTGCATTCATAGTGATTGCACTGGCTCTGAGCCACACTACTTACTCCCA +GATCACAAATGGAAACACAGGAAACCCTGTTATATGCTTGGGTCACCATGCAGTGGAGAATGGCACATCT +GTTAAAACATTAACAGACAATCATATAGAGGTTGTGTCGGCTAAAGAATTAGTTGAGACAAACCACATTA +ATGAACTATGCCCAAGTCCTCTGAAGCTTGTCGATGGACAAGACTGCGACCTTATCAACGGTGCATTGGG +AAGCCCAGGCTGTGACCACTTACAAGACACTACTTGGGATGTTTTCATTGAAAGGCCAACGGCAATGGAC +ACGTGCTATCCATTCGATGTTCCAGATTACCAGAGCCTTAGAAGCATCTTGGCAAGCAGTGGGAGTCTGG +AATTCATTGCCGAACAATTCACTTGGAATGGTGTCACAGTTGACGGATCAAGCAGTGCTTGTTTGAGGGG +CGGCCGCAATGGCTTCTTCACCCGACTGAACTGGCTAACCAGAGTAAAAAACGGGAACTATGGGCCTATT +AATGTCACAAAAGAAAATACAGGGTCTTACGTCAGGCTCTATCTCTGGGGAGTGCACCATCCATCAAGTG +ATACTGAGCAAACGGATCTTTACAAAGTTGCAACGGGAAGAGTAACGGTGTCTACTCGCTCAGATCAAAT 
+CAGCATTATTCCCAATATAGGAAGTAGACCAAGGGTGAGGAATCAGAGCGGCAGAATAAGCATCTACTGG +ACTCTAGTAAACCCAGGGGATTCCATCATCTTTAACAGCATTGGAAACCTAATTGCACCAAGAGGCCACT +ACAAAATAAATAAATCTACAAAGGGCACTGTGCTTAAAAGTGACAAGAAGATTGGATCATGCACAAGCCC +TTGTTTAACCGATAAAGGTTCAATCCAAAGTGACAAACCTTTTCAGAATGTGTCCAGAATCGCTATAGGA +AACTGCCCGAAGTATGTGAAGCAAGGCTCACTGATGTTGGCAACTGGAATGCGTAACATCCCTGACAAGC +AGACGAAGGGCTTATTTGGAGCAATTGCTGGATTCATTGAAAATGGTTGGCAAGGCCTGATTGATGGGTG +GTATGGATTCAGGCACCAAAATGCTGAGGGAACAGGAACTGCTGCGGATTTGAAATCAACCCAGGCAGCC +ATTGACCAAATAAATGGCAAACTAAACAGATTGATAGAGAAGACAAATGAAAAATATCACCAAATCGAAA +AAGAATTCGAGCAAGTGGAAGGAAGAATACAGGACCTTGAGAAATACGTTGAGGACACTAAAATTGATCT +GTGGTCATACAATGCTGAGCTATTGGTGGCCCTAGAAAATCAGCACACAATAGATGTTACAGACTCCGAG +ATGAACAAACTCTTTGAGAGGGTGAGAAGACAACTTAGGGAAAATGCGGAAGATCAAGGCAACGGATGTT +TCGAGATATTCCATCAGTGTGACAACAACTGTATAGAAAGTATCAGAAATGGAACCTATGATCACAACAT +CTACAGGGATGAAGCCATTAACAATCGGATTAAAATAAATCCCGTCAATTTGACGATGGGTTACAAGGAC +ATAATACTGTGGATTTCTTTCTCCATGTCATGCTTTGTCTTTGTGGCACTGATTTTGGGATTTGTTCTTT +GGGCTTGTAAGAACGGAAATATCCGATGCCAAATTTGTATATAAAGAAAAAAACACCCTTGTTTCTACT + diff --git a/data/flu/HA/ha_h15_h15n9/CY006010/unreleased/dataset.zip b/data/flu/HA/ha_h15_h15n9/CY006010/unreleased/dataset.zip new file mode 100644 index 0000000..b7b51aa Binary files /dev/null and b/data/flu/HA/ha_h15_h15n9/CY006010/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h15_h15n9/CY006010/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h15_h15n9/CY006010/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..d99d9d9 --- /dev/null +++ b/data/flu/HA/ha_h15_h15n9/CY006010/unreleased/genome_annotation.gff3 @@ -0,0 +1,13 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY006010.1 1 1763 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=352560 +CY006010.1 Genbank region 1 1763 . + . 
ID=CY006010.1:1..1763;Dbxref=taxon:352560;Name=4;collection-date=1979;country=Australia: Western Australia;gbkey=Src;mol_type=viral cRNA;nat-host=Avian;segment=4;serotype=H15N9;strain=A/wedge-tailed shearwater/Western Australia/2576/1979 +CY006010.1 Genbank primer_binding_site 1 19 . + . ID=id-CY006010.1:1..19;Note=PCR amplification primer sequence;gbkey=primer_bind +CY006010.1 Genbank gene 22 1734 . + . ID=gene-HA;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding +CY006010.1 Genbank CDS 22 1734 . + 0 ID=cds-ABB88138.1;Parent=gene-HA;Dbxref=NCBI_GP:ABB88138.1;Name=HA;gbkey=CDS;product=hemagglutinin;protein_id=ABB88138.1 +CY006010.1 Genbank signal_peptide_region_of_CDS 22 75 . + . ID=id-ABB88138.1:1..18;Parent=cds-ABB88138.1;gbkey=Prot +CY006010.1 Genbank mature_protein_region_of_CDS 76 1068 . + . ID=id-ABB88138.1:19..349;Parent=cds-ABB88138.1;gbkey=Prot;product=HA1 +CY006010.1 Genbank mature_protein_region_of_CDS 1069 1731 . + . ID=id-ABB88138.1:350..570;Parent=cds-ABB88138.1;gbkey=Prot;product=HA2 +CY006010.1 Genbank primer_binding_site 1744 1763 . - . 
ID=id-CY006010.1:1744..1763;Note=PCR amplification primer sequence;gbkey=primer_bind diff --git a/data/flu/HA/ha_h15_h15n9/CY006010/unreleased/pathogen.json b/data/flu/HA/ha_h15_h15n9/CY006010/unreleased/pathogen.json new file mode 100644 index 0000000..bd92a5f --- /dev/null +++ b/data/flu/HA/ha_h15_h15n9/CY006010/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h15_h15n9", + "reference name": "Influenza A virus (A/wedge-tailed shearwater/Western Australia/2576/1979(H15N9)) segment 4, complete sequence", + "reference accession": "CY006010" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h15_h15n9/CY006010/unreleased/reference.fasta b/data/flu/HA/ha_h15_h15n9/CY006010/unreleased/reference.fasta new file mode 100644 index 0000000..71d64ac --- /dev/null +++ b/data/flu/HA/ha_h15_h15n9/CY006010/unreleased/reference.fasta @@ -0,0 +1,28 @@ +>CY006010.1 Influenza A virus (A/wedge-tailed shearwater/Western Australia/2576/1979(H15N9)) segment 4, complete sequence +AGCAAAAGCAGGGGAAACAAAATGAACACTCAAATCATTGTCATTCTAGTCCTCGGACTGTCAATGGTGA +AATCTGACAAGATTTGTCTCGGGCACCATGCCGTAGCAAATGGGACAAAAGTCAACACACTAACTGAGAG +AGGAGTGGAAGTGGTCAATGCCACGGAGACAGTGGAGATTACCGGAATAGATAAAGTGTGCACAAAAGGG +AAGAAAGCAGTGGACCTGGGGTCTTGTGGAATACTGGGAACTATCATTGGGCCTCCACAATGTGATCTTC +ATCTTGAATTCAAAGCTGATCTGATAATAGAAAGAAGAAATTCAAGTGACATCTGTTACCCAGGAAGATT +CACTAATGAGGAAGCACTGAGACAAATAATCAGAGAATCTGGAGGAATTGACAAAGAGTCAATGGGCTTT +AGATATTCAGGAATAAGAACAGACGGGGCAACCAGTGCGTGTAAGAGAACAGTGTCCTCTTTCTACTCAG +AAATGAAATGGCTTTCATCCAGCATGAATAACCAGGTGTTCCCACAACTGAATCAGACATACAGGAACAC +CAGAAAAGAACCAGCCCTAATTGTCTGGGGAGTACATCATTCAAGTTCCTTGGATGAGCAAAATAAGCTA +TATGGAACTGGGAACAAGCTGATAACAGTAGGAAGCTCAAAGTACCAACAATCGTTTTCACCAAGTCCAG +GGGCCAGGCCCAAAGTGAATGGTCAGGCCGGGAGGATCGACTTTCATTGGATGCTATTGGACCCAGGGGA 
+TACAGTCACTTTTACCTTCAATGGTGCATTCATAGCCCCAGATAGAGCCACCTTTCTCCGCTCTAATGCC +CCTTCAGGAATTGAGTACAATGGGAAGTCACTGGGAATACAGAGTGATGCACAAATCGATGAATCATGTG +AAGGGGAATGCTTCTACAGTGGAGGGACAATAAACAGCCCTTTACCATTTCAAAACATCGATAGTAGGGC +TGTCGGAAAGTGCCCCAGATATGTGAAGCAATCAAGCTTGCCGCTGGCCTTAGGAATGAAAAATGTACCA +GAGAAAATACGTACTAGGGGACTGTTCGGTGCAATTGCAGGATTCATCGAAAATGGATGGGAAGGGCTCA +TTGATGGATGGTATGGATTTAGGCATCAGAATGCACAAGGGCAGGGAACAGCTGCTGACTACAAGAGTAC +TCAGGCTGCAATTGACCAGATAACAGGGAAACTTAATAGGTTAATTGAAAAAACCAACAAACAGTTTGAA +CTCATAGACAATGAGTTCACTGAAGTGGAGCAGCAGATAGGCAATGTAATAAACTGGACAAGGGACTCCT +TGACTGAGATCTGGTCATACAATGCCGAACTGCTAGTAGCAATGGAGAATCAGCATACAATTGACCTTGC +AGATTCTGAAATGAACAAACTCTATGAGAGAGTGAGAAGACAGCTAAGGGAGAATGCCGAGGAGGATGGA +ACTGGATGTTTTGAGATTTTCCACCGATGTGACGATCAATGTATGGAGAGCATACGGAATAATACTTACA +ATCACACTGAATATCGACAGGAAGCCTTACAAAATAGGATAATGATCAATCCGGTAAAGCTTAGTAGTGG +GTACAAAGATGTGATACTATGGTTTAGCTTCGGGGCATCATGTGTAATGCTTCTAGCCATTGCTATGGGT +CTTATTTTCATGTGTGTGAAAAACGGGAATCTGCGGTGCACTATCTGTATATAATTATTTGAAAAAACAC +CCTTGTTTCTACT + diff --git a/data/flu/HA/ha_h16_h16n3/CY136630/unreleased/dataset.zip b/data/flu/HA/ha_h16_h16n3/CY136630/unreleased/dataset.zip new file mode 100644 index 0000000..f20ae2c Binary files /dev/null and b/data/flu/HA/ha_h16_h16n3/CY136630/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h16_h16n3/CY136630/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h16_h16n3/CY136630/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..7127643 --- /dev/null +++ b/data/flu/HA/ha_h16_h16n3/CY136630/unreleased/genome_annotation.gff3 @@ -0,0 +1,12 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY136630.1 1 1733 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1283472 +CY136630.1 Genbank region 1 1733 . + . 
ID=CY136630.1:1..1733;Dbxref=taxon:1283472;Name=4;bio-material=CEIRS#144406#;collection-date=19-May-1987;country=USA: Delaware;gbkey=Src;lab-host=E1 passage(s);mol_type=viral cRNA;nat-host=laughing gull;segment=4;serotype=H16N3;strain=A/laughing gull/Delaware Bay/2839/1987 +CY136630.1 Genbank sequence_feature 1 1733 . + . ID=id-CY136630.1:1..1733;Dbxref=IRD:NIGSP_CEIRS_SJC001_JBC_00508.HA;gbkey=misc_feature +CY136630.1 Genbank gene 23 1720 . + . ID=gene-HA;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding +CY136630.1 Genbank CDS 23 1720 . + 0 ID=cds-AGG26996.1;Parent=gene-HA;Dbxref=NCBI_GP:AGG26996.1;Name=HA;gbkey=CDS;product=hemagglutinin;protein_id=AGG26996.1 +CY136630.1 Genbank signal_peptide_region_of_CDS 23 79 . + . ID=id-AGG26996.1:1..19;Parent=cds-AGG26996.1;gbkey=Prot +CY136630.1 Genbank mature_protein_region_of_CDS 80 1048 . + . ID=id-AGG26996.1:20..342;Parent=cds-AGG26996.1;gbkey=Prot;product=HA1 +CY136630.1 Genbank mature_protein_region_of_CDS 1049 1717 . + . ID=id-AGG26996.1:343..565;Parent=cds-AGG26996.1;gbkey=Prot;product=HA2 diff --git a/data/flu/HA/ha_h16_h16n3/CY136630/unreleased/pathogen.json b/data/flu/HA/ha_h16_h16n3/CY136630/unreleased/pathogen.json new file mode 100644 index 0000000..9a2a949 --- /dev/null +++ b/data/flu/HA/ha_h16_h16n3/CY136630/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h16_h16n3", + "reference name": "Influenza A virus (A/laughing gull/Delaware Bay/2839/1987(H16N3)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY136630" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h16_h16n3/CY136630/unreleased/reference.fasta b/data/flu/HA/ha_h16_h16n3/CY136630/unreleased/reference.fasta new file mode 100644 index 0000000..d03ffd0 --- /dev/null +++ 
b/data/flu/HA/ha_h16_h16n3/CY136630/unreleased/reference.fasta @@ -0,0 +1,27 @@ +>CY136630.1 Influenza A virus (A/laughing gull/Delaware Bay/2839/1987(H16N3)) hemagglutinin (HA) gene, complete cds +GGATATTGTCAAAACAACAAGAATGGCCATCAAAGTGCTCCACTTGCTCATCATAGTGCTAGGCAGATAT +TCAATAGCAGACAAAATATGTATAGGATATCTAAGCAATAACTCTTCAGACACAGTAGACACACTAACAG +AGAATGGAGTACCTGTGACAAGCTCAATTGACCTCGTTGAAACAAACCACACTGGAACATACTGTTCTTT +GAATGGGATCAGCCCAATTCATCTTGGTGACTGCAGCTTTGAGGGATGGATTGTAGGAAACCCATCCTGT +GCTACAAACATCAACATCAGGGAGTGGTCATATTTGATTGAAGACCCTAATGCCCCTAATAAGCTGTGTT +TTCCAGGGGAATTGGACAACAATGGTGAACTACGGCATCTCTTTAGTGGAGTGAACTCTTTTAGCAGAAC +AGAGCTAATAAGTCCAAGCAAATGGGGAGATGTTCTGGATGGAGTAACTGCTTCTTGTCTTGACAAGGGG +GCAAGCAGTTTTTACAGGAATTTGGTCTGGCTAGTGAAACAAAATGACAGGTACCCTGTTGTAAGAGGGG +ATTACAACAACACAACAGGCAGAGATGTTTTGGTCCTTTGGGGAATTCACCACCCAGACACAGAAACAAC +AGCCACAAAGCTATATGTCAACAAAAACCCCTACACATTGGTATCAACAAAAGAATGGAGCAAACGGTAT +GAACTTGAAATTGGAACCAGAATAGGAGATGGACAGAGAAGCTGGATGAAAATTTATTGGCACCTAATGC +ACCCTGGAGAAAGAATAATGTTTGAAAGCAATGGGGGTCTTTTAGCACCTAGATACGGATACATTATTGA +GAAATACGGTACAGGACGAATTTTCCAAAGCGGAATAAGAATGGCCAAATGCAACACAAAATGCCAAACG +TCAATGGGTGGGGTAAACACAAACAAAACTTTCCAGAACATAGAAAGGAATGCTCTTGGAGACTGCCCAA +AATACATAAAGTCTGGACAGCTGAAGCTTGCAACTGGACTGAGAAATGTTCCATCTATTGGTGAAAGAGG +TCTATTTGGTGCGATTGCAGGCTTCATAGAAGGAGGGTGGCCTGGTCTAATTAACGGATGGTATGGTTTT +CAGCATCAAAACGAACAGGGAACTGGCATTGCCGCGGATAAAGCTTCCACCCAGAAAGCCATAAATGAAA +TAACAACAAAGATAAACAATATAATAGAGAAAATGAACGGGAACTATGATTCAATAAGAGGGGAATTCAA +CCAAGTAGAAAAGAGAATCAACATGCTCGCTGATCGGGTTGATGATGCAGTGACTGATATTTGGTCTTAC +AATGCTAAGCTTCTTGTGTTAATTGAAAATGATAGAACTCTGGATTTGCATGATGCTAATGTCAAGAACC +TACATGAGCAGGTCAAAAGAGCATTAAAGAATAATGCCATTGATGAAGGAGATGGTTGCTTCAATCTTCT +TCACAAATGTAATGACTCATGCATGGAAACCATCAGAAATGGGACCTACAATCATGAAGACTACAGGGAA +GAGTCACAGTTGAAAAGGCAGGAGATTGAAGGAATAAAGTTGAAGACTGAAGACAATGTGTATAAGGTAC +TGTCAATTTATAGCTGCATTGCAAGCAGTATTGTGATGGTAGGTCTCATACTCGCATTTATAATGTGGGC +ATGCAGCAGTGGCAATTGCCGGTTCAATGTTTGTATATAATCAGAAAAAACAC + diff --git 
a/data/flu/HA/ha_h17_h17n10/CY103876/unreleased/dataset.zip b/data/flu/HA/ha_h17_h17n10/CY103876/unreleased/dataset.zip new file mode 100644 index 0000000..01aa66b Binary files /dev/null and b/data/flu/HA/ha_h17_h17n10/CY103876/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h17_h17n10/CY103876/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h17_h17n10/CY103876/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..3525909 --- /dev/null +++ b/data/flu/HA/ha_h17_h17n10/CY103876/unreleased/genome_annotation.gff3 @@ -0,0 +1,11 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY103876.1 1 1784 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1129345 +CY103876.1 Genbank region 1 1784 . + . ID=CY103876.1:1..1784;Dbxref=taxon:1129345;Name=4;collection-date=May-2009;country=Guatemala: El Jobo;gbkey=Src;isolation-source=rectal swab;mol_type=viral cRNA;nat-host=Sturnira lilium%3B gender M;note=Complete sequence%2C ends confirmed by RACE-PCR;segment=4;serotype=H17N10;strain=A/little yellow-shouldered bat/Guatemala/153/2009 +CY103876.1 Genbank gene 35 1729 . + . ID=gene-HA;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding +CY103876.1 Genbank CDS 35 1729 . + 0 ID=cds-AFC35418.1;Parent=gene-HA;Dbxref=NCBI_GP:AFC35418.1;Name=HA;gbkey=CDS;product=hemagglutinin;protein_id=AFC35418.1 +CY103876.1 Genbank signal_peptide_region_of_CDS 35 88 . + . ID=id-AFC35418.1:1..18;Parent=cds-AFC35418.1;gbkey=Prot +CY103876.1 Genbank mature_protein_region_of_CDS 89 1060 . + . ID=id-AFC35418.1:19..342;Parent=cds-AFC35418.1;gbkey=Prot;product=HA1 +CY103876.1 Genbank mature_protein_region_of_CDS 1061 1726 . + . 
ID=id-AFC35418.1:343..564;Parent=cds-AFC35418.1;gbkey=Prot;product=HA2 diff --git a/data/flu/HA/ha_h17_h17n10/CY103876/unreleased/pathogen.json b/data/flu/HA/ha_h17_h17n10/CY103876/unreleased/pathogen.json new file mode 100644 index 0000000..109985c --- /dev/null +++ b/data/flu/HA/ha_h17_h17n10/CY103876/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h17_h17n10", + "reference name": "Influenza A virus (A/little yellow-shouldered bat/Guatemala/153/2009(H17N10)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY103876" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h17_h17n10/CY103876/unreleased/reference.fasta b/data/flu/HA/ha_h17_h17n10/CY103876/unreleased/reference.fasta new file mode 100644 index 0000000..cbebbcc --- /dev/null +++ b/data/flu/HA/ha_h17_h17n10/CY103876/unreleased/reference.fasta @@ -0,0 +1,28 @@ +>CY103876.1 Influenza A virus (A/little yellow-shouldered bat/Guatemala/153/2009(H17N10)) hemagglutinin (HA) gene, complete cds +AGCAGAAGCAGGGTCACTATTACTCTGTGCTACTATGGAGCTGATTGTCCTACTAATCCTTCTCAATCCT +TATACTTTTGTATTAGGGGACAGAATATGCATAGGCTATCAAGCAAACCAAAATAACCAAACGGTTAACA +CTTTGCTCGAACAGAATGTTCCAGTTACCGGAGCACAGGAAATACTAGAAACCAATCACAATGGAAAGTT +ATGCAGCCTAAATGGGGTCCCACCATTGGACCTACAATCATGCACTCTGGCCGGGTGGTTACTGGGGAAT +CCGAACTGTGACAGCCTATTGGAAGCAGAAGAATGGTCGTATATAAAAATAAATGAAAGTGCCCCTGACG +ATCTTTGCTTCCCTGGGAACTTCGAAAACTTACAGGACTTACTACTAGAAATGTCAGGAGTTCAAAATTT +CACCAAGGTGAAACTATTCAACCCCCAAAGTATGACTGGGGTAACTACCAATAATGTCGACCAGACTTGC +CCTTTTGAAGGGAAACCATCTTTCTACAGAAACCTCAACTGGATACAAGGGAATAGCGGTTTGCCTTTCA +ATATAGAAATCAAGAATCCAACTAGCAATCCATTGCTCCTTCTCTGGGGGATCCACAACACCAAGGATGC +AGCACAACAAAGAAATCTCTATGGAAATGATTACTCTTATACTATTTTTAACTTTGGAGAGAAAAGTGAA +GAATTTCGACCTGAAATCGGGCAAAGAGATGAAGTCAAAGCCCATCAAGACAGAATCGATTACTACTGGG 
+GAAGCCTACCAGCGCAGAGCACTCTAAGGATAGAATCCACCGGAAATCTAATCGCACCAGAATATGGCTT +TTATTACAAGCGAAAAGAGGGGAAAGGGGGGCTAATGAAAAGTAAGCTGCCAATTAGTGACTGCTCAACC +AAATGTCAAACACCCCTGGGAGCTCTCAACAGTACCCTGCCCTTTCAAAACGTTCACCAACAAACAATTG +GTAATTGTCCCAAATATGTGAAGGCTACCTCCTTGATGCTTGCCACAGGGCTAAGAAACAATCCCCAAAT +GGAGGGAAGAGGTCTCTTTGGAGCCATTGCCGGGTTTATTGAAGGAGGGTGGCAAGGGATGATTGATGGT +TGGTATGGATACCACCACGAGAATCAAGAAGGAAGCGGCTACGCCGCTGACAAAGAAGCCACTCAAAAGG +CTGTCGATGCCATAACGAACAAAGTGAACAGTATTATTGACAAAATGAACAGCCAATTCGAATCCAACAT +CAAAGAGTTCAACAGGTTGGAACTCAGGATACAACACCTAAGTGATAGAGTTGATGATGCGTTACTTGAC +ATTTGGTCCTATAATACTGAATTACTTGTCCTCTTAGAAAATGAAAGAACCCTTGATTTCCATGATGCTA +ATGTAAAGAACCTCTTTGAAAAAGTGAAGGCCCAATTGAAGGACAATGCAATAGATGAAGGAAATGGTTG +CTTTCTACTTTTACACAAGTGTAACAACTCTTGCATGGACGACATAAAGAATGGGACATACAAATACATG +GACTATAGGGAAGAGTCACACATCGAGAAACAGAAAATCGATGGGGTAAAACTAACTGATTACTCTAGAT +ACTATATCATGACTCTGTATTCTACCATTGCATCATCCGTCGTGCTTGGCTCGTTGATAATAGCCGCTTT +TCTTTGGGGGTGCCAAAAAGGCTCAATCCAATGTAAAATATGTATATAGAACGGTGGAATTAACCTTGTC +ATTCAGAAAAGCAAAAAAGACCCTTGTTTCTACT + diff --git a/data/flu/HA/ha_h18_h18n11/CY125945/unreleased/dataset.zip b/data/flu/HA/ha_h18_h18n11/CY125945/unreleased/dataset.zip new file mode 100644 index 0000000..872b545 Binary files /dev/null and b/data/flu/HA/ha_h18_h18n11/CY125945/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h18_h18n11/CY125945/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h18_h18n11/CY125945/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..64eef10 --- /dev/null +++ b/data/flu/HA/ha_h18_h18n11/CY125945/unreleased/genome_annotation.gff3 @@ -0,0 +1,11 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY125945.1 1 1771 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1395524 +CY125945.1 Genbank region 1 1771 . + . 
ID=CY125945.1:1..1771;Dbxref=taxon:1395524;Name=4;collection-date=2010;country=Peru: Truenococha;gbkey=Src;isolation-source=rectal swab;mol_type=viral cRNA;nat-host=Artibeus planirostris%3B gender M;note=Complete sequence%2C ends confirmed by RACE-PCR.;segment=4;serotype=H18N11;strain=A/flat-faced bat/Peru/033/2010 +CY125945.1 Genbank gene 29 1714 . + . ID=gene-HA;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding +CY125945.1 Genbank CDS 29 1714 . + 0 ID=cds-AGX84934.1;Parent=gene-HA;Dbxref=NCBI_GP:AGX84934.1;Name=HA;gbkey=CDS;product=hemagglutinin;protein_id=AGX84934.1 +CY125945.1 Genbank signal_peptide_region_of_CDS 29 70 . + . ID=id-AGX84934.1:1..14;Parent=cds-AGX84934.1;gbkey=Prot +CY125945.1 Genbank mature_protein_region_of_CDS 71 1045 . + . ID=id-AGX84934.1:15..339;Parent=cds-AGX84934.1;gbkey=Prot;product=HA1 +CY125945.1 Genbank mature_protein_region_of_CDS 1046 1711 . + . ID=id-AGX84934.1:340..561;Parent=cds-AGX84934.1;gbkey=Prot;product=HA2 diff --git a/data/flu/HA/ha_h18_h18n11/CY125945/unreleased/pathogen.json b/data/flu/HA/ha_h18_h18n11/CY125945/unreleased/pathogen.json new file mode 100644 index 0000000..806bdd2 --- /dev/null +++ b/data/flu/HA/ha_h18_h18n11/CY125945/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h18_h18n11", + "reference name": "Influenza A virus (A/flat-faced bat/Peru/033/2010(H18N11)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY125945" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h18_h18n11/CY125945/unreleased/reference.fasta b/data/flu/HA/ha_h18_h18n11/CY125945/unreleased/reference.fasta new file mode 100644 index 0000000..6ff2547 --- /dev/null +++ b/data/flu/HA/ha_h18_h18n11/CY125945/unreleased/reference.fasta @@ -0,0 +1,28 @@ +>CY125945.1 Influenza A virus (A/flat-faced 
bat/Peru/033/2010(H18N11)) hemagglutinin (HA) gene, complete cds +AGCAGAAGCAGGGTGATTATTATTCAGAATGATTACAATACTTATCTTGGTACTCCCTATTGTTGTAGGT +GACCAAATATGCATTGGCTATCATTCAAATAATTCAACACAAACAGTGAATACTCTCCTTGAATCAAATG +TACCAGTGACTTCCTCTCACAGCATCCTAGAAAAAGAACACAATGGTTTGCTTTGCAAGCTAAAAGGGAA +AGCACCCTTGGACCTTATTGACTGCTCTCTTCCTGCATGGCTTATGGGAAACCCAAAATGTGACGAACTC +TTAACAGCAAGCGAATGGGCCTACATAAAAGAAGACCCAGAACCTGAAAATGGAATCTGTTTTCCAGGAG +ATTTTGATTCTTTAGAGGATCTGATTTTATTGGTTTCTAACACTGACCATTTCAGAAAAGAGAAAATAAT +AGACATGACCAGATTCTCTGATGTGACTACAAACAACGTAGACAGTGCATGCCCATATGACACAAATGGT +GCTTCCTTTTACAGAAATCTAAACTGGGTGCAGCAAAACAAAGGCAAGCAACTGATTTTTCACTACCAGA +ATTCTGAAAACAACCCACTTTTGATAATTTGGGGAGTACACCAGACATCTAATGCTGCAGAACAAAACAC +ATACTATGGCTCACAGACTGGCTCAACAACCATCACTATTGGGGAAGAAACAAACACTTATCCACTAGTG +ATAAGTGAAAGTTCTATTCTTAACGGTCACTCTGATAGAATAAATTACTTTTGGGGAGTTGTCAATCCTA +ATCAGAATTTTTCAATTGTCAGTACAGGGAATTTCATCTGGCCAGAGTACGGATACTTTTTCCAAAAAAC +AACCAATATAAGTGGAATAATAAAATCAAGTGAAAAGATAAGTGATTGTGACACAATCTGCCAGACAAAA +ATTGGGGCAATAAACAGCACACTGCCTTTTCAGAATATCCATCAAAATGCGATTGGAGATTGTCCTAAAT +ATGTGAAAGCCCAAGAACTTGTTCTTGCAACTGGATTAAGGAACAATCCAATAAAAGAAACAAGAGGGCT +TTTTGGTGCAATTGCAGGTTTCATCGAGGGAGGATGGCAAGGATTGATTGATGGTTGGTATGGGTATCAC +CACCAGAACTCAGAAGGTTCAGGCTATGCTGCTGACAAAGAAGCAACCCAGAAGGCTGTTGATGCAATAA +CCACAAAAGTAAACAACATAATAGACAAAATGAACACGCAATTTGAATCAACTGCCAAAGAATTCAACAA +AATTGAAATGAGAATAAAACATCTCAGTGACAGAGTTGATGATGGCTTCTTGGATGTTTGGAGTTACAAT +GCTGAATTACTCGTTTTGCTGGAAAATGAAAGAACTCTGGACTTCCATGATGCAAATGTTAACAATTTGT +ATCAAAAAGTGAAAGTCCAGCTGAAAGACAATGCAATTGACATGGGAAACGGCTGTTTCAAGATTCTACA +CAAATGCAACAACACATGTATGGATGACATTAAAAACGGAACATACAATTATTATGAATACAGAAAGGAA +AGCCACTTGGAGAAACAAAAAATTGACGGTGTGAAGCTATCAGAAAACAGCTCATATAAAATAATGATCA +TTTACTCAACAGTGGCAAGTTCAGTAGTGCTTGGCTTGATTATACTAGCCGCAATTGAATGGGGCTGTTT +TAAAGGGAACCTGCAATGCAGAATATGTATTTGAGGCTGTGGTGTTAGCTAATGTCAATCTATTATTGCA +AAAAACACCCTTGTTTCTACT + diff --git a/data/flu/HA/ha_h3_h3n8/CY028836/unreleased/dataset.zip 
b/data/flu/HA/ha_h3_h3n8/CY028836/unreleased/dataset.zip new file mode 100644 index 0000000..666a0c1 Binary files /dev/null and b/data/flu/HA/ha_h3_h3n8/CY028836/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h3_h3n8/CY028836/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h3_h3n8/CY028836/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..05721bb --- /dev/null +++ b/data/flu/HA/ha_h3_h3n8/CY028836/unreleased/genome_annotation.gff3 @@ -0,0 +1,11 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY028836.1 1 1725 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=387222 +CY028836.1 Genbank region 1 1725 . + . ID=CY028836.1:1..1725;Dbxref=taxon:387222;Name=4;collection-date=1963;country=USA: Florida;gbkey=Src;mol_type=viral cRNA;nat-host=Equine;segment=4;serotype=H3N8;strain=A/equine/Miami/1/1963 +CY028836.1 Genbank gene 11 1708 . + . ID=gene-HA;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding +CY028836.1 Genbank CDS 11 1708 . + 0 ID=cds-ABY81492.1;Parent=gene-HA;Dbxref=NCBI_GP:ABY81492.1;Name=HA;gbkey=CDS;product=hemagglutinin;protein_id=ABY81492.1 +CY028836.1 Genbank signal_peptide_region_of_CDS 11 58 . + . ID=id-ABY81492.1:1..16;Parent=cds-ABY81492.1;gbkey=Prot +CY028836.1 Genbank mature_protein_region_of_CDS 59 1042 . + . ID=id-ABY81492.1:17..344;Parent=cds-ABY81492.1;gbkey=Prot;product=HA1 +CY028836.1 Genbank mature_protein_region_of_CDS 1043 1705 . + . 
ID=id-ABY81492.1:345..565;Parent=cds-ABY81492.1;gbkey=Prot;product=HA2 diff --git a/data/flu/HA/ha_h3_h3n8/CY028836/unreleased/pathogen.json b/data/flu/HA/ha_h3_h3n8/CY028836/unreleased/pathogen.json new file mode 100644 index 0000000..6df52fe --- /dev/null +++ b/data/flu/HA/ha_h3_h3n8/CY028836/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h3_h3n8", + "reference name": "Influenza A virus (A/equine/Miami/1/1963(H3N8)) segment 4, complete sequence", + "reference accession": "CY028836" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h3_h3n8/CY028836/unreleased/reference.fasta b/data/flu/HA/ha_h3_h3n8/CY028836/unreleased/reference.fasta new file mode 100644 index 0000000..1c3415f --- /dev/null +++ b/data/flu/HA/ha_h3_h3n8/CY028836/unreleased/reference.fasta @@ -0,0 +1,27 @@ +>CY028836.1 Influenza A virus (A/equine/Miami/1/1963(H3N8)) segment 4, complete sequence +TCTGTCAATCATGAAGACAACCACTATTTTGATACTACTGACCCATTGGGTCCACAGTCAAAACCCAACC +GGTGGCAACAACACAGCCACACTGTGCCTGGGACACCATGCAGTAGCAAATGGAACACTGGTAAAAACAA +TAACTGATGACCAGATTGAGGTGACAAATGCTACTGAATTAGTTCAGAGCACTTCAACAGGGAAAATATG +CAACAACCCATATAGGGTCCTAGATGGAAGAAACTGCACATTAATAGATGCAATGCTGGGAGATCCCCAT +TGTAATGTTTTTCAGTATGAGAATTGGGACCTCTTCATTGAAAGAAGCAGTGCTTTCAGCAATTGCTACC +CATATGACGTCCCTGACTATGCATCGCTCCGGTCTCTTGTGGCATCTTCAGGAACGTTAGAATTCATGGC +AGAGGGATTCACATGGACAGGTGTCACTCAAAACGGAGGAAGTAGCGCCTGCAGAAGGGGATCAGCCGAT +AGTTTCTTTAGCCGACTGAATTGGCTAACAAAATCTGAAAGTTCCTACCCCACATTGAATGTGACAATGC +CTAACAATGACAATTTCGATAAACTATACATCTGGGGGATCCATCACCCGAGTACAAATAATGAGCAGAC +AAAATTGTATGTCCAAGCATCAGGGCGAGTAACAGTTTCAACAAAAAGAAGTCAACAAACGATAATCCCC +AACATCGGGTCCAGACCGTGGGTCAGGGGTCAATCAGGCAGGATAAGCATATATTGGACCATTGTGAAAC +CTGGAGATGTCCTAATGATAAACAGTAATGGCAACTTAATTGCACCGCGGGGATATTTCAAAATGCGGAC 
+AGGAAAAAGCTCTATAATGAGATCAGATGCACCCATAGACACTTGTGTGTCTGAGTGTATTACACCAAAT +GGAAGCATCCCCAACGACAAACCGTTTCAAAATGTGAACAAGGTTACATATGGAAAATGCCCCAAGTATG +TCAAGCAGAGTACTTTGAAGCTGGCTACCGGGATGAGGAATGTACAAGAAAAGCAAATCAGAGGAATCTT +TGGAGCAATAGCGGGATTCATAGAAAACGGCTGGGAGGGAATGGTTGATGGGTGGTATGGATTCCGATAT +CAGAATTCGGAAGGGACAGGGCAAGCTGCAGATCTAAAGAGCACTCAAGCAGCCATTGACCAGATCAATG +GGAAATTGAACAGAGTGATTGAGAAAACTAATGAAAAATTTCATCAAATAGAGAAGGAATTCTCAGAAGT +AGAAGGGAGAATCCAAGACTTGGAGAAGTACGTAGAAGACACCAAAATAGACCTATGGTCCTACAACGCA +GAGTTACTGGTAGCCCTAGAAAATCAACATACGATTGACCTAACAGATGCAGAGATGAATAAATTATTCG +AGAAGACCAGGCGCCAGTTAAGAGAAAACGCGGAAGACATGGGGAATGGATGTTTCAAGATTTATCACAA +ATGTGATAATGCATGCATTGAATCAATAAGAAATGGGACATATGACCATGACATATACAGAGATGAGGCA +TTAAACAACCGGTTTCAAATTAGAGGTGTTGAGTTGAAATCAGGCTACAAAGATTGGATACTGTGGATTT +CATTCGCCATATCATGCTTCTTAATTTGCGTTGTTCTATTGGGTTTCATTATGTGGGCTTGCCAAAAAGG +CAACATCAGGTGCAACATTTGCATTTGAGTAAACTGATAGTTAAA + diff --git a/data/flu/HA/ha_h4_h4n6/CY181241/unreleased/dataset.zip b/data/flu/HA/ha_h4_h4n6/CY181241/unreleased/dataset.zip new file mode 100644 index 0000000..b8f0d76 Binary files /dev/null and b/data/flu/HA/ha_h4_h4n6/CY181241/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h4_h4n6/CY181241/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h4_h4n6/CY181241/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..1f80ccf --- /dev/null +++ b/data/flu/HA/ha_h4_h4n6/CY181241/unreleased/genome_annotation.gff3 @@ -0,0 +1,12 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY181241.1 1 1713 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1445223 +CY181241.1 Genbank region 1 1713 . + . ID=CY181241.1:1..1713;Dbxref=taxon:1445223;Name=4;collection-date=1974;country=USA: DeSoto West%2C WI;gbkey=Src;mol_type=viral cRNA;nat-host=mallard;segment=4;serotype=H4N6;strain=A/mallard/Wisconsin/14/1974 +CY181241.1 Genbank sequence_feature 1 1713 . + . 
ID=id-CY181241.1:1..1713;Dbxref=IRD:NIGSP_SSC_00484.HA;gbkey=misc_feature +CY181241.1 Genbank gene 8 1702 . + . ID=gene-HA;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding +CY181241.1 Genbank CDS 8 1702 . + 0 ID=cds-AHN04762.1;Parent=gene-HA;Dbxref=NCBI_GP:AHN04762.1;Name=HA;gbkey=CDS;product=hemagglutinin;protein_id=AHN04762.1 +CY181241.1 Genbank signal_peptide_region_of_CDS 8 55 . + . ID=id-AHN04762.1:1..16;Parent=cds-AHN04762.1;gbkey=Prot +CY181241.1 Genbank mature_protein_region_of_CDS 56 1036 . + . ID=id-AHN04762.1:17..343;Parent=cds-AHN04762.1;gbkey=Prot;product=HA1 +CY181241.1 Genbank mature_protein_region_of_CDS 1037 1699 . + . ID=id-AHN04762.1:344..564;Parent=cds-AHN04762.1;gbkey=Prot;product=HA2 diff --git a/data/flu/HA/ha_h4_h4n6/CY181241/unreleased/pathogen.json b/data/flu/HA/ha_h4_h4n6/CY181241/unreleased/pathogen.json new file mode 100644 index 0000000..b6eee5a --- /dev/null +++ b/data/flu/HA/ha_h4_h4n6/CY181241/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h4_h4n6", + "reference name": "Influenza A virus (A/mallard/Wisconsin/14/1974(H4N6)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY181241" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h4_h4n6/CY181241/unreleased/reference.fasta b/data/flu/HA/ha_h4_h4n6/CY181241/unreleased/reference.fasta new file mode 100644 index 0000000..6586dde --- /dev/null +++ b/data/flu/HA/ha_h4_h4n6/CY181241/unreleased/reference.fasta @@ -0,0 +1,27 @@ +>CY181241.1 Influenza A virus (A/mallard/Wisconsin/14/1974(H4N6)) hemagglutinin (HA) gene, complete cds +GGAAACAATGCTATCAATCACGATTCTGTTTCTGCTCATAGCAGAGGGCTCTTCTCAGAATTACACAGGG +AATCCTGTGATATGCCTGGGACATCATGCTGTATCCAACGGGACAATGGTGAAAACCTTGACTGATGACC +AAATAGAAGTTGTTACTGCCCAGGAATTAGTGGAATCGCAACATCTACCAGAATTGTGCCCTAGCCCTTT 
+AAGATTAGTAGATGGGCAAACTTGTGACATCATCAATGGTGCTCTGGGAAGCCCAGGCTGTGATCACTTG +AATGGTGCAGAATGGGATGTCTTCATAGAGCGACCTACCGCTGTGGACACTTGTTACCCATTTGATGTGC +CAGATTACCAGAGCCTACGGAGTATCTTAGCAAACAACGGGAAATTTGAGTTCATTGCTGAGGAATTCCA +ATGGAACACAGTCAAACAAAATGGGAAATCCGGGGCATGCAAAAGAGCAAATGTGAATGACTTCTTCAAC +AGACTGAATTGGCTGACCAAGTCAGATGGGGATGCATACCCACTCCAAAACTTGACAAAGGTCAACAACG +GGGACTACGCAAGACTCTACATATGGGGAGTTCACCATCCATCGACTGACACAGAACAGACCAACTTATA +TAAGAACAACCCTGGAAGAGTGACTGTCTCTACCAAAACCAGTCAAGCAAGTGTAGTACCAAATATTGGC +AGTAGACCATGGGTGAGAGGCCAAAGTGGTAGAATTAGCTTCTATTGGACAATTGTAGAACCAGGAGATC +TCATAGTTTTCAACACCATAGGAAATTTAATTGCTCCAAGAGGTCATTATAAACTCAACAGCCAAAAGAA +GAGTACAATTCTGAATACTGCAGTTCCCATAGGATCTTGCGTTAGTAAATGTCACACCGACAGGGGTTCA +ATCTCTACAACTAAACCCTTTCAGAATATCTCGAGGACATCAATCGGGGACTGTCCCAAGTATGTCAAAC +AGGGGTCCTTGAAACTTGCTACAGGAATGAGGAACATCCCTGAGAAGGCAACCAGGGGTCTATTTGGTGC +AATTGCTGGTTTCATAGAGAATGGTTGGCAAGGTCTAATCGATGGTTGGTATGGGTTTAGGCATCAAAAT +GCAGAAGGGACAGGAACAGCTGCAGATCTCAAATCAACCCAGGCAGCCATTGATCAAATCAATGGAAAAC +TGAATCGTCTCATCGAGAAAACAAATGAGAAATACCACCAAATTGAAAAGGAATTTGAACAGGTAGAGGG +AAGAATTCAAGACTTAGAGAAGTATGTTGAAGACACAAAGATTGACCTGTGGTCTTACAATGCTGAATTA +TTGGTGGCATTGGAAAATCAACATACTATAGATGTGACAGACTCCGAAATGAACAAACTCTTTGAAAGAG +TTAGACGCCAACTAAGAGAGAATGCTGAGGACAAAGGAAATGGGTGTTTTGAAATCTTCCACCAGTGTGA +CAACAATTGCATTGAAAGCATAAGGAACGGGACATATGACCATGATATTTACAGAGACGAGGCAATCAAT +AACAGATTCCAGATACAAGGAGTTAAATTGACTCAAGGATACAAGGACATCATTCTCTGGATTTCCTTTT +CCATATCATGCTTCTTGCTCGTTGCACTACTTTTAGCCTTTATTTTGTGGGCTTGTCAGAATGGAAACAT +CCGGTGTCAGATTTGCATTTAAAGAAAAAACAC + diff --git a/data/flu/HA/ha_h5_h5n1/NC_007362.1/unreleased/dataset.zip b/data/flu/HA/ha_h5_h5n1/NC_007362.1/unreleased/dataset.zip new file mode 100644 index 0000000..94b0d03 Binary files /dev/null and b/data/flu/HA/ha_h5_h5n1/NC_007362.1/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h5_h5n1/NC_007362.1/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h5_h5n1/NC_007362.1/unreleased/genome_annotation.gff3 new file mode 100644 index 
0000000..b403e1c --- /dev/null +++ b/data/flu/HA/ha_h5_h5n1/NC_007362.1/unreleased/genome_annotation.gff3 @@ -0,0 +1,11 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region NC_007362.1 1 1760 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=93838 +NC_007362.1 RefSeq region 1 1760 . + . ID=NC_007362.1:1..1760;Dbxref=taxon:93838;Name=4;gbkey=Src;genome=genomic;mol_type=viral cRNA;old-name=Influenza A virus (A/Goose/Guangdong/1/96(H5N1));segment=4;serotype=H5N1;strain=A/goose/Guangdong/1/1996 +NC_007362.1 RefSeq gene 22 1728 . + . ID=gene-FLUAVH5N1_s4gp1;Dbxref=GeneID:3654620;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding;locus_tag=FLUAVH5N1_s4gp1 +NC_007362.1 RefSeq CDS 22 1728 . + 0 ID=cds-YP_308669.1;Parent=gene-FLUAVH5N1_s4gp1;Dbxref=GenBank:YP_308669.1,GeneID:3654620;Name=HA;gbkey=CDS;locus_tag=FLUAVH5N1_s4gp1;product=hemagglutinin;protein_id=YP_308669.1 +NC_007362.1 RefSeq signal_peptide_region_of_CDS 22 69 . + . ID=id-YP_308669.1:1..16;Parent=cds-YP_308669.1;gbkey=Prot +NC_007362.1 RefSeq mature_protein_region_of_CDS 70 1059 . + . ID=id-YP_308669.1:17..346;Parent=cds-YP_308669.1;gbkey=Prot;product=HA1;protein_id=YP_529486.1 +NC_007362.1 RefSeq mature_protein_region_of_CDS 1060 1725 . + . 
ID=id-YP_308669.1:347..568;Parent=cds-YP_308669.1;gbkey=Prot;product=HA2;protein_id=YP_529487.1 diff --git a/data/flu/HA/ha_h5_h5n1/NC_007362.1/unreleased/pathogen.json b/data/flu/HA/ha_h5_h5n1/NC_007362.1/unreleased/pathogen.json new file mode 100644 index 0000000..dc42ac8 --- /dev/null +++ b/data/flu/HA/ha_h5_h5n1/NC_007362.1/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h5_h5n1", + "reference name": "Influenza A virus (A/goose/Guangdong/1/1996(H5N1)) hemagglutinin (HA) gene, complete cds", + "reference accession": "NC_007362.1" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h5_h5n1/NC_007362.1/unreleased/reference.fasta b/data/flu/HA/ha_h5_h5n1/NC_007362.1/unreleased/reference.fasta new file mode 100644 index 0000000..344aa03 --- /dev/null +++ b/data/flu/HA/ha_h5_h5n1/NC_007362.1/unreleased/reference.fasta @@ -0,0 +1,28 @@ +>NC_007362.1 Influenza A virus (A/goose/Guangdong/1/1996(H5N1)) hemagglutinin (HA) gene, complete cds +GCAGGGGTATAATCTGTCAAAATGGAGAAAATAGTGCTTCTTCTTGCAATAGTCAGTCTTGTCAAAAGTG +ATCAGATTTGCATTGGTTACCATGCAAACAACTCGACAGAGCAGGTTGACACAATAATGGAAAAGAACGT +TACTGTTACACATGCCCAAGACATACTGGAAAAGACACACAATGGGAAGCTCTGCGATCTAAATGGAGTG +AAGCCTCTCATTTTGAGAGATTGTAGTGTAGCTGGATGGCTCCTCGGAAACCCTATGTGTGACGAATTCA +TCAATGTGCCGGAATGGTCTTACATAGTGGAGAAGGCCAGTCCAGCCAATGACCTCTGTTACCCAGGGGA +TTTCAACGACTATGAAGAACTGAAACACCTATTGAGCAGAACAAACCATTTTGAGAAAATTCAGATCATC +CCCAAAAGTTCTTGGTCCAATCATGATGCCTCATCAGGGGTGAGCTCAGCATGTCCATACCATGGGAGGT +CCTCCTTTTTCAGAAATGTGGTATGGCTTATCAAAAAGAACAGTGCATACCCAACAATAAAGAGGAGCTA +CAATAATACCAACCAAGAAGATCTTTTAGTACTGTGGGGGATTCACCATCCTAATGATGCGGCAGAGCAG +ACAAAGCTCTATCAAAACCCAACCACTTACATTTCCGTTGGAACATCAACACTGAACCAGAGATTGGTTC +CAGAAATAGCTACTAGACCCAAAGTAAACGGGCAAAGTGGAAGAATGGAGTTCTTCTGGACAATTTTAAA 
+GCCGAATGATGCCATCAATTTCGAGAGTAATGGAAATTTCATTGCTCCAGAATATGCATACAAAATTGTC +AAGAAAGGGGACTCAGCAATTATGAAAAGTGAATTGGAATATGGTAACTGCAACACCAAGTGTCAAACTC +CAATGGGGGCGATAAACTCTAGTATGCCATTCCACAACATACACCCCCTCACCATCGGGGAATGCCCCAA +ATATGTGAAATCAAACAGATTAGTCCTTGCGACTGGACTCAGAAATACCCCTCAGAGAGAGAGAAGAAGA +AAAAAGAGAGGACTATTTGGAGCTATAGCAGGTTTTATAGAGGGAGGATGGCAGGGAATGGTAGATGGTT +GGTATGGGTACCACCATAGCAATGAGCAGGGGAGTGGATACGCTGCAGACAAAGAATCCACTCAAAAGGC +AATAGATGGAGTCACCAATAAGGTCAACTCGATCATTGACAAAATGAACACTCAGTTTGAGGCCGTTGGA +AGGGAATTTAATAACTTGGAAAGGAGGATAGAGAATTTAAACAAGCAGATGGAAGACGGATTCCTAGATG +TCTGGACTTATAATGCTGAACTTCTGGTTCTCATGGAAAATGAGAGAACTCTAGACTTTCATGACTCAAA +TGTCAAGAACCTTTATGACAAGGTCCGACTACAGCTTAGGGATAATGCAAAGGAGCTGGGTAATGGTTGT +TTCGAGTTCTATCACAAATGTGATAATGAATGTATGGAAAGTGTAAAAAACGGAACGTATGACTACCCGC +AGTATTCAGAAGAAGCAAGACTAAACAGAGAGGAAATAAGTGGAGTAAAATTGGAATCAATGGGAACTTA +CCAAATACTGTCAATTTATTCAACAGTGGCGAGTTCCCTAGCACTGGCAATCATGGTAGCTGGTCTATCT +TTATGGATGTGCTCCAATGGATCGTTACAATGCAGAATTTGCATTTAAATTTGTGAGTTCAGATTGTAGT +TAAAAACACC + diff --git a/data/flu/HA/ha_h5_h5n2/KU143256/unreleased/dataset.zip b/data/flu/HA/ha_h5_h5n2/KU143256/unreleased/dataset.zip new file mode 100644 index 0000000..cb9bdf6 Binary files /dev/null and b/data/flu/HA/ha_h5_h5n2/KU143256/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h5_h5n2/KU143256/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h5_h5n2/KU143256/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..4bc6e4f --- /dev/null +++ b/data/flu/HA/ha_h5_h5n2/KU143256/unreleased/genome_annotation.gff3 @@ -0,0 +1,11 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region KU143256.1 1 1752 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1756060 +KU143256.1 Genbank region 1 1752 . + . 
ID=KU143256.1:1..1752;Dbxref=taxon:1756060;Name=4;collection-date=Dec-2014;country=China;gbkey=Src;genome=genomic;isolation-source=chicken fecals;mol_type=viral cRNA;nat-host=chicken;segment=4;serotype=H5N2;strain=A/chicken/Wuhan/WHJF/2014 +KU143256.1 Genbank gene 25 1719 . + . ID=gene-HA;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding +KU143256.1 Genbank CDS 25 1719 . + 0 ID=cds-ALR82540.1;Parent=gene-HA;Dbxref=NCBI_GP:ALR82540.1;Name=HA;gbkey=CDS;product=hemagglutinin;protein_id=ALR82540.1 +KU143256.1 Genbank signal_peptide_region_of_CDS 25 72 . + . ID=id-ALR82540.1:1..16;Parent=cds-ALR82540.1;gbkey=Prot +KU143256.1 Genbank mature_protein_region_of_CDS 73 1050 . + . ID=id-ALR82540.1:17..342;Parent=cds-ALR82540.1;gbkey=Prot;product=HA1 +KU143256.1 Genbank mature_protein_region_of_CDS 1051 1716 . + . ID=id-ALR82540.1:343..564;Parent=cds-ALR82540.1;gbkey=Prot;product=HA2 diff --git a/data/flu/HA/ha_h5_h5n2/KU143256/unreleased/pathogen.json b/data/flu/HA/ha_h5_h5n2/KU143256/unreleased/pathogen.json new file mode 100644 index 0000000..0bd74be --- /dev/null +++ b/data/flu/HA/ha_h5_h5n2/KU143256/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h5_h5n2", + "reference name": "Influenza A virus (A/chicken/Wuhan/WHJF/2014(H5N2)) segment 4 hemagglutinin (HA) gene, complete cds", + "reference accession": "KU143256" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h5_h5n2/KU143256/unreleased/reference.fasta b/data/flu/HA/ha_h5_h5n2/KU143256/unreleased/reference.fasta new file mode 100644 index 0000000..dbd7f88 --- /dev/null +++ b/data/flu/HA/ha_h5_h5n2/KU143256/unreleased/reference.fasta @@ -0,0 +1,28 @@ +>KU143256.1 Influenza A virus (A/chicken/Wuhan/WHJF/2014(H5N2)) segment 4 hemagglutinin (HA) gene, complete cds 
+AAAGCAGGGGTCTAATCTGTCAAAATGGAGAAAATAGTGCTTCTTCTTGCAATAGTCAGCCTTGTTAGAA +GTGATCAGATTTGCATTGGTTATCATGCAAACAACTCGACAGAACAGGTCGACACAATAATGGAAAAGAA +TGTTACTGTCACACATGCCCAGGACATACTAGAAAGGACACACAACGGGAAGCTCTGCAGCCTAAATGGA +GTGAAGCCTCTCATTCTGAGGGATTGTAGTGTAGCTGGATGGTTACTTGGAAACCCCATGTGTGACGAAT +TCCTCAATGTGCCAGAGTGGTCTTACATAGTGGAGAAGGACAATCCAGTCAATGGCCTCTGCTATCCAGG +GGACTTCAATGACTATGAAGAACTAAAACACCTATTGAGTAGCACAAACCATTTTGAGAAAATTCAAATC +ATCCCCAGAAGTTCCTGGTCTAATCATGATGCCTCATCAGGGGTGAGCTCTGCATGCCCATATAATGGGA +GGTCCTCTTTTTTTCGAAATGTGGTGTGGCTTATCAAAAAGAACAATGCGTACCCAACAATAAAGAAGAG +TTACAACAATACCAATCAAGAAGATCTTCTGGTGCTGTGGGGGATTCATCACCCTAATGACGCAACAGAG +CAGACAAAGCTCTATCAAAACCCAACCACTTACGTTTCAGTTGGAACATCAACACTGAACCAGAGATCGA +TACCAGAAATAGCTACTAGACCCAAAGTAAACGGGCAAAGCGGAAGAATGGAGTTCTTCTGGACAATTTT +AAAGCCGAATGATGCCATCAATTTTGAGAGTAATGGAAATTTTATTGCTCCAGAATATGCATACAAAATT +GTCAAGAAGGGGGACTCAGCAATCATGAAAAGTGGCTTGGAATATGGCAACTGCAACACCAAGTGTCAAA +CTCCAATGGGTGCAATAAACTCTAGCATGCCATTCCACAACATACATCCTCTCACCATTGGGGAATGTCC +CAAATACGTGAAATCAGATAGATTGGTCCTTGCGACTGGACTCAGGAATGTCCCTCAGAGGGAAACAAGA +GGACTATTTGGGGCTATAGCAGGCTTTATAGAAGGAGGGTGGCAAGGCATGGTAGATGGTTGGTATGGGT +ACCACCATAGCAATGAGCAGGGGAGTGGATACGCTGCAGACAAGGAGTCCACTCAGAAAGCAATAGATGG +AATCACTAATAAGGTCAACTCAATCATTGACAAGATGAACACTCAATTTGAGGCCGTTGGAAAGGAATTC +AACAATCTGGAAAGAAGGATAGAGAATCTAAACAAGAAAATGGAAGACGGATTTCTAGATGTATGGACTT +ACAACGCTGAACTTCTGGTTCTCATGGAAAATGAGAGGACTCTAGACTTTCATGATTCGAATGTCAAGAA +CCTTTATGACAAGGTTCGACTGCAGCTTAGAGATAATGCAAAGGAACTGGGTAACGGTTGTTTCGAGTTC +TATCACAAATGTGATAATGAGTGTATGGAAAGTGTAAGAAACGGAACATATAATTACCCGCAGTATTCAG +AAGAAGCAAGACTGAATAGAGAGGAAATAAGTGGAGTAAAGTTGGAATCAATGGGAACTTACCAAATACT +GTCAATTTATTCAACAGTGGCGAGTTCCCTAGCACTGGCAATCATGATAGCTGGTCTATCTTTCTGGATG +TGCTCCAATGGATCATTGCAGTGCAGAATTTGCATTTAAACTTGTGAGTTCAGATTGTAGTTAAAAACAC +CC + diff --git a/data/flu/HA/ha_h6_h6n2/CY130030/unreleased/dataset.zip b/data/flu/HA/ha_h6_h6n2/CY130030/unreleased/dataset.zip new file mode 100644 index 0000000..4a7bafe Binary files /dev/null 
and b/data/flu/HA/ha_h6_h6n2/CY130030/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h6_h6n2/CY130030/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h6_h6n2/CY130030/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..2d71636 --- /dev/null +++ b/data/flu/HA/ha_h6_h6n2/CY130030/unreleased/genome_annotation.gff3 @@ -0,0 +1,12 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY130030.1 1 1714 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=402473 +CY130030.1 Genbank region 1 1714 . + . ID=CY130030.1:1..1714;Dbxref=taxon:402473;Name=4;bio-material=CEIRS#162849#;collection-date=1965;country=USA: Massachusetts;gbkey=Src;mol_type=viral cRNA;nat-host=turkey;segment=4;serotype=H6N2;strain=A/turkey/Massachusetts/3740/1965 +CY130030.1 Genbank sequence_feature 1 1714 . + . ID=id-CY130030.1:1..1714;Dbxref=IRD:NIGSP_CEIRS_SJC001_WEB_00006.HA;gbkey=misc_feature +CY130030.1 Genbank gene 4 1707 . + . ID=gene-HA;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding +CY130030.1 Genbank CDS 4 1707 . + 0 ID=cds-AGB50905.1;Parent=gene-HA;Dbxref=NCBI_GP:AGB50905.1;Name=HA;gbkey=CDS;product=hemagglutinin;protein_id=AGB50905.1 +CY130030.1 Genbank signal_peptide_region_of_CDS 4 51 . + . ID=id-AGB50905.1:1..16;Parent=cds-AGB50905.1;gbkey=Prot +CY130030.1 Genbank mature_protein_region_of_CDS 52 1038 . + . ID=id-AGB50905.1:17..345;Parent=cds-AGB50905.1;gbkey=Prot;product=HA1 +CY130030.1 Genbank mature_protein_region_of_CDS 1039 1704 . + . 
ID=id-AGB50905.1:346..567;Parent=cds-AGB50905.1;gbkey=Prot;product=HA2 diff --git a/data/flu/HA/ha_h6_h6n2/CY130030/unreleased/pathogen.json b/data/flu/HA/ha_h6_h6n2/CY130030/unreleased/pathogen.json new file mode 100644 index 0000000..365142b --- /dev/null +++ b/data/flu/HA/ha_h6_h6n2/CY130030/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h6_h6n2", + "reference name": "Influenza A virus (A/turkey/Massachusetts/3740/1965(H6N2)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY130030" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h6_h6n2/CY130030/unreleased/reference.fasta b/data/flu/HA/ha_h6_h6n2/CY130030/unreleased/reference.fasta new file mode 100644 index 0000000..64f1f5a --- /dev/null +++ b/data/flu/HA/ha_h6_h6n2/CY130030/unreleased/reference.fasta @@ -0,0 +1,27 @@ +>CY130030.1 Influenza A virus (A/turkey/Massachusetts/3740/1965(H6N2)) hemagglutinin (HA) gene, complete cds +AAAATGATTGCAATCATAATAATCGCGGTAGTGGCCTCTACCAGCAAATCAGACAAGATCTGCATTGGGT +ATCATGCCAACAACTCGACAACACAAGTGGACACAATATTAGAGAAGAATGTGACAGTGACGCACTCTGT +AGAGCTCCTAGAAAGTCAGAAGGAGGAGAGATTCTGCAGAGTGTTGAATAAAACACCTCTGGATCTAAAG +GGTTGCACCATTGAAGGATGGATTCTTGGAAACCCCCAATGTGACATCTTACTTGGTGACCAAAGTTGGT +CATACATAGTAGAGAGGCCTGGAGCCCAAAATGGGATATGTTACCCAGGGGTGCTGAACGAAGTGGAAGA +ACTGAAAGCATTCATTGGGTCCGGAGAGAAAGTACAGAGATTTGAAATGTTTCCCAAGAGCACGTGGACC +GGAGTGGACACTAACAGTGGAGTTACGAGAGCTTGCCCCTATACTACCAGTGGATCATCCTTTTACAGGA +ATCTTTTGTGGATAATAAAAACAAGGTCTGCTGCATACCCAGTAATTAAGGGAACATACAATAATACTGG +CTCCCAGCCAATCCTATATTTCTGGGGTGTGCATCATCCTCCAAATACCGATGAGCAAAATACCTTATAT +GGCTCTGGTGACAGGTATGTTAGAATGGGAACTGAAAGCATGAATTTTGCCAAGAGTCCTGAAATAGCAG +CCAGGCCAGCTGTGAATGGGCAAAGAGGAAGAATTGATTATTATTGGTCTGTACTGAAACCAGGAGAAAC 
+CTTAAATGTAGAATCCAATGGAAATTTAATAGCTCCTTGGTATGCTTACAAGTTCACAAGTTCCAACAAC +AAAGGAGCTATCTTCAAATCAAACCTCCCAATTGAGAATTGTGATGCTGTATGTCAAACTGTTGCTGGAG +CACTAAAGACAAACAAAACTTTCCAAAATGTTAGTCCACTCTGGATTGGAGAATGTCCCAAATATGTTAA +GAGTGAGAGCCTAAGACTGGCAACTGGTCTGAGGAATGTCCCACAGGCAGAAACAAGAGGATTGTTTGGA +GCCATAGCTGGGTTTATAGAAGGAGGGTGGACAGGTATGATAGACGGATGGTACGGGTACCATCATGAGA +ACTCACAGGGGTCGGGTTATGCAGCAGATAAAGAAAGTACCCAGAAAGCAATTGACGGGATCACCAATAA +AGTAAATTCCATCATTGACAAGATGAACACACAGTTTGAAGCAGTAGAGCATGAGTTCTCAAATCTCGAA +AGGAGAATAGACAATTTAAACAAAAGAATGGAAGATGGATTTTTGGATGTGTGGACGTACAATGCTGAAC +TTTTAGTTCTACTGGAAAATGAAAGGACCCTGGATCTGCACGATGCCAATGTGAAGAACCTATACGAGAA +GGTGAAATCACAATTGAGAGATAATGCAAAGGATTTGGGTAATGGGTGTTTTGAATTTTGGCACAAATGC +GACGATGAATGCATCAACTCAGTTAAGAATGGCACATACGATTACCCAAAGTACCAAGACGAGAGCAAAC +TTAACAGACAGGAGATAGACTCAGTGAAGCTGGAAAATCTGGGCGTATATCAAATTCTTGCTATTTATAG +TACGGTATCGAGCAGTCTAGTTTTGGTGGGGCTGATCATTGCCATGGGTCTTTGGATGTGCTCAAATGGC +TCAATGCAATGCAGGATATGTATATAATTAGGAA + diff --git a/data/flu/HA/ha_h7_h7n9/NC_026425.1/unreleased/dataset.zip b/data/flu/HA/ha_h7_h7n9/NC_026425.1/unreleased/dataset.zip new file mode 100644 index 0000000..b33f53e Binary files /dev/null and b/data/flu/HA/ha_h7_h7n9/NC_026425.1/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h7_h7n9/NC_026425.1/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h7_h7n9/NC_026425.1/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..0479d6b --- /dev/null +++ b/data/flu/HA/ha_h7_h7n9/NC_026425.1/unreleased/genome_annotation.gff3 @@ -0,0 +1,11 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region NC_026425.1 1 1708 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1332244 +NC_026425.1 RefSeq region 1 1708 . + . 
ID=NC_026425.1:1..1708;Dbxref=taxon:1332244;Name=4;collection-date=05-Mar-2013;country=China;gbkey=Src;genome=genomic;mol_type=viral cRNA;nat-host=Homo sapiens;note=passage details: E1;segment=4;serotype=H7N9;strain=A/Shanghai/02/2013 +NC_026425.1 RefSeq gene 1 1683 . + . ID=gene-TS66_s4gp1;Dbxref=GeneID:23104227;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding;locus_tag=TS66_s4gp1 +NC_026425.1 RefSeq CDS 1 1683 . + 0 ID=cds-YP_009118475.1;Parent=gene-TS66_s4gp1;Dbxref=GenBank:YP_009118475.1,GeneID:23104227;Name=HA;gbkey=CDS;locus_tag=TS66_s4gp1;product=hemagglutinin;protein_id=YP_009118475.1 +NC_026425.1 RefSeq signal_peptide_region_of_CDS 1 54 . + . ID=id-YP_009118475.1:1..18;Parent=cds-YP_009118475.1;gbkey=Prot +NC_026425.1 RefSeq mature_protein_region_of_CDS 55 1017 . + . ID=id-YP_009118475.1:19..339;Parent=cds-YP_009118475.1;gbkey=Prot;product=HA1;protein_id=YP_009118482.1 +NC_026425.1 RefSeq mature_protein_region_of_CDS 1018 1680 . + . ID=id-YP_009118475.1:340..560;Parent=cds-YP_009118475.1;gbkey=Prot;product=HA2;protein_id=YP_009118483.1 diff --git a/data/flu/HA/ha_h7_h7n9/NC_026425.1/unreleased/pathogen.json b/data/flu/HA/ha_h7_h7n9/NC_026425.1/unreleased/pathogen.json new file mode 100644 index 0000000..0b90209 --- /dev/null +++ b/data/flu/HA/ha_h7_h7n9/NC_026425.1/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h7_h7n9", + "reference name": "Influenza A virus (A/Shanghai/02/2013(H7N9)) segment 4 hemagglutinin (HA) gene, complete cds", + "reference accession": "NC_026425.1" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h7_h7n9/NC_026425.1/unreleased/reference.fasta b/data/flu/HA/ha_h7_h7n9/NC_026425.1/unreleased/reference.fasta new file mode 100644 index 0000000..93d4c91 --- /dev/null +++ 
b/data/flu/HA/ha_h7_h7n9/NC_026425.1/unreleased/reference.fasta @@ -0,0 +1,27 @@ +>NC_026425.1 Influenza A virus (A/Shanghai/02/2013(H7N9)) segment 4 hemagglutinin (HA) gene, complete cds +ATGAACACTCAAATCCTGGTATTCGCTCTGATTGCGATCATTCCAACAAATGCAGACAAAATCTGCCTCG +GACATCATGCCGTGTCAAACGGAACCAAAGTAAACACATTAACTGAAAGAGGAGTGGAAGTCGTCAATGC +AACTGAAACAGTGGAACGAACAAACATCCCCAGGATCTGCTCAAAAGGGAAAAGGACAGTTGACCTCGGT +CAATGTGGACTCCTGGGGACAATCACTGGACCACCTCAATGTGACCAATTCCTAGAATTTTCAGCCGATT +TAATTATTGAGAGGCGAGAAGGAAGTGATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAG +GCAAATTCTCAGAGAATCAGGCGGAATTGACAAGGAAGCAATGGGATTCACATACAGTGGAATAAGAACT +AATGGAGCAACCAGTGCATGTAGGAGATCAGGATCTTCATTCTATGCAGAAATGAAATGGCTCCTGTCAA +ACACAGATAATGCTGCATTCCCGCAGATGACTAAGTCATATAAAAATACAAGAAAAAGCCCAGCTCTAAT +AGTATGGGGGATCCATCATTCCGTATCAACTGCAGAGCAAACCAAGCTATATGGGAGTGGAAACAAACTG +GTGACAGTTGGGAGTTCTAATTATCAACAATCTTTTGTACCGAGTCCAGGAGCGAGACCACAAGTTAATG +GTCTATCTGGAAGAATTGACTTTCATTGGCTAATGCTAAATCCCAATGATACAGTCACTTTCAGTTTCAA +TGGGGCTTTCATAGCTCCAGACCGTGCAAGCTTCCTGAGAGGAAAATCTATGGGAATCCAGAGTGGAGTA +CAGGTTGATGCCAATTGTGAAGGGGACTGCTATCATAGTGGAGGGACAATAATAAGTAACTTGCCATTTC +AGAACATAGATAGCAGGGCAGTTGGAAAATGTCCGAGATATGTTAAGCAAAGGAGTCTGCTGCTAGCAAC +AGGGATGAAGAATGTTCCTGAGATTCCAAAAGGAAGAGGCCTATTTGGTGCTATAGCGGGTTTCATTGAA +AATGGATGGGAAGGCCTAATTGATGGTTGGTATGGTTTCAGACACCAGAATGCACAGGGAGAGGGAACTG +CTGCAGATTACAAAAGCACTCAATCGGCAATTGATCAAATAACAGGAAAATTAAACCGGCTTATAGAAAA +AACCAACCAACAATTTGAGTTGATAGACAATGAATTCAATGAGGTAGAGAAGCAAATCGGTAATGTGATA +AATTGGACCAGAGATTCTATAACAGAAGTGTGGTCATACAATGCTGAACTCTTGGTAGCAATGGAGAACC +AGCATACAATTGATCTGGCTGATTCAGAAATGGACAAACTGTACGAACGAGTGAAAAGACAGCTGAGAGA +GAATGCTGAAGAAGATGGCACTGGTTGCTTTGAAATATTTCACAAGTGTGATGATGACTGTATGGCCAGT +ATTAGAAATAACACCTATGATCACAGCAAATACAGGGAAGAGGCAATGCAAAATAGAATACAGATTGACC +CAGTCAAACTAAGCAGCGGCTACAAAGATGTGATACTTTGGTTTAGCTTCGGGGCATCATGTTTCATACT +TCTAGCCATTGTAATGGGCCTTGTCTTCATATGTGTAAAGAATGGAAACATGCGGTGCACTATTTGTATA +TAAGTTTGGAAAAAACACCCTTGTTTCT + diff --git 
a/data/flu/HA/ha_h8_h8n4/CY136131/unreleased/dataset.zip b/data/flu/HA/ha_h8_h8n4/CY136131/unreleased/dataset.zip new file mode 100644 index 0000000..477c508 Binary files /dev/null and b/data/flu/HA/ha_h8_h8n4/CY136131/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h8_h8n4/CY136131/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h8_h8n4/CY136131/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..8fcfae2 --- /dev/null +++ b/data/flu/HA/ha_h8_h8n4/CY136131/unreleased/genome_annotation.gff3 @@ -0,0 +1,12 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY136131.1 1 1719 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1228846 +CY136131.1 Genbank region 1 1719 . + . ID=CY136131.1:1..1719;Dbxref=taxon:1228846;Name=4;bio-material=CEIRS#9BM8109#;country=USA: Alaska;gbkey=Src;lab-host=R0 passage(s);mol_type=viral cRNA;nat-host=northern pintail%3B gender M%3B age hatch year;note=Sample provided by University of Alaska Fairbanks;segment=4;serotype=H8N4;strain=A/northern pintail/Interior Alaska/9BM8109R0/2009 +CY136131.1 Genbank sequence_feature 1 1719 . + . ID=id-CY136131.1:1..1719;Dbxref=IRD:NIGSP_CEIRS_CIP055_AK2_00220.HA;gbkey=misc_feature +CY136131.1 Genbank gene 8 1708 . + . ID=gene-HA;Name=HA;gbkey=Gene;gene=HA;gene_biotype=protein_coding +CY136131.1 Genbank CDS 8 1708 . + 0 ID=cds-AGG26254.1;Parent=gene-HA;Dbxref=NCBI_GP:AGG26254.1;Name=HA;gbkey=CDS;product=hemagglutinin;protein_id=AGG26254.1 +CY136131.1 Genbank signal_peptide_region_of_CDS 8 58 . + . ID=id-AGG26254.1:1..17;Parent=cds-AGG26254.1;gbkey=Prot +CY136131.1 Genbank mature_protein_region_of_CDS 59 1039 . + . ID=id-AGG26254.1:18..344;Parent=cds-AGG26254.1;gbkey=Prot;product=HA1 +CY136131.1 Genbank mature_protein_region_of_CDS 1040 1705 . + . 
ID=id-AGG26254.1:345..566;Parent=cds-AGG26254.1;gbkey=Prot;product=HA2 diff --git a/data/flu/HA/ha_h8_h8n4/CY136131/unreleased/pathogen.json b/data/flu/HA/ha_h8_h8n4/CY136131/unreleased/pathogen.json new file mode 100644 index 0000000..2dbb647 --- /dev/null +++ b/data/flu/HA/ha_h8_h8n4/CY136131/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h8_h8n4", + "reference name": "Influenza A virus (A/northern pintail/Interior Alaska/9BM8109R0/2009(H8N4)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY136131" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h8_h8n4/CY136131/unreleased/reference.fasta b/data/flu/HA/ha_h8_h8n4/CY136131/unreleased/reference.fasta new file mode 100644 index 0000000..8b77b10 --- /dev/null +++ b/data/flu/HA/ha_h8_h8n4/CY136131/unreleased/reference.fasta @@ -0,0 +1,27 @@ +>CY136131.1 Influenza A virus (A/northern pintail/Interior Alaska/9BM8109R0/2009(H8N4)) hemagglutinin (HA) gene, complete cds +GGTCACAATGGAGAAGTTTATCGCAATAGCAATGCTCTTGGCGAGCACAAATGCATACGATAGGATATGC +ATTGGTTACCAATCGAACAACTCCACAGACACGGTGAATACTCTTATAGAACAGAATGTGCCGGTCACTC +AAACAATGGAGCTTGTGGAAACAGAGAAACACCCCGCTTATTGTAATACTGACTTAGGAACACCATTGGA +ACTGCGAGACTGCAAAATTGAGGCGGTAATCTACGGGAATCCCAAGTGTGACATCCATCTAAAGGATCAA +GGTTGGTCATACATAGTGGAGAGGCCCAGCGCGCCAGAGGGAATGTGTTATCCTGGATCGGTAGAAAATC +TAGAGGAACTGAGATTTGTCTTTTCCAATGCGGCATCCTACAAGAGAATAAGACTATTTGACTATTCCAG +GTGGAATGTAACCAGCTCTGGGACCAGCAAGGCATGCAATGCATCAACAGGTGGTCAATCCTTTTATAGG +AGCATTAATTGGTTGACCAAAAAGAAACCAGACACTTATGATTTCAATGAAGGAAGCTATGTCAACAATG +AAGATGGAGACATCATTTTCCTATGGGGGATCCATCATCCACCTGATACAAAAGAGCAAACGACGCTGTA +CAAGAATGCAAACACTTTGAGTAGTGTTACTACCAACACCATAAACAGAAGCTTTCAACCCAATATCGGT +CCAAGACCATTAGTCAGAGGACAACAAGGAAGAATGGATTACTATTGGGGCATTCTGAAAAGAGGGGAGA 
+CTCTGAAAATCAGGACCAATGGAAATTTAATTGCACCTGAATTTGGATATCTACTTAAGGGTGAAAGCCA +TGGCAGAATAATTCAAAATGAGGACATACCCATTGGGAACTGTCACACAAAATGTCAGACATATGCAGGA +GCAATCAATAGCAGCAAACCCTTTCAGAATGCAAGCAGACATTATATGGGGGAATGTCCCAAATATGTAA +AGAAGGCAAGCTTACGACTCGCAGTGGGTCTTAGAAATACACCTTCTATTGAGCCCAAAGGGCTATTCGG +AGCCATTGCCGGTTTTATCGAAGGAGGGTGGTCTGGAATGATTGATGGATGGTATGGATTTCATCACAGT +AACTCAGAGGGAACAGGAATGGCAGCTGACCAAAAGTCAACACAGGAAGCCATCGATAAGATCACCAATA +AAGTCAATAATATAGTCGACAAGATGAACAGAGAGTTTGAAGTTGTGAATCATGAGTTCCCTGAAGTTGA +AAAAAGGATAAACATGATAAATGACAAAATAGATGACCAAATTGAAGACCTTTGGGCTTACAACGCAGAA +CTTCTTGTACTTCTAGAAAACCAGAAAACACTAGACGAACATGACTCCAATGTCAAGAACCTCTTTGATG +AAGTGAAAAGGAGGTTGTCAACCAATGCAATAGATGCTGGGAACGGTTGCTTCGACATACTTCACAAATG +CAACAATGAATGTATGGAAACTATAAAGAATGGGACTTACAATCATAAGGAGTATGAAGAGGAAGCTAAA +CTAGAAAGGAGCAAAATAAATGGGGTGAAACTGGAAGAGAACACCACTTACAAAATTCTCAGCATTTACA +GTACAGTGGCGGCCAGTCTCTGCTTGGCAATCCTGATTGCTGGAGGTTTAATCCTGGGTATGCAAAATGG +ATCTTGTAGATGCATGTTCTGTATTTAAAGAAAAAACAC + diff --git a/data/flu/HA/ha_h9_h9n2/NC_004908.1/unreleased/dataset.zip b/data/flu/HA/ha_h9_h9n2/NC_004908.1/unreleased/dataset.zip new file mode 100644 index 0000000..05412fc Binary files /dev/null and b/data/flu/HA/ha_h9_h9n2/NC_004908.1/unreleased/dataset.zip differ diff --git a/data/flu/HA/ha_h9_h9n2/NC_004908.1/unreleased/genome_annotation.gff3 b/data/flu/HA/ha_h9_h9n2/NC_004908.1/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..242c8b9 --- /dev/null +++ b/data/flu/HA/ha_h9_h9n2/NC_004908.1/unreleased/genome_annotation.gff3 @@ -0,0 +1,8 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region NC_004908.1 1 1714 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=130760 +NC_004908.1 RefSeq region 1 1714 . + . ID=NC_004908.1:1..1714;Dbxref=taxon:130760;Name=4;gbkey=Src;genome=genomic;mol_type=genomic RNA;note=subtype H9N2;segment=4;strain=A/Hong Kong/1073/99 +NC_004908.1 RefSeq gene 32 1714 . + . 
ID=gene-FLUAVAHHH9N2s4gp1;Dbxref=GeneID:1460996;Name=ha;gbkey=Gene;gene=ha;gene_biotype=protein_coding;locus_tag=FLUAVAHHH9N2s4gp1 +NC_004908.1 RefSeq CDS 32 1714 . + 0 ID=cds-NP_859037.1;Parent=gene-FLUAVAHHH9N2s4gp1;Dbxref=GOA:Q9ICY5,InterPro:IPR000149,InterPro:IPR001364,InterPro:IPR008980,InterPro:IPR013827,InterPro:IPR013828,InterPro:IPR013829,UniProtKB/TrEMBL:Q9ICY5,GenBank:NP_859037.1,GeneID:1460996;Name=HA;gbkey=CDS;locus_tag=FLUAVAHHH9N2s4gp1;product=Hemagglutinin;protein_id=NP_859037.1 diff --git a/data/flu/HA/ha_h9_h9n2/NC_004908.1/unreleased/pathogen.json b/data/flu/HA/ha_h9_h9n2/NC_004908.1/unreleased/pathogen.json new file mode 100644 index 0000000..e98a3a1 --- /dev/null +++ b/data/flu/HA/ha_h9_h9n2/NC_004908.1/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "ha_h9_h9n2", + "reference name": "Influenza A virus ha gene for Hemagglutinin, genomic RNA, strain A/Hong Kong/1073/99(H9N2)", + "reference accession": "NC_004908.1" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/HA/ha_h9_h9n2/NC_004908.1/unreleased/reference.fasta b/data/flu/HA/ha_h9_h9n2/NC_004908.1/unreleased/reference.fasta new file mode 100644 index 0000000..0624be6 --- /dev/null +++ b/data/flu/HA/ha_h9_h9n2/NC_004908.1/unreleased/reference.fasta @@ -0,0 +1,27 @@ +>NC_004908.1 Influenza A virus ha gene for Hemagglutinin, genomic RNA, strain A/Hong Kong/1073/99(H9N2) +GCAAAAGCAGGGGAATTACTTAACTAGCAAAATGGAAACAATATCACTAATAACTATACTACTAGTAGTA +ACAGCAAGCAATGCAGATAAAATCTGCATCGGCCACCAGTCAACAAACTCCACAGAAACTGTGGACACGC +TAACAGAAACCAATGTTCCTGTGACACATGCCAAAGAATTGCTCCACACAGAGCATAATGGAATGCTGTG +TGCAACAAGCCTGGGACATCCCCTCATTCTAGACACATGCACTATTGAAGGACTAGTCTATGGCAACCCT +TCTTGTGACCTGCTGTTGGGAGGAAGAGAATGGTCCTACATCGTCGAAAGATCATCAGCTGTAAATGGAA 
+CGTGTTACCCTGGGAATGTAGAAAACCTAGAGGAACTCAGGACACTTTTTAGTTCCGCTAGTTCCTACCA +AAGAATCCAAATCTTCCCAGACACAACCTGGAATGTGACTTACACTGGAACAAGCAGAGCATGTTCAGGT +TCATTCTACAGGAGTATGAGATGGCTGACTCAAAAGAGCGGTTTTTACCCTGTTCAAGACGCCCAATACA +CAAATAACAGGGGAAAGAGCATTCTTTTCGTGTGGGGCATACATCACCCACCCACCTATACCGAGCAAAC +AAATTTGTACATAAGAAACGACACAACAACAAGCGTGACAACAGAAGATTTGAATAGGACCTTCAAACCA +GTGATAGGGCCAAGGCCCCTTGTCAATGGTCTGCAGGGAAGAATTGATTATTATTGGTCGGTACTAAAAC +CAGGCCAAACATTGCGAGTACGATCCAATGGGAATCTAATTGCTCCATGGTATGGACACGTTCTTTCAGG +AGGGAGCCATGGAAGAATCCTGAAGACTGATTTAAAAGGTGGTAATTGTGTAGTGCAATGTCAGACTGAA +AAAGGTGGCTTAAACAGTACATTGCCATTCCACAATATCAGTAAATATGCATTTGGAACCTGCCCCAAAT +ATGTAAGAGTTAATAGTCTCAAACTGGCAGTCGGTCTGAGGAACGTGCCTGCTAGATCAAGTAGAGGACT +ATTTGGAGCCATAGCTGGATTCATAGAAGGAGGTTGGCCAGGACTAGTCGCTGGCTGGTATGGTTTCCAG +CATTCAAATGATCAAGGGGTTGGTATGGCTGCAGATAGGGATTCAACTCAAAAGGCAATTGATAAAATAA +CATCCAAGGTGAATAATATAGTCGACAAGATGAACAAGCAATATGAAATAATTGATCATGAATTCAGTGA +GGTTGAAACTAGACTCAATATGATCAATAATAAGATTGATGACCAAATACAAGACGTATGGGCATATAAT +GCAGAATTGCTAGTACTACTTGAAAATCAAAAAACACTCGATGAGCATGATGCGAACGTGAACAATCTAT +ATAACAAGGTGAAGAGGGCACTGGGCTCCAATGCTATGGAAGATGGGAAAGGCTGTTTCGAGCTATACCA +TAAATGTGATGATCAGTGCATGGAAACAATTCGGAACGGGACCTATAATAGGAGAAAGTATAGAGAGGAA +TCAAGACTAGAAAGGCAGAAAATAGAGGGGGTTAAGCTGGAATCTGAGGGAACTTACAAAATCCTCACCA +TTTATTCGACTGTCGCCTCATCTCTTGTGCTTGCAATGGGGTTTGCTGCCTTCCTGTTCTGGGCCATGTC +CAATGGATCTTGCAGATGCAACATTTGTATATAA + diff --git a/data/flu/NA/na_n10_h17n10/CY103878/unreleased/dataset.zip b/data/flu/NA/na_n10_h17n10/CY103878/unreleased/dataset.zip new file mode 100644 index 0000000..e460b98 Binary files /dev/null and b/data/flu/NA/na_n10_h17n10/CY103878/unreleased/dataset.zip differ diff --git a/data/flu/NA/na_n10_h17n10/CY103878/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n10_h17n10/CY103878/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..493c7b9 --- /dev/null +++ b/data/flu/NA/na_n10_h17n10/CY103878/unreleased/genome_annotation.gff3 @@ -0,0 +1,8 @@ +##gff-version 3 
+#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY103878.1 1 1390 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1129345 +CY103878.1 Genbank region 1 1390 . + . ID=CY103878.1:1..1390;Dbxref=taxon:1129345;Name=6;collection-date=May-2009;country=Guatemala: El Jobo;gbkey=Src;isolation-source=rectal swab;mol_type=viral cRNA;nat-host=Sturnira lilium%3B gender M;note=Complete sequence%2C ends confirmed by RACE-PCR;segment=6;serotype=H17N10;strain=A/little yellow-shouldered bat/Guatemala/153/2009 +CY103878.1 Genbank gene 24 1352 . + . ID=gene-NA;Name=NA;gbkey=Gene;gene=NA;gene_biotype=protein_coding +CY103878.1 Genbank CDS 24 1352 . + 0 ID=cds-AFC35420.1;Parent=gene-NA;Dbxref=NCBI_GP:AFC35420.1;Name=NA;gbkey=CDS;product=neuraminidase;protein_id=AFC35420.1 diff --git a/data/flu/NA/na_n10_h17n10/CY103878/unreleased/pathogen.json b/data/flu/NA/na_n10_h17n10/CY103878/unreleased/pathogen.json new file mode 100644 index 0000000..c488a5c --- /dev/null +++ b/data/flu/NA/na_n10_h17n10/CY103878/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n10_h17n10", + "reference name": "Influenza A virus (A/little yellow-shouldered bat/Guatemala/153/2009(H17N10)) neuraminidase (NA) gene, complete cds", + "reference accession": "CY103878" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n10_h17n10/CY103878/unreleased/reference.fasta b/data/flu/NA/na_n10_h17n10/CY103878/unreleased/reference.fasta new file mode 100644 index 0000000..58fe4cd --- /dev/null +++ b/data/flu/NA/na_n10_h17n10/CY103878/unreleased/reference.fasta @@ -0,0 +1,22 @@ +>CY103878.1 Influenza A virus (A/little yellow-shouldered bat/Guatemala/153/2009(H17N10)) neuraminidase (NA) gene, complete cds 
+AGCAGAAGCAGGAGTTTTTAATAATGTCTATCAACGGAACGACATGTCTACTCACACTCAGTCTAATACT +CAATGTTATAATGATAGGGCTCCAAATCCTGATGCCCTTTATTCTTTTATGGACCAACAGCCCCCCGCCA +GAAATCTCCAACAGCACTAGCTGCTGCAACGGAACCTTTCTGAATGAAACAAACAACAATATAACCAATA +TATCACAAATAACCAATAATTTCCTCAAAGAAGAGAAATTCTACTGGAGGGCAAAATCCCAAATGTGCGA +AGTCAAAGGTTGGGTTCCTACACATAGAGGGTTCCCTTGGGGTCCTGAGCTCCCCGGAGACTTAATTCTC +AGTAGGAGGGCATACGTTAGCTGTGACTTGACATCCTGTTTCAAATTCTTTATTGCTTACGGCCTCAGTG +CAAATCAGCACTTATTGAACACAAGTATGGAGTGGGAAGAAAGCCTGTACAAAACTCCAATTGGAAGTGC +AAGCACCTTAAGCACTTCAGAAATGATTCTCCCCGGGAGAAGTTCATCAGCATGCTTCGACGGGCTAAAA +TGGACCGTCCTGGTAGCTAATGGCAGAGACCGGAACAGCTTCATAATGATCAAATATGGAGAGGAAGTAA +CAGACACTTTCTCGGCCAGCAGAGGAGGTCCCCTGCGACTCCCCAACTCAGAATGCATCTGTATAGAAGG +AAGTTGTTTTGTAATAGTAAGTGACGGACCCAATGTGAATCAGAGCGTCCACCGGATCTATGAACTCCAA +AATGGAACAGTCCAGAGATGGAAGCAGCTAAATACAACTGGCATAAACTTTGAATACAGCACGTGCTATA +CAATCAACAACCTGATAAAGTGCACTGGGACAAATCTCTGGAATGATGCCAAAAGACCTTTGCTCCGATT +CACTAAGGAACTCAACTATCAGATTGTAGAGCCCTGCAATGGGGCTCCCACAGATTTCCCCAGAGGCGGG +CTTACCACCCCAAGTTGCAAGATGGCTCAAGAAAAAGGAGAAGGAGGGATTCAGGGTTTCATACTTGACG +AGAAACCAGCCTGGACCTCAAAAACAAAGGCTGAGTCATCTCAGAATGGTTTTGTATTAGAACAAATTCC +TAACGGGATAGAAAGTGAAGGAACAGTTTCATTAAGCTATGAACTTTTTTCTAACAAGAGAACCGGAAGG +AGTGGATTCTTTCAACCCAAAGGAGACCTCATTTCTGGATGCCAACGAATCTGTTTCTGGCTGGAAATAG +AAGATCAAACAGTAGGCCTAGGAATGATTCAAGAACTCAGCACTTTCTGTGGGATAAACTCACCTGTTCA +GAATATAAATTGGGATTCATGACCAATGGACAGCGAATGAAAAAACTCCTTGTTTCTACT + diff --git a/data/flu/NA/na_n11_h18n11/CY125947/unreleased/dataset.zip b/data/flu/NA/na_n11_h18n11/CY125947/unreleased/dataset.zip new file mode 100644 index 0000000..a470d52 Binary files /dev/null and b/data/flu/NA/na_n11_h18n11/CY125947/unreleased/dataset.zip differ diff --git a/data/flu/NA/na_n11_h18n11/CY125947/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n11_h18n11/CY125947/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..2fda81b --- /dev/null +++ b/data/flu/NA/na_n11_h18n11/CY125947/unreleased/genome_annotation.gff3 @@ -0,0 +1,8 
@@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY125947.1 1 1426 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1395524 +CY125947.1 Genbank region 1 1426 . + . ID=CY125947.1:1..1426;Dbxref=taxon:1395524;Name=6;collection-date=2010;country=Peru: Truenococha;gbkey=Src;isolation-source=rectal swab;mol_type=viral cRNA;nat-host=Artibeus planirostris%3B gender M;note=Complete sequence%2C ends confirmed by RACE-PCR.;segment=6;serotype=H18N11;strain=A/flat-faced bat/Peru/033/2010 +CY125947.1 Genbank gene 24 1367 . + . ID=gene-NA;Name=NA;gbkey=Gene;gene=NA;gene_biotype=protein_coding +CY125947.1 Genbank CDS 24 1367 . + 0 ID=cds-AGX84936.1;Parent=gene-NA;Dbxref=NCBI_GP:AGX84936.1;Name=NA;gbkey=CDS;product=neuraminidase-like protein;protein_id=AGX84936.1 diff --git a/data/flu/NA/na_n11_h18n11/CY125947/unreleased/pathogen.json b/data/flu/NA/na_n11_h18n11/CY125947/unreleased/pathogen.json new file mode 100644 index 0000000..273a75d --- /dev/null +++ b/data/flu/NA/na_n11_h18n11/CY125947/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n11_h18n11", + "reference name": "Influenza A virus (A/flat-faced bat/Peru/033/2010(H18N11)) neuraminidase-like protein (NA) gene, complete cds", + "reference accession": "CY125947" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n11_h18n11/CY125947/unreleased/reference.fasta b/data/flu/NA/na_n11_h18n11/CY125947/unreleased/reference.fasta new file mode 100644 index 0000000..1e370cb --- /dev/null +++ b/data/flu/NA/na_n11_h18n11/CY125947/unreleased/reference.fasta @@ -0,0 +1,23 @@ +>CY125947.1 Influenza A virus (A/flat-faced bat/Peru/033/2010(H18N11)) neuraminidase-like protein (NA) gene, complete cds 
+AGCAGAAGCAGGAGTTTTTCATAATGTCGTTTCAAACATCGACATGTCTGTTGATTGTTTCCCTAATATG +TGGGATACTAACAGTCTGCCTTCAGGTACTGTTACCCTTCATATTGATATGGACAAATACAGAACCAAAT +TATTCCTGTGAGTGTCCAGCTCCCAACATCAGTCTTAGCTGTCCAAACGGGACTTCTGTAACATATGACA +GTAAAAATATAACTGAAAACAGCTTCTACAGTTCAACAACAAACTACCTGTCCCCTGTCATTGCAACCCC +TCTGGTGCTAGGAGAGAATCTGTGCAGCATAAATGGGTGGGTTCCAACCTACAGAGGAGAAGGAACAACC +GGAAAAATTCCTGATGAACAAATGCTGACCAGACAGAACTTTGTATCCTGCTCAGATAAAGAGTGTCGAA +GATTTTTTGTGAGTATGGGATACGGAACTACCACAAATTTTGCAGACCTAATTGTGTCAGAACAAATGAA +TGTTTACAGTGTAAAGTTAGGAGACCCTCCAACACCTGACAAGTTAAAATTTGAAGCTGTTGGCTGGAGT +GCCAGCTCGTGTCATGATGGCTTTCAGTGGACTGTCCTGTCCGTTGCAGGAGACGGTTTTGTGAGCATCC +TTTATGGAGGAATTATAACTGATACAATTCATCCAACAAATGGAGGCCCACTGAGAACACAAGCTTCATC +TTGCATATGCAATGATGGAACTTGTTATACAATCATTGCTGATGGAACCACTTACACTGCATCTTCTCAC +AGACTTTACAGACTAGTCAATGGAACATCTGCCGGCTGGAAGGCCCTTGATACCACAGGGTTCAATTTTG +AGTTTCCGACTTGCTACTATACAAGTGGCAAAGTAAAATGTACCGGAACAAATCTTTGGAATGATGCCAA +GAGGCCCTTTCTTGAATTTGACCAGTCCTTCACTTACACTTTCAAGGAGCCATGCTTGGGGTTCCTTGGG +GACACCCCAAGAGGGATTGACACCACTAATTACTGTGACAAGACAACAACAGAGGGAGAGGGTGGAATCC +AAGGTTTCATGATTGAAGGCTCAAACTCCTGGATAGGAAGAATTATTAATCCAGGATCCAAGAAAGGATT +TGAAATTTATAAGTTCCTGGGAACATTGTTTTCTGTCCAAACTGTAGGAAATAGGAACTACCAATTGTTA +AGTAACAGCACAATTGGGAGATCAGGCCTGTATCAGCCTGCTTATGAATCACGTGATTGTCAAGAGTTGT +GTTTTTGGATTGAAATTGCTGCAACTACCAAAGCAGGCTTGTCATCCAATGATCTGATTACTTTTTGTGG +GACAGGAGGCTCAATGCCAGATGTCAACTGGGGGTAAGTATATGATTACATTCATATTTTAAATGGATGT +ATAAGAAAAAACTCCTTGTTTCTACT + diff --git a/data/flu/NA/na_n1_h5n1/NC_007361.1/unreleased/dataset.zip b/data/flu/NA/na_n1_h5n1/NC_007361.1/unreleased/dataset.zip new file mode 100644 index 0000000..dd2385f Binary files /dev/null and b/data/flu/NA/na_n1_h5n1/NC_007361.1/unreleased/dataset.zip differ diff --git a/data/flu/NA/na_n1_h5n1/NC_007361.1/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n1_h5n1/NC_007361.1/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..d300a9b --- /dev/null +++ 
b/data/flu/NA/na_n1_h5n1/NC_007361.1/unreleased/genome_annotation.gff3 @@ -0,0 +1,8 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region NC_007361.1 1 1458 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=93838 +NC_007361.1 RefSeq region 1 1458 . + . ID=NC_007361.1:1..1458;Dbxref=taxon:93838;Name=6;gbkey=Src;genome=genomic;mol_type=viral cRNA;old-name=Influenza A virus (A/Goose/Guangdong/1/96(H5N1));segment=6;strain=A/Goose/Guangdong/1/96(H5N1) +NC_007361.1 RefSeq gene 21 1430 . + . ID=gene-FLUAVH5N1_s6gp1;Dbxref=GeneID:3654619;Name=NA;gbkey=Gene;gene=NA;gene_biotype=protein_coding;locus_tag=FLUAVH5N1_s6gp1 +NC_007361.1 RefSeq CDS 21 1430 . + 0 ID=cds-YP_308668.1;Parent=gene-FLUAVH5N1_s6gp1;Dbxref=GenBank:YP_308668.1,GeneID:3654619;Name=NA;gbkey=CDS;locus_tag=FLUAVH5N1_s6gp1;product=neuraminidase;protein_id=YP_308668.1 diff --git a/data/flu/NA/na_n1_h5n1/NC_007361.1/unreleased/pathogen.json b/data/flu/NA/na_n1_h5n1/NC_007361.1/unreleased/pathogen.json new file mode 100644 index 0000000..e936ffd --- /dev/null +++ b/data/flu/NA/na_n1_h5n1/NC_007361.1/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n1_h5n1", + "reference name": "Influenza A virus (A/Goose/Guangdong/1/96(H5N1)) neuraminidase (NA) gene, complete cds", + "reference accession": "NC_007361.1" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n1_h5n1/NC_007361.1/unreleased/reference.fasta b/data/flu/NA/na_n1_h5n1/NC_007361.1/unreleased/reference.fasta new file mode 100644 index 0000000..1d146a2 --- /dev/null +++ b/data/flu/NA/na_n1_h5n1/NC_007361.1/unreleased/reference.fasta @@ -0,0 +1,23 @@ +>NC_007361.1 Influenza A virus (A/Goose/Guangdong/1/96(H5N1)) neuraminidase (NA) gene, complete cds 
+AGCAAAAGCAGGAGATTAAAATGAATCCAAATCAGAAGATAATAACCATTGGATCAATCTGTATGGTAGT +TGGGATAATTAGCTTGATGTTACAAATTGGGAACATAATCTCAATATGGGTCAGTCATTCAATTCAGACA +GGGAATCAACACCAAGCTGAACCATGCAATCAAAGCATTATTACTTATGAAAACAACACCTGGGTAAATC +AAACATATGTCAACATCAGCAATACCAATTTTCTTACTGAAAAAGCTGTGGCTTCAGTAACATTAGCGGG +CAATTCATCTCTTTGCCCCATTAGCGGATGGGCTGTACACAGTAAGGACAACGGTATAAGAATCGGTTCC +AAGGGGGATGTGTTTGTTATAAGAGAGCCGTTCATCTCATGCTCCCACTTGGAATGCAGAACTTTCTTTT +TGACTCAGGGAGCCTTGCTGAATGACAAGCACTCCAATGGGACCGTCAAAGACAGAAGCCCTCACAGAAC +ATTGATGAGTTGTCCTGTGGGTGAGGCTCCCTCCCCATATAACTCAAGGTTTGAGTCTGTTGCTTGGTCG +GCAAGTGCTTGCCATGATGGCACCAGTTGGTTGACAATTGGAATTTCTGGCCCAGACAATGGGGCTGTGG +CTGTATTGAAATACAACGGCATAATAACAGACACTATCAAGAGTTGGAGGAACAACATACTGAGAACTCA +AGAGTCTGAATGTGCATGTGTAAATGGCTCTTGCTTTACTGTAATGACTGACGGACCAAGTAATGGGCAG +GCCTCATATAAGATCTTCAAAATGGAAAAAGGGAAAGTAGTTAAATCAGTCGAATTGAATGCCCCTAATT +ATCACTATGAGGAGTGCTCCTGTTATCCTGATGCTGGCGAAATCACATGTGTGTGCAGGGATAATTGGCA +TGGCTCAAATCGGCCATGGGTATCTTTCAATCAAAATTTGGAGTATCAAATAGGATATATATGCAGTGGA +GTTTTCGGAGACAATCCACGCCCCAATGATGGAACAGGCAGTTGTGGTCCGGTGTCCCCTAACGGGGCAT +ATGGAGTAAAAGGGTTTTCATTTAAATACGGCAATGGTGTTTGGATCGGGAGAACCAAAAGCACTAATTC +CAGGAGCGGCTTTGAAATGATTTGGGATCCAAATGGGTGGACTGGAACGGACAGTAGCTTCTCGGTGAAA +CAAGATATCGTAGCAATAACTGATTGGTCAGGATATAGCGGGAGTTTTGTCCAGCATCCAGAACTGACAG +GATTAGATTGCATAAGACCTTGTTTCTGGGTTGAGCTAATCAGAGGGCGGCCCAAAGAGAGCACAATTTG +GACTAGTGGGAGCAGCATATCTTTTTGTGGTGTAAATAGTGACACTGTGGGTTGGTCTTGGCCAGACGAT +GCCGAGTTGCCATTCACCATTGACAAGTAGTTTGTTCAAAAAACTCCTTGTTTCTACT + diff --git a/data/flu/NA/na_n2_h5n2/KU143347/unreleased/dataset.zip b/data/flu/NA/na_n2_h5n2/KU143347/unreleased/dataset.zip new file mode 100644 index 0000000..e951ae2 Binary files /dev/null and b/data/flu/NA/na_n2_h5n2/KU143347/unreleased/dataset.zip differ diff --git a/data/flu/NA/na_n2_h5n2/KU143347/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n2_h5n2/KU143347/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..06104ce --- /dev/null +++ 
b/data/flu/NA/na_n2_h5n2/KU143347/unreleased/genome_annotation.gff3 @@ -0,0 +1,8 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region KU143347.1 1 1453 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1756060 +KU143347.1 Genbank region 1 1453 . + . ID=KU143347.1:1..1453;Dbxref=taxon:1756060;Name=6;collection-date=Dec-2014;country=China;gbkey=Src;genome=genomic;isolation-source=chicken fecals;mol_type=viral cRNA;nat-host=chicken;segment=6;serotype=H5N2;strain=A/chicken/Wuhan/WHJF/2014 +KU143347.1 Genbank gene 20 1429 . + . ID=gene-NA;Name=NA;gbkey=Gene;gene=NA;gene_biotype=protein_coding +KU143347.1 Genbank CDS 20 1429 . + 0 ID=cds-ALR82543.1;Parent=gene-NA;Dbxref=NCBI_GP:ALR82543.1;Name=NA;gbkey=CDS;product=neuraminidase;protein_id=ALR82543.1 diff --git a/data/flu/NA/na_n2_h5n2/KU143347/unreleased/pathogen.json b/data/flu/NA/na_n2_h5n2/KU143347/unreleased/pathogen.json new file mode 100644 index 0000000..798cf58 --- /dev/null +++ b/data/flu/NA/na_n2_h5n2/KU143347/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n2_h5n2", + "reference name": "Influenza A virus (A/chicken/Wuhan/WHJF/2014(H5N2)) segment 6 neuraminidase (NA) gene, complete cds", + "reference accession": "KU143347" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n2_h5n2/KU143347/unreleased/reference.fasta b/data/flu/NA/na_n2_h5n2/KU143347/unreleased/reference.fasta new file mode 100644 index 0000000..d7a0093 --- /dev/null +++ b/data/flu/NA/na_n2_h5n2/KU143347/unreleased/reference.fasta @@ -0,0 +1,23 @@ +>KU143347.1 Influenza A virus (A/chicken/Wuhan/WHJF/2014(H5N2)) segment 6 neuraminidase (NA) gene, complete cds +AGCAAAAGCAGGAGTGAAAATGAATCCAAACCAGAAGATAATAACAATTGGCTCTGTCTCTCTAACCATT 
+GCAACAGTATGTTTCCTCATGCAAATTGCCATCCTAGCAACGACTATAACACTGCACTTCAAGCAGAATG +AATGCAGCATCCCCTCGAACAATCAAGTAGTGCCATGTGAGCCAATCATAGTAGAAAGGAACATAACAGA +GATAGTGTATTTGAATAACACCACCATAGAAAAAGAATTCTGCCCTAAATTAACAGAATACAGGGATTGG +TCGAAGCCACAGTGTCAGATCACAGGGTTTGCTCCTTTCTCCAAGGACAACTCAATCCGGCTTTCCGCTG +GTGGGGACATTTGGGTAACAAGGGAACCTTATGTATCATGCAGTCCCAATAAATGTTATCAGTTCGCACT +TGGGCAGGGAACCACGCTAGACAACAAACACTCAAATGGCACAATACATGATAGGATTCCCCATCGGACC +CTTTTGATGAACGAGTTGGGTGTTCCGTTTCATTTGGGGACCAAACAAGTGTGCATAGCATGGTCCAGTT +CGAGCTGCCATGATGGAAGAGCATGGCTGCACGTTTGTGTTACTGGGGATGATAGGAATGCAACTGCCAG +TTTCATTTATGATGGGATGCTTGTTGACAGTATTGTTTCATGGTCTCAAAACATCCTCAGAACTCAAGAG +TCAGAATGCGTCTGCATCAATGGAACTTGTACAGTAGTAATGACTGATGGAAGTGCATCAGGAAGGGCTG +ATACTAAAATACTATTCATTAAAGAGGGGAAAATTGTACATATCAGCCCATTATCAGGAAGCGCCCAGCA +TATAGAGGAATGTTCCTGTTATCCCCGCTATCCAGACGTCAGATGTGTCTGCAGAGACAATTGGAAAGGT +TCAAATAGGCCCGTTATAGATATAAATATGGCAGATTATAGCATTGATTCTAGTTATGTATGCTCAGGGC +TTGTTGGAGACACACCAAGAAACGATGATAGCTCTAGCAATAGCAACTGCAAGGATCCTAATAATGAGAG +AGGGAACCCAGGAGTAAAAGGGTGGGCATTTGACTATGGGAGTGATGTTTGGATGGGAAGAACAATCAGC +AAGGACTCACGCTCAGGTTATGAGACCTTCAGGGTCATTGGCGGTTGGACAACAGCTAATTCCAAATCTC +AGGTAAATAGACAAGTCATAGTTGACAATAATAACTGGTCTGGTTATTCTGGCATCTTCTCTGTTGAAGG +CAAAAGCTGCGTCAATAGGTGTTTTTATGTGGAAATGGTAAGAGGAAGGCCACAAGAGACTAGAGTATGG +TGGACTTCAAACAGTATTGTCGTATTTTGTGGCACTTCAGGTACTTATGGAACAGGCTCATGGCCTGATG +GGGCGAATATCAATTTCATGCCTATATAAGCTTTCGCAATTTTAGAAAAAAAC + diff --git a/data/flu/NA/na_n2_h6n2/CY130032/unreleased/dataset.zip b/data/flu/NA/na_n2_h6n2/CY130032/unreleased/dataset.zip new file mode 100644 index 0000000..f3046f7 Binary files /dev/null and b/data/flu/NA/na_n2_h6n2/CY130032/unreleased/dataset.zip differ diff --git a/data/flu/NA/na_n2_h6n2/CY130032/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n2_h6n2/CY130032/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..0c77d00 --- /dev/null +++ b/data/flu/NA/na_n2_h6n2/CY130032/unreleased/genome_annotation.gff3 @@ -0,0 +1,9 @@ +##gff-version 3 
+#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY130032.1 1 1430 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=402473 +CY130032.1 Genbank region 1 1430 . + . ID=CY130032.1:1..1430;Dbxref=taxon:402473;Name=6;bio-material=CEIRS#162849#;collection-date=1965;country=USA: Massachusetts;gbkey=Src;mol_type=viral cRNA;nat-host=turkey;segment=6;serotype=H6N2;strain=A/turkey/Massachusetts/3740/1965 +CY130032.1 Genbank sequence_feature 1 1430 . + . ID=id-CY130032.1:1..1430;Dbxref=IRD:NIGSP_CEIRS_SJC001_WEB_00006.NA;gbkey=misc_feature +CY130032.1 Genbank gene 8 1417 . + . ID=gene-NA;Name=NA;gbkey=Gene;gene=NA;gene_biotype=protein_coding +CY130032.1 Genbank CDS 8 1417 . + 0 ID=cds-AGB50908.1;Parent=gene-NA;Dbxref=NCBI_GP:AGB50908.1;Name=NA;gbkey=CDS;product=neuraminidase;protein_id=AGB50908.1 diff --git a/data/flu/NA/na_n2_h6n2/CY130032/unreleased/pathogen.json b/data/flu/NA/na_n2_h6n2/CY130032/unreleased/pathogen.json new file mode 100644 index 0000000..8e133cf --- /dev/null +++ b/data/flu/NA/na_n2_h6n2/CY130032/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n2_h6n2", + "reference name": "Influenza A virus (A/turkey/Massachusetts/3740/1965(H6N2)) neuraminidase (NA) gene, complete cds", + "reference accession": "CY130032" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n2_h6n2/CY130032/unreleased/reference.fasta b/data/flu/NA/na_n2_h6n2/CY130032/unreleased/reference.fasta new file mode 100644 index 0000000..ef28083 --- /dev/null +++ b/data/flu/NA/na_n2_h6n2/CY130032/unreleased/reference.fasta @@ -0,0 +1,23 @@ +>CY130032.1 Influenza A virus (A/turkey/Massachusetts/3740/1965(H6N2)) neuraminidase (NA) gene, complete cds +AGTGAAAATGAATCCAAATCAGAAGATAATAACAATTGGCTCCGTCTCTCTAACCATTGCAACAGTATGT 
+TTCCTCATGCAGATTGCCATCTTAGCAACGACTGTGACGCTGCATTTCAAGCAAAATGAATGCAGCATCC +CCGCGAACAATCAAGTAGTGCCATGTGAACCAATCATAATAGAAAGAAACATAACAGAGATAGTGTATTT +GAATAGTACTACTATAGAAAAAGAAATTTGTCCTGGAGTAGTAGAATACAGGAATTGGTCAAAACCGCAA +TGTCAAATTACAGGGTTTGCTCCTTTCTCCAAGGACAACTCAATTCGGCTTTCTGCAGGTGGGAACATTT +GGGTAACAAGAGAACCTTATGTGTCATGCGATCCCGGTAAATGTTATCAATTTGCACTTGGACAGGGGAC +CACGCTGGACAATAAACACTCAAATGGCACAATACATGATAGAATCCCTCATCGAACTCTTCTAATGAAT +GAATTGGGTGTTCCTTTTCATTTGGGAACCAAACAAGTGTGCATAGCATGGTCCAGCTCAAGTTGTCATG +ATGGGAAAGCATGGTTGCACGTTTGTATCACTGGGGATGATAGAAATGCGACTGCTAGTTTCATTTATGA +TGGGATGCTTGTTGACAGTATTGGTTCTTGGTCTCAAAATATCCTCAGAACTCAGGAGTCAGAATGCGTT +TGCATCAATGGGACTTGTACAGTAGTAATGACTGATGGAAGTGCATCAGGAAGGGCCGATACTAAAATAC +TATTCGTTAGAGAGGGGAAAATTGTCCATATTAGCCCTCTGTCAGGAAGTGCTCAGCATATAGAGGAATG +TTCCTGTTATCCCCGATATCCAAACGTCAGATGTGTTTGCAGAGACAACTGGAAGGGCTCTAATAGGCCC +GTTATAGATATAAGTATGGCAGATTATAGCATTGATTCCAGTTATGTGTGCTCAGGACTTGTTGGCGACA +CACCAAGGAACGATGATAGCTCTAGCAGCAGCAACTGCAAGGATCCTAACAATGAAAGAGGGAACCCAGG +AGTAAAAGGGTGGGCCTTTGACAATGGAAATGATGTTTGGATGGGAAGAACAATCAGCAAAGACTCGCGC +TCAGGTTATGAAACCTTCAGGGTCATTGGTGGTTGGACCACAGCTAATTCCAAGTCACAGGTCAATAGGC +AAGTCATAGTTGACAATAACAACTGGTCTGGTTATTCTGGTATTTTCTCTGTTGAAGGCAAAAGCTGCAT +CAATAGGTGTTTTTATGTAGAGTTGATAAGAGGAAGGCCACAGGAGACTAGAGTATGGTGGACCTCAAAC +AGTATTGTCGTATTTTGTGGCACTTCAGGTACCTATGGAACAGGCTCATGGCCTGATGGGGCGAATATCA +ATTTCATGCCTATATAAGCTTTCGCAATTT + diff --git a/data/flu/NA/na_n2_h9n2/NC_004909.1/unreleased/dataset.zip b/data/flu/NA/na_n2_h9n2/NC_004909.1/unreleased/dataset.zip new file mode 100644 index 0000000..8b0da89 Binary files /dev/null and b/data/flu/NA/na_n2_h9n2/NC_004909.1/unreleased/dataset.zip differ diff --git a/data/flu/NA/na_n2_h9n2/NC_004909.1/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n2_h9n2/NC_004909.1/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..d2b9c88 --- /dev/null +++ b/data/flu/NA/na_n2_h9n2/NC_004909.1/unreleased/genome_annotation.gff3 @@ -0,0 +1,8 @@ +##gff-version 3 
+#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region NC_004909.1 1 1418 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=130760 +NC_004909.1 RefSeq region 1 1418 . + . ID=NC_004909.1:1..1418;Dbxref=taxon:130760;Name=6;gbkey=Src;genome=genomic;mol_type=genomic RNA;note=subtype H9N2;segment=6;strain=A/Hong Kong/1073/99 +NC_004909.1 RefSeq gene 1 1404 . + . ID=gene-FLUAVAHHH9N2s6gp1;Dbxref=GeneID:1460997;Name=na;gbkey=Gene;gene=na;gene_biotype=protein_coding;locus_tag=FLUAVAHHH9N2s6gp1 +NC_004909.1 RefSeq CDS 1 1404 . + 0 ID=cds-NP_859038.1;Parent=gene-FLUAVAHHH9N2s6gp1;Dbxref=GOA:Q9ICY2,InterPro:IPR001860,InterPro:IPR011040,UniProtKB/TrEMBL:Q9ICY2,GenBank:NP_859038.1,GeneID:1460997;Name=NA;gbkey=CDS;locus_tag=FLUAVAHHH9N2s6gp1;product=neuraminidase;protein_id=NP_859038.1 diff --git a/data/flu/NA/na_n2_h9n2/NC_004909.1/unreleased/pathogen.json b/data/flu/NA/na_n2_h9n2/NC_004909.1/unreleased/pathogen.json new file mode 100644 index 0000000..3212c86 --- /dev/null +++ b/data/flu/NA/na_n2_h9n2/NC_004909.1/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n2_h9n2", + "reference name": "Influenza A virus na gene for neuraminidase, genomic RNA, strain A/Hong Kong/1073/99(H9N2)", + "reference accession": "NC_004909.1" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n2_h9n2/NC_004909.1/unreleased/reference.fasta b/data/flu/NA/na_n2_h9n2/NC_004909.1/unreleased/reference.fasta new file mode 100644 index 0000000..039639c --- /dev/null +++ b/data/flu/NA/na_n2_h9n2/NC_004909.1/unreleased/reference.fasta @@ -0,0 +1,23 @@ +>NC_004909.1 Influenza A virus na gene for neuraminidase, genomic RNA, strain A/Hong Kong/1073/99(H9N2) +ATGAATCCAAATCAAAAGATAATAGCACTTGGCTCTGTTTCTATAACTATTGCGACAATATGTTTACTCA 
+TGCAGATTGCCATCTTAGCAACGACTATGACACTACATTTCAATGAATGTACCAACCCATCGAACAATCA +AGCAGTGCCATGTGAACCAATCATAATAGAAAGGAACATAACAGAGATAGTGCATTTGAATAATACTACC +ATAGAGAAGGAAAGTTGTCCTAAAGTAGCAGAATACAAGAATTGGTCAAAACCGCAATGTCAAATTACAG +GGTTCGCCCCTTTCTCCAAGGACAACTCAATTAGGCTTTCTGCAGGCGGGGATATTTGGGTGACAAGAGA +ACCTTATGTATCGTGCGGTCTTGGTAAATGTTACCAATTTGCACTTGGGCAGGGAACCACTTTGAACAAC +AAACACTCAAATGGCACAATACATGATAGGAGTCCCCATAGAACCCTTTTAATGAACGAGTTGGGTGTTC +CATTTCATTTGGGAACCAAACAAGTGTGCATAGCATGGTCCAGCTCAAGCTGCCATGATGGGAAGGCATG +GTTACATGTTTGTGTCACTGGGGATGATAGAAATGCGACTGCTAGCATCATTTATGATGGGATGCTTACC +GACAGTATTGGTTCATGGTCTAAGAACATCCTCAGAACTCAGGAGTCAGAATGCGTTTGCATCAATGGAA +CTTGTACAGTAGTAATGACTGATGGAAGTGCATCAGGAAGGGCTGATACTAAAATACTATTCATTAGAGA +AGGGAAAATTGTCCACATTGGTCCACTGTCAGGAAGTGCTCAGCATGTGGAGGAATGCTCCTGTTACCCC +CGGTATCCAGAAGTTAGATGTGTTTGCAGAGACAATTGGAAGGGCTCCAATAGACCCGTGCTATATATAA +ATGTGGCAGATTATAGTGTTGATTCTAGTTATGTGTGCTCAGGACTTGTTGGCGACACACCAAGAAATGA +CGATAGCTCCAGCAGCAGTAACTGCAGGGATCCTAATAACGAGAGAGGGGGCCCAGGAGTGAAAGGGTGG +GCCTTTGACAATGGAAATGATGTTTGGATGGGACGAACAATCAAGAAAGATTCGCGCTCTGGTTATGAGA +CTTTCAGGGTCGTTGGTGGTTGGACTACGGCTAATTCCAAGTCACAAATAAATAGGCAAGTCATAGTTGA +CAGTGATAACTGGTCTGGGTATTCTGGTATATTCTCTGTTGAAGGAAAAACCTGCATCAACAGGTGTTTT +TATGTGGAGTTGATAAGAGGGAGACCACAGGAGACCAGAGTATGGTGGACTTCAAATAGCATCATTGTAT +TTTGTGGAACTTCAGGTACCTATGGAACAGGCTCATGGCCTGATGGAGCGAATATCAATTTCATGTCTAT +ATAAGCTTTCGCAATTTT + diff --git a/data/flu/NA/na_n3_h16n3/CY136632/unreleased/dataset.zip b/data/flu/NA/na_n3_h16n3/CY136632/unreleased/dataset.zip new file mode 100644 index 0000000..0d16556 Binary files /dev/null and b/data/flu/NA/na_n3_h16n3/CY136632/unreleased/dataset.zip differ diff --git a/data/flu/NA/na_n3_h16n3/CY136632/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n3_h16n3/CY136632/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..c12ff23 --- /dev/null +++ b/data/flu/NA/na_n3_h16n3/CY136632/unreleased/genome_annotation.gff3 @@ -0,0 +1,9 @@ +##gff-version 3 +#!gff-spec-version 1.21 
+#!processor NCBI annotwriter +##sequence-region CY136632.1 1 1422 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1283472 +CY136632.1 Genbank region 1 1422 . + . ID=CY136632.1:1..1422;Dbxref=taxon:1283472;Name=6;bio-material=CEIRS#144406#;collection-date=19-May-1987;country=USA: Delaware;gbkey=Src;lab-host=E1 passage(s);mol_type=viral cRNA;nat-host=laughing gull;segment=6;serotype=H16N3;strain=A/laughing gull/Delaware Bay/2839/1987 +CY136632.1 Genbank sequence_feature 1 1422 . + . ID=id-CY136632.1:1..1422;Dbxref=IRD:NIGSP_CEIRS_SJC001_JBC_00508.NA;gbkey=misc_feature +CY136632.1 Genbank gene 4 1413 . + . ID=gene-NA;Name=NA;gbkey=Gene;gene=NA;gene_biotype=protein_coding +CY136632.1 Genbank CDS 4 1413 . + 0 ID=cds-AGG26999.1;Parent=gene-NA;Dbxref=NCBI_GP:AGG26999.1;Name=NA;gbkey=CDS;product=neuraminidase;protein_id=AGG26999.1 diff --git a/data/flu/NA/na_n3_h16n3/CY136632/unreleased/pathogen.json b/data/flu/NA/na_n3_h16n3/CY136632/unreleased/pathogen.json new file mode 100644 index 0000000..df87db0 --- /dev/null +++ b/data/flu/NA/na_n3_h16n3/CY136632/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n3_h16n3", + "reference name": "Influenza A virus (A/laughing gull/Delaware Bay/2839/1987(H16N3)) neuraminidase (NA) gene, complete cds", + "reference accession": "CY136632" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n3_h16n3/CY136632/unreleased/reference.fasta b/data/flu/NA/na_n3_h16n3/CY136632/unreleased/reference.fasta new file mode 100644 index 0000000..28ebb3c --- /dev/null +++ b/data/flu/NA/na_n3_h16n3/CY136632/unreleased/reference.fasta @@ -0,0 +1,23 @@ +>CY136632.1 Influenza A virus (A/laughing gull/Delaware Bay/2839/1987(H16N3)) neuraminidase (NA) gene, complete cds 
+GAAATGAATCCAAATCAGAGAATCATAGCGATTGGGTCTGTGAACACAGTATTGTCCACAATAGCATTGC +TAGTGGGGATAGGTAACTTGGCATTCAATGCAGTCATTCATGGCAAAGTAGAGAACAACAAAGATGAAAG +TGCACCAATCACCACACCTCATCCAATTCACAACTGCAGCGGAACTGTGATAACAAATAATCACACTACG +ATCAACAACATAACAACGGTTGTATTTCAAGATCCAGAGAAACACTTCAGGCTTCCGTTGCCATTATGCC +CTTTCAGAGGATTCTTCCCTTTTCATAAAGACAATGCTCTGAGGCTGGCTGAAAACAAAGATGTTTTGGT +GACAAGAGAACCCTATATCAGCTGTGACAATAAAGGGTGTTGGTCTTTCGCGCTAGCTCAAGGAGCGCTT +TTAGGGACGAAGCATAGCAATGGGACAAATAAGGATAGAACTCCTTACAGGTCCTTAATTAGGTTCCCCA +TTGGAACAGCTCCCGTACTTGGGAACTACGAAGAAATGTGTGCTGCATGGTCGAGTAGCAGTTGCTTTGA +TGGTAAAGAATGGATGCATGTTTGTATTACTGGGAATGACAATGACGCCACAGCGCAGATAATCTATGCA +GGGACAATGCGAGACTCTATAAAGTCATGGCGGAGAAACATATTGAGAACCCAAGAGTCAGAATGCCAAT +GTTTACACGGAACTTGTGTTGTAGCAGTGACAGATGGACCAGCGGACAATAAGGCTGACCACCGAATATA +CTGGATAAGAGAAGGGAAAATCATAAAGCATGAGAAGATCCCAGACGACAAGATACAACATTTGGAAGAA +TGTTCATGTTACACAGATGTTGACATATACTGCATCTGTAGAGACAACTGGAAAGGCTCTAACAGGCCAT +GGATGCGTATAAACAATGAAACTATATTGGAAACTGGGTATATATGCAGCAAATTCCACTCAGACACTCC +CAGACCAAGTGATCCCTCTACAATTTCGTGTAACTCTCCAAGTGGAATTGATGGCAGAAGAGGAGTTAAA +GGATTCGGATTTAAAGTTCAGAATGATGTGTGGCTTGGGAGGACAATATCATATAGCAGCCGGTCAGGAT +TCGAAGTGATCAAAGTTTCAAATGGTTGGATTAATTCGAACAATCAATTGAAAGTATTCAATCAGACACT +TGTTTCCAATAATGACTGGTCCGGGTATTCGGGAAGCTTTGTCATTGAAAACAATGGCTGTTTTCAGCCT +TGTTTTTACGTTGAGCTCACAAGAGGAGTGCCAAACAAAAATGAGGATGTCTCTTGGACCAGCAATAGCA +TAGTTACGTTCTGTGGACTAGACAATGAGCCTGGATCGGGCAATTGGCCTGATGGTGCTAATATTGGGTT +TATGCCCAAGTAATAGAAAAAA + diff --git a/data/flu/NA/na_n4_h8n4/CY136133/unreleased/dataset.zip b/data/flu/NA/na_n4_h8n4/CY136133/unreleased/dataset.zip new file mode 100644 index 0000000..51a6677 Binary files /dev/null and b/data/flu/NA/na_n4_h8n4/CY136133/unreleased/dataset.zip differ diff --git a/data/flu/NA/na_n4_h8n4/CY136133/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n4_h8n4/CY136133/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..ce9ac22 --- /dev/null +++ 
b/data/flu/NA/na_n4_h8n4/CY136133/unreleased/genome_annotation.gff3 @@ -0,0 +1,9 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY136133.1 1 1437 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1228846 +CY136133.1 Genbank region 1 1437 . + . ID=CY136133.1:1..1437;Dbxref=taxon:1228846;Name=6;bio-material=CEIRS#9BM8109#;country=USA: Alaska;gbkey=Src;lab-host=R0 passage(s);mol_type=viral cRNA;nat-host=northern pintail%3B gender M%3B age hatch year;note=Sample provided by University of Alaska Fairbanks;segment=6;serotype=H8N4;strain=A/northern pintail/Interior Alaska/9BM8109R0/2009 +CY136133.1 Genbank sequence_feature 1 1437 . + . ID=id-CY136133.1:1..1437;Dbxref=IRD:NIGSP_CEIRS_CIP055_AK2_00220.NA;gbkey=misc_feature +CY136133.1 Genbank gene 10 1422 . + . ID=gene-NA;Name=NA;gbkey=Gene;gene=NA;gene_biotype=protein_coding +CY136133.1 Genbank CDS 10 1422 . + 0 ID=cds-AGG26257.1;Parent=gene-NA;Dbxref=NCBI_GP:AGG26257.1;Name=NA;gbkey=CDS;product=neuraminidase;protein_id=AGG26257.1 diff --git a/data/flu/NA/na_n4_h8n4/CY136133/unreleased/pathogen.json b/data/flu/NA/na_n4_h8n4/CY136133/unreleased/pathogen.json new file mode 100644 index 0000000..da78be9 --- /dev/null +++ b/data/flu/NA/na_n4_h8n4/CY136133/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n4_h8n4", + "reference name": "Influenza A virus (A/northern pintail/Interior Alaska/9BM8109R0/2009(H8N4)) neuraminidase (NA) gene, complete cds", + "reference accession": "CY136133" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n4_h8n4/CY136133/unreleased/reference.fasta b/data/flu/NA/na_n4_h8n4/CY136133/unreleased/reference.fasta new file mode 100644 index 0000000..b2a65ae --- /dev/null +++ 
b/data/flu/NA/na_n4_h8n4/CY136133/unreleased/reference.fasta @@ -0,0 +1,23 @@ +>CY136133.1 Influenza A virus (A/northern pintail/Interior Alaska/9BM8109R0/2009(H8N4)) neuraminidase (NA) gene, complete cds +AGTTTCATAATGAATCCAAATCAGAAAATCATAACCATCGGCAGTGTTAGTATTATATTAACGACGATAG +GCCTTCTCCTCCAAATAACAAGTTTGTGCTCAATATGGTTTAGCCACTACAACCAGGTGACACAGACACA +CGAACAACCTTGTTCTAACAACACTACGAATTACTACAATGAGACTTTTGTTAATGTAACCAATGTGCAG +AACAATTATACCACAGTAACTGAGCCCCCAGCACCTGATATGGTTCACTACTCTAGTGGAAGAGACTTGT +GCCCAATAAGGGGGTGGGCACCTCTGAGTAAGGACAATGGAATTAGAATTGGATCCCGAGGCGAAGTATT +TGTCATACGGGAGCCCTTCATATCATGCTCCATCAGTGAATGCAGAACTTTTTTCTTAACTCAAGGAGCT +CTTCTCAATGACAAGCACTCGAATGGGACAGTGAAAGACAGAAGTCCCTTCCGCACATTGATGAGTTGTC +CCATAGGGGTTGCCCCCTCTCCTAGCAATAGCCGCTTTGAGTCTGTAGCATGGTCTGCTACTGCATGTAG +CGACGGACCCGGTTGGCTAACACTAGGAATCACCGGCCCAGATACTACTGCTGTAGCAGTGCTGAAATAC +AATGGTATAATAACAGACACATTAAAAAGCTGGAAGGGAAATATCATGCGAACACAAGAGTCCGAGTGCG +TATGCCAGGATGAATTTTGTTATACTCTGATAACAGACGGACCGTCCGACGCGCAAGCTTTCTATAAGAT +ACTAAAAATCAGAAAGGGGAAAATAGTAAATATGCAGGATGTGGACGCAACAGGGTTCCACTTCGAAGAG +TGCTCCTGTTACCCGAGTGGGACAGATGTTGAGTGTGTCTGTCGAGACAACTGGCGGGGAAGTAATCGAC +CATGGATAAGATTCAACAGTGATCTTGATTACCAAATCGGCTATGTATGTAGTGGGATATTTGGGGACAA +TCCCAGGCCCGTGGATGGCACGGGCTCATGTAACAGCCCAGTAAATAATGGGAAAGGAAGATACGGGGTG +AAGGGGTTCAGCTTTAGGTATGGGGATGGTGTTTGGATAGGAAGGACAAAGAGCTTGGAATCCAGAAGCG +GTTTTGAAATGGTGTGGGATGCTAATGGATGGGTATCGACAGACAAGGATTCAAATGGTGTGCAGGATAT +TATAGATAATGACAATTGGTCTGGTTACAGTGGGAGTTTCAGTATTAGAGGAGAAACAACAGGCAGGAAT +TGCACTGTCCCATGTTTCTGGGTTGAAATGATAAGAGGGCAGCCCAAAGAAAGGACCATATGGACCAGTG +GTAGTAGTATTGCATTCTGTGGTGTTAATTCTGATACCACAGGTTGGTCATGGCCTGATGGCGCTCTGTT +GCCCTTTGACATAGACAAGTAATTTTTCGAAAAAACT + diff --git a/data/flu/NA/na_n5_h12n5/CY130080/unreleased/dataset.zip b/data/flu/NA/na_n5_h12n5/CY130080/unreleased/dataset.zip new file mode 100644 index 0000000..68f8947 Binary files /dev/null and b/data/flu/NA/na_n5_h12n5/CY130080/unreleased/dataset.zip differ diff --git 
a/data/flu/NA/na_n5_h12n5/CY130080/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n5_h12n5/CY130080/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..c4a5c33 --- /dev/null +++ b/data/flu/NA/na_n5_h12n5/CY130080/unreleased/genome_annotation.gff3 @@ -0,0 +1,9 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY130080.1 1 1444 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=385582 +CY130080.1 Genbank region 1 1444 . + . ID=CY130080.1:1..1444;Dbxref=taxon:385582;Name=6;bio-material=CEIRS#14739#;collection-date=1976;country=Canada: Alberta;gbkey=Src;mol_type=viral cRNA;nat-host=duck;segment=6;serotype=H12N5;strain=A/duck/Alberta/60/1976 +CY130080.1 Genbank sequence_feature 1 1444 . + . ID=id-CY130080.1:1..1444;Dbxref=IRD:NIGSP_CEIRS_SJC001_WEB_00014.NA;gbkey=misc_feature +CY130080.1 Genbank gene 9 1430 . + . ID=gene-NA;Name=NA;gbkey=Gene;gene=NA;gene_biotype=protein_coding +CY130080.1 Genbank CDS 9 1430 . 
+ 0 ID=cds-AGB50974.1;Parent=gene-NA;Dbxref=NCBI_GP:AGB50974.1;Name=NA;gbkey=CDS;product=neuraminidase;protein_id=AGB50974.1 diff --git a/data/flu/NA/na_n5_h12n5/CY130080/unreleased/pathogen.json b/data/flu/NA/na_n5_h12n5/CY130080/unreleased/pathogen.json new file mode 100644 index 0000000..93a0cdc --- /dev/null +++ b/data/flu/NA/na_n5_h12n5/CY130080/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n5_h12n5", + "reference name": "Influenza A virus (A/duck/Alberta/60/1976(H12N5)) neuraminidase (NA) gene, complete cds", + "reference accession": "CY130080" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n5_h12n5/CY130080/unreleased/reference.fasta b/data/flu/NA/na_n5_h12n5/CY130080/unreleased/reference.fasta new file mode 100644 index 0000000..53fdd2f --- /dev/null +++ b/data/flu/NA/na_n5_h12n5/CY130080/unreleased/reference.fasta @@ -0,0 +1,23 @@ +>CY130080.1 Influenza A virus (A/duck/Alberta/60/1976(H12N5)) neuraminidase (NA) gene, complete cds +AGTTTAAAATGAATCCAAATCAGAAAATAATAACAATTGGTTCTGTGTCATTGGCACTAGTTATATTCAA +CATATTGCTTCATATTGCATCAATAGCCATAGGAATAATATCAGTGACAAAAGAAAGCAGTGTGTCATCA +TCCTGCAACACCACCGAGGTTTACAATGAAACTGTGAGGCTAGAAACTATAACAATTCCTATCAATAACA +CTGTGTATATAGAAAGGGAGTTACATCAGGAACCTGAGTTCTTAAACAACACAGAACCCCTCTGCAATGT +ATCCGGATTTGCAATAGTTTCCAAGGACAATGGAATCAGAATAGGGTCAAGGGGGCATGTGTTTGTCATA +AGAGAACCATTTGTGGCATGTGGTCCCACGGAATGTAGAACATTTTTCCTAACGCAAGGTGCCTTACTGA +ATGACAAACATTCCAATAATACAGTGAAAGACAGAAGTCCTTATCGTGCATTGATGAGTGTTCCATTAGG +ATCTTCACCCAATGCCTACCAGGCCAAGTTTGAGTCTGTTGCATGGTCAGCCACAGCATGCCATGATGGC +AAAAAATGGCTGGCAGTAGGGATAAGTGGTGCGGATGACGATGCTTATGCTGTAATCCATTATGGGGGAA +TGCCAACAGATGTGGTGAGGTCATGGAGAAAGCAAATTCTAAGAACACAAGAATCATCATGTGTATGTAT +GAATGGGAACTGTTATTGGGTAATGACGGATGGTCCTGCGAACAGTCAGGCTAGTTACAAGATTTTCAAG 
+TCTCATGAGGGAATGGTGACAAATGAAAGAGAAGTGTCGTTTCAGGGAGGCCACATTGAAGAATGTTCTT +GCTACCCCAACTTGGGTAAAGTGGAATGTGTTTGCCGGGATAATTGGAATGGAATGAATAGACCAATTTT +GATCTTTGATGAGGACCTTGACTATGAGGTGGGTTATTTGTGTGCTGGAATTCCGACAGACACTCCACGG +GTTCAGGACAGTAGTTTCACTGGTTCCTGCACTAATGCTGTTGGAGGGAGTGGGACGAATAACTATGGAG +TGAAAGGATTTGGCTTCAGACAAGGTAATAGTGTGTGGGCAGGAAGAACAGTTAGCATTTCGTCCCGAAG +TGGTTTTGAAATCCTATTAATAGAAGATGGTTGGATTAGAACAAGCAAAACAATCGTCAAAAAAGTGGAG +GTCCTCAACAACAAGAATTGGTCAGGATACAGCGGAGCTTTCACCATCCCAATCACAATGACTAGTAAAC +AATGCTTAGTTCCATGTTTCTGGCTGGAAATGATAAGAGGAAAACCAGAAGAGAGGACAAGCATTTGGAC +CTCTAGTAGCTCCACGGTATTTTGTGGTGTTTCAAGTGAGGTCCCAGGATGGTCCTGGGATGATGGAGCA +ATTCTTCCCTTTGACATCGATAAGATGTAATTTGTAAAAAAACT + diff --git a/data/flu/NA/na_n6_h13n6/CY130088/unreleased/dataset.zip b/data/flu/NA/na_n6_h13n6/CY130088/unreleased/dataset.zip new file mode 100644 index 0000000..a9ac45f Binary files /dev/null and b/data/flu/NA/na_n6_h13n6/CY130088/unreleased/dataset.zip differ diff --git a/data/flu/NA/na_n6_h13n6/CY130088/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n6_h13n6/CY130088/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..a997f94 --- /dev/null +++ b/data/flu/NA/na_n6_h13n6/CY130088/unreleased/genome_annotation.gff3 @@ -0,0 +1,9 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY130088.1 1 1442 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=384499 +CY130088.1 Genbank region 1 1442 . + . ID=CY130088.1:1..1442;Dbxref=taxon:384499;Name=6;bio-material=CEIRS#80894#;collection-date=1977;country=USA: Maryland;gbkey=Src;mol_type=viral cRNA;nat-host=gull;segment=6;serotype=H13N6;strain=A/gull/Maryland/704/1977 +CY130088.1 Genbank sequence_feature 1 1442 . + . ID=id-CY130088.1:1..1442;Dbxref=IRD:NIGSP_CEIRS_SJC001_WEB_00015.NA;gbkey=misc_feature +CY130088.1 Genbank gene 7 1422 . + . ID=gene-NA;Name=NA;gbkey=Gene;gene=NA;gene_biotype=protein_coding +CY130088.1 Genbank CDS 7 1422 . 
+ 0 ID=cds-AGB51315.1;Parent=gene-NA;Dbxref=NCBI_GP:AGB51315.1;Name=NA;gbkey=CDS;product=neuraminidase;protein_id=AGB51315.1 diff --git a/data/flu/NA/na_n6_h13n6/CY130088/unreleased/pathogen.json b/data/flu/NA/na_n6_h13n6/CY130088/unreleased/pathogen.json new file mode 100644 index 0000000..bd34905 --- /dev/null +++ b/data/flu/NA/na_n6_h13n6/CY130088/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n6_h13n6", + "reference name": "Influenza A virus (A/gull/Maryland/704/1977(H13N6)) neuraminidase (NA) gene, complete cds", + "reference accession": "CY130088" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n6_h13n6/CY130088/unreleased/reference.fasta b/data/flu/NA/na_n6_h13n6/CY130088/unreleased/reference.fasta new file mode 100644 index 0000000..6d599d6 --- /dev/null +++ b/data/flu/NA/na_n6_h13n6/CY130088/unreleased/reference.fasta @@ -0,0 +1,23 @@ +>CY130088.1 Influenza A virus (A/gull/Maryland/704/1977(H13N6)) neuraminidase (NA) gene, complete cds +GTGACAATGAATCCAAATCAGAAGATAATATGCATTTCAGCTACAGGAATGACACTATCGGTAGTAAGCC +TCCTGATAGGAATTGCCAATTTAGGTTTAAACATCGGACTCCATTATAAGGTAGGCGATACACCAGATGT +GAATACTCCAAATGTGAATGGTACCAATTCAACAACAACAACAATAATTAACAACAATACCCAGAATAAT +TTCACAAACATCACCAACATCATACATAACAAAAATGAGGAGAGGACATTTCTAAATTTAACTAAGCCTC +TATGTGAAGTAAACTCATGGCACATCCTGTCAAAAGATAATGCAATAAGAATTGGAGAGGAAGCTCATAT +ATTAGTCACAAGGGAACCCTATTTATCCTGTGATCCACAAGGGTGCAGGATGTTTGCTCTAAGTCAAGGC +ACAACACTCAGGGGGCGACATGCAAATGGGACTATACATGATAGGAGTCCGTTCAGAGCCCTCGTAAGTT +GGGAAATGGGTCAAGCACCCAGCCCATATAACGCTAAGATCGAATGTATAGGGTGGTCAAGCACATCATG +CCATGACGGCATATCAAGAATGTCAATATGCATGTCAGGACCAAACAATAATGCATCAGCTGTGGTGTGG +TATGGGGGTAGACCAGTAACAGAAATTCCATCATGGGCAGGAAATATTCTCAGAACCCAAGAGTCAGAGT +GTGTATGCCATAAGGGAATTTGTCCAGTAGTCATGACAGATGGCCCAGCAAATAATAAAGCAGCAACTAA 
+GATAATCTATTTCAAAGAAGGGAAAATACAAAAAATTGAAGAGCTGACAGGAAACGCCCAACACATTGAA +GAATGCTCATGCTATGGAGCAAAAGAAGTGATCAAATGCATATGCAGAGACAATTGGAAGGGGGCAAATA +GACCAGTAATCACTATAGACCCTGAGATGATGACCCACACAAGCAAGTATTTATGCTCAAAGATCCTAAC +CGATACAAGTCGTCCCAATGATCCCACTAATGGAAACTGTGACGCACCAATAACAGGAGGAAACCCAGAT +CCTGGAGTCAAGGGGTTTGCATTCCTAGATGGGGAAAATTCATGGCTTGGAAGGACAATTAGCAAAGACT +CCAGATCAGGTTACGAAATGTTAAAAGTCCCAAATGCAGAAACCAATACCCAATCGGGCCCAATCACACA +CCAGGTAATTGTCAACAACCAAAACTGGTCGGGATACTCAGGAGCATTCATAGACTACTGGGCAAACAAA +GAGTGCTTCAATCCTTGTTTCTATGTGGAGCTAATTAGAGGGAGGCCCAAAGAAAGCAGTGTACTGTGGA +CTTCAAATAGCATTGTAGCTCTTTGCGGATCCAAGGAGCGATTGGGATCATGGTCCTGGCATGATGGTGC +TGAAATCATCTACTTTAAGTAGGAATGATTTAGGAAAAACAC + diff --git a/data/flu/NA/na_n6_h4n6/CY181243/unreleased/dataset.zip b/data/flu/NA/na_n6_h4n6/CY181243/unreleased/dataset.zip new file mode 100644 index 0000000..51e8f5e Binary files /dev/null and b/data/flu/NA/na_n6_h4n6/CY181243/unreleased/dataset.zip differ diff --git a/data/flu/NA/na_n6_h4n6/CY181243/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n6_h4n6/CY181243/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..9593a30 --- /dev/null +++ b/data/flu/NA/na_n6_h4n6/CY181243/unreleased/genome_annotation.gff3 @@ -0,0 +1,9 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY181243.1 1 1439 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1445223 +CY181243.1 Genbank region 1 1439 . + . ID=CY181243.1:1..1439;Dbxref=taxon:1445223;Name=6;collection-date=1974;country=USA: DeSoto West%2C WI;gbkey=Src;mol_type=viral cRNA;nat-host=mallard;segment=6;serotype=H4N6;strain=A/mallard/Wisconsin/14/1974 +CY181243.1 Genbank sequence_feature 1 1439 . + . ID=id-CY181243.1:1..1439;Dbxref=IRD:NIGSP_SSC_00484.NA;gbkey=misc_feature +CY181243.1 Genbank gene 7 1419 . + . ID=gene-NA;Name=NA;gbkey=Gene;gene=NA;gene_biotype=protein_coding +CY181243.1 Genbank CDS 7 1419 . 
+ 0 ID=cds-AHN04765.1;Parent=gene-NA;Dbxref=NCBI_GP:AHN04765.1;Name=NA;gbkey=CDS;product=neuraminidase;protein_id=AHN04765.1 diff --git a/data/flu/NA/na_n6_h4n6/CY181243/unreleased/pathogen.json b/data/flu/NA/na_n6_h4n6/CY181243/unreleased/pathogen.json new file mode 100644 index 0000000..48a2941 --- /dev/null +++ b/data/flu/NA/na_n6_h4n6/CY181243/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n6_h4n6", + "reference name": "Influenza A virus (A/mallard/Wisconsin/14/1974(H4N6)) neuraminidase (NA) gene, complete cds", + "reference accession": "CY181243" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n6_h4n6/CY181243/unreleased/reference.fasta b/data/flu/NA/na_n6_h4n6/CY181243/unreleased/reference.fasta new file mode 100644 index 0000000..098e4e4 --- /dev/null +++ b/data/flu/NA/na_n6_h4n6/CY181243/unreleased/reference.fasta @@ -0,0 +1,23 @@ +>CY181243.1 Influenza A virus (A/mallard/Wisconsin/14/1974(H4N6)) neuraminidase (NA) gene, complete cds +GTGAAGATGAATCCAAATCAGAAGATAATATGCATCTCAGCAACAGGAATGACACTATCCGTAGTAAGTC +TGCTAATAGGATTGGCCAACTTGGGTATGAACATTGGGCTTCATTTCAAGGTAGGAGACACACCGGAAAT +AGAGACCCCTAGCATCAACGAGGCAAACTCCACAACCACGATAATCAACTACAATACCCAAAACAATTTC +ACAAATGTAACCAATATTGTGTTGATTAAAGAAGAAGACAAAATGTTCACAAACCTTTCAAAACCCTTGT +GTGAAGTAAACTCATGGCATATTCTATCTAAGGACAATGCGGTTAGAATAGGGGAGGATGCCCACATCCT +TGTCACAAGAGAACCATATCTCTCATGTGGACCACATGAATGCAGAATGTTTGCCCTCAGCCAAGGTACC +ACACTAAGGGGTCGACATGCAAATGGGACTATACATGACAGAAGCCCATTTAGGGCATTAATAAGTTGGG +AAATGGGGCAAGCACCGAGTCCGTACAATGTCAAAGTAGAATGCGTGGGATGGTCCAGCACTTCATGCCA +TGACGGCATCTCAAGAATGTCAATCTGCATGTCAGGACCTAATAACAATGCTTCGGCAGTGGTCTGGTAC +AATGGAAGACCAGTCACCGAGATTGCTTCGTGGGCAGGGAATATATTAAGGACTCAGGAATCAGAATGTG +TATGCCATAATGGAATATGCCCTGTAGTGATGACGGATGGCCCAGCTAATAACAGAGCAGAAACAAAAAT 
+AATTTATTTCAAAGAGGGAAAAATACAGAAAATAGAGGAATTGACAGGAAGTGCACAGCATATAGAAGAG +TGCTCATGCTATGGAGCAGAAGAAATGATTAAATGCATTTGCAGGGACAATTGGAAAGGTGCAAATAGAC +CAGTAATCACTATAAACCCAAAGACAATGACTCATACAAGCAAATACTTGTGTTCAAAGGTTCTAACTGA +CACAAGTCGGCCTAATGACCCCGGAAGCGGAAACTGTGATGCACCAATAACCGGAGGGAGCCCAGATCCT +GGCGTAAAAGGATTTGCATTCTTAGATGGGGGAAATTCCTGGTTGGGAAGGACCATAAGCAAAGATTCAA +GGTCAGGGTATGAGATGCTAAAAGTCCCAAATGCGGAAACAGATAATCAGTCCGGTCCAGTTGAACATCA +GGTGATAGTAAACAACCAAAACTGGTCAGGGTACTCAGGAGCGTTCATCGATTATTGGGCTAATAGAGAG +TGCTTTAACCCTTGCTTTTATGTGGAATTGATCAGAGGCATGCCAAAAGAGAGTAGTGTATTGTGGACAT +CCAACAGTATAGTAGCGCTTTGTGGATCCAAGGAGCGATTGGGATCGTGGTCATGGCATGATGGGGCTGA +AATCATCTACTTTAAGTAGAAAAGATTTTGGAAAAACAC + diff --git a/data/flu/NA/na_n7_h10n7/CY136096/unreleased/dataset.zip b/data/flu/NA/na_n7_h10n7/CY136096/unreleased/dataset.zip new file mode 100644 index 0000000..757545a Binary files /dev/null and b/data/flu/NA/na_n7_h10n7/CY136096/unreleased/dataset.zip differ diff --git a/data/flu/NA/na_n7_h10n7/CY136096/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n7_h10n7/CY136096/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..c466dd8 --- /dev/null +++ b/data/flu/NA/na_n7_h10n7/CY136096/unreleased/genome_annotation.gff3 @@ -0,0 +1,9 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY136096.1 1 1437 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1228423 +CY136096.1 Genbank region 1 1437 . + . ID=CY136096.1:1..1437;Dbxref=taxon:1228423;Name=6;bio-material=CEIRS#9BM11387#;country=USA: Alaska;gbkey=Src;lab-host=R0 passage(s);mol_type=viral cRNA;nat-host=northern pintail%3B gender F%3B age hatch year;note=Sample provided by University of Alaska Fairbanks;segment=6;serotype=H10N7;strain=A/northern pintail/Interior Alaska/9BM11387R0/2009 +CY136096.1 Genbank sequence_feature 1 1437 . + . ID=id-CY136096.1:1..1437;Dbxref=IRD:NIGSP_CEIRS_CIP055_AK2_00216.NA;gbkey=misc_feature +CY136096.1 Genbank gene 11 1423 . + . 
ID=gene-NA;Name=NA;gbkey=Gene;gene=NA;gene_biotype=protein_coding +CY136096.1 Genbank CDS 11 1423 . + 0 ID=cds-AGG26201.1;Parent=gene-NA;Dbxref=NCBI_GP:AGG26201.1;Name=NA;gbkey=CDS;product=neuraminidase;protein_id=AGG26201.1 diff --git a/data/flu/NA/na_n7_h10n7/CY136096/unreleased/pathogen.json b/data/flu/NA/na_n7_h10n7/CY136096/unreleased/pathogen.json new file mode 100644 index 0000000..13ecbf8 --- /dev/null +++ b/data/flu/NA/na_n7_h10n7/CY136096/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n7_h10n7", + "reference name": "Influenza A virus (A/northern pintail/Interior Alaska/9BM11387R0/2009(H10N7)) neuraminidase (NA) gene, complete cds", + "reference accession": "CY136096" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n7_h10n7/CY136096/unreleased/reference.fasta b/data/flu/NA/na_n7_h10n7/CY136096/unreleased/reference.fasta new file mode 100644 index 0000000..47360df --- /dev/null +++ b/data/flu/NA/na_n7_h10n7/CY136096/unreleased/reference.fasta @@ -0,0 +1,23 @@ +>CY136096.1 Influenza A virus (A/northern pintail/Interior Alaska/9BM11387R0/2009(H10N7)) neuraminidase (NA) gene, complete cds +GTGATTGAGAATGAATCCTAATCAAAAATTATTCGCACTCTCTGGGGTGGCCATAGCACTGAGTATCCTC +AACCTACTAATAGGAATATCCAATGTGGGACTGAATGTCTCACTACACCTAAAGGGAAGCAGTAACCAGG +ATAGAAATTGGACATGCACGAGTGTAACACAAAACAATACGACTTTAATTGAAAACACATATGTCAACAA +TACTACTGTCATCAATAAGGAAACAGGGACTGCAAAGCCAAATTATCTAATGCTGAACAAGAGCTTATGC +AAAGTTGAAGGATGGGTAGTGGTGGCCAAGGACAATGCCATAAGATTCGGTGAAGGTGAACAAATAATAG +TGACAAGAGAGCCGTATGTGTCATGTGATCCATTAGGATGTAAGATGTACGCACTGCATCAAGGGACAAC +CATTAGAAACAAGCATTCAAACGGAACAATACACGACAGGACTGCTTTCAGAGGCTTGATATCAACTCCT +TTGGGGAGCCCCCCTGTAGTCAGCAATAGTGACTTTCTTTGTGTAGGGTGGTCAAGCACCAGTTGCCATG +ACGGCATCGGGCGAATGACCATTTGTGTGCAGGGAAATAATGACAACGCAACAGCTACAGTGTACTATGA 
+CCGAAGGCTCACTACCACAATAAAAACATGGGCAGGAAACATCCTTAGGACGCAAGAGTCGGAATGTGTA +TGCCACAATGGAACATGTGTAGTAATAATGACTGATGGATCGGCAAGCAGCCAGGCATATACAAAAGTTC +TGTATTTCCACAAAGGACTAGTAATAAAAGAGGAAGCCCTTAAAGGATCAGCCAGACACATAGAGGAGTG +CTCATGCTATGGGCACAATTCGAAGGTGACTTGTGTATGCAGGGACAACTGGCAAGGAGCCAATAGACCA +GTGATTGAAATAGATATGAATGCCATGGAGCATACAAGCCAGTATCTATGTACAGGAGTTCTCACTGACA +CGAGCAGACCATCAGACAAATCAATGGGAGACTGCAATAATCCGATCACTGGGAGTCCGGGAGCCCCTGG +GGTCAAAGGATTCGGCTTCCTGGATAGTAGCAATACATGGTTGGGCCGCACAATAAGTCCTCGTTCCAGG +AGTGGTTTTGAGATGTTGAAGATACCTAATGCTGAGACAGACCCAAATTCTAAAATCACCGAGAGGCAAG +AAATAGTTGACAACAACAATTGGTCAGGATACTCAGGAAGTTTCATTGACTATTGGGACGAAAGCAGTGA +GTGCTACAACCCCTGTTTTTATGTTGAATTAATAAGAGGAAGGCCTGAAGAAGCCAAGTATGTTTGGTGG +GCGAGCAACAGTTTAGTTGCACTATGTGGAAGCCCAATCTCAGTTGGGTCCGGTTCCTTCCCCGATGGGG +CACAAATCCAATACTTTTCGTAAATTGCAAAAAACAC + diff --git a/data/flu/NA/na_n8_h3n8/CY028838/unreleased/dataset.zip b/data/flu/NA/na_n8_h3n8/CY028838/unreleased/dataset.zip new file mode 100644 index 0000000..6a8f450 Binary files /dev/null and b/data/flu/NA/na_n8_h3n8/CY028838/unreleased/dataset.zip differ diff --git a/data/flu/NA/na_n8_h3n8/CY028838/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n8_h3n8/CY028838/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..89b948b --- /dev/null +++ b/data/flu/NA/na_n8_h3n8/CY028838/unreleased/genome_annotation.gff3 @@ -0,0 +1,8 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY028838.1 1 1423 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=387222 +CY028838.1 Genbank region 1 1423 . + . ID=CY028838.1:1..1423;Dbxref=taxon:387222;Name=6;collection-date=1963;country=USA: Florida;gbkey=Src;mol_type=viral cRNA;nat-host=Equine;segment=6;serotype=H3N8;strain=A/equine/Miami/1/1963 +CY028838.1 Genbank gene 1 1413 . + . ID=gene-NA;Name=NA;gbkey=Gene;gene=NA;gene_biotype=protein_coding +CY028838.1 Genbank CDS 1 1413 . 
+ 0 ID=cds-ABY81495.1;Parent=gene-NA;Dbxref=NCBI_GP:ABY81495.1;Name=NA;gbkey=CDS;product=neuraminidase;protein_id=ABY81495.1 diff --git a/data/flu/NA/na_n8_h3n8/CY028838/unreleased/pathogen.json b/data/flu/NA/na_n8_h3n8/CY028838/unreleased/pathogen.json new file mode 100644 index 0000000..15296ba --- /dev/null +++ b/data/flu/NA/na_n8_h3n8/CY028838/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n8_h3n8", + "reference name": "Influenza A virus (A/equine/Miami/1/1963(H3N8)) segment 6, complete sequence", + "reference accession": "CY028838" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n8_h3n8/CY028838/unreleased/reference.fasta b/data/flu/NA/na_n8_h3n8/CY028838/unreleased/reference.fasta new file mode 100644 index 0000000..af2cbb4 --- /dev/null +++ b/data/flu/NA/na_n8_h3n8/CY028838/unreleased/reference.fasta @@ -0,0 +1,23 @@ +>CY028838.1 Influenza A virus (A/equine/Miami/1/1963(H3N8)) segment 6, complete sequence +ATGAATCCAAATCAAAAGATAATAACAATTGGATCTGCATCATTAGGGCTTGTAATCCTCAACGTCATTC +TCCATGTGGTCAGCATTATAGTAACAGTACTGGTCCTCAGTAACAATGGGACAGGTCCGAATTGCAACGG +GACGATCATAAGGGAGTACAATGAAACAGTGAGAGTAGAAAGAATTACTCAATGGTATAATACTAATATA +ATCGAGTATATAGAGGAACCTTCAAATGAATACTATATGAGCAACACCGAGCCACTGTGTGAAGCCCAGG +GCTTTGCACCATTTTCCAAAGATAATGGAATACGAATTGGATCGAGAGGCCATGTTTTTGTAATAAGAGA +ACCTTTTGTTTCATGTTCGCCGTTAGAATGTAGAACCTTTTTCCTCACACAGGGCTCATTACTTAATGAC +AAGCATTCCAACGGCACAGTGAAGGACCGAAGTCCATATAGGACTTTGATGAGTGTCGAAGTAGGGCAAT +CACCTAACGTGTATCAAGCTAGGTTTGAAGCGGTGGCATGGTCAGCAACAGCATGCCATGATGGGAAAAA +GTGGATGACAGTTGGAGTCACAGGGCCCGACGCTCAAGCAGTTGCAGTGGTGCACTATGGAGGTGTTCCG +GTTGACGTCATTAATTCATGGGCAGGGGATATTCTAAGAACCCAAGAATCGTCATGCACCTGCATTAAAG +GAGACTGTTATTGGGTGATGACTGACGGACCGGCAAACAGGCAAGCTCAATATAGGATATTCAAAGCAAA 
+AGATGGGAGAATAATTGGGCAGACTGATATAAATTTCAATGGGGGACACATAGAGGAGTGTTCGTGTTAC +CCCAATGAAGGGAAGGTGGAGTGCGTATGCAGGGACAACTGGACTGGAACAAATAGGCCGGTTCTGGTAA +TATCTCCTGATCTATCGTACACAGTCGGATATTTGTGTGCTGGCATTCCCACTGACACTCCTAGGGGAGA +GGATAGTCAATTCACAGGCTCATGCACAAGCCCTTTGGGAAGTCAAGGATACGGTGTAAAGGGTTTCGGG +TTTCGACAAGGGAATGACGTGTGGGCCGGAAGGACAATTAGTAGGACTTCCAGATCAGGATTCGAAATAA +TAAAAATCAGGAATGGTTGGACACAAAACAGTAAAGACCAAATCCGAAAGCAGGTGATTGTTGATAACCT +AAACTGGTCAGGATATAGTGGTTCTTTCACATTGCCGGTTGAACTAACAAAGAAAGGATGTTTAGTCCCC +TGTTTCTGGGTTGAAATGATCAGAGGTAAACCTGAAGAGATAACAATATGGACCTCTAGCAGCTCCATTG +TGATGTGTGGAGTAGACCATAAAGTTGCCAGTTGGTCATGGCACGATGGAGCTATTCTTCCCTTTGACAT +CGATAAGATGTAGTTTACGAAAA + diff --git a/data/flu/NA/na_n9_h11n9/CY130072/unreleased/dataset.zip b/data/flu/NA/na_n9_h11n9/CY130072/unreleased/dataset.zip new file mode 100644 index 0000000..df36365 Binary files /dev/null and b/data/flu/NA/na_n9_h11n9/CY130072/unreleased/dataset.zip differ diff --git a/data/flu/NA/na_n9_h11n9/CY130072/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n9_h11n9/CY130072/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..699cd67 --- /dev/null +++ b/data/flu/NA/na_n9_h11n9/CY130072/unreleased/genome_annotation.gff3 @@ -0,0 +1,9 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY130072.1 1 1435 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=402474 +CY130072.1 Genbank region 1 1435 . + . ID=CY130072.1:1..1435;Dbxref=taxon:402474;Name=6;bio-material=CEIRS#162855#;collection-date=1974;country=USA: Memphis;gbkey=Src;mol_type=viral cRNA;nat-host=duck;segment=6;serotype=H11N9;strain=A/duck/Memphis/546/1974 +CY130072.1 Genbank sequence_feature 1 1435 . + . ID=id-CY130072.1:1..1435;Dbxref=IRD:NIGSP_CEIRS_SJC001_WEB_00013.NA;gbkey=misc_feature +CY130072.1 Genbank gene 7 1419 . + . ID=gene-NA;Name=NA;gbkey=Gene;gene=NA;gene_biotype=protein_coding +CY130072.1 Genbank CDS 7 1419 . 
+ 0 ID=cds-AGB50963.1;Parent=gene-NA;Dbxref=NCBI_GP:AGB50963.1;Name=NA;gbkey=CDS;product=neuraminidase;protein_id=AGB50963.1 diff --git a/data/flu/NA/na_n9_h11n9/CY130072/unreleased/pathogen.json b/data/flu/NA/na_n9_h11n9/CY130072/unreleased/pathogen.json new file mode 100644 index 0000000..dbd60f7 --- /dev/null +++ b/data/flu/NA/na_n9_h11n9/CY130072/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n9_h11n9", + "reference name": "Influenza A virus (A/duck/Memphis/546/1974(H11N9)) neuraminidase (NA) gene, complete cds", + "reference accession": "CY130072" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n9_h11n9/CY130072/unreleased/reference.fasta b/data/flu/NA/na_n9_h11n9/CY130072/unreleased/reference.fasta new file mode 100644 index 0000000..f18b4b0 --- /dev/null +++ b/data/flu/NA/na_n9_h11n9/CY130072/unreleased/reference.fasta @@ -0,0 +1,23 @@ +>CY130072.1 Influenza A virus (A/duck/Memphis/546/1974(H11N9)) neuraminidase (NA) gene, complete cds +GTCAAGATGAATCCAAATCAGAAGATTCTATGCACTTCTGCCACTGCTATCGTAATAGGCACAATTGCAG +TACTCATAGGAATAGCGAACTTAGGATTGAATATAGGACTACATCTGAAACCGAGCTGCAATTGCTCACA +CTCACAACCTGAAGCAATCAATGCAAGCCAAACAATAATAAACAACTACTATAATGAAACAAACATCACC +CAAATAAGTAATACCAACATCCAAGTAGAGGAAAGGGCAAACAGAGACTTCAATAACTTAACTAGAGGGC +TCTGTACTATAAATTCATGGCACATATTTGGGAAAGACAATGCGGTAAGAATTGGGGAGGACTCAGATGT +TTTAGTCACAAGAGAACCCTATGTCTCCTGTGACCCAGATGAGTGCAGGTTCTATGCTCTCAGCCAAGGG +ACAACAATCAGAGGGAAACATTCAAATGGAACAATACACGATAGATCCCAATACCGCGCCCTGATAAGCT +GGCCACTGTCATCACCGCCCACAGTATACAATAGCAGAGTGGAATGCATTGGATGGTCAAGCACTAGTTG +CCATGATGGCAAAGCCAGGATGTCAATATGTATATCGGGCCCGAACAACAATGCATCAGCAGTAATCTGG +TACAATAGAAGACCTGTTACAGAAATCAACACATGGGCCCGAAACATACTAAGAACACAAGAATCTGAAT +GTGTATGCCACAACGGTGTTTGCCCGGTAGTGTTCACAGATGGGTCTGCCACTGGACCTGCAGAGACAAG 
+AATATACTATTTTAAGGAAGGAAAGACATTAAAATGGGAACCTCTGACTGGAACGGCTAAACATATCGAA +GAATGCTCATGTTACGGGGAGCGGGCAGGGATTACTTGCACATGCAGGGATAATTGGCAGGGCTCAAATA +GGCCAGTAATTCAGATAGATCCAGTGGCGATGACACACACTAGTCAGTATATATGTAGCCCTGTTCTTAC +AGACAACCCCCGACCGAATGATCCAACTGTAGGTAAGTGTAACGACCCTTATCCAGGCAATAACAACAAC +GGGGTCAAAGGGTTTTCATACCTGGATGGGAGTAATACCTGGTTGGGGAGGACAATAAGCACAGCTTCAA +GATCCGGATATGAGATGCTAAAGGTGCCAAATGCATTGACAGACGATAGATCAAAGCCCACTCAAGGTCA +GACTATCGTATTAAACACTGACTGGAGTGGTTACAGTGGGTCCTTCATGGACTATTGGGCTGAGGGGGAA +TGCTACCGAGCGTGCTTTTACGTGGAGTTAATACGTGGAAGACCTAAGGAGGATAAAGTATGGTGGACCA +GTAATAGTATAGTATCAATGTGTTCCAGCACAGAATTCCTTGGACAATGGAACTGGCCTGATGGGGCTAA +AATAGAGTACTTCCTCTAAGATACAGAAAAAAGAC + diff --git a/data/flu/NA/na_n9_h15n9/CY005407/unreleased/dataset.zip b/data/flu/NA/na_n9_h15n9/CY005407/unreleased/dataset.zip new file mode 100644 index 0000000..e0c5e89 Binary files /dev/null and b/data/flu/NA/na_n9_h15n9/CY005407/unreleased/dataset.zip differ diff --git a/data/flu/NA/na_n9_h15n9/CY005407/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n9_h15n9/CY005407/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..d050b31 --- /dev/null +++ b/data/flu/NA/na_n9_h15n9/CY005407/unreleased/genome_annotation.gff3 @@ -0,0 +1,10 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region CY005407.1 1 1459 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=352560 +CY005407.1 Genbank region 1 1459 . + . ID=CY005407.1:1..1459;Dbxref=taxon:352560;Name=6;collection-date=1979;country=Australia: Western Australia;gbkey=Src;mol_type=viral cRNA;nat-host=Avian;segment=6;serotype=H15N9;strain=A/wedge-tailed shearwater/Western Australia/2576/1979 +CY005407.1 Genbank primer_binding_site 1 15 . + . ID=id-CY005407.1:1..15;Note=PCR amplification primer sequence;gbkey=primer_bind +CY005407.1 Genbank gene 19 1431 . + . ID=gene-NA;Name=NA;gbkey=Gene;gene=NA;gene_biotype=protein_coding +CY005407.1 Genbank CDS 19 1431 . 
+ 0 ID=cds-ABB88141.1;Parent=gene-NA;Dbxref=NCBI_GP:ABB88141.1;Name=NA;gbkey=CDS;product=neuraminidase;protein_id=ABB88141.1 +CY005407.1 Genbank primer_binding_site 1439 1459 . - . ID=id-CY005407.1:1439..1459;Note=PCR amplification primer sequence;gbkey=primer_bind diff --git a/data/flu/NA/na_n9_h15n9/CY005407/unreleased/pathogen.json b/data/flu/NA/na_n9_h15n9/CY005407/unreleased/pathogen.json new file mode 100644 index 0000000..dd3d694 --- /dev/null +++ b/data/flu/NA/na_n9_h15n9/CY005407/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n9_h15n9", + "reference name": "Influenza A virus (A/wedge-tailed shearwater/Western Australia/2576/1979(H15N9)) segment 6, complete sequence", + "reference accession": "CY005407" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n9_h15n9/CY005407/unreleased/reference.fasta b/data/flu/NA/na_n9_h15n9/CY005407/unreleased/reference.fasta new file mode 100644 index 0000000..c9d1f05 --- /dev/null +++ b/data/flu/NA/na_n9_h15n9/CY005407/unreleased/reference.fasta @@ -0,0 +1,23 @@ +>CY005407.1 Influenza A virus (A/wedge-tailed shearwater/Western Australia/2576/1979(H15N9)) segment 6, complete sequence +AGCAAAAGCAGGGTCAAGATGAATCCAAATCAGAAGATTCTATGCACATCTGCCACTGCCATCGCAATAG +GCACAATTGCTGTGTTAATAGGAATAGCAAACCTAGGTTTGAACATAGGACTACACCTGAAACCGGGCTG +CAACTGCTCCAATCCCACTCCTGAAGCCACAAATGTGAGCCAAACAATAATAAACAATTACCACAATGAA +ACAAATATTACTCAAATAAGCAATACAAACATTCAACATATGGGGGAAACTGACAGAGACTTCAACAATC +TGACCAAAGGGCTCTGCACAATAAATTCGTGGCATATATTCGGGAAGGACAATGCTATAAGAATAGGGGA +GAACTCTGATGTTTTAGTCACAAGAGAGCCATATGTTTCTTGTGATCCAGATGGATGCAGATTTTATGCT +CTTAGCCAAGGAACAACGATAAGGGGAAAGCACTCAAATGGAACAATACACGATAGATCCCAATACCGTG +CTTTAGTGAGCTGGCCTTTATCATCACCACCCACTGTATACAATACCAGGGTAGAATGCATTGGATGGTC 
+CAGCACAAGCTGCCATGATGGGAAAGCACGAATGTCTATATGTGTTTCAGGTCCCAACAACAATGCATCA +GCAGTGATTTGGTACAAAGGGAGGCCTATCACAGAAATCAATACGTGGGCCCGAAACATATTGAGAACCC +AAGAGTCTGAGTGTGTGTGCCACAATGGAGTATGTCCAGTAGTGTTCACTGACGGTTCTGCTACTGGCCC +AGCAGAAACTAGGATATACTACTTCAAAGAGGGGAAAATCCTTAAATGGGAGCCACTAACTGGAACTGCC +AAGCACATTGAGGAATGCTCTTGCTATGGGAAAGACTCAGAAATAACATGCACATGTAGAGACAATTGGC +AAGGCTCGAATAGACCAGTGATACAAATAAATCCCACAATGATGACGCACACTAGTCAATACATATGCAG +CCCTGTCCTCACAGACAATCCACGTCCCAATGACCCCGCGGTAGGCAAGTGTAATGATCCTTATCCAGGG +AACAACAATAATGGGGTCAAAGGATTCTCATATTTAGATGGTGACAATACGTGGCTAGGAAGAACAATAA +GCACAGCTTCTAGGTCTGGATATGAAATGCTGAAAGTGCCTAATGCACTGACAGATGATAGATCAAAACC +TACTCAAGGTCAGACAATTGTGTTAAACACAGACTGGAGTGGTTACAGTGGGTCTTTCATTGATTACTGG +GCAAAAGGGGAGTGCTATAGAGCATGCTTCTATGTTGAGCTGATCCGTGGGAGGCCAAAGGAGGACAAAG +TGTGGTGGACCAGTAATAGCATAGTATCGATGTGTTCCAGCACAGAGTTCCTTGGACAATGGAACTGGCC +AGATGGGGCTAAAATAGAGTACTTCCTCTAAGATGTAGAAAAAGACCCTTGTTTCTACT + diff --git a/data/flu/NA/na_n9_h7n9/NC_026429.1/unreleased/dataset.zip b/data/flu/NA/na_n9_h7n9/NC_026429.1/unreleased/dataset.zip new file mode 100644 index 0000000..6fe0ae4 Binary files /dev/null and b/data/flu/NA/na_n9_h7n9/NC_026429.1/unreleased/dataset.zip differ diff --git a/data/flu/NA/na_n9_h7n9/NC_026429.1/unreleased/genome_annotation.gff3 b/data/flu/NA/na_n9_h7n9/NC_026429.1/unreleased/genome_annotation.gff3 new file mode 100644 index 0000000..5723836 --- /dev/null +++ b/data/flu/NA/na_n9_h7n9/NC_026429.1/unreleased/genome_annotation.gff3 @@ -0,0 +1,8 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region NC_026429.1 1 1398 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1332244 +NC_026429.1 RefSeq region 1 1398 . + . ID=NC_026429.1:1..1398;Dbxref=taxon:1332244;Name=6;collection-date=05-Mar-2013;country=China;gbkey=Src;genome=genomic;mol_type=viral cRNA;nat-host=Homo sapiens;note=passage details: E1;segment=6;serotype=H7N9;strain=A/Shanghai/02/2013 +NC_026429.1 RefSeq gene 1 1398 . + . 
ID=gene-TS66_s6gp1;Dbxref=GeneID:23104238;Name=NA;gbkey=Gene;gene=NA;gene_biotype=protein_coding;locus_tag=TS66_s6gp1 +NC_026429.1 RefSeq CDS 1 1398 . + 0 ID=cds-YP_009118481.1;Parent=gene-TS66_s6gp1;Dbxref=GenBank:YP_009118481.1,GeneID:23104238;Name=NA;gbkey=CDS;locus_tag=TS66_s6gp1;product=neuraminidase;protein_id=YP_009118481.1 diff --git a/data/flu/NA/na_n9_h7n9/NC_026429.1/unreleased/pathogen.json b/data/flu/NA/na_n9_h7n9/NC_026429.1/unreleased/pathogen.json new file mode 100644 index 0000000..79aa837 --- /dev/null +++ b/data/flu/NA/na_n9_h7n9/NC_026429.1/unreleased/pathogen.json @@ -0,0 +1,16 @@ +{ + "alignmentParams": { + "minSeedCover": 0.01 + }, + "schemaVersion": "3.0.0", + "attributes": { + "name": "na_n9_h7n9", + "reference name": "Influenza A virus (A/Shanghai/02/2013(H7N9)) segment 6 neuraminidase (NA) gene, complete cds", + "reference accession": "NC_026429.1" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + } +} diff --git a/data/flu/NA/na_n9_h7n9/NC_026429.1/unreleased/reference.fasta b/data/flu/NA/na_n9_h7n9/NC_026429.1/unreleased/reference.fasta new file mode 100644 index 0000000..63f3bfe --- /dev/null +++ b/data/flu/NA/na_n9_h7n9/NC_026429.1/unreleased/reference.fasta @@ -0,0 +1,22 @@ +>NC_026429.1 Influenza A virus (A/Shanghai/02/2013(H7N9)) segment 6 neuraminidase (NA) gene, complete cds +ATGAATCCAAATCAGAAGATTCTATGCACTTCAGCCACTGCTATCATAATAGGCGCAATCGCAGTACTCA +TTGGAATGGCAAACCTAGGATTGAACATAGGACTGCATCTAAAACCGGGCTGCAATTGCTCACACTCACA +ACCTGAAACAACCAACACAAGCCAAACAATAATAAACAACTATTATAATGAAACAAACATCACCAAYATC +CAAATGGAAGAGAGAACAAGCAGGAATTTCAATAACTTAACTAAAGGGCTCTGTACTATAAATTCATGGC +ACATATATGGGAAAGACAATGCAGTAAGAATTGGAGAGAGCTCGGATGTTTTAGTCACAAGAGAACCCTA +TGTTTCATGCGACCCAGATGAATGCAGGTTCTATGCTCTCAGCCAAGGAACAACAATCAGAGGGAAACAC +TCAAACGGAACAATACACGATAGGTCCCAGTATCGCGCCCTGATAAGCTGGCCACTATCATCACCGCCCA +CAGTGTACAACAGCAGGGTGGAATGCATTGGGTGGTCAAGTACTAGTTGCCATGATGGCAAATCCAGGAT 
+GTCAATATGTATATCAGGACCAAACAACAATGCATCTGCAGTAGTATGGTACAACAGAAGGCCTGTTGCA +GAAATTAACACATGGGCCCGAAACATACTAAGAACACAGGAATCTGAATGTGTATGCCACAACGGCGTAT +GCCCAGTAGTGTTCACCGATGGGTCTGCCACTGGACCTGCAGACACAAGAATATACTATTTTAAAGAGGG +GAAAATATTGAAATGGGAGTCTCTGACTGGAACTGCTAAGCATATTGAAGAATGCTCATGTTACGGGGAA +CGAACAGGAATTACCTGCACATGCAGGGACAATTGGCAGGGCTCAAATAGACCAGTGATTCAGATAGACC +CAGTAGCAATGACACACACTAGTCAATATATATGCAGTCCTGTTCTTACAGACAATCCCCGACCGAATGA +CCCAAATATAGGTAAGTGTAATGACCCTTATCCAGGTAATAATAACAATGGAGTCAAGGGATTCTCATAC +CTGGATGGGGCTAACACTTGGCTAGGGAGGACAATAAGCACAGCCTCGAGGTCTGGATACGAGATGTTAA +AAGTGCCAAATGCATTGACAGATGATAGATCAAAGCCCATTCAAGGTCAGACAATTGTATTAAACGCTGA +CTGGAGTGGTTACAGTGGATCTTTCATGGACTATTGGGCTGAAGGGGACTGCTATCGAGCGTGTTTTTAT +GTGGAGTTGATACGTGGAAGACCCAAGGAGGATAAAGTGTGGTGGACCAGCAATAGTATAGTATCGATGT +GTTCCAGTACAGAATTCCTGGGACAATGGAACTGGCCTGATGGGGCTAAAATAGAGTACTTCCTCTAA + diff --git a/data/index.json b/data/index.json index af5a363..462aa6e 100644 --- a/data/index.json +++ b/data/index.json @@ -301,7 +301,7 @@ { "path": "cchfv/S-2to5", "enabled": true, - "files": { + "files": { "reference": "reference.fasta", "pathogenJson": "pathogen.json", "genomeAnnotation": "genome_annotation.gff3" @@ -319,8 +319,734 @@ "version": { "tag": "unreleased" } + }, + { + "path": "flu/HA/ha_h3_h3n8/CY028836", + "enabled": true, + "attributes": { + "name": "ha_h3_h3n8", + "reference name": "Influenza A virus (A/equine/Miami/1/1963(H3N8)) segment 4, complete sequence", + "reference accession": "CY028836" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h4_h4n6/CY181241", + "enabled": true, + "attributes": { + "name": "ha_h4_h4n6", + "reference name": "Influenza A virus (A/mallard/Wisconsin/14/1974(H4N6)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY181241" + }, + "files": { + 
"reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h5_h5n1/NC_007362.1", + "enabled": true, + "attributes": { + "name": "ha_h5_h5n1", + "reference name": "Influenza A virus (A/goose/Guangdong/1/1996(H5N1)) hemagglutinin (HA) gene, complete cds", + "reference accession": "NC_007362.1" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h5_h5n2/KU143256", + "enabled": true, + "attributes": { + "name": "ha_h5_h5n2", + "reference name": "Influenza A virus (A/chicken/Wuhan/WHJF/2014(H5N2)) segment 4 hemagglutinin (HA) gene, complete cds", + "reference accession": "KU143256" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h6_h6n2/CY130030", + "enabled": true, + "attributes": { + "name": "ha_h6_h6n2", + "reference name": "Influenza A virus (A/turkey/Massachusetts/3740/1965(H6N2)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY130030" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h7_h7n9/NC_026425.1", + "enabled": true, + "attributes": { + "name": "ha_h7_h7n9", + "reference name": "Influenza A virus (A/Shanghai/02/2013(H7N9)) segment 4 hemagglutinin (HA) gene, complete cds", + "reference accession": "NC_026425.1" + }, + "files": { + 
"reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h8_h8n4/CY136131", + "enabled": true, + "attributes": { + "name": "ha_h8_h8n4", + "reference name": "Influenza A virus (A/northern pintail/Interior Alaska/9BM8109R0/2009(H8N4)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY136131" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h9_h9n2/NC_004908.1", + "enabled": true, + "attributes": { + "name": "ha_h9_h9n2", + "reference name": "Influenza A virus ha gene for Hemagglutinin, genomic RNA, strain A/Hong Kong/1073/99(H9N2)", + "reference accession": "NC_004908.1" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h10_h10n7/CY136094", + "enabled": true, + "attributes": { + "name": "ha_h10_h10n7", + "reference name": "Influenza A virus (A/northern pintail/Interior Alaska/9BM11387R0/2009(H10N7)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY136094" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h11_h11n9/CY130070", + "enabled": true, + "attributes": { + "name": "ha_h11_h11n9", + "reference name": "Influenza A virus (A/duck/Memphis/546/1974(H11N9)) hemagglutinin (HA) gene, complete cds", + "reference accession": 
"CY130070" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h12_h12n5/CY130078", + "enabled": true, + "attributes": { + "name": "ha_h12_h12n5", + "reference name": "Influenza A virus (A/duck/Alberta/60/1976(H12N5)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY130078" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h13_h13n6/CY130086", + "enabled": true, + "attributes": { + "name": "ha_h13_h13n6", + "reference name": "Influenza A virus (A/gull/Maryland/704/1977(H13N6)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY130086" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h14_h14n5/JN696314", + "enabled": true, + "attributes": { + "name": "ha_h14_h14n5", + "reference name": "Influenza A virus (A/long-tailed duck/Wisconsin/10OS3912/2010(H14N6)) segment 4 hemagglutinin (HA) gene, complete cds", + "reference accession": "JN696314" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h15_h15n9/CY006010", + "enabled": true, + "attributes": { + "name": "ha_h15_h15n9", + "reference name": "Influenza A virus (A/wedge-tailed shearwater/Western Australia/2576/1979(H15N9)) segment 4, complete sequence", + 
"reference accession": "CY006010" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h16_h16n3/CY136630", + "enabled": true, + "attributes": { + "name": "ha_h16_h16n3", + "reference name": "Influenza A virus (A/laughing gull/Delaware Bay/2839/1987(H16N3)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY136630" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h17_h17n10/CY103876", + "enabled": true, + "attributes": { + "name": "ha_h17_h17n10", + "reference name": "Influenza A virus (A/little yellow-shouldered bat/Guatemala/153/2009(H17N10)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY103876" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/HA/ha_h18_h18n11/CY125945", + "enabled": true, + "attributes": { + "name": "ha_h18_h18n11", + "reference name": "Influenza A virus (A/flat-faced bat/Peru/033/2010(H18N11)) hemagglutinin (HA) gene, complete cds", + "reference accession": "CY125945" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n1_h5n1/NC_007361.1", + "enabled": true, + "attributes": { + "name": "na_n1_h5n1", + "reference name": "Influenza A virus (A/Goose/Guangdong/1/96(H5N1)) neuraminidase (NA) 
gene, complete cds", + "reference accession": "NC_007361.1" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n2_h6n2/CY130032", + "enabled": true, + "attributes": { + "name": "na_n2_h6n2", + "reference name": "Influenza A virus (A/turkey/Massachusetts/3740/1965(H6N2)) neuraminidase (NA) gene, complete cds", + "reference accession": "CY130032" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n2_h9n2/NC_004909.1", + "enabled": true, + "attributes": { + "name": "na_n2_h9n2", + "reference name": "Influenza A virus na gene for neuraminidase, genomic RNA, strain A/Hong Kong/1073/99(H9N2)", + "reference accession": "NC_004909.1" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n2_h5n2/KU143347", + "enabled": true, + "attributes": { + "name": "na_n2_h5n2", + "reference name": "Influenza A virus (A/chicken/Wuhan/WHJF/2014(H5N2)) segment 6 neuraminidase (NA) gene, complete cds", + "reference accession": "KU143347" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n3_h16n3/CY136632", + "enabled": true, + "attributes": { + "name": "na_n3_h16n3", + "reference name": "Influenza A virus (A/laughing gull/Delaware Bay/2839/1987(H16N3)) neuraminidase 
(NA) gene, complete cds", + "reference accession": "CY136632" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n4_h8n4/CY136133", + "enabled": true, + "attributes": { + "name": "na_n4_h8n4", + "reference name": "Influenza A virus (A/northern pintail/Interior Alaska/9BM8109R0/2009(H8N4)) neuraminidase (NA) gene, complete cds", + "reference accession": "CY136133" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n5_h12n5/CY130080", + "enabled": true, + "attributes": { + "name": "na_n5_h12n5", + "reference name": "Influenza A virus (A/duck/Alberta/60/1976(H12N5)) neuraminidase (NA) gene, complete cds", + "reference accession": "CY130080" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n6_h4n6/CY181243", + "enabled": true, + "attributes": { + "name": "na_n6_h4n6", + "reference name": "Influenza A virus (A/mallard/Wisconsin/14/1974(H4N6)) neuraminidase (NA) gene, complete cds", + "reference accession": "CY181243" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n6_h13n6/CY130088", + "enabled": true, + "attributes": { + "name": "na_n6_h13n6", + "reference name": "Influenza A virus (A/gull/Maryland/704/1977(H13N6)) neuraminidase (NA) gene, 
complete cds", + "reference accession": "CY130088" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n7_h10n7/CY136096", + "enabled": true, + "attributes": { + "name": "na_n7_h10n7", + "reference name": "Influenza A virus (A/northern pintail/Interior Alaska/9BM11387R0/2009(H10N7)) neuraminidase (NA) gene, complete cds", + "reference accession": "CY136096" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n8_h3n8/CY028838", + "enabled": true, + "attributes": { + "name": "na_n8_h3n8", + "reference name": "Influenza A virus (A/equine/Miami/1/1963(H3N8)) segment 6, complete sequence", + "reference accession": "CY028838" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n9_h7n9/NC_026429.1", + "enabled": true, + "attributes": { + "name": "na_n9_h7n9", + "reference name": "Influenza A virus (A/Shanghai/02/2013(H7N9)) segment 6 neuraminidase (NA) gene, complete cds", + "reference accession": "NC_026429.1" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n9_h11n9/CY130072", + "enabled": true, + "attributes": { + "name": "na_n9_h11n9", + "reference name": "Influenza A virus (A/duck/Memphis/546/1974(H11N9)) neuraminidase (NA) gene, complete cds", 
+ "reference accession": "CY130072" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n9_h15n9/CY005407", + "enabled": true, + "attributes": { + "name": "na_n9_h15n9", + "reference name": "Influenza A virus (A/wedge-tailed shearwater/Western Australia/2576/1979(H15N9)) segment 6, complete sequence", + "reference accession": "CY005407" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n10_h17n10/CY103878", + "enabled": true, + "attributes": { + "name": "na_n10_h17n10", + "reference name": "Influenza A virus (A/little yellow-shouldered bat/Guatemala/153/2009(H17N10)) neuraminidase (NA) gene, complete cds", + "reference accession": "CY103878" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } + }, + { + "path": "flu/NA/na_n11_h18n11/CY125947", + "enabled": true, + "attributes": { + "name": "na_n11_h18n11", + "reference name": "Influenza A virus (A/flat-faced bat/Peru/033/2010(H18N11)) neuraminidase-like protein (NA) gene, complete cds", + "reference accession": "CY125947" + }, + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "genome_annotation.gff3" + }, + "versions": [ + { + "tag": "unreleased" + } + ], + "version": { + "tag": "unreleased" + } } ] } ] -} \ No newline at end of file +} diff --git a/flu-dataset-generator/config.yml b/flu-dataset-generator/config.yml new file mode 100644 index 0000000..17acab4 --- /dev/null 
+++ b/flu-dataset-generator/config.yml
@@ -0,0 +1,38 @@
+HA:
+  ha_h3_h3n8: CY028836
+  ha_h4_h4n6: CY181241
+  ha_h5_h5n1: NC_007362.1
+  ha_h5_h5n2: KU143256
+  ha_h6_h6n2: CY130030
+  ha_h7_h7n9: NC_026425.1
+  ha_h8_h8n4: CY136131
+  ha_h9_h9n2: NC_004908.1
+  ha_h10_h10n7: CY136094
+  ha_h11_h11n9: CY130070
+  ha_h12_h12n5: CY130078
+  ha_h13_h13n6: CY130086
+  ha_h14_h14n5: JN696314
+  ha_h15_h15n9: CY006010
+  ha_h16_h16n3: CY136630
+  ha_h17_h17n10: CY103876
+  ha_h18_h18n11: CY125945
+
+NA:
+  na_n1_h5n1: NC_007361.1
+  na_n2_h6n2: CY130032
+  na_n2_h9n2: NC_004909.1
+  na_n2_h5n2: KU143347
+  na_n3_h16n3: CY136632
+  na_n4_h8n4: CY136133
+  na_n5_h12n5: CY130080
+  na_n6_h4n6: CY181243
+  na_n6_h13n6: CY130088
+  na_n7_h10n7: CY136096
+  na_n8_h3n8: CY028838
+  na_n9_h7n9: NC_026429.1
+  na_n9_h11n9: CY130072
+  na_n9_h15n9: CY005407
+  na_n10_h17n10: CY103878
+  na_n11_h18n11: CY125947
+
+output_dir: ../data
diff --git a/flu-dataset-generator/generator.py b/flu-dataset-generator/generator.py
new file mode 100644
index 0000000..34d1789
--- /dev/null
+++ b/flu-dataset-generator/generator.py
@@ -0,0 +1,219 @@
+"""Generate reference dataset directories for influenza HA and NA segments.
+
+For every ``name: accession`` pair in the YAML config, the reference FASTA,
+the GFF3 annotation and the GenBank description are fetched from NCBI
+Entrez, written next to a ``pathogen.json``, registered in ``index.json``
+and bundled into ``dataset.zip``.
+"""
+
+import copy
+import json
+import os
+from pathlib import Path
+import sys
+import zipfile
+import yaml
+from Bio import Entrez, SeqIO
+from dataclasses import dataclass
+import click
+
+# NCBI requires a contact address for Entrez requests. Set ENTREZ_EMAIL in
+# the environment rather than committing a personal address to this file.
+Entrez.email = os.environ.get("ENTREZ_EMAIL", "your_email@example.com")
+
+InsdcAccession = str
+SequenceName = str
+
+# Skeleton of pathogen.json; the "TODO" values are filled in per dataset.
+PATHOGEN_TEMPLATE = {
+    "alignmentParams": {"minSeedCover": 0.01},
+    "schemaVersion": "3.0.0",
+    "attributes": {
+        "name": "TODO",
+        "reference name": "TODO",
+        "reference accession": "TODO",
+    },
+    "files": {
+        "reference": "reference.fasta",
+        "pathogenJson": "pathogen.json",
+        "genomeAnnotation": "genome_annotation.gff3",
+    },
+}
+
+# Skeleton of an index.json entry; path, attributes and files are overwritten
+# per dataset, so the H3N2 values below are placeholders only.
+DATASET_TEMPLATE = {
+    "path": "flu/h3n2/seg6/CY114383",
+    "enabled": True,
+    "attributes": {
+        "name": "Influenza A/H3N2 (segment 6/NA)",
+        "reference name": "Influenza A virus (A/Wisconsin/67/2005(H3N2)) segment 6, complete sequence",
+        "reference accession": "CY114383.1",
+    },
+    "files": {
+        "reference": "reference.fasta",
+        "pathogenJson": "pathogen.json",
+        "genomeAnnotation": "genome_annotation.gff3",
+    },
+    "versions": [{"tag": "unreleased"}],
+    "version": {"tag": "unreleased"},
+}
+
+
+@dataclass
+class Config:
+    """Settings read from the YAML config file."""
+
+    # Dataset name -> accession of each hemagglutinin reference.
+    HA: dict[SequenceName, InsdcAccession]
+    # Dataset name -> accession of each neuraminidase reference.
+    NA: dict[SequenceName, InsdcAccession]
+    # Data root holding index.json; datasets are written below output_dir/flu.
+    output_dir: str
+
+
+def _fetch_text(accession: str, rettype: str) -> str:
+    """Download one nucleotide record as text, closing the Entrez handle."""
+    with Entrez.efetch(
+        db="nucleotide", id=accession, rettype=rettype, retmode="text"
+    ) as handle:
+        return handle.read()
+
+
+def fetch_genbank_description(accession: str) -> str:
+    """Return the description of the GenBank record for *accession*."""
+    with Entrez.efetch(
+        db="nucleotide",
+        id=accession,
+        rettype="gb",
+        retmode="text",
+    ) as handle:
+        return SeqIO.read(handle, "genbank").description
+
+
+def update_index(pathogen_json, path, output_dir):
+    """Register the dataset stored at *path* in ``index.json`` under *output_dir*.
+
+    *path* is the version directory of the dataset; the index entry points at
+    its parent directory, relative to *output_dir*. If the index already has
+    an entry for that path, its attributes and files are refreshed in place,
+    so re-running the generator does not append duplicates.
+    """
+    index_path = Path(output_dir) / "index.json"
+    index = json.loads(index_path.read_text(encoding="utf-8"))
+    dataset = copy.deepcopy(DATASET_TEMPLATE)
+    # as_posix() keeps forward slashes in the index on every platform.
+    dataset["path"] = Path(path).relative_to(output_dir).parent.as_posix()
+    dataset["attributes"] = pathogen_json["attributes"]
+    dataset["files"] = pathogen_json["files"]
+    datasets = index["collections"][0]["datasets"]
+    for existing in datasets:
+        if existing.get("path") == dataset["path"]:
+            existing["attributes"] = dataset["attributes"]
+            existing["files"] = dataset["files"]
+            break
+    else:
+        datasets.append(dataset)
+    index_path.write_text(
+        json.dumps(index, indent=2, ensure_ascii=False) + "\n",
+        encoding="utf-8",
+    )
+
+
+def rename_cds_to_gene_names(gff_content: str) -> str:
+    """Name every CDS feature after its gene.
+
+    For each CDS row that carries a ``gene`` attribute, ``Name`` is set to the
+    upper-cased gene value and ``gene`` is removed. Comment lines, rows with
+    fewer than nine tab-separated columns and all other rows pass through
+    unchanged.
+    """
+    updated_lines = []
+    for line in gff_content.splitlines():
+        parts = line.split("\t")
+        if line.startswith("#") or len(parts) < 9 or parts[2] != "CDS":
+            updated_lines.append(line)
+            continue
+        # Split on the first "=" only, so a value containing "=" stays intact.
+        attr_dict = dict(
+            item.split("=", 1) for item in parts[8].split(";") if "=" in item
+        )
+        if "gene" not in attr_dict:
+            updated_lines.append(line)
+            continue
+        attr_dict["Name"] = attr_dict.pop("gene").upper()
+        parts[8] = ";".join(f"{k}={v}" for k, v in attr_dict.items())
+        updated_lines.append("\t".join(parts))
+    return "\n".join(updated_lines)
+
+
+def generate_dataset(dataset_dir: Path, ref_name: str, accession: str, output_dir: str):
+    """Build one dataset directory and bundle it into ``dataset.zip``.
+
+    Writes ``reference.fasta``, ``genome_annotation.gff3`` and
+    ``pathogen.json`` into *dataset_dir* and registers the dataset in the
+    index under *output_dir*. A failure is reported on stderr and the dataset
+    is left without a new archive; the caller carries on with the next one.
+    """
+    dataset_dir.mkdir(parents=True, exist_ok=True)
+    fasta_path = dataset_dir / "reference.fasta"
+    gff_path = dataset_dir / "genome_annotation.gff3"
+    pathogen_path = dataset_dir / "pathogen.json"
+    zip_path = dataset_dir / "dataset.zip"
+
+    try:
+        fasta_path.write_text(_fetch_text(accession, "fasta"), encoding="utf-8")
+        gff_path.write_text(
+            rename_cds_to_gene_names(_fetch_text(accession, "gff3")),
+            encoding="utf-8",
+        )
+
+        # Deep copy: a shallow copy would share the nested "attributes" dict
+        # with the template, so every dataset would mutate the same object.
+        pathogen_json = copy.deepcopy(PATHOGEN_TEMPLATE)
+        pathogen_json["attributes"]["name"] = ref_name
+        pathogen_json["attributes"]["reference name"] = fetch_genbank_description(
+            accession
+        )
+        pathogen_json["attributes"]["reference accession"] = accession
+
+        pathogen_path.write_text(
+            json.dumps(pathogen_json, indent=2, ensure_ascii=False) + "\n",
+            encoding="utf-8",
+        )
+        update_index(pathogen_json, dataset_dir, output_dir)
+    except Exception as e:
+        # Best effort: report the failure and skip the archive, so that a
+        # half-written dataset is never bundled.
+        print(f" {dataset_dir} Failed: {e}", file=sys.stderr)
+        return
+
+    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
+        for path in dataset_dir.rglob("*"):
+            if path == zip_path:
+                continue  # do not zip the zip
+            zf.write(path, arcname=path.relative_to(dataset_dir))
+
+
+def create_datasets(config: Config) -> None:
+    """Generate every HA and NA dataset listed in *config*."""
+    flu_root = Path(config.output_dir) / "flu"
+    for segment, references in (("HA", config.HA), ("NA", config.NA)):
+        for ref_name, accession in references.items():
+            dataset_dir = flu_root / segment / ref_name / accession / "unreleased"
+            generate_dataset(dataset_dir, ref_name, accession, config.output_dir)
+
+
+@click.command()
+@click.option("--config-file", required=True, type=click.Path(exists=True))
+def main(config_file: str) -> None:
+    """Load the YAML config and generate every dataset it lists."""
+    with open(config_file, encoding="utf-8") as file:
+        full_config = yaml.safe_load(file)
+    # Only the keys declared on Config are used; extra keys are ignored.
+    relevant_config = {key: full_config[key] for key in Config.__annotations__}
+    create_datasets(Config(**relevant_config))
+
+
+if __name__ == "__main__":
+    main()