Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Large diffs are not rendered by default.

128 changes: 128 additions & 0 deletions rnacentral/sequence_search/static/rnacentral/sequence-search/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
"""
Copyright [2009-present] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Usage: python search.py file.fasta [database]
Examples:
python search.py file.fasta # search in all databases
python search.py file.fasta mirbase # search in miRBase only
"""

import json
import requests
import sys
import time
from Bio import SeqIO
from pathlib import Path

# Base URL of the sequence search service. Endpoint paths such as
# "api/job-status/" are appended directly, so the trailing slash is required.
SERVER = "https://sequence-search.rnacentral.org/"

def get_sequence_search_result(description, sequence, job_id):
    """Poll a submitted job until it ends, then download all pages of results.

    Args:
        description: FASTA header of the query record; used in progress and
            error messages and copied into the returned dict.
        sequence: Query sequence; stored in the returned dict as a string.
        job_id: Job identifier returned by the ``api/submit-job`` endpoint.

    Returns:
        dict: Always contains ``job_id``, ``status``, ``description`` and
        ``sequence``. When the job finished and at least one results page
        was downloaded it also contains ``hits`` (hit count reported by the
        server) and ``results`` (the entries collected, possibly fewer than
        ``hits`` if a later page could not be fetched). Otherwise ``status``
        is "error" or "not_found" and those two keys are absent.
    """
    retry_delay = 10  # seconds between status polls and between retries
    page_size = 100
    max_page_attempts = 3

    def failure(status):
        # Result shape used whenever no hits could be retrieved.
        return {
            "job_id": job_id,
            "status": status,
            "description": description,
            "sequence": str(sequence),
        }

    # Wait for the job to leave the queue. Transient network problems are
    # retried indefinitely because the job keeps running on the server.
    while True:
        try:
            data = requests.get(SERVER + "api/job-status/" + job_id, timeout=30).json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError: the body was not valid JSON (older requests
            # releases raise a plain ValueError for that).
            print("Error checking job status, retrying: {}".format(e))
            time.sleep(retry_delay)
            continue

        job_status = data.get("status") if isinstance(data, dict) else None
        if job_status == "finished":
            break
        if job_status is None or job_status in ("error", "not_found"):
            # A reply without a status cannot be polled any further.
            print("Search failed for record: {}".format(description))
            return failure(job_status or "error")

        # "progress" may be missing or null while the job is still queued.
        print(" {:.0f}% complete...".format(data.get("progress") or 0))
        time.sleep(retry_delay)

    # Fetch results page by page
    results = []
    hit_count = None
    page = 1
    while True:
        url = "{}api/job-results/{}?page={}&page_size={}".format(SERVER, job_id, page, page_size)

        # Retry a failing page a few times so one transient error does not
        # silently drop the remaining pages.
        data = None
        for attempt in range(1, max_page_attempts + 1):
            try:
                data = requests.get(url, timeout=30).json()
                break
            except (requests.exceptions.RequestException, ValueError) as e:
                print("Error fetching results page {} for {}: {}".format(page, description, e))
                if attempt != max_page_attempts:
                    time.sleep(retry_delay)

        if not isinstance(data, dict):
            if hit_count is not None:
                print("Giving up on page {}; results for {} are incomplete".format(page, description))
            break

        if hit_count is None:
            # Guard against a null hit_count, which would break the comparison below.
            hit_count = data.get("hit_count") or 0

        entries = data.get("entries") or []
        results.extend(entries)

        if not entries or len(results) >= hit_count:
            break

        page += 1

    if hit_count is None:
        # Not even the first page arrived: report a failure instead of
        # pretending the search finished with zero hits.
        print("Search failed for record: {}".format(description))
        return failure("error")

    return {
        "job_id": job_id,
        "hits": hit_count,
        "status": job_status,
        "description": description,
        "sequence": str(sequence),
        "results": results,
    }


def main():
    """Search every record of a FASTA file and save the hits as JSON files.

    Command line: ``python search.py file.fasta [database]``. Each record is
    submitted to the ``api/submit-job`` endpoint, restricted to ``database``
    when one is given. Records with at least one hit are written to
    ``results/NAME.json``, where NAME is the record description with
    characters that are unsafe in file names replaced by "_". A record that
    cannot be submitted or searched is reported and skipped, so the
    remaining records are still processed.

    Exits with status 1 when the command line is invalid.
    """
    if len(sys.argv) == 1:
        print("You must specify the FASTA file")
        sys.exit(1)
    if len(sys.argv) > 3:
        print("Usage: python search.py file.fasta [database]")
        sys.exit(1)

    filename = sys.argv[1]
    database = [sys.argv[2]] if len(sys.argv) == 3 else []

    # Path separators plus the characters Windows rejects in file names.
    unsafe_chars = set('/\\:*?"<>|')

    output_dir = Path("results")
    output_dir.mkdir(parents=True, exist_ok=True)

    with open(filename, mode="r") as handle:
        for record in SeqIO.parse(handle, "fasta"):
            description = record.description
            sequence = record.seq

            data = {"databases": database, "sequence": str(sequence)}
            job_id = None
            try:
                post_job = requests.post(SERVER + "api/submit-job", json=data, timeout=30)
                if post_job.status_code == 200:
                    job_id = post_job.json()["job_id"]
            except (requests.exceptions.RequestException, ValueError, KeyError, TypeError) as e:
                # Network failure or an unexpected response body.
                print("Error submitting job: {}".format(e))

            if not job_id:
                print("Failed to submit job for record:\n{}\n{}".format(description, sequence))
                continue

            result = get_sequence_search_result(description, sequence, job_id)
            if not result or result.get("status") != "finished":
                # The failure has already been reported; do not describe it
                # as an empty result set.
                continue

            hits = result.get("hits", 0)
            if hits == 0:
                print("No results found for {}".format(description))
                continue

            print("Saving {} hit(s) for {}".format(hits, description))
            # FASTA descriptions are free text: neutralise path separators
            # and control characters, and cap the length so the name stays
            # within common file-system limits.
            safe_name = "".join(
                "_" if ch in unsafe_chars or not ch.isprintable() else ch
                for ch in description
            )[:200]
            output_file = output_dir / ((safe_name or job_id) + ".json")
            with open(output_file, "w", encoding="utf-8") as f:
                json.dump(result, f)


# Run the search only when executed as a script, so the module can be
# imported without side effects.
if __name__ == "__main__":
    main()
147 changes: 137 additions & 10 deletions rnacentral/sequence_search/templates/api.html
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ <h2>Overview</h2>
<div class="col-12">
<h2>API Throttling</h2>
<p>
The maximum number of requests from the same IP address is limited to 50 requests per minute.
Job submissions (<code>/api/submit-job</code>) are rate limited to 10 requests per minute per IP address.
There is no rate limit on job status or results endpoints.
</p>
</div>
</div>
Expand Down Expand Up @@ -55,14 +56,140 @@ <h2>Example script</h2>
|-- ...
</code>
</p>
<table class="table table-borderless">
<thead>
<tr></tr>
</thead>
<tbody>
<tr><th><script src="https://gist.github.com/carlosribas/b2f4095df29a44116d5d0555d708b357.js"></script></th></tr>
</tbody>
</table>
<link rel="stylesheet" href="{% static 'rnacentral/sequence-search/prism/prism.min.css' %}">
<link rel="stylesheet" href="{% static 'rnacentral/sequence-search/prism/prism-line-numbers.min.css' %}">
<pre class="line-numbers"><code class="language-python">"""
Copyright [2009-present] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Usage: python search.py file.fasta [database]
Examples:
python search.py file.fasta # search in all databases
python search.py file.fasta mirbase # search in miRBase only
"""

import json
import requests
import sys
import time
from Bio import SeqIO
from pathlib import Path

# Base URL of the sequence search service. Endpoint paths such as
# "api/job-status/" are appended directly, so the trailing slash is required.
SERVER = "https://sequence-search.rnacentral.org/"

def get_sequence_search_result(description, sequence, job_id):
    """Poll a submitted job until it ends, then download all pages of results.

    Args:
        description: FASTA header of the query record; used in progress and
            error messages and copied into the returned dict.
        sequence: Query sequence; stored in the returned dict as a string.
        job_id: Job identifier returned by the ``api/submit-job`` endpoint.

    Returns:
        dict: Always contains ``job_id``, ``status``, ``description`` and
        ``sequence``. When the job finished and at least one results page
        was downloaded it also contains ``hits`` (hit count reported by the
        server) and ``results`` (the entries collected, possibly fewer than
        ``hits`` if a later page could not be fetched). Otherwise ``status``
        is "error" or "not_found" and those two keys are absent.
    """
    retry_delay = 10  # seconds between status polls and between retries
    page_size = 100
    max_page_attempts = 3

    def failure(status):
        # Result shape used whenever no hits could be retrieved.
        return {
            "job_id": job_id,
            "status": status,
            "description": description,
            "sequence": str(sequence),
        }

    # Wait for the job to leave the queue. Transient network problems are
    # retried indefinitely because the job keeps running on the server.
    while True:
        try:
            data = requests.get(SERVER + "api/job-status/" + job_id, timeout=30).json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError: the body was not valid JSON (older requests
            # releases raise a plain ValueError for that).
            print("Error checking job status, retrying: {}".format(e))
            time.sleep(retry_delay)
            continue

        job_status = data.get("status") if isinstance(data, dict) else None
        if job_status == "finished":
            break
        if job_status is None or job_status in ("error", "not_found"):
            # A reply without a status cannot be polled any further.
            print("Search failed for record: {}".format(description))
            return failure(job_status or "error")

        # "progress" may be missing or null while the job is still queued.
        print(" {:.0f}% complete...".format(data.get("progress") or 0))
        time.sleep(retry_delay)

    # Fetch results page by page
    results = []
    hit_count = None
    page = 1
    while True:
        url = "{}api/job-results/{}?page={}&amp;page_size={}".format(SERVER, job_id, page, page_size)

        # Retry a failing page a few times so one transient error does not
        # silently drop the remaining pages.
        data = None
        for attempt in range(1, max_page_attempts + 1):
            try:
                data = requests.get(url, timeout=30).json()
                break
            except (requests.exceptions.RequestException, ValueError) as e:
                print("Error fetching results page {} for {}: {}".format(page, description, e))
                if attempt != max_page_attempts:
                    time.sleep(retry_delay)

        if not isinstance(data, dict):
            if hit_count is not None:
                print("Giving up on page {}; results for {} are incomplete".format(page, description))
            break

        if hit_count is None:
            # Guard against a null hit_count, which would break the comparison below.
            hit_count = data.get("hit_count") or 0

        entries = data.get("entries") or []
        results.extend(entries)

        if not entries or len(results) >= hit_count:
            break

        page += 1

    if hit_count is None:
        # Not even the first page arrived: report a failure instead of
        # pretending the search finished with zero hits.
        print("Search failed for record: {}".format(description))
        return failure("error")

    return {
        "job_id": job_id,
        "hits": hit_count,
        "status": job_status,
        "description": description,
        "sequence": str(sequence),
        "results": results,
    }


def main():
    """Search every record of a FASTA file and save the hits as JSON files.

    Command line: ``python search.py file.fasta [database]``. Each record is
    submitted to the ``api/submit-job`` endpoint, restricted to ``database``
    when one is given. Records with at least one hit are written to
    ``results/NAME.json``, where NAME is the record description with
    characters that are unsafe in file names replaced by "_". A record that
    cannot be submitted or searched is reported and skipped, so the
    remaining records are still processed.

    Exits with status 1 when the command line is invalid.
    """
    if len(sys.argv) == 1:
        print("You must specify the FASTA file")
        sys.exit(1)
    if len(sys.argv) > 3:
        print("Usage: python search.py file.fasta [database]")
        sys.exit(1)

    filename = sys.argv[1]
    database = [sys.argv[2]] if len(sys.argv) == 3 else []

    # Path separators plus the characters Windows rejects in file names.
    unsafe_chars = set('/\\:*?"&lt;&gt;|')

    output_dir = Path("results")
    output_dir.mkdir(parents=True, exist_ok=True)

    with open(filename, mode="r") as handle:
        for record in SeqIO.parse(handle, "fasta"):
            description = record.description
            sequence = record.seq

            data = {"databases": database, "sequence": str(sequence)}
            job_id = None
            try:
                post_job = requests.post(SERVER + "api/submit-job", json=data, timeout=30)
                if post_job.status_code == 200:
                    job_id = post_job.json()["job_id"]
            except (requests.exceptions.RequestException, ValueError, KeyError, TypeError) as e:
                # Network failure or an unexpected response body.
                print("Error submitting job: {}".format(e))

            if not job_id:
                print("Failed to submit job for record:\n{}\n{}".format(description, sequence))
                continue

            result = get_sequence_search_result(description, sequence, job_id)
            if not result or result.get("status") != "finished":
                # The failure has already been reported; do not describe it
                # as an empty result set.
                continue

            hits = result.get("hits", 0)
            if hits == 0:
                print("No results found for {}".format(description))
                continue

            print("Saving {} hit(s) for {}".format(hits, description))
            # FASTA descriptions are free text: neutralise path separators
            # and control characters, and cap the length so the name stays
            # within common file-system limits.
            safe_name = "".join(
                "_" if ch in unsafe_chars or not ch.isprintable() else ch
                for ch in description
            )[:200]
            output_file = output_dir / ((safe_name or job_id) + ".json")
            with open(output_file, "w", encoding="utf-8") as f:
                json.dump(result, f)


# Run the search only when executed as a script, so the module can be
# imported without side effects.
if __name__ == "__main__":
    main()
</code></pre>
<script src="{% static 'rnacentral/sequence-search/prism/prism.min.js' %}"></script>
<script src="{% static 'rnacentral/sequence-search/prism/prism-python.min.js' %}"></script>
<script src="{% static 'rnacentral/sequence-search/prism/prism-line-numbers.min.js' %}"></script>
</div>
</div>

Expand All @@ -75,7 +202,7 @@ <h2>Swagger</h2>
</div>
<div class="panel-body">
<div class="embed-responsive embed-responsive-16by9">
<iframe class="embed-responsive-item" src="https://search.rnacentral.org/api/doc" frameborder="0" allowfullscreen></iframe>
<iframe class="embed-responsive-item" src="https://sequence-search.rnacentral.org/docs" frameborder="0" allowfullscreen></iframe>
</div>
</div>
</div>
Expand Down
Loading