Skip to content

Commit 436fffe

Browse files
authored
Merge pull request #280 from SmartAPI/add-metakg-endpoint
Add New Endpoint: /metakg/parse issue#271
2 parents c008226 + 3777a48 commit 436fffe

File tree

4 files changed

+258
-55
lines changed

4 files changed

+258
-55
lines changed

src/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@
9696
(r"/api/metakg/consolidated/?", "handlers.api.MetaKGQueryHandler", {"biothing_type": "metakg_consolidated"}),
9797
(r"/api/metakg/consolidated/fields/?", "biothings.web.handlers.MetadataFieldHandler", {"biothing_type": "metakg_consolidated"}),
9898
(r"/api/metakg/paths/?", "handlers.api.MetaKGPathFinderHandler", {"biothing_type": "metakgpathfinder"}),
99+
(r"/api/metakg/parse/?", "handlers.api.MetaKGParserHandler"),
99100
]
100101

101102
# biothings web tester will read this

src/handlers/api.py

Lines changed: 203 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,28 @@
11
import asyncio
22
import json
33
import logging
4-
from typing import List, Union
54
import os
5+
66
import bmt
77
from biothings.utils import serializer
88
from biothings.web.auth.authn import BioThingsAuthnMixin
99
from biothings.web.handlers import BaseAPIHandler, QueryHandler
1010
from biothings.web.handlers.query import BiothingHandler, capture_exceptions
1111
from biothings.web.settings.default import QUERY_KWARGS
1212
from tornado.httpclient import AsyncHTTPClient
13-
from tornado.web import Finish, HTTPError
1413
from tornado.template import Loader
14+
from tornado.web import Finish, HTTPError
1515

1616
from controller import SmartAPI
1717
from controller.exceptions import ControllerError, NotFoundError
1818
from pipeline import MetaKGQueryPipeline
1919
from utils.downloader import DownloadError, download_async
2020
from utils.http_error import SmartAPIHTTPError
21+
from utils.metakg.biolink_helpers import get_expanded_values
22+
from utils.metakg.cytoscape_formatter import CytoscapeDataFormatter
2123
from utils.metakg.export import edges2graphml
24+
from utils.metakg.parser import MetaKGParser
2225
from utils.metakg.path_finder import MetaKGPathFinder
23-
from utils.metakg.cytoscape_formatter import CytoscapeDataFormatter
24-
from utils.metakg.biolink_helpers import get_expanded_values
2526
from utils.notification import SlackNewAPIMessage, SlackNewTranslatorAPIMessage
2627

2728
logger = logging.getLogger("smartAPI")
@@ -382,7 +383,68 @@ def post(self):
382383
raise HTTPError(status_code=400, reason="Missing required form field: id")
383384

384385

385-
class MetaKGQueryHandler(QueryHandler):
386+
class MetaKGHandlerMixin:
    """
    Mixin to provide reusable logic for filtering API information.

    The host handler must expose ``self.args`` with ``bte`` and
    ``api_details`` attributes; both are read on every call.
    """

    def get_filtered_api(self, api_dict):
        """Extract and return filtered API information.

        Args:
            api_dict: Either a full edge record holding an ``"api"`` sub-dict,
                or the API dict itself (in which case it is used directly).

        Returns:
            A new dict holding whichever of ``subject``, ``object``,
            ``predicate``, ``subject_prefix`` and ``object_prefix`` were
            present in ``api_dict``, an ``"api"`` entry with the filtered API
            info, and a top-level ``"bte"`` entry when BTE data was kept.

        The input record and its nested dicts are never modified.
        """
        api_info = api_dict.get("api", api_dict)  # Handle both formats

        # Flags come from the parsed request arguments; no defaulting is done here.
        bte = self.args.bte
        api_details = self.args.api_details

        # Preserve the edge-level keys that are present on the input record.
        filtered_dict = {
            key: api_dict[key]
            for key in ("subject", "object", "predicate", "subject_prefix", "object_prefix")
            if key in api_dict
        }

        if api_details:
            # Detailed view: keep everything, optionally dropping BTE data.
            filtered_api = dict(api_info)
            if not bte:
                filtered_api.pop("bte", None)

            # A UI link ending in "/None" is not a usable link; blank it out.
            # The nested dict is rebuilt rather than edited in place so the
            # caller's data is untouched, and a missing/None "ui" (or a
            # non-dict "smartapi") is tolerated instead of raising.
            smartapi = filtered_api.get("smartapi")
            if isinstance(smartapi, dict):
                ui = smartapi.get("ui")
                if isinstance(ui, str) and ui.endswith("/None"):
                    filtered_api["smartapi"] = {**smartapi, "ui": None}
        else:
            # Minimal view: name (when present) and SmartAPI id only, plus
            # the BTE data when it was explicitly requested.
            smartapi = api_info.get("smartapi") or {}
            filtered_api = {}
            if "name" in api_info:
                filtered_api["name"] = api_info["name"]
            filtered_api["smartapi"] = {"id": smartapi.get("id")}
            if bte:
                filtered_api["bte"] = api_info.get("bte", {})

        # Add the filtered 'api' key to the preserved top-level structure
        filtered_dict["api"] = filtered_api

        # Remove 'bte' from 'api' and move it to the top level
        if "bte" in filtered_api:
            filtered_dict["bte"] = filtered_api.pop("bte")

        return filtered_dict
445+
446+
447+
class MetaKGQueryHandler(QueryHandler, MetaKGHandlerMixin):
386448
"""
387449
Support metakg queries with biolink model's semantic descendants
388450
@@ -458,30 +520,8 @@ async def get(self, *args, **kwargs):
458520
value_list = get_expanded_values(value_list, self.biolink_model_toolkit) if expanded_fields[field] else value_list
459521
setattr(self.args, field, value_list)
460522

461-
462523
await super().get(*args, **kwargs)
463524

464-
def get_filtered_api(self, api_dict):
465-
"""Extract and return filtered API information."""
466-
api_info = api_dict
467-
if not self.args.bte and not self.args.api_details: # no bte and no api details
468-
filtered_api= {
469-
**({"name": api_info["name"]} if "name" in api_info else {}),
470-
**({"smartapi": {"id": api_info["smartapi"]["id"]}} if "smartapi" in api_info and "id" in api_info["smartapi"] else {})
471-
}
472-
elif self.args.bte and not self.args.api_details : # bte and no api details
473-
filtered_api= {
474-
**({"name": api_info["name"]} if "name" in api_info else {}),
475-
**({"smartapi": {"id": api_info["smartapi"]["id"]}} if "smartapi" in api_info and "id" in api_info["smartapi"] else {}),
476-
'bte': api_info.get('bte', {})
477-
}
478-
elif not self.args.bte and self.args.api_details: # no bte and api details
479-
api_info.pop('bte', None)
480-
filtered_api = api_info
481-
else:
482-
filtered_api = api_info
483-
return filtered_api
484-
485525
def process_apis(self, apis):
486526
"""Process each API dict based on provided args."""
487527
if isinstance(apis, list):
@@ -491,11 +531,11 @@ def process_apis(self, apis):
491531
elif isinstance(apis, dict):
492532
if 'bte' in apis:
493533
# update dict for new format
494-
apis['api']['bte']=apis.pop('bte')
534+
apis['api']['bte'] = apis.pop('bte')
495535
api_dict = apis["api"]
496-
filtered_api= self.get_filtered_api(api_dict)
536+
filtered_api = self.get_filtered_api(api_dict)
497537
apis["api"] = filtered_api
498-
538+
499539
def write(self, chunk):
500540
"""
501541
Overwrite the biothings query handler to ...
@@ -522,10 +562,10 @@ def write(self, chunk):
522562
self.set_header("Content-Disposition", 'attachment; filename="smartapi_metakg.graphml"')
523563

524564
return super(BaseAPIHandler, self).write(chunk)
525-
565+
526566
if self.format == "html":
527567
# setup template
528-
template_path = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'templates'))
568+
template_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'templates'))
529569
loader = Loader(template_path)
530570
template = loader.load("cytoscape.html")
531571
# initial counts
@@ -542,7 +582,7 @@ def write(self, chunk):
542582
graph_data = serializer.to_json(cdf.get_data())
543583
# generate global template variable with graph data
544584
result = template.generate(
545-
data= graph_data,
585+
data=graph_data,
546586
response=serializer.to_json(chunk),
547587
shown=shown,
548588
available=available,
@@ -586,7 +626,7 @@ class MetaKGPathFinderHandler(QueryHandler):
586626
"max": 6,
587627
"default": [],
588628
"enum": ["subject", "object", "predicate", "node", "edge", "all"]
589-
}
629+
}
590630
},
591631
}
592632

@@ -689,9 +729,134 @@ async def get(self, *args, **kwargs):
689729
raw_query_output = self.setup_pathfinder_rawquery(expanded_fields)
690730
self.write(raw_query_output)
691731
return
692-
res = {
693-
"total": len(paths_with_edges),
694-
"paths": paths_with_edges,
695-
}
732+
res = {
733+
"total": len(paths_with_edges),
734+
"paths": paths_with_edges,
735+
}
696736
await asyncio.sleep(0.01)
697737
self.finish(res)
738+
739+
740+
class MetaKGParserHandler(BaseHandler, MetaKGHandlerMixin):
    """
    Handles parsing of SmartAPI metadata from a given URL or request body.

    This handler processes SmartAPI metadata and returns structured,
    cleaned results based on the specified query parameters.

    Supported HTTP methods:
    - **GET**: Parses metadata from a provided URL.
    - **POST**: Parses metadata from the request body.

    Query Parameters:
    - `url` (str, required for GET): The URL of the SmartAPI metadata to parse.
        Maximum length: 1000 characters.
    - `api_details` (bool, optional, default: `False`):
        Whether to return detailed API information.
    - `bte` (bool, optional, default: `False`):
        Whether to include BTE (BioThings Explorer) specific metadata.

    Both methods respond with ``{"total": <int>, "hits": [<edge>, ...]}``.
    """

    kwargs = {
        "*": {
            "api_details": {"type": bool, "default": False},
            "bte": {"type": bool, "default": False},
        },
        "GET": {
            "url": {
                "type": str,
                "required": True,
                "max": 1000,
                "description": "URL of the SmartAPI metadata to parse"
            },
        },
    }

    def initialize(self, *args, **kwargs):
        """Set up the handler and swap in the MetaKG query pipeline."""
        super().initialize(*args, **kwargs)
        # change the default query pipeline from self.biothings.pipeline
        self.pipeline = MetaKGQueryPipeline(ns=self.biothings)

    def process_apis(self, apis):
        """Process each API dict based on provided args.

        A list is filtered element by element in place; a dict has its
        ``"api"`` entry replaced by the filtered result. Any other type is
        returned untouched.
        """
        if isinstance(apis, list):
            for i, api_dict in enumerate(apis):
                filtered_api = self.get_filtered_api(api_dict)
                apis[i] = filtered_api
        elif isinstance(apis, dict):
            if "bte" in apis:
                # Update dict for new format
                apis["api"]["bte"] = apis.pop("bte")
            api_dict = apis["api"]
            filtered_api = self.get_filtered_api(api_dict)
            apis["api"] = filtered_api
        return apis

    def _filter_hits(self, parsed_metakg):
        """Return the parsed edges as a list with API filtering applied."""
        # A parser result of None (or any empty value) becomes an empty hit
        # list so that the response can always report a total.
        return [self.get_filtered_api(api_dict) for api_dict in parsed_metakg or []]

    async def get(self, *args, **kwargs):
        """Parse the SmartAPI metadata found at ``url`` and return its edges."""
        url = self.args.url
        parser = MetaKGParser()

        try:
            parsed_metakg = parser.get_metakg(url=url)
        except DownloadError:
            self.write_error(400, reason="There was an error downloading the data from the given url.")
            # Stop here: nothing was parsed, so `parsed_metakg` is unbound and
            # continuing would raise instead of reporting the error.
            return
        except (ValueError, TypeError) as err:
            self.write_error(
                status_code=400,
                reason="The data retrived from the given url is not a valid JSON or YAML object.",
                message=str(err)
            )
            return

        # Apply filtering -- if data found
        hits = self._filter_hits(parsed_metakg)

        # Add url to metadata if api_details is set to 1
        if self.args.api_details:
            for data_dict in hits:
                smartapi = data_dict["api"].get("smartapi")
                # Only fill in a metadata entry that exists but is empty;
                # records without SmartAPI info are left alone.
                if isinstance(smartapi, dict) and "metadata" in smartapi and smartapi["metadata"] is None:
                    smartapi["metadata"] = url

        response = {
            "total": len(hits),
            "hits": hits,
        }

        self.finish(response)

    async def post(self, *args, **kwargs):
        """Parse SmartAPI metadata supplied in the request body."""
        content_type = self.request.headers.get("Content-Type", "").lower()
        if content_type in ["application/json", "application/x-yaml"]:
            # if content type is set properly, it should have already been parsed
            metadata_from_body = self.args_json or self.args_yaml
        elif self.request.body:
            # if request body is provided but no proper content type is set
            # we will parse it as YAML anyway
            metadata_from_body = self._parse_yaml()
        else:
            metadata_from_body = None

        if not metadata_from_body:
            self.write_error(
                status_code=400,
                reason="Request body cannot be empty.",
                message="Please provide a valid JSON/YAML object in the request body."
            )
            return

        # Process the parsed metadata
        parser = MetaKGParser()
        parsed_metakg = parser.get_metakg(metadata_from_body)

        # Apply filtering to the combined data
        hits = self._filter_hits(parsed_metakg)

        # Send the response back to the client
        response = {
            "total": len(hits),
            "hits": hits,
        }

        self.finish(response)

src/pipeline.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,10 @@
22
from enum import Enum
33
from typing import Dict, OrderedDict
44

5-
from biothings.web.query import (
6-
AsyncESQueryBackend,
7-
AsyncESQueryPipeline,
8-
ESQueryBuilder,
9-
ESResultFormatter,
10-
)
11-
from controller.base import OpenAPI, Swagger
5+
from biothings.web.query import AsyncESQueryBackend, AsyncESQueryPipeline, ESQueryBuilder, ESResultFormatter
126
from elasticsearch_dsl import Q, Search
7+
8+
from controller.base import OpenAPI, Swagger
139
from utils import decoder
1410

1511

@@ -219,8 +215,8 @@ def apply_extras(self, search, options):
219215
apply extra filters
220216
"""
221217
# if not options._source:
222-
# by default exclude api.bte or bte field, but can be included by specifying in the fields parameter
223-
# options._source = ["-api.bte", "-bte"]
218+
# by default exclude api.bte or bte field, but can be included by specifying in the fields parameter
219+
# options._source = ["-api.bte", "-bte"]
224220

225221
search = super().apply_extras(search, options)
226222
# apply extra filters from query parameters
@@ -262,6 +258,7 @@ def adjust_index(self, original_index: str, query: str, **options: Dict) -> str:
262258
query_index = self.indices.get("metakg", None)
263259
return query_index
264260

261+
265262
class MetaKGQueryPipeline(AsyncESQueryPipeline):
266263
def __init__(self, *args, **kwargs):
267264
# ns is an instance of BiothingsNamespace

0 commit comments

Comments
 (0)