"""A client for OWL-sourced identifier mappings."""
import json
import os
import pickle
from collections import defaultdict
from operator import itemgetter
from typing import Any, Collection, Mapping, TYPE_CHECKING
from tqdm import tqdm
from indra.databases.obo_client import OntologyClient, prune_empty_entries
from indra.resources import get_resource_path
if TYPE_CHECKING:
import pronto
class OwlClient(OntologyClient):
    """A base client for data that's been grabbed via OWL."""

    @staticmethod
    def entry_from_term(
        term: "pronto.Term",
        prefix: str,
        remove_prefix: bool = False,
        allowed_external_ns: Collection = None,
    ) -> Mapping[str, Any]:
        """Create a data dictionary from a Pronto term.

        Parameters
        ----------
        term :
            The Pronto term to convert. Its ID is expected to be of the
            form ``NAMESPACE:LOCAL_ID``.
        prefix :
            The namespace the ontology is being processed for. It is
            compared case-insensitively to the namespace of each parent.
        remove_prefix :
            If True, parents from the ``prefix`` namespace are recorded by
            their local ID only; otherwise the full parent ID is recorded.
        allowed_external_ns :
            Namespaces other than ``prefix`` whose terms may be kept as
            parents. This membership check is case-sensitive. If empty or
            None, only parents from the ``prefix`` namespace are kept.

        Returns
        -------
        :
            A dictionary with the keys ``namespace``, ``id``, ``name``,
            ``synonyms``, ``xrefs``, ``alt_ids`` and ``relations``.

        Raises
        ------
        ValueError
            If the ID of ``term`` itself does not contain a colon.
        """
        own_ns = prefix.lower()

        xrefs = []
        for xref in term.xrefs:
            xref_db, sep, xref_id = xref.id.partition(":")
            # Cross-references that are not of the form DB:ID are skipped
            if not sep:
                continue
            xrefs.append(dict(namespace=xref_db, id=xref_id))

        rels_dict = defaultdict(list)
        for parent in term.superclasses(distance=1, with_self=False):
            parent_db, sep, parent_id = parent.id.partition(":")
            # A parent without a namespace can match neither this namespace
            # nor an allowed external one, so it is skipped in the same way
            # as malformed cross-references instead of raising
            if not sep:
                continue
            is_own = parent_db.lower() == own_ns
            # If the parent here is not from this namespace and not one of
            # the allowed external namespaces then we skip the parent
            if not is_own and (not allowed_external_ns or
                               parent_db not in allowed_external_ns):
                continue
            if remove_prefix and is_own:
                rels_dict["is_a"].append(parent_id)
            else:
                rels_dict["is_a"].append(parent.id)

        term_ns, term_id = term.id.split(":", maxsplit=1)
        return {
            "namespace": term_ns.lower(),
            "id": term_id,
            "name": term.name,
            "synonyms": [s.description for s in term.synonyms],
            "xrefs": xrefs,
            "alt_ids": sorted(term.alternate_ids),
            "relations": dict(rels_dict),
        }

    @classmethod
    def entries_from_ontology(
        cls,
        prefix: str,
        ontology: "pronto.Ontology",
        *,
        skip_obsolete: bool = True,
        remove_prefix: bool = False,
        allowed_external_ns: Collection = None,
    ):
        """Create a list of data dictionaries from the terms of an ontology.

        Parameters
        ----------
        prefix :
            The namespace to extract. It is upper-cased, and only terms
            whose ID starts with ``PREFIX:`` are processed.
        ontology :
            The Pronto ontology whose terms are iterated over.
        skip_obsolete :
            If True, terms flagged as obsolete are left out.
        remove_prefix :
            Passed on to :meth:`entry_from_term`.
        allowed_external_ns :
            Passed on to :meth:`entry_from_term`.

        Returns
        -------
        :
            A list with one dictionary, as produced by
            :meth:`entry_from_term`, per retained term.
        """
        prefix = prefix.upper()
        # Match on the prefix including the colon so that a namespace which
        # merely begins with the same letters is not picked up by accident
        id_prefix = f"{prefix}:"
        rv = []
        for term in tqdm(ontology.terms(), desc=f"[{prefix}]"):
            if term.obsolete and skip_obsolete:
                continue
            if not term.id.startswith(id_prefix):
                continue
            rv.append(cls.entry_from_term(
                term,
                prefix,
                remove_prefix=remove_prefix,
                allowed_external_ns=allowed_external_ns,
            ))
        return rv

    @classmethod
    def update_resource(
        cls,
        prefix: str,
        ontology: "pronto.Ontology",
        skip_obsolete: bool = True,
        remove_prefix: bool = False,
        allowed_external_ns: Collection = None,
    ):
        """Write the entries of an ontology to a JSON resource file.

        The entries are extracted with :meth:`entries_from_ontology`, run
        through ``prune_empty_entries`` for the ``synonyms``, ``xrefs``,
        ``alt_ids`` and ``relations`` keys, sorted by ID and dumped to the
        path that ``get_resource_path`` returns for ``<prefix>.json``.

        Parameters
        ----------
        prefix :
            The namespace to extract; lower-cased to build the file name.
        ontology :
            The Pronto ontology to extract the entries from.
        skip_obsolete :
            Passed on to :meth:`entries_from_ontology`.
        remove_prefix :
            Passed on to :meth:`entries_from_ontology`. It also selects the
            sort key: the plain ID string if True, ``_id_key`` otherwise.
        allowed_external_ns :
            Passed on to :meth:`entries_from_ontology`.
        """
        prefix = prefix.lower()
        entries = cls.entries_from_ontology(
            prefix=prefix,
            ontology=ontology,
            skip_obsolete=skip_obsolete,
            remove_prefix=remove_prefix,
            allowed_external_ns=allowed_external_ns,
        )
        entries = prune_empty_entries(
            entries,
            {"synonyms", "xrefs", "alt_ids", "relations"},
        )
        entries = sorted(
            entries,
            key=itemgetter("id") if remove_prefix else _id_key,
        )
        resource_path = get_resource_path(f"{prefix}.json")
        with open(resource_path, "w") as file:
            json.dump(entries, file, indent=1, sort_keys=True)

    @classmethod
    def update_from_obo_library(
        cls,
        prefix: str,
        extension: str = "owl",
        **kwargs,
    ):
        """Update the JSON resource from an OBO library release.

        A pickled copy of the ontology is looked up at the path that
        ``get_resource_path`` returns for ``<prefix>.<extension>.pkl``. If
        that file does not exist, the ontology is loaded with
        ``pronto.Ontology.from_obo_library`` and pickled to that path.

        Parameters
        ----------
        prefix :
            The namespace of the ontology, e.g., ``ido``.
        extension :
            The file extension of the release to load.
        **kwargs :
            Passed on to :meth:`update_resource`.

        Raises
        ------
        ImportError
            If the ontology is not cached and Pronto is not installed.
        """
        prefix = prefix.lower()
        cache_path = get_resource_path(f"{prefix}.{extension}.pkl")
        if os.path.exists(cache_path):
            # NOTE: unpickling can execute arbitrary code, so the cache file
            # must only ever be one that was written by this method
            with open(cache_path, "rb") as file:
                ontology = pickle.load(file)
        else:
            pronto = cls._import_pronto()
            ontology = pronto.Ontology.from_obo_library(
                f"{prefix.upper()}.{extension}")
            with open(cache_path, "wb") as file:
                pickle.dump(ontology, file, protocol=pickle.HIGHEST_PROTOCOL)

        cls.update_resource(prefix=prefix, ontology=ontology, **kwargs)

    @classmethod
    def update_from_file(
        cls,
        prefix: str,
        file,
        **kwargs,
    ):
        """Update the JSON resource from an ontology file.

        Parameters
        ----------
        prefix :
            The namespace of the ontology.
        file :
            The handle that is given to ``pronto.Ontology`` to load the
            ontology.
        **kwargs :
            Passed on to :meth:`update_resource`.

        Raises
        ------
        ImportError
            If Pronto is not installed.
        """
        pronto = cls._import_pronto()
        ontology = pronto.Ontology(file)
        cls.update_resource(prefix=prefix, ontology=ontology, **kwargs)

    @staticmethod
    def _import_pronto():
        """Import and return the optional ``pronto`` package."""
        try:
            import pronto
        except ImportError as err:
            raise ImportError(
                "To use the INDRA OWL Client, you must first "
                "install Pronto with `pip install pronto`."
            ) from err
        return pronto
def _id_key(x):
return int(x["id"].split(':')[1])
if __name__ == "__main__":
    # When run as a script, rebuild the resource for the "ido" prefix
    OwlClient.update_from_obo_library(prefix="ido")