Source code for indra.databases.owl_client

"""A client for OWL-sourced identifier mappings."""

import json
import os
import pickle
from collections import defaultdict
from operator import itemgetter
from typing import Any, Collection, Mapping, TYPE_CHECKING

from tqdm import tqdm

from indra.databases.obo_client import OntologyClient, prune_empty_entries
from indra.resources import get_resource_path

if TYPE_CHECKING:
    import pronto


class OwlClient(OntologyClient):
    """A base client for data that's been grabbed via OWL.

    The methods here turn the terms of a ``pronto`` ontology into plain
    dictionaries and write them to a JSON resource file.
    """

    @staticmethod
    def entry_from_term(
        term: "pronto.Term",
        prefix: str,
        remove_prefix: bool = False,
        allowed_external_ns: Collection = None,
    ) -> Mapping[str, Any]:
        """Create a data dictionary from a Pronto term.

        Parameters
        ----------
        term :
            The term to convert. Its ``id`` must be of the form
            ``<namespace>:<identifier>``.
        prefix :
            The namespace prefix of the ontology being processed; compared
            case-insensitively against the namespace of each parent term.
        remove_prefix :
            If True, parents from the ontology's own namespace are recorded
            by their bare identifier instead of their full ``ns:id`` form.
        allowed_external_ns :
            Namespaces (compared case-sensitively) whose terms may be kept
            as parents even though they are not in ``prefix``'s namespace.
            If None or empty, only parents from ``prefix`` are kept.

        Returns
        -------
        :
            A dictionary with the keys ``namespace``, ``id``, ``name``,
            ``synonyms``, ``xrefs``, ``alt_ids`` and ``relations``.

        Raises
        ------
        ValueError
            If the ID of the term or of one of its direct superclasses
            does not contain a ``:`` separator.
        """
        prefix_lower = prefix.lower()

        xrefs = []
        for xref in term.xrefs:
            try:
                xref_db, xref_id = xref.id.split(":", maxsplit=1)
            except ValueError:
                # The xref has no "db:id" structure, so it can't be
                # represented as a namespace/id pair; skip it.
                continue
            else:
                xrefs.append(dict(namespace=xref_db, id=xref_id))

        rels_dict = defaultdict(list)
        for parent in term.superclasses(distance=1, with_self=False):
            parent_db, parent_id = parent.id.split(':', maxsplit=1)
            is_own_ns = parent_db.lower() == prefix_lower
            # If the parent here is not from this namespace and not one of
            # the allowed external namespaces then we skip the parent
            if not is_own_ns and (
                not allowed_external_ns
                or parent_db not in allowed_external_ns
            ):
                continue
            if remove_prefix and is_own_ns:
                rels_dict["is_a"].append(parent_id)
            else:
                rels_dict["is_a"].append(parent.id)

        term_ns, term_id = term.id.split(':', maxsplit=1)
        return {
            "namespace": term_ns.lower(),
            "id": term_id,
            "name": term.name,
            "synonyms": [s.description for s in term.synonyms],
            "xrefs": xrefs,
            "alt_ids": sorted(term.alternate_ids),
            # Convert to a plain dict so the result is JSON-friendly and
            # no longer creates keys on lookup.
            "relations": dict(rels_dict),
        }

    @classmethod
    def entries_from_ontology(
        cls,
        prefix: str,
        ontology: "pronto.Ontology",
        *,
        skip_obsolete: bool = True,
        remove_prefix: bool = False,
        allowed_external_ns: Collection = None,
    ):
        """Create a list of entry dictionaries from the terms of an ontology.

        Parameters
        ----------
        prefix :
            The namespace prefix of the ontology. It is upper-cased and only
            terms whose ID starts with it are converted.
        ontology :
            The ontology whose terms are iterated over.
        skip_obsolete :
            If True, terms flagged as obsolete are left out.
        remove_prefix :
            Passed on to :meth:`entry_from_term`.
        allowed_external_ns :
            Passed on to :meth:`entry_from_term`.

        Returns
        -------
        :
            A list with one dictionary per retained term, in the order the
            ontology yields its terms.
        """
        prefix = prefix.upper()
        rv = []
        for term in tqdm(ontology.terms(), desc=f"[{prefix}]"):
            if term.obsolete and skip_obsolete:
                continue
            if not term.id.startswith(prefix):
                continue
            rv.append(
                cls.entry_from_term(
                    term,
                    prefix,
                    remove_prefix=remove_prefix,
                    # Forward the caller's choice; without this the
                    # argument would be accepted but have no effect.
                    allowed_external_ns=allowed_external_ns,
                )
            )
        return rv

    @classmethod
    def update_resource(
        cls,
        prefix: str,
        ontology: "pronto.Ontology",
        skip_obsolete: bool = True,
        remove_prefix: bool = False,
        allowed_external_ns: Collection = None,
    ):
        """Write the entries of an ontology to its JSON resource file.

        The entries are built with :meth:`entries_from_ontology`, passed
        through ``prune_empty_entries`` for the ``synonyms``, ``xrefs``,
        ``alt_ids`` and ``relations`` keys, sorted, and dumped to the path
        returned by ``get_resource_path(f"{prefix}.json")``.

        Parameters
        ----------
        prefix :
            The namespace prefix of the ontology; lower-cased to form the
            resource file name.
        ontology :
            The ontology to export.
        skip_obsolete :
            Passed on to :meth:`entries_from_ontology`.
        remove_prefix :
            Passed on to :meth:`entries_from_ontology`. It also selects the
            sort key: the plain ``id`` string if True, otherwise the key
            computed by the module-level ``_id_key``.
        allowed_external_ns :
            Passed on to :meth:`entries_from_ontology`.
        """
        prefix = prefix.lower()
        entries = cls.entries_from_ontology(
            prefix=prefix,
            ontology=ontology,
            skip_obsolete=skip_obsolete,
            remove_prefix=remove_prefix,
            allowed_external_ns=allowed_external_ns,
        )
        entries = prune_empty_entries(
            entries,
            {"synonyms", "xrefs", "alt_ids", "relations"},
        )
        entries = sorted(
            entries,
            key=itemgetter("id") if remove_prefix else _id_key,
        )

        resource_path = get_resource_path(f"{prefix}.json")
        with open(resource_path, "w") as file:
            json.dump(entries, file, indent=1, sort_keys=True)

    @classmethod
    def update_from_obo_library(
        cls,
        prefix: str,
        extension: str = "owl",
        **kwargs,
    ):
        """Update the resource file from an ontology in the OBO library.

        The parsed ontology is cached as a pickle at
        ``get_resource_path(f"{prefix}.{extension}.pkl")``; if that file
        already exists it is loaded instead of fetching the ontology again.

        Parameters
        ----------
        prefix :
            The namespace prefix of the ontology; lower-cased for the cache
            file name and upper-cased for the OBO library file name.
        extension :
            The file extension of the ontology in the OBO library.
        **kwargs :
            Passed on to :meth:`update_resource`.

        Raises
        ------
        ImportError
            If the ontology is not cached and ``pronto`` is not installed.
        """
        prefix = prefix.lower()
        cache_path = get_resource_path(f"{prefix}.{extension}.pkl")

        if os.path.exists(cache_path):
            # NOTE(review): unpickling executes arbitrary code from the
            # file, so this cache must only ever be a locally generated,
            # trusted file.
            with open(cache_path, "rb") as file:
                ontology = pickle.load(file)
        else:
            pronto = cls._import_pronto()
            ontology = pronto.Ontology.from_obo_library(
                f"{prefix.upper()}.{extension}")
            with open(cache_path, "wb") as file:
                pickle.dump(ontology, file, protocol=pickle.HIGHEST_PROTOCOL)

        cls.update_resource(prefix=prefix, ontology=ontology, **kwargs)

    @classmethod
    def update_from_file(
        cls,
        prefix: str,
        file,
        **kwargs,
    ):
        """Update the resource file from an ontology file.

        Parameters
        ----------
        prefix :
            The namespace prefix of the ontology.
        file :
            The ontology source, handed to ``pronto.Ontology`` as is.
        **kwargs :
            Passed on to :meth:`update_resource`.

        Raises
        ------
        ImportError
            If ``pronto`` is not installed.
        """
        pronto = cls._import_pronto()
        ontology = pronto.Ontology(file)
        cls.update_resource(prefix=prefix, ontology=ontology, **kwargs)

    @staticmethod
    def _import_pronto():
        """Import and return the optional ``pronto`` module.

        Raises
        ------
        ImportError
            With installation instructions, if ``pronto`` is missing.
        """
        try:
            import pronto
        except ImportError as err:
            raise ImportError(
                "To use the INDRA OWL Client, you must first "
                "install Pronto with `pip install pronto`."
            ) from err
        return pronto
def _id_key(x): return int(x["id"].split(':')[1]) if __name__ == "__main__": OwlClient.update_from_obo_library("ido")