Source code for indra.databases.owl_client

"""A client for OWL-sourced identifier mappings."""

import json
import os
import pickle
from collections import defaultdict
from operator import itemgetter
from typing import Any, Collection, Mapping, TYPE_CHECKING

from tqdm import tqdm

from indra.databases.obo_client import OntologyClient, prune_empty_entries
from indra.resources import get_resource_path

if TYPE_CHECKING:
    import pronto


class OwlClient(OntologyClient):
    """A base client for data that's been grabbed via OWL."""

    @staticmethod
    def entry_from_term(
        term: "pronto.Term",
        prefix: str,
        remove_prefix: bool = False,
        allowed_external_ns: Collection = None,
    ) -> Mapping[str, Any]:
        """Create a data dictionary from a Pronto term.

        Parameters
        ----------
        term :
            The Pronto term to convert. Its ``id`` must have the form
            ``<namespace>:<local id>``.
        prefix :
            The namespace prefix of the ontology being processed. It is
            compared case-insensitively with the namespace of each parent.
        remove_prefix :
            If True, parents belonging to the ``prefix`` namespace are
            recorded by their local ID only. Parents from other namespaces
            always keep their full ID.
        allowed_external_ns :
            Namespaces other than ``prefix`` whose terms may be kept as
            parents. Membership is tested case-sensitively. If None or
            empty, only parents from the ``prefix`` namespace are kept.

        Returns
        -------
        :
            A dict with the keys ``namespace`` (lower-cased), ``id`` (the
            local part of the term ID), ``name``, ``synonyms``, ``xrefs``,
            ``alt_ids`` and ``relations``.

        Raises
        ------
        ValueError
            If the ID of the term or of one of its parents contains no
            colon.
        """
        # Cross-references without a "namespace:id" shape are skipped.
        xrefs = []
        for xref in term.xrefs:
            try:
                xref_db, xref_id = xref.id.split(":", maxsplit=1)
            except ValueError:
                continue
            xrefs.append(dict(namespace=xref_db, id=xref_id))

        rels_dict = defaultdict(list)
        prefix_lower = prefix.lower()
        # distance=1 and with_self=False restrict this to direct parents.
        for parent in term.superclasses(distance=1, with_self=False):
            parent_db, parent_id = parent.id.split(":", maxsplit=1)
            is_internal = parent_db.lower() == prefix_lower
            # If the parent here is not from this namespace and not one of
            # the allowed external namespaces then we skip the parent
            if not is_internal and (
                not allowed_external_ns
                or parent_db not in allowed_external_ns
            ):
                continue
            if remove_prefix and is_internal:
                rels_dict["is_a"].append(parent_id)
            else:
                rels_dict["is_a"].append(parent.id)

        term_ns, term_id = term.id.split(":", maxsplit=1)
        return {
            "namespace": term_ns.lower(),
            "id": term_id,
            "name": term.name,
            "synonyms": [s.description for s in term.synonyms],
            "xrefs": xrefs,
            "alt_ids": sorted(term.alternate_ids),
            "relations": dict(rels_dict),
        }

    @classmethod
    def entries_from_ontology(
        cls,
        prefix: str,
        ontology: "pronto.Ontology",
        *,
        skip_obsolete: bool = True,
        remove_prefix: bool = False,
        allowed_external_ns: Collection = None,
    ):
        """Build entry dictionaries for the terms of an ontology.

        Parameters
        ----------
        prefix :
            The namespace prefix. It is upper-cased, and only terms whose
            ID starts with the upper-cased prefix are converted.
        ontology :
            The Pronto ontology whose terms are iterated.
        skip_obsolete :
            If True, terms flagged as obsolete are left out.
        remove_prefix :
            Forwarded to :meth:`entry_from_term`.
        allowed_external_ns :
            Forwarded to :meth:`entry_from_term`.

        Returns
        -------
        :
            A list with one entry dictionary per retained term.
        """
        prefix = prefix.upper()
        rv = []
        for term in tqdm(ontology.terms(), desc=f"[{prefix}]"):
            if term.obsolete and skip_obsolete:
                continue
            if not term.id.startswith(prefix):
                continue
            rv.append(
                cls.entry_from_term(
                    term,
                    prefix,
                    remove_prefix=remove_prefix,
                    # Forward the caller's choice; otherwise parents from
                    # external namespaces would always be dropped.
                    allowed_external_ns=allowed_external_ns,
                )
            )
        return rv

    @classmethod
    def update_resource(
        cls,
        prefix: str,
        ontology: "pronto.Ontology",
        skip_obsolete: bool = True,
        remove_prefix: bool = False,
        allowed_external_ns: Collection = None,
    ):
        """Write the entries of an ontology to the JSON resource file.

        The entries are written to the path that ``get_resource_path``
        returns for ``<prefix>.json``, with ``prefix`` lower-cased.

        Parameters
        ----------
        prefix :
            The namespace prefix of the ontology.
        ontology :
            The Pronto ontology to export.
        skip_obsolete :
            Forwarded to :meth:`entries_from_ontology`.
        remove_prefix :
            Forwarded to :meth:`entries_from_ontology`. It also selects
            the sort key: the raw ``id`` string if True, otherwise the
            key computed by ``_id_key``.
        allowed_external_ns :
            Forwarded to :meth:`entries_from_ontology`.
        """
        prefix = prefix.lower()
        entries = cls.entries_from_ontology(
            prefix=prefix,
            ontology=ontology,
            skip_obsolete=skip_obsolete,
            remove_prefix=remove_prefix,
            allowed_external_ns=allowed_external_ns,
        )
        # NOTE(review): presumably removes the listed keys from entries
        # where their value is empty -- confirm against obo_client.
        entries = prune_empty_entries(
            entries,
            {"synonyms", "xrefs", "alt_ids", "relations"},
        )
        entries = sorted(
            entries,
            key=itemgetter("id") if remove_prefix else _id_key,
        )

        resource_path = get_resource_path(f"{prefix}.json")
        with open(resource_path, "w") as file:
            json.dump(entries, file, indent=1, sort_keys=True)

    @classmethod
    def update_from_obo_library(
        cls,
        prefix: str,
        extension: str = "owl",
        **kwargs,
    ):
        """Update the resource file from an OBO library ontology.

        The loaded ontology is cached as a pickle at the resource path
        for ``<prefix>.<extension>.pkl``. If that file already exists it
        is loaded instead of fetching the ontology again.

        Parameters
        ----------
        prefix :
            The namespace prefix of the ontology. It is lower-cased for
            file names and upper-cased for the OBO library request.
        extension :
            The file extension of the ontology to request.
        **kwargs :
            Forwarded to :meth:`update_resource`.

        Raises
        ------
        ImportError
            If no cache exists and Pronto is not installed.
        """
        prefix = prefix.lower()
        cache_path = get_resource_path(f"{prefix}.{extension}.pkl")

        if os.path.exists(cache_path):
            # NOTE: unpickling can execute arbitrary code. The cache must
            # only ever be a file written by this method; never place
            # data from an untrusted source at this path.
            with open(cache_path, "rb") as file:
                ontology = pickle.load(file)
        else:
            pronto = cls._import_pronto()
            ontology = pronto.Ontology.from_obo_library(
                f"{prefix.upper()}.{extension}")
            with open(cache_path, "wb") as file:
                pickle.dump(ontology, file, protocol=pickle.HIGHEST_PROTOCOL)

        cls.update_resource(prefix=prefix, ontology=ontology, **kwargs)

    @classmethod
    def update_from_file(
        cls,
        prefix: str,
        file,
        **kwargs,
    ):
        """Update the resource file from an ontology file.

        Parameters
        ----------
        prefix :
            The namespace prefix of the ontology.
        file :
            Passed unchanged to ``pronto.Ontology``.
        **kwargs :
            Forwarded to :meth:`update_resource`.

        Raises
        ------
        ImportError
            If Pronto is not installed.
        """
        pronto = cls._import_pronto()
        ontology = pronto.Ontology(file)
        cls.update_resource(prefix=prefix, ontology=ontology, **kwargs)

    @staticmethod
    def _import_pronto():
        """Import and return the optional ``pronto`` package.

        Raises
        ------
        ImportError
            If Pronto is not installed; the message explains how to
            install it.
        """
        try:
            import pronto
        except ImportError as err:
            raise ImportError(
                "To use the INDRA OWL Client, you must first "
                "install Pronto with `pip install pronto`."
            ) from err
        return pronto


def _id_key(x):
    return int(x["id"].split(':')[1])


if __name__ == "__main__":
    # Running this module as a script rebuilds the "ido" JSON resource
    # from the OBO library, or from the local pickle cache if one exists.
    OwlClient.update_from_obo_library(prefix="ido")