Source code for indra.databases.go_client

"""A client to the Gene Ontology."""
import re
import logging
from typing import Union
from indra.databases.obo_client import OboClient

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Shared OBO client created with the 'go' prefix; the functions in this
# module delegate their lookups to it.
_client = OboClient(prefix='go')


def get_go_label(go_id):
    """Return the label of a GO term given its identifier.

    Parameters
    ----------
    go_id : str
        A GO identifier including the `GO:` prefix, for example
        `GO:1903793` (positive regulation of anion transport).

    Returns
    -------
    str
        The label the GO client has for the given GO ID.
    """
    label = _client.get_name_from_id(go_id)
    return label


def get_go_id_from_label(label):
    """Return the GO identifier of the term with the given label.

    Parameters
    ----------
    label : str
        The GO label whose identifier is requested.

    Returns
    -------
    str
        The identifier for the given GO label; it starts with GO:.
    """
    go_id = _client.get_id_from_name(label)
    return go_id


def get_go_id_from_label_or_synonym(label):
    """Return the GO identifier matching a given label or synonym.

    Parameters
    ----------
    label : str
        The GO label or synonym whose identifier is requested.

    Returns
    -------
    str
        The identifier for the given GO label or synonym; it starts
        with GO:.
    """
    go_id = _client.get_id_from_name_or_synonym(label)
    return go_id


def get_primary_id(go_id):
    """Return the primary GO ID for an alternative/deprecated GO ID.

    Parameters
    ----------
    go_id : str
        The GO ID whose primary ID is requested.

    Returns
    -------
    str
        The primary identifier corresponding to the given ID.
    """
    primary_id = _client.get_id_from_alt_id(go_id)
    return primary_id


def get_valid_location(loc):
    """Return a valid GO label based on an ID, label or synonym.

    The rationale behind this function is that many sources produce
    cellular locations that are arbitrarily either GO IDs (sometimes
    without the prefix and sometimes outdated) or labels or synonyms.
    This function handles all these cases and returns a valid GO label
    in case one is available, otherwise None.

    Parameters
    ----------
    loc : str
        The location that needs to be canonicalized.

    Returns
    -------
    str or None
        The valid location string if available, otherwise None.
    """
    if not loc:
        return None
    # A string of digits, optionally prefixed with "GO:", is treated as a
    # GO ID; anything else is treated as a label or synonym to look up.
    if re.match(r'^(GO:)?\d+$', loc):
        go_id = loc if loc.startswith('GO:') else 'GO:' + loc
        # Switch to the primary ID when the lookup finds one for this ID;
        # otherwise keep the ID as given.
        go_id = get_primary_id(go_id) or go_id
    else:
        go_id = get_go_id_from_label_or_synonym(loc)
        if not go_id:
            return None
    # go_id is always non-empty at this point, so only the label lookup can
    # still come back empty; normalize any empty result to None.
    return get_go_label(go_id) or None


def get_namespace(go_id: str) -> Union[str, None]:
    """Return the namespace of the GO term with the given ID.

    Parameters
    ----------
    go_id :
        The GO ID whose namespace is requested.

    Returns
    -------
    :
        The GO namespace for the given ID. This is one of
        molecular_function, biological_process or cellular_component.
        None is returned if the GO ID is not available as an entry.
    """
    # Fall back to an empty entry so that an unknown ID yields None
    entry = _client.entries.get(go_id, {})
    return entry.get('namespace')