"""A client to the Gene Ontology."""
import re
import logging
from typing import Union
from indra.databases.obo_client import OboClient
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Shared OboClient instance constructed with the 'go' prefix; the lookup
# functions in this module delegate to it.
_client = OboClient(prefix='go')
def get_go_label(go_id):
    """Look up the label of a GO term from its identifier.

    Parameters
    ----------
    go_id : str
        GO identifier including the `GO:` prefix, for example `GO:1903793`
        (positive regulation of anion transport).

    Returns
    -------
    str
        The label that the GO client associates with the given ID.
    """
    label = _client.get_name_from_id(go_id)
    return label
def get_go_id_from_label(label):
    """Look up the GO identifier that belongs to a GO label.

    Parameters
    ----------
    label : str
        The GO label whose identifier is requested.

    Returns
    -------
    str
        The identifier for the label; it starts with `GO:`.
    """
    go_id = _client.get_id_from_name(label)
    return go_id
def get_go_id_from_label_or_synonym(label):
    """Look up the GO identifier that belongs to a GO label or synonym.

    Parameters
    ----------
    label : str
        A GO label, or a synonym of one, whose identifier is requested.

    Returns
    -------
    str
        The identifier for the label or synonym; it starts with `GO:`.
    """
    go_id = _client.get_id_from_name_or_synonym(label)
    return go_id
def get_primary_id(go_id):
    """Map an alternative or deprecated GO ID to its primary ID.

    Parameters
    ----------
    go_id : str
        The (possibly alternative or deprecated) GO ID to resolve.

    Returns
    -------
    str
        The primary identifier that the GO client reports for the given
        ID, as returned by its alternative-ID lookup.
    """
    primary_id = _client.get_id_from_alt_id(go_id)
    return primary_id
def get_valid_location(loc):
    """Return a valid GO label based on an ID, label or synonym.

    The rationale behind this function is that many sources produce
    cellular locations that are arbitrarily either GO IDs (sometimes
    without the prefix and sometimes outdated) or labels or synonyms.
    This function handles all these cases and returns a valid GO label
    in case one is available, otherwise None.

    Parameters
    ----------
    loc : str
        The location that needs to be canonicalized.

    Returns
    -------
    str or None
        The valid location string if one is available, otherwise None.
    """
    if not loc:
        return None

    # Decide whether the input is a GO ID (with or without the prefix) or a
    # text label. fullmatch is used instead of match with a trailing '$'
    # because '$' also matches just before a final newline, which would let
    # a malformed ID such as 'GO:123\n' through as if it were an ID.
    if re.fullmatch(r'(GO:)?\d+', loc):
        go_id = loc if loc.startswith('GO:') else 'GO:' + loc
        # If the ID is an alternative/outdated one, switch to its primary
        # ID; otherwise keep the ID as given.
        go_id = get_primary_id(go_id) or go_id
    else:
        go_id = get_go_id_from_label_or_synonym(loc)
        if not go_id:
            return None

    # At this point go_id is always set. Look up its label and make sure a
    # falsy name is never returned as anything other than None.
    return get_go_label(go_id) or None
def get_namespace(go_id: str) -> Union[str, None]:
    """Look up the GO namespace that a GO ID belongs to.

    Parameters
    ----------
    go_id :
        The GO ID whose namespace is requested.

    Returns
    -------
    :
        The namespace of the given ID, one of molecular_function,
        biological_process or cellular_component. None is returned when
        the GO ID has no entry.
    """
    # An unknown ID falls back to an empty mapping so the namespace lookup
    # below yields None instead of failing.
    entry = _client.entries.get(go_id, {})
    return entry.get('namespace')