Source code for indra.databases.rgd_client

"""A client for accessing RGD rat gene data."""

from collections import defaultdict
from typing import List, Union

from indra.util import read_unicode_csv
from indra.resources import get_resource_path


def get_id_from_name(name: str) -> Union[str, None]:
    """Look up the RGD ID that belongs to an RGD gene symbol.

    Parameters
    ----------
    name :
        The RGD gene symbol to look up.

    Returns
    -------
    :
        The matching RGD ID (without prefix), or None if the symbol is
        not present in the name-to-ID table.
    """
    try:
        return rgd_name_to_id[name]
    except KeyError:
        # Unknown symbols are reported as None rather than raising.
        return None
def get_name_from_id(rgd_id: str) -> Union[str, None]:
    """Look up the RGD gene symbol that belongs to an RGD ID.

    Parameters
    ----------
    rgd_id :
        The RGD ID (without prefix) to look up.

    Returns
    -------
    :
        The gene symbol registered for the ID, or None if the ID is not
        present in the ID-to-name table.
    """
    symbol = rgd_id_to_name.get(rgd_id, None)
    return symbol
def get_synonyms(rgd_id: str) -> List[str]:
    """Look up the synonyms recorded for an RGD ID.

    Parameters
    ----------
    rgd_id :
        An RGD ID, without prefix.

    Returns
    -------
    :
        The synonyms stored for the RGD ID, or a new empty list if the ID
        has no synonym entry.
    """
    try:
        return rgd_synonyms[rgd_id]
    except KeyError:
        # IDs without a synonym entry yield an empty list, not None.
        return []
def get_id_from_name_synonym(name_synonym: str) -> Union[None, str, List[str]]:
    """Resolve an RGD gene symbol or synonym to one or more RGD IDs.

    The official symbol table is consulted first; a hit there is returned
    directly as a single ID. Otherwise the input is treated as a synonym,
    which may be shared by several genes: a synonym matching exactly one
    gene yields that gene's ID as a string, while a synonym matching
    several genes yields the list of their IDs. Inputs matching neither a
    symbol nor a synonym yield None.

    Parameters
    ----------
    name_synonym :
        The RGD gene symbol or synonym to resolve.

    Returns
    -------
    :
        A single RGD ID (without prefix), a list of RGD IDs, or None.
    """
    # Official symbols take precedence over synonyms.
    official_id = rgd_name_to_id.get(name_synonym)
    if official_id:
        return official_id

    matching_ids = rgd_synonyms_reverse.get(name_synonym)
    if not matching_ids:
        return None

    # Collapse an unambiguous synonym to a plain string.
    return matching_ids[0] if len(matching_ids) == 1 else matching_ids
def get_ensembl_id(rgd_id: str) -> Union[List[str], None]:
    """Return the Ensembl IDs for an RGD ID.

    Parameters
    ----------
    rgd_id :
        An RGD ID, without prefix.

    Returns
    -------
    :
        A list of Ensembl IDs corresponding to the RGD ID, or None if not
        available.
    """
    # The lookup table stores a list per RGD ID (the semicolon-separated
    # Ensembl column is split when the resource file is loaded), so the
    # value returned here is a list of strings, not a single string.
    return rgd_id_to_ensembl.get(rgd_id)
def _read_rgd():
    """Build the RGD lookup tables from the ``rgd_entries.tsv`` resource.

    Every row of the resource file is unpacked into four fields: the RGD
    ID, the gene symbol, a semicolon-separated synonym string and a
    semicolon-separated Ensembl ID string. Empty fields are skipped.

    Returns
    -------
    :
        A 6-tuple of dicts, in this order: RGD ID to symbol, symbol to
        RGD ID, RGD ID to list of synonyms, synonym to list of RGD IDs,
        RGD ID to list of Ensembl IDs, and Ensembl ID to RGD ID.
    """
    entries_path = get_resource_path('rgd_entries.tsv')
    id_to_name = {}
    name_to_id = {}
    synonyms_by_id = {}
    ids_by_synonym = defaultdict(list)
    ensembl_by_id = {}
    rgd_by_ensembl = {}

    for row in read_unicode_csv(entries_path, '\t'):
        rgd_id, symbol, synonyms_field, ensembl_field = row

        if symbol:
            id_to_name[rgd_id] = symbol
            name_to_id[symbol] = rgd_id

        if synonyms_field:
            synonym_list = synonyms_field.split(';')
            synonyms_by_id[rgd_id] = synonym_list
            # A synonym may be shared by several genes, so collect all IDs.
            for synonym in synonym_list:
                ids_by_synonym[synonym].append(rgd_id)

        if ensembl_field:
            ensembl_list = ensembl_field.split(';')
            ensembl_by_id[rgd_id] = ensembl_list
            # Later rows overwrite earlier ones for a repeated Ensembl ID.
            rgd_by_ensembl.update(
                (ensembl_id, rgd_id) for ensembl_id in ensembl_list
            )

    # Hand back a plain dict so lookups of unknown synonyms cannot
    # silently insert empty entries.
    return (id_to_name, name_to_id, synonyms_by_id, dict(ids_by_synonym),
            ensembl_by_id, rgd_by_ensembl)


# Load the lookup tables once, at import time.
(rgd_id_to_name, rgd_name_to_id, rgd_synonyms, rgd_synonyms_reverse,
 rgd_id_to_ensembl, ensemble_id_to_rgd) = _read_rgd()