Source code for indra.databases.chebi_client

import os
import logging
import requests
from lxml import etree
from functools import lru_cache, cmp_to_key
from indra.util import read_unicode_csv
from indra.databases.obo_client import OboClient

# Shared OBO client for ChEBI; the functions below use it for name, ID,
# and alternative (secondary) ID lookups
_obo_client = OboClient(prefix='chebi')

logger = logging.getLogger(__name__)

# Namespaces used in the XML: this prefix-to-URI map is passed to ``find``
# when navigating responses from the ChEBI web service ('n' is the SOAP
# envelope namespace, 'c' is the ChEBI web service namespace)
chebi_xml_ns = {'n': 'http://schemas.xmlsoap.org/soap/envelope/',
                'c': 'https://www.ebi.ac.uk/webservices/chebi'}


def _add_prefix(chid):
    if chid and not chid.startswith('CHEBI:'):
        return 'CHEBI:%s' % chid
    else:
        return chid


def get_pubchem_id(chebi_id):
    """Return the PubChem ID corresponding to a given ChEBI ID.

    Parameters
    ----------
    chebi_id : str
        ChEBI ID to be converted. The 'CHEBI:' prefix is added if missing.

    Returns
    -------
    pubchem_id : str
        PubChem ID corresponding to the given ChEBI ID. If the lookup fails,
        None is returned.
    """
    return chebi_pubchem.get(_add_prefix(chebi_id))
def get_chebi_id_from_pubchem(pubchem_id):
    """Return the ChEBI ID corresponding to a given PubChem ID.

    Parameters
    ----------
    pubchem_id : str
        PubChem ID to be converted.

    Returns
    -------
    chebi_id : str
        ChEBI ID corresponding to the given PubChem ID. If the lookup fails,
        None is returned.
    """
    return pubchem_chebi.get(pubchem_id)
def get_chembl_id(chebi_id):
    """Return a ChEMBL ID from a given ChEBI ID.

    Parameters
    ----------
    chebi_id : str
        ChEBI ID to be converted. The 'CHEBI:' prefix is added if missing.

    Returns
    -------
    chembl_id : str
        ChEMBL ID corresponding to the given ChEBI ID. If the lookup fails,
        None is returned.
    """
    prefixed_id = _add_prefix(chebi_id)
    return chebi_chembl.get(prefixed_id)
def get_chebi_id_from_chembl(chembl_id):
    """Return a ChEBI ID from a given ChEMBL ID.

    Parameters
    ----------
    chembl_id : str
        ChEMBL ID to be converted.

    Returns
    -------
    chebi_id : str
        ChEBI ID corresponding to the given ChEMBL ID. If the lookup fails,
        None is returned.
    """
    chebi_id = chembl_chebi.get(chembl_id)
    return chebi_id
def get_chebi_id_from_cas(cas_id):
    """Return a ChEBI ID corresponding to the given CAS ID.

    Parameters
    ----------
    cas_id : str
        The CAS ID to be converted.

    Returns
    -------
    chebi_id : str
        The ChEBI ID corresponding to the given CAS ID. If the lookup fails,
        None is returned.
    """
    chebi_id = cas_chebi.get(cas_id)
    return chebi_id
def get_chebi_name_from_id(chebi_id, offline=True):
    """Return a ChEBI name corresponding to the given ChEBI ID.

    Parameters
    ----------
    chebi_id : str
        The ChEBI ID whose name is to be returned. The 'CHEBI:' prefix is
        added if missing.
    offline : Optional[bool]
        If False, the ChEBI web service is invoked in case a name mapping
        could not be found in the local resource file. Default: True

    Returns
    -------
    chebi_name : str
        The name corresponding to the given ChEBI ID. If the lookup fails,
        None is returned.
    """
    prefixed_id = _add_prefix(chebi_id)
    name = _obo_client.get_name_from_id(prefixed_id)
    # The local result is final if it was found or if web access is disabled
    if name is not None or offline:
        return name
    return get_chebi_name_from_id_web(prefixed_id)
def get_chebi_id_from_name(chebi_name):
    """Return a ChEBI ID corresponding to the given ChEBI name.

    Parameters
    ----------
    chebi_name : str
        The ChEBI name whose ID is to be returned.

    Returns
    -------
    chebi_id : str
        The ID corresponding to the given ChEBI name. If the lookup fails,
        None is returned.
    """
    chebi_id = _obo_client.get_id_from_name(chebi_name)
    return chebi_id
@lru_cache(maxsize=5000)
def get_chebi_entry_from_web(chebi_id):
    """Return a ChEBI entry corresponding to a given ChEBI ID using a REST API.

    Results are memoized with ``lru_cache``, so repeated calls with the same
    ID do not trigger a new request. Note that a None result from a failed
    request is cached in the same way.

    Parameters
    ----------
    chebi_id : str
        The ChEBI ID whose entry is to be returned.

    Returns
    -------
    lxml.etree._Element or None
        An lxml element representing the ChEBI entry. None is returned if
        the web service responds with a status code other than 200 or if
        the expected element is not found in the response.
    """
    url_base = 'http://www.ebi.ac.uk/webservices/chebi/2.0/test/'
    url_fmt = url_base + 'getCompleteEntity?chebiId=%s'
    resp = requests.get(url_fmt % chebi_id)
    if resp.status_code != 200:
        logger.warning("Got bad code from CHEBI client: %s",
                       resp.status_code)
        return None
    tree = etree.fromstring(resp.content)
    # The entry is nested in the SOAP envelope's body
    path = 'n:Body/c:getCompleteEntityResponse/c:return'
    elem = tree.find(path, namespaces=chebi_xml_ns)
    return elem
def _get_chebi_value_from_entry(entry, key): if entry is None: return None path = 'c:%s' % key elem = entry.find(path, namespaces=chebi_xml_ns) if elem is not None: return elem.text return None
def get_chebi_name_from_id_web(chebi_id):
    """Return a ChEBI name corresponding to a given ChEBI ID using a REST API.

    Parameters
    ----------
    chebi_id : str
        The ChEBI ID whose name is to be returned.

    Returns
    -------
    chebi_name : str
        The name corresponding to the given ChEBI ID. If the lookup fails,
        None is returned.
    """
    return _get_chebi_value_from_entry(get_chebi_entry_from_web(chebi_id),
                                       'chebiAsciiName')
def get_inchi_key(chebi_id):
    """Return an InChIKey corresponding to a given ChEBI ID using a REST API.

    Parameters
    ----------
    chebi_id : str
        The ChEBI ID whose InChIKey is to be returned.

    Returns
    -------
    str
        The InChIKey corresponding to the given ChEBI ID. If the lookup
        fails, None is returned.
    """
    return _get_chebi_value_from_entry(get_chebi_entry_from_web(chebi_id),
                                       'inchiKey')
def get_primary_id(chebi_id):
    """Return the primary ID corresponding to a ChEBI ID.

    Note that if the provided ID is a primary ID, it is returned unchanged
    (apart from the 'CHEBI:' prefix being added if it was missing).

    Parameters
    ----------
    chebi_id : str
        The ChEBI ID that should be mapped to its primary equivalent.

    Returns
    -------
    str or None
        The primary ChEBI ID or None if the provided ID is neither primary
        nor a secondary ID with a primary mapping.
    """
    prefixed_id = _add_prefix(chebi_id)
    if prefixed_id not in _obo_client.entries:
        # Not a primary ID, so try to resolve it as an alternative ID
        return _obo_client.get_id_from_alt_id(prefixed_id)
    return prefixed_id
def get_specific_id(chebi_ids):
    """Return the most specific ID in a list based on the hierarchy.

    An ID is considered to be at the most specific level if no other ID in
    the list has an isa relationship to it. The IDs are only partially
    ordered by isa (unrelated IDs are incomparable), so the result is
    determined by a direct scan rather than by sorting, since sorting
    requires a consistent total order to give a reliable first element.

    Parameters
    ----------
    chebi_ids : list of str
        A list of ChEBI IDs some of which may be hierarchically related.
        The 'CHEBI:' prefix is added to IDs where it is missing.

    Returns
    -------
    str
        The first ChEBI ID which is at the most specific level in the
        hierarchy with respect to the input list. If the input is empty
        (or None), it is returned as is.
    """
    if not chebi_ids:
        return chebi_ids

    # Imported here rather than at the module level; presumably to avoid
    # loading the ontology (or a circular import) when the module is imported
    from indra.ontology.bio import bio_ontology

    chebi_ids = [_add_prefix(chebi_id) for chebi_id in chebi_ids]

    def has_more_specific(candidate):
        """Return True if another ID in the list is a child of candidate."""
        return any(other != candidate and
                   bio_ontology.isa('CHEBI', other, 'CHEBI', candidate)
                   for other in chebi_ids)

    for chebi_id in chebi_ids:
        if not has_more_specific(chebi_id):
            return chebi_id
    # Only reachable if the isa relations among the given IDs are cyclic;
    # fall back to the first ID so that a value is always returned
    return chebi_ids[0]
def get_chebi_id_from_hmdb(hmdb_id):
    """Return the ChEBI ID corresponding to an HMDB ID.

    Parameters
    ----------
    hmdb_id : str
        An HMDB ID.

    Returns
    -------
    str
        The ChEBI ID that the given HMDB ID maps to or None if no mapping
        was found.
    """
    chebi_id = hmdb_chebi.get(hmdb_id)
    return chebi_id
# Read resource files into module-level variables

def _read_resource_csv(fname):
    """Return a reader over the rows of a tab-separated resource file.

    The file is looked up in the 'resources' folder that is a sibling of
    the folder containing this module.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(here, os.pardir, 'resources', fname)
    return read_unicode_csv(file_path, delimiter='\t')


def _read_chebi_to_pubchem():
    """Return ChEBI-to-PubChem and PubChem-to-ChEBI mapping dicts."""
    rows = _read_resource_csv('chebi_to_pubchem.tsv')
    # Discard the first row (presumably a header)
    next(rows)
    chebi_pubchem = {}
    pubchem_chebi = {}
    ik_matches = {}
    # Here, in case there are many possible mappings, we make it so that we
    # end up with one that has an explicit InChiKey match over one that
    # doesn't, if such a mapping is available
    for raw_chebi_id, pc_id, ik_match in rows:
        chebi_id = 'CHEBI:%s' % raw_chebi_id
        is_ik_match = ik_match == 'Y'

        # ChEBI -> PubChem direction
        if chebi_id not in chebi_pubchem:
            chebi_pubchem[chebi_id] = pc_id
            ik_matches[(chebi_id, pc_id)] = ik_match
        elif is_ik_match and \
                not ik_matches.get((chebi_id, chebi_pubchem[chebi_id])):
            chebi_pubchem[chebi_id] = pc_id

        # PubChem -> ChEBI direction
        if pc_id not in pubchem_chebi:
            pubchem_chebi[pc_id] = chebi_id
            ik_matches[(chebi_id, pc_id)] = ik_match
        elif is_ik_match and \
                not ik_matches.get((pubchem_chebi[pc_id], pc_id)):
            pubchem_chebi[pc_id] = chebi_id
    return chebi_pubchem, pubchem_chebi


def _read_chebi_to_chembl():
    """Return ChEBI-to-ChEMBL and ChEMBL-to-ChEBI mapping dicts."""
    rows = _read_resource_csv('chebi_to_chembl.tsv')
    chebi_chembl = {}
    chembl_chebi = {}
    # Discard the first row (presumably a header)
    next(rows)
    for raw_chebi_id, chembl_id in rows:
        chebi_id = 'CHEBI:%s' % raw_chebi_id
        chebi_chembl[chebi_id] = chembl_id
        chembl_chebi[chembl_id] = chebi_id
    return chebi_chembl, chembl_chebi


def _read_cas_to_chebi():
    """Return a dict mapping CAS IDs to ChEBI IDs."""
    rows = _read_resource_csv('cas_to_chebi.tsv')
    # Discard the first row (presumably a header)
    next(rows)
    cas_chebi = {row[0]: 'CHEBI:%s' % row[1] for row in rows}
    # These are missing from the resource but appear often, so we map
    # them manually
    cas_chebi.update({'24696-26-2': 'CHEBI:17761',
                      '23261-20-3': 'CHEBI:18035',
                      '165689-82-7': 'CHEBI:16618'})
    return cas_chebi


def _read_hmdb_to_chebi():
    """Return a dict mapping HMDB IDs to ChEBI IDs."""
    rows = _read_resource_csv('hmdb_to_chebi.tsv')
    # Discard the first row (presumably a header)
    next(rows)
    return {row[0]: 'CHEBI:%s' % row[1] for row in rows}


chebi_pubchem, pubchem_chebi = _read_chebi_to_pubchem()
chebi_chembl, chembl_chebi = _read_chebi_to_chembl()
cas_chebi = _read_cas_to_chebi()
hmdb_chebi = _read_hmdb_to_chebi()