Source code for indra.databases.pubchem_client

import logging
import requests
from typing import List, Union
from functools import lru_cache
from indra.resources import get_resource_path
from indra.util import read_unicode_csv


pubchem_url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug'


logger = logging.getLogger(__name__)


@lru_cache(maxsize=5000)
def get_inchi_key(pubchem_cid):
    """Look up the InChIKey of a compound through the PubChem REST API.

    The result of each lookup, including a failed one (None), is memoized
    by the LRU cache wrapping this function.

    Parameters
    ----------
    pubchem_cid : str
        The PubChem CID to look up.

    Returns
    -------
    str or None
        The whitespace-stripped text of the response, i.e. the InChIKey
        for the CID, or None if the service answered with a status code
        other than 200.
    """
    endpoint = '{}/compound/cid/{}/property/InChIKey/TXT'.format(
        pubchem_url, pubchem_cid)
    response = requests.get(endpoint)
    if response.status_code == 200:
        # The TXT endpoint returns the key as plain text; trim the
        # surrounding whitespace/newline.
        return response.text.strip()
    logger.error('Could not retrieve InChIKey for %s' % pubchem_cid)
    return None
@lru_cache(maxsize=5000)
def get_json_record(pubchem_cid):
    """Fetch the full JSON record of a compound from PubChem.

    The request goes to the ``<pubchem_url>_view/data/compound/<cid>/JSON/``
    endpoint. The HTTP status code is not inspected; whatever body comes
    back is parsed as JSON. Results are memoized by the LRU cache wrapping
    this function.

    Parameters
    ----------
    pubchem_cid : str
        The PubChem CID whose record is requested.

    Returns
    -------
    dict
        The response body deserialized from JSON.
    """
    url_template = pubchem_url + '_view/data/compound/%s/JSON/'
    record_url = url_template % pubchem_cid
    response = requests.get(record_url)
    return response.json()
def get_preferred_compound_ids(pubchem_cid):
    """Collect the preferred compound CIDs listed for a PubChem CID.

    The compound's JSON record is scanned for top-level sections whose
    ``TOCHeading`` is ``'Preferred Compound'``; every number found under
    ``Information[*]['Value']['Number']`` in those sections is gathered.

    Parameters
    ----------
    pubchem_cid : str
        The PubChem CID whose preferred CIDs are requested.

    Returns
    -------
    list of str
        The distinct preferred CIDs converted to strings and sorted as
        strings (i.e. lexicographically). The list is empty if the record
        has no 'Preferred Compound' section.
    """
    record = get_json_record(pubchem_cid)
    raw_ids = set()
    for section in record['Record']['Section']:
        if section['TOCHeading'] != 'Preferred Compound':
            continue
        for entry in section['Information']:
            raw_ids.update(entry['Value']['Number'])
    # De-duplicate on the raw values first, then stringify and sort.
    return sorted(str(raw_id) for raw_id in raw_ids)
def get_pmids(pubchem_cid: str) -> List[str]:
    """Return depositor provided PMIDs for a given PubChem CID.

    Note that this information can also be obtained via PubMed at
    https://www.ncbi.nlm.nih.gov/sites/entrez?LinkName=pccompound_pubmed&db=pccompound&cmd=Link&from_uid=<pubchem_cid>.

    Parameters
    ----------
    pubchem_cid :
        The PubChem CID whose PMIDs will be returned.

    Returns
    -------
    :
        PMIDs corresponding to the given PubChem CID. If none present,
        or the query fails, an empty list is returned.
    """
    url = '%s/compound/cid/%s/xrefs/PubMedID/JSON' % \
        (pubchem_url, pubchem_cid)
    res = requests.get(url)
    if res.status_code != 200:
        logger.error('Could not retrieve PMIDs for %s', pubchem_cid)
        return []
    res_json = res.json()
    try:
        pmids = res_json['InformationList']['Information'][0]['PubMedID']
    except (KeyError, IndexError, TypeError):
        # A successful response that carries no PubMedID entries (missing
        # key, empty Information list or unexpected payload shape) means
        # "no PMIDs", which the documented contract maps to an empty list
        # rather than an exception.
        return []
    return [str(pmid) for pmid in pmids]
def get_mesh_id(pubchem_cid: str) -> Union[str, None]:
    """Map a PubChem CID to its MeSH ID.

    This is a plain lookup in the module-level ``pubchem_mesh_map``
    dictionary; no web request is made.

    Parameters
    ----------
    pubchem_cid :
        The PubChem CID to map. It is looked up exactly as given, so it
        must match the form of the keys in the mapping (presumably strings
        as read from the resource file -- verify against the loader).

    Returns
    -------
    :
        The MeSH ID mapped to the PubChem CID, or None if the CID has no
        entry in the mapping.
    """
    mesh_id = pubchem_mesh_map.get(pubchem_cid)
    return mesh_id
def _load_pubchem_mesh_map():
    """Load the PubChem-to-MeSH mapping from the bundled resource file.

    Reads the tab-separated ``pubchem_mesh_map.tsv`` resource and maps the
    first column of each row to the second column. If a first-column
    value occurs more than once, the last row wins.

    Returns
    -------
    dict
        Mapping from first-column values (used as PubChem CIDs by
        ``get_mesh_id``) to second-column values (MeSH IDs).
    """
    resource_file = get_resource_path('pubchem_mesh_map.tsv')
    mapping = {}
    for row in read_unicode_csv(resource_file, delimiter='\t'):
        mapping[row[0]] = row[1]
    return mapping


# Loaded once at import time so that lookups are simple dictionary accesses.
pubchem_mesh_map = _load_pubchem_mesh_map()