Source code for indra.databases.chembl_client

import os
import logging
import requests
from sympy.physics import units
from indra.databases import chebi_client, uniprot_client
from indra.statements import Inhibition, Agent, Evidence
from collections import defaultdict
from indra.util import read_unicode_csv

logger = logging.getLogger(__name__)


def get_inhibition(drug, target):
    chebi_id = drug.db_refs.get('CHEBI')
    mesh_id = drug.db_refs.get('MESH')
    if chebi_id:
        drug_chembl_id = chebi_client.get_chembl_id(chebi_id)
    elif mesh_id:
        drug_chembl_id = get_chembl_id(mesh_id)
    else:
        logger.error('Drug missing ChEBI or MESH grounding.')
        return None

    target_upid = target.db_refs.get('UP')
    if not target_upid:
        logger.error('Target missing UniProt grounding.')
        return None
    target_chembl_id = get_target_chemblid(target_upid)

    logger.info('Drug: %s, Target: %s' % (drug_chembl_id, target_chembl_id))
    query_dict = {'query': 'activity',
                  'params': {'molecule_chembl_id': drug_chembl_id,
                             'target_chembl_id': target_chembl_id,
                             'limit': 10000}}
    res = send_query(query_dict)
    evidence = []
    for assay in res['activities']:
        ev = get_evidence(assay)
        if not ev:
            continue
        evidence.append(ev)
    st = Inhibition(drug, target, evidence=evidence)
    return st


[docs]def get_drug_inhibition_stmts(drug): """Query ChEMBL for kinetics data given drug as Agent get back statements Parameters ---------- drug : Agent Agent representing drug with MESH or CHEBI grounding Returns ------- stmts : list of INDRA statements INDRA statements generated by querying ChEMBL for all kinetics data of a drug interacting with protein targets """ chebi_id = drug.db_refs.get('CHEBI') mesh_id = drug.db_refs.get('MESH') if chebi_id: drug_chembl_id = chebi_client.get_chembl_id(chebi_id) elif mesh_id: drug_chembl_id = get_chembl_id(mesh_id) else: logger.error('Drug missing ChEBI or MESH grounding.') return None logger.info('Drug: %s' % (drug_chembl_id)) query_dict = {'query': 'activity', 'params': {'molecule_chembl_id': drug_chembl_id, 'limit': 10000} } res = send_query(query_dict) activities = res['activities'] targ_act_dict = activities_by_target(activities) target_chembl_ids = [x for x in targ_act_dict] protein_targets = get_protein_targets_only(target_chembl_ids) filtered_targ_act_dict = {t: targ_act_dict[t] for t in [x for x in protein_targets]} stmts = [] for target_chembl_id in filtered_targ_act_dict: target_activity_ids = filtered_targ_act_dict[target_chembl_id] target_activites = [x for x in activities if x['activity_id'] in target_activity_ids] target_upids = [] targ_comp = protein_targets[target_chembl_id]['target_components'] for t_c in targ_comp: target_upids.append(t_c['accession']) evidence = [] for assay in target_activites: ev = get_evidence(assay) if not ev: continue evidence.append(ev) if len(evidence) > 0: for target_upid in target_upids: agent_name = uniprot_client.get_gene_name(target_upid) target_agent = Agent(agent_name, db_refs={'UP': target_upid}) st = Inhibition(drug, target_agent, evidence=evidence) stmts.append(st) return stmts
[docs]def send_query(query_dict): """Query ChEMBL API Parameters ---------- query_dict : dict 'query' : string of the endpoint to query 'params' : dict of params for the query Returns ------- js : dict dict parsed from json that is unique to the submitted query """ query = query_dict['query'] params = query_dict['params'] url = 'https://www.ebi.ac.uk/chembl/api/data/' + query + '.json' r = requests.get(url, params=params) r.raise_for_status() js = r.json() return js
[docs]def query_target(target_chembl_id): """Query ChEMBL API target by id Parameters ---------- target_chembl_id : str Returns ------- target : dict dict parsed from json that is unique for the target """ query_dict = {'query': 'target', 'params': {'target_chembl_id': target_chembl_id, 'limit': 1}} res = send_query(query_dict) target = res['targets'][0] return target
[docs]def activities_by_target(activities): """Get back lists of activities in a dict keyed by ChEMBL target id Parameters ---------- activities : list response from a query returning activities for a drug Returns ------- targ_act_dict : dict dictionary keyed to ChEMBL target ids with lists of activity ids """ targ_act_dict = defaultdict(lambda: []) for activity in activities: target_chembl_id = activity['target_chembl_id'] activity_id = activity['activity_id'] targ_act_dict[target_chembl_id].append(activity_id) for target_chembl_id in targ_act_dict: targ_act_dict[target_chembl_id] = \ list(set(targ_act_dict[target_chembl_id])) return targ_act_dict
[docs]def get_protein_targets_only(target_chembl_ids): """Given list of ChEMBL target ids, return dict of SINGLE PROTEIN targets Parameters ---------- target_chembl_ids : list list of chembl_ids as strings Returns ------- protein_targets : dict dictionary keyed to ChEMBL target ids with lists of activity ids """ protein_targets = {} for target_chembl_id in target_chembl_ids: target = query_target(target_chembl_id) if 'SINGLE PROTEIN' in target['target_type']: protein_targets[target_chembl_id] = target return protein_targets
[docs]def get_evidence(assay): """Given an activity, return an INDRA Evidence object. Parameters ---------- assay : dict an activity from the activities list returned by a query to the API Returns ------- ev : :py:class:`Evidence` an :py:class:`Evidence` object containing the kinetics of the """ kin = get_kinetics(assay) source_id = assay.get('assay_chembl_id') if not kin: return None annotations = {'kinetics': kin} chembl_doc_id = str(assay.get('document_chembl_id')) pmid = get_pmid(chembl_doc_id) ev = Evidence(source_api='chembl', pmid=pmid, source_id=source_id, annotations=annotations) return ev
[docs]def get_kinetics(assay): """Given an activity, return its kinetics values. Parameters ---------- assay : dict an activity from the activities list returned by a query to the API Returns ------- kin : dict dictionary of values with units keyed to value types 'IC50', 'EC50', 'INH', 'Potency', 'Kd' """ try: val = float(assay.get('standard_value')) except TypeError: logger.warning('Invalid assay value: %s' % assay.get('standard_value')) return None unit = assay.get('standard_units') if unit == 'nM': unit_sym = 1e-9 * units.mol / units.liter elif unit == 'uM': unit_sym = 1e-6 * units.mol / units.liter else: logger.warning('Unhandled unit: %s' % unit) return None param_type = assay.get('standard_type') if param_type not in ['IC50', 'EC50', 'INH', 'Potency', 'Kd']: logger.warning('Unhandled parameter type: %s' % param_type) logger.info(str(assay)) return None kin = {param_type: val * unit_sym} return kin
[docs]def get_pmid(doc_id): """Get PMID from document_chembl_id Parameters ---------- doc_id : str Returns ------- pmid : str """ url_pmid = 'https://www.ebi.ac.uk/chembl/api/data/document.json' params = {'document_chembl_id': doc_id} res = requests.get(url_pmid, params=params) js = res.json() pmid = str(js['documents'][0]['pubmed_id']) return pmid
[docs]def get_target_chemblid(target_upid): """Get ChEMBL ID from UniProt upid Parameters ---------- target_upid : str Returns ------- target_chembl_id : str """ url = 'https://www.ebi.ac.uk/chembl/api/data/target.json' params = {'target_components__accession': target_upid} r = requests.get(url, params=params) r.raise_for_status() js = r.json() target_chemblid = js['targets'][0]['target_chembl_id'] return target_chemblid
[docs]def get_mesh_id(nlm_mesh): """Get MESH ID from NLM MESH Parameters ---------- nlm_mesh : str Returns ------- mesh_id : str """ url_nlm2mesh = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi' params = {'db': 'mesh', 'term': nlm_mesh, 'retmode': 'JSON'} r = requests.get(url_nlm2mesh, params=params) res = r.json() mesh_id = res['esearchresult']['idlist'][0] return mesh_id
[docs]def get_pcid(mesh_id): """Get PC ID from MESH ID Parameters ---------- mesh : str Returns ------- pcid : str """ url_mesh2pcid = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi' params = {'dbfrom': 'mesh', 'id': mesh_id, 'db': 'pccompound', 'retmode': 'JSON'} r = requests.get(url_mesh2pcid, params=params) res = r.json() pcid = res['linksets'][0]['linksetdbs'][0]['links'][0] return pcid
[docs]def get_chembl_id(nlm_mesh): """Get ChEMBL ID from NLM MESH Parameters ---------- nlm_mesh : str Returns ------- chembl_id : str """ mesh_id = get_mesh_id(nlm_mesh) pcid = get_pcid(mesh_id) url_mesh2pcid = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/' + \ 'cid/%s/synonyms/JSON' % pcid r = requests.get(url_mesh2pcid) res = r.json() synonyms = res['InformationList']['Information'][0]['Synonym'] chembl_id = [syn for syn in synonyms if 'CHEMBL' in syn and 'SCHEMBL' not in syn][0] return chembl_id
[docs]def get_chembl_name(chembl_id): """Return a standard ChEMBL name from an ID if available in the local resource. Parameters ---------- chembl_id : str The ChEBML ID to get the name for. Returns ------- str or None The corresponding ChEBML name or None if not available. """ return chembl_names.get(chembl_id)
resource_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'resources', 'chembl_tas.csv') def _load_resource(): chembl_names = {} for chembl_id, chembl_name, _ in read_unicode_csv(resource_file, delimiter=',', skiprows=1): if chembl_name: chembl_names[chembl_id] = chembl_name return chembl_names chembl_names = _load_resource()