import logging
from xml.etree import ElementTree
from indra.statements import *
from indra.databases.identifiers import ensure_chebi_prefix, \
ensure_chembl_prefix
from indra.statements.validate import assert_valid_db_refs
from indra.ontology.standardize import standardize_name_db_refs, \
get_standard_agent
# Prefix-to-URI map for the DrugBank XML namespace; it is handed to the
# ElementTree ``find``/``findall`` calls made by db_find and db_findall.
drugbank_ns = {'db': 'http://www.drugbank.ca'}

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
class DrugbankProcessor:
    """Processor to extract INDRA Statements from DrugBank content.

    The processor assumes that an ElementTree is available which it then
    traverses to find drug-target information.

    Parameters
    ----------
    xml_tree : xml.etree.ElementTree.ElementTree
        An XML ElementTree representing DrugBank XML content.

    Attributes
    ----------
    statements : list of indra.statements.Statement
        A list of INDRA Statements that were extracted from DrugBank content.
    """
    def __init__(self, xml_tree: ElementTree.ElementTree):
        self.xml_tree = xml_tree
        self.statements = []

    def extract_statements(self):
        """Extract Statements for each ``db:drug`` element under the root.

        The extracted Statements are appended to ``self.statements``;
        nothing is returned.
        """
        root = self.xml_tree.getroot()
        for drug in db_findall(root, 'db:drug'):
            self.statements.extend(
                self._extract_statements_for_drug(drug))

    @staticmethod
    def _extract_statements_for_drug(drug_element):
        """Yield Statements for the targets listed under one drug element.

        One Statement is produced per (target, action) pair whose action
        maps to a Statement type; pairs whose action maps to None are
        skipped.

        Parameters
        ----------
        drug_element : xml.etree.ElementTree.Element
            A ``db:drug`` element.

        Yields
        ------
        indra.statements.Statement
            A Statement with the drug as subject and the target as object.
        """
        drug = DrugbankProcessor._get_drug_agent(drug_element)
        for target_element in db_findall(drug_element,
                                         'db:targets/db:target'):
            actions = {a.text for a in db_findall(target_element,
                                                  'db:actions/db:action')}
            # Targets without any listed action get the 'N/A' placeholder
            if not actions:
                actions = {'N/A'}
            # Set iteration order of strings can change between runs, so
            # we sort to emit Statements in a reproducible order. The text
            # of an empty action tag is None, which is sorted last.
            for action in sorted(actions,
                                 key=lambda a: (a is None, a or '')):
                stmt_type = DrugbankProcessor._get_statement_type(action)
                if not stmt_type:
                    continue
                evs = DrugbankProcessor._get_evidences(target_element)
                for ev in evs:
                    # Each Evidence gets its own dict so that changing the
                    # annotations of one doesn't silently change the others
                    ev.annotations = {'drugbank_action': action}
                target = DrugbankProcessor._get_target_agent(target_element)
                yield stmt_type(drug, target, evidence=evs)

    @staticmethod
    def _get_statement_type(action):
        """Return the Statement class for a DrugBank action, or None.

        Parameters
        ----------
        action : str or None
            The text of a ``db:action`` element, or the 'N/A' placeholder
            used for targets that have no listed action.

        Returns
        -------
        type or None
            Activation, Inhibition, DecreaseAmount or IncreaseAmount
            depending on which action vocabulary contains the action;
            None for neutral or unrecognized actions.
        """
        if action in neutral_actions:
            return None
        elif action in activation_actions:
            return Activation
        elif action in inhibition_actions:
            return Inhibition
        elif action in decrease_amount_actions:
            return DecreaseAmount
        elif action in increase_amount_actions:
            return IncreaseAmount
        elif action == 'N/A':
            # Targets with no listed action are treated as inhibited
            return Inhibition
        return None

    @staticmethod
    def _strip_hgnc_prefix(identifier):
        """Return an HGNC identifier string without a leading 'HGNC:'.

        Identifiers that do not start with the prefix are returned
        unchanged rather than being truncated.
        """
        prefix = 'HGNC:'
        if identifier.startswith(prefix):
            return identifier[len(prefix):]
        return identifier

    @staticmethod
    def _choose_drugbank_id(candidates):
        """Return the smallest candidate starting with 'DB', or None.

        Parameters
        ----------
        candidates : iterable of (str or None)
            The texts of the ``db:drugbank-id`` elements of a drug.

        Returns
        -------
        str or None
            The lexicographically smallest 'DB'-prefixed ID; None if no
            candidate has that prefix.
        """
        db_ids = [c for c in candidates if c and c.startswith('DB')]
        return min(db_ids, default=None)

    @staticmethod
    def _get_target_agent(target_element):
        """Return an Agent built from a ``db:target`` element.

        The Agent is grounded to the DrugBank target ID and, when listed
        among the polypeptide's external identifiers, to HGNC and UniProt.
        """
        name_tag = db_find(target_element, 'db:name')
        name = name_tag.text
        db_refs = {}

        # Get Drugbank target ID
        target_id = db_find(target_element, 'db:id').text
        db_refs['DRUGBANKV4.TARGET'] = target_id

        # Extract other xrefs
        for xref_tag in db_findall(target_element, 'db:polypeptide/'
                                   'db:external-identifiers/'
                                   'db:external-identifier'):
            resource = db_find(xref_tag, 'db:resource').text
            identifier = db_find(xref_tag, 'db:identifier').text
            if resource == 'HUGO Gene Nomenclature Committee (HGNC)':
                db_refs['HGNC'] = \
                    DrugbankProcessor._strip_hgnc_prefix(identifier)
            elif resource == 'UniProtKB':
                db_refs['UP'] = identifier
        return get_standard_agent(name, db_refs=db_refs)

    @staticmethod
    def _get_drug_agent(drug_element):
        """Return an Agent built from a ``db:drug`` element.

        The Agent is grounded to DrugBank and, when available, to CAS,
        ChEMBL, PubChem and ChEBI. The collected db_refs are checked with
        assert_valid_db_refs before the Agent is created.
        """
        name_tag = db_find(drug_element, 'db:name')
        name = name_tag.text
        db_refs = {}

        # Extract the DrugBank ID. Sometimes there's more than one
        # DrugBank ID and we choose the "smaller" one here. If no ID with
        # the DB prefix is listed, the DRUGBANK key is left out.
        drugbank_id_tags = db_findall(drug_element, 'db:drugbank-id')
        drugbank_id = DrugbankProcessor._choose_drugbank_id(
            di.text for di in drugbank_id_tags)
        if drugbank_id is not None:
            db_refs['DRUGBANK'] = drugbank_id

        # Extract CAS ID
        cas_tag = db_find(drug_element, 'db:cas-number')
        if cas_tag is not None and cas_tag.text is not None:
            db_refs['CAS'] = cas_tag.text

        # Extract other xrefs
        for xref_tag in db_findall(drug_element, 'db:external-identifiers/'
                                   'db:external-identifier'):
            resource = db_find(xref_tag, 'db:resource').text
            identifier = db_find(xref_tag, 'db:identifier').text
            if resource == 'ChEMBL':
                db_refs['CHEMBL'] = ensure_chembl_prefix(identifier)
            elif resource == 'PubChem Compound':
                db_refs['PUBCHEM'] = identifier
            elif resource == 'ChEBI':
                db_refs['CHEBI'] = ensure_chebi_prefix(identifier)
        assert_valid_db_refs(db_refs)
        return get_standard_agent(name, db_refs)

    @staticmethod
    def _get_evidences(target_element):
        """Return a list of Evidence objects for a ``db:target`` element.

        One Evidence is created per referenced PubMed ID. If there are
        none, one Evidence is created per referenced link URL. If there
        are neither, a single Evidence without any reference is returned.
        """
        # TODO: is there a source ID we can use here?
        # TODO: is there context we can extract?
        # refs also has: textbooks, attachments
        pmids = db_findall(target_element,
                           'db:references/db:articles/db:article/'
                           'db:pubmed-id')
        urls = db_findall(target_element,
                          'db:references/db:links/db:link/db:url')
        if pmids:
            evs = [Evidence(source_api='drugbank', pmid=pmid.text)
                   for pmid in pmids]
        elif urls:
            evs = [Evidence(source_api='drugbank',
                            text_refs={'URL': url.text})
                   for url in urls]
        else:
            evs = [Evidence(source_api='drugbank')]
        return evs
def db_find(element, path):
    """Call ``element.find`` on `path` using the DrugBank namespace map.

    Parameters
    ----------
    element : xml.etree.ElementTree.Element
        The element to search under.
    path : str
        A path expression whose ``db:`` prefixes are resolved through
        ``drugbank_ns``.

    Returns
    -------
    The result of ``element.find`` for the given path.
    """
    first_match = element.find(path, namespaces=drugbank_ns)
    return first_match
def db_findall(element, path):
    """Call ``element.findall`` on `path` using the DrugBank namespace map.

    Parameters
    ----------
    element : xml.etree.ElementTree.Element
        The element to search under.
    path : str
        A path expression whose ``db:`` prefixes are resolved through
        ``drugbank_ns``.

    Returns
    -------
    The result of ``element.findall`` for the given path.
    """
    all_matches = element.findall(path, namespaces=drugbank_ns)
    return all_matches
# Vocabularies of DrugBank action strings. Each set is looked up by
# DrugbankProcessor._get_statement_type to pick a Statement type.

# Actions mapped to Activation
activation_actions = {
    'activator',
    'agonist',
    'chaperone',
    'cofactor',
    'inducer',
    'ligand',
    'partial agonist',
    'positive allosteric modulator',
    'positive modulator',
    'potentiator',
    'protector',
    'stimulator',
    'substrate',
}

# Actions mapped to Inhibition
inhibition_actions = {
    'antagonist',
    'antibody',
    'binder',
    'binding',
    'blocker',
    'disruptor',
    'inactivator',
    'inhibitor',
    'inhibitory allosteric modulator',
    'inverse agonist',
    'negative modulator',
    'neutralizer',
    'suppressor',
    'weak inhibitor',
}

# Actions mapped to DecreaseAmount
decrease_amount_actions = {
    'chelator',
    'degradation',
    'downregulator',
    'incorporation into and destabilization',
    'metabolizer',
}

# Actions mapped to IncreaseAmount
increase_amount_actions = {
    'stabilization',
}

# Actions for which no Statement is extracted
neutral_actions = {
    'modulator',
    'other',
    'other/unknown',
    'regulator',
    'unknown',
}