from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str
import re
import sys
import logging
import itertools
import collections
try:
from functools import lru_cache
except ImportError:
from functools32 import lru_cache
from indra.java_vm import autoclass, JavaException, cast
from indra.databases import hgnc_client, uniprot_client
from indra.statements import *
from . import pathway_commons_client as pcc
from indra.util import decode_obj
logger = logging.getLogger('biopax')
# TODO:
# - Extract cellularLocation from each PhysicalEntity
# - Look at participantStoichiometry within BiochemicalReaction
# - Check whether to use Control or only Catalysis (Control might not
# be direct)
# - Implement extracting modifications with Complex enzyme
# - Implement extracting modifications with Complex substrate
[docs]class BiopaxProcessor(object):
"""The BiopaxProcessor extracts INDRA Statements from a BioPAX model.
The BiopaxProcessor uses pattern searches in a BioPAX OWL model to
extract mechanisms from which it constructs INDRA Statements.
Parameters
----------
model : org.biopax.paxtools.model.Model
A BioPAX model object (java object)
Attributes
----------
model : org.biopax.paxtools.model.Model
A BioPAX model object (java object) which is queried using Paxtools
to extract INDRA Statements
statements : list[indra.statements.Statement]
A list of INDRA Statements that were extracted from the model.
"""
def __init__(self, model):
self.model = model
self.statements = []
[docs] def print_statements(self):
"""Print all INDRA Statements collected by the processors."""
for i, stmt in enumerate(self.statements):
print("%s: %s" % (i, stmt))
def save_model(self, file_name=None):
    """Save the BioPAX model object in an OWL file.

    Parameters
    ----------
    file_name : Optional[str]
        The name of the OWL file to save the model in. If it is None,
        an error is logged and nothing is written.
    """
    if file_name is not None:
        pcc.model_to_owl(self.model, file_name)
    else:
        logger.error('Missing file name')
def get_complexes(self):
    """Extract INDRA Complex Statements from the BioPAX model.

    Every object of the model that is an
    org.biopax.paxtools.model.level3.Complex is inspected. BioPAX
    Pattern's org.biopax.paxtools.pattern.PatternBox.inComplexWith
    query is not reused because it yields pairs of complex members
    rather than the full complex.

    Complexes whose members cannot be determined, or which have more
    than 10 members, are skipped. The resulting Statements are appended
    to self.statements.
    """
    for raw_obj in self.model.getObjects().toArray():
        element = _cast_biopax_element(raw_obj)
        if not _is_complex(element):
            continue
        evidence = self._get_evidence(element)
        members = self._get_complex_members(element)
        if members is None:
            continue
        if len(members) > 10:
            logger.info('Skipping complex with more than 10 members.')
            continue
        # One Complex Statement per member combination returned by
        # _get_combinations.
        for combination in _get_combinations(members):
            stmt = Complex(combination, evidence)
            self.statements.append(decode_obj(stmt, encoding='utf-8'))
def get_modifications(self):
    """Extract INDRA Modification Statements from the BioPAX model.

    To extract Modifications, this method reuses the structure of
    BioPAX Pattern's
    org.biopax.paxtools.pattern.PatternBox.controlsStateChange pattern
    with additional constraints to specify the type of state change
    occurring (phosphorylation, deubiquitination, etc.). One query is
    run per entry of modclass_to_modtype and the results are added to
    self.statements.
    """
    for mod_class, mod_type in modclass_to_modtype.items():
        # TODO: we could possibly try to also extract generic
        # modifications here
        if mod_type != 'modification':
            self.statements.extend(
                self._get_generic_modification(mod_class))
def get_activity_modification(self):
    """Extract INDRA ActiveForm statements from the BioPAX model.

    This method extracts ActiveForm Statements that are due to
    protein modifications. This method reuses the structure of
    BioPAX Pattern's
    org.biopax.paxtools.pattern.PatternBox.controlsStateChange pattern
    with additional constraints to specify the gain or loss of a
    modification occurring (phosphorylation, deubiquitination, etc.)
    and the gain or loss of activity due to the modification state
    change.

    The extracted Statements are appended to self.statements.
    """
    mod_filter = 'residue modification, active'
    activity = 'activity'
    for is_active in (True, False):
        # A new pattern is built for each direction so that the GAIN and
        # LOSS constraints never end up on the same pattern.
        p = self._construct_modification_pattern()
        rel = mcct.GAIN if is_active else mcct.LOSS
        p.add(mcc(rel, mod_filter),
              "input simple PE", "output simple PE")

        s = _bpp('Searcher')
        res = s.searchPlain(self.model, p)
        res_array = [_match_to_array(m) for m in res.toArray()]

        for r in res_array:
            reaction = r[p.indexOf('Conversion')]
            input_spe = r[p.indexOf('input simple PE')]
            output_spe = r[p.indexOf('output simple PE')]

            # Modifications present on the output but not on the input
            mod_in = BiopaxProcessor._get_entity_mods(input_spe)
            mod_out = BiopaxProcessor._get_entity_mods(output_spe)
            gained_mods = _get_mod_difference(mod_out, mod_in)
            # Without any gained modification there is nothing to state
            # as the condition of the active form.
            if not gained_mods:
                continue

            # Here we get the evidence for the BiochemicalReaction
            ev = self._get_evidence(reaction)

            agents = self._get_agents_from_entity(output_spe)
            for agent in _listify(agents):
                # NOTE: with the ActiveForm representation we cannot
                # separate static (unchanged) mods and gained_mods. We
                # assume here that the static mods are inconsequential
                # and therefore are not mentioned as an Agent condition,
                # following don't care don't write semantics. Therefore
                # only the gained_mods are listed in the ActiveForm as
                # Agent conditions.
                agent.mods = gained_mods
                stmt = ActiveForm(agent, activity, is_active,
                                  evidence=ev)
                self.statements.append(decode_obj(stmt,
                                                  encoding='utf-8'))
def get_regulate_activities(self):
    """Get Activation/Inhibition INDRA Statements from the BioPAX model.

    This method extracts Activation/Inhibition Statements and reuses the
    structure of BioPAX Pattern's
    org.biopax.paxtools.pattern.PatternBox.controlsStateChange pattern
    with additional constraints to specify the gain or loss of
    activity state but assuring that the activity change is not due to
    a modification state change (which are extracted by get_modifications
    and get_activity_modification).

    The extracted Statements are appended to self.statements.
    """
    mcc = _bpp('constraint.ModificationChangeConstraint')
    mcct = _bpp('constraint.ModificationChangeConstraint$Type')
    mod_filter = 'residue modification, active'
    for act_class, gain_loss in ((Activation, mcct.GAIN),
                                 (Inhibition, mcct.LOSS)):
        # Start with a fresh generic modification pattern for each
        # direction. Reusing one pattern object across iterations would
        # leave the GAIN constraint in place when the LOSS constraint is
        # added, so the Inhibition query would require both at once.
        p = BiopaxProcessor._construct_modification_pattern()
        p.add(mcc(gain_loss, mod_filter),
              "input simple PE", "output simple PE")

        s = _bpp('Searcher')
        res = s.searchPlain(self.model, p)
        res_array = [_match_to_array(m) for m in res.toArray()]

        for r in res_array:
            controller_pe = r[p.indexOf('controller PE')]
            input_spe = r[p.indexOf('input simple PE')]
            output_spe = r[p.indexOf('output simple PE')]
            control = r[p.indexOf('Control')]

            if not _is_catalysis(control):
                continue
            cat_dir = control.getCatalysisDirection()
            if cat_dir is not None and cat_dir.name() != 'LEFT_TO_RIGHT':
                logger.info('Unexpected catalysis direction: %s.' %
                            cat_dir)
                continue

            subjs = BiopaxProcessor._get_primary_controller(controller_pe)
            if not subjs:
                continue

            # TODO: when the input is a complex it is possible to find
            # which member of the complex is actually activated. That
            # member will be the substrate and all other members of the
            # complex will be bound to it.
            objs = BiopaxProcessor._get_agents_from_entity(input_spe,
                                                           expand_pe=False)
            ev = self._get_evidence(control)

            # We assume if modifications change then this is not really
            # a pure activation event. The check does not depend on the
            # subject/object pair so it is done once per match.
            mod_in = BiopaxProcessor._get_entity_mods(input_spe)
            mod_out = BiopaxProcessor._get_entity_mods(output_spe)
            gained_mods = _get_mod_difference(mod_out, mod_in)
            lost_mods = _get_mod_difference(mod_in, mod_out)
            if gained_mods or lost_mods:
                continue

            for subj, obj in itertools.product(_listify(subjs),
                                               _listify(objs)):
                stmt = act_class(subj, obj, 'activity', evidence=ev)
                self.statements.append(decode_obj(stmt, encoding='utf-8'))
def get_regulate_amounts(self):
    """Extract INDRA RegulateAmount Statements from the BioPAX model.

    This method extracts IncreaseAmount/DecreaseAmount Statements from
    the BioPAX model. It fully reuses BioPAX Pattern's
    org.biopax.paxtools.pattern.PatternBox.controlsExpressionWithTemplateReac
    pattern to find TemplateReactions which control the expression of
    a protein.

    The extracted Statements are appended to self.statements.
    """
    p = pb.controlsExpressionWithTemplateReac()
    s = _bpp('Searcher')
    res = s.searchPlain(self.model, p)
    res_array = [_match_to_array(m) for m in res.toArray()]
    for match in res_array:
        # FIXME: for some reason labels are not accessible
        # for these queries. It would be more reliable
        # to get results by label (e.g. p.indexOf('Control'))
        # instead of by index.

        # TODO: here, match[3] is the complex physical entity
        # for instance http://pathwaycommons.org/pc2/
        # Complex_43c6b8330562c1b411d21e9d1185bae9
        # consists of 3 components: JUN, FOS and NFAT
        # where NFAT further contains 3 member physical entities.
        #
        # However, match[2] iterates over all 5 member physical entities
        # of the complex which doesn't represent the underlying
        # structure faithfully. It would be better to use match[3]
        # (the complex itself) and look at components and then
        # members. However, then, it would not be clear how to
        # construct an INDRA Agent for the controller.
        controller = self._get_agents_from_entity(match[2])
        controlled_pe = match[6]
        controlled = self._get_agents_from_entity(controlled_pe)

        conversion = match[5]
        direction = conversion.getTemplateDirection()
        if direction is not None:
            direction = direction.name()
            if direction != 'FORWARD':
                logger.warning('Unhandled conversion direction %s' %
                               direction)
                continue
        # NOTE: the interaction type of the conversion is sometimes
        # annotated with term=='TRANSCRIPTION' and sometimes not
        # annotated at all, so it is not used here.

        control = match[4]
        control_type = control.getControlType()
        if control_type:
            control_type = control_type.name()
        ev = self._get_evidence(control)
        for subj, obj in itertools.product(_listify(controller),
                                           _listify(controlled)):
            # The controller is taken to act through its
            # transcriptional activity.
            subj_act = ActivityCondition('transcription', True)
            subj.activity = subj_act
            if control_type == 'ACTIVATION':
                st = IncreaseAmount(subj, obj, evidence=ev)
            elif control_type == 'INHIBITION':
                st = DecreaseAmount(subj, obj, evidence=ev)
            else:
                logger.warning('Unhandled control type %s' % control_type)
                continue
            st_dec = decode_obj(st, encoding='utf-8')
            self.statements.append(st_dec)
def get_conversions(self):
    """Extract Conversion INDRA Statements from the BioPAX model.

    This method uses a custom BioPAX Pattern
    (one that is not implemented in PatternBox) to query for
    BiochemicalReactions whose left and right hand sides are collections
    of SmallMolecules. This pattern thereby extracts metabolic
    conversions as well as signaling processes via small molecules
    (e.g. lipid phosphorylation or cleavage).

    The extracted Statements are appended to self.statements.
    """
    # NOTE: This pattern gets all reactions in which a protein is the
    # controller and chemicals are converted. But with this pattern only
    # a single chemical is extracted from each side. This can be misleading
    # since we want to capture all inputs and all outputs of the
    # conversion. So we need to step back to the conversion itself and
    # enumerate all inputs/outputs, make sure they constitute the kind
    # of conversion we can capture here and then extract as a Conversion
    # Statement. Another issue here is that the same reaction will be
    # extracted multiple times if there is more than one input or output.
    # Therefore we need to cache the URI of the controls that have already
    # been handled.
    p = _bpp('Pattern')(_bpimpl('PhysicalEntity')().getModelInterface(),
                        'controller PE')
    # Getting the control itself
    p.add(cb.peToControl(), "controller PE", "Control")
    # Make sure the controller is a protein
    # TODO: possibly allow Complex too
    p.add(tp(_bpimpl('Protein')().getModelInterface()), "controller PE")
    # Link the control to the conversion that it controls
    p.add(cb.controlToConv(), "Control", "Conversion")
    # Make sure this is a BiochemicalReaction (as opposed to, for instance,
    # ComplexAssembly)
    p.add(tp(_bpimpl('BiochemicalReaction')().getModelInterface()),
          "Conversion")
    # The controller shouldn't be a participant of the conversion
    p.add(_bpp('constraint.NOT')(cb.participant()),
          "Conversion", "controller PE")
    # Get the input participant of the conversion
    p.add(pt(rt.INPUT, True), "Control", "Conversion", "input PE")
    # Link to the other side of the conversion
    p.add(cs(cst.OTHER_SIDE), "input PE", "Conversion", "output PE")
    # Make sure the two sides are not the same
    p.add(_bpp('constraint.Equality')(False), "input PE", "output PE")
    # Make sure the input/output is a chemical
    p.add(tp(_bpimpl('SmallMolecule')().getModelInterface()), "input PE")
    p.add(tp(_bpimpl('SmallMolecule')().getModelInterface()), "output PE")

    s = _bpp('Searcher')
    res = s.searchPlain(self.model, p)
    res_array = [_match_to_array(m) for m in res.toArray()]
    handled_controls = set()
    for r in res_array:
        controller_pe = r[p.indexOf('controller PE')]
        reaction = r[p.indexOf('Conversion')]
        control = r[p.indexOf('Control')]

        control_uri = control.getUri()
        if control_uri in handled_controls:
            continue

        # Get controller
        subj_list = self._get_agents_from_entity(controller_pe)

        # Get inputs and outputs
        left = reaction.getLeft().toArray()
        right = reaction.getRight().toArray()
        # Skip this if not all participants are chemicals
        if not all(_is_small_molecule(e) for e in left):
            continue
        if not all(_is_small_molecule(e) for e in right):
            continue

        obj_left = [self._get_agents_from_entity(participant)
                    for participant in left]
        obj_right = [self._get_agents_from_entity(participant)
                     for participant in right]

        ev = self._get_evidence(control)
        for subj in _listify(subj_list):
            st = Conversion(subj, obj_left, obj_right, evidence=ev)
            st_dec = decode_obj(st, encoding='utf-8')
            self.statements.append(st_dec)
        handled_controls.add(control_uri)
def _gef_gap_base(self):
    """Construct the BioPAX pattern shared by get_gef and get_gap.

    Returns
    -------
    The pattern object, with the constraints listed below added in order.
    """
    # The following constraints were pieced together based on the
    # following two higher level constraints: pb.controlsStateChange(),
    # pb.controlsPhosphorylation().
    pattern = _bpp('Pattern')(
        _bpimpl('PhysicalEntity')().getModelInterface(), 'controller PE')
    # Each entry is a constraint followed by the labels it applies to;
    # they are added to the pattern in exactly this order.
    steps = [
        # Getting the control itself
        (cb.peToControl(), "controller PE", "Control"),
        # Link the control to the conversion that it controls
        (cb.controlToConv(), "Control", "Conversion"),
        # The controller shouldn't be a participant of the conversion
        (_bpp('constraint.NOT')(cb.participant()),
         "Conversion", "controller PE"),
        # Get the input participant of the conversion
        (pt(rt.INPUT, True), "Control", "Conversion", "input PE"),
        # Get the specific PhysicalEntity
        (cb.linkToSpecific(), "input PE", "input simple PE"),
        # Link to ER
        (cb.peToER(), "input simple PE", "input simple ER"),
        # Make sure the participant is a protein
        (tp(_bpimpl('Protein')().getModelInterface()), "input simple PE"),
        # Make sure the controller is a protein
        # TODO: possibly allow Complex too
        (tp(_bpimpl('Protein')().getModelInterface()), "controller PE"),
        # Link to the other side of the conversion
        (cs(cst.OTHER_SIDE), "input PE", "Conversion", "output PE"),
        # Make sure the two sides are not the same
        (_bpp('constraint.Equality')(False), "input PE", "output PE"),
        # Get the specific PhysicalEntity
        (cb.linkToSpecific(), "output PE", "output simple PE"),
        # Link to ER
        (cb.peToER(), "output simple PE", "output simple ER"),
        (_bpp('constraint.Equality')(True), "input simple ER",
         "output simple ER"),
        # Make sure the input/output is a Complex
        (tp(_bpimpl('Complex')().getModelInterface()), "output PE"),
        (tp(_bpimpl('Complex')().getModelInterface()), "input PE"),
    ]
    for step in steps:
        pattern.add(*step)
    return pattern
def get_gef(self):
    """Extract Gef INDRA Statements from the BioPAX model.

    This method uses a custom BioPAX Pattern
    (one that is not implemented in PatternBox) to query for controlled
    BiochemicalReactions in which the same protein is in complex with
    GDP on the left hand side and in complex with GTP on the
    right hand side. This implies that the controller is a GEF for the
    GDP/GTP-bound protein.

    The extracted Statements are appended to self.statements.
    """
    p = self._gef_gap_base()

    s = _bpp('Searcher')
    res = s.searchPlain(self.model, p)
    res_array = [_match_to_array(m) for m in res.toArray()]
    for r in res_array:
        controller_pe = r[p.indexOf('controller PE')]
        input_pe = r[p.indexOf('input PE')]
        input_spe = r[p.indexOf('input simple PE')]
        output_pe = r[p.indexOf('output PE')]
        control = r[p.indexOf('Control')]

        # Make sure the GEF is not a complex
        # TODO: it could be possible to extract certain complexes here, for
        # instance ones that only have a single protein
        if _is_complex(controller_pe):
            continue
        members_in = self._get_complex_members(input_pe)
        members_out = self._get_complex_members(output_pe)
        if not (members_in and members_out):
            continue
        # Make sure the outgoing complex has exactly 2 members
        # TODO: by finding matching proteins on either side, in principle
        # it would be possible to find Gef relationships in complexes
        # with more members
        if len(members_out) != 2:
            continue
        # Make sure complex starts with GDP that becomes GTP. Members
        # that are not single Agents are ignored for this check.
        gdp_in = any(isinstance(member, Agent) and member.name == 'GDP'
                     for member in members_in)
        gtp_out = any(isinstance(member, Agent) and member.name == 'GTP'
                      for member in members_out)
        if not (gdp_in and gtp_out):
            continue
        ras_list = self._get_agents_from_entity(input_spe)
        gef_list = self._get_agents_from_entity(controller_pe)
        ev = self._get_evidence(control)
        for gef, ras in itertools.product(_listify(gef_list),
                                          _listify(ras_list)):
            st = Gef(gef, ras, evidence=ev)
            st_dec = decode_obj(st, encoding='utf-8')
            self.statements.append(st_dec)
def get_gap(self):
    """Extract Gap INDRA Statements from the BioPAX model.

    This method uses a custom BioPAX Pattern
    (one that is not implemented in PatternBox) to query for controlled
    BiochemicalReactions in which the same protein is in complex with
    GTP on the left hand side and in complex with GDP on the
    right hand side. This implies that the controller is a GAP for the
    GDP/GTP-bound protein.

    The extracted Statements are appended to self.statements.
    """
    p = self._gef_gap_base()

    s = _bpp('Searcher')
    res = s.searchPlain(self.model, p)
    res_array = [_match_to_array(m) for m in res.toArray()]
    for r in res_array:
        controller_pe = r[p.indexOf('controller PE')]
        input_pe = r[p.indexOf('input PE')]
        input_spe = r[p.indexOf('input simple PE')]
        output_pe = r[p.indexOf('output PE')]
        control = r[p.indexOf('Control')]

        # Make sure the GAP is not a complex
        # TODO: it could be possible to extract certain complexes here, for
        # instance ones that only have a single protein
        if _is_complex(controller_pe):
            continue
        members_in = self._get_complex_members(input_pe)
        members_out = self._get_complex_members(output_pe)
        if not (members_in and members_out):
            continue
        # Make sure the outgoing complex has exactly 2 members
        # TODO: by finding matching proteins on either side, in principle
        # it would be possible to find Gap relationships in complexes
        # with more members
        if len(members_out) != 2:
            continue
        # Make sure complex starts with GTP that becomes GDP. Members
        # that are not single Agents are ignored for this check.
        gtp_in = any(isinstance(member, Agent) and member.name == 'GTP'
                     for member in members_in)
        gdp_out = any(isinstance(member, Agent) and member.name == 'GDP'
                      for member in members_out)
        if not (gtp_in and gdp_out):
            continue
        ras_list = self._get_agents_from_entity(input_spe)
        gap_list = self._get_agents_from_entity(controller_pe)
        ev = self._get_evidence(control)
        for gap, ras in itertools.product(_listify(gap_list),
                                          _listify(ras_list)):
            st = Gap(gap, ras, evidence=ev)
            st_dec = decode_obj(st, encoding='utf-8')
            self.statements.append(st_dec)
@staticmethod
def _get_complex_members(cplx):
    """Return the members of a BioPAX Complex as a flat list.

    Nested complexes are expanded recursively and their members are
    added to the same list. Each non-complex member contributes the
    result of _get_agents_from_entity (a single Agent or a list of
    Agents), repeated according to its stoichiometric coefficient.

    Parameters
    ----------
    cplx : BioPAX Complex object (java object)
        The complex whose members are collected.

    Returns
    -------
    members : list or None
        The collected members, or None if the complex (or any complex
        nested in it) has no members listed.
    """
    member_pes = cplx.getComponent().toArray()

    # Make a dict of member URIs and their
    # corresponding stoichiometries
    member_stos = {s.getPhysicalEntity().getUri():
                   s.getStoichiometricCoefficient() for
                   s in cplx.getComponentStoichiometry().toArray()}

    # Some complexes do not have any members explicitly listed
    if not member_pes:
        member_pes = cplx.getMemberPhysicalEntity().toArray()
        if not member_pes:
            logger.info('Complex "%s" has no members.' %
                        cplx.getDisplayName())
            return None
    members = []
    for m in member_pes:
        if _is_complex(m):
            ms = BiopaxProcessor._get_complex_members(m)
            if ms is None:
                return None
            members.extend(ms)
        else:
            ma = BiopaxProcessor._get_agents_from_entity(m)
            try:
                sto_int = int(member_stos[m.getUri()])
            except KeyError:
                # No stoichiometry information - assume it is 1. The
                # member must be added exactly once here; appending it
                # in this branch as well would list it twice.
                sto_int = 1
            for _ in range(sto_int):
                members.append(ma)
    return members
@staticmethod
def _get_entity_mods(bpe):
    """Get all the modifications of an entity in INDRA format.

    Features that are not modifications, or that cannot be converted
    by _extract_mod_from_feature, are left out of the returned list.
    """
    # Entities and non-entities expose their features through
    # different accessors.
    if _is_entity(bpe):
        feature_set = bpe.getFeature()
    else:
        feature_set = bpe.getEntityFeature()
    converted = (BiopaxProcessor._extract_mod_from_feature(feat)
                 for feat in feature_set.toArray()
                 if _is_modification(feat))
    return [mod_cond for mod_cond in converted if mod_cond is not None]
def _get_generic_modification(self, mod_class):
    """Get all modification reactions given a Modification class.

    Parameters
    ----------
    mod_class : class
        A key of modclass_to_modtype; it is instantiated to create the
        extracted Statements.

    Returns
    -------
    stmts : list
        The extracted Statements. self.statements is not modified here.
    """
    mod_type = modclass_to_modtype[mod_class]
    if issubclass(mod_class, RemoveModification):
        # A removal is searched for as the loss of the inverse
        # modification type.
        mod_gain_const = mcct.LOSS
        mod_type = modtype_to_inverse[mod_type]
    else:
        mod_gain_const = mcct.GAIN
    # Only the first five characters of the type are used as the term
    # filter of the constraint.
    mod_filter = mod_type[:5]
    # Start with a generic modification pattern
    p = BiopaxProcessor._construct_modification_pattern()
    p.add(mcc(mod_gain_const, mod_filter),
          "input simple PE", "output simple PE")

    s = _bpp('Searcher')
    res = s.searchPlain(self.model, p)
    res_array = [_match_to_array(m) for m in res.toArray()]
    stmts = []
    for r in res_array:
        controller_pe = r[p.indexOf('controller PE')]
        input_spe = r[p.indexOf('input simple PE')]
        output_spe = r[p.indexOf('output simple PE')]
        control = r[p.indexOf('Control')]

        if not _is_catalysis(control):
            continue
        cat_dir = control.getCatalysisDirection()
        if cat_dir is not None and cat_dir.name() != 'LEFT_TO_RIGHT':
            logger.info('Unexpected catalysis direction: %s.' %
                        cat_dir)
            continue

        enzs = BiopaxProcessor._get_primary_controller(controller_pe)
        if not enzs:
            continue
        # TODO: when the input is a complex it is possible to find which
        # member of the complex is actually modified. That member will
        # be the substrate and all other members of the complex will be
        # bound to it.
        subs = BiopaxProcessor._get_agents_from_entity(input_spe,
                                                       expand_pe=False)
        ev = self._get_evidence(control)
        for enz, sub in itertools.product(_listify(enzs), _listify(subs)):
            # Get the modifications. These are re-extracted for every
            # pair so that each substrate gets its own ModCondition
            # objects.
            mod_in = BiopaxProcessor._get_entity_mods(input_spe)
            mod_out = BiopaxProcessor._get_entity_mods(output_spe)

            # The substrate keeps only the unchanged modifications
            sub.mods = _get_mod_intersection(mod_in, mod_out)

            if issubclass(mod_class, AddModification):
                changed_mods = _get_mod_difference(mod_out, mod_in)
            else:
                changed_mods = _get_mod_difference(mod_in, mod_out)

            for mod in changed_mods:
                # Is it guaranteed that these are all modifications
                # of the type we are extracting?
                if mod.mod_type not in (mod_type,
                                        modtype_to_inverse[mod_type]):
                    continue
                stmt = mod_class(enz, sub, mod.residue, mod.position,
                                 evidence=ev)
                stmts.append(decode_obj(stmt, encoding='utf-8'))
    return stmts
@staticmethod
def _get_primary_controller(controller_pe):
    """Return the Agent(s) acting as the controller.

    For a non-complex entity this is the result of
    _get_agents_from_entity. For a complex, the single protein member
    is returned with the non-protein members attached to it as bound
    conditions. None is returned if the complex members cannot be
    determined or the complex does not have exactly one protein member.
    """
    # If it's not a complex, just return the corresponding agent
    if not _is_complex(controller_pe):
        return BiopaxProcessor._get_agents_from_entity(controller_pe)

    # Identifying the "real" enzyme in a complex may not always be
    # possible.
    # One heuristic here could be to find the member which is
    # active and if it is the only active member then
    # set this as the enzyme to which all other members of the
    # complex are bound.
    members = BiopaxProcessor._get_complex_members(controller_pe)
    if members is None:
        return None

    def has_protein_ref(agent):
        # A member counts as a protein if it has a UP or HGNC reference
        refs = agent.db_refs
        return bool(refs.get('UP') or refs.get('HGNC'))

    # Separate out protein and non-protein members. A member can be a
    # single Agent or a collection of Agents; a collection counts as a
    # protein only if all of its Agents do.
    protein_members = []
    non_protein_members = []
    for member in members:
        if isinstance(member, Agent):
            is_protein = has_protein_ref(member)
        else:
            is_protein = all(has_protein_ref(sub) for sub in member)
        if is_protein:
            protein_members.append(member)
        else:
            non_protein_members.append(member)

    if len(protein_members) != 1:
        logger.info('Cannot handle complex enzymes with '
                    'multiple protein members.')
        return None

    # If there is only one protein member, we can assume that
    # it is the enzyme, and everything else is just bound
    # to it.
    enzs = protein_members[0]
    targets = [enzs] if isinstance(enzs, Agent) else enzs
    for bound in non_protein_members:
        if not isinstance(bound, Agent):
            logger.info('Cannot handle complex enzymes with '
                        'aggregate non-protein binding partners.')
            continue
        bc = BoundCondition(bound, True)
        for enz in targets:
            enz.bound_conditions.append(bc)
    return enzs
@staticmethod
def _construct_modification_pattern():
    """Construct the BioPAX pattern to extract modification reactions.

    Returns
    -------
    The pattern object, with the constraints listed below added in order.
    """
    # The following constraints were pieced together based on the
    # following two higher level constraints: pb.controlsStateChange(),
    # pb.controlsPhosphorylation().
    pattern = _bpp('Pattern')(
        _bpimpl('PhysicalEntity')().getModelInterface(), 'controller PE')
    # Each entry is a constraint followed by the labels it applies to;
    # they are added to the pattern in exactly this order.
    steps = [
        # Getting the control itself
        (cb.peToControl(), "controller PE", "Control"),
        # Link the control to the conversion that it controls
        (cb.controlToConv(), "Control", "Conversion"),
        # The controller shouldn't be a participant of the conversion
        (_bpp('constraint.NOT')(cb.participant()),
         "Conversion", "controller PE"),
        # Get the input participant of the conversion
        (pt(rt.INPUT, True), "Control", "Conversion", "input PE"),
        # Get the specific PhysicalEntity
        (cb.linkToSpecific(), "input PE", "input simple PE"),
        # Link to ER
        (cb.peToER(), "input simple PE", "input simple ER"),
        # Make sure the participant is a protein
        (tp(_bpimpl('Protein')().getModelInterface()), "input simple PE"),
        # Link to the other side of the conversion
        (cs(cst.OTHER_SIDE), "input PE", "Conversion", "output PE"),
        # Make sure the two sides are not the same
        (_bpp('constraint.Equality')(False), "input PE", "output PE"),
        # Get the specific PhysicalEntity
        (cb.linkToSpecific(), "output PE", "output simple PE"),
        # Link to ER
        (cb.peToER(), "output simple PE", "output simple ER"),
        (_bpp('constraint.Equality')(True), "input simple ER",
         "output simple ER"),
        # Make sure the output is a Protein
        (tp(_bpimpl('Protein')().getModelInterface()), "output simple PE"),
        (_bpp('constraint.NOT')(cb.linkToSpecific()),
         "input PE", "output simple PE"),
        (_bpp('constraint.NOT')(cb.linkToSpecific()),
         "output PE", "input simple PE"),
    ]
    for step in steps:
        pattern.add(*step)
    return pattern
@staticmethod
def _get_agent_from_entity(bpe):
    """Create a single INDRA Agent from a BioPAX element.

    The Agent gets the element's name and database references;
    modifications are only extracted when the element is a protein.
    """
    agent_name = BiopaxProcessor._get_element_name(bpe)
    refs = BiopaxProcessor._get_db_refs(bpe)
    mod_conds = (BiopaxProcessor._get_entity_mods(bpe)
                 if _is_protein(bpe) else [])
    return Agent(agent_name, db_refs=refs, mods=mod_conds)
@staticmethod
def _get_agents_from_entity(bpe, expand_pe=True, expand_er=True):
    """Return the Agent or list of Agents corresponding to an entity.

    Parameters
    ----------
    bpe : BioPAX element (java object)
        The entity to convert.
    expand_pe : Optional[bool]
        If True and the entity has member physical entities, a list
        with the Agents of all members is returned.
    expand_er : Optional[bool]
        If True and the entity's reference has member entity
        references, a list with one Agent per member reference is
        returned.

    Returns
    -------
    A list of Agents if one of the expansions applied, otherwise a
    single Agent.
    """
    # If the entity has members (like a protein family),
    # we iterate over them
    if expand_pe:
        pe_members = bpe.getMemberPhysicalEntity().toArray()
        if pe_members:
            collected = []
            for member in pe_members:
                result = BiopaxProcessor._get_agents_from_entity(member)
                # A member yields either one Agent or a list of Agents
                if isinstance(result, Agent):
                    collected.append(result)
                else:
                    collected.extend(result)
            return collected
    # If the entity has a reference which has members, we iterate
    # over them.
    if expand_er:
        entref = BiopaxProcessor._get_entref(bpe)
        er_members = None
        if entref is not None:
            er_members = entref.getMemberEntityReference().toArray()
        if er_members:
            collected = []
            for member_ref in er_members:
                ref_agent = BiopaxProcessor._get_agent_from_entity(
                    member_ref)
                # For entity references, we remove context
                ref_agent.mods = []
                collected.append(ref_agent)
            return collected
    # If it is a single entity, we get its name and database
    # references
    return BiopaxProcessor._get_agent_from_entity(bpe)
@staticmethod
def _extract_mod_from_feature(mf):
    """Extract the type of modification and the position from
    a ModificationFeature object in the INDRA format.

    Parameters
    ----------
    mf : BioPAX ModificationFeature (java object)
        The feature to convert.

    Returns
    -------
    mc : ModCondition or None
        The modification condition, or None if the feature has no
        modification type or none of its terms is in _mftype_dict. The
        position of the condition is None unless the feature location is
        a SequenceSite whose position status is 'EQUAL'.
    """
    # ModificationFeature / SequenceModificationVocabulary
    mf_type = mf.getModificationType()
    if mf_type is None:
        return None
    mf_type_terms = mf_type.getTerm().toArray()
    known_mf_type = None
    for t in mf_type_terms:
        # Terms may carry a 'MOD_RES ' prefix that is not part of the
        # lookup key.
        if t.startswith('MOD_RES '):
            t = t[8:]
        mf_type_indra = _mftype_dict.get(t)
        if mf_type_indra is not None:
            known_mf_type = mf_type_indra
            break
    if not known_mf_type:
        logger.info('Skipping modification with unknown terms: %s' %
                    ', '.join(mf_type_terms))
        return None

    mod_type, residue = known_mf_type

    # The position stays None unless an exact site is found below. This
    # also covers the case of an inexact position status, which would
    # otherwise leave mod_pos unassigned.
    mod_pos = None
    # getFeatureLocation returns SequenceLocation, which is the
    # generic parent class of SequenceSite and SequenceInterval.
    # Here we need to cast to SequenceSite in order to get to
    # the sequence position.
    mf_pos = mf.getFeatureLocation()
    # If it is not a SequenceSite we can't handle it
    if mf_pos is not None and mf_pos.modelInterface.getName() == \
            'org.biopax.paxtools.model.level3.SequenceSite':
        mf_site = cast(_bp('SequenceSite'), mf_pos)
        mf_pos_status = mf_site.getPositionStatus()
        if mf_pos_status is None:
            pass
        elif mf_pos_status and mf_pos_status.toString() != 'EQUAL':
            logger.info('Modification site position is %s' %
                        mf_pos_status.toString())
        else:
            mod_pos = '%s' % mf_site.getSequencePosition()

    mc = ModCondition(mod_type, residue, mod_pos, True)
    return mc
@staticmethod
def _get_evidence(bpe):
    """Return a list of Evidence objects for a BioPAX element.

    One Evidence is created per citation found by _get_citations; if
    there is none, a single Evidence with pmid None is returned. The
    element's URI is used as the source_id.
    """
    pmids = BiopaxProcessor._get_citations(bpe) or [None]
    uri = bpe.getUri()
    # The same epistemics dict object is passed to every Evidence
    epistemics = {'direct': True}
    evidence_list = []
    for pmid in pmids:
        evidence_list.append(Evidence(source_api='biopax', pmid=pmid,
                                      source_id=uri,
                                      epistemics=epistemics))
    return evidence_list
@staticmethod
def _get_citations(bpe):
xrefs = bpe.getXref().toArray()
refs = []
for xr in xrefs:
db_name = xr.getDb()
if db_name is not None and db_name.upper() == 'PUBMED':
refs.append(xr.getId())
# TODO: handle non-pubmed evidence
# TODO: do we need to look at bpe.getEvidence()
return refs
@staticmethod
def _get_db_refs(bpe):
    """Return a dict of database references for a BioPAX element.

    Proteins and RNAs get 'HGNC' and 'UP' entries, small molecules get
    a 'CHEBI' entry, and all other elements get whichever of 'CHEBI',
    'HGNC' and 'UP' can be found. Identifiers that are None are left
    out.
    """
    db_refs = {}

    def store(namespace, identifier):
        # Only identifiers that were actually found are recorded
        if identifier is not None:
            db_refs[namespace] = identifier

    if _is_protein(bpe) or _is_rna(bpe):
        hgnc_id = BiopaxProcessor._get_hgnc_id(bpe)
        uniprot_id = BiopaxProcessor._get_uniprot_id(bpe)
        if hgnc_id and uniprot_id:
            # Both IDs are available: compare the Uniprot ID with the
            # one associated with the HGNC ID.
            # NOTE(review): a mismatch is only logged here, the
            # Uniprot ID is not replaced - confirm whether an override
            # was intended.
            hgnc_up_id = hgnc_client.get_uniprot_id(hgnc_id)
            if hgnc_up_id != uniprot_id:
                logger.info('Uniprot ID %s does not match %s obtained '
                            'from HGNC ID %s' %
                            (uniprot_id, hgnc_up_id, hgnc_id))
        elif hgnc_id:
            # Missing Uniprot ID: look it up from the HGNC ID
            uniprot_id = hgnc_client.get_uniprot_id(hgnc_id)
        elif uniprot_id:
            # Missing HGNC ID: for human proteins, look it up via the
            # gene name of the Uniprot entry
            if uniprot_client.is_human(uniprot_id):
                hgnc_name = uniprot_client.get_gene_name(uniprot_id, False)
                if hgnc_name:
                    hgnc_id = hgnc_client.get_hgnc_id(hgnc_name)
        store('HGNC', hgnc_id)
        store('UP', uniprot_id)
    else:
        small_molecule = _is_small_molecule(bpe)
        store('CHEBI', BiopaxProcessor._get_chebi_id(bpe))
        if not small_molecule:
            store('HGNC', BiopaxProcessor._get_hgnc_id(bpe))
            store('UP', BiopaxProcessor._get_uniprot_id(bpe))
    return db_refs
@staticmethod
@lru_cache(maxsize=1000)
def _get_element_name(bpe):
    """Return a name for a BioPAX element.

    Proteins and RNAs are named by HGNC name if an HGNC ID is available,
    otherwise by the UniProt gene name, falling back to the element's
    display name. All other elements use their display name; element
    types that are not small molecules or physical entities are logged
    as unhandled first.
    """
    def get_name(bpe):
        # FIXME Deal with case when HGNC entry is not name
        # Deal with case when multiple Uniprot IDs marked as
        # primary
        hgnc_id = BiopaxProcessor._get_hgnc_id(bpe)
        uniprot_id = BiopaxProcessor._get_uniprot_id(bpe)
        resolved = None
        if hgnc_id is not None:
            resolved = BiopaxProcessor._get_hgnc_name(hgnc_id)
        elif uniprot_id is not None:
            resolved = uniprot_client.get_gene_name(uniprot_id)
        # Fall back to the display name whenever no name was resolved
        if resolved is None:
            resolved = bpe.getDisplayName()
        return resolved

    if _is_protein(bpe) or _is_rna(bpe):
        return get_name(bpe)
    if not (_is_small_molecule(bpe) or _is_physical_entity(bpe)):
        logger.info('Unhandled entity type %s' %
                    bpe.getModelInterface().getName())
    return bpe.getDisplayName()
@staticmethod
def _get_uniprot_id(bpe):
    """Return the primary UniProt ID of a BioPAX element, or None.

    The ID is taken from the URI of the element's entity reference if
    that is an identifiers.org UniProt URI, and otherwise from the
    entity reference's UniProt xrefs. None is returned if the element
    has no entity reference or no UniProt ID can be found.
    """
    # There is often more than one UniProt ID reported.
    # This usually corresponds to the primary accession ID and one or more
    # secondary accession IDs (these IDs are from deprecated entries that
    # have been merged into the primary).
    def map_to_up_primary(ids):
        """Map IDs to primary accessions and choose one, preferring human."""
        primary_ids = []
        for up_id in ids:
            if uniprot_client.is_secondary(up_id):
                up_id = uniprot_client.get_primary_id(up_id)
            # A primary accession and its secondary aliases map to the
            # same ID; keep only the first occurrence so that they are
            # not mistaken for several distinct primary IDs below.
            if up_id not in primary_ids:
                primary_ids.append(up_id)
        # If there are no primary IDs, we return None
        if not primary_ids:
            return None
        if len(primary_ids) == 1:
            # One primary ID, so use it
            primary_id = primary_ids[0]
        else:
            # Several distinct primary IDs: prefer the human ones
            human_upids = [up_id for up_id in primary_ids
                           if uniprot_client.is_human(up_id)]
            if not human_upids:
                logger.info('More than one primary id but none human, '
                            'choosing the first: %s'
                            % ','.join(primary_ids))
                primary_id = primary_ids[0]
            else:
                if len(human_upids) > 1:
                    logger.info('More than one human primary id, choosing '
                                'the first: %s' % ','.join(human_upids))
                primary_id = human_upids[0]
        # Make sure it's unicode
        return str(primary_id)

    bp_entref = BiopaxProcessor._get_entref(bpe)
    if bp_entref is None:
        return None
    uri = bp_entref.getUri()
    # First try to match the URI itself to see if it is a UniProt
    # reference. The dots are escaped so that only the literal host
    # name matches.
    m = re.match(r'http://identifiers\.org/uniprot/([A-Z0-9]+)', uri)
    if m:
        return map_to_up_primary([m.group(1)])
    # If the URI is not a UniProt reference then we look through xrefs
    uniprot_ids = []
    for xref in bp_entref.getXref().toArray():
        db_name = xref.getDb()
        if db_name is not None and \
                db_name.lower() in ('uniprot knowledgebase', 'uniprotkb'):
            uniprot_ids.append(xref.getId())
    if not uniprot_ids:
        return None
    return map_to_up_primary(uniprot_ids)
@staticmethod
def _get_chebi_id(bpe):
    """Return the ChEBI ID(s) of a BioPAX element, or None.

    ChEBI IDs are read from the xrefs of the element's entity reference,
    with any 'CHEBI:' prefix removed. A small number of CAS registry
    numbers are also mapped to ChEBI IDs; other CAS numbers are logged.

    Returns
    -------
    None, str or list[str]
        None if there is no entity reference or no ChEBI ID was found,
        a single ID string if exactly one was found, and a list of ID
        strings if more than one was found.
    """
    bp_entref = BiopaxProcessor._get_entref(bpe)
    if bp_entref is None:
        return None
    # Special handling of common entities given only by CAS number
    cas_to_chebi = {
        '86-01-1': '15996',
        '86527-72-2': '15996',
        '24696-26-2': '17761',
        '23261-20-3': '18035',
        '146-91-8': '17552',
        '165689-82-7': '16618',
    }
    chebi_ids = []
    for xr in bp_entref.getXref().toArray():
        db_name = xr.getDb()
        # An xref may have no database name; skip it rather than crash
        if db_name is None:
            continue
        db_name = db_name.upper()
        if db_name == 'CHEBI':
            chebi_ids.append(xr.getId().replace('CHEBI:', ''))
        elif db_name == 'CAS':
            cas_id = xr.getId()
            chebi_id = cas_to_chebi.get(cas_id)
            if chebi_id is None:
                logger.info('Unknown cas id: %s (%s)' %
                            (cas_id, bpe.getDisplayName()))
            else:
                chebi_ids.append(chebi_id)
    if not chebi_ids:
        return None
    if len(chebi_ids) == 1:
        return chebi_ids[0]
    return chebi_ids
@staticmethod
def _get_hgnc_id(bpe):
    """Return the HGNC ID of a BioPAX element as a string, or None.

    The ID is read from the 'hgnc' xrefs of the element's entity
    reference, accepting both bare numbers and the 'HGNC:<number>' form.
    If several xrefs carry a valid ID, the last one is used. If no valid
    ID is found, the first 'hgnc symbol' xref is converted to an ID via
    the HGNC client instead.
    """
    bp_entref = BiopaxProcessor._get_entref(bpe)
    if bp_entref is None:
        return None
    xrefs = bp_entref.getXref().toArray()
    # Check for HGNC IDs. Only values that parse as an ID are accepted,
    # so a malformed xref can never be returned verbatim as the ID.
    hgnc_id = None
    for xr in xrefs:
        db_name = xr.getDb()
        if db_name is None or db_name.lower() != 'hgnc':
            continue
        raw_id = xr.getId()
        if raw_id is None:
            continue
        m = re.match(r'(?:hgnc:)?([0-9]+)', raw_id.lower())
        if m:
            hgnc_id = str(m.group(1))
    if hgnc_id:
        return hgnc_id
    # If there is no HGNC ID, check for an HGNC symbol and convert back
    # to HGNC
    hgnc_syms = [x.getId() for x in xrefs
                 if (x.getDb() is not None and
                     x.getDb().lower() == 'hgnc symbol')]
    # If no symbol and no ID, return None
    if not hgnc_syms:
        return None
    # On the off chance that there is more than one symbol, issue
    # a log message and choose the first
    if len(hgnc_syms) > 1:
        logger.info('No HGNC ID, and more than one HGNC symbol '
                    'found, using 1st: %s' % str(hgnc_syms))
    return hgnc_client.get_hgnc_id(hgnc_syms[0])
@staticmethod
def _get_hgnc_name(hgnc_id):
    """Return the HGNC client's name for the given HGNC ID."""
    return hgnc_client.get_hgnc_name(hgnc_id)
@staticmethod
def _get_entref(bpe):
    """Return the entity reference belonging to a BioPAX element.

    If the element is itself an entity reference it is returned as is.
    Otherwise its entity reference is returned, or None if the element
    does not provide a getEntityReference method.
    """
    if _is_reference(bpe):
        return bpe
    try:
        return bpe.getEntityReference()
    except AttributeError:
        return None
def get_coverage(self):
    """Print and return how much of the model is covered by Statements.

    The URIs of all Catalysis and TemplateReactionRegulation objects in
    the model are compared against the source IDs of the evidence of the
    extracted Statements.

    Returns
    -------
    tuple(int, int)
        The number of such objects in the model, and the number of them
        that appear as the source of at least one Statement.
    """
    uids = set()
    for obj in self.model.getObjects().toArray():
        if isinstance(obj, _bpimpl('Catalysis')) or \
                isinstance(obj, _bpimpl('TemplateReactionRegulation')):
            uids.add(obj.getUri())
    stmt_uids = {ev.source_id
                 for stmt in self.statements
                 for ev in stmt.evidence}
    num_total = len(uids)
    num_covered = len(uids & stmt_uids)
    # A model without any relevant objects would otherwise divide by zero
    percent = 100.0 * num_covered / num_total if num_total else 0.0
    print('Total in model: %d' % num_total)
    print('Total covered: %d' % num_covered)
    print('%.2f%% coverage' % percent)
    return num_total, num_covered
_mftype_dict = {
'phosres': ('phosphorylation', None),
'phosphorylation': ('phosphorylation', None),
'phosphorylated residue': ('phosphorylation', None),
'phosphorylated': ('phosphorylation', None),
'O-phospho-L-serine': ('phosphorylation', 'S'),
'O-phosphopantetheine-L-serine': ('phosphorylation', 'S'),
'opser': ('phosphorylation', 'S'),
'O-phospho-L-threonine': ('phosphorylation', 'T'),
'opthr': ('phosphorylation', 'T'),
'O-phospho-L-tyrosine': ('phosphorylation', 'Y'),
'O4\'-phospho-L-tyrosine': ('phosphorylation', 'Y'),
'optyr': ('phosphorylation', 'Y'),
'ubiquitinated lysine': ('ubiquitination', 'K'),
'N4-glycosyl-L-asparagine': ('glycosylation', 'N'),
'n4glycoasn': ('glycosylation', 'N'),
'O-glycosyl-L-threonine': ('glycosylation', 'T'),
'S-palmitoyl-L-cysteine': ('palmitoylation', 'C'),
'N6-acetyllysine': ('acetylation', 'K'),
'N6-acetyl-L-lysine' : ('acetylation', 'K'),
'n6aclys': ('acetylation', 'K'),
'naclys': ('acetylation', 'K'),
'N-acetylated L-lysine': ('acetylation', 'K'),
'N-acetylglycine': ('acetylation', 'G'),
'N-acetylmethionine': ('acetylation', 'M'),
'Hydroxyproline': ('hydroxylation', 'P'),
'hydroxylated proline': ('hydroxylation', 'P'),
'3-hydroxyproline': ('hydroxylation', 'P'),
'4-hydroxyproline': ('hydroxylation', 'P'),
'5-hydroxylysine': ('hydroxylation', 'K'),
'N-myristoylglycine': ('myristoylation', 'G'),
'N-myristoyl-glycine': ('myristoylation', 'G'),
'sumoylated lysine': ('sumoylation', 'K'),
'mearg': ('methylation', 'R'),
'methylated L-arginine': ('methylation', 'R'),
'methylated arginine': ('methylation', 'R'),
'melys' : ('methylation', 'K'),
'methylated lysine' : ('methylation', 'K'),
'methylated L-lysine' : ('methylation', 'K'),
'ubiquitination': ('ubiquitination', None),
'ubiquitinylated lysine': ('ubiquitination', 'K'),
'ubiquitination signature tetrapeptidyl lysine': ('ubiquitination', 'K'),
'Phosphoserine': ('phosphorylation', 'S'),
'Phosphothreonine': ('phosphorylation', 'T'),
'Phosphotyrosine': ('phosphorylation', 'Y'),
'N-acetylalanine': ('acetylation', 'A'),
'N-acetylserine': ('acetylation', 'S'),
'N-acetylthreonine': ('acetylation', 'T'),
'N-acetylvaline': ('acetylation', 'V'),
'Omega-N-methylarginine': ('methylation', 'R'),
'N6-methyllysine': ('methylation', 'K'),
'Dimethylated arginine': ('methylation', 'R'),
'Asymmetric dimethylarginine': ('methylation', 'R'),
'Omega-N-methylated arginine': ('methylation', 'R'),
'N6,N6-dimethyllysine': ('methylation', 'K'),
'N6,N6,N6-trimethyllysine': ('methylation', 'K'),
'Symmetric dimethylarginine': ('methylation', 'R'),
'ADP-ribosylarginine': ('ribosylation', 'R'),
'ADP-ribosylcysteine': ('ribosylation', 'C'),
'ADP-ribosylasparagine': ('ribosylation', 'N'),
'PolyADP-ribosyl glutamic acid': ('ribosylation', 'E'),
'O-acetylserine': ('acetylation', 'S'),
'O-acetyl-L-serine': ('acetylation', 'S'),
'N-acetyl-L-alanine': ('acetylation', 'A'),
'omega-N-methyl-L-arginine': ('methylation', 'R'),
'symmetric dimethyl-L-arginine': ('methylation', 'R'),
'N-acetylproline': ('acetylation', 'P'),
'acetylated': ('acetylation', None),
'acetylation': ('acetylation', None),
'(3R)-3-hydroxyaspartate': ('hydroxylation', 'D'),
'(3R)-3-hydroxyasparagine': ('hydroxylation', 'N'),
'Tele-methylhistidine': ('methylation', 'H'),
'N-acetylglutamate': ('acetylation', 'E'),
'N-acetylaspartate': ('acetylation', 'D'),
'n6me2lys': ('methylation', 'K'),
'Phosphohistidine': ('phosphorylation', 'H'),
'S-farnesyl-L-cysteine': ('farnesylation', 'C'),
'modified glycine residue': ('modification', 'G'),
'N-acetyl-L-methionine': ('acetylation', 'M'),
'4-hydroxy-L-proline': ('hydroxylation', 'P'),
'4hypro': ('hydroxylation', 'P'),
'3-hydroxy-L-proline': ('hydroxylation', 'P'),
'5-hydroxy-L-lysine': ('hydroxylation', 'K'),
}
# Functions for accessing frequently used java classes with shortened path
def _bp(path):
    """Return the Java class at `path` under the BioPAX level 3 model
    package, or None if it cannot be loaded."""
    qualified_name = '.'.join(('org.biopax.paxtools.model.level3', path))
    return _autoclass_robust(qualified_name)
def _bpp(path):
    """Return the Java class at `path` under the Paxtools pattern
    package, or None if it cannot be loaded."""
    qualified_name = '.'.join(('org.biopax.paxtools.pattern', path))
    return _autoclass_robust(qualified_name)
def _bpimpl(path):
    """Return the Java implementation class ('<path>Impl') under the
    BioPAX level 3 impl package, or None if it cannot be loaded."""
    qualified_name = ''.join(('org.biopax.paxtools.impl.level3', '.',
                              path, 'Impl'))
    return _autoclass_robust(qualified_name)
def _autoclass_robust(path):
    """Load the Java class with the given fully qualified name.

    If loading raises a JavaException, an error is logged and None is
    returned instead of propagating the exception.
    """
    try:
        return autoclass(path)
    except JavaException:
        logger.error('Could not instantiate ' + path)
    return None
def _cast_biopax_element(bpe):
    """Cast a generic BioPAXElement to the type named by its model interface.

    This is useful when a search only returns generic elements.
    """
    interface_name = bpe.getModelInterface().getName()
    return cast(interface_name, bpe)
def _match_to_array(m):
    """Return the elements of a pattern search match as a list, each cast
    to its specific class."""
    elements = []
    for idx in range(m.varSize()):
        elements.append(_cast_biopax_element(m.get(idx)))
    return elements
def _is_complex(pe):
    """Return True if the physical entity is a Complex (interface or
    implementation class)."""
    if isinstance(pe, _bp('Complex')):
        return True
    return isinstance(pe, _bpimpl('Complex'))
def _is_protein(pe):
    """Return True if the element is a Protein or a ProteinReference."""
    for class_name in ('Protein', 'ProteinReference'):
        if isinstance(pe, _bp(class_name)) or \
                isinstance(pe, _bpimpl(class_name)):
            return True
    return False
def _is_rna(pe):
    """Return True if the element is an Rna (interface or implementation
    class)."""
    if isinstance(pe, _bp('Rna')):
        return True
    return isinstance(pe, _bpimpl('Rna'))
def _is_small_molecule(pe):
    """Return True if the element is a SmallMolecule or a
    SmallMoleculeReference."""
    for class_name in ('SmallMolecule', 'SmallMoleculeReference'):
        if isinstance(pe, _bp(class_name)) or \
                isinstance(pe, _bpimpl(class_name)):
            return True
    return False
def _is_physical_entity(pe):
    """Return True if the element is a PhysicalEntity (interface or
    implementation class)."""
    if isinstance(pe, _bp('PhysicalEntity')):
        return True
    return isinstance(pe, _bpimpl('PhysicalEntity'))
def _is_modification(feature):
    """Return True if the feature is classified as a modification."""
    feature_kind = _is_modification_or_activity(feature)
    return feature_kind == 'modification'
def _is_activity(feature):
    """Return True if the feature is classified as an activity."""
    feature_kind = _is_modification_or_activity(feature)
    return feature_kind == 'activity'
def _is_modification_or_activity(feature):
    """Classify a feature as 'modification' or 'activity'.

    Returns 'activity' if any term of the feature's modification type is
    one of the known active/inactive terms, and 'modification' otherwise.
    Returns None if the feature is not a ModificationFeature or has no
    modification type.
    """
    is_mod_feature = (isinstance(feature, _bp('ModificationFeature')) or
                      isinstance(feature, _bpimpl('ModificationFeature')))
    if not is_mod_feature:
        return None
    mod_type = feature.getModificationType()
    if mod_type is None:
        return None
    activity_terms = ('residue modification, active',
                      'residue modification, inactive',
                      'active', 'inactive')
    terms = mod_type.getTerm().toArray()
    if any(term in activity_terms for term in terms):
        return 'activity'
    return 'modification'
def _is_reference(bpe):
    """Return True if the element is an entity reference."""
    reference_classes = ('ProteinReference', 'SmallMoleculeReference',
                         'RnaReference', 'EntityReference')
    for class_name in reference_classes:
        if isinstance(bpe, _bp(class_name)) or \
                isinstance(bpe, _bpimpl(class_name)):
            return True
    return False
def _is_entity(bpe):
    """Return True if the element is a physical entity."""
    entity_classes = ('Protein', 'SmallMolecule', 'Complex', 'Rna',
                      'RnaRegion', 'DnaRegion', 'PhysicalEntity')
    for class_name in entity_classes:
        if isinstance(bpe, _bp(class_name)) or \
                isinstance(bpe, _bpimpl(class_name)):
            return True
    return False
def _is_catalysis(bpe):
    """Return True if the element is a Catalysis (interface or
    implementation class)."""
    return bool(isinstance(bpe, _bp('Catalysis')) or
                isinstance(bpe, _bpimpl('Catalysis')))
def _has_members(bpe):
    """Return True if the element has at least one member.

    Entity references are checked for member entity references and
    physical entities for member physical entities. Any other kind of
    element has no members.
    """
    if _is_reference(bpe):
        members = bpe.getMemberEntityReference().toArray()
    elif _is_entity(bpe):
        members = bpe.getMemberPhysicalEntity().toArray()
    else:
        return False
    return len(members) > 0
def _listify(lst):
if not isinstance(lst, collections.Iterable):
return [lst]
else:
return lst
def _list_listify(lst):
return [l if isinstance(l, collections.Iterable) else [l] for l in lst]
def _get_combinations(lst):
    """Return an iterator over the Cartesian product of the elements of
    `lst`, treating each non-iterable element as a single option."""
    option_lists = _list_listify(lst)
    return itertools.product(*option_lists)
def _get_mod_intersection(mods1, mods2):
shared_mods = []
for m1 in mods1:
found = False
for m2 in mods2:
if m1.matches(m2):
found = True
break
if found:
shared_mods.append(m1)
return shared_mods
def _get_mod_difference(mods1, mods2):
difference_mods = []
for m1 in mods1:
found = False
for m2 in mods2:
if m1.matches(m2):
found = True
break
if not found:
difference_mods.append(m1)
return difference_mods
# Some BioPAX Pattern classes as shorthand
# Each name is bound once, when this module is executed, to the result of
# _bpp for a path under org.biopax.paxtools.pattern. A name is None if the
# class could not be loaded (see _autoclass_robust).
pb = _bpp('PatternBox')
cb = _bpp('constraint.ConBox')
rt = _bpp('util.RelType')
tp = _bpp('constraint.Type')
cs = _bpp('constraint.ConversionSide')
# '$' separates a nested class from its enclosing class in a JVM class name
cst = _bpp('constraint.ConversionSide$Type')
pt = _bpp('constraint.Participant')
mcc = _bpp('constraint.ModificationChangeConstraint')
mcct = _bpp('constraint.ModificationChangeConstraint$Type')