# Names exported by ``from <this module> import *``.
__all__ = ['Agent', 'BoundCondition', 'MutCondition', 'ModCondition',
'ActivityCondition', 'default_ns_order']
import logging
from collections import OrderedDict as _o
from indra.statements.statements import modtype_conditions, modtype_to_modclass
from .concept import Concept
from .resources import get_valid_residue, activity_types, amino_acids
# Module-level logger shared by all classes in this file.
logger = logging.getLogger(__name__)
# Grounding namespaces in priority order. get_grounding() falls back to this
# list when no ns_order is given and returns the first namespace in it that
# has a non-empty entry in an agent's db_refs.
default_ns_order = ['FPLX', 'UPPRO', 'HGNC', 'UP', 'CHEBI', 'GO', 'MESH',
'MIRBASE', 'DOID', 'HP', 'EFO']
class Agent(Concept):
    """A molecular entity, e.g., a protein.

    Parameters
    ----------
    name : str
        The name of the agent, preferably a canonicalized name such as an
        HGNC gene name.
    mods : list of :py:class:`ModCondition`
        Modification state of the agent. A single ModCondition is promoted
        to a one-element list.
    bound_conditions : list of :py:class:`BoundCondition`
        Other agents bound to the agent in this context. A single
        BoundCondition is promoted to a one-element list.
    mutations : list of :py:class:`MutCondition`
        Amino acid mutations of the agent. A single MutCondition is promoted
        to a one-element list.
    activity : :py:class:`ActivityCondition`
        Activity of the agent.
    location : str
        Cellular location of the agent. Must be a valid name (e.g. "nucleus")
        or identifier (e.g. "GO:0005634") for a GO cellular compartment.
    db_refs : dict
        Dictionary of database identifiers associated with this agent.
    """
    def __init__(self, name, mods=None, activity=None,
                 bound_conditions=None, mutations=None,
                 location=None, db_refs=None):
        super(Agent, self).__init__(name, db_refs=db_refs)

        if mods is None:
            self.mods = []
        # Promote to list
        elif isinstance(mods, ModCondition):
            self.mods = [mods]
        else:
            self.mods = mods

        if bound_conditions is None:
            self.bound_conditions = []
        # Promote to list
        elif isinstance(bound_conditions, BoundCondition):
            self.bound_conditions = [bound_conditions]
        else:
            self.bound_conditions = bound_conditions

        if mutations is None:
            self.mutations = []
        # Promote to list
        elif isinstance(mutations, MutCondition):
            self.mutations = [mutations]
        else:
            self.mutations = mutations

        self.activity = activity
        self.location = location

    def matches_key(self):
        """Return a key to identify the identity and state of the Agent."""
        key = (self.entity_matches_key(),
               self.state_matches_key())
        return str(key)

    def entity_matches_key(self):
        """Return a key to identify the identity of the Agent not its state.

        The key is based on the preferred grounding for the Agent, or if not
        available, the name of the Agent is used.

        Returns
        -------
        str
            The key used to identify the Agent.
        """
        db_ns, db_id = self.get_grounding()
        if db_ns and db_id:
            return str((db_ns, db_id))
        return self.name

    def state_matches_key(self):
        """Return a key to identify the state of the Agent.

        The key covers modifications, mutations, activity, location and
        bound conditions (bound agents are ordered by name).
        """
        # NOTE: the mod/mutation keys are kept in sorted lists rather than
        # sets so that duplicates (e.g. two phosphorylations at serine with
        # unknown sites) are not collapsed.
        act_key = (self.activity.matches_key() if self.activity else None)
        key = (sorted([m.matches_key() for m in self.mods]),
               sorted([m.matches_key() for m in self.mutations]),
               act_key, self.location,
               len(self.bound_conditions),
               tuple((bc.agent.matches_key(), bc.is_bound)
                     for bc in sorted(self.bound_conditions,
                                      key=lambda x: x.agent.name)))
        return str(key)

    # Function to get the namespace to look in
    def get_grounding(self, ns_order=None):
        """Return a tuple of a preferred grounding namespace and ID.

        Parameters
        ----------
        ns_order : list or None
            Namespaces in order of priority, passed through to the
            module-level :py:func:`get_grounding`.

        Returns
        -------
        tuple
            A tuple whose first element is a grounding namespace (HGNC,
            CHEBI, etc.) and the second element is an identifier in the
            namespace. If no preferred grounding is available, a tuple of
            Nones is returned.
        """
        return get_grounding(self.db_refs, ns_order=ns_order)

    def isa(self, other, ontology):
        """Return the ontology's isa/partof verdict for self versus other.

        Returns False without consulting the ontology if either agent lacks
        a preferred grounding; otherwise returns the result of
        ``ontology.isa_or_partof`` on the two groundings.
        """
        # Get the namespaces for the comparison
        (self_ns, self_id) = self.get_grounding()
        (other_ns, other_id) = other.get_grounding()
        # If one of the agents isn't grounded to a relevant namespace,
        # there can't be an isa relationship
        if not all((self_ns, self_id, other_ns, other_id)):
            return False
        # Check for isa relationship
        return ontology.isa_or_partof(self_ns, self_id, other_ns,
                                      other_id)

    def refinement_of(self, other, ontology, entities_refined=False):
        """Return True if this Agent is a refinement of the other Agent.

        Parameters
        ----------
        other : Agent
            The agent to compare against; must be of exactly the same type.
        ontology : object
            Object providing ``isa_or_partof``; it is also handed to the
            ``refinement_of`` methods of modifications and activities.
        entities_refined : bool
            If True, the entity-level check (match or isa) is skipped and
            only the state of the agents is compared. Default is False.

        Returns
        -------
        bool
            True if every bound condition, modification and mutation of
            ``other`` is matched by a distinct one in self, and the location
            and activity of self are compatible with those of ``other``.
        """
        from indra.databases import go_client
        # Make sure the Agent types match
        if type(self) != type(other):
            return False

        # ENTITIES
        # Check that the basic entity of the agent either matches or is related
        # to the entity of the other agent. If not, no match.
        # If the entities match, then we can continue
        if not (entities_refined or
                (self.entity_matches(other) or self.isa(other, ontology))):
            return False

        # BOUND CONDITIONS
        # Now check the bound conditions. For self to be a refinement of
        # other in terms of the bound conditions, it has to include all of the
        # bound conditions in the other agent, and add additional context.
        # TODO: For now, we do not check the bound conditions of the bound
        # conditions.
        # Iterate over the bound conditions in the other agent, and make sure
        # they are all matched in self.
        used_idx = set()
        for bc_other in other.bound_conditions:
            # Iterate over the bound conditions in self to find a match
            bc_found = False
            for idx, bc_self in enumerate(self.bound_conditions):
                if (idx not in used_idx) and \
                        (bc_self.is_bound == bc_other.is_bound) and \
                        bc_self.agent.refinement_of(bc_other.agent, ontology):
                    bc_found = True
                    used_idx.add(idx)
                    break
            # If we didn't find a match for this bound condition in self, then
            # no refinement
            if not bc_found:
                return False

        # MODIFICATIONS
        # Similar to the above, we check that self has all of the modifications
        # of other.
        # Here we need to make sure that a mod in self.mods is only matched
        # once to a mod in other.mods. Otherwise ('phosphorylation') would be
        # considered a refinement of ('phosphorylation', 'phosphorylation')
        matched_indices = []
        # This outer loop checks that each modification in the other Agent
        # is matched.
        for other_mod in other.mods:
            mod_found = False
            # We need to keep track of indices for this Agent's modifications
            # to make sure that each one is used at most once to match
            # the modification of one of the other Agent's modifications.
            for ix, self_mod in enumerate(self.mods):
                if self_mod.refinement_of(other_mod, ontology):
                    # If this modification hasn't been used for matching yet
                    if ix not in matched_indices:
                        # Set the index as used
                        matched_indices.append(ix)
                        mod_found = True
                        break
            # If we didn't find a match for this mod in self, then
            # no refinement
            if not mod_found:
                return False

        # MUTATIONS
        # Similar to the above, we check that self has all of the mutations
        # of other.
        matched_indices = []
        # This outer loop checks that each mutation in the other Agent
        # is matched.
        for other_mut in other.mutations:
            mut_found = False
            # We need to keep track of indices for this Agent's mutations
            # to make sure that each one is used at most once to match
            # the mutation of one of the other Agent's mutations.
            for ix, self_mut in enumerate(self.mutations):
                if self_mut.refinement_of(other_mut):
                    # If this mutation hasn't been used for matching yet
                    if ix not in matched_indices:
                        # Set the index as used
                        matched_indices.append(ix)
                        mut_found = True
                        break
            # If we didn't find a match for this mut in self, then
            # no refinement
            if not mut_found:
                return False

        # LOCATION
        # If the other location is specified and this one is not then self
        # cannot be a refinement
        if self.location is None:
            if other.location is not None:
                return False
        # If both this location and the other one is specified, we check the
        # hierarchy.
        elif other.location is not None:
            # Unless self.location is the same as, or a kind/part of,
            # other.location according to the ontology, self is not a
            # refinement
            sl = go_client.get_go_id_from_label(self.location)
            ol = go_client.get_go_id_from_label(other.location)
            if not ontology.isa_or_partof('GO', sl, 'GO', ol):
                return False

        # ACTIVITY
        if self.activity is None:
            if other.activity is not None:
                return False
        elif other.activity is not None:
            if not self.activity.refinement_of(other.activity, ontology):
                return False

        # Everything checks out
        return True

    def equals(self, other):
        """Return True if the other Agent has the same content as this one.

        Name, activity, location and db_refs must be equal, and the lists of
        modifications, mutations and bound conditions must have the same
        length and be pairwise equal in the same order.
        """
        # ActivityCondition defines equals() but no __eq__, so using == here
        # would only compare object identity; compare by value instead.
        if self.activity is None or other.activity is None:
            activity_match = (self.activity is None and
                              other.activity is None)
        else:
            activity_match = self.activity.equals(other.activity)

        matches = (self.name == other.name) and \
            activity_match and \
            (self.location == other.location) and \
            (self.db_refs == other.db_refs)

        if len(self.mods) == len(other.mods):
            for s, o in zip(self.mods, other.mods):
                matches = matches and s.equals(o)
        else:
            return False

        if len(self.mutations) == len(other.mutations):
            for s, o in zip(self.mutations, other.mutations):
                matches = matches and s.equals(o)
        else:
            return False

        if len(self.bound_conditions) == len(other.bound_conditions):
            for s, o in zip(self.bound_conditions, other.bound_conditions):
                matches = matches and s.agent.equals(o.agent) and \
                    s.is_bound == o.is_bound
        else:
            return False

        return matches

    def to_json(self):
        """Return an ordered dict representation of the Agent.

        Empty modification, mutation and bound condition lists as well as
        unset activity and location are omitted; 'name' and 'db_refs' are
        always present.
        """
        json_dict = _o({'name': self.name})
        if self.mods:
            json_dict['mods'] = [mc.to_json() for mc in self.mods]
        if self.mutations:
            json_dict['mutations'] = [mc.to_json() for mc in self.mutations]
        if self.bound_conditions:
            json_dict['bound_conditions'] = [bc.to_json() for bc in
                                             self.bound_conditions]
        if self.activity is not None:
            json_dict['activity'] = self.activity.to_json()
        if self.location is not None:
            json_dict['location'] = self.location
        json_dict['db_refs'] = self.db_refs
        return json_dict

    @classmethod
    def _from_json(cls, json_dict):
        """Build an Agent from its JSON dict representation.

        Returns None (after logging an error) if the dict has no name.
        """
        name = json_dict.get('name')
        db_refs = json_dict.get('db_refs', {})
        mods = json_dict.get('mods', [])
        mutations = json_dict.get('mutations', [])
        activity = json_dict.get('activity')
        bound_conditions = json_dict.get('bound_conditions', [])
        location = json_dict.get('location')

        if not name:
            logger.error('Agent missing name.')
            return None
        # An explicit null/empty db_refs entry is normalized to a dict
        if not db_refs:
            db_refs = {}
        agent = Agent(name, db_refs=db_refs)
        agent.mods = [ModCondition._from_json(mod) for mod in mods]
        agent.mutations = [MutCondition._from_json(mut) for mut in mutations]
        agent.bound_conditions = [BoundCondition._from_json(bc)
                                  for bc in bound_conditions]
        agent.location = location
        if activity:
            agent.activity = ActivityCondition._from_json(activity)
        return agent

    def __str__(self):
        """Return 'name(attributes)' listing only the state that is set."""
        attr_strs = []
        if self.mods:
            mod_str = 'mods: '
            mod_str += ', '.join(['%s' % m for m in self.mods])
            attr_strs.append(mod_str)
        if self.activity:
            if self.activity.is_active:
                attr_strs.append('%s' % self.activity.activity_type)
            else:
                attr_strs.append('%s: %s' % (self.activity.activity_type,
                                             self.activity.is_active))
        if self.mutations:
            mut_str = 'muts: '
            mut_str += ', '.join(['%s' % m for m in self.mutations])
            attr_strs.append(mut_str)
        if self.bound_conditions:
            attr_strs += ['bound: [%s, %s]' % (b.agent.name, b.is_bound)
                          for b in self.bound_conditions]
        if self.location:
            attr_strs += ['location: %s' % self.location]
        attr_str = ', '.join(attr_strs)
        agent_name = self.name
        return '%s(%s)' % (agent_name, attr_str)
class BoundCondition(object):
    """Identify Agents bound (or not bound) to a given Agent in a given context.

    Parameters
    ----------
    agent : :py:class:`Agent`
        Instance of Agent.
    is_bound : bool
        Specifies whether the given Agent is bound or unbound in the current
        context. Default is True.

    Examples
    --------
    EGFR bound to EGF:

    >>> egf = Agent('EGF')
    >>> egfr = Agent('EGFR', bound_conditions=[BoundCondition(egf)])

    BRAF *not* bound to a 14-3-3 protein (YWHAB):

    >>> ywhab = Agent('YWHAB')
    >>> braf = Agent('BRAF', bound_conditions=[BoundCondition(ywhab, False)])
    """
    def __init__(self, agent, is_bound=True):
        self.agent = agent
        self.is_bound = is_bound

    def matches(self, other):
        """Return True if both bound conditions have the same matches key."""
        return (self.matches_key() == other.matches_key())

    def matches_key(self):
        """Return a string key built from the agent's key and is_bound."""
        # The agent's matches_key must be *called*: embedding the bound
        # method object would put its repr (including a memory address) into
        # the key, so equal bound conditions would never match.
        key = (self.agent.matches_key(), self.is_bound)
        return str(key)

    def to_json(self):
        """Return an ordered dict with the 'agent' and 'is_bound' entries."""
        json_dict = _o({'agent': self.agent.to_json(),
                        'is_bound': self.is_bound})
        return json_dict

    @classmethod
    def _from_json(cls, json_dict):
        """Build a BoundCondition from its JSON dict representation.

        Returns None if the agent entry is missing or cannot be
        deserialized. A missing 'is_bound' defaults to True with a warning.
        """
        agent_entry = json_dict.get('agent')
        if agent_entry is None:
            logger.error('BoundCondition missing agent.')
            return None
        agent = Agent._from_json(agent_entry)
        if agent is None:
            return None
        is_bound = json_dict.get('is_bound')
        if is_bound is None:
            logger.warning('BoundCondition missing is_bound, defaulting '
                           'to True.')
            is_bound = True
        bc = BoundCondition(agent, is_bound)
        return bc
class MutCondition(object):
    """Mutation state of an amino acid position of an Agent.

    Parameters
    ----------
    position : str
        Residue position of the mutation in the protein sequence.
    residue_from : str
        Wild-type (unmodified) amino acid residue at the given position.
    residue_to : str
        Amino acid at the position resulting from the mutation.
        Default is None.

    Examples
    --------
    Represent EGFR with a L858R mutation:

    >>> egfr_mutant = Agent('EGFR', mutations=[MutCondition('858', 'L', 'R')])
    """
    def __init__(self, position, residue_from, residue_to=None):
        self.position = position
        # Both residues are passed through get_valid_residue before storing
        self.residue_from = get_valid_residue(residue_from)
        self.residue_to = get_valid_residue(residue_to)

    def matches(self, other):
        """Return True if both mutations produce the same matches key."""
        own_key = self.matches_key()
        other_key = other.matches_key()
        return own_key == other_key

    def matches_key(self):
        """Return a string key of the stringified position and residues."""
        fields = (self.position, self.residue_from, self.residue_to)
        return str(tuple(str(field) for field in fields))

    def equals(self, other):
        """Return True if position and both residues are equal."""
        return all(getattr(self, attr) == getattr(other, attr)
                   for attr in ('position', 'residue_from', 'residue_to'))

    def to_json(self):
        """Return an ordered dict with position, residue_from, residue_to."""
        return _o({'position': self.position,
                   'residue_from': self.residue_from,
                   'residue_to': self.residue_to})

    def to_hgvs(self):
        """Return an HGVS-style protein string ('p.…') for the mutation.

        Falls back to 'p.?' when the source residue or the position is
        not available.
        """
        source = _aa_short_caps(self.residue_from)
        target = _aa_short_caps(self.residue_to)
        # Without a source residue and a position nothing can be expressed
        if not (source and self.position):
            return 'p.?'
        if target:
            return 'p.%s%s%s' % (source, self.position, target)
        if target is None:
            return 'p.%s%s?' % (source, self.position)
        return 'p.?'

    @classmethod
    def _from_json(cls, json_dict):
        """Build a MutCondition from its JSON dict representation."""
        return cls(json_dict.get('position'),
                   json_dict.get('residue_from'),
                   json_dict.get('residue_to'))

    def __str__(self):
        return '(%s, %s, %s)' % (self.residue_from, self.position,
                                 self.residue_to)

    def __repr__(self):
        return 'MutCondition' + str(self)

    def refinement_of(self, other):
        """Return True if this mutation is a refinement of the other one.

        Each of residue_from, residue_to and position must either be equal
        to the other's value, or be set here while unset (None) in other.
        """
        def same_or_more_specific(mine, theirs):
            return (mine == theirs or
                    (mine is not None and theirs is None))

        checks = [
            same_or_more_specific(self.residue_from, other.residue_from),
            same_or_more_specific(self.residue_to, other.residue_to),
            same_or_more_specific(self.position, other.position),
        ]
        return all(checks)
class ModCondition(object):
    """Post-translational modification state at an amino acid position.

    Parameters
    ----------
    mod_type : str
        The type of post-translational modification, e.g., 'phosphorylation'.
        If the type is not among the known modification types, a warning is
        logged and the value is stored as given.
    residue : str or None
        String indicating the modified amino acid, e.g., 'Y' or 'tyrosine'.
        If None, indicates that the residue at the modification site is
        unknown or unspecified.
    position : str or None
        String indicating the position of the modified amino acid, e.g., '202'.
        If None, indicates that the position is unknown or unspecified.
        An int is converted to its string form.
    is_modified : bool
        Specifies whether the modification is present (True) or absent
        (False) at the site. Default is True.

    Examples
    --------
    Doubly-phosphorylated MEK (MAP2K1):

    >>> phospho_mek = Agent('MAP2K1', mods=[
    ... ModCondition('phosphorylation', 'S', '202'),
    ... ModCondition('phosphorylation', 'S', '204')])

    ERK (MAPK1) unphosphorylated at tyrosine 187:

    >>> unphos_erk = Agent('MAPK1', mods=(
    ... ModCondition('phosphorylation', 'Y', '187', is_modified=False)))
    """
    def __init__(self, mod_type, residue=None, position=None,
                 is_modified=True):
        if mod_type not in modtype_conditions:
            logger.warning('Unknown modification type: %s' % mod_type)
        self.mod_type = mod_type
        self.residue = get_valid_residue(residue)
        # Positions are kept as strings; ints are converted on the way in
        self.position = (str(position) if isinstance(position, int)
                         else position)
        self.is_modified = is_modified

    def refinement_of(self, other, ontology):
        """Return True if this modification is a refinement of the other.

        The is_modified flags must agree, the type must be equal or an
        ``ontology.isa`` child in the 'INDRA_MODS' namespace, and residue and
        position must each be equal or set here while None in other.
        """
        if self.is_modified != other.is_modified:
            return False

        def same_or_more_specific(mine, theirs):
            return (mine == theirs or
                    (mine is not None and theirs is None))

        # The ontology is consulted only when the types differ
        type_ok = (self.mod_type == other.mod_type or
                   ontology.isa('INDRA_MODS', self.mod_type,
                                'INDRA_MODS', other.mod_type))
        residue_ok = same_or_more_specific(self.residue, other.residue)
        position_ok = same_or_more_specific(self.position, other.position)
        return (type_ok and residue_ok and position_ok)

    def matches(self, other):
        """Return True if both modifications have the same matches key."""
        own_key = self.matches_key()
        other_key = other.matches_key()
        return own_key == other_key

    def matches_key(self):
        """Return a string key of type, residue, position and is_modified."""
        fields = (self.mod_type, self.residue, self.position,
                  self.is_modified)
        return str(tuple(str(field) for field in fields))

    def __str__(self):
        pieces = ['%s' % self.mod_type]
        if self.residue is not None:
            pieces.append(', %s' % self.residue)
        if self.position is not None:
            pieces.append(', %s' % self.position)
        # Only the unmodified state is spelled out explicitly
        if not self.is_modified:
            pieces.append(', False')
        return '(' + ''.join(pieces) + ')'

    def __repr__(self):
        return str(self)

    def to_json(self):
        """Return an ordered dict; unset residue/position are omitted."""
        json_dict = _o({'mod_type': self.mod_type})
        for key in ('residue', 'position'):
            value = getattr(self, key)
            if value is not None:
                json_dict[key] = value
        json_dict['is_modified'] = self.is_modified
        return json_dict

    @classmethod
    def _from_json(cls, json_dict):
        """Build a ModCondition from its JSON dict representation.

        Returns None (after logging an error) if 'mod_type' is missing. A
        missing 'is_modified' defaults to True with a warning.
        """
        mod_type = json_dict.get('mod_type')
        if not mod_type:
            logger.error('ModCondition missing mod_type.')
            return None
        if mod_type not in modtype_to_modclass:
            logger.warning('Unknown modification type: %s' % mod_type)
        residue = json_dict.get('residue')
        position = json_dict.get('position')
        is_modified = json_dict.get('is_modified')
        if is_modified is None:
            logger.warning('ModCondition missing is_modified, defaulting '
                           'to True')
            is_modified = True
        return ModCondition(mod_type, residue, position, is_modified)

    def equals(self, other):
        """Return True if type, residue, position and flag are all equal."""
        return all(getattr(self, attr) == getattr(other, attr)
                   for attr in ('mod_type', 'residue', 'position',
                                'is_modified'))

    def __hash__(self):
        return hash(self.matches_key())
class ActivityCondition(object):
    """An active or inactive state of a protein.

    Parameters
    ----------
    activity_type : str
        The type of activity, e.g. 'kinase'. The basic, unspecified molecular
        activity is represented as 'activity'. Examples of other activity
        types are 'kinase', 'phosphatase', 'catalytic', 'transcription',
        etc. If the type is not among the known activity types, a warning is
        logged and the value is stored as given.
    is_active : bool
        Specifies whether the given activity type is present or absent.

    Examples
    --------
    Kinase-active MAP2K1:

    >>> mek_active = Agent('MAP2K1',
    ...                    activity=ActivityCondition('kinase', True))

    Transcriptionally inactive FOXO3:

    >>> foxo_inactive = Agent('FOXO3',
    ...                     activity=ActivityCondition('transcription', False))
    """
    def __init__(self, activity_type, is_active):
        if activity_type not in activity_types:
            logger.warning('Invalid activity type: %s' % activity_type)
        self.activity_type = activity_type
        self.is_active = is_active

    def refinement_of(self, other, ontology):
        """Return True if this activity is a refinement of the other one.

        The is_active flags must agree, and the activity type must either be
        equal or an ``ontology.isa`` child of the other type in the
        'INDRA_ACTIVITIES' namespace.

        Returns
        -------
        bool
            Always an explicit True or False.
        """
        if self.is_active != other.is_active:
            return False
        if self.activity_type == other.activity_type:
            return True
        # Return an explicit boolean rather than falling off the end of the
        # function (which yields None) when the types are unrelated.
        return bool(ontology.isa('INDRA_ACTIVITIES', self.activity_type,
                                 'INDRA_ACTIVITIES', other.activity_type))

    def equals(self, other):
        """Return True if activity type and is_active are both equal."""
        type_match = (self.activity_type == other.activity_type)
        is_act_match = (self.is_active == other.is_active)
        return (type_match and is_act_match)

    def matches(self, other):
        """Return True if both activities have the same matches key."""
        return self.matches_key() == other.matches_key()

    def matches_key(self):
        """Return a string key of the activity type and is_active flag."""
        key = (str(self.activity_type), str(self.is_active))
        return str(key)

    def to_json(self):
        """Return an ordered dict with 'activity_type' and 'is_active'."""
        json_dict = _o({'activity_type': self.activity_type,
                        'is_active': self.is_active})
        return json_dict

    @classmethod
    def _from_json(cls, json_dict):
        """Build an ActivityCondition from its JSON dict representation.

        A missing activity type defaults to 'activity' (logged as an error)
        and a missing is_active defaults to True (logged as a warning).
        """
        activity_type = json_dict.get('activity_type')
        is_active = json_dict.get('is_active')
        if not activity_type:
            logger.error('ActivityCondition missing activity_type, ' +
                         'defaulting to `activity`')
            activity_type = 'activity'
        if is_active is None:
            logger.warning('ActivityCondition missing is_active, ' +
                           'defaulting to True')
            is_active = True
        ac = ActivityCondition(activity_type, is_active)
        return ac

    def __str__(self):
        s = '%s' % self.activity_type
        # Only the inactive state is spelled out explicitly
        if not self.is_active:
            s += ', False'
        s = '(' + s + ')'
        return s

    def __repr__(self):
        return str(self)
def _aa_short_caps(res):
    """Return the capitalized 'short_name' of an amino acid, or None.

    None is returned when `res` is None or has no (non-empty) entry in the
    amino_acids table.
    """
    entry = amino_acids.get(res) if res is not None else None
    return entry['short_name'].capitalize() if entry else None
def get_grounding(db_refs, ns_order=None):
    """Return a tuple of a preferred grounding namespace and ID.

    Parameters
    ----------
    db_refs : dict
        A dict of namespace to ID references associated with an agent.
    ns_order : list
        A list of namespaces which are in order of priority. The first
        matched namespace will be used as the grounding. If None, the
        module-level default_ns_order is used.

    Returns
    -------
    tuple
        A tuple whose first element is a grounding namespace (HGNC,
        CHEBI, etc.) and the second element is an identifier in the
        namespace. If no preferred grounding is available, a tuple of
        Nones is returned.
    """
    namespaces = default_ns_order if ns_order is None else ns_order
    for namespace in namespaces:
        identifier = db_refs.get(namespace)
        # Missing or empty entries do not count as a grounding
        if identifier:
            # When several IDs are given for a namespace, take the first
            if isinstance(identifier, (list, tuple)):
                identifier = identifier[0]
            return namespace, identifier
    return None, None