Source code for indra.assemblers.english_assembler

from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str
import logging
import indra.statements as ist

logger = logging.getLogger('english_assembler')

[docs]class EnglishAssembler(object): """This assembler generates English sentences from INDRA Statements. Parameters ---------- stmts : Optional[list[indra.statements.Statement]] A list of INDRA Statements to be added to the assembler. Attributes ---------- statements : list[indra.statements.Statement] A list of INDRA Statements to assemble. model : str The assembled sentences as a single string. """ def __init__(self, stmts=None): if stmts is None: self.statements = [] else: self.statements = stmts self.model = None
[docs] def add_statements(self, stmts): """Add INDRA Statements to the assembler's list of statements. Parameters ---------- stmts : list[indra.statements.Statement] A list of :py:class:`indra.statements.Statement` to be added to the statement list of the assembler. """ self.statements += stmts
[docs] def make_model(self): """Assemble text from the set of collected INDRA Statements. Returns ------- stmt_strs : str Return the assembled text as unicode string. By default, the text is a single string consisting of one or more sentences with periods at the end. """ stmt_strs = [] for stmt in self.statements: if isinstance(stmt, ist.Modification): stmt_strs.append(_assemble_modification(stmt)) elif isinstance(stmt, ist.Autophosphorylation): stmt_strs.append(_assemble_autophosphorylation(stmt)) elif isinstance(stmt, ist.Complex): stmt_strs.append(_assemble_complex(stmt)) elif isinstance(stmt, ist.RegulateActivity): stmt_strs.append(_assemble_regulate_activity(stmt)) elif isinstance(stmt, ist.RegulateAmount): stmt_strs.append(_assemble_regulate_amount(stmt)) elif isinstance(stmt, ist.ActiveForm): stmt_strs.append(_assemble_activeform(stmt)) elif isinstance(stmt, ist.Translocation): stmt_strs.append(_assemble_translocation(stmt)) elif isinstance(stmt, ist.Gef): stmt_strs.append(_assemble_gef(stmt)) elif isinstance(stmt, ist.Gap): stmt_strs.append(_assemble_gap(stmt)) else: logger.warning('Unhandled statement type: %s.' % type(stmt)) if stmt_strs: return ' '.join(stmt_strs) else: return ''
def _assemble_agent_str(agent): """Assemble an Agent object to text.""" agent_str = agent.name # Handle mutation conditions if agent.mutations: mut_strs = [] for mut in agent.mutations: res_to = mut.residue_to if mut.residue_to else '' res_from = mut.residue_from if mut.residue_from else '' pos = mut.position if mut.position else '' mut_str = '%s%s%s' % (res_from, pos, res_to) mut_strs.append(mut_str) mut_strs = '/'.join(mut_strs) agent_str = '%s-%s' % (agent_str, mut_strs) # Handle location if agent.location is not None: agent_str += ' in the ' + agent.location if not agent.mods and not agent.bound_conditions and not agent.activity: return agent_str # Handle bound conditions bound_to = [bc.agent.name for bc in agent.bound_conditions if bc.is_bound] not_bound_to = [bc.agent.name for bc in agent.bound_conditions if not bc.is_bound] if bound_to: agent_str += ' bound to ' + _join_list(bound_to) if not_bound_to: agent_str += ' and not bound to ' +\ _join_list(not_bound_to) else: if not_bound_to: agent_str += ' not bound to ' +\ _join_list(not_bound_to) # Handle modification conditions if agent.mods: # Special case if len(agent.mods) == 1 and agent.mods[0].position is None: prefix = _mod_state_str(agent.mods[0].mod_type) if agent.mods[0].residue is not None: residue_str =\ ist.amino_acids[agent.mods[0].residue]['full_name'] prefix = residue_str + '-' + prefix agent_str = prefix + ' ' + agent_str else: if agent.bound_conditions: agent_str += ' and' agent_str += ' %s on ' % _mod_state_str(agent.mods[0].mod_type) mod_lst = [] for m in agent.mods: if m.position is None: if m.residue is not None: residue_str =\ ist.amino_acids[m.residue]['full_name'] mod_lst.append(residue_str) else: mod_lst.append('an unknown residue') else: mod_lst.append(m.residue + m.position) agent_str += _join_list(mod_lst) # Handle activity conditions if agent.activity is not None: # TODO: handle activity types if agent.activity.is_active: prefix = 'active' else: prefix = 'inactive' agent_str = prefix + ' ' + agent_str return agent_str def _join_list(lst): """Join a list of words in a gramatically correct way.""" if len(lst) > 2: s = ', '.join(lst[:-1]) s += ' and ' + lst[-1] elif len(lst) == 2: s = lst[0] + ' and ' + lst[1] elif len(lst) == 1: s = lst[0] else: s = '' return s def _assemble_activeform(stmt): """Assemble ActiveForm statements into text.""" subj_str = _assemble_agent_str(stmt.agent) if stmt.is_active: is_active_str = 'active' else: is_active_str = 'inactive' if stmt.activity == 'activity': stmt_str = subj_str + ' is ' + is_active_str elif stmt.activity == 'kinase': stmt_str = subj_str + ' is kinase-' + is_active_str elif stmt.activity == 'phosphatase': stmt_str = subj_str + ' is phosphatase-' + is_active_str elif stmt.activity == 'catalytic': stmt_str = subj_str + ' is catalytically ' + is_active_str elif stmt.activity == 'transcription': stmt_str = subj_str + ' is transcriptionally ' + is_active_str elif stmt.activity == 'gtpbound': stmt_str = subj_str + ' is GTP-bound ' + is_active_str return _make_sentence(stmt_str) def _assemble_modification(stmt): """Assemble Modification statements into text.""" sub_str = _assemble_agent_str(stmt.sub) if stmt.enz is not None: enz_str = _assemble_agent_str(stmt.enz) if _get_is_direct(stmt): mod_str = ' ' + _mod_process_verb(stmt) + ' ' else: mod_str = ' leads to the ' + _mod_process_noun(stmt) + ' of ' stmt_str = enz_str + mod_str + sub_str else: stmt_str = sub_str + ' is ' + _mod_state_stmt(stmt) if stmt.residue is not None: if stmt.position is None: mod_str = 'on ' + ist.amino_acids[stmt.residue]['full_name'] else: mod_str = 'on ' + stmt.residue + stmt.position else: mod_str = '' stmt_str += ' ' + mod_str return _make_sentence(stmt_str) def _assemble_complex(stmt): """Assemble Complex statements into text.""" member_strs = [_assemble_agent_str(m) for m in stmt.members] stmt_str = member_strs[0] + ' binds ' + _join_list(member_strs[1:]) return _make_sentence(stmt_str) def _assemble_autophosphorylation(stmt): """Assemble Autophosphorylation statements into text.""" enz_str = _assemble_agent_str(stmt.enz) stmt_str = enz_str + ' phosphorylates itself' if stmt.residue is not None: if stmt.position is None: mod_str = 'on ' + ist.amino_acids[stmt.residue]['full_name'] else: mod_str = 'on ' + stmt.residue + stmt.position else: mod_str = '' stmt_str += ' ' + mod_str return _make_sentence(stmt_str) def _assemble_regulate_activity(stmt): """Assemble RegulateActivity statements into text.""" subj_str = _assemble_agent_str(stmt.subj) obj_str = _assemble_agent_str(stmt.obj) if stmt.is_activation: rel_str = ' activates ' else: rel_str = ' inhibits ' stmt_str = subj_str + rel_str + obj_str return _make_sentence(stmt_str) def _assemble_regulate_amount(stmt): """Assemble RegulateAmount statements into text.""" obj_str = _assemble_agent_str(stmt.obj) if stmt.subj is not None: subj_str = _assemble_agent_str(stmt.subj) if isinstance(stmt, ist.IncreaseAmount): rel_str = ' increases the amount of ' elif isinstance(stmt, ist.DecreaseAmount): rel_str = ' decreases the amount of ' stmt_str = subj_str + rel_str + obj_str else: if isinstance(stmt, ist.IncreaseAmount): stmt_str = obj_str + ' is produced' elif isinstance(stmt, ist.DecreaseAmount): stmt_str = obj_str + ' is degraded' return _make_sentence(stmt_str) def _assemble_translocation(stmt): """Assemble Translocation statements into text.""" agent_str = _assemble_agent_str(stmt.agent) stmt_str = agent_str + ' translocates' if stmt.from_location is not None: stmt_str += ' from the ' + stmt.from_location if stmt.to_location is not None: stmt_str += ' to the ' + stmt.to_location return _make_sentence(stmt_str) def _assemble_gap(stmt): """Assemble Gap statements into text.""" subj_str = _assemble_agent_str(stmt.gap) obj_str = _assemble_agent_str(stmt.ras) stmt_str = subj_str + ' is a GAP for ' + obj_str return _make_sentence(stmt_str) def _assemble_gef(stmt): """Assemble Gef statements into text.""" subj_str = _assemble_agent_str(stmt.gef) obj_str = _assemble_agent_str(stmt.ras) stmt_str = subj_str + ' is a GEF for ' + obj_str return _make_sentence(stmt_str) def _make_sentence(txt): """Make a sentence from a piece of text.""" #Make sure first letter is capitalized txt = txt.strip(' ') txt = txt[0].upper() + txt[1:] + '.' return txt def _get_is_direct(stmt): '''Returns true if there is evidence that the statement is a direct interaction. If any of the evidences associated with the statement indicates a direct interatcion then we assume the interaction is direct. If there is no evidence for the interaction being indirect then we default to direct.''' any_indirect = False for ev in stmt.evidence: if ev.epistemics.get('direct') is True: return True elif ev.epistemics.get('direct') is False: # This guarantees that we have seen at least # some evidence that the statement is indirect any_indirect = True if any_indirect: return False return True def _get_is_hypothesis(stmt): '''Returns true if there is evidence that the statement is only hypothetical. If all of the evidences associated with the statement indicate a hypothetical interaction then we assume the interaction is hypothetical.''' for ev in stmt.evidence: if not ev.epistemics.get('hypothesis') is True: return True return False def _get_is_hypothesis_adverb(stmt): '''Returns the string associated with a statement being hypothetical.''' if _get_is_hypothesis(stmt): return ' hypothetically ' else: return '' def _mod_process_verb(stmt): mod_name = stmt.__class__.__name__.lower() return mod_process_prefix.get(mod_name) def _mod_process_noun(stmt): mod_name = stmt.__class__.__name__.lower() return mod_name def _mod_state_stmt(stmt): mod_name = stmt.__class__.__name__.lower() return mod_state_prefix.get(mod_name) def _mod_state_str(s): return mod_state_prefix.get(s) mod_state_prefix = { 'phosphorylation': 'phosphorylated', 'dephosphorylation': 'dephosphorylated', 'ubiquitination': 'ubiquitinated', 'deubiquitination': 'deubiquitinated', 'acetylation': 'acetylated', 'deacetylation': 'deacetylated', 'hydroxylation': 'hydroxylated', 'dehydroxylation': 'dehydroxylated', 'sumoylation': 'sumoylated', 'desumoylation': 'desumoylated', 'farnesylation': 'farnesylated', 'defarnesylation': 'defarnesylated', 'glycosylation': 'glycosylated', 'deglycosylation': 'deglycosylated', 'ribosylation': 'ribosylated', 'deribosylation': 'deribosylated' } mod_process_prefix = { 'phosphorylation': 'phosphorylates', 'dephosphorylation': 'dephosphorylates', 'ubiquitination': 'ubiquitinates', 'deubiquitination': 'deubiquitinates', 'acetylation': 'acetylates', 'deacetylation': 'deacetylates', 'hydroxylation': 'hydroxylates', 'dehydroxylation': 'dehydroxylates', 'sumoylation': 'sumoylates', 'desumoylation': 'desumoylates', 'farnesylation': 'farnesylates', 'defarnesylation': 'defarnesylates', 'glycosylation': 'glycosylates', 'deglycosylation': 'deglycosylates', 'ribosylation': 'ribosylates', 'deribosylation': 'deribosylates' }