# Source code for indra.assemblers.sbgn.assembler

from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str
import copy
import logging
import lxml.etree
import lxml.builder
from indra.statements import *
from indra.assemblers.pysb.assembler import PysbPreassembler

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Namespace URI declared on the XML elements generated by this module.
sbgn_ns = 'http://sbgn.org/libsbgn/0.2'
# Element factory used throughout the assembler; its nsmap declares sbgn_ns
# as the default (unprefixed, i.e. None-keyed) namespace.
emaker = lxml.builder.ElementMaker(nsmap={None: sbgn_ns})


class SBGNAssembler(object):
    """This class assembles an SBGN model from a set of INDRA Statements.

    The Systems Biology Graphical Notation (SBGN) is a widely used
    graphical notation standard for systems biology models.
    This assembler creates SBGN models following the Process Description (PD)
    standard, documented at:
    https://github.com/sbgn/process-descriptions/blob/master/UserManual/sbgn_PD-level1-user-public.pdf.
    For more information on SBGN, see: http://sbgn.github.io/sbgn/

    Parameters
    ----------
    statements : Optional[list[indra.statements.Statement]]
        A list of INDRA Statements to be assembled.

    Attributes
    ----------
    statements : list[indra.statements.Statement]
        A list of INDRA Statements to be assembled.
    sbgn : lxml.etree.ElementTree
        The structure of the SBGN model that is assembled, represented as an
        XML ElementTree.
    """

    # Attributes of the bbox element attached to each kind of glyph.
    process_style = {'x': '0', 'y': '0', 'w': '10', 'h': '10'}
    source_sink_style = {'x': '0', 'y': '0', 'w': '10', 'h': '10'}
    monomer_style = {'x': '0', 'y': '0', 'w': '60', 'h': '30'}
    complex_style = {'x': '1', 'y': '1', 'w': '60', 'h': '65'}
    # Only needed by the (currently disabled) 'unit of information' glyph.
    entity_type_style = {'x': '0', 'y': '0', 'w': '30', 'h': '12'}
    entity_state_style = {'x': '1', 'y': '1', 'w': '28', 'h': '12'}

    def __init__(self, statements=None):
        if not statements:
            self.statements = []
        else:
            self.statements = statements
        self._reset_model()

    def _reset_model(self):
        """Start a new, empty SBGN document and clear all id bookkeeping.

        Shared by ``__init__`` and ``make_model`` so that both create the
        map element the same way and never carry ids over from a previous
        assembly run.
        """
        self.sbgn = emaker.sbgn()
        self._map = emaker.map(language='process description')
        self.sbgn.append(self._map)
        # Counter behind the sequential 'id_<n>' element ids
        self._id_counter = 0
        # Maps an agent's matches_key() to the element id assigned to it
        self._agent_ids = {}
        # Agent ids that already have a top-level glyph in the current map
        self._map_agent_ids = set()

    def add_statements(self, stmts):
        """Add INDRA Statements to the assembler's list of statements.

        Statements that match one already in the list (as determined by
        ``statement_exists``) are not added again.

        Parameters
        ----------
        stmts : list[indra.statements.Statement]
            A list of :py:class:`indra.statements.Statement`
            to be added to the statement list of the assembler.
        """
        for stmt in stmts:
            if not self.statement_exists(stmt):
                self.statements.append(stmt)

    def make_model(self):
        """Assemble the SBGN model from the collected INDRA Statements.

        This method assembles an SBGN model from the set of INDRA Statements.
        The statements are first run through the PysbPreassembler's
        ``replace_activities`` step, and the assembler's statement list is
        replaced by the result. The assembled model is set as the assembler's
        sbgn attribute (it is represented as an XML ElementTree internally).
        Any previously assembled model and its element ids are discarded,
        so calling this method repeatedly starts from a clean document.

        Returns
        -------
        sbgn_str : bytes (str in Python 2)
            The XML serialized SBGN model, as returned by ``print_model``.
        """
        ppa = PysbPreassembler(self.statements)
        ppa.replace_activities()
        self.statements = ppa.statements
        self._reset_model()
        for stmt in self.statements:
            if isinstance(stmt, Modification):
                self._assemble_modification(stmt)
            elif isinstance(stmt, RegulateActivity):
                self._assemble_regulateactivity(stmt)
            elif isinstance(stmt, RegulateAmount):
                self._assemble_regulateamount(stmt)
            elif isinstance(stmt, Complex):
                self._assemble_complex(stmt)
            elif isinstance(stmt, ActiveForm):
                # ActiveForm statements are recognized but deliberately not
                # drawn; _assemble_activeform is currently not wired in.
                pass
            else:
                logger.warning("Unhandled Statement type %s", type(stmt))
        return self.print_model()

    def print_model(self, pretty=True, encoding='utf8'):
        """Return the assembled SBGN model as an XML string.

        Parameters
        ----------
        pretty : Optional[bool]
            If True, the SBGN string is formatted with indentation (for human
            viewing) otherwise no indentation is used. Default: True
        encoding : Optional[str]
            The encoding passed on to the XML serializer. Default: utf8

        Returns
        -------
        sbgn_str : bytes (str in Python 2)
            An XML string representation of the SBGN model.
        """
        return lxml.etree.tostring(self.sbgn, pretty_print=pretty,
                                   encoding=encoding, xml_declaration=True)

    def save_model(self, file_name='model.sbgn'):
        """Save the assembled SBGN model in a file.

        Parameters
        ----------
        file_name : Optional[str]
            The name of the file to save the SBGN network to.
            Default: model.sbgn
        """
        model = self.print_model()
        # print_model returns an encoded string, hence the binary mode
        with open(file_name, 'wb') as fh:
            fh.write(model)

    def _assemble_modification(self, stmt):
        """Draw a Modification as a process catalyzed by the enzyme.

        Statements without an enzyme are skipped.
        """
        if not stmt.enz:
            return
        # Make glyph for enz
        enz_glyph = self._agent_glyph(stmt.enz)
        # Two independent conditions: one as given by the statement and one
        # with the modified flag flipped
        mc_changed = stmt._get_mod_condition()
        mc_unchanged = stmt._get_mod_condition()
        mc_unchanged.is_modified = not mc_unchanged.is_modified
        # Make glyphs for sub; work on copies so the statement's own agent
        # is left untouched
        sub_changed = copy.deepcopy(stmt.sub)
        sub_changed.mods.append(mc_changed)
        sub_unchanged = copy.deepcopy(stmt.sub)
        sub_unchanged.mods.append(mc_unchanged)
        # Adding a modification consumes the unchanged form; any other
        # Modification consumes the changed form
        if isinstance(stmt, AddModification):
            sub_in, sub_out = sub_unchanged, sub_changed
        else:
            sub_in, sub_out = sub_changed, sub_unchanged
        sub_in_glyph = self._agent_glyph(sub_in)
        sub_out_glyph = self._agent_glyph(sub_out)
        # Make the process glyph
        process_glyph = self._process_glyph('process')
        # Add the arcs
        self._arc('consumption', sub_in_glyph, process_glyph)
        self._arc('production', process_glyph, sub_out_glyph)
        self._arc('catalysis', enz_glyph, process_glyph)

    def _assemble_regulateactivity(self, stmt):
        """Draw a RegulateActivity as a process catalyzed by the subject."""
        # Make glyph for subj
        subj_glyph = self._agent_glyph(stmt.subj)
        # Make glyphs for obj in its active and inactive forms
        obj_act = copy.deepcopy(stmt.obj)
        obj_inact = copy.deepcopy(stmt.obj)
        obj_act.activity = ActivityCondition(stmt.obj_activity, True)
        obj_inact.activity = ActivityCondition(stmt.obj_activity, False)
        if stmt.is_activation:
            obj_in, obj_out = obj_inact, obj_act
        else:
            obj_in, obj_out = obj_act, obj_inact
        obj_in_glyph = self._agent_glyph(obj_in)
        obj_out_glyph = self._agent_glyph(obj_out)
        # Make the process glyph
        process_glyph = self._process_glyph('process')
        # Add the arcs
        self._arc('consumption', obj_in_glyph, process_glyph)
        self._arc('production', process_glyph, obj_out_glyph)
        self._arc('catalysis', subj_glyph, process_glyph)

    def _assemble_regulateamount(self, stmt):
        """Draw a RegulateAmount as a process producing/consuming the object.

        The subject, if there is one, is attached with a catalysis arc.
        """
        # Make glyphs for obj
        obj_glyph = self._agent_glyph(stmt.obj)
        # Make the process glyph
        process_glyph = self._process_glyph('process')
        # Add the arcs
        if isinstance(stmt, DecreaseAmount):
            self._arc('consumption', obj_glyph, process_glyph)
        else:
            self._arc('production', process_glyph, obj_glyph)
        # Make glyph for subj and add arc if needed
        if stmt.subj:
            subj_glyph = self._agent_glyph(stmt.subj)
            self._arc('catalysis', subj_glyph, process_glyph)

    def _assemble_complex(self, stmt):
        """Draw a Complex as an association of its members.

        Statements without any members are skipped.
        """
        # Without members there is no prime agent to build the complex on
        if not stmt.members:
            return
        # Make glyph for individual members
        member_glyphs = [self._agent_glyph(m) for m in stmt.members]
        # Make glyph for complex
        # First we need to unroll all members and their bound conditions
        # into a single list with a single prime agent
        prime_agent = None
        all_members = []
        for i, member in enumerate(stmt.members):
            member_tmp = copy.deepcopy(member)
            bound = [bc.agent for bc in member_tmp.bound_conditions
                     if bc.is_bound]
            member_tmp.bound_conditions = []
            if i == 0:
                prime_agent = member_tmp
            else:
                all_members.append(member_tmp)
            all_members += bound
        # Now we set all the other members as bound conditions on the prime
        # agent
        prime_agent.bound_conditions = \
            [BoundCondition(m, True) for m in all_members]
        complex_glyph = self._agent_glyph(prime_agent)
        process_glyph = self._process_glyph('association')
        for member_glyph in member_glyphs:
            self._arc('consumption', member_glyph, process_glyph)
        self._arc('production', process_glyph, complex_glyph)

    def _assemble_activeform(self, stmt):
        """Draw an ActiveForm as a process yielding the agent's activity.

        Not called by ``make_model`` at present.
        """
        agent_glyph = self._agent_glyph(stmt.agent)
        agent_active = copy.deepcopy(stmt.agent)
        agent_active.activity = ActivityCondition(stmt.activity,
                                                  stmt.is_active)
        agent_active_glyph = self._agent_glyph(agent_active)
        process_glyph = self._process_glyph('process')
        self._arc('consumption', agent_glyph, process_glyph)
        self._arc('production', process_glyph, agent_active_glyph)

    def _arc(self, class_name, source, target):
        """Append an arc of the given class between two element ids."""
        arc_id = self._make_id()
        arc = emaker.arc(class_(class_name), source=source, target=target,
                         id=arc_id)
        self._map.append(arc)

    def _process_glyph(self, class_name):
        """Append a process glyph of the given class and return its id."""
        process_id = self._make_id()
        process_glyph = emaker.glyph(emaker.bbox(**self.process_style),
                                     class_(class_name), id=process_id)
        self._map.append(process_glyph)
        return process_id

    def _none_glyph(self):
        """Append a 'source and sink' glyph and return its id."""
        glyph_id = self._make_id()
        none_glyph = emaker.glyph(emaker.bbox(**self.source_sink_style),
                                  class_('source and sink'), id=glyph_id)
        self._map.append(none_glyph)
        return glyph_id

    def _state_glyph(self, variable, value):
        """Return a new 'state variable' glyph for a variable/value pair."""
        state = emaker.state(variable=variable, value=value)
        return emaker.glyph(state, emaker.bbox(**self.entity_state_style),
                            class_('state variable'), id=self._make_id())

    def _agent_glyph(self, agent, append=True):
        """Build the glyph for an agent and optionally add it to the map.

        Modification conditions and the activity condition become state
        variable glyphs; positive bound conditions turn the result into a
        complex glyph containing the agent and its bound partners.

        Parameters
        ----------
        agent : indra.statements.Agent
            The agent to draw.
        append : Optional[bool]
            If True, the glyph is added to the map (unless an agent with the
            same id is already there) and the agent's id is returned. If
            False, nothing is added to the map and the glyph element itself
            is returned. Default: True

        Returns
        -------
        str or lxml element
            The agent's element id if append is True, otherwise the glyph.
        """
        # TODO: handle location, mutation
        agent_id = self._make_agent_id(agent)
        # An equivalent agent (same matches_key) already has a top-level
        # glyph; appending another one would repeat its id within the map
        if append and agent_id in self._map_agent_ids:
            return agent_id
        agent_type = _get_agent_type(agent)
        glyph = emaker.glyph(emaker.label(text=agent.name),
                             emaker.bbox(**self.monomer_style),
                             class_(agent_type), id=agent_id)

        # NOTE: the 'unit of information' glyph for the agent type
        # (label 'mt:prot', entity_type_style) is temporarily not emitted.
        # TODO: handle other agent types

        # Make glyphs for agent state
        for m in agent.mods:
            if m.residue is not None:
                mod = m.residue
            else:
                # Fall back on the type name for types without abbreviation
                mod = abbrevs.get(m.mod_type, m.mod_type)
            mod_pos = m.position if m.position is not None else ''
            variable = '%s%s' % (mod, mod_pos)
            # Types without dedicated state labels get the generic n/y pair
            state_values = states.get(m.mod_type, ['n', 'y'])
            value = state_values[1 if m.is_modified else 0]
            glyph.append(self._state_glyph(variable, value))
        if agent.activity:
            value = 'a' if agent.activity.is_active else 'i'
            activity_type = agent.activity.activity_type
            variable = abbrevs.get(activity_type, activity_type)
            glyph.append(self._state_glyph(variable, value))

        # Handle bound conditions as complexes
        if agent.bound_conditions:
            members = [glyph]
            for bc in agent.bound_conditions:
                if bc.is_bound:
                    member_glyph = self._agent_glyph(bc.agent, append=False)
                    members.append(member_glyph)
            # Exclude the case where only negative bound conditions
            # are given and so members has only 1 element
            if len(members) > 1:
                complex_glyph = \
                    emaker.glyph(emaker.bbox(**self.complex_style),
                                 class_('complex'), id=self._make_id())
                for member in members:
                    complex_glyph.append(member)
                glyph = complex_glyph
        if append:
            self._map.append(glyph)
            self._map_agent_ids.add(agent_id)
            return agent_id
        return glyph

    def _glyph_for_complex_pattern(self, pattern):
        """Return the glyph for a PySB ComplexPattern.

        Source/sink monomers produce no glyph and are left out. The result
        is None if no monomer glyph remains, the monomer's own glyph if
        exactly one remains, and otherwise a complex glyph containing all
        of them. Nothing is appended to the map.
        """
        monomer_glyphs = []
        for monomer_pattern in pattern.monomer_patterns:
            glyph = self._glyph_for_monomer_pattern(monomer_pattern)
            if glyph is not None:
                monomer_glyphs.append(glyph)

        if not monomer_glyphs:
            return None
        if len(monomer_glyphs) == 1:
            return monomer_glyphs[0]

        # Patterns are identified by their string form
        pattern.matches_key = lambda: str(pattern)
        agent_id = self._make_agent_id(pattern)
        complex_glyph = \
            emaker.glyph(emaker.bbox(**self.complex_style),
                         class_('complex'), id=agent_id)
        for glyph in monomer_glyphs:
            # Prefix member ids with the complex id
            glyph.attrib['id'] = agent_id + glyph.attrib['id']
            complex_glyph.append(glyph)
        return complex_glyph

    def _glyph_for_monomer_pattern(self, pattern):
        """Return the glyph for a PySB MonomerPattern.

        Returns None for the special '__source' and '__sink' monomers.
        Nothing is appended to the map.
        """
        # Patterns are identified by their string form
        pattern.matches_key = lambda: str(pattern)
        agent_id = self._make_agent_id(pattern)
        # Handle sources and sinks
        if pattern.monomer.name in ('__source', '__sink'):
            return None
        # Handle molecules
        glyph = emaker.glyph(emaker.label(text=pattern.monomer.name),
                             emaker.bbox(**self.monomer_style),
                             class_('macromolecule'), id=agent_id)
        # NOTE: the 'unit of information' glyph for the type
        # (label 'mt:prot', entity_type_style) is temporarily not emitted.
        for site, value in pattern.site_conditions.items():
            # Only sites with a non-integer, non-None value are drawn
            if value is None or isinstance(value, int):
                continue
            # Make some common abbreviations
            if site == 'phospho':
                site = 'p'
            elif site == 'activity':
                site = 'act'
                if value == 'active':
                    value = 'a'
                elif value == 'inactive':
                    value = 'i'
            glyph.append(self._state_glyph(site, value))
        return glyph

    def _make_id(self):
        """Return the next unused element id of the form 'id_<n>'."""
        element_id = 'id_%d' % self._id_counter
        self._id_counter += 1
        return element_id

    def _make_agent_id(self, agent):
        """Return the id for an agent, creating one on first sight.

        Agents with equal ``matches_key()`` share the same id.
        """
        key = agent.matches_key()
        mapped_id = self._agent_ids.get(key)
        if mapped_id is not None:
            return mapped_id
        new_id = self._make_id()
        self._agent_ids[key] = new_id
        return new_id

    def statement_exists(self, stmt):
        """Return True if a statement matching stmt is already in the list."""
        return any(stmt.matches(s) for s in self.statements)


def _get_agent_type(agent):
    if agent.db_refs.get('UP') or agent.db_refs.get('HGNC') or \
        agent.db_refs.get('FPLX') or agent.db_refs.get('PF'):
        return 'macromolecule'
    elif agent.db_refs.get('CHEBI') or agent.db_refs.get('PUBCHEM'):
        return 'simple chemical'
    elif agent.db_refs.get('GO'):
        return 'phenotype'
    return 'unspecified entity'


def class_(name):
    """Return a single-entry dict mapping the key 'class' to the given name.

    'class' is a reserved word and so cannot be given as a keyword argument;
    the dict is passed positionally to the element maker instead.
    """
    attributes = {}
    attributes['class'] = name
    return attributes


# Short labels used for state variables, keyed by modification type or
# activity type.
abbrevs = dict([
    ('phosphorylation', 'phospho'),
    ('ubiquitination', 'ub'),
    ('farnesylation', 'farnesyl'),
    ('hydroxylation', 'hydroxyl'),
    ('acetylation', 'acetyl'),
    ('sumoylation', 'sumo'),
    ('glycosylation', 'glycosyl'),
    ('methylation', 'methyl'),
    ('modification', 'mod'),
    ('activity', 'act'),
    ('kinase', 'kin'),
])


# State variable values keyed by modification type; index 0 is the value
# used when the condition is not modified, index 1 when it is.
states = {'phosphorylation': ['u', 'p']}
# Every other listed modification type uses the same n/y pair; each key
# gets its own list object.
states.update(
    (mod_type, ['n', 'y'])
    for mod_type in (
        'ubiquitination',
        'farnesylation',
        'hydroxylation',
        'acetylation',
        'sumoylation',
        'glycosylation',
        'methylation',
        'modification',
    )
)