Source code for indra.sources.acsn.processor

from typing import Optional, Type, Union

from indra.statements import *
from indra.ontology.bio import bio_ontology
from indra.databases.hgnc_client import get_hgnc_id
from indra.ontology.standardize import get_standard_agent

# Maps ACSN relation type strings to the INDRA Statement class used to
# represent them. Relation types that are not keys of this dictionary have
# no INDRA equivalent here and are skipped during extraction.
rel_mapping = {
    'CATALYSIS': Activation,
    'INHIBITION': Inhibition,
    'HETERODIMER_ASSOCIATION': Complex,
    'CATALYSIS;HETERODIMER_ASSOCIATION': Complex
}


class AcsnProcessor:
    """Processes Atlas of Cancer Signalling Network (ACSN) relationships
    into INDRA Statements.

    Parameters
    ----------
    relations_df : pandas.DataFrame
        A data frame of binary relationships between proteins with PMIDs.
        Each row is unpacked as (agent A, relation type, agent B, PMIDs).
    correspondence_dict : dict
        A dictionary with correspondences between ACSN entities and their
        HGNC symbols. Each value is a sequence of gene symbols.

    Attributes
    ----------
    relations_df : pandas.DataFrame
        The relations data frame passed to the constructor.
    correspondence_dict : dict
        The correspondence dictionary passed to the constructor.
    fplx_lookup : dict
        Lookup built by ``_make_famplex_lookup`` whose keys are sorted tuples
        of HGNC gene names and whose values are FamPlex IDs.
    statements : list
        The INDRA Statements accumulated by ``extract_statements``.
    """
    def __init__(self, relations_df, correspondence_dict):
        """Constructor for AcsnProcessor class"""
        self.relations_df = relations_df
        self.correspondence_dict = correspondence_dict
        self.fplx_lookup = _make_famplex_lookup()
        self.statements = []

    def extract_statements(self):
        """Extract INDRA Statements from the ACSN relations.

        The extracted Statements are appended to ``self.statements``;
        the method itself does not return anything. Rows whose relation
        type has no INDRA equivalent, or for which either agent cannot be
        grounded, are skipped.
        """
        for _, row in self.relations_df.iterrows():
            # Each row must consist of exactly these four columns, in order
            acsn_agent_a, stmt_types, acsn_agent_b, pmids = list(row)
            stmt_type = get_stmt_type(stmt_types)
            if not stmt_type:
                continue

            agent_a = self.get_agent(acsn_agent_a)
            agent_b = self.get_agent(acsn_agent_b)
            if not (agent_a and agent_b):
                continue

            evs = self._get_evidence(pmids)
            # Complex takes a list of members whereas the regulation
            # Statements take a subject and an object
            if stmt_type is Complex:
                stmt = stmt_type([agent_a, agent_b], evidence=evs)
            else:
                stmt = stmt_type(agent_a, agent_b, evidence=evs)

            self.statements.append(stmt)

    @staticmethod
    def _get_evidence(pmids):
        """Return a list of Evidence objects for the PMIDs field of a row.

        Parameters
        ----------
        pmids :
            A semicolon-separated string of PMIDs, or a missing value
            (None, or a NaN whose string form is 'nan').

        Returns
        -------
        :
            One Evidence per non-empty PMID, or a single Evidence without
            a PMID if no PMID is available.
        """
        # A missing cell is detected through its string form 'nan', which
        # avoids depending on the concrete type of the missing value
        if pmids is None or str(pmids) == 'nan':
            return [Evidence(source_api='acsn')]

        # Tolerate whitespace around separators and stray/trailing ';' so
        # that no Evidence is created with an empty or padded PMID
        stripped = (pmid.strip() for pmid in str(pmids).split(';'))
        pmid_list = [pmid for pmid in stripped if pmid]
        if not pmid_list:
            return [Evidence(source_api='acsn')]
        return [Evidence(source_api='acsn', pmid=pmid) for pmid in pmid_list]

    def get_agent(self, acsn_agent: str) -> Optional[Agent]:
        """Return an INDRA Agent corresponding to an ACSN agent.

        Parameters
        ----------
        acsn_agent :
            Agent extracted from the relations statement data frame

        Returns
        -------
        :
            Returns INDRA agent with HGNC or FamPlex ID in db_refs. If there
            are no groundings available, we return None.
        """
        mapping = self.correspondence_dict.get(acsn_agent)
        if not mapping:
            return None

        # A single gene symbol is grounded directly to HGNC
        if len(mapping) == 1:
            hgnc_id = get_hgnc_id(mapping[0])
            if hgnc_id:
                return get_standard_agent(mapping[0],
                                          db_refs={'HGNC': hgnc_id})
            return None

        # Multiple gene symbols are grounded to a FamPlex entry only if
        # the sorted set of symbols matches a key of the lookup exactly
        fplx_id = self.fplx_lookup.get(tuple(sorted(mapping)))
        if fplx_id:
            return get_standard_agent(fplx_id, db_refs={'FPLX': fplx_id})
        return None


def get_stmt_type(stmt_type: str) -> Optional[Type[Statement]]:
    """Return INDRA statement type from ACSN relation.

    Parameters
    ----------
    stmt_type :
        An ACSN relationship type

    Returns
    -------
    :
        The INDRA Statement class equivalent to the ACSN relation type,
        or None if a mapping is not available.
    """
    # dict.get returns None for relation types missing from rel_mapping
    return rel_mapping.get(stmt_type)


def _make_famplex_lookup():
    """Build a lookup from gene sets to FamPlex IDs.

    Each key is a tuple of HGNC gene names in sorted order and each value
    is the FamPlex ID of the entry having those genes as children.
    """
    bio_ontology.initialize()
    lookup = {}
    for node in bio_ontology.nodes:
        db_ns, db_id = bio_ontology.get_ns_id(node)
        # Only FamPlex entries contribute keys to the lookup
        if db_ns != 'FPLX':
            continue
        gene_names = sorted(
            bio_ontology.get_name(*child)
            for child in bio_ontology.get_children(db_ns, db_id)
            if child[0] == 'HGNC'
        )
        lookup[tuple(gene_names)] = db_id
    return lookup