Source code for indra.assemblers.index_card.assembler

import json
import logging
from indra.statements import *
from indra.literature import id_lookup
from indra.databases import hgnc_client, uniprot_client, chebi_client, \
    go_client

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Value written into the 'submitter' field of every card produced by
# IndexCardAssembler.assemble_one_card.
global_submitter = 'indra'


class IndexCardAssembler(object):
    """Assembler creating index cards from a set of INDRA Statements.

    Parameters
    ----------
    statements : list
        A list of INDRA statements to be assembled.
    pmc_override : Optional[str]
        A PMC ID to assign to the index card.

    Attributes
    ----------
    statements : list
        A list of INDRA statements to be assembled.
    cards : list
        The index cards produced by the most recent call to make_model.
    pmc_override : Optional[str]
        If not None, the PMC ID assigned to every assembled card.
    """
    def __init__(self, statements=None, pmc_override=None):
        if statements is None:
            self.statements = []
        else:
            self.statements = statements
        self.cards = []
        self.pmc_override = pmc_override

    def add_statements(self, statements):
        """Add statements to the assembler.

        Parameters
        ----------
        statements : list[indra.statements.Statement]
            The list of Statements to add to the assembler.
        """
        self.statements.extend(statements)

    def make_model(self):
        """Assemble statements into index cards.

        The list of cards is rebuilt from scratch on every call, so calling
        this method again (for instance after add_statements) does not
        duplicate the cards assembled by an earlier call.

        Returns
        -------
        cards : list
            One card per Statement for which a card could be assembled.
        """
        self.cards = []
        for stmt in self.statements:
            card = self.assemble_one_card(stmt, self.pmc_override)
            if card is not None:
                self.cards.append(card)
        return self.cards

    @staticmethod
    def assemble_one_card(stmt, pmc_override=None):
        """Assemble a single Statement into an index card.

        Parameters
        ----------
        stmt : indra.statements.Statement
            The Statement to assemble.
        pmc_override : Optional[str]
            If given, used as the card's 'pmc_id' instead of looking the
            PMC ID up from the Statement's evidence.

        Returns
        -------
        card : IndexCard or None
            The assembled card, or None if the Statement type is not
            handled or the type-specific assembly returned None.
        """
        if isinstance(stmt, Modification):
            card = assemble_modification(stmt)
        elif isinstance(stmt, SelfModification):
            card = assemble_selfmodification(stmt)
        elif isinstance(stmt, Complex):
            card = assemble_complex(stmt)
        elif isinstance(stmt, Translocation):
            card = assemble_translocation(stmt)
        elif isinstance(stmt, RegulateActivity):
            card = assemble_regulate_activity(stmt)
        elif isinstance(stmt, RegulateAmount):
            card = assemble_regulate_amount(stmt)
        else:
            return None
        if card is not None:
            # Fill in the fields that are common to all card types
            card.card['meta'] = {'id': stmt.uuid, 'belief': stmt.belief}
            ev_info = get_evidence_info(stmt)
            card.card['interaction']['hypothesis_information'] = \
                ev_info['hypothesis']
            card.card['interaction']['context'] = ev_info['context']
            card.card['evidence'] = ev_info['text']
            card.card['submitter'] = global_submitter
            if pmc_override is not None:
                card.card['pmc_id'] = pmc_override
            else:
                card.card['pmc_id'] = get_pmc_id(stmt)
        return card

    def print_model(self):
        """Return the assembled cards as a JSON string.

        Returns
        -------
        cards_json : str
            The JSON string representing the assembled cards.
        """
        cards = [c.card for c in self.cards]
        # If there is only one card, print it as a single
        # card not as a list
        if len(cards) == 1:
            cards = cards[0]
        cards_json = json.dumps(cards, indent=1)
        return cards_json

    def save_model(self, file_name='index_cards.json'):
        """Save the assembled cards into a file.

        Parameters
        ----------
        file_name : Optional[str]
            The name of the file to save the cards into. Default:
            index_cards.json
        """
        with open(file_name, 'wt') as fh:
            fh.write(self.print_model())
class IndexCard(object): def __init__(self): self.card = { 'pmc_id': None, 'submitter': None, 'interaction': { 'negative_information': False, 'hypothesis_information' : None, 'interaction_type': None, 'participant_a': { 'entity_type': None, 'entity_text': None, 'identifier': None }, 'participant_b': { 'entity_type': None, 'entity_text': None, 'identifier': None } } } def get_string(self): return json.dumps(self.card) def assemble_complex(stmt): card = IndexCard() card.card['interaction']['interaction_type'] = 'complexes_with' card.card['interaction'].pop('participant_b', None) # NOTE: fill out entity_text card.card['interaction']['participant_a']['entity_type'] = 'complex' card.card['interaction']['participant_a']['entity_text'] = [''] card.card['interaction']['participant_a'].pop('identifier', None) card.card['interaction']['participant_a']['entities'] = [] for m in stmt.members: p = get_participant(m) card.card['interaction']['participant_a']['entities'].append(p) return card def assemble_regulate_activity(stmt): # Top level card card = IndexCard() int_type = ('increases' if stmt.is_activation else 'decreases') card.card['interaction']['interaction_type'] = int_type card.card['interaction']['participant_a'] = get_participant(stmt.subj) # Embedded interaction interaction = {} interaction['negative_information'] = False interaction['participant_a'] = get_participant(stmt.obj) if stmt.obj_activity == 'kinase': interaction['participant_b'] = get_generic('protein') interaction['interaction_type'] = 'adds_modification' interaction['modifications'] = [{ 'feature_type': 'modification_feature', 'modification_type': 'phosphorylation', }] card.card['interaction']['participant_b'] = interaction elif stmt.obj_activity == 'transcription': interaction['participant_b'] = get_generic('gene') interaction['interaction_type'] = 'increases' card.card['interaction']['participant_b'] = interaction else: return None return card def assemble_regulate_amount(stmt): # Top level card card = 
IndexCard() if isinstance(stmt, IncreaseAmount): int_type = 'increases' else: int_type = 'decreases' card.card['interaction']['interaction_type'] = int_type card.card['interaction']['participant_a'] = get_participant(stmt.subj) card.card['interaction']['participant_b'] = get_participant(stmt.obj) return card def assemble_modification(stmt): card = IndexCard() mod_type = modclass_to_modtype[stmt.__class__] interaction = {} interaction['negative_information'] = False if isinstance(stmt, RemoveModification): interaction['interaction_type'] = 'removes_modification' mod_type = modtype_to_inverse[mod_type] else: interaction['interaction_type'] = 'adds_modification' interaction['modifications'] = [{ 'feature_type': 'modification_feature', 'modification_type': mod_type, }] if stmt.position is not None: pos = int(stmt.position) interaction['modifications'][0]['location'] = pos if stmt.residue is not None: interaction['modifications'][0]['aa_code'] = stmt.residue # If the statement is direct or there is no enzyme if _get_is_direct(stmt) or stmt.enz is None: interaction['participant_a'] = get_participant(stmt.enz) interaction['participant_b'] = get_participant(stmt.sub) card.card['interaction'] = interaction # If the statement is indirect, we generate an index card: # SUB increases (GENERIC adds_modification ENZ) else: interaction['participant_a'] = get_participant(None) interaction['participant_b'] = get_participant(stmt.sub) card.card['interaction']['interaction_type'] = 'increases' card.card['interaction']['negative_information'] = False card.card['interaction']['participant_a'] = get_participant(stmt.enz) card.card['interaction']['participant_b'] = interaction return card def assemble_selfmodification(stmt): card = IndexCard() mod_type = stmt.__class__.__name__.lower() if mod_type.endswith('phosphorylation'): mod_type = 'phosphorylation' else: return None interaction = {} interaction['negative_information'] = False interaction['interaction_type'] = 'adds_modification' 
interaction['modifications'] = [{ 'feature_type': 'modification_feature', 'modification_type': mod_type, }] if stmt.position is not None: pos = int(stmt.position) interaction['modifications'][0]['location'] = pos if stmt.residue is not None: interaction['modifications'][0]['aa_code'] = stmt.residue # If the statement is direct or there is no enzyme if _get_is_direct(stmt) or stmt.enz is None: interaction['participant_a'] = get_participant(stmt.enz) interaction['participant_b'] = get_participant(stmt.enz) card.card['interaction'] = interaction return card def assemble_translocation(stmt): # Index cards don't allow missing to_location if stmt.to_location is None: return None card = IndexCard() interaction = {} interaction['negative_information'] = False interaction['interaction_type'] = 'translocates' if stmt.from_location is not None: interaction['from_location_text'] = stmt.from_location from_loc_id = \ go_client.get_go_id_from_label_or_synonym(stmt.from_location) interaction['from_location_id'] = from_loc_id interaction['to_location_text'] = stmt.to_location to_loc_id = \ go_client.get_go_id_from_label_or_synonym(stmt.to_location) interaction['to_location_id'] = to_loc_id interaction['participant_a'] = get_participant(None) interaction['participant_b'] = get_participant(stmt.agent) card.card['interaction'] = interaction return card def get_generic(entity_type='protein'): participant = { 'entity_text': [''], 'entity_type': entity_type, 'identifier': 'GENERIC' } return participant def get_participant(agent): # Handle missing Agent as generic protein if agent is None: return get_generic('protein') # The Agent is not missing text_name = agent.db_refs.get('TEXT') if text_name is None: text_name = agent.name participant = {} participant['entity_text'] = [text_name] hgnc_id = agent.db_refs.get('HGNC') uniprot_id = agent.db_refs.get('UP') chebi_id = agent.db_refs.get('CHEBI') pfam_def_ids = agent.db_refs.get('PFAM-DEF') # If HGNC grounding is available, that is the first 
choice if hgnc_id: uniprot_id = hgnc_client.get_uniprot_id(hgnc_id) if uniprot_id: uniprot_mnemonic = str(uniprot_client.get_mnemonic(uniprot_id)) participant['identifier'] = 'UNIPROT:%s' % uniprot_mnemonic participant['entity_type'] = 'protein' elif chebi_id: pubchem_id = chebi_client.get_pubchem_id(chebi_id) participant['identifier'] = 'PUBCHEM:%s' % pubchem_id participant['entity_type'] = 'chemical' elif pfam_def_ids: participant['entity_type'] = 'protein_family' participant['entities'] = [] pfam_def_list = [] for p in pfam_def_ids.split('|'): dbname, dbid = p.split(':') pfam_def_list.append({dbname: dbid}) for pdi in pfam_def_list: # TODO: handle non-uniprot protein IDs here uniprot_id = pdi.get('UP') if uniprot_id: entity_dict = {} uniprot_mnemonic = \ str(uniprot_client.get_mnemonic(uniprot_id)) gene_name = uniprot_client.get_gene_name(uniprot_id) if gene_name is None: gene_name = "" entity_dict['entity_text'] = [gene_name] entity_dict['identifier'] = 'UNIPROT:%s' % uniprot_mnemonic entity_dict['entity_type'] = 'protein' participant['entities'].append(entity_dict) else: participant['identifier'] = '' participant['entity_type'] = 'protein' features = [] not_features = [] # Binding features for bc in agent.bound_conditions: feature = { 'feature_type': 'binding_feature', 'bound_to': { # NOTE: get type and identifier for bound to protein 'entity_type': 'protein', 'entity_text': [bc.agent.name], 'identifier': '' } } if bc.is_bound: features.append(feature) else: not_features.append(feature) # Modification features for mc in agent.mods: feature = { 'feature_type': 'modification_feature', 'modification_type': mc.mod_type.lower(), } if mc.position is not None: pos = int(mc.position) feature['location'] = pos if mc.residue is not None: feature['aa_code'] = mc.residue if mc.is_modified: features.append(feature) else: not_features.append(feature) # Mutation features for mc in agent.mutations: feature = {} feature['feature_type'] = 'mutation_feature' if mc.residue_from 
is not None: feature['from_aa'] = mc.residue_from if mc.residue_to is not None: feature['to_aa'] = mc.residue_to if mc.position is not None: pos = int(mc.position) feature['location'] = pos features.append(feature) if features: participant['features'] = features if not_features: participant['not_features'] = not_features return participant def get_pmc_id(stmt): pmc_id = '' for ev in stmt.evidence: pmc_id = id_lookup(ev.pmid, 'pmid')['pmcid'] if pmc_id is not None: if not pmc_id.startswith('PMC'): pmc_id = 'PMC' + pmc_id else: pmc_id = '' return str(pmc_id) def get_evidence_info(stmt): ev_txts = [] contexts = [] hypotheses = [] evs = (('', stmt.evidence), ('PARTIAL: ', ([] if not hasattr(stmt, 'partial_evidence') else stmt.partial_evidence))) for prefix, ev_list in evs: for ev in ev_list: if ev.text is None: ev_txts.append( '%sEvidence text not available for %s entry: %s' % (prefix, ev.source_api, ev.source_id)) else: ev_txts.append('%s%s' % (prefix, ev.text)) if ev.context and ev.context.species: species = ev.context.species obj = {} obj['name'] = species.name obj['taxonomy'] = species.db_refs.get('TAXONOMY') \ if species.db_refs is not None else None else: obj = None contexts.append(obj) hypothesis = ev.epistemics.get('hypothesis') hypotheses.append(hypothesis) return {'text': ev_txts, 'context': contexts, 'hypothesis': hypotheses} def _get_is_direct(stmt): """Returns true if there is evidence that the statement is a direct interaction. If any of the evidences associated with the statement indicates a direct interatcion then we assume the interaction is direct. If there is no evidence for the interaction being indirect then we default to direct.""" any_indirect = False for ev in stmt.evidence: if ev.epistemics.get('direct') is True: return True elif ev.epistemics.get('direct') is False: # This guarantees that we have seen at least # some evidence that the statement is indirect any_indirect = True if any_indirect: return False return True