Source code for indra.statements.evidence

from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str
from future.utils import python_2_unicode_compatible


__all__ = ['Evidence']


import sys
import json
import textwrap
from collections import OrderedDict as _o
from .util import *
from .context import Context


@python_2_unicode_compatible
class Evidence(object):
    """Container for evidence supporting a given statement.

    Parameters
    ----------
    source_api : str or None
        String identifying the INDRA API used to capture the statement,
        e.g., 'trips', 'biopax', 'bel'.
    source_id : str or None
        For statements drawn from databases, ID of the database entity
        corresponding to the statement.
    pmid : str or None
        String indicating the Pubmed ID of the source of the statement.
    text : str
        Natural language text supporting the statement.
    annotations : dict
        Dictionary containing additional information on the
        context of the statement, e.g., species, cell line,
        tissue type, etc. The entries may vary depending on
        the source of the information.
    epistemics : dict
        A dictionary describing various forms of epistemic
        certainty associated with the statement.
    context : Context or None
        An object describing the context of the evidence. It is stored
        as given and must provide a ``to_json`` method, which is used
        when the evidence is serialized or printed.
    text_refs : dict
        A dictionary of various reference ids to the source text, e.g.
        DOI, PMID, URL, etc.

    There are some attributes which are not set by the parameters above:

    source_hash : int
        A hash calculated from the evidence text, source api, and pmid and/or
        source_id if available. This is generated automatically when the
        object is instantiated.
    stmt_tag : int
        This is a hash calculated by a Statement to which this evidence refers,
        and is set by said Statement. It is useful for tracing ownership of
        an Evidence object.
    """
    def __init__(self, source_api=None, source_id=None, pmid=None, text=None,
                 annotations=None, epistemics=None, context=None,
                 text_refs=None):
        self.source_api = source_api
        self.source_id = source_id
        self.pmid = pmid
        # text_refs is always a fresh dict: seeded with the PMID (if any) and
        # then overlaid with the caller's entries, which take precedence.
        self.text_refs = {}
        if pmid is not None:
            self.text_refs['PMID'] = pmid
        if text_refs is not None:
            self.text_refs.update(text_refs)
        self.text = text
        # Falsy values (None or an empty dict) are replaced by a new dict.
        if annotations:
            self.annotations = annotations
        else:
            self.annotations = {}
        if epistemics:
            self.epistemics = epistemics
        else:
            self.epistemics = {}
        self.context = context
        # The hash is computed eagerly from the attributes set above.
        self.source_hash = None
        self.get_source_hash()
        self.stmt_tag = None

    def __setstate__(self, state):
        """Restore instance state, filling in attributes that are missing.

        Any of 'context', 'text_refs', 'stmt_tag' and 'source_hash' that is
        absent from the given state dict is set to its empty default before
        the state is installed as the instance dictionary.
        """
        if 'context' not in state:
            state['context'] = None
        if 'text_refs' not in state:
            state['text_refs'] = {}
        if 'stmt_tag' not in state:
            state['stmt_tag'] = None
        if 'source_hash' not in state:
            state['source_hash'] = None
        self.__dict__ = state

    def get_source_hash(self, refresh=False):
        """Get a hash based off of the source of this statement.

        The resulting value is stored in the source_hash attribute of the
        class and is preserved in the json dictionary.

        Parameters
        ----------
        refresh : bool
            If True, recompute the hash even if one is already stored.
            Default: False

        Returns
        -------
        The stored or newly computed source hash.
        """
        if hasattr(self, 'source_hash') and self.source_hash is not None \
                and not refresh:
            return self.source_hash
        s = str(self.source_api) + str(self.source_id)
        # The text is preferred; the pmid is only used when there is no
        # usable text.
        if self.text and isinstance(self.text, str):
            s += self.text
        elif self.pmid and isinstance(self.pmid, str):
            s += self.pmid
        self.source_hash = make_hash(s, 16)
        return self.source_hash

    def matches_key(self):
        """Return a string key summarizing the content of this evidence.

        The key is built from the source api, source id, pmid, text and the
        sorted items of the annotations and epistemics dictionaries. Quote
        characters are removed, every occurrence of 'None' is replaced by
        '~', and the enclosing list brackets are stripped.
        """
        key_lst = [self.source_api, self.source_id, self.pmid, self.text]
        for d in [self.annotations, self.epistemics]:
            d_key = list(d.items())
            d_key.sort()
            key_lst.append(d_key)
        key = str(key_lst)
        return key.replace('"', '').replace('\'', '').replace('None', '~')[1:-1]

    def equals(self, other):
        """Return True if the other evidence has the same content.

        The comparison covers source_api, source_id, pmid, text, annotations,
        epistemics and context; text_refs, source_hash and stmt_tag are not
        compared.
        """
        matches = (self.source_api == other.source_api) and \
                  (self.source_id == other.source_id) and \
                  (self.pmid == other.pmid) and \
                  (self.text == other.text) and \
                  (self.annotations == other.annotations) and \
                  (self.epistemics == other.epistemics) and \
                  (self.context == other.context)
        return matches

    def to_json(self):
        """Convert the evidence object into a JSON dict.

        Attributes with falsy values are omitted, except for the source
        hash, which is always included.
        """
        json_dict = _o({})
        if self.source_api:
            json_dict['source_api'] = self.source_api
        if self.pmid:
            json_dict['pmid'] = self.pmid
        if self.source_id:
            json_dict['source_id'] = self.source_id
        if self.text:
            json_dict['text'] = self.text
        if self.annotations:
            json_dict['annotations'] = self.annotations
        if self.epistemics:
            json_dict['epistemics'] = self.epistemics
        if self.context:
            json_dict['context'] = self.context.to_json()
        if self.text_refs:
            json_dict['text_refs'] = self.text_refs
        json_dict['source_hash'] = self.get_source_hash()
        if self.stmt_tag:
            json_dict['stmt_tag'] = self.stmt_tag
        return json_dict

    @classmethod
    def _from_json(cls, json_dict):
        """Create an evidence object from a JSON dict.

        Parameters
        ----------
        json_dict : dict
            A dictionary with the keys produced by ``to_json``. All keys
            are optional.

        Returns
        -------
        An instance of the class on which this method was called.
        """
        source_api = json_dict.get('source_api')
        source_id = json_dict.get('source_id')
        pmid = json_dict.get('pmid')
        text = json_dict.get('text')
        # `or {}` also covers keys that are present with an explicit null
        # value, which would otherwise fail on the call to copy().
        annotations = (json_dict.get('annotations') or {}).copy()
        epistemics = (json_dict.get('epistemics') or {}).copy()
        context_entry = json_dict.get('context')
        text_refs = (json_dict.get('text_refs') or {}).copy()
        if context_entry:
            context = Context.from_json(context_entry)
        else:
            context = None
        stmt_tag = json_dict.get('stmt_tag')
        # Note that the source hash will be re-generated upon loading, so if
        # any of the relevant attributes used to create the hash changed, the
        # hash will also have changed.
        # Instantiate through cls so that subclasses deserialize to their
        # own type rather than to the base class.
        ev = cls(source_api=source_api, source_id=source_id,
                 pmid=pmid, text=text, annotations=annotations,
                 epistemics=epistemics, context=context,
                 text_refs=text_refs)
        ev.stmt_tag = stmt_tag
        return ev

    def __str__(self):
        """Return a multi-line, human-readable rendering of the evidence.

        Only attributes with truthy values are listed, one per entry, with
        continuation lines indented to line up under the opening
        parenthesis.
        """
        ev_str = 'Evidence('
        tab_len = len(ev_str)

        def _indented_join(s_list, depth):
            # Indent every line by depth spaces, except the first one, whose
            # leading spaces are stripped again.
            return '\n'.join(' '*depth + s for s in s_list).lstrip(' ')

        lines = []

        def _add_line(name, s):
            lines.append('%s=%s' % (name, s))

        def _format_line(name, s):
            return _add_line(name, "'%s'" % s)

        def _format_dict(d, name, indent=9):
            s = json.dumps(d, indent=1)
            s = _indented_join(s.splitlines(), indent+len(name)+1)
            return _add_line(name, s)

        if self.source_api:
            _format_line('source_api', self.source_api)
        if self.pmid:
            _format_line('pmid', self.pmid)
        if self.source_id:
            _format_line('source_id', self.source_id)
        if self.text:
            txt = _indented_join(textwrap.wrap(self.text, width=65),
                                 tab_len+6)
            _format_line('text', txt)
        if self.annotations:
            _format_dict(self.annotations, 'annotations')
        if self.context:
            _format_dict(self.context.to_json(), 'context')
        if self.epistemics:
            _format_dict(self.epistemics, 'epistemics')

        div = ',\n' + ' '*9
        ev_str += div.join(lines)
        # Put the closing parenthesis on its own line for multi-line output.
        if len(ev_str.splitlines()) > 1:
            ev_str += '\n' + ' '*9
        ev_str += ')\n\n'
        return ev_str

    def __repr__(self):
        """Return the string form, encoded as UTF-8 bytes on Python 2."""
        if sys.version_info[0] >= 3:
            return str(self)
        else:
            return str(self).encode('utf-8')