"""This module implements classes and functions that are used for
finding refinements between INDRA Statements as part of the
knowledge-assembly process. These are imported by the preassembler
module."""
__all__ = ['get_agent_key', 'get_relevant_keys', 'RefinementFilter',
'RefinementConfirmationFilter', 'OntologyRefinementFilter',
'SplitGroupFilter', 'default_refinement_fun']
import time
import logging
import collections
from indra.statements import Event
from indra.statements import stmt_type as indra_stmt_type
logger = logging.getLogger(__name__)
# TODO: we could make the agent key function parameterizable with the
# preassembler to allow custom agent mappings to the ontology.
def get_agent_key(agent):
"""Return a key for an Agent for use in refinement finding.
Parameters
----------
agent : indra.statements.Agent or None
An INDRA Agent whose key should be returned.
Returns
-------
tuple or None
The key that maps the given agent to the ontology, with special
handling for ungrounded and None Agents.
"""
if isinstance(agent, Event):
agent = agent.concept
if agent is None:
agent_key = None
else:
agent_key = agent.get_grounding()
if not agent_key[0]:
agent_key = ('NAME', agent.name)
return agent_key
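# A minimal usage sketch (values are illustrative and depend on the Agent's
# db_refs; shown as comments so nothing runs at import time):
#
#     >>> from indra.statements import Agent
#     >>> get_agent_key(Agent('MEK', db_refs={'FPLX': 'MEK'}))
#     ('FPLX', 'MEK')
#     >>> get_agent_key(Agent('unknown protein'))  # ungrounded: name fallback
#     ('NAME', 'unknown protein')
#     >>> get_agent_key(None) is None
#     True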
def get_relevant_keys(agent_key, all_keys_for_role, ontology, direction):
"""Return relevant agent keys for an agent key for refinement finding.
Parameters
----------
agent_key : tuple or None
An agent key of interest.
all_keys_for_role : set
The set of all agent keys in a given statement corpus with a
role matching that of the given agent_key.
ontology : indra.ontology.IndraOntology
An IndraOntology instance with respect to which relevant other
agent keys are found for the purposes of refinement.
    direction : str
The direction in which to find relevant agents. The two options
are 'less_specific' and 'more_specific' for agents that are less and
more specific, per the ontology, respectively.
Returns
-------
set
        The set of relevant agent keys that the given agent key can
        possibly refine.
"""
rel_fun = ontology.get_parents if direction == 'less_specific' else \
ontology.get_children
relevant_keys = {None, agent_key}
if agent_key is not None:
relevant_keys |= set(rel_fun(*agent_key))
relevant_keys &= all_keys_for_role
return relevant_keys
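# A minimal sketch of how this is typically used (illustrative; assumes the
# standard bio ontology, in which HGNC:6840/MAP2K1 is a child of FPLX:MEK;
# set ordering in the output is arbitrary):
#
#     >>> from indra.ontology.bio import bio_ontology
#     >>> all_keys = {('FPLX', 'MEK'), ('HGNC', '6840'), None}
#     >>> get_relevant_keys(('HGNC', '6840'), all_keys, bio_ontology,
#     ...                   direction='less_specific')
#     {None, ('HGNC', '6840'), ('FPLX', 'MEK')}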
class RefinementFilter:
"""A filter which is applied to one or more statements to eliminate
candidate refinements that are not possible according to some
criteria. By applying a series of such filters, the preassembler can avoid
doing n-by-n comparisons to determine refinements among n statements.
The filter class can take any number of constructor arguments that it
needs to perform its task. The base class' constructor initializes
a shared_data attribute as an empty dict.
It also needs to implement an initialize function which is called
with a stmts_by_hash argument, containing a dict of statements keyed by
hash. This function can build any data structures that may be needed
    to efficiently apply the filter later. It can store any
such data structures in the shared_data dict to be accessed by
other functions later.
Finally, the class needs to implement a get_related function, which
takes a single INDRA Statement as input to return the hashes of
potentially related other statements that the filter was initialized
with. The function also needs to take a possibly_related argument
which is either None (no other filter was run before) or a set,
which is the superset of possible relations as determined by some
other previously applied filter.
"""
def __init__(self):
self.shared_data = {}
    def initialize(self, stmts_by_hash):
"""Initialize the filter class with a set of statements.
The filter can build up some useful data structures in this
function before being applied to any specific statements.
Parameters
----------
stmts_by_hash : dict[int, indra.statements.Statement]
A dict of statements keyed by their hashes.
"""
self.shared_data['stmts_by_hash'] = stmts_by_hash
    def get_more_specifics(self, stmt, possibly_related=None):
"""Return a set of hashes of statements that are potentially related
and more specific than the given statement."""
return self.get_related(stmt, possibly_related=possibly_related,
direction='more_specific')
    def get_less_specifics(self, stmt, possibly_related=None):
"""Return a set of hashes of statements that are potentially related
and less specific than the given statement."""
return self.get_related(stmt, possibly_related=possibly_related,
direction='less_specific')
    def extend(self, stmts_by_hash):
"""Extend the initial data structures with a set of new statements.
Parameters
----------
stmts_by_hash : dict[int, indra.statements.Statement]
A dict of statements keyed by their hashes.
"""
# We can assume that these stmts_by_hash are unique
self.shared_data['stmts_by_hash'].update(stmts_by_hash)
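# A minimal sketch of a custom filter following the protocol described in the
# RefinementFilter docstring (hypothetical example, not part of INDRA; shown
# as comments so nothing is defined at import time):
#
#     class HashWhitelistFilter(RefinementFilter):
#         """Only allow relations with a fixed set of statement hashes."""
#         def __init__(self, allowed_hashes):
#             super().__init__()
#             self.allowed_hashes = allowed_hashes
#
#         def get_related(self, stmt, possibly_related=None,
#                         direction='less_specific'):
#             # Start from the whitelisted hashes actually in the corpus
#             related = self.allowed_hashes & \
#                 set(self.shared_data['stmts_by_hash'])
#             related.discard(stmt.get_hash())
#             # Respect constraints imposed by previously applied filters
#             if possibly_related is not None:
#                 related &= possibly_related
#             return related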
class OntologyRefinementFilter(RefinementFilter):
"""This filter uses an ontology to position statements and their agents
to filter down significantly on the set of possible relations for
a given statement.
Parameters
----------
ontology : indra.ontology.OntologyGraph
An INDRA ontology graph.
"""
def __init__(self, ontology):
super().__init__()
self.ontology = ontology
    def initialize(self, stmts_by_hash):
self.shared_data['stmts_by_hash'] = {}
self.extend(stmts_by_hash)
    def extend(self, stmts_by_hash):
self.shared_data['stmts_by_hash'].update(stmts_by_hash)
# Build up data structure of statement hashes by
# statement type
stmts_by_type = collections.defaultdict(set)
for stmt_hash, stmt in stmts_by_hash.items():
stmts_by_type[indra_stmt_type(stmt)].add(stmt_hash)
stmts_by_type = dict(stmts_by_type)
# Now iterate over each statement type and build up
# data structures for quick filtering
for stmt_type, stmts_this_type in stmts_by_type.items():
# Step 1. initialize data structures
# noinspection PyProtectedMember
roles = stmts_by_hash[next(iter(stmts_this_type))]._agent_order
if stmt_type not in self.shared_data:
self.shared_data[stmt_type] = {}
# Mapping agent keys to statement hashes
self.shared_data[stmt_type]['agent_key_to_hash'] = \
{role: collections.defaultdict(set) for role in roles}
# Mapping statement hashes to agent keys
self.shared_data[stmt_type]['hash_to_agent_key'] = \
{role: collections.defaultdict(set) for role in roles}
# All agent keys for a given agent role
self.shared_data[stmt_type]['all_keys_by_role'] = {}
# Step 2. Fill up the initial data structures in preparation
# for identifying potential refinements
for sh in stmts_this_type:
for role in roles:
agent_keys = self._agent_keys_for_stmt_role(
stmts_by_hash[sh], role)
for agent_key in agent_keys:
self.shared_data[stmt_type]['agent_key_to_hash'][
role][agent_key].add(sh)
self.shared_data[stmt_type]['hash_to_agent_key'][
role][sh].add(agent_key)
for role in roles:
self.shared_data[stmt_type]['all_keys_by_role'][role] = \
set(self.shared_data[stmt_type]['agent_key_to_hash'][role])
@staticmethod
def _agent_keys_for_stmt_role(stmt, role):
"""Return a set of agent keys for a statement's agent in a role.
The agent key is an "anchor" to the ontology being used and positons
a statement, via its agent in this role against other statements it
may be related to.
"""
agents = getattr(stmt, role)
        # Handle a special case here where a list-like agent
        # role can be empty; in that case we consider anything else
        # to be a refinement, hence add a None key
if isinstance(agents, list) and not agents:
agent_keys = {None}
# Generally, we take all the agent keys for a single or
# list-like agent role.
else:
agent_keys = {get_agent_key(agent) for agent in
(agents if isinstance(agents, list)
else [agents])}
return agent_keys
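    # For orientation, the data structures built up in extend() have roughly
    # the following shape (statement type, roles, keys and hashes below are
    # illustrative):
    #
    #     shared_data[Phosphorylation] = {
    #         'agent_key_to_hash': {
    #             'enz': {('FPLX', 'MEK'): {hash1, hash2}, ...},
    #             'sub': {('HGNC', '6871'): {hash1}, ...}},
    #         'hash_to_agent_key': {
    #             'enz': {hash1: {('FPLX', 'MEK')}, ...},
    #             'sub': {hash1: {('HGNC', '6871')}, ...}},
    #         'all_keys_by_role': {
    #             'enz': {('FPLX', 'MEK'), ...},
    #             'sub': {('HGNC', '6871'), ...}},
    #     }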
class RefinementConfirmationFilter(RefinementFilter):
"""This class runs the refinement function between potentially
related statements to confirm whether they are indeed, conclusively
in a refinement relationship with each other.
In this sense, this isn't a real filter, though implementing it
as one is convenient. This filter is meant to be used as the final
component in a series of pre-filters.
"""
def __init__(self, ontology, refinement_fun=None):
self.ontology = ontology
self.refinement_fun = refinement_fun if refinement_fun else \
default_refinement_fun
self.shared_data = {}
self.comparison_counter = 0
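    # A rough sketch of the confirmation this filter performs in its
    # get_related method (illustrative, not the actual implementation): each
    # candidate hash surviving the earlier filters is checked with the
    # refinement function, and only confirmed refinements are kept, e.g.
    #
    #     confirmed = set()
    #     for candidate_hash in (possibly_related or set()):
    #         other = self.shared_data['stmts_by_hash'][candidate_hash]
    #         self.comparison_counter += 1
    #         if self.refinement_fun(stmt, other, self.ontology,
    #                                entities_refined=True):
    #             confirmed.add(candidate_hash)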
class SplitGroupFilter(RefinementFilter):
"""This filter implements splitting statements into two groups and
only considering refinement relationships between the groups but not
within them."""
def __init__(self, split_groups):
super().__init__()
self.split_groups = split_groups
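    # A rough sketch of the group-splitting logic in get_related
    # (illustrative, not the actual implementation; assumes split_groups maps
    # statement hashes to group identifiers):
    #
    #     my_group = self.split_groups.get(stmt.get_hash())
    #     related = {sh for sh in (possibly_related or set())
    #                if self.split_groups.get(sh) != my_group}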
def default_refinement_fun(st1, st2, ontology, entities_refined):
    """Return True if st1 is a refinement (a more specific version) of st2
    with respect to the given ontology."""
    return st1.refinement_of(st2, ontology, entities_refined)
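# A minimal end-to-end sketch of how these filters are typically chained
# (driver code of this kind normally lives in the preassembler; names such as
# bio_ontology and stmts are assumed to be defined elsewhere):
#
#     filters = [OntologyRefinementFilter(bio_ontology),
#                RefinementConfirmationFilter(bio_ontology)]
#     stmts_by_hash = {stmt.get_hash(): stmt for stmt in stmts}
#     for filt in filters:
#         filt.initialize(stmts_by_hash)
#     for stmt in stmts:
#         possibly_related = None
#         for filt in filters:
#             possibly_related = filt.get_less_specifics(
#                 stmt, possibly_related=possibly_related)
#         # possibly_related now holds hashes of statements that stmt refines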