# Public API of this module: the names exported by a star-import.
# NOTE(review): 'InputError' is exported here (and raised by
# `_promote_support`), but its definition is not visible in this part of the
# file -- confirm it is defined elsewhere in the module.
__all__ = ['stmts_from_json', 'stmts_from_json_file', 'stmts_to_json',
'stmts_to_json_file', 'draw_stmt_graph', 'pretty_print_stmts',
'UnresolvedUuidError', 'InputError',
'set_pretty_print_max_width', 'print_stmt_summary',
'stmt_from_json', 'stmt_from_json_str']
import json
import logging
import os
import pathlib
from collections import Counter
from typing import Collection, List, Optional, Union
from indra.statements.statements import Statement, Unresolved
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
def stmts_from_json(json_in, on_missing_support='handle'):
    """Build a list of Statements from a list of Statement JSON dicts.

    For pre-assembled Statements that carry `supports` and `supported_by`
    lists, the uuids in those lists are replaced by references to the
    Statement objects created from `json_in`, wherever a match exists. What
    happens to uuids without a match is controlled by the
    `on_missing_support` keyword argument.

    JSON entries that fail to deserialize are logged as warnings and skipped.

    Parameters
    ----------
    json_in : iterable[dict]
        A JSON list of dict representations of INDRA Statements, as produced
        by the `to_json` methods of Statement subclasses, or equivalently by
        `stmts_to_json`.
    on_missing_support : Optional[str]
        How to treat a uuid in `supports` or `supported_by` that cannot be
        resolved. Uuids can only be linked to Statements contained in
        `json_in`, so some may be missing when only part of a pre-assembled
        corpus is loaded.

        Options:

        - *'handle'* : (default) convert unresolved uuids into `Unresolved`
          Statement objects.
        - *'ignore'* : Simply omit any uuids that cannot be linked to any
          Statements in the list.
        - *'error'* : Raise an error upon hitting an un-linkable uuid.

    Returns
    -------
    stmts : list[:py:class:`Statement`]
        A list of INDRA Statements.
    """
    by_uuid = {}
    loaded = []
    # First pass: deserialize every entry and index the results by uuid.
    for entry in json_in:
        try:
            stmt = Statement._from_json(entry)
        except Exception as err:
            logger.warning("Error creating statement: %s" % err)
        else:
            loaded.append(stmt)
            by_uuid[stmt.uuid] = stmt

    # Second pass: now that every uuid is known, swap support uuids for the
    # corresponding Statement objects (the lists are updated in place).
    for stmt in loaded:
        for attr in ('supports', 'supported_by'):
            _promote_support(getattr(stmt, attr), by_uuid,
                             on_missing_support)
    return loaded
def stmt_from_json(json_in):
    """Turn the JSON dict of one INDRA Statement into a Statement object.

    The dict is passed through `stmts_from_json` as a one-element list with
    unresolvable support uuids ignored, and the first resulting Statement is
    returned.

    Parameters
    ----------
    json_in : dict
        A JSON representation of the INDRA Statement.

    Returns
    -------
    stmt : :py:class:`Statement`
        The INDRA Statement.
    """
    return stmts_from_json([json_in], on_missing_support='ignore')[0]
def stmt_from_json_str(json_in):
    """Turn a JSON string holding one INDRA Statement into a Statement.

    Parameters
    ----------
    json_in : str
        A JSON-string serialized INDRA Statement.

    Returns
    -------
    stmt : :py:class:`Statement`
        The deserialized INDRA Statement.
    """
    return stmt_from_json(json.loads(json_in))
def stmts_from_json_file(
    fname: Union[str, pathlib.Path, os.PathLike], format='json',
):
    """Return a list of statements loaded from a JSON file.

    Parameters
    ----------
    fname :
        Path to the JSON file to load statements from.
    format : Optional[str]
        'json' to read the whole file as one JSON document. Any other value
        (conventionally 'jsonl') reads the file as JSON Lines, i.e. one
        statement per line. Blank lines in a JSON Lines file are skipped.

    Returns
    -------
    list[indra.statements.Statement]
        The list of INDRA Statements loaded from the JSON file.
    """
    with open(fname, 'r') as fh:
        if format == 'json':
            return stmts_from_json(json.load(fh))
        # Iterate the handle directly instead of materializing all lines,
        # and skip whitespace-only lines (e.g. a trailing empty line), which
        # json.loads would otherwise reject with a JSONDecodeError.
        return stmts_from_json([json.loads(line) for line in fh
                                if line.strip()])
def stmts_to_json_file(
    stmts,
    fname: Union[str, pathlib.Path, os.PathLike],
    format='json',
    **kwargs,
):
    """Serialize a list of INDRA Statements into a JSON file.

    The statements are serialized before the file is opened, so a
    serialization error does not leave a truncated file behind.

    Parameters
    ----------
    stmts : list[indra.statement.Statements]
        The list of INDRA Statements to serialize into the JSON file.
    fname :
        Path to the JSON file to serialize Statements into.
    format : Optional[str]
        'json' to use regular JSON with indent=1 formatting. Any other value
        (conventionally 'jsonl') puts each statement on its own line without
        indents.
    **kwargs :
        Passed on unchanged to `stmts_to_json`.
    """
    sj = stmts_to_json(stmts, **kwargs)
    with open(fname, 'w') as fh:
        if format == 'json':
            json.dump(sj, fh, indent=1)
        else:
            # `stmts_to_json` returns a single JSON object (not a list) when
            # it is given a single Statement. Iterating that object would
            # write its keys, one per line, so wrap it to emit one record.
            records = sj if isinstance(sj, list) else [sj]
            for json_stmt in records:
                json.dump(json_stmt, fh)
                fh.write('\n')
def stmts_to_json(stmts_in, use_sbo=False, matches_fun=None):
    """Return the JSON-serialized form of one or more INDRA Statements.

    Parameters
    ----------
    stmts_in : Statement or list[Statement]
        A Statement or list of Statement objects to serialize into JSON.
        Anything that is not a `list` is treated as a single Statement.
    use_sbo : Optional[bool]
        If True, SBO annotations are added to each applicable element of the
        JSON. Default: False
    matches_fun : Optional[function]
        A custom function which, if provided, is used to construct the
        matches key which is then hashed and put into the return value.
        Default: None

    Returns
    -------
    json_dict : dict or list[dict]
        The `to_json` result of the Statement if a single Statement was
        given, otherwise a list with one such result per Statement.
    """
    if not isinstance(stmts_in, list):
        # Forward `matches_fun` here as well, so that a single Statement is
        # serialized exactly like the same Statement inside a list.
        return stmts_in.to_json(use_sbo=use_sbo, matches_fun=matches_fun)
    return [st.to_json(use_sbo=use_sbo, matches_fun=matches_fun)
            for st in stmts_in]
def _promote_support(sup_list, uuid_dict, on_missing='handle'):
    """Promote the list of support-related uuids to Statements, if possible.

    The list is updated in place, so callers holding a reference to
    `sup_list` see the promoted entries.

    Parameters
    ----------
    sup_list : list
        The uuids to promote.
    uuid_dict : dict
        Maps each known uuid to the object that should replace it.
    on_missing : str
        What to do with a uuid that is not in `uuid_dict`: 'handle' replaces
        it with `Unresolved(uuid)`, 'ignore' drops it from the list, and
        'error' raises. Default: 'handle'

    Raises
    ------
    InputError
        If `on_missing` is not one of the valid choices.
    UnresolvedUuidError
        If `on_missing` is 'error' and a uuid is not in `uuid_dict`. The
        list is left unmodified in that case.
    """
    valid_handling_choices = ['handle', 'error', 'ignore']
    if on_missing not in valid_handling_choices:
        raise InputError('Invalid option for `on_missing_support`: \'%s\'\n'
                         'Choices are: %s.'
                         % (on_missing, str(valid_handling_choices)))

    # Collect the promoted entries in a separate list. Removing items from
    # `sup_list` while iterating over it makes the iteration skip the
    # element that follows each removal, leaving it unprocessed.
    promoted = []
    for uuid in sup_list:
        if uuid in uuid_dict:
            promoted.append(uuid_dict[uuid])
        elif on_missing == 'handle':
            promoted.append(Unresolved(uuid))
        elif on_missing == 'error':
            raise UnresolvedUuidError("Uuid %s not found in stmt jsons."
                                      % uuid)
        # on_missing == 'ignore': the unresolved uuid is simply left out.

    # Slice-assign so the caller's list object itself is updated.
    sup_list[:] = promoted
    return
def draw_stmt_graph(stmts):
    """Plot the attribute graphs of a list of Statements as one figure.

    The graphs returned by each Statement's `to_graph` method are composed
    into a single graph, laid out with graphviz `dot` (left to right), and
    shown with plt.show(). The layout works well for a single Statement or a
    few Statements at a time.

    If matplotlib or pygraphviz cannot be imported, an error is logged and
    nothing is drawn.

    Parameters
    ----------
    stmts : list[indra.statements.Statement]
        A list of one or more INDRA Statements whose attribute graph should
        be drawn.
    """
    import networkx
    try:
        import matplotlib.pyplot as plt
    except Exception:
        logger.error('Could not import matplotlib, not drawing graph.')
        return
    try:  # This checks whether networkx has this package to work with.
        import pygraphviz
    except Exception:
        logger.error('Could not import pygraphviz, not drawing graph.')
        return
    import numpy

    graph = networkx.compose_all([stmt.to_graph() for stmt in stmts])
    plt.figure()
    plt.ion()
    graph.graph['graph'] = {'rankdir': 'LR'}
    # The layout is computed on the directed graph; drawing then uses the
    # undirected version.
    layout = networkx.drawing.nx_agraph.graphviz_layout(graph, prog='dot')
    graph = graph.to_undirected()

    # Nodes
    axes = plt.gca()
    coords = numpy.asarray([layout[node] for node in list(graph)])
    scatter = axes.scatter(coords[:, 0], coords[:, 1], marker='o', s=200,
                           c=[0.85, 0.85, 1], facecolor='0.5', lw=0)
    scatter.set_zorder(2)

    # Edges
    networkx.draw_networkx_edges(graph, layout, arrows=False,
                                 edge_color='0.5')

    # Edge labels
    edge_labels = {(src, dst): attrs.get('label')
                   for src, dst, attrs in graph.edges(data=True)}
    networkx.draw_networkx_edge_labels(graph, layout,
                                       edge_labels=edge_labels)

    # Node labels; labels longer than 25 characters get a line break
    # inserted at the middle word boundary.
    node_labels = {}
    for node, attrs in graph.nodes(data=True):
        text = attrs.get('label')
        if len(text) > 25:
            words = text.split(' ')
            words.insert(len(words) // 2, '\n')
            text = ' '.join(words)
        node_labels[node] = text
    networkx.draw_networkx_labels(graph, layout, labels=node_labels)

    axes.get_xaxis().set_visible(False)
    axes.get_yaxis().set_visible(False)
    plt.show()
# Upper bound, in characters, on the width used by pretty printing.
# None means no bound is applied.
pretty_print_max_width = None


def set_pretty_print_max_width(new_max):
    """Set the max display width for pretty prints, in characters.

    Parameters
    ----------
    new_max : Optional[int]
        The new maximum width, or None to remove the maximum.

    Raises
    ------
    ValueError
        If `new_max` is neither None nor an int.
    """
    global pretty_print_max_width
    if not (new_max is None or isinstance(new_max, int)):
        raise ValueError("Max width must be an integer or None.")
    pretty_print_max_width = new_max
[docs]def pretty_print_stmts(stmt_list: List[Statement],
stmt_limit: Optional[int] = None,
ev_limit: Optional[int] = 5,
width: Optional[int] = None) -> None:
"""Print a formatted list of statements along with evidence text.
Requires the tabulate package (https://pypi.org/project/tabulate).
Parameters
----------
stmt_list : List[Statement]
The list of INDRA Statements to be printed.
stmt_limit : Optional[int]
The maximum number of INDRA Statements to be printed. If None, all
Statements are printed. (Default is None)
ev_limit : Optional[int]
The maximum number of Evidence to print for each Statement. If None, all
evidence will be printed for each Statement. (Default is 5)
width : Optional[int]
Manually set the width of the table. If `None` the function will try to
match the current terminal width using `os.get_terminal_size()`. If
this fails the width defaults to 80 characters. The maximum width can
be controlled by setting :data:`pretty_print_max_width` using the
:func:`set_pretty_print_max_width` function. This is useful in
Jupyter notebooks where the environment returns a terminal size
of 80 characters regardless of the width of the window. (Default
is None).
"""
# Import some modules helpful for text formatting.
# tabulate is imported inside the function, so it is only needed when this
# function is actually called.
from textwrap import TextWrapper
from tabulate import tabulate
from os import get_terminal_size
# Try to get the actual number of columns in the terminal.
# `width` is first set to the 80-character fallback and only overwritten if
# the terminal size query succeeds; a failure is logged at debug level.
if width is None:
width = 80
try:
width = get_terminal_size().columns
except Exception as e:
logger.debug(f"Failed to get terminal size (using default "
f"{width}): {e}.")
# Apply the maximum.
# NOTE(review): indentation was lost in this listing, so it is not visible
# whether this cap applies only to the auto-detected width or also to an
# explicitly passed `width` -- confirm against the original file.
if pretty_print_max_width is not None:
assert isinstance(pretty_print_max_width, int)
width = min(width, pretty_print_max_width)
# Parameterize the text wrappers that format the ev text and the metadata.
# The metadata column is fixed at 16 characters; the evidence column gets the
# remaining width minus 2 more characters (presumably the gap tabulate puts
# between columns -- TODO confirm).
stmt_tr = TextWrapper(width=width)
metadata_tr = TextWrapper(width=16)
evidence_tr = TextWrapper(width=width - metadata_tr.width - 2)
# Print the table.
# Slicing with a limit of None keeps every element, which is how the
# "no limit" setting works for both statements and evidence.
for i, s in enumerate(stmt_list[:stmt_limit]):
# Print the Statement heading.
stmt_str = f"[LIST INDEX: {i}] " + str(s)
print(stmt_tr.fill(stmt_str))
print("="*width)
# Print the evidence
for j, ev in enumerate(s.evidence[:ev_limit]):
# Gather the metadata we want to display.
metadata = [("EV INDEX", j), ("SOURCE", ev.source_api)]
# Presumably only the first identifier found, checked in the order
# PMID, PMCID, DOI, is shown (the `break` follows the append) -- the
# exact nesting is not visible in this listing.
for id_type in ['PMID', 'PMCID', 'DOI']:
if id_type in ev.text_refs:
metadata.append((id_type, ev.text_refs[id_type]))
break
# Form the metadata string to fill out its allocated space.
# Each "key: value" entry is wrapped to the metadata column width and every
# resulting line is right-padded with spaces to that full width.
metadata_str = '\n'.join(line + ' '*(metadata_tr.width - len(line))
for k, v in metadata
for line in metadata_tr.wrap(f"{k}: {v}"))
# Form the evidence string.
if ev.text:
text_str = evidence_tr.fill(ev.text)
else:
text_str = evidence_tr.fill("(No evidence text)")
# Print the entire thing
full_str = tabulate([[metadata_str, text_str]], tablefmt='plain')
print(full_str)
print('-'*width)
# NOTE(review): presumably one blank line is printed after each statement's
# evidence block; the nesting level of this call is not visible here.
print()
def print_stmt_summary(statements: Collection[Statement]):
    """Print a table counting the given statements by statement type.

    The table has one row per statement class name, ordered from the most
    to the least common type, and is printed in tabulate's 'github' format.

    Requires the tabulate package (https://pypi.org/project/tabulate).

    Parameters
    ----------
    statements : List[Statement]
        The list of INDRA Statements to be printed.
    """
    from tabulate import tabulate

    type_counts = Counter(stmt.__class__.__name__ for stmt in statements)
    rows = type_counts.most_common()
    table = tabulate(rows, headers=["Statement Type", "Count"],
                     tablefmt='github')
    print(table)
class UnresolvedUuidError(Exception):
    """Raised when a support uuid is not found among the loaded Statements."""