# Source code for indra.sources.hypothesis.api

# Names exported by a star-import of this module. Note that
# `statement_to_annotations` is not defined here; it is re-exported from the
# `.annotator` submodule imported below.
__all__ = ['process_annotations', 'get_annotations', 'upload_annotation',
           'upload_statement_annotation', 'statement_to_annotations']

import logging
import requests
from indra.config import get_config
from .processor import HypothesisProcessor
from .annotator import statement_to_annotations

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Root URL of the hypothes.is API; endpoint names are appended to it.
base_url = 'https://api.hypothes.is/api/'
# API key looked up once, at import time, through the INDRA configuration.
# The request helpers in this module treat a value of None as "no key set".
api_key = get_config('HYPOTHESIS_API_KEY')
# Headers sent with every request. The Authorization value is formatted at
# import time, so it reads 'Bearer None' when no key is configured.
headers = {'Authorization': 'Bearer %s' % api_key,
           'Accept': 'application/vnd.hypothesis.v1+json',
           'content-type': 'application/json'}
# Default group key used when a function is called without a `group` argument.
indra_group = get_config('HYPOTHESIS_GROUP')


def send_get_request(endpoint, **params):
    """Send a GET request to the hypothes.is web service and return JSON.

    Note that it is assumed that `HYPOTHESIS_API_KEY` is set either as a
    configuration entry or as an environmental variable.

    Parameters
    ----------
    endpoint : str
        The endpoint to call, e.g., `search`.
    params : kwargs
        A set of keyword arguments that are passed to the `requests.get` call
        as `params`.

    Returns
    -------
    json
        The decoded JSON content of the response.

    Raises
    ------
    ValueError
        If no API key is configured.
    requests.exceptions.HTTPError
        If the web service responds with an error status code.
    """
    if api_key is None:
        # Raise the error instead of returning it, so that callers never
        # mistake the exception object for a JSON response.
        raise ValueError('No API key set in HYPOTHESIS_API_KEY')
    res = requests.get(base_url + endpoint, headers=headers,
                       params=params)
    res.raise_for_status()
    return res.json()


def send_post_request(endpoint, **params):
    """Send a POST request to the hypothes.is web service and return JSON.

    Note that it is assumed that `HYPOTHESIS_API_KEY` is set either as a
    configuration entry or as an environmental variable.

    Parameters
    ----------
    endpoint : str
        The endpoint to call, e.g., `annotations`.
    params : kwargs
        A set of keyword arguments that are passed to the `requests.post` call
        as `json`, i.e., they form the JSON body of the request.

    Returns
    -------
    json
        The decoded JSON content of the response.

    Raises
    ------
    ValueError
        If no API key is configured.
    requests.exceptions.HTTPError
        If the web service responds with an error status code.
    """
    if api_key is None:
        # Raise the error instead of returning it, so that callers never
        # mistake the exception object for a JSON response.
        raise ValueError('No API key set in HYPOTHESIS_API_KEY')
    res = requests.post(base_url + endpoint, headers=headers,
                        json=params)
    res.raise_for_status()
    return res.json()


def upload_annotation(url, annotation, target_text=None, tags=None,
                      group=None):
    """Upload an annotation to hypothes.is.

    Parameters
    ----------
    url : str
        The URL of the resource being annotated.
    annotation : str
        The text content of the annotation itself.
    target_text : Optional[str]
        The specific span of text that the annotation applies to.
    tags : Optional[list[str]]
        A list of tags to apply to the annotation.
    group : Optional[str]
        The hypothes.is key of the group (not its name). If not given, the
        HYPOTHESIS_GROUP configuration in the config file or an environmental
        variable is used.

    Returns
    -------
    json
        The full response JSON from the web service.

    Raises
    ------
    ValueError
        If no group is given and HYPOTHESIS_GROUP is not configured.
    """
    if group is None:
        # Fall back to the configured default group, if there is one.
        if indra_group:
            group = indra_group
        else:
            raise ValueError('No group provided and HYPOTHESIS_GROUP '
                             'is not set.')
    params = {
        'uri': url,
        'group': group,
        'text': annotation,
    }
    # The optional fields are only included in the payload when provided.
    if target_text:
        params['target'] = [{
            'source': [url],
            'selector': [
                {'type': 'TextQuoteSelector',
                 'exact': target_text}
            ]
        }]
    if tags:
        params['tags'] = tags
    # Read permission is set to the group the annotation is posted to.
    params['permissions'] = {'read': ['group:%s' % group]}
    return send_post_request('annotations', **params)


def upload_statement_annotation(stmt, annotate_agents=True):
    """Construct and upload all annotations for a given INDRA Statement.

    Parameters
    ----------
    stmt : indra.statements.Statement
        An INDRA Statement.
    annotate_agents : Optional[bool]
        If True, the agents in the annotation text are linked to outside
        databases based on their grounding. Default: True

    Returns
    -------
    list of dict
        A list of annotation structures that were uploaded to hypothes.is.
        Each returned structure includes the `indra_upload` tag that is added
        here before uploading.
    """
    annotations = statement_to_annotations(stmt,
                                           annotate_agents=annotate_agents)
    for annotation in annotations:
        # Mark the annotation as machine-uploaded; this modifies the tag
        # list of the annotation structure in place.
        annotation['tags'].append('indra_upload')
        # Each annotation structure is passed on as keyword arguments; the
        # web service response is not kept.
        upload_annotation(**annotation)
    return annotations


def get_annotations(group=None):
    """Return annotations in hypothes.is in a given group.

    Parameters
    ----------
    group : Optional[str]
        The hypothes.is key of the group (not its name). If not given, the
        HYPOTHESIS_GROUP configuration in the config file or an environmental
        variable is used.

    Returns
    -------
    list
        The `rows` entries of all `search` responses for the group,
        concatenated in the order they were returned.

    Raises
    ------
    ValueError
        If no group is given and HYPOTHESIS_GROUP is not configured.
    """
    if group is None:
        # Fall back to the configured default group, if there is one.
        if indra_group:
            group = indra_group
        else:
            raise ValueError('No group provided and HYPOTHESIS_GROUP '
                             'is not set.')
    # Note that this batch size is the maximum that the API allows, therefore
    # it makes sense to run queries with this fixed limit.
    limit = 200
    offset = 0
    annotations = []
    # Page through the results until the service returns an empty batch.
    # NOTE(review): the hypothes.is API documentation appears to cap the
    # `offset` parameter; very large groups may require `search_after`
    # based paging instead - confirm against the API reference.
    while True:
        logger.info('Getting up to %d annotations from offset %d',
                    limit, offset)
        res = send_get_request('search', group=group, limit=limit,
                               offset=offset)
        rows = res.get('rows', [])
        if not rows:
            break
        annotations += rows
        # Advance by the number of rows actually received, which can be
        # smaller than `limit`.
        offset += len(rows)
    logger.info('Got a total of %d annotations', len(annotations))
    return annotations


def process_annotations(group=None, reader=None, grounder=None):
    """Process annotations in hypothes.is in a given group.

    Parameters
    ----------
    group : Optional[str]
        The hypothes.is key of the group (not its name). If not given, the
        HYPOTHESIS_GROUP configuration in the config file or an environmental
        variable is used.
    reader : Optional[None, str, Callable[[str], Processor]]
        A handle for a function which takes a single str argument
        (text to process) and returns a processor object with a statements
        attribute containing INDRA Statements. By default, the REACH reader's
        process_text function is used with default parameters. Note that
        if the function requires extra parameters other than the input text,
        functools.partial can be used to set those. Can be alternatively
        set to :func:`indra.sources.bel.process_text` by using the string
        "bel".
    grounder : Optional[function]
        A handle for a function which takes a positional str argument (entity
        text to ground) and an optional context keyword argument and returns
        a list of objects matching the structure of gilda.grounder.ScoredMatch.
        By default, Gilda's ground function is used for grounding.

    Returns
    -------
    HypothesisProcessor
        A HypothesisProcessor object which contains a list of extracted
        INDRA Statements in its statements attribute, and a list of extracted
        grounding curations in its groundings attribute.

    Example
    -------
    Process all annotations that have been written in BEL with:

    .. code-block:: python

        from indra.sources import hypothesis
        processor = hypothesis.process_annotations(group='Z8RNqokY', reader='bel')
        processor.statements
        # returns: [Phosphorylation(AKT(), PCGF2(), T, 334)]

    If this example doesn't work, try joining the group with this link:
    https://hypothes.is/groups/Z8RNqokY/cthoyt-bel.
    """
    # Fetch all annotations for the group, then run both extraction steps
    # on the processor before handing it back.
    annotations = get_annotations(group=group)
    hp = HypothesisProcessor(annotations, reader=reader, grounder=grounder)
    hp.extract_statements()
    hp.extract_groundings()
    return hp