Source code for indra.sources.hypothesis.api

# Names exported as the public API of this module. Note that
# `statement_to_annotations` is not defined here; it is imported from the
# sibling `annotator` module and re-exported.
__all__ = ['process_annotations', 'get_annotations', 'upload_annotation',
           'upload_statement_annotation', 'statement_to_annotations']

import logging
import requests
from indra.config import get_config
from .processor import HypothesisProcessor
from .annotator import statement_to_annotations

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Root URL that the endpoint names passed to the request helpers are
# appended to.
base_url = 'https://api.hypothes.is/api/'
# API key looked up once, when this module is imported.
api_key = get_config('HYPOTHESIS_API_KEY')
# Headers sent with every request. These are built at import time, so if
# `api_key` is None the Authorization value is the literal 'Bearer None';
# the request helpers check `api_key` before sending anything.
headers = {'Authorization': 'Bearer %s' % api_key,
           'Accept': 'application/vnd.hypothesis.v1+json',
           'content-type': 'application/json'}
# Default group key used when a caller does not pass `group` explicitly.
indra_group = get_config('HYPOTHESIS_GROUP')


def send_get_request(endpoint, **params):
    """Send a GET request to the hypothes.is web service and return the JSON
    response.

    Note that it is assumed that `HYPOTHESIS_API_KEY` is set either as a
    configuration entry or as an environmental variable.

    Parameters
    ----------
    endpoint : str
        The endpoint to call, e.g., `search`.
    params : kwargs
        A set of keyword arguments that are passed to the `requests.get` call
        as `params`.

    Returns
    -------
    json
        The decoded JSON body of the response.

    Raises
    ------
    ValueError
        If no API key is configured.
    requests.HTTPError
        If the web service responds with an error status code.
    """
    if api_key is None:
        # Raise (rather than return) the error so that callers never mistake
        # the exception object for a JSON response.
        raise ValueError('No API key set in HYPOTHESIS_API_KEY')
    res = requests.get(base_url + endpoint, headers=headers,
                       params=params)
    res.raise_for_status()
    return res.json()


def send_post_request(endpoint, **params):
    """Send a POST request to the hypothes.is web service and return the JSON
    response.

    Note that it is assumed that `HYPOTHESIS_API_KEY` is set either as a
    configuration entry or as an environmental variable.

    Parameters
    ----------
    endpoint : str
        The endpoint to call, e.g., `annotations`.
    params : kwargs
        A set of keyword arguments that are passed to the `requests.post` call
        as `json`.

    Returns
    -------
    json
        The decoded JSON body of the response.

    Raises
    ------
    ValueError
        If no API key is configured.
    requests.HTTPError
        If the web service responds with an error status code.
    """
    if api_key is None:
        # Raise (rather than return) the error so that callers never mistake
        # the exception object for a JSON response.
        raise ValueError('No API key set in HYPOTHESIS_API_KEY')
    res = requests.post(base_url + endpoint, headers=headers,
                        json=params)
    res.raise_for_status()
    return res.json()


def upload_annotation(url, annotation, target_text=None, tags=None,
                      group=None):
    """Upload an annotation to hypothes.is.

    Parameters
    ----------
    url : str
        The URL of the resource being annotated.
    annotation : str
        The text content of the annotation itself.
    target_text : Optional[str]
        The specific span of text that the annotation applies to.
    tags : Optional[list[str]]
        A list of tags to apply to the annotation.
    group : Optional[str]
        The hypothes.is key of the group (not its name). If not given, the
        HYPOTHESIS_GROUP configuration in the config file or an environmental
        variable is used.

    Returns
    -------
    json
        The full response JSON from the web service.

    Raises
    ------
    ValueError
        If no group is given and HYPOTHESIS_GROUP is not set.
    """
    if group is None:
        if indra_group:
            group = indra_group
        else:
            raise ValueError('No group provided and HYPOTHESIS_GROUP '
                             'is not set.')

    params = {
        'uri': url,
        'group': group,
        'text': annotation,
    }
    # Only anchor the annotation to a text span if one was given.
    if target_text:
        params['target'] = [{
            'source': [url],
            'selector': [
                {'type': 'TextQuoteSelector',
                 'exact': target_text}
            ]
        }]
    if tags:
        params['tags'] = tags
    # Restrict read access to members of the target group.
    permissions = {'read': ['group:%s' % group]}
    params['permissions'] = permissions
    res = send_post_request('annotations', **params)
    return res


def upload_statement_annotation(stmt, annotate_agents=True):
    """Construct and upload all annotations for a given INDRA Statement.

    Parameters
    ----------
    stmt : indra.statements.Statement
        An INDRA Statement.
    annotate_agents : Optional[bool]
        If True, the agents in the annotation text are linked to outside
        databases based on their grounding. Default: True

    Returns
    -------
    list of dict
        A list of annotation structures that were uploaded to hypothes.is.
    """
    annotations = statement_to_annotations(stmt,
                                           annotate_agents=annotate_agents)
    for annotation in annotations:
        # Tag each annotation as an INDRA upload before sending it; the
        # annotation's keys are passed on as keyword arguments.
        annotation['tags'].append('indra_upload')
        upload_annotation(**annotation)
    return annotations


def get_annotations(group=None):
    """Return annotations in hypothes.is in a given group.

    Parameters
    ----------
    group : Optional[str]
        The hypothes.is key of the group (not its name). If not given, the
        HYPOTHESIS_GROUP configuration in the config file or an environmental
        variable is used.

    Returns
    -------
    list
        The entries collected from the `rows` field of each search response,
        in the order they were returned.

    Raises
    ------
    ValueError
        If no group is given and HYPOTHESIS_GROUP is not set.
    """
    if group is None:
        if indra_group:
            group = indra_group
        else:
            raise ValueError('No group provided and HYPOTHESIS_GROUP '
                             'is not set.')

    # Note that this batch size is the maximum that the API allows, therefore
    # it makes sense to run queries with this fixed limit.
    limit = 200
    offset = 0
    annotations = []
    # NOTE(review): the hypothes.is search API appears to cap `offset`; very
    # large groups may need `search_after`-style paging instead -- confirm.
    while True:
        logger.info('Getting up to %d annotations from offset %d',
                    limit, offset)
        res = send_get_request('search', group=group, limit=limit,
                               offset=offset)
        rows = res.get('rows', [])
        # An empty page means everything has been fetched.
        if not rows:
            break
        annotations += rows
        offset += len(rows)
    logger.info('Got a total of %d annotations', len(annotations))
    return annotations


def process_annotations(group=None, reader=None, grounder=None):
    """Process annotations in hypothes.is in a given group.

    Parameters
    ----------
    group : Optional[str]
        The hypothes.is key of the group (not its name). If not given, the
        HYPOTHESIS_GROUP configuration in the config file or an environmental
        variable is used.
    reader : Optional[None, str, Callable[[str], Processor]]
        A handle for a function which takes a single str argument
        (text to process) and returns a processor object with a statements
        attribute containing INDRA Statements. By default, the REACH reader's
        process_text function is used with default parameters. Note that
        if the function requires extra parameters other than the input text,
        functools.partial can be used to set those. Can be alternatively
        set to :func:`indra.sources.bel.process_text` by using the string
        "bel".
    grounder : Optional[function]
        A handle for a function which takes a positional str argument (entity
        text to ground) and an optional context key word argument and returns
        a list of objects matching the structure of gilda.grounder.ScoredMatch.
        By default, Gilda's ground function is used for grounding.

    Returns
    -------
    HypothesisProcessor
        A HypothesisProcessor object which contains a list of extracted
        INDRA Statements in its statements attribute, and a list of extracted
        grounding curations in its groundings attribute.

    Example
    -------
    Process all annotations that have been written in BEL with:

    .. code-block:: python

        from indra.sources import hypothesis
        processor = hypothesis.process_annotations(group='Z8RNqokY',
                                                   reader='bel')
        processor.statements
        # returns: [Phosphorylation(AKT(), PCGF2(), T, 334)]

    If this example doesn't work, try joining the group with this link:
    https://hypothes.is/groups/Z8RNqokY/cthoyt-bel.
    """
    annotations = get_annotations(group=group)
    hp = HypothesisProcessor(annotations, reader=reader, grounder=grounder)
    # Run both extraction passes so the returned processor is fully populated.
    hp.extract_statements()
    hp.extract_groundings()
    return hp