# Source code for indra.sources.rlimsp.api

# Public API of this module: the three entry points, each of which builds
# and returns an RlimspProcessor from a different kind of input.
__all__ = ['process_from_webservice', 'process_from_json_file',
           'process_from_jsonish_str']

import json
import logging
import requests

from .processor import RlimspProcessor


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Base URL of the RLIMS-P REST endpoint. process_from_webservice appends
# '<source>/<id_type>/<id_val>' to it, so the trailing slash matters.
RLIMSP_URL = (
    'https://research.bioinformatics.udel.edu'
    '/itextmine/api/data/rlims/'
)


class RLIMSP_Error(Exception):
    """Error raised when the RLIMS-P web service request is unsuccessful."""


def process_from_webservice(id_val, id_type='pmcid', source='pmc',
                            timeout=None):
    """Return an output from RLIMS-p for the given PubMed ID or PMC ID.

    The web service is documented at:
    https://research.bioinformatics.udel.edu/itextmine/api/.
    The /data/rlims URL endpoint is extended with three additional elements:
    /{collection}/{key}/{value} where collection is "medline" or "pmc",
    key is "pmid" or "pmcid", and value is a specific PMID or PMCID.

    Parameters
    ----------
    id_val : str
        A PMCID, with the prefix PMC, or PMID, with no prefix, of the paper
        to be "read". Corresponds to the "value" argument of the REST API.
    id_type : Optional[str]
        Either 'pmid' or 'pmcid'. The default is 'pmcid'. Corresponds to the
        "key" argument of the REST API.
    source : Optional[str]
        Either 'pmc' or 'medline', whether you want pmc fulltext or medline
        abstracts. Corresponds to the "collection" argument of the REST API.
    timeout : Optional[float or tuple]
        Forwarded unchanged to ``requests.get`` as its ``timeout`` argument.
        The default, None, places no limit on how long the request may wait,
        which is the behavior this function had before the parameter existed.

    Returns
    -------
    :py:class:`indra.sources.rlimsp.processor.RlimspProcessor`
        An RlimspProcessor which contains a list of extracted INDRA
        Statements in its statements attribute.

    Raises
    ------
    RLIMSP_Error
        If the web service responds with a status code other than 200.
    """
    # URL layout is <base>/<collection>/<key>/<value>.
    url = RLIMSP_URL + '%s/%s/%s' % (source, id_type, id_val)
    resp = requests.get(url, timeout=timeout)

    if resp.status_code != 200:
        raise RLIMSP_Error("Bad status code: %d - %s"
                           % (resp.status_code, resp.reason))

    rp = RlimspProcessor(resp.json())
    rp.extract_statements()
    return rp
def process_from_json_file(filename, doc_id_type=None):
    """Process RLIMSP extractions from a bulk-download JSON file.

    The file is read as one JSON document per line. Lines that contain only
    whitespace are skipped.

    Parameters
    ----------
    filename : str
        Path to the JSON file.
    doc_id_type : Optional[str]
        In some cases the RLIMS-P paragraph info doesn't contain 'pmid' or
        'pmcid' explicitly, instead it contains a 'docId' key. This parameter
        allows defining what ID type 'docId' should be interpreted as. Its
        values should be 'pmid' or 'pmcid' or None if not used.

    Returns
    -------
    :py:class:`indra.sources.rlimsp.processor.RlimspProcessor`
        An RlimspProcessor which contains a list of extracted INDRA
        Statements in its statements attribute.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform default
    # encoding, and iterate the file lazily instead of loading every line
    # up front.
    with open(filename, 'rt', encoding='utf-8') as f:
        # A blank line (e.g. extra trailing newlines) is not a JSON document;
        # skip it instead of letting json.loads fail on it.
        json_list = [json.loads(line) for line in f if line.strip()]

    rp = RlimspProcessor(json_list, doc_id_type=doc_id_type)
    rp.extract_statements()
    return rp
def process_from_jsonish_str(jsonish_str, doc_id_type=None):
    """Process RLIMSP extractions from the contents of a bulk-download file.

    The string is treated as one JSON document per line. Lines that contain
    only whitespace are skipped.

    Parameters
    ----------
    jsonish_str : str
        The contents of one of the not-quite-json files you can find here:
        https://hershey.dbi.udel.edu/textmining/export
    doc_id_type : Optional[str]
        In some cases the RLIMS-P paragraph info doesn't contain 'pmid' or
        'pmcid' explicitly, instead it contains a 'docId' key. This parameter
        allows defining what ID type 'docId' should be interpreted as. Its
        values should be 'pmid' or 'pmcid' or None if not used.

    Returns
    -------
    :py:class:`indra.sources.rlimsp.processor.RlimspProcessor`
        An RlimspProcessor which contains a list of extracted INDRA
        Statements in its statements attribute.
    """
    # Split on '\n', '\r\n' and '\r' only. str.splitlines() additionally
    # breaks on characters such as U+0085, U+2028 and U+2029, which may
    # appear unescaped inside a JSON string and would cut a record in two.
    # A raw '\r' or '\n' cannot occur inside a valid JSON string, so
    # normalizing them here is safe.
    normalized = jsonish_str.replace('\r\n', '\n').replace('\r', '\n')

    # Skip blank lines (including the empty piece after a trailing newline).
    json_list = [json.loads(line)
                 for line in normalized.split('\n')
                 if line.strip()]

    rp = RlimspProcessor(json_list, doc_id_type=doc_id_type)
    rp.extract_statements()
    return rp