Source code for indra.sources.sofia.api

import json
import time
import openpyxl
import requests
from indra.config import get_config
from .processor import SofiaJsonProcessor, SofiaExcelProcessor


[docs]def process_table(fname): """Return processor by processing a given sheet of a spreadsheet file. Parameters ---------- fname : str The name of the Excel file (typically .xlsx extension) to process Returns ------- sp : indra.sources.sofia.processor.SofiaProcessor A SofiaProcessor object which has a list of extracted INDRA Statements as its statements attribute. """ book = openpyxl.load_workbook(fname, read_only=True) try: rel_sheet = book['Relations'] except Exception as e: rel_sheet = book['Causal'] event_sheet = book['Events'] entities_sheet = book['Entities'] sp = SofiaExcelProcessor(rel_sheet.rows, event_sheet.rows, entities_sheet.rows) sp.extract_relations(rel_sheet.rows) sp.extract_events(event_sheet.rows, rel_sheet.rows) return sp
[docs]def process_text(text, out_file='sofia_output.json', auth=None): """Return processor by processing text given as a string. Parameters ---------- text : str A string containing the text to be processed with Sofia. out_file : Optional[str] The path to a file to save the reader's output into. Default: sofia_output.json auth : Optional[list] A username/password pair for the Sofia web service. If not given, the SOFIA_USERNAME and SOFIA_PASSWORD values are loaded from either the INDRA config or the environment. Returns ------- sp : indra.sources.sofia.processor.SofiaProcessor A SofiaProcessor object which has a list of extracted INDRA Statements as its statements attribute. If the API did not process the text, None is returned. """ text_json = {'text': text} if not auth: user, password = _get_sofia_auth() else: user, password = auth if not user or not password: raise ValueError('Could not use SOFIA web service since' ' authentication information is missing. Please' ' set SOFIA_USERNAME and SOFIA_PASSWORD in the' ' INDRA configuration file or as environmental' ' variables.') json_response, status_code, process_status = \ _text_processing(text_json=text_json, user=user, password=password) # Check response status if process_status != 'Done' or status_code != 200: return None # Cache reading output if out_file: with open(out_file, 'w') as fh: json.dump(json_response, fh, indent=1) return process_json(json_response)
[docs]def process_json(json_obj): """Return processor by processing a JSON object returned by Sofia. Parameters ---------- json_obj : json A JSON object containing extractions from Sofia. Returns ------- sp : indra.sources.sofia.processor.SofiaProcessor A SofiaProcessor object which has a list of extracted INDRA Statements as its statements attribute. """ sp = SofiaJsonProcessor(json_obj) sp.extract_relations(json_obj) sp.extract_events(json_obj) return sp
[docs]def process_json_file(fname): """Return processor by processing a JSON file produced by Sofia. Parameters ---------- fname : str The name of the JSON file to process Returns ------- indra.sources.sofia.processor.SofiaProcessor A SofiaProcessor object which has a list of extracted INDRA Statements as its statements attribute. """ with open(fname, 'r') as fh: jd = json.load(fh) return process_json(jd)
def _get_sofia_auth(): sofia_username = get_config('SOFIA_USERNAME') sofia_password = get_config('SOFIA_PASSWORD') return sofia_username, sofia_password def _sofia_api_post(api, option, json, auth): return requests.post(url=api + option, json=json, auth=auth) def _text_processing(text_json, user, password): assert len(text_json) > 0 sofia_api = 'https://sofia.worldmodelers.com' auth = (user, password) # Initialize process resp = _sofia_api_post(api=sofia_api, option='/process_text', json=text_json, auth=auth) res_json = resp.json() # Get status status = _sofia_api_post(api=sofia_api, option='/status', json=res_json, auth=auth) # Check status every two seconds while status.json()['Status'] == 'Processing': time.sleep(2.0) status = _sofia_api_post(api=sofia_api, option='/status', json=res_json, auth=auth) results = _sofia_api_post(api=sofia_api, option='/results', json=res_json, auth=auth) status_code = results.status_code process_status = status.json()['Status'] return results.json(), status_code, process_status