from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str
import re
import sys
import getopt
import xml.dom.minidom
import logging
import requests

# Module-level logger; send_query and save_xml report errors through it.
logger = logging.getLogger('trips')

# URL that send_query passes to requests.get.
# NOTE(review): this is an empty string here, so a request cannot succeed
# until it is set to the actual TRIPS service endpoint -- confirm the
# intended value.
trips_url = ''

def send_query(text, query_args=None):
    """Send a query to the TRIPS web service.

    Parameters
    ----------
    text : str
        The text to be processed.
    query_args : Optional[dict]
        A dictionary of arguments to be passed with the query. The
        dictionary itself is left untouched: the text is added under the
        'input' key of a copy, overriding any 'input' entry already there.

    Returns
    -------
    html : str
        The HTML result returned by the web service, or an empty string if
        the service answered with a status code other than 200.
    """
    # Build the request parameters on a copy so that the caller's
    # dictionary does not silently gain (or have overwritten) an 'input' key.
    params = dict(query_args) if query_args else {}
    params['input'] = text
    res = requests.get(trips_url, params=params, timeout=3600)
    if res.status_code != 200:
        logger.error('Problem with TRIPS query: status code %s',
                     res.status_code)
        return ''
    # Gets unicode content
    return res.text
def get_xml(html):
    """Pull the EKB XML out of the HTML produced by the TRIPS web service.

    Parameters
    ----------
    html : str
        The HTML output from the TRIPS web service.

    Returns
    -------
    str
        An XML document made of an XML declaration followed by a plain
        ``<ekb>`` element that wraps the content of the first ``<ekb ...>``
        element found in the HTML (attributes of the original tag are not
        carried over). If no such element is found, the ``<ekb>`` element
        is empty.
    """
    # DOTALL lets the content span several lines; the non-greedy groups
    # stop at the first closing tag.
    found = re.search(r'<ekb.*?>(.*?)</ekb>', html, re.MULTILINE | re.DOTALL)
    inner = found.group(1) if found else ''
    return ''.join((
        '<?xml version="1.0" encoding="utf-8" standalone="yes"?><ekb>',
        inner,
        '</ekb>',
    ))
def save_xml(xml_str, file_name, pretty=True):
    """Save the TRIPS EKB XML in a file.

    Parameters
    ----------
    xml_str : str
        The TRIPS EKB XML string to be saved.
    file_name : str
        The name of the file to save the result in.
    pretty : Optional[bool]
        If True, the XML is pretty printed.

    Notes
    -----
    If the file cannot be opened for writing, an error is logged and the
    function returns without raising. Errors raised while parsing or
    writing the XML propagate to the caller.
    """
    try:
        fh = open(file_name, 'wt')
    except IOError:
        logger.error('Could not open %s for writing.' % file_name)
        return
    # The with block guarantees the handle is closed even if parsing or
    # writing raises (e.g. on malformed XML when pretty printing).
    with fh:
        if pretty:
            xmld = xml.dom.minidom.parseString(xml_str)
            fh.write(xmld.toprettyxml())
        else:
            fh.write(xml_str)
if __name__ == '__main__':
    # Command line entry point: process either a string or the contents of
    # a text file with TRIPS and save the extracted EKB XML to a file.
    # Defaults used when the corresponding option is not given.
    filemode = False
    text = 'Active BRAF phosphorylates MEK1 at Ser222.'
    outfile_name = 'braf_test.xml'

    opts, _ = getopt.getopt(sys.argv[1:], 's:f:o:h',
                            ['string=', 'file=', 'output=', 'help'])
    for o, p in opts:
        if o in ['-h', '--help']:
            print('String mode: python --string "RAS binds GTP" --output text.xml')
            print('File mode: python --file test.txt --output text.xml')
            sys.exit()
        elif o in ['-s', '--string']:
            text = p
        elif o in ['-f', '--file']:
            filemode = True
            infile_name = p
        elif o in ['-o', '--output']:
            outfile_name = p

    if filemode:
        try:
            fh = open(infile_name, 'rt')
        except IOError:
            print('Could not open %s.' % infile_name)
            sys.exit()
        # Read the text to be processed; the original code assigned the
        # result of fh.close() (None) here and never read the file.
        with fh:
            text = fh.read()
        print('Parsing contents of %s...' % infile_name)
    else:
        print('Parsing string: %s' % text)

    html = send_query(text)
    # Do not call this variable `xml`: at module level that would rebind the
    # imported `xml` package, which save_xml needs for pretty printing.
    xml_str = get_xml(html)
    save_xml(xml_str, outfile_name)