Source code for indra.sources.signor.api

from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str
import sys
import logging
import requests
from io import StringIO, BytesIO
from collections import namedtuple
from .processor import SignorProcessor
from indra.util import read_unicode_csv, read_unicode_csv_fileobj

logger = logging.getLogger(__name__)

# Column names of a Signor interaction row. Order matters: row values are
# unpacked positionally into the namedtuple defined below.
_signor_fields = [
    # First entity of the interaction
    'ENTITYA', 'TYPEA', 'IDA', 'DATABASEA',
    # Second entity of the interaction
    'ENTITYB', 'TYPEB', 'IDB', 'DATABASEB',
    # Effect / mechanism columns
    'EFFECT', 'MECHANISM', 'RESIDUE', 'SEQUENCE',
    # Context columns
    'TAX_ID', 'CELL_DATA', 'TISSUE_DATA',
    # Complex and modification columns
    'MODULATOR_COMPLEX', 'TARGET_COMPLEX',
    'MODIFICATIONA', 'MODASEQ', 'MODIFICATIONB', 'MODBSEQ',
    # Evidence / curation columns
    'PMID', 'DIRECT', 'NOTES', 'ANNOTATOR', 'SENTENCE', 'SCORE',
    'SIGNOR_ID',
]


# Lightweight record type holding one row, with one attribute per column above.
_SignorRow_ = namedtuple('SignorRow', _signor_fields)


def process_from_file(signor_data_file, signor_complexes_file=None,
                      delimiter='\t'):
    """Process Signor interaction data from CSV files.

    Parameters
    ----------
    signor_data_file : str
        Path to the Signor interaction data file in CSV format.
    signor_complexes_file : Optional[str]
        Path to the Signor complexes data in CSV format. If omitted, no
        complex mapping is loaded and a warning is logged stating that
        Statements involving complexes will not be expanded to members.
        This file is always read with ``;`` as its delimiter, independent
        of the `delimiter` argument.
    delimiter : Optional[str]
        The delimiter used in the interaction data file. Older data files
        use ; as a delimiter whereas more recent ones use tabs.

    Returns
    -------
    indra.sources.signor.SignorProcessor
        SignorProcessor containing Statements extracted from the Signor data.
    """
    # Both files are read with skiprows=1 (presumably to drop a header row).
    interaction_rows = read_unicode_csv(signor_data_file,
                                        delimiter=delimiter, skiprows=1)

    # Without a complexes file there is nothing more to load: warn and
    # hand over the interaction rows alone.
    if not signor_complexes_file:
        logger.warning('Signor complex mapping file not provided, Statements '
                       'involving complexes will not be expanded to members.')
        return _processor_from_data(interaction_rows, None)

    complex_rows = read_unicode_csv(signor_complexes_file, delimiter=';',
                                    skiprows=1)
    return _processor_from_data(interaction_rows, complex_rows)


def process_from_web(signor_data_file=None, signor_complexes_file=None):
    """Process Signor interaction data from the web.

    The interaction data and the complex data are each requested from the
    Signor website (signor.uniroma2.it) with an HTTP POST and parsed directly
    from the response, so no intermediate local file is required.

    Parameters
    ----------
    signor_data_file : Optional[str]
        If specified, the interaction data will be written to this file.
    signor_complexes_file : Optional[str]
        If specified, the complex data will be written to this file.

    Returns
    -------
    indra.sources.signor.SignorProcessor
        SignorProcessor containing Statements extracted from the Signor data.
    """
    # NOTE(review): neither request sets a timeout, so a stalled server can
    # block the caller indefinitely -- consider passing `timeout=`.

    # Interaction data: requested for human, parsed as tab-delimited.
    interaction_form = {'organism': 'human', 'format': 'csv',
                        'submit': 'Download'}
    interaction_res = requests.post(
        'https://signor.uniroma2.it/download_entity.php',
        data=interaction_form)
    data_iter = _handle_response(interaction_res, '\t',
                                 fname=signor_data_file)

    # Complex data: parsed as semicolon-delimited. It is only requested once
    # the interaction response has been handled successfully.
    complexes_form = {'submit': 'Download complex data'}
    complexes_res = requests.post(
        'https://signor.uniroma2.it/download_complexes.php',
        data=complexes_form)
    complexes_iter = _handle_response(complexes_res, ';',
                                      fname=signor_complexes_file)

    return _processor_from_data(data_iter, complexes_iter)


def _handle_response(res, delimiter, fname=None):
    """Get an iterator over the CSV data from the response.

    Parameters
    ----------
    res : requests.Response
        The HTTP response holding the CSV payload. Only its `status_code`,
        `content` and `text` attributes are used.
    delimiter : str
        The column delimiter passed on to the CSV reader.
    fname : Optional[str]
        If given, the downloaded payload is also written to this path.

    Returns
    -------
    The value returned by `read_unicode_csv_fileobj` for the payload, read
    with ``skiprows=1``.

    Raises
    ------
    Exception
        If the response status code is not 200.
    """
    if res.status_code != 200:
        raise Exception('Could not download Signor data.')

    if sys.version_info[0] < 3:
        # Python 2 -- csv.reader will need bytes
        csv_io = BytesIO(res.content)
        # Optionally write to file
        if fname:
            with open(fname, 'wb') as fh:
                fh.write(res.content)
    else:
        # Python 3 -- csv.reader needs str
        csv_io = StringIO(res.text)
        # Optionally write to file. The encoding is given explicitly:
        # relying on the locale default can raise UnicodeEncodeError on
        # non-ASCII characters or yield a file that is not UTF-8.
        # NOTE(review): assumes the file is later read back as UTF-8 --
        # confirm against read_unicode_csv's default encoding.
        if fname:
            with open(fname, 'wt', encoding='utf-8') as fh:
                fh.write(res.text)

    return read_unicode_csv_fileobj(csv_io, delimiter=delimiter, skiprows=1)


def _processor_from_data(data_iter, complexes_iter):
    """Build a SignorProcessor from iterators over CSV rows.

    Parameters
    ----------
    data_iter : iterable
        Iterable of interaction rows, each a sequence of strings with one
        value per Signor column.
    complexes_iter : Optional[iterable]
        Iterable of complex rows. Column 0 is used as the key of the complex
        map and column 2 is split on ``', '`` to obtain its list of values.
        If None (or otherwise falsy), the complex map stays empty.

    Returns
    -------
    SignorProcessor
        Processor constructed from the list of row namedtuples and the
        complex map.
    """
    # Process into a list of SignorRow namedtuples
    # Strip off any funky \xa0 whitespace characters
    # Completely empty rows (e.g. what a CSV reader yields for a blank line)
    # are skipped; they would otherwise fail the positional unpacking.
    data = [_SignorRow_(*[f.strip() for f in r]) for r in data_iter if r]

    complex_map = {}
    if complexes_iter:
        for crow in complexes_iter:
            # Skip empty rows here as well; indexing them would fail.
            if not crow:
                continue
            complex_map[crow[0]] = [c for c in crow[2].split(', ') if c]
    return SignorProcessor(data, complex_map)