Source code for indra.sources.phosphoelm.api

import csv
import logging

from tqdm import tqdm

from .processor import PhosphoElmProcessor

logger = logging.getLogger(__name__)


def process_from_dump(fname, delimiter='\t'):
    """Process a Phospho.ELM file dump

    The dump can be obtained at http://phospho.elm.eu.org/dataset.html.

    Parameters
    ----------
    fname : str
        File path to the phospho.ELM file dump.
    delimiter : str
        The delimiter to use for csv.reader

    Returns
    -------
    indra.sources.phosphoelm.PhosphoElmProcessor
        An instance of a PhosphoElmProcessor containing the statements
        generated from the file dump
    """
    # newline='' hands raw line endings to the csv module, as its
    # documentation requires; otherwise newlines embedded in quoted fields
    # and '\r\n' terminators may be misinterpreted.
    with open(fname, 'r', newline='') as f:
        csv_reader = csv.reader(f, delimiter=delimiter)
        # All rows are materialized here, so the file can be closed before
        # the processor is built.
        ppelm_json = _get_json_from_entry_rows(csv_reader)
    pep = PhosphoElmProcessor(ppelm_json)
    pep.process_phosphorylations()
    return pep


def _get_json_from_entry_rows(row_iter): """Loop body to generate a json friendly structure""" ppelm_json = [] columns = next(row_iter) logger.info('Processing Phospho.ELM dump') for entry in tqdm(row_iter): row_dict = {c: e for c, e in zip(columns, entry)} ppelm_json.append(row_dict) return ppelm_json