# Source code for indra.sources.creeds.api

# -*- coding: utf-8 -*-

"""API for CREEDS."""

import json
import requests
from pathlib import Path
from typing import Union

from .processor import (
    CREEDSChemicalProcessor,
    CREEDSDiseaseProcessor,
    CREEDSGeneProcessor,
    CREEDSProcessor,
)

# Common prefix of every CREEDS download URL listed below.
BASE_URL = "http://amp.pharm.mssm.edu/CREEDS/download"

# Maps each supported entity type to the URL of its v1.0 JSON file.
urls = dict(
    gene=f"{BASE_URL}/single_gene_perturbations-v1.0.json",
    disease=f"{BASE_URL}/disease_signatures-v1.0.json",
    chemical=f"{BASE_URL}/single_drug_perturbations-v1.0.json",
)

# Maps each supported entity type to the processor class that handles it.
processors = dict(
    gene=CREEDSGeneProcessor,
    disease=CREEDSDiseaseProcessor,
    chemical=CREEDSChemicalProcessor,
)

# Names exported by a star-import of this module.
__all__ = ["process_from_file", "process_from_web"]


def process_from_web(
    entity_type: str,
    *,
    timeout: float = 60.0,
) -> CREEDSProcessor:
    """Process statements from CREEDS by automatically downloading them.

    Parameters
    ----------
    entity_type :
        Either 'gene', 'disease', or 'chemical' to specify
        which dataset to get.
    timeout :
        Number of seconds to wait for the server before giving up;
        passed through to :func:`requests.get`. It bounds each wait on
        the connection, not the total download time.

    Returns
    -------
    :
        A processor with pre-extracted statements.

    Raises
    ------
    KeyError
        If ``entity_type`` is not a key of ``urls``.
    requests.exceptions.HTTPError
        If the server answers with an error status code.
    requests.exceptions.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    url = urls[entity_type]
    # Without a timeout, requests waits indefinitely on an unresponsive
    # server, which would hang the caller.
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    records = res.json()
    return process_records(records, entity_type)


def process_from_file(
    path: Union[str, Path],
    entity_type: str,
) -> CREEDSProcessor:
    """Process statements from CREEDS in a file.

    Parameters
    ----------
    path :
        The path to a JSON file containing records for the CREEDS data
    entity_type :
        Either 'gene', 'disease', or 'chemical' to specify
        which dataset to get.

    Returns
    -------
    :
        A processor with pre-extracted statements.

    Raises
    ------
    OSError
        If the file at ``path`` cannot be opened.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    # Read explicitly as UTF-8 (the standard JSON encoding) rather than
    # the platform's locale-dependent default, so that non-ASCII text in
    # the records decodes the same way on every system.
    with open(path, encoding="utf-8") as file:
        records = json.load(file)
    return process_records(records, entity_type)


def process_records(records, entity_type):
    """Build a CREEDS processor from records and extract its statements.

    Parameters
    ----------
    records :
        A list of records from the CREEDS data
    entity_type :
        Either 'gene', 'disease', or 'chemical'; selects which entry of
        ``processors`` is instantiated with the records.

    Returns
    -------
    :
        The processor, returned after its ``extract_statements()``
        method has been called.
    """
    # Look up the class for this entity type and instantiate it directly.
    creeds_processor = processors[entity_type](records)
    creeds_processor.extract_statements()
    return creeds_processor