# Source code for indra.sources.gnbr.api

# Public API of this module: a file-based and a web-based entry point for
# each of the four relation types, plus the two generic loaders
# (process_from_files, process_from_web) that they delegate to.
__all__ = ['process_gene_gene', 'process_gene_gene_from_web',
           'process_gene_disease', 'process_gene_disease_from_web',
           'process_chemical_disease', 'process_chemical_disease_from_web',
           'process_chemical_gene', 'process_chemical_gene_from_web',
           'process_from_files', 'process_from_web']

import pandas as pd
import logging
from .processor import GnbrProcessor

# Base URL under which process_from_web builds the locations of the GNBR
# part-i and part-ii files.
base_url = 'https://zenodo.org/record/3459420/files'
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def process_gene_gene(part1_path: str, part2_path: str,
                      indicator_only: bool = True) -> GnbrProcessor:
    """Process gene-gene interactions from local GNBR files.

    Thin wrapper around process_from_files with both agent types fixed
    to 'gene'.

    Parameters
    ----------
    part1_path :
        Path to the first dataset which contains dependency paths and
        themes.
    part2_path :
        Path to the second dataset which contains dependency paths and
        entity pairs.
    indicator_only :
        A switch to filter the data which is part of the flagship path set
        for each theme.

    Returns
    -------
    :
        A GnbrProcessor object which contains a list of extracted INDRA
        Statements in its statements attribute.
    """
    processor = process_from_files(
        part1_path=part1_path,
        part2_path=part2_path,
        first_type='gene',
        second_type='gene',
        indicator_only=indicator_only,
    )
    return processor
def process_chemical_gene(part1_path: str, part2_path: str,
                          indicator_only: bool = True) -> GnbrProcessor:
    """Process chemical-gene interactions from local GNBR files.

    Thin wrapper around process_from_files with the first agent type
    fixed to 'chemical' and the second to 'gene'.

    Parameters
    ----------
    part1_path :
        Path to the first dataset of dependency paths and themes.
    part2_path :
        Path to the second dataset of dependency paths and entity pairs.
    indicator_only :
        A switch to filter the data which is part of the flagship path set
        for each theme.

    Returns
    -------
    :
        A GnbrProcessor object which contains a list of extracted INDRA
        Statements in its statements attribute.
    """
    processor = process_from_files(
        part1_path=part1_path,
        part2_path=part2_path,
        first_type='chemical',
        second_type='gene',
        indicator_only=indicator_only,
    )
    return processor
def process_gene_disease(part1_path: str, part2_path: str,
                         indicator_only: bool = True) -> GnbrProcessor:
    """Process gene-disease interactions from local GNBR files.

    Thin wrapper around process_from_files with the first agent type
    fixed to 'gene' and the second to 'disease'.

    Parameters
    ----------
    part1_path :
        Path to the first dataset which contains dependency paths and
        themes.
    part2_path :
        Path to the second dataset which contains dependency paths and
        entity pairs.
    indicator_only :
        A switch to filter the data which is part of the flagship path set
        for each theme.

    Returns
    -------
    :
        A GnbrProcessor object which contains a list of extracted INDRA
        Statements in its statements attribute.
    """
    processor = process_from_files(
        part1_path=part1_path,
        part2_path=part2_path,
        first_type='gene',
        second_type='disease',
        indicator_only=indicator_only,
    )
    return processor
def process_chemical_disease(part1_path: str, part2_path: str,
                             indicator_only: bool = True) -> GnbrProcessor:
    """Process chemical-disease interactions from local GNBR files.

    Thin wrapper around process_from_files with the first agent type
    fixed to 'chemical' and the second to 'disease'.

    Parameters
    ----------
    part1_path :
        Path to the first dataset which contains dependency paths and
        themes.
    part2_path :
        Path to the second dataset which contains dependency paths and
        entity pairs.
    indicator_only :
        A switch to filter the data which is part of the flagship path set
        for each theme.

    Returns
    -------
    :
        A GnbrProcessor object which contains a list of extracted INDRA
        Statements in its statements attribute.
    """
    processor = process_from_files(
        part1_path=part1_path,
        part2_path=part2_path,
        first_type='chemical',
        second_type='disease',
        indicator_only=indicator_only,
    )
    return processor
def process_from_files(part1_path: str, part2_path: str, first_type: str,
                       second_type: str,
                       indicator_only: bool = True) -> GnbrProcessor:
    """Load the two GNBR tables and extract INDRA Statements from them.

    Both tables are read as tab-separated files with pandas. The part 1
    table is read with pandas' default header handling, the part 2 table
    with ``header=None``. The resulting data frames are handed to a
    GnbrProcessor, whose ``extract_stmts`` method is called before the
    processor is returned.

    Parameters
    ----------
    part1_path :
        Path to the first dataset which contains dependency paths and
        themes.
    part2_path :
        Path to the second dataset which contains dependency paths and
        entity pairs.
    first_type :
        Type of the first agent.
    second_type :
        Type of the second agent.
    indicator_only :
        A switch to filter the data which is part of the flagship path set
        for each theme.

    Returns
    -------
    :
        A GnbrProcessor object which contains a list of extracted INDRA
        Statements in its statements attribute.
    """
    logger.info(f'Loading part 1 table from {part1_path}')
    part1_table = pd.read_csv(part1_path, sep='\t')

    logger.info(f'Loading part 2 table from {part2_path}')
    part2_table = pd.read_csv(part2_path, sep='\t', header=None)

    processor = GnbrProcessor(part1_table, part2_table, first_type,
                              second_type, indicator_only=indicator_only)
    processor.extract_stmts()
    return processor
def process_gene_gene_from_web(indicator_only: bool = True) -> GnbrProcessor:
    """Process gene-gene interactions from the GNBR datasets on the web.

    Thin wrapper around process_from_web with both agent types fixed to
    'gene'.

    Parameters
    ----------
    indicator_only :
        A switch to filter the data which is part of the flagship path set
        for each theme.

    Returns
    -------
    :
        A GnbrProcessor object which contains a list of extracted INDRA
        Statements in its statements attribute.
    """
    processor = process_from_web(
        first_type='gene',
        second_type='gene',
        indicator_only=indicator_only,
    )
    return processor
def process_chemical_gene_from_web(
        indicator_only: bool = True) -> GnbrProcessor:
    """Process chemical-gene interactions from the GNBR datasets on the web.

    Thin wrapper around process_from_web with the first agent type fixed
    to 'chemical' and the second to 'gene'.

    Parameters
    ----------
    indicator_only :
        A switch to filter the data which is part of the flagship path set
        for each theme.

    Returns
    -------
    :
        A GnbrProcessor object which contains a list of extracted INDRA
        Statements in its statements attribute.
    """
    processor = process_from_web(
        first_type='chemical',
        second_type='gene',
        indicator_only=indicator_only,
    )
    return processor
def process_gene_disease_from_web(
        indicator_only: bool = True) -> GnbrProcessor:
    """Process gene-disease interactions from the GNBR datasets on the web.

    Thin wrapper around process_from_web with the first agent type fixed
    to 'gene' and the second to 'disease'.

    Parameters
    ----------
    indicator_only :
        A switch to filter the data which is part of the flagship path set
        for each theme.

    Returns
    -------
    :
        A GnbrProcessor object which contains a list of extracted INDRA
        Statements in its statements attribute.
    """
    processor = process_from_web(
        first_type='gene',
        second_type='disease',
        indicator_only=indicator_only,
    )
    return processor
def process_chemical_disease_from_web(
        indicator_only: bool = True) -> GnbrProcessor:
    """Process chemical-disease interactions from the GNBR datasets on the web.

    Thin wrapper around process_from_web with the first agent type fixed
    to 'chemical' and the second to 'disease'.

    Parameters
    ----------
    indicator_only :
        A switch to filter the data which is part of the flagship path set
        for each theme.

    Returns
    -------
    :
        A GnbrProcessor object which contains a list of extracted INDRA
        Statements in its statements attribute.
    """
    processor = process_from_web(
        first_type='chemical',
        second_type='disease',
        indicator_only=indicator_only,
    )
    return processor
def process_from_web(first_type: str, second_type: str,
                     indicator_only: bool = True) -> GnbrProcessor:
    """Load the GNBR tables for an agent-type pair from their web location.

    The locations of the part-i and part-ii files are built from the
    module-level ``base_url`` and the two agent types, then passed to
    process_from_files, which reads them and runs the extraction.

    Parameters
    ----------
    first_type :
        Type of the first agent.
    second_type :
        Type of the second agent.
    indicator_only :
        A switch to filter the data which is part of the flagship path set
        for each theme.

    Returns
    -------
    :
        A GnbrProcessor object which contains a list of extracted INDRA
        Statements in its statements attribute.
    """
    # Both file names embed the same "<first>-<second>" fragment.
    pair = f'{first_type}-{second_type}'
    part1_url = f'{base_url}/part-i-{pair}-path-theme-distributions.txt.gz'
    part2_url = (f'{base_url}/part-ii-dependency-paths-{pair}'
                 f'-sorted-with-themes.txt.gz')
    return process_from_files(part1_url, part2_url, first_type, second_type,
                              indicator_only=indicator_only)