Source code for indra.sources.biopax.api

# Public API of this module: the names exported by a star-import.
__all__ = [
    'process_pc_neighborhood',
    'process_pc_pathsbetween',
    'process_pc_pathsfromto',
    'process_owl',
    'process_owl_str',
    'process_owl_gz',
    'process_model',
]

import itertools
from pybiopax import model_from_owl_file, model_from_owl_str, \
    model_from_pc_query, model_from_owl_gz
from .processor import BiopaxProcessor

# NOTE(review): presumably the Pathway Commons datasource identifiers that
# can be passed as ``database_filter``; this list is not referenced by any
# function visible in this module -- confirm against callers.
default_databases = [
    'wp',
    'smpdb',
    'reconx',
    'reactome',
    'psp',
    'pid',
    'panther',
    'netpath',
    'msigdb',
    'mirtarbase',
    'kegg',
    'intact',
    'inoh',
    'humancyc',
    'hprd',
    'drugbank',
    'dip',
    'corum',
]


def process_pc_neighborhood(gene_names, neighbor_limit=1,
                            database_filter=None):
    """Return a BiopaxProcessor for a PathwayCommons neighborhood query.

    The neighborhood query finds the neighborhood around a set of source
    genes.

    http://www.pathwaycommons.org/pc2/#graph

    http://www.pathwaycommons.org/pc2/#graph_kind

    Parameters
    ----------
    gene_names : list
        A list of HGNC gene symbols to search the neighborhood of.
        Examples: ['BRAF'], ['BRAF', 'MAP2K1']
    neighbor_limit : Optional[int]
        The number of steps to limit the size of the neighborhood around
        the gene names being queried. Default: 1
    database_filter : Optional[list]
        A list of database identifiers to which the query is restricted.
        Examples: ['reactome'], ['biogrid', 'pid', 'psp']
        If not given, all databases are used in the query. For a full
        list of databases see http://www.pathwaycommons.org/pc2/datasources

    Returns
    -------
    BiopaxProcessor or None
        A BiopaxProcessor containing the obtained BioPAX model in its
        model attribute and a list of extracted INDRA Statements from the
        model in its statements attribute. None is returned if the query
        did not yield a model.
    """
    pc_model = model_from_pc_query('neighborhood',
                                   source=gene_names,
                                   limit=neighbor_limit,
                                   datasource=database_filter)
    # Nothing to process if the query produced no model.
    if pc_model is None:
        return None
    return process_model(pc_model)
def process_pc_pathsbetween(gene_names, neighbor_limit=1,
                            database_filter=None, block_size=None):
    """Return a BiopaxProcessor for a PathwayCommons paths-between query.

    The paths-between query finds the paths between a set of genes. Here
    source gene names are given in a single list and all directions of paths
    between these genes are considered.

    http://www.pathwaycommons.org/pc2/#graph

    http://www.pathwaycommons.org/pc2/#graph_kind

    Parameters
    ----------
    gene_names : list
        A list of HGNC gene symbols to search for paths between.
        Examples: ['BRAF', 'MAP2K1']
    neighbor_limit : Optional[int]
        The number of steps to limit the length of the paths between
        the gene names being queried. In blocked mode the same limit is
        forwarded to every sub-query. Default: 1
    database_filter : Optional[list]
        A list of database identifiers to which the query is restricted.
        Examples: ['reactome'], ['biogrid', 'pid', 'psp']
        If not given, all databases are used in the query. For a full
        list of databases see http://www.pathwaycommons.org/pc2/datasources
    block_size : Optional[int]
        Large paths-between queries (above ~60 genes) can error on the server
        side. In this case, the query can be replaced by a series of
        smaller paths-between and paths-from-to queries each of which contains
        block_size genes.

    Returns
    -------
    bp : BiopaxProcessor or None
        Without ``block_size``: a BiopaxProcessor containing the obtained
        BioPAX model in bp.model, or None if the query yielded no model.
        With ``block_size``: the processor of the first sub-query that
        yielded a model, whose ``statements`` attribute is replaced by the
        concatenation of the statements from all successful sub-queries
        (its ``model`` attribute therefore only covers that first
        sub-query). None if no sub-query yielded a model.
    """
    if not block_size:
        model = model_from_pc_query('pathsbetween', source=gene_names,
                                    limit=neighbor_limit,
                                    datasource=database_filter)
        if model is None:
            return None
        return process_model(model)

    gene_blocks = [gene_names[i:i + block_size]
                   for i in range(0, len(gene_names), block_size)]
    combined_bp = None
    stmts = []
    # Run pathsfromto between pairs of blocks and pathsbetween
    # within each block. This breaks up a single call with N genes into
    # (N/block_size)*(N/block_size) calls with block_size genes
    for genes1, genes2 in itertools.product(gene_blocks, repeat=2):
        if genes1 == genes2:
            bp = process_pc_pathsbetween(genes1,
                                         neighbor_limit=neighbor_limit,
                                         database_filter=database_filter,
                                         block_size=None)
        else:
            bp = process_pc_pathsfromto(genes1, genes2,
                                        neighbor_limit=neighbor_limit,
                                        database_filter=database_filter)
        # A sub-query returns None when no model was obtained; skip it
        # instead of failing on the attribute access below.
        if bp is None:
            continue
        if combined_bp is None:
            combined_bp = bp
        stmts += bp.statements
    # Hand the accumulated statements back to the caller; previously they
    # were collected and then discarded.
    if combined_bp is not None:
        combined_bp.statements = stmts
    return combined_bp
def process_pc_pathsfromto(source_genes, target_genes, neighbor_limit=1,
                           database_filter=None):
    """Return a BiopaxProcessor for a PathwayCommons paths-from-to query.

    The paths-from-to query finds the paths from a set of source genes to
    a set of target genes.

    http://www.pathwaycommons.org/pc2/#graph

    http://www.pathwaycommons.org/pc2/#graph_kind

    Parameters
    ----------
    source_genes : list
        A list of HGNC gene symbols that are the sources of paths being
        searched for.
        Examples: ['BRAF', 'RAF1', 'ARAF']
    target_genes : list
        A list of HGNC gene symbols that are the targets of paths being
        searched for.
        Examples: ['MAP2K1', 'MAP2K2']
    neighbor_limit : Optional[int]
        The number of steps to limit the length of the paths
        between the source genes and target genes being queried. Default: 1
    database_filter : Optional[list]
        A list of database identifiers to which the query is restricted.
        Examples: ['reactome'], ['biogrid', 'pid', 'psp']
        If not given, all databases are used in the query. For a full
        list of databases see http://www.pathwaycommons.org/pc2/datasources

    Returns
    -------
    bp : BiopaxProcessor or None
        A BiopaxProcessor containing the obtained BioPAX model in bp.model,
        or None if the query did not yield a model.
    """
    pc_model = model_from_pc_query('pathsfromto',
                                   source=source_genes,
                                   target=target_genes,
                                   limit=neighbor_limit,
                                   datasource=database_filter)
    # Nothing to process if the query produced no model.
    if pc_model is None:
        return None
    return process_model(pc_model)
def process_owl(owl_filename, encoding=None):
    """Return a BiopaxProcessor for a BioPAX OWL file.

    Parameters
    ----------
    owl_filename : str
        The name of the OWL file to process.
    encoding : Optional[str]
        The encoding type to be passed to
        :func:`pybiopax.model_from_owl_file`.

    Returns
    -------
    bp : BiopaxProcessor
        A BiopaxProcessor containing the obtained BioPAX model in bp.model.
    """
    # Load the model from the file and hand it straight to the processor.
    return process_model(model_from_owl_file(owl_filename,
                                             encoding=encoding))
def process_owl_gz(owl_gz_filename):
    """Return a BiopaxProcessor for a gzipped BioPAX OWL file.

    Parameters
    ----------
    owl_gz_filename : str
        The name of the gzipped OWL file to process.

    Returns
    -------
    bp : BiopaxProcessor
        A BiopaxProcessor containing the obtained BioPAX model in bp.model.
    """
    # Load the model from the gzipped file and process it in one step.
    return process_model(model_from_owl_gz(owl_gz_filename))
def process_owl_str(owl_str):
    """Return a BiopaxProcessor for a BioPAX OWL string.

    Parameters
    ----------
    owl_str : str
        The string content of an OWL file to process.

    Returns
    -------
    bp : BiopaxProcessor
        A BiopaxProcessor containing the obtained BioPAX model in bp.model.
    """
    # Parse the model from the string and process it in one step.
    return process_model(model_from_owl_str(owl_str))
def process_model(model):
    """Return a BiopaxProcessor for a BioPAX model object.

    Parameters
    ----------
    model : object
        A BioPAX model object, e.g., one returned by the pybiopax
        ``model_from_*`` functions used elsewhere in this module.

    Returns
    -------
    bp : BiopaxProcessor
        A BiopaxProcessor containing the obtained BioPAX model in bp.model.
        ``process_all()`` has already been called on it.
    """
    processor = BiopaxProcessor(model)
    # Run the processor's full processing pass before handing it back.
    processor.process_all()
    return processor