from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str
import os
import re
import csv
import xml.etree.ElementTree as ET
import requests
# Python3
try:
from functools import lru_cache
# Python2
except ImportError:
from functools32 import lru_cache
from indra.util import read_unicode_csv, UnicodeXMLTreeBuilder as UTB
# Base URL of the HGNC REST "fetch" service; get_hgnc_entry appends
# 'hgnc_id/<id>' to it to request a single entry.
hgnc_url = 'http://rest.genenames.org/fetch/'
def get_uniprot_id(hgnc_id):
    """Look up the UniProt ID recorded for an HGNC ID.

    Parameters
    ----------
    hgnc_id : str
        The HGNC ID to look up. This is the numeric identifier passed as a
        string, not the HGNC gene symbol.

    Returns
    -------
    uniprot_id : str or None
        The UniProt ID stored for the HGNC ID, or None if the HGNC ID is
        not in the table or its stored UniProt ID is empty.
    """
    # A missing key and an empty stored value both collapse to None.
    return uniprot_ids.get(hgnc_id) or None
def get_entrez_id(hgnc_id):
    """Look up the Entrez ID recorded for an HGNC ID.

    Parameters
    ----------
    hgnc_id : str
        The HGNC ID to look up. This is the numeric identifier passed as a
        string, not the HGNC gene symbol.

    Returns
    -------
    entrez_id : str or None
        The Entrez ID stored for the HGNC ID, or None if the HGNC ID is
        not in the table or its stored Entrez ID is empty.
    """
    stored_id = entrez_ids.get(hgnc_id)
    # The table may hold an empty string; report that as None as well.
    return stored_id if stored_id else None
def get_hgnc_from_entrez(entrez_id):
    """Return the HGNC ID corresponding to the given Entrez ID.

    The Entrez table is scanned linearly and the first HGNC ID whose stored
    Entrez ID equals `entrez_id` is returned. HGNC IDs listed in
    `hgnc_withdrawn` are skipped.

    Parameters
    ----------
    entrez_id : str
        The Entrez ID to be converted, a number passed as a string.

    Returns
    -------
    hgnc_id : str or None
        The HGNC ID corresponding to the given Entrez ID, or None if
        `entrez_id` is empty or no non-withdrawn HGNC entry maps to it.
    """
    # Entries without an Entrez ID are stored with an empty string, so an
    # empty query must not be allowed to match them.
    if not entrez_id:
        return None
    for hgnc_id, mapped_entrez_id in entrez_ids.items():
        if mapped_entrez_id == entrez_id and hgnc_id not in hgnc_withdrawn:
            return hgnc_id
    return None
def get_hgnc_name(hgnc_id):
    """Resolve an HGNC ID to its HGNC symbol.

    The local table is consulted first; only IDs missing from it are
    looked up through the HGNC web service.

    Parameters
    ----------
    hgnc_id : str
        The HGNC ID to be converted.

    Returns
    -------
    hgnc_name : str or None
        The HGNC symbol for the given HGNC ID, or None if the ID is not in
        the local table and the web service returns no entry or an entry
        without a symbol.
    """
    if hgnc_id in hgnc_names:
        return hgnc_names[hgnc_id]

    # Unknown locally: fall back to the web service.
    entry = get_hgnc_entry(hgnc_id)
    if entry is None:
        return None
    symbol_tag = entry.find("result/doc/str[@name='symbol']")
    if symbol_tag is None:
        return None
    return symbol_tag.text.strip()
def get_hgnc_id(hgnc_name):
    """Resolve an HGNC symbol to its HGNC ID.

    Parameters
    ----------
    hgnc_name : str
        The HGNC symbol to be converted. Example: BRAF

    Returns
    -------
    hgnc_id : str or None
        The HGNC ID stored for the symbol, or None if the symbol is not in
        the table.
    """
    hgnc_id = hgnc_ids.get(hgnc_name)
    return hgnc_id
def get_hgnc_from_mouse(mgi_id):
    """Map an MGI mouse gene ID to the corresponding HGNC ID.

    Parameters
    ----------
    mgi_id : str
        The MGI ID to be converted, with or without a leading 'MGI:'.
        Example: "2444934"

    Returns
    -------
    hgnc_id : str or None
        The HGNC ID stored for the MGI ID, or None if the MGI ID is not in
        the table.
    """
    # The table is keyed by the bare number, so drop the namespace prefix.
    bare_id = mgi_id[4:] if mgi_id.startswith('MGI:') else mgi_id
    return mouse_map.get(bare_id)
def get_hgnc_from_rat(rgd_id):
    """Map an RGD rat gene ID to the corresponding HGNC ID.

    Parameters
    ----------
    rgd_id : str
        The RGD ID to be converted, with or without a leading 'RGD:'.
        Example: "1564928"

    Returns
    -------
    hgnc_id : str or None
        The HGNC ID stored for the RGD ID, or None if the RGD ID is not in
        the table.
    """
    # The table is keyed by the bare number, so drop the namespace prefix.
    has_prefix = rgd_id.startswith('RGD:')
    bare_id = rgd_id[4:] if has_prefix else rgd_id
    return rat_map.get(bare_id)
def get_rat_id(hgnc_id):
    """Map an HGNC ID back to an RGD rat gene ID.

    The rat table is keyed by RGD ID, so this is a linear scan that stops
    at the first RGD ID whose stored HGNC ID equals `hgnc_id`.

    Parameters
    ----------
    hgnc_id : str
        The HGNC ID to be converted.

    Returns
    -------
    rgd_id : str or None
        The first RGD ID (as stored in the table, i.e. without an 'RGD:'
        prefix) that maps to the HGNC ID, or None if there is none.
    """
    return next(
        (rgd_id for rgd_id, mapped in rat_map.items() if mapped == hgnc_id),
        None)
def get_mouse_id(hgnc_id):
    """Map an HGNC ID back to an MGI mouse gene ID.

    The mouse table is keyed by MGI ID, so this is a linear scan that stops
    at the first MGI ID whose stored HGNC ID equals `hgnc_id`.

    Parameters
    ----------
    hgnc_id : str
        The HGNC ID to be converted.

    Returns
    -------
    mgi_id : str or None
        The first MGI ID (as stored in the table, i.e. without an 'MGI:'
        prefix) that maps to the HGNC ID, or None if there is none.
    """
    matching_ids = (mgi_id for mgi_id, mapped in mouse_map.items()
                    if mapped == hgnc_id)
    return next(matching_ids, None)
# Seconds to wait for the HGNC web service before giving up on a request.
_HGNC_REQUEST_TIMEOUT = 30


@lru_cache(maxsize=1000)
def get_hgnc_entry(hgnc_id):
    """Return the HGNC entry for the given HGNC ID from the web service.

    Return values, including None for a non-200 response, are memoized by
    `lru_cache` for up to 1000 distinct HGNC IDs. Calls that raise are not
    cached.

    Parameters
    ----------
    hgnc_id : str
        The HGNC ID to be converted.

    Returns
    -------
    xml_tree : xml.etree.ElementTree.Element or None
        The root element of the parsed XML response for the given HGNC ID,
        or None if the service did not answer with HTTP status 200.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails at the network level, including when the
        service does not respond within the timeout.
    """
    url = hgnc_url + 'hgnc_id/%s' % hgnc_id
    headers = {'Accept': '*/*'}
    # requests has no default timeout; without one an unresponsive server
    # would block the caller indefinitely.
    res = requests.get(url, headers=headers, timeout=_HGNC_REQUEST_TIMEOUT)
    if res.status_code != 200:
        return None
    xml_tree = ET.XML(res.content, parser=UTB())
    return xml_tree
def _map_prefixed_ids(id_field, prefix, id_map, hgnc_id):
    """Record each ID of a ', '-separated field in id_map as -> hgnc_id."""
    if not id_field:
        return
    for other_id in id_field.split(', '):
        # Keys are stored without their namespace prefix (e.g. 'MGI:').
        if other_id.startswith(prefix):
            other_id = other_id[len(prefix):]
        id_map[other_id] = hgnc_id


def _read_hgnc_maps():
    """Build the HGNC lookup tables from the bundled hgnc_entries.tsv file.

    The file is read from ../resources/hgnc_entries.tsv relative to this
    module as tab-separated, UTF-8 encoded rows. Per row, the columns used
    are: 0 = HGNC ID (its first five characters, presumably 'HGNC:', are
    dropped), 1 = symbol, 2 = description, 3 = status, 5 = Entrez ID,
    6 = UniProt ID, 7 = MGI ID(s), 8 = RGD ID(s).

    Returns
    -------
    tuple
        (hgnc_names, hgnc_ids, hgnc_withdrawn, uniprot_ids, entrez_ids,
        mouse_map, rat_map) where

        - hgnc_names maps HGNC ID to symbol; for withdrawn entries it maps
          to the replacement symbol named in the description,
        - hgnc_ids maps approved symbols to HGNC IDs,
        - hgnc_withdrawn is the list of withdrawn HGNC IDs,
        - uniprot_ids and entrez_ids map every HGNC ID to the (possibly
          empty) UniProt / Entrez ID of its row,
        - mouse_map and rat_map map prefix-free MGI / RGD IDs to HGNC IDs.
    """
    hgnc_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             os.pardir, 'resources', 'hgnc_entries.tsv')
    csv_rows = read_unicode_csv(hgnc_file, delimiter='\t', encoding='utf-8')
    withdrawn_pattern = re.compile(r'symbol withdrawn, see ([^ ]*)')
    hgnc_names = {}
    hgnc_ids = {}
    hgnc_withdrawn = []
    uniprot_ids = {}
    entrez_ids = {}
    mouse_map = {}
    rat_map = {}
    for row in csv_rows:
        hgnc_id = row[0][5:]
        hgnc_status = row[3]
        if hgnc_status == 'Approved':
            hgnc_name = row[1]
            hgnc_names[hgnc_id] = hgnc_name
            hgnc_ids[hgnc_name] = hgnc_id
        elif hgnc_status == 'Symbol Withdrawn':
            hgnc_withdrawn.append(hgnc_id)
            # The description normally names the replacement symbol; if it
            # does not, keep the entry marked as withdrawn but record no
            # name instead of failing the whole module import.
            m = withdrawn_pattern.match(row[2])
            if m is not None:
                hgnc_names[hgnc_id] = m.group(1)
        # These are stored for every row regardless of status, and may be
        # empty strings.
        uniprot_ids[hgnc_id] = row[6]
        entrez_ids[hgnc_id] = row[5]
        _map_prefixed_ids(row[7], 'MGI:', mouse_map, hgnc_id)
        _map_prefixed_ids(row[8], 'RGD:', rat_map, hgnc_id)
    return (hgnc_names, hgnc_ids, hgnc_withdrawn,
            uniprot_ids, entrez_ids, mouse_map, rat_map)
# Populate the module-level lookup tables once, at import time.
(hgnc_names, hgnc_ids, hgnc_withdrawn,
 uniprot_ids, entrez_ids, mouse_map, rat_map) = _read_hgnc_maps()