Spaces:

UoS-HGIG
/

HPO_Mapper

Running

App Files Files Community

HPO_Mapper / app.py

akadhim

Update app.py

caa89c2 verified 4 months ago

raw

history blame contribute delete

4.28 kB

	import gradio as gr
	import sqlite3
	import json
	import numpy as np
	from numpy.linalg import norm
	from huggingface_hub import hf_hub_download
	from sentence_transformers import SentenceTransformer
	import os

	# The Hugging Face access token is read from the environment (on Spaces it is
	# configured as a secret). It is used below to download the database file.
	HF_TOKEN = os.environ.get("HF_TOKEN")
	if not HF_TOKEN:
	    raise ValueError(
	        "Missing Hugging Face API token. Please set HF_TOKEN as an environment variable in Hugging Face Secrets."
	    )

	# Sentence-embedding model used to encode the free-text query.
	EMBEDDING_MODEL = "nomic-ai/nomic-embed-text-v1.5"
	embedder = SentenceTransformer(EMBEDDING_MODEL, trust_remote_code=True)

	# Prefer a copy of the database in the working directory; otherwise fetch it
	# from the Hugging Face dataset repository.
	db_filename = "hpo_genes.db"
	db_repo = "UoS-HGIG/hpo_genes"
	db_path = os.path.join(os.getcwd(), db_filename)

	if not os.path.exists(db_path):
	    # `token` replaces the deprecated `use_auth_token` keyword, which newer
	    # huggingface_hub releases no longer accept.
	    db_path = hf_hub_download(
	        repo_id=db_repo,
	        filename=db_filename,
	        repo_type="dataset",
	        token=HF_TOKEN,
	    )


	def find_best_hpo_match(finding, region, threshold):
	    """Return the HPO term whose stored embedding best matches the query.

	    The query text is ``"{finding} in {region}"`` when *region* is truthy,
	    otherwise *finding* alone. It is encoded with the module-level
	    ``embedder`` and compared by cosine similarity against every row of the
	    ``hpo_embeddings`` table in the SQLite database at ``db_path``.

	    Args:
	        finding: Free-text pathological finding.
	        region: Anatomical region; left out of the query when falsy.
	        threshold: Minimum cosine similarity the best row must reach.

	    Returns:
	        ``{"hpo_id": ..., "hpo_term": ...}`` for the highest-scoring row, or
	        ``None`` when the best score is below *threshold*.
	    """
	    query_text = f"{finding} in {region}" if region else finding
	    query_embedding = embedder.encode(query_text)
	    # Loop-invariant: the query norm is the same for every row.
	    query_norm = norm(query_embedding)

	    best_match, best_score = None, -1

	    conn = sqlite3.connect(db_path)
	    try:
	        cursor = conn.cursor()
	        cursor.execute("SELECT hpo_id, hpo_name, embedding FROM hpo_embeddings")

	        # Iterate the cursor directly so rows are streamed instead of being
	        # materialised in one list by fetchall().
	        for hpo_id, hpo_name, embedding_str in cursor:
	            # Embeddings are stored as JSON-encoded arrays.
	            hpo_embedding = np.array(json.loads(embedding_str))
	            denominator = query_norm * norm(hpo_embedding)
	            if denominator == 0:
	                # A zero vector has no direction; skip it rather than
	                # dividing by zero and producing NaN.
	                continue

	            similarity = np.dot(query_embedding, hpo_embedding) / denominator
	            if similarity > best_score:
	                best_score = similarity
	                best_match = {"hpo_id": hpo_id, "hpo_term": hpo_name}
	    finally:
	        # Close the connection even if encoding, parsing or the query fails.
	        conn.close()

	    return best_match if best_score >= threshold else None


	def get_genes_for_hpo(hpo_id):
	    """Return the genes recorded for *hpo_id* in the ``hpo_gene`` table.

	    Args:
	        hpo_id: HPO identifier to look up in the database at ``db_path``.

	    Returns:
	        The ``genes`` column split on ``", "`` as a list of strings, or an
	        empty list when there is no row or the stored value is NULL/empty.
	    """
	    conn = sqlite3.connect(db_path)
	    try:
	        cursor = conn.cursor()
	        cursor.execute("SELECT genes FROM hpo_gene WHERE hpo_id = ?", (hpo_id,))
	        row = cursor.fetchone()
	    finally:
	        # Release the connection even if the query raises.
	        conn.close()

	    # Guard against both a missing row and a NULL/empty genes value, which
	    # would otherwise raise on None.split or yield [""].
	    if not row or not row[0]:
	        return []
	    return row[0].split(", ")


	def hpo_mapper_ui(finding, region, threshold):
	    """Map a finding (and optional region) to an HPO term for the UI.

	    Args:
	        finding: Free-text pathological finding entered by the user.
	        region: Optional anatomical region entered by the user.
	        threshold: Minimum similarity passed on to ``find_best_hpo_match``.

	    Returns:
	        A 3-tuple of strings ``(hpo_id, hpo_term, genes)`` where *genes* is a
	        ``", "``-joined list. When the finding is blank or no match is found,
	        the first element carries a message and the other two are empty.
	    """
	    # Normalise the inputs so that None or whitespace-only text is treated
	    # as empty rather than being embedded and matched.
	    finding = (finding or "").strip()
	    region = (region or "").strip()

	    if not finding:
	        return "Please enter a pathological finding.", "", ""

	    match = find_best_hpo_match(finding, region, threshold)
	    if not match:
	        return "No match found.", "", ""

	    genes = get_genes_for_hpo(match["hpo_id"])
	    return match["hpo_id"], match["hpo_term"], ", ".join(genes)


	# Input widgets, in the order hpo_mapper_ui receives its arguments.
	_input_widgets = [
	    gr.Textbox(label="Pathological Finding"),
	    gr.Textbox(label="Anatomical Region (optional)"),
	    gr.Slider(0.0, 1.0, step=0.01, value=0.76, label="Similarity Threshold"),
	]

	# Output widgets, in the order hpo_mapper_ui returns its values.
	_output_widgets = [
	    gr.Textbox(label="HPO ID"),
	    gr.Textbox(label="HPO Term"),
	    gr.Textbox(label="Disease genes annotated as being associated with this HPO term"),
	]

	# Usage instructions followed by the references shown under the title.
	_description = (
	    'Enter a pathological finding (e.g., "chronic inflammation") and anatomical region '
	    '(e.g., "terminal ileum") to map it to the closest Human Phenotype Ontology (HPO) '
	    'term and retrieve genes annotated as being associated with this HPO term.\n\n'
	    'References:\n'
	    'Kadhim, A. Z., Green, Z., Nazari, I., Baker, J., George, M., Heinson, A., Stammers, M., Kipps, C., Beattie, R. M., Ashton, J. J., & Ennis, S. (2025).\n'
	    'Application of generative artificial intelligence to utilise unstructured clinical data for acceleration of inflammatory bowel disease research.\n'
	    'medRxiv. [https://doi.org/10.1101/2025.03.07.25323569](https://doi.org/10.1101/2025.03.07.25323569)\n\n'
	    'Gargano, M. A., Matentzoglu, N., Coleman, B., Addo-Lartey, E. B., Anagnostopoulos, A. V., Anderton, J., Avillach, P., Bagley, A. M., Bakštein, E., Balhoff, J. P., Baynam, G., Bello, S. M., Berk, M., Bertram, H., Bishop, S., Blau, H., Bodenstein, D. F., Botas, P., Boztug, K., Čady, J., … Robinson, P. N. (2024)\n'
	    'The Human Phenotype Ontology in 2024: phenotypes around the world.\n'
	    'Nucleic Acids Research [https://doi.org/10.1093/nar/gkad1005](https://doi.org/10.1093/nar/gkad1005)\n\n'
	    'HPO to gene mappings obtained from [Jax](https://hpo.jax.org/data/annotations)'
	)

	# Gradio interface wiring the three inputs to the three outputs.
	demo = gr.Interface(
	    fn=hpo_mapper_ui,
	    inputs=_input_widgets,
	    outputs=_output_widgets,
	    title="Human Phenotype Ontology (HPO) Mapper",
	    description=_description,
	)

	# Start the web app only when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch()