# Export artifact from the hosting page header (not Python code), kept as a comment:
# Spaces: Running Running
from ast import Str | |
import streamlit as st | |
import numpy as np | |
import pandas as pd | |
from PIL import Image | |
import ujson as json | |
import pickle as pk | |
from collections import Counter | |
import math | |
import sklearn | |
from plotnine import * | |
# -- Page configuration -------------------------------------------------------
apptitle = "PhenoGenius"
st.set_page_config(
    page_title=apptitle,
    page_icon=":genie:",
    layout="wide",
    initial_sidebar_state="auto",
)

# -- Sidebar: branding, short description, and partner logos -------------------
image_pg = Image.open("data/img/phenogenius.png")
st.sidebar.image(image_pg, caption=None, width=100)
st.sidebar.title("PhenoGenius")
st.sidebar.header(
    "Learning phenotypic patterns in genetic diseases by symptom interaction modeling"
)
st.sidebar.markdown(
    """
This webapp presents symptom interaction models in genetic diseases to provide:
- Standardized clinical descriptions
- Interpretable matches between symptoms and genes
Code source is available in GitHub:
[https://github.com/kyauy/PhenoGenius](https://github.com/kyauy/PhenoGenius)
PhenoGenius is a collaborative project from:
"""
)
# Partner logos, rendered below the description.
image_uga = Image.open("data/img/logo-uga.png")
st.sidebar.image(image_uga, caption=None, width=95)
image_seqone = Image.open("data/img/logo-seqone.png")
st.sidebar.image(image_seqone, caption=None, width=95)
image_miai = Image.open("data/img/logoMIAI-rvb.png")
st.sidebar.image(image_miai, caption=None, width=95)
image_chuga = Image.open("data/img/logo-chuga.png")
st.sidebar.image(image_chuga, caption=None, width=60)
def convert_df(df): | |
return df.to_csv(sep="\t").encode("utf-8") | |
def load_data():
    """Load the weighted gene x HPO matrix (rows indexed by gene ID)."""
    return pd.read_csv(
        "data/resources/ohe_all_thesaurus_weighted.tsv.gz",
        sep="\t",
        compression="gzip",
        index_col=0,
    )
def load_umap_cohort():
    """Load precomputed UMAP coordinates for the reference cohort."""
    cohort_coords = pd.read_csv(
        "data/resources/umap_loc_cohort.tsv", sep="\t", index_col=0
    )
    return cohort_coords
def load_cohort():
    """Load the cohort diagnosis table (no index column)."""
    return pd.read_csv("data/resources/cohort_diag.tsv", sep="\t")
def load_nmf_model():
    """Load the fitted 390-component NMF model and the reduced gene matrix.

    Returns:
        (pheno_NMF, reduced): the pickled model object and its projection matrix.
    """
    # NOTE(review): pickle files are loaded from the local data directory only;
    # never point these paths at untrusted input.
    with open("data/resources/pheno_NMF_390_model_42.pkl", "rb") as model_fh:
        pheno_NMF = pk.load(model_fh)
    with open("data/resources/pheno_NMF_390_matrix_42.pkl", "rb") as matrix_fh:
        reduced = pk.load(matrix_fh)
    return pheno_NMF, reduced
def symbol_to_id_to_dict():
    """Build both directions of the NCBI gene mapping from Homo_sapiens.gene_info.

    Returns:
        (symbol_to_id, id_to_symbol): dicts mapping Symbol -> GeneID and
        GeneID -> Symbol, restricted to human entries (#tax_id == 9606).
    """
    gene_info = pd.read_csv("data/resources/Homo_sapiens.gene_info.gz", sep="\t")
    human_genes = gene_info[gene_info["#tax_id"] == 9606]
    symbol_to_id = human_genes.set_index("Symbol")["GeneID"].to_dict()
    id_to_symbol = human_genes.set_index("GeneID")["Symbol"].to_dict()
    return symbol_to_id, id_to_symbol
def load_hp_ontology():
    """Load the HPO ontology dump (dict keyed by HPO ID)."""
    with open("data/resources/hpo_obo.json") as onto_fh:
        return json.load(onto_fh)
def hpo_description_to_id():
    """Invert the global `hp_onto` ontology: human-readable name -> HPO ID.

    If two IDs share a name, the later one in iteration order wins (same as
    the original dict-assignment loop).
    """
    return {info["name"]: hpo_id for hpo_id, info in hp_onto.items()}
def load_cluster_data():
    """Load per-cluster metadata (dict from the cluster_info JSON dump)."""
    with open("data/resources/cluster_info.json") as cluster_fh:
        return json.load(cluster_fh)
def load_topic_data():
    """Load the table describing the 390 groups of interacting symptoms."""
    topic_path = "data/resources/main_topics_hpo_390_42_filtered_norm_004.tsv"
    return pd.read_csv(topic_path, sep="\t", index_col=0)
def load_similarity_dict():
    """Load the HPO term-to-term similarity dict (precomputed, threshold 80%)."""
    with open("data/resources/similarity_dict_threshold_80.json") as sim_fh:
        return json.load(sim_fh)
# @st.cache_data( | |
# hash_funcs={"Pickle": lambda _: None}, max_entries=50 | |
# ) | |
# def load_projection(): | |
# with open("data/resources/clustering_model.pkl", "rb") as pickle_file: | |
# cluster = pk.load(pickle_file) | |
# with open("data/resources/umap_projection.pkl", "rb") as pickle_file: | |
# umap = pk.load(pickle_file) | |
# return cluster, umap | |
def get_symbol(gene):
    """Map an NCBI gene ID to its symbol via the global `symbol` dict.

    Returns None when the ID is unknown (same as the original implicit return).
    """
    return symbol.get(gene)
def get_hpo_name(hpo):
    """Return {hpo: name} using the global `hp_onto`, or {} if hpo is unknown."""
    described = {}
    if hpo in hp_onto:
        described[hpo] = hp_onto[hpo]["name"]
    return described
def get_hpo_name_only(hpo):
    """Return the name of an HPO term from the global `hp_onto`, or None."""
    return hp_onto[hpo]["name"] if hpo in hp_onto else None
def get_hpo_name_list(hpo_list, hp_onto):
    """Map each HPO ID in `hpo_list` to its name; unknown IDs are skipped."""
    return {term: hp_onto[term]["name"] for term in hpo_list if term in hp_onto}
def get_similar_terms(hpo_list, similarity_terms_dict):
    """Expand an HPO term list with neighbouring terms whose similarity > 0.8.

    Every input term gets weight 1. A neighbour inherits the weight
    ``similarity / len(neighbours of the source term)``; when several source
    terms propose the same neighbour, the highest weight is kept.

    Returns:
        (weights, terms): the weight dict and the list of all retained terms.
    """
    weighted = {}
    for source in hpo_list:
        weighted[source] = 1
        neighbours = similarity_terms_dict.get(source, {})
        for candidate, similarity in neighbours.items():
            if similarity <= 0.8:
                continue
            candidate_score = similarity / len(neighbours)
            # Keep the best weight seen so far for this candidate.
            if candidate not in weighted or candidate_score > weighted[candidate]:
                weighted[candidate] = candidate_score
    return weighted, list(weighted.keys())
def score(hpo_list, matrix):
    """Score genes against a list of HPO terms.

    Sums the per-HPO weights for each gene (row) over the selected columns,
    attaches the gene symbol, and returns rows sorted by descending "sum".

    Args:
        hpo_list: HPO term IDs; each must be a column of `matrix`.
        matrix: gene x HPO weight matrix (genes indexed by NCBI gene ID).

    Returns:
        DataFrame with the selected HPO columns plus "sum" and "gene_symbol".
    """
    # BUGFIX: matrix[hpo_list] is a slice of the caller's DataFrame; assigning
    # new columns to it raises SettingWithCopyWarning and risks mutating the
    # shared matrix. Work on an explicit copy instead.
    matrix_filter = matrix[hpo_list].copy()
    matrix_filter["sum"] = matrix_filter.sum(axis=1)
    matrix_filter["gene_symbol"] = matrix_filter.index.to_series().apply(get_symbol)
    return matrix_filter.sort_values("sum", ascending=False)
def score_sim_add(hpo_list_add, matrix, sim_dict):
    """Score genes against a similarity-expanded HPO list.

    Like `score`, but each HPO column is first multiplied by its similarity
    weight from `sim_dict` before summing.

    Args:
        hpo_list_add: expanded HPO term IDs (columns of `matrix`).
        matrix: gene x HPO weight matrix.
        sim_dict: HPO ID -> similarity weight (keys are in hpo_list_add).

    Returns:
        DataFrame with the weighted HPO columns plus "sum" and "gene_symbol",
        sorted by descending "sum".
    """
    # BUGFIX: without .copy(), the weighted in-place column assignments below
    # operate on a slice of the caller's matrix (SettingWithCopyWarning) and
    # could corrupt the shared data for subsequent calls.
    matrix_filter = matrix[hpo_list_add].copy()
    for key, value in sim_dict.items():
        matrix_filter[key] = matrix_filter[key] * value
    matrix_filter["sum"] = matrix_filter.sum(axis=1)
    matrix_filter["gene_symbol"] = matrix_filter.index.to_series().apply(get_symbol)
    return matrix_filter.sort_values("sum", ascending=False)
def get_phenotype_specificity(gene_diag, data_patient):
    """Translate a gene's matching rank into an A/B/C/D specificity grade.

    Uses the global `ncbi` symbol->GeneID map to locate the gene's row in
    `data_patient` (which must have a "rank" column).
    """
    gene_rank = data_patient.loc[int(ncbi[gene_diag]), "rank"]
    # Worst possible rank means no phenotype support at all.
    if gene_rank == data_patient["rank"].max():
        return "D - the reported phenotype is NOT consistent with what is expected for the gene/genomic region or not consistent in general."
    if gene_rank < 41:
        return "A - the reported phenotype is highly specific and relatively unique to the gene (top 40, 50 perc of diagnosis in PhenoGenius cohort)."
    if gene_rank < 250:
        return "B - the reported phenotype is consistent with the gene, is highly specific, but not necessarily unique to the gene (top 250, 75 perc of diagnosis in PhenoGenius cohort)."
    return "C - the phenotype is reported with limited association with the gene, not highly specific and/or with high genetic heterogeneity."
def get_relatives_list(hpo_list, hp_onto):
    """Return the unique union of the input terms, their parents and children.

    Terms absent from `hp_onto` are kept but contribute no relatives.
    Output order is unspecified (set-based deduplication, as before).
    """
    relatives = set(hpo_list)
    for term in hpo_list:
        if term in hp_onto:
            relatives.update(hp_onto[term]["parents"])
            relatives.update(hp_onto[term]["childrens"])
    return list(relatives)
def get_hpo_id(hpo_list):
    """Translate HPO descriptions to a comma-joined string of HPO IDs.

    Uses the global `hp_desc_id` name->ID map; raises KeyError on an unknown
    description (inputs come from a multiselect over hp_desc_id's keys).
    """
    return ",".join(hp_desc_id[description] for description in hpo_list)
# Global lookup tables used throughout the app (loaded once at startup).
hp_onto = load_hp_ontology()
hp_desc_id = hpo_description_to_id()
ncbi, symbol = symbol_to_id_to_dict()
# Input form: interactive HPO picker, free-text HPO IDs, and an optional
# candidate gene to evaluate. Values are read back only after submission.
with st.form("my_form"):
    c1, c2 = st.columns(2)
    with c1:
        hpo_raw = st.multiselect(
            "Select interactively your HPOs or...",
            list(hp_desc_id.keys()),
            ["Renal cyst", "Hepatic cysts"],
        )
    with c2:
        hpo = st.text_input(
            "copy/paste your HPOs, separated with comma",
            "HP:0000107,HP:0001407",
        )
    gene_diag_input = st.multiselect(
        "Optional: provide HGNC gene symbol to be tested",
        options=list(ncbi.keys()),
        default=["PKD1"],
        max_selections=1,
    )
    submit_button = st.form_submit_button(label="Submit")
# Main driver: runs the three matching strategies once the form is submitted.
if submit_button:
    # The multiselect overrides the free-text field when the user changed it
    # from the pre-filled example.
    if hpo_raw != ["Renal cyst", "Hepatic cysts"] and len(hpo_raw) > 0:
        hpo = get_hpo_id(hpo_raw)

    data = load_data()
    pheno_NMF, reduced = load_nmf_model()
    cohort = load_cohort()
    cluster_info = load_cluster_data()
    topic = load_topic_data()
    similarity_terms_dict = load_similarity_dict()

    hpo_list_ini = hpo.strip().split(",")

    # Validate the optional diagnostic gene symbol.
    if gene_diag_input:
        if gene_diag_input[0] in ncbi.keys():
            gene_diag = gene_diag_input[0]
        else:
            # BUGFIX: gene_diag_input is a list; concatenating it with a str
            # raised TypeError. Report the selected symbol instead.
            st.write(
                gene_diag_input[0]
                + " gene are not in our database. Please check gene name (need to be in CAPITAL format)."
            )
            gene_diag = None
    else:
        gene_diag = None

    # Sanitize the HPO list: drop the root term, reject malformed IDs, and
    # replace annotation-free terms by their closest annotated ancestor.
    hpo_list_up = []
    for hpo in hpo_list_ini:
        if hpo in ["HP:0000001"]:
            pass
        elif len(hpo) != 10:
            st.write(
                "Incorrect HPO format: "
                + hpo
                + ". Please check (7-digits terms with prefix HP:, and separed by commas)."
            )
        elif hpo not in data.columns:
            st.write(hpo + " not available in current database. Please modify.")
        else:
            if data[hpo].astype(bool).sum(axis=0) != 0:
                hpo_list_up.append(hpo)
            else:
                # No gene carries this exact term: climb the ontology until a
                # parent with at least one gene association is found.
                hpo_to_test = hp_onto[hpo]["direct_parent"][0]
                while data[hpo_to_test].astype(bool).sum(
                    axis=0
                ) == 0 and hpo_to_test not in ["HP:0000001"]:
                    hpo_to_test = hp_onto[hpo_to_test]["direct_parent"][0]
                if hpo_to_test in ["HP:0000001"]:
                    st.write(
                        "No gene-HPO associations was found for "
                        + hpo
                        + " and parents."
                    )
                else:
                    hpo_list_up.append(hpo_to_test)
                    st.write(
                        "We replaced: ",
                        hpo,
                        " by ",
                        hp_onto[hpo]["direct_parent"][0],
                        "-",
                        get_hpo_name(hpo_to_test),
                    )

    hpo_list = list(set(hpo_list_up))
    del hpo_list_up

    if hpo_list:
        with st.expander("See HPO inputs"):
            st.write(get_hpo_name_list(hpo_list_ini, hp_onto))
        del hpo_list_ini

        # NOTE(review): computed but currently unused below; kept for parity
        # with the original script.
        hpo_list_name = get_relatives_list(hpo_list, hp_onto)

        st.header("Clinical description with symptom interaction modeling")

        # Project the patient vector and an all-zero "witness" vector into NMF
        # space; the witness subtracts the constant offset of the embedding.
        witness = np.zeros(len(data.columns))
        witness_nmf = np.matmul(pheno_NMF.components_, witness)

        patient = np.zeros(len(data.columns))
        for hpo in hpo_list:
            hpo_index = list(data.columns).index(hpo)
            patient[hpo_index] = 1
        patient_nmf = np.matmul(pheno_NMF.components_, patient)

        witness_sugg_df = (
            pd.DataFrame(reduced)
            .set_index(data.index)
            .apply(lambda x: (x - witness_nmf) ** 2, axis=1)
        )
        patient_sugg_df = (
            pd.DataFrame(reduced)
            .set_index(data.index)
            .apply(lambda x: (x - patient_nmf) ** 2, axis=1)
        )
        case_sugg_df = (patient_sugg_df - witness_sugg_df).sum()

        patient_df_info = pd.DataFrame(case_sugg_df).merge(
            topic, left_index=True, right_index=True
        )
        patient_df_info["mean_score"] = round(
            patient_df_info[0] / (patient_df_info["total_weight"] ** 2), 4
        )
        patient_df_info_write = patient_df_info[
            ["mean_score", "main_term", "n_hpo", "hpo_name", "hpo_list", "weight"]
        ].sort_values("mean_score", ascending=False)
        del case_sugg_df
        del patient_sugg_df
        del witness_sugg_df
        del patient

        with st.expander("See projection in groups of symptoms dimension*"):
            st.dataframe(patient_df_info_write)
            st.write(
                "\* For interpretability, we report only the top 10% of the 390 groups of interacting symptom associations"
            )
            match_proj_csv = convert_df(patient_df_info_write)
            st.download_button(
                "Download description projection",
                match_proj_csv,
                "clin_desc_projected.tsv",
                "text/csv",
                key="download-csv-proj",
            )

        # Similarity expansion: add terms whose similarity to an input > 80%.
        sim_dict, hpo_list_add = get_similar_terms(hpo_list, similarity_terms_dict)
        similar_list = list(set(hpo_list_add) - set(hpo_list))
        similar_list_desc = get_hpo_name_list(similar_list, hp_onto)
        if similar_list_desc:
            with st.expander("See symptoms with similarity > 80%"):
                similar_list_desc_df = pd.DataFrame.from_dict(
                    similar_list_desc, orient="index"
                )
                similar_list_desc_df.columns = ["description"]
                st.write(similar_list_desc_df)
                del similar_list_desc_df
        del similar_list
        del similar_list_desc

        # --- Strategy 1: direct phenotype matching ---------------------------
        st.header("Phenotype matching")
        results_sum = score(hpo_list, data)
        results_sum["matchs"] = results_sum[hpo_list].astype(bool).sum(axis=1)
        results_sum["score"] = results_sum["matchs"] + results_sum["sum"]
        results_sum["rank"] = (
            results_sum["score"].rank(ascending=False, method="max").astype(int)
        )
        # Move the four derived columns ("sum", "gene_symbol", "matchs",
        # "score", minus "rank" slot) to the front for display.
        cols = results_sum.columns.tolist()
        cols = cols[-4:] + cols[:-4]
        match = results_sum[cols].sort_values(by=["score"], ascending=False)
        st.dataframe(match[match["score"] > 1.01].drop(columns=["sum"]))
        match_csv = convert_df(match)
        st.download_button(
            "Download matching results",
            match_csv,
            "match.tsv",
            "text/csv",
            key="download-csv-match",
        )
        if gene_diag:
            if int(ncbi[gene_diag]) in results_sum.index:
                p = (
                    ggplot(match, aes("score"))
                    + geom_density()
                    + geom_vline(
                        xintercept=results_sum.loc[int(ncbi[gene_diag]), "score"],
                        linetype="dashed",
                        color="red",
                        size=1.5,
                    )
                    + ggtitle("Matching score distribution")
                    + xlab("Gene matching score")
                    + ylab("% of genes")
                    + theme_bw()
                    + theme(
                        text=element_text(size=12),
                        figure_size=(5, 5),
                        axis_ticks=element_line(colour="black", size=4),
                        axis_line=element_line(colour="black", size=2),
                        axis_text_x=element_text(angle=45, hjust=1),
                        axis_text_y=element_text(angle=60, hjust=1),
                        subplots_adjust={"wspace": 0.1},
                        legend_position=(0.7, 0.35),
                    )
                )
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.pyplot(ggplot.draw(p))
                st.write(
                    "Gene ID rank:",
                    results_sum.loc[int(ncbi[gene_diag]), "rank"],
                    " | ",
                    "Gene ID count:",
                    round(results_sum.loc[int(ncbi[gene_diag]), "sum"], 4),
                )
                st.write(results_sum.loc[[int(ncbi[gene_diag])]])
                st.write(
                    "Gene ID phenotype specificity:",
                    get_phenotype_specificity(gene_diag, results_sum),
                )
                del p
            else:
                st.write("Gene ID rank:", " Gene not available in PhenoGenius database")
        del results_sum
        del match

        # --- Strategy 2: matching on the similarity-expanded term list -------
        st.header("Phenotype matching by similarity of symptoms")
        results_sum_add = score_sim_add(hpo_list_add, data, sim_dict)
        results_sum_add["rank"] = (
            results_sum_add["sum"].rank(ascending=False, method="max").astype(int)
        )
        cols = results_sum_add.columns.tolist()
        cols = cols[-2:] + cols[:-2]
        match_sim = results_sum_add[cols].sort_values(by=["sum"], ascending=False)
        st.dataframe(match_sim[match_sim["sum"] > 0.01])
        match_sim_csv = convert_df(match_sim)
        st.download_button(
            "Download matching results",
            match_sim_csv,
            "match_sim.tsv",
            "text/csv",
            key="download-csv-match-sim",
        )
        if gene_diag:
            if int(ncbi[gene_diag]) in results_sum_add.index:
                p2 = (
                    ggplot(match_sim, aes("sum"))
                    + geom_density()
                    + geom_vline(
                        xintercept=results_sum_add.loc[int(ncbi[gene_diag]), "sum"],
                        linetype="dashed",
                        color="red",
                        size=1.5,
                    )
                    + ggtitle("Matching score distribution")
                    + xlab("Gene matching score")
                    + ylab("% of genes")
                    + theme_bw()
                    + theme(
                        text=element_text(size=12),
                        figure_size=(5, 5),
                        axis_ticks=element_line(colour="black", size=4),
                        axis_line=element_line(colour="black", size=2),
                        axis_text_x=element_text(angle=45, hjust=1),
                        axis_text_y=element_text(angle=60, hjust=1),
                        subplots_adjust={"wspace": 0.1},
                        legend_position=(0.7, 0.35),
                    )
                )
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.pyplot(ggplot.draw(p2))
                st.write(
                    "Gene ID rank:",
                    results_sum_add.loc[int(ncbi[gene_diag]), "rank"],
                    " | ",
                    "Gene ID count:",
                    round(results_sum_add.loc[int(ncbi[gene_diag]), "sum"], 4),
                )
                st.write(
                    "Gene ID phenotype specificity:",
                    get_phenotype_specificity(gene_diag, results_sum_add),
                )
                del p2
            else:
                st.write("Gene ID rank:", " Gene not available in PhenoGenius database")
        del sim_dict
        del hpo_list_add
        del results_sum_add
        del match_sim

        # --- Strategy 3: matching in the NMF group-of-symptoms space ---------
        st.header("Phenotype matching by groups of symptoms")
        patient_df = (
            pd.DataFrame(reduced)
            .set_index(data.index)
            .apply(lambda x: sum((x - patient_nmf) ** 2), axis=1)
        )
        witness_df = (
            pd.DataFrame(reduced)
            .set_index(data.index)
            .apply(lambda x: sum((x - witness_nmf) ** 2), axis=1)
        )
        del patient_nmf
        del witness
        del witness_nmf
        case_df = pd.DataFrame(patient_df - witness_df)
        case_df.columns = ["score"]
        # Lower distance = better match; flip so that larger "sum" is better.
        case_df["score_norm"] = abs(case_df["score"] - case_df["score"].max())
        case_df["sum"] = case_df["score_norm"]
        case_df_sort = case_df.sort_values(by="sum", ascending=False)
        case_df_sort["rank"] = (
            case_df_sort["sum"].rank(ascending=False, method="max").astype(int)
        )
        case_df_sort["gene_symbol"] = case_df_sort.index.to_series().apply(get_symbol)
        match_nmf = case_df_sort[["gene_symbol", "rank", "sum"]]
        st.dataframe(match_nmf[match_nmf["sum"] > 0.01])
        match_nmf_csv = convert_df(match_nmf)
        st.download_button(
            "Download matching results",
            match_nmf_csv,
            "match_groups.tsv",
            "text/csv",
            key="download-csv-match-groups",
        )
        if gene_diag:
            if int(ncbi[gene_diag]) in case_df_sort.index:
                p3 = (
                    ggplot(match_nmf, aes("sum"))
                    + geom_density()
                    + geom_vline(
                        xintercept=case_df_sort.loc[int(ncbi[gene_diag]), "sum"],
                        linetype="dashed",
                        color="red",
                        size=1.5,
                    )
                    + ggtitle("Matching score distribution")
                    + xlab("Gene matching score")
                    + ylab("% of genes")
                    + theme_bw()
                    + theme(
                        text=element_text(size=12),
                        figure_size=(5, 5),
                        axis_ticks=element_line(colour="black", size=4),
                        axis_line=element_line(colour="black", size=2),
                        axis_text_x=element_text(angle=45, hjust=1),
                        axis_text_y=element_text(angle=60, hjust=1),
                        subplots_adjust={"wspace": 0.1},
                        legend_position=(0.7, 0.35),
                    )
                )
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.pyplot(ggplot.draw(p3))
                st.write(
                    "Gene ID rank:",
                    case_df_sort.loc[int(ncbi[gene_diag]), "rank"],
                    " | ",
                    "Gene ID count:",
                    round(case_df_sort.loc[int(ncbi[gene_diag]), "sum"], 4),
                )
                st.write(
                    "Gene ID phenotype specificity:",
                    get_phenotype_specificity(gene_diag, case_df_sort),
                )
                del p3
            else:
                st.write("Gene ID rank:", " Gene not available in PhenoGenius database")
        del case_df_sort
        del match_nmf
        del case_df
    else:
        st.write(
            "No HPO terms provided in correct format.",
        )