|
import gradio as gr |
|
import pandas as pd |
|
import requests |
|
import rpy2.robjects as robjects |
|
|
|
|
|
def get_SNP():
    """Return the markers that pass the conjunctional FDR screen in R.

    The R pipeline, executed through rpy2:

    1. loads two summary-statistics tables with ``data.table::fread``
       (paths are relative to the R working directory);
    2. keeps rows with p < 1e-5 in each table (``Pvalue`` and
       ``GC.Pvalue`` respectively);
    3. renames columns 3/8 of the first table and 1/8 of the second to
       ``Marker`` plus a p-value column, then inner-joins on ``Marker``;
    4. computes ``cFDR`` in both directions, takes the element-wise maximum
       as ``ccFDR`` and keeps rows with ccFDR < 0.001.

    Returns:
        list: The ``Marker`` values of the surviving rows (empty when no row
        passes the filters).
    """
    # The whole pipeline runs inside local({...}) so that it neither wipes
    # nor pollutes the process-wide R global environment: the large
    # intermediate tables are released as soon as the block returns.
    r_code = """
    local({
        library(GWAScFDR)
        library(dplyr)
        library(data.table)

        AD <- fread("Lambert/IGAP_summary_statistics/IGAP_stage_1.txt", header = TRUE, stringsAsFactors = F)
        AMD_F <- fread("Fritsche/Fritsche-26691988.txt.gz", header = TRUE, stringsAsFactors = F)

        AD <- AD[as.numeric(AD$Pvalue) < 0.00001, ]
        AMD_F <- AMD_F[as.numeric(AMD_F$GC.Pvalue) < 0.00001, ]

        names(AD)[c(3, 8)] <- c("Marker", "p_AD")
        names(AMD_F)[c(1, 8)] <- c("Marker", "p_AMDF")

        # merge data
        AD_AMDF <- merge(AD, AMD_F, by = "Marker", all = FALSE)

        # calculate cFDR
        AD_AMDF$FDR_AD_AMD <- cFDR(as.numeric(AD_AMDF$p_AD), as.numeric(AD_AMDF$p_AMDF))
        AD_AMDF$FDR_AMD_AD <- cFDR(as.numeric(AD_AMDF$p_AMDF), as.numeric(AD_AMDF$p_AD))

        # ccFDR is the higher of the two conditional FDRs
        AD_AMDF$ccFDR <- pmax(AD_AMDF$FDR_AD_AMD, AD_AMDF$FDR_AMD_AD)

        AD_AMDF <- AD_AMDF[AD_AMDF$ccFDR < 0.001, ]

        # write.csv(AD_AMDF, file = "ccFDR_AD_AMDF.csv")

        AD_AMDF$Marker
    })
    """

    # robjects.r evaluates the script and hands back the value of its last
    # expression, here the Marker vector produced by the local() block.
    markers = robjects.r(r_code)

    return list(markers)
|
|
|
|
|
def get_SNP_gene(rsidlist, timeout=30):
    """Look up, for each rsID, the genes reported at distance 0 by the GWAS Catalog.

    Each rsID is fetched from the EBI GWAS Catalog REST API and the
    ``geneName`` of every ``genomicContexts`` entry whose ``distance`` equals
    0 is collected (presumably the genes the variant lies within -- confirm
    against the API documentation).

    Args:
        rsidlist: Iterable of rsID strings to query.
        timeout: Seconds to wait on each HTTP request before it is treated
            as failed. Prevents one stalled request from hanging the caller.

    Returns:
        dict: ``{rsid: [gene_name, ...]}``. Gene names are unique and kept in
        the order the API listed them; the list is empty when no context has
        distance 0. rsIDs whose request failed are left out of the result
        (an error line is printed for each).
    """
    rsid_gene = {}
    for rsid in rsidlist:
        url = f"https://www.ebi.ac.uk/gwas/rest/api/singleNucleotidePolymorphisms/{rsid}"
        print(url)

        # Keep the try body limited to the network/decoding steps.
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers non-JSON bodies on requests versions where
            # the decode error is not a RequestException subclass.
            print(f"Error occurred while fetching data for {rsid}: {str(e)}")
            continue

        contexts = data.get("genomicContexts") if isinstance(data, dict) else None

        # A dict is used as an ordered set: duplicates collapse while the
        # first-seen order is kept, so the result is deterministic.
        genes = {}
        for context in contexts or []:
            if not isinstance(context, dict) or context.get("distance") != 0:
                continue
            gene = context.get("gene")
            gene_name = gene.get("geneName") if isinstance(gene, dict) else None
            if isinstance(gene_name, str) and gene_name:
                genes[gene_name] = None

        rsid_gene[rsid] = list(genes)

    return rsid_gene
|
|
|
def _first_gene(genes):
    """Collapse a gene list to one display value ("None" when the list is empty)."""
    if isinstance(genes, list):
        return genes[0] if genes else "None"
    return genes


def todf(disease1, disease2):
    """Build the variant/gene table for the selected pair of diseases.

    Runs ``get_SNP`` to obtain the shared markers and ``get_SNP_gene`` to
    annotate them, then flattens the result to one row per rsID.

    Args:
        disease1: Name of the first disease; blank or ``None`` means "not
            selected".
        disease2: Name of the second disease; blank or ``None`` means "not
            selected".

    Returns:
        pandas.DataFrame: Columns ``rsid`` and ``genes``. ``genes`` holds the
        first gene listed for the rsID, or the string ``"None"`` when no gene
        was found. The frame is empty (same columns) when either disease is
        missing.
    """
    columns = ['rsid', 'genes']

    # Guard clause: always hand back a well-formed frame, even when the
    # selection is incomplete, instead of falling through without a result.
    if not disease1 or not disease2:
        return pd.DataFrame(columns=columns)

    rsid_gene = get_SNP_gene(get_SNP())

    # Rows are built from a fresh list rather than by rewriting dict values
    # while the dict is being iterated.
    rows = [(rsid, _first_gene(genes)) for rsid, genes in rsid_gene.items()]
    return pd.DataFrame(rows, columns=columns)
|
|
|
def visible(identifiers):
    """Reveal the result components once at least one identifier was entered.

    Args:
        identifiers: Newline-separated text; empty lines are ignored.

    Returns:
        A 4-tuple of ``update(visible=True)`` results for ``topo_para``,
        ``network_stats``, ``enrichment`` and ``button_download`` when the
        text contains at least one non-empty line; otherwise ``None``.
    """
    # NOTE(review): the four components used below are not defined in the
    # visible part of this file -- confirm they exist at module level.
    has_entries = any(identifiers.split("\n"))
    if not has_entries:
        return None

    return (
        topo_para.update(visible=True),
        network_stats.update(visible=True),
        enrichment.update(visible=True),
        button_download.update(visible=True),
    )
|
|
|
|
|
|
|
# Gradio front end: two disease selectors and a Submit button on the left,
# the resulting variant/gene table on the right.
# NOTE(review): the nesting below was reconstructed from the statement order;
# confirm that the Submit row and the hidden cache textbox belong to the
# left-hand column.
with gr.Blocks() as demo:
    with gr.Row():
        # Input column.
        with gr.Column(scale=1):
            disease1 = gr.Dropdown(
                choices=['Alzheimer disease'],
                value='Alzheimer disease',
                label='Disease 1',
            )
            disease2 = gr.Dropdown(
                choices=['age-related macular degeneration'],
                value='age-related macular degeneration',
                label='Disease 2',
            )

            with gr.Row():
                button_input = gr.Button("Submit", elem_id="submit-button")

            # Hidden textbox; nothing in this section reads or writes it.
            cache = gr.Textbox(visible=False)

        # Output column.
        with gr.Column(scale=3.25, min_width=985):
            rsid_gene_df = gr.Dataframe(
                label="Variants and Genes:",
                elem_id="topo-para",
                visible=True,
            )

    # Clicking Submit passes both dropdown values to todf and renders the
    # returned table in the dataframe component.
    button_input.click(
        fn=todf,
        inputs=[disease1, disease2],
        outputs=rsid_gene_df,
    )

demo.launch(share=False)
|
|