# FDSRashid's picture
# Update app.py
# 5ad39da verified
# raw
# history blame contribute delete
# No virus
# 2.51 kB
import numpy as np
import gradio as gr
import os
import pandas as pd
from datasets import load_dataset
from sklearn.metrics.pairwise import cosine_similarity
from datasets import Features, Value
import plotly.express as px
# ---------------------------------------------------------------------------
# Data loading: build the module-level `df` (matn metadata + embeddings) and
# `taraf_max` that the plotting function and the UI below read.
# ---------------------------------------------------------------------------

# Explicit all-string schema applied when reading All_Matns.csv.
features = Features({
    'matn': Value('string'),
    'taraf_ID': Value('string'),
    'bookid_hadithid': Value('string'),
})

# Hugging Face access token, read from the environment (None when unset).
Secret_token = os.getenv('HF_token')

# Table of embeddings; the 'embed' column is consumed by plot_similarity_score.
dataset = load_dataset("FDSRashid/embed_matn", token=Secret_token)
df = dataset["train"].to_pandas()

# Book metadata, joined onto the matn table by 'Book_ID' further down.
books = load_dataset('FDSRashid/Hadith_info', data_files='Books.csv', token=Secret_token)['train'].to_pandas()

# Matn text plus taraf / book identifiers.
dataset = load_dataset("FDSRashid/hadith_info", data_files='All_Matns.csv', token=Secret_token, features=features)
matn_info = dataset['train'].to_pandas()

# Two rows are removed by index label.
# NOTE(review): presumably malformed records in the CSV — confirm.
matn_info = matn_info.drop([97550, 307206])

# 'KeyAbsent' is mapped to -1 so the whole column can be cast to int.
matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1).astype(int)

# 'bookid_hadithid' is split on '_' : part 0 -> Book_ID, part 1 -> Hadith Number.
matn_info['Book_ID'] = matn_info['bookid_hadithid'].map(lambda key: int(key.split('_')[0]))
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].map(lambda key: int(key.split('_')[1]))

# Attach book metadata, then expose the fresh row index as an 'index' column.
matn_info = matn_info.merge(books, on='Book_ID').reset_index()
df = df.reset_index()

# Bring over only the embedding-table columns matn_info does not already have
# (this also leaves out the duplicated 'index' column), aligning rows by index.
# NOTE(review): assumes row i of the embedding table corresponds to row i of
# matn_info after the drops/merge above — confirm against the datasets.
cols_to_use = df.columns.difference(matn_info.columns)
joined_df = matn_info.merge(df[cols_to_use], left_index=True, right_index=True)
df = joined_df.copy()

# Largest taraf id present; used as the upper bound of the UI slider.
taraf_max = df['taraf_ID'].unique().max()
def plot_similarity_score(taraf_num):
    """Plot pairwise cosine similarity between the matns of one taraf.

    Args:
        taraf_num: Value compared against the ``taraf_ID`` column of the
            module-level ``df`` to select the rows to analyse.

    Returns:
        A 3-tuple ``(fig, fig_dis, table)``:

        * ``fig`` -- ``px.imshow`` heatmap of the full cosine-similarity
          matrix of the selected rows' ``embed`` values.
        * ``fig_dis`` -- 20-bin histogram of the strictly-lower-triangle
          scores, i.e. every unordered pair once with the diagonal excluded.
        * ``table`` -- DataFrame with columns ``matn``, ``Number``,
          ``Book_Name``, ``Author`` and ``Hadith Number``; ``Number`` is the
          0-based position of the row, matching its row/column in the matrix.

    Raises:
        gr.Error: If no row of ``df`` has the requested ``taraf_ID``.
    """
    # Work on a copy: adding a column to a filtered slice of the global frame
    # is chained assignment and triggers pandas' SettingWithCopyWarning.
    taraf_df = df[df['taraf_ID'] == taraf_num].copy()
    if taraf_df.empty:
        # Without this guard the empty list reaches cosine_similarity and the
        # user only sees an opaque validation error.
        raise gr.Error(f'No hadiths found for Taraf {taraf_num}')
    taraf_df['Number'] = np.arange(len(taraf_df))

    # NOTE(review): assumes each 'embed' entry is a fixed-length numeric
    # vector — confirm against the embedding dataset.
    cos_score = cosine_similarity(taraf_df['embed'].to_list())
    fig = px.imshow(cos_score)

    # Boolean mask of the strictly-lower triangle (k=-1 drops the diagonal);
    # indexing with it already yields a 1-D array of pair scores.
    mask = np.tril(np.ones(cos_score.shape, dtype=bool), k=-1)
    data = cos_score[mask]
    fig_dis = px.histogram(x = data, title = f'Similarity Distribution for Taraf {taraf_num}', labels = {'x': 'Similarity Score'}, nbins = 20, template = 'ggplot2' )
    return fig, fig_dis, taraf_df[['matn', 'Number', 'Book_Name', 'Author', 'Hadith Number']]
# Gradio UI: a taraf slider and a submit button that feed
# plot_similarity_score, whose three results fill two plots and a table.
with gr.Blocks() as demo:
    gr.Markdown('# Semantic Similarity Visualizer')
    taraf_number = gr.Slider(
        1,
        taraf_max,
        value=10000,
        label="Taraf",
        info="Choose the Taraf to Input",
        step=1,
    )
    btn = gr.Button('Submit')
    # Outputs are instantiated after the button, in the same order as before:
    # similarity heatmap, score histogram, then the matn table.
    heatmap_plot = gr.Plot()
    histogram_plot = gr.Plot()
    matn_table = gr.DataFrame(wrap=True)
    btn.click(
        fn=plot_similarity_score,
        inputs=[taraf_number],
        outputs=[heatmap_plot, histogram_plot, matn_table],
    )
demo.launch()