"""Gradio app that draws isnad (teacher -> student) networks for hadith tarafs.

All datasets are downloaded from the Hugging Face Hub at import time using the
token stored in the ``HF_token`` environment variable, then three tabs are
exposed: a whole-taraf visualizer, a taraf -> book/hadith-number lookup, and a
visualizer for a hand-picked list of hadiths.
"""
import os
import re

import gradio as gr
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from datasets import Dataset
from datasets import Features
from datasets import Value
from datasets import load_dataset
from pyvis.network import Network

pattern = r'"(.*?)"'  # this pattern captures anything in double quotes.

Secret_token = os.getenv('HF_token')

dataset = load_dataset(
    'FDSRashid/hadith_info',
    data_files='Basic_Edge_Information.csv',
    token=Secret_token,
    split='train',
)
edge_info = dataset.to_pandas()

features = Features({
    'Rawi ID': Value('int32'),
    'Famous Name': Value('string'),
    'Narrator Rank': Value('string'),
    'Number of Narrations': Value('string'),
    'Generation': Value('string'),
})
narrator_bios = load_dataset(
    "FDSRashid/hadith_info",
    data_files='Teacher_Bios.csv',
    token=Secret_token,
    features=features,
)
narrator_bios = narrator_bios['train'].to_pandas()
# Row 49845 is patched by hand: its count is zeroed before the integer cast
# (presumably because the raw value is not parseable) and set afterwards.
narrator_bios.loc[49845, 'Narrator Rank'] = 'رسول الله'
narrator_bios.loc[49845, 'Number of Narrations'] = 0
narrator_bios['Number of Narrations'] = narrator_bios['Number of Narrations'].astype(int)
narrator_bios.loc[49845, 'Number of Narrations'] = 327512
# 8125 Narrators have no Generation, listed in dataset as None
narrator_bios['Generation'] = narrator_bios['Generation'].replace([None], [-1])
narrator_bios['Generation'] = narrator_bios['Generation'].astype(int)

features = Features({
    'matn': Value('string'),
    'taraf_ID': Value('string'),
    'bookid_hadithid': Value('string'),
})
dataset = load_dataset(
    "FDSRashid/hadith_info",
    data_files='All_Matns.csv',
    token=Secret_token,
    features=features,
)
matn_info = dataset['train'].to_pandas()
# NOTE(review): these two rows are dropped without explanation in the
# original; presumably they are malformed records -- confirm.
matn_info = matn_info.drop(97550)
matn_info = matn_info.drop(307206)
matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)
matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)

# Isnad Info Hadiths column is structured like {"BookNum_HadithNum", ...} for each edge
isnad_info = load_dataset(
    'FDSRashid/hadith_info',
    token=Secret_token,
    data_files='isnad_info.csv',
    split='train',
).to_pandas()
isnad_info['Hadiths Cleaned'] = isnad_info['Hadiths'].apply(
    lambda x: [re.findall(pattern, string)[0].split("_") for string in x[1:-1].split(',')]
)
# Hadiths Cleaned is a list of lists, each sub-list is Book Id, Hadith ID
taraf_max = matn_info['taraf_ID'].max()
isnad_info['Tarafs Cleaned'] = isnad_info['Tarafs'].apply(
    lambda x: np.array([int(i.strip(' ')) for i in x[1:-1].split(',')])
)

cmap = plt.colormaps['cool']

books = load_dataset(
    'FDSRashid/Hadith_info',
    data_files='Books.csv',
    token=Secret_token,
)['train'].to_pandas()
matn_info['Book_ID'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[0]))
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
matn_info = pd.merge(matn_info, books, on='Book_ID')


def value_to_hex(value):
    """Return the ``#RRGGBB`` hex colour that ``cmap`` assigns to ``value``.

    NOTE(review): callers pass raw integer counts. Matplotlib treats integer
    inputs as lookup-table indices rather than 0-1 fractions, so large counts
    likely all map to the same end colour -- confirm this is intended.
    """
    rgba_color = cmap(value)
    return "#{:02X}{:02X}{:02X}".format(
        int(rgba_color[0] * 255),
        int(rgba_color[1] * 255),
        int(rgba_color[2] * 255),
    )


def _famous_names(rawi_id):
    """Return the list of 'Famous Name' values recorded for ``rawi_id``."""
    return narrator_bios[narrator_bios['Rawi ID'] == int(rawi_id)]['Famous Name'].to_list()


def _build_network_html(isnad_hadith, yaxis):
    """Render the given isnad edges as a pyvis network wrapped in an iframe.

    Args:
        isnad_hadith: DataFrame with 'Source' and 'Destination' narrator IDs.
        yaxis: Prefix of the ``'<yaxis> Count'`` column of ``isnad_info`` used
            for edge colour and edge label.

    Returns:
        An ``<iframe>`` HTML string whose ``srcdoc`` holds the pyvis page.
    """
    # Work on a copy so the caller's frame (a slice of isnad_info) is untouched.
    edges = isnad_hadith.copy()
    edges['Teacher'] = edges['Source'].apply(_famous_names)
    edges['Student'] = edges['Destination'].apply(_famous_names)

    # Keep only edges whose two endpoints each resolve to exactly one name.
    resolved = (edges['Teacher'].apply(len) == 1) & (edges['Student'].apply(len) == 1)
    filtered = edges[resolved].copy()
    filtered['Student'] = filtered['Student'].apply(lambda names: names[0])
    filtered['Teacher'] = filtered['Teacher'].apply(lambda names: names[0])

    count_column = f'{yaxis} Count'
    net = Network(directed=True)
    for _, row in filtered.iterrows():
        source = row['Teacher']
        target = row['Student']
        teacher_info = narrator_bios[narrator_bios['Rawi ID'] == int(row['Source'])]
        student_info = narrator_bios[narrator_bios['Rawi ID'] == int(row['Destination'])]
        isnad = isnad_info[
            (isnad_info['Source'] == row['Source'])
            & (isnad_info['Destination'] == row['Destination'])
        ]
        teacher_narrations = teacher_info['Number of Narrations'].to_list()[0]
        student_narrations = student_info['Number of Narrations'].to_list()[0]
        teacher_gen = teacher_info['Generation'].to_list()[0]
        student_gen = student_info['Generation'].to_list()[0]
        edge_count = isnad[count_column].to_list()[0]

        # Compare numerically: the original compared against the string
        # '99999', which can never match when the IDs are loaded as integers.
        if int(row['Source']) == 99999:
            net.add_node(source, font={'size': 50, 'color': 'Black'}, color='#000000')
        else:
            net.add_node(
                source,
                font={'size': 30, 'color': 'red'},
                color=value_to_hex(teacher_narrations),
                label=f'{source} \n {teacher_info["Narrator Rank"].to_list()[0]} \n ID: {row["Source"]} - Gen {teacher_gen}',
            )
        net.add_node(
            target,
            font={'size': 30, 'color': 'red'},
            color=value_to_hex(student_narrations),
            label=f'{target} \n{student_info["Narrator Rank"].to_list()[0]} \n ID: {row["Destination"]} - Gen {student_gen}',
        )
        net.add_edge(
            source,
            target,
            color=value_to_hex(int(edge_count)),
            label=f"{edge_count}",
        )

    net.barnes_hut(gravity=-5000, central_gravity=0.3, spring_length=200)
    html = net.generate_html()
    # The page is embedded in a single-quoted srcdoc attribute below, so it
    # must not contain single quotes itself.
    html = html.replace("'", "\"")
    # The original returned an empty f-string here, discarding the page.
    return f"""<iframe style="width: 100%; height: 600px; margin: 0 auto" name="result" sandbox="allow-modals allow-forms allow-scripts allow-same-origin allow-popups" allowfullscreen="" frameborder="0" srcdoc='{html}'></iframe>"""


# edge_info, matn_info, narrator_bios, isnad_info
def visualize_isnad(taraf_num, yaxis):
    """Visualize every isnad edge of one taraf and summarise its hadiths.

    Args:
        taraf_num: Taraf ID to look up in ``matn_info`` / ``isnad_info``.
        yaxis: Prefix of the ``'<yaxis> Count'`` column shown on the edges.

    Returns:
        A tuple ``(html, df)``: the network as an iframe HTML string, and a
        DataFrame with one row per hadith of the taraf (matn, generation and
        name of the chain's end transmitter, book, author, hadith number).
    """
    taraf_rows = matn_info[matn_info['taraf_ID'] == taraf_num]

    lst_hadith = []
    hadith_columns = zip(
        taraf_rows['bookid_hadithid'],
        taraf_rows['matn'],
        taraf_rows['Book_Name'],
        taraf_rows['Author'],
        taraf_rows['Hadith Number'],
    )
    for hadith_key, matn, book, author, number in hadith_columns:
        # Find the edges that carry this [book id, hadith id] pair, then take
        # the node with no outgoing edge as the end transmitter of the hadith.
        book_hadith = hadith_key.split('_')
        in_hadith = isnad_info['Hadiths Cleaned'].apply(lambda pairs: book_hadith in pairs)
        hadith_edges = isnad_info[in_hadith][['Source', 'Destination']]
        G = nx.from_pandas_edgelist(
            hadith_edges,
            source='Source',
            target='Destination',
            create_using=nx.DiGraph(),
        )
        end_nodes = [int(n) for n, d in G.out_degree() if d == 0]

        # Placeholders keep the hadith in the table instead of failing the
        # whole request when no end transmitter or bio can be found.
        gen_node, name_node = -1, None
        if end_nodes:
            bio = narrator_bios[narrator_bios['Rawi ID'] == end_nodes[0]]
            if not bio.empty:
                gen_node = bio['Generation'].iloc[0]
                name_node = bio['Famous Name'].iloc[0]
        lst_hadith.append([matn, gen_node, name_node, book, author, number])

    df = pd.DataFrame(
        lst_hadith,
        columns=['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Hadith Number'],
    )

    in_taraf = isnad_info['Tarafs Cleaned'].apply(lambda tarafs: taraf_num in tarafs)
    isnad_hadith = isnad_info[in_taraf][['Source', 'Destination']]
    return _build_network_html(isnad_hadith, yaxis), df


def taraf_booknum(taraf_num):
    """Return matn, book and hadith-number columns for every hadith of a taraf."""
    taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
    return taraf[['matn', 'Book_ID', 'Hadith Number', 'Book_Name', 'Author']]


def visualize_subTaraf(df, yaxis):
    """Visualize the isnad edges of a hand-picked set of hadiths.

    Args:
        df: DataFrame with 'Book_ID' and 'Hadith Number' columns. Rows with a
            blank or non-numeric value in either column are ignored.
        yaxis: Prefix of the ``'<yaxis> Count'`` column shown on the edges.

    Returns:
        The network as an iframe HTML string.
    """
    # Coerce to integers before building the "book_hadith" keys: cells that
    # arrive as floats or blanks would otherwise stringify as "12.0_5.0" or
    # "nan_nan" and never match. The caller's frame is not modified.
    book_ids = pd.to_numeric(df['Book_ID'], errors='coerce')
    hadith_nums = pd.to_numeric(df['Hadith Number'], errors='coerce')
    complete = book_ids.notna() & hadith_nums.notna()
    keys = (
        book_ids[complete].astype(int).astype(str)
        + '_'
        + hadith_nums[complete].astype(int).astype(str)
    )

    hadith = matn_info[matn_info['bookid_hadithid'].isin(keys)]
    wanted = [key.split('_') for key in hadith['bookid_hadithid'].to_list()]
    in_selection = isnad_info['Hadiths Cleaned'].apply(
        lambda pairs: any(key in pairs for key in wanted)
    )
    isnad_hadith = isnad_info[in_selection][['Source', 'Destination']]
    return _build_network_html(isnad_hadith, yaxis)


with gr.Blocks() as demo:
    with gr.Tab("Whole Taraf Visualizer"):
        Yaxis = gr.Dropdown(
            choices=['Taraf', 'Hadith', 'Isnad', 'Book'],
            value='Taraf',
            label='Variable to Display',
            info='Choose the variable to visualize.',
        )
        taraf_number = gr.Slider(
            1, taraf_max, value=10000, label="Taraf", info="Choose the Taraf to Input", step=1
        )
        btn = gr.Button('Submit')
        btn.click(
            fn=visualize_isnad,
            inputs=[taraf_number, Yaxis],
            outputs=[gr.HTML(), gr.DataFrame(wrap=True)],
        )
    with gr.Tab("Book and Hadith Number Retriever"):
        taraf_num = gr.Slider(
            1, taraf_max, value=10000, label="Taraf", info="Choose the Taraf to Input", step=1
        )
        btn_num = gr.Button('Retrieve')
        btn_num.click(fn=taraf_booknum, inputs=[taraf_num], outputs=[gr.DataFrame()])
    with gr.Tab('Select Hadith Isnad Visualizer'):
        yyaxis = gr.Dropdown(
            choices=['Taraf', 'Hadith', 'Isnad', 'Book'],
            value='Taraf',
            label='Variable to Display',
            info='Choose the variable to visualize.',
        )
        hadith_selection = gr.Dataframe(
            headers=["Book_ID", "Hadith Number"],
            datatype=["number", "number"],
            row_count=5,
            col_count=(2, "fixed"),
        )
        btn_hadith = gr.Button('Search')
        btn_hadith.click(
            fn=visualize_subTaraf,
            inputs=[hadith_selection, yyaxis],
            outputs=[gr.HTML()],
        )

demo.launch()