# Hugging Face Spaces app (scraped page header removed: "Spaces / Sleeping")
import gradio as gr
from sentence_transformers import SentenceTransformer
import pandas as pd
import pickle
from pathlib import Path

# Embedding model is loaded once at import time; first run downloads the
# weights, so module import can take a while on a cold start.
model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
def make_clickable_both(val):
    """Render a 'name#url' string as an HTML anchor.

    Args:
        val: String of the form ``"<name>#<url>"``.

    Returns:
        An ``<a href="url">name</a>`` HTML snippet.
    """
    # maxsplit=1 so a '#' fragment inside the URL itself does not break the
    # unpack (plain split('#') would raise ValueError on 3+ parts).
    name, url = val.split('#', 1)
    return f'<a href="{url}">{name}</a>'
def find(query):
    """Semantic search over the Quran for a free-text query.

    Embeds the query with the module-level E5 model, scores it against
    pre-computed verse-chunk embeddings, and returns the 20 best-matching
    verses as linked HTML rows plus a CSV download of the results.

    Args:
        query: Free-text search query (any language the model supports).

    Returns:
        Tuple ``(results, filepath)`` where ``results`` is a DataFrame with a
        single ``text`` column of HTML links and ``filepath`` is the Path of
        the CSV written for the download button.
    """
    def get_detailed_instruct(task_description: str, query: str) -> str:
        # E5-instruct models expect the query prefixed with a task instruction.
        return f'Instruct: {task_description}\nQuery: {query}'

    # Each query must come with a one-sentence instruction describing the task.
    task = 'Given a web search query, retrieve relevant passages that answer the query'
    queries = [get_detailed_instruct(task, query)]

    # Full verse text keyed by (sura, aya); the '|'-delimited source file.
    quran = pd.read_csv('quran-simple-clean.txt', delimiter="|")

    # Pre-split verse chunks and their pre-computed embeddings.
    # (Embeddings were generated offline with:
    #  model.encode(chunks, convert_to_tensor=True, normalize_embeddings=True)
    #  and pickled to the .sav file below.)
    with open('quran-splitted.sav', 'rb') as f:
        quran_splitted = pickle.load(f)
    with open('encoded_quran_text_split_multilingual-e5-large-instructs.sav', 'rb') as f:
        document_embeddings = pickle.load(f)

    query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
    # Embeddings are normalized, so the dot product is cosine similarity;
    # scaled by 100 for readability.
    scores = (query_embeddings @ document_embeddings.T) * 100

    # Attach the similarity of the (single) query to every chunk and rank.
    quran_splitted['similarity'] = scores.tolist()[0]
    sorted_quran = quran_splitted.sort_values(by='similarity', ascending=False)

    # Collect the full verses behind the top-20 chunks.
    results = pd.DataFrame()
    for i in range(20):
        result = sorted_quran.iloc[i]
        result_quran = quran.loc[(quran['sura'] == result['sura']) & (quran['aya'] == result['aya'])]
        results = pd.concat([results, result_quran])

    # Link each verse to its tafsir page and append the (sura:aya) reference.
    url = 'https://quran.com/'+results['sura'].astype(str)+':'+results['aya'].astype(str)+'/tafsirs/en-tafisr-ibn-kathir'
    results['text'] = '<a href="'+url+'">'+results['text']+ '</a>' + ' (QS. ' + results['sura'].astype(str) + ':' + results['aya'].astype(str) + ')'
    results = results.drop(columns=['sura', 'aya'])

    # Write a CSV named after the query so the UI's download button can serve it.
    filepath = Path(query+'.csv')
    results.to_csv(filepath, index=False)
    return results, filepath
# Gradio UI: one textbox in, a rendered results table plus a CSV download
# button out. Example outputs are cached lazily (computed on first request).
demo = gr.Interface(
    fn=find,
    inputs="textbox",
    outputs=[gr.Dataframe(headers=['text'], datatype=["markdown"], wrap=True), gr.DownloadButton()],
    cache_examples="lazy",
    examples=[
        ["law of inheritance in islam"],
        ["tunjukilah jalan yang lurus"],
        ["سليمان"],
    ],
    title="Quran Finder")

if __name__ == "__main__":
    demo.launch()