Spaces:
Build error
Build error
File size: 2,808 Bytes
ffbadc4 e85ca86 c4baae7 69e5f39 ffbadc4 3836430 d8758a7 3836430 fc50127 e8569d3 5f6349c b5392ae b64bcd9 5f6349c 50a6b52 5f6349c f33586f 45fde58 50a6b52 ab70a4d 45fde58 e8569d3 69e5f39 45fde58 b64bcd9 1043bb5 b5392ae 908d90a b64bcd9 7b6cfde 1043bb5 908d90a b5392ae ffbadc4 b164b4d 38ea0d4 3836430 1043bb5 854df65 c9b1232 a3c8aa4 c9b1232 ffbadc4 a3c8aa4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import functools
import html
import pickle

import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
def make_clickable_both(val):
    """Render a ``"<text>#<url>"`` cell value as an HTML anchor.

    The value is split at the LAST ``#`` so that display text containing
    ``#`` does not break the unpacking; consequently the URL part must not
    itself contain ``#``.

    Args:
        val: String of the form ``"<display text>#<url>"``.

    Returns:
        ``<a href="url">text</a>`` with both parts HTML-escaped. If ``val``
        contains no ``#`` there is no URL to link to, and the escaped text
        is returned on its own.
    """
    name, separator, url = val.rpartition('#')
    if not separator:
        # No URL was attached; degrade to plain (escaped) text instead of
        # raising on a failed unpack.
        return html.escape(val)
    # Escape both parts: the text lands in element content and the URL in a
    # double-quoted attribute, so neither may carry raw markup or quotes.
    return f'<a href="{html.escape(url, quote=True)}">{html.escape(name)}</a>'
# Number of best-scoring passages looked up per query.
_TOP_K = 20


def _build_instructed_query(task_description: str, query: str) -> str:
    """Prefix *query* with the one-sentence task instruction."""
    return f'Instruct: {task_description}\nQuery: {query}'


@functools.lru_cache(maxsize=1)
def _load_search_resources():
    """Load, once per process, everything ``find`` needs besides the query.

    Returns:
        A ``(quran, quran_splitted, model, document_embeddings)`` tuple:
        the verse table read from ``quran-simple-clean.txt``, the unpickled
        passage table, the sentence-embedding model, and the unpickled
        passage embeddings. Callers must treat all four as read-only
        because the same objects are shared by every later call.
    """
    quran = pd.read_csv('quran-simple-clean.txt', delimiter="|")
    # NOTE(security): pickle.load can execute arbitrary code. This is only
    # acceptable because both .sav files are expected to ship with the app;
    # never point these paths at untrusted data.
    with open('quran-splitted.sav', 'rb') as handle:
        quran_splitted = pickle.load(handle)
    model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
    # Precomputed passage embeddings; presumably produced offline with
    # model.encode(<passage texts>, convert_to_tensor=True,
    # normalize_embeddings=True) and pickled -- TODO confirm.
    with open('encoded_quran_text_split_multilingual-e5-large-instructs.sav', 'rb') as handle:
        document_embeddings = pickle.load(handle)
    return quran, quran_splitted, model, document_embeddings


def find(query):
    """Return the verses whose passages best match *query*.

    The query is embedded together with a retrieval instruction, scored
    against the precomputed passage embeddings, and the ``_TOP_K``
    best-scoring passages are mapped back to their rows in the verse table
    via the ``sura`` and ``aya`` columns.

    Args:
        query: Free-text search query.

    Returns:
        A pandas ``Styler`` over the matched verse rows, in descending
        similarity order, whose ``text`` column is formatted by
        ``make_clickable_both`` into a link to the verse's tafsir page.
        A verse appears once per matching passage.
    """
    # Each query must come with a one-sentence instruction that describes the task
    task = 'Given a web search query, retrieve relevant passages that answer the query'
    queries = [_build_instructed_query(task, query)]
    print("cekpoin0\n")
    quran, quran_splitted, model, document_embeddings = _load_search_resources()
    print("cekpoin1\n")
    query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
    scores = (query_embeddings @ document_embeddings.T) * 100
    print("cekpoin2\n")
    # Attach the similarity to a COPY (assign) so the cached passage table
    # is never mutated, then rank best-first.
    ranked = quran_splitted.assign(similarity=scores.tolist()[0]).sort_values(
        by='similarity', ascending=False
    )
    print("cekpoin3\n")
    # head() copes with tables shorter than _TOP_K, where positional
    # indexing up to a fixed count would raise IndexError.
    matches = [
        quran.loc[(quran['sura'] == hit['sura']) & (quran['aya'] == hit['aya'])]
        for _, hit in ranked.head(_TOP_K).iterrows()
    ]
    # Concatenate once; fall back to an empty frame with the right columns
    # because pd.concat rejects an empty list.
    results = pd.concat(matches) if matches else quran.iloc[:0].copy()
    # Pack "<text>#<url>" into the cell; make_clickable_both unpacks it.
    # The URL slug is reproduced exactly as the app has always emitted it.
    results['text'] = (
        results['text'] + '#' + 'https://quran.com/'
        + results['sura'].astype(str) + ':' + results['aya'].astype(str)
        + '/tafsirs/en-tafisr-ibn-kathir'
    )
    return results.style.format({'text': make_clickable_both})
# One textbox in, one table out. The 'text' column is declared as HTML so
# the anchor produced for each verse is rendered as a clickable link.
demo = gr.Interface(
    fn=find,
    inputs="textbox",
    outputs=gr.Dataframe(
        headers=['sura', 'aya', 'text'],
        wrap=True,
        datatype=["str", "str", "html"],
    ),
)
# Plain-text output variant, kept for reference:
#demo = gr.Interface(fn=find, inputs="textbox", outputs="textbox")

if __name__ == "__main__":
    demo.launch(share=True)