Spaces:
Build error
Build error
File size: 2,311 Bytes
ffbadc4 e85ca86 c4baae7 69e5f39 ffbadc4 fc50127 e8569d3 5f6349c b5392ae b64bcd9 5f6349c 50a6b52 5f6349c f33586f 45fde58 50a6b52 ab70a4d 45fde58 e8569d3 69e5f39 45fde58 b64bcd9 b5392ae b64bcd9 7b6cfde 6d63854 b5392ae ffbadc4 c9b1232 615cf43 c9b1232 ffbadc4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import functools
import pickle

import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
@functools.lru_cache(maxsize=None)
def _load_pickle(path):
    """Unpickle the object stored at ``path`` once and cache it by path."""
    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at asset files that ship with the app.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


@functools.lru_cache(maxsize=1)
def _load_model():
    """Instantiate the sentence-embedding model once and reuse it."""
    return SentenceTransformer('intfloat/multilingual-e5-large-instruct')


@functools.lru_cache(maxsize=1)
def _load_quran():
    """Read the pipe-delimited verse table once and reuse it."""
    return pd.read_csv('quran-simple-clean.txt', delimiter="|")


def _verse_number(value):
    """Render a sura/aya number as text, dropping a float's trailing ``.0``.

    ``str(value).rstrip('.0')`` must not be used for this: ``rstrip`` removes
    any trailing run of the characters ``.`` and ``0``, so ``10.0`` would
    become ``1``.
    """
    number = float(value)
    return str(int(number)) if number.is_integer() else str(value)


def _format_top_verses(sorted_quran, limit=6):
    """Render the best ``limit`` rows of ``sorted_quran`` as plain text.

    Kept for the plain-text (textbox) output variant of the app; ``find``
    itself returns the ranked table.

    Args:
        sorted_quran: Ranked frame with ``sura`` and ``aya`` columns.
        limit: Maximum number of rows to render.

    Returns:
        One line per row, formatted as ``<verse text> (Q.S <sura>:<aya>)``.

    Raises:
        ValueError: If a (sura, aya) pair does not match exactly one row of
            the verse table.
    """
    quran = _load_quran()
    lines = []
    for _, match in sorted_quran.head(limit).iterrows():
        same_verse = (
            (quran['sura'] == match['sura']) & (quran['aya'] == match['aya'])
        )
        verse_text = quran.loc[same_verse, 'text'].item()
        reference = (
            f"{_verse_number(match['sura'])}:{_verse_number(match['aya'])}"
        )
        lines.append(f"{verse_text} (Q.S {reference})\n")
    return "".join(lines)


def find(query: str) -> pd.DataFrame:
    """Rank the stored Quran passages by similarity to ``query``.

    The query is wrapped in the instruction prompt, embedded with the model,
    and compared against the precomputed passage embeddings.

    Args:
        query: Free-text search query.

    Returns:
        A copy of the ``quran-splitted.sav`` table with a ``similarity``
        column (dot product of the embeddings, times 100), sorted from most
        to least similar.
    """
    def get_detailed_instruct(task_description: str, query: str) -> str:
        return f'Instruct: {task_description}\nQuery: {query}'

    # Each query must come with a one-sentence instruction that describes the task
    task = 'Given a web search query, retrieve relevant passages that answer the query'
    queries = [get_detailed_instruct(task, query)]
    print("cekpoin0\n")

    # Model and data are cached after the first call instead of being
    # reloaded on every request.
    quran_splitted = _load_pickle('quran-splitted.sav')
    model = _load_model()
    # According to the original generation snippet, these embeddings come from
    # model.encode(quran_splitted['text'].tolist(), convert_to_tensor=True,
    # normalize_embeddings=True), pickled to disk.
    # NOTE(review): the file name ends in "instructs.sav" while the model name
    # ends in "instruct" - confirm it matches the file on disk.
    document_embeddings = _load_pickle(
        'encoded_quran_text_split_multilingual-e5-large-instructs.sav'
    )
    print("cekpoin1\n")

    query_embeddings = model.encode(
        queries, convert_to_tensor=True, normalize_embeddings=True
    )
    scores = (query_embeddings @ document_embeddings.T) * 100
    print("cekpoin2\n")

    # Attach the scores of the single query; assign() returns a new frame, so
    # the cached table is never mutated between requests.
    sorted_quran = quran_splitted.assign(
        similarity=scores.tolist()[0]
    ).sort_values(by='similarity', ascending=False)
    print("cekpoin3\n")

    # For a plain-text result, return _format_top_verses(sorted_quran) instead.
    return sorted_quran
# Web UI: a text box for the query and a table for the value returned by find.
demo = gr.Interface(
    fn=find,
    inputs="textbox",
    outputs=gr.Dataframe(headers=['sura', 'aya', 'similarity', 'text']),
)
# Plain-text variant (presumably needs find to return a string instead):
#demo = gr.Interface(fn=find, inputs="textbox", outputs="textbox")

if __name__ == "__main__":
    # Start the server only when the file is run as a script.
    demo.launch()