Spaces:
Running
Running
File size: 4,286 Bytes
7ab5cc9 549b455 7b84d22 7ab5cc9 4525d51 0368e08 4e43404 4525d51 0368e08 4e43404 e8cda75 0368e08 e8cda75 0368e08 e8cda75 7ab5cc9 549b455 0368e08 7ab5cc9 e9c92d6 0368e08 7ab5cc9 e9c92d6 7ab5cc9 4525d51 0368e08 7ab5cc9 4525d51 0368e08 7ab5cc9 4525d51 0368e08 7ab5cc9 4525d51 0368e08 7ab5cc9 955cdd2 7ab5cc9 4525d51 0368e08 549b455 7ab5cc9 08e5eff 62c86fa 7ab5cc9 5a55441 7ab5cc9 5a55441 dc171bc 7ab5cc9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import gradio as gr
from sentence_transformers import SentenceTransformer
import pandas as pd
import pickle
from pathlib import Path
import time
from datetime import datetime
# --- One-time startup: load the embedding model and the corpus artifacts. ---
# Timestamped prints act as a crude startup profiler (model load dominates).
print("load model start")
print(datetime.fromtimestamp(time.time()))
model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
print("load model end")
print(datetime.fromtimestamp(time.time()))

# Quran translation table; code below relies on 'sura', 'aya' and 'text' columns.
quran = pd.read_csv('quran-eng.csv', delimiter=",")
print("load quran eng")
print(datetime.fromtimestamp(time.time()))

# Precomputed passage embeddings (tensor saved with pickle).
# NOTE(review): pickle.load is only safe because this is a local, trusted file.
# Context manager closes the handle (the original leaked it into module scope).
with open('encoded_quran_text_split_multilingual-e5-large-instructs.sav', 'rb') as f:
    document_embeddings = pickle.load(f)
print("load quran embedding")
print(datetime.fromtimestamp(time.time()))
def make_clickable_both(val):
    """Turn a 'name#url' string into an HTML anchor '<a href="url">name</a>'.

    Splits only on the first '#' so URLs containing a fragment ('#...') still
    work (the original split on every '#' and crashed on such input).
    Debug prints removed.
    """
    name, url = val.split('#', 1)
    return f'<a href="{url}">{name}</a>'
def find(query):
    """Semantic search over the Quran: return the top-3 matching verses.

    Embeds `query` with the module-level E5 model, scores it against the
    precomputed `document_embeddings`, and returns a DataFrame whose 'text'
    column contains HTML links to the matching verses' tafsir pages.

    Args:
        query: free-text search string (any language the model supports).

    Returns:
        pandas.DataFrame with a single 'text' column (HTML anchors),
        at most 3 rows.
    """
    print("start")
    print(datetime.fromtimestamp(time.time()))

    def get_detailed_instruct(task_description: str, query: str) -> str:
        # E5-instruct models expect the query prefixed with a task instruction.
        return f'Instruct: {task_description}\nQuery: {query}'

    task = 'Given a web search query, retrieve relevant passages that answer the query'
    queries = [get_detailed_instruct(task, query)]

    query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
    print("embed query")
    print(datetime.fromtimestamp(time.time()))

    # Cosine similarity (embeddings are normalized), scaled to ~0-100.
    scores = (query_embeddings @ document_embeddings.T) * 100
    print("count similarities")
    print(datetime.fromtimestamp(time.time()))

    # Passage-level DataFrame aligned with document_embeddings rows.
    # Context manager closes the handle (original leaked one per call).
    with open('quran-splitted.sav', 'rb') as f:
        quran_splitted = pickle.load(f)
    print("load quran")
    print(datetime.fromtimestamp(time.time()))

    quran_splitted['similarity'] = scores.tolist()[0]
    sorted_quran = quran_splitted.sort_values(by='similarity', ascending=False)
    print("sort by similarity")
    print(datetime.fromtimestamp(time.time()))

    # Collect the full verse rows for the top matches; guard against a
    # corpus smaller than 3 rows (original iloc[i] would raise IndexError).
    results = pd.DataFrame()
    for rank in range(min(3, len(sorted_quran))):
        hit = sorted_quran.iloc[rank]
        verse = quran.loc[(quran['sura'] == hit['sura']) & (quran['aya'] == hit['aya'])]
        results = pd.concat([results, verse])
    print("collect results")
    print(datetime.fromtimestamp(time.time()))

    # Render each verse as a link to its tafsir page plus a (QS. sura:aya) tag.
    url = 'https://quran.com/' + results['sura'].astype(str) + ':' + results['aya'].astype(str) + '/tafsirs/en-tafisr-ibn-kathir'
    results['text'] = '<a href="' + url + '">' + results['text'] + '</a>' + ' (QS. ' + results['sura'].astype(str) + ':' + results['aya'].astype(str) + ')'
    results = results.drop(columns=['sura', 'aya'])
    return results
# Sample queries shown under the search box (English, Indonesian, Arabic).
_EXAMPLES = [
    ["law of inheritance in islam"],
    ["tunjukilah jalan yang lurus"],
    ["سليمان"],
]

# Gradio UI: one textbox in, one markdown-rendering table of results out.
demo = gr.Interface(
    fn=find,
    inputs="textbox",
    outputs=[gr.Dataframe(headers=['text'], datatype=["markdown"], wrap=True)],
    cache_examples="lazy",
    examples=_EXAMPLES,
    title="Quran Finder",
)

if __name__ == "__main__":
    demo.launch()