File size: 4,286 Bytes
c064465
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import gradio as gr
from sentence_transformers import SentenceTransformer
import pandas as pd
import pickle
from pathlib import Path  
import time
from datetime import datetime

# --- Module-level startup ---------------------------------------------------
# Load the embedding model, the English Quran text, and the precomputed verse
# embeddings once at import time, so each query only needs to encode itself.
# The timestamped prints bracket each stage for coarse startup profiling.

print("load model start")
print(datetime.fromtimestamp(time.time()))
model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
print("load model end")
print(datetime.fromtimestamp(time.time()))

# English Quran; columns used downstream: 'sura', 'aya', 'text'.
quran = pd.read_csv('quran-eng.csv', delimiter=",")
print("load quran eng")
print(datetime.fromtimestamp(time.time()))

# Precomputed embeddings of the chunked Quran text. Use a context manager so
# the file handle is closed immediately instead of leaking until interpreter
# exit, as the original bare open() did.
# NOTE(review): pickle.load is unsafe on untrusted input — this assumes the
# .sav file ships with the app.
with open('encoded_quran_text_split_multilingual-e5-large-instructs.sav', 'rb') as f:
    document_embeddings = pickle.load(f)
print("load quran embedding")
print(datetime.fromtimestamp(time.time()))

def make_clickable_both(val): 
    """Render a ``"name#url"`` string as an HTML anchor.

    Splits *val* on the FIRST ``#`` only (``str.partition``), so a URL that
    itself contains a fragment ``#`` no longer raises ValueError the way the
    original two-value unpack of ``val.split('#')`` did.

    Returns the string ``<a href="url">name</a>``.
    """
    name, _, url = val.partition('#')
    print(name+"\n")
    print(url+"\n")
    return f'<a href="{url}">{name}</a>'

def find(query):
    """Semantic search over the Quran: return the top-3 matching verses.

    Encodes *query* with the module-level e5-instruct model, scores it against
    the precomputed chunk embeddings, and returns a DataFrame whose ``text``
    column links each verse to its Ibn Kathir tafsir page on quran.com.

    Args:
        query: free-text search string (any language the model supports).

    Returns:
        pandas.DataFrame with a single ``text`` column of HTML anchors.
    """
    print("start")
    print(datetime.fromtimestamp(time.time()))

    def get_detailed_instruct(task_description: str, query: str) -> str:
        # e5-instruct models expect each query prefixed with a one-sentence
        # task description in this exact format.
        return f'Instruct: {task_description}\nQuery: {query}'

    task = 'Given a web search query, retrieve relevant passages that answer the query'
    queries = [
        get_detailed_instruct(task, query)
    ]

    query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
    print("embed query")
    print(datetime.fromtimestamp(time.time()))

    # Embeddings are L2-normalized, so the dot product is cosine similarity;
    # scaled to a 0-100 range.
    scores = (query_embeddings @ document_embeddings.T) * 100
    print("count similarities")
    print(datetime.fromtimestamp(time.time()))

    # Load the chunked Quran text. The context manager closes the handle,
    # fixing the original's per-call file-descriptor leak (bare open(), no
    # close()). NOTE(review): reloading this pickle on every query is slow —
    # consider hoisting it to module level alongside document_embeddings.
    with open('quran-splitted.sav', 'rb') as f:
        quran_splitted = pickle.load(f)
    print("load quran")
    print(datetime.fromtimestamp(time.time()))

    # Attach similarity scores and rank best-first.
    quran_splitted['similarity'] = scores.tolist()[0]
    sorted_quran = quran_splitted.sort_values(by='similarity', ascending=False)
    print("sort by similarity")
    print(datetime.fromtimestamp(time.time()))

    # Collect the full verse rows for the top-3 chunks (assumes >= 3 chunks,
    # as the original while-loop did).
    results = pd.DataFrame()
    for _, hit in sorted_quran.head(3).iterrows():
        verse = quran.loc[(quran['sura'] == hit['sura']) & (quran['aya'] == hit['aya'])]
        results = pd.concat([results, verse])
    print("collect results")
    print(datetime.fromtimestamp(time.time()))

    # Turn each verse into a link to its tafsir page, suffixed with the
    # "(QS. sura:aya)" citation, then drop the now-redundant key columns.
    url = ('https://quran.com/' + results['sura'].astype(str) + ':'
           + results['aya'].astype(str) + '/tafsirs/en-tafisr-ibn-kathir')
    results['text'] = ('<a href="' + url + '">' + results['text'] + '</a>'
                       + ' (QS. ' + results['sura'].astype(str) + ':'
                       + results['aya'].astype(str) + ')')
    results = results.drop(columns=['sura', 'aya'])

    return results
    
# Gradio UI: one textbox in, one DataFrame out. The 'markdown' datatype makes
# the HTML anchors produced by find() render as clickable links.
demo = gr.Interface(
    fn=find, 
    inputs="textbox", 
    #outputs=[gr.Dataframe(headers=['text'],datatype=["markdown"],wrap=True),gr.DownloadButton()],  
    outputs=[gr.Dataframe(headers=['text'],datatype=["markdown"],wrap=True)],  
    cache_examples="lazy",
    # Example queries in English, Indonesian, and Arabic.
    examples=[
                ["law of inheritance in islam"],
                ["tunjukilah jalan yang lurus"],
                ["ุณู„ูŠู…ุงู†"],
            ],
    title="Quran Finder")
#demo = gr.Interface(fn=find, inputs="textbox", outputs="textbox")
    
# Launch the web app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()