File size: 1,613 Bytes
ffbadc4
471985d
e85ca86
c4baae7
ffbadc4
fc50127
471985d
af79189
fc50127
c87bac2
 
b64bcd9
c87bac2
471985d
c87bac2
 
 
 
 
 
 
 
 
 
 
b64bcd9
 
 
135533a
b5392ae
b64bcd9
 
 
b5392ae
 
 
b64bcd9
7b6cfde
6d63854
b5392ae
 
 
ffbadc4
fc50127
ffbadc4
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import functools
import pickle

import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer

# Static assets used by the search. They are loaded lazily and only once.
_MODEL_NAME = 'Bofandra/fine-tuning-use-cmlm-multilingual-quran-translation'
_ENCODED_QURAN_PATH = 'encoded_quran_fine-tuning-use-cmlm-multilingual-quran-splitted.sav'
_QURAN_SPLITTED_PATH = 'quran-splitted.sav'
_QURAN_TEXT_PATH = 'quran-simple-clean.txt'
# Number of best-matching rows reported per query.
_TOP_K = 6


@functools.lru_cache(maxsize=1)
def _load_model():
    """Instantiate the sentence encoder once and reuse it for every query."""
    return SentenceTransformer(_MODEL_NAME)


@functools.lru_cache(maxsize=1)
def _load_corpus():
    """Load the stored encodings and both verse tables once.

    Returns:
        A tuple ``(encoded_quran_text, quran_splitted, quran)``: the pickled
        encodings, the pickled DataFrame they are aligned with, and the
        DataFrame read from the pipe-delimited text file.
    """
    # NOTE(security): pickle.load can execute arbitrary code. These files
    # must only ever be trusted assets shipped with the application.
    with open(_ENCODED_QURAN_PATH, 'rb') as file:
        encoded_quran_text = pickle.load(file)
    with open(_QURAN_SPLITTED_PATH, 'rb') as file:
        quran_splitted = pickle.load(file)
    quran = pd.read_csv(_QURAN_TEXT_PATH, delimiter="|")
    return encoded_quran_text, quran_splitted, quran


def _number_text(value):
    """Render a sura/aya number without a float suffix (``10.0`` -> ``"10"``)."""
    # str(value).rstrip('.0') would strip *every* trailing '0' and '.',
    # turning 10.0 into "1"; convert through int instead.
    try:
        return str(int(float(value)))
    except (TypeError, ValueError, OverflowError):
        return str(value)


def find(query):
    """Return the verses whose stored encodings best match ``query``.

    The query is encoded with the sentence-transformer model, compared
    against every stored encoding with the ``@`` (matrix product) operator,
    and the ``_TOP_K`` highest-scoring rows are looked up in the text table.

    Args:
        query: Free text entered by the user.

    Returns:
        A string with one line per match, formatted as
        ``"<text> (Q.S <sura>:<aya>)\\n"``, best match first. Fewer than
        ``_TOP_K`` lines are returned if the table has fewer rows.

    Raises:
        ValueError: If a ranked (sura, aya) pair does not match exactly one
            row of the text table (raised by ``Series.item()``).
    """
    encoded_query_text = _load_model().encode(query)
    encoded_quran_text, quran_splitted, quran = _load_corpus()

    # Compare the query to each stored encoding. Kept per-item because the
    # container type/shape of the pickled encodings is not visible here.
    text_similarity = [
        encoded_query_text @ encoded_quran_ayat.T
        for encoded_quran_ayat in encoded_quran_text
    ]

    # assign() returns a new frame, so the cached table is never mutated.
    sorted_quran = quran_splitted.assign(similarity=text_similarity).sort_values(
        by='similarity', ascending=False
    )

    # Look up the full verse text for each of the best-scoring rows.
    lines = []
    for _, result in sorted_quran.head(_TOP_K).iterrows():
        result_quran = quran.loc[
            (quran['sura'] == result['sura']) & (quran['aya'] == result['aya'])
        ]
        lines.append(
            result_quran['text'].item()
            + " (Q.S " + _number_text(result['sura'])
            + ":" + _number_text(result['aya']) + ")\n"
        )

    return "".join(lines)

# Gradio UI: one text box for the query, one text box for the matches,
# both wired to `find`.
demo = gr.Interface(
    fn=find,
    inputs="textbox",
    outputs="textbox",
)

if __name__ == "__main__":
    # Start the web server only when this file is executed as a script.
    demo.launch()