Bofandra committed on
Commit
edd3ce2
1 Parent(s): 8b106bd

Upload 5 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ all_hadiths_clean[[:space:]](1).csv filter=lfs diff=lfs merge=lfs -text
37
+ all_hadiths_clean[[:space:]](2).csv filter=lfs diff=lfs merge=lfs -text
38
+ encoded_hadiths_multilingual-e5-large-instruct[[:space:]](1)[[:space:]](1).sav filter=lfs diff=lfs merge=lfs -text
all_hadiths_clean (1).csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6800edb54d45a6e6c5228cf0342c0c72d90eca62a857bcbb5c61684e550db15
3
+ size 41069994
all_hadiths_clean (2).csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6800edb54d45a6e6c5228cf0342c0c72d90eca62a857bcbb5c61684e550db15
3
+ size 41069994
app (1).py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from sentence_transformers import SentenceTransformer
3
+ import pandas as pd
4
+ #import pickle
5
+ import torch
6
+ from pathlib import Path
7
+
8
+ # Map a hadith source name to the short collection key used in links
9
def categorize_source(source):
    """Map a hadith source label to a short collection key.

    The label is searched for a known collection name; the first match in
    the order listed below wins. ``find`` uses the returned key as the
    collection part of the link it builds for each result.

    Args:
        source: Source label of a hadith row. Non-string values (for
            example a missing cell read as NaN) are treated as unknown.

    Returns:
        One of ``'nasai'``, ``'ibnmajah'``, ``'abudawud'``, ``'muslim'``,
        ``'bukhari'``, ``'tirmidhi'``, or ``''`` when nothing matches.
    """
    # A substring test on a non-string (e.g. float NaN) raises TypeError,
    # so treat such values the same as an unrecognised source.
    if not isinstance(source, str):
        return ''

    # Order matters: it mirrors the precedence of the original checks.
    markers = (
        ("Nasa'i", 'nasai'),
        ("Majah", 'ibnmajah'),
        ("Da'ud", 'abudawud'),
        ("Muslim", 'muslim'),
        ("Bukhari", 'bukhari'),
        ("Tirmidhi", 'tirmidhi'),
    )
    for marker, key in markers:
        if marker in source:
            return key
    return ''
24
+
25
# Characters that are path separators or reserved in common filesystems.
_UNSAFE_FILENAME_CHARS = frozenset('<>:"/\\|?*')

# Process-wide cache so the CSV, the model and the embeddings are loaded once
# instead of on every query.
_SEARCH_RESOURCES = {}


def _load_search_resources():
    """Return (hadiths, model, document_embeddings), loading them on first use."""
    if not _SEARCH_RESOURCES:
        hadiths = pd.read_csv('all_hadiths_clean.csv', delimiter=",")
        model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
        # NOTE(security): torch.load unpickles the file. Only load embedding
        # files that ship with the app, never user-supplied ones.
        document_embeddings = torch.load(
            'encoded_hadiths_multilingual-e5-large-instruct (1).sav',
            map_location='cpu',
        )
        _SEARCH_RESOURCES.update(
            hadiths=hadiths,
            model=model,
            document_embeddings=document_embeddings,
        )
    return (
        _SEARCH_RESOURCES['hadiths'],
        _SEARCH_RESOURCES['model'],
        _SEARCH_RESOURCES['document_embeddings'],
    )


def _safe_csv_path(query):
    """Build a CSV path from the query that cannot escape the working directory."""
    cleaned = "".join(
        "_" if ch in _UNSAFE_FILENAME_CHARS or ord(ch) < 32 else ch
        for ch in str(query)
    ).strip(" .")
    # Cap the length so multi-byte text stays below typical filename limits,
    # and fall back to a fixed name when nothing usable is left.
    return Path((cleaned[:60] or "results") + '.csv')


def find(query):
    """Search the hadith collection for passages relevant to ``query``.

    The query is wrapped in an instruction prompt, encoded with the
    sentence-transformer model and scored against the stored hadith
    embeddings. The 20 best-scoring rows are returned with the Arabic text
    turned into a link, and the same rows are written to a CSV file.

    Args:
        query: Free-text search query entered by the user.

    Returns:
        A tuple ``(results, filepath)``: the DataFrame of the top 20 rows
        (including their ``similarity`` score) and the ``Path`` of the CSV
        file written for download.
    """
    def get_detailed_instruct(task_description: str, query: str) -> str:
        return f'Instruct: {task_description}\nQuery: {query}'

    # Each query must come with a one-sentence instruction that describes the task
    task = 'Given a web search query, retrieve relevant passages that answer the query'
    queries = [get_detailed_instruct(task, query)]
    print("cekpoin0\n")

    hadiths, model, document_embeddings = _load_search_resources()
    print("cekpoin1\n")

    query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
    # The stored embeddings are mapped to CPU while the model may encode on
    # another device; align them so the matrix product below is valid.
    query_embeddings = query_embeddings.to(document_embeddings.device)
    scores = (query_embeddings @ document_embeddings.T) * 100
    print("cekpoin2\n")

    # Attach the similarity scores on a copy (the cached frame is shared
    # between calls) and rank the rows by score.
    sorted_hadiths = hadiths.assign(similarity=scores.tolist()[0]).sort_values(
        by='similarity', ascending=False
    )
    print("cekpoin3\n")

    results = sorted_hadiths.head(20).drop(columns=['id', 'hadith_id', 'chain_indx'])
    results['source_cat'] = results['source'].apply(categorize_source)
    results['hadith_no'] = results['hadith_no'].str.strip()

    # Wrap the Arabic text in a link built from the collection key and number.
    url = 'https://sunnah.com/' + results['source_cat'].astype(str) + ':' + results['hadith_no'].astype(str)
    results['text_ar'] = '<a href="' + url + '">' + results['text_ar'] + '</a>'
    results = results.drop(columns=['source_cat'])

    # The query is user input, so sanitise it before using it as a filename.
    filepath = _safe_csv_path(query)
    results.to_csv(filepath, index=False)
    return results, filepath
66
+
67
# Table of results; the 'text_ar' column is declared as markdown so the link
# placed in it by find() is rendered instead of shown as raw text.
_results_table = gr.Dataframe(
    headers=['source', 'chapter_no', 'hadith_no', 'chapter', 'text_ar', 'text_en'],
    datatype=["str", "str", "str", "str", "markdown", "str"],
    wrap=True,
)

# Receives the CSV path returned as the second value of find().
_download_button = gr.DownloadButton()

# Sample queries offered in the UI.
_example_queries = [
    ["law of inheritance in islam"],
    ["tunjukilah jalan yang lurus"],
    ["سليمان"],
]

demo = gr.Interface(
    fn=find,
    inputs="textbox",
    outputs=[_results_table, _download_button],
    examples=_example_queries,
    title="Hadiths Finder",
)

if __name__ == "__main__":
    demo.launch()
encoded_hadiths_multilingual-e5-large-instruct (1) (1).sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2821e7b77e91294ab6385974537e653ef5be43bd59a30df6db9276483c93999c
3
+ size 141071516
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ sentence_transformers
2
+ pandas
3
+ gradio