muhalmutaz committed on
Commit
b4e3c5c
1 Parent(s): 61ba970
Files changed (4) hide show
  1. app.py +108 -0
  2. pickle.pkl +3 -0
  3. quran.csv +0 -0
  4. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # external
2
+ import pytest
3
+ # project
4
+ import textdistance
5
+ import csv
6
+ import pyarabic.araby as araby
7
+
8
+ import numpy as np
9
+ from statistics import mean
10
+
11
+ import pickle
12
+ import os
13
+
14
+ import gradio as gr
15
+
16
def textdistance_lcsseq(A,B):
    """Mark the parts of B that match A and compute a similarity grade.

    Walks B against ``textdistance.lcsseq(A, B)`` and rebuilds B with every
    run of matched characters wrapped in parentheses.

    Args:
        A: the query text.
        B: the candidate text that is annotated and scored.

    Returns:
        A tuple ``(str_return, grade)``: the annotated copy of B and its
        numeric score (higher means a better match).
    """
    # NOTE(review): the nesting below was reconstructed from a rendering that
    # lost all indentation -- confirm it against the original app.py.

    # Presumably the longest common subsequence of A and B (going by the
    # textdistance API name); the code below only relies on it being an
    # indexable sequence of characters.
    C = textdistance.lcsseq(A,B)

    str_return = ""

    i=0  # cursor into B
    j=0  # cursor into C
    inside=False  # True while a "(" has been emitted and not yet closed
    grade = 0
    seq = 1  # reward for the next matched character; grows along a run
    while i<len(B) and j<len(C):
        if B[i] == C[j]:
            # Matched character: open a parenthesised run if needed.
            if inside:
                str_return += B[i]
            else:
                inside = True
                str_return += "("
                str_return += B[i]
            # Consecutive matches are rewarded 1, 2, 3, ... so long runs
            # score more than the same number of scattered matches.
            grade += seq
            seq += 1
            i+=1
            j+=1
        else:
            # Mismatch: the run is broken, reset the reward and penalise.
            seq = 1
            grade -= 0.3
            if inside:
                str_return += ")"
                inside = False
                str_return += B[i]
                i+=1
            else:
                if C[j]==" ":
                    # The next expected character is a space: copy B
                    # verbatim (no extra penalty) up to its next space,
                    # then step past that space in C.
                    while not B[i] == C[j]:
                        str_return += B[i]
                        i+=1
                    j+=1
                str_return += B[i]
                i+=1
    # Close a run that was still open when the scan stopped.
    if inside:
        str_return += ")"
    # Whatever is left of B is unmatched: copy it and penalise each character.
    while i<len(B):
        grade -= 0.3
        str_return += B[i]
        i+=1

    # Whole-word bonus: +10 for every pair of identical space-separated
    # tokens, one taken from A and one from B.
    for wordA in A.split(" "):
        for wordB in B.split(" "):
            if wordA == wordB:
                grade+=10

    return str_return,grade
67
+
68
def load():
    """Return the corpus as a list of diacritic-free text strings.

    The list is cached in ``pickle.pkl`` in the current working directory.
    When that file exists it is unpickled and returned. Otherwise the third
    column of every row of ``quran.csv`` is passed through
    ``araby.strip_diacritics``, the resulting list is written to the cache
    file, and the list is returned.

    Returns:
        list: one string per CSV row, in file order.

    Raises:
        FileNotFoundError: if neither ``pickle.pkl`` nor ``quran.csv`` exists.
    """
    filename = "pickle.pkl"

    if os.path.exists(filename):
        # NOTE: unpickling can execute arbitrary code; this cache must only
        # ever be a file written by this function or shipped with the app.
        with open(filename, 'rb') as picklefile:
            return pickle.load(picklefile)

    # Read the CSV completely *before* creating the cache file, so that a
    # missing or malformed CSV never leaves an empty cache behind (which
    # would make every later call fail while unpickling it).
    # newline="" is what the csv module asks for when given a file object.
    with open('quran.csv', encoding="utf-8", newline="") as csv_file:
        quran = [
            araby.strip_diacritics(row[2])
            for row in csv.reader(csv_file, delimiter=',')
        ]

    with open(filename, 'wb') as picklefile:
        pickle.dump(quran, picklefile)

    return quran
84
+
85
def search(query,numberOfResults):
    """Return the best-scoring corpus entries for ``query`` as display text.

    Every entry returned by ``load()`` is scored with
    ``textdistance_lcsseq(query, entry)``. Entries are ranked by score,
    highest first (ties keep corpus order). Of the top ``numberOfResults``
    entries, only those scoring strictly above the mean score of the top
    ``3 * numberOfResults`` entries are reported.

    Args:
        query: the text to look for.
        numberOfResults: maximum number of lines to return.

    Returns:
        str: one ``"<score> : <annotated entry>"`` line per reported entry,
        each terminated by a newline; an empty string when nothing qualifies,
        the corpus is empty, or ``numberOfResults`` is not positive.
    """
    # Defensive: slicing and ranking need an int, and a UI slider may hand
    # over a float -- TODO confirm what type the slider actually delivers.
    limit = int(numberOfResults)

    # Each item is (annotated_text, score).
    scored = [textdistance_lcsseq(query, verse) for verse in load()]

    # mean() raises on empty data, so bail out before computing the cutoff.
    if not scored or limit <= 0:
        return ""

    # sorted() is stable, also with reverse=True, so ties keep corpus order.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)

    # Cutoff: average score over a head three times as long as the result
    # list; results must beat it strictly.
    meanOfHead = mean([score for _, score in ranked[:limit * 3]])

    return "".join(
        "%d : %s" % (score, annotated) + "\n"
        for annotated, score in ranked[:limit]
        if score > meanOfHead
    )
106
+
107
+
108
# Web UI: a free-text query box plus a slider (1-100, default 10, step 1)
# whose value is passed to search() as numberOfResults; output is plain text.
demo = gr.Interface(
    fn=search,
    inputs=["text", gr.Slider(1, 100, value=10, step=1)],
    outputs=["text"],
)
demo.launch()
pickle.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:667ceecae8bb243b294d05e889e2024d2ec5d51ae970d79d52e67e0e50129c8c
3
+ size 757817
quran.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ pytest
2
+ textdistance
3
+ csv
4
+ pyarabic
5
+ numpy
6
+ pickle