Spaces:

Muhalmutaz
/

lcseq

Runtime error

App Files Files Community

muhalmutaz committed on Oct 25, 2022

Commit

b4e3c5c

•

1 Parent(s): 61ba970

main

Browse files

Files changed (4) hide show

app.py +108 -0
pickle.pkl +3 -0
quran.csv +0 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,108 @@

+# external
+import pytest
+# project
+import textdistance
+import csv
+import pyarabic.araby as araby
+import numpy as np
+from statistics import mean
+import pickle
+import os
+import gradio as gr
+def textdistance_lcsseq(A,B):
+    C = textdistance.lcsseq(A,B)
+    str_return = ""
+    i=0
+    j=0
+    inside=False
+    grade = 0
+    seq = 1
+    while i<len(B) and j<len(C):
+        if B[i] == C[j]:
+            if inside:
+                str_return += B[i]
+            else:
+                inside = True
+                str_return += "("
+                str_return += B[i]
+            grade += seq
+            seq += 1
+            i+=1
+            j+=1
+        else:
+            seq = 1
+            grade -= 0.3
+            if inside:
+                str_return += ")"
+                inside = False
+                str_return += B[i]
+                i+=1
+            else:
+                if C[j]==" ":
+                    while not B[i] == C[j]:
+                        str_return += B[i]
+                        i+=1
+                    j+=1
+                str_return += B[i]
+                i+=1
+    if inside:
+        str_return += ")"
+    while i<len(B):
+        grade -= 0.3
+        str_return += B[i]
+        i+=1
+    for wordA in A.split(" "):
+        for wordB in B.split(" "):
+            if wordA == wordB:
+                grade+=10
+    return str_return,grade
+def load():
+    quran = []
+    filename = "pickle.pkl"
+    if(not os.path.exists(filename)):
+        picklefile = open(filename, 'wb')
+        csv_file = open('quran.csv', encoding="utf-8")
+        csv_reader = csv.reader(csv_file, delimiter=',')
+        for i,row in enumerate(csv_reader):
+            quran.append(araby.strip_diacritics(row[2]))
+        pickle.dump(quran, picklefile)
+    else:
+        picklefile = open(filename, 'rb')
+        quran = pickle.load(picklefile)
+    return quran
+def search(query,numberOfResults):
+    quran = load()
+    lcsseq = []
+    lengths = []
+    for q in quran:
+        tmp1,tmp2 = textdistance_lcsseq(query,q)
+        lcsseq.append(tmp1)
+        lengths.append(tmp2)
+    indices =  [b[0] for b in sorted(enumerate(lengths),key=lambda i:i[1],reverse=True)]
+    lengths_sorted = sorted(lengths,reverse=True)
+    meanOfHead = mean(lengths_sorted[0:min(numberOfResults*3,len(lengths_sorted))])
+    toReturn =""
+    for i in range(0,min(numberOfResults,len(indices))):
+        if(lengths[indices[i]] > meanOfHead):
+            toReturn += "%d : %s"%(lengths[indices[i]],lcsseq[indices[i]]) + "\n"
+    return toReturn
+gr.Interface(fn=search, inputs=["text",gr.Slider(1, 100, value=10, step=1)], outputs=["text"]).launch()

pickle.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:667ceecae8bb243b294d05e889e2024d2ec5d51ae970d79d52e67e0e50129c8c
+size 757817

quran.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+pytest
+textdistance
+csv
+pyarabic
+numpy
+pickle