Spaces:
Runtime error
Runtime error
AdityaMahimkar
committed on
Commit
•
58e1fdd
1
Parent(s):
9d3773b
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import nltk
|
2 |
+
from nltk.tokenize import word_tokenize, sent_tokenize
|
3 |
+
from nltk.corpus import stopwords
|
4 |
+
# Fetch the NLTK data packages this app requests at start-up, in the same
# order as before: the 'punkt' tokenizer models first, then the 'stopwords'
# corpus.
for _resource in ('punkt', 'stopwords'):
    nltk.download(_resource)
|
6 |
+
|
7 |
+
import gradio as gr
|
8 |
+
|
9 |
+
#longest common subsequence
|
10 |
+
#dynamic programming algorithm for finding lcs
|
11 |
+
def lcs(l1, l2):
    """Return the length of the longest common subsequence of two texts.

    Both inputs are split into word tokens with ``word_tokenize`` and the
    subsequence is computed over those tokens (not over characters).

    Args:
        l1: First text.
        l2: Second text.

    Returns:
        The number of tokens in the longest common subsequence.
    """
    col_tokens = word_tokenize(l1)
    row_tokens = word_tokenize(l2)

    # table[r][c] holds the LCS length of the first r tokens of l2 and the
    # first c tokens of l1. Row 0 and column 0 stay at 0 (empty prefix).
    table = [[0] * (len(col_tokens) + 1) for _ in range(len(row_tokens) + 1)]

    for r, row_tok in enumerate(row_tokens, start=1):
        for c, col_tok in enumerate(col_tokens, start=1):
            if row_tok == col_tok:
                # Matching tokens extend the LCS of both shorter prefixes.
                table[r][c] = table[r - 1][c - 1] + 1
            else:
                # Otherwise carry over the better of dropping one token
                # from either side.
                table[r][c] = max(table[r - 1][c], table[r][c - 1])

    return table[-1][-1]
|
26 |
+
|
27 |
+
def plagiarismChecker(orig, plag):
    """Estimate how much of ``plag`` is copied from ``orig``, as a percentage.

    Both texts are split into sentences. For each sentence of the suspicious
    text, the longest word-level LCS against any sentence of the original is
    taken. These best matches are summed and divided by the number of word
    tokens in the suspicious text.

    Args:
        orig: The original (reference) text.
        plag: The suspicious text to check against ``orig``.

    Returns:
        The score as a float percentage (``matched_tokens / total_tokens *
        100``), or ``0.0`` when ``plag`` contains no tokens.
    """
    # The denominator was previously an undefined name (`tokens_p`), which
    # raised NameError on every call. It is the token count of the
    # suspicious text.
    tokens_p = word_tokenize(plag)
    if not tokens_p:
        # Nothing to compare; also avoids dividing by zero.
        return 0.0

    sent_o = sent_tokenize(orig)
    sent_p = sent_tokenize(plag)

    # Best LCS per suspicious sentence; `default=0` covers an empty original.
    sum_lcs = sum(
        max((lcs(p_sentence, o_sentence) for o_sentence in sent_o), default=0)
        for p_sentence in sent_p
    )

    return sum_lcs / len(tokens_p) * 100
|
44 |
+
|
45 |
+
# Web UI: two multi-line text boxes (original and suspicious text) feeding
# plagiarismChecker, with the resulting score shown in a text box.
plagiarismUI = gr.Interface(
    fn=plagiarismChecker,
    inputs=[
        gr.inputs.Textbox(lines=10, label='Original'),
        gr.inputs.Textbox(lines=10, label='Plagiarised'),
    ],
    outputs=gr.outputs.Textbox(label='Plagiarism Level'),
    title="Plagiarism Checker",
    theme='dark-peach',
)

# Start the server without opening a local browser window.
plagiarismUI.launch(inbrowser=False)
|