AdityaMahimkar committed on
Commit
58e1fdd
1 Parent(s): 9d3773b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ from nltk.tokenize import word_tokenize, sent_tokenize
3
+ from nltk.corpus import stopwords
4
+ nltk.download('punkt')
5
+ nltk.download('stopwords')
6
+
7
+ import gradio as gr
8
+
9
+ #longest common subsequence
10
+ #dynamic programming algorithm for finding lcs
11
def lcs(l1, l2):
    """Return the length of the longest common subsequence of word tokens.

    Both arguments are strings. Each one is split with ``word_tokenize`` and
    the subsequence is computed over the resulting token lists with a
    bottom-up dynamic-programming table.

    Args:
        l1: First piece of text.
        l2: Second piece of text.

    Returns:
        The number of tokens in the longest subsequence shared by both texts.
    """
    col_tokens = word_tokenize(l1)
    row_tokens = word_tokenize(l2)

    # table[r][c] is the LCS length of row_tokens[:r] and col_tokens[:c].
    # Row 0 and column 0 stand for an empty prefix and therefore stay at 0.
    table = [[0] * (len(col_tokens) + 1) for _ in range(len(row_tokens) + 1)]

    for r, row_tok in enumerate(row_tokens, start=1):
        for c, col_tok in enumerate(col_tokens, start=1):
            if row_tok == col_tok:
                # Matching tokens extend the best result of both shorter prefixes.
                table[r][c] = table[r - 1][c - 1] + 1
            else:
                # Otherwise carry over the better of dropping one token
                # from either side.
                table[r][c] = max(table[r - 1][c], table[r][c - 1])

    return table[len(row_tokens)][len(col_tokens)]
26
+
27
def plagiarismChecker(orig, plag):
    """Score how much of ``plag`` overlaps with ``orig``, as a percentage.

    Each sentence of the suspicious text is compared against every sentence
    of the original text with ``lcs``; the best (longest) match per suspicious
    sentence is summed and divided by the number of word tokens in ``plag``.

    Args:
        orig: The original (reference) text.
        plag: The suspicious text to be checked.

    Returns:
        The overlap score multiplied by 100. Returns 0.0 when ``plag``
        contains no tokens, since there is nothing to compare.
    """
    sent_o = sent_tokenize(orig)
    sent_p = sent_tokenize(plag)
    # The denominator of the score: this name was previously used without
    # ever being defined, which raised NameError on every call.
    tokens_p = word_tokenize(plag)

    # Empty suspicious text would otherwise divide by zero below.
    if not tokens_p:
        return 0.0

    sum_lcs = 0
    for suspect in sent_p:
        # Longest LCS of this suspicious sentence against any original
        # sentence; default=0 covers an original text with no sentences.
        sum_lcs += max((lcs(suspect, source) for source in sent_o), default=0)

    score = sum_lcs / len(tokens_p)
    return score * 100
44
+
45
# Build the web UI: two 10-line text inputs (original and suspicious text)
# feeding plagiarismChecker, with its result shown in a single text output.
# NOTE(review): gr.inputs / gr.outputs look like the legacy Gradio component
# namespaces — confirm against the Gradio version this app is pinned to.
plagiarismUI = gr.Interface(
    fn=plagiarismChecker,
    inputs=[
        gr.inputs.Textbox(lines=10, label='Original'),
        gr.inputs.Textbox(lines=10, label='Plagiarised'),
    ],
    outputs=gr.outputs.Textbox(label='Plagiarism Level'),
    title="Plagiarism Checker",
    theme='dark-peach',
)

# Start serving the interface without opening a browser tab.
plagiarismUI.launch(inbrowser=False)