# AdityaMahimkar's picture
# Update app.py
# 1cb1ebf
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
# Fetch the NLTK resources at import time: 'punkt' is the model behind
# word_tokenize/sent_tokenize, 'stopwords' is the corpus behind
# nltk.corpus.stopwords (imported above; not used in the code visible here).
nltk.download('punkt')
nltk.download('stopwords')
import gradio as gr
#longest common subsequence
#dynamic programming algorithm for finding lcs
def lcs(l1, l2):
    """Return the length of the longest common subsequence of word tokens.

    Both strings are split with ``word_tokenize`` and the resulting token
    lists are compared with exact (case-sensitive) equality.

    Args:
        l1: First text.
        l2: Second text.

    Returns:
        The number of tokens in the longest common subsequence; 0 when
        either text yields no tokens.
    """
    col_tokens = word_tokenize(l1)
    row_tokens = word_tokenize(l2)

    # previous[k] is the LCS length between the row tokens consumed so far
    # and the first k column tokens; only the last DP row is ever needed.
    previous = [0] * (len(col_tokens) + 1)
    for row_tok in row_tokens:
        current = [0]
        for k, col_tok in enumerate(col_tokens, start=1):
            if row_tok == col_tok:
                # Matching tokens extend the diagonal subsequence by one.
                current.append(previous[k - 1] + 1)
            else:
                # Otherwise carry over the better of dropping either token.
                current.append(max(previous[k], current[k - 1]))
        previous = current
    return previous[-1]
def plagiarismChecker(orig, plag):
    """Score how much of ``plag`` overlaps with ``orig``, as a percentage.

    Every sentence of the suspicious text is compared with every sentence
    of the original text via ``lcs``. The best LCS length found for each
    suspicious sentence is summed, and the total is divided by the number
    of word tokens in the suspicious text.

    Args:
        orig: The original (reference) text.
        plag: The suspicious text to be scored.

    Returns:
        The overlap score in percent as a float. Returns ``0.0`` when
        ``plag`` contains no tokens (empty or whitespace-only input).
    """
    tokens_p = word_tokenize(plag)
    # Nothing to score: bail out instead of dividing by zero below.
    if not tokens_p:
        return 0.0

    sent_o = sent_tokenize(orig)
    sent_p = sent_tokenize(plag)

    # For each suspicious sentence keep only its best match among the
    # original sentences; default=0 covers an empty original text.
    sum_lcs = sum(
        max((lcs(p_sent, o_sent) for o_sent in sent_o), default=0)
        for p_sent in sent_p
    )

    score = sum_lcs / len(tokens_p)
    return score * 100
# Web UI: the two text boxes are passed positionally to plagiarismChecker
# ('Text 1' -> orig, 'Text 2' -> plag) and the returned score is displayed
# in a single text output.
plagiarismUI = gr.Interface(
    fn=plagiarismChecker,
    inputs=[
        gr.inputs.Textbox(lines=10, label='Text 1'),
        gr.inputs.Textbox(lines=10, label='Text 2'),
    ],
    outputs=gr.outputs.Textbox(label='Plagiarism Level'),
    title="Plagiarism Checker",
    theme='dark-peach',
)

# Start serving the interface; inbrowser=False is passed explicitly.
plagiarismUI.launch(inbrowser=False)