# Hugging Face Spaces status when this file was captured: Runtime error
import gradio as gr
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize

# Fetch the NLTK resources used by this module at start-up.
# 'punkt' is what older NLTK releases load for word/sentence tokenization;
# newer releases load the same models from 'punkt_tab', so both are requested
# to keep word_tokenize / sent_tokenize working regardless of the installed
# NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
# Longest common subsequence (LCS).
# Dynamic-programming algorithm for finding the length of the LCS.
def lcs(l1, l2):
    """Return the length of the longest common subsequence of two token lists.

    Each argument may be either a string, which is split into tokens with
    ``word_tokenize``, or an already-tokenized iterable, which is used as-is.
    Accepting token iterables lets callers tokenize once and reuse the result.

    Args:
        l1: First text (``str``) or iterable of tokens.
        l2: Second text (``str``) or iterable of tokens.

    Returns:
        The number of tokens in the longest subsequence common to both
        inputs (0 when either input has no tokens).
    """
    s1 = word_tokenize(l1) if isinstance(l1, str) else list(l1)
    s2 = word_tokenize(l2) if isinstance(l2, str) else list(l2)

    if not s1 or not s2:
        return 0

    # LCS length is symmetric, so iterate rows over the longer sequence and
    # keep the (shorter) one as the row width to minimise memory.
    if len(s1) > len(s2):
        s1, s2 = s2, s1

    # Only the previous DP row is needed to compute the current one.
    prev = [0] * (len(s1) + 1)
    for tok2 in s2:
        curr = [0]
        for j, tok1 in enumerate(s1, start=1):
            if tok2 == tok1:
                curr.append(prev[j - 1] + 1)
            else:
                curr.append(max(prev[j], curr[j - 1]))
        prev = curr
    return prev[-1]
def plagiarismChecker(orig, plag):
    """Score how much of the suspicious text overlaps with the original text.

    Both texts are split into sentences. For every sentence of ``plag`` the
    best LCS length against any sentence of ``orig`` is taken; these best
    values are summed and divided by the number of word tokens in ``plag``.

    Args:
        orig: The original (reference) text.
        plag: The suspicious text to check.

    Returns:
        The overlap score as a percentage (float). Returns 0.0 when ``plag``
        is empty or contains only whitespace.
    """
    # An empty suspicious text has no tokens; without this guard the final
    # division would raise ZeroDivisionError.
    if not plag or not plag.strip():
        return 0.0
    orig = orig or ""

    sent_o = sent_tokenize(orig)
    sent_p = sent_tokenize(plag)
    tokens_p = word_tokenize(plag)
    if not tokens_p:
        return 0.0

    # For each suspicious sentence keep the best match among the original
    # sentences (0 when there are no original sentences at all).
    sum_lcs = sum(
        max((lcs(p_sentence, o_sentence) for o_sentence in sent_o), default=0)
        for p_sentence in sent_p
    )

    score = sum_lcs / len(tokens_p)
    return score * 100
# Gradio front end: two multi-line text inputs are passed to plagiarismChecker
# and the returned score is shown in a text output.
# Components are taken from the top-level namespace (gr.Textbox) because the
# gr.inputs / gr.outputs namespaces are deprecated and absent from later
# Gradio releases.
# NOTE(review): 'dark-peach' is a legacy theme name — confirm the installed
# Gradio version still accepts it.
plagiarismUI = gr.Interface(
    fn=plagiarismChecker,
    inputs=[
        gr.Textbox(lines=10, label='Text 1'),
        gr.Textbox(lines=10, label='Text 2'),
    ],
    outputs=gr.Textbox(label='Plagiarism Level'),
    title="Plagiarism Checker",
    theme='dark-peach',
)
plagiarismUI.launch(inbrowser=False)