"""LCS-based plagiarism checker with a Gradio UI.

For each sentence of the suspicious text, find its best-matching sentence
in the original text by longest-common-subsequence length (in word
tokens), sum those maxima, and normalize by the suspicious text's total
token count to get a 0-100 score.
"""
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords

import gradio as gr

nltk.download('punkt')
nltk.download('stopwords')


def lcs(l1, l2):
    """Return the length of the longest common subsequence of word
    tokens between sentences *l1* and *l2*.

    Classic O(m*n) dynamic program over the two token sequences.
    """
    s1 = word_tokenize(l1)
    s2 = word_tokenize(l2)
    # dp[i][j] = LCS length of s2[:i] vs s1[:j]. Row/column 0 are the
    # empty-prefix base case, so initialize the whole table to 0 instead
    # of None-then-overwrite.
    dp = [[0] * (len(s1) + 1) for _ in range(len(s2) + 1)]
    for i in range(1, len(s2) + 1):
        for j in range(1, len(s1) + 1):
            if s2[i - 1] == s1[j - 1]:
                dp[i][j] = dp[i - 1][j - 1] + 1
            else:
                dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
    return dp[len(s2)][len(s1)]


def plagiarismChecker(orig, plag):
    """Return a plagiarism score in [0, 100] for *plag* against *orig*.

    Each suspicious sentence contributes the maximum LCS it achieves
    against any original sentence; the sum is normalized by the token
    count of the suspicious text.
    """
    sent_o = sent_tokenize(orig)
    sent_p = sent_tokenize(plag)
    tokens_p = word_tokenize(plag)
    # BUG FIX: an empty/whitespace-only suspicious text previously raised
    # ZeroDivisionError on the final normalization.
    if not tokens_p:
        return 0.0
    sum_lcs = 0
    for suspicious in sent_p:
        # Best match of this suspicious sentence over all original
        # sentences (0 when the original text has no sentences).
        sum_lcs += max((lcs(suspicious, original) for original in sent_o),
                       default=0)
    return sum_lcs / len(tokens_p) * 100


# FIX: gr.inputs.* / gr.outputs.* were deprecated in Gradio 3 and removed
# in Gradio 4 — use the top-level components. The legacy 'dark-peach'
# string theme was likewise removed, so it is dropped here.
plagiarismUI = gr.Interface(
    fn=plagiarismChecker,
    inputs=[
        gr.Textbox(lines=10, label='Text 1'),
        gr.Textbox(lines=10, label='Text 2'),
    ],
    outputs=gr.Textbox(label='Plagiarism Level'),
    title="Plagiarism Checker",
)

# Only launch the server when run as a script, not on import.
if __name__ == "__main__":
    plagiarismUI.launch(inbrowser=False)