File size: 1,396 Bytes
58e1fdd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8adac66
58e1fdd
 
 
b38f496
 
58e1fdd
 
 
 
 
 
 
 
 
 
 
 
 
 
1cb1ebf
58e1fdd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
# Fetch the sentence/word tokenizer models and the stopword corpus at
# import time; these calls are no-ops when the data is already cached.
# NOTE(review): `stopwords` is imported and downloaded but never used in
# the code below — confirm before removing.
nltk.download('punkt')
nltk.download('stopwords')

import gradio as gr

def lcs(l1, l2):
  """Return the length of the longest common subsequence of word tokens
  shared by the strings *l1* and *l2*.

  Both strings are split with ``word_tokenize`` and compared token by
  token using the classic O(rows*cols) dynamic-programming table.
  """
  words_a = word_tokenize(l1)
  words_b = word_tokenize(l2)
  rows, cols = len(words_b), len(words_a)
  # table[r][c] = LCS length of words_b[:r] vs words_a[:c];
  # row 0 / column 0 stay 0 (LCS against an empty prefix).
  table = [[0] * (cols + 1) for _ in range(rows + 1)]
  for r in range(1, rows + 1):
    for c in range(1, cols + 1):
      if words_b[r - 1] == words_a[c - 1]:
        # Tokens match: extend the best subsequence of both prefixes.
        table[r][c] = table[r - 1][c - 1] + 1
      else:
        # No match: carry forward the better of dropping either token.
        table[r][c] = max(table[r - 1][c], table[r][c - 1])
  return table[rows][cols]
    
def plagiarismChecker(orig, plag):
  """Score how much of the suspicious text *plag* overlaps *orig*.

  For every sentence of *plag*, find the longest word-level LCS against
  any single sentence of *orig*, sum those per-sentence maxima, and
  normalise by the total token count of *plag*.

  Returns a percentage in [0, 100]. Returns 0.0 when *plag* contains no
  tokens (the original code raised ZeroDivisionError on empty input).
  """
  sent_o = sent_tokenize(orig)
  sent_p = sent_tokenize(plag)

  tokens_p = word_tokenize(plag)
  # Guard: empty/whitespace-only suspicious text has nothing to score.
  if not tokens_p:
    return 0.0

  # Sum, over each suspicious sentence, of its best LCS against any
  # original sentence. default=0 keeps the original behavior when
  # *orig* has no sentences (contribution of 0 per sentence).
  sum_lcs = 0
  for suspect in sent_p:
    sum_lcs += max((lcs(suspect, original) for original in sent_o), default=0)

  score = sum_lcs / len(tokens_p)
  return score * 100
  
# Build and launch the web UI.
# NOTE(review): the original used the `gr.inputs` / `gr.outputs`
# namespaces and the 'dark-peach' theme string, all of which were
# removed in Gradio 3.0 — the script crashed on any current Gradio.
# Top-level component classes are the supported replacement; the
# removed theme is dropped (no modern equivalent string exists).
plagiarismUI = gr.Interface(
    fn=plagiarismChecker,
    inputs=[
        gr.Textbox(lines=10, label='Text 1'),
        gr.Textbox(lines=10, label='Text 2'),
    ],
    outputs=gr.Textbox(label='Plagiarism Level'),
    title="Plagiarism Checker",
)
# inbrowser=False: do not force-open a browser tab; the URL is printed.
plagiarismUI.launch(inbrowser=False)