File size: 1,424 Bytes
b592be7
 
 
 
 
 
 
 
 
58db145
b592be7
 
58db145
b592be7
 
 
 
bc17936
b79c707
c8d4682
 
 
b592be7
c8d4682
 
 
 
 
 
 
 
 
 
b592be7
c8d4682
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import gradio as gr
import openai
import pandas as pd 
import numpy as np

# SECURITY: a secret API key used to be hard-coded on this line. Secrets must
# not live in source control, so the key is now read from the environment.
# The previously committed key should be treated as leaked and revoked.
import os

# Resolves to None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
from openai.embeddings_utils import get_embedding
from openai.embeddings_utils import cosine_similarity

def similarity(input):
    """Find the stored text that is most similar to *input*.

    Loads ``meg_embeddings.csv``, parses its ``embedding`` column into NumPy
    arrays, embeds *input* with the ``text-embedding-ada-002`` engine, scores
    every row by cosine similarity against that embedding, and returns the
    best-scoring row.

    Args:
        input: The text to compare against the stored texts. The name
            shadows the builtin but is kept so existing callers that pass it
            by keyword keep working.

    Returns:
        A pandas Series holding the ``text`` and ``similarities`` fields of
        the row with the highest similarity score.

    Raises:
        ValueError: If ``meg_embeddings.csv`` contains no rows.
    """
    # Function-scope import so this block does not depend on a file-level
    # import being added elsewhere.
    import ast

    df = pd.read_csv("meg_embeddings.csv")
    if df.empty:
        raise ValueError("meg_embeddings.csv contains no rows to compare against")

    # The original used eval() here, which executes whatever expression is in
    # the CSV cell. literal_eval only accepts Python literals.
    # NOTE(review): assumes each cell is a plain list literal of numbers --
    # confirm against how the CSV was written.
    df["embedding"] = df["embedding"].apply(ast.literal_eval).apply(np.array)

    input_vector = get_embedding(input, engine="text-embedding-ada-002")
    df["similarities"] = df["embedding"].apply(
        lambda x: cosine_similarity(x, input_vector)
    )

    # Highest score first; take the top row positionally so the result does
    # not depend on which index labels exist.
    sorted_df = df.sort_values("similarities", ascending=False)
    return sorted_df.iloc[0][["text", "similarities"]]
    
# Input widget: one text box for the passage to be checked.
input_text = gr.inputs.Textbox(label="Enter your text here")

# Output widgets: one text box for each of the two fields that
# `similarity` returns.
text_output = gr.outputs.Textbox(label="Most similar text")
similarity_output = gr.outputs.Textbox(label="Similarity score")

# Wire the similarity function to the widgets above.
ui = gr.Interface(
    fn=similarity,
    inputs=input_text,
    outputs=[text_output, similarity_output],
    title="Semantic Plagiarism Checker",
    description="Check if your text is semantically similar to pre-existing texts to prevent plagiarism.",
    theme="compact",
    layout="vertical",
    inputs_layout="stacked",
    outputs_layout="stacked",
    allow_flagging=False,
)

# Start the web app.
ui.launch()