# vjain's picture
# Update app.py
# bac2b8e
import gradio as gr
import openai
import pandas as pd
import numpy as np
import os
# Read the API key from the environment rather than relying solely on a
# literal committed to source. The variable name mirrors the original
# placeholder string, which is kept as the fallback so behavior is unchanged
# when the variable is not set.
# NOTE(review): presumably the deployment exposes the key under this name —
# confirm against the hosting configuration.
openai.api_key = os.environ.get("openai_apiKey", "openai_apiKey")
from openai.embeddings_utils import get_embedding
from openai.embeddings_utils import cosine_similarity
def similarity(input):
    """Return the stored text most similar to ``input`` and its score.

    Loads ``meg_embeddings.csv``, embeds ``input`` with the
    ``text-embedding-ada-002`` engine, scores every stored embedding against
    that vector with ``cosine_similarity`` and selects the highest-scoring
    row.

    Args:
        input: Text to compare against the stored texts. The name shadows
            the builtin; it is kept so existing keyword callers still work.

    Returns:
        A pandas Series holding the ``"text"`` and ``"similarities"`` values
        of the best-matching row, in that order.

    Raises:
        IndexError: If the CSV contains no rows.
    """
    df = pd.read_csv("meg_embeddings.csv")

    # NOTE(security): eval() executes whatever expression is stored in the
    # "embedding" column, so this CSV must only ever come from a trusted
    # source. Presumably each cell is a plain list literal; if that is
    # confirmed, ast.literal_eval would be a safer parser.
    df["embedding"] = df["embedding"].apply(eval).apply(np.array)

    input_vector = get_embedding(input, engine="text-embedding-ada-002")
    df["similarities"] = df["embedding"].apply(
        lambda stored: cosine_similarity(stored, input_vector)
    )

    # Highest similarity first; position 0 is therefore the best match.
    # Positional access (iloc) is used because the original row labels are
    # carried through the sort and no longer reflect rank.
    sorted_df = df.sort_values("similarities", ascending=False)
    return sorted_df.iloc[0][["text", "similarities"]]
# --- Gradio components -------------------------------------------------
# One free-text input box and two text output boxes.
input_text = gr.inputs.Textbox(label="Enter your text here")
text_output = gr.outputs.Textbox(label="Most similar text")
similarity_output = gr.outputs.Textbox(label="Similarity score")

# Presentation-only settings, grouped so the wiring below stays readable.
_interface_options = dict(
    title="Semantic Plagiarism Checker",
    description="Check if your text is semantically similar to pre-existing texts to prevent plagiarism.",
    theme="compact",
    layout="vertical",
    inputs_layout="stacked",
    outputs_layout="stacked",
    allow_flagging=False,
)

# Wire `similarity` to the components; the outputs are listed as
# [text_output, similarity_output].
ui = gr.Interface(
    fn=similarity,
    inputs=input_text,
    outputs=[text_output, similarity_output],
    **_interface_options,
)

# Start the web app as soon as this module is executed.
ui.launch()