# evaluation / app.py
import os
from functools import lru_cache

import gradio as gr
import numpy as np
import pandas as pd
import spacy
import textdistance
from matplotlib.dates import SU
from regex import F
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from spacy.lang.en.stop_words import STOP_WORDS
from tqdm import tqdm
#import psycopg2
# Disabled PostgreSQL connection stub (psycopg2 import is commented out above);
# kept for reference — the app currently runs without a database.
'''connection = psycopg2.connect(user="db_admin",
password="",
host="127.0.0.1",
port="5432",
database="my_db")'''
# Shared spaCy pipeline used by the helpers below. The medium English model
# ships word vectors, which the Word2Vec-style similarity in sim() relies on.
nlp = spacy.load("en_core_web_md")
def listToString(s):
    """Join the items of *s* into one space-separated string."""
    separator = " "
    return separator.join(s)
def rm_stop(my_doc):
    """Return the token texts of *my_doc* whose lexemes are not stopwords.

    Looks each token's text up in the shared pipeline's vocabulary and
    keeps it only when the lexeme is not flagged as a stopword.
    """
    words = [token.text for token in my_doc]
    kept = []
    for word in words:
        if not nlp.vocab[word].is_stop:
            kept.append(word)
    return kept
def text_processing(sentence):
    """Tokenize *sentence*; return lowercased lemmas of the alphabetic,
    non-stopword tokens."""
    processed = []
    for token in nlp(sentence):
        if token.is_alpha and not token.is_stop:
            processed.append(token.lemma_.lower())
    return processed
def jaccard_sim(sent1, sent2):
    """Normalized Jaccard similarity (0..1) between the processed token
    lists of the two sentences."""
    tokens_a = text_processing(sent1)
    tokens_b = text_processing(sent2)
    return textdistance.jaccard.normalized_similarity(tokens_a, tokens_b)
@lru_cache(maxsize=1)
def _load_sbert():
    # Load the SBERT model once; every later call reuses the cached instance
    # instead of re-downloading/re-initializing it per request.
    return SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

def sim(Ideal_Answer, Submitted_Answer):
    """Score a submitted answer against an ideal answer.

    Combines three similarity signals, each scaled to an int in 0..10:
    SBERT cosine similarity, Jaccard similarity over processed tokens,
    and spaCy word-vector similarity. The final score is
    int(0.8 * sbert + 0.2 * word2vec).

    Returns a single display string, e.g.
    "SBERT:8, Jaccard:5, Word2Vec:7,final_score:7".
    """
    text1 = Ideal_Answer.replace("\"", "").replace("\'", "")
    text2 = Submitted_Answer.replace("\"", "").replace("\'", "")
    output = []

    # SBERT embeddings + cosine similarity.
    model = _load_sbert()
    embedding_1 = model.encode(text1, convert_to_tensor=True)
    embedding_2 = model.encode(text2, convert_to_tensor=True)
    score = util.pytorch_cos_sim(embedding_1, embedding_2)
    # BUG FIX: read the 1x1 tensor's scalar with .item() instead of parsing
    # str(score) with split("[")/split("]"), which broke whenever torch
    # changed its tensor repr.
    sbert = int(float(score.item()) * 10.0)
    output.append("SBERT:" + str(sbert) + ",")

    # Jaccard over lemmatized, stopword-free tokens.
    output.append("Jaccard:" + str(int(jaccard_sim(text1, text2) * 10.0)) + ",")

    # spaCy average word-vector similarity. Reuse the module-level pipeline
    # instead of reloading en_core_web_md on every call (the original
    # shadowed the global nlp with a fresh load here).
    doc1 = listToString(rm_stop(nlp(text1)))
    doc2 = listToString(rm_stop(nlp(text2)))
    w2v = int(nlp(doc1).similarity(nlp(doc2)) * 10.0)  # computed once, reused

    final_score = int(0.8 * sbert + 0.2 * w2v)
    output.append("Word2Vec:" + str(w2v) + ",final_score:" + str(final_score))
    return str(listToString(output))
# Expose the scorer as a two-textbox Gradio demo: ideal answer and submitted
# answer in, combined similarity string out.
# NOTE(review): gr.outputs.Textbox is the legacy (pre-3.x) output API —
# confirm the pinned gradio version still ships it.
iface = gr.Interface(fn=sim,
                     inputs=["text", "text"],
                     outputs=gr.outputs.Textbox(),)
iface.launch()