"""Gradio app that scores a submitted answer against an ideal answer.

Combines three text-similarity signals into one report string:
  * SBERT sentence-embedding cosine similarity (weight 0.8 in the final score)
  * Jaccard similarity over lemmatized, stopword-free tokens (reported only)
  * spaCy average word-vector similarity (weight 0.2 in the final score)
"""

import gradio as gr
import spacy
import textdistance
from sentence_transformers import SentenceTransformer, util

# Load the heavyweight models ONCE at import time instead of on every request.
nlp = spacy.load("en_core_web_md")  # md/lg package required for word vectors
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


def listToString(s):
    """Join an iterable of strings into a single space-separated string."""
    return " ".join(s)


def rm_stop(my_doc):
    """Return the token texts of a spaCy doc with stopwords filtered out."""
    return [token.text for token in my_doc
            if not nlp.vocab[token.text].is_stop]


def text_processing(sentence):
    """Lowercased lemmas of the alphabetic, non-stopword tokens of *sentence*."""
    return [token.lemma_.lower() for token in nlp(sentence)
            if token.is_alpha and not token.is_stop]


def jaccard_sim(sent1, sent2):
    """Normalized Jaccard similarity between the processed token lists."""
    sentence1 = text_processing(sent1)
    sentence2 = text_processing(sent2)
    return textdistance.jaccard.normalized_similarity(sentence1, sentence2)


def sim(Ideal_Answer, Submitted_Answer):
    """Score *Submitted_Answer* against *Ideal_Answer*.

    Returns a space-joined summary string of the form
    ``SBERT:<n>, Jaccard:<n>, Word2Vec:<n>,final_score:<n>`` where each
    component is an integer on a 0-10 scale and the final score is
    ``0.8 * sbert + 0.2 * word2vec``.
    """
    # Strip quote characters, which would otherwise confuse the tokenizers.
    text1 = Ideal_Answer.replace("\"", "").replace("\'", "")
    text2 = Submitted_Answer.replace("\"", "").replace("\'", "")
    output = []

    # SBERT embedding cosine similarity, scaled to 0-10.
    embedding_1 = model.encode(text1, convert_to_tensor=True)
    embedding_2 = model.encode(text2, convert_to_tensor=True)
    score = util.pytorch_cos_sim(embedding_1, embedding_2)
    # .item() extracts the scalar directly; the original parsed the tensor's
    # repr string, which breaks whenever torch changes its formatting.
    sbert = int(score.item() * 10.0)
    output.append("SBERT:" + str(sbert) + ",")

    # Jaccard similarity over processed tokens, scaled to 0-10.
    output.append("Jaccard:" + str(int(jaccard_sim(text1, text2) * 10.0)) + ",")

    # spaCy average-word2vec similarity on stopword-free text, scaled to 0-10.
    doc1 = listToString(rm_stop(nlp(text1)))
    doc2 = listToString(rm_stop(nlp(text2)))
    w2v = int(nlp(doc1).similarity(nlp(doc2)) * 10.0)

    final_score = int(0.8 * sbert + 0.2 * w2v)
    output.append("Word2Vec:" + str(w2v) + ",final_score:" + str(final_score))
    return listToString(output)


# "text" is the version-stable equivalent of the deprecated gr.outputs.Textbox().
iface = gr.Interface(fn=sim, inputs=["text", "text"], outputs="text")
iface.launch()