ziyadbastaili's picture
Update app.py
a501843
import gradio as gr
from sentence_transformers import SentenceTransformer, util
import string, re
from cleanco import basename
# Module-level cache for the SentenceTransformer instance; it stays None until
# semantic() constructs the model on its first call and stores it here.
model = None
# Translation table that deletes ASCII punctuation plus the curly double
# quotes (U+201D, U+201C). Written as escapes so the literal cannot be
# corrupted again by a wrong file encoding.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation + '\u201d\u201c')

# Whole-word, upper-case Roman numerals (e.g. a trailing "III"), optionally
# followed by a dot. Compiled once instead of on every call.
_ROMAN_NUMERAL_RE = re.compile(
    r"\b(?=[MDCLXVII])M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})([II]X|[II]V|V?[II]{0,3})\b\.?"
)


def prepare(text):
    """Normalise a company name before it is embedded.

    Steps, in order:
      1. delete punctuation (ASCII punctuation and curly double quotes);
      2. delete stand-alone upper-case Roman numerals;
      3. pass the result through cleanco's ``basename`` (presumably strips
         legal-entity terms such as "Corp" -- confirm against cleanco docs);
      4. upper-case the result.

    Args:
        text: The raw company name, or None.

    Returns:
        The normalised, upper-cased name. None is treated as an empty name
        and yields "" instead of raising AttributeError.
    """
    if text is None:
        return ''
    text = text.translate(_PUNCTUATION_TABLE)
    # Runs after punctuation removal, so "Corp. III" has already become
    # "Corp III" by the time the numeral is stripped.
    text = _ROMAN_NUMERAL_RE.sub('', text)
    return basename(text).upper()
def semantic(company_1, company_2):
    """Return the semantic similarity of two company names.

    Both names are normalised with ``prepare`` and embedded with the
    'all-mpnet-base-v2' SentenceTransformer; the score is the cosine
    similarity of the two embeddings.

    Args:
        company_1: First company name.
        company_2: Second company name.

    Returns:
        The cosine similarity formatted as a string with four decimals.
    """
    global model
    sentences = [prepare(company_1), prepare(company_2)]
    if model is None:
        # Construct the model on first use only and keep it in the module
        # global so later calls reuse the same instance.
        model = SentenceTransformer('all-mpnet-base-v2')
    embeddings = model.encode(sentences, convert_to_tensor=True)
    # There are exactly two sentences, so the only pair is (0, 1); compare
    # them directly instead of enumerating and sorting every pair.
    score = util.cos_sim(embeddings[0], embeddings[1]).item()
    return "{:.4f}".format(score)
# Values pre-filled in the two input boxes when the page loads.
company_1 = "Growth Capital Acquisition Corp"
company_2 = None # "Growth Capital Acquisition Corp III"
title = 'sentences_semantic'

# Two single-line text inputs (one per company name) and one text output
# that shows the score string returned by semantic().
company_1_box = gr.inputs.Textbox(lines=1, default=company_1, label="Company_1")
company_2_box = gr.inputs.Textbox(lines=1, default=company_2, label="Company_2")
score_box = gr.outputs.Textbox(type="auto", label="Score")

demo = gr.Interface(
    semantic,
    inputs=[company_1_box, company_2_box],
    outputs=[score_box],
    title=title,
)
# Start the web app as soon as this module is executed.
demo.launch()