"""Gradio demo that scores the semantic similarity of two company names."""

import re
import string
from typing import Optional

import gradio as gr
from cleanco import basename
from sentence_transformers import SentenceTransformer, util

# Sentence-embedding model; created on the first call to ``semantic`` so that
# importing/starting the app does not pay the model-loading cost up front.
model = None

# Deletes ASCII punctuation plus the two curly double quotes in one pass.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation + '”“')

# Standalone Roman numerals (e.g. "III", "IV", "X"). No flags are set, so the
# match is case-sensitive. The repeated "I" inside the character classes is
# redundant but harmless; the pattern text is kept exactly as it was.
_ROMAN_NUMERAL_RE = re.compile(
    r"\b(?=[MDCLXVII])M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})([II]X|[II]V|V?[II]{0,3})\b\.?"
)


def prepare(text: Optional[str]) -> str:
    """Normalise a company name before it is embedded.

    Steps, in order: delete punctuation, delete standalone Roman numerals,
    pass the result through ``cleanco.basename`` (intended to drop
    legal-entity terms such as "Corp"), and upper-case it.

    Args:
        text: Raw company name. ``None`` is treated as an empty name so a
            missing input does not raise ``AttributeError``.

    Returns:
        The normalised, upper-cased name.
    """
    if text is None:
        return ""
    text = text.translate(_PUNCTUATION_TABLE)
    # Runs before upper-casing, so only numerals already typed in capitals
    # are removed.
    text = _ROMAN_NUMERAL_RE.sub('', text)
    return basename(text).upper()


def semantic(company_1, company_2):
    """Return the cosine similarity of two company names as a string.

    Both names are normalised with ``prepare`` and embedded with the
    ``all-mpnet-base-v2`` sentence-transformers model, which is loaded on
    first use and cached in the module-level ``model``.

    Args:
        company_1: First company name.
        company_2: Second company name.

    Returns:
        The similarity score formatted with four decimal places.
    """
    global model
    sentences = [prepare(company_1), prepare(company_2)]
    if model is None:
        model = SentenceTransformer('all-mpnet-base-v2')

    embeddings = model.encode(sentences, convert_to_tensor=True)
    cosine_scores = util.cos_sim(embeddings, embeddings)
    # With exactly two sentences there is a single distinct pair, (0, 1),
    # so no pair list or sorting is needed.
    return "{:.4f}".format(cosine_scores[0][1])


company_1 = "Growth Capital Acquisition Corp"
company_2 = None  # "Growth Capital Acquisition Corp III"
title = 'sentences_semantic'

# Build and start the web UI when the module is executed.
gr.Interface(
    semantic,
    inputs=[
        gr.inputs.Textbox(lines=1, default=company_1, label="Company_1"),
        gr.inputs.Textbox(lines=1, default=company_2, label="Company_2"),
    ],
    outputs=[gr.outputs.Textbox(type="auto", label="Score")],
    title=title,
).launch()