"""Gradio demo: semantic search over proposals linked on kusama.polkassembly.io."""

from sentence_transformers import SentenceTransformer, util
import numpy as np
import pandas as pd
import gradio as gr

# Load the model
model = SentenceTransformer('sentence-transformers/msmarco-bert-base-dot-v5')

# Keep only the columns whose header consists solely of digits
# (presumably the embedding dimensions -- confirm against the spreadsheet).
doc_emb = pd.read_excel("proposals_emb.xlsx", usecols=lambda x: str(x).isnumeric())
# NOTE(review): rows of `df` are looked up by position using the index of
# `doc_emb`, so both sheets are assumed to list proposals in the same order.
df = pd.read_excel("proposals_clean.xlsx")


def cosine(u, v):
    """Return the cosine similarity of vectors *u* and *v*.

    Computed as ``dot(u, v) / (norm(u) * norm(v))``. No special handling is
    done for zero-length vectors; the division is left to NumPy.
    """
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))


def form_link(post_id):
    """Return the Polkassembly referendum URL for *post_id*."""
    return f"https://kusama.polkassembly.io/referenda/{post_id}"


def processing(query: str) -> str:
    """Return a markdown list of the five proposals most similar to *query*.

    The query is embedded with the module-level ``model``, compared against
    every row of ``doc_emb`` with :func:`cosine`, and the best five rows are
    rendered as one numbered markdown line each (link plus status).

    Returns an empty string when there are no document embeddings.
    """
    if doc_emb.empty:
        return ""

    query_emb = model.encode(query)

    # Keep the scores in a local Series instead of writing a temporary column
    # into the shared `doc_emb` frame: the frame is module-level state, so a
    # scratch column could be seen by overlapping requests and would be left
    # behind (corrupting later row vectors) if scoring raised midway.
    scores = doc_emb.apply(lambda row: cosine(row, query_emb), axis=1)
    top_idx = scores.nlargest(5).index

    # `doc_emb` index values are used as row positions in `df`.
    top_status = df.iloc[top_idx]['status']

    # The index label of `df` doubles as the proposal number in the link.
    # NOTE(review): the trailing ": {rank}" repeats the list number; kept
    # as-is because it is part of the user-visible output.
    return "".join(
        f"{rank}. [Proposal #{post_id}]({form_link(post_id)}) - {status}: {rank}\n"
        for rank, (post_id, status) in enumerate(top_status.items(), start=1)
    )


iface = gr.Interface(processing, "text", "markdown")
iface.launch()