File size: 1,170 Bytes
3789923
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from sentence_transformers import SentenceTransformer, util
import numpy as np
import pandas as pd
import gradio as gr

# Load the sentence-embedding model used to encode incoming queries.
model = SentenceTransformer('sentence-transformers/msmarco-bert-base-dot-v5')
# Precomputed document embeddings: only columns whose header is numeric are read
# (presumably one column per embedding dimension and one row per proposal,
# row-aligned with proposals_clean.xlsx -- TODO confirm).
doc_emb = pd.read_excel("proposals_emb.xlsx", usecols=lambda x: str(x).isnumeric())
# Proposal table; processing() reads its 'content' and 'status' columns.
df = pd.read_excel("proposals_clean.xlsx")


def cosine(u, v):
    """Return the cosine similarity between vectors ``u`` and ``v``.

    Args:
        u: First vector (any 1-D array-like accepted by NumPy).
        v: Second vector, same length as ``u``.

    Returns:
        The dot product of ``u`` and ``v`` divided by the product of their
        Euclidean norms, or ``0.0`` when either vector has zero norm.
    """
    denom = np.linalg.norm(u) * np.linalg.norm(v)
    # A zero-norm vector has no direction; without this guard the division
    # is 0/0 and yields NaN (plus a RuntimeWarning), which then pollutes any
    # ranking built on the result.
    if denom == 0:
        return 0.0
    return np.dot(u, v) / denom


def form_link(post_id):
    """Build the Kusama Polkassembly referendum URL for ``post_id``."""
    base_url = "https://kusama.polkassembly.io/referenda"
    return f"{base_url}/{post_id}"


def processing(query):
    """Return a Markdown list of the five proposals most similar to ``query``.

    The query is encoded with the module-level ``model``, compared against
    every row of ``doc_emb`` with ``cosine``, and the five highest-scoring
    rows are looked up in ``df``.

    Args:
        query: Free-text search string.

    Returns:
        A Markdown string with one numbered line per hit, each linking to the
        proposal and showing its ``status`` value followed by its rank.
    """
    query_emb = model.encode(query)

    # Keep the scores in their own Series rather than in a temporary column
    # on the shared ``doc_emb`` frame: if anything raised before the column
    # was dropped, it would stay behind and every later call would pass rows
    # one element longer than the query embedding to ``cosine``.
    similarities = doc_emb.apply(lambda row: cosine(row, query_emb), axis=1)
    top_idx = similarities.nlargest(5).index
    # Row labels of ``doc_emb`` are used as positions into ``df``.
    res = df.iloc[top_idx][['content', 'status']]

    lines = [
        f"{rank}. [Proposal #{idx}]({form_link(idx)}) - {row['status']}: {rank}\n"
        for rank, (idx, row) in enumerate(res.iterrows(), start=1)
    ]
    return "".join(lines)


# Wire the search function into a simple text-in / markdown-out web UI and serve it.
iface = gr.Interface(
    fn=processing,
    inputs="text",
    outputs="markdown",
)
iface.launch()