File size: 1,794 Bytes
e1e1146
 
 
 
 
 
 
 
 
 
 
 
 
 
ccf093c
 
 
 
 
e1e1146
 
 
 
 
 
 
 
9cc4a1f
 
e1e1146
 
 
 
 
646f404
 
 
 
e1e1146
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1b8fca
e1e1146
 
 
 
 
aad580c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import streamlit as st
from FlagEmbedding import BGEM3FlagModel
from FlagEmbedding import FlagReranker
import pandas as pd
import numpy as np

@st.cache_resource
def load_model():
    """Create the BGE-M3 embedding model.

    The function is wrapped in ``st.cache_resource``, so the returned
    object is cached by Streamlit rather than rebuilt on each call.

    Returns:
        A ``BGEM3FlagModel`` for the ``BAAI/bge-m3`` checkpoint, created
        with ``use_fp16=True``.
    """
    embedder = BGEM3FlagModel("BAAI/bge-m3", use_fp16=True)
    return embedder
@st.cache_resource
def load_reranker():
    """Create the BGE reranker model.

    The function is wrapped in ``st.cache_resource``, so the returned
    object is cached by Streamlit rather than rebuilt on each call.

    Returns:
        A ``FlagReranker`` for the ``BAAI/bge-reranker-v2-m3`` checkpoint,
        created with ``use_fp16=True``.
    """
    ranker = FlagReranker("BAAI/bge-reranker-v2-m3", use_fp16=True)
    return ranker

@st.cache_data
def load_df(path):
    """Read the CSV file at *path* into a pandas DataFrame.

    The function is wrapped in ``st.cache_data``, so Streamlit caches the
    result per distinct ``path`` argument.

    Args:
        path: Location of the CSV file; passed unchanged to ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv(path)

@st.cache_data
def load_embed(path):
    """Load a saved NumPy array from *path*.

    The function is wrapped in ``st.cache_data``, so Streamlit caches the
    result per distinct ``path`` argument.

    Args:
        path: Location of the ``.npy`` file; passed unchanged to ``np.load``.

    Returns:
        Whatever ``np.load`` returns for that file.
    """
    return np.load(path)

# Models and data come from the cached loader functions.
model = load_model()
reranker = load_reranker()

df = load_df("BookDataFrame.csv")
embeddings_2 = load_embed("BGE_embeddings_2.npy")

st.header(":books: Book Identifier")

# Number of candidates kept from the similarity search.
k = 10

# Collect the description inside a form so it is sent with the button.
with st.form(key="my_form"):
    sen1 = st.text_area(
        label="Book description:",
        placeholder="Write a brief description of your book here",
    )
    submit_button = st.form_submit_button(label="Submit")

if submit_button:
    # Handle a form submission: retrieve the k most similar books by dense
    # embedding, rerank those candidates, and list them best match first.
    if not sen1 or not sen1.strip():
        # A blank description carries no signal; skip both model calls
        # instead of showing arbitrary predictions.
        st.warning("Please enter a book description first.")
    else:
        # Dense embedding of the user's description.
        # NOTE(review): assumes encode() returns a 1-D vector for a single
        # string, so `similarity` below is 1-D as well -- confirm.
        embeddings_1 = model.encode(
            sen1,
            batch_size=12,
            max_length=8192,
        )['dense_vecs']

        # NOTE(review): presumably embeddings_2 has one row per row of df,
        # in the same order -- verify against how the .npy file was built.
        similarity = embeddings_1 @ embeddings_2.T

        # Positions of the k highest similarity scores (ascending order).
        topk = np.argsort(similarity)[-k:]

        # Pair the query with each candidate summary for the reranker.
        top_k_qs = [[sen1, df['Summary'].iloc[t]] for t in topk]
        rrscore = reranker.compute_score(top_k_qs, normalize=True)

        # argsort is ascending; reverse it so the best score comes first.
        rrscore_index = np.argsort(rrscore)[::-1]

        # topk holds row positions, so use .iloc for every column lookup
        # (label-based indexing only matches on a default RangeIndex).
        pred_book = [
            f"{df['Book Name'].iloc[topk[rr]]} by {df['Book Author'].iloc[topk[rr]]}"
            for rr in rrscore_index
        ]

        st.write("Here is your prediction")
        for n, pred in enumerate(pred_book, start=1):
            st.write(f"{n}: {pred}")