# NOTE(review): removed non-Python viewer artifacts that preceded the code
# (file size, commit hashes, and a line-number gutter) — they were not source.
import streamlit as st
from FlagEmbedding import BGEM3FlagModel
from FlagEmbedding import FlagReranker
import pandas as pd
import numpy as np
@st.cache_resource
def load_model():
    """Load the BGE-M3 embedding model once and reuse it across reruns."""
    embedder = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)
    return embedder
@st.cache_resource
def load_reranker():
    """Load the BGE reranker (cross-encoder) once and reuse it across reruns."""
    cross_encoder = FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=True)
    return cross_encoder
@st.cache_data
def load_df(path):
    """Read the book metadata CSV at *path* into a DataFrame (cached by path)."""
    return pd.read_csv(path)
@st.cache_data
def load_embed(path):
    """Load the precomputed embedding matrix from a .npy file (cached by path)."""
    return np.load(path)
# Cached resources: created once per session, reused on every Streamlit rerun.
model = load_model()
reranker = load_reranker()
# Book metadata (names, authors, summaries) and their precomputed embeddings.
# NOTE(review): rows of the embedding matrix are assumed to align 1:1 with
# the DataFrame's rows — confirm against the notebook that produced the .npy.
df = load_df('BookDataFrame.csv')
embeddings_2 = load_embed('BGE_embeddings_2.npy')
st.header(":books: Book Identifier")
# Number of nearest-neighbour candidates retrieved before reranking.
k = 10
# Input form: a free-text book description plus a submit button.
with st.form(key='my_form'):
    sen1 = st.text_area(
        label="Book description:",
        placeholder="Write a brief description of your book here",
    )
    submit_button = st.form_submit_button(label='Submit')
if submit_button:
    # Dense-embed the user's description; model.encode returns a dict whose
    # 'dense_vecs' entry holds the sentence vector.
    embeddings_1 = model.encode(
        sen1,
        batch_size=12,
        max_length=8192,
    )['dense_vecs']

    # Similarity of the query vector against every stored book embedding.
    # NOTE(review): the dot product equals cosine similarity only if both
    # sides are L2-normalized — confirm for the precomputed matrix.
    similarity = embeddings_1 @ embeddings_2.T

    # Positional indices of the k most similar books (ascending by score).
    topk = np.argsort(similarity)[-k:]

    # Build (query, summary) pairs and rerank them with the cross-encoder.
    top_k_qs = [[sen1, df['Summary'].iloc[t]] for t in topk]
    rrscore = reranker.compute_score(top_k_qs, normalize=True)

    # Order candidates best-first by reranker score. Use .iloc everywhere for
    # positional access: the original mixed label-based (df[col][i]) and
    # positional (.iloc) indexing, which breaks if the DataFrame index is not
    # the default RangeIndex. (Also dropped an unused `finalpred` list.)
    order = np.argsort(rrscore)[::-1]
    pred_book = [
        f"{df['Book Name'].iloc[topk[i]]} by {df['Book Author'].iloc[topk[i]]}"
        for i in order
    ]

    st.write("Here is your prediction")
    for n, pred in enumerate(pred_book):
        st.write(f"{n+1}: {pred}")