|
import streamlit as st
|
|
from FlagEmbedding import BGEM3FlagModel
|
|
from FlagEmbedding import FlagReranker
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
@st.cache_resource
def load_model():
    """Create the BGE-M3 embedding model used to encode user queries.

    Wrapped in ``st.cache_resource`` so the model object is built once and
    reused instead of being constructed again on each script run.

    Returns:
        A ``BGEM3FlagModel`` for the 'BAAI/bge-m3' checkpoint, created with
        ``use_fp16=True``.
    """
    encoder = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)
    return encoder
|
|
@st.cache_resource
def load_reranker():
    """Create the reranker used to re-score retrieved candidates.

    Wrapped in ``st.cache_resource`` so the reranker object is built once and
    reused instead of being constructed again on each script run.

    Returns:
        A ``FlagReranker`` for the 'BAAI/bge-reranker-v2-m3' checkpoint,
        created with ``use_fp16=True``.
    """
    scorer = FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=True)
    return scorer
|
|
|
|
@st.cache_data
def load_embed(path):
    """Load a saved NumPy array of embeddings from disk.

    Wrapped in ``st.cache_data`` so the file is not read again for a path
    that has already been loaded.

    Args:
        path: Location of the ``.npy`` file, passed straight to ``np.load``.

    Returns:
        Whatever ``np.load`` returns for that file.
    """
    return np.load(path)
|
|
|
|
# Heavy objects come from the cached loaders so Streamlit reruns reuse them.
model = load_model()
reranker = load_reranker()

# Precomputed dense embeddings that each user query is compared against.
embeddings_2 = load_embed('D:/AI_Builder/BGE_embeddings_2.npy')


@st.cache_data
def load_book_table():
    """Build the lookup table that maps a retrieved row to book details.

    Streamlit re-executes the script on every interaction, so the CSV
    parsing and merge are cached here rather than repeated at module level.

    Returns:
        A DataFrame with the columns 'ID', 'Book Name', 'Book Author',
        'Questions' and 'Summary': the test and train question files stacked
        in that order, left-joined on 'ID' with the book summaries file.
    """
    test_questions = pd.read_csv(
        'D:/AI_Builder/ActualProject/DataCollection/TESTUNCLEANbookquestions.csv'
    )
    train_questions = pd.read_csv(
        'D:/AI_Builder/ActualProject/DataCollection/TRAINbookquestions.csv'
    )
    # The summaries file is tab-separated with no header row, so the column
    # names are supplied explicitly.
    summaries = pd.read_csv(
        "D:/AI_Builder/ActualProject/DataCollection/booksummaries.txt",
        header=None,
        sep="\t",
        names=["ID", "Freebase ID", "Book Name", "Book Author", "Pub date", "Genres", "Summary"],
    )

    questions = pd.concat([test_questions, train_questions])
    # A left join keeps every question row even when no summary matches.
    merged = questions.merge(summaries, on='ID', how='left')
    # Presumably the question files also carry a 'Book Name' column, which
    # the merge suffixes with '_x'; restore the plain name for that copy.
    merged = merged.rename(columns={'Book Name_x': 'Book Name'})
    return merged[['ID', 'Book Name', 'Book Author', 'Questions', 'Summary']]


df = load_book_table()
|
|
|
|
st.header(":books: Book Identifier")

# Number of candidates kept from the dense search and passed to the reranker.
k = 10

with st.form(key='my_form'):
    sen1 = st.text_area("Book description:")
    submit_button = st.form_submit_button(label='Submit')

if submit_button:
    if not sen1.strip():
        # A blank description has nothing meaningful to embed; ask for input
        # instead of ranking books against an empty query.
        st.warning("Please enter a book description.")
    else:
        # Stage 1: dense retrieval. Score the query embedding against every
        # precomputed embedding with a dot product.
        embeddings_1 = model.encode(
            sen1,
            batch_size=12,
            max_length=8192,
        )['dense_vecs']
        similarity = embeddings_1 @ embeddings_2.T

        # argsort is ascending, so the last k positions are the k best matches.
        # Assumes `similarity` is 1-D (one query string) - TODO confirm.
        topk = np.argsort(similarity)[-k:]

        # Stage 2: rerank. Pair the query with each candidate's summary.
        # The retrieved positions index `df` directly, so this relies on row i
        # of the embeddings file describing row i of `df` - confirm against
        # how the .npy file was built.
        top_k_qs = [[sen1, df['Summary'].iloc[t]] for t in topk]

        # atleast_1d guards against a bare scalar coming back for a single
        # pair (possibly version-dependent), so argsort always sees an array.
        rrscore = np.atleast_1d(reranker.compute_score(top_k_qs, normalize=True))

        # Ascending argsort reversed: best reranker score first.
        rrscore_index = np.argsort(rrscore)[::-1]

        # Positional lookups throughout, matching how the summaries were read.
        pred_book = [
            f"{df['Book Name'].iloc[topk[rr]]} by {df['Book Author'].iloc[topk[rr]]}"
            for rr in rrscore_index
        ]

        st.write("Here is your prediction")
        for n, pred in enumerate(pred_book):
            st.write(f"{n+1}: {pred}")