"""Streamlit app that recommends books semantically similar to a text query.

The query is embedded with the ``cointegrated/rubert-tiny2`` model (normalized
[CLS] vector) and matched against precomputed book embeddings stored in
``embeddings.txt`` using a Faiss inner-product index. Book metadata is read
from ``final_data.csv``; row ``i`` of the CSV is expected to correspond to
row ``i`` of the embeddings file.
"""
import streamlit as st
import pandas as pd
import torch
import numpy as np
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel
import faiss

model_name = "cointegrated/rubert-tiny2"
MAX_LEN = 300


# Streamlit re-executes this script on every widget interaction, so anything
# expensive (model weights, CSV, index) is loaded through cached helpers.
@st.cache_resource
def _load_model(name):
    """Load the tokenizer and encoder once per server process."""
    return AutoTokenizer.from_pretrained(name), AutoModel.from_pretrained(name)


@st.cache_data
def _load_books(path):
    """Read the book metadata table from a CSV file."""
    return pd.read_csv(path)


@st.cache_resource
def _load_index(path):
    """Build a Faiss inner-product index from a text file of embeddings."""
    # np.loadtxt yields float64 (and a 1-D array for a single-row file);
    # Faiss works on C-contiguous float32 matrices.
    vectors = np.ascontiguousarray(np.atleast_2d(np.loadtxt(path)), dtype='float32')
    flat_index = faiss.IndexFlatIP(vectors.shape[1])
    flat_index.add(vectors)
    return flat_index


tokenizer, model = _load_model(model_name)
df = _load_books('final_data.csv')
index = _load_index('embeddings.txt')


def embed_bert_cls(text: str, model=model, tokenizer=tokenizer) -> torch.Tensor:
    """Return the L2-normalized [CLS] embedding of ``text`` as a 1-D CPU tensor.

    Args:
        text: Input text; it is truncated to ``MAX_LEN`` tokens.
        model: Encoder whose ``last_hidden_state`` is used.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        The normalized embedding of the first token of the first sequence.
    """
    t = tokenizer(text, padding=True, truncation=True, return_tensors='pt', max_length=MAX_LEN)
    with torch.no_grad():
        model_output = model(**{k: v.to(model.device) for k, v in t.items()})
    embeddings = model_output.last_hidden_state[:, 0, :]
    embeddings = torch.nn.functional.normalize(embeddings)
    return embeddings[0].cpu().squeeze()


st.title('Приложение для рекомендации книг')

text = st.text_input('Введите запрос:')
num_results = st.number_input('Введите количество рекомендаций:', min_value=1, max_value=50, value=3)

# Button that triggers the recommendation process.
recommend_button = st.button('Получить рекомендации')

# Run only when the user has entered text and clicked the button.
if text and recommend_button:
    # Embed the query and look up its nearest neighbours in the Faiss index.
    query_embedding = embed_bert_cls(text).numpy().astype('float32')

    # Never ask for more neighbours than there are indexed books; Faiss pads
    # missing results with label -1, which would otherwise index the last row.
    k = min(int(num_results), index.ntotal)
    scores, indices = index.search(np.expand_dims(query_embedding, axis=0), k)

    st.subheader('Топ рекомендуемых книг:')

    for score, i in zip(scores[0], indices[0]):
        if i < 0:
            continue

        # Faiss labels are row positions, hence positional lookup.
        book = df.iloc[int(i)]
        # The inner product returned by the index is the similarity score
        # (cosine similarity if the stored vectors are unit-length, as the
        # query is).
        similarity_percent = float(score) * 100

        col1, col2 = st.columns([1, 3])

        with col1:
            st.image(book['image'], use_column_width=True)

        with col2:
            st.write(f"**Название книги:** {book['title']}")
            st.write(f"**Автор:** {book['author']}")
            st.write(f"**Описание:** {book['annotation']}")
            st.write(f"**Оценка сходства:** {similarity_percent:.2f}%")

        st.write("---")