Ptashka25 committed on
Commit 1677af2
1 Parent(s): 6b8fdbd

Update app file

Files changed (1)
app.py  +9 -5
app.py CHANGED
@@ -4,6 +4,7 @@ import numpy as np
 import torch
 from transformers import AutoTokenizer, AutoModel
 from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity
+import faiss
 
 tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
 model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
@@ -12,7 +13,6 @@ df = pd.read_csv('data_final.csv')
 
 MAX_LEN = 300
 
-# @st.cache_resource
 def embed_bert_cls(text, model, tokenizer):
     t = tokenizer(text, padding=True, truncation=True, return_tensors='pt', max_length=MAX_LEN)
     with torch.no_grad():
@@ -23,6 +23,9 @@ def embed_bert_cls(text, model, tokenizer):
 
 books_vector = np.loadtxt('vectors.txt')
 
+index = faiss.IndexFlatIP(books_vector.shape[1])
+index.add(books_vector)
+
 st.title('Приложение для рекомендации книг')
 
 text = st.text_input('Введите запрос:')
@@ -32,17 +35,18 @@ recommend_button = st.button('Найти')
 
 if text and recommend_button:
     user_text_pred = embed_bert_cls(text, model, tokenizer)
-    list_ = pairwise_distances(user_text_pred.reshape(1, -1), books_vector).argsort()[0][:num_results]
+    D, I = index.search(user_text_pred.reshape(1, -1), num_results)
 
     st.subheader('Топ рекомендуемых книг:')
 
-    for i in list_:
+    for i, j in zip(I[0], D[0]):
        col_1, col_2 = st.columns([1, 3])
 
        with col_1:
            st.image(df['image_url'][i], use_column_width=True)
+           st.write(round(j * 100, 2))
        with col_2:
            st.write(f'Название книги: {df["title"][i]}')
            st.write(f'Название книги: {df["author"][i]}')
-           st.write(f'Название книги: {df["annotation"][i]}')
-           st.write(f'{df["page_url"][i]}')
+           st.write(f'Ссылка: {df["page_url"][i]}')
+           st.write(f'Название книги: {df["annotation"][i]}')
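
Note on the new FAISS path: the Python API of faiss expects contiguous float32 arrays, while np.loadtxt returns float64, and IndexFlatIP scores by raw inner product, so the value passed to st.write(round(j * 100, 2)) only reads as a cosine-similarity percentage if both the book vectors and the query are L2-normalized. Below is a minimal sketch of the dtype cast and normalization, not the app's actual code: random placeholders stand in for vectors.txt and the embed_bert_cls output, and the 312-dimensional size is an assumption based on rubert-tiny2's hidden size.

import faiss
import numpy as np

# Placeholders so the sketch runs on its own; in app.py books_vector comes from
# np.loadtxt('vectors.txt') and the query from embed_bert_cls(text, model, tokenizer).
books_vector = np.random.rand(1000, 312)   # assumed 312-dim rubert-tiny2 embeddings
user_text_pred = np.random.rand(312)
num_results = 5

# FAISS expects contiguous float32 input; np.loadtxt yields float64 by default.
books_vector = np.ascontiguousarray(books_vector, dtype=np.float32)
query = np.ascontiguousarray(user_text_pred.reshape(1, -1), dtype=np.float32)

# L2-normalize both sides so the IndexFlatIP inner product equals cosine similarity.
faiss.normalize_L2(books_vector)
faiss.normalize_L2(query)

index = faiss.IndexFlatIP(books_vector.shape[1])  # exact (brute-force) inner-product search
index.add(books_vector)

D, I = index.search(query, num_results)  # D: similarity scores, I: row indices into df
for i, j in zip(I[0], D[0]):
    print(i, round(float(j) * 100, 2))   # same rounding the app passes to st.write

If raw dot products are acceptable, the normalize_L2 calls can be dropped, but the number shown next to each cover is then unbounded rather than a 0–100 score.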