Veronika1101 committed on
Commit
2e2008d
1 Parent(s): 8345fab

Upload 3 files

Browse files
.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  pages/MiniLM/MiniLM_index.index filter=lfs diff=lfs merge=lfs -text
37
  mpnet_base/mpnet_base_index.index filter=lfs diff=lfs merge=lfs -text
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  pages/MiniLM/MiniLM_index.index filter=lfs diff=lfs merge=lfs -text
37
  mpnet_base/mpnet_base_index.index filter=lfs diff=lfs merge=lfs -text
38
+ pages/mpnet_base/mpnet_base_index.index filter=lfs diff=lfs merge=lfs -text
pages/mpnet_base/mpnet_base_app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sentence_transformers import SentenceTransformer
5
+ import faiss
6
+ import pickle
7
+ import time
8
+
9
# Load the dataset, embeddings, index and model
@st.cache_resource
def load_data_models():
    """Load the book table, precomputed embeddings, FAISS index and encoder.

    The result is cached with ``st.cache_resource`` instead of
    ``st.cache_data``: ``st.cache_data`` serializes the return value and
    gives each caller its own copy, which is the wrong tool for a model and
    a FAISS index. With ``st.cache_resource`` the objects are created once
    and shared, so callers must treat them as read-only.

    Returns:
        tuple: ``(data, book_embeddings, index, embedder)`` where
            ``data`` is the DataFrame read from ``data/books_data2.csv``
            with the ``annotation`` column cast to ``str``,
            ``book_embeddings`` is whatever object was pickled into
            ``mpnet_base_embeddings.pkl``, ``index`` is the FAISS index read
            from ``mpnet_base_index.index`` and ``embedder`` is the
            ``all-mpnet-base-v2`` SentenceTransformer.
    """
    data = pd.read_csv('data/books_data2.csv')
    # Guarantee every annotation is a string (NaN becomes 'nan') so that
    # later string operations on the column cannot fail.
    data['annotation'] = data['annotation'].astype(str)

    # NOTE(review): pickle.load can execute arbitrary code from the file.
    # Acceptable only while this file is an artifact shipped with the app.
    with open('pages/mpnet_base/mpnet_base_embeddings.pkl', 'rb') as f:
        book_embeddings = pickle.load(f)

    index = faiss.read_index('pages/mpnet_base/mpnet_base_index.index')
    embedder = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

    return data, book_embeddings, index, embedder


# Loaded once at import time; later reruns hit the Streamlit cache.
data, book_embeddings, index, embedder = load_data_models()
23
+
24
# Search function
def search_books(query, num_results):
    """Return the nearest books in the FAISS index for a text query.

    The query is encoded with the module-level ``embedder``, L2-normalized
    and searched in the module-level ``index``.

    Args:
        query: Text to search for.
        num_results: Maximum number of neighbours to request.

    Returns:
        tuple: ``(indices, distances)`` as two 1-D NumPy arrays of equal
            length. The length is at most ``num_results``; it is shorter
            when the index cannot supply that many hits.
    """
    # Stay in NumPy end to end: FAISS expects a C-contiguous float32 matrix,
    # and mixing torch tensors with NumPy operations is fragile.
    query_embedding = np.ascontiguousarray(
        embedder.encode([query], convert_to_numpy=True), dtype=np.float32
    )

    # keepdims=True keeps the norms as a column so the division broadcasts
    # per row regardless of how many rows there are.
    norms = np.linalg.norm(query_embedding, axis=1, keepdims=True)
    norms[norms == 0] = 1.0  # avoid NaN for an all-zero embedding
    query_embedding = query_embedding / norms

    D, I = index.search(query_embedding, num_results)

    # FAISS pads missing results with label -1; drop them so callers never
    # use -1 as a (valid, but wrong) positional index.
    found = I[0] >= 0
    return I[0][found], D[0][found]
31
+
32
# Streamlit page
def mpnet_base_page():
    """Render the book-search page.

    Shows a text input and a result-count slider. When the search button is
    pressed with a non-empty query, calls ``search_books`` and renders one
    card per hit: cover image, title, author, genre, an annotation preview
    of at most 50 words, the score returned by the index, the search time
    and a link to the book page.
    """
    st.title('Поиск книг')
    user_input = st.text_input("Введите цитату или автора:")
    results_num = st.slider("Количество результатов", min_value=1, max_value=20, value=5)

    if not st.button('Искать'):
        return

    # Searching for an empty string only returns arbitrary books.
    if not user_input.strip():
        st.warning("Введите текст запроса.")
        return

    start_time = time.perf_counter()
    indices, distances = search_books(user_input, results_num)
    search_time = time.perf_counter() - start_time

    st.write("Результаты поиска:")
    for idx, dist in zip(indices, distances):
        # A negative label means "no result"; iloc[-1] would silently show
        # the last book instead.
        if idx < 0:
            continue
        book = data.iloc[idx]

        # Truncate by word count, and add the ellipsis only when something
        # was actually cut. Use a local variable instead of mutating the row.
        annotation = book['annotation']
        words = annotation.split()
        if len(words) > 50:
            annotation = ' '.join(words[:50]) + '...'

        st.write("---")
        st.image(book['image_url'], width=250)
        st.write(f"**Название:** {book['title']}")
        st.write(f"**Автор:** {book['author']}")
        st.write(f"**Жанр:** {book['genre']}")
        st.write(f"**Описание:** {annotation}")
        st.write(f"**Метрика близости:** {dist}")
        st.write(f'**Время поиска:** {search_time:.4f} секунд')
        # A plain markdown link needs no raw HTML; keep HTML rendering off
        # because the URL comes from the dataset.
        st.markdown(f"[Читать подробнее]({book['page_url']})")
57
+
58
+
59
+
60
+
61
+
62
+ # # Загрузка данных
63
+ # @st.cache_data
64
+ # def load_data():
65
+ # data = pd.read_csv('Data/books_data2.csv')
66
+ # with open('pages/MiniLM/MiniLM_embeddings.pkl', 'rb') as f:
67
+ # book_embeddings = pickle.load(f)
68
+ # index = faiss.read_index('pages/MiniLM/MiniLM_index.index')
69
+
70
+ # embedder = SentenceTransformer("all-MiniLM-L6-v2")
71
+ # return data, index, embedder
72
+
73
+ # # Функция поиска
74
+ # def search_books(embedder, index, data, query, n_results):
75
+ # query_embedding = embedder.encode([query])
76
+ # D, I = index.search(query_embedding, n_results)
77
+
78
+ # result_books = []
79
+ # for i in range(n_results):
80
+ # book_index = I[0][i]
81
+ # book_info = data.iloc[book_index]
82
+ # book_link = book_info['page_url']
83
+ # book_image = book_info['image_url']
84
+ # book_genre = book_info['genre']
85
+ # book_title = book_info['title']
86
+ # book_description = book_info['annotation']
87
+ # book_author = book_info['author']
88
+ # similarity_score = 1 / (1 + D[0][i])
89
+
90
+ # result_books.append({
91
+ # 'Link': book_link,
92
+ # 'Image': book_image,
93
+ # 'Genre': book_genre,
94
+ # 'Title': book_title,
95
+ # 'Description': book_description,
96
+ # 'Author': book_author,
97
+ # 'Similarity': similarity_score
98
+ # })
99
+
100
+ # return result_books
101
+
102
+ # # Streamlit
103
+ # def MiniLm_page():
104
+ # st.title('Поиск книг')
105
+ # data, index, embedder = load_data()
106
+ # query = st.text_input('Введите цитату или автора:')
107
+ # n_results = st.slider("Количество результатов", min_value=1, max_value=20, value=5)
108
+
109
+ # if st.button("Искать"):
110
+ # if query:
111
+ # start_time = time.time()
112
+ # result_books = search_books(embedder, index, data, query, n_results)
113
+ # end_time = time.time()
114
+ # search_time = end_time - start_time
115
+ # st.write("Результаты поиска:")
116
+
117
+ # for book in result_books:
118
+ # st.write('---')
119
+ # st.image(book['Image'], width=250)
120
+ # st.write(f"**Название:** {book['Title']}")
121
+ # st.write(f"**Автор:** {book['Author']}")
122
+ # st.write(f"**Жанр:** {book['Genre']}")
123
+ # if len(book['Description']) > 50:
124
+ # book['Description'] = ' '.join(book['Description'].split()[:50]) + '...'
125
+ # st.write(f"**Описание:** {book['Description']}")
126
+ # st.write(f"**Сходство:** {book['Similarity']:.2f}")
127
+ # st.write(f'**Время поиска:** {search_time:.4f} секунд')
128
+ # st.write(f"[Читать подробнее]({book['Link']})")
129
+ # st.text("")
pages/mpnet_base/mpnet_base_embeddings.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a32d3e04e31c06f6b3162818e62c9b353e9301648097c607ecc5090021f5a8d
3
+ size 15157411
pages/mpnet_base/mpnet_base_index.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f27842f132ba09b2ff5908ce74c12e4ad89c3a7e01e056f84ef687697fef0f72
3
+ size 15157293