ElijahDi committed on
Commit
4835c0a
1 Parent(s): 76005f9

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import torch
5
+ from sentence_transformers import SentenceTransformer
6
+ from resources.functions import recommend, find_rows_with_genres, get_mask_in_range
7
+
8
+
9
# ---------------------------------------------------------------------------
# Streamlit page: semantic movie search.
#
# Flow: load the movie table and the pre-computed corpus embeddings, let the
# user choose the number of results, the embedding model, genres and a year
# range, then rank the corpus against the free-text query with `recommend`
# and render the best hits that also satisfy the genre/year filters.
#
# NOTE: documentation lives in `#` comments and function docstrings only;
# a bare string at module level would be rendered by Streamlit "magic".
# ---------------------------------------------------------------------------

# Placeholder shown when a movie's own poster cannot be rendered.
FALLBACK_POSTER_URL = (
    'https://cdnn11.img.sputnik.by/img/104126/36/'
    '1041263627_235:441:1472:1802_1920x0_80_0_0_fc2acc893b618b7c650d661fafe178b8.jpg'
)

# Label shown in the model selector -> (SentenceTransformer model name,
# path of the matching pre-computed corpus embeddings).
MODEL_SOURCES = {
    'rubert-tiny2': ('cointegrated/rubert-tiny2', 'resources/corpus_embeddings_rub.pth'),
    'msmarco-MiniLM-L-12-v3': ('msmarco-MiniLM-L-12-v3', 'resources/corpus_embeddings_ms.pth'),
}


@st.cache_data
def load_catalog():
    """Read the movie table from disk.

    Cached because Streamlit re-executes the whole script on every widget
    interaction; without the cache the CSV is re-parsed each time.
    """
    return pd.read_csv('resources/DF_FINAL.csv')


@st.cache_resource
def load_model_and_embeddings(model_type):
    """Return ``(model, corpus_embeddings)`` for the selected model label.

    Cached per label so the sentence-transformer and its embeddings are
    loaded once per process rather than on every rerun.
    NOTE(review): the cached objects are shared between reruns/sessions, so
    this assumes `recommend` does not mutate them in place - confirm.
    """
    model_name, embeddings_path = MODEL_SOURCES[model_type]
    return SentenceTransformer(model_name), torch.load(embeddings_path)


# Page title (the original markup never closed the <h1> tag).
st.markdown("<h1 style='text-align: center;'>Глупый поиск фильмов</h1>", unsafe_allow_html=True)

df = load_catalog()

# 'ganres' (sic - the column name used by the CSV) holds comma-separated
# genre names; non-string cells (e.g. missing values) contribute no genres.
genre_lists = df['ganres'].apply(lambda x: x.split(', ') if isinstance(x, str) else [])
all_genres = sorted({genre for sublist in genre_lists for genre in sublist})

st.write(
    '<p style="text-align: center; font-family: Arial, sans-serif; font-size: 20px; color: white;">'
    f'Количество фильмов для поиска {len(df)}</p>',
    unsafe_allow_html=True,
)

# --- Search controls -------------------------------------------------------
st.header(':wrench: Панель инструментов')
col1, col2, col3 = st.columns([1, 2, 1])
with col1:
    top_k = st.selectbox("Сколько фильмов?", options=[5, 10, 15, 20])
with col2:
    model_type = st.selectbox("Какой моделью пользуемся?\n ", options=list(MODEL_SOURCES))
with col3:
    genres_list = st.multiselect("Какого жанра?\n ", options=all_genres)

model, emb = load_model_and_embeddings(model_type)

# Compute the year bounds once instead of four times.
min_year = df['year'].min()
max_year = df['year'].max()
range_years = st.slider("В каком году вышел фильм?", min_value=min_year,
                        max_value=max_year,
                        value=(min_year, max_year))

text = st.text_input('Что будем искать?')
button = st.button('Начать поиск', type="primary")

# --- Search and results ----------------------------------------------------
if text and button:
    # The year filter always applies; the genre filter only when the user
    # picked at least one genre.
    mask = get_mask_in_range(df=df, range_values=range_years)
    if genres_list:
        mask = find_rows_with_genres(df=df, genres_list=genres_list) & mask

    # Rank the whole corpus and filter afterwards, so the filters cannot
    # shrink the candidate pool below top_k prematurely.
    # Presumably `recommend` returns one ranked list of
    # {'corpus_id', 'score'} dicts per query - verify against its definition.
    hits = recommend(model, text, emb, len(df))
    ranked_hits = hits[0] if len(hits) > 0 else []

    # Set membership keeps the filtering linear in the number of hits.
    allowed_ids = set(df[mask].index.tolist())
    shown_hits = [hit for hit in ranked_hits if hit['corpus_id'] in allowed_ids][:top_k]

    if not shown_hits:
        # Shown only when nothing matches. Previously a bare `except:`
        # printed this after partial results and for any unrelated error.
        st.write(
            '<p style="font-family: Arial, sans-serif; font-size: 24px; color: pink; font-weight: bold;"><strong>'
            'Подходящих вариантов нет. Измените критерии поиска.</strong></p>',
            unsafe_allow_html=True,
        )
    else:
        # Report the number of movies actually displayed, which may be
        # smaller than the requested top_k.
        st.write(
            '<p style="font-family: Arial, sans-serif; font-size: 24px; color: pink; font-weight: bold;"><strong>'
            f'{len(shown_hits)} лучших рекомендаций</strong></p>',
            unsafe_allow_html=True,
        )
        st.write('\n')
        for hit in shown_hits:
            movie_id = hit['corpus_id']
            poster_col, info_col = st.columns([3, 4])
            with poster_col:
                try:
                    st.image(df.at[movie_id, 'poster'], width=300)
                except Exception:
                    # Best effort: an unusable poster value should not
                    # hide the rest of the movie card.
                    st.image(FALLBACK_POSTER_URL, width=300)
            with info_col:
                st.write(f"***Название:*** {df.at[movie_id, 'title']}")
                st.write(f"***Жанр:*** {df.at[movie_id, 'ganres']}")
                st.write(f"***Описание:*** {df.at[movie_id, 'description']}")
                st.write(f"***Год:*** {df.at[movie_id, 'year']}")
                st.write(f"***Актерский состав:*** {df.at[movie_id, 'cast']}")
                st.write(f"***Косинусное сходство:*** {round(hit['score'], 2)}")
                st.write(f"***Ссылка на фильм : {df.at[movie_id, 'url']}***")

            # Visual separator between movie cards.
            st.markdown(
                "<hr style='border: 2px solid #000; margin-top: 10px; margin-bottom: 10px;'>",
                unsafe_allow_html=True
            )