RMakushkin committed
Commit: 12a1907 · Parent(s): 217fa17
Update func.py
func.py CHANGED

@@ -12,7 +12,8 @@ device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
 
 def filter_by_ganre(df: pd.DataFrame, ganre_list: list):
     filtered_df = df[df['ganres'].apply(lambda x: any(g in ganre_list for g in(x)))]
-
+    filt_ind = filtered_df.index.to_list()
+    return filt_ind
 
 def mean_pooling(model_output, attention_mask):
     token_embeddings = model_output['last_hidden_state']
@@ -21,7 +22,7 @@ def mean_pooling(model_output, attention_mask):
     sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
     return sum_embeddings / sum_mask
 
-def recommendation(df: pd.DataFrame, embeddings:np.array, user_text: str, n=10):
+def recommendation(filt_ind: list, embeddings: np.array, user_text: str, n=10):
     token_user_text = tokenizer(user_text, return_tensors='pt', padding='max_length', truncation=True, max_length=512)
     user_embeddings = torch.Tensor().to(device)
     model.to(device)
@@ -31,7 +32,7 @@ def recommendation(df: pd.DataFrame, embeddings:np.array, user_text: str, n=10):
         outputs = model(**batch)
         user_embeddings = torch.cat([user_embeddings, mean_pooling(outputs, batch['attention_mask'])])
     user_embeddings = user_embeddings.cpu().numpy()
-    cosine_similarities = cosine_similarity(embeddings, user_embeddings.reshape(1, -1))
+    cosine_similarities = cosine_similarity(embeddings[filt_ind], user_embeddings.reshape(1, -1))
     df_res = pd.DataFrame(cosine_similarities.ravel(), columns=['cos_sim']).sort_values('cos_sim', ascending=False)
     dict_topn = df_res.iloc[:n, :].cos_sim.to_dict()
     return dict_topn
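
For context, a minimal usage sketch (not part of the commit) of how the two updated functions might be wired together after this change. It assumes `df`, `embeddings`, `tokenizer` and `model` exist at module scope in func.py, as the diff implies; the genre list, the query text and the 'title' column are made-up placeholders for illustration.

# Hypothetical usage sketch: filter by genre first, then score only the
# filtered rows, matching the new recommendation() signature.
filt_ind = filter_by_ganre(df, ['drama', 'comedy'])        # indices of matching films
top_n = recommendation(filt_ind, embeddings, 'a heartfelt story about family', n=10)

# recommendation() returns {position within the filtered subset: cosine similarity};
# map each position back to the original DataFrame row via filt_ind.
for pos, score in top_n.items():
    row = df.loc[filt_ind[pos]]            # original catalogue row
    print(row['title'], round(score, 3))   # 'title' column name is an assumption

Note that indexing `embeddings[filt_ind]` only lines up if `df` keeps its default RangeIndex, so the embedding rows and the DataFrame rows stay positionally aligned.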