RMakushkin committed
Commit 12a1907
Parent: 217fa17

Update func.py

Files changed (1)
  1. func.py +4 -3
func.py CHANGED
@@ -12,7 +12,8 @@ device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
 
 def filter_by_ganre(df: pd.DataFrame, ganre_list: list):
     filtered_df = df[df['ganres'].apply(lambda x: any(g in ganre_list for g in x))]
-    return filtered_df
+    filt_ind = filtered_df.index.to_list()
+    return filt_ind
 
 def mean_pooling(model_output, attention_mask):
     token_embeddings = model_output['last_hidden_state']
@@ -21,7 +22,7 @@ def mean_pooling(model_output, attention_mask):
     sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
     return sum_embeddings / sum_mask
 
-def recommendation(df: pd.DataFrame, embeddings: np.array, user_text: str, n=10):
+def recommendation(filt_ind: list, embeddings: np.array, user_text: str, n=10):
     token_user_text = tokenizer(user_text, return_tensors='pt', padding='max_length', truncation=True, max_length=512)
     user_embeddings = torch.Tensor().to(device)
     model.to(device)
@@ -31,7 +32,7 @@ def recommendation(df: pd.DataFrame, embeddings: np.array, user_text: str, n=10):
         outputs = model(**batch)
         user_embeddings = torch.cat([user_embeddings, mean_pooling(outputs, batch['attention_mask'])])
     user_embeddings = user_embeddings.cpu().numpy()
-    cosine_similarities = cosine_similarity(embeddings, user_embeddings.reshape(1, -1))
+    cosine_similarities = cosine_similarity(embeddings[filt_ind], user_embeddings.reshape(1, -1))
     df_res = pd.DataFrame(cosine_similarities.ravel(), columns=['cos_sim']).sort_values('cos_sim', ascending=False)
     dict_topn = df_res.iloc[:n, :].cos_sim.to_dict()
     return dict_topn
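
For readers tracking the API change: filter_by_ganre now returns a list of row indices instead of the filtered DataFrame itself, and recommendation takes that list and slices the precomputed embedding matrix before computing cosine similarity, so only the genre-filtered rows are scored. A minimal usage sketch follows; it assumes df has a 'ganres' column of genre lists and a default RangeIndex aligned row-for-row with embeddings (otherwise the index labels could not be used as positions into the array), and that tokenizer, model, and device are the module-level objects defined at the top of func.py. The file paths, genre list, and query text are hypothetical, not part of the commit.

import numpy as np
import pandas as pd

from func import filter_by_ganre, recommendation

df = pd.read_csv('films.csv')            # hypothetical catalogue
embeddings = np.load('embeddings.npy')   # hypothetical precomputed embeddings, one row per df row

# Step 1: the genre filter now yields row indices, not a DataFrame.
filt_ind = filter_by_ganre(df, ['drama', 'comedy'])

# Step 2: recommendation embeds the query text and scores it only
# against embeddings[filt_ind].
top_n = recommendation(filt_ind, embeddings, 'a detective story set in space', n=10)

# The keys of top_n are positions within the filtered subset; map them
# back through filt_ind to recover rows of the original DataFrame.
for pos, score in top_n.items():
    print(df.loc[filt_ind[pos], 'ganres'], round(score, 3))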