Spaces:
Sleeping
Sleeping
File size: 5,081 Bytes
00e640a 39bb071 00e640a 48b0e4b 00e640a 9c3c7e9 39bb071 9c3c7e9 00e640a b826d7a 48b0e4b 00e640a 9c3c7e9 00e640a 9c3c7e9 00e640a 9c3c7e9 b01c3d3 00e640a 9c3c7e9 39bb071 9c3c7e9 00e640a 1c645c1 00e640a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import pandas as pd
import torch
# sparse_indices = torch.load("data/sparse_indices.pt")
# sparse_values = torch.load("data/sparse_values.pt")
class EASE:
    """Inference helper around a pre-trained EASE item-item weight matrix.

    The instance keeps the training interactions and an item <-> integer
    index mapping derived from them. ``predict_active`` appends the
    interactions of the users to score, rebuilds the user-item matrix and
    multiplies the selected user rows by the supplied weight matrix.
    """

    def __init__(self, train,
                 user_col='user_id',
                 item_col='app_id',
                 score_col='is_recommended',
                 reg=150.):
        """
        train: (DataFrame) data of training set
        user_col: (String) column name of users column
        item_col: (String) column name of items column
        score_col: (String) column name of interactions column
        reg: (Float) EASE's regularization value (stored only; no method
            of this class reads it)
        """
        self.user_col = user_col
        self.item_col = item_col
        self.score_col = score_col
        self.train = train
        self.reg = reg
        self.user_id_col = user_col + "_index"
        self.item_id_col = item_col + "_index"
        self.item_lookup = self.generate_label(train, self.item_col)
        # Reverse mapping: integer item index -> original item identifier.
        self.item_map = dict(zip(
            self.item_lookup[self.item_id_col].tolist(),
            self.item_lookup[self.item_col].tolist(),
        ))

    def generate_label(self, df, col):
        """Return the distinct values of ``col`` plus a ``<col>_index`` column.

        df: (DataFrame) frame containing ``col``
        col: (String) name of the column to encode

        The index values are the pandas categorical codes of the distinct
        values.
        """
        dist_labels = df[[col]].drop_duplicates()
        dist_labels[col + "_index"] = (
            dist_labels[col].astype("category").cat.codes
        )
        return dist_labels

    @staticmethod
    def _min_max(tensor, upper):
        """Linearly rescale ``tensor`` so that its values span [0, upper].

        A tensor whose values are all equal has no spread to rescale; it is
        mapped to zeros instead of dividing by zero (which would yield NaN).
        """
        low = torch.min(tensor)
        span = torch.max(tensor) - low
        if span == 0:
            # Same dtype the division below would have produced.
            return torch.zeros_like(
                tensor, dtype=torch.result_type(tensor, 1.0))
        return upper * (tensor - low) / span

    @staticmethod
    def scaling_score(preds_tensor):
        """Min-max rescale ``preds_tensor`` to the range [0, 10].

        preds_tensor: (Tensor) raw prediction scores

        Declared static because it takes no ``self``; it can be called on
        the class or on an instance.
        """
        return EASE._min_max(preds_tensor, 10)

    def predict_active(self, pred_df,
                       weight_mx,
                       k=10,
                       weight_lambda=0.8,
                       hybrid_model=True,
                       remove_owned=True):
        """
        Score every known item for the users in ``pred_df`` and return the
        top ``k`` per user, flattened into one frame.

        Args:
            pred_df: (DataFrame) data of user interactions
            weight_mx: (Tensor) Weight matrix of pre-trained EASE model
            k: (Integer) number of recommendation to be shown; capped at
                the number of known items
            weight_lambda: (Float) factor the scores are multiplied by
                when ``hybrid_model`` is true
            hybrid_model: (Boolean) Whether to scale scores by
                ``weight_lambda``
            remove_owned: (Boolean) Whether to remove already interacted items

        Returns:
            DataFrame with the item column (named after ``item_col``) and
            ``predicted_score``; rows of successive users are concatenated.
        """
        combined = pd.concat([self.train, pred_df], axis=0)
        user_lookup = self.generate_label(combined, self.user_col)
        combined = combined.merge(user_lookup, on=[self.user_col], sort=False)
        # Inner merge: interactions with items unseen in training are dropped.
        combined = combined.merge(
            self.item_lookup, on=[self.item_col], sort=False)

        active_users = pred_df[[self.user_col]].drop_duplicates()
        active_users = active_users.merge(
            user_lookup, on=[self.user_col], sort=False)

        indices = torch.tensor(
            combined[[self.user_id_col, self.item_id_col]].to_numpy(),
            dtype=torch.long,
        )
        values = torch.tensor(
            combined[self.score_col].to_numpy(), dtype=torch.float32)
        # Explicit size keeps a row for users whose interactions were all
        # dropped above; an inferred size could exclude them and make the
        # row selection below fail.
        interactions = torch.sparse_coo_tensor(
            indices.t(),
            values,
            size=(len(user_lookup), len(self.item_lookup)),
        ).coalesce()
        # --------------------------------------------------
        user_rows = torch.tensor(
            active_users[self.user_id_col].to_numpy(), dtype=torch.long)
        user_act_tensor = interactions.index_select(dim=0, index=user_rows)

        scores = user_act_tensor @ weight_mx
        scores = self._min_max(scores, 1)
        if remove_owned:
            # Subtract the interaction values so interacted items sink.
            scores = scores - user_act_tensor.to_dense()
        if hybrid_model:
            scores = scores * weight_lambda

        # topk raises when asked for more entries than the row holds.
        top_k = min(k, scores.shape[-1])
        top_items = scores.topk(top_k, dim=1)
        # Row-major flattening keeps each user's picks contiguous.
        item_ids = [
            self.item_map[item_index]
            for item_index in top_items.indices.flatten().tolist()
        ]
        item_scores = top_items.values.flatten().tolist()
        return pd.DataFrame({
            self.item_col: item_ids,
            'predicted_score': item_scores,
        })
def ease_model(pred_df, k=10,
               weight_path="data/ease_B.pt",
               train_path="data/recs.csv"):
    """Load the stored EASE weights and training data, then recommend items.

    Args:
        pred_df: (DataFrame) data of user interactions to score
        k: (Integer) number of recommendation to be shown
        weight_path: (String) path of the saved EASE weight tensor
        train_path: (String) path of the training interactions CSV

    Returns:
        The DataFrame produced by ``EASE.predict_active``, or ``None`` when
        prediction fails (the failure is logged).
    """
    import logging

    # NOTE(review): torch.load unpickles the file; only point this at
    # trusted weight files.
    ease_B = torch.load(weight_path)
    train = pd.read_csv(train_path)
    ease = EASE(train)
    try:
        return ease.predict_active(pred_df=pred_df, weight_mx=ease_B, k=k)
    except Exception:
        # Best-effort contract kept: callers still receive None, but the
        # cause is recorded, and KeyboardInterrupt/SystemExit now propagate.
        logging.getLogger(__name__).exception("EASE prediction failed")
        return None
# def main():
# pass
# # act_user = pd.DataFrame({
# # 'user_id': [999999, 999999, 999999, 999999, 999999, 999999],
# # 'app_id': [1689910, 1245620, 814380, 620980, 1551360, 774171],
# # 'is_recommended': [0, 1, 1, 0, 1, 1]
# # })
# # act_indices = torch.FloatTensor(ac)
# # print(
# # torch.sparse.FloatTensor(sparse_indices.T, sparse_values)
# # )
# if __name__ == '__main__':
# main() |