MohamedMotaz committed on
Commit
9697a93
1 Parent(s): d98a3d8

adding helper file

Browse files
Files changed (2) hide show
  1. Helpers.py +106 -0
  2. requirements.txt +1 -0
Helpers.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+ from sklearn.metrics.pairwise import cosine_similarity
4
+ from sklearn.metrics import mean_squared_error
5
+ from xgboost import XGBRegressor
6
+
7
+
8
def train_model(data, user_id, test=None, eval=False):
    """Fit an XGBoost regressor on a single user's rows of *data*.

    The feature matrix is every column of *data* except ``userId``,
    ``rating``, ``Train`` and ``title``; the target is ``rating``.

    Args:
        data: DataFrame holding at least the columns ``userId``, ``rating``,
            ``Train`` and ``title`` plus the feature columns.
        user_id: Value matched against ``data['userId']`` to pick the rows
            used for training.
        test: DataFrame with the same columns as *data*; only read when
            *eval* is true.
        eval: When true, predict on the user's rows of *test* and print the
            RMSE. (The name shadows the builtin but is kept because it is
            part of the public signature.)

    Returns:
        The fitted ``XGBRegressor``.

    Raises:
        ValueError: If *eval* is true but no *test* frame was supplied.
    """
    # Fail fast, before the comparatively expensive fit, instead of crashing
    # afterwards with an opaque "NoneType is not subscriptable".
    if eval and test is None:
        raise ValueError("`test` must be provided when eval=True")

    # Identifier/target columns that must not be fed to the model.
    non_feature_columns = ['userId', 'rating', 'Train', 'title']

    # Select only this user's data.
    train_user = data[data['userId'] == user_id]
    X_train = train_user.drop(columns=non_feature_columns)
    y_train = train_user['rating']

    model = XGBRegressor()
    model.fit(X_train, y_train)

    if eval:
        test_user = test[test['userId'] == user_id]
        X_test = test_user.drop(columns=non_feature_columns)
        y_test = test_user['rating']
        y_pred = model.predict(X_test)
        # Square root via ** 0.5: the module never imports numpy, so the
        # previous np.sqrt call raised NameError on this path.
        rmse = mean_squared_error(y_test, y_pred) ** 0.5
        print(f'RMSE: {rmse:.4f}')

    return model
34
+
35
def get_user_recommendation_XGBoost(all_moves, model, user_id, n=10):
    """Rank the titles a user has no row for by the model's predicted rating.

    Args:
        all_moves: DataFrame holding at least the columns ``userId``,
            ``rating``, ``Train`` and ``title`` plus the feature columns the
            model expects. It is not modified.
        model: Any object exposing ``predict(features)`` that returns one
            score per row.
        user_id: Value matched against ``all_moves['userId']``.
        n: Maximum number of titles to return.

    Returns:
        A ``(recommendations, user_seen_movies)`` tuple of ``title`` Series:
        the up-to-*n* unseen titles with the highest predicted rating
        (best first), and the titles appearing in the user's own rows.
    """
    non_feature_columns = ['userId', 'rating', 'Train', 'title']

    # Titles that appear in the user's own rows count as already seen.
    user_seen_movies = all_moves.loc[all_moves['userId'] == user_id, 'title']

    # Keep one row per unseen title so each title is scored exactly once.
    unseen = all_moves[~all_moves['title'].isin(user_seen_movies)]
    unseen = unseen.drop_duplicates(subset=['title'])

    # Nothing left to score: return an empty title Series instead of handing
    # the model a zero-row feature matrix.
    if unseen.empty:
        return unseen['title'], user_seen_movies

    # assign() builds a new frame, so no column is written onto a filtered
    # view of the caller's data (avoids SettingWithCopyWarning).
    scored = unseen.assign(
        Pred_rating=model.predict(unseen.drop(columns=non_feature_columns))
    )

    # TODO: the original carried a note about restricting results to movies
    # with more than 100 ratings; that filter was never implemented.

    recommendations = (
        scored.sort_values(by='Pred_rating', ascending=False).head(n)['title']
    )
    return recommendations, user_seen_movies
51
+
52
+
53
+
54
def get_user_recommendation(DataBase, Matrix, user_id, l=10):
    """Recommend titles that the most similar users rated highly.

    Args:
        DataBase: DataFrame with at least the columns ``userId``, ``title``
            and ``rating``.
        Matrix: Object where ``Matrix[user_id]`` yields a Series of scores
            whose index holds other user ids (presumably a user-user
            similarity DataFrame -- confirm against the producer of the
            pickle).
        user_id: User to recommend for.
        l: Maximum number of titles to return.

    Returns:
        A list of at most *l* distinct titles. Neighbours are visited from
        most to least similar, and within one neighbour titles are ordered
        by that neighbour's rating, highest first.
    """
    if l <= 0:
        return []

    max_neighbours = 9      # how many similar users are consulted
    per_neighbour = 4       # titles taken from each of them
    min_rating = 4          # only ratings at or above this count as "liked"

    neighbours = Matrix[user_id].sort_values(ascending=False)
    # Exclude the user explicitly rather than assuming they sit at position
    # 0 of the sorted scores (ties could put another user first).
    neighbours = neighbours.drop(labels=user_id, errors='ignore')

    # Titles the user already has a row for must never be recommended.
    seen_titles = set(DataBase.loc[DataBase['userId'] == user_id, 'title'])

    recommendations = []
    already_picked = set()
    for neighbour in neighbours.index[:max_neighbours]:
        # Build the mask on DataBase itself so the rating filter does not
        # rely on implicit index alignment with a title-only Series.
        liked = DataBase[
            (DataBase['userId'] == neighbour)
            & (DataBase['rating'] >= min_rating)
        ]
        liked = liked[~liked['title'].isin(seen_titles)]

        # Order by the neighbour's rating (the previous code sorted the
        # titles alphabetically); a stable sort keeps ties in DataBase
        # order so the output is deterministic.
        best_titles = liked.sort_values(
            by='rating', ascending=False, kind='stable'
        )['title'].head(per_neighbour)

        for title in best_titles:
            if title in already_picked:
                continue
            already_picked.add(title)
            recommendations.append(title)
            # Stop exactly at the requested size instead of overshooting
            # by up to a whole neighbour's batch.
            if len(recommendations) >= l:
                return recommendations

    return recommendations
85
+
86
+
87
+
88
+
89
+
90
if __name__ == '__main__':
    # Manual smoke test: load the ratings table and the pickled similarity
    # matrix, then print similarity-based recommendations for one user.
    # The file locations default to the original developer paths but can be
    # overridden on the command line, so the script runs on other machines.
    import argparse
    import pickle

    def load_similarity_matrix(path):
        """Unpickle and return the object stored at *path*."""
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file; only point this at pickles you created or fully trust.
        with open(path, 'rb') as f:
            return pickle.load(f)

    parser = argparse.ArgumentParser(
        description='Print similarity-based movie recommendations for a user.'
    )
    parser.add_argument(
        '--database',
        default=r"D:\Study\ITI\Recommender Systems\Final\Movies-Recommender-System\Data\XGBoost_database.csv",
        help='CSV file with the ratings table',
    )
    parser.add_argument(
        '--matrix',
        default=r"D:\Study\ITI\Recommender Systems\Final\Movies-Recommender-System\Models\user_based_matrix.pkl",
        help='pickle file with the user similarity matrix',
    )
    parser.add_argument(
        '--user-id',
        type=int,
        default=1,
        help='user to recommend for',
    )
    args = parser.parse_args()

    # Load the data
    DataBase = pd.read_csv(args.database)
    # Load the similarity matrix
    Matrix = load_similarity_matrix(args.matrix)
    recommendations = get_user_recommendation(DataBase, Matrix, args.user_id)
    print(recommendations)
106
+
requirements.txt CHANGED
@@ -3,3 +3,4 @@ requests==2.31.0
3
  pandas==2.2.2
4
  pickleshare==0.7.5
5
  gdown==5.1.0
 
 
3
  pandas==2.2.2
4
  pickleshare==0.7.5
5
  gdown==5.1.0
6
+ xgboost==2.0.3