Add project
Browse files- ML_Final_Project.ipynb +0 -0
- ml_final_project.py +410 -0
ML_Final_Project.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
ml_final_project.py
ADDED
@@ -0,0 +1,410 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""ML Final Project
|
3 |
+
|
4 |
+
Automatically generated by Colaboratory.
|
5 |
+
|
6 |
+
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1Aof3bcIIqSmvsh0cux6wZ5NPk1wY-l3D
|
8 |
+
|
9 |
+
### install dependencies
|
10 |
+
"""
|
11 |
+
|
12 |
+
!gdown "1W3-WEplVSztLR3lvkyYdiKZGMT4y0cNi&confirm=t"
|
13 |
+
|
14 |
+
#!unzip IMDB.zip
|
15 |
+
|
16 |
+
#!pip install mlflow
|
17 |
+
|
18 |
+
"""# Content-based filtering
|
19 |
+
|
20 |
+
### import libraries
|
21 |
+
"""
|
22 |
+
|
23 |
+
import numpy as np
|
24 |
+
import pandas as pd
|
25 |
+
import mlflow as mf
|
26 |
+
|
27 |
+
#mf.log_artifacts({'rating':'/content/rating_small.csv', 'rating':'/content/rating_small.csv', 'movies':'/content/movies_metadata.csv','keywords':'/content/keywords.csv', 'credits':'/content/credits.csv'})
|
28 |
+
|
29 |
+
"""### read data from file"""
|
30 |
+
|
31 |
+
keywords = pd.read_csv('/content/IMDB/keywords.csv')
|
32 |
+
keywords
|
33 |
+
|
34 |
+
rating = pd.read_csv('/content/IMDB/ratings_small.csv')
|
35 |
+
rating
|
36 |
+
|
37 |
+
credits = pd.read_csv('/content/IMDB/credits.csv')
|
38 |
+
credits
|
39 |
+
|
40 |
+
metadata = pd.read_csv('/content/IMDB/movies_metadata.csv')
|
41 |
+
metadata
|
42 |
+
|
43 |
+
"""keep only related columns from released movies:"""
|
44 |
+
|
45 |
+
metadata = metadata[metadata['status'] == 'Released']
|
46 |
+
cols = np.array(['adult', 'belongs_to_collection', 'genres', 'id', 'original_language', 'title', 'production_countries', 'production_companies', 'video'])
|
47 |
+
metadata = metadata[cols]
|
48 |
+
|
49 |
+
metadata.iloc[1]
|
50 |
+
|
51 |
+
def find_collection(x):
    """Extract the collection name from a raw `belongs_to_collection` cell.

    The CSV stores a Python-dict literal string such as
    "{'id': 10194, 'name': 'Toy Story Collection', ...}"; missing values are
    pre-filled with '' by the caller and pass through unchanged.

    Fix: uses ast.literal_eval instead of eval — the value comes from an
    external CSV, and literal_eval only parses Python literals, it never
    executes code.
    """
    import ast
    if x == '':
        return ''
    return ast.literal_eval(str(x))['name']
|
55 |
+
|
56 |
+
metadata['belongs_to_collection'] = metadata['belongs_to_collection'].fillna('')
|
57 |
+
metadata['belongs_to_collection'] = metadata['belongs_to_collection'].apply(find_collection)
|
58 |
+
metadata.iloc[1]
|
59 |
+
|
60 |
+
def find_names(x):
    """Flatten a list-of-dicts literal (genres, countries, cast, keywords)
    into a comma-separated string of the 'name' fields.

    Input cells look like "[{'id': 16, 'name': 'Animation'}, ...]"; empty
    strings (from fillna('')) pass through unchanged.

    Fixes: the original evaluated the string twice with eval and left the
    first result (`genre_arr`) unused; this parses once, and with
    ast.literal_eval because the data is an external CSV.
    """
    import ast
    if x == '':
        return ''
    return ','.join(item['name'] for item in ast.literal_eval(str(x)))
|
65 |
+
|
66 |
+
metadata['genres'] = metadata['genres'].fillna('')
|
67 |
+
metadata['genres']=metadata['genres'].apply(find_names)
|
68 |
+
metadata['production_countries']=metadata['production_countries'].apply(find_names)
|
69 |
+
metadata['production_companies']=metadata['production_companies'].apply(find_names)
|
70 |
+
credits['cast'] = credits['cast'].apply(find_names)
|
71 |
+
metadata.iloc[1]
|
72 |
+
|
73 |
+
keywords['keywords'] = keywords['keywords'].apply(find_names)
|
74 |
+
metadata['id'] = metadata['id'].astype(int)
|
75 |
+
metadata = pd.merge(metadata,keywords,how='inner',on='id')
|
76 |
+
metadata.iloc[1]
|
77 |
+
|
78 |
+
def to_int(x):
    """Map the string 'True' to 1; anything else (including 'False') to 0."""
    return 1 if x == 'True' else 0
|
82 |
+
|
83 |
+
metadata['adult'].unique()
|
84 |
+
|
85 |
+
"""there are 3 values other than True or False in adult column. there are entered by mistake so we remove those rows."""
|
86 |
+
|
87 |
+
metadata = metadata[(metadata['adult'] == 'True') | (metadata['adult'] == 'False')]
|
88 |
+
metadata['adult'] = metadata['adult'].apply(to_int)
|
89 |
+
metadata['video'].unique()
|
90 |
+
|
91 |
+
"""removing nan values from dataset and replacing 'True' and 'False' with 1 and 0:"""
|
92 |
+
|
93 |
+
metadata = metadata[~metadata['video'].isna()]
|
94 |
+
metadata['video'] = metadata['video'].apply(to_int)
|
95 |
+
|
96 |
+
"""## Vectorize string features"""
|
97 |
+
|
98 |
+
metadata
|
99 |
+
|
100 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
101 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
102 |
+
|
103 |
+
def my_tok(text):
    """Comma tokenizer for the vectorizers — features were pre-joined with ','."""
    tokens = text.split(",")
    return tokens
|
105 |
+
|
106 |
+
def vectorize_string(col_name, feature_name, limit=None, df=metadata):
    """Bag-of-words encode a comma-joined string column.

    Fits a CountVectorizer (comma tokenizer, terms must appear in at least
    2 rows, optionally capped at `limit` features) on df[col_name] and
    returns (dense count matrix, column labels prefixed '<feature_name>:').

    NOTE(review): the default df=metadata binds the module-level DataFrame
    at definition time.
    """
    bow = CountVectorizer(tokenizer=my_tok, max_features=limit, min_df=2)
    counts = bow.fit_transform(df[col_name])
    labels = feature_name + ':' + bow.get_feature_names_out()
    return counts.toarray(), labels
|
114 |
+
|
115 |
+
def tfidf(col_name, feature_name, limit=None, df=metadata):
    """TF-IDF encode a comma-joined string column.

    Same contract as vectorize_string but weights terms with
    TfidfVectorizer instead of raw counts: returns (dense tf-idf matrix,
    column labels prefixed '<feature_name>:').

    NOTE(review): the default df=metadata binds the module-level DataFrame
    at definition time.
    """
    weigher = TfidfVectorizer(tokenizer=my_tok, max_features=limit, min_df=2)
    weights = weigher.fit_transform(df[col_name])
    labels = feature_name + ':' + weigher.get_feature_names_out()
    return weights.toarray(), labels
|
123 |
+
|
124 |
+
genre_data, genre_cols = vectorize_string('genres', 'genre')
|
125 |
+
genre_cols
|
126 |
+
|
127 |
+
companies_data, companies_cols = vectorize_string('production_companies', 'company', 100)
|
128 |
+
companies_cols
|
129 |
+
|
130 |
+
countries_data, countries_cols = vectorize_string('production_countries', 'country')
|
131 |
+
countries_cols
|
132 |
+
|
133 |
+
collection_data, collection_cols = vectorize_string('belongs_to_collection', 'collection')
|
134 |
+
collection_cols
|
135 |
+
|
136 |
+
metadata['original_language']= metadata['original_language'].fillna('')
|
137 |
+
lang_data, lang_cols = vectorize_string('original_language', 'lang')
|
138 |
+
lang_cols
|
139 |
+
|
140 |
+
collection_cols.shape
|
141 |
+
|
142 |
+
keyword_data, keyword_cols = tfidf('keywords', 'keyword', 1000)
|
143 |
+
keyword_cols
|
144 |
+
|
145 |
+
credits.drop(columns=['crew'], inplace=True)
|
146 |
+
credit_data, credit_cols = vectorize_string('cast','cast', 1000, df=credits)
|
147 |
+
credit_cols
|
148 |
+
|
149 |
+
metadata = pd.concat([metadata[['title','id','adult','video']],
|
150 |
+
pd.DataFrame(genre_data, columns=genre_cols),
|
151 |
+
pd.DataFrame(countries_data, columns=countries_cols),
|
152 |
+
pd.DataFrame(collection_data, columns=collection_cols),
|
153 |
+
pd.DataFrame(keyword_data, columns=keyword_cols),
|
154 |
+
pd.DataFrame(companies_data, columns=companies_cols),
|
155 |
+
pd.DataFrame(lang_data, columns=lang_cols)], axis=1)
|
156 |
+
|
157 |
+
credits[credit_cols] = credit_data
|
158 |
+
metadata = pd.merge(metadata, credits, how='inner', on='id')
|
159 |
+
metadata
|
160 |
+
|
161 |
+
#metadata.drop(['production_countries', 'genres', 'belongs_to_collection', 'keywords', 'production_companies', 'original_language'], axis=1, inplace=True)
|
162 |
+
|
163 |
+
"""list of all numerical features(everything except id and title)"""
|
164 |
+
|
165 |
+
feature_cols = np.concatenate((np.array(['adult', 'video']), genre_cols,countries_cols,collection_cols,keyword_cols,companies_cols,lang_cols,credit_cols))
|
166 |
+
feature_cols
|
167 |
+
#metadata[feature_cols] = metadata[feature_cols].astype('int8')
|
168 |
+
|
169 |
+
del genre_data,countries_data,collection_data,keyword_data,companies_data,lang_data,credit_data
|
170 |
+
del genre_cols,countries_cols,collection_cols,keyword_cols,companies_cols,lang_cols,credit_cols
|
171 |
+
|
172 |
+
feature_cols.shape
|
173 |
+
|
174 |
+
metadata
|
175 |
+
|
176 |
+
def split_dataframe(df, holdout_fraction=0.1):
    """Randomly hold out `holdout_fraction` of the rows as a test set.

    Returns (train, test); the two frames are disjoint on the index and
    together cover all of `df`.
    """
    holdout = df.sample(frac=holdout_fraction, replace=False)
    remainder = df[~df.index.isin(holdout.index)]
    return remainder, holdout
|
180 |
+
|
181 |
+
train, test = split_dataframe(metadata)
|
182 |
+
|
183 |
+
allIds = metadata['id']
|
184 |
+
|
185 |
+
number_of_batches = 4
|
186 |
+
batches = np.array_split(train, number_of_batches)
|
187 |
+
mf.log_param('number of batches', number_of_batches)
|
188 |
+
del metadata
|
189 |
+
del train
|
190 |
+
|
191 |
+
"""## Algorithm
|
192 |
+
|
193 |
+
"""
|
194 |
+
|
195 |
+
batches[0]
|
196 |
+
|
197 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
198 |
+
|
199 |
+
"""`content_based_recommmeder` returns a list of movie ids based on it's input. the input should be a dataframe which has `movieId`, `rating` columns(like `ratings_small.csv` but without `userId`)"""
|
200 |
+
|
201 |
+
number_of_batches =1
|
202 |
+
def content_based_recommender_movie(movieId):
    """Print the metadata row for `movieId` and return the full item-item
    cosine-similarity matrix over all numeric feature columns.

    NOTE(review): this reads the module-level `metadata` DataFrame, which
    the script deletes (`del metadata`) earlier; the only call site is
    commented out, so invoking this as-is would raise NameError — confirm
    before reviving it.
    """
    print("movie title is:", metadata[metadata['id']==movieId])
    # Pairwise similarity of every movie against every movie — O(n^2) memory.
    sim_mat= cosine_similarity(metadata[feature_cols])
    return sim_mat
|
206 |
+
|
207 |
+
#content_based_recommender_movie(272)
|
208 |
+
|
209 |
+
batches[1].describe()
|
210 |
+
|
211 |
+
from sklearn.metrics.pairwise import euclidean_distances as dist
|
212 |
+
def content_based_recommender(user, df, k=10, movieIds=allIds):
    """Score candidate movies against a user's rating-weighted profile.

    `user` is a frame with 'movieId' and 'rating' columns (ratings_small.csv
    shape); `df` is one metadata batch.  Builds the user profile as the
    rating-weighted mean of the feature vectors of the movies they rated,
    then returns a DataFrame with columns ['id', 'title', 'sim'] giving the
    cosine similarity of each movie in `movieIds` to that profile.

    NOTE(review): `k` is accepted but never used here — truncation happens
    at the caller.  The default movieIds=allIds is bound at definition time
    to the module-level Series.
    """
    # Join the user's ratings onto this batch's metadata (movieId -> id).
    user_movies = pd.merge(user,df,how='inner',left_on='movieId',right_on='id')
    # Weight each rated movie's feature vector by the user's rating.
    user_movies[feature_cols] = user_movies[feature_cols].multiply(user_movies['rating'], axis="index")
    # Single profile vector: mean of the weighted feature vectors.
    mean_user_movies = user_movies[feature_cols].mean(axis=0)
    # Similarity of every candidate (restricted to movieIds) to the profile.
    sim_mat = cosine_similarity(df[feature_cols][df.id.isin(movieIds)], mean_user_movies[feature_cols].values.reshape(1,-1))
    temp_data = {'id':df['id'][df.id.isin(movieIds)], 'title':df['title'][df.id.isin(movieIds)], 'sim':sim_mat.flatten()}
    return pd.DataFrame(temp_data)
|
219 |
+
|
220 |
+
def content_based_all_batches(user, k=10, movieIds=allIds):
    """Score `movieIds` against the user's profile on every metadata batch
    and return one DataFrame (columns ['id', 'title', 'sim']) sorted by
    similarity, descending.

    Fix: the original did `ans.append(content_based_recommender(...))` and
    discarded the result — pandas DataFrame.append returns a new frame (and
    is removed in pandas 2.x), so every batch after the first was silently
    dropped.  Collect the per-batch frames and concatenate them instead.

    NOTE(review): `k` is forwarded to content_based_recommender, which
    ignores it; the default movieIds=allIds is bound at definition time.
    """
    per_batch = [content_based_recommender(user, batches[i], k, movieIds)
                 for i in range(number_of_batches)]
    return pd.concat(per_batch).sort_values(by='sim', ascending=False)
|
225 |
+
|
226 |
+
|
227 |
+
content_based_k = 10
|
228 |
+
mf.log_param('content based k', content_based_k)
|
229 |
+
#xx = content_based_recommender(rating[rating['userId'] == 1], batches[1], content_based_k)
|
230 |
+
xx = content_based_all_batches(rating[rating['userId'] == 1], content_based_k)
|
231 |
+
xx.shape
|
232 |
+
|
233 |
+
"""# Collaborative Filtering
|
234 |
+
|
235 |
+
### import libraries
|
236 |
+
"""
|
237 |
+
|
238 |
+
import numpy as np
|
239 |
+
import pandas as pd
|
240 |
+
from sklearn.utils.extmath import randomized_svd
|
241 |
+
|
242 |
+
"""### explore datasets"""
|
243 |
+
|
244 |
+
rating = pd.read_csv('/content/IMDB/ratings_small.csv')
|
245 |
+
rating.head()
|
246 |
+
|
247 |
+
rating.shape
|
248 |
+
|
249 |
+
links_small = pd.read_csv('/content/IMDB/links_small.csv')
|
250 |
+
links_small.head()
|
251 |
+
|
252 |
+
credits = pd.read_csv('/content/IMDB/credits.csv')
|
253 |
+
credits.head()
|
254 |
+
|
255 |
+
movie = pd.read_csv('/content/IMDB/movies_metadata.csv')
|
256 |
+
movie.head()
|
257 |
+
|
258 |
+
movie = movie.rename(columns={'id': 'movieId'})
|
259 |
+
|
260 |
+
movie.shape
|
261 |
+
|
262 |
+
movie.head()
|
263 |
+
|
264 |
+
"""### data preprocessing
|
265 |
+
|
266 |
+
There are three rows entered by mistake, so we remove that row.
|
267 |
+
"""
|
268 |
+
|
269 |
+
movie = movie[(movie['movieId']!='1997-08-20') & (movie['movieId']!='2012-09-29') & (movie['movieId']!='2014-01-01')]
|
270 |
+
|
271 |
+
def find_names(x):
    """Flatten a list-of-dicts literal (here: the genres column) into a
    comma-separated string of the 'name' fields.

    Duplicate of the definition in the content-based section; kept for the
    collaborative-filtering cells.  Fixes: the original parsed the string
    twice with eval and left the first result (`genre_arr`) unused; this
    parses once, with ast.literal_eval because the data is an external CSV.
    """
    import ast
    if x == '':
        return ''
    return ','.join(item['name'] for item in ast.literal_eval(str(x)))
|
276 |
+
|
277 |
+
movie['genres'] = movie['genres'].fillna('')
|
278 |
+
|
279 |
+
movie['genres']=movie['genres'].apply(find_names)
|
280 |
+
|
281 |
+
movie.movieId = movie.movieId.astype("uint64")
|
282 |
+
|
283 |
+
"""only keep rating for movies with metadata in movie dataset"""
|
284 |
+
|
285 |
+
new_rating = pd.merge(rating, movie, how='inner', on=["movieId"])
|
286 |
+
|
287 |
+
new_rating = new_rating[["userId", "movieId", "rating"]]
|
288 |
+
|
289 |
+
movie.head()
|
290 |
+
|
291 |
+
new_rating.head()
|
292 |
+
|
293 |
+
train, test = split_dataframe(new_rating)
|
294 |
+
|
295 |
+
"""### matrix factorization"""
|
296 |
+
|
297 |
+
inter_mat_df = rating.pivot(index = 'userId', columns ='movieId', values = 'rating').fillna(0)
|
298 |
+
inter_mat_df
|
299 |
+
|
300 |
+
inter_mat = inter_mat_df.to_numpy()
|
301 |
+
|
302 |
+
ratings_mean = np.mean(inter_mat, axis = 1)
|
303 |
+
inter_mat_normal = inter_mat - ratings_mean.reshape(-1, 1)
|
304 |
+
|
305 |
+
inter_mat_normal
|
306 |
+
|
307 |
+
"""We use singular value decomposition for matrix factorization"""
|
308 |
+
|
309 |
+
svd_U, svd_sigma, svd_V = randomized_svd(inter_mat_normal,
|
310 |
+
n_components=15,
|
311 |
+
n_iter=5,
|
312 |
+
random_state=47)
|
313 |
+
|
314 |
+
"""This function gives the diagonal form"""
|
315 |
+
|
316 |
+
svd_sigma = np.diag(svd_sigma)
|
317 |
+
|
318 |
+
"""Making predictions"""
|
319 |
+
|
320 |
+
rating_weights = np.dot(np.dot(svd_U, svd_sigma), svd_V) + ratings_mean.reshape(-1, 1)
|
321 |
+
|
322 |
+
weights_df = pd.DataFrame(rating_weights, columns = inter_mat_df.columns)
|
323 |
+
|
324 |
+
weights_df.head()
|
325 |
+
|
326 |
+
"""making recommendations"""
|
327 |
+
|
328 |
+
def recommend_top_k(preds_df, ratings_df, movie, userId, k=10):
    """Collaborative-filtering recommendations for one user.

    preds_df: SVD-reconstructed rating matrix (rows = users in pivot order,
    columns = movieIds).  Returns (user_rated, user_preds):
    user_rated — the user's actual ratings joined with movie metadata,
    sorted by rating descending; user_preds — all movies joined with the
    user's predicted scores ('prediction' column), sorted descending,
    truncated to the top k rows.

    NOTE(review): `user_row = userId-1` assumes userIds are contiguous and
    1-based so that row position == userId-1 in the pivot — confirm against
    ratings_small.csv.
    """
    user_row = userId-1
    # Predicted score per movieId for this user, best first.
    sorted_user_predictions = preds_df.iloc[user_row].sort_values(ascending=False)
    user_data = ratings_df[ratings_df.userId == (userId)]
    # What the user actually rated, enriched with metadata.
    user_rated = user_data.merge(movie, how = 'left', left_on = 'movieId', right_on = 'movieId'). \
    sort_values(['rating'], ascending=False)
    # Predictions joined onto the full movie table; the reset_index column
    # is named by the integer row position, hence the rename to 'prediction'.
    user_preds = movie.merge(pd.DataFrame(sorted_user_predictions).reset_index(), how = 'left',
    on = 'movieId').rename(columns = {user_row: 'prediction'}). \
    sort_values('prediction', ascending = False). \
    iloc[:k, :]
    return user_rated, user_preds
|
339 |
+
|
340 |
+
collaborative_k = 100
|
341 |
+
user_rated, user_preds = recommend_top_k(weights_df, new_rating, movie, 220, collaborative_k)
|
342 |
+
mf.log_param('collaborative k', collaborative_k)
|
343 |
+
|
344 |
+
user_preds.head()
|
345 |
+
|
346 |
+
user_rated.head()
|
347 |
+
|
348 |
+
user_rated[["title", "genres"]].head(10)
|
349 |
+
|
350 |
+
user_preds[["title", "genres"]].head(10)
|
351 |
+
|
352 |
+
"""# Ensemble Model"""
|
353 |
+
|
354 |
+
def ensemble(userId, k=10):
    """Hybrid recommender: take the top k*k collaborative-filtering
    candidates for `userId`, re-rank them with the content-based model,
    and return their ['id', 'title'] columns."""
    _, cf_candidates = recommend_top_k(weights_df, new_rating, movie, userId, k*k)
    user_ratings = rating[rating['userId'] == userId]
    reranked = content_based_all_batches(user_ratings, k=k, movieIds=cf_candidates['movieId'])
    return reranked[['id','title']]
|
358 |
+
|
359 |
+
ensemble_k=10
|
360 |
+
mf.log_param('ensemble k', ensemble_k)
|
361 |
+
ensemble(220, ensemble_k)
|
362 |
+
|
363 |
+
"""# Evaluation"""
|
364 |
+
|
365 |
+
df_res = user_preds[["movieId", "prediction"]]. \
|
366 |
+
merge(user_rated[["movieId", "rating"]], how = 'outer', on = 'movieId')
|
367 |
+
|
368 |
+
df_res.sort_values(by='prediction',ascending=False,inplace=True)
|
369 |
+
df_res
|
370 |
+
|
371 |
+
threshold = 2
|
372 |
+
df_res['prediction'] = df_res['prediction'] >= threshold
|
373 |
+
df_res['rating'] = df_res['rating'] >= threshold
|
374 |
+
df_res
|
375 |
+
|
376 |
+
def precision_at_k(df, k=10, y_test: str='rating', y_pred='prediction'):
    """Precision@k over boolean columns.

    Of the first k rows, the fraction of positive predictions (y_pred True)
    that are also positive in the ground truth (y_test True).  Returns None
    when no positives were predicted in the top k.
    """
    top = df.head(k)
    predicted_positive = top[y_pred].sum()
    hits = top[top[y_pred] & top[y_test]].shape[0]
    if predicted_positive > 0:
        return hits/predicted_positive
    return None
|
384 |
+
|
385 |
+
def recall_at_k(df, k=10, y_test='rating', y_pred='prediction'):
    """Recall@k over boolean columns.

    The share of all ground-truth positives (counted over the whole df)
    that appear as predicted positives within the first k rows.  Returns
    None when the ground truth has no positives at all.
    """
    top = df.head(k)
    total_positive = df[y_test].sum()
    hits = top[top[y_pred] & top[y_test]].shape[0]
    if total_positive > 0:
        return hits/total_positive
    return None
|
393 |
+
|
394 |
+
prec_at_k = precision_at_k(df_res, 100, y_test='rating', y_pred='prediction')
|
395 |
+
rec_at_k = recall_at_k(df_res, 100, y_test='rating', y_pred='prediction')
|
396 |
+
|
397 |
+
print("precision@k: ", prec_at_k)
|
398 |
+
print("recall@k: ", rec_at_k)
|
399 |
+
mf.log_metric('recall', rec_at_k)
|
400 |
+
mf.log_metric('precision', prec_at_k)
|
401 |
+
|
402 |
+
|
403 |
+
|
404 |
+
"""# MLOps"""
|
405 |
+
|
406 |
+
def updata_batch(new_batch):
    """Append a new training batch and bump the batch counter (MLOps hook).

    Fixes two bugs in the original:
    * `number_of_batches` and `batches` were assigned without a `global`
      declaration, so any call raised UnboundLocalError.
    * `batches = batches.append(new_batch)` rebound the list to None —
      list.append mutates in place and returns None.

    (Name kept as-is — 'updata' — to preserve the external interface.)
    """
    global number_of_batches, batches
    number_of_batches = number_of_batches + 1
    batches.append(new_batch)
    mf.log_param('number of batches', number_of_batches)
|
410 |
+
|