quyanh's picture
initial commit
97ce7fb
import numpy as np
import tensorflow as tf
def normalize(Y, R):
    """
    Mean-normalize the ratings of every book (every row of Y).

    Only real ratings (entries where R == 1) contribute to a row's mean, and
    the mean is subtracted only from those rated entries; unrated entries of
    Y are returned unchanged.

    Args:
      Y (array-like (num_books, num_users)): ratings matrix
      R (array-like (num_books, num_users)): indicator matrix, R[i, j] = 1 if
        book i was rated by user j and 0 otherwise
    Returns:
      Y_norm (ndarray (num_books, num_users)): Y with each row's mean rating
        removed from its rated entries
      Y_mean (ndarray (num_books, 1)): mean rating of each book; 0 for a book
        that has no ratings
    """
    Y = np.asarray(Y)
    R = np.asarray(R)

    rating_sums = np.sum(Y * R, axis=1)
    rating_counts = np.sum(R, axis=1)

    # Divide by the exact number of ratings. Rows without any rating have a
    # masked sum of 0, so substituting a count of 1 yields a mean of 0 without
    # dividing by zero and without the bias a small epsilon in the
    # denominator would add to every other row.
    safe_counts = np.where(rating_counts > 0, rating_counts, 1)
    Y_mean = (rating_sums / safe_counts).reshape(-1, 1)

    # Multiplying by R keeps unrated entries of Y untouched.
    Y_norm = Y - np.multiply(Y_mean, R)
    return Y_norm, Y_mean
def cost_function(X, W, b, Y, R, lambda_):
    """
    Compute the regularized collaborative-filtering cost.

    Fully vectorized and built from TensorFlow operations so it can be used
    inside a custom training loop.

    Args:
      X (ndarray (num_books, num_features)): matrix of item features
      W (ndarray (num_users, num_features)): matrix of user parameters
      b (ndarray (1, num_users)): vector of user bias parameters
      Y (ndarray (num_books, num_users)): matrix of user ratings of books
      R (ndarray (num_books, num_users)): indicator matrix, R(i, j) = 1 if
        the i-th book was rated by the j-th user
      lambda_ (float): regularization parameter
    Returns:
      J: scalar cost, i.e. half the sum of squared errors over rated entries
        plus (lambda_ / 2) times the summed squares of X and W
    """
    # Predicted rating for every (book, user) pair.
    predictions = tf.linalg.matmul(X, tf.transpose(W)) + b

    # Multiplying by R zeroes the error wherever no rating exists, so only
    # real ratings contribute to the cost.
    masked_error = (predictions - Y) * R
    squared_error_term = 0.5 * tf.reduce_sum(masked_error ** 2)

    # L2 penalty on item features and user parameters (b is not regularized).
    squared_weights = tf.reduce_sum(X ** 2) + tf.reduce_sum(W ** 2)
    regularization_term = (lambda_ / 2) * squared_weights

    return squared_error_term + regularization_term