mindwrapped committed on
Commit
e271901
1 Parent(s): e0f1ad4

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -0
app.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from zipfile import ZipFile
4
+ import tensorflow as tf
5
+ from tensorflow import keras
6
+ from pathlib import Path
7
+ import matplotlib.pyplot as plt
8
+ import gradio as gr
9
+ from huggingface_hub import from_pretrained_keras
10
+
11
# Download the MovieLens "latest small" archive from
# http://files.grouplens.org/datasets/movielens/ml-latest-small.zip
# The value returned by keras.utils.get_file is treated below as the local
# path of the downloaded archive.
movielens_data_file_url = "http://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
movielens_zipped_file = keras.utils.get_file("ml-latest-small.zip", movielens_data_file_url, extract=False)
keras_datasets_path = Path(movielens_zipped_file).parent
movielens_dir = keras_datasets_path / "ml-latest-small"

# Only extract the data the first time the script is run.
if not movielens_dir.exists():
    # The archive is bound to `archive`, not `zip`: a module-level `with ... as
    # zip` would rebind the builtin `zip` for the rest of the script.
    with ZipFile(movielens_zipped_file, "r") as archive:
        print("Extracting all the files now...")
        archive.extractall(path=keras_datasets_path)
        print("Done!")

# Load the ratings table (columns used below: userId, movieId, rating).
ratings_file = movielens_dir / "ratings.csv"
df = pd.read_csv(ratings_file)

# User encodings: raw userId <-> contiguous index, in order of first appearance.
user_ids = df["userId"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
user_encoded2user = dict(enumerate(user_ids))
df["user"] = df["userId"].map(user2user_encoded)
num_users = len(user2user_encoded)

# Movie encodings: raw movieId <-> contiguous index, in order of first appearance.
movie_ids = df["movieId"].unique().tolist()
movie2movie_encoded = {x: i for i, x in enumerate(movie_ids)}
movie_encoded2movie = dict(enumerate(movie_ids))
df["movie"] = df["movieId"].map(movie2movie_encoded)
num_movies = len(movie_encoded2movie)

# Store ratings as float32.
df["rating"] = df["rating"].astype(np.float32)

# Load the pretrained model from the Hugging Face Hub and the movie metadata
# table (joined to ratings through its movieId column).
model = from_pretrained_keras('mindwrapped/collaborative-filtering-movielens')
movie_df = pd.read_csv(movielens_dir / "movies.csv")
52
+
53
+
54
def update_user(id):
    """Build both result tables for one encoded user index.

    Args:
        id: Encoded user index, passed unchanged to both lookups.

    Returns:
        A ``(top_rated, recommendations)`` tuple holding the results of
        ``get_top_rated_movies_from_user(id)`` and ``get_recommendations(id)``,
        evaluated in that order.
    """
    top_rated = get_top_rated_movies_from_user(id)
    recommendations = get_recommendations(id)
    return top_rated, recommendations
56
+
57
+
58
def get_top_rated_movies_from_user(id):
    """Return the (up to) five movies a user rated highest, best first.

    Args:
        id: Encoded user index (a key of ``user_encoded2user``).

    Returns:
        Rows of ``movie_df`` with the ``movieId`` column dropped, ordered by
        the user's rating from highest to lowest. The frame is empty when
        ``id`` maps to no user or that user has no ratings.
    """
    decoded_id = user_encoded2user.get(id)

    # Ids of the five highest-rated movies for this user, best first.
    ratings_by_user = df[df.userId == decoded_id]
    top_movie_ids = (
        ratings_by_user.sort_values(by="rating", ascending=False)
        .head(5)
        .movieId.values
    )

    rows = movie_df[movie_df["movieId"].isin(top_movie_ids)]

    # isin() yields rows in movie_df's own order, which discards the rating
    # order computed above; reorder the rows by their position in the top list.
    rank = {movie_id: position for position, movie_id in enumerate(top_movie_ids)}
    rows = rows.iloc[rows["movieId"].map(rank).values.argsort()]

    return rows.drop('movieId', axis=1)
71
+
72
+
73
def random_user():
    """Return the result tables for a uniformly random user.

    Returns:
        Whatever ``update_user`` returns for a random encoded user index in
        ``[0, num_users - 1]``.
    """
    # np.random.randint excludes its upper bound, so the bound must be
    # num_users (not num_users - 1) for the last user to be selectable.
    return update_user(np.random.randint(0, num_users))
75
+
76
+
77
def get_recommendations(id):
    """Return the ten unrated movies with the highest predicted rating, best first.

    Args:
        id: Encoded user index (a key of ``user_encoded2user``).

    Returns:
        Rows of ``movie_df`` with the ``movieId`` column dropped, ordered by
        predicted rating from highest to lowest (at most ten rows). The frame
        is empty when ``id`` maps to no user or there is no candidate movie.
    """
    no_results = movie_df.iloc[0:0].drop('movieId', axis=1)

    decoded_id = user_encoded2user.get(id)
    if decoded_id is None:
        # The index does not correspond to any user in the ratings data, so
        # do not pass it on to the model.
        return no_results

    # Candidates: movies this user has not rated that also have an encoding.
    # Iterating movie_df keeps the candidate order deterministic.
    watched_ids = set(df.loc[df.userId == decoded_id, "movieId"])
    candidate_ids = [
        movie_id
        for movie_id in movie_df["movieId"]
        if movie_id not in watched_ids and movie_id in movie2movie_encoded
    ]
    if not candidate_ids:
        return no_results

    # Encoded user id; cast because the UI may deliver the value as a float
    # (NOTE(review): confirm against the slider configuration).
    encoded_id = int(id)

    # Create data [[user_id, movie_id], ...] with one row per candidate.
    encoded_candidates = [[movie2movie_encoded[movie_id]] for movie_id in candidate_ids]
    user_movie_array = np.hstack(
        ([[encoded_id]] * len(encoded_candidates), encoded_candidates)
    )

    # Predict ratings for the candidates and take the ten best, best first.
    ratings = model.predict(user_movie_array).flatten()
    top_positions = ratings.argsort()[-10:][::-1]
    recommended_movie_ids = [candidate_ids[position] for position in top_positions]

    recommended_movies = movie_df[movie_df["movieId"].isin(recommended_movie_ids)]

    # isin() yields rows in movie_df's own order; restore the ranking so the
    # table really lists the recommendations from best to worst.
    rank = {movie_id: position for position, movie_id in enumerate(recommended_movie_ids)}
    recommended_movies = recommended_movies.iloc[
        recommended_movies["movieId"].map(rank).values.argsort()
    ]

    return recommended_movies.drop('movieId', axis=1)
112
+
113
# Gradio UI: a user slider drives two read-only tables (the user's top-rated
# movies and the model's recommendations).
# NOTE(review): the block nesting below was reconstructed from a listing that
# had lost its indentation -- confirm it against the intended layout.
demo = gr.Blocks()

with demo:
    gr.Markdown(
        "\n"
        "<div>\n"
        "<h1 style='text-align: center'>Movie Recommender</h1>\n"
        "Collaborative Filtering is used to predict the top 10 recommended movies for a particular user from the dataset based on that user and previous movies they have rated.\n"
        "</div>\n"
    )

    with gr.Box():
        gr.Markdown(
            "\n"
            "### Input\n"
            "#### Select a user to get recommendations for.\n"
        )

        # step=1 keeps the slider on whole numbers: its value is used as a
        # key of user_encoded2user and as the encoded user id for the model.
        inp1 = gr.Slider(0, num_users-1, value=0, step=1, label='User')
        # TODO: add a 'Random User' button (see random_user).

        gr.Markdown(
            "\n"
            "<br>\n"
        )
        gr.Markdown(
            "\n"
            "#### Movies with the Highest Ratings from this user\n"
        )
        df1 = gr.DataFrame(headers=["title", "genres"], datatype=["str", "str"], interactive=False)

    with gr.Box():
        gr.Markdown(
            "\n"
            "### Output\n"
            "#### Top 10 movie recommendations\n"
        )
        df2 = gr.DataFrame(headers=["title", "genres"], datatype=["str", "str"], interactive=False)

    gr.Markdown(
        "\n"
        "<p style='text-align: center'>\n"
        "<a href='https://keras.io/examples/structured_data/collaborative_filtering_movielens/' target='_blank' style='text-decoration: underline'>Keras Example by Siddhartha Banerjee</a>\n"
        "<br>\n"
        "Space by Scott Krstyen (mindwrapped)\n"
        "</p>\n"
    )

    # Refresh both tables whenever the selected user changes.
    inp1.change(fn=update_user,
                inputs=inp1,
                outputs=[df1, df2])


demo.launch(debug=True)