tobiasaurer committed on
Commit
c86a927
1 Parent(s): b4c7df7

adds new recommender

Browse files
pages/1 - Popularity based recommender.py CHANGED
@@ -1,12 +1,12 @@
1
  import streamlit as st
2
  import pandas as pd
3
 
4
- # Import Data
5
  movies = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/recommender-systems/main/movie_data/movies.csv')
6
  ratings = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/recommender-systems/main/movie_data/ratings.csv')
7
 
 
8
  st.title("Popularity-Based Recommender")
9
-
10
  st.write("""
11
  ### Instructions
12
  Move the slider to the desired number of recommendations you wish to receive.
@@ -42,7 +42,6 @@ genre_list = set([inner for outer in movies.genres.str.split('|') for inner in o
42
  genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
43
  genres_regex = transform_genre_to_regex(genres)
44
 
45
-
46
  if st.button("Recommend!"):
47
-
48
  st.write(get_popular_recommendations(number_of_recommendations, genres_regex))
 
1
  import streamlit as st
2
  import pandas as pd
3
 
4
+ # DATA:
5
  movies = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/recommender-systems/main/movie_data/movies.csv')
6
  ratings = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/recommender-systems/main/movie_data/ratings.csv')
7
 
8
+ # INSTRUCTIONS:
9
  st.title("Popularity-Based Recommender")
 
10
  st.write("""
11
  ### Instructions
12
  Move the slider to the desired number of recommendations you wish to receive.
 
42
  genres = st.multiselect('Optional: Select one or more genres', genre_list, default=None, key=None, help=None, on_change=None, args=None, kwargs=None, disabled=False)
43
  genres_regex = transform_genre_to_regex(genres)
44
 
45
+ # EXECUTION:
46
  if st.button("Recommend!"):
 
47
  st.write(get_popular_recommendations(number_of_recommendations, genres_regex))
pages/2 - User based recommender.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from sklearn.metrics.pairwise import cosine_similarity
4
+
5
# DATA:
movies = pd.read_csv(
    'https://raw.githubusercontent.com/tobiasaurer/recommender-systems/main/movie_data/movies.csv'
)
ratings = pd.read_csv(
    'https://raw.githubusercontent.com/tobiasaurer/recommender-systems/main/movie_data/ratings.csv'
)

# "Database" used for the recommendations: one row per userId, one column per
# movie title, each cell holding the rating. Missing ratings are stored as 0.
user_item_matrix = pd.pivot_table(
    ratings.merge(movies, on='movieId')[['title', 'rating', 'userId']],
    index='userId',
    columns='title',
    values='rating',
).fillna(0)

# Pairwise cosine similarity between the user rows, labelled with the user IDs
# on both axes so it can be looked up with .loc[user, other_user].
similarities_users = pd.DataFrame(
    data=cosine_similarity(user_item_matrix),
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

# INSTRUCTIONS:
st.title("User-Based Recommender")
st.write("""
### Instructions
Type in the user-ID you want to receive recommendations for.
Move the slider to the desired number of recommendations you wish to receive.
""")
st.write("""
Optional: You can narrow down the recommendations by picking one or several genre(s).

Note: The more genres you choose, the fewer movies will be recommended.
""")
33
+
34
+ # FUNCTIONS:
35
+
36
def get_user_recommendations(user_id, n, genres):
    """Recommend movies a user has not rated yet, based on other users' ratings.

    Every movie the user has not rated (stored as 0 in ``user_item_matrix``)
    is scored with the average of the other users' ratings, weighted by the
    normalised values found in ``similarities_users``.

    Args:
        user_id: ID of the user; anything ``int()`` accepts, e.g. the raw
            string from a text input.
        n: Maximum number of recommendations to return.
        genres: Regular expression matched against each movie's ``genres``
            string. An empty string matches every movie.

    Returns:
        DataFrame with the columns ``title`` and ``genres`` holding at most
        ``n`` rows, ordered by descending predicted rating.

    Raises:
        ValueError: If ``user_id`` cannot be converted to an integer.
        KeyError: If the user does not appear in the rating data.
    """
    user_id = int(user_id)
    if user_id not in user_item_matrix.index:
        # Fail with a readable message instead of a bare label from .loc.
        raise KeyError(f"user_id {user_id} not found in the rating data")

    # Normalise the similarities to all *other* users so the weights sum to 1.
    is_other_user = similarities_users.index != user_id
    similarity_to_user = similarities_users.loc[is_other_user, user_id]
    weights = similarity_to_user / similarity_to_user.sum()

    # Movies the user has not rated, transposed to a (title x other user) table.
    unwatched_movies = user_item_matrix.loc[
        user_item_matrix.index != user_id,
        user_item_matrix.loc[user_id, :] == 0,
    ].T

    # Weighted average of the other users' ratings per unwatched movie.
    predicted_ratings = unwatched_movies.dot(weights).to_frame("predicted_rating")
    ranked_movies = (
        predicted_ratings
        .sort_values("predicted_rating", ascending=False)
        .merge(movies, how='left', left_index=True, right_on='title')
    )

    # Filter by genre BEFORE cutting down to n rows; truncating first would
    # throw away lower-ranked movies that do match the requested genres.
    # na=False keeps the mask strictly boolean if a movie has no genre string.
    matches_genres = ranked_movies["genres"].str.contains(genres, regex=True, na=False)
    return ranked_movies.loc[matches_genres, ['title', 'genres']].head(n)
60
+
61
def transform_genre_to_regex(genres):
    """Build a regex that matches strings containing ALL of the given genres.

    Each genre becomes one lookahead ``(?=.*<genre>)``; chaining lookaheads
    requires every genre to occur somewhere in the string, in any order.

    Args:
        genres: Iterable of genre names, taken literally.

    Returns:
        The combined pattern, or an empty string (which matches everything)
        when no genres are given.
    """
    import re  # local import keeps this block self-contained

    # Escape each name so regex metacharacters in it (e.g. the parentheses in
    # "(no genres listed)") are matched literally instead of forming a group.
    return "".join(f"(?=.*{re.escape(genre)})" for genre in genres)
66
+
67
# USER INPUT:
user_id_input = st.text_input('User-ID')

number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)

# Sorted so the options are listed alphabetically; a set has no defined order.
genre_list = sorted(
    {genre for entry in movies.genres.dropna().str.split('|') for genre in entry}
)
genres = st.multiselect('Optional: Select one or more genres', genre_list)
genres_regex = transform_genre_to_regex(genres)

# EXECUTION:
if st.button("Recommend!"):
    # Validate the free-text ID here so a blank, non-numeric or unknown ID
    # shows a readable message instead of a traceback.
    try:
        requested_user_id = int(user_id_input)
    except ValueError:
        st.error("Please enter a numeric user-ID.")
    else:
        if requested_user_id not in user_item_matrix.index:
            st.error(f"No ratings found for user-ID {requested_user_id}.")
        else:
            st.write(get_user_recommendations(requested_user_id, number_of_recommendations, genres_regex))
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  streamlit
2
- pandas
 
 
1
  streamlit
2
+ pandas
3
+ scikit-learn