"""Streamlit app that serves user-based collaborative-filtering movie recommendations.

At import time the script downloads the movies and ratings CSV files, builds a
user x title rating matrix (unrated titles are filled with 0) and the pairwise
cosine similarity between users. The UI then asks for a user-ID, a number of
recommendations and an optional genre filter.
"""
import re

import pandas as pd
import streamlit as st
from sklearn.metrics.pairwise import cosine_similarity

# DATA:
movies = pd.read_csv(
    'https://raw.githubusercontent.com/tobiasaurer/recommender-systems/main/movie_data/movies.csv'
)
ratings = pd.read_csv(
    'https://raw.githubusercontent.com/tobiasaurer/recommender-systems/main/movie_data/ratings.csv'
)

# create "database" to use for recommendations
# Rows are userIds, columns are movie titles, values are ratings (0 = not rated).
user_item_matrix = (
    ratings
    .merge(movies, on='movieId')[['title', 'rating', 'userId']]
    .pivot_table(index='userId', columns='title', values='rating')
    .fillna(0)
)

# Square user x user table of cosine similarities between rating vectors.
similarities_users = pd.DataFrame(
    cosine_similarity(user_item_matrix),
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

# INSTRUCTIONS:
st.title("User-Based Recommender")

# The heading must sit on its own line, otherwise Markdown renders the whole
# paragraph as part of the "###" heading.
st.write("""
### Instructions
Type in the user-ID you want to receive recommendations for.
Move the slider to the desired number of recommendations you wish to receive.
Afterwards, simply click the "Get Recommendations" button to receive
recommendations that are most suitable for the given user.
""")

st.write("""
Optional: You can narrow down the recommendations by picking one or several genre(s).
Note: The more genres you choose, the fewer movies will be recommended.
""")


# FUNCTIONS:
def get_user_recommendations(user_id, n, genres):
    """Return the top ``n`` unrated movies for a user, filtered by genre.

    The predicted rating of a movie is the sum of the other users' ratings for
    it, each weighted by that user's cosine similarity to ``user_id``
    (weights are normalised to sum to 1; unrated entries count as 0).

    Args:
        user_id: User identifier; anything ``int()`` accepts.
        n: Maximum number of rows to return.
        genres: Regular expression matched against the ``genres`` column
            (see ``transform_genre_to_regex``). An empty string matches
            every movie.

    Returns:
        DataFrame with columns ``title`` and ``genres``, ordered by
        descending predicted rating.

    Raises:
        ValueError: If ``user_id`` cannot be converted to ``int``.
        KeyError: If ``user_id`` is not present in the similarity matrix.
    """
    user_id = int(user_id)

    # calculate weights for ratings: similarities to every *other* user,
    # selected once and normalised by their total
    is_other_user = similarities_users.index != user_id
    similarity_to_others = similarities_users.loc[is_other_user, user_id]
    weights = similarity_to_others / similarity_to_others.sum()

    # get unwatched movies for recommendations (rating 0 == not rated);
    # transposed so that rows are titles and columns are the other users
    not_yet_rated = user_item_matrix.loc[user_id, :] == 0
    unwatched_movies = user_item_matrix.loc[is_other_user, not_yet_rated].T

    # compute weighted averages and return the n movies with the highest
    # predicted ratings
    weighted_averages = unwatched_movies.dot(weights).to_frame("predicted_rating")
    ranked = (
        weighted_averages
        .sort_values("predicted_rating", ascending=False)
        .merge(movies, how='left', left_index=True, right_on='title')
    )
    # na=False keeps the mask strictly boolean even if a genre string is missing.
    matches_genres = ranked["genres"].str.contains(genres, regex=True, na=False)
    recommendations = ranked[matches_genres].head(n)

    return recommendations[['title', 'genres']]


def transform_genre_to_regex(genres):
    """Build a regex that matches strings containing *all* given genres.

    Each genre becomes a positive lookahead ``(?=.*<genre>)``, so the order of
    genres in the target string does not matter. Genre names are escaped so
    that characters such as the parentheses in "(no genres listed)" are
    matched literally instead of being interpreted as regex syntax.

    Args:
        genres: Iterable of genre names.

    Returns:
        The combined pattern; an empty string when ``genres`` is empty.
    """
    return "".join(f"(?=.*{re.escape(genre)})" for genre in genres)


# USER INPUT:
user_id_input = st.text_input('User-ID')

number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)

# Sorted so the options appear in the same order on every run
# (iteration order of a set of strings is not stable across processes).
genre_list = sorted(
    {
        genre
        for movie_genres in movies.genres.str.split('|')
        for genre in movie_genres
    }
)

genres = st.multiselect('Optional: Select one or more genres', genre_list)

genres_regex = transform_genre_to_regex(genres)

# EXECUTION:
if st.button("Get Recommendations"):
    # Validate the free-text input up front so the user gets a readable
    # message instead of a traceback.
    try:
        requested_user_id = int(user_id_input)
    except ValueError:
        st.error("Please enter a valid numeric user-ID.")
    else:
        if requested_user_id not in user_item_matrix.index:
            st.error(f"User-ID {requested_user_id} was not found in the ratings data.")
        else:
            st.write(
                get_user_recommendations(
                    requested_user_id, number_of_recommendations, genres_regex
                )
            )