# NOTE: web-viewer residue (file-size banner, commit hashes, line-number gutter) removed here; it was not part of the program.
import streamlit as st
import pandas as pd
import re
import requests

# DATA:
# The three tables are read from raw CSV files in the project's GitHub repository.
movies = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/movies.csv')
ratings = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/ratings.csv')
links = pd.read_csv('https://raw.githubusercontent.com/tobiasaurer/movie-recommender-streamlit/main/data/links.csv')

# clean titles column by moving a trailing article ("The", "A", "An") to the beginning of the string
# this makes it more searchable for users, e.g. "Matrix, The (1999)" -> "The Matrix (1999)"
#
# The article is only moved when it is directly followed by " (" or the end of
# the title. An unanchored ", A" search would also hit ", An" and mid-title
# text such as ", Attack ...", which mangles those titles.
movies['title'] = movies['title'].str.replace(
    r'^(.*?), (The|An|A)(?= \(|$)',
    r'\2 \1',
    regex=True,
)

# INSTRUCTIONS:
# Page heading shown at the top of the app.
st.title("Popularity-Based Recommender")
# Usage instructions for the slider, the country selector and the genre picker.
# NOTE(review): the trailing double spaces inside the literal are presumably
# intentional Markdown hard line breaks - keep them when editing the text.
st.write("""
### Instructions
Move the slider to the desired number of recommendations you wish to receive.  
  
__Optional__: You can receive links for popular streaming services for each  recommendation (if available) by selecting in your countrycode.  
Leave this field empty if you don't want to get streaming links.  
  
__Optional__: You can narrow down the recommendations by picking one or several genre(s).  
However, the more genres you choose, the fewer movies will be recommended.
  
Afterwards, simply click the "Get Recommendations" button to receive recommendations based on the most popular movies in our database.  
""")

# FUNCTIONS:

def get_popular_recommendations(n, genres):
    """Return the ``n`` highest-scoring movies whose genres match ``genres``.

    Reads the module-level ``ratings`` and ``movies`` frames.

    Args:
        n: Maximum number of rows to return.
        genres: Regular expression applied to each movie's ``genres`` string;
            only matching movies are kept.

    Returns:
        A DataFrame with the columns ``title``, ``avg_rating`` and ``genres``,
        ordered by descending combined rating.
    """
    # Per-movie mean rating and number of ratings.
    per_movie = ratings.groupby('movieId').agg(
        avg_rating=('rating', 'mean'),
        num_ratings=('rating', 'count'),
    )
    scored = per_movie.merge(movies, on='movieId')

    # Score = mean rating weighted by the square root of the rating count.
    scored = scored.assign(
        combined_rating=scored['avg_rating'] * scored['num_ratings']**0.5
    )

    genre_mask = scored["genres"].str.contains(genres, regex=True)
    ranked = scored[genre_mask].sort_values('combined_rating', ascending=False)
    return ranked.head(n)[['title', 'avg_rating', 'genres']]

def get_popular_recommendations_streaming(n, genres, country, url, headers):
    """Return the top ``n`` movies together with streaming links for ``country``.

    Reads the module-level ``ratings``, ``movies`` and ``links`` frames and
    issues one HTTP GET per recommended movie against ``url``.

    Args:
        n: Maximum number of recommendations.
        genres: Regular expression applied to each movie's ``genres`` string.
        country: Country code sent to the API and used to pick the link out of
            each service's entry in the response.
        url: Endpoint of the streaming-availability API.
        headers: HTTP headers sent with every request.

    Returns:
        A DataFrame with the columns ``title``, ``genres`` and
        ``Streaming Availability``. The last column holds one
        ``"<service>: <link> \\n"`` entry per available service and is an empty
        string when nothing could be looked up for that movie.
    """
    recommendations = (
        ratings
        .groupby('movieId')
        .agg(avg_rating=('rating', 'mean'), num_ratings=('rating', 'count'))
        .reset_index()
        .merge(movies, on='movieId')
        .assign(combined_rating=lambda x: x['avg_rating'] * x['num_ratings']**0.5)
        [lambda df: df["genres"].str.contains(genres, regex=True)]
        .sort_values('combined_rating', ascending=False)
        .head(n)
        # Join on movieId rather than on title: a title join duplicates rows
        # for repeated titles and suffixes the overlapping `genres` column.
        .merge(links[['movieId', 'imdbId']], how='left', on='movieId')
        [['title', 'genres', 'imdbId']]
    )

    availability = []
    for raw_imdb_id in recommendations['imdbId']:
        # The left join leaves a missing value when a movie has no link row.
        if pd.isna(raw_imdb_id):
            availability.append("")
            continue

        # IMDb identifiers are "tt" followed by the number zero-padded to at
        # least seven digits; a fixed "tt0" prefix is only right for 6 digits.
        imdb_id = f"tt{int(raw_imdb_id):07d}"

        querystring = {"country": country, "imdb_id": imdb_id, "output_language": "en"}
        try:
            response = requests.get(url, headers=headers, params=querystring, timeout=10)
            response.raise_for_status()
            streaming_info = response.json()
        except (requests.RequestException, ValueError):
            # Best effort: one failed lookup must not discard the whole table.
            availability.append("")
            continue

        # Response layout assumed from the original indexing:
        # {"streamingInfo": {service: {country: {"link": ...}}}}
        services = {}
        if isinstance(streaming_info, dict):
            services = streaming_info.get('streamingInfo') or {}

        entries = []
        for streaming_service, offers in services.items():
            offer = offers.get(country) if isinstance(offers, dict) else None
            if isinstance(offer, dict) and 'link' in offer:
                entries.append(f"{streaming_service}: {offer['link']} \n")
        availability.append("".join(entries))

    recommendations['Streaming Availability'] = availability

    # Return only after every movie has been processed.
    return recommendations[['title', 'genres', 'Streaming Availability']]

def transform_genre_to_regex(genres):
    """Build a regex that matches strings containing every given genre.

    Each genre becomes one positive lookahead ``(?=.*<genre>)``, so the
    combined pattern only matches when all genres occur, in any order.

    Args:
        genres: Iterable of genre names, taken literally.

    Returns:
        The concatenated lookahead pattern; an empty string (which matches
        everything) when ``genres`` is empty.
    """
    # Escape each name so characters such as "(" or "-" in a genre label are
    # matched literally instead of being interpreted as regex syntax.
    return "".join(f"(?=.*{re.escape(genre)})" for genre in genres)

# USER INPUT:
number_of_recommendations = st.slider("Number of recommendations", 1, 10, 5)

# Every distinct genre found in the pipe-separated `genres` column, sorted so
# the options appear in a stable alphabetical order rather than set order.
genre_list = sorted({genre for entry in movies.genres.str.split('|') for genre in entry})
genres = st.multiselect('Optional: Select one or more genres', genre_list)
genres_regex = transform_genre_to_regex(genres)

# The leading '' entry is the "no streaming links" choice. Without it the
# selection could never be empty, so the empty-country check in the execution
# step would never be true.
streaming_country = st.selectbox('Optional: Country for streaming information', ('', 'de', 'us'))

# API INFORMATION:
# Endpoint queried once per recommended movie.
url = "https://streaming-availability.p.rapidapi.com/get/basic"

# Request headers: the key is read from the Streamlit secrets store, the host
# value is fixed.
headers = {}
headers["X-RapidAPI-Key"] = st.secrets["api_key"]
headers["X-RapidAPI-Host"] = "streaming-availability.p.rapidapi.com"

# EXECUTION:

# Nothing is computed until the button is pressed.
if st.button("Get Recommendations"):
    if streaming_country != '':
        # A country was chosen: include streaming links.
        result = get_popular_recommendations_streaming(
            number_of_recommendations,
            genres_regex,
            streaming_country,
            url,
            headers,
        )
    else:
        # No country: plain popularity table.
        result = get_popular_recommendations(number_of_recommendations, genres_regex)
    st.write(result)