Spaces:

joao-victor-campos
/

netflix-recommendation-model

Runtime error

App Files Community

joao-victor-campos NicholasBaraldi committed on Aug 24, 2022

Commit

3e41e06

•

1 Parent(s): eb2d08f

refactor code

Browse files

Co-authored-by: NicholasBaraldi <nicholas.baraldi@gmail.com>

Files changed (3) hide show

recommendation_app/core/data_handler/data_handler.py +10 -5
recommendation_app/core/model.py +11 -15
recommendation_app/main.py +0 -64

recommendation_app/core/data_handler/data_handler.py CHANGED Viewed

@@ -1,12 +1,18 @@
 from typing import List
-import numpy as np
 import pandas as pd
 from sklearn import preprocessing
 class DataHandler:
     def __init__(self, df: pd.DataFrame) -> None:
         self.df = df
     def normalize(self, features: List) -> pd.DataFrame:
@@ -33,10 +39,9 @@ class DataHandler:
         Returns:
             pd.DataFrame: DataFrame with one hot encoded columns.
         """
-        for i in features:
-            ohe_df = pd.get_dummies(self.df[i])
-            print(ohe_df)
             ohe_df.reset_index(drop=True, inplace=True)
             self.df = pd.concat([self.df, ohe_df], axis=1)
-            self.df.drop(columns=i, inplace=True)
         return self.df

 from typing import List
 import pandas as pd
 from sklearn import preprocessing
 class DataHandler:
+    """Feature Engineers the dataframe."""
     def __init__(self, df: pd.DataFrame) -> None:
+        """__init__ method.
+        Args:
+            df (pd.DataFrame): pd.DataFrame.
+        """
         self.df = df
     def normalize(self, features: List) -> pd.DataFrame:
         Returns:
             pd.DataFrame: DataFrame with one hot encoded columns.
         """
+        for feature in features:
+            ohe_df = pd.get_dummies(self.df[feature])
             ohe_df.reset_index(drop=True, inplace=True)
             self.df = pd.concat([self.df, ohe_df], axis=1)
+            self.df.drop(columns=feature, inplace=True)
         return self.df

recommendation_app/core/model.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from array import array
-import numpy as np
 import pandas as pd
 from sklearn.metrics.pairwise import cosine_similarity
@@ -11,18 +10,21 @@ class Model:
     def movie_similarity(self, chosen_movie: array, sim_movies: array) -> array:
         """Calculate the cosine similarity between two vectors.
         Args:
-            chosen_movie (array): Array with all information about the movie chosen by the user.
             sim_movies (array): n dimensions array with all movies.
         Returns:
-            array: Returns the cosine similarity between chosen_movie and sim_array.
         """
         chosen_movie = chosen_movie.reshape(1, -1)
-        # sim_movies = sim_movies.reshape(-1, 6)
         return cosine_similarity(chosen_movie, sim_movies, dense_output=True)
     def recommend(self, movie_id: str, n_rec: int) -> pd.DataFrame:
-        """Returns nlargest similarity movies based on movie_id.
         Args:
             movie_id (str): Name of the movie to be compared.
             n_rec (int): Number of movies the user wants.
@@ -30,15 +32,9 @@ class Model:
             pd.DataFrame: Dataframe with the n_rec recommendations.
         """
         movie_info = self.df.loc[movie_id].values
-        x = self.movie_similarity(movie_info, self.df.values)
-        # x.reshape(1, -1)
-        y = x.tolist()[0]
-        print(y)
-        self.df["similarity"] = y
-        print(self.df)
-        # movie_info = self.df.loc[movie_id].values
-        # self.df['similarity'] = self.df.apply(self.movie_similarity(movie_info,
-        # self.df.values)))
         return self.df.nlargest(columns="similarity", n=n_rec + 1)

 from array import array
 import pandas as pd
 from sklearn.metrics.pairwise import cosine_similarity
     def movie_similarity(self, chosen_movie: array, sim_movies: array) -> array:
         """Calculate the cosine similarity between two vectors.
         Args:
+            chosen_movie (array): Array with all information about the movie
+            chosen by the user.
             sim_movies (array): n dimensions array with all movies.
         Returns:
+            array: Returns the cosine similarity between chosen_movie and
+            sim_array.
         """
         chosen_movie = chosen_movie.reshape(1, -1)
         return cosine_similarity(chosen_movie, sim_movies, dense_output=True)
     def recommend(self, movie_id: str, n_rec: int) -> pd.DataFrame:
+        """Return nlargest similarity movies based on movie_id.
         Args:
             movie_id (str): Name of the movie to be compared.
             n_rec (int): Number of movies the user wants.
             pd.DataFrame: Dataframe with the n_rec recommendations.
         """
         movie_info = self.df.loc[movie_id].values
+        sim_array = self.movie_similarity(movie_info, self.df.values)
+        sim_list = sim_array.tolist()[0]
+        self.df["similarity"] = sim_list
         return self.df.nlargest(columns="similarity", n=n_rec + 1)

recommendation_app/main.py DELETED Viewed

@@ -1,64 +0,0 @@
-import gradio as gr
-import pandas as pd
-from core.data_handler.data_handler import DataHandler
-from core.model import Model
-PATH = "../netflix-recommendation-app/data/output/df_titles.csv"
-df2 = pd.read_csv(PATH)
-movie_names = df2["title"].tolist()
-def gradio(movie_name, n_rec):
-    if __name__ == "__main__":
-        PATH = "../netflix-recommendation-app/data/output/df_titles.csv"
-        features = [
-            "type",
-            "release_year",
-            "age_certification",
-            "runtime",
-            "seasons",
-            "imdb_score",
-            "tmdb_popularity",
-            "tmdb_score",
-            "genres_transformed",
-            "production_countries_transformed",
-        ]
-        df = pd.read_csv(PATH)
-        df_model = df.copy()
-        df_model = df_model[features]
-        x = DataHandler(df_model)
-        numeric_features = [
-            "release_year",
-            "runtime",
-            "seasons",
-            "imdb_score",
-            "tmdb_popularity",
-            "tmdb_score",
-        ]
-        x.normalize(numeric_features)
-        categorical_features = [
-            "age_certification",
-            "type",
-            "genres_transformed",
-            "production_countries_transformed",
-        ]
-        x.one_hot_encode(categorical_features)
-        # print(x.one_hot_encode(categorical_features))
-        # print(x.df)
-        mdl = Model(x.df)
-        n_rec = int(n_rec)
-        movie_name = str(movie_name)
-        movie_id = df.index[df["title"] == movie_name].tolist()
-        print(movie_id)
-        recommendations = mdl.recommend(movie_id, n_rec)
-        top_index = list(recommendations.index)[1:]
-        print(df[["title", "description"]].loc[top_index])
-        return df[["title", "description"]].loc[top_index]
-app = gr.Interface(
-    fn=gradio,
-    inputs=[gr.Dropdown(choices=movie_names), gr.inputs.Number()],
-    outputs=[gr.outputs.Dataframe()],
-)
-app.launch()