joao-victor-campos committed
Commit bb9369a
1 parent: 7150daf

add application file
.github/pull_request_template.md ADDED
@@ -0,0 +1,25 @@
+ # Description
+
+ Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.
+
+ Fixes # (issue)
+
+ ## Type of change
+
+ Please delete options that are not relevant.
+
+ - [ ] Bug fix (non-breaking change which fixes an issue)
+ - [ ] New feature (non-breaking change which adds functionality)
+ - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
+ - [ ] This change requires a documentation update
+
+ # Checklist:
+
+ - [ ] My code follows the style guidelines of this project
+ - [ ] I have performed a self-review of my own code
+ - [ ] I have commented my code, particularly in hard-to-understand areas
+ - [ ] I have made corresponding changes to the documentation
+ - [ ] My changes generate no new warnings
+ - [ ] I have added tests that prove my fix is effective or that my feature works
+ - [ ] New and existing unit tests pass locally with my changes
+ - [ ] Any dependent changes have been merged and published in downstream modules
.gitignore ADDED
@@ -0,0 +1,135 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ .vscode/
+
+ *.deb
+
+ data/output/
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2022 João Victor Campos
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
Makefile ADDED
@@ -0,0 +1,55 @@
+ # globals
+ VERSION := $(shell grep __version__ recommendation_app/__metadata__.py | head -1 | cut -d \" -f2 | cut -d \' -f2)
+
+ .PHONY: requirements-dev
+ ## install development requirements
+ requirements-dev:
+ 	@python -m pip install -U -r requirements.dev.txt
+
+ .PHONY: requirements-minimum
+ ## install prod requirements
+ requirements-minimum:
+ 	@python -m pip install -U -r requirements.txt
+
+ .PHONY: requirements
+ ## install requirements
+ requirements: requirements-dev requirements-minimum
+
+ .PHONY: style-check
+ ## run code style checks with black
+ style-check:
+ 	@echo ""
+ 	@echo "Code Style"
+ 	@echo "=========="
+ 	@echo ""
+ 	@python -m black --check --exclude="build/|buck-out/|dist/|_build/|pip/|\.pip/|\.git/|\.hg/|\.mypy_cache/|\.tox/|\.venv/" . && echo "\n\nSuccess" || (echo "\n\nFailure\n\nRun \"make apply-style\" to apply style formatting to your code"; exit 1)
+
+ .PHONY: quality-check
+ ## run code quality checks with flake8
+ quality-check:
+ 	@echo ""
+ 	@echo "Flake 8"
+ 	@echo "======="
+ 	@echo ""
+ 	@python -m flake8 && echo "Success"
+ 	@echo ""
+
+ .PHONY: type-check
+ ## run code type checks with mypy
+ type-check:
+ 	@echo ""
+ 	@echo "Mypy"
+ 	@echo "===="
+ 	@echo ""
+ 	@python -m mypy --install-types --non-interactive recommendation_app && echo "Success"
+ 	@echo ""
+
+ .PHONY: checks
+ ## run all code checks
+ checks: style-check quality-check type-check
+
+ .PHONY: apply-style
+ ## fix stylistic errors with black and isort
+ apply-style:
+ 	@python -m black --exclude="build/|buck-out/|dist/|_build/|pip/|\.pip/|\.git/|\.hg/|\.mypy_cache/|\.tox/|\.venv/" .
+ 	@python -m isort recommendation_app/ tests/
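The `VERSION` line can be checked in isolation. This sketch recreates the grep/cut pipeline against a sample `__metadata__.py` (hypothetical contents and path, since the committed file is empty):

```shell
# Write a sample metadata file (hypothetical version string).
printf '__version__ = "0.1.0"\n' > /tmp/__metadata__.py
# Same pipeline as the Makefile's VERSION variable: take the first
# __version__ line, then strip double quotes, then single quotes.
VERSION=$(grep __version__ /tmp/__metadata__.py | head -1 | cut -d \" -f2 | cut -d \' -f2)
echo "$VERSION"  # 0.1.0
```

Chaining both `cut` calls works because `cut` passes a line through unchanged when its delimiter is absent, so the pipeline handles either quote style.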
app.py ADDED
@@ -0,0 +1,57 @@
+ import gradio as gr
+ import pandas as pd
+ from recommendation_app.core.data_handler.data_handler import DataHandler
+ from recommendation_app.core.model import Model
+
+ PATH = "../netflix-recommendation-app/data/output/df_titles.csv"
+ df2 = pd.read_csv(PATH)
+ movie_names = df2["title"].tolist()
+
+
+ def gradio(movie_name, n_rec):
+     features = [
+         "type",
+         "release_year",
+         "age_certification",
+         "runtime",
+         "seasons",
+         "imdb_score",
+         "tmdb_popularity",
+         "tmdb_score",
+         "genres_transformed",
+         "production_countries_transformed",
+     ]
+     df = pd.read_csv(PATH)
+     df_model = df[features].copy()
+     x = DataHandler(df_model)
+     numeric_features = [
+         "release_year",
+         "runtime",
+         "seasons",
+         "imdb_score",
+         "tmdb_popularity",
+         "tmdb_score",
+     ]
+     x.normalize(numeric_features)
+     categorical_features = [
+         "age_certification",
+         "type",
+         "genres_transformed",
+         "production_countries_transformed",
+     ]
+     x.one_hot_encode(categorical_features)
+     mdl = Model(x.df)
+     n_rec = int(n_rec)
+     movie_name = str(movie_name)
+     movie_id = df.index[df["title"] == movie_name].tolist()
+     recommendations = mdl.recommend(movie_id, n_rec)
+     top_index = list(recommendations.index)[1:]
+     return df[["title", "description"]].loc[top_index]
+
+
+ app = gr.Interface(
+     fn=gradio,
+     inputs=[gr.Dropdown(choices=movie_names), gr.inputs.Number()],
+     outputs=[gr.outputs.Dataframe()],
+ )
+ app.launch()
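The title lookup in `gradio()` collects every positional index whose title matches, so duplicate titles yield more than one id. A minimal sketch with hypothetical titles (the real `df_titles.csv` is not part of this diff):

```python
import pandas as pd

# Hypothetical titles column standing in for df_titles.csv.
df = pd.DataFrame({"title": ["Dark", "Ozark", "Dark"]})

# Same pattern as app.py: boolean mask over the index, then tolist().
movie_id = df.index[df["title"] == "Dark"].tolist()
print(movie_id)  # [0, 2] -- both rows titled "Dark"
```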
data/input/credits.csv ADDED
The diff for this file is too large to render. See raw diff
data/input/titles.csv ADDED
The diff for this file is too large to render. See raw diff
recommendation/data_exploration.ipynb ADDED
The diff for this file is too large to render. See raw diff
recommendation_app/__init__.py ADDED
File without changes
recommendation_app/__metadata__.py ADDED
File without changes
recommendation_app/core/__inity__.py ADDED
File without changes
recommendation_app/core/data_handler/__inity__.py ADDED
File without changes
recommendation_app/core/data_handler/data_handler.py ADDED
@@ -0,0 +1,38 @@
+ from typing import List
+
+ import pandas as pd
+ from sklearn import preprocessing
+
+
+ class DataHandler:
+     def __init__(self, df: pd.DataFrame) -> None:
+         self.df = df
+
+     def normalize(self, features: List[str]) -> pd.DataFrame:
+         """Normalize a list of columns of the DataFrame in place.
+
+         Args:
+             features (List[str]): List of DataFrame column names.
+
+         Returns:
+             pd.DataFrame: DataFrame with normalized columns.
+         """
+         normalized_arr = preprocessing.normalize(self.df[features], axis=0)
+         self.df[features] = normalized_arr
+         return self.df
+
+     def one_hot_encode(self, features: List[str]) -> pd.DataFrame:
+         """One-hot encode a list of columns of the DataFrame in place.
+
+         Args:
+             features (List[str]): List of DataFrame column names.
+
+         Returns:
+             pd.DataFrame: DataFrame with one-hot encoded columns.
+         """
+         for feature in features:
+             ohe_df = pd.get_dummies(self.df[feature])
+             ohe_df.reset_index(drop=True, inplace=True)
+             self.df = pd.concat([self.df, ohe_df], axis=1)
+             self.df.drop(columns=feature, inplace=True)
+         return self.df
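The two transformations can be sketched on a toy frame (hypothetical data): `preprocessing.normalize(..., axis=0)` scales each column to unit L2 norm, and `pd.get_dummies` supplies the one-hot columns that replace the categorical one.

```python
import pandas as pd
from sklearn import preprocessing

# Toy frame with one numeric and one categorical column (illustrative values).
df = pd.DataFrame({"score": [3.0, 4.0], "genre": ["drama", "comedy"]})

# Column-wise L2 normalization, as in DataHandler.normalize (axis=0):
# [3, 4] / sqrt(3**2 + 4**2) -> [0.6, 0.8]
df[["score"]] = preprocessing.normalize(df[["score"]], axis=0)

# One-hot encoding via pd.get_dummies, as in DataHandler.one_hot_encode:
# the "genre" column is replaced by one indicator column per category.
df = pd.concat([df.drop(columns="genre"), pd.get_dummies(df["genre"])], axis=1)
print(df)
```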
recommendation_app/core/model.py ADDED
@@ -0,0 +1,39 @@
+ from typing import List
+
+ import numpy as np
+ import pandas as pd
+ from sklearn.metrics.pairwise import cosine_similarity
+
+
+ class Model:
+     def __init__(self, df: pd.DataFrame):
+         self.df = df
+
+     def movie_similarity(self, chosen_movie: np.ndarray, sim_movies: np.ndarray) -> np.ndarray:
+         """Calculate the cosine similarity between the chosen movie and all movies.
+
+         Args:
+             chosen_movie (np.ndarray): Feature vector of the movie chosen by the user.
+             sim_movies (np.ndarray): 2-D array with the feature vectors of all movies.
+
+         Returns:
+             np.ndarray: Cosine similarity between chosen_movie and each row of sim_movies.
+         """
+         chosen_movie = chosen_movie.reshape(1, -1)
+         return cosine_similarity(chosen_movie, sim_movies, dense_output=True)
+
+     def recommend(self, movie_id: List[int], n_rec: int) -> pd.DataFrame:
+         """Return the n_rec movies most similar to movie_id.
+
+         Args:
+             movie_id (List[int]): Index of the movie to be compared.
+             n_rec (int): Number of recommendations the user wants.
+
+         Returns:
+             pd.DataFrame: DataFrame with the n_rec recommendations.
+         """
+         movie_info = self.df.loc[movie_id].values
+         similarities = self.movie_similarity(movie_info, self.df.values)
+         self.df["similarity"] = similarities.tolist()[0]
+         # n_rec + 1 rows: the most similar "recommendation" is the movie itself
+         return self.df.nlargest(columns="similarity", n=n_rec + 1)
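The recommend flow can be sketched on a hypothetical 3-title feature matrix. The chosen movie always has similarity 1.0 with itself and therefore ranks first, which is why `recommend` fetches `n_rec + 1` rows and the caller drops the first one:

```python
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Hypothetical feature matrix: 3 titles, 2 features each (illustrative values).
df = pd.DataFrame([[1.0, 0.0], [0.9, 0.1], [0.0, 1.0]], index=["a", "b", "c"])

chosen = df.loc[["a"]].values                 # shape (1, 2), like df.loc[movie_id].values
df["similarity"] = cosine_similarity(chosen, df.values)[0]
top = df.nlargest(columns="similarity", n=2)  # n_rec + 1 rows; row 0 is "a" itself
print(top.index.tolist())  # ['a', 'b']
```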
recommendation_app/main.py ADDED
@@ -0,0 +1,57 @@
+ import gradio as gr
+ import pandas as pd
+ from core.data_handler.data_handler import DataHandler
+ from core.model import Model
+
+ PATH = "../netflix-recommendation-app/data/output/df_titles.csv"
+ df2 = pd.read_csv(PATH)
+ movie_names = df2["title"].tolist()
+
+
+ def gradio(movie_name, n_rec):
+     features = [
+         "type",
+         "release_year",
+         "age_certification",
+         "runtime",
+         "seasons",
+         "imdb_score",
+         "tmdb_popularity",
+         "tmdb_score",
+         "genres_transformed",
+         "production_countries_transformed",
+     ]
+     df = pd.read_csv(PATH)
+     df_model = df[features].copy()
+     x = DataHandler(df_model)
+     numeric_features = [
+         "release_year",
+         "runtime",
+         "seasons",
+         "imdb_score",
+         "tmdb_popularity",
+         "tmdb_score",
+     ]
+     x.normalize(numeric_features)
+     categorical_features = [
+         "age_certification",
+         "type",
+         "genres_transformed",
+         "production_countries_transformed",
+     ]
+     x.one_hot_encode(categorical_features)
+     mdl = Model(x.df)
+     n_rec = int(n_rec)
+     movie_name = str(movie_name)
+     movie_id = df.index[df["title"] == movie_name].tolist()
+     recommendations = mdl.recommend(movie_id, n_rec)
+     top_index = list(recommendations.index)[1:]
+     return df[["title", "description"]].loc[top_index]
+
+
+ app = gr.Interface(
+     fn=gradio,
+     inputs=[gr.Dropdown(choices=movie_names), gr.inputs.Number()],
+     outputs=[gr.outputs.Dataframe()],
+ )
+ app.launch()
requirements.dev.txt ADDED
@@ -0,0 +1,17 @@
+ # setup
+ setuptools
+ wheel
+
+ # tests
+ pytest
+ pytest-cov
+
+ # code quality
+ black
+ isort
+ flake8
+ flake8-isort
+ flake8-docstrings
+ pep8-naming
+ mypy
+ black[jupyter]
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ ipykernel==6.15.1
+ pandas
+ seaborn
+ matplotlib
+ pandas_profiling
+ ipywidgets
+ plotly
+ scikit-learn
+ numpy
+ jupyter
+ gradio
tests/test_data_handler.py ADDED
@@ -0,0 +1,13 @@
+ import pandas as pd
+
+ from recommendation_app.core.data_handler.data_handler import DataHandler
+
+ df3 = pd.DataFrame(
+     [["c", 3, 10, "cat"], ["d", 4, 50, "dog"]],
+     columns=["letter", "number", "number2", "animal"],
+ )
+
+
+ x = DataHandler(df3)
+ x.normalize(["number", "number2"])
+ print(x.one_hot_encode(["letter", "animal"]))