# -*- coding: utf-8 -*-
"""HS_Surprise Module_Metacritic_Games_Recomm.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1sZ9FI7bnnNBcvFa2rt78EKYeU2L4XrXi
"""

from surprise import NMF, SVD, SVDpp, KNNBasic, KNNWithMeans, KNNWithZScore, CoClustering
from surprise.model_selection import cross_validate
from surprise import Reader, Dataset

import gradio as gr

import pandas as pd
import numpy as np
import scipy as sp
from scipy import sparse
from datetime import datetime

# Load the critic reviews, skipping lines pandas cannot parse, and discard
# every row that contains a missing value.
df = pd.read_csv(
    "metacritic_critic_reviews.csv",
    encoding="utf-8",
    on_bad_lines='skip',
).dropna()
# Notebook-style preview; the returned frame is not stored anywhere.
df.head()

#Extract the calendar year from a date string such as 'Jan 5, 2017'
def add_year(full_date):
  """Return the year (as an int) of a date written like 'Jan 5, 2017'.

  The string must match the format '%b %d, %Y' (abbreviated month name,
  day of month, four-digit year); datetime.strptime raises ValueError
  for anything else.
  """
  return datetime.strptime(full_date, '%b %d, %Y').year

# Add an integer 'year' column computed from each row's 'date' string.
df['year'] = df['date'].apply(add_year)
#Append the year in brackets to the name of the game to avoid confusion
def year_game(row):
  """Return the row's game title followed by its year, e.g. 'Title (2017)'.

  `row` is anything indexable by the keys 'game' and 'year' (for example a
  DataFrame row); both values are converted with str() before joining.
  """
  return f"{row['game']!s} ({row['year']!s})"

# Replace each game title with "title (year)" so that entries are labelled by year.
df['game'] = df.apply(year_game, axis=1)

# Keep the reviews of a single platform only. Platform values listed by the author:
#['PC', '3DS', 'PlayStation Vita', 'Wii U', 'PlayStation 4','Xbox One', 'Switch']
df = df[df['platform'] == 'PlayStation 4']

#Keep only the reviews dated 2015 or later
over_2015 = df[df['year'] >= 2015]

#Average the scores per game, then sort the averages in descending order
top_recent_scorers = over_2015.groupby('game')['score'].mean().sort_values(ascending=False)
#Keep the titles of the 40 games with the highest average score
top_40_games = top_recent_scorers.index[:40]
#top_100_games = top_recent_scorers.index[:100]

# Rating table handed to the recommender: game title, reviewer name and score
# (recommender() later relabels these columns as itemID, userID and rating).
combined_games_data = df[['game','name','score']]
# Surprise algorithm instances, each created with its default settings.
algorithms = [NMF(), SVD(), SVDpp(), KNNWithZScore(), CoClustering()]

# Maps the algorithm names offered to the user onto Surprise algorithm classes.
# Classes (not instances) are stored so every lookup builds a fresh model.
_ALGORITHM_CHOICES = {
  'NMF': NMF,
  'SVD': SVD,
  'SVDpp': SVDpp,
  'KNN': KNNWithZScore,
  'CoClustering': CoClustering,
}


def _resolve_algorithm(user_algo):
  """Return a Surprise algorithm for a name, a ready-made instance, or None."""
  if user_algo is None:
    # Nothing selected: fall back to NMF.
    return NMF()
  if isinstance(user_algo, str):
    # Unknown names also fall back to NMF.
    return _ALGORITHM_CHOICES.get(user_algo, NMF)()
  # Anything else is assumed to be an already constructed algorithm object.
  return user_algo


def recommender(user_prof, user_algo=KNNWithZScore(), combined_games_data=combined_games_data, user_id=1001):
  """Recommend the ten unrated games with the highest predicted score.

  The user's ratings are appended to the existing reviews, the selected
  Surprise algorithm is trained on the combined data, and a score is
  predicted for every remaining game the user has not rated.

  Args:
    user_prof: DataFrame with the columns game, name and score, in that
      order. Rows whose score equals 0 count as "not rated" and are ignored.
    user_algo: Either one of the names 'NMF', 'SVD', 'SVDpp', 'KNN' or
      'CoClustering', or an already constructed Surprise algorithm object.
      None and unrecognised names select NMF.
    combined_games_data: DataFrame of existing reviews with the same three
      columns as user_prof. It is not modified.
    user_id: Identifier found in the name column of user_prof for the person
      receiving the recommendations.

  Returns:
    DataFrame with the columns 'iid' (game) and 'predictions' (estimated
    score), sorted by descending prediction and limited to ten rows.
  """
  my_ratings = user_prof[user_prof['score'] != 0]
  # pd.concat builds a new frame, so relabelling below leaves the inputs untouched.
  ratings = pd.concat([combined_games_data, my_ratings], axis=0)
  ratings.columns = ['itemID', 'userID', 'rating']

  # Count the reviews per game (itemID) and keep only the games that have at
  # least as many reviews as the number of games this user has rated.
  ratings['reviews'] = ratings.groupby(['itemID'])['rating'].transform('count')
  ratings = ratings[ratings['reviews'] >= my_ratings.shape[0]][['userID', 'itemID', 'rating']]

  reader = Reader(rating_scale=(1.0, 100.0))
  data = Dataset.load_from_df(ratings, reader)

  # Candidates are all remaining games minus those the user already rated.
  all_games = ratings['itemID'].unique()
  rated_games = ratings.loc[ratings['userID'] == user_id, 'itemID']
  games_to_predict = np.setdiff1d(all_games, rated_games)

  algo = _resolve_algorithm(user_algo)
  algo.fit(data.build_full_trainset())

  my_recs = [(iid, algo.predict(uid=user_id, iid=iid).est) for iid in games_to_predict]

  result = pd.DataFrame(my_recs, columns=['iid', 'predictions'])
  return result.sort_values('predictions', ascending=False).head(10)

# Pre-filled rows for the input table, one per top game: [game, user id, score].
# Every row uses user id 1001 and a score of 0; recommender() ignores rows
# whose score is 0, so 0 stands for "not rated yet".
default_entries = [[game, 1001, 0] for game in top_40_games]

# Web UI: an editable table of games to rate plus a radio group for choosing
# the algorithm by name; both values are passed, in this order, to recommender().
iface = gr.Interface(
    recommender,
    inputs=[
        # Table pre-filled with default_entries; headers match the review columns.
        gr.inputs.Dataframe(
            headers=['game','name','score'],
            default=default_entries,
        ),
        # Algorithm names the user can choose from.
        gr.inputs.Radio(['NMF', 'SVD', 'SVDpp', 'KNN', 'CoClustering']),
    ],
    outputs="dataframe",
    title="Recommendation Engine for Video Games using Surprise",
    description="Below is a dataframe of 40 games. Please rate as many as possible so the algorithm can predict the recommendations based on your previous game ratings. Do not edit any other cells beside the score column.",
)
# Start serving the interface.
iface.launch(debug=True)