# prateekiiest's picture
# hugging-face push
# 1b7ca47
# raw
# history blame contribute delete
# No virus
# 6.79 kB
from matplotlib import cm
import pandas as pd
import plotly.express as px
import numpy as np
import matplotlib.pyplot as plt
import gradio as gr
from math import sqrt
import matplotlib
# Apply the "ggplot" style sheet to anything drawn with matplotlib.
matplotlib.style.use('ggplot')
# Select matplotlib's "Agg" backend.
# NOTE(review): presumably chosen because the app runs without a display - confirm.
matplotlib.use("Agg")
# Column names applied to each MovieLens file through ``names=``.
_GENRE_FIELDS = ["genreName", "count"]
_RATING_FIELDS = ["userID", "itemID", "rating", "timestamp"]
_USER_FIELDS = ["userID", "age", "gender", "occupation", "zip_code"]
_MOVIE_FIELDS = [
    "itemID", "title", "release_date", "video_release_date", "IMDb_URL",
    "unknown", "Action", "Adventure", "Animation", "Children's", "Comedy",
    "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror",
    "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
]

# Pipe-separated genre list.
genre_df = pd.read_csv(
    "ml-100k/ml-100k/u.genre", names=_GENRE_FIELDS, sep="|")

# Tab-separated ratings, plus a copy ordered from highest to lowest rating.
ratings_df = pd.read_csv(
    "ml-100k/ml-100k/u.data", names=_RATING_FIELDS, sep="\t")
sorted_ratingsdf = ratings_df.sort_values(by="rating", ascending=False)

# Pipe-separated user records.
user_df = pd.read_csv(
    "ml-100k/ml-100k/u.user", names=_USER_FIELDS, sep="|")

# Pipe-separated movie records, decoded as latin-1; every column from
# "unknown" onwards is a per-genre field.
movie_df = pd.read_csv(
    "ml-100k/ml-100k/u.item", names=_MOVIE_FIELDS, sep="|",
    encoding='latin-1')
def user_profile(type_wise):
    """Build a Plotly chart of how many users fall into each category.

    Args:
        type_wise: Sequence of selected statistic names. Only the first
            entry is used; it must be "Gender Statistics", "Age Statistics"
            or "Occupation Statistics".

    Returns:
        A Plotly figure: a pie chart for gender and occupation, a bar chart
        for age. Each sector/bar shows the number of users in that category.

    Raises:
        ValueError: If nothing is selected or the first selection is not one
            of the supported statistic names.
    """
    # Guard first: indexing an empty selection would raise IndexError rather
    # than the ValueError this function reports for a missing choice.
    if not type_wise:
        raise ValueError("A plot type must be selected")

    selection = type_wise[0]
    if selection == "Gender Statistics":
        column, label, as_pie = "gender", "Gender", True
    elif selection == "Age Statistics":
        column, label, as_pie = "age", "Age", False
    elif selection == "Occupation Statistics":
        column, label, as_pie = "occupation", "Occupation", True
    else:
        raise ValueError("A plot type must be selected")

    # Count users per category. Feeding the raw ``userID`` column to the
    # chart would add up the identifiers themselves, which is not a count.
    counts = user_df.groupby(column).size().reset_index(name="count")

    if as_pie:
        fig = px.pie(counts, names=column, values="count")
    else:
        fig = px.bar(counts, x=column, y="count")

    fig.update_layout(
        title=f"User Profile Distribution based on {label}",
        xaxis_title=label,
        yaxis_title="Count",
    )
    return fig
# Genre flag columns of ``movie_df`` that are stacked in the per-year chart.
_PLOTTED_GENRES = [
    "Action", "Adventure", "Animation", "Children's", "Comedy", "Crime",
    "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "Musical",
    "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
]

# Year assigned to titles that do not end in a parenthesised 4-digit year.
_FALLBACK_RELEASE_YEAR = 1990


def _release_year(title):
    """Return the year closing *title* (e.g. "Toy Story (1995)"), else the fallback."""
    year_text = title[-5:-1]
    if year_text.isdigit():
        return int(year_text)
    return _FALLBACK_RELEASE_YEAR


def _genre_by_year_figure():
    """Build the stacked bar chart of movies per genre for the last 50 release years.

    The figure is also written to ``movie_genre.svg`` in the working
    directory before it is returned.
    """
    # Group by a derived Series instead of adding a column, so the shared
    # module-level ``movie_df`` is not modified as a side effect of plotting.
    release_years = movie_df["title"].apply(_release_year).rename("yearOfRelease")

    # Columns are selected with a list; recent pandas rejects the tuple form
    # ``groupby(...)["a", "b"]``.
    per_year = (
        movie_df.groupby(release_years)[_PLOTTED_GENRES].sum().reset_index()
    )

    # groupby sorts by year, so the tail is the 50 most recent release years
    # (including the latest one).
    recent_years = per_year.tail(50)

    fig = px.bar(
        recent_years,
        x='yearOfRelease',
        y=_PLOTTED_GENRES,
        title='movies produced every year for each genre',
    )
    fig.update_layout(
        xaxis_title="Year",
        yaxis_title="Count of Movies",
    )
    # NOTE(review): static export needs Plotly's image-export engine
    # (e.g. kaleido) to be installed - confirm the deployment provides it.
    fig.write_image('movie_genre.svg', format='svg')
    return fig


def movieStatistics(plot_type):
    """Build the movie statistics chart selected in the UI.

    Args:
        plot_type: Sequence of selected plot names. Only the first entry is
            used; it must be "Genre Distribution" or "Movie Distribution".

    Returns:
        A Plotly figure: movies per genre for "Genre Distribution", or the
        stacked per-year genre chart for "Movie Distribution".

    Raises:
        ValueError: If nothing is selected or the first selection is not one
            of the supported plot names.
    """
    # Guard first: indexing an empty selection would raise IndexError rather
    # than the ValueError this function reports for a missing choice.
    if not plot_type:
        raise ValueError("A plot type must be selected")

    selection = plot_type[0]
    if selection == "Genre Distribution":
        # Count movies per genre from the genre flag columns of ``movie_df``.
        # NOTE(review): in the MovieLens 100K distribution the second field
        # of u.genre appears to be the genre's index, not a frequency, so it
        # is not used as the bar height - confirm against the data file.
        genre_counts = (
            movie_df[["unknown"] + _PLOTTED_GENRES]
            .sum()
            .rename_axis("genreName")
            .reset_index(name="count")
        )
        fig = px.bar(genre_counts, x="genreName", y='count')
        fig.update_layout(
            title="Genre Distribution",
            xaxis_title="Genre",
            yaxis_title="Count",
        )
        return fig
    if selection == "Movie Distribution":
        return _genre_by_year_figure()
    raise ValueError("A plot type must be selected")


def userBehavior(plot_type):
    """Return the stacked per-year genre chart.

    Args:
        plot_type: Accepted for interface compatibility; the value is not
            inspected.

    Returns:
        The same Plotly figure as the "Movie Distribution" option of
        ``movieStatistics``.
    """
    return _genre_by_year_figure()
# Input components for the movie statistics interface.
inputs = [
    gr.CheckboxGroup(
        choices=["Genre Distribution", "Movie Distribution"],
        value=["Genre Distribution"],
        label="Plot Type",
    ),
]

# Input components for the user profile interface.
inputs_user = [
    gr.CheckboxGroup(
        choices=["Age Statistics", "Gender Statistics", "Occupation Statistics"],
        value=["Occupation Statistics"],
        label="Type Wise",
    ),
]

# Input components for a user-behaviour view.
inputs_user_behavior = [
    gr.CheckboxGroup(
        choices=["movie genre"],
        value=["movie genre"],
        label="Plot Type",
    ),
]

# Plot output components.
outputs_movie = gr.Plot()
outputs_user = gr.Plot()
outputs = gr.Plot()
# Page layout: a header followed by one boxed section per statistics
# interface (user profile, then movies).
with gr.Blocks() as demo:
    gr.Markdown("""
<div>
<h1 style='text-align: center'>Movie Recommender</h1>
MovieLens Dataset Analysis
</div>
""")

    with gr.Box():
        # The control in this section picks a statistics type, so the
        # subtitle says so (it previously asked the visitor to pick a user).
        gr.Markdown(
            """
### User Profile Statistics
#### Select a statistics type to get the distribution.
""")
        gr.Interface(
            fn=user_profile,
            inputs=inputs_user,
            outputs=outputs_user,
            cache_examples=True,
        )

    with gr.Box():
        gr.Markdown(
            """
### Movies Statistics
#### Select a statistics type to get the distribution.
""")
        gr.Interface(
            fn=movieStatistics,
            inputs=inputs,
            outputs=outputs,
            cache_examples=True,
        )
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()