productizationlabs committed on
Commit
b0cb0c6
1 Parent(s): da28a92

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -71
app.py CHANGED
@@ -1,74 +1,33 @@
1
- import gradio as gr
2
- import numpy as np
3
- import pandas as pd
 
4
  from scipy.sparse import csr_matrix
5
  from sklearn.neighbors import NearestNeighbors
6
-
7
-
8
- def create_matrix(df):
9
- N = len(df['userId'].unique())
10
- M = len(df['movieId'].unique())
11
- # Map Ids to indices
12
- user_mapper = dict(zip(np.unique(df["userId"]), list(range(N))))
13
- movie_mapper = dict(zip(np.unique(df["movieId"]), list(range(M))))
14
- # Map indices to IDs
15
- user_inv_mapper = dict(zip(list(range(N)), np.unique(df["userId"])))
16
- movie_inv_mapper = dict(zip(list(range(M)), np.unique(df["movieId"])))
17
- user_index = [user_mapper[i] for i in df['userId']]
18
- movie_index = [movie_mapper[i] for i in df['movieId']]
19
- X = csr_matrix((df["rating"], (movie_index, user_index)), shape=(M, N))
20
- return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper
21
-
22
-
23
- def find_similar_movies(movie_id, X, k, metric='cosine', show_distance=False):
24
- neighbour_ids = []
25
- movie_ind = movie_mapper[movie_id]
26
- movie_vec = X[movie_ind]
27
- k += 1
28
- kNN = NearestNeighbors(n_neighbors=k, algorithm="brute", metric=metric)
29
- kNN.fit(X)
30
- movie_vec = movie_vec.reshape(1, -1)
31
- neighbour = kNN.kneighbors(movie_vec, return_distance=show_distance)
32
- for i in range(0, k):
33
- n = neighbour.item(i)
34
- neighbour_ids.append(movie_inv_mapper[n])
35
- neighbour_ids.pop(0)
36
- return neighbour_ids
37
-
38
-
39
  def recommend_movies(movie_name):
40
- movie_id = [k for k, v in movie_titles.items() if movie_name.lower() in v.lower()]
41
- if len(movie_id) == 0:
42
- return "Movie not found. Please check the spelling and try again"
43
- movie_id = movie_id[0]
44
- similar_ids = find_similar_movies(movie_id, X, k=10)
45
- recommendations = "\n".join([movie_titles[i] for i in similar_ids])
46
- return recommendations
47
-
48
-
49
- # Load data
50
- ratings = pd.read_csv("ratings.csv")
51
- movies = pd.read_csv("movies.csv")
52
- n_ratings = len(ratings)
53
- n_movies = len(ratings['movieId'].unique())
54
- n_users = len(ratings['userId'].unique())
55
- user_freq = ratings[['userId', 'movieId']].groupby('userId').count().reset_index()
56
- user_freq.columns = ['userId', 'n_ratings']
57
- mean_rating = ratings.groupby('movieId')[['rating']].mean()
58
- lowest_rated = mean_rating['rating'].idxmin()
59
- highest_rated = mean_rating['rating'].idxmax()
60
- movie_stats = ratings.groupby('movieId')[['rating']].agg(['count', 'mean'])
61
- movie_stats.columns = movie_stats.columns.droplevel()
62
- X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_matrix(ratings)
63
- movie_titles = dict(zip(movies['movieId'], movies['title']))
64
-
65
- # Set up Gradio interface
66
- movie_name = gr.inputs.Textbox(label="Movie Name")
67
- outputs = gr.outputs.Textbox(label="Recommended Movies", type="text")
68
-
69
- iface = gr.Interface(
70
- fn=recommend_movies,
71
- inputs=movie_name,
72
- outputs=outputs,
73
- theme=gr.themes.Default(primary_hue="slate"))
74
- iface.launch()
 
1
+ _C='rating'
2
+ _B='userId'
3
+ _A='movieId'
4
+ import gradio as gr,numpy as np,pandas as pd
5
  from scipy.sparse import csr_matrix
6
  from sklearn.neighbors import NearestNeighbors
7
def create_matrix(df):
    """Build a sparse movie-by-user rating matrix from a ratings table.

    Args:
        df: pandas DataFrame with ``userId``, ``movieId`` and ``rating``
            columns, one row per rating.

    Returns:
        A 5-tuple ``(X, user_mapper, movie_mapper, user_inv_mapper,
        movie_inv_mapper)`` where

        * ``X`` is a ``csr_matrix`` of shape ``(n_movies, n_users)`` whose
          entry ``[m, u]`` holds the rating for that movie/user pair,
        * ``user_mapper`` / ``movie_mapper`` map each distinct ID to its
          column / row index in ``X``,
        * ``user_inv_mapper`` / ``movie_inv_mapper`` map those indices back
          to the original IDs.
    """
    # A single np.unique call per column yields both the sorted distinct IDs
    # and, for every row of df, the position of its ID in that sorted array.
    # Those positions are exactly the matrix indices, so no per-row dict
    # lookup is needed.
    user_ids, user_index = np.unique(
        np.asarray(df['userId']), return_inverse=True
    )
    movie_ids, movie_index = np.unique(
        np.asarray(df['movieId']), return_inverse=True
    )
    n_users = len(user_ids)
    n_movies = len(movie_ids)

    # ID -> index
    user_mapper = dict(zip(user_ids, range(n_users)))
    movie_mapper = dict(zip(movie_ids, range(n_movies)))
    # index -> ID
    user_inv_mapper = dict(zip(range(n_users), user_ids))
    movie_inv_mapper = dict(zip(range(n_movies), movie_ids))

    # Rows are movies and columns are users, so X[i] is the rating vector of
    # the i-th movie.
    X = csr_matrix(
        (df['rating'], (movie_index, user_index)),
        shape=(n_movies, n_users),
    )
    return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper
8
def find_similar_movies(movie_id, X, k, metric='cosine', show_distance=False):
    """Return the ``k`` movies whose rating vectors are nearest to ``movie_id``.

    Uses the module-level ``movie_mapper`` (movie ID -> row index of ``X``)
    and ``movie_inv_mapper`` (row index -> movie ID).

    Args:
        movie_id: ID of the query movie; must be a key of ``movie_mapper``.
        X: matrix with one row per movie (as produced by ``create_matrix``).
        k: number of neighbours to return.
        metric: distance metric passed to ``NearestNeighbors``.
        show_distance: when true, each result is a ``(movie_id, distance)``
            pair instead of a bare movie ID.

    Returns:
        A list of at most ``k`` movie IDs (or ``(movie_id, distance)`` pairs),
        nearest first, never containing the query movie itself.

    Raises:
        KeyError: if ``movie_id`` is not present in ``movie_mapper``.
    """
    movie_ind = movie_mapper[movie_id]
    movie_vec = X[movie_ind].reshape(1, -1)

    # The query movie is part of the fitted data and normally comes back as
    # its own nearest neighbour, so ask for one extra. Never request more
    # neighbours than there are rows: kneighbors rejects that.
    n_neighbors = min(k + 1, X.shape[0])
    knn = NearestNeighbors(
        n_neighbors=n_neighbors, algorithm='brute', metric=metric
    )
    knn.fit(X)

    # Always fetch distances so the return value of kneighbors has one shape
    # (a (distances, indices) pair) regardless of show_distance.
    distances, indices = knn.kneighbors(movie_vec, return_distance=True)

    neighbours = []
    for dist, ind in zip(distances[0], indices[0]):
        ind = int(ind)
        # Drop the query movie by index rather than by position: with tied
        # distances it is not guaranteed to be the first result.
        if ind == movie_ind:
            continue
        neighbour_id = movie_inv_mapper[ind]
        neighbours.append(
            (neighbour_id, float(dist)) if show_distance else neighbour_id
        )

    # If ties pushed the query movie out of the result set, k + 1 entries
    # survive the filter above; trim back to k.
    return neighbours[:k]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def recommend_movies(movie_name):
13
+ A=[A for(A,B)in movie_titles.items()if movie_name.lower()in B.lower()]
14
+ if len(A)==0:return'Movie not found. Please check the spelling and try again'
15
+ A=A[0];B=find_similar_movies(A,X,k=10);C='\n'.join([movie_titles[A]for A in B]);return C
16
# Load the ratings and movie metadata from CSV files in the working directory.
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

# Summary statistics about the ratings data. None of these are read by the
# recommender functions visible in this file; they are kept because other
# code may rely on the module-level names.
n_ratings = len(ratings)
n_movies = len(ratings['movieId'].unique())
n_users = len(ratings['userId'].unique())
user_freq = ratings[['userId', 'movieId']].groupby('userId').count().reset_index()
user_freq.columns = ['userId', 'n_ratings']
mean_rating = ratings.groupby('movieId')[['rating']].mean()
lowest_rated = mean_rating['rating'].idxmin()
highest_rated = mean_rating['rating'].idxmax()
movie_stats = ratings.groupby('movieId')[['rating']].agg(['count', 'mean'])
movie_stats.columns = movie_stats.columns.droplevel()

# Globals consumed by find_similar_movies / recommend_movies.
X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_matrix(ratings)
movie_titles = dict(zip(movies['movieId'], movies['title']))

# Gradio UI. The components are created through gr.Textbox rather than the
# legacy gr.inputs / gr.outputs namespaces, which are deprecated and do not
# exist in newer Gradio releases (the same releases that provide gr.themes).
movie_name = gr.Textbox(label='Movie Name')
outputs = gr.Textbox(label='Recommended Movies')

iface = gr.Interface(
    fn=recommend_movies,
    inputs=movie_name,
    outputs=outputs,
    theme=gr.themes.Default(primary_hue='slate'),
)
iface.launch()