productizationlabs
committed on
Commit
•
b0cb0c6
1
Parent(s):
da28a92
Upload app.py
Browse files
app.py
CHANGED
@@ -1,74 +1,33 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
4 |
from scipy.sparse import csr_matrix
|
5 |
from sklearn.neighbors import NearestNeighbors
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
# Map Ids to indices
|
12 |
-
user_mapper = dict(zip(np.unique(df["userId"]), list(range(N))))
|
13 |
-
movie_mapper = dict(zip(np.unique(df["movieId"]), list(range(M))))
|
14 |
-
# Map indices to IDs
|
15 |
-
user_inv_mapper = dict(zip(list(range(N)), np.unique(df["userId"])))
|
16 |
-
movie_inv_mapper = dict(zip(list(range(M)), np.unique(df["movieId"])))
|
17 |
-
user_index = [user_mapper[i] for i in df['userId']]
|
18 |
-
movie_index = [movie_mapper[i] for i in df['movieId']]
|
19 |
-
X = csr_matrix((df["rating"], (movie_index, user_index)), shape=(M, N))
|
20 |
-
return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper
|
21 |
-
|
22 |
-
|
23 |
-
def find_similar_movies(movie_id, X, k, metric='cosine', show_distance=False):
|
24 |
-
neighbour_ids = []
|
25 |
-
movie_ind = movie_mapper[movie_id]
|
26 |
-
movie_vec = X[movie_ind]
|
27 |
-
k += 1
|
28 |
-
kNN = NearestNeighbors(n_neighbors=k, algorithm="brute", metric=metric)
|
29 |
-
kNN.fit(X)
|
30 |
-
movie_vec = movie_vec.reshape(1, -1)
|
31 |
-
neighbour = kNN.kneighbors(movie_vec, return_distance=show_distance)
|
32 |
-
for i in range(0, k):
|
33 |
-
n = neighbour.item(i)
|
34 |
-
neighbour_ids.append(movie_inv_mapper[n])
|
35 |
-
neighbour_ids.pop(0)
|
36 |
-
return neighbour_ids
|
37 |
-
|
38 |
-
|
39 |
def recommend_movies(movie_name):
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
ratings
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
movie_stats.columns = movie_stats.columns.droplevel()
|
62 |
-
X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_matrix(ratings)
|
63 |
-
movie_titles = dict(zip(movies['movieId'], movies['title']))
|
64 |
-
|
65 |
-
# Set up Gradio interface
|
66 |
-
movie_name = gr.inputs.Textbox(label="Movie Name")
|
67 |
-
outputs = gr.outputs.Textbox(label="Recommended Movies", type="text")
|
68 |
-
|
69 |
-
iface = gr.Interface(
|
70 |
-
fn=recommend_movies,
|
71 |
-
inputs=movie_name,
|
72 |
-
outputs=outputs,
|
73 |
-
theme=gr.themes.Default(primary_hue="slate"))
|
74 |
-
iface.launch()
|
|
|
1 |
+
_C='rating'
|
2 |
+
_B='userId'
|
3 |
+
_A='movieId'
|
4 |
+
import gradio as gr,numpy as np,pandas as pd
|
5 |
from scipy.sparse import csr_matrix
|
6 |
from sklearn.neighbors import NearestNeighbors
|
7 |
+
def create_matrix(df):
    """Build a sparse movie-by-user rating matrix and the ID/index lookup tables.

    Args:
        df: DataFrame with ``userId``, ``movieId`` and ``rating`` columns,
            one row per rating.

    Returns:
        A 5-tuple ``(X, user_mapper, movie_mapper, user_inv_mapper,
        movie_inv_mapper)`` where

        * ``X`` is a ``csr_matrix`` of shape ``(n_movies, n_users)`` holding
          the ratings (rows are movies, columns are users),
        * ``user_mapper`` / ``movie_mapper`` map an ID to its column / row
          index in ``X``,
        * ``user_inv_mapper`` / ``movie_inv_mapper`` map an index back to
          the ID.

        Indices follow the sorted order of the unique IDs.
    """
    # Compute each sorted unique-ID array once; every mapper is derived from it.
    user_ids = np.unique(df['userId'])
    movie_ids = np.unique(df['movieId'])
    n_users = len(user_ids)
    n_movies = len(movie_ids)

    # ID -> index
    user_mapper = dict(zip(user_ids, range(n_users)))
    movie_mapper = dict(zip(movie_ids, range(n_movies)))
    # index -> ID
    user_inv_mapper = dict(zip(range(n_users), user_ids))
    movie_inv_mapper = dict(zip(range(n_movies), movie_ids))

    # Per-rating coordinates inside the matrix.
    user_index = [user_mapper[user_id] for user_id in df['userId']]
    movie_index = [movie_mapper[movie_id] for movie_id in df['movieId']]

    X = csr_matrix(
        (df['rating'], (movie_index, user_index)),
        shape=(n_movies, n_users),
    )
    return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper
|
8 |
+
def find_similar_movies(movie_id, X, k, metric='cosine', show_distance=False):
    """Return the ``k`` movies whose rating vectors are closest to ``movie_id``.

    Relies on the module-level ``movie_mapper`` (movie ID -> row of ``X``) and
    ``movie_inv_mapper`` (row of ``X`` -> movie ID).

    Args:
        movie_id: ID of the query movie; must be a key of ``movie_mapper``.
        X: Rating matrix with one row per movie.
        k: Number of neighbours to return.
        metric: Distance metric passed to ``NearestNeighbors``.
        show_distance: When False (default) return a list of movie IDs.
            When True return a list of ``(movie_id, distance)`` pairs.

    Returns:
        Up to ``k`` neighbours, nearest first, never including ``movie_id``
        itself.

    Raises:
        KeyError: If ``movie_id`` is not in ``movie_mapper``.
    """
    movie_ind = movie_mapper[movie_id]
    movie_vec = X[movie_ind].reshape(1, -1)

    # The query movie is itself a row of X, so ask for one extra neighbour,
    # but never more than there are rows (NearestNeighbors would raise).
    n_neighbors = min(k + 1, X.shape[0])
    knn = NearestNeighbors(n_neighbors=n_neighbors, algorithm='brute', metric=metric)
    knn.fit(X)

    # Always fetch distances so both return shapes share a single code path.
    distances, indices = knn.kneighbors(movie_vec, return_distance=True)

    # Drop the query movie by index rather than by position: with tied
    # distances it is not guaranteed to be the first result.
    neighbours = [
        (movie_inv_mapper[int(idx)], float(dist))
        for idx, dist in zip(indices[0], distances[0])
        if idx != movie_ind
    ][:k]

    if show_distance:
        return neighbours
    return [neighbour_id for neighbour_id, _ in neighbours]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
def recommend_movies(movie_name):
    """Return newline-separated titles of movies similar to ``movie_name``.

    The query is matched case-insensitively as a substring of the titles in
    the module-level ``movie_titles``; the first matching movie that also has
    ratings (i.e. is a key of ``movie_mapper``) is used as the seed for
    ``find_similar_movies`` with ``k=10``.

    Args:
        movie_name: Free-text (partial) movie title.

    Returns:
        The recommended titles joined with ``'\\n'``, or a "not found"
        message when the query is empty or no rated movie matches it.
    """
    not_found = 'Movie not found. Please check the spelling and try again'

    # An empty query is a substring of every title; treat it as "not found"
    # instead of recommending for an arbitrary movie.
    query = (movie_name or '').strip().lower()
    if not query:
        return not_found

    # Only keep movies that are present in the rating matrix; a title with no
    # ratings has no row in X and would raise KeyError further down.
    matches = [
        movie_id
        for movie_id, title in movie_titles.items()
        if query in title.lower() and movie_id in movie_mapper
    ]
    if not matches:
        return not_found

    similar_ids = find_similar_movies(matches[0], X, k=10)
    return '\n'.join(
        movie_titles[similar_id]
        for similar_id in similar_ids
        if similar_id in movie_titles
    )
|
16 |
+
# Load the rating and movie tables from the working directory.
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

# Summary statistics. They are not read by the app below but are kept as
# module-level names.
n_ratings = len(ratings)
n_movies = len(ratings['movieId'].unique())
n_users = len(ratings['userId'].unique())

# Number of ratings submitted by each user.
user_freq = ratings[['userId', 'movieId']].groupby('userId').count().reset_index()
user_freq.columns = ['userId', 'n_ratings']

# Mean rating per movie and the movie IDs at both extremes.
mean_rating = ratings.groupby('movieId')[['rating']].mean()
lowest_rated = mean_rating['rating'].idxmin()
highest_rated = mean_rating['rating'].idxmax()

# Rating count and mean per movie, with the column index flattened.
movie_stats = ratings.groupby('movieId')[['rating']].agg(['count', 'mean'])
movie_stats.columns = movie_stats.columns.droplevel()

# Globals used by find_similar_movies / recommend_movies.
X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_matrix(ratings)
movie_titles = dict(zip(movies['movieId'], movies['title']))

# Gradio UI. Use the top-level component classes: the gr.inputs / gr.outputs
# namespaces are deprecated and absent from newer Gradio releases.
movie_name = gr.Textbox(label='Movie Name')
outputs = gr.Textbox(label='Recommended Movies')
iface = gr.Interface(
    fn=recommend_movies,
    inputs=movie_name,
    outputs=outputs,
    theme=gr.themes.Default(primary_hue='slate'),
)
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|