#!/usr/bin/env python
# coding: utf-8
"""Recommend a movie similar to a given title using k-means clustering.

The script loads runtime, IMDb rating and genre flags for a fixed set of
movies from ``movies.csv`` and ``movies3.csv``, clusters them with k-means,
and serves a small Gradio text interface: the user types a title, its
features are fetched from IMDb, and a random movie from the same cluster is
returned.
"""

import csv
import random
from itertools import islice

import gradio as gr
import imdb
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

# Maximum number of data rows (header excluded) read from movies.csv.
MAX_MOVIES = 249

# Genre flag columns, in the order they appear in movies3.csv (columns 1..15)
# and in the feature vector (after runtime and rating).
lst = ['Action', 'Adventure', 'Fantasy', 'Sci-Fi', 'Animation', 'Comedy',
       'Family', 'Mystery', 'Romance', 'Drama', 'Crime', 'Thriller', 'War',
       'Musical', 'Biography']


def _load_dataset(movies_path="movies.csv", genres_path="movies3.csv"):
    """Read both CSV files into a dict of equally named column lists.

    From ``genres_path`` every data row contributes one integer flag per
    genre in ``lst`` (taken from columns 1..len(lst)).  From ``movies_path``
    the first ``MAX_MOVIES`` data rows contribute the title (column 1), the
    runtime in minutes (first whitespace-separated token of column 3) and
    the rating (column 4).  The first row of each file is skipped as a
    header.

    Returns:
        dict mapping column name to a list of values, with keys ordered
        ``movie``, ``time_minute``, ``imdb_rating`` and then the genres.
    """
    columns = {name: []
               for name in ('movie', 'time_minute', 'imdb_rating', *lst)}
    # Context managers guarantee both files are closed, even on a parse error.
    with open(movies_path, newline="") as movies_file, \
            open(genres_path, newline="") as genres_file:
        genre_rows = csv.reader(genres_file)
        next(genre_rows, None)  # skip header row
        for row in genre_rows:
            for column, genre in enumerate(lst, start=1):
                columns[genre].append(int(row[column]))

        movie_rows = csv.reader(movies_file)
        next(movie_rows, None)  # skip header row
        for row in islice(movie_rows, MAX_MOVIES):
            columns['movie'].append(row[1])
            columns['time_minute'].append(int(row[3].split()[0]))
            columns['imdb_rating'].append(float(row[4]))
    return columns


# Training data: one row per movie, columns = runtime, rating, genre flags.
pd_dict = _load_dataset()
df = pd.DataFrame(pd_dict)
X = np.array(df.drop(['movie'], axis=1))

kmeans = KMeans(max_iter=300).fit(X)


def _fetch_features(title):
    """Look up ``title`` on IMDb and build its feature vector.

    The first search hit is used.  The vector has the same layout as a row
    of ``X``: runtime in minutes, rating, then one 0/1 flag per genre in
    ``lst``.

    Returns:
        ``(imdb_id, features)`` where ``features`` is a plain list.

    Raises:
        LookupError: if the search returns nothing or the IMDb record lacks
            runtime, rating or genre data.
    """
    client = imdb.IMDb()
    matches = client.search_movie(title)
    if not matches:
        raise LookupError(f"no IMDb match found for {title!r}")
    imdb_id = matches[0].getID()
    data = client.get_movie(imdb_id).data

    runtimes = data.get('runtimes')
    rating = data.get('rating')
    genres = data.get('genres')
    if not runtimes or rating is None or genres is None:
        raise LookupError(
            f"IMDb record for {title!r} lacks runtime, rating or genre data")

    features = [int(runtimes[0]), float(rating)]
    features.extend(1 if name in genres else 0 for name in lst)
    return imdb_id, features


def _pick_from_same_cluster(features):
    """Return the title of a random dataset movie in ``features``' cluster.

    The candidates are taken directly from ``kmeans.labels_`` instead of
    drawing random rows until one happens to match.  The previous sampling
    used ``random.randint(1, 249)``, which could index one past the last
    loaded row and could never select row 0.

    Raises:
        LookupError: if no dataset row is assigned to the predicted cluster.
    """
    query = np.array(features, dtype=float).reshape(1, -1)
    cluster = kmeans.predict(query)[0]
    members = np.flatnonzero(kmeans.labels_ == cluster)
    if members.size == 0:
        raise LookupError("no movie in the dataset shares that cluster")
    return df['movie'].iloc[random.choice(members.tolist())]


# One-off demonstration for a fixed title (kept from the original notebook).
movie_name = "Oppenheimer"
try:
    code, x = _fetch_features(movie_name)
except LookupError as err:
    # A failed demo lookup should not prevent the interface from starting.
    print('Could not look up', movie_name, '-', err)
else:
    print(code)
    print(x)
    print('Another movie like', movie_name, 'is:', _pick_from_same_cluster(x))


def movie(txt):
    """Gradio handler: recommend a movie similar to the title in ``txt``.

    Args:
        txt: Movie title typed by the user.

    Returns:
        The title of a dataset movie from the same k-means cluster, or a
        short explanatory message when the input is empty or the title
        cannot be resolved on IMDb.
    """
    title = (txt or "").strip()
    if not title:
        return "Please enter a movie title."
    try:
        # Use the user's input; the original looked up the global demo title.
        _, features = _fetch_features(title)
        return _pick_from_same_cluster(features)
    except LookupError as err:
        return f"Sorry, could not recommend a movie: {err}"


iface = gr.Interface(fn=movie, inputs="text", outputs="text")
iface.launch()