Spaces:

hi-paris
/

app-ai-ds-hec

Sleeping

App Files Files Community

app-ai-ds-hec / pages /recommendation_system.py

laudavid

fix page icons

7b7d942 12 months ago

raw

history blame

21.2 kB

	import streamlit as st
	import numpy as np
	import pandas as pd
	import requests
	import pickle
	import os
	import altair as alt
	import plotly.express as px
	from sklearn.preprocessing import MinMaxScaler
	from sklearn.metrics.pairwise import cosine_similarity
	from annotated_text import annotated_text
	from utils import load_data_pickle, load_model_pickle, load_data_csv



	#st.set_page_config(layout="wide")



	# ---------------------------------------------------------------------
	# Page header: general introduction to recommendation systems, followed
	# by the use-case selector that drives the rest of the page.
	# ---------------------------------------------------------------------
	_DEFINITION_TEXT = """Recommendation systems are AI algorithms built to suggest or recommend products to consumers.
	They are very common in social media platforms such as TikTok, Youtube or Instagram or e-commerce websites as they help improve and personalize a consumer's experience."""

	_METHODS_TEXT = """There are two methods to build recommendation systems:
	- Content-based filtering: Recommendations are made based on the user's own preferences
	- Collaborative filtering: Recommendations are made based on the preferences and behavior of similar users"""

	_APPLICATIONS_TEXT = """Common applications of Recommendation systems include:
	- E-Commerce Platforms 🛍️: Suggest products to users based on their browsing history, purchase patterns, and preferences.
	- Streaming Services 📽️: Recommend movies, TV shows, or songs based on users' viewing/listening history and preferences.
	- Social Media Platforms 📱: Suggest friends, groups, or content based on users' connections, interests, and engagement history.
	- Automotive and Navigation Systems 🗺️: Suggest optimal routes based on real-time traffic conditions, historical data, and user preferences.
	"""

	# These labels are compared verbatim further down to pick the section to render.
	_USE_CASE_LABELS = [
	    "Movie recommendation system 📽️",
	    "Hotel recommendation system 🛎️",
	]


	def _vertical_space(count=1):
	    """Render `count` single-space markdown elements as vertical padding."""
	    for _ in range(count):
	        st.markdown(" ")


	st.markdown("# Recommendation system")
	st.markdown("### What is a Recommendation System ?")
	st.info(_DEFINITION_TEXT)
	st.markdown(_METHODS_TEXT, unsafe_allow_html=True)

	_vertical_space(2)
	st.image("images/rs.png")
	_vertical_space()

	st.markdown(_APPLICATIONS_TEXT)
	_vertical_space()

	select_usecase = st.selectbox("Choose a use case", options=_USE_CASE_LABELS)

	st.divider()



	#####################################################################################################
	# MOVIE RECOMMENDATION SYSTEM #
	#####################################################################################################

	# Recommendation function
	def recommend(movie_name, nb):
	    """Return the `nb` nearest neighbours of `movie_name` with their posters.

	    Reads the module-level `movies` DataFrame, the fitted `model` and the
	    `csr_data` matrix, and calls `fetch_poster` for each neighbour.

	    Args:
	        movie_name: Title of the reference movie; must exist in `movies`
	            (an IndexError is raised otherwise).
	        nb: Number of movies to recommend.

	    Returns:
	        A tuple of three parallel lists: recommended titles, poster URLs
	        (`None` where the poster could not be fetched) and movie ids.
	    """
	    # NOTE(review): the index label found here is used directly as the row
	    # of `csr_data`; this assumes labels and matrix rows line up - confirm.
	    idx = movies[movies["title"] == movie_name].index[0]

	    # Ask for one extra neighbour because the first result is discarded
	    # below (presumably it is the queried movie itself).
	    _, indices = model.kneighbors(csr_data[idx], n_neighbors=nb + 1)
	    neighbours = movies.iloc[indices.ravel()[1:]]

	    recommend_movies_names = []
	    recommend_posters = []
	    movie_ids = []
	    # Title and id are read from the same neighbour row, so two movies that
	    # share a title can no longer be given each other's id.
	    for title, movie_id in zip(neighbours["title"], neighbours["movie_id"]):
	        movie_ids.append(movie_id)

	        # Posters are best-effort: a failed lookup must not abort the
	        # whole recommendation.
	        try:
	            poster = fetch_poster(movie_id)
	        except Exception:
	            poster = None
	        recommend_posters.append(poster)

	        recommend_movies_names.append(title)
	    return recommend_movies_names, recommend_posters, movie_ids

	# Get poster
	def fetch_poster(movie_id):
	    """Return the poster image URL for `movie_id` from the TMDB API.

	    Uses the module-level `api_key`.

	    Args:
	        movie_id: Movie identifier placed in the TMDB request path.

	    Returns:
	        The poster URL built from the `poster_path` field of the response.

	    Raises:
	        requests.RequestException: On network failure, timeout or a
	            non-success HTTP status.
	        ValueError: If the response carries no poster path.
	    """
	    response = requests.get(
	        f"https://api.themoviedb.org/3/movie/{movie_id}",
	        params={"api_key": api_key},
	        # Without a timeout a stalled connection would block the page forever.
	        timeout=10,
	    )
	    response.raise_for_status()

	    poster_path = response.json().get("poster_path")
	    if not poster_path:
	        raise ValueError(f"No poster available for movie {movie_id}")
	    return "https://image.tmdb.org/t/p/w500/" + poster_path



	if select_usecase == "Movie recommendation system 📽️":

	    # Badge colours paired with a movie's genres through zip(), so at most
	    # six genres are displayed per movie.
	    colors = ["#8ef", "#faa", "#afa", "#fea", "#8ef", "#afa"]
	    # Read as a module-level global by fetch_poster().
	    api_key = os.environ["MOVIE_RECOM_API"]

	    # Load data
	    path_data = r"data/movies"
	    path_models = r"pretrained_models/recommendation_system"

	    # NOTE(review): pickle runs arbitrary code on load; these files must
	    # remain trusted artifacts shipped with the app.
	    with open(os.path.join(path_data, "movies_dict2.pkl"), "rb") as file:
	        movies_dict = pickle.load(file)
	    movies = pd.DataFrame(movies_dict)
	    movies.drop_duplicates(inplace=True)

	    with open(os.path.join(path_data, "vote_info.pkl"), "rb") as file:
	        vote_info = pickle.load(file)
	    vote = pd.DataFrame(vote_info)

	    # Load model; `model` and `csr_data` are read as globals by recommend().
	    model = load_model_pickle(path_models, "model.pkl")
	    with open(os.path.join(path_data, "csr_data_tf.pkl"), "rb") as file:
	        csr_data = pickle.load(file)

	    # Description of the use case
	    st.markdown("""## Movie Recommendation System 📽️""")

	    st.markdown("""This use case showcases the use of recommender systems for movie recommendations using collaborative filtering. <br>
	The model recommends and ranks movies based on what users, who have also watched the chosen movie, have watched else on the platform. <br>
	""", unsafe_allow_html=True)
	    st.markdown(" ")

	    # User selection
	    selected_movie = st.selectbox("Select a movie", movies["title"].values[:-3])
	    selected_nb_movies = st.selectbox("Select a number of movies to recommend", np.arange(2, 7), index=3)

	    # Show user selection on the app
	    c1, c2 = st.columns([0.7, 0.3], gap="medium")
	    with c1:
	        new_movies = movies.rename({"movie_id": "id"}, axis=1).merge(vote, on="id", how="left")
	        # Filter once and reuse the matching rows for every displayed field.
	        selected_rows = new_movies.loc[new_movies["title"] == selected_movie]
	        description = selected_rows["description"].to_list()[0]
	        genre = selected_rows["genre"].to_list()[0]
	        vote_ = selected_rows["vote_average"].to_list()[0]
	        vote_count = selected_rows["vote_count"].to_list()[0]

	        list_genres = [(g.strip(), "", color) for color, g in zip(colors, genre.split(", "))]

	        st.header(selected_movie, divider="grey")
	        st.markdown(f"Synopsis: {description}")
	        annotated_text(["Genre(s): ", list_genres])
	        st.markdown(f"Rating: {vote_}:star:")
	        st.markdown(f"Votes: {vote_count}")

	        st.info(f"You've selected {selected_nb_movies} movies to recommend")
	        st.markdown(" ")

	        recommend_button = st.button("Recommend movies")

	    with c2:
	        # The poster is decorative: any lookup failure simply leaves the
	        # column empty instead of breaking the page.
	        try:
	            poster = fetch_poster(movies.loc[movies["title"] == selected_movie, "movie_id"].to_list()[0])
	            st.image(poster, width=300)
	        except Exception:
	            pass

	    # Run model and show results
	    if recommend_button:
	        st.text("Here are few Recommendations..")
	        names, posters, movie_ids = recommend(selected_movie, selected_nb_movies)
	        tab1, tab2 = st.tabs(["View movies", "View genres"])

	        with tab1:
	            cols = st.columns(int(selected_nb_movies))
	            for i, (col, name, poster_url, movie_id) in enumerate(zip(cols, names, posters, movie_ids)):
	                with col:
	                    # NOTE(review): nothing is rendered inside this expander;
	                    # kept so the page layout is unchanged.
	                    st.expander("See movie details")

	                    if poster_url is not None:
	                        st.image(poster_url)

	                    st.markdown(f"##### {i+1}. {name}")

	                    movie_rows = movies.loc[movies["movie_id"] == movie_id]
	                    genre = movie_rows["genre"].to_list()[0]
	                    list_genres = [(g.strip(), "", color) for color, g in zip(colors, genre.split(", "))]

	                    synopsis = movie_rows["description"].to_list()[0]
	                    st.markdown(synopsis)

	                    annotated_text(["Genre(s): ", list_genres])

	                    movie_votes = vote.loc[vote["id"] == movie_id]
	                    if movie_votes.empty:
	                        # No vote record for this movie: skip the rating
	                        # lines rather than fail on an empty lookup.
	                        continue
	                    st.markdown(f"""Rating: {movie_votes["vote_average"].values[0]}:star:""")
	                    st.markdown(f"Votes: {movie_votes['vote_count'].values[0]}")

	        with tab2:
	            # Count genres over every recommended movie; up to six can be
	            # requested, so a fixed "first five" slice would drop one.
	            recommended_genres = movies.loc[movies["movie_id"].isin(movie_ids), "genre"].to_list()
	            list_recom_genres = [g for genres in recommended_genres for g in genres.split(", ")]
	            df_recom_genres = pd.Series(list_recom_genres).value_counts().to_frame().reset_index(names="genre")
	            df_recom_genres["proportion (%)"] = (100 * df_recom_genres["count"] / df_recom_genres["count"].sum())

	            fig = px.bar(df_recom_genres, x='count', y='genre', color="genre", title='Most recommended genres', orientation="h")
	            st.plotly_chart(fig, use_container_width=True)







	#####################################################################################################
	# HOTEL RECOMMENDATION SYSTEM #
	#####################################################################################################


	# Load scaler with caching



	if select_usecase == "Hotel recommendation system 🛎️":

	@st.cache_data(ttl=3600)
	def get_scaler(df):
	    """Return a MinMaxScaler fitted on the 'Rating' and 'Price' columns of `df`.

	    The result is cached through `st.cache_data` with `ttl=3600`.
	    """
	    numeric_features = df[['Rating', 'Price']]
	    # fit() returns the fitted estimator itself.
	    return MinMaxScaler().fit(numeric_features)

	def recommend_hotels_with_location_and_beds(df, preferences, max_recommendations=5):
	    """Filter `df` by the user's preferences and rank the remaining hotels.

	    Uses the module-level fitted `scaler` to normalise 'Rating' and 'Price'
	    before computing cosine similarity against the preferred values.

	    Args:
	        df: Hotel DataFrame with 'City', 'Country', 'Number of bed',
	            'Rating' and 'Price' columns.
	        preferences: Dict with optional keys 'Location' (substring matched
	            against city or country, case-insensitive), 'Number of beds'
	            (exact match), 'Rating' and 'Price' (each a `(min, max)` pair).
	            The dict is not modified.
	        max_recommendations: Maximum number of hotels to return.

	    Returns:
	        `(hotels, None)` with the most similar hotels first, or
	        `(empty DataFrame, message)` when no hotel passes the filters.
	    """
	    filtered_df = df

	    # Filter by Location if specified (either city or country).
	    location = preferences.get('Location')
	    if location:
	        # regex=False: the location is plain text, not a pattern.
	        in_city = filtered_df['City'].str.contains(location, case=False, na=False, regex=False)
	        in_country = filtered_df['Country'].str.contains(location, case=False, na=False, regex=False)
	        filtered_df = filtered_df[in_city | in_country]

	    # Filter by Number of beds if specified.
	    if 'Number of beds' in preferences:
	        filtered_df = filtered_df[filtered_df['Number of bed'] == preferences['Number of beds']]

	    # Filter by Rating range if specified.
	    rating_range = preferences.get('Rating')
	    if rating_range is not None:
	        min_rating, max_rating = rating_range
	        filtered_df = filtered_df[filtered_df['Rating'].between(min_rating, max_rating)]

	    # Filter by Price range if specified.
	    price_range = preferences.get('Price')
	    if price_range is not None:
	        min_price, max_price = price_range
	        filtered_df = filtered_df[filtered_df['Price'].between(min_price, max_price)]

	    if filtered_df.empty:
	        message = "No hotels were found matching the specified criteria."
	        send_notification(message)
	        return pd.DataFrame(), message

	    # Collapse each range to its midpoint in local variables so the caller's
	    # dict keeps its (min, max) pairs; a missing range counts as 0.
	    target_rating = float(np.mean(rating_range)) if rating_range is not None else 0.0
	    target_price = float(np.mean(price_range)) if price_range is not None else 0.0

	    # Same column names as the frame the scaler is applied to below.
	    preferences_vector = pd.DataFrame([[target_rating, target_price]], columns=['Rating', 'Price'])
	    preferences_vector_normalized = scaler.transform(preferences_vector)

	    # Similarity between the preferred point and every remaining hotel.
	    filtered_features_normalized = scaler.transform(filtered_df[['Rating', 'Price']])
	    similarity_scores = cosine_similarity(preferences_vector_normalized, filtered_features_normalized)[0]

	    # argsort is ascending: keep the tail and reverse it for best-first order.
	    top_positions = similarity_scores.argsort()[-max_recommendations:][::-1]

	    # Positional selection avoids a label round-trip through `df`.
	    return filtered_df.iloc[top_positions], None


	def send_notification(message):
	    """Print `message` to standard output, prefixed with "Notification:".

	    Stand-in for a real delivery channel (e.g., email, SMS).
	    """
	    print(f"Notification: {message}")


	def country_info(country):
	    """Render the presentation card of `country` in the current container.

	    Shows a header with the country's emoji, a banner image, a description
	    and a checkbox that reveals the top places to visit.

	    Args:
	        country: Country name. Profiles exist for "Thailand", "France",
	            "Spain" and "Singapore"; any other value only gets a plain
	            header.
	    """
	    profiles = {
	        "Thailand": {
	            "image": "images/thailand.jpeg",
	            "emoji": "🏝️",
	            "description": """Description:
	Thailand seamlessly fuses ancient traditions with modern dynamism, creating an unparalleled tapestry for travelers.
	Renowned for its warm hospitality, vibrant culture, and delectable cuisine, Thailand offers an unforgettable experience for every adventurer.""",
	            "top_places": """
	- Bangkok: Immerse yourself in the hustle and bustle of Bangkok's streets, adorned with glittering temples and bustling markets. The Grand Palace and Khao San Road showcase the city's unique blend of tradition and modernity.
	- Chiang Mai: Nestled in the misty mountains of Northern Thailand, Chiang Mai captivates with ancient temples, lush landscapes, and vibrant night markets. The Old City exudes a unique atmosphere, while the surrounding hills offer tranquility.
	- Phuket: Thailand's largest island, Phuket, beckons beach lovers with its stunning white sands, vibrant nightlife, and water activities. It's a perfect blend of relaxation and excitement.""",
	        },
	        "France": {
	            "image": "images/france.jpeg",
	            "emoji": "⚜️",
	            "description": """Description:
	Indulge in the countries rich tapestry of art, culture, and gastronomy.
	From the romantic allure of Paris to the sun-kissed vineyards of Provence, every corner of this diverse country tells a unique story, promising an unforgettable journey for every traveler.""",
	            "top_places": """
	- Paris: Dive into the city's iconic landmarks such as the Eiffel Tower, Louvre Museum, and Notre-Dame Cathedral grace the skyline.
	- Provence: Visit the stunning Palais des Papes in Avignon, explore the colorful markets of Aix-en-Provence, and unwind in the serene beauty of the Luberon region.
	- Côte d'Azur: This stunning stretch of the French coastline is a captivating blend of azure waters, picturesque landscapes and charming villages.
	""",
	        },
	        "Spain": {
	            "image": "images/spain-banner.jpg",
	            "emoji": "☀️",
	            "description": """Description:
	Embark on an unforgettable journey where tradition and modernity coexist in harmony.
	From the lively streets of Barcelona to the sun-soaked beaches of Andalusia, Spain offers a captivating blend of history, culture, and natural beauty.
	""",
	            "top_places": """
	- Barcelona: Explore the iconic Sagrada Familia, stroll down the vibrant La Rambla, and soak in the Mediterranean ambiance at Barceloneta Beach.
	- Seville: Visit the awe-inspiring Alcázar, marvel at the Giralda Tower, and wander through the enchanting alleys of the Santa Cruz neighborhood.
	- Granada: Explore the Generalife Gardens, stroll through the Albayzín quarter with its narrow streets and white houses, and savor the views of the city from the Mirador de San Nicolás.
	""",
	        },
	        "Singapore": {
	            "image": "images/singapore.jpg",
	            "emoji": "🏙️",
	            "description": """Description:
	From gleaming skyscrapers to vibrant neighborhoods, this cosmopolitan gem in Southeast Asia promises an immersive journey into a world where tradition meets cutting-edge technology.""",
	            "top_places": """
	- Marina Bay Sands: Enjoy panoramic views from the SkyPark, take a dip in the infinity pool, and explore The Shoppes for luxury shopping and entertainment. At night, witness the mesmerizing light and water show at the Marina Bay Sands Skypark.
	- Gardens by the Bay: Explore the Flower Dome and Cloud Forest conservatories, and stroll through the scenic OCBC Skyway for breathtaking views of the gardens and city.
	- Sentosa Island: Escape to Sentosa Island, a resort destination offering a myriad of attractions. Relax on pristine beaches, visit Universal Studios Singapore for thrilling rides, and explore S.E.A. Aquarium for an underwater adventure.

	""",
	        },
	    }

	    profile = profiles.get(country)
	    if profile is None:
	        # No profile for this country: show a bare header instead of failing
	        # on unset image/description values.
	        st.header(f"{country}", divider="grey")
	        return

	    ###### STREAMLIT MARKDOWN ######
	    st.header(f"{country} {profile['emoji']}", divider="grey")
	    st.image(profile["image"])
	    st.markdown(profile["description"])

	    # The key is a plain string unique per country (a set literal was
	    # previously passed by accident).
	    see_top_places = st.checkbox("Top places to visit", key=f"top_places_{country}")
	    if see_top_places:
	        st.markdown(profile["top_places"])

	# ---- Hotel use case: introduction ----
	st.markdown("""## Hotel Recommendation System 🛎️""")

	st.info("""This use case shows how you can create personalized hotel recommendations using a recommendation system with content-based Filtering.
	Analyzing location, amenities, price, and reviews, the model suggests tailored hotel recommendation based on the user's preference.
	""")
	st.markdown(" ")

	path_hotels_data = r"data/hotels"

	# Load hotel data
	df = load_data_csv(path_hotels_data, "booking_df.csv")

	# ---- Clean data ----
	df = df.drop_duplicates()
	df["Country"] = df["Country"].apply(lambda x: "Spain" if x == "Espagne" else x)

	# Keep cities with at least five listings and rooms with at most six beds.
	# Filtering the counts Series directly avoids relying on the column name
	# that value_counts() produces, which differs between pandas versions.
	city_counts = df["City"].value_counts()
	list_cities = city_counts[city_counts >= 5].index
	# .copy() so the assignments below act on an independent frame, not a slice.
	df = df.loc[(df["City"].isin(list_cities)) & (df["Number of bed"] <= 6)].copy()
	df["Price"] = df["Price"].astype(int)

	# Rows with 0 beds get a bed count inferred from the price band.
	no_bed_info = df["Number of bed"] == 0
	df.loc[no_bed_info & (df["Price"] < 1000), "Number of bed"] = 1
	df.loc[no_bed_info & (df["Price"].between(1000, 2000)), "Number of bed"] = 2
	df.loc[no_bed_info & (df["Price"] > 2000), "Number of bed"] = 3

	# A rating of 0 is treated as missing and replaced by the rounded mean.
	df["Rating"] = df["Rating"].apply(lambda x: np.nan if x == 0 else x)
	df["Rating"] = df["Rating"].fillna(np.round(df["Rating"].mean(), 1))

	# Read as a module-level global by recommend_hotels_with_location_and_beds().
	scaler = get_scaler(df)

	col1, col2 = st.columns([0.3, 0.7], gap="large")

	with col1:
	    # Collect user preferences
	    st.markdown(" ")
	    st.markdown(" ")
	    st.markdown("")
	    list_countries = df["Country"].unique()
	    location = st.selectbox("Select a Country", list_countries, index=0)

	    list_nb_beds = df["Number of bed"].unique()
	    num_beds = st.selectbox("Number of beds", list_nb_beds, index=0)

	    # Plain Python numbers for the sliders, with defaults clamped to the
	    # data range so they can never fall outside the slider bounds.
	    rating_low, rating_high = float(df["Rating"].min()), float(df["Rating"].max())
	    price_low, price_high = int(df["Price"].min()), int(df["Price"].max())
	    default_min_rating = min(max(5.0, rating_low), rating_high)
	    default_max_price = max(price_low, min(10000, price_high))

	    min_rating, max_rating = st.slider("Range of ratings", min_value=rating_low, max_value=rating_high, step=0.1, value=(default_min_rating, rating_high))
	    min_price, max_price = st.slider("Range of room prices", min_value=price_low, max_value=price_high, step=10, value=(price_low, default_max_price))

	    # Convert price range sliders to integer values
	    min_price = int(min_price)
	    max_price = int(max_price)

	with col2:
	    country_info(location)

	preferences = {
	    'Location': location,
	    'Number of beds': num_beds,
	    'Rating': [min_rating, max_rating],
	    'Price': [min_price, max_price],
	}

	if st.button("Recommend Hotels"):
	    # Default number of recommendations to show
	    max_recommendations = 5

	    recommended_hotels, message = recommend_hotels_with_location_and_beds(df, preferences, max_recommendations)

	    if recommended_hotels.empty:
	        # An empty result comes from the filters, so asking again for fewer
	        # hotels could not change it: report the message directly.
	        st.error(message)
	    else:
	        st.info("Hotels were recommended based on how similar they were to the users preferences.")
	        st.markdown(" ")
	        for rank, (_, hotel) in enumerate(recommended_hotels.iterrows(), start=1):
	            col1_, col2_ = st.columns([0.4, 0.6], gap="medium")

	            with col1_:
	                st.image("images/room.jpg", width=100)
	                st.markdown(f"### {rank}: {hotel['Hotel Name']}")
	                st.markdown(f"""{hotel['Room Type']} <br>
	with {hotel['Bed Type']}
	""", unsafe_allow_html=True)
	            with col2_:
	                st.markdown(" ")
	                st.markdown(" ")
	                annotated_text("Number of beds : ", (f"{hotel['Number of bed']}", "", "#faa"))
	                annotated_text("City: ", (f"{hotel['City']}", "", "#afa"))
	                annotated_text("Rating: ", (f"{hotel['Rating']}", "", "#8ef"))
	                annotated_text("Price: ", (f"{hotel['Price']}$", "", "#fea"))

	            st.divider()