Spaces:

GoTogether
/

App_Prototype

Sleeping

File size: 22,459 Bytes

import streamlit as st
import torch
import numpy as np
import time
import string
import pandas as pd
import numpy as np
from transformers import BertTokenizer, BertModel
from collections import defaultdict, Counter
from tqdm.auto import tqdm
from sklearn.metrics.pairwise import cosine_similarity
import time
import random

#Loading the model
@st.cache_resource
def get_models():
  st.write('Loading the model...')
  tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
  model = BertModel.from_pretrained("bert-base-uncased")
  st.write("_The model is loaded and ready to use! :tada:_")
  return model, tokenizer

#convert numpy arrays from strings back to arrays
def str_to_numpy(array_string):
    array_string = array_string.replace('\n', '').replace('[','').replace(']','')
    numpy_array = np.fromstring(array_string, sep=' ')
    numpy_array = numpy_array.reshape((1, -1))
    return numpy_array

@st.cache_data  # 👈 Add the caching decorator
def load_data():
    vectors_df = pd.read_csv('filtered_restaurants_dataframe_with_embeddings.csv',  encoding="utf-8")
    embeds = dict(enumerate(vectors_df['Embeddings']))
    rest_names = list(vectors_df['Names'])
    vectors_df['Weights'] = [1]*len(vectors_df)
    return embeds, rest_names, vectors_df

#type: dict; keys: 0-n
restaurants_embeds, rest_names, init_df = load_data()

model, tokenizer = get_models()

#a function that takes a sentence and converts it into embeddings
def get_bert_embeddings(sentence, model, tokenizer):
    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
        embeddings = outputs.last_hidden_state.mean(dim=1)  # Average pool over tokens
    return embeddings

# a function that return top-K best restaurants
def compute_cos_sim(input):
    query = ""
    query += input

    # for el in st.session_state.preferences_1:
    #     query += el
    # for el in st.session_state.preferences_2:
    #     query += el
    
    # st.write("Your query is", query)
    # st.write("Your restrictions are", st.session_state.restrictions)
    embedded_query = get_bert_embeddings(query, model, tokenizer)
    embedded_query = embedded_query.numpy()
    top_similar = np.array([])
    for i in range(len(restaurants_embeds)):
        name = rest_names[i]
        top_similar = np.append(top_similar, cosine_similarity(embedded_query, str_to_numpy(restaurants_embeds[i]))[0][0])
    
    st.session_state.df['cos_sim'] = top_similar.tolist()
    weights = np.array(st.session_state.df['Weights'])
    #multiply weights by the cosine similarity
    top_similar_weighted = dict(enumerate(np.multiply(top_similar, weights)))
    
    st.session_state.df['Relevancy'] = top_similar_weighted.values()
    return st.session_state.df

def sort_by_relevancy(k):
    '''
    k - int - how many top-matching places to show
    '''
    top_similar_weighted = dict(enumerate(st.session_state.precalculated_df['Relevancy']))
    #sort in the descending order
    top_similar_weighted = dict(sorted(top_similar_weighted.items(), key=lambda item: item[1], reverse=True))
    #leave only K recommendations
    top_k_similar = dict([(key, value) for key, value in top_similar_weighted.items()][:k])
    #get restaurant names
    names = [rest_names[i] for i in top_k_similar.keys()]
    result = dict(zip(names, top_k_similar.values()))
    return result

def sort_by_price(k):
    '''
    k - int - how many top-matching places to show
    '''
    relevance = np.array(st.session_state.precalculated_df['Relevancy'])
    prices = np.array([st.session_state.price[str(val)] for val in st.session_state.precalculated_df['Price']])
    top_similar_by_price = dict(enumerate(np.multiply(relevance, prices)))
    st.session_state.precalculated_df['Sort_price'] = top_similar_by_price.values()
    
    #sort in the descending order
    top_similar_by_price = dict(sorted(top_similar_by_price.items(), key=lambda item: item[1], reverse=True))
    #leave only K recommendations
    top_k_similar = dict([(key, value) for key, value in top_similar_by_price.items()][:k])
    #get restaurant names
    names = [rest_names[i] for i in top_k_similar.keys()]
    result = dict(zip(names, top_k_similar.values()))
    return result

def sort_by_rating(k):
    '''
    k - int - how many top-matching places to show
    '''
    relevance = np.array(st.session_state.precalculated_df['Relevancy'])
    rating = np.array(st.session_state.precalculated_df['Rating'])
    top_similar_by_rating = dict(enumerate(np.multiply(relevance, rating)))
    
    ## Combine the three lists into a list of tuples (name, score, price)
    # restaurant_data = list(zip(rest_names, relevance, rating))

    # # Sort the combined list based on rating (index 2) in descending order and relevance (index 1) in descending order
    # sorted_data = sorted(restaurant_data, key=lambda x: (-x[1], -x[2]))

    # # Extract the sorted lists
    # sorted_restaurant_names, sorted_relevance, sorted_rating = zip(*sorted_data)
    # result = {sorted_restaurant_names[i]: sorted_relevance[i] for i in range(k)}

    st.session_state.precalculated_df['Sort_rating'] = top_similar_by_rating.values()
    
    #sort in the descending order
    top_similar_by_rating = dict(sorted(top_similar_by_rating.items(), key=lambda item: item[1], reverse=True))
    #leave only K recommendations
    top_k_similar = dict([(key, value) for key, value in top_similar_by_rating.items()][:k])
    #get restaurant names
    names = [rest_names[i] for i in top_k_similar.keys()]
    result = dict(zip(names, top_k_similar.values()))
    
    return result

#combines 2 users preferences into 1 string 
def get_combined_preferences(user1, user2):
    #TODO: optimize for more users
    shared_pref = '' 
    for pref in user1:
        shared_pref += pref.lower()
        shared_pref += " "
    shared_pref += " "
    for pref in user2:
        shared_pref += pref.lower()
        shared_pref += " "
        
    freq_words = Counter(shared_pref.split())
    
    preferences = [pref for pref in st.session_state.preferences_1 if ((pref.capitalize() in st.session_state.food) or (pref in st.session_state.ambiance))]
    preferences.extend([pref for pref in st.session_state.preferences_2 if ((pref.capitalize() in st.session_state.food) or (pref in st.session_state.ambiance))])
    translator = str.maketrans('', '', string.punctuation)
    preferences = [word.translate(translator) for phrase in preferences for word in phrase.split() if len(word) > 0]
    st.session_state.fixed_preferences = [word.lower() for word in preferences]

    return shared_pref, freq_words

def filter_places(restrictions):
    #punish the weight of places that don't fit restrictions
    # st.write("Here are the restrictions you provided:")
    # st.write(restrictions)
    taboo = set([word.lower() for word in restrictions])
    for i in range(len(st.session_state.df)):
        descr = [word.lower() for word in st.session_state.df['Strings'][i].split()]
        name = st.session_state.df['Names'][i]
        for criteria in taboo:
            if criteria not in descr:
                st.session_state.df['Weights'][i] = 0.1 * st.session_state.df['Weights'][i]
            

    return st.session_state.df
        
def promote_places():
    '''
    input type: dict()
    a function that takes most common words, checks if descriptions fit them, increases their weight if they do
    '''
    #punish the weight of places that don't fit restrictions
    # st.write("Here are the most common preferences you provided:")
    # st.write(st.session_state.fixed_preferences)
    
    preferences = st.session_state.fixed_preferences

    for i in range(len(st.session_state.df)):
        descr = [word.lower() for word in st.session_state.df['Strings'][i].split()]
        name = st.session_state.df['Names'][i]
        for pref in preferences:
            if pref.lower() in descr:
                st.session_state.df['Weights'][i] = 1.5 * st.session_state.df['Weights'][i]

    return st.session_state.df

def generate_results():
    st.session_state.results['Price'] = sort_by_price(10)
    st.session_state.results['Rating'] = sort_by_rating(10)
    st.session_state.results['Relevancy (default)'] = sort_by_relevancy(10)
    st.session_state.results['Distance'] = sort_by_relevancy(10)
    # with st.spinner("Sorting your results by relevancy..."):
        
def get_normalized_val(values):
    if st.session_state.sort_by == 'Relevancy (default)' or st.session_state.sort_by == 'Distance':
        # Find the minimum and maximum values
        min_value = min(st.session_state.precalculated_df['Relevancy'])
        max_value = max(st.session_state.precalculated_df['Relevancy'])
    elif st.session_state.sort_by == 'Rating':
        min_value = min(st.session_state.precalculated_df['Sort_rating'])
        max_value = max(st.session_state.precalculated_df['Sort_rating'])
    elif st.session_state.sort_by == 'Price':
        min_value = min(st.session_state.precalculated_df['Sort_price'])
        max_value = max(st.session_state.precalculated_df['Sort_price'])
    # Define a lambda function for normalization
    normalize = lambda x: 100 * round((x - min_value) / (max_value - min_value), 3)
    # Use the map function to apply the lambda function to all values
    normalized_results = dict(map(lambda item: (item[0], normalize(item[1])), values.items()))
    return normalized_results

if 'preferences_1' not in st.session_state:
  st.session_state.preferences_1 = []
  
if 'preferences_2' not in st.session_state:
  st.session_state.preferences_2 = []
  
if 'fixed_preferences' not in st.session_state:
  st.session_state.fixed_preferences = []
  
if 'additional_1' not in st.session_state:
  st.session_state.additional_1 = []
  
if 'additional_2' not in st.session_state:
  st.session_state.additional_2 = []
  
if 'food' not in st.session_state:
  st.session_state.food = ['Coffee', 'Italian', 'Mexican', 'Chinese', 'Indian', 'Asian', 'Fast food', 'Other']
  
if 'ambiance' not in st.session_state:
  st.session_state.ambiance = ['Romantic date', 'Friends catching up', 'Family gathering', 'Big group', 'Business-meeting', 'Other']

if 'restrictions' not in st.session_state:
    st.session_state.restrictions = []
    
if 'price' not in st.session_state:
    st.session_state.price = {'$': 2, '₩': 2, '$$': 1, '₩₩': 1, '$$$': 0.5, '$$$$': 0.1, "nan": 1}

if 'sort_by' not in st.session_state:
    st.session_state.sort_by = ''
    
if 'options' not in st.session_state:
    st.session_state.options = ['Relevancy (default)', 'Price', 'Rating', 'Distance']

if 'df' not in st.session_state:
    st.session_state.df = init_df
    
if 'precalculated_df' not in st.session_state:
    st.session_state.precalculated_df = pd.DataFrame()
    
if 'results' not in st.session_state:
    st.session_state.results = {}
    
if 'fixed_restrictions' not in st.session_state:
    st.session_state.fixed_restrictions = []

# Configure Streamlit page and state
st.title("GoTogether!")
st.markdown("Tell us about your preferences!")
st.caption("In section 'Others', you can describe any wishes.")


# Define custom CSS styles for the orange and blue rectangles
css = """
<style>
    .orange-box {
        background-color: orange;
        border: 2px solid darkred;
        border-radius: 10px;
        display: inline-block;
        padding: 5px 10px;
        margin: 0px;
    }
    
    .blue-box {
        background-color: #0077b6;
        border: 2px solid navy;
        border-radius: 10px;
        display: inline-block;
        padding: 5px 10px;
        color: white;
    }
    
    .green-box {
        border: 2px solid #004d00;  /* Dark green contour */
        border-radius: 10px;
        background-color: #4CAF50;  /* green background */
        display: inline-block;
        padding: 5px 10px;
        color: #FFFFFF;  /* White text color */
    }
    
    .violet-box {
        border: 2px solid #8a2be2;  /* Violet contour */
        border-radius: 10px;
        background-color: #4169E1;  /* Blue background */
        display: inline-block;
        padding: 5px 10px;
        color: #FFFFFF;  /* White text color */
    }
</style>
"""

text_css = """
<style>
    .text {
        font-weight: bold;
        color: #0077b6;  /* Sea-blue text color */
        margin-right: 1px;
    }
</style>
"""


# options_disability_1 = st.multiselect(
#      'Do you need a wheelchair?',
#      ['Yes', 'No'], ['No'], key=101)

# if options_disability_1 == 'Yes':
#     st.session_state.restrictions.append('Wheelchair')

st.markdown(css, unsafe_allow_html=True)
st.markdown(f'<div class="violet-box">User 1</div>', unsafe_allow_html=True)

food_1 = st.selectbox('Select the food type you prefer', st.session_state.food, key=1)
if food_1 == 'Other':
    food_1 = st.text_input(label="Your description", placeholder="What kind of food would you like to eat?", key=10)

ambiance_1 = st.selectbox('What describes your occasion the best?', st.session_state.ambiance, key=2)
if ambiance_1 == 'Other':
    ambiance_1 = st.text_input(label="Your description", placeholder="How would you describe your meeting?", key=11)
    
options_food_1 = st.multiselect(
    'Do you have any dietary restrictions?',
    ['Vegan', 'Vegetarian', 'Halal'], key=100)

additional_1 = st.text_input(label="Your description", placeholder="Anything else you wanna share?", key=102)  
    
with_kids = st.checkbox('I will come with kids', key=200)
    
st.markdown(css, unsafe_allow_html=True)
st.markdown(f'<div class="violet-box">User 2</div>', unsafe_allow_html=True)

food_2 = st.selectbox('Select the food type you prefer', st.session_state.food, key=3)
if food_2 == 'Other':
    food_2 = st.text_input(label="Your description", placeholder="What kind of food would you like to eat?", key=4)

ambiance_2 = st.selectbox('What describes your occasion the best?', st.session_state.ambiance, key=5)
if ambiance_2 == 'Other':
    ambiance_2 = st.text_input(label="Your description", placeholder="How would you describe your meeting?", key=6)

options_food_2 = st.multiselect(
    'Do you have any dietary restrictions?',
    ['Vegan', 'Vegetarian', 'Halal'], key=7)

additional_2 = st.text_input(label="Your description", placeholder="Anything else you wanna share?", key=8) 
     
with_kids_2 = st.checkbox('I will come with kids', key=201)
    
submitted = st.button('Submit!')
    
if submitted:
    with st.spinner('Processing your request...'):
        time.sleep(1)
        if len(st.session_state.preferences_1) == 0:
            st.session_state.preferences_1.append(food_1)
            # if food_1 in st.session_state.food:
            #     st.session_state.preferences_1.append(food_1)
            # else:
            #     st.session_state.additional_1.append(food_1_o)
            st.session_state.preferences_1.append(ambiance_1)
                
            # if ambiance_1 in st.session_state.ambiance:
            #     st.session_state.preferences_1.append(ambiance_1)
            # else:
            #     st.session_state.additional_1.append(ambiance_1_o)
            st.session_state.restrictions.extend(options_food_1)
            if with_kids:
                st.session_state.restrictions.append('kids')
            if additional_1:
                st.session_state.preferences_1.append(additional_1)
    
        if len(st.session_state.preferences_2) == 0:
            st.session_state.preferences_2.append(food_2)
            # if food_2 in st.session_state.food:
            #     st.session_state.preferences_2.append(food_2)
            # else:
            #     st.session_state.additional_2.append(food_2_o)
            st.session_state.preferences_2.append(ambiance_2)
            # if ambiance_2 in st.session_state.ambiance:
            #     st.session_state.preferences_2.append(ambiance_2)
            # else:
            #     st.session_state.additional_2.append(ambiance_2_o)
            st.session_state.restrictions.extend(options_food_2)
            if additional_2:
                st.session_state.preferences_2.append(additional_2)
            if with_kids_2:
                st.session_state.restrictions.append('kids')

    st.success("Thanks, we received your preferences!")
    
else:
    st.write('☝️ Describe your preferences!')


submit = st.button("Find best matches!", type='primary')
            
if submit or (not st.session_state.precalculated_df.empty):
    with st.spinner("Please wait while we are finding the best solution..."):
        if st.session_state.precalculated_df.empty:
            query = get_combined_preferences(st.session_state.preferences_1, st.session_state.preferences_2)
            #sort places based on restrictions
            st.session_state.precalculated_df = filter_places(st.session_state.restrictions)
            st.session_state.fixed_restrictions = st.session_state.restrictions
            #sort places by elevating preferrences
            
            st.session_state.precalculated_df = promote_places()
            st.session_state.precalculated_df = compute_cos_sim(query[0])
        sort_by = st.selectbox(('Sort by:'), st.session_state.options, key=400, 
                               index=st.session_state.options.index('Relevancy (default)'))
        if sort_by:
            st.session_state.sort_by = sort_by
            with st.spinner(f"Sorting your results by {sort_by.lower()}..."):
                if len(st.session_state.results) == 0:
                    generate_results()
                results = st.session_state.results[sort_by]
                if sort_by == 'Distance':
                    st.write(":pensive: Sorry, we are still working on this option. For now, the results are sorted by relevance")      
                k = 10
                st.write(f"Here are the best {k} matches to your preferences:")
                i = 1
                nums = list(range(1, 11))
                words = ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'one: :zero']
                nums_emojis = dict(zip(nums, words))
                results = get_normalized_val(results)
                for name, score in results.items(): 
                    condition = st.session_state.precalculated_df['Names'] == name
                    rating = st.session_state.precalculated_df.loc[condition, 'Rating'].values[0]
                    with st.expander(f":{nums_emojis[i]}: **{name}** **({str(rating)}**:star:): match score: {score}%"):
                        
                        #f":{nums_emojis[i]}: **{name}** **({str(rating)}**:star:) :", 'match score:', score
                        try:
                            if type(st.session_state.precalculated_df.loc[condition, 'Price'].values[0]) == str:
                                st.write("Price category:", st.session_state.precalculated_df.loc[condition, 'Price'].values[0])
                        except:
                            pass
                        
                        descr = st.session_state.precalculated_df.loc[condition, 'Strings'].values[0]
                        for word in set([word.lower() for word in descr.split()]):
                            if word in [el.lower() for el in st.session_state.fixed_preferences]:
                                st.markdown(f'✅{word.capitalize()}')
                            if word in [el.lower() for el in st.session_state.fixed_restrictions]:
                                if word == 'kids':
                                    st.markdown(f'✅Good for kids')
                                else: 
                                    st.markdown(f'✅{word.capitalize()}')
                               
                        
                        #Restaurant category
                        type = [item for item in eval(st.session_state.precalculated_df.loc[condition, 'Category'].values[0])]
                        st.markdown(text_css, unsafe_allow_html=True)
                        st.markdown('<div class="text">Category</div>', unsafe_allow_html=True)
                        
                        # Display HTML with the custom styles
                        for word in type:
                            st.markdown(css, unsafe_allow_html=True)
                            st.markdown(f'<div class="blue-box">{word}</div>', unsafe_allow_html=True)
                           
                        keywords = [item[0] for item in eval(st.session_state.precalculated_df.loc[condition, 'Keywords'].values[0]) if item[1] > 2]
                        if len(keywords) > 0:
                            st.markdown(text_css, unsafe_allow_html=True)
                            st.markdown('<div class="text">Other users say:</div>', unsafe_allow_html=True)
                        
                        for pair in keywords[:3]:
                            st.markdown(css, unsafe_allow_html=True)
                            st.markdown(f'<div class="orange-box">{pair[0]} {pair[1]}</div>', unsafe_allow_html=True)                        
                        
                        url = st.session_state.precalculated_df.loc[condition, 'URL'].values[0]
                        st.write(f"_Check on the_ [_map_]({url})")
                         
                        # st.write(descr)
                                        
                    i+=1
                            
                # st.markdown("This is a text with <span style='font-size: 20px;'>bigger</span> and <i>italic</i> text.", unsafe_allow_html=True)
                # st.markdown("<span style='font-size: 24px;'>This is larger text</span>", unsafe_allow_html=True)

        
            st.session_state.preferences_1, st.session_state.preferences_2 = [], []
            # st.session_state.restrictions = []
        
stop = st.button("New search!", type='primary', key=500)  
if stop:               
    st.write("New search is launched. Please specify your preferences in the form!")
    st.session_state.preferences_1, st.session_state.preferences_2 = [], []
    st.session_state.restrictions = []
    st.session_state.additional_1, st.session_state.additional_2 = [], []
    st.session_state.sort_by = ""
    st.session_state.df = init_df
    st.session_state.precalculated_df = pd.DataFrame()
    st.session_state.results = {}
    st.session_state.fixed_preferences = []