Spaces:
Sleeping
Sleeping
File size: 5,513 Bytes
b9fc179 22f29b2 b9fc179 72d02aa b9fc179 5bae2a3 01c6427 492193c 33fba6c 77679d4 492193c 88c3e60 492193c 5760476 88c3e60 77679d4 88c3e60 492193c 22f29b2 33fba6c 5bae2a3 22f29b2 b70c12c 33fba6c 5bae2a3 22f29b2 b70c12c 33fba6c 5bae2a3 c296fbd 492193c c1f5793 0d65562 c1f5793 492193c b9fc179 c1f5793 b9fc179 c1f5793 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
import streamlit as st
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import networkx as nx
import random
# Seed both the stdlib and NumPy global random generators with a fixed value
# so that anything drawing from those global states behaves the same on each
# run of the script.
random.seed(42)
np.random.seed(42)
# Lay the page out using the "wide" layout.
# NOTE(review): Streamlit has historically required set_page_config to be the
# first Streamlit command in the script — keep it above every other `st.` call.
st.set_page_config(layout="wide")
# Load and process data
# The first CSV column becomes the row index (presumably the spice name —
# confirm against the file). Every column except 'Flavor Description' is
# treated as a per-cuisine flag where the value 1 marks membership.
df = pd.read_csv('spices_by_cuisine_with_all_flavors.csv', index_col=0)
pivot = df.drop(columns=['Flavor Description']).sort_index()

# Map each column of `pivot` to the row labels flagged with 1 in that column.
# A dict comprehension is used so no loop variable (previously named `filter`,
# which shadowed the builtin) is left behind at module level.
cuisines = {
    cuisine: pivot[pivot[cuisine] == 1].index.to_list()
    for cuisine in pivot.columns
}

# Transposed view: rows and columns of `pivot` swapped, rows sorted by label.
pivot_t = pivot.T.sort_index()

# Map each column of the transposed table to the row labels flagged with 1.
spices = {
    spice: pivot_t[pivot_t[spice] == 1].index.to_list()
    for spice in pivot_t.columns
}
def similarity(ratings, kind='user', epsilon=1e-9):
    """Return a normalised dot-product (cosine-style) similarity matrix.

    Args:
        ratings: 2-D array-like supporting ``.dot`` and ``.T``.
        kind: ``'user'`` compares rows of ``ratings`` with each other
            (``ratings @ ratings.T``); ``'item'`` compares columns
            (``ratings.T @ ratings``).
        epsilon: Small constant added to every entry of the raw dot-product
            matrix. It keeps the diagonal strictly positive so the
            normalisation below never divides by zero for all-zero vectors.

    Returns:
        Square matrix whose entry ``[i, j]`` is the smoothed dot product of
        vectors ``i`` and ``j`` divided by the square roots of their smoothed
        self dot products.

    Raises:
        ValueError: If ``kind`` is neither ``'user'`` nor ``'item'``.
    """
    if kind == 'user':
        sim = ratings.dot(ratings.T) + epsilon
    elif kind == 'item':
        sim = ratings.T.dot(ratings) + epsilon
    else:
        # Previously an unknown kind fell through and crashed with an
        # UnboundLocalError on `sim`; fail with a clear message instead.
        raise ValueError(f"kind must be 'user' or 'item', got {kind!r}")

    # Row vector of per-vector norms; dividing by it and then by its
    # transpose scales entry [i, j] by norm_i * norm_j.
    norms = np.array([np.sqrt(np.diagonal(sim))])
    return sim / norms / norms.T
# Column labels of the transposed table, kept under their original name.
pivot_names = pivot_t.columns

# Plain NumPy copy of the transposed table; each row is one vector to compare.
pivot_np = np.array(pivot_t)

# Row-vs-row similarity, labelled on both axes with the rows of `pivot_t`.
row_labels = pivot_t.index.values
cuisine_similarity = pd.DataFrame(
    similarity(pivot_np, kind='user'),
    index=row_labels,
    columns=row_labels,
)
st.title('Spices Across Cuisines')

# Three side-by-side panels: lookup by cuisine, lookup by spice, and the
# cuisines most similar to a chosen one.
col1, col2, col3 = st.columns(3)

with col1:
    st.subheader('By Cuisine')
    select_cuisine = st.selectbox(
        'Select a cuisine to view the top 10 spices',
        list(cuisines),
    )
    st.write(
        f'The top 10 ingredients in {select_cuisine} are:',
        cuisines[select_cuisine],
    )

with col2:
    st.subheader('By Spice')
    select_spice = st.selectbox(
        'Select a spice to view which cuisines it is present in',
        list(spices),
    )
    st.write(
        f'{select_spice} is part of the following cuisines:',
        spices[select_spice],
    )

with col3:
    st.subheader("Similar Cuisines")
    select_cuisine_sim = st.selectbox(
        'Select a cuisine to view the 10 most similar cuisines by spices',
        list(cuisines),
    )
    # Remove the selected cuisine by label rather than assuming it sorts to
    # position 0: another cuisine with an identical profile ties with it at
    # the top, and a positional slice could then drop the wrong entry and
    # list the selection as "similar to itself".
    most_similar = (
        cuisine_similarity[select_cuisine_sim]
        .drop(labels=select_cuisine_sim)
        .sort_values(ascending=False)
        .head(10)
        .index.to_list()
    )
    st.write(f'{select_cuisine_sim} is most similar to:', most_similar)
# Sum the flags of every row of `pivot` across its columns, largest first.
spice_totals = pivot.T.sum().sort_values(ascending=False)

# Turn the totals into a two-column table; the unnamed value column (0) is
# renamed to "Count".
# NOTE(review): the chart below expects the label column to be called
# "Spice", i.e. the CSV's index column header — confirm against the file.
count = pd.DataFrame(spice_totals.reset_index().rename(columns={0: "Count"}))

fig_bar = px.bar(
    count,
    x="Spice",
    y="Count",
    title="Most Frequently Occuring Spices Across Cuisines",
)
st.plotly_chart(fig_bar, use_container_width=True)
# Build an undirected graph linking every cuisine to the spices it uses.
G = nx.Graph()

# One 'cuisine' node per data column, one 'spice' node per row flagged 1 in
# that column, and an edge between the two. Insertion order is the same as
# walking the columns and rows in file order.
for cuisine_name in df.columns:
    if cuisine_name == "Flavor Description":
        continue  # descriptive text column, not a cuisine
    G.add_node(cuisine_name, type='cuisine')
    linked_spices = df[df[cuisine_name] == 1].index.tolist()
    G.add_nodes_from(linked_spices, type='spice')
    G.add_edges_from((cuisine_name, linked) for linked in linked_spices)

# 2-D coordinates for every node from the spring layout.
pos = nx.spring_layout(G)
# Create edge trace
# Collect the coordinates in plain lists and build the trace once. Growing
# the trace's own x/y tuples with `+=` inside the loop re-copies (and
# re-validates) the whole sequence for every edge.
edge_x = []
edge_y = []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    # The trailing None separates consecutive segments so each edge is drawn
    # as its own line instead of being joined to the next one.
    edge_x.extend((x0, x1, None))
    edge_y.extend((y0, y1, None))

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines',
)
# Assign a unique color to each cuisine
# Work from a single name-filtered list of cuisine columns so the hue step
# and the hue index agree. Previously the count assumed 'Flavor Description'
# was the last column while the index was taken over all columns.
cuisine_names = [name for name in df.columns if name != "Flavor Description"]
# Integer hue spacing around the 360-degree colour wheel; guarded so an
# empty cuisine list does not divide by zero.
hue_step = 360 // len(cuisine_names) if cuisine_names else 0
cuisine_colors = {
    name: f"hsl({position * hue_step}, 80%, 50%)"
    for position, name in enumerate(cuisine_names)
}
# Gather per-node plotting data in plain lists first, then build each trace
# once. Appending to the traces' tuple-valued properties with `+=` inside the
# loop re-copies (and re-validates) the whole sequence for every node.
cuisine_x, cuisine_y = [], []
cuisine_labels, cuisine_hover, cuisine_marker_colors = [], [], []
spice_x, spice_y = [], []
spice_labels, spice_hover = [], []

for node in G.nodes():
    x, y = pos[node]
    if G.nodes[node]['type'] == 'cuisine':
        cuisine_x.append(x)
        cuisine_y.append(y)
        cuisine_labels.append(node)
        cuisine_marker_colors.append(cuisine_colors[node])
        # Collect all spices associated with this cuisine
        spices_associated = df[df[node] == 1].index.tolist()
        cuisine_hover.append(f"{node} uses: {', '.join(spices_associated)}")
    else:
        spice_x.append(x)
        spice_y.append(y)
        spice_labels.append(node)
        # Collect all cuisines that use this spice
        cuisines_using_spice = df.columns[df.loc[node] == 1].tolist()
        spice_hover.append(
            f"{node} is used in: {', '.join(cuisines_using_spice)}"
        )

# Create node trace for cuisines: larger markers, one colour per cuisine.
node_trace_cuisines = go.Scatter(
    x=cuisine_x,
    y=cuisine_y,
    text=cuisine_labels,
    hovertext=cuisine_hover,
    mode='markers+text',
    hoverinfo='text',
    marker=dict(
        showscale=False,
        size=20,
        color=cuisine_marker_colors,
        line=dict(width=0),
    ),
)

# Create node trace for spices: smaller grey markers.
node_trace_spices = go.Scatter(
    x=spice_x,
    y=spice_y,
    text=spice_labels,
    hovertext=spice_hover,
    mode='markers+text',
    hoverinfo='text',
    marker=dict(
        showscale=False,
        color='grey',
        size=10,
        line=dict(width=0),
    ),
)
# Create the network graph figure with updated hover information
# The title size is set through `title.font.size`; the flat `titlefont`
# layout attribute is deprecated and rejected by newer plotly releases.
fig_graph = go.Figure(
    data=[edge_trace, node_trace_cuisines, node_trace_spices],
    layout=go.Layout(
        title=dict(
            text="Network Graph of Cuisines and their Spices",
            font=dict(size=16),
        ),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        # Hide grid lines, zero lines and tick labels: the layout coordinates
        # carry no meaning of their own.
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    ),
)
st.plotly_chart(fig_graph, use_container_width=True)