Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import plotly.express as px | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
import networkx as nx | |
import plotly.graph_objects as go | |
from itertools import combinations | |
def generate_popularity_trends(df):
    """Render the "Popularity Trends Over Time" section.

    Shows two tabs: a line chart of mean ``Popularity`` per ``Decade`` and a
    scatter plot of ``Popularity`` against ``Year`` for every row.

    Args:
        df: pandas DataFrame of tracks. Columns are looked up by name; when a
            column required by a chart is absent, an error box is shown in
            that tab instead of raising.
    """
    st.header("Popularity Trends Over Time")
    tab1, tab2 = st.tabs(["Average Popularity", "Individual Songs"])

    with tab1:
        st.markdown("<span style='color:blue'>**Average Popularity by Decade**</span>: Tracks how song popularity has <span style='color:red'>changed over time</span>. This <span style='color:green'>blue</span> line chart highlights peaks.", unsafe_allow_html=True)
        if 'Decade' not in df.columns:
            st.error("Cannot plot: 'Decade' column missing.")
        elif 'Popularity' not in df.columns:
            # Without this guard the groupby below raises KeyError.
            st.error("Cannot plot: 'Popularity' column missing.")
        else:
            avg_pop_by_decade = (
                df.groupby('Decade')['Popularity'].mean().reset_index()
            )
            fig1 = px.line(
                avg_pop_by_decade,
                x='Decade',
                y='Popularity',
                title='Average Popularity by Decade',
                color_discrete_sequence=['blue'],
            )
            fig1.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig1)

    with tab2:
        st.markdown("<span style='color:blue'>**Song Popularity Over Time**</span>: Highlights individual trends with <span style='color:red'>red</span> points, showing <span style='color:green'>green</span> details on hover.", unsafe_allow_html=True)
        if 'Year' not in df.columns:
            st.error("Cannot plot: 'Year' column missing.")
        elif 'Popularity' not in df.columns:
            st.error("Cannot plot: 'Popularity' column missing.")
        else:
            # Hover columns are optional extras: only request the ones that
            # exist so a missing name does not abort the whole chart.
            hover_cols = [
                col for col in ('Track Name', 'Artist Name(s)')
                if col in df.columns
            ]
            fig2 = px.scatter(
                df,
                x='Year',
                y='Popularity',
                title='Song Popularity Over Time',
                hover_data=hover_cols or None,
                color_discrete_sequence=['red'],
            )
            fig2.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig2)
def generate_audio_features(df):
    """Render the "Audio Features Analysis" section.

    Lets the user pick one audio feature, then shows three tabs: a histogram
    of that feature, a box plot of it per ``Decade``, and a seaborn pair plot
    of Energy / Danceability / Valence / Tempo.

    Args:
        df: pandas DataFrame of tracks. Each tab shows an error box when a
            column it needs is absent.
    """
    st.header("Audio Features Analysis")
    feature = st.selectbox(
        "Select Feature", ['Danceability', 'Energy', 'Tempo', 'Loudness'])
    tab1, tab2, tab3 = st.tabs(["Distribution", "By Decade", "Correlations"])

    with tab1:
        st.markdown(
            f"<span style='color:blue'>**Distribution of {feature}**</span>: Shows variation in <span style='color:red'>{feature.lower()}</span> with <span style='color:green'>green</span> bars.", unsafe_allow_html=True)
        if feature in df.columns:
            fig3 = px.histogram(
                df,
                x=feature,
                title=f'Distribution of {feature}',
                color_discrete_sequence=['green'],
            )
            fig3.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig3)
        else:
            st.error(f"Cannot plot: '{feature}' column missing.")

    with tab2:
        st.markdown(
            f"<span style='color:blue'>**{feature} by Decade**</span>: Compares <span style='color:red'>{feature.lower()}</span> across decades with <span style='color:green'>green</span> boxes.", unsafe_allow_html=True)
        if 'Decade' not in df.columns:
            st.error("Cannot plot: 'Decade' column missing.")
        elif feature not in df.columns:
            st.error(f"Cannot plot: '{feature}' column missing.")
        else:
            fig4 = px.box(
                df,
                x='Decade',
                y=feature,
                title=f'{feature} Distribution by Decade',
                color_discrete_sequence=['green'],
            )
            fig4.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig4)

    with tab3:
        st.markdown("<span style='color:blue'>**Feature Correlations**</span>: Explores relationships with <span style='color:red'>multi-colored</span> scatter points.", unsafe_allow_html=True)
        pair_cols = ['Energy', 'Danceability', 'Valence', 'Tempo']
        missing = [col for col in pair_cols if col not in df.columns]
        if missing:
            st.error(
                "Cannot plot: missing column(s): " + ", ".join(missing) + ".")
        else:
            # pairplot creates and returns its own figure (a PairGrid); that
            # figure must be rendered, not a separately created empty one.
            grid = sns.pairplot(df[pair_cols])
            st.pyplot(grid.figure)
            # Close it so figures do not accumulate across Streamlit reruns.
            plt.close(grid.figure)
def generate_genre_analysis(df):
    """Render the "Genre & Artist Analysis" section.

    Shows three tabs: the five most frequent genres per decade (bar chart),
    the overall genre share (pie chart), and mean popularity per artist
    (single-column heatmap).

    Args:
        df: pandas DataFrame of tracks. ``Genres`` is passed through
            ``DataFrame.explode``, so list-like cells are split into one row
            per genre. Each tab shows an error box when a column it needs is
            absent.
    """
    st.header("Genre & Artist Analysis")
    tab1, tab2, tab3 = st.tabs(
        ["Top Genres", "Genre Distribution", "Artist Popularity"])

    # Explode once and share between the first two tabs.
    has_genres = 'Genres' in df.columns
    exploded = df.explode('Genres') if has_genres else None

    with tab1:
        st.markdown("<span style='color:blue'>**Top Genres by Decade**</span>: Shows frequent genres with <span style='color:red'>red</span> bars, <span style='color:green'>green</span> highlights.", unsafe_allow_html=True)
        if 'Decade' not in df.columns:
            st.error("Cannot plot: 'Decade' column missing.")
        elif not has_genres:
            st.error("Cannot plot: 'Genres' column missing.")
        else:
            genre_decade = (
                exploded.groupby(['Decade', 'Genres'])
                .size()
                .reset_index(name='Count')
            )
            # Top five per decade without groupby.apply, which depends on the
            # grouping column being handed to the callback (deprecated).
            # Stable sorts keep ties in their original order and leave the
            # result ordered by decade, then by descending count.
            top_genres = (
                genre_decade
                .sort_values('Count', ascending=False, kind='stable')
                .groupby('Decade')
                .head(5)
                .sort_values('Decade', kind='stable')
                .reset_index(drop=True)
            )
            fig5 = px.bar(
                top_genres,
                x='Decade',
                y='Count',
                color='Genres',
                title='Top Genres by Decade',
                color_discrete_sequence=px.colors.qualitative.Set1,
            )
            fig5.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig5)

    with tab2:
        st.markdown("<span style='color:blue'>**Genre Distribution**</span>: Breaks down genres with <span style='color:red'>multi-colored</span> pie slices.", unsafe_allow_html=True)
        if has_genres:
            # Name both output columns explicitly: the defaults produced by
            # value_counts().reset_index() differ between pandas versions.
            genre_counts = (
                exploded['Genres']
                .value_counts()
                .rename_axis('Genres')
                .reset_index(name='count')
            )
            fig6 = px.pie(
                genre_counts,
                values='count',
                names='Genres',
                title='Genre Distribution',
                color_discrete_sequence=px.colors.qualitative.Set2,
            )
            fig6.update_layout(width=800, height=400)
            st.plotly_chart(fig6)
        else:
            st.error("Cannot plot: 'Genres' column missing.")

    with tab3:
        st.markdown("<span style='color:blue'>**Artist Popularity Heatmap**</span>: Visualizes popularity with <span style='color:red'>red</span> intensity.", unsafe_allow_html=True)
        if 'Artist Name(s)' not in df.columns:
            st.error("Cannot plot: 'Artist Name(s)' column missing.")
        elif 'Popularity' not in df.columns:
            st.error("Cannot plot: 'Popularity' column missing.")
        else:
            # One row per artist, one column holding the mean popularity.
            artist_matrix = pd.pivot_table(
                df,
                values='Popularity',
                index='Artist Name(s)',
                aggfunc='mean',
            ).fillna(0)
            fig7 = px.imshow(
                artist_matrix,
                title='Artist Popularity Heatmap',
                color_continuous_scale='Reds',
            )
            fig7.update_layout(width=800, height=400)
            st.plotly_chart(fig7)
def generate_explicit_trends(df):
    """Render the "Explicit Content Trends" section.

    Draws a stacked bar chart of row counts per ``Decade``, split by the
    value of ``Explicit``. Shows an error box when either column is absent.

    Args:
        df: pandas DataFrame of tracks.
    """
    st.header("Explicit Content Trends")
    st.markdown("<span style='color:blue'>**Explicit vs Non-Explicit Songs**</span>: Compares content with <span style='color:red'>stacked bars</span> in <span style='color:green'>green</span> and <span style='color:purple'>purple</span>.", unsafe_allow_html=True)

    required = ('Decade', 'Explicit')
    if not all(col in df.columns for col in required):
        st.error("Cannot plot: 'Decade' or 'Explicit' column missing.")
        return

    # Rows: decades; columns: one per distinct Explicit value; gaps become 0.
    counts = df.groupby(list(required)).size()
    counts_wide = counts.unstack().fillna(0)

    chart = px.bar(
        counts_wide,
        barmode='stack',
        title='Explicit vs Non-Explicit Songs by Decade',
        color_discrete_sequence=['green', 'purple'],
    )
    chart.update_layout(template='plotly_white', width=800, height=400)
    st.plotly_chart(chart)
def generate_album_insights(df):
    """Render the "Album & Label Insights" section.

    Shows two tabs: the ten most frequent ``Label`` values (bar chart) and a
    bubble chart of track count against mean ``Popularity`` per album.

    Args:
        df: pandas DataFrame of tracks. Each tab shows an error box when a
            column it needs is absent.
    """
    st.header("Album & Label Insights")
    tab1, tab2 = st.tabs(["Top Labels", "Album Popularity"])

    with tab1:
        st.markdown("<span style='color:blue'>**Top Record Labels**</span>: Identifies labels with <span style='color:red'>blue</span> bars.", unsafe_allow_html=True)
        if 'Label' in df.columns:
            # Name both output columns explicitly: the defaults produced by
            # value_counts().reset_index() differ between pandas versions.
            top_labels = (
                df['Label']
                .value_counts()
                .nlargest(10)
                .rename_axis('Label')
                .reset_index(name='count')
            )
            fig9 = px.bar(
                top_labels,
                x='Label',
                y='count',
                title='Top Record Labels by Song Count',
                color_discrete_sequence=['blue'],
            )
            fig9.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig9)
        else:
            st.error("Cannot plot: 'Label' column missing.")

    with tab2:
        st.markdown("<span style='color:blue'>**Album Popularity**</span>: Shows albums with <span style='color:red'>red</span> bubbles.", unsafe_allow_html=True)
        if 'Album Name' in df.columns and 'Popularity' in df.columns:
            album_pop = (
                df.groupby('Album Name')['Popularity']
                .agg(['mean', 'count'])
                .reset_index()
                # 'mean' drives the marker size, which cannot be NaN; albums
                # with no popularity values at all are left out.
                .dropna(subset=['mean'])
            )
            fig10 = px.scatter(
                album_pop,
                x='count',
                y='mean',
                size='mean',
                hover_data=['Album Name'],
                title='Albums: Song Count vs Average Popularity',
                color_discrete_sequence=['red'],
            )
            fig10.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig10)
        else:
            st.error("Cannot plot: 'Album Name' or 'Popularity' column missing.")
def generate_tempo_mood(df):
    """Render the "Tempo & Mood Analysis" section.

    Shows two tabs: mean ``Tempo`` per ``Year`` as a line chart, and a
    scatter of ``Energy`` against ``Valence`` for every row.

    Args:
        df: pandas DataFrame of tracks. Each tab shows an error box when a
            column it needs is absent.
    """
    st.header("Tempo & Mood Analysis")
    tab1, tab2 = st.tabs(["Tempo Trends", "Mood Scatter"])

    with tab1:
        st.markdown("<span style='color:blue'>**Tempo Trends**</span>: Tracks changes with <span style='color:red'>orange</span> line.", unsafe_allow_html=True)
        if 'Year' in df.columns and 'Tempo' in df.columns:
            tempo_by_year = df.groupby('Year')['Tempo'].mean().reset_index()
            fig11 = px.line(
                tempo_by_year,
                x='Year',
                y='Tempo',
                title='Average Tempo Over Time',
                color_discrete_sequence=['orange'],
            )
            fig11.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig11)
        else:
            st.error("Cannot plot: 'Year' or 'Tempo' column missing.")

    with tab2:
        st.markdown("<span style='color:blue'>**Valence vs Energy**</span>: Groups mood with <span style='color:red'>purple</span> points.", unsafe_allow_html=True)
        if 'Valence' in df.columns and 'Energy' in df.columns:
            # The track name is only a hover extra; leave it out when the
            # column is absent rather than letting the chart call fail.
            hover_cols = ['Track Name'] if 'Track Name' in df.columns else None
            fig12 = px.scatter(
                df,
                x='Valence',
                y='Energy',
                title='Valence vs Energy',
                hover_data=hover_cols,
                color_discrete_sequence=['purple'],
            )
            fig12.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig12)
        else:
            st.error("Cannot plot: 'Valence' or 'Energy' column missing.")
def generate_top_artists_songs(df):
    """Render the "Top Artists and Songs" section.

    Shows two tabs: the ten most frequent ``Artist Name(s)`` values, and the
    ten rows with the highest ``Popularity`` as a horizontal bar chart.

    Args:
        df: pandas DataFrame of tracks. Each tab shows an error box when a
            column it needs is absent.
    """
    st.header("Top Artists and Songs")
    tab1, tab2 = st.tabs(["Top Artists", "Top Songs"])

    with tab1:
        st.markdown("<span style='color:blue'>**Most Featured Artists**</span>: Shows artists with <span style='color:red'>green</span> bars.", unsafe_allow_html=True)
        if 'Artist Name(s)' in df.columns:
            # Name both output columns explicitly: the defaults produced by
            # value_counts().reset_index() differ between pandas versions.
            top_artists = (
                df['Artist Name(s)']
                .value_counts()
                .nlargest(10)
                .rename_axis('Artist Name(s)')
                .reset_index(name='count')
            )
            fig13 = px.bar(
                top_artists,
                x='Artist Name(s)',
                y='count',
                title='Most Featured Artists',
                color_discrete_sequence=['green'],
            )
            fig13.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig13)
        else:
            st.error("Cannot plot: 'Artist Name(s)' column missing.")

    with tab2:
        st.markdown(
            "<span style='color:blue'>**Top 10 Songs**</span>: Lists songs with <span style='color:red'>blue</span> bars.", unsafe_allow_html=True)
        if 'Track Name' in df.columns and 'Popularity' in df.columns:
            top_songs = df.nlargest(10, 'Popularity')[
                ['Track Name', 'Popularity']]
            fig14 = px.bar(
                top_songs,
                y='Track Name',
                x='Popularity',
                orientation='h',
                title='Top 10 Songs by Popularity',
                color_discrete_sequence=['blue'],
            )
            fig14.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig14)
        else:
            st.error("Cannot plot: 'Track Name' or 'Popularity' column missing.")
def generate_album_release_trends(df):
    """Render the "Album Release Trends" section.

    Shows two tabs: a line chart of row counts per ``Year``, and a heatmap of
    row counts per (``Artist Name(s)``, ``Year``) pair.

    Args:
        df: pandas DataFrame of tracks. Each tab shows an error box when a
            column it needs is absent.
    """
    st.header("Album Release Trends")
    tab1, tab2 = st.tabs(["Albums per Year", "Artist-Year Heatmap"])

    with tab1:
        st.markdown("<span style='color:blue'>**Albums per Year**</span>: Tracks releases with <span style='color:red'>purple</span> line.", unsafe_allow_html=True)
        if 'Year' in df.columns:
            # NOTE(review): this counts DataFrame rows per year, not distinct
            # albums, although the title says "Albums" - confirm the intent.
            # Both output columns are named explicitly because the defaults
            # of value_counts().reset_index() differ between pandas versions.
            albums_per_year = (
                df['Year']
                .value_counts()
                .sort_index()
                .rename_axis('Year')
                .reset_index(name='count')
            )
            fig15 = px.line(
                albums_per_year,
                x='Year',
                y='count',
                title='Number of Albums Released per Year',
                color_discrete_sequence=['purple'],
            )
            fig15.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig15)
        else:
            st.error("Cannot plot: 'Year' column missing.")

    with tab2:
        st.markdown("<span style='color:blue'>**Songs by Artists and Years**</span>: Visualizes with <span style='color:red'>heatmap colors</span>.", unsafe_allow_html=True)
        if 'Artist Name(s)' in df.columns and 'Year' in df.columns:
            # Rows: artists; columns: years; missing combinations become 0.
            artist_year = (
                df.groupby(['Artist Name(s)', 'Year'])
                .size()
                .unstack()
                .fillna(0)
            )
            fig16 = px.imshow(
                artist_year,
                title='Songs Released by Artists Across Years',
                color_continuous_scale='Viridis',
            )
            fig16.update_layout(width=800, height=400)
            st.plotly_chart(fig16)
        else:
            st.error("Cannot plot: 'Artist Name(s)' or 'Year' column missing.")
def generate_duration_analysis(df):
    """Render the "Track Duration Analysis" section.

    Shows two tabs: a histogram of ``Track Duration (ms)`` and a box plot of
    that column per ``Decade``. A tab shows an error box when a column it
    needs is absent.

    Args:
        df: pandas DataFrame of tracks.
    """
    st.header("Track Duration Analysis")
    dist_tab, decade_tab = st.tabs(["Distribution", "By Decade"])

    # Column availability is decided once and reused by both tabs.
    has_duration = 'Track Duration (ms)' in df.columns
    has_decade = 'Decade' in df.columns

    with dist_tab:
        st.markdown("<span style='color:blue'>**Track Duration Distribution**</span>: Shows lengths with <span style='color:red'>orange</span> bars.", unsafe_allow_html=True)
        if not has_duration:
            st.error("Cannot plot: 'Track Duration (ms)' column missing.")
        else:
            histogram = px.histogram(
                df,
                x='Track Duration (ms)',
                title='Distribution of Track Durations',
                color_discrete_sequence=['orange'],
            )
            histogram.update_layout(
                template='plotly_white', width=800, height=400)
            st.plotly_chart(histogram)

    with decade_tab:
        st.markdown("<span style='color:blue'>**Duration by Decade**</span>: Compares with <span style='color:red'>green</span> boxes.", unsafe_allow_html=True)
        if not (has_decade and has_duration):
            st.error(
                "Cannot plot: 'Decade' or 'Track Duration (ms)' column missing.")
        else:
            box_chart = px.box(
                df,
                x='Decade',
                y='Track Duration (ms)',
                title='Track Duration by Decade',
                color_discrete_sequence=['green'],
            )
            box_chart.update_layout(
                template='plotly_white', width=800, height=400)
            st.plotly_chart(box_chart)
def generate_streaming_insights(df):
    """Render the "Streaming and Engagement Insights" section.

    Shows two tabs: a scatter of ``Popularity`` against
    ``Track Duration (ms)``, and a bar chart of mean ``Popularity`` per
    ``Time Signature``. A tab shows an error box when a column it needs is
    absent.

    Args:
        df: pandas DataFrame of tracks.
    """
    st.header("Streaming and Engagement Insights")
    duration_tab, signature_tab = st.tabs(
        ["Popularity vs Duration", "Time Signature"])

    columns = df.columns
    has_popularity = 'Popularity' in columns

    with duration_tab:
        st.markdown("<span style='color:blue'>**Popularity vs Duration**</span>: Explores trends with <span style='color:red'>blue</span> scatter.", unsafe_allow_html=True)
        if not ('Track Duration (ms)' in columns and has_popularity):
            st.error(
                "Cannot plot: 'Track Duration (ms)' or 'Popularity' column missing.")
        else:
            scatter = px.scatter(
                df,
                x='Track Duration (ms)',
                y='Popularity',
                title='Popularity vs Track Duration',
                color_discrete_sequence=['blue'],
            )
            scatter.update_layout(
                template='plotly_white', width=800, height=400)
            st.plotly_chart(scatter)

    with signature_tab:
        st.markdown("<span style='color:blue'>**Popularity by Time Signature**</span>: Compares with <span style='color:red'>purple</span> bars.", unsafe_allow_html=True)
        if not ('Time Signature' in columns and has_popularity):
            st.error(
                "Cannot plot: 'Time Signature' or 'Popularity' column missing.")
        else:
            grouped = df.groupby('Time Signature')['Popularity']
            mean_by_signature = grouped.mean().reset_index()
            bars = px.bar(
                mean_by_signature,
                x='Time Signature',
                y='Popularity',
                title='Average Popularity by Time Signature',
                color_discrete_sequence=['purple'],
            )
            bars.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(bars)
def generate_feature_comparisons(df):
    """Render the "Feature Comparisons Across Decades" section.

    Shows two tabs: grouped bars of mean Danceability / Energy / Valence per
    ``Decade``, and a line chart of mean ``Loudness`` per ``Year``.

    Args:
        df: pandas DataFrame of tracks. Each tab shows an error box when a
            column it needs is absent.
    """
    st.header("Feature Comparisons Across Decades")
    tab1, tab2 = st.tabs(["Feature Comparison", "Loudness Trends"])

    with tab1:
        st.markdown("<span style='color:blue'>**Feature Comparison**</span>: Compares features with <span style='color:red'>multi-colored</span> bars.", unsafe_allow_html=True)
        features = ['Danceability', 'Energy', 'Valence']
        missing = [col for col in features if col not in df.columns]
        if 'Decade' not in df.columns:
            st.error("Cannot plot: 'Decade' column missing.")
        elif missing:
            # Selecting absent columns from the groupby would raise KeyError.
            st.error(
                "Cannot plot: missing column(s): " + ", ".join(missing) + ".")
        else:
            features_by_decade = (
                df.groupby('Decade')[features].mean().reset_index()
            )
            # melt() yields long form: Decade, variable (feature), value.
            fig21 = px.bar(
                features_by_decade.melt(id_vars='Decade'),
                x='Decade',
                y='value',
                color='variable',
                barmode='group',
                title='Feature Comparison by Decade',
                color_discrete_sequence=px.colors.qualitative.Pastel,
            )
            fig21.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig21)

    with tab2:
        st.markdown("<span style='color:blue'>**Loudness Over Time**</span>: Tracks with <span style='color:red'>green</span> line.", unsafe_allow_html=True)
        if 'Year' in df.columns and 'Loudness' in df.columns:
            loudness_by_year = (
                df.groupby('Year')['Loudness'].mean().reset_index()
            )
            fig22 = px.line(
                loudness_by_year,
                x='Year',
                y='Loudness',
                title='Average Loudness Over Time',
                color_discrete_sequence=['green'],
            )
            fig22.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig22)
        else:
            st.error("Cannot plot: 'Year' or 'Loudness' column missing.")
def generate_network_analysis(df):
    """Render the "Network Analysis" section.

    Tab 1 builds an undirected graph in which every pair of names sharing a
    comma-separated ``Artist Name(s)`` cell is connected, and draws it with
    Plotly. Tab 2 is a placeholder that only displays example code.

    Args:
        df: pandas DataFrame of tracks. Tab 1 shows an error box when
            ``Artist Name(s)`` is absent and a warning when no cell lists
            more than one artist.
    """
    st.header("Network Analysis")
    tab1, tab2 = st.tabs(["Artist Collaborations", "Genre Crossover"])

    with tab1:
        st.markdown("<span style='color:blue'>**Artist Collaborations**</span>: Visualizes connections with <span style='color:red'>interactive red nodes</span>. Hover for details.", unsafe_allow_html=True)
        if 'Artist Name(s)' in df.columns:
            # Drop missing cells and coerce the rest to text before splitting.
            valid_artists = df['Artist Name(s)'].dropna().astype(str)
            G = nx.Graph()
            for artists in valid_artists:
                artists_list = [
                    a.strip() for a in artists.split(',') if a.strip()]
                # combinations() yields nothing for fewer than two names, so
                # solo tracks add no nodes or edges.
                G.add_edges_from(combinations(artists_list, 2))

            if G.number_of_nodes() > 0:
                # A fixed seed keeps node positions identical across
                # Streamlit reruns; an unseeded layout starts from random
                # positions and reshuffles the graph on every interaction.
                pos = nx.spring_layout(G, seed=42)

                # Plotly draws all edges as one trace; None breaks the line
                # between consecutive segments.
                edge_x = []
                edge_y = []
                for src, dst in G.edges():
                    x0, y0 = pos[src]
                    x1, y1 = pos[dst]
                    edge_x.extend([x0, x1, None])
                    edge_y.extend([y0, y1, None])
                edge_trace = go.Scatter(
                    x=edge_x, y=edge_y,
                    line=dict(width=0.5, color='#888'),
                    hoverinfo='none',
                    mode='lines')

                nodes = list(G.nodes())
                node_trace = go.Scatter(
                    x=[pos[node][0] for node in nodes],
                    y=[pos[node][1] for node in nodes],
                    mode='markers+text',
                    hoverinfo='text',
                    marker=dict(size=10, color='red'),
                    text=nodes,
                    textposition="top center")

                fig = go.Figure(
                    data=[edge_trace, node_trace],
                    layout=go.Layout(
                        title='Artist Collaborations',
                        showlegend=False,
                        hovermode='closest',
                        margin=dict(b=0, l=0, r=0, t=40),
                        width=800, height=600))
                st.plotly_chart(fig)
            else:
                st.warning("No artist collaborations to display.")
        else:
            st.error("Cannot plot: 'Artist Name(s)' column missing.")

    with tab2:
        st.markdown("<span style='color:blue'>**Genre Crossover**</span>: Placeholder with <span style='color:red'>future multi-color</span> potential.", unsafe_allow_html=True)
        st.write("To implement, install `holoviews` and use the following code:")
        # The snippet is displayed verbatim, so it stays flush left.
        st.code("""
import holoviews as hv
hv.extension('bokeh')
genre_pairs = df.explode('Genres')[['Genres']].merge(df.explode('Genres')[['Genres']], how='cross')
chord_data = genre_pairs.groupby(['Genres_x', 'Genres_y']).size().reset_index(name='value')
chord = hv.Chord(chord_data).opts(title="Genre Crossover")
st.write(hv.render(chord, backend='bokeh'))
""")