File size: 20,822 Bytes
10d82a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
import streamlit as st
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import networkx as nx
import plotly.graph_objects as go
from itertools import combinations


def generate_popularity_trends(df):
    """Render the "Popularity Trends Over Time" section.

    Builds two Streamlit tabs: a line chart of the mean ``Popularity`` per
    ``Decade`` and a scatter plot of ``Popularity`` against ``Year``. A tab
    shows ``st.error`` instead of a chart when a column it plots is absent.

    Args:
        df: pandas DataFrame holding the song data.
    """
    st.header("Popularity Trends Over Time")
    tab1, tab2 = st.tabs(["Average Popularity", "Individual Songs"])
    with tab1:
        st.markdown("<span style='color:blue'>**Average Popularity by Decade**</span>: Tracks how song popularity has <span style='color:red'>changed over time</span>. This <span style='color:green'>blue</span> line chart highlights peaks.", unsafe_allow_html=True)
        if 'Decade' not in df.columns:
            st.error("Cannot plot: 'Decade' column missing.")
        elif 'Popularity' not in df.columns:
            # Without this guard the groupby below raises KeyError.
            st.error("Cannot plot: 'Popularity' column missing.")
        else:
            avg_pop_by_decade = df.groupby(
                'Decade')['Popularity'].mean().reset_index()
            fig1 = px.line(avg_pop_by_decade, x='Decade', y='Popularity',
                           title='Average Popularity by Decade', color_discrete_sequence=['blue'])
            fig1.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig1)
    with tab2:
        st.markdown("<span style='color:blue'>**Song Popularity Over Time**</span>: Highlights individual trends with <span style='color:red'>red</span> points, showing <span style='color:green'>green</span> details on hover.", unsafe_allow_html=True)
        if 'Year' not in df.columns:
            st.error("Cannot plot: 'Year' column missing.")
        elif 'Popularity' not in df.columns:
            st.error("Cannot plot: 'Popularity' column missing.")
        else:
            # Hover columns are optional extras: request only the ones that
            # exist so a missing name does not abort the whole chart.
            hover_cols = [col for col in ('Track Name', 'Artist Name(s)')
                          if col in df.columns]
            fig2 = px.scatter(df, x='Year', y='Popularity', title='Song Popularity Over Time',
                              hover_data=hover_cols or None, color_discrete_sequence=['red'])
            fig2.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig2)


def generate_audio_features(df):
    """Render the "Audio Features Analysis" section.

    Lets the user pick one of Danceability, Energy, Tempo or Loudness and
    shows three tabs: a histogram of the chosen feature, a box plot of it per
    ``Decade``, and a seaborn pair plot of Energy, Danceability, Valence and
    Tempo. A tab shows ``st.error`` when a column it needs is absent.

    Args:
        df: pandas DataFrame holding the song data.
    """
    st.header("Audio Features Analysis")
    feature = st.selectbox(
        "Select Feature", ['Danceability', 'Energy', 'Tempo', 'Loudness'])
    tab1, tab2, tab3 = st.tabs(["Distribution", "By Decade", "Correlations"])
    has_feature = feature in df.columns
    with tab1:
        st.markdown(
            f"<span style='color:blue'>**Distribution of {feature}**</span>: Shows variation in <span style='color:red'>{feature.lower()}</span> with <span style='color:green'>green</span> bars.", unsafe_allow_html=True)
        if has_feature:
            fig3 = px.histogram(
                df, x=feature, title=f'Distribution of {feature}', color_discrete_sequence=['green'])
            fig3.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig3)
        else:
            st.error(f"Cannot plot: '{feature}' column missing.")
    with tab2:
        st.markdown(
            f"<span style='color:blue'>**{feature} by Decade**</span>: Compares <span style='color:red'>{feature.lower()}</span> across decades with <span style='color:green'>green</span> boxes.", unsafe_allow_html=True)
        if 'Decade' not in df.columns:
            st.error("Cannot plot: 'Decade' column missing.")
        elif not has_feature:
            st.error(f"Cannot plot: '{feature}' column missing.")
        else:
            fig4 = px.box(df, x='Decade', y=feature,
                          title=f'{feature} Distribution by Decade', color_discrete_sequence=['green'])
            fig4.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig4)
    with tab3:
        st.markdown("<span style='color:blue'>**Feature Correlations**</span>: Explores relationships with <span style='color:red'>multi-colored</span> scatter points.", unsafe_allow_html=True)
        pair_cols = ['Energy', 'Danceability', 'Valence', 'Tempo']
        missing = [col for col in pair_cols if col not in df.columns]
        if missing:
            names = ", ".join(f"'{col}'" for col in missing)
            st.error(f"Cannot plot: {names} column(s) missing.")
        else:
            # pairplot builds its own figure (a PairGrid); that figure is the
            # one to hand to Streamlit, not a separately created empty one.
            grid = sns.pairplot(df[pair_cols])
            st.pyplot(grid.figure)
            # Close it so figures do not pile up in pyplot across reruns.
            plt.close(grid.figure)

def generate_genre_analysis(df):
    """Render the "Genre & Artist Analysis" section.

    Shows three tabs: the five most frequent genres per decade as a bar
    chart, the overall genre share as a pie chart, and the mean
    ``Popularity`` per ``Artist Name(s)`` as a heatmap. ``Genres`` is passed
    through ``DataFrame.explode`` before counting. A tab shows ``st.error``
    when a column it needs is absent.

    Args:
        df: pandas DataFrame holding the song data.
    """
    st.header("Genre & Artist Analysis")
    tab1, tab2, tab3 = st.tabs(
        ["Top Genres", "Genre Distribution", "Artist Popularity"])
    with tab1:
        st.markdown("<span style='color:blue'>**Top Genres by Decade**</span>: Shows frequent genres with <span style='color:red'>red</span> bars, <span style='color:green'>green</span> highlights.", unsafe_allow_html=True)
        if 'Decade' not in df.columns:
            st.error("Cannot plot: 'Decade' column missing.")
        elif 'Genres' not in df.columns:
            st.error("Cannot plot: 'Genres' column missing.")
        else:
            genre_decade = df.explode('Genres').groupby(
                ['Decade', 'Genres']).size().reset_index(name='Count')
            # Top five per decade without groupby.apply, which pandas 2.2
            # deprecates when the function touches the grouping column.
            # Stable sorts keep ties in their original order, and the final
            # sort groups the rows by decade again.
            by_count = genre_decade.sort_values(
                'Count', ascending=False, kind='stable')
            top_genres = by_count.groupby('Decade').head(5).sort_values(
                'Decade', kind='stable').reset_index(drop=True)
            fig5 = px.bar(top_genres, x='Decade', y='Count', color='Genres',
                          title='Top Genres by Decade', color_discrete_sequence=px.colors.qualitative.Set1)
            fig5.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig5)
    with tab2:
        st.markdown("<span style='color:blue'>**Genre Distribution**</span>: Breaks down genres with <span style='color:red'>multi-colored</span> pie slices.", unsafe_allow_html=True)
        if 'Genres' in df.columns:
            # Name both columns explicitly so the chart does not depend on
            # how the installed pandas labels value_counts() output.
            genre_counts = df.explode('Genres')['Genres'].value_counts(
            ).rename_axis('Genres').reset_index(name='count')
            fig6 = px.pie(genre_counts, values='count', names='Genres',
                          title='Genre Distribution', color_discrete_sequence=px.colors.qualitative.Set2)
            fig6.update_layout(width=800, height=400)
            st.plotly_chart(fig6)
        else:
            st.error("Cannot plot: 'Genres' column missing.")
    with tab3:
        st.markdown("<span style='color:blue'>**Artist Popularity Heatmap**</span>: Visualizes popularity with <span style='color:red'>red</span> intensity.", unsafe_allow_html=True)
        if 'Artist Name(s)' not in df.columns:
            st.error("Cannot plot: 'Artist Name(s)' column missing.")
        elif 'Popularity' not in df.columns:
            st.error("Cannot plot: 'Popularity' column missing.")
        else:
            # One row per artist, a single 'Popularity' column of means.
            artist_pop = pd.pivot_table(
                df, values='Popularity', index='Artist Name(s)', aggfunc='mean').fillna(0)
            fig7 = px.imshow(artist_pop, title='Artist Popularity Heatmap',
                             color_continuous_scale='Reds')
            fig7.update_layout(width=800, height=400)
            st.plotly_chart(fig7)

def generate_explicit_trends(df):
    """Render the "Explicit Content Trends" section.

    Counts rows per (``Decade``, ``Explicit``) pair and draws the counts as a
    stacked bar chart. Shows ``st.error`` and returns when either column is
    absent.

    Args:
        df: pandas DataFrame holding the song data.
    """
    st.header("Explicit Content Trends")
    st.markdown("<span style='color:blue'>**Explicit vs Non-Explicit Songs**</span>: Compares content with <span style='color:red'>stacked bars</span> in <span style='color:green'>green</span> and <span style='color:purple'>purple</span>.", unsafe_allow_html=True)

    needed = ('Decade', 'Explicit')
    if not all(col in df.columns for col in needed):
        st.error("Cannot plot: 'Decade' or 'Explicit' column missing.")
        return

    # Rows are decades, columns are the Explicit values; absent pairs become 0.
    pair_counts = df.groupby(['Decade', 'Explicit']).size()
    counts_wide = pair_counts.unstack().fillna(0)

    chart = px.bar(
        counts_wide,
        barmode='stack',
        title='Explicit vs Non-Explicit Songs by Decade',
        color_discrete_sequence=['green', 'purple'],
    )
    chart.update_layout(template='plotly_white', width=800, height=400)
    st.plotly_chart(chart)

def generate_album_insights(df):
    """Render the "Album & Label Insights" section.

    Tab one charts the ten most frequent ``Label`` values as bars. Tab two
    plots, per ``Album Name``, the row count against the mean ``Popularity``
    as a bubble chart sized by that mean. A tab shows ``st.error`` when a
    column it checks for is absent.

    Args:
        df: pandas DataFrame holding the song data.
    """
    st.header("Album & Label Insights")
    labels_tab, albums_tab = st.tabs(["Top Labels", "Album Popularity"])
    layout = dict(template='plotly_white', width=800, height=400)

    with labels_tab:
        st.markdown("<span style='color:blue'>**Top Record Labels**</span>: Identifies labels with <span style='color:red'>blue</span> bars.", unsafe_allow_html=True)
        if 'Label' not in df.columns:
            st.error("Cannot plot: 'Label' column missing.")
        else:
            label_counts = df['Label'].value_counts()
            busiest = label_counts.nlargest(10).reset_index()
            labels_chart = px.bar(
                busiest,
                x='Label',
                y='count',
                title='Top Record Labels by Song Count',
                color_discrete_sequence=['blue'],
            )
            labels_chart.update_layout(**layout)
            st.plotly_chart(labels_chart)

    with albums_tab:
        st.markdown("<span style='color:blue'>**Album Popularity**</span>: Shows albums with <span style='color:red'>red</span> bubbles.", unsafe_allow_html=True)
        if not all(col in df.columns for col in ('Album Name', 'Popularity')):
            st.error("Cannot plot: 'Album Name' or 'Popularity' column missing.")
        else:
            # Per album: 'mean' popularity and 'count' of rows.
            per_album = df.groupby('Album Name')['Popularity'].agg(['mean', 'count'])
            per_album = per_album.reset_index()
            albums_chart = px.scatter(
                per_album,
                x='count',
                y='mean',
                size='mean',
                hover_data=['Album Name'],
                title='Albums: Song Count vs Average Popularity',
                color_discrete_sequence=['red'],
            )
            albums_chart.update_layout(**layout)
            st.plotly_chart(albums_chart)


def generate_tempo_mood(df):
    """Render the "Tempo & Mood Analysis" section.

    Tab one draws the mean ``Tempo`` per ``Year`` as a line chart. Tab two
    scatters ``Valence`` against ``Energy``, adding ``Track Name`` to the
    hover box when that column exists. A tab shows ``st.error`` when a column
    it plots is absent.

    Args:
        df: pandas DataFrame holding the song data.
    """
    st.header("Tempo & Mood Analysis")
    tab1, tab2 = st.tabs(["Tempo Trends", "Mood Scatter"])
    with tab1:
        st.markdown("<span style='color:blue'>**Tempo Trends**</span>: Tracks changes with <span style='color:red'>orange</span> line.", unsafe_allow_html=True)
        if 'Year' in df.columns and 'Tempo' in df.columns:
            tempo_by_year = df.groupby('Year')['Tempo'].mean().reset_index()
            fig11 = px.line(tempo_by_year, x='Year', y='Tempo',
                            title='Average Tempo Over Time', color_discrete_sequence=['orange'])
            fig11.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig11)
        else:
            st.error("Cannot plot: 'Year' or 'Tempo' column missing.")
    with tab2:
        st.markdown("<span style='color:blue'>**Valence vs Energy**</span>: Groups mood with <span style='color:red'>purple</span> points.", unsafe_allow_html=True)
        if 'Valence' in df.columns and 'Energy' in df.columns:
            # The track name only enriches the hover box; a missing column
            # should not stop an otherwise plottable chart.
            hover_cols = ['Track Name'] if 'Track Name' in df.columns else None
            fig12 = px.scatter(df, x='Valence', y='Energy', title='Valence vs Energy',
                               hover_data=hover_cols, color_discrete_sequence=['purple'])
            fig12.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig12)
        else:
            st.error("Cannot plot: 'Valence' or 'Energy' column missing.")


def generate_top_artists_songs(df):
    """Render the "Top Artists and Songs" section.

    Tab one charts the ten most frequent ``Artist Name(s)`` values. Tab two
    charts the ten rows with the highest ``Popularity`` as horizontal bars
    labelled by ``Track Name``. A tab shows ``st.error`` when a column it
    checks for is absent.

    Args:
        df: pandas DataFrame holding the song data.
    """
    st.header("Top Artists and Songs")
    artists_tab, songs_tab = st.tabs(["Top Artists", "Top Songs"])
    layout = dict(template='plotly_white', width=800, height=400)

    with artists_tab:
        st.markdown("<span style='color:blue'>**Most Featured Artists**</span>: Shows artists with <span style='color:red'>green</span> bars.", unsafe_allow_html=True)
        if 'Artist Name(s)' not in df.columns:
            st.error("Cannot plot: 'Artist Name(s)' column missing.")
        else:
            appearances = df['Artist Name(s)'].value_counts()
            leaders = appearances.nlargest(10).reset_index()
            artists_chart = px.bar(
                leaders,
                x='Artist Name(s)',
                y='count',
                title='Most Featured Artists',
                color_discrete_sequence=['green'],
            )
            artists_chart.update_layout(**layout)
            st.plotly_chart(artists_chart)

    with songs_tab:
        st.markdown(
            "<span style='color:blue'>**Top 10 Songs**</span>: Lists songs with <span style='color:red'>blue</span> bars.", unsafe_allow_html=True)
        if not all(col in df.columns for col in ('Track Name', 'Popularity')):
            st.error("Cannot plot: 'Track Name' or 'Popularity' column missing.")
        else:
            ranked = df.nlargest(10, 'Popularity')
            best_songs = ranked[['Track Name', 'Popularity']]
            songs_chart = px.bar(
                best_songs,
                y='Track Name',
                x='Popularity',
                orientation='h',
                title='Top 10 Songs by Popularity',
                color_discrete_sequence=['blue'],
            )
            songs_chart.update_layout(**layout)
            st.plotly_chart(songs_chart)


def generate_album_release_trends(df):
    """Render the "Album Release Trends" section.

    Tab one draws a line chart of albums per ``Year``: when ``Album Name`` is
    available it counts distinct album names per year, otherwise it falls
    back to counting rows per year. Tab two draws a heatmap of row counts per
    (``Artist Name(s)``, ``Year``) pair. A tab shows ``st.error`` when a
    column it checks for is absent.

    Args:
        df: pandas DataFrame holding the song data.
    """
    st.header("Album Release Trends")
    tab1, tab2 = st.tabs(["Albums per Year", "Artist-Year Heatmap"])
    with tab1:
        st.markdown("<span style='color:blue'>**Albums per Year**</span>: Tracks releases with <span style='color:red'>purple</span> line.", unsafe_allow_html=True)
        if 'Year' in df.columns:
            if 'Album Name' in df.columns:
                # NOTE(review): rows look like individual songs, so a plain
                # row count would count an album once per track. Count each
                # album name once per year instead (distinct albums sharing
                # a name within a year are merged) - confirm with the data.
                albums_per_year = df.groupby(
                    'Year')['Album Name'].nunique().reset_index(name='count')
            else:
                # No album identifier available: keep the row count per year.
                albums_per_year = df['Year'].value_counts().sort_index(
                ).rename_axis('Year').reset_index(name='count')
            fig15 = px.line(albums_per_year, x='Year', y='count',
                            title='Number of Albums Released per Year', color_discrete_sequence=['purple'])
            fig15.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig15)
        else:
            st.error("Cannot plot: 'Year' column missing.")
    with tab2:
        st.markdown("<span style='color:blue'>**Songs by Artists and Years**</span>: Visualizes with <span style='color:red'>heatmap colors</span>.", unsafe_allow_html=True)
        if 'Artist Name(s)' in df.columns and 'Year' in df.columns:
            # Rows are artists, columns are years; absent pairs become 0.
            artist_year = df.groupby(
                ['Artist Name(s)', 'Year']).size().unstack().fillna(0)
            fig16 = px.imshow(
                artist_year, title='Songs Released by Artists Across Years', color_continuous_scale='Viridis')
            fig16.update_layout(width=800, height=400)
            st.plotly_chart(fig16)
        else:
            st.error("Cannot plot: 'Artist Name(s)' or 'Year' column missing.")


def generate_duration_analysis(df):
    """Render the "Track Duration Analysis" section.

    Tab one draws a histogram of ``Track Duration (ms)``; tab two draws a box
    plot of it per ``Decade``. A tab shows ``st.error`` when a column it
    checks for is absent.

    Args:
        df: pandas DataFrame holding the song data.
    """
    st.header("Track Duration Analysis")
    hist_tab, decade_tab = st.tabs(["Distribution", "By Decade"])
    duration_col = 'Track Duration (ms)'
    layout = dict(template='plotly_white', width=800, height=400)

    with hist_tab:
        st.markdown("<span style='color:blue'>**Track Duration Distribution**</span>: Shows lengths with <span style='color:red'>orange</span> bars.", unsafe_allow_html=True)
        if duration_col not in df.columns:
            st.error("Cannot plot: 'Track Duration (ms)' column missing.")
        else:
            histogram = px.histogram(
                df,
                x=duration_col,
                title='Distribution of Track Durations',
                color_discrete_sequence=['orange'],
            )
            histogram.update_layout(**layout)
            st.plotly_chart(histogram)

    with decade_tab:
        st.markdown("<span style='color:blue'>**Duration by Decade**</span>: Compares with <span style='color:red'>green</span> boxes.", unsafe_allow_html=True)
        if not all(col in df.columns for col in ('Decade', duration_col)):
            st.error(
                "Cannot plot: 'Decade' or 'Track Duration (ms)' column missing.")
        else:
            boxes = px.box(
                df,
                x='Decade',
                y=duration_col,
                title='Track Duration by Decade',
                color_discrete_sequence=['green'],
            )
            boxes.update_layout(**layout)
            st.plotly_chart(boxes)


def generate_streaming_insights(df):
    """Render the "Streaming and Engagement Insights" section.

    Tab one scatters ``Popularity`` against ``Track Duration (ms)``; tab two
    charts the mean ``Popularity`` per ``Time Signature`` as bars. A tab shows
    ``st.error`` when a column it checks for is absent.

    Args:
        df: pandas DataFrame holding the song data.
    """
    st.header("Streaming and Engagement Insights")
    duration_tab, signature_tab = st.tabs(
        ["Popularity vs Duration", "Time Signature"])
    layout = dict(template='plotly_white', width=800, height=400)

    with duration_tab:
        st.markdown("<span style='color:blue'>**Popularity vs Duration**</span>: Explores trends with <span style='color:red'>blue</span> scatter.", unsafe_allow_html=True)
        if not all(col in df.columns for col in ('Track Duration (ms)', 'Popularity')):
            st.error(
                "Cannot plot: 'Track Duration (ms)' or 'Popularity' column missing.")
        else:
            scatter = px.scatter(
                df,
                x='Track Duration (ms)',
                y='Popularity',
                title='Popularity vs Track Duration',
                color_discrete_sequence=['blue'],
            )
            scatter.update_layout(**layout)
            st.plotly_chart(scatter)

    with signature_tab:
        st.markdown("<span style='color:blue'>**Popularity by Time Signature**</span>: Compares with <span style='color:red'>purple</span> bars.", unsafe_allow_html=True)
        if not all(col in df.columns for col in ('Time Signature', 'Popularity')):
            st.error(
                "Cannot plot: 'Time Signature' or 'Popularity' column missing.")
        else:
            mean_by_signature = df.groupby('Time Signature')['Popularity'].mean()
            signature_frame = mean_by_signature.reset_index()
            bars = px.bar(
                signature_frame,
                x='Time Signature',
                y='Popularity',
                title='Average Popularity by Time Signature',
                color_discrete_sequence=['purple'],
            )
            bars.update_layout(**layout)
            st.plotly_chart(bars)


def generate_feature_comparisons(df):
    """Render the "Feature Comparisons Across Decades" section.

    Tab one draws grouped bars of the mean Danceability, Energy and Valence
    per ``Decade``. Tab two draws the mean ``Loudness`` per ``Year`` as a line
    chart. A tab shows ``st.error`` when a column it needs is absent.

    Args:
        df: pandas DataFrame holding the song data.
    """
    st.header("Feature Comparisons Across Decades")
    tab1, tab2 = st.tabs(["Feature Comparison", "Loudness Trends"])
    with tab1:
        st.markdown("<span style='color:blue'>**Feature Comparison**</span>: Compares features with <span style='color:red'>multi-colored</span> bars.", unsafe_allow_html=True)
        feature_cols = ['Danceability', 'Energy', 'Valence']
        missing = [col for col in feature_cols if col not in df.columns]
        if 'Decade' not in df.columns:
            st.error("Cannot plot: 'Decade' column missing.")
        elif missing:
            # Selecting an absent column after groupby raises KeyError;
            # report it the way the other sections do.
            names = ", ".join(f"'{col}'" for col in missing)
            st.error(f"Cannot plot: {names} column(s) missing.")
        else:
            features_by_decade = df.groupby(
                'Decade')[feature_cols].mean().reset_index()
            # melt() yields the long 'variable' / 'value' layout px.bar groups on.
            fig21 = px.bar(features_by_decade.melt(id_vars='Decade'), x='Decade', y='value', color='variable',
                           barmode='group', title='Feature Comparison by Decade', color_discrete_sequence=px.colors.qualitative.Pastel)
            fig21.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig21)
    with tab2:
        st.markdown("<span style='color:blue'>**Loudness Over Time**</span>: Tracks with <span style='color:red'>green</span> line.", unsafe_allow_html=True)
        if 'Year' in df.columns and 'Loudness' in df.columns:
            loudness_by_year = df.groupby(
                'Year')['Loudness'].mean().reset_index()
            fig22 = px.line(loudness_by_year, x='Year', y='Loudness',
                            title='Average Loudness Over Time', color_discrete_sequence=['green'])
            fig22.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig22)
        else:
            st.error("Cannot plot: 'Year' or 'Loudness' column missing.")


def generate_network_analysis(df):
    """Render the "Network Analysis" section.

    Tab one builds an undirected graph in which two artists are linked when
    they appear together in one comma-separated ``Artist Name(s)`` value, lays
    it out with a seeded spring layout and draws it with Plotly. It shows
    ``st.warning`` when no collaborations exist and ``st.error`` when the
    column is absent. Tab two only displays a placeholder code snippet.

    Args:
        df: pandas DataFrame holding the song data.
    """
    st.header("Network Analysis")
    tab1, tab2 = st.tabs(["Artist Collaborations", "Genre Crossover"])
    with tab1:
        st.markdown("<span style='color:blue'>**Artist Collaborations**</span>: Visualizes connections with <span style='color:red'>interactive red nodes</span>. Hover for details.", unsafe_allow_html=True)
        if 'Artist Name(s)' in df.columns:
            # Drop missing values and coerce the rest to str so split() works.
            valid_artists = df['Artist Name(s)'].dropna().astype(str)
            G = nx.Graph()
            for artists in valid_artists:
                stripped = (a.strip() for a in artists.split(','))
                # dict.fromkeys removes repeated names while keeping order, so
                # a credit listing the same artist twice cannot add a self-loop.
                artists_list = list(dict.fromkeys(a for a in stripped if a))
                # combinations() yields nothing for fewer than two names, so
                # solo credits add neither nodes nor edges.
                G.add_edges_from(combinations(artists_list, 2))
            if G.number_of_nodes() > 0:
                # A fixed seed keeps node positions identical across Streamlit
                # reruns instead of reshuffling the graph every time.
                pos = nx.spring_layout(G, seed=42)
                edge_x = []
                edge_y = []
                for edge in G.edges():
                    x0, y0 = pos[edge[0]]
                    x1, y1 = pos[edge[1]]
                    # The None entry breaks the line between consecutive edges.
                    edge_x.extend([x0, x1, None])
                    edge_y.extend([y0, y1, None])

                edge_trace = go.Scatter(
                    x=edge_x, y=edge_y,
                    line=dict(width=0.5, color='#888'),
                    hoverinfo='none',
                    mode='lines')

                node_x = [pos[node][0] for node in G.nodes()]
                node_y = [pos[node][1] for node in G.nodes()]
                node_trace = go.Scatter(
                    x=node_x, y=node_y,
                    mode='markers+text',
                    hoverinfo='text',
                    marker=dict(size=10, color='red'),
                    text=list(G.nodes()),
                    textposition="top center")

                fig = go.Figure(data=[edge_trace, node_trace],
                                layout=go.Layout(
                    title='Artist Collaborations',
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=0, l=0, r=0, t=40),
                    width=800, height=600))
                st.plotly_chart(fig)
            else:
                st.warning("No artist collaborations to display.")
        else:
            st.error("Cannot plot: 'Artist Name(s)' column missing.")
    with tab2:
        st.markdown("<span style='color:blue'>**Genre Crossover**</span>: Placeholder with <span style='color:red'>future multi-color</span> potential.", unsafe_allow_html=True)
        st.write("To implement, install `holoviews` and use the following code:")
        st.code("""
        import holoviews as hv
        hv.extension('bokeh')
        genre_pairs = df.explode('Genres')[['Genres']].merge(df.explode('Genres')[['Genres']], how='cross')
        chord_data = genre_pairs.groupby(['Genres_x', 'Genres_y']).size().reset_index(name='value')
        chord = hv.Chord(chord_data).opts(title="Genre Crossover")
        st.write(hv.render(chord, backend='bokeh'))
        """)