File size: 26,571 Bytes
10d82a8
 
 
 
 
 
 
a31e167
93556d9
aa131a7
 
10d82a8
b595166
 
a31e167
 
 
b595166
a31e167
 
b595166
a31e167
 
 
b595166
 
 
 
 
 
 
a31e167
 
b595166
 
 
 
 
 
 
 
 
 
 
 
 
 
a31e167
b595166
a31e167
 
b595166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a31e167
b595166
a31e167
 
b595166
a31e167
 
b595166
 
 
 
 
a31e167
 
b595166
 
 
 
 
 
 
 
 
 
 
 
 
f814502
a31e167
b595166
10d82a8
b595166
 
 
a31e167
b595166
a31e167
b595166
a31e167
 
b595166
 
 
a31e167
b595166
 
 
 
 
 
 
a31e167
b595166
a31e167
 
b595166
 
a31e167
 
b595166
 
 
 
 
 
 
 
 
 
 
 
 
a31e167
b595166
 
a31e167
 
 
b595166
a31e167
 
b595166
a31e167
 
b595166
 
 
 
 
 
 
 
 
 
a31e167
b595166
a31e167
 
b595166
 
 
 
a31e167
 
b595166
 
 
 
 
a31e167
b595166
a31e167
 
 
 
 
 
b595166
 
 
 
 
a31e167
 
 
 
b595166
 
 
10d82a8
a31e167
b595166
 
 
 
a31e167
 
b595166
 
 
 
a31e167
 
b595166
 
 
 
 
 
10d82a8
a31e167
b595166
 
 
a31e167
b595166
a31e167
 
b595166
 
 
 
 
 
 
 
 
 
 
 
a31e167
b595166
a31e167
 
b595166
a31e167
 
 
 
b595166
 
 
 
a31e167
 
b595166
 
 
 
 
 
 
10d82a8
f814502
b595166
 
 
 
 
 
 
a31e167
 
b595166
 
 
 
 
a31e167
 
b595166
 
a31e167
 
b595166
 
 
 
 
 
 
 
 
a31e167
 
b595166
 
 
a31e167
b595166
 
 
a31e167
 
b595166
 
 
 
 
 
 
 
 
 
a31e167
b595166
 
 
a31e167
 
b595166
 
 
 
 
 
 
 
10d82a8
be1014f
b595166
 
 
 
 
 
a31e167
 
 
 
b595166
 
 
 
 
 
 
 
a31e167
 
b595166
a31e167
b595166
a31e167
 
be1014f
b595166
 
 
 
 
 
 
 
 
 
 
 
a31e167
 
b595166
 
 
be1014f
b595166
 
be1014f
b595166
a31e167
 
b595166
 
 
 
 
 
 
 
 
 
 
a31e167
b595166
a31e167
 
b595166
 
a31e167
 
b595166
 
 
 
 
 
 
a31e167
 
be1014f
b595166
 
 
 
a31e167
b595166
a31e167
 
 
b595166
 
a31e167
 
 
 
10d82a8
b595166
a31e167
 
b595166
 
 
 
 
 
a31e167
 
b595166
 
a31e167
 
b595166
 
a31e167
 
be1014f
b595166
a31e167
 
b595166
a31e167
 
b595166
 
 
 
 
a31e167
 
b595166
 
a31e167
 
 
10d82a8
b595166
 
 
 
 
 
a31e167
 
b595166
 
 
 
 
 
 
 
 
a31e167
 
 
 
b595166
 
 
 
 
a31e167
b595166
 
 
a31e167
b595166
a31e167
 
b595166
a31e167
 
b595166
 
 
 
 
 
 
 
 
 
 
a31e167
b595166
a31e167
 
b595166
a31e167
 
b595166
 
 
 
 
 
 
 
 
 
be1014f
a31e167
10d82a8
 
 
a31e167
c97cf0f
 
a31e167
10d82a8
a31e167
 
b595166
a31e167
 
657719c
b595166
657719c
a31e167
657719c
a31e167
 
 
657719c
 
 
a31e167
657719c
 
a31e167
 
 
 
 
 
657719c
 
aa131a7
a31e167
 
 
10d82a8
a31e167
 
be1014f
 
657719c
 
 
a31e167
657719c
a31e167
 
 
 
 
657719c
a31e167
657719c
 
 
 
 
a31e167
 
 
 
 
657719c
be1014f
a31e167
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
import streamlit as st
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import networkx as nx
import plotly.graph_objects as go
from itertools import chain, combinations
import numpy as np
from collections import Counter


def generate_popularity_trends(df):
    """Render the "Popularity Trends Over Time" section in three tabs.

    * Average Popularity: mean ``Popularity`` per ``Decade`` for the ten
      decades with the highest mean, drawn as a filled line.
    * Individual Songs: scatter of the ten highest-``Popularity`` rows
      against ``Year``.
    * Top 10 Songs: horizontal bar chart of the ten highest-``Popularity``
      rows by ``Track Name``.

    A tab whose required columns are absent shows ``st.error`` instead of
    raising. Columns that only feed the hover tooltip are optional.

    Args:
        df: pandas DataFrame of songs. ``Popularity`` must be orderable
            because it is ranked with ``nlargest``.
            NOTE(review): presumably one row per track -- confirm with caller.
    """
    st.header("Popularity Trends Over Time")
    tab1, tab2, tab3 = st.tabs(
        ["Average Popularity", "Individual Songs", "Top 10 Songs"])

    with tab1:
        st.markdown(
            "**Average Popularity by Decade:** This chart shows how the average popularity of songs has changed over different decades.")
        if 'Decade' in df.columns and 'Popularity' in df.columns:
            top_decades = (
                df.groupby('Decade')['Popularity'].mean()
                .reset_index()
                .nlargest(10, 'Popularity')
                # A line trace connects points in row order; restore the
                # chronological order that nlargest() discarded so the line
                # does not zig-zag between decades.
                .sort_values('Decade')
            )

            fig1 = go.Figure()
            fig1.add_trace(go.Scatter(
                x=top_decades['Decade'],
                y=top_decades['Popularity'],
                mode='lines+markers',
                fill='tonexty',
                line=dict(color='royalblue', width=3),
                marker=dict(size=8, color='darkblue',
                            line=dict(width=2, color='white')),
                name='Popularity',
                hovertext=top_decades['Decade']
            ))
            fig1.update_layout(
                title='Top 10 Decades by Average Popularity',
                xaxis_title='Decade',
                yaxis_title='Average Popularity Score',
                template='plotly_white',
                width=900,
                height=450
            )
            st.plotly_chart(fig1)
        else:
            st.error("Cannot plot: 'Decade' or 'Popularity' column missing.")

    with tab2:
        st.markdown(
            "**Top 10 Individual Songs:** This scatter plot highlights the popularity of the top 10 most popular songs over time.")
        if 'Year' in df.columns and 'Popularity' in df.columns:
            top_songs = df.nlargest(10, 'Popularity')
            # Tooltip columns are extras: request only those that exist.
            hover_cols = [col for col in ('Track Name', 'Artist Name(s)', 'Year')
                          if col in df.columns]
            fig2 = px.scatter(
                top_songs, x='Year', y='Popularity',
                color='Popularity',
                size='Popularity',
                color_continuous_scale='viridis',
                title='Top 10 Individual Songs by Popularity',
                hover_data=hover_cols
            )
            fig2.update_layout(
                xaxis_title='Release Year',
                yaxis_title='Popularity Score',
                template='plotly_white',
                width=900,
                height=500
            )
            st.plotly_chart(fig2)
        else:
            st.error("Cannot plot: 'Year' or 'Popularity' column missing.")

    with tab3:
        st.markdown(
            "**Top 10 Most Popular Songs:** This bar chart displays the top 10 songs based on their popularity scores.")
        if 'Track Name' in df.columns and 'Popularity' in df.columns:
            # 'Artist Name(s)' only enriches the tooltip, so it is selected
            # when present rather than being required.
            selected = [col for col in ('Track Name', 'Artist Name(s)', 'Popularity')
                        if col in df.columns]
            top_songs = df.nlargest(10, 'Popularity')[selected]
            fig3 = px.bar(
                top_songs, y='Track Name', x='Popularity',
                orientation='h', color='Popularity',
                color_continuous_scale='deep',
                title='Top 10 Most Popular Songs',
                labels={'Track Name': 'Song Title',
                        'Popularity': 'Popularity Score'},
                hover_data=[col for col in ('Track Name', 'Artist Name(s)')
                            if col in selected]
            )
            fig3.update_layout(
                xaxis_title='Popularity Score',
                yaxis_title='Song Title',
                template='plotly_white',
                width=900,
                height=500
            )
            st.plotly_chart(fig3)
        else:
            st.error("Cannot plot: 'Track Name' or 'Popularity' column missing.")


def generate_audio_features(df):
    """Render the "Audio Features Analysis" section for a user-chosen feature.

    A select box picks one of ``Danceability``, ``Energy``, ``Tempo`` or
    ``Loudness``. Two tabs follow:

    * Distribution: bar chart of the 20 rows with the largest value of the
      chosen feature, coloured by ``Decade`` when that column exists.
    * By Decade: line chart of the feature's mean per ``Decade``.

    A tab whose required columns are absent shows ``st.error`` instead of
    raising.

    Args:
        df: pandas DataFrame of songs; the chosen feature column must be
            numeric for ``nlargest``/``mean`` to work.
    """
    st.header("Audio Features Analysis")

    feature = st.selectbox(
        "Select Feature", ['Danceability', 'Energy', 'Tempo', 'Loudness']
    )

    tab1, tab2 = st.tabs(["Distribution", "By Decade"])

    with tab1:
        st.markdown(
            f"**Top 20 {feature} Values:** This bar chart displays the distribution of the top 20 songs based on {feature}.")
        if feature in df.columns and 'Track Name' in df.columns:
            top_features = df.nlargest(20, feature)

            fig = px.bar(
                top_features, x='Track Name', y=feature,
                color='Decade' if 'Decade' in df.columns else None,
                title=f'Top 20 Songs by {feature}',
                color_discrete_sequence=px.colors.qualitative.Set2,
                # Tooltip columns are extras: request only those that exist.
                hover_data=[col for col in ('Track Name', 'Artist Name(s)')
                            if col in df.columns]
            )
            fig.update_layout(xaxis_tickangle=-45, template='plotly_white')
            st.plotly_chart(fig)
        else:
            st.error(
                f"Cannot plot: '{feature}' or 'Track Name' column missing.")

    with tab2:
        st.markdown(
            f"**{feature} by Decade:** This line chart compares the top {feature} trends over different decades.")

        if 'Decade' in df.columns and feature in df.columns:
            avg_feature_by_decade = df.groupby(
                'Decade')[feature].mean().reset_index()

            fig2 = px.line(
                avg_feature_by_decade, x='Decade', y=feature,
                title=f'Average {feature} by Decade',
                markers=True,
                color_discrete_sequence=['red'],
                hover_data=['Decade']
            )
            fig2.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig2)
        else:
            st.error(f"Cannot plot: 'Decade' or '{feature}' column missing.")


def generate_genre_analysis(df):
    """Render the "Genre & Artist Analysis" section for the top 10 songs.

    All three tabs work on the ten rows with the highest ``Popularity``:

    * Top Genres: how often each ``Genres`` value occurs among them.
    * Genre Distribution: stacked bars of ``Popularity`` per ``Track Name``,
      coloured by genre.
    * Artist Popularity: summed ``Popularity`` per ``Artist Name(s)`` with the
      artist's track names and track count in the tooltip.

    A tab whose required columns are absent shows ``st.error`` instead of
    raising.

    Args:
        df: pandas DataFrame of songs.
            NOTE(review): ``explode('Genres')`` only splits list-like cells;
            if ``Genres`` holds comma-separated strings each string is counted
            as a single genre -- confirm how the caller prepares the column.
    """
    st.header("Genre & Artist Analysis")
    tab1, tab2, tab3 = st.tabs(
        ["Top Genres", "Genre Distribution", "Artist Popularity"])

    has_popularity = 'Popularity' in df.columns
    # Computed once up front because every tab uses the same ten rows.
    top_songs = df.nlargest(10, 'Popularity') if has_popularity else None

    with tab1:
        st.markdown(
            "**Top Genres in Top 10 Songs:** Displays the most common genres among the top 10 most popular songs.")
        if has_popularity and 'Genres' in df.columns:
            # rename_axis + reset_index(name=...) pins the column names to
            # 'Genres'/'count' regardless of the pandas version's defaults.
            top_genres = (
                top_songs.explode('Genres')['Genres']
                .value_counts()
                .rename_axis('Genres')
                .reset_index(name='count')
            )
            fig1 = px.bar(
                top_genres, x='count', y='Genres',
                orientation='h', color='count',
                color_continuous_scale='viridis',
                title='Top Genres in Top 10 Songs',
                labels={'count': 'Number of Songs', 'Genres': 'Genre Name'},
                hover_data=['Genres', 'count']
            )
            fig1.update_layout(template='plotly_white', width=900, height=500)
            st.plotly_chart(fig1)
        else:
            st.error("Cannot plot: 'Popularity' or 'Genres' column missing.")

    with tab2:
        st.markdown(
            "**Genre Distribution in Top 10 Songs:** Shows how different genres contribute to the top 10 songs.")
        if (has_popularity and 'Genres' in df.columns
                and 'Track Name' in df.columns):
            genre_song_data = top_songs.explode('Genres')
            fig2 = px.bar(
                genre_song_data, x='Track Name', y='Popularity', color='Genres',
                title='Genre Distribution in Top 10 Songs',
                labels={'Track Name': 'Song Title',
                        'Popularity': 'Popularity Score', 'Genres': 'Genre'},
                barmode='stack',
                hover_data=['Track Name', 'Genres']
            )
            fig2.update_layout(template='plotly_white', width=900, height=500)
            st.plotly_chart(fig2)
        else:
            st.error(
                "Cannot plot: 'Popularity', 'Genres' or 'Track Name' column missing.")

    with tab3:
        st.markdown(
            "**Artist Popularity in Top 10 Songs:** Visualizes the most popular artists in the top 10 songs with their song count and names.")
        if (has_popularity and 'Artist Name(s)' in df.columns
                and 'Track Name' in df.columns):
            artist_popularity = (
                top_songs.groupby('Artist Name(s)')
                .agg({'Popularity': 'sum', 'Track Name': list})
                .reset_index()
                .sort_values(by='Popularity', ascending=False)
            )
            artist_popularity['Song Count'] = (
                artist_popularity['Track Name'].apply(len))
            fig3 = px.bar(
                artist_popularity, x='Popularity', y='Artist Name(s)',
                orientation='h', color='Popularity',
                color_continuous_scale='blues',
                title='Artist Popularity in Top 10 Songs',
                labels={'Artist Name(s)': 'Artist Name',
                        'Popularity': 'Total Popularity Score',
                        'Song Count': 'Number of Songs'},
                hover_data={'Artist Name(s)': True, 'Popularity': True,
                            'Song Count': True, 'Track Name': True}
            )
            fig3.update_layout(template='plotly_white', width=900, height=500)
            st.plotly_chart(fig3)
        else:
            st.error(
                "Cannot plot: 'Popularity', 'Artist Name(s)' or 'Track Name' column missing.")


def generate_explicit_trends(df):
    """Draw explicit vs. non-explicit song counts per decade as a line chart.

    Rows of ``df`` are counted per (``Decade``, ``Explicit``) pair and every
    ``Explicit`` value becomes its own line (``True`` is mapped to purple,
    ``False`` to green). When either column is absent an error box is shown
    instead of the chart.

    Args:
        df: pandas DataFrame expected to carry ``Decade`` and ``Explicit``.
    """
    st.header("Explicit Content Trends")
    st.markdown("**Explicit vs Non-Explicit Songs Over Time:** This line chart shows how the number of explicit and non-explicit songs has changed over different decades.")

    required_columns = ('Decade', 'Explicit')
    if not all(column in df.columns for column in required_columns):
        st.error("Cannot plot: 'Decade' or 'Explicit' column missing.")
        return

    # One row per (decade, explicit flag) with the number of songs in 'Count'.
    group_sizes = df.groupby(list(required_columns)).size()
    counts_by_decade = group_sizes.reset_index(name='Count')

    axis_labels = {
        'Decade': 'Decade',
        'Count': 'Number of Songs',
        'Explicit': 'Song Type',
    }
    line_colours = {True: 'purple', False: 'green'}

    chart = px.line(
        counts_by_decade,
        x='Decade',
        y='Count',
        color='Explicit',
        markers=True,
        line_shape='linear',
        title='Explicit vs Non-Explicit Songs Over Time',
        labels=axis_labels,
        color_discrete_map=line_colours,
    )
    chart.update_layout(template='plotly_white', width=900, height=500)
    st.plotly_chart(chart)


def generate_album_insights(df):
    """Render the "Album & Label Insights" section in two tabs.

    * Top Labels: sunburst of the ten most frequent ``Label`` values, sized
      and coloured by how many rows carry each label.
    * Album Popularity: strip plot of the ten albums with the highest mean
      ``Popularity``, coloured by the number of rows per album.

    Each tab shows an error box when a column it checks for is missing.

    Args:
        df: pandas DataFrame of songs.
    """
    st.header("Album & Label Insights")
    labels_tab, albums_tab = st.tabs(["Top Labels", "Album Popularity"])

    with labels_tab:
        st.markdown(
            "**Top Record Labels:** Displays the most dominant record labels based on the number of songs they have released.")
        if 'Label' not in df.columns:
            st.error("Cannot plot: 'Label' column missing.")
        else:
            # The chart below reads the counts from a column named 'count',
            # i.e. it relies on how reset_index() names the value_counts()
            # result.
            label_frequencies = df['Label'].value_counts()
            busiest_labels = label_frequencies.nlargest(10).reset_index()
            label_chart = px.sunburst(
                busiest_labels,
                path=['Label'],
                values='count',
                title='Top Record Labels by Song Count',
                color='count',
                color_continuous_scale='blues',
                labels={'Label': 'Record Label', 'count': 'Number of Songs'},
            )
            label_chart.update_layout(
                template='plotly_white', width=900, height=500)
            st.plotly_chart(label_chart)

    with albums_tab:
        st.markdown(
            "**Album Popularity:** Compares the popularity of albums based on the number of songs and their average popularity score.")
        if not ('Album Name' in df.columns and 'Popularity' in df.columns):
            st.error("Cannot plot: 'Album Name' or 'Popularity' column missing.")
        else:
            per_album = df.groupby('Album Name')['Popularity'].agg(
                ['mean', 'count'])
            per_album = per_album.reset_index()
            # Pre-sorting by (mean, count) puts the album with more songs
            # first among equal means before the top ten are taken.
            ranked_albums = per_album.sort_values(
                by=['mean', 'count'], ascending=[False, False])
            best_albums = ranked_albums.nlargest(10, 'mean')
            album_chart = px.strip(
                best_albums,
                x='mean',
                y='Album Name',
                color='count',
                title='Top 10 Albums by Popularity',
                labels={
                    'Album Name': 'Album',
                    'mean': 'Average Popularity Score',
                    'count': 'Number of Songs',
                },
                hover_data={'Album Name': True, 'count': True, 'mean': True},
                color_discrete_sequence=px.colors.qualitative.Pastel,
            )
            album_chart.update_layout(
                template='plotly_white', width=900, height=500)
            st.plotly_chart(album_chart)


def generate_tempo_mood(df):
    """Render the "Tempo & Mood Analysis" section in two tabs.

    * Tempo Trends: line chart of the mean ``Tempo`` per ``Year``.
    * Mood Scatter: for the ten rows with the highest ``Popularity``, a bar
      chart of the mean ``Energy`` per distinct ``Valence`` value.

    A tab whose required columns are absent shows ``st.error`` instead of
    raising.

    Args:
        df: pandas DataFrame of songs with numeric ``Tempo``, ``Energy`` and
            ``Popularity`` columns.
    """
    st.header("Tempo & Mood Analysis")
    tab1, tab2 = st.tabs(["Tempo Trends", "Mood Scatter"])
    with tab1:
        st.markdown("**Tempo Trends:** Tracks tempo changes.")
        if 'Year' in df.columns and 'Tempo' in df.columns:
            tempo_by_year = df.groupby('Year')['Tempo'].mean().reset_index()
            fig11 = px.line(tempo_by_year, x='Year', y='Tempo',
                            title='Average Tempo Over Time', color_discrete_sequence=['orange'])
            fig11.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig11)
        else:
            st.error("Cannot plot: 'Year' or 'Tempo' column missing.")
    with tab2:
        st.markdown(
            "**Mood Analysis (Valence & Energy):** Categorizes songs based on mood and energy.")
        # 'Popularity' is required too: it selects the ten rows being charted.
        if ('Valence' in df.columns and 'Energy' in df.columns
                and 'Popularity' in df.columns):
            top_songs = df.nlargest(10, 'Popularity')
            mood_by_valence = top_songs.groupby(
                'Valence')['Energy'].mean().reset_index()
            fig12 = px.bar(
                mood_by_valence, x='Valence', y='Energy',
                title='Average Energy Levels by Valence (Mood Analysis)',
                color='Energy', color_continuous_scale='plasma'
            )
            fig12.update_layout(template='plotly_white', width=900, height=500)
            st.plotly_chart(fig12)
        else:
            st.error(
                "Cannot plot: 'Valence', 'Energy' or 'Popularity' column missing.")


def generate_top_artists_songs(df):
    """Render "Top Artists and Songs" as a bar chart and a pie chart.

    * Top Artists: horizontal bars for the ten most frequent
      ``Artist Name(s)`` values.
    * Top Songs: pie chart of the ten rows with the highest ``Popularity``,
      one slice per ``Track Name``.

    NOTE(review): a second function with this exact name is defined later in
    this file; at import time that later definition replaces this one, so
    this version is not the one callers reach by name.

    Args:
        df: pandas DataFrame of songs.
    """
    st.header("Top Artists and Songs")
    artists_tab, songs_tab = st.tabs(["Top Artists", "Top Songs"])

    with artists_tab:
        st.markdown("**Most Featured Artists:** Shows top artists.")
        if 'Artist Name(s)' not in df.columns:
            st.error("Cannot plot: 'Artist Name(s)' column missing.")
        else:
            # The chart reads the frequencies from a column named 'count',
            # i.e. it relies on how reset_index() names the value_counts()
            # result.
            appearances = df['Artist Name(s)'].value_counts()
            most_featured = appearances.nlargest(10).reset_index()
            artist_chart = px.bar(
                most_featured,
                x='count',
                y='Artist Name(s)',
                orientation='h',
                title='Most Featured Artists',
                color='count',
                color_continuous_scale='greens',
            )
            artist_chart.update_layout(
                template='plotly_white', width=900, height=500)
            st.plotly_chart(artist_chart)

    with songs_tab:
        st.markdown("**Top 10 Songs:** Lists top songs.")
        if not ('Track Name' in df.columns and 'Popularity' in df.columns):
            st.error("Cannot plot: 'Track Name' or 'Popularity' column missing.")
        else:
            ten_best = df.nlargest(10, 'Popularity')
            ten_best = ten_best[['Track Name', 'Popularity']]
            song_chart = px.pie(
                ten_best,
                values='Popularity',
                names='Track Name',
                title='Top 10 Songs by Popularity',
                color_discrete_sequence=px.colors.qualitative.Set3,
            )
            song_chart.update_layout(
                template='plotly_white', width=900, height=500)
            st.plotly_chart(song_chart)


def generate_album_release_trends(df):
    """Render the "Album Release Trends" section in two tabs.

    * Albums per Year: line chart of the number of distinct ``Album Name``
      values per ``Year``. When ``Album Name`` is absent it falls back to the
      number of rows per year.
    * Artist-Year Heatmap: grouped bar chart of row counts per
      (``Year``, ``Artist Name(s)``), limited to the ten most frequent
      artists.

    A tab whose required columns are absent shows ``st.error`` instead of
    raising.

    Args:
        df: pandas DataFrame of songs.
            NOTE(review): presumably one row per track, so a plain row count
            per year would report tracks, not albums -- confirm with caller.
    """
    st.header("Album Release Trends")
    tab1, tab2 = st.tabs(["Albums per Year", "Artist-Year Heatmap"])
    with tab1:
        st.markdown("**Albums per Year:** Tracks release patterns.")
        if 'Year' in df.columns:
            if 'Album Name' in df.columns:
                # Count each album once per year so the figure matches the
                # "Number of Albums" title; groupby() already sorts by year.
                albums_per_year = (
                    df.groupby('Year')['Album Name'].nunique()
                    .reset_index(name='count')
                )
            else:
                # No album information: fall back to rows per year. The
                # explicit names keep 'Year'/'count' stable across pandas
                # versions.
                albums_per_year = (
                    df['Year'].value_counts().sort_index()
                    .rename_axis('Year')
                    .reset_index(name='count')
                )
            fig15 = px.line(albums_per_year, x='Year', y='count',
                            title='Number of Albums Released per Year', color_discrete_sequence=['purple'])
            fig15.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig15)
        else:
            st.error("Cannot plot: 'Year' column missing.")
    with tab2:
        st.markdown("**Songs by Artists and Years:** Visualizes trends.")
        if 'Artist Name(s)' in df.columns and 'Year' in df.columns:
            # Filter to only show the top 10 most featured artists
            top_artists = df['Artist Name(s)'].value_counts().nlargest(
                10).index
            filtered_df = df[df['Artist Name(s)'].isin(top_artists)]

            # One row per (year, artist) with the number of songs in 'Count'
            artist_year = filtered_df.groupby(
                ['Year', 'Artist Name(s)']).size().reset_index(name='Count')

            # Create a grouped bar chart
            fig16 = px.bar(
                artist_year, x='Year', y='Count', color='Artist Name(s)',
                title='Songs Released by Top Artists Over the Years',
                labels={'Count': 'Number of Songs', 'Year': 'Year'},
                barmode='group',  # Grouped bars for each artist per year
                color_discrete_sequence=px.colors.qualitative.Set2
            )
            fig16.update_layout(width=900, height=500)
            st.plotly_chart(fig16)
        else:
            st.error("Cannot plot: 'Artist Name(s)' or 'Year' column missing.")


def generate_duration_analysis(df, max_duration_ms=900_000):
    """Render the "Track Duration Analysis" section in two tabs.

    * Distribution: 50-bin histogram of ``Track Duration (ms)``.
    * By Decade: pie chart of the mean ``Track Duration (ms)`` per ``Decade``.

    Rows longer than ``max_duration_ms`` are excluded from both charts. A tab
    whose required columns are absent shows ``st.error`` instead of raising.

    Args:
        df: pandas DataFrame of songs. It is filtered into a new frame; the
            caller's object is not modified.
        max_duration_ms: Longest track duration, in milliseconds, kept for
            the charts. Defaults to 900,000 ms (15 minutes).
    """
    st.header("Track Duration Analysis")
    tab1, tab2 = st.tabs(["Distribution", "By Decade"])

    duration_col = 'Track Duration (ms)'
    has_duration = duration_col in df.columns
    if has_duration:
        # Filter only once the column is known to exist; indexing it
        # unconditionally would raise KeyError before the tabs could report
        # the missing column.
        df = df[df[duration_col] <= max_duration_ms]

    with tab1:
        st.markdown(
            "**Track Duration Distribution:** Illustrates how track durations vary, helping identify common song lengths.")
        if has_duration:
            fig17 = px.histogram(
                df, x='Track Duration (ms)',
                title='Track Duration Distribution (Filtered)',
                nbins=50,
                color_discrete_sequence=['orange']
            )
            fig17.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig17)
        else:
            st.error("Cannot plot: 'Track Duration (ms)' column missing.")

    with tab2:
        st.markdown(
            "**Duration by Decade:** Compares the evolution of average track durations across decades, showing historical trends.")
        if 'Decade' in df.columns and has_duration:
            fig18 = px.pie(
                df.groupby('Decade')[
                    'Track Duration (ms)'].mean().reset_index(),
                names='Decade', values='Track Duration (ms)',
                title='Average Track Duration by Decade',
                color_discrete_sequence=px.colors.qualitative.Set2
            )
            fig18.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig18)
        else:
            st.error(
                "Cannot plot: 'Decade' or 'Track Duration (ms)' column missing.")


def generate_streaming_insights(df):
    """Render the "Streaming and Engagement Insights" section in two tabs.

    * Popularity vs Duration: track durations are converted to minutes,
      bucketed into ``0-2``, ``2-4``, ``4-6``, ``6-8``, ``8-10`` and ``10+``,
      and the mean ``Popularity`` per bucket is drawn as a spline line.
    * Time Signature: bar chart of the mean ``Popularity`` per
      ``Time Signature``.

    A tab whose required columns are absent shows ``st.error`` instead of
    raising. The caller's DataFrame is not modified.

    Args:
        df: pandas DataFrame of songs with numeric ``Track Duration (ms)``
            and ``Popularity`` columns.
    """
    st.header("Streaming and Engagement Insights")
    tab1, tab2 = st.tabs(["Popularity vs Duration", "Time Signature"])

    with tab1:
        st.markdown(
            "**Popularity vs Track Duration:** This line chart shows the trend of song popularity based on their duration.")

        if 'Track Duration (ms)' in df.columns and 'Popularity' in df.columns:
            # Keep the derived minutes in a standalone Series rather than
            # adding a column to the DataFrame the caller passed in.
            duration_minutes = (df['Track Duration (ms)'] / 60000).rename(
                'Duration (minutes)')
            # The last bucket is open-ended so tracks longer than 15 minutes
            # are counted under '10+' rather than dropped as NaN.
            duration_bins = pd.cut(
                duration_minutes,
                bins=[0, 2, 4, 6, 8, 10, float('inf')],
                labels=['0-2', '2-4', '4-6', '6-8', '8-10', '10+'])
            # observed=False keeps every bucket on the x axis (empty ones get
            # NaN) and makes the categorical-groupby behaviour explicit.
            avg_popularity = df.groupby(duration_bins, observed=False)[
                'Popularity'].mean().reset_index()

            fig1 = px.line(
                avg_popularity,
                x='Duration (minutes)',
                y='Popularity',
                title='Popularity vs. Track Duration',
                markers=True,  # Adds points to the line
                line_shape='spline',  # Smoothens the line
                color_discrete_sequence=['blue']
            )
            fig1.update_layout(
                template='plotly_white', xaxis_title='Track Duration (Minutes)', yaxis_title='Average Popularity')
            st.plotly_chart(fig1)
        else:
            st.error(
                "Cannot plot: 'Track Duration (ms)' or 'Popularity' column missing.")

    with tab2:
        st.markdown(
            "**Popularity by Time Signature:** This bar chart compares the average popularity of songs based on their time signatures.")

        if 'Time Signature' in df.columns and 'Popularity' in df.columns:
            pop_by_time = df.groupby('Time Signature')[
                'Popularity'].mean().reset_index()
            fig2 = px.bar(
                pop_by_time,
                x='Time Signature',
                y='Popularity',
                title='Average Popularity by Time Signature',
                color='Popularity',
                color_continuous_scale='purples'
            )
            fig2.update_layout(
                template='plotly_white', xaxis_title='Time Signature', yaxis_title='Average Popularity')
            st.plotly_chart(fig2)
        else:
            st.error(
                "Cannot plot: 'Time Signature' or 'Popularity' column missing.")


def generate_feature_comparisons(df):
    """Render the "Feature Comparisons Across Decades" section in two tabs.

    * Feature Comparison: grouped bars of the mean ``Danceability``,
      ``Energy`` and ``Valence`` per ``Decade``.
    * Loudness Trends: line chart of the mean ``Loudness`` per ``Year``.

    Each tab shows an error box when a column it checks for is missing.

    Args:
        df: pandas DataFrame of songs.
    """
    st.header("Feature Comparisons Across Decades")
    comparison_tab, loudness_tab = st.tabs(
        ["Feature Comparison", "Loudness Trends"])

    with comparison_tab:
        st.markdown("**Feature Comparison:** Compares features across decades.")
        if 'Decade' not in df.columns:
            st.error("Cannot plot: 'Decade' column missing.")
        else:
            compared_features = ['Danceability', 'Energy', 'Valence']
            decade_means = df.groupby('Decade')[compared_features].mean()
            decade_means = decade_means.reset_index()
            # Long format: one row per (decade, feature) so each feature
            # becomes its own bar colour.
            long_form = decade_means.melt(id_vars='Decade')
            comparison_chart = px.bar(
                long_form,
                x='Decade',
                y='value',
                color='variable',
                barmode='group',
                title='Feature Comparison by Decade',
                color_discrete_sequence=px.colors.qualitative.Pastel,
            )
            comparison_chart.update_layout(
                template='plotly_white', width=800, height=400)
            st.plotly_chart(comparison_chart)

    with loudness_tab:
        st.markdown("**Loudness Over Time:** Tracks loudness trends.")
        if not ('Year' in df.columns and 'Loudness' in df.columns):
            st.error("Cannot plot: 'Year' or 'Loudness' column missing.")
        else:
            yearly_loudness = df.groupby('Year')['Loudness'].mean()
            yearly_loudness = yearly_loudness.reset_index()
            loudness_chart = px.line(
                yearly_loudness,
                x='Year',
                y='Loudness',
                title='Average Loudness Over Time',
                color_discrete_sequence=['green'],
            )
            loudness_chart.update_layout(
                template='plotly_white', width=800, height=400)
            st.plotly_chart(loudness_chart)


def generate_top_artists_songs(df):
    """Render "Top Artists and Songs" as two sunburst charts.

    * Top Artists: sunburst of the ten most frequent ``Artist Name(s)``
      values, sized and coloured by row count.
    * Top Songs: sunburst of row counts per ``Year`` (inner ring) and
      ``Artist Name(s)`` (outer ring), limited to the ten most frequent
      artists as the tab's description states.

    This definition rebinds a function of the same name defined earlier in
    this file, so it is the one in effect once the module is loaded.

    A tab whose required columns are absent shows ``st.error`` instead of
    raising.

    Args:
        df: pandas DataFrame of songs.
    """
    st.header("Top Artists and Songs")
    tab1, tab2 = st.tabs(["Top Artists", "Top Songs"])

    with tab1:
        st.markdown(
            "**Most Featured Artists:** Displays the top 10 artists with the highest song counts, highlighting their dominance in the dataset.")
        if 'Artist Name(s)' in df.columns:
            top_artists = df['Artist Name(s)'].value_counts().nlargest(
                10).reset_index()
            # Explicit names keep the frame independent of how the pandas
            # version labels the value_counts() result.
            top_artists.columns = ['Artist Name(s)', 'Count']
            fig13 = px.sunburst(
                top_artists, path=['Artist Name(s)'], values='Count',
                title='Most Featured Artists',
                color='Count',
                color_continuous_scale='greens'
            )
            fig13.update_layout(template='plotly_white', width=900, height=500)
            st.plotly_chart(fig13)
        else:
            st.error("Cannot plot: 'Artist Name(s)' column missing.")

    with tab2:
        st.markdown(
            "**Songs by Artists and Years:** Analyzes song release trends across different years, focusing on the top artists.")
        if 'Artist Name(s)' in df.columns and 'Year' in df.columns:
            # Restrict to the ten most frequent artists: the description
            # promises a focus on top artists, and a sunburst over every
            # (year, artist) pair in the data is not readable.
            leading_artists = df['Artist Name(s)'].value_counts().nlargest(
                10).index
            leading_rows = df[df['Artist Name(s)'].isin(leading_artists)]
            artist_year = leading_rows.groupby(
                ['Artist Name(s)', 'Year']).size().reset_index(name='Count')
            fig16 = px.sunburst(
                artist_year, path=['Year', 'Artist Name(s)'], values='Count',
                title='Songs Released by Artists Over the Years',
                color='Count',
                # NOTE(review): Set2 is a qualitative palette used here as a
                # continuous scale for the numeric 'Count'; a sequential scale
                # may be what was intended -- confirm before changing.
                color_continuous_scale=px.colors.qualitative.Set2
            )
            fig16.update_layout(width=900, height=500)
            st.plotly_chart(fig16)
        else:
            st.error("Cannot plot: 'Artist Name(s)' or 'Year' column missing.")


def _split_multi_value_cell(cell):
    """Return the individual names stored in one artist/genre cell."""
    if isinstance(cell, (list, tuple, set, np.ndarray)):
        # Already split by an earlier processing step; use the items as-is
        # instead of stringifying the whole container.
        return [str(item) for item in cell]
    return str(cell).split(', ')


def _count_pairs(cells, dedupe=False):
    """Count how often each unordered pair of names shares a cell."""
    pair_counts = Counter()
    for cell in cells:
        names = _split_multi_value_cell(cell)
        if dedupe:
            names = set(names)
        # Sorting makes (a, b) and (b, a) land on the same key.
        pair_counts.update(combinations(sorted(names), 2))
    return pair_counts


def generate_network_analysis(df):
    """Render the "Network Analysis" section in two tabs.

    * Artist Collaborations: the 20 most frequent artist pairs appearing in
      the same ``Artist Name(s)`` cell, drawn as a weighted graph.
    * Genre Crossover: the 20 most frequent genre pairs appearing in the same
      ``Genres`` cell, drawn as a symmetric heatmap.

    Cells are split on ``', '`` when they are strings and used as-is when
    they are already list-like. Missing cells are skipped. Columns are
    matched by their whitespace-stripped name. The caller's DataFrame is not
    modified.

    Args:
        df: pandas DataFrame of songs.
    """
    st.header("Network Analysis")
    tab1, tab2 = st.tabs(["Artist Collaborations", "Genre Crossover"])

    # Match columns by stripped name without renaming them on the caller's
    # DataFrame; other views and later reruns may share the same object.
    stripped_to_actual = {str(name).strip(): name for name in df.columns}
    available_columns = ", ".join(stripped_to_actual)

    with tab1:
        st.markdown(
            "**Top Collaborating Artists:** This chart highlights artists who frequently collaborate with each other.")
        if 'Artist Name(s)' in stripped_to_actual:
            artist_cells = df[stripped_to_actual['Artist Name(s)']].dropna()
            # most_common() keeps first-seen order among equal counts, like
            # the stable descending sort it replaces.
            top_collabs = _count_pairs(artist_cells).most_common(20)

            if not top_collabs:
                st.info("No artist collaborations found.")
            else:
                G = nx.Graph()
                for (artist1, artist2), weight in top_collabs:
                    G.add_edge(artist1, artist2, weight=weight)

                pos = nx.spring_layout(G, seed=42)
                # Draw on an explicit figure rather than pyplot's global
                # state, and close it afterwards so reruns do not accumulate
                # open figures.
                collab_fig, ax = plt.subplots(figsize=(12, 8))
                nx.draw_networkx_edges(
                    G, pos, ax=ax, alpha=0.5,
                    width=[G[u][v]['weight'] for u, v in G.edges()])
                nx.draw_networkx_nodes(
                    G, pos, ax=ax, node_size=700, node_color='orange')
                nx.draw_networkx_labels(
                    G, pos, ax=ax, font_size=10, font_weight='bold')
                ax.set_title("Top 20 Artist Collaborations")
                st.pyplot(collab_fig)
                plt.close(collab_fig)
        else:
            st.error(
                "Cannot plot: 'Artist Name(s)' column missing. Available columns: " + available_columns)

    with tab2:
        st.markdown(
            "**Genre Crossover:** This chart shows how different music genres are connected and often blend together.")
        if 'Genres' in stripped_to_actual:
            genre_cells = df[stripped_to_actual['Genres']].dropna()
            top_genre_pairs = _count_pairs(
                genre_cells, dedupe=True).most_common(20)

            if not top_genre_pairs:
                st.info("No genre crossovers found.")
            else:
                # Sorted so the axis order is the same on every rerun.
                labels = sorted(
                    {genre for pair, _ in top_genre_pairs for genre in pair})
                label_index = {label: i for i, label in enumerate(labels)}

                matrix = [[0] * len(labels) for _ in labels]
                for (genre1, genre2), count in top_genre_pairs:
                    i, j = label_index[genre1], label_index[genre2]
                    # Pairs are unordered, so mirror across the diagonal.
                    matrix[i][j] = count
                    matrix[j][i] = count

                fig = go.Figure(data=[go.Heatmap(
                    z=matrix, x=labels, y=labels, colorscale='OrRd', text=matrix, hoverinfo='text')])
                fig.update_layout(title="Genre Crossover Chord Diagram",
                                  xaxis_title="Genres", yaxis_title="Genres")
                st.plotly_chart(fig)
        else:
            st.error(
                "Cannot plot: 'Genres' column missing. Available columns: " + available_columns)