|
import gradio as gr |
|
import pandas as pd |
|
import plotly.express as px |
|
import plotly.graph_objects as go |
|
from plotly.subplots import make_subplots |
|
import numpy as np |
|
from datetime import datetime |
|
|
|
|
|
def _read_table(path, **read_kwargs):
    """Read a delimited text file and return it with lower-cased column names."""
    return pd.read_csv(path, **read_kwargs).rename(columns=str.lower)


# Source tables. The .csv files use read_csv's default comma separator; the
# .tsv files are read with an explicit tab separator.
guest_by_channel = _read_table("data/table_guests_by_channel.csv")
topic_summaries = _read_table("data/table_topic_summaries.csv")
videos_by_topic = _read_table("data/videos_by_topic.csv")
youtube_metadata = _read_table("data/youtube_metadata.tsv", sep="\t")
guest_timeline = _read_table("data/guest_timeline.tsv", sep="\t")

# Only these two tables get spaces in their column names replaced by
# underscores.
# NOTE(review): videos_by_topic is not normalised the same way, yet topic
# names taken from topic_summaries are looked up in its columns by the topic
# functions below -- confirm the '#...' column names match between the files.
for _table in (guest_by_channel, topic_summaries):
    _table.columns = _table.columns.str.replace(' ', '_')

# Parse publish dates once at import time so the analysis functions can use
# the .dt accessor directly.
for _frame in (youtube_metadata, guest_timeline, videos_by_topic):
    _frame['video_publish_date'] = pd.to_datetime(_frame['video_publish_date'])

# Derived metric, then order guests from most to least viewed; functions that
# take "the first N rows" rely on this ordering.
guest_by_channel = guest_by_channel.assign(
    avg_views_per_channel=guest_by_channel['views_sum'] / guest_by_channel['no_of_channels']
).sort_values('views_sum', ascending=False)

# Channel indicator columns the dashboard knows about; only those actually
# present in the table are offered, in table column order.
_KNOWN_CHANNELS = (
    'adin_live', 'flagrant', 'full_send_podcast', 'impaulsive',
    'lex_fridman', 'pbd_podcast', 'powerfuljre', 'shawn_ryan_show', 'theo_von',
)
channel_opts = [name for name in guest_by_channel.columns if name in _KNOWN_CHANNELS]

# Topic columns are identified by a leading '#'.
topic_opts = [name for name in topic_summaries.columns if name.startswith('#')]

# Distinct guest categories in sorted order, used for the category filter.
_category_values = guest_by_channel['category'].unique().tolist()
categories = sorted(_category_values)
|
|
|
|
|
|
|
def executive_summary():
    """Build the executive dashboard figure.

    The figure is a 2x2 grid. The top row holds two numeric indicators (total
    views and average views per guest, both expressed in millions). The
    bottom row holds two bar charts (guest count per category and total views
    per category), both ordered by descending total views.

    Reads the module-level ``guest_by_channel`` table.

    Returns:
        plotly.graph_objects.Figure: the assembled dashboard.
    """
    total_guests = len(guest_by_channel)
    total_views = guest_by_channel['views_sum'].sum()
    # Guard the empty-table case so the indicator shows 0 instead of failing
    # on (or producing NaN from) a division by zero.
    avg_views_per_guest = total_views / total_guests if total_guests else 0

    category_views = (
        guest_by_channel.groupby('category')['views_sum']
        .sum()
        .sort_values(ascending=False)
    )
    # Re-order the per-category guest counts to follow the views ordering so
    # both bar charts list the categories in the same sequence.
    guest_counts = guest_by_channel['category'].value_counts().loc[category_views.index]

    fig = make_subplots(
        rows=2, cols=2,
        specs=[[{"type": "indicator"}, {"type": "indicator"}],
               [{"type": "xy"}, {"type": "bar"}]],
        subplot_titles=("Total Guest Views (M)", "Avg Views per Guest (M)",
                        "Guest Count by Category", "Views by Category")
    )

    # Top row: headline numbers, scaled to millions.
    fig.add_trace(
        go.Indicator(
            mode="number",
            value=total_views / 1_000_000,
            number={"suffix": "M", "valueformat": ".1f"},
            title={"text": "Total Views"}
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Indicator(
            mode="number",
            value=avg_views_per_guest / 1_000_000,
            number={"suffix": "M", "valueformat": ".1f"},
            title={"text": "Avg Views per Guest"}
        ),
        row=1, col=2
    )

    # Bottom row: per-category breakdowns.
    fig.add_trace(
        go.Bar(
            x=guest_counts.index,
            y=guest_counts.values,
            marker_color='teal',
            text=guest_counts.values,
            textposition='auto',
            name='Guest Count by Category'
        ),
        row=2, col=1
    )
    fig.add_trace(
        go.Bar(
            x=category_views.index,
            y=category_views.values,
            marker_color='indianred',
            text=category_views.values,
            textposition='auto',
            name='Views by Category'
        ),
        row=2, col=2
    )

    fig.update_layout(
        height=600,
        title_text="Executive Dashboard - Key Performance Metrics",
        showlegend=False
    )

    return fig
|
|
|
|
|
def guest_performance_analysis(top_n=20, category_filter=None, gender_filter=None):
    """Bar chart of the most-viewed guests, optionally filtered.

    Relies on the module-level ``guest_by_channel`` table already being
    sorted by ``views_sum`` in descending order, so the first ``top_n`` rows
    after filtering are the top guests.

    Args:
        top_n: Maximum number of guests to show. Coerced to ``int`` because a
            UI slider may deliver the value as a float, which ``head()``
            rejects and which would render as e.g. "20.0" in the title.
        category_filter: Category to keep. ``None``, an empty value or
            ``"All Categories"`` keeps every category.
        gender_filter: ``"Female"`` or ``"Male"`` filters on the
            ``is_a_woman`` column; any other value keeps all guests.

    Returns:
        plotly.graph_objects.Figure: the bar chart, or an empty figure with
        an explanatory title when no guest matches the filters.
    """
    top_n = int(top_n)

    # Boolean indexing and head() return new frames, so the shared table is
    # never modified and no defensive copy is needed.
    filtered_data = guest_by_channel

    if category_filter and category_filter != "All Categories":
        filtered_data = filtered_data[filtered_data['category'] == category_filter]

    # Explicit == True / == False comparisons are kept on purpose: rows whose
    # flag is missing match neither filter.
    if gender_filter == "Female":
        filtered_data = filtered_data[filtered_data['is_a_woman'] == True]
    elif gender_filter == "Male":
        filtered_data = filtered_data[filtered_data['is_a_woman'] == False]

    top_guests = filtered_data.head(top_n)

    # Same "no data" convention as the other analysis functions.
    if top_guests.empty:
        return go.Figure().update_layout(title="No guests match the selected filters")

    fig = px.bar(
        top_guests,
        x='guest',
        y='views_sum',
        color='category',
        hover_data=['no_of_channels', 'avg_views_per_channel'],
        labels={
            'guest': 'Guest Name',
            'views_sum': 'Total Views',
            'category': 'Guest Category',
            'no_of_channels': 'Number of Channels',
            'avg_views_per_channel': 'Avg Views per Channel'
        },
        title=f'Top {top_n} Guests by Total Views',
        height=600
    )

    fig.update_layout(
        xaxis_title="Guest",
        yaxis_title="Total Views",
        # Colouring by category splits the bars into one trace per category;
        # this keeps the x axis ordered by total views across all traces.
        xaxis={'categoryorder': 'total descending'},
        yaxis=dict(tickformat=".2s")
    )

    fig.update_xaxes(tickangle=45)

    return fig
|
|
|
|
|
def channel_comparison(selected_channels, metric="guest_count"):
    """Compare channels on a single metric as a bar chart.

    A guest counts towards a channel when that channel's column in the
    module-level ``guest_by_channel`` table equals 1.

    Args:
        selected_channels: Channel column names to compare. When empty or
            ``None`` the first three entries of ``channel_opts`` are used.
        metric: One of ``"guest_count"``, ``"total_views"``, ``"avg_views"``
            or ``"category_diversity"``.

    Returns:
        plotly.graph_objects.Figure: one bar per selected channel.

    Raises:
        ValueError: If ``metric`` is not one of the supported names.
    """
    # metric name -> (aggregation over a channel's guests, chart title, y label)
    metric_specs = {
        "guest_count": (
            lambda guests: guests.shape[0],
            "Number of Guests per Channel",
            "Guest Count",
        ),
        "total_views": (
            lambda guests: guests['views_sum'].sum(),
            "Total Views per Channel",
            "Total Views",
        ),
        "avg_views": (
            lambda guests: guests['views_sum'].mean(),
            "Average Views per Guest per Channel",
            "Average Views",
        ),
        "category_diversity": (
            lambda guests: len(guests['category'].unique()),
            "Category Diversity per Channel",
            "Number of Unique Categories",
        ),
    }

    # Fail with a clear message instead of reaching the plotting code with
    # the title, label and data never assigned.
    if metric not in metric_specs:
        raise ValueError(
            f"Unknown metric {metric!r}; expected one of {sorted(metric_specs)}"
        )
    aggregate, title, y_label = metric_specs[metric]

    if not selected_channels:
        selected_channels = channel_opts[:3]

    channel_data = {
        channel: aggregate(guest_by_channel[guest_by_channel[channel] == 1])
        for channel in selected_channels
    }

    fig = px.bar(
        x=list(channel_data.keys()),
        y=list(channel_data.values()),
        labels={'x': 'Channel', 'y': y_label},
        title=title,
        height=500
    )

    # View counts are large; use SI-prefixed tick labels for them.
    if metric in ["total_views", "avg_views"]:
        fig.update_layout(yaxis=dict(tickformat=".2s"))

    return fig
|
|
|
|
|
def topic_trend_analysis(selected_topics, time_period="all"):
    """Line chart of the monthly mean of each selected topic column.

    Reads the module-level ``videos_by_topic`` table. Topics that are not
    columns of that table are skipped.

    Args:
        selected_topics: Topic column names to plot. When empty or ``None``
            the first three entries of ``topic_opts`` are used.
        time_period: ``"last_year"`` or ``"last_6_months"`` restricts the
            data to videos published within that window before now; any
            other value (e.g. ``"all"``) applies no date filter.

    Returns:
        plotly.graph_objects.Figure: one line per topic, or an empty figure
        with an explanatory title when none of the topics is available.
    """
    if not selected_topics:
        selected_topics = topic_opts[:3]

    lookback = {
        "last_year": pd.DateOffset(years=1),
        "last_6_months": pd.DateOffset(months=6),
    }.get(time_period)

    filtered_data = videos_by_topic
    if lookback is not None:
        publish_dates = filtered_data['video_publish_date']
        # Build "now" in the column's own timezone (None for naive data) so
        # the comparison never mixes tz-naive and tz-aware timestamps.
        cutoff = pd.Timestamp.now(tz=publish_dates.dt.tz) - lookback
        filtered_data = filtered_data[publish_dates >= cutoff]

    # Copy after filtering: the 'month' column added below must not be
    # written into the shared table or into a slice of it.
    filtered_data = filtered_data.copy()
    filtered_data['month'] = filtered_data['video_publish_date'].dt.to_period('M')

    topic_trends = []
    for topic in selected_topics:
        if topic not in filtered_data.columns:
            continue
        # Every topic's values go into the same 'frequency' column so that,
        # once concatenated, all topics can be plotted against one y column.
        monthly_data = (
            filtered_data.groupby('month')[topic]
            .mean()
            .rename('frequency')
            .reset_index()
        )
        monthly_data['topic'] = topic
        monthly_data['month'] = monthly_data['month'].dt.to_timestamp()
        topic_trends.append(monthly_data)

    if not topic_trends:
        return go.Figure().update_layout(title="No data available for selected topics")

    trend_df = pd.concat(topic_trends, ignore_index=True)

    fig = px.line(
        trend_df,
        x='month',
        y='frequency',
        color='topic',
        labels={
            'month': 'Month',
            'frequency': 'Topic Frequency',
            'topic': 'Topic'
        },
        title='Topic Trends Over Time',
        height=500
    )

    return fig
|
|
|
|
|
def guest_category_roi(metric="views_per_appearance"):
    """Bar chart of view efficiency per guest category.

    Aggregates the module-level ``guest_by_channel`` table by ``category``
    and plots either views per guest or views per appearance, where
    appearances are the summed ``no_of_channels``. Bars are ordered by the
    plotted metric, descending, and coloured and labelled with the number of
    guests in the category.

    Args:
        metric: ``"views_per_guest"`` selects views per guest; any other
            value selects views per appearance.

    Returns:
        plotly.graph_objects.Figure
    """
    per_category = (
        guest_by_channel.groupby('category')
        .agg(
            total_views=('views_sum', 'sum'),
            guest_count=('guest', 'count'),
            total_appearances=('no_of_channels', 'sum'),
        )
        .reset_index()
    )
    per_category = per_category.assign(
        views_per_guest=per_category['total_views'] / per_category['guest_count'],
        views_per_appearance=per_category['total_views'] / per_category['total_appearances'],
    )

    # (column to plot, chart title, axis label) for the requested metric.
    if metric == "views_per_guest":
        y_value, title, y_label = (
            'views_per_guest', 'Views per Guest by Category', 'Views per Guest'
        )
    else:
        y_value, title, y_label = (
            'views_per_appearance', 'Views per Appearance by Category', 'Views per Appearance'
        )

    ranked = per_category.sort_values(y_value, ascending=False)
    axis_labels = {
        'category': 'Guest Category',
        y_value: y_label,
        'guest_count': 'Number of Guests',
    }

    fig = px.bar(
        ranked,
        x='category',
        y=y_value,
        color='guest_count',
        text='guest_count',
        labels=axis_labels,
        title=title,
        height=500,
    )
    fig.update_layout(yaxis={'tickformat': ".2s"})

    return fig
|
|
|
|
|
def content_strategy_recommendations(selected_topics=None):
    """Two-panel figure comparing topics on reach and engagement.

    For each selected topic that is a column of the module-level
    ``videos_by_topic`` table, the videos whose topic value is greater than
    zero are averaged for views, likes and comments. The left panel is a bar
    chart of average views; the right panel plots engagement rate
    ((likes + comments) / views) against average views, with marker size
    taken from average comments.

    Args:
        selected_topics: Topic column names. When empty or ``None`` the first
            five entries of ``topic_opts`` are used.

    Returns:
        plotly.graph_objects.Figure: the two-panel figure, or an empty figure
        with an explanatory title when no topic yields any video.
    """
    topics = selected_topics if selected_topics else topic_opts[:5]

    stats_by_topic = {}
    for topic in topics:
        if topic not in videos_by_topic.columns:
            continue

        matching_videos = videos_by_topic[videos_by_topic[topic] > 0]
        if matching_videos.empty:
            continue

        mean_views = matching_videos['video_view_count'].mean()
        mean_likes = matching_videos['video_like_count'].mean()
        mean_comments = matching_videos['video_comment_count'].mean()

        # Rate is 0 whenever the mean view count is not strictly positive.
        if mean_views > 0:
            rate = (mean_likes + mean_comments) / mean_views
        else:
            rate = 0

        stats_by_topic[topic] = {
            'avg_views': mean_views,
            'avg_likes': mean_likes,
            'avg_comments': mean_comments,
            'engagement_rate': rate,
        }

    # One row per topic; the topic name moves from the index to a column.
    engagement_df = (
        pd.DataFrame.from_dict(stats_by_topic, orient='index')
        .reset_index()
        .rename(columns={'index': 'topic'})
    )

    if engagement_df.empty:
        return go.Figure().update_layout(title="No data available for selected topics")

    fig = make_subplots(
        rows=1, cols=2,
        specs=[[{"type": "bar"}, {"type": "scatter"}]],
        subplot_titles=("Average Views by Topic", "Engagement Analysis"),
    )

    views_bars = go.Bar(
        x=engagement_df['topic'],
        y=engagement_df['avg_views'],
        name='Avg Views',
    )
    engagement_points = go.Scatter(
        x=engagement_df['avg_views'],
        y=engagement_df['engagement_rate'],
        mode='markers+text',
        text=engagement_df['topic'],
        textposition="top center",
        marker={
            'size': engagement_df['avg_comments'] / 100,
            'sizemin': 10,
            'sizemode': 'area',
        },
        name='Engagement Rate',
    )
    fig.add_trace(views_bars, row=1, col=1)
    fig.add_trace(engagement_points, row=1, col=2)

    fig.update_layout(
        height=500,
        title_text="Content Strategy Analysis by Topic",
        showlegend=False,
    )

    # Axis titles, panel by panel.
    fig.update_xaxes(title_text="Topic", row=1, col=1)
    fig.update_yaxes(title_text="Average Views", row=1, col=1)
    fig.update_xaxes(title_text="Average Views", row=1, col=2)
    fig.update_yaxes(title_text="Engagement Rate (Likes+Comments)/Views", row=1, col=2)

    return fig
|
|
|
|
|
def guest_timeline_analysis(selected_guest, view_type="views"):
    """Plot one guest's video views over time.

    Rows for the guest are taken from the module-level ``guest_timeline``
    table and ordered by publish date.

    Args:
        selected_guest: Guest name to look up. When empty or ``None`` the
            guest in the first row of ``guest_by_channel`` is used.
        view_type: ``"views"`` draws per-video view counts, one line per
            channel, with a dashed line at the mean. Any other value draws a
            single running total of views.

    Returns:
        plotly.graph_objects.Figure: the timeline, or an empty figure with an
        explanatory title when the guest has no timeline rows.
    """
    guest_name = selected_guest or guest_by_channel.iloc[0]['guest']

    appearances = guest_timeline[guest_timeline['guest'] == guest_name].copy()
    if appearances.empty:
        return go.Figure().update_layout(title=f"No timeline data available for {guest_name}")

    appearances = appearances.sort_values('video_publish_date')

    if view_type != "views":
        # The running total is order-sensitive, so make sure the rows are in
        # date order before accumulating.
        appearances = appearances.sort_values('video_publish_date')
        appearances['cumulative_views'] = appearances['video_view_count'].cumsum()

        return px.line(
            appearances,
            x='video_publish_date',
            y='cumulative_views',
            markers=True,
            labels={
                'video_publish_date': 'Date',
                'cumulative_views': 'Cumulative Views',
            },
            title=f'Cumulative Views for {guest_name}',
            height=500,
        )

    fig = px.line(
        appearances,
        x='video_publish_date',
        y='video_view_count',
        color='channel_title',
        markers=True,
        labels={
            'video_publish_date': 'Date',
            'video_view_count': 'Views',
            'channel_title': 'Channel',
        },
        title=f'View Count Timeline for {guest_name}',
        height=500,
    )

    # Reference line at the guest's mean views across all plotted videos.
    mean_views = appearances['video_view_count'].mean()
    fig.add_hline(
        y=mean_views,
        line_dash="dash",
        line_color="gray",
        annotation_text=f"Avg: {mean_views:.0f} views",
    )

    return fig
|
|
|
|
|
def channel_growth_analysis(selected_channels):
    """Two-panel figure of monthly average views and video output per channel.

    Rows are taken from the module-level ``youtube_metadata`` table. A
    channel option such as ``'lex_fridman'`` is matched against the
    lower-cased ``channel_title`` with underscores replaced by spaces.

    Args:
        selected_channels: Channel option keys. When empty or ``None`` the
            first three entries of ``channel_opts`` are used.

    Returns:
        plotly.graph_objects.Figure: left panel is average views per video by
        month (lines), right panel is number of videos by month (bars). An
        empty figure with an explanatory title is returned when no metadata
        row matches the selected channels.
    """
    if not selected_channels:
        selected_channels = channel_opts[:3]

    wanted_titles = [ch.replace('_', ' ') for ch in selected_channels]
    title_matches = youtube_metadata['channel_title'].str.lower().isin(wanted_titles)
    # .copy() so that adding the 'month' column below does not write into a
    # slice of the shared youtube_metadata table.
    channel_data = youtube_metadata[title_matches].copy()

    if channel_data.empty:
        return go.Figure().update_layout(title="No data available for selected channels")

    channel_data['month'] = channel_data['video_publish_date'].dt.to_period('M')
    monthly_stats = channel_data.groupby(['channel_title', 'month']).agg(
        avg_views=('video_view_count', 'mean'),
        video_count=('video_id', 'count')
    ).reset_index()

    # Periods are converted to timestamps so they plot on a date axis.
    monthly_stats['month'] = monthly_stats['month'].dt.to_timestamp()

    fig = make_subplots(
        rows=1, cols=2,
        specs=[[{"type": "scatter"}, {"type": "bar"}]],
        subplot_titles=("Average Views per Video Over Time", "Monthly Video Production")
    )

    # Each channel contributes one line and one bar trace. Give both the same
    # explicit colour and a shared legend group, and show only the line in
    # the legend, so every channel appears once with a colour that is valid
    # for both panels.
    palette = px.colors.qualitative.Plotly
    for position, channel in enumerate(monthly_stats['channel_title'].unique()):
        channel_monthly = monthly_stats[monthly_stats['channel_title'] == channel]
        color = palette[position % len(palette)]

        fig.add_trace(
            go.Scatter(
                x=channel_monthly['month'],
                y=channel_monthly['avg_views'],
                mode='lines+markers',
                name=channel,
                legendgroup=channel,
                line=dict(color=color),
                marker=dict(color=color)
            ),
            row=1, col=1
        )

        fig.add_trace(
            go.Bar(
                x=channel_monthly['month'],
                y=channel_monthly['video_count'],
                name=channel,
                legendgroup=channel,
                showlegend=False,
                marker_color=color
            ),
            row=1, col=2
        )

    fig.update_layout(
        height=500,
        title_text="Channel Growth Analysis",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
    )

    fig.update_yaxes(title_text="Average Views per Video", row=1, col=1)
    fig.update_yaxes(title_text="Number of Videos", row=1, col=2)
    fig.update_xaxes(title_text="Month", row=1, col=1)
    fig.update_xaxes(title_text="Month", row=1, col=2)

    return fig
|
|
|
|
|
|
|
# Dashboard layout: one tab per analysis function. Within every tab the
# controls sit in a narrow left column and the plot in a wide right column;
# each button is wired to its analysis function via .click().
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📊 YouTube Content Strategy & Analytics Dashboard")
    gr.Markdown("### Business Intelligence for Content Strategy and Guest Selection")

    # ---- Executive Summary: no inputs, just a trigger button ----
    with gr.Tab("Executive Summary"):
        gr.Markdown("### Key Performance Indicators and Business Overview")
        exec_summary_plot = gr.Plot()
        exec_summary_btn = gr.Button("Generate Executive Summary")
        exec_summary_btn.click(fn=executive_summary, outputs=exec_summary_plot)

    # ---- Guest Performance ----
    with gr.Tab("Guest Performance"):
        gr.Markdown("### Guest Performance Analysis")
        with gr.Row():
            with gr.Column(scale=1):
                top_n = gr.Slider(
                    minimum=5,
                    maximum=50,
                    value=20,
                    step=5,
                    label="Number of Guests",
                )
                category_filter = gr.Dropdown(
                    choices=["All Categories"] + categories,
                    value="All Categories",
                    label="Filter by Category",
                )
                gender_filter = gr.Dropdown(
                    choices=["All", "Male", "Female"],
                    value="All",
                    label="Filter by Gender",
                )
                guest_perf_btn = gr.Button("Analyze Guest Performance")

            with gr.Column(scale=3):
                guest_perf_plot = gr.Plot()

        guest_perf_btn.click(
            fn=guest_performance_analysis,
            inputs=[top_n, category_filter, gender_filter],
            outputs=guest_perf_plot,
        )

    # ---- Channel Analysis ----
    with gr.Tab("Channel Analysis"):
        gr.Markdown("### Channel Comparison and Performance")
        with gr.Row():
            with gr.Column(scale=1):
                channel_select = gr.CheckboxGroup(
                    choices=channel_opts,
                    value=channel_opts[:3],
                    label="Select Channels",
                )
                metric_select = gr.Radio(
                    choices=["guest_count", "total_views", "avg_views", "category_diversity"],
                    value="total_views",
                    label="Comparison Metric",
                )
                channel_btn = gr.Button("Compare Channels")

            with gr.Column(scale=3):
                channel_plot = gr.Plot()

        channel_btn.click(
            fn=channel_comparison,
            inputs=[channel_select, metric_select],
            outputs=channel_plot,
        )

    # ---- Topic Trends ----
    with gr.Tab("Topic Trends"):
        gr.Markdown("### Topic Trend Analysis")
        with gr.Row():
            with gr.Column(scale=1):
                topic_select = gr.CheckboxGroup(
                    choices=topic_opts,
                    value=topic_opts[:3],
                    label="Select Topics",
                )
                time_period = gr.Radio(
                    choices=["all", "last_year", "last_6_months"],
                    value="all",
                    label="Time Period",
                )
                topic_btn = gr.Button("Analyze Topic Trends")

            with gr.Column(scale=3):
                topic_plot = gr.Plot()

        topic_btn.click(
            fn=topic_trend_analysis,
            inputs=[topic_select, time_period],
            outputs=topic_plot,
        )

    # ---- ROI Analysis ----
    with gr.Tab("ROI Analysis"):
        gr.Markdown("### Return on Investment by Guest Category")
        with gr.Row():
            with gr.Column(scale=1):
                roi_metric = gr.Radio(
                    choices=["views_per_appearance", "views_per_guest"],
                    value="views_per_appearance",
                    label="ROI Metric",
                )
                roi_btn = gr.Button("Calculate ROI")

            with gr.Column(scale=3):
                roi_plot = gr.Plot()

        roi_btn.click(
            fn=guest_category_roi,
            inputs=[roi_metric],
            outputs=roi_plot,
        )

    # ---- Guest Timeline ----
    with gr.Tab("Guest Timeline"):
        gr.Markdown("### Guest Performance Timeline")
        with gr.Row():
            with gr.Column(scale=1):
                # Guest names are offered in reverse alphabetical order; no
                # default is set, so the callback receives None until the
                # user picks a guest.
                guest_names = sorted(
                    guest_by_channel['guest'].unique().tolist(),
                    reverse=True,
                )
                guest_select = gr.Dropdown(choices=guest_names, label="Select Guest")
                timeline_type = gr.Radio(
                    choices=["views", "cumulative"],
                    value="views",
                    label="Timeline View",
                )
                timeline_btn = gr.Button("Analyze Timeline")

            with gr.Column(scale=3):
                timeline_plot = gr.Plot()

        timeline_btn.click(
            fn=guest_timeline_analysis,
            inputs=[guest_select, timeline_type],
            outputs=timeline_plot,
        )

    # ---- Channel Growth ----
    with gr.Tab("Channel Growth"):
        gr.Markdown("### Channel Growth Analysis")
        with gr.Row():
            with gr.Column(scale=1):
                growth_channels = gr.CheckboxGroup(
                    choices=channel_opts,
                    value=channel_opts[:3],
                    label="Select Channels",
                )
                growth_btn = gr.Button("Analyze Growth")

            with gr.Column(scale=3):
                growth_plot = gr.Plot()

        growth_btn.click(
            fn=channel_growth_analysis,
            inputs=[growth_channels],
            outputs=growth_plot,
        )


# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()