import os
import re
import tempfile

import gradio as gr
import pandas as pd
import plotly.express as px
from googleapiclient.discovery import build
from transformers import pipeline

# Load Transformers pipelines: a binary sentiment model (POSITIVE/NEGATIVE, pinned
# to the pipeline's documented default to silence the "no model was supplied"
# warning) and unitary/toxic-bert; top_k=None returns a score for every label.
sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
toxic_classifier = pipeline("text-classification", model="unitary/toxic-bert", top_k=None)

# YouTube Data API key, read from the environment instead of being hard-coded
# (never commit real keys; on Hugging Face Spaces, set this as a repo secret).
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")

# Extract video ID from URL
def extract_video_id(url):
    patterns = [
        r"(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/)([^&\n?#]+)",
        r"youtube\.com\/shorts\/([^&\n?#]+)"
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
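
# Illustrative behavior (the video ID below is a made-up placeholder):
#   extract_video_id("https://www.youtube.com/watch?v=abc123XYZ_0") -> "abc123XYZ_0"
#   extract_video_id("https://youtu.be/abc123XYZ_0")                -> "abc123XYZ_0"
#   extract_video_id("https://example.com/not-youtube")             -> None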

# Fetch comments from YouTube API
def fetch_comments(video_url, max_results=10):
    video_id = extract_video_id(video_url)
    if not video_id:
        return pd.DataFrame({"error": ["Invalid YouTube URL"]})

    if not YOUTUBE_API_KEY:
        return pd.DataFrame({"error": ["YOUTUBE_API_KEY environment variable is not set"]})

    youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
    request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=max_results,
        textFormat="plainText"
    )
    comments = []
    try:
        response = request.execute()
        for item in response["items"]:
            comment = item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            comments.append(comment)
        return pd.DataFrame({"Comment": comments})
    except Exception as e:
        return pd.DataFrame({"error": [str(e)]})
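
# Note: commentThreads().list caps maxResults at 100 per page; fetching more than
# that would require following response["nextPageToken"] across pages, which this
# sketch does not do.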

# Analyze sentiments and toxicity
def analyze_video(video_url, max_comments=10, sentiment_filter="All", toxicity_filter="All"):
    df = fetch_comments(video_url, max_comments)
    if "error" in df.columns:
        return df.to_string(index=False), None, None

    results = []
    for comment in df["Comment"]:
        # Truncate to 512 characters as a crude guard against the models' 512-token
        # input limit (characters are only a rough proxy for tokens).
        sentiment_result = sentiment_pipeline(comment[:512])[0]
        toxic_results = toxic_classifier(comment[:512])
        # top_k=None yields one dict per toxicity label; keep the highest-scoring one.
        toxic_labels = toxic_results[0]
        top_label = max(toxic_labels, key=lambda x: x["score"])

        sentiment = sentiment_result["label"]
        sentiment_score = round(sentiment_result["score"], 3)

        toxic_label = top_label["label"]
        toxic_score = round(top_label["score"], 3)
        # Report the label only when it clears a 0.5 confidence threshold.
        toxic_tag = toxic_label if toxic_score > 0.5 else "Not Toxic"

        results.append({
            "Comment": comment,
            "Sentiment": sentiment,
            "Sentiment Score": sentiment_score,
            "Toxicity": toxic_tag,
            "Toxicity Score": toxic_score
        })

    result_df = pd.DataFrame(results)

    # Apply filters
    if sentiment_filter != "All":
        result_df = result_df[result_df["Sentiment"] == sentiment_filter]

    if toxicity_filter != "All":
        result_df = result_df[result_df["Toxicity"] == toxicity_filter]

    # Generate sentiment distribution plot
    fig = px.histogram(result_df, x="Sentiment", title="Sentiment Distribution", color="Sentiment")
    fig.update_layout(bargap=0.2)

    # Save CSV to a temp file so Gradio's File component can serve it. Writing to
    # the open handle (not f.name) avoids reopening a file that is still held open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", newline="", encoding="utf-8") as f:
        result_df.to_csv(f, index=False)
        csv_file_path = f.name

    return result_df, fig, csv_file_path
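
# Standalone sanity check (hypothetical URL; assumes YOUTUBE_API_KEY is set):
#   table, chart, csv_path = analyze_video("https://youtu.be/abc123XYZ_0", max_comments=5)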

# Gradio UI
with gr.Blocks(title="YouTube Comment Sentiment Analyzer") as demo:
    gr.Markdown("## πŸ“Š YouTube Comment Sentiment & Toxicity Analyzer")

    with gr.Row():
        video_url = gr.Textbox(label="πŸ“Ί YouTube Video URL", placeholder="Paste the video link here")
        max_comments = gr.Slider(1, 100, value=10, step=1, label="Number of Comments")

    with gr.Row():
        sentiment_filter = gr.Dropdown(choices=["All", "POSITIVE", "NEGATIVE"], value="All", label="Filter by Sentiment")
        # Choices match unitary/toxic-bert's Jigsaw labels so the filter can actually hit.
        toxicity_filter = gr.Dropdown(choices=["All", "toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate", "Not Toxic"], value="All", label="Filter by Toxicity")

    analyze_btn = gr.Button("Analyze Comments")

    with gr.Tab("Analysis Table"):
        output_df = gr.Dataframe(label="Sentiment & Toxicity Analysis", interactive=False)

    with gr.Tab("Sentiment Chart"):
        output_plot = gr.Plot(label="Sentiment Distribution")

    with gr.Tab("Download CSV"):
        download_btn = gr.File(label="Download CSV")

    analyze_btn.click(
        fn=analyze_video,
        inputs=[video_url, max_comments, sentiment_filter, toxicity_filter],
        outputs=[output_df, output_plot, download_btn]
    )

demo.launch()  # No share=True for Hugging Face Spaces
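
# To run locally (assumes dependencies are installed, e.g.
#   pip install gradio pandas transformers torch google-api-python-client plotly
# and that this file is saved as app.py):
#   YOUTUBE_API_KEY=<your-key> python app.py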