# YouTube Comment Sentiment & Toxicity Analyzer — Gradio app (Hugging Face Space)
import os
import re
import tempfile

import gradio as gr
import pandas as pd
import plotly.express as px
from googleapiclient.discovery import build
from transformers import pipeline
# Load Transformers pipelines once at import time (model downloads happen here).
# Pin the sentiment model explicitly: bare pipeline("sentiment-analysis") emits a
# "no model was supplied" warning and the implicit default may change between
# transformers releases, silently changing results.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
# top_k=None returns scores for ALL toxicity labels, not just the best one.
toxic_classifier = pipeline("text-classification", model="unitary/toxic-bert", top_k=None)
# YouTube Data API v3 key.
# SECURITY: the key was hardcoded in source — it should be rotated and supplied
# via the YOUTUBE_API_KEY environment variable (kept here only as a
# backward-compatible fallback so existing deployments keep working).
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY", "AIzaSyD2Y4klQo0hSo4nhaWJyoDjgmGxtcY5pEQ")
# Extract video ID from URL | |
def extract_video_id(url):
    """Pull the video ID out of a YouTube URL.

    Supports watch, youtu.be, embed, and shorts URL forms. Returns the ID
    string, or None when no known pattern matches.
    """
    id_patterns = (
        r"(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/)([^&\n?#]+)",
        r"youtube\.com\/shorts\/([^&\n?#]+)",
    )
    hits = (re.search(p, url) for p in id_patterns)
    return next((m.group(1) for m in hits if m), None)
# Fetch comments from YouTube API | |
def fetch_comments(video_url, max_results=10):
    """Fetch top-level comments for a YouTube video.

    Parameters:
        video_url: any supported YouTube URL form (see extract_video_id).
        max_results: number of comments to request; clamped to the API's
            per-page maximum of 100. Gradio sliders deliver floats, so the
            value is coerced to int.

    Returns:
        DataFrame with a "Comment" column on success, or a single-row
        DataFrame with an "error" column describing the failure.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return pd.DataFrame({"error": ["Invalid YouTube URL"]})
    # build() and the request construction can themselves raise (bad key,
    # discovery/network failure), so they belong inside the try — the
    # original only guarded execute().
    try:
        youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
        response = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=min(int(max_results), 100),  # API caps maxResults at 100
            textFormat="plainText",
        ).execute()
        comments = [
            item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for item in response["items"]
        ]
        return pd.DataFrame({"Comment": comments})
    except Exception as e:
        # Surface the failure through the same DataFrame contract the UI expects.
        return pd.DataFrame({"error": [str(e)]})
# Analyze sentiments and toxicity | |
def analyze_video(video_url, max_comments=10, sentiment_filter="All", toxicity_filter="All"):
    """Run sentiment and toxicity analysis over a video's comments.

    Parameters:
        video_url: YouTube URL to analyze.
        max_comments: how many comments to fetch.
        sentiment_filter: "All" or an exact sentiment label to keep.
        toxicity_filter: "All" or an exact toxicity tag to keep.

    Returns:
        (result_df, plotly figure, path to a CSV of result_df) on success,
        or (error string, None, None) when the fetch failed.
    """
    df = fetch_comments(video_url, max_comments)
    if "error" in df.columns:
        return df.to_string(index=False), None, None
    rows = []
    for comment in df["Comment"]:
        # NOTE(review): truncating to 512 *characters* — the models' limit is
        # 512 *tokens*, so this is an approximation; confirm it never overflows.
        text = comment[:512]
        sentiment_result = sentiment_pipeline(text)[0]
        label_scores = toxic_classifier(text)[0]
        # top_k=None gives every toxicity label with a score; keep the strongest.
        top_label = max(label_scores, key=lambda s: s["score"])
        toxic_score = round(top_label["score"], 3)
        rows.append({
            "Comment": comment,
            "Sentiment": sentiment_result["label"],
            "Sentiment Score": round(sentiment_result["score"], 3),
            # Below the 0.5 threshold the comment is reported as non-toxic.
            "Toxicity": top_label["label"] if toxic_score > 0.5 else "Not Toxic",
            "Toxicity Score": toxic_score,
        })
    result_df = pd.DataFrame(rows)
    # Apply the UI filters (exact label match).
    if sentiment_filter != "All":
        result_df = result_df[result_df["Sentiment"] == sentiment_filter]
    if toxicity_filter != "All":
        result_df = result_df[result_df["Toxicity"] == toxicity_filter]
    # Sentiment distribution chart.
    fig = px.histogram(result_df, x="Sentiment", title="Sentiment Distribution", color="Sentiment")
    fig.update_layout(bargap=0.2)
    # Persist the CSV for the download widget. Write through the open handle:
    # the original re-opened f.name while the NamedTemporaryFile was still
    # open, which double-opens the file and fails outright on Windows.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".csv", mode="w", newline="", encoding="utf-8"
    ) as f:
        result_df.to_csv(f, index=False)
        csv_file_path = f.name
    return result_df, fig, csv_file_path
# Gradio UI | |
with gr.Blocks(title="YouTube Comment Sentiment Analyzer") as demo: | |
gr.Markdown("## π YouTube Comment Sentiment & Toxicity Analyzer") | |
with gr.Row(): | |
video_url = gr.Textbox(label="πΊ YouTube Video URL", placeholder="Paste the video link here") | |
max_comments = gr.Slider(1, 100, value=10, step=1, label="Number of Comments") | |
with gr.Row(): | |
sentiment_filter = gr.Dropdown(choices=["All", "POSITIVE", "NEGATIVE"], value="All", label="Filter by Sentiment") | |
toxicity_filter = gr.Dropdown(choices=["All", "toxicity", "severe_toxicity", "obscene", "identity_attack", "insult", "threat", "sexual_explicit", "Not Toxic"], value="All", label="Filter by Toxicity") | |
analyze_btn = gr.Button("Analyze Comments") | |
with gr.Tab("Analysis Table"): | |
output_df = gr.Dataframe(label="Sentiment & Toxicity Analysis", interactive=False) | |
with gr.Tab("Sentiment Chart"): | |
output_plot = gr.Plot(label="Sentiment Distribution") | |
with gr.Tab("Download CSV"): | |
download_btn = gr.File(label="Download CSV") | |
analyze_btn.click( | |
fn=analyze_video, | |
inputs=[video_url, max_comments, sentiment_filter, toxicity_filter], | |
outputs=[output_df, output_plot, download_btn] | |
) | |
demo.launch() # No share=True for Hugging Face Spaces |