# app.py - uploaded by Ujeshhh; commit 0f32409 ("Update app.py"), verified.
import os
import re
import tempfile

import gradio as gr
import pandas as pd
import plotly.express as px
from googleapiclient.discovery import build
from transformers import pipeline
# Load Transformers Pipelines
# The sentiment checkpoint is named explicitly instead of relying on the
# library's implicit default for "sentiment-analysis": the UI filters on this
# model's POSITIVE / NEGATIVE labels, so the model must not change silently.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
# top_k=None asks for the score of every label rather than only the best one.
toxic_classifier = pipeline("text-classification", model="unitary/toxic-bert", top_k=None)
# YouTube API Key
# Prefer the YOUTUBE_API_KEY environment variable (for example a Space secret)
# so the credential can be supplied and rotated without editing the code.
# NOTE(security): the literal fallback is a credential committed to source
# control. It is kept only so existing deployments keep working; rotate the key
# and delete the fallback once the environment variable is configured.
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") or "AIzaSyD2Y4klQo0hSo4nhaWJyoDjgmGxtcY5pEQ"
# Extract video ID from URL
def extract_video_id(url):
    """Return the video ID embedded in a YouTube URL, or ``None``.

    Recognised forms are ``youtube.com/watch?...v=ID`` (``v`` may appear
    anywhere in the query string), ``youtu.be/ID``, ``youtube.com/embed/ID``,
    ``youtube.com/shorts/ID`` and ``youtube.com/live/ID``.

    Args:
        url: The URL text. Surrounding whitespace is ignored.

    Returns:
        The ID as a string, or ``None`` when ``url`` is not a string or no
        pattern matches.
    """
    if not isinstance(url, str):
        return None
    url = url.strip()

    patterns = (
        # The lazily-optional group skips any query parameters that precede
        # "v=", while still preferring a "v=" that comes first.
        r"youtube\.com/watch\?(?:[^#\s]*?&)??v=([^&\n?#]+)",
        r"(?:youtu\.be/|youtube\.com/embed/)([^&\n?#]+)",
        r"youtube\.com/(?:shorts|live)/([^&\n?#]+)",
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
# Fetch comments from YouTube API
def fetch_comments(video_url, max_results=10):
    """Fetch top-level comments of a YouTube video as a DataFrame.

    Args:
        video_url: URL of the video; its ID is obtained with
            ``extract_video_id``.
        max_results: Desired number of comment threads. It is converted to an
            integer and clamped to 1-100 before being sent as ``maxResults``
            (the per-page range documented for ``commentThreads.list``).

    Returns:
        On success, a DataFrame with one ``"Comment"`` column holding the
        plain-text body of each top-level comment (possibly zero rows).
        Otherwise a DataFrame with one ``"error"`` column and a single row:
        ``"Invalid YouTube URL"`` when no video ID is found, or the text of the
        exception raised while building the client, calling the API or reading
        the response.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return pd.DataFrame({"error": ["Invalid YouTube URL"]})

    # This is the boundary towards the UI: every failure, including client
    # construction and a bad max_results value, is reported through the
    # "error" column instead of propagating.
    try:
        page_size = max(1, min(int(max_results), 100))
        youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
        response = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=page_size,
            textFormat="plainText",
        ).execute()
        comments = [
            item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for item in response.get("items", [])
        ]
    except Exception as e:
        return pd.DataFrame({"error": [str(e)]})
    return pd.DataFrame({"Comment": comments})
# Analyze sentiments and toxicity
def analyze_video(video_url, max_comments=10, sentiment_filter="All", toxicity_filter="All"):
    """Classify a video's comments and build the table, chart and CSV export.

    Args:
        video_url: YouTube video URL, passed to ``fetch_comments``.
        max_comments: Number of comments to request.
        sentiment_filter: ``"All"`` or a sentiment label; other rows are dropped.
        toxicity_filter: ``"All"`` or a value of the ``"Toxicity"`` column;
            other rows are dropped.

    Returns:
        A ``(table, figure, csv_path)`` tuple. ``table`` is a DataFrame with the
        columns ``Comment``, ``Sentiment``, ``Sentiment Score``, ``Toxicity``
        and ``Toxicity Score``; ``figure`` is a histogram of the ``Sentiment``
        column; ``csv_path`` is the path of a temporary CSV file holding the
        table. When fetching fails the tuple is ``(error_frame, None, None)``,
        where ``error_frame`` is the one-column ``"error"`` DataFrame returned
        by ``fetch_comments``.
    """
    df = fetch_comments(video_url, max_comments)
    if "error" in df.columns:
        # Hand back the frame itself rather than its string rendering: the first
        # output feeds a table component, and Gradio's Dataframe documents a
        # plain string value as a path to a CSV file.
        return df, None, None

    columns = ["Comment", "Sentiment", "Sentiment Score", "Toxicity", "Toxicity Score"]
    rows = []
    for comment in df["Comment"]:
        # The slice limits characters, not tokens; truncation=True additionally
        # lets the tokenizer cut the input to the model's maximum length.
        text = comment[:512]
        sentiment_result = sentiment_pipeline(text, truncation=True)[0]
        toxic_labels = toxic_classifier(text, truncation=True)[0]
        top_label = max(toxic_labels, key=lambda entry: entry["score"])
        toxic_score = round(top_label["score"], 3)
        rows.append({
            "Comment": comment,
            "Sentiment": sentiment_result["label"],
            "Sentiment Score": round(sentiment_result["score"], 3),
            # Only the highest-scoring label is reported, and only above 0.5.
            "Toxicity": top_label["label"] if toxic_score > 0.5 else "Not Toxic",
            "Toxicity Score": toxic_score,
        })

    # Naming the columns keeps them present when no comments were fetched, so
    # the filters and the chart below can still look them up.
    result_df = pd.DataFrame(rows, columns=columns)

    # Apply filters
    if sentiment_filter != "All":
        result_df = result_df[result_df["Sentiment"] == sentiment_filter]
    if toxicity_filter != "All":
        result_df = result_df[result_df["Toxicity"] == toxicity_filter]

    # Generate sentiment distribution plot
    fig = px.histogram(result_df, x="Sentiment", title="Sentiment Distribution", color="Sentiment")
    fig.update_layout(bargap=0.2)

    # Save CSV to temp file. delete=False keeps the file on disk after the
    # handle closes, because only its path is returned to the caller.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".csv", mode="w", newline="", encoding="utf-8"
    ) as f:
        # Write through the open handle instead of re-opening the file by name.
        result_df.to_csv(f, index=False)
        csv_file_path = f.name

    return result_df, fig, csv_file_path
# Gradio UI
with gr.Blocks(title="YouTube Comment Sentiment Analyzer") as demo:
    gr.Markdown("## 📊 YouTube Comment Sentiment & Toxicity Analyzer")

    # Inputs: the video link and how many comments to analyse.
    with gr.Row():
        video_url = gr.Textbox(label="📺 YouTube Video URL", placeholder="Paste the video link here")
        max_comments = gr.Slider(1, 100, value=10, step=1, label="Number of Comments")

    # Optional filters; "All" leaves the corresponding column unfiltered.
    with gr.Row():
        sentiment_filter = gr.Dropdown(
            choices=["All", "POSITIVE", "NEGATIVE"],
            value="All",
            label="Filter by Sentiment",
        )
        # The choices must equal the values analyze_video writes into the
        # "Toxicity" column: the classifier's label names plus "Not Toxic".
        # NOTE(review): names below follow the unitary/toxic-bert label set;
        # confirm against the model's id2label.
        toxicity_filter = gr.Dropdown(
            choices=[
                "All",
                "toxic",
                "severe_toxic",
                "obscene",
                "threat",
                "insult",
                "identity_hate",
                "Not Toxic",
            ],
            value="All",
            label="Filter by Toxicity",
        )

    analyze_btn = gr.Button("Analyze Comments")

    # Outputs, one tab per result of analyze_video.
    with gr.Tab("Analysis Table"):
        output_df = gr.Dataframe(label="Sentiment & Toxicity Analysis", interactive=False)
    with gr.Tab("Sentiment Chart"):
        output_plot = gr.Plot(label="Sentiment Distribution")
    with gr.Tab("Download CSV"):
        download_btn = gr.File(label="Download CSV")

    analyze_btn.click(
        fn=analyze_video,
        inputs=[video_url, max_comments, sentiment_filter, toxicity_filter],
        outputs=[output_df, output_plot, download_btn],
    )

demo.launch()  # No share=True for Hugging Face Spaces