# YouTube Comment Sentiment & Toxicity Analyzer — Gradio app (Hugging Face Space)
import os
import re
import tempfile

import gradio as gr
import pandas as pd
import plotly.express as px
from googleapiclient.discovery import build
from transformers import pipeline
# Load Transformers pipelines once at import time (model downloads happen here).
# Pin the sentiment model explicitly: bare pipeline("sentiment-analysis") emits a
# "no model was supplied" warning and the implicit default may change between
# transformers releases, silently changing results.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
# top_k=None returns scores for ALL toxicity labels, not just the best one.
toxic_classifier = pipeline("text-classification", model="unitary/toxic-bert", top_k=None)
# YouTube Data API v3 key.
# SECURITY: the key was hardcoded in source — it should be rotated and supplied
# via the YOUTUBE_API_KEY environment variable (kept here only as a
# backward-compatible fallback so existing deployments keep working).
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY", "AIzaSyD2Y4klQo0hSo4nhaWJyoDjgmGxtcY5pEQ")
# Extract video ID from URL | |
def extract_video_id(url):
    """Pull the video ID out of a YouTube URL.

    Supports watch, youtu.be, embed, and shorts URL forms. Returns the ID
    string, or None when no known pattern matches.
    """
    id_patterns = (
        r"(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/)([^&\n?#]+)",
        r"youtube\.com\/shorts\/([^&\n?#]+)",
    )
    hits = (re.search(p, url) for p in id_patterns)
    return next((m.group(1) for m in hits if m), None)
# Fetch comments from YouTube API | |
def fetch_comments(video_url, max_results=10):
    """Fetch top-level comments for a YouTube video.

    Parameters:
        video_url: any supported YouTube URL form (see extract_video_id).
        max_results: number of comments to request; clamped to the API's
            per-page maximum of 100. Gradio sliders deliver floats, so the
            value is coerced to int.

    Returns:
        DataFrame with a "Comment" column on success, or a single-row
        DataFrame with an "error" column describing the failure.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return pd.DataFrame({"error": ["Invalid YouTube URL"]})
    # build() and the request construction can themselves raise (bad key,
    # discovery/network failure), so they belong inside the try — the
    # original only guarded execute().
    try:
        youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
        response = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=min(int(max_results), 100),  # API caps maxResults at 100
            textFormat="plainText",
        ).execute()
        comments = [
            item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for item in response["items"]
        ]
        return pd.DataFrame({"Comment": comments})
    except Exception as e:
        # Surface the failure through the same DataFrame contract the UI expects.
        return pd.DataFrame({"error": [str(e)]})
# Analyze sentiments and toxicity | |
def analyze_video(video_url, max_comments=10, sentiment_filter="All", toxicity_filter="All"):
    """Run sentiment and toxicity analysis over a video's comments.

    Parameters:
        video_url: YouTube URL to analyze.
        max_comments: how many comments to fetch.
        sentiment_filter: "All" or an exact sentiment label to keep.
        toxicity_filter: "All" or an exact toxicity tag to keep.

    Returns:
        (result_df, plotly figure, path to a CSV of result_df) on success,
        or (error string, None, None) when the fetch failed.
    """
    df = fetch_comments(video_url, max_comments)
    if "error" in df.columns:
        return df.to_string(index=False), None, None
    rows = []
    for comment in df["Comment"]:
        # NOTE(review): truncating to 512 *characters* — the models' limit is
        # 512 *tokens*, so this is an approximation; confirm it never overflows.
        text = comment[:512]
        sentiment_result = sentiment_pipeline(text)[0]
        label_scores = toxic_classifier(text)[0]
        # top_k=None gives every toxicity label with a score; keep the strongest.
        top_label = max(label_scores, key=lambda s: s["score"])
        toxic_score = round(top_label["score"], 3)
        rows.append({
            "Comment": comment,
            "Sentiment": sentiment_result["label"],
            "Sentiment Score": round(sentiment_result["score"], 3),
            # Below the 0.5 threshold the comment is reported as non-toxic.
            "Toxicity": top_label["label"] if toxic_score > 0.5 else "Not Toxic",
            "Toxicity Score": toxic_score,
        })
    result_df = pd.DataFrame(rows)
    # Apply the UI filters (exact label match).
    if sentiment_filter != "All":
        result_df = result_df[result_df["Sentiment"] == sentiment_filter]
    if toxicity_filter != "All":
        result_df = result_df[result_df["Toxicity"] == toxicity_filter]
    # Sentiment distribution chart.
    fig = px.histogram(result_df, x="Sentiment", title="Sentiment Distribution", color="Sentiment")
    fig.update_layout(bargap=0.2)
    # Persist the CSV for the download widget. Write through the open handle:
    # the original re-opened f.name while the NamedTemporaryFile was still
    # open, which double-opens the file and fails outright on Windows.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".csv", mode="w", newline="", encoding="utf-8"
    ) as f:
        result_df.to_csv(f, index=False)
        csv_file_path = f.name
    return result_df, fig, csv_file_path
# Gradio UI | |
with gr.Blocks(title="YouTube Comment Sentiment Analyzer") as demo: | |
gr.Markdown("## π YouTube Comment Sentiment & Toxicity Analyzer") | |
with gr.Row(): | |
video_url = gr.Textbox(label="πΊ YouTube Video URL", placeholder="Paste the video link here") | |
max_comments = gr.Slider(1, 100, value=10, step=1, label="Number of Comments") | |
with gr.Row(): | |
sentiment_filter = gr.Dropdown(choices=["All", "POSITIVE", "NEGATIVE"], value="All", label="Filter by Sentiment") | |
toxicity_filter = gr.Dropdown(choices=["All", "toxicity", "severe_toxicity", "obscene", "identity_attack", "insult", "threat", "sexual_explicit", "Not Toxic"], value="All", label="Filter by Toxicity") | |
analyze_btn = gr.Button("Analyze Comments") | |
with gr.Tab("Analysis Table"): | |
output_df = gr.Dataframe(label="Sentiment & Toxicity Analysis", interactive=False) | |
with gr.Tab("Sentiment Chart"): | |
output_plot = gr.Plot(label="Sentiment Distribution") | |
with gr.Tab("Download CSV"): | |
download_btn = gr.File(label="Download CSV") | |
analyze_btn.click( | |
fn=analyze_video, | |
inputs=[video_url, max_comments, sentiment_filter, toxicity_filter], | |
outputs=[output_df, output_plot, download_btn] | |
) | |
demo.launch() # No share=True for Hugging Face Spaces |