import gradio as gr
import pandas as pd
import re
import tempfile
from transformers import pipeline
from googleapiclient.discovery import build
import plotly.express as px

# Load Transformers Pipelines
sentiment_pipeline = pipeline("sentiment-analysis")
toxic_classifier = pipeline("text-classification", model="unitary/toxic-bert", top_k=None)

# YouTube API Key
YOUTUBE_API_KEY = "AIzaSyD2Y4klQo0hSo4nhaWJyoDjgmGxtcY5pEQ"

# Extract video ID from URL
def extract_video_id(url):
    patterns = [
        r"(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/)([^&\n?#]+)",
        r"youtube\.com\/shorts\/([^&\n?#]+)"
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None

# Fetch comments from YouTube API
def fetch_comments(video_url, max_results=10):
    video_id = extract_video_id(video_url)
    if not video_id:
        return pd.DataFrame({"error": ["Invalid YouTube URL"]})

    youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
    request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=max_results,
        textFormat="plainText"
    )
    comments = []
    try:
        response = request.execute()
        for item in response["items"]:
            comment = item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            comments.append(comment)
        return pd.DataFrame({"Comment": comments})
    except Exception as e:
        return pd.DataFrame({"error": [str(e)]})

# Analyze sentiments and toxicity
def analyze_video(video_url, max_comments=10, sentiment_filter="All", toxicity_filter="All"):
    df = fetch_comments(video_url, max_comments)
    if "error" in df.columns:
        return df.to_string(index=False), None, None

    results = []
    for comment in df["Comment"]:
        sentiment_result = sentiment_pipeline(comment[:512])[0]
        toxic_results = toxic_classifier(comment[:512])
        toxic_labels = toxic_results[0]
        top_label = max(toxic_labels, key=lambda x: x['score'])

        sentiment = sentiment_result["label"]
        sentiment_score = round(sentiment_result["score"], 3)

        toxic_label = top_label["label"]
        toxic_score = round(top_label["score"], 3)
        toxic_tag = toxic_label if toxic_score > 0.5 else "Not Toxic"

        results.append({
            "Comment": comment,
            "Sentiment": sentiment,
            "Sentiment Score": sentiment_score,
            "Toxicity": toxic_tag,
            "Toxicity Score": toxic_score
        })

    result_df = pd.DataFrame(results)

    # Apply filters
    if sentiment_filter != "All":
        result_df = result_df[result_df["Sentiment"] == sentiment_filter]

    if toxicity_filter != "All":
        result_df = result_df[result_df["Toxicity"] == toxicity_filter]

    # Generate sentiment distribution plot
    fig = px.histogram(result_df, x="Sentiment", title="Sentiment Distribution", color="Sentiment")
    fig.update_layout(bargap=0.2)

    # Save CSV to temp file
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", newline="", encoding="utf-8") as f:
        result_df.to_csv(f.name, index=False)
        csv_file_path = f.name

    return result_df, fig, csv_file_path

# Gradio UI
with gr.Blocks(title="YouTube Comment Sentiment Analyzer") as demo:
    gr.Markdown("## 📊 YouTube Comment Sentiment & Toxicity Analyzer")

    with gr.Row():
        video_url = gr.Textbox(label="📺 YouTube Video URL", placeholder="Paste the video link here")
        max_comments = gr.Slider(1, 100, value=10, step=1, label="Number of Comments")

    with gr.Row():
        sentiment_filter = gr.Dropdown(choices=["All", "POSITIVE", "NEGATIVE"], value="All", label="Filter by Sentiment")
        toxicity_filter = gr.Dropdown(choices=["All", "toxicity", "severe_toxicity", "obscene", "identity_attack", "insult", "threat", "sexual_explicit", "Not Toxic"], value="All", label="Filter by Toxicity")

    analyze_btn = gr.Button("Analyze Comments")

    with gr.Tab("Analysis Table"):
        output_df = gr.Dataframe(label="Sentiment & Toxicity Analysis", interactive=False)

    with gr.Tab("Sentiment Chart"):
        output_plot = gr.Plot(label="Sentiment Distribution")

    with gr.Tab("Download CSV"):
        download_btn = gr.File(label="Download CSV")

    analyze_btn.click(
        fn=analyze_video,
        inputs=[video_url, max_comments, sentiment_filter, toxicity_filter],
        outputs=[output_df, output_plot, download_btn]
    )

demo.launch()  # No share=True for Hugging Face Spaces