import gradio as gr
from transformers import pipeline
import nltk

# Fetch the NLTK 'punkt' resource when this module is imported.
# NOTE(review): nothing in the visible code calls NLTK directly — confirm
# this download is still required before keeping the import-time network hit.
nltk.download('punkt')

# Model identifiers passed to `pipeline(..., model=...)` below.
# Both are used as zero-shot classifiers.
categorization_model_name = 'facebook/bart-large-mnli'
bias_detection_model_name = 'roberta-large-mnli'

# The pipelines are built once at module level and reused for every request,
# so importing this module is what triggers their construction.
categorization_pipeline = pipeline("zero-shot-classification", model=categorization_model_name)
bias_pipeline = pipeline("zero-shot-classification", model=bias_detection_model_name)

# Candidate labels handed to the zero-shot pipelines:
# - news_categories: topic labels for the categorization pipeline
# - bias_labels: the two outcomes offered to the bias pipeline
news_categories = ['politics', 'sports', 'technology', 'business', 'health', 'entertainment']
bias_labels = ['biased', 'not biased']

# Processing function
def analyze_news_paragraphs(text):
    """Classify each paragraph of a news article by topic and by bias.

    The input is split on newlines; each resulting line is stripped and
    treated as one paragraph. Paragraphs shorter than 30 characters (after
    stripping) are skipped, which also drops blank lines.

    For every remaining paragraph the topic is predicted first, then the
    bias classifier is run on the paragraph prefixed with the predicted
    topic. The first label/score entry returned by each pipeline is taken
    as the top prediction.

    Args:
        text: The article text, paragraphs separated by newlines. ``None``
            or an empty string is accepted and yields no rows.

    Returns:
        A list of rows ``[paragraph, "<category> (<score>%)",
        "<bias> (<score>%)"]``, one per analysed paragraph, in input order.
        Scores are percentages rounded to two decimals.
    """
    # The UI callback can receive None (e.g. a cleared textbox or an API
    # call without a value); treat it like empty input instead of crashing
    # on `.split`.
    if not text:
        return []

    # Lines shorter than this are assumed to be headings/bylines rather
    # than real paragraphs and are not sent to the models.
    min_paragraph_chars = 30

    results = []
    for raw_line in text.split('\n'):
        para = raw_line.strip()
        if len(para) < min_paragraph_chars:
            continue

        # Predict category
        cat_result = categorization_pipeline(para, candidate_labels=news_categories)
        top_category = cat_result['labels'][0]
        cat_score = round(cat_result['scores'][0] * 100, 2)

        # Predict bias, giving the model the predicted topic as context
        context = f"This is a {top_category} news paragraph: {para}"
        bias_result = bias_pipeline(context, candidate_labels=bias_labels)
        top_bias = bias_result['labels'][0]
        bias_score = round(bias_result['scores'][0] * 100, 2)

        results.append([
            para,
            f"{top_category} ({cat_score}%)",
            f"{top_bias} ({bias_score}%)",
        ])

    return results

# Gradio Interface
# Input widget: a 15-line text area for pasting the article.
_article_input = gr.Textbox(
    lines=15,
    label="Enter News Article with Paragraphs",
)

# Output widget: one table row per analysed paragraph.
_analysis_table = gr.Dataframe(
    headers=["Paragraph", "Category", "Bias"],
    label="Analysis Result",
)

# Wire the analysis function to the widgets above.
iface = gr.Interface(
    fn=analyze_news_paragraphs,
    inputs=_article_input,
    outputs=_analysis_table,
    title="News Categorization & Bias Detection",
    description=(
        "Processes each paragraph separately and provides category & bias for each. "
        "Works well for large news articles."
    ),
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()