import gradio as gr
import pandas as pd
import numpy as np
import re
import nltk
import joblib
import pickle
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

# Fetch the NLTK data used by text preprocessing: the stop-word list, the
# WordNet data used by the lemmatizer, and the omw-1.4 package.
for _nltk_resource in ("stopwords", "wordnet", "omw-1.4"):
    nltk.download(_nltk_resource)


# ---------------------- Load HuggingFace RoBERTa Model ----------------------
# The tokenizer and the classifier are both loaded from this one checkpoint.
_ROBERTA_CHECKPOINT = "cardiffnlp/twitter-roberta-base-sentiment-latest"

try:
    tokenizer_roberta = AutoTokenizer.from_pretrained(_ROBERTA_CHECKPOINT)
    model_roberta = AutoModelForSequenceClassification.from_pretrained(_ROBERTA_CHECKPOINT)
    print("RoBERTa model loaded successfully.")
except Exception as load_error:
    # Keep the app importable: report the failure and leave both globals as
    # None so the predictor can return an error message instead.
    print(f"Error loading HuggingFace RoBERTa model: {load_error}")
    tokenizer_roberta = model_roberta = None

# ---------------------- Load TensorFlow Model ----------------------
# Sequence length handed to pad_sequences at prediction time. It is defined
# outside the try block so the name exists even when loading fails below.
max_len = 1128

try:
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only ship a tokenizer.pkl that comes from a trusted source.
    with open("tokenizer.pkl", "rb") as handle:
        tokenizer = pickle.load(handle)
    model_tnsorflow = load_model("best_model.keras")
except Exception as e:
    # Best-effort startup: report the failure and leave both globals as None
    # so the predictor can return an error message instead of crashing.
    print(f"Error loading TensorFlow model or tokenizer: {e}")
    tokenizer = None
    model_tnsorflow = None

# ---------------------- TensorFlow Sentiment Prediction ----------------------
def predict_sentiment_tensorflow(text):
    """Predict sentiment using the TensorFlow model.

    The text is cleaned with ``preprocess_text``, converted to an integer
    sequence by the loaded tokenizer, pre-padded to ``max_len`` and passed to
    the Keras model.

    Args:
        text: Raw review text.

    Returns:
        ``"POSITIVE"`` or ``"NEGATIVE"`` on success; otherwise a string
        starting with ``"Error"`` that describes what went wrong.
    """
    try:
        # Compare against None explicitly: a truthiness test would invoke
        # __bool__/__len__ on the objects if they define one, which is not
        # what "was it loaded?" means.
        if tokenizer is None or model_tnsorflow is None:
            return "Error: Model or Tokenizer not loaded properly."

        processed_text = preprocess_text(text)
        sequence = tokenizer.texts_to_sequences([processed_text])
        padded_sequence = pad_sequences(sequence, maxlen=max_len, padding="pre")

        # One input row goes in, so take the first row of the output. Index 1
        # of that row is treated as the positive-class score.
        # NOTE(review): assumes the model emits at least two values per
        # sample -- confirm against the trained model's output layer.
        prediction = model_tnsorflow.predict(padded_sequence)[0]
        return "POSITIVE" if prediction[1] > 0.5 else "NEGATIVE"
    except Exception as e:
        # Deliberate catch-all: the message is shown in the UI output box.
        return f"Error in prediction: {e}"

# ---------------------- HuggingFace RoBERTa Sentiment Prediction ----------------------
def predict_sentiment_roberta(text):
    """Predict sentiment using the HuggingFace RoBERTa model.

    Args:
        text: Raw review text. It is passed to the tokenizer as-is (no
            ``preprocess_text`` step) and truncated to 512 tokens.

    Returns:
        ``"Predicted Sentiment: <LABEL> (Confidence: <p>)"`` where the label
        is NEGATIVE, NEUTRAL or POSITIVE and ``p`` is the softmax probability
        of that label; otherwise a string starting with ``"Error"``.
    """
    try:
        # Compare against None explicitly. A truthiness test calls __len__ on
        # objects that define it, which is unrelated to whether loading
        # succeeded.
        if tokenizer_roberta is None or model_roberta is None:
            return "Error: RoBERTa Model or Tokenizer not loaded properly."

        # Encode the text as PyTorch tensors for the model.
        inputs = tokenizer_roberta(text, return_tensors="pt", truncation=True, max_length=512)

        # Inference only, so gradient tracking is switched off.
        with torch.no_grad():
            outputs = model_roberta(**inputs)
            scores = outputs.logits.softmax(dim=1)
            prediction = scores.argmax().item()

        # Class ids as stated by the original author: 0 negative, 1 neutral,
        # 2 positive.
        # NOTE(review): confirm against the checkpoint's label mapping.
        sentiment_map = {0: "NEGATIVE", 1: "NEUTRAL", 2: "POSITIVE"}
        sentiment = sentiment_map[prediction]

        # Probability assigned to the winning class (single-row batch).
        confidence = scores[0][prediction].item()
        return f"Predicted Sentiment: {sentiment} (Confidence: {confidence:.4f})"
    except Exception as e:
        # Deliberate catch-all: the message is shown in the UI output box.
        return f"Error in RoBERTa prediction: {e}"

# ----------------------  Preprocess Dataset ----------------------


# English stop-word set, read from the NLTK corpus on first use and then
# reused, so the corpus is not re-read on every prediction.
_STOP_WORDS = None


def _get_stop_words():
    """Return the English stop-word set, loading it from NLTK only once."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words("english"))
    return _STOP_WORDS


def preprocess_text(text):
    """Normalize review text for the TF-IDF and TensorFlow models.

    Steps: lowercase, replace every non-word character with a space, drop
    English stop words, and lemmatize the remaining words.

    Args:
        text: Raw review text (must be a ``str``).

    Returns:
        The surviving lemmatized words joined by single spaces; an empty
        string when nothing survives.
    """
    # \W turns punctuation and all whitespace into plain spaces; split()
    # then collapses runs of spaces and trims the ends, so no separate
    # whitespace-normalizing pass is needed.
    text = re.sub(r"\W", " ", text.lower())
    stop_words = _get_stop_words()
    return " ".join(
        lemmatizer.lemmatize(word)
        for word in text.split()
        if word not in stop_words
    )


# ---------------------- TF-IDF Logistic Regression Sentiment Prediction ----------------------
# Load model and vectorizer for prediction
# NOTE(security): joblib.load unpickles the files, which can execute
# arbitrary code; only load artifacts that come from a trusted source.
try:
    model_lr = joblib.load("sentiment_model.pkl")
    vectorizer = joblib.load("tfidf_vectorizer.pkl")
except Exception as e:
    # Mirror the RoBERTa and TensorFlow loaders: report the failure and fall
    # back to None so a missing artifact does not stop the whole app from
    # starting and the other models stay usable.
    print(f"Error loading TF-IDF Logistic Regression model or vectorizer: {e}")
    model_lr = None
    vectorizer = None


def predict_sentiment_tfidf(text):
    """Predict sentiment using the Logistic Regression model.

    The text is cleaned with ``preprocess_text``, vectorized with the loaded
    TF-IDF vectorizer and classified by the logistic-regression model.

    Args:
        text: Raw review text.

    Returns:
        ``"Predicted Sentiment: <label> (Confidence: <p>)"`` where ``p`` is
        the highest class probability reported by the model; otherwise a
        string starting with ``"Error"`` that describes what went wrong.
    """
    try:
        if model_lr is None or vectorizer is None:
            return "Error: TF-IDF Model or Vectorizer not loaded properly."

        processed_review = preprocess_text(text)
        review_tfidf = vectorizer.transform([processed_review])

        # A single review is scored, so take the first (only) row of each
        # result.
        prediction = model_lr.predict(review_tfidf)[0]
        probabilities = model_lr.predict_proba(review_tfidf)[0]
        confidence = max(probabilities)

        return f"Predicted Sentiment: {prediction} (Confidence: {confidence:.2f})"
    except Exception as e:
        # Same policy as the other predictors in this file: return the
        # failure as text so it appears in the UI output box.
        return f"Error in TF-IDF prediction: {e}"

# ---------------------- Sentiment Analysis Function ----------------------
def analyze_sentiment(text, model_choice):
    """Analyze sentiment using the selected model.

    Args:
        text: Review text entered by the user.
        model_choice: ``"TF-IDF Logistic Regression"`` or
            ``"TensorFlow Model"``; any other value selects the RoBERTa
            model.

    Returns:
        The result string produced by the chosen predictor, or an error
        message when ``text`` is missing, not a string, or only whitespace.
    """
    # Reject empty input up front: the models would otherwise score an empty
    # document, and a None value would crash text preprocessing.
    if not isinstance(text, str) or not text.strip():
        return "Error: Please enter a review to analyze."

    if model_choice == "TF-IDF Logistic Regression":
        return predict_sentiment_tfidf(text)
    if model_choice == "TensorFlow Model":
        return predict_sentiment_tensorflow(text)
    # RoBERTa is the fallback for every other choice.
    return predict_sentiment_roberta(text)

# ---------------------- Gradio UI ----------------------
# Entries offered in the model dropdown.
_model_names = ["TF-IDF Logistic Regression", "TensorFlow Model", "RoBERTa Model"]

# Sample reviews listed under the form; each one is paired with the RoBERTa
# model when it is turned into an example row.
_sample_reviews = [
    "This movie was amazing, I loved every minute of it!",
    "This was the worst movie I've ever seen, terrible acting and plot.",
    "The movie was okay, nothing special but watchable.",
]

with gr.Blocks() as interface:
    # Page heading and short instructions.
    gr.Markdown("# Movie Review Sentiment Analysis App")
    gr.Markdown("Enter a review, and the model will predict if it's Positive, Negative, or Neutral.")

    # Input controls: model selector (RoBERTa preselected) and review text.
    model_choice = gr.Dropdown(
        choices=_model_names,
        value="RoBERTa Model",
        label="Select Model",
    )
    text_input = gr.Textbox(lines=5, label="Enter a Review")
    output = gr.Textbox(interactive=False, label="Sentiment Prediction")

    # Clicking the button sends (review, model) to analyze_sentiment and
    # writes the returned string into the output box.
    analyze_button = gr.Button("Analyze")
    analyze_button.click(
        fn=analyze_sentiment,
        inputs=[text_input, model_choice],
        outputs=output,
    )

    # Example rows that fill in both inputs.
    gr.Examples(
        examples=[[review, "RoBERTa Model"] for review in _sample_reviews],
        inputs=[text_input, model_choice],
    )

    # Collapsed-by-default section describing the three models.
    with gr.Accordion("About the Models", open=False):
        gr.Markdown("""
        ## Model Information
        
        - **TF-IDF Logistic Regression**: A classical machine learning approach using term frequency-inverse document frequency features.
        - **TensorFlow Model**: A custom neural network trained on the training data.
        - **RoBERTa Model**: A state-of-the-art transformer model (cardiffnlp/twitter-roberta-base-sentiment-latest) from HuggingFace, fine-tuned for sentiment analysis on Twitter data.
        
        The RoBERTa model generally provides the most accurate sentiment predictions, especially for complex or nuanced text, but may be slower than the other models.
        """)

# Start serving the app.
interface.launch()