import gradio as gr
import pickle
import torch
import numpy as np
from transformers import BertTokenizer, BertModel
from sklearn.linear_model import LogisticRegression

# The tokenizer and the encoder are both loaded from the same pretrained
# checkpoint so that token ids match the encoder's vocabulary.
BERT_CHECKPOINT = 'bert-base-cased'
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
bert_model = BertModel.from_pretrained(BERT_CHECKPOINT)

# Unpickle the classifier that is applied to the BERT embeddings
# (presumably a scikit-learn LogisticRegression, per the import above).
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only ship/load a pickle file that comes from a trusted source.
CLASSIFIER_PATH = 'bert_cased.pkl'
with open(CLASSIFIER_PATH, 'rb') as pickled_classifier:
    classifier = pickle.load(pickled_classifier)

# Define function to preprocess and classify text
def classify_text(text):
    """Classify ``text`` using BERT features and the pickled classifier.

    The text is tokenized, passed through ``bert_model`` with gradient
    tracking disabled, and the last-layer hidden state at sequence
    position 0 (the ``[CLS]`` position for BERT-style tokenization) is
    handed to ``classifier.predict``.

    Args:
        text: Input forwarded to the tokenizer; the Gradio "text" input
            supplies a single string.

    Returns:
        The first element of the array returned by ``classifier.predict``.
    """
    encoded = tokenizer(text, padding=True, truncation=True, return_tensors="pt")

    # Inference only: no autograd graph is needed, and a tensor produced
    # under no_grad can be converted to NumPy directly.
    with torch.no_grad():
        last_hidden = bert_model(**encoded).last_hidden_state
        # Keep only position 0 of each sequence -> 2-D feature array.
        features = last_hidden[:, 0, :].numpy()

    predictions = classifier.predict(features)
    return predictions[0]

# User-facing strings shown at the top of the Gradio page.
APP_TITLE = "Text Classification: Human or AI?"
APP_DESCRIPTION = "Enter a text to classify whether it's generated by a human or AI."

# Wire classify_text to a single text input and a single text output.
iface = gr.Interface(
    fn=classify_text, inputs="text", outputs="text",
    title=APP_TITLE, description=APP_DESCRIPTION,
)

# Start serving the interface; this runs whenever the module is executed.
iface.launch()