import os
import re
import subprocess
import sys

import numpy as np
import spacy
import torch
from flask import Flask, request, render_template
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load the GPT-2 tokenizer and the pre-trained language model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Specify the device to run the model on (either "cuda" or "cpu") and move the
# model there: the input tensors are created with .to(device), so the model's
# weights must live on the same device or the forward pass fails on CUDA hosts.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Load the spaCy model for English, downloading it only when it is not installed.
# spacy.load raises OSError when the model package cannot be found.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Use the running interpreter (not whatever "python" is on PATH) so the
    # model is installed into the environment this process imports from.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")

def _encode_for_model(text, max_tokens):
    """Encode ``text`` with the GPT-2 tokenizer and return a (1, n) tensor on ``device``.

    At most ``max_tokens`` token ids are kept so the result fits the model's
    context window.
    """
    token_ids = tokenizer.encode(text)[:max_tokens]
    return torch.tensor(token_ids).unsqueeze(0).to(device)


def _gpt2_perplexity(token_ids):
    """Return the exponential of the GPT-2 loss on ``token_ids``.

    The ids are passed as both input and labels; the first element of the
    model output (the loss) is exponentiated.  Gradients are disabled because
    only the scalar loss value is needed.
    """
    with torch.no_grad():
        loss = model(token_ids, labels=token_ids)[0]
    return np.exp(loss.item())


# Define a function to detect AI-generated content and calculate the perplexity score,
# burstiness score, and average perplexity score for a given text input
def detect_ai_content(text):
    """Analyze ``text`` with GPT-2 and spaCy and return a dictionary of scores.

    Args:
        text: The raw input string.

    Returns:
        On success, a dict with the keys:

        * ``"text"`` -- the input with whitespace collapsed and every
          character that is neither a word character nor whitespace removed.
        * ``"percentage"`` -- ``len(generated_text) / len(text)`` formatted as
          a percentage string.  NOTE(review): this is a character-length
          ratio of the GPT-2 continuation to the input, not a detection
          probability -- confirm this is the intended metric.
        * ``"generated_text"`` -- the sampled GPT-2 continuation of the
          cleaned text (sampling is random, so it differs between calls).
        * ``"perplexity"`` -- perplexity of GPT-2 on the generated text.
        * ``"burstiness"`` -- unique tokens divided by total tokens, counted
          over lower-cased alphabetic non-stop-word tokens.
        * ``"avg_perplexity"`` -- mean of the cleaned-text perplexity and the
          generated-text perplexity.

        On failure, a dict with a single ``"error"`` key holding a message.
        Texts longer than the model's context window are truncated to fit
        rather than rejected.
    """
    # Clean the text by removing extra spaces, line breaks, and special characters
    cleaned_text = re.sub(r'\s+', ' ', text).strip()
    cleaned_text = re.sub(r'[^\w\s]', '', cleaned_text)

    # Return an error response if the cleaned text is empty or contains only stop words
    doc = nlp(cleaned_text)
    tokens = [token.text.lower() for token in doc if not token.is_stop and token.is_alpha]
    if not tokens:
        return {
            "error": "The input text contains only stop words. Please provide meaningful text."
        }

    # GPT-2 cannot attend beyond its positional-embedding table; keep every
    # sequence within that limit instead of letting the model raise.
    context_len = model.config.n_positions

    # Tokenize the cleaned text once; leave room for at least one generated token.
    # The same tensor is reused below for the cleaned-text perplexity.
    input_ids = _encode_for_model(cleaned_text, context_len - 1)

    # Generate text using the pre-trained GPT-2 model
    try:
        output = model.generate(
            input_ids=input_ids,
            max_length=context_len,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,  # set pad token ID as eos_token_id
            eos_token_id=tokenizer.eos_token_id,
            top_k=50,
            top_p=0.95,
            temperature=1.0,
            repetition_penalty=1.5,
            num_return_sequences=1
        )
    except Exception as e:
        return {"error": str(e)}

    # Decode the generated text using the GPT-2 tokenizer
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

    # Calculate the perplexity score of the generated text using the GPT-2 model.
    # Re-encoding decoded text may yield a different token count than was
    # generated, so it is truncated to the context window as well.
    try:
        generated_input_ids = _encode_for_model(generated_text, context_len)
        perplexity = _gpt2_perplexity(generated_input_ids)
    except Exception as e:
        return {"error": str(e)}

    # Calculate the burstiness score of the cleaned text using Spacy
    unique_tokens = set(tokens)
    burstiness = len(unique_tokens) / len(tokens)

    # Calculate the average perplexity score of the cleaned text and generated text
    try:
        cleaned_perplexity = _gpt2_perplexity(input_ids)
        avg_perplexity = np.mean([cleaned_perplexity, perplexity])
    except Exception as e:
        return {"error": str(e)}

    # Return a dictionary with the percentage of AI-generated content, the cleaned text,
    # the generated text, the perplexity score, burstiness score, and average perplexity score
    return {
        "text": cleaned_text,
        "percentage": f"{len(generated_text)/len(text):.2%} AI-generated content",
        "generated_text": generated_text,
        "perplexity": perplexity,
        "burstiness": burstiness,
        "avg_perplexity": avg_perplexity
    }

# Create the Flask application object; the @app.route decorators in this file
# register their view functions on it, and the __main__ guard starts it.
app = Flask(__name__)

# Define a route to handle GET requests to the index page
@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page

# Define a route to handle POST requests to the API
@app.route('/api', methods=['POST'])
def api():
    """Analyze the ``text`` form field and render the outcome.

    Reads ``text`` from the submitted form data, strips surrounding
    whitespace, and passes it to ``detect_ai_content``.  A successful analysis
    is rendered with the ``result.html`` template; a missing/blank field or an
    analysis result containing an ``"error"`` key is returned as a dict with a
    single ``"error"`` entry.

    NOTE(review): the error dicts are returned without an explicit status
    code -- consider whether clients should receive a 4xx status instead.
    """
    # Pull the submitted text out of the form body and trim it
    submitted_text = request.form.get('text', '')
    submitted_text = submitted_text.strip()
    if submitted_text == '':
        return dict(error="Please provide some text.")

    # Run the analysis; the happy path goes straight to the results template
    analysis = detect_ai_content(submitted_text)
    if "error" not in analysis:
        return render_template('result.html', result=analysis)

    # The analysis reported a problem: forward only its error message
    return dict(error=analysis["error"])
# Start the Flask application
if __name__ == '__main__':
    # Debug mode turns on Werkzeug's interactive debugger, which allows anyone
    # who can reach the server to execute arbitrary Python code.  Because the
    # server binds to all interfaces (0.0.0.0), debug mode is opt-in via the
    # FLASK_DEBUG=1 environment variable instead of being always enabled.
    debug_enabled = os.environ.get("FLASK_DEBUG", "0") == "1"
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)