import importlib
import logging
import os
import re
import subprocess
import sys

import numpy as np
import spacy
import torch
from flask import Flask, request, render_template
from transformers import GPT2Tokenizer, GPT2LMHeadModel

logger = logging.getLogger(__name__)

# Download the GPT-2 tokenizer and load the pre-trained language model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Specify the device to run the model on (either "cuda" or "cpu")
device = "cuda" if torch.cuda.is_available() else "cpu"

# Every input tensor below is moved to `device`, so the model has to live
# there as well; eval() switches off dropout so scores are not randomised.
model.to(device)
model.eval()

# Number of positions the loaded GPT-2 checkpoint can attend over.
_MAX_TOKENS = model.config.n_positions

_SPACY_MODEL = "en_core_web_sm"


def _load_spacy_model(name):
    """Load the spaCy pipeline *name*, downloading it only when it is missing.

    The download runs with the current interpreter (``sys.executable``) so the
    package is installed into the environment this process is running in, and
    a failed download raises ``subprocess.CalledProcessError`` instead of
    being silently ignored.
    """
    try:
        return spacy.load(name)
    except OSError:
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", name],
            check=True,
        )
        # Make the freshly installed package visible to this process.
        importlib.invalidate_caches()
        return spacy.load(name)


# Load the Spacy model for English (downloaded on first use only)
nlp = _load_spacy_model(_SPACY_MODEL)


def _encode(text):
    """Return the GPT-2 token ids of *text* as a (1, n) tensor on `device`."""
    return torch.tensor(tokenizer.encode(text)).unsqueeze(0).to(device)


def _perplexity(text):
    """Return exp(mean token loss) of *text* under the GPT-2 model.

    The ids are clipped to `_MAX_TOKENS` because decoding and re-encoding
    generated text is not guaranteed to give back the same number of tokens.
    """
    ids = _encode(text)[:, :_MAX_TOKENS]
    with torch.no_grad():
        loss = model(ids, labels=ids)[0]
    return np.exp(loss.item())


# Define a function to detect AI-generated content and calculate the perplexity score,
# burstiness score, and average perplexity score for a given text input
def detect_ai_content(text):
    """Analyse *text* and return its perplexity/burstiness statistics.

    Args:
        text: Raw user-supplied text.

    Returns:
        On success, a dict with the keys ``text`` (cleaned input),
        ``percentage``, ``generated_text``, ``perplexity`` (of the generated
        continuation), ``burstiness`` (unique / total non-stop-word tokens)
        and ``avg_perplexity`` (mean of the cleaned-text and generated-text
        perplexities). On failure, a dict with the single key ``error``.
    """
    # Clean the text by removing extra spaces, line breaks, and special characters
    cleaned_text = re.sub(r'\s+', ' ', text).strip()
    cleaned_text = re.sub(r'[^\w\s]', '', cleaned_text)

    # Return an error response if the cleaned text is empty or contains only stop words
    doc = nlp(cleaned_text)
    tokens = [
        token.text.lower()
        for token in doc
        if not token.is_stop and token.is_alpha
    ]
    if not tokens:
        return {
            "error": "The input text contains only stop words. Please provide meaningful text."
        }

    try:
        # Tokenize the cleaned text using the GPT-2 tokenizer
        input_ids = _encode(cleaned_text)

        # The prompt must leave room for at least one new token; reject longer
        # prompts up front with a readable message instead of letting the
        # model fail on an out-of-range position.
        if input_ids.shape[1] >= _MAX_TOKENS:
            return {
                "error": (
                    "The input text is too long. Please provide fewer than "
                    f"{_MAX_TOKENS} tokens."
                )
            }

        # Generate text using the pre-trained GPT-2 model
        with torch.no_grad():
            output = model.generate(
                input_ids=input_ids,
                max_length=_MAX_TOKENS,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,  # set pad token ID as eos_token_id
                eos_token_id=tokenizer.eos_token_id,
                top_k=50,
                top_p=0.95,
                temperature=1.0,
                repetition_penalty=1.5,
                num_return_sequences=1,
            )

        # Decode the generated text using the GPT-2 tokenizer
        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

        # Perplexity of the generated text and of the cleaned input
        perplexity = _perplexity(generated_text)
        cleaned_perplexity = _perplexity(cleaned_text)
    except Exception as e:
        # Callers expect an error dict rather than an exception; keep that
        # contract but leave a traceback in the logs.
        logger.exception("GPT-2 analysis failed")
        return {"error": str(e)}

    # Calculate the burstiness score of the cleaned text using Spacy
    burstiness = len(set(tokens)) / len(tokens)

    # Calculate the average perplexity score of the cleaned text and generated text
    avg_perplexity = np.mean([cleaned_perplexity, perplexity])

    # NOTE(review): "percentage" is the character-length ratio of the generated
    # text to the raw input, not a likelihood that the input is AI-written;
    # kept as-is for compatibility -- confirm this is the intended metric.
    return {
        "text": cleaned_text,
        "percentage": f"{len(generated_text)/len(text):.2%} AI-generated content",
        "generated_text": generated_text,
        "perplexity": perplexity,
        "burstiness": burstiness,
        "avg_perplexity": avg_perplexity,
    }


# Create a Flask application
app = Flask(__name__)


# Define a route to handle GET requests to the index page
@app.route('/')
def index():
    """Render the landing page (``index.html``)."""
    return render_template('index.html')


# Define a route to handle POST requests to the API
@app.route('/api', methods=['POST'])
def api():
    """Analyse the submitted ``text`` form field and render the result page.

    Returns:
        The rendered ``result.html`` template on success, or a dict with an
        ``error`` key when the field is missing/blank or the analysis
        reported an error.
    """
    # Get the text input from the request body
    data = request.form.get('text', '').strip()
    if not data:
        return {"error": "Please provide some text."}

    # Call the detect_ai_content function to analyze the text
    result = detect_ai_content(data)

    # Check if an error occurred during analysis
    if "error" in result:
        return {"error": result["error"]}

    # Render the results using a template
    return render_template('result.html', result=result)


# Start the Flask application
if __name__ == '__main__':
    # The server binds to every interface, so the interactive debugger must
    # not be on by default: it allows arbitrary code execution from the
    # browser. Opt in explicitly with FLASK_DEBUG=1 during local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)