"""Gradio demo that scores text by its share of spaCy stop words."""

import importlib
import os
import re
import subprocess
import sys

import gradio as gr
import spacy

MODEL_NAME = "en_core_web_sm"

# Compiled once; reused on every request.
_NON_WORD_CHARS = re.compile(r"[^\w\s]")
_WHITESPACE_RUN = re.compile(r"\s+")


def _load_model(name):
    """Return the spaCy pipeline *name*, downloading it only if it is missing.

    Raises:
        subprocess.CalledProcessError: If the download command fails.
        OSError: If the model still cannot be loaded after the download.
    """
    try:
        return spacy.load(name)
    except OSError:
        # spaCy reports a missing model package as OSError. Install it with
        # the interpreter that is running this script (a bare "python" may
        # resolve to a different environment) and fail loudly on error.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", name],
            check=True,
        )
        # Make the freshly installed package visible to the import system.
        importlib.invalidate_caches()
        return spacy.load(name)


nlp = _load_model(MODEL_NAME)


def detect_ai_content(text):
    """Score *text* by its stop-word share and return it with a cleaned copy.

    The reported percentage is the fraction of word tokens that spaCy flags
    as stop words. Punctuation and whitespace tokens are excluded from both
    the numerator and the denominator, so the result always lies in the
    range 0-100.

    Args:
        text: The input string. ``None`` is treated as an empty string.

    Returns:
        A dict with two keys:
            "text": the input with non-word characters removed, whitespace
                runs collapsed to single spaces, and ends stripped.
            "percentage": the score formatted as
                "<value>% AI-generated content" with two decimals.
    """
    text = text or ""

    # Strip special characters first, then collapse whitespace, so that
    # removing a symbol between two spaces does not leave a double space.
    cleaned_text = _NON_WORD_CHARS.sub("", text)
    cleaned_text = _WHITESPACE_RUN.sub(" ", cleaned_text).strip()

    # Use one token population for both counts; mixing spaCy tokens with
    # whitespace-split words can push the ratio outside 0-100.
    word_tokens = [
        token for token in nlp(text)
        if not (token.is_punct or token.is_space)
    ]
    if word_tokens:
        stopword_count = sum(1 for token in word_tokens if token.is_stop)
        percentage_ai = stopword_count / len(word_tokens) * 100
    else:
        # Empty or punctuation-only input: nothing to score.
        percentage_ai = 0.0

    return {
        "text": cleaned_text,
        "percentage": f"{percentage_ai:.2f}% AI-generated content",
    }


demo = gr.Interface(detect_ai_content, "text", "json")

if __name__ == "__main__":
    demo.launch()