import os
import re
import subprocess
import sys

import gradio as gr
import requests
import spacy

# Lazily-initialised spaCy pipeline shared by every call to detect_ai_content.
nlp = None


def load_spacy_model():
    """Load the ``en_core_web_sm`` spaCy pipeline into the module-level ``nlp``.

    The pipeline is loaded at most once. If the first load raises ``OSError``,
    a download of the model is attempted and the load is retried; an
    ``OSError`` raised by the retry propagates to the caller.
    """
    global nlp
    if nlp is not None:
        return
    try:
        nlp = spacy.load("en_core_web_sm")
    except OSError:
        print("Downloading Spacy model...")
        # Run the download with the interpreter that is executing this script
        # so the model lands in the same environment; a bare "python" on PATH
        # may be a different installation. check=False keeps the original
        # behaviour: a failed download surfaces as OSError from the retry.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
            check=False,
        )
        nlp = spacy.load("en_core_web_sm")


def detect_ai_content(text, download_file=False):
    """Score *text* by its stop-word share and return a cleaned copy of it.

    The reported percentage is the share of word tokens that spaCy flags as
    stop words. It is a simple ratio, labelled "AI-generated content" in the
    output string.

    Args:
        text: The text to analyse. ``None`` is treated as an empty string.
        download_file: When true, the cleaned text is also written to
            ``cleaned_text.txt`` in the current working directory.

    Returns:
        A dict with the keys ``"text"`` (whitespace-collapsed text with
        punctuation removed) and ``"percentage"`` (formatted string), plus
        ``"download_link"`` (the file name) when *download_file* is true.
    """
    # Load Spacy model if not already loaded
    load_spacy_model()

    text = text or ""

    # Analyze the text using Spacy
    doc = nlp(text)

    # Use spaCy tokens for both numerator and denominator. Mixing a
    # whitespace-split word count with a token count that includes punctuation
    # can push the ratio below zero (e.g. "Hello, world!").
    words = [
        token for token in doc if not (token.is_punct or token.is_space)
    ]
    if words:
        stop_word_count = sum(1 for token in words if token.is_stop)
        percentage_ai = stop_word_count / len(words) * 100
    else:
        # Empty or punctuation-only input: nothing to score, and avoids a
        # division by zero.
        percentage_ai = 0.0

    # Clean the text by removing extra spaces, line breaks and special
    # characters
    cleaned_text = re.sub(r'\s+', ' ', text).strip()
    cleaned_text = re.sub(r'[^\w\s]', '', cleaned_text)

    result = {
        "text": cleaned_text,
        "percentage": f"{percentage_ai:.2f}% AI-generated content",
    }

    # Write the cleaned text to a file if the option is enabled
    if download_file:
        # Explicit encoding: \w keeps non-ASCII letters, which the platform
        # default encoding may not be able to represent.
        with open('cleaned_text.txt', 'w', encoding='utf-8') as f:
            f.write(cleaned_text)
        result['download_link'] = 'cleaned_text.txt'

    return result


def upload_text_file(file):
    """Return the text content of an uploaded file.

    Args:
        file: An object whose ``name`` attribute is the path of the file to
            read.

    Returns:
        The file's content decoded as UTF-8; undecodable bytes are replaced
        rather than raising, so a mis-encoded upload is still readable.
    """
    with open(file.name, 'r', encoding='utf-8', errors='replace') as f:
        return f.read()


def get_example_text():
    """Fetch one example sentence from the Bacon Ipsum API.

    Returns:
        The first element of the API's JSON response, or a fixed fallback
        sentence when the request fails, times out, or the response does not
        have the expected shape.
    """
    try:
        # The timeout keeps a slow or unreachable API from blocking start-up.
        response = requests.get(
            'https://baconipsum.com/api/?type=all-meat&sentences=1',
            timeout=5,
        )
        if response.ok:
            return response.json()[0]
    except (
        requests.RequestException,
        ValueError,
        IndexError,
        KeyError,
        TypeError,
    ):
        # Best effort only: network errors, invalid JSON, or an unexpected
        # payload shape all fall through to the fallback text.
        pass
    # Use a fallback example text if API request fails
    return "This is a sample text with no AI-generated content."
# Create Gradio interface
inputs = [
    gr.inputs.Textbox(lines=5, label="Enter text to analyze:"),
    gr.inputs.Checkbox(label="Download cleaned text as file", default=False),
]
# NOTE(review): allow_download / download_filename are passed through
# unchanged; confirm the installed Gradio version's JSON component accepts
# them.
outputs = gr.JSON(
    label="Results",
    allow_download=True,
    download_filename="ai_content_analysis.json",
)
file_input = gr.inputs.File(label="Upload a text file:")
file_output = gr.outputs.Textbox(label="File contents:")

# Sample records. They are not passed to gr.Interface below; building the list
# calls get_example_text() once at start-up.
examples = [
    {"text": get_example_text(), "percentage": "0.00% AI-generated content"},
    {
        "text": "This is a text generated by a chatbot.",
        "percentage": "100.00% AI-generated content",
    },
]


def _analyze_submission(text, download_file, file):
    """Adapt the three UI inputs to detect_ai_content and the two UI outputs.

    The interface supplies (text, download flag, uploaded file) and expects
    (analysis result, file contents), whereas detect_ai_content takes two
    arguments and returns a single dict.

    Args:
        text: Content of the text box.
        download_file: State of the "download" checkbox.
        file: The uploaded file object, or None when nothing was uploaded.

    Returns:
        A ``(result, file_text)`` tuple. The typed text is analysed; when it is
        blank and a file was uploaded, the file's contents are analysed
        instead. ``file_text`` is the uploaded file's contents, or ``""``.
    """
    file_text = upload_text_file(file) if file is not None else ""
    source_text = text if text and text.strip() else file_text
    return detect_ai_content(source_text, download_file), file_text


# NOTE(review): show_input / show_output are passed through unchanged; confirm
# the installed Gradio version's Interface accepts them.
iface = gr.Interface(
    fn=_analyze_submission,
    inputs=inputs + [file_input],
    outputs=[outputs, file_output],
    title="AI-Generated Content Detector",
    description="This app uses a simple stylometry analysis technique to estimate the percentage of AI-generated content in a given text.",
    show_input=True,
    show_output=True,
)
iface.launch()