"""Gradio web app that labels text as AI-generated or human-written."""

import os
import re

import gradio as gr
import torch
from docx import Document
from transformers import pipeline


class _UnsupportedFileFormatError(ValueError):
    """Raised when an uploaded file has an extension the app cannot read."""


class AITextDetector:
    """Wrap a Hugging Face text-classification pipeline for AI-text detection.

    The pipeline is loaded once in ``__init__``. If loading fails,
    ``self.classifier`` stays ``None`` and every detection call returns a
    "model not loaded" message instead of raising.
    """

    # The only two labels ``detect_text`` returns for a successful prediction.
    # Anything else in the label slot is a user-facing status/error message.
    AI_LABEL = "AI-Generated"
    HUMAN_LABEL = "Human-Written"

    def __init__(self):
        self.classifier = None
        self.load_model()

    def load_model(self):
        """Load the AI text detection model from Hugging Face.

        Failures are printed and leave ``self.classifier`` set to ``None``.
        """
        try:
            print("Loading AI text detection model...")
            self.classifier = pipeline(
                "text-classification",
                model="VSAsteroid/ai-text-detector-hc3",
                # ``top_k=None`` is the documented replacement for the
                # deprecated ``return_all_scores=True``: return every label.
                top_k=None,
                device=0 if torch.cuda.is_available() else -1,
            )
            print("Model loaded successfully!")
        except Exception as e:
            print(f"Error loading model: {e}")
            self.classifier = None

    def detect_text(self, input_text):
        """Detect whether text is AI-generated or human-written.

        Args:
            input_text: The text to classify.

        Returns:
            A 3-tuple ``(label, confidence, confidence_bar_html)``.

            * On success ``label`` is ``AI_LABEL`` or ``HUMAN_LABEL``,
              ``confidence`` is a string such as ``"87.12%"`` and the third
              item is the HTML produced by ``create_confidence_bar``.
            * On failure (empty input, model not loaded, inference error)
              ``label`` holds a human-readable message, ``confidence`` is the
              float ``0.0`` and the HTML is ``""``.
        """
        if not input_text or not input_text.strip():
            return "Please enter some text to analyze.", 0.0, ""

        if self.classifier is None:
            return "Model not loaded. Please try again.", 0.0, ""

        try:
            # Truncate at the tokenizer level so inputs longer than the
            # model's maximum sequence length do not make inference fail.
            results = self.classifier(input_text, truncation=True)

            # Depending on the transformers version the scores for a single
            # input arrive either nested ([[{...}, {...}]]) or flat.
            if results and isinstance(results[0], list):
                scores = results[0]
            else:
                scores = results

            ai_score = 0.0
            human_score = 0.0
            for result in scores:
                # NOTE(review): assumes the model's AI label contains "AI" or
                # "GENERATED"; every other label is treated as human. Confirm
                # against the model card.
                label_name = result['label'].upper()
                if "AI" in label_name or "GENERATED" in label_name:
                    ai_score = result['score']
                else:
                    human_score = result['score']

            if ai_score > human_score:
                label = self.AI_LABEL
                confidence = ai_score
            else:
                label = self.HUMAN_LABEL
                confidence = human_score

            confidence_percentage = confidence * 100
            confidence_bar = self.create_confidence_bar(confidence_percentage, label)
            return label, f"{confidence_percentage:.2f}%", confidence_bar

        except Exception as e:
            return f"Error during prediction: {str(e)}", 0.0, ""

    def _is_prediction(self, label):
        """Return True when ``label`` is a real prediction, not a message."""
        return label in (self.AI_LABEL, self.HUMAN_LABEL)

    def create_confidence_bar(self, confidence_percentage, label):
        """Create an HTML confidence bar.

        Args:
            confidence_percentage: Confidence on a 0-100 scale.
            label: Prediction label; labels containing "AI" get a red bar,
                all others a green one.

        Returns:
            An HTML snippet with the textual confidence and a coloured bar.
        """
        color = "#ff6b6b" if "AI" in label else "#51cf66"
        # Clamp so a slightly out-of-range score cannot break the layout.
        width = max(0.0, min(100.0, float(confidence_percentage)))
        return (
            '<div style="margin: 10px 0;">'
            '<div style="font-weight: bold; margin-bottom: 5px;">'
            f'Confidence: {confidence_percentage:.2f}%'
            '</div>'
            '<div style="background-color: #e9ecef; border-radius: 10px; '
            'height: 20px; overflow: hidden;">'
            f'<div style="background-color: {color}; width: {width:.2f}%; '
            'height: 100%;"></div>'
            '</div>'
            '</div>'
        )

    def create_text_confidence_bar(self, confidence_percentage, label):
        """Create a text-based confidence bar for markdown display.

        Args:
            confidence_percentage: Confidence on a 0-100 scale.
            label: Prediction label; selects the fill character and emoji.

        Returns:
            A markdown string with an emoji, the percentage and a
            20-character bar.
        """
        bar_length = 20
        filled_length = int(bar_length * confidence_percentage / 100)
        # Keep the bar exactly ``bar_length`` wide for out-of-range values.
        filled_length = max(0, min(bar_length, filled_length))

        is_ai = "AI" in label
        bar_char = "█" if is_ai else "▓"
        empty_char = "░"
        bar = bar_char * filled_length + empty_char * (bar_length - filled_length)

        emoji = "🤖" if is_ai else "👤"
        return f"{emoji} **Confidence:** {confidence_percentage:.1f}% `{bar}`"

    @staticmethod
    def _strip_markdown(content):
        """Remove common markdown markup so only the prose is analysed."""
        # Headers: anchored to line start so "C# code" keeps its '#'.
        content = re.sub(r'^[ \t]{0,3}#{1,6}[ \t]+', '', content, flags=re.MULTILINE)
        content = re.sub(r'\*\*(.*?)\*\*', r'\1', content)  # bold
        content = re.sub(r'\*(.*?)\*', r'\1', content)  # italic
        content = re.sub(r'`(.*?)`', r'\1', content)  # inline code
        content = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', content)  # links
        return content

    def _read_file_text(self, file_path):
        """Return the text of a .txt, .md or .docx file.

        Raises:
            _UnsupportedFileFormatError: For any other extension.
            Exception: Whatever the underlying reader raises (I/O errors,
                decoding errors, malformed documents).
        """
        file_extension = os.path.splitext(file_path)[1].lower()

        if file_extension == '.txt':
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()

        if file_extension == '.md':
            with open(file_path, 'r', encoding='utf-8') as file:
                return self._strip_markdown(file.read())

        if file_extension == '.docx':
            doc = Document(file_path)
            return '\n'.join(
                paragraph.text
                for paragraph in doc.paragraphs
                if paragraph.text.strip()
            )

        raise _UnsupportedFileFormatError(
            f"Unsupported file format: {file_extension}. "
            "Please upload .txt, .md, or .docx files."
        )

    def extract_text_from_file(self, file_path):
        """Extract text content from uploaded files.

        Args:
            file_path: Path to a .txt, .md or .docx file.

        Returns:
            The extracted text. On failure a message starting with
            ``"Unsupported file format"`` or ``"Error reading file"`` is
            returned instead of raising.
        """
        try:
            return self._read_file_text(file_path)
        except _UnsupportedFileFormatError as e:
            return str(e)
        except Exception as e:
            return f"Error reading file: {str(e)}"

    def analyze_file(self, file_obj):
        """Analyze an uploaded file for AI text detection.

        Args:
            file_obj: Either a filesystem path (what ``gr.File`` delivers when
                configured with ``type="filepath"``) or an object exposing the
                path as ``.name``. ``None`` means nothing was uploaded.

        Returns:
            A 3-tuple ``(markdown_report, confidence_bar_html, preview)``.
            When the file cannot be analysed the first item is the message
            and the other two are empty strings.
        """
        if file_obj is None:
            return "Please upload a file to analyze.", "", ""

        try:
            if isinstance(file_obj, (str, os.PathLike)):
                file_path = os.fspath(file_obj)
            else:
                file_path = file_obj.name

            # Read through the raising helper so genuine content that merely
            # starts with "Error"/"Unsupported" is not mistaken for a failure.
            try:
                text_content = self._read_file_text(file_path)
            except _UnsupportedFileFormatError as e:
                return str(e), "", ""
            except Exception as e:
                return f"Error reading file: {str(e)}", "", ""

            if len(text_content.strip()) < 10:
                return "File content is too short for analysis (minimum 10 characters).", "", ""

            truncation_note = ""
            if len(text_content) > 50000:  # Limit to ~50k characters
                text_content = text_content[:50000]
                truncation_note = "\n\n*Note: File was truncated to 50,000 characters for analysis.*"

            if len(text_content) > 500:
                preview = text_content[:500] + "..."
            else:
                preview = text_content

            # Long documents are scored section by section.
            if len(text_content) > 5000:
                report, conf_bar, _ = self.analyze_long_text(text_content, truncation_note)
                return report, conf_bar, preview

            label, confidence_str, conf_bar = self.detect_text(text_content)
            if not self._is_prediction(label):
                # ``label`` carries the failure message in this case.
                return label, "", ""

            confidence_num = float(confidence_str.replace('%', ''))
            text_bar = self.create_text_confidence_bar(confidence_num, label)

            file_info = f"**File:** {os.path.basename(file_path)}\n"
            file_info += f"**Length:** {len(text_content)} characters\n\n"
            result = (
                f"{file_info}**Overall Result:** {label} ({confidence_str})"
                f"\n\n{text_bar}{truncation_note}"
            )
            return result, conf_bar, preview

        except Exception as e:
            return f"Error analyzing file: {str(e)}", "", ""

    def analyze_long_text(self, text_content, truncation_note=""):
        """Analyze long text by splitting it into chunks.

        Args:
            text_content: The full text to analyse.
            truncation_note: Markdown note appended to the end of the report.

        Returns:
            A 3-tuple ``(markdown_report, "", "")``. The report lists each
            analysed section and, when at least one section was classified,
            an overall summary. Sections whose classification failed are
            listed with their message and excluded from the summary.
        """
        chunks = self.split_text_into_chunks(text_content)

        header = [
            f"**File Analysis Results** ({len(chunks)} sections analyzed)\n",
            "=" * 50 + "\n",
        ]
        sections = []
        ai_count = 0
        human_count = 0
        total_confidence = 0.0

        for i, chunk in enumerate(chunks, 1):
            if len(chunk.strip()) < 20:  # Skip very short chunks
                continue

            label, confidence_str, _ = self.detect_text(chunk)

            sections.append(f"### Section {i}")
            sections.append(f"*{chunk[:200]}{'...' if len(chunk) > 200 else ''}*\n")

            if not self._is_prediction(label):
                # Surface the failure instead of counting it as a prediction.
                sections.append(f"**Result:** {label}")
                sections.append("\n" + "-" * 30 + "\n")
                continue

            confidence_num = float(confidence_str.replace('%', ''))
            if label == self.AI_LABEL:
                ai_count += 1
            else:
                human_count += 1
            total_confidence += confidence_num

            sections.append(f"**Result:** {label} ({confidence_str})")
            sections.append(self.create_text_confidence_bar(confidence_num, label))
            sections.append("\n" + "-" * 30 + "\n")

        summary = []
        total_sections = ai_count + human_count
        if total_sections > 0:
            avg_confidence = total_confidence / total_sections
            if ai_count > human_count:
                overall_label = "Predominantly AI-Generated"
            else:
                overall_label = "Predominantly Human-Written"
            summary = [
                f"**Overall Assessment:** {overall_label}\n",
                f"**AI Sections:** {ai_count} | **Human Sections:** {human_count}\n",
                f"**Average Confidence:** {avg_confidence:.1f}%\n\n",
            ]

        report = header + summary + sections + [truncation_note]
        return "\n".join(report), "", ""

    def split_text_into_chunks(self, text, max_chunk_size=1000):
        """Split long text into analyzable chunks.

        Paragraphs (separated by blank lines) are packed together up to
        ``max_chunk_size`` characters. A packed chunk that is still too long
        is split again on sentence boundaries, keeping the original
        punctuation. A single sentence longer than ``max_chunk_size`` is
        emitted as one oversized chunk.

        Args:
            text: The text to split.
            max_chunk_size: Target maximum chunk length in characters.

        Returns:
            A list of non-empty chunk strings in document order.
        """
        paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]

        chunks = []
        current_chunk = ""
        for paragraph in paragraphs:
            if len(current_chunk + paragraph) <= max_chunk_size:
                current_chunk += paragraph + "\n\n"
            else:
                if current_chunk:
                    chunks.append(current_chunk.strip())
                current_chunk = paragraph + "\n\n"
        if current_chunk:
            chunks.append(current_chunk.strip())

        final_chunks = []
        for chunk in chunks:
            if len(chunk) <= max_chunk_size:
                final_chunks.append(chunk)
                continue

            # Split after the terminator (lookbehind) so "?" and "!" survive
            # and no extra "." is appended to each sentence.
            sentences = re.split(r'(?<=[.!?])\s+', chunk)
            temp_chunk = ""
            for sentence in sentences:
                if not sentence:
                    continue
                candidate = f"{temp_chunk} {sentence}" if temp_chunk else sentence
                if len(candidate) <= max_chunk_size:
                    temp_chunk = candidate
                else:
                    if temp_chunk:
                        final_chunks.append(temp_chunk.strip())
                    temp_chunk = sentence
            if temp_chunk:
                final_chunks.append(temp_chunk.strip())

        return final_chunks


# Initialize the detector (loads the model once, at import time)
detector = AITextDetector()


def analyze_single_text(text):
    """Wrapper function for single text analysis.

    Returns the ``(label, confidence, confidence_bar_html)`` tuple produced by
    ``AITextDetector.detect_text``.
    """
    label, confidence, conf_bar = detector.detect_text(text)
    return label, confidence, conf_bar


def analyze_uploaded_file(file_obj):
    """Wrapper function for file analysis.

    Returns the ``(report, confidence_bar_html, preview)`` tuple produced by
    ``AITextDetector.analyze_file``.
    """
    return detector.analyze_file(file_obj)


# Create Gradio interface
def create_interface():
    """Build and return the Gradio ``Blocks`` app.

    The app has two tabs: one for pasted text and one for uploaded
    .txt/.md/.docx files. Nothing is launched here.
    """
    with gr.Blocks(
        title="AI Text Detection Tool",
        theme=gr.themes.Soft(),
        css="""
        .main-header {
            text-align: center;
            margin-bottom: 30px;
        }
        .description {
            text-align: center;
            color: #666;
            margin-bottom: 20px;
        }
        """
    ) as demo:
        # Built from single-line literals: markdown is indentation-sensitive.
        gr.Markdown(
            "\n# AI Text Detection Tool\n"
            "Detect whether text was written by Artificial Intelligence or Humans.\n"
        )

        with gr.Tabs():
            # Single Text Analysis Tab
            with gr.TabItem("Single Text Analysis"):
                with gr.Row():
                    with gr.Column(scale=2):
                        single_input = gr.Textbox(
                            label="Enter text to analyze",
                            placeholder="Paste or type the text you want to analyze here...",
                            lines=8,
                            max_lines=15
                        )
                        single_button = gr.Button("Analyze Text", variant="primary", size="lg")

                    with gr.Column(scale=1):
                        single_label = gr.Textbox(label="Prediction", interactive=False)
                        single_confidence = gr.Textbox(label="Confidence", interactive=False)
                        single_conf_bar = gr.HTML(label="Confidence Visualization")

                # Examples
                gr.Examples(
                    examples=[
                        ["Artificial intelligence is a rapidly evolving field that encompasses machine learning, natural language processing, and computer vision. These technologies are transforming industries and creating new possibilities for automation and innovation."],
                        ["I woke up this morning feeling refreshed after a good night's sleep. The sun was shining through my bedroom window, and I could hear birds chirping outside. It reminded me of my childhood summers at my grandmother's house."],
                        ["The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet and is commonly used for testing purposes."]
                    ],
                    inputs=single_input,
                    label="Try these examples:"
                )

            # File Upload Analysis Tab
            with gr.TabItem("File Upload Analysis"):
                gr.Markdown("### Upload and Analyze Files")
                gr.Markdown("Upload text files (.txt), Markdown files (.md), or Word documents (.docx) for AI text detection analysis.")

                with gr.Row():
                    with gr.Column(scale=1):
                        file_input = gr.File(
                            label="Upload File",
                            file_types=[".txt", ".md", ".docx"],
                            type="filepath"
                        )
                        file_button = gr.Button("Analyze File", variant="primary", size="lg")

                        gr.Markdown("**Supported formats:**")
                        gr.Markdown("- 📄 `.txt` - Plain text files")
                        gr.Markdown("- 📝 `.md` - Markdown files")
                        gr.Markdown("- 📋 `.docx` - Word documents")

                    with gr.Column(scale=2):
                        file_results = gr.Markdown(
                            label="Analysis Results",
                            value="Upload a file and click 'Analyze File' to see results here..."
                        )

                with gr.Row():
                    with gr.Column():
                        file_confidence_bar = gr.HTML(label="Confidence Visualization")
                    with gr.Column():
                        file_preview = gr.Textbox(
                            label="File Preview (first 500 characters)",
                            lines=8,
                            interactive=False
                        )

        # Event handlers
        single_button.click(
            fn=analyze_single_text,
            inputs=single_input,
            outputs=[single_label, single_confidence, single_conf_bar]
        )

        file_button.click(
            fn=analyze_uploaded_file,
            inputs=file_input,
            outputs=[file_results, file_confidence_bar, file_preview]
        )

        # Footer
        gr.Markdown(
            "\n---\n"
            "**Model:** VSAsteroid/ai-text-detector-hc3 from Hugging Face\n\n"
            "**Note:** This tool provides predictions based on the model's training data. "
            "Results should be used as guidance, not definitive proof.\n"
        )

    return demo


if __name__ == "__main__":
    # Create and launch the interface
    print("Starting AI Text Detection Web App...")
    interface = create_interface()

    # Launch with public sharing option for deployment
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,  # Set to True for public sharing
        show_error=True
    )