import gradio as gr
from transformers import pipeline
import torch
import re
import os
from docx import Document
class AITextDetector:
    """Detect whether text is AI-generated or human-written.

    Wraps the Hugging Face ``VSAsteroid/ai-text-detector-hc3``
    text-classification pipeline and provides helpers for analyzing raw
    strings as well as uploaded .txt/.md/.docx files, chunking long
    documents into separately scored sections.
    """

    def __init__(self):
        # ``classifier`` stays None when model loading fails; every public
        # method checks for that and degrades gracefully instead of crashing.
        self.classifier = None
        self.load_model()

    def load_model(self):
        """Load the AI text detection model from Hugging Face."""
        try:
            print("Loading AI text detection model...")
            self.classifier = pipeline(
                "text-classification",
                model="VSAsteroid/ai-text-detector-hc3",
                return_all_scores=True,  # we need scores for both labels
                device=0 if torch.cuda.is_available() else -1
            )
            print("Model loaded successfully!")
        except Exception as e:
            # Swallow the error so the UI still starts; detect_text()
            # reports "Model not loaded" to the user instead.
            print(f"Error loading model: {e}")
            self.classifier = None

    def detect_text(self, input_text):
        """Classify ``input_text`` as AI-generated or human-written.

        Returns:
            tuple: ``(label, confidence, confidence_bar_html)`` where
            ``confidence`` is a string like ``"97.25%"`` on success and
            the float ``0.0`` on failure (empty input, missing model, or
            a prediction error — the failure message goes in ``label``).
        """
        if not input_text.strip():
            return "Please enter some text to analyze.", 0.0, ""
        if self.classifier is None:
            return "Model not loaded. Please try again.", 0.0, ""
        try:
            # The pipeline returns one list of {label, score} dicts per input.
            results = self.classifier(input_text)
            ai_score = 0.0
            human_score = 0.0
            for result in results[0]:
                label_upper = result['label'].upper()
                if "AI" in label_upper or "GENERATED" in label_upper:
                    ai_score = result['score']
                else:
                    human_score = result['score']
            # Report whichever label the model favored.
            if ai_score > human_score:
                label = "AI-Generated"
                confidence = ai_score
            else:
                label = "Human-Written"
                confidence = human_score
            confidence_percentage = confidence * 100
            confidence_bar = self.create_confidence_bar(confidence_percentage, label)
            return label, f"{confidence_percentage:.2f}%", confidence_bar
        except Exception as e:
            return f"Error during prediction: {str(e)}", 0.0, ""

    def create_confidence_bar(self, confidence_percentage, label):
        """Return an HTML progress bar visualizing the confidence score.

        Red for AI-generated predictions, green for human-written ones.
        Fix: ``color`` was previously computed but never used and no
        actual bar markup was emitted.
        """
        color = "#ff6b6b" if "AI" in label else "#51cf66"
        return f"""
        <div style="background-color: #e9ecef; border-radius: 8px; overflow: hidden;">
            <div style="width: {confidence_percentage:.2f}%; background-color: {color};
                        color: white; font-weight: bold; text-align: center; padding: 6px 0;">
                Confidence: {confidence_percentage:.2f}%
            </div>
        </div>
        """

    def create_text_confidence_bar(self, confidence_percentage, label):
        """Create a text-based confidence bar for markdown display."""
        bar_length = 20
        filled_length = int(bar_length * confidence_percentage / 100)
        # Distinct fill glyphs make AI vs. human results easy to scan.
        bar_char = "█" if "AI" in label else "▓"
        empty_char = "░"
        bar = bar_char * filled_length + empty_char * (bar_length - filled_length)
        emoji = "🤖" if "AI" in label else "👤"
        return f"{emoji} **Confidence:** {confidence_percentage:.1f}% `{bar}`"

    def extract_text_from_file(self, file_path):
        """Extract plain text from a .txt, .md, or .docx file.

        Problems are returned as human-readable strings (prefixed
        "Error"/"Unsupported") rather than raised, so callers can show
        them directly in the UI.
        """
        try:
            file_extension = os.path.splitext(file_path)[1].lower()
            if file_extension == '.txt':
                with open(file_path, 'r', encoding='utf-8') as file:
                    return file.read()
            elif file_extension == '.md':
                with open(file_path, 'r', encoding='utf-8') as file:
                    content = file.read()
                # Strip markdown syntax so formatting doesn't skew the model.
                content = re.sub(r'#{1,6}\s+', '', content)  # headers
                content = re.sub(r'\*\*(.*?)\*\*', r'\1', content)  # bold
                content = re.sub(r'\*(.*?)\*', r'\1', content)  # italic
                content = re.sub(r'`(.*?)`', r'\1', content)  # inline code
                content = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', content)  # links
                return content
            elif file_extension == '.docx':
                # Join the document's non-empty paragraphs.
                doc = Document(file_path)
                text_content = []
                for paragraph in doc.paragraphs:
                    if paragraph.text.strip():
                        text_content.append(paragraph.text)
                return '\n'.join(text_content)
            else:
                return f"Unsupported file format: {file_extension}. Please upload .txt, .md, or .docx files."
        except Exception as e:
            return f"Error reading file: {str(e)}"

    def analyze_file(self, file_obj):
        """Analyze an uploaded file for AI text detection.

        Accepts either a filesystem path string (what
        ``gr.File(type="filepath")`` actually passes) or a legacy file
        wrapper exposing ``.name`` — the original code assumed the
        latter only and crashed on plain path strings.

        Returns:
            tuple: (markdown report, confidence-bar HTML, text preview).
        """
        if file_obj is None:
            return "Please upload a file to analyze.", "", ""
        try:
            file_path = file_obj if isinstance(file_obj, str) else file_obj.name
            text_content = self.extract_text_from_file(file_path)
            # extract_text_from_file reports problems as strings.
            if text_content.startswith("Error") or text_content.startswith("Unsupported"):
                return text_content, "", ""
            if len(text_content.strip()) < 10:
                return "File content is too short for analysis (minimum 10 characters).", "", ""
            if len(text_content) > 50000:  # cap very large files
                text_content = text_content[:50000]
                truncation_note = "\n\n*Note: File was truncated to 50,000 characters for analysis.*"
            else:
                truncation_note = ""
            # Long documents get chunked, per-section analysis.
            if len(text_content) > 5000:
                return self.analyze_long_text(text_content, truncation_note)
            # Short documents are scored in one shot.
            label, confidence_str, conf_bar = self.detect_text(text_content)
            confidence_num = float(confidence_str.replace('%', ''))
            text_bar = self.create_text_confidence_bar(confidence_num, label)
            file_info = f"**File:** {os.path.basename(file_path)}\n"
            file_info += f"**Length:** {len(text_content)} characters\n\n"
            result = f"{file_info}**Overall Result:** {label} ({confidence_str})\n\n{text_bar}{truncation_note}"
            # Named variable instead of a precedence-fragile inline ternary.
            preview = text_content[:500] + "..." if len(text_content) > 500 else text_content
            return result, conf_bar, preview
        except Exception as e:
            return f"Error analyzing file: {str(e)}", "", ""

    def analyze_long_text(self, text_content, truncation_note=""):
        """Analyze long text by scoring it chunk by chunk.

        Returns a 3-tuple shaped like ``analyze_file``'s result:
        (markdown report, "", "") — the report embeds per-section text
        bars, so the HTML-bar and preview slots are empty.
        """
        # Drop very short fragments up front so the reported section count
        # and the numbering stay consistent (they previously diverged when
        # chunks were skipped mid-loop).
        chunks = [c for c in self.split_text_into_chunks(text_content)
                  if len(c.strip()) >= 20]
        results = []
        ai_count = 0
        human_count = 0
        total_confidence = 0
        results.append(f"**File Analysis Results** ({len(chunks)} sections analyzed)\n")
        results.append("=" * 50 + "\n")
        for i, chunk in enumerate(chunks, 1):
            label, confidence_str, _ = self.detect_text(chunk)
            if not isinstance(confidence_str, str):
                # detect_text signalled an error (e.g. model not loaded);
                # surface its message instead of crashing on float().
                results.append(f"### Section {i}\n{label}\n")
                continue
            confidence_num = float(confidence_str.replace('%', ''))
            text_bar = self.create_text_confidence_bar(confidence_num, label)
            if "AI" in label:
                ai_count += 1
            else:
                human_count += 1
            total_confidence += confidence_num
            results.append(f"### Section {i}")
            results.append(f"*{chunk[:200]}{'...' if len(chunk) > 200 else ''}*\n")
            results.append(f"**Result:** {label} ({confidence_str})")
            results.append(text_bar)
            results.append("\n" + "-" * 30 + "\n")
        # Insert the overall summary right after the two header lines.
        total_sections = ai_count + human_count
        if total_sections > 0:
            avg_confidence = total_confidence / total_sections
            overall_label = ("Predominantly AI-Generated" if ai_count > human_count
                             else "Predominantly Human-Written")
            results.insert(2, f"**Overall Assessment:** {overall_label}\n")
            results.insert(3, f"**AI Sections:** {ai_count} | **Human Sections:** {human_count}\n")
            results.insert(4, f"**Average Confidence:** {avg_confidence:.1f}%\n\n")
        results.append(truncation_note)
        return "\n".join(results), "", ""

    def split_text_into_chunks(self, text, max_chunk_size=1000):
        """Split long text into chunks of at most ``max_chunk_size`` chars.

        Paragraph boundaries (blank lines) are preferred; oversized
        chunks are further split on sentence boundaries.
        """
        # First pass: pack whole paragraphs.
        paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
        chunks = []
        current_chunk = ""
        for paragraph in paragraphs:
            if len(current_chunk + paragraph) <= max_chunk_size:
                current_chunk += paragraph + "\n\n"
            else:
                if current_chunk:
                    chunks.append(current_chunk.strip())
                current_chunk = paragraph + "\n\n"
        if current_chunk:
            chunks.append(current_chunk.strip())
        # Second pass: break any still-oversized chunk on sentence ends.
        final_chunks = []
        for chunk in chunks:
            if len(chunk) <= max_chunk_size:
                final_chunks.append(chunk)
            else:
                sentences = re.split(r'[.!?]+\s+', chunk)
                temp_chunk = ""
                for sentence in sentences:
                    if len(temp_chunk + sentence) <= max_chunk_size:
                        temp_chunk += sentence + ". "
                    else:
                        if temp_chunk:
                            final_chunks.append(temp_chunk.strip())
                        temp_chunk = sentence + ". "
                if temp_chunk:
                    final_chunks.append(temp_chunk.strip())
        return final_chunks
# Initialize the detector
# Module-level singleton: created once at import time so the Gradio
# callbacks below share a single loaded model instance.
detector = AITextDetector()
def analyze_single_text(text):
    """Gradio callback: score one text input with the shared detector.

    Returns the detector's (label, confidence, confidence-bar HTML) tuple.
    """
    return detector.detect_text(text)
def analyze_uploaded_file(file_obj):
    """Gradio callback: analyze an uploaded file with the shared detector.

    Returns the detector's (markdown report, confidence HTML, preview) tuple.
    """
    report, confidence_html, preview = detector.analyze_file(file_obj)
    return report, confidence_html, preview
# Create Gradio interface
def create_interface():
    """Build and return the Gradio Blocks UI.

    Two tabs — single-text analysis and file-upload analysis — both wired
    to the module-level ``detector`` via the wrapper callbacks defined
    above. Component creation order defines the on-screen layout.
    """
    with gr.Blocks(
        title="AI Text Detection Tool",
        theme=gr.themes.Soft(),
        css="""
        .main-header {
            text-align: center;
            margin-bottom: 30px;
        }
        .description {
            text-align: center;
            color: #666;
            margin-bottom: 20px;
        }
        """
    ) as demo:
        gr.Markdown("""
        # AI Text Detection Tool
        Detect whether text was written by Artificial Intelligence or Humans.
        """)
        with gr.Tabs():
            # Single Text Analysis Tab: paste/type a passage and score it.
            with gr.TabItem("Single Text Analysis"):
                with gr.Row():
                    with gr.Column(scale=2):
                        single_input = gr.Textbox(
                            label="Enter text to analyze",
                            placeholder="Paste or type the text you want to analyze here...",
                            lines=8,
                            max_lines=15
                        )
                        single_button = gr.Button("Analyze Text", variant="primary", size="lg")
                    with gr.Column(scale=1):
                        # Read-only output widgets filled by analyze_single_text.
                        single_label = gr.Textbox(label="Prediction", interactive=False)
                        single_confidence = gr.Textbox(label="Confidence", interactive=False)
                        single_conf_bar = gr.HTML(label="Confidence Visualization")
                # Clickable sample inputs for quick experimentation.
                gr.Examples(
                    examples=[
                        ["Artificial intelligence is a rapidly evolving field that encompasses machine learning, natural language processing, and computer vision. These technologies are transforming industries and creating new possibilities for automation and innovation."],
                        ["I woke up this morning feeling refreshed after a good night's sleep. The sun was shining through my bedroom window, and I could hear birds chirping outside. It reminded me of my childhood summers at my grandmother's house."],
                        ["The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet and is commonly used for testing purposes."]
                    ],
                    inputs=single_input,
                    label="Try these examples:"
                )
            # File Upload Analysis Tab: upload a document and score it.
            with gr.TabItem("File Upload Analysis"):
                gr.Markdown("### Upload and Analyze Files")
                gr.Markdown("Upload text files (.txt), Markdown files (.md), or Word documents (.docx) for AI text detection analysis.")
                with gr.Row():
                    with gr.Column(scale=1):
                        file_input = gr.File(
                            label="Upload File",
                            file_types=[".txt", ".md", ".docx"],
                            type="filepath"
                        )
                        file_button = gr.Button("Analyze File", variant="primary", size="lg")
                        gr.Markdown("**Supported formats:**")
                        gr.Markdown("- 📄 `.txt` - Plain text files")
                        gr.Markdown("- 📝 `.md` - Markdown files")
                        gr.Markdown("- 📋 `.docx` - Word documents")
                    with gr.Column(scale=2):
                        # Markdown report produced by analyze_uploaded_file.
                        file_results = gr.Markdown(
                            label="Analysis Results",
                            value="Upload a file and click 'Analyze File' to see results here..."
                        )
                with gr.Row():
                    with gr.Column():
                        file_confidence_bar = gr.HTML(label="Confidence Visualization")
                    with gr.Column():
                        file_preview = gr.Textbox(
                            label="File Preview (first 500 characters)",
                            lines=8,
                            interactive=False
                        )
        # Event handlers: wire the buttons to the module-level callbacks.
        single_button.click(
            fn=analyze_single_text,
            inputs=single_input,
            outputs=[single_label, single_confidence, single_conf_bar]
        )
        file_button.click(
            fn=analyze_uploaded_file,
            inputs=file_input,
            outputs=[file_results, file_confidence_bar, file_preview]
        )
        # Footer
        gr.Markdown("""
        ---
        **Model:** VSAsteroid/ai-text-detector-hc3 from Hugging Face
        **Note:** This tool provides predictions based on the model's training data. Results should be used as guidance, not definitive proof.
        """)
    return demo
if __name__ == "__main__":
    # Script entry point: build the UI and serve it.
    print("Starting AI Text Detection Web App...")
    app = create_interface()
    app.launch(
        server_name="0.0.0.0",  # listen on all interfaces (container-friendly)
        server_port=7860,
        share=False,  # flip to True to expose a public gradio.live link
        show_error=True,
    )