Spaces:

sundaram07
/

AI_Text_Detector

Running

App Files Files Community

AI_Text_Detector / src /streamlit_app.py

sundaram07

Update src/streamlit_app.py

375ed47 verified about 14 hours ago

raw

history blame contribute delete

3.33 kB

	import streamlit as st
	import tensorflow as tf
	import numpy as np
	import nltk
	import os
	from nltk.tokenize import sent_tokenize
	from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification

	# 📁 Hugging Face cache dir
	# NOTE(review): transformers is already imported by the time this line runs, so
	# the library may not pick this variable up; the explicit cache_dir arguments
	# passed to from_pretrained() in this file do not depend on it — confirm.
	os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"

	# 📥 Make the NLTK "punkt_tab" sentence tokenizer data available.
	# Streamlit re-executes this script on every interaction, so register the data
	# directory only once (avoids growing nltk.data.path with duplicates) and only
	# hit the network when the resource is genuinely missing.
	nltk_data_path = "/tmp/nltk_data"
	if nltk_data_path not in nltk.data.path:
	    nltk.data.path.append(nltk_data_path)
	try:
	    nltk.data.find("tokenizers/punkt_tab")
	except LookupError:
	    nltk.download("punkt_tab", download_dir=nltk_data_path)

	# ✅ Cache the model/tokenizer
	@st.cache_resource
	def load_model_and_tokenizer():
	    """Load the DistilBERT tokenizer and the sentence classifier.

	    The function is wrapped in ``st.cache_resource`` so the loaded objects
	    are cached rather than rebuilt on each call.

	    Returns:
	        A ``(tokenizer, model)`` tuple: the ``distilbert-base-uncased`` fast
	        tokenizer and the ``sundaram07/distilbert-sentence-classifier``
	        TensorFlow sequence-classification model.
	    """
	    # Both downloads share the same writable cache location.
	    hf_cache_dir = "/tmp/huggingface"

	    loaded_tokenizer = DistilBertTokenizerFast.from_pretrained(
	        "distilbert-base-uncased",
	        cache_dir=hf_cache_dir,
	    )
	    loaded_model = TFDistilBertForSequenceClassification.from_pretrained(
	        "sundaram07/distilbert-sentence-classifier",
	        cache_dir=hf_cache_dir,
	    )
	    return loaded_tokenizer, loaded_model

	# Module-level handles used by predict_sentence_ai_probability(); the loader is
	# decorated with st.cache_resource, so this call goes through that cache.
	tokenizer, model = load_model_and_tokenizer()

	# 🔮 Predict sentence AI probability
	def predict_sentence_ai_probability(sentence):
	    """Score a single sentence with the module-level classifier.

	    The sentence is tokenized (with truncation and padding), passed through
	    the model, and a sigmoid is applied to the resulting logits.

	    Args:
	        sentence: Text of one sentence.

	    Returns:
	        Element ``[0][0]`` of the sigmoid-activated logits, converted with
	        ``.numpy()``.

	    NOTE(review): the caller in this file flags a sentence as AI when this
	    score is at or below its threshold — confirm the score's orientation
	    against how the model was trained.
	    """
	    encoded = tokenizer(sentence, return_tensors="tf", truncation=True, padding=True)
	    raw_logits = model(encoded).logits
	    activated = tf.sigmoid(raw_logits)
	    # First row, first column: one input sentence, first output unit.
	    return activated[0][0].numpy()

	# 📊 Analyze text
	def predict_ai_generated_percentage(text, threshold=0.15):
	    """Split text into sentences, score each one, and summarize the result.

	    Args:
	        text: Raw input text; surrounding whitespace is stripped before
	            sentence splitting.
	        threshold: A sentence is flagged as AI when its score is less than
	            or equal to this value.

	    Returns:
	        A ``(ai_percentage, results)`` tuple. ``ai_percentage`` is the share
	        of flagged sentences on a 0-100 scale, and ``results`` is a list of
	        ``(sentence, score, is_ai)`` tuples in sentence order. When no
	        sentences are found the result is ``(0.0, [])``.
	    """
	    sentences = sent_tokenize(text.strip())
	    if not sentences:
	        return 0.0, []

	    # Score every sentence first, then attach the threshold decision.
	    scored = [(sentence, predict_sentence_ai_probability(sentence)) for sentence in sentences]
	    results = [(sentence, score, score <= threshold) for sentence, score in scored]

	    flagged_total = sum(1 for _, _, flagged in results if flagged)
	    ai_percentage = (flagged_total / len(sentences)) * 100
	    return ai_percentage, results

	# 🖥️ Streamlit UI
	# Page chrome: browser-tab title, wide layout, heading and a one-line description.
	# NOTE(review): Streamlit has historically required set_page_config() to be the
	# first Streamlit command in a script, and the cached model load runs earlier in
	# this file — confirm the deployed Streamlit version accepts this ordering.
	st.set_page_config(page_title="AI Detector", layout="wide")
	st.title("🧠 AI Content Detector")
	st.markdown("This app detects the percentage of AI-generated content using sentence-level analysis with DistilBERT.")

	# 📋 Text input
	# user_input holds the current contents of the text area; it is read by the
	# Analyze button handler further down.
	user_input = st.text_area("📋 Paste your text below to check for AI-generated sentences:", height=300)

	# 🔍 Analyze button logic
	if st.button("🔍 Analyze"):
	    # Start from a clean slate so results from an earlier click are never shown
	    # next to a warning for the current one.
	    st.session_state["analysis_done"] = False
	    st.session_state["analysis_results"] = None
	    st.session_state["ai_percentage"] = None

	    if user_input.strip():
	        # Run the sentence-level analysis on the submitted text.
	        ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)

	        if analysis_results:
	            # Persist the outcome for the results section to pick up.
	            st.session_state["ai_percentage"] = ai_percentage
	            st.session_state["analysis_results"] = analysis_results
	            st.session_state["analysis_done"] = True
	        else:
	            st.warning("⚠️ Not enough valid sentences to analyze.")
	    else:
	        st.warning("⚠️ Please enter some text.")

	# 📤 Show results
	# Rendered only when the Analyze handler has stored a completed analysis.
	if st.session_state.get("analysis_done", False):
	    st.subheader("🔍 Sentence-level Analysis")
	    numbered_results = enumerate(st.session_state.analysis_results, start=1)
	    for i, (sentence, _score, is_ai) in numbered_results:
	        # The per-sentence score is stored but only the verdict is displayed.
	        label = "🔴 AI" if is_ai else "🟢 Human"
	        st.markdown(f"{i}. _{sentence}_\n\n → {label}")

	    st.subheader("📊 Final Result")
	    st.success(f"Estimated AI-generated content: {st.session_state.ai_percentage:.2f}%")