from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM
import torch

app = FastAPI(title="Improved AI Text Detector")
# 1. Classifier model (better than akshayvkt)
clf_model_name = "Hello-SimpleAI/chatgpt-detector-roberta"
clf_tokenizer = AutoTokenizer.from_pretrained(clf_model_name)
clf_model = AutoModelForSequenceClassification.from_pretrained(clf_model_name)
# 2. Perplexity model (GPT-2)
ppl_model_name = "gpt2"
ppl_tokenizer = AutoTokenizer.from_pretrained(ppl_model_name)
ppl_model = AutoModelForCausalLM.from_pretrained(ppl_model_name)
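# Optional sketch (an assumption, not part of the original setup): if a GPU is available,
# both models can be moved to it to speed up inference. Uncomment to use; the tokenized
# inputs in the helper functions below would then also need a matching .to(device).
# device = "cuda" if torch.cuda.is_available() else "cpu"
# clf_model.to(device)
# ppl_model.to(device)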
class InputText(BaseModel):
    text: str


def get_classifier_score(text: str) -> float:
    inputs = clf_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = clf_model(**inputs)
    probs = torch.softmax(outputs.logits, dim=-1)
    ai_prob = probs[0][1].item()  # label 1 = AI
    return ai_prob

def get_perplexity(text: str) -> float:
    # Sliding-window perplexity under GPT-2, following the standard Hugging Face recipe:
    # perplexity = exp(total negative log-likelihood / number of tokens).
    encodings = ppl_tokenizer(text, return_tensors="pt")
    max_length = ppl_model.config.n_positions  # 1024 for GPT-2
    stride = 512
    seq_len = encodings.input_ids.size(1)

    nlls = []
    prev_end_loc = 0
    for begin_loc in range(0, seq_len, stride):
        end_loc = min(begin_loc + max_length, seq_len)
        trg_len = end_loc - prev_end_loc  # tokens not already scored by the previous window
        input_ids = encodings.input_ids[:, begin_loc:end_loc]
        target_ids = input_ids.clone()
        target_ids[:, :-trg_len] = -100  # mask the overlap so each token is scored once

        with torch.no_grad():
            outputs = ppl_model(input_ids, labels=target_ids)
            neg_log_likelihood = outputs.loss * trg_len

        nlls.append(neg_log_likelihood)
        prev_end_loc = end_loc
        if end_loc == seq_len:
            break

    ppl = torch.exp(torch.stack(nlls).sum() / end_loc)
    return ppl.item()
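# Illustrative sanity check (not part of the API; the sample text and the behaviour
# described are assumptions and will vary with model and tokenizer versions):
# human-written prose usually lands above the perplexity thresholds used in /detect,
# while generic LLM output tends to land below them.
#
#   if __name__ == "__main__":
#       sample = "The quick brown fox jumps over the lazy dog."
#       print(get_classifier_score(sample), get_perplexity(sample))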
@app.post("/detect")
def detect(input_text: InputText):
    text = input_text.text.strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text must not be empty.")

    # Run classifier
    clf_score = get_classifier_score(text)
    # Run perplexity
    ppl = get_perplexity(text)

    # Decision rule: combine both signals.
    # High classifier score (> 0.6) plus low perplexity (< 70) -> AI;
    # low classifier score (< 0.4) plus high perplexity (> 60) -> Human;
    # anything else -> Uncertain.
    if clf_score > 0.6 and ppl < 70:
        final = "AI"
    elif clf_score < 0.4 and ppl > 60:
        final = "Human"
    else:
        final = "Uncertain"

    return {
        "classifier_score": round(clf_score, 4),
        "perplexity": round(ppl, 2),
        "final_label": final,
    }
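# Example usage (a sketch, assuming the file is saved as app.py and uvicorn is installed;
# the port and sample text below are placeholders, not part of the original configuration):
#
#   uvicorn app:app --host 0.0.0.0 --port 7860
#
#   curl -X POST http://localhost:7860/detect \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Paste the paragraph you want to check here."}'
#
# The response is a JSON object with "classifier_score", "perplexity", and "final_label".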