Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM | |
| import torch | |
| import math | |
# ASGI application object for the detector service.
app = FastAPI(title="Improved AI Text Detector")

# Hub identifiers of the two models used by the detector.
clf_model_name = "Hello-SimpleAI/chatgpt-detector-roberta"  # sequence classifier
ppl_model_name = "gpt2"  # causal LM used to measure perplexity

# Both tokenizer/model pairs are loaded once, at import time, and are shared
# by every call to the scoring functions below.
clf_tokenizer = AutoTokenizer.from_pretrained(clf_model_name)
clf_model = AutoModelForSequenceClassification.from_pretrained(clf_model_name)

ppl_tokenizer = AutoTokenizer.from_pretrained(ppl_model_name)
ppl_model = AutoModelForCausalLM.from_pretrained(ppl_model_name)
class InputText(BaseModel):
    """Request payload carrying the text to analyse."""

    # Raw text submitted by the caller.
    text: str
def get_classifier_score(text: str) -> float:
    """Return the classifier's probability that *text* is AI-written.

    The text is tokenized with truncation to 512 tokens, passed through
    ``clf_model`` without gradient tracking, and the logits are converted to
    probabilities with a softmax over the last dimension.

    Args:
        text: The text to score.

    Returns:
        The probability at class index 1 of the first (only) batch row.
    """
    encoded = clf_tokenizer(
        text,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )

    with torch.no_grad():
        logits = clf_model(**encoded).logits

    class_probs = logits.softmax(dim=-1)

    # Index 1 is treated as the "AI" class.
    # NOTE(review): confirm this against the checkpoint's id2label mapping.
    return class_probs[0, 1].item()
def get_perplexity(text: str) -> float:
    """Return the perplexity of *text* under ``ppl_model``.

    The token sequence is scored with a sliding window: a new window starts
    every ``stride`` tokens and spans up to ``max_length`` tokens (the
    model's ``n_positions``). Tokens already scored by an earlier window are
    kept as context but masked out of the loss, so every token is scored
    exactly once, with as much left context as the window allows.

    Args:
        text: The text to score.

    Returns:
        ``exp`` of the mean negative log-likelihood per scored token, or
        ``float("nan")`` when the text tokenizes to fewer than two tokens
        (no token has any context to be predicted from, so perplexity is
        undefined).
    """
    encodings = ppl_tokenizer(text, return_tensors="pt")
    all_ids = encodings.input_ids
    seq_len = all_ids.size(1)

    # With zero or one token there is nothing to predict; return NaN rather
    # than failing on an empty accumulator.
    if seq_len < 2:
        return float("nan")

    max_length = ppl_model.config.n_positions
    stride = 512

    nlls = []
    scored_total = 0
    prev_end_loc = 0
    for begin_loc in range(0, seq_len, stride):
        # The window extends to max_length (not stride) so that windows
        # overlap and later windows carry preceding tokens as context.
        end_loc = min(begin_loc + max_length, seq_len)
        trg_len = end_loc - prev_end_loc  # tokens not yet scored

        input_ids = all_ids[:, begin_loc:end_loc]
        target_ids = input_ids.clone()
        # Ignore the overlapping context tokens when computing the loss.
        target_ids[:, :-trg_len] = -100

        with torch.no_grad():
            outputs = ppl_model(input_ids, labels=target_ids)

        # The model shifts labels by one position internally, so the first
        # position of a window is never a prediction target. Count the
        # positions that actually contributed to the mean loss.
        scored = int((target_ids[:, 1:] != -100).sum())

        # outputs.loss is the mean over the scored positions; turn it back
        # into a sum so windows of different sizes are weighted correctly.
        nlls.append(outputs.loss * scored)
        scored_total += scored

        prev_end_loc = end_loc
        if end_loc == seq_len:
            break

    ppl = torch.exp(torch.stack(nlls).sum() / scored_total)
    return ppl.item()
def detect(input_text: InputText):
    """Label the submitted text as "AI", "Human" or "Uncertain".

    The decision combines the classifier probability with the language-model
    perplexity:

    * "AI"        -- classifier score > 0.6 and perplexity < 70
    * "Human"     -- classifier score < 0.4 and perplexity > 60
    * "Uncertain" -- anything else, including a perplexity that is not a
      finite number

    Args:
        input_text: Request payload; its ``text`` is stripped of surrounding
            whitespace before scoring.

    Returns:
        A dict with ``classifier_score`` (rounded to 4 places),
        ``perplexity`` (rounded to 2 places, or ``None`` when it is not a
        finite number) and ``final_label``.

    Raises:
        HTTPException: With status 400 when the text is empty after
            stripping.
    """
    # NOTE(review): no route decorator is visible on this function in the
    # file as seen -- confirm it is actually registered on `app`.

    # Local import: the file's top-level import only brings in FastAPI.
    from fastapi import HTTPException

    text = input_text.text.strip()
    if not text:
        # Nothing to score; reject early instead of failing inside the models.
        raise HTTPException(status_code=400, detail="text must not be empty")

    clf_score = get_classifier_score(text)
    ppl = get_perplexity(text)

    # NaN/inf cannot be represented in standard JSON, and a non-finite
    # perplexity carries no usable signal for the decision rule.
    ppl_is_finite = math.isfinite(ppl)

    if not ppl_is_finite:
        final = "Uncertain"
    elif clf_score > 0.6 and ppl < 70:
        final = "AI"
    elif clf_score < 0.4 and ppl > 60:
        final = "Human"
    else:
        final = "Uncertain"

    return {
        "classifier_score": round(clf_score, 4),
        "perplexity": round(ppl, 2) if ppl_is_finite else None,
        "final_label": final,
    }