Update app.py

app.py CHANGED
@@ -1,78 +1,59 @@
 from fastapi import FastAPI, Request
-from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
-import re
+import uvicorn

-app = FastAPI(title="AI Text Detector API")
+app = FastAPI(title="AI Detector API")

-# Device setup
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-# Load model (use small model for Hugging Face to prevent restarts)
+# Load model once at startup
 MODEL_NAME = "roberta-base-openai-detector"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
 model.eval()
-
-# --- Text Cleaning ---
-def clean_text(text: str) -> str:
-    text = re.sub(r'\s{2,}', ' ', text)
-    text = re.sub(r'\s+([,.;:?!])', r'\1', text)
-    return text.strip()
-
-# --- Paragraph Splitter ---
-def split_paragraphs(text: str):
-    return [p.strip() for p in re.split(r'\n{2,}', text) if p.strip()]

-# --- Classification ---
-def analyze_text(text: str):
-    text = clean_text(text)
-    paragraphs = split_paragraphs(text)

-    paragraph_results = []
-    total_ai, total_human = 0.0, 0.0
+def get_ai_probability(text: str) -> float:
+    """Return the AI probability (0–100%) for a given text."""
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+    with torch.no_grad():
+        logits = model(**inputs).logits
+    probs = torch.softmax(logits, dim=1)
+    ai_score = probs[0][1].item() * 100
+    return round(ai_score, 2)

-    for i, p in enumerate(paragraphs, 1):
-        inputs = tokenizer(p, return_tensors="pt", truncation=True, padding=True).to(device)
-        with torch.no_grad():
-            logits = model(**inputs).logits
-        probs = torch.softmax(logits, dim=1)[0]
-        ai_score = float(probs[1].item() * 100)
-        human_score = float(probs[0].item() * 100)

-        total_ai += ai_score
-        total_human += human_score
-
-        paragraph_results.append({
-            "paragraph": i,
-            "text": p,
-            "ai_score": ai_score,
-            "human_score": human_score
+@app.post("/analyze")
+async def analyze_text(request: Request):
+    """
+    Example body:
+    {
+        "text": "Your long article text here"
+    }
+    """
+    data = await request.json()
+    text = data.get("text", "").strip()
+    if not text:
+        return {"error": "No text provided"}
+
+    paragraphs = [p.strip() for p in text.split("\n") if p.strip()]
+    results = []
+
+    for i, para in enumerate(paragraphs, start=1):
+        ai_score = get_ai_probability(para)
+        results.append({
+            "paragraph": i,
+            "ai_score": ai_score,
+            "human_score": round(100 - ai_score, 2),
+            "content": para[:200] + ("..." if len(para) > 200 else "")
         })

-    avg_ai = total_ai / len(paragraphs)
-    avg_human = total_human / len(paragraphs)
-    overall_label = "AI-generated" if avg_ai > avg_human else "Human-written"
-
+    overall = sum([r["ai_score"] for r in results]) / len(results)
     return {
-        "overall": {
-            "ai_score": round(avg_ai, 2),
-            "human_score": round(avg_human, 2),
-            "label": overall_label
-        },
-        "paragraphs": paragraph_results
+        "overall_ai_score": round(overall, 2),
+        "overall_human_score": round(100 - overall, 2),
+        "paragraphs": results
     }

-# --- Request Schema ---
-class TextInput(BaseModel):
-    text: str

-
-@app.get("/")
-async def root():
-    return {"status": "ok", "message": "AI Text Detector API is running."}
-
-@app.post("/analyze")
-async def analyze(input_data: TextInput):
-    return analyze_text(input_data.text)
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)
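For reference, a minimal client-side sketch of the updated /analyze contract. The URL assumes the uvicorn.run settings above when the app is run locally; a deployed Space would use its own hostname, and the sample text is purely illustrative:

import requests

# Local URL per the __main__ block above; swap in the real Space URL when deployed.
URL = "http://localhost:7860/analyze"

# The updated endpoint splits on single newlines, so this body counts as two paragraphs.
sample = "First paragraph of the article.\nSecond paragraph with more detail."

resp = requests.post(URL, json={"text": sample})
resp.raise_for_status()
report = resp.json()

print("Overall AI score:", report["overall_ai_score"])
for para in report["paragraphs"]:
    print(para["paragraph"], para["ai_score"], para["human_score"])

Worth noting: the new code splits paragraphs on single newlines, where the old analyze_text split on blank lines (r'\n{2,}'), so hard-wrapped text will now be scored line by line.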
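The endpoint can also be exercised in-process with FastAPI's TestClient (requires the httpx package); a sketch assuming this file is importable as app.py, keeping in mind that the import still downloads and loads the detector model:

from fastapi.testclient import TestClient
from app import app  # the __main__ guard keeps uvicorn from starting on import

client = TestClient(app)

def test_analyze_scores_each_paragraph():
    resp = client.post("/analyze", json={"text": "One paragraph.\nAnother paragraph."})
    body = resp.json()
    assert resp.status_code == 200
    assert 0.0 <= body["overall_ai_score"] <= 100.0
    assert len(body["paragraphs"]) == 2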