mahmoudsaber0 committed on
Commit
9b17a72
·
verified ·
1 Parent(s): 6f823c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -60
app.py CHANGED
@@ -1,78 +1,59 @@
1
  from fastapi import FastAPI, Request
2
- from pydantic import BaseModel
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  import torch
5
- import re
6
 
7
app = FastAPI(title="AI Text Detector API")

# Device setup: run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the tokenizer and classifier once at import time so every request
# reuses them (a small model is used on Hugging Face to prevent restarts).
MODEL_NAME = "roberta-base-openai-detector"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# .to() and .eval() both return the module, so the calls can be chained.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to(device).eval()
17
-
18
- # --- Text Cleaning ---
19
def clean_text(text: str) -> str:
    """Return *text* with its whitespace normalized.

    Runs of two or more whitespace characters are collapsed to one space,
    whitespace directly in front of ``, . ; : ? !`` is dropped, and leading
    and trailing whitespace is stripped.
    """
    collapsed = re.sub(r'\s{2,}', ' ', text)
    tightened = re.sub(r'\s+([,.;:?!])', r'\1', collapsed)
    return tightened.strip()
23
-
24
- # --- Paragraph Splitter ---
25
def split_paragraphs(text: str):
    """Split *text* on blank lines and return the stripped, non-empty pieces."""
    stripped_pieces = (piece.strip() for piece in re.split(r'\n{2,}', text))
    return [piece for piece in stripped_pieces if piece]
27
 
28
- # --- Classification ---
29
def _ai_label_index() -> int:
    """Return the logit index of the classifier's AI-generated ("Fake") class.

    The index is read from ``model.config.id2label`` instead of being
    hard-coded. When the label map does not name such a class, the
    historical index 1 is kept.
    """
    id2label = getattr(model.config, "id2label", None) or {}
    for idx, label in id2label.items():
        if str(label).strip().lower() in ("fake", "ai", "ai-generated", "machine", "generated"):
            return int(idx)
    return 1


def analyze_text(text: str):
    """Classify *text* paragraph by paragraph and summarize the result.

    Args:
        text: Raw input; paragraphs are separated by blank lines.

    Returns:
        A dict with ``"overall_result"`` (mean ``ai_percentage`` and
        ``human_percentage`` over all paragraphs plus a ``label``) and
        ``"paragraphs"`` (one entry per paragraph with its number, both
        probabilities in percent and a snippet of at most 150 characters).

    Raises:
        ValueError: If *text* contains no non-blank paragraph.
    """
    # Split BEFORE cleaning: clean_text() collapses "\n\n" into one space,
    # which would erase the boundaries split_paragraphs() looks for.
    paragraphs = [
        cleaned
        for cleaned in (clean_text(raw) for raw in split_paragraphs(text))
        if cleaned
    ]
    if not paragraphs:
        # Without this guard the averages below divide by zero.
        raise ValueError("No text provided")

    ai_idx = _ai_label_index()
    human_idx = 1 - ai_idx  # binary classifier: the other class is "human"

    paragraph_results = []
    total_ai, total_human = 0.0, 0.0

    for i, p in enumerate(paragraphs, 1):
        inputs = tokenizer(p, return_tensors="pt", truncation=True, padding=True).to(device)
        with torch.no_grad():
            logits = model(**inputs).logits
        probs = torch.softmax(logits, dim=1)[0]
        ai_score = float(probs[ai_idx].item() * 100)
        human_score = float(probs[human_idx].item() * 100)

        total_ai += ai_score
        total_human += human_score

        paragraph_results.append({
            "paragraph_number": i,
            "ai_probability": round(ai_score, 2),
            "human_probability": round(human_score, 2),
            "text_snippet": p[:150] + ("..." if len(p) > 150 else ""),
        })

    avg_ai = total_ai / len(paragraphs)
    avg_human = total_human / len(paragraphs)
    overall_label = "AI-generated" if avg_ai > avg_human else "Human-written"

    return {
        "overall_result": {
            "ai_percentage": round(avg_ai, 2),
            "human_percentage": round(avg_human, 2),
            "label": overall_label,
        },
        "paragraphs": paragraph_results,
    }
66
 
67
- # --- Request Schema ---
68
class TextInput(BaseModel):
    """Request body of ``POST /analyze``: an object with a single string field."""

    # The text to be analyzed.
    text: str
70
 
71
- # --- API Routes ---
72
@app.get("/")
async def root():
    """Report that the service is up."""
    status_payload = {
        "status": "ok",
        "message": "AI Text Detector API is running.",
    }
    return status_payload
75
-
76
@app.post("/analyze")
async def analyze(input_data: TextInput):
    """Run the paragraph-level analysis on the submitted text and return its result."""
    submitted_text = input_data.text
    return analyze_text(submitted_text)
 
1
  from fastapi import FastAPI, Request
 
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
+ import uvicorn
5
 
6
app = FastAPI(title="AI Detector API")

# Load the tokenizer and classifier once at startup so every request reuses them.
MODEL_NAME = "roberta-base-openai-detector"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# .eval() returns the module itself, so it can be chained onto the load.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).eval()
 
 
 
 
 
 
 
 
 
 
13
 
 
 
 
 
14
 
15
def _ai_label_index() -> int:
    """Return the logit index of the classifier's AI-generated ("Fake") class.

    The index is read from ``model.config.id2label`` instead of being
    hard-coded. When the label map does not name such a class, the
    historical index 1 is kept.
    """
    id2label = getattr(model.config, "id2label", None) or {}
    for idx, label in id2label.items():
        if str(label).strip().lower() in ("fake", "ai", "ai-generated", "machine", "generated"):
            return int(idx)
    return 1


def get_ai_probability(text: str) -> float:
    """Return the AI probability (0–100%) for a given text.

    Args:
        text: The passage to score; input beyond 512 tokens is truncated.

    Returns:
        The softmax probability of the model's AI-generated class, as a
        percentage rounded to two decimals.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = torch.softmax(logits, dim=1)
    # Select the AI class by label name, not by a fixed position.
    ai_score = probs[0][_ai_label_index()].item() * 100
    return round(ai_score, 2)
23
 
 
 
 
 
 
 
 
24
 
25
@app.post("/analyze")
async def analyze_text(request: Request):
    """
    Score every non-empty line of the submitted text and return the
    per-paragraph scores together with their average.

    Example body:
    {
        "text": "Your long article text here"
    }

    Returns a dict with "overall_ai_score", "overall_human_score" and
    "paragraphs", or {"error": ...} when the body is unusable.
    """
    try:
        data = await request.json()
    except ValueError:
        # Malformed JSON (JSONDecodeError / UnicodeDecodeError are ValueErrors)
        # is answered with an error payload, not an unhandled exception.
        return {"error": "Invalid JSON body"}

    # Only a JSON object carrying a string "text" is usable; a list, null,
    # a number, etc. is treated exactly like a missing text.
    raw_text = data.get("text") if isinstance(data, dict) else None
    text = raw_text.strip() if isinstance(raw_text, str) else ""
    if not text:
        return {"error": "No text provided"}

    paragraphs = [p.strip() for p in text.split("\n") if p.strip()]
    results = []

    for i, para in enumerate(paragraphs, start=1):
        ai_score = get_ai_probability(para)
        results.append({
            "paragraph": i,
            "ai_score": ai_score,
            "human_score": round(100 - ai_score, 2),
            "content": para[:200] + ("..." if len(para) > 200 else ""),
        })

    # `text` is non-empty after strip(), so there is at least one paragraph.
    overall = sum(r["ai_score"] for r in results) / len(results)

    return {
        "overall_ai_score": round(overall, 2),
        "overall_human_score": round(100 - overall, 2),
        "paragraphs": results,
    }
56
 
 
 
 
57
 
58
if __name__ == "__main__":
    # Started directly as a script: serve the app on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)