Pujan-Dev committed
Commit bc13edc · 1 Parent(s): b247b11

fix: fixed the server err
Dockerfile ADDED
@@ -0,0 +1,17 @@
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.9
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+ RUN python -m spacy download en_core_web_sm || echo "Failed to download model"
+
+ COPY --chown=user . /app
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
features/text_classifier/controller.py CHANGED
@@ -5,12 +5,12 @@ from io import BytesIO
 
  from fastapi import HTTPException, UploadFile, status, Depends
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
- from nltk.tokenize import sent_tokenize
 
  from .inferencer import classify_text
  from .preprocess import parse_docx, parse_pdf, parse_txt
-
+ import spacy
  security = HTTPBearer()
+ nlp = spacy.load("en_core_web_sm")
 
  # Verify Bearer token from Authorization header
  async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
@@ -77,18 +77,23 @@ async def handle_file_upload(file: UploadFile):
          logging.error(f"Error processing file: {e}")
          raise HTTPException(status_code=500, detail="Error processing the file")
 
- # Analyze each sentence in plain text input
+
+
  async def handle_sentence_level_analysis(text: str):
      text = text.strip()
-     if text[-1] != ".":
-         text+="."
+     if not text.endswith("."):
+         text += "."
+
      if len(text) > 10000:
          raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")
-
-     sentences = sent_tokenize(text, language="english")
+
+     # Use SpaCy for sentence splitting
+     doc = nlp(text)
+     sentences = [sent.text.strip() for sent in doc.sents]
+
      results = []
      for sentence in sentences:
-         if not sentence.strip():
+         if not sentence:
              continue
          label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, sentence)
          results.append({
@@ -97,9 +102,8 @@ async def handle_sentence_level_analysis(text: str):
              "perplexity": round(perplexity, 2),
              "ai_likelihood": ai_likelihood
          })
-     return {"analysis": results}
 
- # Analyze each sentence from uploaded file
+     return {"analysis": results}# Analyze each sentence from uploaded file
  async def handle_file_sentence(file: UploadFile):
      try:
          file_contents = await extract_file_contents(file)
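
The change above swaps NLTK's sent_tokenize, which depends on the punkt data files being present at runtime (a likely cause of the server error this commit fixes), for spaCy's pipeline-based sentence segmentation. A standalone sketch of the new splitting behaviour, with the sample text invented for illustration:

import spacy

# en_core_web_sm includes a parser that sets sentence boundaries,
# so doc.sents works without any extra data download at request time
nlp = spacy.load("en_core_web_sm")

doc = nlp("This is one sentence. Here is another! And a third?")
sentences = [sent.text.strip() for sent in doc.sents]
print(sentences)
# expected: ['This is one sentence.', 'Here is another!', 'And a third?']

Loading nlp once at module import, as the diff does next to security = HTTPBearer(), keeps the model-load cost out of the per-request path.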
features/text_classifier/model_loader.py CHANGED
@@ -5,7 +5,8 @@ from transformers import GPT2LMHeadModel, GPT2TokenizerFast, GPT2Config
  from huggingface_hub import snapshot_download
  import torch
  from dotenv import load_dotenv
- import nltk
+ import spacy
+
  load_dotenv()
  REPO_ID = "Pujan-Dev/AI-Text-Detector"
  MODEL_DIR = "./models"
@@ -15,17 +16,18 @@ WEIGHTS_PATH = os.path.join(MODEL_DIR, "model_weights.pth")
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  _model, _tokenizer = None, None
 
-
  def warmup():
      global _model, _tokenizer
      # Ensure punkt is available
-     nltk.download("punkt")
-
-     nltk.download('punkt_tab')
+     try:
+         nlp = spacy.load("en_core_web_sm")
+     except OSError:
+         import subprocess
+         subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
+         nlp = spacy.load("en_core_web_sm")
 
      download_model_repo()
      _model, _tokenizer = load_model()
-     logging.info("Its ready")
 
 
  def download_model_repo():
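
One caveat with the fallback above: it shells out to "python", which in some environments is not the interpreter running the app. A sketch of an interpreter-safe variation using sys.executable; this is an alternative under that assumption, not what the commit ships:

import sys
import subprocess

import spacy

def ensure_spacy_model(name: str = "en_core_web_sm"):
    # Load the model if installed; otherwise download it with the exact
    # interpreter running this process, then retry the load once.
    try:
        return spacy.load(name)
    except OSError:
        subprocess.run([sys.executable, "-m", "spacy", "download", name], check=True)
        return spacy.load(name)

In the Docker image this branch should rarely trigger, since the Dockerfile already downloads en_core_web_sm at build time; the runtime retry only covers the case where that build step fell through to its || echo fallback.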