MGZON committed
Commit 7b25dd3 · 1 Parent(s): eb25e8f

Deploy FastAPI Hugging Face Space without model files

Files changed (3):
  1. Dockerfile +6 -10
  2. app/main.py +8 -41
  3. requirements.txt +1 -0
Dockerfile CHANGED
@@ -1,15 +1,11 @@
-FROM python:3.10-slim
-
-RUN useradd -m -u 1000 user
-USER user
-ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:$PATH \
-    TRANSFORMERS_CACHE=/home/user/.cache/huggingface \
-    HUGGINGFACE_HUB_CACHE=/home/user/.cache/huggingface
+FROM python:3.10
 
 WORKDIR /app
+
 COPY requirements.txt .
-RUN pip install --no-cache-dir --user -r requirements.txt
-COPY . .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY ./app ./app
 
+EXPOSE 7860
 CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/main.py CHANGED
@@ -1,33 +1,14 @@
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import torch
-import os
-import logging
-
-# Set up logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
 
 app = FastAPI(title="MGZON FLAN-T5 API")
 
-# Environment variable (in case you need a token)
-HUGGING_FACE_TOKEN = os.getenv("HUGGING_FACE_TOKEN", None)
-
+# Load the model directly from Hugging Face
 MODEL_NAME = "MGZON/mgzon-flan-t5-base"
-
-try:
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=HUGGING_FACE_TOKEN)
-    model = AutoModelForSeq2SeqLM.from_pretrained(
-        MODEL_NAME,
-        use_auth_token=HUGGING_FACE_TOKEN,
-        torch_dtype=torch.float16,
-        device_map="auto"  # or "cpu" if there is no GPU
-    )
-    logger.info("Model and tokenizer loaded successfully")
-except Exception as e:
-    logger.error(f"Failed to load model: {e}")
-    raise
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, device_map="auto")
 
 class RequestText(BaseModel):
     text: str
@@ -37,25 +18,11 @@ class RequestText(BaseModel):
 async def health_check():
     return {"status": "healthy"}
 
-@app.post("/api/generate")
+@app.post("/api/generate/")
 async def generate(req: RequestText):
-    try:
-        inputs = tokenizer(req.text, return_tensors="pt", truncation=True, max_length=512).to(model.device)
-        outputs = model.generate(
-            **inputs,
-            max_length=req.max_length,
-            do_sample=True,
-            temperature=0.7,
-            top_p=0.9,
-            top_k=50,
-            pad_token_id=tokenizer.pad_token_id,
-            num_return_sequences=1
-        )
-        text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return {"generated_text": text}
-    except Exception as e:
-        logger.error(f"Error generating text: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
+    inputs = tokenizer(req.text, return_tensors="pt").to(model.device)
+    outputs = model.generate(**inputs, max_length=req.max_length)
+    return {"generated_text": tokenizer.decode(outputs[0], skip_special_tokens=True)}
 
 if __name__ == "__main__":
     import uvicorn
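For reference, a minimal client sketch against the simplified endpoint. It assumes the Space is reachable on localhost:7860 (the port the Dockerfile exposes) and that RequestText also declares a max_length field: the handler reads req.max_length, but that field's definition falls outside the visible hunk.

# Minimal client sketch. Assumptions: the container runs locally on port
# 7860, and RequestText has a max_length field (referenced by the handler
# but not shown in this diff).
import requests

resp = requests.post(
    "http://localhost:7860/api/generate/",
    json={"text": "Translate English to German: Hello, world!", "max_length": 64},
)
resp.raise_for_status()
print(resp.json()["generated_text"])

Note that this commit also adds a trailing slash to the route (/api/generate/), so existing clients calling /api/generate will get a 307 redirect under FastAPI's default slash handling.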
requirements.txt CHANGED
@@ -3,3 +3,4 @@ uvicorn[standard]
 transformers
 torch
 accelerate
+pydantic