Spaces:
Sleeping
Sleeping
updates
Browse files
- Dockerfile +2 -8
- app.py +23 -1
Dockerfile
CHANGED
|
@@ -5,15 +5,9 @@ WORKDIR /app
|
|
| 5 |
COPY requirements.txt .
|
| 6 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 7 |
|
| 8 |
-
# Export model to OpenVINO format at build time (baked into image)
|
| 9 |
-
ARG HF_TOKEN
|
| 10 |
-
ENV HF_TOKEN=${HF_TOKEN}
|
| 11 |
-
RUN optimum-cli export openvino \
|
| 12 |
-
--model google/functiongemma-270m-it \
|
| 13 |
-
--task text-generation-with-past \
|
| 14 |
-
functiongemma_ov/
|
| 15 |
-
|
| 16 |
COPY app.py .
|
| 17 |
COPY index.html .
|
| 18 |
|
|
|
|
|
|
|
| 19 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
| 5 |
COPY requirements.txt .
|
| 6 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
COPY app.py .
|
| 9 |
COPY index.html .
|
| 10 |
|
| 11 |
+
# HF_TOKEN must be passed at runtime: docker run -e HF_TOKEN=hf_xxx ...
|
| 12 |
+
# The model is exported to OpenVINO format on first startup.
|
| 13 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
CHANGED
|
@@ -2,7 +2,9 @@ from fastapi import FastAPI, Request
|
|
| 2 |
from fastapi.responses import HTMLResponse
|
| 3 |
from optimum.intel import OVModelForCausalLM
|
| 4 |
from transformers import AutoTokenizer
|
|
|
|
| 5 |
import multiprocessing
|
|
|
|
| 6 |
import os
|
| 7 |
import re
|
| 8 |
|
|
@@ -12,9 +14,29 @@ os.environ["OV_CPU_THREADS_NUM"] = str(multiprocessing.cpu_count())
|
|
| 12 |
app = FastAPI()
|
| 13 |
|
| 14 |
model_name = "google/functiongemma-270m-it"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 17 |
-
model = OVModelForCausalLM.from_pretrained(
|
| 18 |
|
| 19 |
ESCAPE = "<escape>"
|
| 20 |
SYSTEM_PROMPT = "You are a model that can do function calling with the following functions"
|
|
|
|
| 2 |
from fastapi.responses import HTMLResponse
|
| 3 |
from optimum.intel import OVModelForCausalLM
|
| 4 |
from transformers import AutoTokenizer
|
| 5 |
+
import huggingface_hub
|
| 6 |
import multiprocessing
|
| 7 |
+
import subprocess
|
| 8 |
import os
|
| 9 |
import re
|
| 10 |
|
|
|
|
| 14 |
app = FastAPI()
|
| 15 |
|
| 16 |
model_name = "google/functiongemma-270m-it"
|
| 17 |
+
OV_MODEL_DIR = "functiongemma_ov"
|
| 18 |
+
|
| 19 |
+
# Authenticate with Hugging Face if a token is provided
|
| 20 |
+
hf_token = os.environ.get("HF_TOKEN")
|
| 21 |
+
if hf_token:
|
| 22 |
+
huggingface_hub.login(token=hf_token)
|
| 23 |
+
|
| 24 |
+
# Export model to OpenVINO format on first run if not already done
|
| 25 |
+
if not os.path.isdir(OV_MODEL_DIR):
|
| 26 |
+
print(f"OpenVINO model not found at '{OV_MODEL_DIR}', exporting now...")
|
| 27 |
+
subprocess.run(
|
| 28 |
+
[
|
| 29 |
+
"optimum-cli", "export", "openvino",
|
| 30 |
+
"--model", model_name,
|
| 31 |
+
"--task", "text-generation-with-past",
|
| 32 |
+
OV_MODEL_DIR + "/",
|
| 33 |
+
],
|
| 34 |
+
check=True,
|
| 35 |
+
)
|
| 36 |
+
print("Export complete.")
|
| 37 |
|
| 38 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 39 |
+
model = OVModelForCausalLM.from_pretrained(OV_MODEL_DIR, compile=True)
|
| 40 |
|
| 41 |
ESCAPE = "<escape>"
|
| 42 |
SYSTEM_PROMPT = "You are a model that can do function calling with the following functions"
|