Spaces:

jerinaj
/

functiongemm

Sleeping

jerinaj committed 17 days ago

Commit

587fdf0

1 Parent(s): f31aab2

updates

Files changed (2) hide show

Dockerfile CHANGED Viewed

@@ -5,15 +5,9 @@ WORKDIR /app
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-# Export model to OpenVINO format at build time (baked into image)
-ARG HF_TOKEN
-ENV HF_TOKEN=${HF_TOKEN}
-RUN optimum-cli export openvino \
-    --model google/functiongemma-270m-it \
-    --task text-generation-with-past \
-    functiongemma_ov/
 COPY app.py .
 COPY index.html .
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY app.py .
 COPY index.html .
+# HF_TOKEN must be passed at runtime: docker run -e HF_TOKEN=hf_xxx ...
+# The model is exported to OpenVINO format on first startup.
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py CHANGED Viewed

@@ -2,7 +2,9 @@ from fastapi import FastAPI, Request
 from fastapi.responses import HTMLResponse
 from optimum.intel import OVModelForCausalLM
 from transformers import AutoTokenizer
 import multiprocessing
 import os
 import re
@@ -12,9 +14,29 @@ os.environ["OV_CPU_THREADS_NUM"] = str(multiprocessing.cpu_count())
 app = FastAPI()
 model_name = "google/functiongemma-270m-it"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = OVModelForCausalLM.from_pretrained("functiongemma_ov", compile=True)
 ESCAPE = "<escape>"
 SYSTEM_PROMPT = "You are a model that can do function calling with the following functions"

 from fastapi.responses import HTMLResponse
 from optimum.intel import OVModelForCausalLM
 from transformers import AutoTokenizer
+import huggingface_hub
 import multiprocessing
+import subprocess
 import os
 import re
 app = FastAPI()
 model_name = "google/functiongemma-270m-it"
+OV_MODEL_DIR = "functiongemma_ov"
+# Authenticate with HuggingFace if token is provided
+hf_token = os.environ.get("HF_TOKEN")
+if hf_token:
+    huggingface_hub.login(token=hf_token)
+# Export model to OpenVINO format on first run if not already done
+if not os.path.isdir(OV_MODEL_DIR):
+    print(f"OpenVINO model not found at '{OV_MODEL_DIR}', exporting now...")
+    subprocess.run(
+        [
+            "optimum-cli", "export", "openvino",
+            "--model", model_name,
+            "--task", "text-generation-with-past",
+            OV_MODEL_DIR + "/",
+        ],
+        check=True,
+    )
+    print("Export complete.")
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = OVModelForCausalLM.from_pretrained(OV_MODEL_DIR, compile=True)
 ESCAPE = "<escape>"
 SYSTEM_PROMPT = "You are a model that can do function calling with the following functions"