jerinaj committed on
Commit
587fdf0
·
1 Parent(s): f31aab2
Files changed (2) hide show
  1. Dockerfile +2 -8
  2. app.py +23 -1
Dockerfile CHANGED
@@ -5,15 +5,9 @@ WORKDIR /app
5
  COPY requirements.txt .
6
  RUN pip install --no-cache-dir -r requirements.txt
7
 
8
- # Export model to OpenVINO format at build time (baked into image)
9
- ARG HF_TOKEN
10
- ENV HF_TOKEN=${HF_TOKEN}
11
- RUN optimum-cli export openvino \
12
- --model google/functiongemma-270m-it \
13
- --task text-generation-with-past \
14
- functiongemma_ov/
15
-
16
  COPY app.py .
17
  COPY index.html .
18
 
 
 
19
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
5
# Install Python dependencies first so this layer is cached across code edits.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app.py .
COPY index.html .

# HF_TOKEN must be passed at runtime: docker run -e HF_TOKEN=hf_xxx ...
# The model is exported to OpenVINO format on first startup.
# NOTE(review): the exported model is written inside the container's writable
# layer, so it is re-exported after every container recreation — mount a
# volume at the export directory to persist it. TODO confirm deployment setup.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -2,7 +2,9 @@ from fastapi import FastAPI, Request
2
  from fastapi.responses import HTMLResponse
3
  from optimum.intel import OVModelForCausalLM
4
  from transformers import AutoTokenizer
 
5
  import multiprocessing
 
6
  import os
7
  import re
8
 
@@ -12,9 +14,29 @@ os.environ["OV_CPU_THREADS_NUM"] = str(multiprocessing.cpu_count())
12
  app = FastAPI()
13
 
14
  model_name = "google/functiongemma-270m-it"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  tokenizer = AutoTokenizer.from_pretrained(model_name)
17
- model = OVModelForCausalLM.from_pretrained("functiongemma_ov", compile=True)
18
 
19
  ESCAPE = "<escape>"
20
  SYSTEM_PROMPT = "You are a model that can do function calling with the following functions"
 
2
  from fastapi.responses import HTMLResponse
3
  from optimum.intel import OVModelForCausalLM
4
  from transformers import AutoTokenizer
5
+ import huggingface_hub
6
  import multiprocessing
7
+ import subprocess
8
  import os
9
  import re
10
 
 
14
  app = FastAPI()
15
 
16
  model_name = "google/functiongemma-270m-it"
17
# Directory where the OpenVINO IR (openvino_model.xml/.bin) is exported.
OV_MODEL_DIR = "functiongemma_ov"

# Authenticate with HuggingFace if a token is provided — presumably the model
# repo is gated; without a token the export below would fail. TODO confirm.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    huggingface_hub.login(token=hf_token)

# Export the model to OpenVINO format on first run if not already done.
# Check for the exported model file rather than just the directory: a
# previously failed/interrupted export can leave a partial directory behind,
# which would skip the export here and then crash in from_pretrained().
if not os.path.isfile(os.path.join(OV_MODEL_DIR, "openvino_model.xml")):
    print(f"OpenVINO model not found at '{OV_MODEL_DIR}', exporting now...")
    # shell=False (list argv) — token reaches the CLI via the HF_TOKEN env var.
    subprocess.run(
        [
            "optimum-cli", "export", "openvino",
            "--model", model_name,
            "--task", "text-generation-with-past",
            OV_MODEL_DIR,
        ],
        check=True,  # abort startup loudly if the export fails
    )
    print("Export complete.")
37
 
38
  tokenizer = AutoTokenizer.from_pretrained(model_name)
39
+ model = OVModelForCausalLM.from_pretrained(OV_MODEL_DIR, compile=True)
40
 
41
  ESCAPE = "<escape>"
42
  SYSTEM_PROMPT = "You are a model that can do function calling with the following functions"