Spaces:
Sleeping
Sleeping
updates
Browse files
app.py
CHANGED
|
@@ -4,7 +4,6 @@ from optimum.intel import OVModelForCausalLM
|
|
| 4 |
from transformers import AutoTokenizer
|
| 5 |
import huggingface_hub
|
| 6 |
import multiprocessing
|
| 7 |
-
import subprocess
|
| 8 |
import os
|
| 9 |
import re
|
| 10 |
|
|
@@ -22,24 +21,21 @@ if hf_token:
|
|
| 22 |
huggingface_hub.login(token=hf_token)
|
| 23 |
|
| 24 |
# Export model to OpenVINO format on first run if not already done.
|
| 25 |
-
#
|
| 26 |
-
#
|
| 27 |
if not os.path.isdir(OV_MODEL_DIR):
|
| 28 |
print(f"OpenVINO model not found at '{OV_MODEL_DIR}', exporting now...")
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
"--model", model_name,
|
| 33 |
-
"--task", "text-generation-with-past",
|
| 34 |
-
"--disable-stateful",
|
| 35 |
-
OV_MODEL_DIR + "/",
|
| 36 |
-
],
|
| 37 |
-
check=True,
|
| 38 |
-
)
|
| 39 |
print("Export complete.")
|
| 40 |
|
| 41 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
ESCAPE = "<escape>"
|
| 45 |
SYSTEM_PROMPT = "You are a model that can do function calling with the following functions"
|
|
@@ -75,16 +71,24 @@ def build_declaration(tool):
|
|
| 75 |
def build_prompt(messages, tools):
|
| 76 |
parts = []
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
if tools:
|
| 79 |
declarations = "".join(build_declaration(t) for t in tools)
|
| 80 |
parts.append(
|
| 81 |
f"<start_of_turn>developer\n"
|
| 82 |
-
f"{
|
| 83 |
f"<end_of_turn>\n"
|
| 84 |
)
|
|
|
|
|
|
|
| 85 |
|
| 86 |
for msg in messages:
|
| 87 |
role = msg["role"]
|
|
|
|
|
|
|
| 88 |
content = msg["content"]
|
| 89 |
if role == "tool":
|
| 90 |
# content should already be formatted as <start_function_response>...<end_function_response>
|
|
|
|
| 4 |
from transformers import AutoTokenizer
|
| 5 |
import huggingface_hub
|
| 6 |
import multiprocessing
|
|
|
|
| 7 |
import os
|
| 8 |
import re
|
| 9 |
|
|
|
|
| 21 |
huggingface_hub.login(token=hf_token)
|
| 22 |
|
| 23 |
# Export the model to OpenVINO format on first run, if it has not been done yet.
# The export goes through the Python API (export=True) rather than the CLI; per
# the original author's note this yields a model with dynamic shapes and avoids
# static sequence-length constants baked in by tracing.
if not os.path.isdir(OV_MODEL_DIR):
    print(f"OpenVINO model not found at '{OV_MODEL_DIR}', exporting now...")
    # compile=False: the exported model is only written to disk here. Chaining
    # the two calls keeps the temporary model unnamed, so it is released as
    # soon as the save finishes and no module-level name is left behind.
    OVModelForCausalLM.from_pretrained(
        model_name, export=True, compile=False
    ).save_pretrained(OV_MODEL_DIR)
    print("Export complete.")
|
| 32 |
|
| 33 |
# Tokenizer is loaded from the original model id, not from the exported directory.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Load the exported model without compiling it, call reshape(1, -1)
# (batch=1, sequence_length=dynamic), and only then compile. The intent is that
# long prompts (e.g. many tools) don't hit static-shape errors.
# NOTE(review): confirm that reshape() has the intended effect for causal LMs
# in the installed optimum-intel version — TODO verify against its docs.
model = OVModelForCausalLM.from_pretrained(OV_MODEL_DIR, compile=False)
model.reshape(1, -1)  # batch=1, sequence_length=dynamic
model.compile()
|
| 39 |
|
| 40 |
ESCAPE = "<escape>"
|
| 41 |
SYSTEM_PROMPT = "You are a model that can do function calling with the following functions"
|
|
|
|
| 71 |
def build_prompt(messages, tools):
|
| 72 |
parts = []
|
| 73 |
|
| 74 |
+
# Use developer message from the request if provided, else fall back to default.
|
| 75 |
+
developer_msg = next((m for m in messages if m["role"] == "developer"), None)
|
| 76 |
+
system_content = developer_msg["content"] if developer_msg else SYSTEM_PROMPT
|
| 77 |
+
|
| 78 |
if tools:
|
| 79 |
declarations = "".join(build_declaration(t) for t in tools)
|
| 80 |
parts.append(
|
| 81 |
f"<start_of_turn>developer\n"
|
| 82 |
+
f"{system_content}{declarations}"
|
| 83 |
f"<end_of_turn>\n"
|
| 84 |
)
|
| 85 |
+
elif developer_msg:
|
| 86 |
+
parts.append(f"<start_of_turn>developer\n{system_content}<end_of_turn>\n")
|
| 87 |
|
| 88 |
for msg in messages:
|
| 89 |
role = msg["role"]
|
| 90 |
+
if role == "developer":
|
| 91 |
+
continue # already emitted above
|
| 92 |
content = msg["content"]
|
| 93 |
if role == "tool":
|
| 94 |
# content should already be formatted as <start_function_response>...<end_function_response>
|