jerinaj committed on
Commit
76afdbd
·
1 Parent(s): 47d6d47
Files changed (1) hide show
  1. app.py +19 -15
app.py CHANGED
@@ -4,7 +4,6 @@ from optimum.intel import OVModelForCausalLM
4
  from transformers import AutoTokenizer
5
  import huggingface_hub
6
  import multiprocessing
7
- import subprocess
8
  import os
9
  import re
10
 
@@ -22,24 +21,21 @@ if hf_token:
22
  huggingface_hub.login(token=hf_token)
23
 
24
  # Export model to OpenVINO format on first run if not already done.
25
- # --disable-stateful avoids the static sliding-window shape (512) that gets
26
- # baked in during tracing, which causes shape mismatches for long prompts.
27
  if not os.path.isdir(OV_MODEL_DIR):
28
  print(f"OpenVINO model not found at '{OV_MODEL_DIR}', exporting now...")
29
- subprocess.run(
30
- [
31
- "optimum-cli", "export", "openvino",
32
- "--model", model_name,
33
- "--task", "text-generation-with-past",
34
- "--disable-stateful",
35
- OV_MODEL_DIR + "/",
36
- ],
37
- check=True,
38
- )
39
  print("Export complete.")
40
 
41
  tokenizer = AutoTokenizer.from_pretrained(model_name)
42
- model = OVModelForCausalLM.from_pretrained(OV_MODEL_DIR, compile=True)
 
 
 
 
43
 
44
  ESCAPE = "<escape>"
45
  SYSTEM_PROMPT = "You are a model that can do function calling with the following functions"
@@ -75,16 +71,24 @@ def build_declaration(tool):
75
  def build_prompt(messages, tools):
76
  parts = []
77
 
 
 
 
 
78
  if tools:
79
  declarations = "".join(build_declaration(t) for t in tools)
80
  parts.append(
81
  f"<start_of_turn>developer\n"
82
- f"{SYSTEM_PROMPT}{declarations}"
83
  f"<end_of_turn>\n"
84
  )
 
 
85
 
86
  for msg in messages:
87
  role = msg["role"]
 
 
88
  content = msg["content"]
89
  if role == "tool":
90
  # content should already be formatted as <start_function_response>...<end_function_response>
 
4
  from transformers import AutoTokenizer
5
  import huggingface_hub
6
  import multiprocessing
 
7
  import os
8
  import re
9
 
 
21
  huggingface_hub.login(token=hf_token)
22
 
23
  # Export model to OpenVINO format on first run if not already done.
24
+ # Using the Python API (export=True) instead of the CLI produces a model with
25
+ # dynamic shapes, avoiding static sequence-length constants baked in by tracing.
26
  if not os.path.isdir(OV_MODEL_DIR):
27
  print(f"OpenVINO model not found at '{OV_MODEL_DIR}', exporting now...")
28
+ _export_model = OVModelForCausalLM.from_pretrained(model_name, export=True, compile=False)
29
+ _export_model.save_pretrained(OV_MODEL_DIR)
30
+ del _export_model
 
 
 
 
 
 
 
31
  print("Export complete.")
32
 
33
  tokenizer = AutoTokenizer.from_pretrained(model_name)
34
+ # Load without compiling, reshape to dynamic sequence length, then compile.
35
+ # This ensures long prompts (e.g. many tools) don't hit static-shape errors.
36
+ model = OVModelForCausalLM.from_pretrained(OV_MODEL_DIR, compile=False)
37
+ model.reshape(1, -1) # batch=1, sequence_length=dynamic
38
+ model.compile()
39
 
40
  ESCAPE = "<escape>"
41
  SYSTEM_PROMPT = "You are a model that can do function calling with the following functions"
 
71
  def build_prompt(messages, tools):
72
  parts = []
73
 
74
+ # Use developer message from the request if provided, else fall back to default.
75
+ developer_msg = next((m for m in messages if m["role"] == "developer"), None)
76
+ system_content = developer_msg["content"] if developer_msg else SYSTEM_PROMPT
77
+
78
  if tools:
79
  declarations = "".join(build_declaration(t) for t in tools)
80
  parts.append(
81
  f"<start_of_turn>developer\n"
82
+ f"{system_content}{declarations}"
83
  f"<end_of_turn>\n"
84
  )
85
+ elif developer_msg:
86
+ parts.append(f"<start_of_turn>developer\n{system_content}<end_of_turn>\n")
87
 
88
  for msg in messages:
89
  role = msg["role"]
90
+ if role == "developer":
91
+ continue # already emitted above
92
  content = msg["content"]
93
  if role == "tool":
94
  # content should already be formatted as <start_function_response>...<end_function_response>