jerinaj committed on
Commit
76afdbd
·
1 Parent(s): 47d6d47
Files changed (1) hide show
  1. app.py +19 -15
app.py CHANGED
@@ -4,7 +4,6 @@ from optimum.intel import OVModelForCausalLM
4
  from transformers import AutoTokenizer
5
  import huggingface_hub
6
  import multiprocessing
7
- import subprocess
8
  import os
9
  import re
10
 
@@ -22,24 +21,21 @@ if hf_token:
22
  huggingface_hub.login(token=hf_token)
23
 
24
  # Export model to OpenVINO format on first run if not already done.
25
- # --disable-stateful avoids the static sliding-window shape (512) that gets
26
- # baked in during tracing, which causes shape mismatches for long prompts.
27
  if not os.path.isdir(OV_MODEL_DIR):
28
  print(f"OpenVINO model not found at '{OV_MODEL_DIR}', exporting now...")
29
- subprocess.run(
30
- [
31
- "optimum-cli", "export", "openvino",
32
- "--model", model_name,
33
- "--task", "text-generation-with-past",
34
- "--disable-stateful",
35
- OV_MODEL_DIR + "/",
36
- ],
37
- check=True,
38
- )
39
  print("Export complete.")
40
 
41
  tokenizer = AutoTokenizer.from_pretrained(model_name)
42
- model = OVModelForCausalLM.from_pretrained(OV_MODEL_DIR, compile=True)
 
 
 
 
43
 
44
  ESCAPE = "<escape>"
45
  SYSTEM_PROMPT = "You are a model that can do function calling with the following functions"
@@ -75,16 +71,24 @@ def build_declaration(tool):
75
  def build_prompt(messages, tools):
76
  parts = []
77
 
 
 
 
 
78
  if tools:
79
  declarations = "".join(build_declaration(t) for t in tools)
80
  parts.append(
81
  f"<start_of_turn>developer\n"
82
- f"{SYSTEM_PROMPT}{declarations}"
83
  f"<end_of_turn>\n"
84
  )
 
 
85
 
86
  for msg in messages:
87
  role = msg["role"]
 
 
88
  content = msg["content"]
89
  if role == "tool":
90
  # content should already be formatted as <start_function_response>...<end_function_response>
 
4
  from transformers import AutoTokenizer
5
  import huggingface_hub
6
  import multiprocessing
 
7
  import os
8
  import re
9
 
 
21
  huggingface_hub.login(token=hf_token)
22
 
23
  # Export model to OpenVINO format on first run if not already done.
24
+ # Using the Python API (export=True) instead of the CLI produces a model with
25
+ # dynamic shapes, avoiding static sequence-length constants baked in by tracing.
26
  if not os.path.isdir(OV_MODEL_DIR):
27
  print(f"OpenVINO model not found at '{OV_MODEL_DIR}', exporting now...")
28
+ _export_model = OVModelForCausalLM.from_pretrained(model_name, export=True, compile=False)
29
+ _export_model.save_pretrained(OV_MODEL_DIR)
30
+ del _export_model
 
 
 
 
 
 
 
31
  print("Export complete.")
32
 
33
  tokenizer = AutoTokenizer.from_pretrained(model_name)
34
+ # Load without compiling, reshape to dynamic sequence length, then compile.
35
+ # This ensures long prompts (e.g. many tools) don't hit static-shape errors.
36
+ model = OVModelForCausalLM.from_pretrained(OV_MODEL_DIR, compile=False)
37
+ model.reshape(1, -1) # batch=1, sequence_length=dynamic
38
+ model.compile()
39
 
40
  ESCAPE = "<escape>"
41
  SYSTEM_PROMPT = "You are a model that can do function calling with the following functions"
 
71
  def build_prompt(messages, tools):
72
  parts = []
73
 
74
+ # Use developer message from the request if provided, else fall back to default.
75
+ developer_msg = next((m for m in messages if m["role"] == "developer"), None)
76
+ system_content = developer_msg["content"] if developer_msg else SYSTEM_PROMPT
77
+
78
  if tools:
79
  declarations = "".join(build_declaration(t) for t in tools)
80
  parts.append(
81
  f"<start_of_turn>developer\n"
82
+ f"{system_content}{declarations}"
83
  f"<end_of_turn>\n"
84
  )
85
+ elif developer_msg:
86
+ parts.append(f"<start_of_turn>developer\n{system_content}<end_of_turn>\n")
87
 
88
  for msg in messages:
89
  role = msg["role"]
90
+ if role == "developer":
91
+ continue # already emitted above
92
  content = msg["content"]
93
  if role == "tool":
94
  # content should already be formatted as <start_function_response>...<end_function_response>