Spaces:

yulu2
/

FoundationMotion

Sleeping

sunrainyg committed on Oct 19

Commit

3f13efa

1 Parent(s): 3957f9a

Update

Files changed (1) hide show

app.py CHANGED Viewed

@@ -61,18 +61,16 @@ processor = AutoProcessor.from_pretrained(
     max_pixels=MAX_PIXELS,
 )
-SYSTEM_PROMPT = (
-    "You are a helpful assistant that watches a user-provided video and answers questions "
-    "about it concisely and accurately."
-)
-# ========== Conversation Builder ==========
 def build_conversation(video_path: str, question: str, fps: int):
-    """
-    Qwen2.5-VL expects a chat-style list where media and text are items in 'content'.
-    """
     return [
-        {"role": "system", "content": SYSTEM_PROMPT},
         {
             "role": "user",
             "content": [
@@ -82,6 +80,7 @@ def build_conversation(video_path: str, question: str, fps: int):
         },
     ]
 # ========== Inference ==========
 @torch.inference_mode()
 def answer(video, question, fps=1, max_new_tokens=128, temperature=0.2, top_p=0.9):

     max_pixels=MAX_PIXELS,
 )
+SYSTEM_PROMPT = "You are a helpful assistant that watches a user-provided video and answers questions about it concisely and accurately."
 def build_conversation(video_path: str, question: str, fps: int):
     return [
+        {
+            "role": "system",
+            "content": [
+                {"type": "text", "text": SYSTEM_PROMPT}
+            ],
+        },
         {
             "role": "user",
             "content": [
         },
     ]
 # ========== Inference ==========
 @torch.inference_mode()
 def answer(video, question, fps=1, max_new_tokens=128, temperature=0.2, top_p=0.9):