Spaces:
Sleeping
Sleeping
sunrainyg
committed on
Commit
·
3f13efa
1
Parent(s):
3957f9a
Update
Browse files
app.py
CHANGED
|
@@ -61,18 +61,16 @@ processor = AutoProcessor.from_pretrained(
|
|
| 61 |
max_pixels=MAX_PIXELS,
|
| 62 |
)
|
| 63 |
|
| 64 |
-
SYSTEM_PROMPT =
|
| 65 |
-
"You are a helpful assistant that watches a user-provided video and answers questions "
|
| 66 |
-
"about it concisely and accurately."
|
| 67 |
-
)
|
| 68 |
|
| 69 |
-
# ========== Conversation Builder ==========
|
| 70 |
def build_conversation(video_path: str, question: str, fps: int):
|
| 71 |
-
"""
|
| 72 |
-
Qwen2.5-VL expects a chat-style list where media and text are items in 'content'.
|
| 73 |
-
"""
|
| 74 |
return [
|
| 75 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
{
|
| 77 |
"role": "user",
|
| 78 |
"content": [
|
|
@@ -82,6 +80,7 @@ def build_conversation(video_path: str, question: str, fps: int):
|
|
| 82 |
},
|
| 83 |
]
|
| 84 |
|
|
|
|
| 85 |
# ========== Inference ==========
|
| 86 |
@torch.inference_mode()
|
| 87 |
def answer(video, question, fps=1, max_new_tokens=128, temperature=0.2, top_p=0.9):
|
|
|
|
| 61 |
max_pixels=MAX_PIXELS,
|
| 62 |
)
|
| 63 |
|
| 64 |
+
SYSTEM_PROMPT = "You are a helpful assistant that watches a user-provided video and answers questions about it concisely and accurately."
|
|
|
|
|
|
|
|
|
|
| 65 |
|
|
|
|
| 66 |
def build_conversation(video_path: str, question: str, fps: int):
|
|
|
|
|
|
|
|
|
|
| 67 |
return [
|
| 68 |
+
{
|
| 69 |
+
"role": "system",
|
| 70 |
+
"content": [
|
| 71 |
+
{"type": "text", "text": SYSTEM_PROMPT}
|
| 72 |
+
],
|
| 73 |
+
},
|
| 74 |
{
|
| 75 |
"role": "user",
|
| 76 |
"content": [
|
|
|
|
| 80 |
},
|
| 81 |
]
|
| 82 |
|
| 83 |
+
|
| 84 |
# ========== Inference ==========
|
| 85 |
@torch.inference_mode()
|
| 86 |
def answer(video, question, fps=1, max_new_tokens=128, temperature=0.2, top_p=0.9):
|