Commit
•
00fe05a
1
Parent(s):
f2b4932
update processor kwargs
Browse files
README.md
CHANGED
@@ -87,7 +87,7 @@ def read_video_pyav(container, indices):
|
|
87 |
return np.stack([x.to_ndarray(format="rgb24") for x in frames])
|
88 |
|
89 |
|
90 |
-
# define a chat
|
91 |
# Each value in "content" has to be a list of dicts with types ("text", "image", "video")
|
92 |
conversation = [
|
93 |
{
|
@@ -134,7 +134,7 @@ conversation = [
|
|
134 |
}
|
135 |
]
|
136 |
prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
|
137 |
-
inputs_image = processor(prompt, images=raw_image, return_tensors='pt').to(0, torch.float16)
|
138 |
|
139 |
output = model.generate(**inputs_image, max_new_tokens=100, do_sample=False)
|
140 |
print(processor.decode(output[0][2:], skip_special_tokens=True))
|
|
|
87 |
return np.stack([x.to_ndarray(format="rgb24") for x in frames])
|
88 |
|
89 |
|
90 |
+
# define a chat history and use `apply_chat_template` to get a correctly formatted prompt
|
91 |
# Each value in "content" has to be a list of dicts with types ("text", "image", "video")
|
92 |
conversation = [
|
93 |
{
|
|
|
134 |
}
|
135 |
]
|
136 |
prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
|
137 |
+
inputs_image = processor(text=prompt, images=raw_image, return_tensors='pt').to(0, torch.float16)
|
138 |
|
139 |
output = model.generate(**inputs_image, max_new_tokens=100, do_sample=False)
|
140 |
print(processor.decode(output[0][2:], skip_special_tokens=True))
|