Spaces:
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -320,6 +320,8 @@ def add_message(history, message):
|
|
320 |
# return history, gr.MultimodalTextbox(value=None, interactive=False)
|
321 |
# else:
|
322 |
for x in message["files"]:
|
|
|
|
|
323 |
history.append(((x,), None))
|
324 |
if message["text"] is not None:
|
325 |
history.append((message["text"], None))
|
@@ -408,6 +410,7 @@ def bot(history, temperature, top_p, max_output_tokens):
|
|
408 |
with open(file_path, "rb") as src, open(filename, "wb") as dst:
|
409 |
dst.write(src.read())
|
410 |
|
|
|
411 |
if not is_video:
|
412 |
image_tensor = [
|
413 |
our_chatbot.image_processor.preprocess(f, return_tensors="pt")["pixel_values"][
|
@@ -418,14 +421,14 @@ def bot(history, temperature, top_p, max_output_tokens):
|
|
418 |
for f in image_list
|
419 |
]
|
420 |
image_tensor = torch.stack(image_tensor)
|
421 |
-
|
422 |
image_tensor = our_chatbot.image_processor.preprocess(image_list, return_tensors="pt")["pixel_values"].half().to(our_chatbot.model.device)
|
423 |
-
|
424 |
-
|
425 |
-
image_token = DEFAULT_IMAGE_TOKEN * num_new_images
|
426 |
|
427 |
inp = text
|
428 |
-
inp = image_token +
|
429 |
our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp)
|
430 |
# image = None
|
431 |
our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None)
|
@@ -450,14 +453,20 @@ def bot(history, temperature, top_p, max_output_tokens):
|
|
450 |
)
|
451 |
print(our_chatbot.model.device)
|
452 |
print(input_ids.device)
|
453 |
-
print(image_tensor.device)
|
454 |
|
455 |
|
|
|
|
|
|
|
|
|
|
|
|
|
456 |
|
457 |
generate_kwargs = dict(
|
458 |
inputs=input_ids,
|
459 |
streamer=streamer,
|
460 |
-
images=
|
461 |
do_sample=True,
|
462 |
temperature=temperature,
|
463 |
top_p=top_p,
|
@@ -610,7 +619,7 @@ with gr.Blocks(
|
|
610 |
"files": [
|
611 |
f"{cur_dir}/examples/realcase_math.jpg",
|
612 |
],
|
613 |
-
"text": "Find the measure of angle 3.",
|
614 |
}
|
615 |
],
|
616 |
[
|
|
|
320 |
# return history, gr.MultimodalTextbox(value=None, interactive=False)
|
321 |
# else:
|
322 |
for x in message["files"]:
|
323 |
+
if "realcase_video.jpg" in x:
|
324 |
+
x = x.replace("realcase_video.jpg", "realcase_video.mp4")
|
325 |
history.append(((x,), None))
|
326 |
if message["text"] is not None:
|
327 |
history.append((message["text"], None))
|
|
|
410 |
with open(file_path, "rb") as src, open(filename, "wb") as dst:
|
411 |
dst.write(src.read())
|
412 |
|
413 |
+
image_tensor = []
|
414 |
if not is_video:
|
415 |
image_tensor = [
|
416 |
our_chatbot.image_processor.preprocess(f, return_tensors="pt")["pixel_values"][
|
|
|
421 |
for f in image_list
|
422 |
]
|
423 |
image_tensor = torch.stack(image_tensor)
|
424 |
+
elif num_new_images > 0:
|
425 |
image_tensor = our_chatbot.image_processor.preprocess(image_list, return_tensors="pt")["pixel_values"].half().to(our_chatbot.model.device)
|
426 |
+
|
427 |
+
|
428 |
+
image_token = DEFAULT_IMAGE_TOKEN * num_new_images + "\n"
|
429 |
|
430 |
inp = text
|
431 |
+
inp = image_token + inp
|
432 |
our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp)
|
433 |
# image = None
|
434 |
our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None)
|
|
|
453 |
)
|
454 |
print(our_chatbot.model.device)
|
455 |
print(input_ids.device)
|
456 |
+
# print(image_tensor.device)
|
457 |
|
458 |
|
459 |
+
if is_video:
|
460 |
+
input_image_tensor = [image_tensor]
|
461 |
+
elif num_new_images > 0:
|
462 |
+
input_image_tensor = image_tensor
|
463 |
+
else:
|
464 |
+
input_image_tensor = None
|
465 |
|
466 |
generate_kwargs = dict(
|
467 |
inputs=input_ids,
|
468 |
streamer=streamer,
|
469 |
+
images=input_image_tensor,
|
470 |
do_sample=True,
|
471 |
temperature=temperature,
|
472 |
top_p=top_p,
|
|
|
619 |
"files": [
|
620 |
f"{cur_dir}/examples/realcase_math.jpg",
|
621 |
],
|
622 |
+
"text": "Find the measure of angle 3. Please provide a step by step solution.",
|
623 |
}
|
624 |
],
|
625 |
[
|