Spaces:
Runtime error
Runtime error
add examples
Browse files- app_dialogue.py +168 -7
- gradio_dope_data_points/Idefics2/3c521493856dddb7c914/1-Figure1-1.png +3 -0
- gradio_dope_data_points/Idefics2/621831e337f23a0cfa20/5_differences.jpeg +0 -0
- gradio_dope_data_points/Idefics2/bfbb7b61084cfb7ddfdf/1-Figure1-1.png +3 -0
- gradio_dope_data_points/Idefics2/c9d5e002601021786000/5_differences.jpeg +0 -0
- gradio_dope_data_points/log.csv +4 -0
- gradio_problematic_data_points/Idefics2/4e1cacac141902de6e7e/5_differences.jpeg +0 -0
- gradio_problematic_data_points/Idefics2/4f57685fb80ffdb08c0b/5_differences.jpeg +0 -0
- gradio_problematic_data_points/log.csv +3 -0
app_dialogue.py
CHANGED
@@ -2,7 +2,7 @@ import os
|
|
2 |
import subprocess
|
3 |
|
4 |
# Install flash attention
|
5 |
-
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
6 |
|
7 |
|
8 |
import copy
|
@@ -16,6 +16,10 @@ import urllib
|
|
16 |
from urllib.parse import urlparse
|
17 |
from PIL import Image
|
18 |
import io
|
|
|
|
|
|
|
|
|
19 |
|
20 |
import gradio as gr
|
21 |
from transformers import AutoProcessor, TextIteratorStreamer
|
@@ -98,6 +102,31 @@ SYSTEM_PROMPT = [
|
|
98 |
{"type": "text", "text": "A dog is lying on the floor, looking at the camera. It is looking directly at you. The dog has a white body and brown patches on its face and ears. Its eyes are dark. Its nose is black, and it has long, floppy ears, white paws, long fur, big eyes."},
|
99 |
],
|
100 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
]
|
102 |
|
103 |
|
@@ -126,12 +155,11 @@ def format_user_prompt_with_im_history_and_system_conditioning(
|
|
126 |
"""
|
127 |
resulting_messages = copy.deepcopy(SYSTEM_PROMPT)
|
128 |
resulting_images = []
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
resulting_images.append(load_image_from_url(content["image"]))
|
135 |
|
136 |
# Format history
|
137 |
for turn in chat_history:
|
@@ -277,6 +305,59 @@ def model_inference(
|
|
277 |
print("-----")
|
278 |
|
279 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
280 |
# Hyper-parameters for generation
|
281 |
max_new_tokens = gr.Slider(
|
282 |
minimum=8,
|
@@ -331,6 +412,22 @@ chatbot = gr.Chatbot(
|
|
331 |
height=450,
|
332 |
)
|
333 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
334 |
|
335 |
with gr.Blocks(fill_height=True, css=""".gradio-container .avatar-container {height: 40px width: 40px !important;}""") as demo:
|
336 |
# model selector should be set to `visible=False` ultimately
|
@@ -377,5 +474,69 @@ with gr.Blocks(fill_height=True, css=""".gradio-container .avatar-container {hei
|
|
377 |
multimodal=True,
|
378 |
additional_inputs=[model_selector, decoding_strategy, temperature, max_new_tokens, repetition_penalty, top_p],
|
379 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
380 |
|
381 |
demo.launch()
|
|
|
2 |
import subprocess
|
3 |
|
4 |
# Install flash attention
|
5 |
+
# subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
6 |
|
7 |
|
8 |
import copy
|
|
|
16 |
from urllib.parse import urlparse
|
17 |
from PIL import Image
|
18 |
import io
|
19 |
+
import pandas as pd
|
20 |
+
import datasets
|
21 |
+
import json
|
22 |
+
import requests
|
23 |
|
24 |
import gradio as gr
|
25 |
from transformers import AutoProcessor, TextIteratorStreamer
|
|
|
102 |
{"type": "text", "text": "A dog is lying on the floor, looking at the camera. It is looking directly at you. The dog has a white body and brown patches on its face and ears. Its eyes are dark. Its nose is black, and it has long, floppy ears, white paws, long fur, big eyes."},
|
103 |
],
|
104 |
},
|
105 |
+
{
|
106 |
+
"role": "user",
|
107 |
+
"content": [
|
108 |
+
{"type": "text", "text": "What can you tell me about this breed of dogs?"},
|
109 |
+
],
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"role": "assistant",
|
113 |
+
"content": [
|
114 |
+
{"type": "text", "text": "One specific characteristic of the Cavalier King Charles Spaniel is its friendly and affectionate nature. This breed is known for being extremely sociable and forming strong bonds with their owners. They are often described as \"velcro dogs\" because they love to be close to their human companions, whether it's sitting on their laps, cuddling on the couch, or simply following them around the house."},
|
115 |
+
],
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"role": "user",
|
119 |
+
"content": [
|
120 |
+
{"type": "text", "text": "How many dogs do you see in the following image?"},
|
121 |
+
{"type": "image", "image": "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/example_images/tennis_tsonga.jpg?download=true"},
|
122 |
+
],
|
123 |
+
},
|
124 |
+
{
|
125 |
+
"role": "assistant",
|
126 |
+
"content": [
|
127 |
+
{"type": "text", "text": "There are no dogs in this image. The picture shows a tennis player in the midst of a powerful swing."},
|
128 |
+
],
|
129 |
+
},
|
130 |
]
|
131 |
|
132 |
|
|
|
155 |
"""
|
156 |
resulting_messages = copy.deepcopy(SYSTEM_PROMPT)
|
157 |
resulting_images = []
|
158 |
+
for resulting_message in resulting_messages:
|
159 |
+
if resulting_message["role"] == "user":
|
160 |
+
for content in resulting_message["content"]:
|
161 |
+
if content["type"] == "image":
|
162 |
+
resulting_images.append(load_image_from_url(content["image"]))
|
|
|
163 |
|
164 |
# Format history
|
165 |
for turn in chat_history:
|
|
|
305 |
print("-----")
|
306 |
|
307 |
|
308 |
+
def csv_to_hf_dataset(csv_file):
    """Convert a flagging CSV log into a Hugging Face ``datasets.Dataset``.

    The CSV is expected to be one of the ``log.csv`` files written by the
    ``gr.CSVLogger`` callbacks set up in this file. Its second column holds
    the chatbot history as a JSON list whose entries are either
    ``[{"file": <path>, ...}, null]`` (an uploaded image) or
    ``[<user text>, <assistant text>]`` (one conversation turn).

    Args:
        csv_file: Path (or any input accepted by ``pd.read_csv``) of the log.

    Returns:
        A ``datasets.Dataset`` with one row per CSV row and two columns:
        ``images`` (the loaded images) and ``conversation`` (a list of
        ``{"user": ..., "assistant": ...}`` dicts).

    Raises:
        json.JSONDecodeError: If the chat-history cell is not valid JSON.
    """
    df = pd.read_csv(csv_file)

    features = datasets.Features(
        {
            "images": datasets.Sequence(datasets.Image(decode=True)),
            "conversation": [
                {
                    "user": datasets.Value("string"),
                    "assistant": datasets.Value("string"),
                }
            ],
        }
    )

    def parse_and_download(data_row):
        """Return ``(images, conversation)`` parsed from one CSV row."""
        # The chat history lives in the second column. Use positional access
        # explicitly: integer keys on a string-labelled Series are deprecated.
        #
        # Do NOT collapse '""' into '"' here: pd.read_csv has already undone
        # the CSV quote-doubling, so a second pass would eat real quotes
        # (e.g. an escaped quote followed by the closing quote, `\""`, or an
        # empty string `""`) and produce invalid JSON.
        discussion_data = json.loads(data_row.iloc[1])

        images = []
        conversation = []
        for entry in discussion_data:
            first = entry[0]
            if isinstance(first, dict) and "file" in first:
                # Image entry: [{"file": ..., "alt_text": ...}, null].
                # NOTE(review): the logged value is a local file path, not a
                # URL; assumes load_image_from_url (defined elsewhere in this
                # file) accepts it -- TODO confirm.
                images.append(load_image_from_url(first["file"]))
            elif isinstance(entry, list):
                # Conversation turn: [user_text, assistant_text].
                conversation.append({"user": first, "assistant": entry[1]})

        return images, conversation

    # Parse every row up front; plain lists keep Dataset.from_dict independent
    # of pandas Series semantics and also handle an empty log cleanly.
    processed = [parse_and_download(row) for _, row in df.iterrows()]

    data_dict = {
        "images": [images for images, _ in processed],
        "conversation": [conversation for _, conversation in processed],
    }

    return datasets.Dataset.from_dict(data_dict, features=features)
|
352 |
+
|
353 |
+
|
354 |
+
def update_dope_problematic_dataset_fn():
    """Rebuild the "dope" and "problematic" datasets and push them to the Hub.

    Each flagging log is converted with ``csv_to_hf_dataset`` and uploaded as
    a private dataset, the "dope" log first and the "problematic" log second.
    """
    # (flagging log, destination repo) pairs, processed in order.
    targets = (
        ("gradio_dope_data_points/log.csv", "HuggingFaceM4/dope_chatty_dataset"),
        ("gradio_problematic_data_points/log.csv", "HuggingFaceM4/problematic_chatty_dataset"),
    )
    for log_path, repo_id in targets:
        flagged_dataset = csv_to_hf_dataset(log_path)
        flagged_dataset.push_to_hub(repo_id, private=True)
|
359 |
+
|
360 |
+
|
361 |
# Hyper-parameters for generation
|
362 |
max_new_tokens = gr.Slider(
|
363 |
minimum=8,
|
|
|
412 |
height=450,
|
413 |
)
|
414 |
|
415 |
+
dope_callback = gr.CSVLogger()
|
416 |
+
problematic_callback = gr.CSVLogger()
|
417 |
+
|
418 |
+
|
419 |
+
# Using Flagging for saving dope and problematic examples
|
420 |
+
# Dope examples flagging
|
421 |
+
|
422 |
+
|
423 |
+
# gr.Markdown("""## How to use?
|
424 |
+
|
425 |
+
# There are two ways to provide image inputs:
|
426 |
+
# - Using the image box on the left panel
|
427 |
+
# - Using the inline syntax: `text<fake_token_around_image><image:URL_IMAGE><fake_token_around_image>text`
|
428 |
+
|
429 |
+
# The second syntax allows inputting an arbitrary number of images.""")
|
430 |
+
|
431 |
|
432 |
with gr.Blocks(fill_height=True, css=""".gradio-container .avatar-container {height: 40px width: 40px !important;}""") as demo:
|
433 |
# model selector should be set to `visible=False` ultimately
|
|
|
474 |
multimodal=True,
|
475 |
additional_inputs=[model_selector, decoding_strategy, temperature, max_new_tokens, repetition_penalty, top_p],
|
476 |
)
|
477 |
+
with gr.Group():
|
478 |
+
with gr.Row():
|
479 |
+
with gr.Column(scale=1, min_width=50):
|
480 |
+
dope_bttn = gr.Button("Dope🔥")
|
481 |
+
with gr.Column(scale=1, min_width=50):
|
482 |
+
problematic_bttn = gr.Button("Problematic😬")
|
483 |
+
with gr.Row():
|
484 |
+
update_dope_problematic_dataset = gr.Button("Update Dope/Problematic dataset📚")
|
485 |
+
dope_callback.setup(
|
486 |
+
[
|
487 |
+
model_selector,
|
488 |
+
chatbot,
|
489 |
+
decoding_strategy,
|
490 |
+
temperature,
|
491 |
+
max_new_tokens,
|
492 |
+
repetition_penalty,
|
493 |
+
top_p,
|
494 |
+
],
|
495 |
+
"gradio_dope_data_points",
|
496 |
+
)
|
497 |
+
dope_bttn.click(
|
498 |
+
lambda *args: dope_callback.flag(args),
|
499 |
+
[
|
500 |
+
model_selector,
|
501 |
+
chatbot,
|
502 |
+
decoding_strategy,
|
503 |
+
temperature,
|
504 |
+
max_new_tokens,
|
505 |
+
repetition_penalty,
|
506 |
+
top_p,
|
507 |
+
],
|
508 |
+
None,
|
509 |
+
preprocess=False,
|
510 |
+
)
|
511 |
+
# Problematic examples flagging
|
512 |
+
problematic_callback.setup(
|
513 |
+
[
|
514 |
+
model_selector,
|
515 |
+
chatbot,
|
516 |
+
decoding_strategy,
|
517 |
+
temperature,
|
518 |
+
max_new_tokens,
|
519 |
+
repetition_penalty,
|
520 |
+
top_p,
|
521 |
+
],
|
522 |
+
"gradio_problematic_data_points",
|
523 |
+
)
|
524 |
+
problematic_bttn.click(
|
525 |
+
lambda *args: problematic_callback.flag(args),
|
526 |
+
[
|
527 |
+
model_selector,
|
528 |
+
chatbot,
|
529 |
+
decoding_strategy,
|
530 |
+
temperature,
|
531 |
+
max_new_tokens,
|
532 |
+
repetition_penalty,
|
533 |
+
top_p,
|
534 |
+
],
|
535 |
+
None,
|
536 |
+
preprocess=False,
|
537 |
+
)
|
538 |
+
update_dope_problematic_dataset.click(
|
539 |
+
fn=update_dope_problematic_dataset_fn,
|
540 |
+
)
|
541 |
|
542 |
demo.launch()
|
gradio_dope_data_points/Idefics2/3c521493856dddb7c914/1-Figure1-1.png
ADDED
Git LFS Details
|
gradio_dope_data_points/Idefics2/621831e337f23a0cfa20/5_differences.jpeg
ADDED
gradio_dope_data_points/Idefics2/bfbb7b61084cfb7ddfdf/1-Figure1-1.png
ADDED
Git LFS Details
|
gradio_dope_data_points/Idefics2/c9d5e002601021786000/5_differences.jpeg
ADDED
gradio_dope_data_points/log.csv
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Model,Idefics2,Decoding strategy,Sampling temperature,Maximum number of new tokens to generate,Repetition penalty,Top P,flag,username,timestamp
|
2 |
+
idefics2-8b-chatty (chat),"[[{""file"": ""gradio_dope_data_points/Idefics2/621831e337f23a0cfa20/5_differences.jpeg"", ""alt_text"": null}, null], [""find the 5 differences"", ""In the image, there are five differences between the two bears:\n\n1. The bear on the left is holding a fish in its mouth.\n2. The bear on the right is holding a bug in its mouth.\n3. The bear on the left is wearing a hat.\n4. The bear on the right is wearing glasses.\n5. The bear on the left is holding a stick.\n\nThese differences make the two bears appear distinct from each other.""]]",Greedy,0.4,512,1.1,0.8,,,2024-04-18 15:06:23.515619
|
3 |
+
idefics2-8b-chatty (chat),"[[{""file"": ""gradio_dope_data_points/Idefics2/3c521493856dddb7c914/1-Figure1-1.png"", ""alt_text"": null}, null], [""read the text"", ""The first image depicts a lightbulb surrounded by plants, while the second image shows a hanging lightbulb with a plant inside it. Both images feature a lightbulb surrounded by greenery, creating a unique and visually appealing scene.""], [""what text is written exactly?"", ""The text reads: \""(a) some plants surrounding a lightbulb (b) a lightbulb surrounded by some plants.\""""]]",Greedy,0.4,512,1.1,0.8,,,2024-04-18 15:13:57.547009
|
4 |
+
idefics2-8b-chatty (chat),"[[{""file"": ""gradio_dope_data_points/Idefics2/bfbb7b61084cfb7ddfdf/1-Figure1-1.png"", ""alt_text"": null}, null], [{""file"": ""gradio_dope_data_points/Idefics2/c9d5e002601021786000/5_differences.jpeg"", ""alt_text"": null}, null], [""how do these 2 images differ?"", ""The first image features a group of kittens standing in the grass, while the second image is a coloring book page with a bear and her cub.""]]",Greedy,0.4,512,1.1,0.8,,,2024-04-18 15:59:57.962236
|
gradio_problematic_data_points/Idefics2/4e1cacac141902de6e7e/5_differences.jpeg
ADDED
gradio_problematic_data_points/Idefics2/4f57685fb80ffdb08c0b/5_differences.jpeg
ADDED
gradio_problematic_data_points/log.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
Model,Idefics2,Decoding strategy,Sampling temperature,Maximum number of new tokens to generate,Repetition penalty,Top P,flag,username,timestamp
|
2 |
+
idefics2-8b-chatty (chat),"[[{""file"": ""gradio_problematic_data_points/Idefics2/4e1cacac141902de6e7e/5_differences.jpeg"", ""alt_text"": null}, null], [""list all the differences"", ""In the first image, the bear family consists of three bears - a mother bear and her two cubs. In the second image, there are only two bears - a mother bear and her single cub. Additionally, the background scenery in the second image includes trees and flowers, while the first image does not have any such elements.""]]",Greedy,0.4,512,1.1,0.8,,,2024-04-18 18:47:44.337779
|
3 |
+
idefics2-8b-chatty (chat),"[[{""file"": ""gradio_problematic_data_points/Idefics2/4f57685fb80ffdb08c0b/5_differences.jpeg"", ""alt_text"": null}, null], [""what differences are there ?"", ""In the first image, the bears are standing next to each other under a tree, while in the second image, they are walking down a path. Additionally, the bears have different expressions in both images.""]]",Greedy,0.4,512,1.1,0.8,,,2024-04-18 18:50:24.100295
|