Spaces:
Running
on
Zero
Running
on
Zero
fix system prompt
Browse files- app_dialogue.py +28 -13
app_dialogue.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import os
|
2 |
import subprocess
|
3 |
|
4 |
-
|
5 |
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
6 |
|
7 |
|
@@ -12,8 +12,10 @@ import torch
|
|
12 |
|
13 |
from threading import Thread
|
14 |
from typing import List, Dict, Union
|
|
|
15 |
from urllib.parse import urlparse
|
16 |
from PIL import Image
|
|
|
17 |
|
18 |
import gradio as gr
|
19 |
from transformers import AutoProcessor, TextIteratorStreamer
|
@@ -22,13 +24,13 @@ from transformers import Idefics2ForConditionalGeneration
|
|
22 |
|
23 |
DEVICE = torch.device("cuda")
|
24 |
MODELS = {
|
25 |
-
"idefics2-8b (sft)": Idefics2ForConditionalGeneration.from_pretrained(
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
).to(DEVICE),
|
32 |
"idefics2-8b-chatty (chat)": Idefics2ForConditionalGeneration.from_pretrained(
|
33 |
"HuggingFaceM4/idefics2-8b-chatty",
|
34 |
torch_dtype=torch.bfloat16,
|
@@ -47,11 +49,11 @@ PROCESSOR = AutoProcessor.from_pretrained(
|
|
47 |
|
48 |
# The conversation begins:""",
|
49 |
# """\nUser:""",
|
50 |
-
# "https://
|
51 |
# "Describe this image.<end_of_utterance>",
|
52 |
# """\nAssistant: Five kittens are standing together in the center of the photograph. From the left to right, there is one orange kitten, two kittens white and black stripes, and two brown kittens with an orange head. They are in bright green grass and it looks like they are moving forward.<end_of_utterance>""",
|
53 |
# "\nUser:How about this image?",
|
54 |
-
# "https://
|
55 |
# "Can you describe it too?<end_of_utterance>",
|
56 |
# """\nAssistant: A dog is lying on the floor, looking at the camera. It is looking directly at you. The dog has a white body and brown patches on its face and ears. Its eyes are dark. Its nose is black, and it has long, floppy ears, white paws, long fur, big eyes.<end_of_utterance>""",
|
57 |
# "\nUser: What kind of breed is it?<end_of_utterance>",
|
@@ -63,7 +65,7 @@ PROCESSOR = AutoProcessor.from_pretrained(
|
|
63 |
# "\nUser: Do you like Cavalier King Charles Spaniel?<end_of_utterance>",
|
64 |
# """\nAssistant: I do not have personal opinions as I’m just a computer program. However, cavaliers are known for being excellent family pets due to their gentle and patient demeanor, making them great with children and other pets. Their affectionate nature also makes them ideal therapy dogs, as they can provide comfort and emotional support to those in need.<end_of_utterance>""",
|
65 |
# "\nUser: How many dogs do you see in this image?",
|
66 |
-
# "https://
|
67 |
# "<end_of_utterance>",
|
68 |
# """\nAssistant: There is no dogs in this image. The picture shows a tennis player jumping to volley the ball.<end_of_utterance>""",
|
69 |
# ]
|
@@ -72,7 +74,7 @@ SYSTEM_PROMPT = [
|
|
72 |
{
|
73 |
"role": "user",
|
74 |
"content": [
|
75 |
-
{"type": "image", "image": "https://
|
76 |
{"type": "text", "text": "Describe this image."},
|
77 |
],
|
78 |
},
|
@@ -86,7 +88,7 @@ SYSTEM_PROMPT = [
|
|
86 |
"role": "user",
|
87 |
"content": [
|
88 |
{"type": "text", "text": "How about this image?"},
|
89 |
-
{"type": "image", "image": "https://
|
90 |
{"type": "text", "text": "Can you describe it too?"},
|
91 |
],
|
92 |
},
|
@@ -108,6 +110,12 @@ BOT_AVATAR = "IDEFICS_logo.png"
|
|
108 |
def turn_is_pure_media(turn):
|
109 |
return turn[1] is None
|
110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
|
112 |
def format_user_prompt_with_im_history_and_system_conditioning(
|
113 |
user_prompt, chat_history
|
@@ -118,6 +126,12 @@ def format_user_prompt_with_im_history_and_system_conditioning(
|
|
118 |
"""
|
119 |
resulting_messages = copy.deepcopy(SYSTEM_PROMPT)
|
120 |
resulting_images = []
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
|
122 |
# Format history
|
123 |
for turn in chat_history:
|
@@ -232,6 +246,7 @@ def model_inference(
|
|
232 |
user_prompt=user_prompt,
|
233 |
chat_history=chat_history,
|
234 |
)
|
|
|
235 |
prompt = PROCESSOR.apply_chat_template(resulting_text, add_generation_prompt=True)
|
236 |
inputs = PROCESSOR(text=prompt, images=resulting_images if resulting_images else None, return_tensors="pt")
|
237 |
inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
|
|
|
1 |
import os
|
2 |
import subprocess
|
3 |
|
4 |
+
# Install flash attention at start-up. FLASH_ATTENTION_SKIP_CUDA_BUILD is set
# for the pip process only; the rest of the parent environment (PATH, HOME,
# proxy settings, ...) is passed through, because `env=` replaces the child's
# environment wholesale rather than extending it.
subprocess.run(
    'pip install flash-attn --no-build-isolation',
    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True,
)
|
6 |
|
7 |
|
|
|
12 |
|
13 |
from threading import Thread
|
14 |
from typing import List, Dict, Union
|
15 |
+
import urllib
|
16 |
from urllib.parse import urlparse
|
17 |
from PIL import Image
|
18 |
+
import io
|
19 |
|
20 |
import gradio as gr
|
21 |
from transformers import AutoProcessor, TextIteratorStreamer
|
|
|
24 |
|
25 |
DEVICE = torch.device("cuda")
|
26 |
MODELS = {
|
27 |
+
# "idefics2-8b (sft)": Idefics2ForConditionalGeneration.from_pretrained(
|
28 |
+
# "HuggingFaceM4/idefics2-8b",
|
29 |
+
# torch_dtype=torch.bfloat16,
|
30 |
+
# _attn_implementation="flash_attention_2",
|
31 |
+
# trust_remote_code=True,
|
32 |
+
# token=os.environ["HF_AUTH_TOKEN"],
|
33 |
+
# ).to(DEVICE),
|
34 |
"idefics2-8b-chatty (chat)": Idefics2ForConditionalGeneration.from_pretrained(
|
35 |
"HuggingFaceM4/idefics2-8b-chatty",
|
36 |
torch_dtype=torch.bfloat16,
|
|
|
49 |
|
50 |
# The conversation begins:""",
|
51 |
# """\nUser:""",
|
52 |
+
# "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/example_images/kittens-cats-pet-cute-preview.jpg?download=true",
|
53 |
# "Describe this image.<end_of_utterance>",
|
54 |
# """\nAssistant: Five kittens are standing together in the center of the photograph. From the left to right, there is one orange kitten, two kittens white and black stripes, and two brown kittens with an orange head. They are in bright green grass and it looks like they are moving forward.<end_of_utterance>""",
|
55 |
# "\nUser:How about this image?",
|
56 |
+
# "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/example_images/puppy.jpg?download=true",
|
57 |
# "Can you describe it too?<end_of_utterance>",
|
58 |
# """\nAssistant: A dog is lying on the floor, looking at the camera. It is looking directly at you. The dog has a white body and brown patches on its face and ears. Its eyes are dark. Its nose is black, and it has long, floppy ears, white paws, long fur, big eyes.<end_of_utterance>""",
|
59 |
# "\nUser: What kind of breed is it?<end_of_utterance>",
|
|
|
65 |
# "\nUser: Do you like Cavalier King Charles Spaniel?<end_of_utterance>",
|
66 |
# """\nAssistant: I do not have personal opinions as I’m just a computer program. However, cavaliers are known for being excellent family pets due to their gentle and patient demeanor, making them great with children and other pets. Their affectionate nature also makes them ideal therapy dogs, as they can provide comfort and emotional support to those in need.<end_of_utterance>""",
|
67 |
# "\nUser: How many dogs do you see in this image?",
|
68 |
+
# "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/example_images/tennis_tsonga.jpg?download=true",
|
69 |
# "<end_of_utterance>",
|
70 |
# """\nAssistant: There is no dogs in this image. The picture shows a tennis player jumping to volley the ball.<end_of_utterance>""",
|
71 |
# ]
|
|
|
74 |
{
|
75 |
"role": "user",
|
76 |
"content": [
|
77 |
+
{"type": "image", "image": "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/example_images/kittens-cats-pet-cute-preview.jpg?download=true"},
|
78 |
{"type": "text", "text": "Describe this image."},
|
79 |
],
|
80 |
},
|
|
|
88 |
"role": "user",
|
89 |
"content": [
|
90 |
{"type": "text", "text": "How about this image?"},
|
91 |
+
{"type": "image", "image": "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/example_images/puppy.jpg?download=true"},
|
92 |
{"type": "text", "text": "Can you describe it too?"},
|
93 |
],
|
94 |
},
|
|
|
110 |
def turn_is_pure_media(turn):
    """Return True when the second element of ``turn`` is ``None``.

    NOTE(review): presumably ``turn`` is a (user, assistant) pair from the
    chat history and a ``None`` second slot marks a media-only turn --
    confirm against the caller.
    """
    second_slot = turn[1]
    return second_slot is None
|
112 |
|
113 |
+
def load_image_from_url(url):
    """Download the resource at ``url`` and open it as a PIL image.

    The response body is read completely into memory and wrapped in a
    ``BytesIO`` buffer, so the returned image does not depend on the HTTP
    connection, which is closed when the ``with`` block exits.

    Args:
        url: Address of the image to fetch.

    Returns:
        The image object produced by ``Image.open`` on the downloaded bytes.

    Raises:
        Whatever ``urllib.request.urlopen`` or ``Image.open`` raise (for
        example on an unreachable URL or undecodable data); nothing is
        caught here.
    """
    # A bare ``import urllib`` does not load the ``urllib.request`` submodule,
    # so import it explicitly instead of relying on another library having
    # imported it first.
    import urllib.request

    with urllib.request.urlopen(url) as response:
        image_data = response.read()
    return Image.open(io.BytesIO(image_data))
|
119 |
|
120 |
def format_user_prompt_with_im_history_and_system_conditioning(
|
121 |
user_prompt, chat_history
|
|
|
126 |
"""
|
127 |
resulting_messages = copy.deepcopy(SYSTEM_PROMPT)
|
128 |
resulting_images = []
|
129 |
+
if len(resulting_messages) > 0:
|
130 |
+
for resulting_message in resulting_messages:
|
131 |
+
if resulting_message["role"] == "user":
|
132 |
+
for content in resulting_message["content"]:
|
133 |
+
if content["type"] == "image":
|
134 |
+
resulting_images.append(load_image_from_url(content["image"]))
|
135 |
|
136 |
# Format history
|
137 |
for turn in chat_history:
|
|
|
246 |
user_prompt=user_prompt,
|
247 |
chat_history=chat_history,
|
248 |
)
|
249 |
+
|
250 |
prompt = PROCESSOR.apply_chat_template(resulting_text, add_generation_prompt=True)
|
251 |
inputs = PROCESSOR(text=prompt, images=resulting_images if resulting_images else None, return_tensors="pt")
|
252 |
inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
|