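# Gradio ZeroGPU Space for Typhoon2-Vision (scb10x/typhoon2-qwen2vl-7b-vision-instruct):
# a streaming multimodal (image + text) chat demo, optimized for Thai.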
from transformers import (
    Qwen2VLForConditionalGeneration,
    AutoProcessor,
    TextIteratorStreamer,
)
from PIL import Image
from threading import Thread
import gradio as gr
import spaces
import subprocess
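# Clear any ZeroGPU offload cache left over from a previous run of this Space.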
subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
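# Load the Typhoon2 Qwen2-VL vision-instruct checkpoint; torch_dtype="auto" keeps the
# checkpoint's native precision and device_map="auto" handles weight placement.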
model_name = "scb10x/typhoon2-qwen2vl-7b-vision-instruct"
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_name, torch_dtype="auto", device_map="auto"
)
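# Bound the image token budget: Qwen2-VL maps each 28x28-pixel patch to one visual token,
# so these limits make the processor resize images to between 256 and 1280 visual tokens.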
min_pixels = 256 * 28 * 28
max_pixels = 1280 * 28 * 28
processor = AutoProcessor.from_pretrained(
    model_name, min_pixels=min_pixels, max_pixels=max_pixels
)
theme = gr.themes.Soft(
    primary_hue=gr.themes.Color(
        c50="#f7f7fd",
        c100="#dfdef8",
        c200="#c4c1f2",
        c300="#a29eea",
        c400="#8f8ae6",
        c500="#756fe0",
        c600="#635cc1",
        c700="#4f4a9b",
        c800="#433f83",
        c900="#302d5e",
        c950="#302d5e",
    ),
    secondary_hue="rose",
    neutral_hue="stone",
)
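
# Chat callback: rebuilds the model's message list from the Gradio history and streams
# the reply token by token. @spaces.GPU attaches a ZeroGPU device for the call's duration.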
@spaces.GPU
def bot_streaming(message, history, max_new_tokens=512):
    txt = message["text"]
    messages = []
    images = []

    # Rebuild the chat-template messages from the Gradio tuple-format history.
    # An image upload appears as its own (filepath,) entry, with the accompanying
    # text turn stored in the following history item.
    for i, msg in enumerate(history):
        if isinstance(msg[0], tuple):
            # Image turn: pair the image with the text of the next history entry.
            messages.append(
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": history[i + 1][0]},
                        {"type": "image"},
                    ],
                }
            )
            messages.append(
                {
                    "role": "assistant",
                    "content": [{"type": "text", "text": history[i + 1][1]}],
                }
            )
            images.append(Image.open(msg[0][0]).convert("RGB"))
        elif isinstance(history[i - 1], tuple) and isinstance(msg[0], str):
            # Text turn already consumed together with the preceding image turn.
            pass
        elif isinstance(history[i - 1][0], str) and isinstance(msg[0], str):
            # Plain text-only turn.
            messages.append(
                {"role": "user", "content": [{"type": "text", "text": msg[0]}]}
            )
            messages.append(
                {"role": "assistant", "content": [{"type": "text", "text": msg[1]}]}
            )

    # Append the current user turn; only a single uploaded image per turn is consumed.
    if len(message["files"]) == 1:
        if isinstance(message["files"][0], str):
            # Cached examples pass a plain file path.
            image = Image.open(message["files"][0]).convert("RGB")
        else:
            # Regular uploads pass a dict with a "path" key.
            image = Image.open(message["files"][0]["path"]).convert("RGB")
        images.append(image)
        messages.append(
            {
                "role": "user",
                "content": [{"type": "text", "text": txt}, {"type": "image"}],
            }
        )
    else:
        messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})

    # Render the chat template and tokenize, including the images when present.
    texts = processor.apply_chat_template(messages, add_generation_prompt=True)
    if images == []:
        inputs = processor(text=texts, return_tensors="pt").to("cuda")
    else:
        inputs = processor(text=texts, images=images, return_tensors="pt").to("cuda")

    # Run generation in a background thread and stream decoded tokens as they arrive.
    streamer = TextIteratorStreamer(
        processor, skip_special_tokens=True, skip_prompt=True
    )
    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Yield the growing reply so Gradio renders it incrementally.
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer
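
# Multimodal chat UI: the MultimodalTextbox feeds {"text", "files"} dicts to bot_streaming,
# and the interface renders the partial strings it yields as a streaming reply.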
demo = gr.ChatInterface(
    fn=bot_streaming,
    title="🌪️ Typhoon2-Vision: Vision-Language Model optimized for Thai (Research Preview)",
    description="""
<ul>
<li>📝 <b>Technical Report</b>: <a href="https://arxiv.org/abs/2412.13702" target="_blank">https://arxiv.org/abs/2412.13702 (Section 4)</a></li>
<li>🤗 <b>Model weights</b>: <a href="https://huggingface.co/scb10x/typhoon2-qwen2vl-7b-vision-instruct" target="_blank">https://huggingface.co/scb10x/typhoon2-qwen2vl-7b-vision-instruct</a></li>
</ul>
<br />
<details>
<summary><strong>Disclaimer</strong></summary>
The responses generated by this Artificial Intelligence (AI) system are autonomously constructed and do not necessarily reflect the views or positions of the developing organizations, their affiliates, or any of their employees. These AI-generated responses do not represent those of the organizations. The organizations do not endorse, support, sanction, encourage, verify, or agree with the comments, opinions, or statements generated by this AI. The information produced by this AI is not intended to malign any religion, ethnic group, club, organization, company, individual, anyone, or anything. It is not the intent of the organizations to malign any group or individual. The AI operates based on its programming and training data and its responses should not be interpreted as the explicit intent or opinion of the organizations.
</details>
<br />
<details>
<summary><strong>Terms of use</strong></summary>
By using this service, users are required to agree to the following terms: The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. Vision language models are prone to hallucinations to a greater extent compared to text-only LLMs.
</details>
<br />
<details>
<summary><strong>License</strong></summary>
This project utilizes certain datasets and checkpoints that are subject to their respective original licenses. Users must comply with all terms and conditions of these original licenses. The content of this project itself is licensed under the Apache license 2.0.
</details>
""",
textbox=gr.MultimodalTextbox(
placeholder="Type a message or drag and drop an image",
file_types=["image"],
file_count="multiple",
),
additional_inputs=[
gr.Slider(
minimum=512,
maximum=1024,
value=512,
step=1,
label="Maximum number of new tokens to generate",
)
],
cache_examples=False,
stop_btn="Stop Generation",
fill_height=True,
multimodal=True,
theme=theme,
# css="footer {visibility: hidden}",
)
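# queue() enables request queuing, which Gradio needs to stream generator outputs;
# ssr_mode=False launches without server-side rendering.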
demo.queue()
demo.launch(ssr_mode=False)