import spaces
import time
from threading import Thread

import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor

from llava.constants import (
    IMAGE_TOKEN_INDEX,
    DEFAULT_IMAGE_TOKEN,
    DEFAULT_IM_START_TOKEN,
    DEFAULT_IM_END_TOKEN,
    IMAGE_PLACEHOLDER,
)
from llava.model.builder import load_pretrained_model
from llava.utils import disable_torch_init
from llava.mm_utils import (
    process_images,
    tokenizer_image_token,
    get_model_name_from_path,
)

from io import BytesIO
import requests
import os
import re  # needed for the re.sub calls in infer_single_image

from conversation import Conversation, SeparatorStyle
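
# Load the Turkish-LLaVA checkpoint once at startup: load_pretrained_model
# returns the tokenizer, the model, the vision image processor and the
# maximum context length.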
model_id = "ytu-ce-cosmos/Turkish-LLaVA-v0.1"

disable_torch_init()

model_name = get_model_name_from_path(model_id)
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_id, None, model_name
)
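
# Resolve an image from either an HTTP(S) URL or a local file path and
# return it as an RGB PIL image.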
def load_image(image_file):
    if image_file.startswith("http") or image_file.startswith("https"):
        response = requests.get(image_file)
        image = Image.open(BytesIO(response.content)).convert("RGB")
    elif os.path.exists(image_file):
        image = Image.open(image_file).convert("RGB")
    else:
        raise FileNotFoundError(f"Görüntü dosyası {image_file} bulunamadı.")
    return image
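
# Single-image inference: insert the image token(s) into the user prompt, wrap
# everything in the Llama-3 chat template (the system prompt, in Turkish, tells
# the model it is an AI assistant that should complete the task faithfully and
# reason step by step), then greedily decode up to 512 new tokens.
# NOTE: the @spaces.GPU decorator is an assumption: `spaces` is imported but
# never used in the original file, and this is the usual way a ZeroGPU Space
# requests a GPU for the duration of a call.
@spaces.GPU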
def infer_single_image(model_id, image_file, prompt):
    image_token_se = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN
    if IMAGE_PLACEHOLDER in prompt:
        if model.config.mm_use_im_start_end:
            prompt = re.sub(IMAGE_PLACEHOLDER, image_token_se, prompt)
        else:
            prompt = re.sub(IMAGE_PLACEHOLDER, DEFAULT_IMAGE_TOKEN, prompt)
    else:
        if model.config.mm_use_im_start_end:
            prompt = image_token_se + "\n" + prompt
        else:
            prompt = DEFAULT_IMAGE_TOKEN + "\n" + prompt

    conv = Conversation(
        system="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nSen bir yapay zeka asistanısın. Kullanıcı sana bir görev verecek. Amacın görevi olabildiğince sadık bir şekilde tamamlamak. Görevi yerine getirirken adım adım düşün ve adımlarını gerekçelendir.""",
        roles=("<|start_header_id|>user<|end_header_id|>\n\n", "<|start_header_id|>assistant<|end_header_id|>\n\n"),
        version="llama3",
        messages=[],
        offset=0,
        sep_style=SeparatorStyle.MPT,
        sep="<|eot_id|>",
    )
    conv.append_message(conv.roles[0], prompt)
    conv.append_message(conv.roles[1], None)
    full_prompt = conv.get_prompt()
    print("full prompt: ", full_prompt)

    image = load_image(image_file)
    image_tensor = process_images(
        [image],
        image_processor,
        model.config
    ).to(model.device, dtype=torch.float16)

    input_ids = (
        tokenizer_image_token(full_prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
        .unsqueeze(0)
        .cuda()
    )

    with torch.inference_mode():
        output_ids = model.generate(
            input_ids,
            images=image_tensor,
            image_sizes=[image.size],
            do_sample=False,
            max_new_tokens=512,
            use_cache=True,
        )

    output = tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
    return output
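
# Gradio chat callback: pick the most recent uploaded image, either from the
# current message or from earlier turns in the history, and run inference on
# it together with the user's text.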
def bot_streaming(message, history):
    print(message)
    image = None
    if message["files"]:
        # The file entry is either a dict with a "path" key or a plain path string.
        if isinstance(message["files"][-1], dict):
            image = message["files"][-1]["path"]
        else:
            image = message["files"][-1]
    else:
        # Fall back to the last image found in the conversation history.
        for hist in history:
            if isinstance(hist[0], tuple):
                image = hist[0][0]

    if image is None:
        # gr.Error must be raised, not just instantiated, to surface in the UI.
        raise gr.Error("LLaVA'nın çalışması için bir resim yüklemeniz gerekir.")

    prompt = message["text"]
    result = infer_single_image(model_id, image, prompt)
    print(result)
    # Yields a single, complete answer; generation is not token-streamed.
    yield result
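
# Build the UI: a multimodal textbox (text plus image upload) wired to
# bot_streaming through gr.ChatInterface, with a few Turkish example prompts.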
chatbot = gr.Chatbot(scale=1)
chat_input = gr.MultimodalTextbox(
    interactive=True,
    file_types=["image"],
    placeholder="Mesaj girin veya dosya yükleyin...",
    show_label=False,
)

with gr.Blocks(fill_height=True) as demo:
    gr.ChatInterface(
        fn=bot_streaming,
        title="Cosmos LLaVA",
        examples=[
            {"text": "Bu kitabın adı ne?", "files": ["./book.jpg"]},
            {"text": "Çiçeğin üzerinde ne var?", "files": ["./bee.jpg"]},
            {"text": "Bu tatlı nasıl yapılır?", "files": ["./baklava.png"]},
        ],
        description="",
        stop_btn="Stop Generation",
        multimodal=True,
        textbox=chat_input,
        chatbot=chatbot,
    )

demo.queue(api_open=False)
demo.launch(show_api=False, share=False)