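# Food Image AI Assistant: a Gradio app around
# scb10x/llama-3-typhoon-v1.5-8b-instruct-vision-preview that downloads an image
# from a URL and answers questions about the food it shows.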
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import requests
import gradio as gr
import spaces
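
# `spaces` provides the @spaces.GPU decorator used below; on a Hugging Face
# ZeroGPU Space it allocates a GPU to the decorated function for at most
# `duration` seconds per call.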
# Load model and tokenizer
model_name = 'scb10x/llama-3-typhoon-v1.5-8b-instruct-vision-preview'
@spaces.GPU(duration=120)  # Use the GPU for up to 120 seconds
def load_model():
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map='auto',
        trust_remote_code=True
    )
    return model

model = load_model()
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

def prepare_inputs(text, image):
    messages = [
        {"role": "system", "content": "You are a helpful vision-capable assistant who eagerly converses with the user in their language."},
        {"role": "user", "content": f"<image>\n{text}"}
    ]
    # Tokenize the chat and append the assistant turn header so the model
    # continues as the assistant.
    inputs = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    return inputs

@spaces.GPU(duration=60)  # Use the GPU for up to 60 seconds
def predict(prompt, img_url):
    try:
        # Fetch the image, resize it to the model's expected resolution, and preprocess it.
        image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
        image = image.resize((model.config.image_size, model.config.image_size))
        image_tensor = model.preprocess_images([image]).to(model.device)
        inputs = prepare_inputs(prompt, image)
        output_ids = model.generate(
            inputs,
            images=image_tensor,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.2,
            top_p=0.2,
            repetition_penalty=1.0
        )[0]
        # Decode only the newly generated tokens, skipping the prompt portion.
        result = tokenizer.decode(output_ids[inputs.shape[1]:], skip_special_tokens=True).strip()
        return result
    except Exception as e:
        return str(e)

# Gradio Interface
inputs = [
    gr.Textbox(label="Prompt", placeholder="Ask about the food in the image"),
    gr.Textbox(label="Image URL", placeholder="Enter an image URL")
]
outputs = gr.Textbox(label="Generated Output")

gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title="Food Image AI Assistant",
    description="This model can analyze food images and answer questions about them."
).launch()
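
# Hypothetical local smoke test (not part of the original app): with the launch()
# call above temporarily disabled, predict() can be exercised directly. The URL is
# a placeholder; substitute any publicly reachable food image.
# if __name__ == "__main__":
#     print(predict("What dish is shown in this picture?", "https://example.com/food.jpg"))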