Spaces:
Build error
Build error
File size: 1,862 Bytes
1c5a277 a3895ed ef81d40 1c5a277 a3895ed ef81d40 a3895ed 1c5a277 f9d091a ef81d40 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# app.py
import gradio as gr
from tinyllava.model.builder import load_pretrained_model
from tinyllava.utils import disable_torch_init
from tinyllava.mm_utils import process_images, tokenizer_image_token, get_model_name_from_path
import torch
from PIL import Image
# --- Disable unnecessary torch init (must run before the model is built) ---
disable_torch_init()

# --- Load TinyLLaVA 3.1B ---
# Hugging Face Hub ID of the checkpoint.
model_path = "bczhou/TinyLLaVA-3.1B"

# The loader hands back the four objects the Gradio handler relies on.
# model_base stays None; point it at a base model only if you have one.
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path=model_path,
    model_base=None,
    model_name="TinyLLaVA-3.1B",
)

# Inference is pinned to the CPU; input tensors are moved to this same device.
device = torch.device("cpu")
model.to(device)
# --- Gradio handler ---
def describe_image(image, prompt):
    """Answer a question about an uploaded image with TinyLLaVA.

    Args:
        image: The uploaded picture as a NumPy array (the Gradio input is
            declared with ``type="numpy"``), or ``None`` when the user
            submitted without uploading anything.
        prompt: The user's question. ``None`` or a blank string falls back
            to a generic "what's happening" question.

    Returns:
        The decoded model output as a string, or a short hint asking for an
        image when none was provided.
    """
    # Gradio passes None when no image was uploaded; Image.fromarray would
    # crash on it, so answer with a hint instead.
    if image is None:
        return "Please upload an image first."

    question = (prompt or "").strip() or "What's happening in this image?"
    # The image placeholder must be present in the text so the tokenizer
    # helper can mark where the visual features are inserted.
    # NOTE(review): "<image>" is the LLaVA-style default placeholder --
    # confirm against tinyllava.constants.DEFAULT_IMAGE_TOKEN.
    # TODO(review): the reference inference script also wraps the question
    # in a conversation template; consider doing the same for answer quality.
    if "<image>" not in question:
        question = "<image>\n" + question

    # TinyLLaVA wants PIL; force RGB so grayscale/alpha inputs are accepted.
    pil_image = Image.fromarray(image).convert("RGB")
    image_tensor = process_images([pil_image], image_processor, model.config)
    image_tensor = image_tensor.to(device)

    # The helper tokenizes the text itself and returns the ids, so its
    # result must not be passed through the tokenizer a second time.
    # NOTE(review): assumes the LLaVA-style signature
    # tokenizer_image_token(prompt, tokenizer, image_token_index=...,
    # return_tensors=None) -- confirm against the installed tinyllava.
    input_ids = (
        tokenizer_image_token(question, tokenizer, return_tensors="pt")
        .unsqueeze(0)  # add the batch dimension: (seq,) -> (1, seq)
        .to(device)
    )

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            images=image_tensor,
            do_sample=True,
            temperature=0.2,
            max_new_tokens=200,
        )

    return tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
# --- Gradio UI: an image and a question go in, the generated answer comes out ---
iface = gr.Interface(
    fn=describe_image,
    inputs=[
        gr.Image(type="numpy", label="Image"),
        gr.Textbox(
            label="Your question",
            placeholder="What's happening in this image?",
        ),
    ],
    outputs=gr.Textbox(label="TinyLLaVA Answer"),
    # The title previously contained mis-decoded UTF-8 (an emoji and a dash
    # rendered as Greek letters); restored to the intended characters.
    title="🦙 TinyLLaVA-3.1B — Vision-Language Q&A",
    description=(
        "A lightweight LLaVA variant that runs on CPU Spaces. "
        "Upload an image, ask a question."
    ),
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()
|