import gradio as gr
import torch
from transformers import AutoProcessor, LlavaForConditionalGeneration

# Load the LLaVA model (note the "-hf" suffix on the Hugging Face repo id)
model_name = "llava-hf/llava-1.5-13b-hf"
model = LlavaForConditionalGeneration.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
)
processor = AutoProcessor.from_pretrained(model_name)

# Image analysis & chat function
def chat(image, text):
    if image is None:
        return "Please upload an image."
    # LLaVA-1.5 expects the <image> placeholder in its prompt template
    prompt = f"USER: <image>\n{text} ASSISTANT:"
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(
        model.device, torch.float16
    )
    outputs = model.generate(**inputs, max_new_tokens=256)
    answer = processor.batch_decode(outputs, skip_special_tokens=True)[0]
    # The decoded string includes the prompt; return only the model's reply
    return answer.split("ASSISTANT:")[-1].strip()

# Build the Gradio UI; type="pil" hands the function a PIL image directly
interface = gr.Interface(
    fn=chat,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
    outputs="text",
    title="LLaVA - Multimodal AI",
    description="This AI can chat & analyze images.",
)

# Launch the app
interface.launch()
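
Once the app is running, it can also be queried programmatically. A minimal sketch using the gradio_client package, assuming the default local URL and endpoint name for a gr.Interface; "cat.jpg" is a placeholder path:

from gradio_client import Client, handle_file

# Connect to the locally running Gradio app (default address)
client = Client("http://127.0.0.1:7860/")

# "/predict" is the default endpoint name for a gr.Interface
result = client.predict(
    handle_file("cat.jpg"),  # placeholder image path
    "What is shown in this image?",
    api_name="/predict",
)
print(result)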