import gradio as gr
import torch
from transformers import AutoProcessor, LlavaForConditionalGeneration

# Load the LLaVA model (note the "-hf" suffix on the Hugging Face repo id)
model_name = "llava-hf/llava-1.5-13b-hf"
model = LlavaForConditionalGeneration.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
)
processor = AutoProcessor.from_pretrained(model_name)

# Image analysis & chat function
def chat(image, text):
    if image is None:
        return "Please upload an image."
    # LLaVA-1.5 expects the <image> placeholder in its prompt template
    prompt = f"USER: <image>\n{text} ASSISTANT:"
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(
        model.device, torch.float16
    )
    outputs = model.generate(**inputs, max_new_tokens=256)
    answer = processor.batch_decode(outputs, skip_special_tokens=True)[0]
    # The decoded string includes the prompt; return only the model's reply
    return answer.split("ASSISTANT:")[-1].strip()

# Build the Gradio UI; type="pil" hands the function a PIL image directly
interface = gr.Interface(
    fn=chat,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
    outputs="text",
    title="LLaVA - Multimodal AI",
    description="This AI can chat & analyze images.",
)

# Launch the app
interface.launch()
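
Once the app is running, it can also be queried programmatically. A minimal sketch using the gradio_client package, assuming the default local URL and endpoint name for a gr.Interface; "cat.jpg" is a placeholder path:

from gradio_client import Client, handle_file

# Connect to the locally running Gradio app (default address)
client = Client("http://127.0.0.1:7860/")

# "/predict" is the default endpoint name for a gr.Interface
result = client.predict(
    handle_file("cat.jpg"),  # placeholder image path
    "What is shown in this image?",
    api_name="/predict",
)
print(result)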