"""app.py — Gradio ZeroGPU demo: write a poem about an uploaded image with LLaVA-NeXT."""
import gradio as gr
import spaces
import torch
from transformers import (
    LlavaNextProcessor,
    LlavaNextForConditionalGeneration,
)

MODEL_ID = "llava-hf/llava-v1.6-mistral-7b-hf"

# Load model and processor.
# fp16 halves the memory footprint of the 7B checkpoint so it fits
# comfortably on the ZeroGPU device.
model = LlavaNextForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
)
model.to("cuda:0")
processor = LlavaNextProcessor.from_pretrained(MODEL_ID)


@spaces.GPU(duration=240)
def generate_caption(image):
    """Generate a poem from an image.

    Args:
        image: PIL image supplied by the Gradio ``Image`` component.

    Returns:
        str: the generated poem, with the prompt tokens stripped.
    """
    # The <image> placeholder is required: it tells the processor where to
    # splice the image features into the token sequence. Without it the
    # model never actually sees the picture.
    prompt = "[INST] <image>\nWrite a poem about this picture [/INST]"

    # Keyword arguments (text=/images=) are deliberate — the positional
    # order of the processor's first two arguments has changed between
    # transformers releases.
    inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda:0")

    # Autoregressively complete the prompt.
    output = model.generate(**inputs, max_new_tokens=200)

    # Slice off the prompt tokens so the textbox shows only the poem,
    # not the instruction echoed back.
    generated_tokens = output[0][inputs["input_ids"].shape[-1]:]
    return processor.decode(generated_tokens, skip_special_tokens=True)


# Define the Gradio interface.
description = """This is a demo of [`llava-hf/llava-v1.6-mistral-7b-hf`](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf) hosted with ZeroGPU."""
iface = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Textbox(label="Generated Poem"),
    description=description,
)

# Launch the interface.
iface.launch()