"""app.py

Gradio demo: generate a poem from an uploaded image with
llava-hf/llava-v1.6-mistral-7b-hf, hosted on a ZeroGPU Space.
"""
import gradio as gr
import spaces
import torch
from transformers import (
    LlavaNextProcessor,
    LlavaNextForConditionalGeneration,
)

MODEL_ID = "llava-hf/llava-v1.6-mistral-7b-hf"

# Load in float16: halves GPU memory versus the fp32 default and matches
# the dtype the checkpoint ships in — important on a shared ZeroGPU.
model = LlavaNextForConditionalGeneration.from_pretrained(
    MODEL_ID, torch_dtype=torch.float16
)
model.to("cuda:0")
processor = LlavaNextProcessor.from_pretrained(MODEL_ID)


@spaces.GPU(duration=240)
def generate_caption(image):
    """Generate a poem inspired by *image*.

    Args:
        image: PIL image supplied by the Gradio ``Image`` component.

    Returns:
        str: the generated poem, with the echoed prompt and special
        tokens stripped.
    """
    # The <image> placeholder is required: it tells the processor where to
    # splice the image features into the token sequence. Without it the
    # model never actually conditions on the picture.
    prompt = "[INST] <image>\nWrite a poem about this picture [/INST]"

    # Use keyword arguments — the positional (text, image) call order is
    # deprecated in recent transformers releases.
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Autoregressively complete the prompt.
    output = model.generate(**inputs, max_new_tokens=200)

    # Slice off the echoed prompt tokens; skip_special_tokens drops
    # markers such as </s> from the user-visible text.
    prompt_len = inputs["input_ids"].shape[1]
    decoded = processor.batch_decode(
        output[:, prompt_len:], skip_special_tokens=True
    )[0]
    return decoded.strip()


# --- Gradio interface -------------------------------------------------------
description = """Enter an image, and receive an inspired poem. This is a demo of [`llava-hf/llava-v1.6-mistral-7b-hf`](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf) hosted with ZeroGPU."""

iface = gr.Interface(
    title="The Poet",
    fn=generate_caption,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Textbox(label="Generated Poem"),
    description=description,
)

# Launch the interface (Spaces runs this file directly as the entry point).
iface.launch()