"""Gradio demo app for ImageCraft: upload an image, get speech and text back."""

import os

import gradio as gr
import spaces

# These environment variables are set before the model module is imported --
# presumably so that any CUDA-using library it pulls in sees them at import
# time (NOTE(review): confirm this ordering is required).
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["USER"] = "imagecraft"

from src.model.modules.imagecraft import ImageCraft  # noqa: E402

# Loaded once at import time and shared by every request.
model = ImageCraft.from_pretrained("nsandiman/imagecraft-ft-co-224")

# Image pre-filled in the upload widget when the page first loads.
default_image = "media/images/3.jpg"


@spaces.GPU
def generate(image_path):
    """Generate speech and a transcript for the image at *image_path*.

    Args:
        image_path: Filesystem path of the uploaded image, as supplied by the
            ``gr.Image(type="filepath")`` input. Gradio passes ``None`` when
            the image field is empty.

    Returns:
        An ``(audio_buffer, transcript)`` tuple, ordered to match the
        interface's ``[Audio, Textbox]`` outputs.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of an error from inside the model call.
    """
    if not image_path:
        raise gr.Error("Please upload an image first.")

    # The model returns (transcript, audio); the UI expects audio first.
    transcript, audio_buffer = model.generate(image_path, output_type="buffer")
    return audio_buffer, transcript


imagecraft_app = gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="filepath", label="Upload an image", value=default_image),
    ],
    outputs=[gr.Audio(label="Speech"), gr.Textbox(label="Text")],
    title="ImageCraft",
    description="Upload an image and get the speech responses.",
    allow_flagging="never",
)

if __name__ == "__main__":
    imagecraft_app.launch()