import gradio as gr
from transformers import pipeline

# Image-classification pipeline. It is bound to its own name on purpose:
# binding it to `text_to_speech` would be overwritten by the function
# definition below, making the function call itself recursively.
classifier = pipeline('image-classification')

# Sample image path; not referenced by the interface below.
input_img = 'abc.jpeg'


def text_to_speech(input_img):
    """Classify an image and return a label -> score mapping.

    The function name is kept so existing references keep working, although
    the pipeline configured in this file performs image classification and
    produces no audio.

    Args:
        input_img: Image supplied by the Gradio ``Image`` component (a PIL
            image, because the component is created with ``type='pil'``),
            or ``None`` when no image was provided.

    Returns:
        A dict mapping each predicted label to its score, suitable for
        Gradio's ``"label"`` output, or ``None`` when there is no input.
    """
    if input_img is None:
        # Nothing to classify (e.g. the image input was cleared).
        return None

    # The pipeline yields a list of {"label": ..., "score": ...} dicts.
    predictions = classifier(input_img)
    return {item['label']: item['score'] for item in predictions}


# `type='pil'` hands the handler a PIL image, which the pipeline accepts;
# the "label" output renders the label -> confidence mapping returned above.
interface = gr.Interface(
    text_to_speech,
    gr.Image(type='pil'),
    "label",
    theme='dark',
)
interface.launch()