import gradio as gr
from transformers import pipeline 
# Image-classification pipeline. It is bound to its own name: previously the
# pipeline and the handler function below shared the name `text_to_speech`,
# so the `def` replaced the pipeline and the handler ended up calling itself
# until it hit RecursionError.
_classifier = pipeline('image-classification')

# Sample image path; not referenced by the code below (the handler's
# parameter of the same name shadows it inside the function).
input_img = 'abc.jpeg'


def text_to_speech(input_img):
    """Classify an image and return the predicted labels with their scores.

    The function name is kept for backward compatibility, although the
    underlying pipeline performs image classification, not speech synthesis.

    Args:
        input_img: The image to classify. The interface below is configured
            to pass a PIL image; ``None`` is tolerated for the case where
            no image was supplied.

    Returns:
        A ``{label: score}`` dict suitable for a Gradio label output, or
        ``None`` when no image was supplied.
    """
    if input_img is None:
        return None
    predictions = _classifier(input_img)
    # The pipeline yields a list of {"label": ..., "score": ...} dicts;
    # flatten it into the mapping form the label output component displays.
    return {item["label"]: float(item["score"]) for item in predictions}


# type="pil" so the handler receives a PIL image, which the pipeline accepts
# directly. The output is "label" because the pipeline produces class scores,
# not audio.
interface = gr.Interface(
    text_to_speech,
    gr.Image(type="pil"),
    "label",
    theme='dark',
)
interface.launch()