"""Gradio demo: turn a text prompt into speech or look up a matching image.

Two remote models are loaded once at import time and wired to two buttons
inside a single ``gr.Blocks`` layout.
"""

import gradio as gr

# Remote callables, loaded once at start-up so each button click reuses them.
fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")
clip = gr.Interface.load("spaces/DrishtiSharma/Text-to-Image-search-using-CLIP")


def text2speech(text):
    """Return whatever the loaded ``fastspeech`` interface produces for *text*."""
    return fastspeech(text)


def text2image(text):
    """Return the first result of the ``clip`` interface for *text* as an image.

    The first element of the interface's result is passed through Gradio's
    base64 decoder.
    """
    # NOTE(review): assumes clip() returns an indexable result whose first
    # element is a base64-encoded image -- confirm against the Space's output.
    encoded = clip(text)[0]
    return gr.processing_utils.decode_base64_to_image(encoded)


block = gr.Blocks()

with block:
    # NOTE(review): the gr.inputs / gr.outputs namespaces are deprecated in
    # later Gradio releases -- confirm against the pinned Gradio version
    # before upgrading.
    text = gr.inputs.Textbox(placeholder="Try writing something..")

    with gr.Column():
        with gr.Row():
            get_audio = gr.Button("generate audio")
            get_image = gr.Button("generate image")
        with gr.Row():
            speech = gr.outputs.Audio()
            image = gr.outputs.Image()

    # Event listeners are registered while the Blocks context is still open.
    get_audio.click(text2speech, inputs=text, outputs=speech)
    get_image.click(text2image, inputs=text, outputs=image)

block.launch()