# Hugging Face Space: "Talking to Stable Diffusion"
# Voice prompt -> Whisper (transcribe/translate) -> Stable Diffusion images.
import os

import gradio as gr
from PIL import Image

# Remote Spaces used as backends:
# - Whisper large-v2 for speech-to-text (transcription + English translation)
# - Stable Diffusion v1.5 for text-to-image generation
whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")
stable_diffusion = gr.Blocks.load(name="spaces/runwayml/stable-diffusion-v1-5")

# ----------------------------------------
title = "Talking to Stable Diffusion"
# ----------------------------------------
def get_images(prompt):
    """Generate images for *prompt* via the remote Stable Diffusion Space.

    The Space endpoint at fn_index=2 writes its results into a gallery
    directory and returns that directory's path; we return the full path
    of every file inside it so Gradio can display them in a Gallery.

    Args:
        prompt: Text prompt for Stable Diffusion.

    Returns:
        List of filesystem paths to the generated images.
    """
    gallery_dir = stable_diffusion(prompt, fn_index=2)
    return [os.path.join(gallery_dir, img) for img in os.listdir(gallery_dir)]
def translate_better(audio):
    """Run Whisper twice on a recorded audio file.

    One pass in "transcribe" mode (text in the detected spoken language)
    and one pass in "translate" mode (text translated to English).

    Args:
        audio: Filepath to the recorded audio clip.

    Returns:
        Tuple of (transcription, English translation).
    """
    print("Sending audio to Whisper ...")
    transcribe_text_result = whisper(audio, None, "transcribe", fn_index=0)
    translate_text_result = whisper(audio, None, "translate", fn_index=0)
    print("transcript: " + transcribe_text_result)
    print("-" * 43)
    print("translated: " + translate_text_result)
    return transcribe_text_result, translate_text_result
# UI layout. NOTE(review): the original indentation was destroyed by
# extraction; the nesting below is reconstructed from the component order
# and typical Gradio Blocks structure — confirm against the live Space.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        ## 1. Say what you want:
        """
    )
    with gr.Column():
        with gr.Tab(label="Record audio input", elem_id="record_tab"):
            with gr.Column():
                record_input = gr.Audio(
                    source="microphone",
                    type="filepath",
                    show_label=False,
                    elem_id="record_btn",
                )
                with gr.Row():
                    audio_r_translate = gr.Button("Check Whisper first", elem_id="check_btn_1")
                    audio_r_direct_sd = gr.Button("Generating Images", elem_id="magic_btn_1")

        # Hidden settings panel; these sliders are not currently wired into
        # get_images (the remote SD endpoint is called with the prompt only).
        with gr.Accordion(label="Stable Diffusion Settings", elem_id="sd_settings", visible=False):
            with gr.Row():
                guidance_scale = gr.Slider(2, 15, value=7, label="Guidance Scale")
                nb_iterations = gr.Slider(10, 50, value=25, step=1, label="Steps")
                seed = gr.Slider(label="Seed", minimum=0, maximum=2147483647, step=1, randomize=True)

    gr.Markdown(
        """
        ## 2. Check Whisper output:
        """
    )
    with gr.Row():
        transcripted_output = gr.Textbox(
            label="Transcription in your detected spoken language",
            lines=3,
            elem_id="transcripted",
        )
        # BUG FIX: original label was a copy-paste of the transcription
        # label; this box holds the English translation (see translate_better).
        translated_output = gr.Textbox(
            label="Transcription translated to English",
            lines=3,
            elem_id="translated",
        )

    gr.Markdown(
        """
        ## 3. Wait for Stable Diffusion Results about ~10 seconds
        """
    )
    sd_output = gr.Gallery().style(grid=2, height="auto")

    # Button 1: show both Whisper outputs so the user can sanity-check them.
    audio_r_translate.click(
        translate_better,
        inputs=[record_input],
        outputs=[transcripted_output, translated_output],
    )
    # Button 2: feed the (already populated) English translation straight
    # into Stable Diffusion.
    audio_r_direct_sd.click(
        get_images,
        inputs=[translated_output],
        outputs=sd_output,
    )

if __name__ == "__main__":
    demo.queue(max_size=32, concurrency_count=20).launch()