Spaces:
Runtime error
Runtime error
File size: 4,258 Bytes
79d11aa d25de88 79d11aa ccbd1ab 79d11aa d25de88 79d11aa 66322a4 79d11aa ce803ce ccbd1ab 79d11aa ccbd1ab 79d11aa ccbd1ab 79d11aa 086eb0b 79d11aa 086eb0b 79d11aa 9f7fe25 79d11aa 086eb0b 9f7fe25 79d11aa 086eb0b 79d11aa 66322a4 79d11aa 66322a4 79d11aa 647c4c1 79d11aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
import gradio as gr
from PIL import Image
import os
#from diffusers import StableDiffusionPipeline
# Remote Hugging Face Spaces loaded as callables at import time.
# `whisper` is called by translate_better() with fn_index=0.
whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")
# Alternative Stable Diffusion Space, currently disabled:
#stable_diffusion = gr.Blocks.load(name="spaces/stabilityai/stable-diffusion")
# `stable_diffusion` is called by get_images() with fn_index=2.
stable_diffusion = gr.Blocks.load(name="spaces/runwayml/stable-diffusion-v1-5")
# ----------------------------------------
# App title string; not referenced elsewhere in the visible part of this file.
title="Talking to Stable Diffusion"
# ----------------------------------------
def get_images(prompt):
    """Generate images for *prompt* and return their file paths.

    The prompt is sent to the remote ``stable_diffusion`` Space (endpoint
    ``fn_index=2``); the value it returns is treated as a directory that
    holds the generated images.

    Args:
        prompt: Text prompt forwarded to Stable Diffusion.

    Returns:
        A list with the path of every entry in the returned directory,
        sorted by entry name.
    """
    gallery_dir = stable_diffusion(prompt, fn_index=2)
    # os.listdir() returns entries in arbitrary order; sort them so the
    # gallery shows the images in a stable, reproducible order.
    return [
        os.path.join(gallery_dir, name)
        for name in sorted(os.listdir(gallery_dir))
    ]
def translate_better(audio):
    """Send *audio* to Whisper twice and return both resulting texts.

    The remote ``whisper`` Space is called once with the "transcribe" task
    and once with the "translate" task (in that order); both results are
    logged to stdout before being returned.

    Args:
        audio: Audio input forwarded unchanged as the first argument of
            the Whisper endpoint (the UI passes the recorder's value).

    Returns:
        A ``(transcription, translation)`` tuple.
    """
    print("\nβ\nSending audio to Whisper ...\nβ\n")
    # Same endpoint, same arguments; only the task name differs.
    # NOTE(review): the second positional argument is always None here;
    # its meaning is defined by the remote Space - confirm.
    spoken_text, translated_text = [
        whisper(audio, None, task, fn_index=0)
        for task in ("transcribe", "translate")
    ]
    print("transcript: " + spoken_text)
    print("βββββββββββββββββββββββββββββββββββββββββββ")
    print("translated: " + translated_text)
    return spoken_text, translated_text
# UI definition: record audio -> check Whisper output -> show generated images.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from statement order; confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("\n## 1. Say what you want:\n")
    with gr.Column():
        with gr.Tab(label="Record audio input", elem_id="record_tab"):
            with gr.Column():
                # Microphone recorder; its value is handed to translate_better().
                record_input = gr.Audio(
                    source="microphone",
                    type="filepath",
                    show_label=False,
                    elem_id="record_btn",
                )
                with gr.Row():
                    audio_r_translate = gr.Button("Check Whisper first", elem_id="check_btn_1")
                    audio_r_direct_sd = gr.Button("Generating Images", elem_id="magic_btn_1")
        # Created with visible=False; none of these sliders is wired to an
        # event handler in this block.
        with gr.Accordion(label="Stable Diffusion Settings", elem_id="sd_settings", visible=False):
            with gr.Row():
                guidance_scale = gr.Slider(2, 15, value=7, label="Guidance Scale")
                nb_iterations = gr.Slider(10, 50, value=25, step=1, label="Steps")
                seed = gr.Slider(label="Seed", minimum=0, maximum=2147483647, step=1, randomize=True)
        gr.Markdown("\n## 2. Check Whisper output:\n")
        with gr.Row():
            transcripted_output = gr.Textbox(
                label="Transcription in your detected spoken language",
                lines=3,
                elem_id="transcripted",
            )
            # This box receives the result of Whisper's "translate" task, so it
            # must not reuse the transcription label.
            translated_output = gr.Textbox(
                label="Translation in English",
                lines=3,
                elem_id="translated",
            )
        gr.Markdown("\n## 3. Wait for Stable Diffusion Results about ~10 seconds\n")
        sd_output = gr.Gallery().style(grid=2, height="auto")
    # Event wiring is kept inside the Blocks context.
    # Button 1: audio -> (transcription, translation) textboxes.
    audio_r_translate.click(
        translate_better,
        inputs=[record_input],
        outputs=[transcripted_output, translated_output],
    )
    # Button 2: translated text -> image gallery.
    audio_r_direct_sd.click(
        get_images,
        inputs=[translated_output],
        outputs=sd_output,
    )
if __name__ == "__main__":
    # Enable request queuing (max_size=32, concurrency_count=20), then
    # start the app.
    demo.queue(max_size=32, concurrency_count=20).launch()