Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import io, os, base64
|
2 |
+
from PIL import Image
|
3 |
+
import gradio as gr
|
4 |
+
import shortuuid
|
5 |
+
from transformers import pipeline
|
6 |
+
|
7 |
+
# Pipeline plan:
#   1. take input as voice or text
#   2. feed the text to the latent-diffusion (DALL-E-style) image generator
#   3. run zero-shot classification on the generated output
#   4. TTS the result as "your output looks like <zero-shot label>"
|
11 |
+
|
12 |
+
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
|
13 |
+
latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
|
14 |
+
zero = pipeline("zero-shot-image-classification", model="openai/clip-vit-base-patch32")
|
15 |
+
tts = gr.Interface.load("spaces/osanseviero/tortoisse-tts")
|
16 |
+
|
17 |
+
def text2image_latent(text, steps, width, height, images, diversity):
|
18 |
+
print(text)
|
19 |
+
results = latent(text, steps, width, height, images, diversity)
|
20 |
+
image_paths = []
|
21 |
+
for image in results[1]:
|
22 |
+
image_str = image[0]
|
23 |
+
image_str = image_str.replace("data:image/png;base64,","")
|
24 |
+
decoded_bytes = base64.decodebytes(bytes(image_str, "utf-8"))
|
25 |
+
img = Image.open(io.BytesIO(decoded_bytes))
|
26 |
+
url = shortuuid.uuid()
|
27 |
+
temp_dir = './tmp'
|
28 |
+
if not os.path.exists(temp_dir):
|
29 |
+
os.makedirs(temp_dir, exist_ok=True)
|
30 |
+
image_path = f'{temp_dir}/{url}.png'
|
31 |
+
img.save(f'{temp_dir}/{url}.png')
|
32 |
+
image_paths.append(image_path)
|
33 |
+
return(image_paths)
|
34 |
+
|
35 |
+
|
36 |
+
def speech_to_text(mic=None, file=None):
|
37 |
+
if mic is not None:
|
38 |
+
audio = mic
|
39 |
+
elif file is not None:
|
40 |
+
audio = file
|
41 |
+
else:
|
42 |
+
return "You must either provide a mic recording or a file"
|
43 |
+
transcription = asr(audio)["text"]
|
44 |
+
return transcription
|
45 |
+
|
46 |
+
|
47 |
+
with gr.Blocks() as demo:
|
48 |
+
with gr.Row():
|
49 |
+
with gr.Column():
|
50 |
+
audio_file =[
|
51 |
+
gr.Audio(source="microphone", type="filepath", optional=True),
|
52 |
+
gr.Audio(source="upload", type="filepath", optional=True)]
|
53 |
+
text = gr.Textbox()
|
54 |
+
with gr.Row():
|
55 |
+
speech_to_text = gr.Button("Speech to text go brrr")
|
56 |
+
with gr.Column():
|
57 |
+
steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=50,maximum=50,minimum=1,step=1)
|
58 |
+
width = gr.inputs.Slider(label="Width", default=256, step=32, maximum=256, minimum=32)
|
59 |
+
height = gr.inputs.Slider(label="Height", default=256, step=32, maximum = 256, minimum=32)
|
60 |
+
images = gr.inputs.Slider(label="Images - How many images you wish to generate", default=4, step=1, minimum=1, maximum=4)
|
61 |
+
diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=15.0, minimum=1.0, maximum=15.0)
|
62 |
+
with gr.Column():
|
63 |
+
gallery = gr.Gallery(label="Individual images")
|
64 |
+
with gr.Row():
|
65 |
+
get_image_latent = gr.Button("Generate Image", css={"margin-top": "1em"})
|
66 |
+
|
67 |
+
speech_to_text.click(speech_to_text, inputs=audio_file, outputs=text)
|
68 |
+
get_image_latent.click(text2image_latent, inputs=[text,steps,width,height,images,diversity], outputs=gallery)
|
69 |
+
|
70 |
+
|
71 |
+
demo.launch(enable_queue=False)
|