Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -4,31 +4,30 @@ import gradio as gr
 import shortuuid
 import numpy as np
 from transformers import pipeline
+from moviepy.editor import AudioFileClip, ImageClip
 
 asr = pipeline("automatic-speech-recognition")
 latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
 
-#
-
-
-
-
-
-
-
-
+# function by Epoching
+def text2image_latent(text):
+    steps=25
+    width=256
+    height=256
+    num_images=1
+    diversity=5
+    image_bytes = latent(text, steps, width, height, num_images, diversity)
+
+    # Algo from spaces/Gradio-Blocks/latent_gpt2_story/blob/main/app.py
+    generated_images = []
+    for image in image_bytes[1]:
         image_str = image[0]
         image_str = image_str.replace("data:image/png;base64,","")
         decoded_bytes = base64.decodebytes(bytes(image_str, "utf-8"))
         img = Image.open(io.BytesIO(decoded_bytes))
-
-
-
-        os.makedirs(temp_dir, exist_ok=True)
-        image_path = f'{temp_dir}/{url}.png'
-        img.save(f'{temp_dir}/{url}.png')
-        image_paths.append(image_path)
-    return(image_paths)
+        generated_images.append(img)
+
+    return generated_images
 
 
 def speech_to_text(mic=None, file=None):
@@ -41,6 +40,24 @@ def speech_to_text(mic=None, file=None):
     transcription = asr(audio)["text"]
     return transcription
 
+
+
+def combine_audio_image(audio_file, gallery):
+    "Create and rerturn a combined image from the audio and image"
+    generated_images = []
+    for image_str in gallery:
+        image_str = image_str.replace("data:image/png;base64,","")
+        decoded_bytes = base64.decodebytes(bytes(image_str, "utf-8"))
+        img = Image.open(io.BytesIO(decoded_bytes))
+        generated_images.append(img)
+
+    # combine generated images with audio file and return "out.mp4"
+    audio_clip = AudioFileClip(audio_file)
+    generated_images_clip = ImageClip(np.array(generated_images))
+    final_clip = audio_clip.set_audio(generated_images_clip.audio).set_duration(audio_clip.duration)
+    final_clip.write_videofile("out.mp4")
+
+    return "out.mp4"
 
 with gr.Blocks() as demo:
     gr.Markdown( """
@@ -53,22 +70,22 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
             audio_file =[
-                gr.Audio(source="microphone", type="filepath", optional=True, label="Speak here..."),
+                gr.Audio(source="microphone", type="filepath", optional=True, label="Speak here..."),
                 gr.Audio(source="upload", type="filepath", optional=True, label="Or if you want upload here...")]
             text = gr.Textbox(label="Text", placeholder="If you dont want to record or upload your voice you can input text here")
             with gr.Row():
                 s2t = gr.Button("Speech to text go brrr")
         with gr.Column():
-            steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=1,maximum=50,minimum=1,step=1)
-            width = gr.inputs.Slider(label="Width", default=256, step=32, maximum=256, minimum=32)
-            height = gr.inputs.Slider(label="Height", default=256, step=32, maximum=256, minimum=32)
-            images = gr.inputs.Slider(label="Images - How many images you wish to generate", default=1, step=1, minimum=1, maximum=4)
-            diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=15.0, minimum=1.0, maximum=15.0)
             gallery = gr.Gallery(label="Individual images")
             with gr.Row():
-                get_image_latent = gr.Button("Generate Image go brr")
-
+                get_image_latent = gr.Button("Generate Image go brr")
+        with gr.Column():
+            video = gr.Video(label="Video with audio")
+            with gr.Row():
+                get_video_latent = gr.Button("Generate Video go brr")
+
     s2t.click(speech_to_text, inputs=audio_file, outputs=text)
-    get_image_latent.click(text2image_latent, inputs=
+    get_image_latent.click(text2image_latent, inputs=text, outputs=gallery)
+    get_video_latent.click(combine_audio_image, inputs=[audio_file, gallery], output=video)
 
 demo.launch(enable_queue=True, debug=True)
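The same three-line base64 decode now appears in both `text2image_latent` and `combine_audio_image`, and both copies lean on `base64`, `io`, and `PIL.Image` being imported in lines 1-3, above the first hunk. A small shared helper would keep the two copies in sync; `b64_to_pil` below is a hypothetical name, not part of the commit:

```python
import base64
import io

from PIL import Image

# Hypothetical helper, not part of the commit: one place to decode the
# "data:image/png;base64,..." strings returned by the latentdiffusion space.
def b64_to_pil(image_str):
    image_str = image_str.replace("data:image/png;base64,", "")
    decoded_bytes = base64.decodebytes(bytes(image_str, "utf-8"))
    return Image.open(io.BytesIO(decoded_bytes))
```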
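The Space's "Runtime error" status is consistent with two problems in the new `combine_audio_image` (whose docstring's "rerturn a combined image" presumably means "return a combined video"): moviepy's `ImageClip` wraps a single H×W×3 frame, so stacking several images into one array with `np.array(generated_images)` does not produce a slideshow, and the clips are crossed at the end, since `audio_clip.set_audio(...)` leaves `final_clip` an audio clip, which has no `write_videofile` method. A minimal sketch of a working variant, assuming moviepy 1.x, equally sized frames (the committed defaults generate 256×256 images), and an even split of the audio's duration across images:

```python
import numpy as np
from moviepy.editor import AudioFileClip, ImageSequenceClip

# Sketch, not the committed code: takes decoded PIL images instead of raw
# gallery strings, builds the video clip first, then attaches the audio.
def combine_audio_image(audio_file, images):
    audio_clip = AudioFileClip(audio_file)
    frames = [np.array(img) for img in images]  # ImageSequenceClip wants arrays
    durations = [audio_clip.duration / len(frames)] * len(frames)
    video_clip = ImageSequenceClip(frames, durations=durations)
    final_clip = video_clip.set_audio(audio_clip)  # audio goes on the video clip
    final_clip.write_videofile("out.mp4", fps=24)
    return "out.mp4"
```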
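The last added line also has two wiring problems: Gradio event listeners take `outputs` (plural), so `output=video` should fail with a `TypeError`, and `audio_file` is a plain Python list of two `gr.Audio` components, so `inputs=[audio_file, gallery]` nests a list where Gradio expects components. Flattening it means `combine_audio_image` would need to accept the microphone and upload values separately, the way `speech_to_text(mic=None, file=None)` already does. A hedged sketch of the corrected wiring:

```python
# Sketch: assumes combine_audio_image is reworked to take (mic, file, gallery),
# mirroring speech_to_text's two optional audio inputs.
get_video_latent.click(
    combine_audio_image,
    inputs=[*audio_file, gallery],  # mic, upload, gallery as three components
    outputs=video,                  # `outputs`, not `output`
)
```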