muhtasham committed on
Commit
0283c36
1 Parent(s): c2377c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -8
app.py CHANGED
@@ -5,14 +5,14 @@ import shortuuid
5
  from transformers import pipeline
6
 
7
  #input voice/text
8
- #input text to latent/dalle
9
- #do zero-shot classification of the output
10
- #tts your output looks like "label of zero-shot"
11
 
12
  asr = pipeline("automatic-speech-recognition")
13
  latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
14
- #zero = pipeline("zero-shot-image-classification", model="openai/clip-vit-base-patch32")
15
- #tts = gr.Interface.load("spaces/osanseviero/tortoisse-tts")
16
 
17
  def text2image_latent(text, steps, width, height, images, diversity):
18
  print(text)
@@ -43,6 +43,9 @@ def speech_to_text(mic=None, file=None):
43
  transcription = asr(audio)["text"]
44
  return transcription
45
 
 
 
 
46
 
47
  with gr.Blocks() as demo:
48
  with gr.Row():
@@ -50,7 +53,7 @@ with gr.Blocks() as demo:
50
  audio_file =[
51
  gr.Audio(source="microphone", type="filepath", optional=True),
52
  gr.Audio(source="upload", type="filepath", optional=True)]
53
- text = gr.Textbox()
54
  with gr.Row():
55
  speech_to_text = gr.Button("Speech to text go brrr")
56
  with gr.Column():
@@ -59,11 +62,14 @@ with gr.Blocks() as demo:
59
  height = gr.inputs.Slider(label="Height", default=256, step=32, maximum = 256, minimum=32)
60
  images = gr.inputs.Slider(label="Images - How many images you wish to generate", default=4, step=1, minimum=1, maximum=4)
61
  diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=15.0, minimum=1.0, maximum=15.0)
62
- with gr.Column():
63
- gallery = gr.Gallery(label="Individual images")
64
  with gr.Row():
65
  get_image_latent = gr.Button("Generate Image", css={"margin-top": "1em"})
 
 
 
66
 
 
67
  speech_to_text.click(speech_to_text, inputs=audio_file, outputs=text)
68
  get_image_latent.click(text2image_latent, inputs=[text,steps,width,height,images,diversity], outputs=gallery)
69
 
 
5
  from transformers import pipeline
6
 
7
  #input voice/text
8
+ #convert text to image via dalle
9
+ #given list of labels and a selected image from gallery do zero-shot classification
10
+ #tts your output label as: Your output looks like "label of zero-shot"
11
 
12
  asr = pipeline("automatic-speech-recognition")
13
  latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
14
+ zero = pipeline("zero-shot-image-classification")
15
+ tts = gr.Interface.load("spaces/osanseviero/tortoisse-tts")
16
 
17
  def text2image_latent(text, steps, width, height, images, diversity):
18
  print(text)
 
43
  transcription = asr(audio)["text"]
44
  return transcription
45
 
46
+ #def zero_shot(image, labels_text):
47
+
48
+
49
 
50
  with gr.Blocks() as demo:
51
  with gr.Row():
 
53
  audio_file =[
54
  gr.Audio(source="microphone", type="filepath", optional=True),
55
  gr.Audio(source="upload", type="filepath", optional=True)]
56
+ text = gr.Textbox(default="If you dont want to record or upload your voice you can input text here")
57
  with gr.Row():
58
  speech_to_text = gr.Button("Speech to text go brrr")
59
  with gr.Column():
 
62
  height = gr.inputs.Slider(label="Height", default=256, step=32, maximum = 256, minimum=32)
63
  images = gr.inputs.Slider(label="Images - How many images you wish to generate", default=4, step=1, minimum=1, maximum=4)
64
  diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=15.0, minimum=1.0, maximum=15.0)
65
+ gallery = gr.Gallery(label="Individual images", show_label=True)
 
66
  with gr.Row():
67
  get_image_latent = gr.Button("Generate Image", css={"margin-top": "1em"})
68
+ #with gr.Column():
69
+
70
+ #with gr.Row():
71
 
72
+
73
  speech_to_text.click(speech_to_text, inputs=audio_file, outputs=text)
74
  get_image_latent.click(text2image_latent, inputs=[text,steps,width,height,images,diversity], outputs=gallery)
75