muhtasham committed
Commit ac022fd
1 Parent(s): 974534b

Update app.py

Files changed (1)
  1. app.py +13 -11
app.py CHANGED
@@ -11,7 +11,7 @@ from transformers import pipeline
 
 asr = pipeline("automatic-speech-recognition")
 latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
-#zero = pipeline("zero-shot-image-classification")
+zero = pipeline("zero-shot-image-classification")
 #tts = gr.Interface.load("spaces/osanseviero/tortoisse-tts")
 
 def text2image_latent(text, steps, width, height, images, diversity):
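
The hunk above swaps the commented-out pipeline for a live one. For context, a minimal sketch of how the `zero-shot-image-classification` pipeline is typically called; the image path and label list here are illustrative, not from the commit:

```python
from transformers import pipeline

# CLIP-style zero-shot classification: scores an image against arbitrary
# text labels without task-specific fine-tuning.
zero = pipeline("zero-shot-image-classification")

# Illustrative input: the pipeline accepts a file path, URL, or PIL.Image,
# and the labels are passed as a list via the candidate_labels keyword.
results = zero("cat.jpg", candidate_labels=["cat", "dog", "bird"])

# `results` is a list of {"score": float, "label": str} dicts, highest score first.
print(results[0]["label"], results[0]["score"])
```

Note that the new `zero_shot` helper below passes its labels positionally as one comma-separated string, while the pipeline expects `candidate_labels` as a list, so the string would normally be split first.
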
@@ -43,19 +43,19 @@ def speech_to_text(mic=None, file=None):
     transcription = asr(audio)["text"]
     return transcription
 
-#def zero_shot(image, labels_text):
+def zero_shot(image, text_input):
+    results = zero(image, text_input)
+    return([results])
 
-
-
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
             audio_file =[
                 gr.Audio(source="microphone", type="filepath", optional=True),
                 gr.Audio(source="upload", type="filepath", optional=True)]
-            text = gr.Textbox(default="If you dont want to record or upload your voice you can input text here")
+            text = gr.Textbox(placeholder="If you dont want to record or upload your voice you can input text here")
             with gr.Row():
-                speech_to_text = gr.Button("Speech to text go brrr")
+                speech_to_text = gr.Button("Speech to text go brrr", css={"margin-top": "1em"})
         with gr.Column():
             steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=50,maximum=50,minimum=1,step=1)
             width = gr.inputs.Slider(label="Width", default=256, step=32, maximum=256, minimum=32)
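
`gr.Label` renders a mapping from label to confidence, while the committed `zero_shot` returns the raw pipeline output wrapped in a list. A hedged sketch of a variant that splits the comma-separated labels and reshapes the result for `gr.Label`; the splitting and dict-building are assumptions, not part of this commit:

```python
def zero_shot(image, text_input):
    # Assumed helper logic: turn "cat, dog, bird" into ["cat", "dog", "bird"].
    labels = [lab.strip() for lab in text_input.split(",") if lab.strip()]
    results = zero(image, candidate_labels=labels)
    # gr.Label expects a {label: confidence} mapping.
    return {r["label"]: r["score"] for r in results}
```
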
@@ -64,14 +64,16 @@
             diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=15.0, minimum=1.0, maximum=15.0)
             gallery = gr.Gallery(label="Individual images", show_label=True)
             with gr.Row():
-                get_image_latent = gr.Button("Generate Image", css={"margin-top": "1em"})
-        #with gr.Column():
-
-        #with gr.Row():
+                get_image_latent = gr.Button("Generate Image go brr", css={"margin-top": "1em"})
+        with gr.Column():
+            text_input = gr.Textbox(placeholder="input a list of labels separated by commas")
+            label = gr.Label()
+            with gr.Row():
+                zero_shot_clf = gr.Button("Classify Image go brr", css={"margin-top": "1em"})
 
 
     speech_to_text.click(speech_to_text, inputs=audio_file, outputs=text)
     get_image_latent.click(text2image_latent, inputs=[text,steps,width,height,images,diversity], outputs=gallery)
-
+    zero_shot_clf.click(zero_shot, inputs=[gallery[0],text_input], outputs = label)
 
 demo.launch(enable_queue=False)
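
Two wiring details worth flagging. `gallery[0]` indexes a `gr.Gallery` component, which is not subscriptable, and the `gr.Button` bound to the name `speech_to_text` shadows the `speech_to_text()` function defined earlier, so the first `click()` call passes the Button object itself as the callback. A hedged sketch of one way to rewire both; the `gr.Image` input and the `_btn` name are assumptions, not part of this commit:

```python
# Rename the button so it no longer shadows the callback function.
speech_to_text_btn = gr.Button("Speech to text go brrr")

# A Gallery cannot be indexed as an input, so feed the classifier a
# dedicated image component instead.
image_input = gr.Image(type="filepath")

speech_to_text_btn.click(speech_to_text, inputs=audio_file, outputs=text)
zero_shot_clf.click(zero_shot, inputs=[image_input, text_input], outputs=label)
```
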
 