muhtasham committed on
Commit 2adde1f • 1 Parent(s): affaf34

Update app.py

Files changed (1)
  1. app.py +9 -34
app.py CHANGED
@@ -7,7 +7,7 @@ from transformers import pipeline
 
 asr = pipeline("automatic-speech-recognition")
 latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
-zero = pipeline("zero-shot-image-classification")
+#zero = pipeline("zero-shot-image-classification")
 #zero = gr.Interface.load("spaces/Datatrooper/zero-shot-image-classification")
 #tts = gr.Interface.load("spaces/osanseviero/tortoisse-tts")
 
@@ -38,59 +38,34 @@ def speech_to_text(mic=None, file=None, state=""):
     else:
         return "You must either provide a mic recording or a file"
     transcription = asr(audio)["text"]
-    #state += text + " "
     return state
 
-def zero_shot(image, text_input):
-    PIL_image = Image.fromarray(np.uint8(image)).convert('RGB')
-    labels = labels_text.split(",")
-    res = pipe(images=PIL_image,
-               candidate_labels=labels,
-               hypothesis_template="This is a photo of a {}")
-    return {dic["label"]: dic["score"] for dic in res}
-
-def shot(image, labels_text):
-    PIL_image = Image.fromarray(np.uint8(image)).convert('RGB')
-    labels = labels_text.split(",")
-    res = pipe(images=PIL_image,
-               candidate_labels=labels,
-               hypothesis_template="This is a photo of a {}")
-    return {dic["label"]: dic["score"] for dic in res}
 
 with gr.Blocks() as demo:
     gr.Markdown("""
-    - 🎤 Input voice/text
-    - ✨ Convert voice/text to image via Latent Diffusion
-    - 🤖 Given list of labels and a selected image from gallery do zero-shot classification
-    - 🎛️ Coming soon: TTS(audio) your output label as: Your output looks like "label of zero-shot"
+    # 🎤 Sing or tell your story and let this Space ✨ visualize your story along
+    - Soon to be added
+    - Near real time(streaming option)
+    - Also allow playback of you audio relayed with video
     """)
     with gr.Row():
         with gr.Column():
             audio_file = [
-                gr.Audio(source="microphone", type="filepath", optional=True),
-                gr.Audio(source="upload", type="filepath", optional=True)]
-            text = gr.Textbox(label="Text", placeholder="If you dont want to record or upload your voice you can input text here")
+                gr.Audio(source="microphone", type="filepath")]
+            speech_to_text = gr.Button("Speech to text go brrr")
         with gr.Row():
-            speech_to_text = gr.Button("Speech to text go brrr", css={"margin-top": "1em"})
+            text = gr.Textbox(label="Text", placeholder="If you dont want to record or upload your voice you can input text here")
         with gr.Column():
-            steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate", default=50, maximum=50, minimum=1, step=1)
+            steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate", default=1, maximum=50, minimum=1, step=1)
             width = gr.inputs.Slider(label="Width", default=256, step=32, maximum=256, minimum=32)
             height = gr.inputs.Slider(label="Height", default=256, step=32, maximum=256, minimum=32)
             images = gr.inputs.Slider(label="Images - How many images you wish to generate", default=1, step=1, minimum=1, maximum=4)
             diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be", default=15.0, minimum=1.0, maximum=15.0)
-            #gallery = [gr.outputs.Image(type="pil"), gr.outputs.Textbox(label="Error")]
             gallery = gr.Gallery(label="Individual images")
         with gr.Row():
             get_image_latent = gr.Button("Generate Image go brr")
-        with gr.Column():
-            text_input = gr.Textbox(label="Candidate labels", placeholder="input a list of labels separated by commas")
-            label = gr.Label()
-        with gr.Row():
-            zero_shot_clf = gr.Button("Classify Image go brr")
-
 
     speech_to_text.click(speech_to_text, inputs=audio_file, outputs=text)
     get_image_latent.click(text2image_latent, inputs=[text, steps, width, height, images, diversity], outputs=gallery)
-    zero_shot_clf.click(zero_shot, inputs=[gallery, text_input], outputs=label)
 
 demo.launch()
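
For reference, the zero_shot helper deleted above was broken before this commit ever touched it: its parameter is named text_input while the body splits an undefined labels_text, and it calls an undefined pipe instead of the zero pipeline bound at the top of the file (the duplicate shot fixed the signature but kept the pipe bug). A minimal working sketch, assuming the same transformers zero-shot pipeline, could look like this:

from transformers import pipeline
from PIL import Image
import numpy as np

zero = pipeline("zero-shot-image-classification")

def zero_shot(image, labels_text):
    # Gradio hands the image over as a numpy array; convert it for the pipeline.
    pil_image = Image.fromarray(np.uint8(image)).convert("RGB")
    labels = [label.strip() for label in labels_text.split(",")]
    # Score each candidate label against the image; the pipeline returns a
    # list of {"label": ..., "score": ...} dicts.
    results = zero(pil_image,
                   candidate_labels=labels,
                   hypothesis_template="This is a photo of a {}")
    return {result["label"]: result["score"] for result in results}

Returning a plain {label: score} dict matches what a gr.Label output component expects, which is presumably why the original returned that shape.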
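
Two caveats survive into the resulting file. First, speech_to_text computes transcription but still returns the untouched state argument, so the output textbox stays empty. Second, inside the gr.Blocks context, speech_to_text = gr.Button(...) rebinds the name of the module-level speech_to_text function, so speech_to_text.click(speech_to_text, ...) registers the Button object itself as the callback rather than the transcription function. A minimal sketch of the fixed wiring, against the same Gradio 3.x API the app already uses (transcribe_btn is a hypothetical rename):

import gradio as gr
from transformers import pipeline

asr = pipeline("automatic-speech-recognition")

def speech_to_text(mic=None):
    # With only the microphone input left in the UI, one argument suffices.
    if mic is None:
        return "You must either provide a mic recording or a file"
    # Return the transcription itself instead of an unused state argument.
    return asr(mic)["text"]

with gr.Blocks() as demo:
    audio_file = [gr.Audio(source="microphone", type="filepath")]
    text = gr.Textbox(label="Text")
    # The renamed button no longer shadows the callback function above.
    transcribe_btn = gr.Button("Speech to text go brrr")
    transcribe_btn.click(speech_to_text, inputs=audio_file, outputs=text)

demo.launch()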