Update app.py
app.py
CHANGED
@@ -11,7 +11,7 @@ from transformers import pipeline
 
 asr = pipeline("automatic-speech-recognition")
 latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
-
+zero = pipeline("zero-shot-image-classification")
 #tts = gr.Interface.load("spaces/osanseviero/tortoisse-tts")
 
 def text2image_latent(text, steps, width, height, images, diversity):
@@ -43,19 +43,19 @@ def speech_to_text(mic=None, file=None):
     transcription = asr(audio)["text"]
     return transcription
 
-
+def zero_shot(image, text_input):
+    results = zero(image, text_input)
+    return([results])
 
-
-
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
             audio_file =[
                 gr.Audio(source="microphone", type="filepath", optional=True),
                 gr.Audio(source="upload", type="filepath", optional=True)]
-            text = gr.Textbox(
+            text = gr.Textbox(placeholder="If you dont want to record or upload your voice you can input text here")
             with gr.Row():
-                speech_to_text = gr.Button("Speech to text go brrr")
+                speech_to_text = gr.Button("Speech to text go brrr", css={"margin-top": "1em"})
         with gr.Column():
             steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=50,maximum=50,minimum=1,step=1)
             width = gr.inputs.Slider(label="Width", default=256, step=32, maximum=256, minimum=32)
@@ -64,14 +64,16 @@ with gr.Blocks() as demo:
             diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=15.0, minimum=1.0, maximum=15.0)
             gallery = gr.Gallery(label="Individual images", show_label=True)
             with gr.Row():
-                get_image_latent = gr.Button("Generate Image", css={"margin-top": "1em"})
-
-
-
+                get_image_latent = gr.Button("Generate Image go brr", css={"margin-top": "1em"})
+        with gr.Column():
+            text_input = gr.Textbox(placeholder="input a list of labels separated by commas")
+            label = gr.Label()
+            with gr.Row():
+                zero_shot_clf = gr.Button("Classify Image go brr", css={"margin-top": "1em"})
 
 
     speech_to_text.click(speech_to_text, inputs=audio_file, outputs=text)
     get_image_latent.click(text2image_latent, inputs=[text,steps,width,height,images,diversity], outputs=gallery)
-
+    zero_shot_clf.click(zero_shot, inputs=[gallery[0],text_input], outputs = label)
 
 demo.launch(enable_queue=False)
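For reference, below is a minimal standalone sketch of the zero-shot-image-classification pipeline this commit adds, run outside Gradio. The image path and labels are illustrative assumptions, not part of the commit; note that the transformers pipeline takes the labels through the candidate_labels keyword rather than a bare positional argument.

# Minimal sketch, assuming a local image file and made-up labels.
from transformers import pipeline

# Same task string the commit uses; with no model argument,
# transformers falls back to a default CLIP checkpoint.
zero = pipeline("zero-shot-image-classification")

# The Space's text_input box takes a comma-separated label list,
# so split and strip it before handing it to the pipeline.
labels = [l.strip() for l in "a cat, a dog, a bird".split(",")]

# candidate_labels is the documented keyword for this pipeline task.
results = zero("example.jpg", candidate_labels=labels)

for r in results:  # list of {"label": str, "score": float}, best match first
    print(f'{r["label"]}: {r["score"]:.3f}')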