Update app.py
app.py CHANGED
@@ -7,7 +7,7 @@ from transformers import pipeline
 
 asr = pipeline("automatic-speech-recognition")
 latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
-zero = pipeline("zero-shot-image-classification")
+#zero = pipeline("zero-shot-image-classification")
 #zero = gr.Interface.load("spaces/Datatrooper/zero-shot-image-classification")
 #tts = gr.Interface.load("spaces/osanseviero/tortoisse-tts")
 
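The line commented out in this hunk is the transformers zero-shot image classifier that the zero_shot/shot helpers removed in the next hunk relied on. For reference, a minimal sketch of how that pipeline is typically invoked; the default checkpoint and the comma-separated example labels are assumptions, not part of this commit:

# Minimal sketch of the zero-shot image classification call this commit disables.
# The pipeline's default checkpoint and the label list are placeholders.
from PIL import Image
from transformers import pipeline

zero = pipeline("zero-shot-image-classification")

def classify(image_path, labels_text):
    image = Image.open(image_path).convert("RGB")
    labels = labels_text.split(",")
    results = zero(images=image,
                   candidate_labels=labels,
                   hypothesis_template="This is a photo of a {}")
    # Each entry has a "label" and a "score"; return them as a mapping for gr.Label.
    return {r["label"]: r["score"] for r in results}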
@@ -38,59 +38,34 @@ def speech_to_text(mic=None, file=None, state=""):
     else:
         return "You must either provide a mic recording or a file"
     transcription = asr(audio)["text"]
-    #state += text + " "
     return state
 
-def zero_shot(image, text_input):
-    PIL_image = Image.fromarray(np.uint8(image)).convert('RGB')
-    labels = labels_text.split(",")
-    res = pipe(images=PIL_image,
-               candidate_labels=labels,
-               hypothesis_template= "This is a photo of a {}")
-    return {dic["label"]: dic["score"] for dic in res}
-
-def shot(image, labels_text):
-    PIL_image = Image.fromarray(np.uint8(image)).convert('RGB')
-    labels = labels_text.split(",")
-    res = pipe(images= PIL_image,
-               candidate_labels=labels,
-               hypothesis_template= "This is a photo of a {}")
-    return {dic["label"]: dic["score"] for dic in res}
 
 with gr.Blocks() as demo:
     gr.Markdown( """
-
-
-
-
+    # 🎤 Sing or tell your story and let this Space ✨ visualize your story along
+    - Soon to be added
+    - Near real time(streaming option)
+    - Also allow playback of you audio relayed with video
     """)
     with gr.Row():
         with gr.Column():
             audio_file =[
-                gr.Audio(source="microphone", type="filepath"
-                gr.
-            text = gr.Textbox(label="Text", placeholder="If you dont want to record or upload your voice you can input text here")
+                gr.Audio(source="microphone", type="filepath")]
+            speech_to_text = gr.Button("Speech to text go brrr")
         with gr.Row():
-
+            text = gr.Textbox(label="Text", placeholder="If you dont want to record or upload your voice you can input text here")
         with gr.Column():
-            steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=
+            steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=1,maximum=50,minimum=1,step=1)
             width = gr.inputs.Slider(label="Width", default=256, step=32, maximum=256, minimum=32)
             height = gr.inputs.Slider(label="Height", default=256, step=32, maximum = 256, minimum=32)
             images = gr.inputs.Slider(label="Images - How many images you wish to generate", default=1, step=1, minimum=1, maximum=4)
             diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=15.0, minimum=1.0, maximum=15.0)
-            #gallery = [gr.outputs.Image(type="pil"),gr.outputs.Textbox(label="Error")]
             gallery = gr.Gallery(label="Individual images")
         with gr.Row():
             get_image_latent = gr.Button("Generate Image go brr")
-        with gr.Column():
-            text_input = gr.Textbox(label="Candidate labels", placeholder="input a list of labels separated by commas")
-            label = gr.Label()
-        with gr.Row():
-            zero_shot_clf = gr.Button("Classify Image go brr")
-
 
     speech_to_text.click(speech_to_text, inputs=audio_file, outputs=text)
     get_image_latent.click(text2image_latent, inputs=[text, steps, width, height, images, diversity], outputs=gallery)
-    zero_shot_clf.click(zero_shot, inputs=[gallery, text_input], outputs=label)
 
 demo.launch()
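The second hunk only shows the tail of speech_to_text, the function wired to the "Speech to text go brrr" button. A minimal sketch of the full helper, assuming the usual mic-or-file selection above the visible context (the if/elif branches and the state accumulation are assumptions, not lines from this commit):

# Minimal sketch (assumed shape, not the committed file) of the helper whose tail
# appears in the second hunk: pick the mic recording or the uploaded file, run the
# ASR pipeline, and hand the transcription back to the Textbox.
from transformers import pipeline

asr = pipeline("automatic-speech-recognition")

def speech_to_text(mic=None, file=None, state=""):
    if mic is not None:
        audio = mic    # filepath from gr.Audio(source="microphone", type="filepath")
    elif file is not None:
        audio = file   # filepath of an uploaded recording
    else:
        return "You must either provide a mic recording or a file"
    transcription = asr(audio)["text"]
    # Assumption: append the new transcription; the committed hunk returns state unchanged.
    return state + transcription + " "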