muhtasham committed
Commit ac022fd
1 Parent(s): 974534b

Update app.py

Files changed (1)
  1. app.py +13 -11
app.py CHANGED
@@ -11,7 +11,7 @@ from transformers import pipeline
 
 asr = pipeline("automatic-speech-recognition")
 latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
-#zero = pipeline("zero-shot-image-classification")
+zero = pipeline("zero-shot-image-classification")
 #tts = gr.Interface.load("spaces/osanseviero/tortoisse-tts")
 
 def text2image_latent(text, steps, width, height, images, diversity):
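
The hunk above swaps the commented-out pipeline for a live one. For context, a minimal sketch of how the `zero-shot-image-classification` pipeline is typically called; the image path and label list here are illustrative, not from the commit:

```python
from transformers import pipeline

# CLIP-style zero-shot classification: scores an image against arbitrary
# text labels without task-specific fine-tuning.
zero = pipeline("zero-shot-image-classification")

# Illustrative input: the pipeline accepts a file path, URL, or PIL.Image,
# and the labels are passed as a list via the candidate_labels keyword.
results = zero("cat.jpg", candidate_labels=["cat", "dog", "bird"])

# `results` is a list of {"score": float, "label": str} dicts, highest score first.
print(results[0]["label"], results[0]["score"])
```

Note that the new `zero_shot` helper below passes its labels positionally as one comma-separated string, while the pipeline expects `candidate_labels` as a list, so the string would normally be split first.
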
@@ -43,19 +43,19 @@ def speech_to_text(mic=None, file=None):
     transcription = asr(audio)["text"]
     return transcription
 
-#def zero_shot(image, labels_text):
+def zero_shot(image, text_input):
+    results = zero(image, text_input)
+    return([results])
 
-
-
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
             audio_file =[
                 gr.Audio(source="microphone", type="filepath", optional=True),
                 gr.Audio(source="upload", type="filepath", optional=True)]
-            text = gr.Textbox(default="If you dont want to record or upload your voice you can input text here")
+            text = gr.Textbox(placeholder="If you dont want to record or upload your voice you can input text here")
             with gr.Row():
-                speech_to_text = gr.Button("Speech to text go brrr")
+                speech_to_text = gr.Button("Speech to text go brrr", css={"margin-top": "1em"})
         with gr.Column():
             steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=50,maximum=50,minimum=1,step=1)
             width = gr.inputs.Slider(label="Width", default=256, step=32, maximum=256, minimum=32)
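
`gr.Label` renders a mapping from label to confidence, while the committed `zero_shot` returns the raw pipeline output wrapped in a list. A hedged sketch of a variant that splits the comma-separated labels and reshapes the result for `gr.Label`; the splitting and dict-building are assumptions, not part of this commit:

```python
def zero_shot(image, text_input):
    # Assumed helper logic: turn "cat, dog, bird" into ["cat", "dog", "bird"].
    labels = [lab.strip() for lab in text_input.split(",") if lab.strip()]
    results = zero(image, candidate_labels=labels)
    # gr.Label expects a {label: confidence} mapping.
    return {r["label"]: r["score"] for r in results}
```
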
@@ -64,14 +64,16 @@
             diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=15.0, minimum=1.0, maximum=15.0)
             gallery = gr.Gallery(label="Individual images", show_label=True)
             with gr.Row():
-                get_image_latent = gr.Button("Generate Image", css={"margin-top": "1em"})
-        #with gr.Column():
-
-        #with gr.Row():
+                get_image_latent = gr.Button("Generate Image go brr", css={"margin-top": "1em"})
+        with gr.Column():
+            text_input = gr.Textbox(placeholder="input a list of labels separated by commas")
+            label = gr.Label()
+            with gr.Row():
+                zero_shot_clf = gr.Button("Classify Image go brr", css={"margin-top": "1em"})
 
 
     speech_to_text.click(speech_to_text, inputs=audio_file, outputs=text)
     get_image_latent.click(text2image_latent, inputs=[text,steps,width,height,images,diversity], outputs=gallery)
-
+    zero_shot_clf.click(zero_shot, inputs=[gallery[0],text_input], outputs = label)
 
 demo.launch(enable_queue=False)
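
Two wiring details worth flagging. `gallery[0]` indexes a `gr.Gallery` component, which is not subscriptable, and the `gr.Button` bound to the name `speech_to_text` shadows the `speech_to_text()` function defined earlier, so the first `click()` call passes the Button object itself as the callback. A hedged sketch of one way to rewire both; the `gr.Image` input and the `_btn` name are assumptions, not part of this commit:

```python
# Rename the button so it no longer shadows the callback function.
speech_to_text_btn = gr.Button("Speech to text go brrr")

# A Gallery cannot be indexed as an input, so feed the classifier a
# dedicated image component instead.
image_input = gr.Image(type="filepath")

speech_to_text_btn.click(speech_to_text, inputs=audio_file, outputs=text)
zero_shot_clf.click(zero_shot, inputs=[image_input, text_input], outputs=label)
```
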
 