nekin12 commited on
Commit
ebc7502
1 Parent(s): 85aaaab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -6
app.py CHANGED
@@ -1,16 +1,34 @@
 
 
 
 
 
1
  transcriber = pipeline("automatic-speech-recognition")
2
 
 
 
 
 
 
 
 
 
 
 
3
  def transcribe(audio):
4
  sr, y = audio
5
  y = y.astype(np.float32)
6
  y /= np.max(np.abs(y))
 
 
 
 
7
 
8
- return transcriber({"sampling_rate": sr, "raw": y})["text"]
9
-
10
- demo = gr.Interface(
11
  transcribe,
12
- gr.Audio(sources=["microphone"], type="numpy", label="xerra aqui..."),
13
- "text",
14
  )
15
 
16
- demo.launch(debug=True)
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import numpy as np
4
+ import torch
5
+ from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
6
# Speech-to-text pipeline, created once at module import and shared by
# transcribe() below.
transcriber = pipeline(task="automatic-speech-recognition")
7
 
8
# Lazily-initialized Stable Diffusion pipeline, shared across calls so the
# multi-gigabyte model is downloaded and built only once instead of on
# every request (the original reloaded it per call).
_SD_PIPE = None


def generate_img(promt):
    """Generate a picture from a text prompt with Stable Diffusion 2.1.

    Args:
        promt: The text prompt to render.
            NOTE(review): "promt" is a typo for "prompt", preserved so any
            keyword callers keep working.

    Returns:
        The first generated image of the batch (a PIL image, as produced
        by ``StableDiffusionPipeline``).
    """
    global _SD_PIPE
    print(promt)  # debug trace of the incoming prompt
    if _SD_PIPE is None:
        model_id = "stabilityai/stable-diffusion-2-1"
        # half-precision weights only work on CUDA; running float16 on CPU
        # fails at inference time, so pick the dtype from the device.
        use_cuda = torch.cuda.is_available()
        dtype = torch.float16 if use_cuda else torch.float32
        pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype)
        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
        if use_cuda:
            pipe = pipe.to("cuda")
        _SD_PIPE = pipe
    pic = _SD_PIPE(promt).images[0]
    return pic
17
+
18
def transcribe(audio):
    """Transcribe a recording and generate an image from the transcript.

    Args:
        audio: Tuple ``(sample_rate, samples)`` as delivered by
            ``gr.Audio(type="numpy")``.

    Returns:
        Tuple ``(text, image)``: the ASR transcription and the picture
        that ``generate_img`` produced from it.
    """
    sr, y = audio
    y = y.astype(np.float32)
    # Stereo clips arrive as a 2-D (samples, channels) array; the ASR
    # pipeline expects mono, so average the channels down to one.
    if y.ndim > 1:
        y = y.mean(axis=1)
    # Peak-normalize, guarding against an all-silent clip: the original
    # unconditional divide by the max would produce NaNs for silence.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak
    txt = transcriber({"sampling_rate": sr, "raw": y})["text"]
    img = generate_img(txt)
    return txt, img
26
 
27
# Web UI: record from the microphone, then show both the transcription
# and the picture generated from it.
mic_input = gr.Audio(sources=["microphone"], type="numpy", label="xerra aqui...")
prompt_box = gr.Textbox(
    label="Promt",
    info="La transcripció de la grabació feta",
    lines=2,
)

g_image = gr.Interface(
    transcribe,
    inputs=mic_input,
    outputs=[prompt_box, "image"],
)

g_image.launch(debug=True, share=True)