nekin12 committed
Commit b5c8bc0
1 Parent(s): 63d4527

Update app.py

Files changed (1)
  1. app.py +6 -18
app.py CHANGED
@@ -4,30 +4,18 @@ import numpy as np
 
 transcriber = pipeline("automatic-speech-recognition")
 
-def generate_img(promt):
-    print(promt)
-    model_id = "stabilityai/stable-diffusion-2-1"
-    pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
-    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-    #pipe = pipe.to("cuda")
-    #pipe = pipe.to("cpu")
-    pic = pipe(promt).images[0]
-    return pic
-
 def transcribe(audio):
     sr, y = audio
     y = y.astype(np.float32)
     y /= np.max(np.abs(y))
-    txt= transcriber({"sampling_rate": sr, "raw": y})["text"]
-    #txt="dog with a hat"
-    img= generate_img(txt)
-    return txt, img
 
-g_image = gr.Interface(
+    return transcriber({"sampling_rate": sr, "raw": y})["text"]
+
+demo = gr.Interface(
     transcribe,
-    inputs=gr.Audio(sources=["microphone"], type="numpy", label="xerra aqui..."),
-    outputs=[gr.Textbox(label="Promt",info="La transcripció de la grabació feta",lines=2), "image"]
+    gr.Audio(sources=["microphone"], type="numpy", label="xerra aqui..."),
+    "text",
 )
 
+demo.launch(debug=True)
 
-g_image.launch(debug=True, share=True)
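
The hunk starts at line 4, so the file's import block (lines 1-3) is not shown; only "import numpy as np" appears in the hunk header. Below is a minimal sketch of what app.py looks like after this commit, assuming the other two imports are "import gradio as gr" and "from transformers import pipeline" (inferred from usage) and that the ASR pipeline falls back to its default checkpoint since no model name is passed. The UI strings are kept verbatim from the diff; "xerra aqui..." is Catalan for "talk here...".

import gradio as gr
import numpy as np
from transformers import pipeline

# No model name is given, so the pipeline loads its default ASR checkpoint.
transcriber = pipeline("automatic-speech-recognition")

def transcribe(audio):
    # gr.Audio with type="numpy" delivers a (sample_rate, samples) tuple.
    sr, y = audio
    # Convert to float32 and normalize, as the pipeline expects raw audio.
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))

    return transcriber({"sampling_rate": sr, "raw": y})["text"]

demo = gr.Interface(
    transcribe,
    gr.Audio(sources=["microphone"], type="numpy", label="xerra aqui..."),
    "text",
)

# share=True from the previous version was dropped in this commit.
demo.launch(debug=True)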