File size: 615 Bytes
b212449
1f98aa3
b212449
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78ddf88
b212449
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import gradio as gr
import numpy as np
from transformers import pipeline
# Speech-to-text pipeline, built once at import time so that every call to
# the prediction function reuses the same loaded model.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base",
)


def predict(audio):
  """Transcribe one audio clip with the module-level ``transcriber`` pipeline.

  Args:
    audio: A ``(sampling_rate, samples)`` pair, where ``samples`` is a NumPy
      array that is either 1-D (mono) or 2-D with channels on axis 1.
      ``None`` is accepted and treated as "no audio supplied" (this is what
      the UI is expected to pass when nothing was recorded or uploaded).

  Returns:
    The transcribed text, or an empty string when there is nothing to
    transcribe (no audio, zero samples, or an all-zero signal).
  """
  # Nothing was submitted: unpacking None below would raise TypeError.
  if audio is None:
    return ""

  sr, y = audio
  # No samples at all: np.max() on an empty array would raise ValueError.
  if y.size == 0:
    return ""

  y = y.astype(np.float32)
  if y.ndim > 1:
    # Down-mix multi-channel audio to mono by averaging the channels.
    y = y.mean(axis=1)

  peak = np.max(np.abs(y))
  # Pure digital silence: dividing by a zero peak would fill the signal with
  # NaNs, and a silent clip carries no speech to transcribe anyway.
  if peak == 0:
    return ""
  # Peak-normalise so the loudest sample has magnitude 1.
  y /= peak

  return transcriber({"sampling_rate": sr, "raw": y})['text']


# Web UI definition: a single audio input (file upload or microphone
# recording, handed to `predict` as a NumPy payload) and a single text box
# that displays whatever `predict` returns.
gradio_app = gr.Interface(
    title="Speech transcription",
    fn=predict,
    inputs=[
        gr.Audio(type="numpy", sources=["upload", "microphone"]),
    ],
    outputs=[
        gr.Textbox(label="Transcription"),
    ],
)

# Start the server only when this file is executed as a script, so that
# importing the module merely defines `gradio_app`.
if __name__ == "__main__":
  gradio_app.launch()