| import gradio as gr | |
| from transformers import pipeline | |
| import numpy as np | |
def _load_asr(model_id):
    """Return an automatic-speech-recognition pipeline for *model_id*."""
    return pipeline("automatic-speech-recognition", model=model_id)


# One pre-loaded ASR pipeline per supported language (AI4Bharat IndicWav2Vec).
transcriber_hindi = _load_asr("ai4bharat/indicwav2vec-hindi")
transcriber_bang = _load_asr("ai4bharat/indicwav2vec_v1_bengali")
transcriber_odia = _load_asr("ai4bharat/indicwav2vec-odia")
def transcribe(audio, lang="hindi"):
    """Transcribe a recorded clip with the ASR model for the chosen language.

    Args:
        audio: Gradio audio value, a ``(sample_rate, samples)`` tuple where
            ``samples`` is a 1-D numpy array (integer PCM or float).
        lang: One of ``"hindi"``, ``"bangali"`` or ``"odia"``. The spelling
            ``"bangali"`` is kept as-is because it must match the Radio
            choice string in the UI.

    Returns:
        The transcribed text for the clip.

    Raises:
        ValueError: If *lang* is not one of the supported values (previously
            this fell through and silently returned ``None``).
    """
    sr, y = audio
    # Pipelines expect float samples; peak-normalize so integer PCM input
    # lands in the [-1, 1] range the models expect.
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:  # guard: a silent clip would otherwise divide by zero -> NaNs
        y /= peak
    if lang == "hindi":
        return transcriber_hindi({"sampling_rate": sr, "raw": y})["text"]
    if lang == "bangali":
        return transcriber_bang({"sampling_rate": sr, "raw": y})["text"]
    if lang == "odia":
        return transcriber_odia({"sampling_rate": sr, "raw": y})["text"]
    raise ValueError(f"Unsupported language: {lang!r}")
# Wire the UI: microphone audio plus a language selector feed transcribe();
# the model's text output is shown in a plain textbox.
microphone_input = gr.Audio(source="microphone")
language_selector = gr.Radio(["hindi", "bangali", "odia"])
demo = gr.Interface(
    fn=transcribe,
    inputs=[microphone_input, language_selector],
    outputs="text",
)
demo.launch()