|
import gradio as gr |
|
from transformers import pipeline |
|
import torch |
|
|
|
def init_transcription_pipeline(
    model_path="c:/Users/vhits/Documents/Speect2Text/model/whisper-hindi-large-v2",
    language="gu",
    task="transcribe",
    chunk_length_s=30,
):
    """Build the speech-recognition pipeline used by the app.

    All arguments are optional; the defaults reproduce the original
    hard-coded configuration, so a bare ``init_transcription_pipeline()``
    behaves exactly as before.

    Args:
        model_path: Checkpoint handed to ``transformers.pipeline`` as
            ``model``.
        language: Language code passed to the tokenizer's
            ``get_decoder_prompt_ids``.
        task: Task name passed to ``get_decoder_prompt_ids``.
        chunk_length_s: Forwarded to the pipeline as ``chunk_length_s``.

    Returns:
        The configured ``automatic-speech-recognition`` pipeline.
    """
    # NOTE(review): the default checkpoint directory is named
    # "whisper-hindi-large-v2" while the default language is "gu" --
    # confirm this pairing is intentional.

    # Use the first CUDA device when one is available, otherwise the CPU.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    asr = pipeline(
        task="automatic-speech-recognition",
        model=model_path,
        chunk_length_s=chunk_length_s,
        device=device,
    )

    # Pin the decoder prompt to the requested language/task.
    # NOTE(review): newer transformers releases appear to prefer passing
    # language/task through generate_kwargs -- verify against the installed
    # version before upgrading.
    asr.model.config.forced_decoder_ids = asr.tokenizer.get_decoder_prompt_ids(
        language=language, task=task
    )
    return asr
|
|
|
# Build the pipeline once at module load; transcribe_audio reads this
# module-level object on every call instead of rebuilding it.
transcribe_pipeline = init_transcription_pipeline()
|
|
|
def transcribe_audio(audio_file_path):
    """Transcribe the audio file at *audio_file_path* and return its text.

    Args:
        audio_file_path: Path to the audio file, or ``None``/empty when no
            audio was provided.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string
        when no audio was provided.
    """
    # The UI can invoke this handler without any audio selected; forwarding
    # None/"" to the pipeline would raise, so return an empty transcript.
    if not audio_file_path:
        return ""

    return transcribe_pipeline(audio_file_path)["text"]
|
|
|
# Build the web UI: one audio input whose file path is handed to
# transcribe_audio, with the returned transcript shown in a text box.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(label="Upload your audio file", type="filepath"),
    outputs=gr.Textbox(label="Transcription"),
    title="Gujarati Audio VH Test",
)

# Launch as a separate statement so `iface` stays bound to the Interface
# itself rather than to whatever launch() returns.
iface.launch()