File size: 1,009 Bytes
11a232c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import gradio as gr
from transformers import pipeline
import torch

def init_transcription_pipeline(
    model_path="c:/Users/vhits/Documents/Speect2Text/model/whisper-hindi-large-v2",
    language="gu",
    chunk_length_s=30,
):
    """Build the automatic-speech-recognition pipeline used by the app.

    All parameters default to the values the app was originally hard-wired
    to, so calling this function with no arguments behaves as before.

    Args:
        model_path: Value passed to ``pipeline`` as ``model``. The default
            is an absolute path on one specific machine; pass another
            location when running elsewhere.
        language: Language code handed to the tokenizer's
            ``get_decoder_prompt_ids``.
        chunk_length_s: Value passed to ``pipeline`` as ``chunk_length_s``.

    Returns:
        The configured pipeline object.
    """
    # NOTE(review): the default checkpoint directory is named "whisper-hindi"
    # while the default language code is "gu" -- confirm this pairing is
    # intended.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    asr = pipeline(
        task="automatic-speech-recognition",
        model=model_path,
        chunk_length_s=chunk_length_s,
        device=device,
    )
    # Store the tokenizer's decoder prompt ids for the chosen language and
    # the "transcribe" task on the model config.
    asr.model.config.forced_decoder_ids = asr.tokenizer.get_decoder_prompt_ids(
        language=language, task="transcribe"
    )
    return asr

# Build the pipeline once at import time; transcribe_audio below reuses this
# module-level instance on every call instead of constructing a new one.
transcribe_pipeline = init_transcription_pipeline()

def transcribe_audio(audio_file_path):
    """Transcribe an audio file and return the recognised text.

    Args:
        audio_file_path: Path of the audio file to transcribe. May be
            falsy (``None`` or ``""``) when no file was supplied.

    Returns:
        The ``"text"`` entry of the pipeline result, or an empty string
        when no audio path was given.
    """
    # Nothing to transcribe: return an empty result rather than handing a
    # missing path to the pipeline.
    if not audio_file_path:
        return ""
    return transcribe_pipeline(audio_file_path)["text"]

# Build the UI first so that `iface` names the Interface object itself
# rather than whatever launch() returns, then start the server.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(label="Upload your audio file", type="filepath"),
    outputs=gr.Textbox(label="Transcription"),
    title="Gujarati Audio VH Test",
)
iface.launch()