File size: 1,479 Bytes
5954031
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import gradio as gr
import torchaudio
from transformers import AutoModelForSpeechSeq2Seq, PreTrainedTokenizerFast

def transcribe_audio(audio_path):
    """Transcribe a speech recording to text with the Moonshine model.

    Args:
        audio_path: Filesystem path to the audio file handed over by the
            Gradio ``Audio`` component, or ``None`` when the user clicks
            Transcribe without supplying any audio.

    Returns:
        The decoded transcription string, or a short notice when no audio
        was provided.
    """
    # Gradio passes None if the user submits without uploading/recording;
    # previously this crashed inside torchaudio.load.
    if audio_path is None:
        return "No audio provided."

    # Load the recording and resample to the 16 kHz rate the model expects.
    audio, sr = torchaudio.load(audio_path)
    if sr != 16000:
        audio = torchaudio.functional.resample(audio, sr, 16000)

    # Downmix multi-channel (e.g. stereo) input to a single channel,
    # keeping the (1, num_samples) shape the model consumes.
    if audio.shape[0] > 1:
        audio = audio.mean(dim=0, keepdim=True)

    # The trust_remote_code Moonshine model returns token ids directly from
    # its forward call; decode the first (and only) sequence.
    tokens = model(audio)
    transcription = tokenizer.decode(tokens[0], skip_special_tokens=True)
    return transcription

# Load model and tokenizer globally
# NOTE: trust_remote_code=True executes model code downloaded from the Hub
# repository — acceptable here only because the source is a known publisher.
# Both calls download weights/files on first run (network I/O at import time).
model = AutoModelForSpeechSeq2Seq.from_pretrained('usefulsensors/moonshine-tiny', trust_remote_code=True)
tokenizer = PreTrainedTokenizerFast.from_pretrained('usefulsensors/moonshine-tiny')

# Build the Gradio interface: two tabs (file upload / microphone recording)
# that share the same transcription callback.
demo = gr.Blocks()

with demo:
    gr.Markdown("## Audio Transcription App")

    with gr.Tabs():
        # Each tab pairs an audio input with its own output box and button;
        # the loop preserves the original component creation order.
        for tab_title, audio_source in (
            ("Upload Audio", "upload"),
            ("Record Audio", "microphone"),
        ):
            with gr.TabItem(tab_title):
                audio_input = gr.Audio(source=audio_source, type="filepath")
                transcript_box = gr.Textbox(label="Transcription")
                transcribe_btn = gr.Button("Transcribe")
                transcribe_btn.click(
                    fn=transcribe_audio,
                    inputs=audio_input,
                    outputs=transcript_box,
                )

demo.launch()