DexterSptizu committed
Commit 07b17aa
1 Parent(s): 4f8204e

Update app.py

Files changed (1)
  1. app.py +66 -24
app.py CHANGED
@@ -2,38 +2,80 @@ import gradio as gr
 import torchaudio
 from transformers import AutoModelForSpeechSeq2Seq, PreTrainedTokenizerFast
 
+# Load model and tokenizer globally with pinned revision
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    'usefulsensors/moonshine-tiny',
+    revision="main",
+    trust_remote_code=True
+)
+tokenizer = PreTrainedTokenizerFast.from_pretrained(
+    'usefulsensors/moonshine-tiny',
+    revision="main"
+)
+
 def transcribe_audio(audio_path):
-    # Load and resample audio
-    audio, sr = torchaudio.load(audio_path)
-    if sr != 16000:
-        audio = torchaudio.functional.resample(audio, sr, 16000)
+    if audio_path is None:
+        return "Please provide an audio input."
 
-    # Get transcription
-    tokens = model(audio)
-    transcription = tokenizer.decode(tokens[0], skip_special_tokens=True)
-    return transcription
-
-# Load model and tokenizer globally
-model = AutoModelForSpeechSeq2Seq.from_pretrained('usefulsensors/moonshine-tiny', trust_remote_code=True)
-tokenizer = PreTrainedTokenizerFast.from_pretrained('usefulsensors/moonshine-tiny')
+    try:
+        # Load and resample audio
+        audio, sr = torchaudio.load(audio_path)
+        if sr != 16000:
+            audio = torchaudio.functional.resample(audio, sr, 16000)
+
+        # Get transcription
+        tokens = model(audio)
+        transcription = tokenizer.decode(tokens[0], skip_special_tokens=True)
+        return transcription
+    except Exception as e:
+        return f"Error processing audio: {str(e)}"
 
 # Create Gradio interface
-demo = gr.Blocks()
-
-with demo:
+with gr.Blocks() as demo:
     gr.Markdown("## Audio Transcription App")
 
     with gr.Tabs():
         with gr.TabItem("Upload Audio"):
-            audio_file = gr.Audio(source="upload", type="filepath")
-            output_text1 = gr.Textbox(label="Transcription")
-            upload_button = gr.Button("Transcribe")
-            upload_button.click(fn=transcribe_audio, inputs=audio_file, outputs=output_text1)
+            audio_file = gr.Audio(
+                sources=["upload"],
+                type="filepath",
+                label="Upload Audio File"
+            )
+            output_text1 = gr.Textbox(
+                label="Transcription",
+                placeholder="Transcription will appear here..."
+            )
+            upload_button = gr.Button("Transcribe Uploaded Audio")
+            upload_button.click(
+                fn=transcribe_audio,
+                inputs=audio_file,
+                outputs=output_text1
+            )
 
         with gr.TabItem("Record Audio"):
-            audio_mic = gr.Audio(source="microphone", type="filepath")
-            output_text2 = gr.Textbox(label="Transcription")
-            record_button = gr.Button("Transcribe")
-            record_button.click(fn=transcribe_audio, inputs=audio_mic, outputs=output_text2)
+            audio_mic = gr.Audio(
+                sources=["microphone"],
+                type="filepath",
+                label="Record Audio"
+            )
+            output_text2 = gr.Textbox(
+                label="Transcription",
+                placeholder="Transcription will appear here..."
+            )
+            record_button = gr.Button("Transcribe Recorded Audio")
+            record_button.click(
+                fn=transcribe_audio,
+                inputs=audio_mic,
+                outputs=output_text2
+            )
+
+    gr.Markdown("""
+    ### Instructions:
+    1. Choose either 'Upload Audio' or 'Record Audio' tab
+    2. Upload an audio file or record using your microphone
+    3. Click the respective 'Transcribe' button
+    4. Wait for the transcription to appear
+    """)
 
-demo.launch()
+if __name__ == "__main__":
+    demo.launch()
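
For a quick check outside the Gradio UI, the transcription path added in this commit can also be run on its own. The sketch below is illustrative only and simply mirrors the committed app.py (same model revision, resampling, and decode step); the sample.wav filename is a placeholder, not a file that ships with the Space.

# Standalone smoke test that mirrors the committed transcription path (illustrative sketch).
import torchaudio
from transformers import AutoModelForSpeechSeq2Seq, PreTrainedTokenizerFast

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    "usefulsensors/moonshine-tiny", revision="main", trust_remote_code=True
)
tokenizer = PreTrainedTokenizerFast.from_pretrained(
    "usefulsensors/moonshine-tiny", revision="main"
)

def transcribe(path):
    # Load the recording and resample to the 16 kHz rate the model expects.
    audio, sr = torchaudio.load(path)
    if sr != 16000:
        audio = torchaudio.functional.resample(audio, sr, 16000)
    # As in app.py, calling the model returns token IDs, which the tokenizer decodes to text.
    tokens = model(audio)
    return tokenizer.decode(tokens[0], skip_special_tokens=True)

if __name__ == "__main__":
    print(transcribe("sample.wav"))  # "sample.wav" is a placeholder speech recording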