Srivatsa Kundurthy committed on
Commit
b6e138e
•
1 Parent(s): 1ef38bc

update app

Browse files
Files changed (1) hide show
  1. app.py +31 -9
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
  import torch
 
4
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
5
 
6
  wav2_ft = pipeline("automatic-speech-recognition",model='sanchit-gandhi/wav2vec2-large-tedlium',device=device,trust_remote_code=True)
@@ -17,11 +18,23 @@ def inference(path):
17
  )
18
  return out['text']
19
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  mic_mode = gr.Interface(
21
  fn=inference,
22
  inputs=gr.Audio(sources="microphone", type='filepath', label="Record Your Lecture"),
23
  outputs=gr.Textbox(label="Transcription Output"),
24
- title="🎙️ Live Lecture Transcription",
25
  description="Record through your mic. When you're done, hit stop and wait a moment. Feel free to trim the recording. Then, hit Submit!",
26
  examples=[],
27
  )
@@ -31,10 +44,21 @@ upload_mode = gr.Interface(
31
  fn=inference,
32
  inputs=gr.Audio(sources="upload", type='filepath', label="Upload Your Lecture Recording"),
33
  outputs=gr.Textbox(label="Transcription Output"),
34
- title="📂 Lecture Recording Transcription",
35
  description="Have a recorded lecture? Upload the audio file here, and it'll be transcribed in seconds!",
36
  )
37
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  with app:
40
  gr.Markdown(
@@ -46,21 +70,19 @@ with app:
46
  ## How It Works
47
  - **Recording Mode:** Record the lecture as it happens. When you stop, your transcription will be generated.
48
  - **Upload Mode:** Upload your pre-recorded lecture audio files, and receive a precise transcription. Supports various audio formats including WAV, MP3, and more.
 
49
 
50
  ## Optimized for Technical Oration
51
  Under the hood, this is a Wav2Vec2 model fine-tuned on the TED-Lium dataset. It's well-versed for
52
  accurately transcribing technical speech.
53
-
54
 
55
 
56
- **Never miss a word with Lecture Transcription!**
57
  """
58
  )
59
- # Add a Tabbed Interface for different modes
60
  gr.TabbedInterface(
61
- [mic_mode, upload_mode],
62
- ["🎙️ Record & Transcribe", "📂 Upload & Transcribe"]
63
  )
64
 
65
- # Launch the app
66
- app.launch(share=True)
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import torch
4
+ import numpy as np
5
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
6
 
7
  wav2_ft = pipeline("automatic-speech-recognition",model='sanchit-gandhi/wav2vec2-large-tedlium',device=device,trust_remote_code=True)
 
18
  )
19
  return out['text']
20
 
21
+
22
def transcribe(stream, new_chunk):
    """Append a streamed audio chunk to the running buffer and re-transcribe it.

    Used as the callback of the live-transcription interface, which passes
    the previous state plus the newest microphone chunk and expects the new
    state plus the text to display.

    Args:
        stream: Audio accumulated by earlier calls (a NumPy array), or
            ``None`` when nothing has been accumulated yet.
        new_chunk: ``(sampling_rate, samples)`` pair for the newest chunk,
            where ``samples`` is a NumPy array.

    Returns:
        A ``(stream, text)`` tuple: the updated audio buffer and the
        transcription of the whole buffer (``""`` while the buffer is empty).
    """
    sr, y = new_chunk
    y = y.astype(np.float32)

    # The ASR pipeline expects single-channel audio, so average the channels.
    # Assumes a (samples, channels) layout for multi-channel input -- the
    # layout Gradio documents for NumPy audio; confirm if the source changes.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Peak-normalise the chunk, but leave empty or all-zero (silent) chunks
    # untouched: np.max raises on an empty array, and dividing by a zero peak
    # produces NaNs that would stay in the accumulated buffer on every later
    # call.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    stream = y if stream is None else np.concatenate([stream, y])

    # Nothing to transcribe yet; skip the model call.
    if stream.size == 0:
        return stream, ""

    # The whole accumulated buffer is transcribed on every call, so the text
    # returned always covers everything recorded so far.
    return stream, wav2_ft({"sampling_rate": sr, "raw": stream})["text"]
32
+
33
  mic_mode = gr.Interface(
34
  fn=inference,
35
  inputs=gr.Audio(sources="microphone", type='filepath', label="Record Your Lecture"),
36
  outputs=gr.Textbox(label="Transcription Output"),
37
+ title="🎙️ Recording & Transcribe",
38
  description="Record through your mic. When you're done, hit stop and wait a moment. Feel free to trim the recording. Then, hit Submit!",
39
  examples=[],
40
  )
 
44
  fn=inference,
45
  inputs=gr.Audio(sources="upload", type='filepath', label="Upload Your Lecture Recording"),
46
  outputs=gr.Textbox(label="Transcription Output"),
47
+ title="📂 Upload & Transcribe",
48
  description="Have a recorded lecture? Upload the audio file here, and it'll be transcribed in seconds!",
49
  )
50
 
51
+ # inspired by Gradio App Real Time Speech Recognition: https://www.gradio.app/guides/real-time-speech-recognition
52
+ live_mode = gr.Interface(
53
+ transcribe,
54
+ ["state", gr.Audio(sources=["microphone"], streaming=True)],
55
+ ["state", "text"],
56
+ title="🎤 Live Transcription",
57
+ description="Transcribe your lecture in real-time! Start speaking into your microphone, and watch the transcription appear instantly.",
58
+ live=True,
59
+ )
60
+
61
+
62
 
63
  with app:
64
  gr.Markdown(
 
70
  ## How It Works
71
  - **Recording Mode:** Record the lecture as it happens. When you stop, your transcription will be generated.
72
  - **Upload Mode:** Upload your pre-recorded lecture audio files, and receive a precise transcription. Supports various audio formats including WAV, MP3, and more.
73
+ - **Live Mode:** That's right, low-latency live transcription!
74
 
75
  ## Optimized for Technical Oration
76
  Under the hood, this is a Wav2Vec2 model fine-tuned on the TED-Lium dataset. It's well-versed for
77
  accurately transcribing technical speech.
 
78
 
79
 
 
80
  """
81
  )
 
82
  gr.TabbedInterface(
83
+ [mic_mode, upload_mode,live_mode],
84
+ ["🎙️ Record & Transcribe", "📂 Upload & Transcribe","🎤 Live Transcribe"]
85
  )
86
 
87
+
88
+ app.launch(debug=True)