Porjaz committed on
Commit
cf075ed
·
verified ·
1 Parent(s): eb43650

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -12
app.py CHANGED
@@ -56,18 +56,14 @@ def return_prediction_w2v2(mic=None, file=None, device=device):
56
 
57
 
58
  @spaces.GPU(duration=30)
59
- def return_prediction_whisper(mic=None, file=None, device=device):
60
  if mic is not None:
61
  waveform, sr = librosa.load(mic, sr=16000)
62
- waveform = waveform[:60*sr]
63
- whisper_result = whisper_classifier.classify_file_whisper_mkd(waveform, device)
64
- elif file is not None:
65
- waveform, sr = librosa.load(file, sr=16000)
66
- waveform = waveform[:60*sr]
67
  whisper_result = whisper_classifier.classify_file_whisper_mkd(waveform, device)
68
  else:
69
- return "You must either provide a mic recording or a file"
70
-
71
  recap_result = recap_sentence(whisper_result[0])
72
 
73
  # If the letter after punct is small, recap it
@@ -79,6 +75,39 @@ def return_prediction_whisper(mic=None, file=None, device=device):
79
  return recap_result
80
 
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  def return_prediction_compare(mic=None, file=None, device=device):
83
  # pipe_whisper.model.to(device)
84
  # mms_model.to(device)
@@ -132,7 +161,8 @@ def return_prediction_compare(mic=None, file=None, device=device):
132
 
133
 
134
  # Create a partial function with the device pre-applied
135
- return_prediction_whisper_with_device = partial(return_prediction_whisper, device=device)
 
136
  return_prediction_w2v2_with_device = partial(return_prediction_w2v2, device=device)
137
 
138
  # Load the ASR models
@@ -153,7 +183,7 @@ recap_model.eval()
153
 
154
 
155
  mic_transcribe_whisper = gr.Interface(
156
- fn=return_prediction_whisper_with_device,
157
  inputs=gr.Audio(sources="microphone", type="filepath"),
158
  outputs=gr.Textbox(),
159
  allow_flagging="never",
@@ -161,11 +191,11 @@ mic_transcribe_whisper = gr.Interface(
161
  )
162
 
163
  file_transcribe_whisper = gr.Interface(
164
- fn=return_prediction_whisper_with_device,
165
  inputs=gr.Audio(sources="upload", type="filepath"),
166
  outputs=gr.Textbox(),
167
  allow_flagging="never",
168
- live=False
169
  )
170
 
171
  mic_transcribe_w2v2 = gr.Interface(
 
56
 
57
 
58
  @spaces.GPU(duration=30)
59
+ def return_prediction_whisper_mic(mic=None, device=device):
60
  if mic is not None:
61
  waveform, sr = librosa.load(mic, sr=16000)
62
+ waveform = waveform[:30*sr]
 
 
 
 
63
  whisper_result = whisper_classifier.classify_file_whisper_mkd(waveform, device)
64
  else:
65
+ return "You must provide a mic recording"
66
+
67
  recap_result = recap_sentence(whisper_result[0])
68
 
69
  # If the letter after punct is small, recap it
 
75
  return recap_result
76
 
77
 
78
@spaces.GPU(duration=120)
def return_prediction_whisper_file(file, device=device):
    """Stream a Macedonian Whisper transcription of an uploaded audio file.

    Loads the file at 16 kHz, truncates it to one hour of audio, and runs the
    streaming Whisper classifier. Each decoded segment is re-capitalized using
    the previous segment as left context, and the transcript accumulated so
    far is yielded after every segment (consumed by a Gradio `live=True`
    interface).

    Args:
        file: Filesystem path to the uploaded audio file (Gradio `filepath`).
        device: Torch device the model runs on (pre-bound via functools.partial).

    Yields:
        str: The re-capitalized transcript accumulated so far.
    """
    waveform, sr = librosa.load(file, sr=16000)
    # Cap the input at one hour (3600 s * 16000 samples/s).
    waveform = waveform[:3600 * sr]
    whisper_result = whisper_classifier.classify_file_whisper_mkd_streaming(waveform, device)

    recap_result = ""
    prev_segment = ""

    for segment in whisper_result:
        if prev_segment == "":
            # First segment: no left context available yet.
            recap_segment = recap_sentence(segment[0])
        else:
            # Re-capitalize with the previous segment prepended as context,
            # then strip that context back off the front word-by-word.
            prev_segment_len = len(prev_segment.split())
            recap_segment = recap_sentence(prev_segment + " " + segment[0])
            recap_segment = " ".join(recap_segment.split()[prev_segment_len:])
        prev_segment = segment[0]
        recap_result += recap_segment + " "

        # If the letter following sentence-ending punctuation ('. ', '! ', '? ')
        # is lowercase, uppercase it — the recap model can miss these.
        # (Strings are immutable, so rebinding recap_result does not disturb
        # the enumerate iterator, which walks the snapshot taken at loop entry.)
        for i, letter in enumerate(recap_result):
            if i > 1 and recap_result[i - 2] in [".", "!", "?"] and letter.islower():
                recap_result = recap_result[:i] + letter.upper() + recap_result[i + 1:]

        yield recap_result
109
+
110
+
111
  def return_prediction_compare(mic=None, file=None, device=device):
112
  # pipe_whisper.model.to(device)
113
  # mms_model.to(device)
 
161
 
162
 
163
# Create a partial function with the device pre-applied.
# functools.partial binds device=... so Gradio can invoke each callback with
# the audio input argument alone; the mic callback returns a full string,
# while the file callback is a generator that streams partial transcripts.
return_prediction_whisper_mic_with_device = partial(return_prediction_whisper_mic, device=device)
return_prediction_whisper_file_with_device = partial(return_prediction_whisper_file, device=device)
return_prediction_w2v2_with_device = partial(return_prediction_w2v2, device=device)
167
 
168
  # Load the ASR models
 
183
 
184
 
185
  mic_transcribe_whisper = gr.Interface(
186
+ fn=return_prediction_whisper_mic_with_device,
187
  inputs=gr.Audio(sources="microphone", type="filepath"),
188
  outputs=gr.Textbox(),
189
  allow_flagging="never",
 
191
  )
192
 
193
# Gradio interface for uploaded-file transcription. The callback is a
# generator, and live=True makes Gradio re-render the Textbox with each
# yielded partial transcript as decoding progresses.
# NOTE(review): `allow_flagging` is deprecated in newer Gradio releases in
# favor of `flagging_mode` — confirm against the pinned gradio version.
file_transcribe_whisper = gr.Interface(
    fn=return_prediction_whisper_file_with_device,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs=gr.Textbox(),
    allow_flagging="never",
    live=True
)
200
 
201
  mic_transcribe_w2v2 = gr.Interface(