Update app.py
Browse files
app.py
CHANGED
@@ -56,18 +56,14 @@ def return_prediction_w2v2(mic=None, file=None, device=device):
|
|
56 |
|
57 |
|
58 |
@spaces.GPU(duration=30)
|
59 |
-
def
|
60 |
if mic is not None:
|
61 |
waveform, sr = librosa.load(mic, sr=16000)
|
62 |
-
waveform = waveform[:
|
63 |
-
whisper_result = whisper_classifier.classify_file_whisper_mkd(waveform, device)
|
64 |
-
elif file is not None:
|
65 |
-
waveform, sr = librosa.load(file, sr=16000)
|
66 |
-
waveform = waveform[:60*sr]
|
67 |
whisper_result = whisper_classifier.classify_file_whisper_mkd(waveform, device)
|
68 |
else:
|
69 |
-
return "You must
|
70 |
-
|
71 |
recap_result = recap_sentence(whisper_result[0])
|
72 |
|
73 |
# If the letter after punct is small, recap it
|
@@ -79,6 +75,39 @@ def return_prediction_whisper(mic=None, file=None, device=device):
|
|
79 |
return recap_result
|
80 |
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
def return_prediction_compare(mic=None, file=None, device=device):
|
83 |
# pipe_whisper.model.to(device)
|
84 |
# mms_model.to(device)
|
@@ -132,7 +161,8 @@ def return_prediction_compare(mic=None, file=None, device=device):
|
|
132 |
|
133 |
|
134 |
# Create a partial function with the device pre-applied
|
135 |
-
|
|
|
136 |
return_prediction_w2v2_with_device = partial(return_prediction_w2v2, device=device)
|
137 |
|
138 |
# Load the ASR models
|
@@ -153,7 +183,7 @@ recap_model.eval()
|
|
153 |
|
154 |
|
155 |
mic_transcribe_whisper = gr.Interface(
|
156 |
-
fn=
|
157 |
inputs=gr.Audio(sources="microphone", type="filepath"),
|
158 |
outputs=gr.Textbox(),
|
159 |
allow_flagging="never",
|
@@ -161,11 +191,11 @@ mic_transcribe_whisper = gr.Interface(
|
|
161 |
)
|
162 |
|
163 |
file_transcribe_whisper = gr.Interface(
|
164 |
-
fn=
|
165 |
inputs=gr.Audio(sources="upload", type="filepath"),
|
166 |
outputs=gr.Textbox(),
|
167 |
allow_flagging="never",
|
168 |
-
live=
|
169 |
)
|
170 |
|
171 |
mic_transcribe_w2v2 = gr.Interface(
|
|
|
56 |
|
57 |
|
58 |
@spaces.GPU(duration=30)
|
59 |
+
def return_prediction_whisper_mic(mic=None, device=device):
|
60 |
if mic is not None:
|
61 |
waveform, sr = librosa.load(mic, sr=16000)
|
62 |
+
waveform = waveform[:30*sr]
|
|
|
|
|
|
|
|
|
63 |
whisper_result = whisper_classifier.classify_file_whisper_mkd(waveform, device)
|
64 |
else:
|
65 |
+
return "You must provide a mic recording"
|
66 |
+
|
67 |
recap_result = recap_sentence(whisper_result[0])
|
68 |
|
69 |
# If the letter after punct is small, recap it
|
|
|
75 |
return recap_result
|
76 |
|
77 |
|
78 |
+
@spaces.GPU(duration=120)
def return_prediction_whisper_file(file, device=device):
    """Transcribe an uploaded audio file and stream the re-capitalized text.

    The audio is loaded at 16 kHz, truncated to at most 3600 seconds, and
    passed to ``whisper_classifier.classify_file_whisper_mkd_streaming``.
    Each segment produced by that call is run through ``recap_sentence``
    and appended to the running transcript, which is yielded after every
    segment so the caller can display partial results.

    Args:
        file: Path of the audio file to transcribe (the Gradio input uses
            ``type="filepath"``). ``None`` yields a short notice instead of
            raising.
        device: Device forwarded unchanged to the Whisper classifier.

    Yields:
        str: The transcript accumulated so far, with a trailing space
        after each segment.
    """
    if file is None:
        # Guard against a missing upload (e.g. the input being cleared)
        # instead of letting ``librosa.load`` fail on ``None``.
        yield "You must provide an audio file"
        return

    waveform, sr = librosa.load(file, sr=16000)
    # Keep at most one hour of audio (3600 seconds' worth of samples).
    waveform = waveform[:3600 * sr]
    whisper_result = whisper_classifier.classify_file_whisper_mkd_streaming(waveform, device)

    recap_result = ""
    prev_segment = ""
    for segment in whisper_result:
        # NOTE(review): segment[0] is presumably the segment's text, as in
        # the non-streaming variant -- confirm against the classifier.
        current = segment[0]
        if not prev_segment:
            recap_segment = recap_sentence(current)
        else:
            # Give the recap model the previous segment as left context,
            # then drop that context from its output by word count.
            # NOTE(review): this assumes recap_sentence keeps the number of
            # whitespace-separated words unchanged -- confirm.
            context_len = len(prev_segment.split())
            recap_words = recap_sentence(prev_segment + " " + current).split()
            recap_segment = " ".join(recap_words[context_len:])
        prev_segment = current

        # Earlier text was already fixed on previous iterations, so only
        # the newly appended part needs the capitalization pass.
        already_fixed = len(recap_result)
        recap_result = _capitalize_after_punctuation(
            recap_result + recap_segment + " ", start=already_fixed
        )

        yield recap_result


def _capitalize_after_punctuation(text, start=0):
    """Upper-case lowercase characters that sit two positions after . ! or ?.

    Only indices ``>= start`` are modified; characters before ``start`` are
    still consulted when looking back for the punctuation mark.
    """
    chars = list(text)
    for i in range(max(start, 2), len(chars)):
        if chars[i - 2] in (".", "!", "?") and chars[i].islower():
            chars[i] = chars[i].upper()
    return "".join(chars)
|
109 |
+
|
110 |
+
|
111 |
def return_prediction_compare(mic=None, file=None, device=device):
|
112 |
# pipe_whisper.model.to(device)
|
113 |
# mms_model.to(device)
|
|
|
161 |
|
162 |
|
163 |
# Create a partial function with the device pre-applied
|
164 |
+
return_prediction_whisper_mic_with_device = partial(return_prediction_whisper_mic, device=device)
|
165 |
+
return_prediction_whisper_file_with_device = partial(return_prediction_whisper_file, device=device)
|
166 |
return_prediction_w2v2_with_device = partial(return_prediction_w2v2, device=device)
|
167 |
|
168 |
# Load the ASR models
|
|
|
183 |
|
184 |
|
185 |
mic_transcribe_whisper = gr.Interface(
|
186 |
+
fn=return_prediction_whisper_mic_with_device,
|
187 |
inputs=gr.Audio(sources="microphone", type="filepath"),
|
188 |
outputs=gr.Textbox(),
|
189 |
allow_flagging="never",
|
|
|
191 |
)
|
192 |
|
193 |
# Whisper interface for uploaded audio: the upload is handed to the handler
# as a file path and the handler's output is shown in a text box.
file_transcribe_whisper = gr.Interface(
    fn=return_prediction_whisper_file_with_device,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs=gr.Textbox(),
    allow_flagging="never",  # flagging is disabled for this interface
    live=True,  # live mode is enabled for this interface
)
|
200 |
|
201 |
mic_transcribe_w2v2 = gr.Interface(
|