csukuangfj
committed on
Commit
•
074cf4f
1
Parent(s):
0eef9b6
small fixes
Browse files
app.py
CHANGED
@@ -19,6 +19,7 @@
|
|
19 |
# References:
|
20 |
# https://gradio.app/docs/#dropdown
|
21 |
|
|
|
22 |
import os
|
23 |
import time
|
24 |
from datetime import datetime
|
@@ -34,7 +35,7 @@ languages = sorted(language_to_models.keys())
|
|
34 |
def convert_to_wav(in_filename: str) -> str:
|
35 |
"""Convert the input audio file to a wave file"""
|
36 |
out_filename = in_filename + ".wav"
|
37 |
-
|
38 |
_ = os.system(f"ffmpeg -hide_banner -i '{in_filename}' '{out_filename}'")
|
39 |
return out_filename
|
40 |
|
@@ -46,23 +47,23 @@ def process(
|
|
46 |
decoding_method: str,
|
47 |
num_active_paths: int,
|
48 |
) -> str:
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
|
55 |
filename = convert_to_wav(in_filename)
|
56 |
|
57 |
now = datetime.now()
|
58 |
date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
|
59 |
-
|
60 |
|
61 |
start = time.time()
|
62 |
wave, wave_sample_rate = torchaudio.load(filename)
|
63 |
|
64 |
if wave_sample_rate != sample_rate:
|
65 |
-
|
66 |
f"Expected sample rate: {sample_rate}. Given: {wave_sample_rate}. "
|
67 |
f"Resampling to {sample_rate}."
|
68 |
)
|
@@ -86,22 +87,12 @@ def process(
|
|
86 |
duration = wave.shape[0] / sample_rate
|
87 |
rtf = (end - start) / duration
|
88 |
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
print(hyp)
|
94 |
|
95 |
-
|
96 |
-
<div class='result'>
|
97 |
-
<div class='result_item result_item_success'>
|
98 |
-
{hyp}
|
99 |
-
<br/>
|
100 |
-
</div>
|
101 |
-
</div>
|
102 |
-
"""
|
103 |
-
|
104 |
-
return html_output
|
105 |
|
106 |
|
107 |
title = "# Automatic Speech Recognition with Next-gen Kaldi"
|
@@ -125,16 +116,7 @@ def update_model_dropdown(language: str):
|
|
125 |
raise ValueError(f"Unsupported language: {language}")
|
126 |
|
127 |
|
128 |
-
|
129 |
-
# https://huggingface.co/spaces/alphacep/asr/blob/main/app.py#L112
|
130 |
-
demo = gr.Blocks(
|
131 |
-
css="""
|
132 |
-
.result {display:flex;flex-direction:column}
|
133 |
-
.result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
|
134 |
-
.result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
|
135 |
-
.result_item_error {background-color:#ff7070;color:white;align-self:start}
|
136 |
-
""",
|
137 |
-
)
|
138 |
|
139 |
with demo:
|
140 |
gr.Markdown(title)
|
@@ -178,8 +160,8 @@ with demo:
|
|
178 |
optional=False,
|
179 |
label="Upload from disk",
|
180 |
)
|
181 |
-
uploaded_output = gr.HTML(label="Recognized speech from uploaded file")
|
182 |
upload_button = gr.Button("Submit for recognition")
|
|
|
183 |
|
184 |
with gr.TabItem("Record from microphone"):
|
185 |
microphone = gr.Audio(
|
@@ -190,7 +172,7 @@ with demo:
|
|
190 |
)
|
191 |
|
192 |
record_button = gr.Button("Submit for recognition")
|
193 |
-
recorded_output = gr.
|
194 |
|
195 |
upload_button.click(
|
196 |
process,
|
@@ -217,4 +199,8 @@ with demo:
|
|
217 |
gr.Markdown(description)
|
218 |
|
219 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
220 |
demo.launch()
|
|
|
19 |
# References:
|
20 |
# https://gradio.app/docs/#dropdown
|
21 |
|
22 |
+
import logging
|
23 |
import os
|
24 |
import time
|
25 |
from datetime import datetime
|
|
|
35 |
def convert_to_wav(in_filename: str) -> str:
|
36 |
"""Convert the input audio file to a wave file"""
|
37 |
out_filename = in_filename + ".wav"
|
38 |
+
logging.info(f"Converting '{in_filename}' to '{out_filename}'")
|
39 |
_ = os.system(f"ffmpeg -hide_banner -i '{in_filename}' '{out_filename}'")
|
40 |
return out_filename
|
41 |
|
|
|
47 |
decoding_method: str,
|
48 |
num_active_paths: int,
|
49 |
) -> str:
|
50 |
+
logging.info(f"in_filename: {in_filename}")
|
51 |
+
logging.info(f"language: {language}")
|
52 |
+
logging.info(f"repo_id: {repo_id}")
|
53 |
+
logging.info(f"decoding_method: {decoding_method}")
|
54 |
+
logging.info(f"num_active_paths: {num_active_paths}")
|
55 |
|
56 |
filename = convert_to_wav(in_filename)
|
57 |
|
58 |
now = datetime.now()
|
59 |
date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
|
60 |
+
logging.info(f"Started at {date_time}")
|
61 |
|
62 |
start = time.time()
|
63 |
wave, wave_sample_rate = torchaudio.load(filename)
|
64 |
|
65 |
if wave_sample_rate != sample_rate:
|
66 |
+
logging.info(
|
67 |
f"Expected sample rate: {sample_rate}. Given: {wave_sample_rate}. "
|
68 |
f"Resampling to {sample_rate}."
|
69 |
)
|
|
|
87 |
duration = wave.shape[0] / sample_rate
|
88 |
rtf = (end - start) / duration
|
89 |
|
90 |
+
logging.info(f"Finished at {date_time} s. Elapsed: {end - start: .3f} s")
|
91 |
+
logging.info(f"Duration {duration: .3f} s")
|
92 |
+
logging.info(f"RTF {rtf: .3f}")
|
93 |
+
logging.info(f"hyp:\n{hyp}")
|
|
|
94 |
|
95 |
+
return hyp
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
|
98 |
title = "# Automatic Speech Recognition with Next-gen Kaldi"
|
|
|
116 |
raise ValueError(f"Unsupported language: {language}")
|
117 |
|
118 |
|
119 |
+
demo = gr.Blocks()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
|
121 |
with demo:
|
122 |
gr.Markdown(title)
|
|
|
160 |
optional=False,
|
161 |
label="Upload from disk",
|
162 |
)
|
|
|
163 |
upload_button = gr.Button("Submit for recognition")
|
164 |
+
uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")
|
165 |
|
166 |
with gr.TabItem("Record from microphone"):
|
167 |
microphone = gr.Audio(
|
|
|
172 |
)
|
173 |
|
174 |
record_button = gr.Button("Submit for recognition")
|
175 |
+
recorded_output = gr.Textbox(label="Recognized speech from recordings")
|
176 |
|
177 |
upload_button.click(
|
178 |
process,
|
|
|
199 |
gr.Markdown(description)
|
200 |
|
201 |
if __name__ == "__main__":
|
202 |
+
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
|
203 |
+
|
204 |
+
logging.basicConfig(format=formatter, level=logging.INFO)
|
205 |
+
|
206 |
demo.launch()
|