csukuangfj committed on
Commit
074cf4f
1 Parent(s): 0eef9b6

small fixes

Browse files
Files changed (1) hide show
  1. app.py +21 -35
app.py CHANGED
@@ -19,6 +19,7 @@
19
  # References:
20
  # https://gradio.app/docs/#dropdown
21
 
 
22
  import os
23
  import time
24
  from datetime import datetime
@@ -34,7 +35,7 @@ languages = sorted(language_to_models.keys())
34
  def convert_to_wav(in_filename: str) -> str:
35
  """Convert the input audio file to a wave file"""
36
  out_filename = in_filename + ".wav"
37
- print(f"Converting '{in_filename}' to '{out_filename}'")
38
  _ = os.system(f"ffmpeg -hide_banner -i '{in_filename}' '{out_filename}'")
39
  return out_filename
40
 
@@ -46,23 +47,23 @@ def process(
46
  decoding_method: str,
47
  num_active_paths: int,
48
  ) -> str:
49
- print("in_filename", in_filename)
50
- print("language", language)
51
- print("repo_id", repo_id)
52
- print("decoding_method", decoding_method)
53
- print("num_active_paths", num_active_paths)
54
 
55
  filename = convert_to_wav(in_filename)
56
 
57
  now = datetime.now()
58
  date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
59
- print(f"Started at {date_time}")
60
 
61
  start = time.time()
62
  wave, wave_sample_rate = torchaudio.load(filename)
63
 
64
  if wave_sample_rate != sample_rate:
65
- print(
66
  f"Expected sample rate: {sample_rate}. Given: {wave_sample_rate}. "
67
  f"Resampling to {sample_rate}."
68
  )
@@ -86,22 +87,12 @@ def process(
86
  duration = wave.shape[0] / sample_rate
87
  rtf = (end - start) / duration
88
 
89
- print(f"Finished at {date_time} s. Elapsed: {end - start: .3f} s")
90
- print(f"Duration {duration: .3f} s")
91
- print(f"RTF {rtf: .3f}")
92
- print("hyp")
93
- print(hyp)
94
 
95
- html_output = f"""
96
- <div class='result'>
97
- <div class='result_item result_item_success'>
98
- {hyp}
99
- <br/>
100
- </div>
101
- </div>
102
- """
103
-
104
- return html_output
105
 
106
 
107
  title = "# Automatic Speech Recognition with Next-gen Kaldi"
@@ -125,16 +116,7 @@ def update_model_dropdown(language: str):
125
  raise ValueError(f"Unsupported language: {language}")
126
 
127
 
128
- # The css style is copied from
129
- # https://huggingface.co/spaces/alphacep/asr/blob/main/app.py#L112
130
- demo = gr.Blocks(
131
- css="""
132
- .result {display:flex;flex-direction:column}
133
- .result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
134
- .result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
135
- .result_item_error {background-color:#ff7070;color:white;align-self:start}
136
- """,
137
- )
138
 
139
  with demo:
140
  gr.Markdown(title)
@@ -178,8 +160,8 @@ with demo:
178
  optional=False,
179
  label="Upload from disk",
180
  )
181
- uploaded_output = gr.HTML(label="Recognized speech from uploaded file")
182
  upload_button = gr.Button("Submit for recognition")
 
183
 
184
  with gr.TabItem("Record from microphone"):
185
  microphone = gr.Audio(
@@ -190,7 +172,7 @@ with demo:
190
  )
191
 
192
  record_button = gr.Button("Submit for recognition")
193
- recorded_output = gr.HTML(label="Recognized speech from recordings")
194
 
195
  upload_button.click(
196
  process,
@@ -217,4 +199,8 @@ with demo:
217
  gr.Markdown(description)
218
 
219
  if __name__ == "__main__":
 
 
 
 
220
  demo.launch()
 
19
  # References:
20
  # https://gradio.app/docs/#dropdown
21
 
22
+ import logging
23
  import os
24
  import time
25
  from datetime import datetime
 
35
  def convert_to_wav(in_filename: str) -> str:
36
  """Convert the input audio file to a wave file"""
37
  out_filename = in_filename + ".wav"
38
+ logging.info(f"Converting '{in_filename}' to '{out_filename}'")
39
  _ = os.system(f"ffmpeg -hide_banner -i '{in_filename}' '{out_filename}'")
40
  return out_filename
41
 
 
47
  decoding_method: str,
48
  num_active_paths: int,
49
  ) -> str:
50
+ logging.info(f"in_filename: {in_filename}")
51
+ logging.info(f"language: {language}")
52
+ logging.info(f"repo_id: {repo_id}")
53
+ logging.info(f"decoding_method: {decoding_method}")
54
+ logging.info(f"num_active_paths: {num_active_paths}")
55
 
56
  filename = convert_to_wav(in_filename)
57
 
58
  now = datetime.now()
59
  date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
60
+ logging.info(f"Started at {date_time}")
61
 
62
  start = time.time()
63
  wave, wave_sample_rate = torchaudio.load(filename)
64
 
65
  if wave_sample_rate != sample_rate:
66
+ logging.info(
67
  f"Expected sample rate: {sample_rate}. Given: {wave_sample_rate}. "
68
  f"Resampling to {sample_rate}."
69
  )
 
87
  duration = wave.shape[0] / sample_rate
88
  rtf = (end - start) / duration
89
 
90
+ logging.info(f"Finished at {date_time} s. Elapsed: {end - start: .3f} s")
91
+ logging.info(f"Duration {duration: .3f} s")
92
+ logging.info(f"RTF {rtf: .3f}")
93
+ logging.info(f"hyp:\n{hyp}")
 
94
 
95
+ return hyp
 
 
 
 
 
 
 
 
 
96
 
97
 
98
  title = "# Automatic Speech Recognition with Next-gen Kaldi"
 
116
  raise ValueError(f"Unsupported language: {language}")
117
 
118
 
119
+ demo = gr.Blocks()
 
 
 
 
 
 
 
 
 
120
 
121
  with demo:
122
  gr.Markdown(title)
 
160
  optional=False,
161
  label="Upload from disk",
162
  )
 
163
  upload_button = gr.Button("Submit for recognition")
164
+ uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")
165
 
166
  with gr.TabItem("Record from microphone"):
167
  microphone = gr.Audio(
 
172
  )
173
 
174
  record_button = gr.Button("Submit for recognition")
175
+ recorded_output = gr.Textbox(label="Recognized speech from recordings")
176
 
177
  upload_button.click(
178
  process,
 
199
  gr.Markdown(description)
200
 
201
  if __name__ == "__main__":
202
+ formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
203
+
204
+ logging.basicConfig(format=formatter, level=logging.INFO)
205
+
206
  demo.launch()