bofenghuang committed on
Commit
0585497
1 Parent(s): 2eface4
Files changed (2) hide show
  1. run_demo_ct2.py +2 -3
  2. run_demo_openai_layout.py +3 -3
run_demo_ct2.py CHANGED
@@ -149,8 +149,7 @@ def infer(model, filename, with_timestamps, return_df=False):
149
  else:
150
  # text = model.transcribe(filename, without_timestamps=True, **GEN_KWARGS)["text"]
151
  model_outputs, _ = model.transcribe(filename, without_timestamps=True, **GEN_KWARGS)
152
- model_outputs = list(model_outputs)
153
- text = model_outputs[0].text
154
  if return_df:
155
  return pd.DataFrame({"text": sent_tokenize(text)})
156
  else:
@@ -201,7 +200,7 @@ def video_transcribe(video_file_path, with_timestamps, model_name=DEFAULT_MODEL_
201
  raise ValueError("Failed to transcribe video as no video_file_path has been defined")
202
 
203
  audio_file_path = re.sub(r"\.mp4$", ".wav", video_file_path)
204
- os.system(f'ffmpeg -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{audio_file_path}"')
205
 
206
  model = maybe_load_cached_pipeline(model_name)
207
  # text = model.transcribe("audio.mp3", **GEN_KWARGS)["text"]
 
149
  else:
150
  # text = model.transcribe(filename, without_timestamps=True, **GEN_KWARGS)["text"]
151
  model_outputs, _ = model.transcribe(filename, without_timestamps=True, **GEN_KWARGS)
152
+ text = " ".join([segment.text for segment in model_outputs])
 
153
  if return_df:
154
  return pd.DataFrame({"text": sent_tokenize(text)})
155
  else:
 
200
  raise ValueError("Failed to transcribe video as no video_file_path has been defined")
201
 
202
  audio_file_path = re.sub(r"\.mp4$", ".wav", video_file_path)
203
+ os.system(f'ffmpeg -hide_banner -loglevel error -y -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{audio_file_path}"')
204
 
205
  model = maybe_load_cached_pipeline(model_name)
206
  # text = model.transcribe("audio.mp3", **GEN_KWARGS)["text"]
run_demo_openai_layout.py CHANGED
@@ -216,7 +216,7 @@ with gr.Blocks() as demo:
216
  </div>
217
  Transcribe long-form microphone or audio inputs!
218
 
219
- Demo uses the fine-tuned checkpoint: <a href='https://huggingface.co/{DEFAULT_MODEL_NAME}' target='_blank'><b>{DEFAULT_MODEL_NAME}</b></a> to transcribe audio files of arbitrary length.
220
  """
221
  )
222
 
@@ -252,7 +252,7 @@ with gr.Blocks() as demo:
252
  # </div>
253
  # Transcribe long-form YouTube videos!
254
 
255
- # Demo uses the fine-tuned checkpoint: <a href='https://huggingface.co/{DEFAULT_MODEL_NAME}' target='_blank'><b>{DEFAULT_MODEL_NAME}</b></a> to transcribe video files of arbitrary length.
256
  # """
257
  # )
258
 
@@ -282,7 +282,7 @@ with gr.Blocks() as demo:
282
  </div>
283
  Transcribe long-form YouTube videos or uploaded video inputs!
284
 
285
- Demo uses the fine-tuned checkpoint: <a href='https://huggingface.co/{DEFAULT_MODEL_NAME}' target='_blank'><b>{DEFAULT_MODEL_NAME}</b></a> to transcribe video files of arbitrary length.
286
  """
287
  )
288
 
 
216
  </div>
217
  Transcribe long-form microphone or audio inputs!
218
 
219
+ Demo uses the fine-tuned checkpoint <a href='https://huggingface.co/{DEFAULT_MODEL_NAME}' target='_blank'><b>{DEFAULT_MODEL_NAME}</b></a> to transcribe audio files of arbitrary length.
220
  """
221
  )
222
 
 
252
  # </div>
253
  # Transcribe long-form YouTube videos!
254
 
255
+ # Demo uses the fine-tuned checkpoint <a href='https://huggingface.co/{DEFAULT_MODEL_NAME}' target='_blank'><b>{DEFAULT_MODEL_NAME}</b></a> to transcribe video files of arbitrary length.
256
  # """
257
  # )
258
 
 
282
  </div>
283
  Transcribe long-form YouTube videos or uploaded video inputs!
284
 
285
+ Demo uses the fine-tuned checkpoint <a href='https://huggingface.co/{DEFAULT_MODEL_NAME}' target='_blank'><b>{DEFAULT_MODEL_NAME}</b></a> to transcribe video files of arbitrary length.
286
  """
287
  )
288