marquesafonso committed on
Commit
2f59878
·
1 Parent(s): 0f91105

change model to distil-large-v3. fix cli control flow.

Browse files
Files changed (2) hide show
  1. cli.py +11 -11
  2. utils/transcriber.py +1 -12
cli.py CHANGED
@@ -36,17 +36,17 @@ def main(video_url:str,
36
  pbar.update(25)
37
  subtitler(INVIDEO_PATH, SRT_PATH, OUTVIDEO_PATH,fontsize, font, bg_color, text_color)
38
  pbar.update(25)
39
- return
40
- INVIDEO_PATH = os.path.join(INVIDEO_DIR, f"{invideo_filename}.mp4")
41
- INAUDIO_PATH = os.path.join(INVIDEO_DIR, f"{invideo_filename}.m4a")
42
- if not os.path.exists(INAUDIO_PATH):
43
- convert_video_to_audio(INVIDEO_PATH,INAUDIO_PATH)
44
- pbar.update(50)
45
- if not os.path.exists(SRT_PATH):
46
- transcriber(INAUDIO_PATH, SRT_PATH, max_words_per_line)
47
- pbar.update(25)
48
- subtitler(INVIDEO_PATH, SRT_PATH, OUTVIDEO_PATH, fontsize, font, bg_color, text_color)
49
- pbar.update(25)
50
 
51
  if __name__ == '__main__':
52
  parser = ArgumentParser()
 
36
  pbar.update(25)
37
  subtitler(INVIDEO_PATH, SRT_PATH, OUTVIDEO_PATH,fontsize, font, bg_color, text_color)
38
  pbar.update(25)
39
+ else:
40
+ INVIDEO_PATH = os.path.join(INVIDEO_DIR, f"{invideo_filename}.mp4")
41
+ INAUDIO_PATH = os.path.join(INVIDEO_DIR, f"{invideo_filename}.m4a")
42
+ if not os.path.exists(INAUDIO_PATH):
43
+ convert_video_to_audio(INVIDEO_PATH,INAUDIO_PATH)
44
+ pbar.update(50)
45
+ if not os.path.exists(SRT_PATH):
46
+ transcriber(INAUDIO_PATH, SRT_PATH, max_words_per_line)
47
+ pbar.update(25)
48
+ subtitler(INVIDEO_PATH, SRT_PATH, OUTVIDEO_PATH, fontsize, font, bg_color, text_color)
49
+ pbar.update(25)
50
 
51
  if __name__ == '__main__':
52
  parser = ArgumentParser()
utils/transcriber.py CHANGED
@@ -37,17 +37,8 @@ def transcriber(input_path:str,
37
  srt_path:str,
38
  max_words_per_line:int):
39
 
40
- model_size = "large-v3"
41
-
42
- # Run on GPU with FP16
43
- # model = WhisperModel(model_size, device="cuda", compute_type="float16")
44
-
45
- # or run on GPU with INT8
46
- # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
47
- # or run on CPU with INT8
48
- logging.info("Logging Whisper model...")
49
  model = WhisperModel(model_size, device="cpu", compute_type="int8")
50
- logging.info("Starting transcription...")
51
  segments, info = model.transcribe(
52
  input_path,
53
  beam_size=5,
@@ -55,7 +46,5 @@ def transcriber(input_path:str,
55
  vad_parameters=dict(min_silence_duration_ms=500),
56
  word_timestamps=True
57
  )
58
-
59
  logging.info("Detected language '%s' with probability %f" % (info.language, info.language_probability))
60
- logging.info("Writing file...")
61
  write_srt(segments=segments, srt_path=srt_path, max_words_per_line=max_words_per_line)
 
37
  srt_path:str,
38
  max_words_per_line:int):
39
 
40
+ model_size = "distil-large-v3"
 
 
 
 
 
 
 
 
41
  model = WhisperModel(model_size, device="cpu", compute_type="int8")
 
42
  segments, info = model.transcribe(
43
  input_path,
44
  beam_size=5,
 
46
  vad_parameters=dict(min_silence_duration_ms=500),
47
  word_timestamps=True
48
  )
 
49
  logging.info("Detected language '%s' with probability %f" % (info.language, info.language_probability))
 
50
  write_srt(segments=segments, srt_path=srt_path, max_words_per_line=max_words_per_line)