Commit 2f59878
Parent(s): 0f91105

change model to distil-large-v3. fix cli control flow.

Files changed:
- cli.py (+11 -11)
- utils/transcriber.py (+1 -12)
cli.py CHANGED

@@ -36,17 +36,17 @@ def main(video_url:str,
         pbar.update(25)
         subtitler(INVIDEO_PATH, SRT_PATH, OUTVIDEO_PATH,fontsize, font, bg_color, text_color)
         pbar.update(25)
-
-
-
-
-
-
-
-
-
-
-
+    else:
+        INVIDEO_PATH = os.path.join(INVIDEO_DIR, f"{invideo_filename}.mp4")
+        INAUDIO_PATH = os.path.join(INVIDEO_DIR, f"{invideo_filename}.m4a")
+        if not os.path.exists(INAUDIO_PATH):
+            convert_video_to_audio(INVIDEO_PATH,INAUDIO_PATH)
+        pbar.update(50)
+        if not os.path.exists(SRT_PATH):
+            transcriber(INAUDIO_PATH, SRT_PATH, max_words_per_line)
+        pbar.update(25)
+        subtitler(INVIDEO_PATH, SRT_PATH, OUTVIDEO_PATH, fontsize, font, bg_color, text_color)
+        pbar.update(25)

 if __name__ == '__main__':
     parser = ArgumentParser()
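The new else-branch reuses intermediate artifacts instead of regenerating them: audio extraction and transcription are skipped whenever the .m4a or .srt file is already on disk, and only the subtitle burn-in always runs. Below is a minimal, self-contained sketch of that guard pattern; the stub functions and the process_local_video wrapper are illustrative stand-ins, not code from this repo.

import os

# Stand-ins for the repo's pipeline steps (convert_video_to_audio, transcriber,
# subtitler). These stubs only create empty files so the sketch runs on its own.
def convert_video_to_audio(video_path: str, audio_path: str) -> None:
    open(audio_path, "w").close()

def transcriber(audio_path: str, srt_path: str, max_words_per_line: int) -> None:
    open(srt_path, "w").close()

def subtitler(video_path: str, srt_path: str, out_path: str) -> None:
    open(out_path, "w").close()

def process_local_video(invideo_dir: str, name: str, srt_path: str, out_path: str,
                        max_words_per_line: int = 7) -> None:
    """Same shape as the new else-branch: reuse cached .m4a/.srt files when present."""
    video_path = os.path.join(invideo_dir, f"{name}.mp4")
    audio_path = os.path.join(invideo_dir, f"{name}.m4a")
    if not os.path.exists(audio_path):           # skip audio extraction if cached
        convert_video_to_audio(video_path, audio_path)
    if not os.path.exists(srt_path):             # skip transcription if cached
        transcriber(audio_path, srt_path, max_words_per_line)
    subtitler(video_path, srt_path, out_path)    # subtitle burn-in always runs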
utils/transcriber.py CHANGED

@@ -37,17 +37,8 @@ def transcriber(input_path:str,
                 srt_path:str,
                 max_words_per_line:int):

-    model_size = "large-v3"
-
-    # Run on GPU with FP16
-    # model = WhisperModel(model_size, device="cuda", compute_type="float16")
-
-    # or run on GPU with INT8
-    # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
-    # or run on CPU with INT8
-    logging.info("Logging Whisper model...")
+    model_size = "distil-large-v3"
     model = WhisperModel(model_size, device="cpu", compute_type="int8")
-    logging.info("Starting transcription...")
     segments, info = model.transcribe(
         input_path,
         beam_size=5,
@@ -55,7 +46,5 @@ def transcriber(input_path:str,
         vad_parameters=dict(min_silence_duration_ms=500),
         word_timestamps=True
     )
-
     logging.info("Detected language '%s' with probability %f" % (info.language, info.language_probability))
-    logging.info("Writing file...")
     write_srt(segments=segments, srt_path=srt_path, max_words_per_line=max_words_per_line)
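For reference, the updated transcriber() reduces to the faster-whisper calls below. This is a standalone sketch under stated assumptions: the input path is hypothetical, vad_filter=True is assumed (faster-whisper only applies vad_parameters when it is enabled), and SRT writing is left to the repo's write_srt(). distil-large-v3 is the distilled, English-focused variant of large-v3 and is considerably faster, which suits the CPU/INT8 setup used here.

from faster_whisper import WhisperModel

# distil-large-v3 on CPU with INT8 quantization, mirroring the updated transcriber()
model = WhisperModel("distil-large-v3", device="cpu", compute_type="int8")

segments, info = model.transcribe(
    "audio.m4a",                      # hypothetical input path
    beam_size=5,
    vad_filter=True,                  # assumed; vad_parameters has no effect without it
    vad_parameters=dict(min_silence_duration_ms=500),
    word_timestamps=True,
)

print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
for segment in segments:              # lazy generator; iteration drives the decoding
    for word in segment.words:        # word-level timing, as used to split SRT lines
        print(f"[{word.start:.2f} -> {word.end:.2f}] {word.word}")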