import gc
import os
import shutil
import subprocess
from pathlib import Path

import click
import gradio as gr
import torch
import nemo.collections.asr as nemo_asr

# Helpers provided by this repo's utils module.
from utils import clean_file_name, convert_to_mono, save_files


def run_nemo_asr(mono_audio_path):
    # Load the Parakeet TDT 0.6B v2 model and transcribe with word/segment/char timestamps.
    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name="nvidia/parakeet-tdt-0.6b-v2")
    output = asr_model.transcribe([mono_audio_path], timestamps=True)

    word_timestamps = output[0].timestamp['word']
    segment_timestamps = output[0].timestamp['segment']
    char_timestamps = output[0].timestamp['char']

    # Free the model and reclaim GPU memory before returning.
    del asr_model
    gc.collect()
    torch.cuda.empty_cache()
    return word_timestamps, segment_timestamps, char_timestamps

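# Sketch of how the returned word timestamps are typically consumed (assumes the
# NeMo timestamp dicts expose the token text plus 'start'/'end' in seconds, as in
# the parakeet-tdt-0.6b-v2 model card; verify against your installed NeMo version):
#
#   for w in word_timestamps:
#       print(f"{w['start']:.2f}s - {w['end']:.2f}s : {w['word']}")

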
def process(file):
    file_path = file.name
    file_ext = Path(file_path).suffix.lower()

    if file_ext in [".mp4", ".mkv"]:
        # Copy the video to a clean path, then extract its audio track with ffmpeg.
        new_file_path = clean_file_name(file_path, unique_id=False)
        shutil.copy(file_path, new_file_path)
        audio_path = new_file_path.replace(file_ext, ".mp3")
        subprocess.run(["ffmpeg", "-y", "-i", new_file_path, audio_path])
        os.remove(new_file_path)
    else:
        audio_path = file_path

    # Downmix to mono, transcribe, and write out the subtitle/transcript files.
    mono_audio_path = convert_to_mono(audio_path)
    word_timestamps, segment_timestamps, char_timestamps = run_nemo_asr(mono_audio_path)
    default_srt, word_srt, shorts_srt, text_path, json_path, raw_text = save_files(mono_audio_path, word_timestamps)

    # Remove the temporary mono file once the outputs have been written.
    if os.path.exists(mono_audio_path):
        os.remove(mono_audio_path)

    return default_srt, word_srt, shorts_srt, text_path, json_path, raw_text

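# Sketch (assumption, for testing outside the Gradio UI): process() only relies on
# the uploaded object's .name attribute, so it can be driven directly, e.g.:
#
#   from types import SimpleNamespace
#   outputs = process(SimpleNamespace(name="sample.wav"))  # hypothetical local file

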
@click.command()
@click.option("--debug", is_flag=True, default=False, help="Enable debug mode.")
@click.option("--share", is_flag=True, default=False, help="Enable sharing of the interface.")
def main(debug, share):
    with gr.Blocks() as demo:
        gr.Markdown("<center><h1 style='font-size: 40px;'>Auto Subtitle Generator</h1></center>")
        gr.Markdown("Note: the SRT generation code still needs improvement.")
        gr.Markdown("[Try on Google Colab](https://colab.research.google.com/github/NeuralFalconYT/parakeet-tdt-subtitle/blob/main/Free_Subtitle.ipynb)")

        with gr.Row():
            with gr.Column():
                upload_file = gr.File(label="Upload Audio or Video File")
                with gr.Row():
                    generate_btn = gr.Button("Generate Subtitle", variant="primary")

            with gr.Column():
                output_default_srt = gr.File(label="Sentence Level SRT File")
                output_word_srt = gr.File(label="Word Level SRT File")

                with gr.Accordion("Other Formats", open=False):
                    output_shorts_srt = gr.File(label="Subtitle For Vertical Video [Shorts or Reels]")
                    output_text_file = gr.File(label="Speech To Text File")
                    output_json = gr.File(label="Word Timestamp JSON")
                    output_text = gr.Text(label="Transcribed Text", lines=6)

        generate_btn.click(
            fn=process,
            inputs=[upload_file],
            outputs=[
                output_default_srt,
                output_word_srt,
                output_shorts_srt,
                output_text_file,
                output_json,
                output_text
            ]
        )

    demo.queue().launch(debug=debug, share=share)


if __name__ == "__main__":
    main()
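
# Usage sketch (the script file name below is an assumption; substitute the real one):
#   python app.py            # local launch
#   python app.py --share    # also create a public Gradio link
#   python app.py --debug    # enable Gradio debug mode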