import whisper
import os
import datetime
import srt
from moviepy.editor import VideoFileClip
import gradio as gr

# Load the Whisper models once at startup
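# Note: keeping every size resident avoids reload latency between requests,
# but the full set (tiny through large) needs several gigabytes of memory.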
model_sizes = ['tiny', 'base', 'small', 'medium', 'large']
models = {size: whisper.load_model(size) for size in model_sizes}

# Task options
tasks = ['transcribe', 'translate']

# Output format options
output_formats = {
    'transcribe': ['Transcription (.txt)', 'Subtitles (.srt)'],
    'translate': ['Translation (.txt)', 'Translated Subtitles (.srt)']
}

# Language options
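# Whisper takes ISO 639-1 codes; 'Auto-detect' is mapped to language=None below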
languages = ['Auto-detect', 'en', 'zh', 'fr', 'es', 'de', 'ja', 'ko']

def is_video_file(file_path):
    video_extensions = ['.mp4', '.avi', '.mov', '.mkv']
    ext = os.path.splitext(file_path)[-1].lower()
    return ext in video_extensions

def extract_audio_from_video(video_path):
    audio_path = video_path.rsplit('.', 1)[0] + '.mp3'
    video = VideoFileClip(video_path)
    # ffmpeg exposes its MP3 encoder as 'libmp3lame'
    video.audio.write_audiofile(audio_path, codec='libmp3lame')
    video.close()
    return audio_path

def generate_output(file_path, model_size, task, output_format, language):
    # Guard against clicking "Generate" before a file has been uploaded
    if not file_path:
        raise gr.Error("Please upload a video or audio file first.")

    # Ensure that the file exists
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist.")

    # If it's a video file, extract the audio
    if is_video_file(file_path):
        audio_path = extract_audio_from_video(file_path)
    else:
        audio_path = file_path

    # Select the pre-loaded model
    model = models[model_size]

    # Transcribe or translate the audio
    result = model.transcribe(
        audio_path,
        task=task,
        language=None if language == "Auto-detect" else language
    )

    # Prepare the output file
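    # The output is written next to the uploaded copy (in Gradio's temp dir)
    # and its path is returned so the gr.File output can offer it for download.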
    base_filename = os.path.splitext(file_path)[0]
    if 'Subtitles' in output_format:
        # Generate SRT content
        subtitles = []
        for segment in result['segments']:
            start = datetime.timedelta(seconds=segment['start'])
            end = datetime.timedelta(seconds=segment['end'])
            text = segment['text'].strip()  # Whisper segment text carries a leading space

            subtitle = srt.Subtitle(index=len(subtitles)+1, start=start, end=end, content=text)
            subtitles.append(subtitle)

        srt_content = srt.compose(subtitles)
        output_file = base_filename + '.srt'
        with open(output_file, "w", encoding='utf-8') as file:
            file.write(srt_content)
    else:
        # Generate TXT content
        transcription_text = result['text'].strip()  # Whisper already returns the full concatenated text
        output_file = base_filename + '.txt'
        with open(output_file, "w", encoding='utf-8') as file:
            file.write(transcription_text)

    return output_file

def update_output_format(task):
    # gr.update works across Gradio versions; Dropdown.update() was removed in Gradio 4
    return gr.update(choices=output_formats[task], value=output_formats[task][0])

with gr.Blocks() as demo:
    gr.Markdown("# 📼 Video Transcription and Subtitles Generator")
    gr.Markdown("Upload a video or audio file to get the transcription or subtitles.")

    with gr.Row():
        file_input = gr.File(
            label="Upload Video or Audio File", 
            file_types=['video', 'audio'],
            type='filepath'
        )

    with gr.Row():
        model_size_input = gr.Dropdown(
            label="Select Whisper Model Size", 
            choices=model_sizes, 
            value='small'
        )
        task_input = gr.Dropdown(
            label="Select Task", 
            choices=tasks, 
            value='transcribe'
        )
        output_format_input = gr.Dropdown(
            label="Select Output Format", 
            choices=output_formats['transcribe'], 
            value=output_formats['transcribe'][0]
        )
        language_input = gr.Dropdown(
            label="Select Original Language (Optional)", 
            choices=languages, 
            value='Auto-detect'
        )

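    # Swap the output format choices whenever the task changes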
    task_input.change(
        fn=update_output_format, 
        inputs=task_input, 
        outputs=output_format_input
    )

    submit_button = gr.Button("Generate")
    output_file = gr.File(label="Download Output File")

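    # Run Whisper on the uploaded file and return the generated file for download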
    submit_button.click(
        fn=generate_output, 
        inputs=[
            file_input, 
            model_size_input, 
            task_input, 
            output_format_input, 
            language_input
        ], 
        outputs=output_file
    )


if __name__ == "__main__":
    demo.launch()