import whisper
import os
import datetime
import srt
from moviepy.editor import VideoFileClip
import gradio as gr

# Load the Whisper models once at startup
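# Note: keeping every size resident avoids reload latency between requests,
# but the full set (tiny through large) needs several gigabytes of memory.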
model_sizes = ['tiny', 'base', 'small', 'medium', 'large']
models = {size: whisper.load_model(size) for size in model_sizes}

# Task options
tasks = ['transcribe', 'translate']

# Output format options
output_formats = {
    'transcribe': ['Transcription (.txt)', 'Subtitles (.srt)'],
    'translate': ['Translation (.txt)', 'Translated Subtitles (.srt)']
}

# Language options
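# Whisper takes ISO 639-1 codes; 'Auto-detect' is mapped to language=None below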
languages = ['Auto-detect', 'en', 'zh', 'fr', 'es', 'de', 'ja', 'ko']

def is_video_file(file_path):
    video_extensions = ['.mp4', '.avi', '.mov', '.mkv']
    ext = os.path.splitext(file_path)[-1].lower()
    return ext in video_extensions

def extract_audio_from_video(video_path):
    audio_path = video_path.rsplit('.', 1)[0] + '.mp3'
    video = VideoFileClip(video_path)
    # ffmpeg exposes its MP3 encoder as 'libmp3lame'
    video.audio.write_audiofile(audio_path, codec='libmp3lame')
    video.close()
    return audio_path

def generate_output(file_path, model_size, task, output_format, language):
    # Guard against clicking "Generate" before a file has been uploaded
    if not file_path:
        raise gr.Error("Please upload a video or audio file first.")

    # Ensure that the file exists
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist.")

    # If it's a video file, extract the audio
    if is_video_file(file_path):
        audio_path = extract_audio_from_video(file_path)
    else:
        audio_path = file_path

    # Select the pre-loaded model
    model = models[model_size]

    # Transcribe or translate the audio
    result = model.transcribe(
        audio_path,
        task=task,
        language=None if language == "Auto-detect" else language
    )

    # Prepare the output file
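    # The output is written next to the uploaded copy (in Gradio's temp dir)
    # and its path is returned so the gr.File output can offer it for download.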
    base_filename = os.path.splitext(file_path)[0]
    if 'Subtitles' in output_format:
        # Generate SRT content
        subtitles = []
        for segment in result['segments']:
            start = datetime.timedelta(seconds=segment['start'])
            end = datetime.timedelta(seconds=segment['end'])
            text = segment['text'].strip()  # Whisper segment text carries a leading space

            subtitle = srt.Subtitle(index=len(subtitles)+1, start=start, end=end, content=text)
            subtitles.append(subtitle)

        srt_content = srt.compose(subtitles)
        output_file = base_filename + '.srt'
        with open(output_file, "w", encoding='utf-8') as file:
            file.write(srt_content)
    else:
        # Generate TXT content
        transcription_text = result['text'].strip()  # Whisper already returns the full concatenated text
        output_file = base_filename + '.txt'
        with open(output_file, "w", encoding='utf-8') as file:
            file.write(transcription_text)

    return output_file

def update_output_format(task):
    # gr.update works across Gradio versions; Dropdown.update() was removed in Gradio 4
    return gr.update(choices=output_formats[task], value=output_formats[task][0])

with gr.Blocks() as demo:
    gr.Markdown("# 📼 Video Transcription and Subtitles Generator")
    gr.Markdown("Upload a video or audio file to get the transcription or subtitles.")

    with gr.Row():
        file_input = gr.File(
            label="Upload Video or Audio File", 
            file_types=['video', 'audio'],
            type='filepath'
        )

    with gr.Row():
        model_size_input = gr.Dropdown(
            label="Select Whisper Model Size", 
            choices=model_sizes, 
            value='small'
        )
        task_input = gr.Dropdown(
            label="Select Task", 
            choices=tasks, 
            value='transcribe'
        )
        output_format_input = gr.Dropdown(
            label="Select Output Format", 
            choices=output_formats['transcribe'], 
            value=output_formats['transcribe'][0]
        )
        language_input = gr.Dropdown(
            label="Select Original Language (Optional)", 
            choices=languages, 
            value='Auto-detect'
        )

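    # Swap the output format choices whenever the task changes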
    task_input.change(
        fn=update_output_format, 
        inputs=task_input, 
        outputs=output_format_input
    )

    submit_button = gr.Button("Generate")
    output_file = gr.File(label="Download Output File")

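    # Run Whisper on the uploaded file and return the generated file for download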
    submit_button.click(
        fn=generate_output, 
        inputs=[
            file_input, 
            model_size_input, 
            task_input, 
            output_format_input, 
            language_input
        ], 
        outputs=output_file
    )


if __name__ == "__main__":
    demo.launch()