File size: 4,337 Bytes
70a6679 73b0620 70a6679 5dae721 70a6679 bb82bcb 5dae721 70a6679 b2ef9bf 70a6679 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
import whisper
import os
import datetime
import srt
from moviepy.editor import VideoFileClip
import gradio as gr
import tempfile
# Whisper checkpoint names offered in the UI. Every checkpoint is loaded
# eagerly, once, when this module is imported.
model_sizes = ["tiny", "base", "small", "medium", "large"]
models = {
    name: whisper.load_model(name)
    for name in model_sizes
}

# Values forwarded as the ``task`` argument of ``model.transcribe``.
tasks = ["transcribe", "translate"]

# Output-format labels shown in the UI, keyed by task. A label containing
# "Subtitles" selects SRT output; the other labels select plain text.
output_formats = {
    "transcribe": ["Transcription (.txt)", "Subtitles (.srt)"],
    "translate": ["Translation (.txt)", "Translated Subtitles (.srt)"],
}

# Source-language choices; "Auto-detect" is translated to ``language=None``
# before calling Whisper.
languages = ["Auto-detect", "en", "zh", "fr", "es", "de", "ja", "ko"]
def is_video_file(file_path):
    """Return True when *file_path* ends in a recognised video extension.

    Only the final extension is examined, case-insensitively; the file's
    contents are never inspected. Recognised extensions are ``.mp4``,
    ``.avi``, ``.mov`` and ``.mkv``.
    """
    _, extension = os.path.splitext(file_path)
    return extension.lower() in {".mp4", ".avi", ".mov", ".mkv"}
def extract_audio_from_video(video_path):
    """Extract the audio track of a video into an MP3 file next to it.

    Args:
        video_path: Path to the source video file.

    Returns:
        Path of the written audio file: ``video_path`` with its extension
        replaced by ``.mp3``.

    Raises:
        ValueError: If the video contains no audio track.
    """
    # splitext only looks at the last path component, so a dot in a directory
    # name is never mistaken for the extension separator.
    audio_path = os.path.splitext(video_path)[0] + '.mp3'
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError(f"The video {video_path} has no audio track.")
        video.audio.write_audiofile(audio_path, codec='mp3')
    finally:
        # Release the reader resources held by the clip, even on failure.
        video.close()
    return audio_path
def generate_output(file_path, model_size, task, output_format, language):
    """Transcribe or translate a media file and write the result to disk.

    Args:
        file_path: Path to the uploaded audio or video file.
        model_size: Key into the module-level ``models`` dict.
        task: Whisper task, forwarded to ``model.transcribe``.
        output_format: Label chosen in the UI. Any label containing
            ``'Subtitles'`` produces an ``.srt`` file; anything else
            produces a ``.txt`` file.
        language: Language code, or ``'Auto-detect'`` to pass
            ``language=None`` to Whisper.

    Returns:
        Path of the written output file: ``file_path`` with its extension
        replaced by ``.srt`` or ``.txt``.

    Raises:
        ValueError: If no file path was supplied.
        FileNotFoundError: If ``file_path`` does not exist.
    """
    # The callback receives an empty value when nothing was uploaded; fail
    # with a clear message instead of a TypeError from os.path.exists.
    if not file_path:
        raise ValueError("No file was provided. Please upload a video or audio file.")
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist.")

    # Videos are first reduced to an intermediate audio file.
    audio_was_extracted = is_video_file(file_path)
    if audio_was_extracted:
        audio_path = extract_audio_from_video(file_path)
    else:
        audio_path = file_path

    try:
        result = models[model_size].transcribe(
            audio_path,
            task=task,
            language=None if language == "Auto-detect" else language,
        )
    finally:
        if audio_was_extracted:
            # Best-effort cleanup of the intermediate audio file; failing to
            # delete it must not mask the transcription outcome.
            try:
                os.remove(audio_path)
            except OSError:
                pass

    # Whisper segment texts typically carry leading whitespace; strip it so
    # subtitle cues and the joined transcript are clean.
    segments = result['segments']
    texts = [segment['text'].strip() for segment in segments]

    base_filename = os.path.splitext(file_path)[0]
    if 'Subtitles' in output_format:
        subtitles = [
            srt.Subtitle(
                index=index,
                start=datetime.timedelta(seconds=segment['start']),
                end=datetime.timedelta(seconds=segment['end']),
                content=text,
            )
            for index, (segment, text) in enumerate(zip(segments, texts), start=1)
        ]
        content = srt.compose(subtitles)
        output_file = base_filename + '.srt'
    else:
        # Skip empty segments so the transcript never contains double spaces.
        content = " ".join(text for text in texts if text)
        output_file = base_filename + '.txt'

    with open(output_file, "w", encoding='utf-8') as file:
        file.write(content)
    return output_file
def update_output_format(task):
    """Refresh the output-format dropdown after the task selection changes.

    Args:
        task: Selected task; must be a key of ``output_formats``.

    Returns:
        A Gradio update that replaces the dropdown's choices with the formats
        available for ``task`` and selects the first of them.
    """
    choices = output_formats[task]
    # gr.update() is the component-agnostic form; the per-component
    # ``gr.Dropdown.update`` classmethod was removed in Gradio 4.
    return gr.update(choices=choices, value=choices[0])
# NOTE(review): the original indentation was lost, so the grouping of
# components into rows below is reconstructed — confirm against the intended
# layout.
with gr.Blocks() as demo:
    gr.Markdown("# 📼 Video Transcription and Subtitles Generator")
    gr.Markdown("Upload a video or audio file to get the transcription or subtitles.")

    # Upload widget; its value is handed to the callback as a file path.
    with gr.Row():
        file_input = gr.File(
            label="Upload Video or Audio File",
            file_types=["video", "audio"],
            type="filepath",
        )

    # Transcription settings.
    with gr.Row():
        model_size_input = gr.Dropdown(
            label="Select Whisper Model Size",
            choices=model_sizes,
            value="small",
        )
        task_input = gr.Dropdown(
            label="Select Task",
            choices=tasks,
            value="transcribe",
        )
        output_format_input = gr.Dropdown(
            label="Select Output Format",
            choices=output_formats["transcribe"],
            value=output_formats["transcribe"][0],
        )
        language_input = gr.Dropdown(
            label="Select Original Language (Optional)",
            choices=languages,
            value="Auto-detect",
        )

    # Keep the format choices in sync with the selected task.
    task_input.change(
        fn=update_output_format,
        inputs=task_input,
        outputs=output_format_input,
    )

    submit_button = gr.Button("Generate")
    output_file = gr.File(label="Download Output File")

    # Run the transcription and expose the resulting file for download.
    submit_button.click(
        fn=generate_output,
        inputs=[
            file_input,
            model_size_input,
            task_input,
            output_format_input,
            language_input,
        ],
        outputs=output_file,
    )

demo.launch()
|