|
import whisper |
|
import os |
|
import datetime |
|
import srt |
|
from moviepy.editor import VideoFileClip |
|
import gradio as gr |
|
import tempfile |
|
|
|
|
|
# Whisper checkpoint names offered in the UI.
model_sizes = ['tiny', 'base', 'small', 'medium', 'large']


class _LazyModelCache(dict):
    """Dict of Whisper models that loads each checkpoint on first lookup.

    Loading every checkpoint up front makes start-up slow and keeps all of
    them in memory even when only one is ever requested. Here a model is
    loaded the first time ``models[size]`` is evaluated and then reused.

    Note: only subscript access triggers loading; ``get`` and ``in`` see
    just the models loaded so far.
    """

    def __missing__(self, size):
        # Keep the original contract: unknown sizes raise KeyError.
        if size not in model_sizes:
            raise KeyError(size)
        model = whisper.load_model(size)
        self[size] = model
        return model


# Maps model size -> loaded Whisper model (populated on demand).
models = _LazyModelCache()

# Values passed as the ``task`` argument of ``model.transcribe``.
tasks = ['transcribe', 'translate']

# Output-format choices shown in the UI for each task.
output_formats = {
    'transcribe': ['Transcription (.txt)', 'Subtitles (.srt)'],
    'translate': ['Translation (.txt)', 'Translated Subtitles (.srt)'],
}

# Source-language choices; 'Auto-detect' is mapped to ``language=None``.
languages = ['Auto-detect', 'en', 'zh', 'fr', 'es', 'de', 'ja', 'ko']
|
|
|
def is_video_file(file_path):
    """Return True when the path's extension is a recognised video type.

    The comparison is case-insensitive and looks only at the final
    extension of ``file_path``.
    """
    _, suffix = os.path.splitext(file_path)
    return suffix.lower() in ('.mp4', '.avi', '.mov', '.mkv')
|
|
|
def extract_audio_from_video(video_path):
    """Write the audio track of a video to an .mp3 beside it.

    Args:
        video_path: Path to the video file.

    Returns:
        Path of the written audio file: ``video_path`` with its extension
        replaced by ``.mp3``.

    Raises:
        ValueError: If the video has no audio track.
    """
    # splitext only touches the final path component, so a dot in a
    # directory name cannot truncate the path.
    audio_path = os.path.splitext(video_path)[0] + '.mp3'

    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError(f"The file {video_path} has no audio track.")
        video.audio.write_audiofile(audio_path, codec='mp3')
    finally:
        # Release the clip's underlying reader even when writing fails.
        video.close()

    return audio_path
|
|
|
def generate_output(file_path, model_size, task, output_format, language):
    """Transcribe or translate a media file and write the result to disk.

    Args:
        file_path: Path to the uploaded audio or video file.
        model_size: Key into ``models`` selecting the Whisper model.
        task: Passed to ``model.transcribe`` as ``task``.
        output_format: UI format label; any label containing ``'Subtitles'``
            produces an ``.srt`` file, anything else a ``.txt`` file.
        language: Language code, or ``"Auto-detect"`` to pass ``None``.

    Returns:
        Path of the written output file (input path with its extension
        replaced by ``.srt`` or ``.txt``).

    Raises:
        ValueError: If no file path was supplied.
        FileNotFoundError: If ``file_path`` does not exist.
    """
    # No upload gives None; report that clearly instead of letting
    # os.path.exists raise a TypeError.
    if not file_path:
        raise ValueError("No file was provided.")
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist.")

    # Videos are converted to an intermediate audio file first.
    extracted = is_video_file(file_path)
    audio_path = extract_audio_from_video(file_path) if extracted else file_path

    try:
        result = models[model_size].transcribe(
            audio_path,
            task=task,
            language=None if language == "Auto-detect" else language,
        )
    finally:
        # The intermediate audio is only needed for transcription; remove it
        # so repeated requests do not accumulate files.
        if extracted and os.path.exists(audio_path):
            os.remove(audio_path)

    segments = result['segments']
    base_filename = os.path.splitext(file_path)[0]

    # Segment text is stripped so that surrounding whitespace does not end up
    # as leading or doubled spaces in the output.
    if 'Subtitles' in output_format:
        subtitles = [
            srt.Subtitle(
                index=index,
                start=datetime.timedelta(seconds=segment['start']),
                end=datetime.timedelta(seconds=segment['end']),
                content=segment['text'].strip(),
            )
            for index, segment in enumerate(segments, start=1)
        ]
        content = srt.compose(subtitles)
        output_file = base_filename + '.srt'
    else:
        content = " ".join(segment['text'].strip() for segment in segments)
        output_file = base_filename + '.txt'

    with open(output_file, "w", encoding='utf-8') as file:
        file.write(content)

    return output_file
|
|
|
def update_output_format(task):
    """Return a component update offering the output formats for ``task``.

    The first format listed for the task becomes the selected value.
    ``gr.update`` is used rather than ``gr.Dropdown.update`` because the
    per-component ``update`` methods were removed in Gradio 4.
    """
    formats = output_formats[task]
    return gr.update(choices=formats, value=formats[0])
|
|
|
# Gradio UI: upload a file, pick model/task/format/language, download result.
with gr.Blocks() as demo:
    # NOTE(review): the heading emoji was mojibake ("๐ผ") in the source;
    # restored as the videocassette emoji — confirm the intended glyph.
    gr.Markdown("# 📼 Video Transcription and Subtitles Generator")
    gr.Markdown("Upload a video or audio file to get the transcription or subtitles.")

    with gr.Row():
        file_input = gr.File(
            label="Upload Video or Audio File",
            file_types=['video', 'audio'],
            type='filepath',
        )

    with gr.Row():
        model_size_input = gr.Dropdown(
            label="Select Whisper Model Size",
            choices=model_sizes,
            value='small',
        )
        task_input = gr.Dropdown(
            label="Select Task",
            choices=tasks,
            value='transcribe',
        )
        output_format_input = gr.Dropdown(
            label="Select Output Format",
            choices=output_formats['transcribe'],
            value=output_formats['transcribe'][0],
        )
        language_input = gr.Dropdown(
            label="Select Original Language (Optional)",
            choices=languages,
            value='Auto-detect',
        )

    # Refresh the format dropdown whenever the task changes.
    task_input.change(
        fn=update_output_format,
        inputs=task_input,
        outputs=output_format_input,
    )

    submit_button = gr.Button("Generate")
    output_file = gr.File(label="Download Output File")

    # Input order matches the parameter order of generate_output.
    submit_button.click(
        fn=generate_output,
        inputs=[
            file_input,
            model_size_input,
            task_input,
            output_format_input,
            language_input,
        ],
        outputs=output_file,
    )

demo.launch()
|
|