Spaces:

Maxkillor
/

video-transcription-and-subtitling-1.0

Sleeping

App Files Files Community

video-transcription-and-subtitling-1.0 / app.py

Maxkillor

Update app.py

c5fe245 verified about 2 months ago

raw

history blame contribute delete

4.34 kB

	import whisper
	import os
	import datetime
	import srt
	from moviepy.editor import VideoFileClip
	import gradio as gr
	import tempfile

	# Whisper checkpoint names offered in the UI. Every one of them is loaded
	# eagerly below, so requests never pay the model-loading cost.
	model_sizes = ['tiny', 'base', 'small', 'medium', 'large']
	models = dict((size, whisper.load_model(size)) for size in model_sizes)

	# Tasks that can be passed to the model's transcribe() call.
	tasks = ['transcribe', 'translate']

	# Output-format labels shown in the UI, keyed by task.
	output_formats = dict(
	    transcribe=['Transcription (.txt)', 'Subtitles (.srt)'],
	    translate=['Translation (.txt)', 'Translated Subtitles (.srt)'],
	)

	# Source-language choices; 'Auto-detect' is mapped to language=None by
	# generate_output.
	languages = ['Auto-detect', 'en', 'zh', 'fr', 'es', 'de', 'ja', 'ko']

	def is_video_file(file_path):
	    """Return True when *file_path* ends in a recognised video extension.

	    Only the final suffix is inspected and the comparison is
	    case-insensitive. Recognised suffixes: .mp4, .avi, .mov, .mkv.
	    """
	    _, suffix = os.path.splitext(file_path)
	    return suffix.lower() in ('.mp4', '.avi', '.mov', '.mkv')

	def extract_audio_from_video(video_path):
	    """Write the audio track of a video to an MP3 file beside it.

	    Args:
	        video_path: Path to the source video file.

	    Returns:
	        Path of the written audio file: ``video_path`` with its final
	        extension replaced by ``.mp3``.

	    Raises:
	        ValueError: If the video has no audio track.
	    """
	    # splitext only inspects the last path component, so a dot inside a
	    # directory name cannot truncate the path the way rsplit('.', 1) could.
	    audio_path = os.path.splitext(video_path)[0] + '.mp3'
	    video = VideoFileClip(video_path)
	    try:
	        if video.audio is None:
	            raise ValueError(f"The video {video_path} has no audio track.")
	        video.audio.write_audiofile(audio_path, codec='mp3')
	    finally:
	        # Always release whatever the clip holds open, even on failure.
	        video.close()
	    return audio_path

	def generate_output(file_path, model_size, task, output_format, language):
	    """Transcribe or translate a media file and save the result next to it.

	    Args:
	        file_path: Path to the uploaded audio or video file.
	        model_size: Key into the module-level ``models`` dict.
	        task: Task name forwarded to the model's ``transcribe`` call.
	        output_format: Format label from the UI. Any label containing
	            ``'Subtitles'`` produces an ``.srt`` file; anything else
	            produces a ``.txt`` file.
	        language: Language code of the source audio, or ``'Auto-detect'``
	            to pass ``language=None`` to the model.

	    Returns:
	        Path of the written output file (``file_path`` with its extension
	        replaced by ``.srt`` or ``.txt``).

	    Raises:
	        ValueError: If no file path was supplied.
	        FileNotFoundError: If ``file_path`` does not exist.
	    """
	    # The UI passes None when the button is clicked without an upload.
	    if not file_path:
	        raise ValueError("No file was provided.")
	    if not os.path.exists(file_path):
	        raise FileNotFoundError(f"The file {file_path} does not exist.")

	    # Video input is first reduced to an audio file; audio is used as-is.
	    if is_video_file(file_path):
	        audio_path = extract_audio_from_video(file_path)
	    else:
	        audio_path = file_path

	    try:
	        result = models[model_size].transcribe(
	            audio_path,
	            task=task,
	            language=None if language == "Auto-detect" else language,
	        )
	    finally:
	        # Remove the intermediate audio we created; never the user's file.
	        if audio_path != file_path:
	            try:
	                os.remove(audio_path)
	            except OSError:
	                # Best-effort cleanup: a leftover temp file must not mask
	                # the transcription result or its error.
	                pass

	    # Whisper segment text typically carries leading whitespace; strip it
	    # so cues do not start with a space and joined text has single spaces.
	    segments = result['segments']
	    base_filename = os.path.splitext(file_path)[0]

	    if 'Subtitles' in output_format:
	        subtitles = [
	            srt.Subtitle(
	                index=index,
	                start=datetime.timedelta(seconds=segment['start']),
	                end=datetime.timedelta(seconds=segment['end']),
	                content=segment['text'].strip(),
	            )
	            for index, segment in enumerate(segments, start=1)
	        ]
	        content = srt.compose(subtitles)
	        output_file = base_filename + '.srt'
	    else:
	        content = " ".join(segment['text'].strip() for segment in segments)
	        output_file = base_filename + '.txt'

	    with open(output_file, "w", encoding='utf-8') as file:
	        file.write(content)

	    return output_file

	def update_output_format(task):
	    """Return a dropdown update listing the output formats valid for *task*.

	    Args:
	        task: A key of the module-level ``output_formats`` dict.

	    Returns:
	        A Gradio update that replaces the dropdown's choices with the
	        formats for ``task`` and selects the first one.
	    """
	    choices = output_formats[task]
	    # gr.update is the version-independent helper. The per-component
	    # gr.Dropdown.update classmethod was removed in Gradio 4, which this
	    # file already targets (gr.File(type='filepath')).
	    return gr.update(choices=choices, value=choices[0])

	# Gradio interface: one upload row, one options row, then the action button
	# and the download slot for the generated file.
	with gr.Blocks() as demo:
	    gr.Markdown("# 📼 Video Transcription and Subtitles Generator")
	    gr.Markdown("Upload a video or audio file to get the transcription or subtitles.")

	    # Upload row: the handler receives the uploaded file as a path string.
	    with gr.Row():
	        file_input = gr.File(label="Upload Video or Audio File", file_types=['video', 'audio'], type='filepath')

	    # Options row: model, task, output format and source language.
	    with gr.Row():
	        model_size_input = gr.Dropdown(label="Select Whisper Model Size", choices=model_sizes, value='small')
	        task_input = gr.Dropdown(label="Select Task", choices=tasks, value='transcribe')
	        output_format_input = gr.Dropdown(
	            label="Select Output Format",
	            choices=output_formats['transcribe'],
	            value=output_formats['transcribe'][0],
	        )
	        language_input = gr.Dropdown(label="Select Original Language (Optional)", choices=languages, value='Auto-detect')

	    # Keep the format choices in sync with the selected task.
	    task_input.change(fn=update_output_format, inputs=task_input, outputs=output_format_input)

	    submit_button = gr.Button("Generate")
	    output_file = gr.File(label="Download Output File")

	    # Run the pipeline on click; the returned path is offered for download.
	    submit_button.click(
	        fn=generate_output,
	        inputs=[file_input, model_size_input, task_input, output_format_input, language_input],
	        outputs=output_file,
	    )


	demo.launch()