Spaces:

altryne
/

vidtranslator

Build error

Alex Volkov

Added captions API that receives a URL and both transcribes AND translates it.

7db5fdc over 2 years ago

3.34 kB

	import ffmpeg
	import os
	from pathlib import Path, PureWindowsPath
	import anvil.media
	import os
	from typing import Iterator, TextIO


	def bake_subs(input_file, output_file, subs_file, fontsdir, translate_action):
	    """Burn subtitles and a watermark into a video using ffmpeg.

	    Args:
	        input_file: Path of the source video; it is probed for its size.
	        output_file: Path of the rendered video (overwritten if it exists).
	        subs_file: Subtitle file handed to ffmpeg's ``subtitles`` filter.
	        fontsdir: Directory (``str`` or ``Path``) that holds the caption
	            fonts and ``watermark_new.png``.
	        translate_action: When truthy the captions are styled with the
	            "Poppins" font, otherwise with "Helvetica World".

	    Raises:
	        ValueError: If ``input_file`` contains no video stream.
	    """
	    print(f"Baking {subs_file} into video... {input_file} -> {output_file}")

	    assets_dir = Path(fontsdir)
	    # The font itself is selected by name through ``force_style``; the
	    # matching files (Poppins-Black.ttf / HelveticaWorld-Bold.ttf) are
	    # expected to live in ``assets_dir``.
	    fontname = 'Poppins' if translate_action else 'Helvetica World'
	    watermarkfile = assets_dir / 'watermark_new.png'

	    video = ffmpeg.input(input_file)
	    watermark = ffmpeg.input(str(watermarkfile))
	    audio = video.audio

	    probe = ffmpeg.probe(input_file)
	    video_stream = next(
	        (stream for stream in probe['streams'] if stream['codec_type'] == 'video'),
	        None,
	    )
	    if video_stream is None:
	        # Fail with a clear message instead of a TypeError on None below.
	        raise ValueError(f"No video stream found in {input_file}")

	    iw = int(video_stream['width'])
	    ih = int(video_stream['height'])
	    print(f"width {iw} and height {ih}")

	    # Landscape videos get larger captions than portrait/square ones.
	    sub_size = 18 if iw > ih else 8
	    fontstyle = f'Fontsize={sub_size},OutlineColour=&H40000000,BorderStyle=3,FontName={fontname},Bold=1'

	    # ``fontsdir`` of the subtitles filter must be a directory, not the
	    # path of a single font file.
	    subtitled = video.filter(
	        'subtitles', subs_file, fontsdir=str(assets_dir), force_style=fontstyle
	    )
	    (
	        ffmpeg.concat(subtitled, audio, v=1, a=1)
	        # Watermark is scaled to a third of the video width (aspect ratio
	        # kept via -1) and placed 10px from the top-left corner.
	        .overlay(watermark.filter('scale', iw / 3, -1), x='10', y='10')
	        .output(filename=output_file)
	        .run(quiet=True, overwrite_output=True)
	    )


	def str2bool(string):
	    """Convert the exact strings ``"True"`` / ``"False"`` to a bool.

	    Raises:
	        ValueError: If ``string`` is not one of the two accepted values.
	    """
	    str2val = {"True": True, "False": False}
	    # Guard clause: reject anything that is not an accepted spelling.
	    if string not in str2val:
	        raise ValueError(
	            f"Expected one of {set(str2val.keys())}, got {string}")
	    return str2val[string]


	def format_timestamp(seconds: float, always_include_hours: bool = False,
	                     decimal_marker: str = "."):
	    """Format a duration in seconds as ``[HH:]MM:SS<marker>mmm``.

	    Args:
	        seconds: Non-negative time offset in seconds; it is rounded to the
	            nearest millisecond.
	        always_include_hours: Emit the hours field even when it is zero.
	        decimal_marker: Separator placed before the milliseconds. The
	            default ``"."`` keeps the previous output; pass ``","`` for
	            SRT-style timestamps.

	    Returns:
	        The formatted timestamp, with hours zero-padded to at least two
	        digits whenever the hours field is present.

	    Raises:
	        AssertionError: If ``seconds`` is negative (when assertions are
	            enabled).
	    """
	    assert seconds >= 0, "non-negative timestamp expected"
	    remaining_ms = round(seconds * 1000.0)

	    hours, remaining_ms = divmod(remaining_ms, 3_600_000)
	    minutes, remaining_ms = divmod(remaining_ms, 60_000)
	    whole_seconds, millis = divmod(remaining_ms, 1_000)

	    # Hours are zero-padded so the result is a valid SRT/WebVTT timestamp.
	    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
	    return (
	        f"{hours_marker}{minutes:02d}:{whole_seconds:02d}"
	        f"{decimal_marker}{millis:03d}"
	    )


	def write_srt(transcript: Iterator[dict], file: TextIO):
	    """Write the transcript segments to ``file`` as numbered SRT cues.

	    Each segment is a mapping with ``'start'``, ``'end'`` and ``'text'``
	    keys. Cues are numbered from 1, each is followed by a blank line, and
	    the stream is flushed after every cue.
	    """
	    for index, segment in enumerate(transcript, start=1):
	        begin = format_timestamp(segment['start'], always_include_hours=True)
	        finish = format_timestamp(segment['end'], always_include_hours=True)
	        # A literal "-->" inside the text would be read as a timing line.
	        caption = segment['text'].strip().replace('-->', '->')
	        cue = f"{index}\n{begin} --> {finish}\n{caption}\n"
	        print(cue, file=file, flush=True)


	def get_srt(transcript: Iterator[dict]):
	    """Return the transcript segments rendered as SRT text.

	    Each segment is a mapping with ``'start'``, ``'end'`` and ``'text'``
	    keys. Cues are numbered from 1 and separated by a blank line, so that
	    consecutive cues do not run into each other. An empty transcript
	    yields an empty string.
	    """
	    cues = [
	        f"{index}\n"
	        f"{format_timestamp(segment['start'], always_include_hours=True)} --> "
	        f"{format_timestamp(segment['end'], always_include_hours=True)}\n"
	        # A literal "-->" inside the text would be read as a timing line.
	        f"{segment['text'].strip().replace('-->', '->')}\n"
	        for index, segment in enumerate(transcript, start=1)
	    ]
	    # Every cue already ends with a newline; joining with one more inserts
	    # the blank separator line between cues.
	    return "\n".join(cues)

	def filename(path):
	    """Return the last component of ``path`` without its final extension."""
	    base = os.path.basename(path)
	    stem, _extension = os.path.splitext(base)
	    return stem



	# if __name__ == '__main__':
	# meta = {
	# "id": 1576155093245693954,
	# "ext": 'mp4'
	# }
	# tempdirname = Path(f"encoding/temp/{meta['id']}")
	# video_file_path = f"{meta['id']}.{meta['ext']}"
	# srt_path = f"{meta['id']}.srt"
	# out_path = f"{meta['id']}_translated.mp4"
	# os.chdir(tempdirname)
	# bake_subs(video_file_path, out_path, srt_path)
	# anvil_media = anvil.media.from_file(out_path, 'video/mp4')
	# print(anvil_media)