Alex Volkov
Added captions API, that receives a URL and both transcribes AND translates it.
7db5fdc
import ffmpeg
import os
from pathlib import Path, PureWindowsPath
import anvil.media
import os
from typing import Iterator, TextIO
def bake_subs(input_file, output_file, subs_file, fontsdir, translate_action):
    """Render subtitles and a watermark onto a video with ffmpeg.

    Args:
        input_file: Source video passed to ``ffmpeg.input`` / ``ffmpeg.probe``.
        output_file: Destination file; it is overwritten if it exists.
        subs_file: Subtitle file handed to ffmpeg's ``subtitles`` filter.
        fontsdir: Directory holding the font files and ``watermark_new.png``.
            Must support the ``/`` operator (e.g. ``pathlib.Path``).
        translate_action: Truthy selects the Poppins font, otherwise
            Helvetica World is used.

    Raises:
        ValueError: If the probe result contains no video stream.
        KeyError: If the video stream entry lacks ``width`` or ``height``.

    NOTE(review): failures of ffprobe/ffmpeg themselves presumably surface
    as ``ffmpeg.Error`` from ``probe``/``run`` -- confirm against the
    ffmpeg-python version in use.
    """
    print(f"Baking {subs_file} into video... {input_file} -> {output_file}")

    if translate_action:
        fontfile = fontsdir / 'Poppins-Black.ttf'
        fontname = 'Poppins'
    else:
        fontfile = fontsdir / 'HelveticaWorld-Bold.ttf'
        fontname = 'Helvetica World'
    watermarkfile = fontsdir / 'watermark_new.png'

    video = ffmpeg.input(input_file)
    watermark = ffmpeg.input(watermarkfile)
    audio = video.audio

    probe = ffmpeg.probe(input_file)
    video_stream = next(
        (stream for stream in probe['streams'] if stream['codec_type'] == 'video'),
        None,
    )
    if video_stream is None:
        # Fail with a clear message instead of "'NoneType' is not subscriptable".
        raise ValueError(f"No video stream found in {input_file}")

    iw = int(video_stream['width'])
    ih = int(video_stream['height'])
    print(f"width {iw} and height {ih}")

    # Landscape video (wider than tall) gets a larger subtitle font.
    sub_size = 18 if iw > ih else 8
    fontstyle = f'Fontsize={sub_size},OutlineColour=&H40000000,BorderStyle=3,FontName={fontname},Bold=1'

    # NOTE(review): the filter's ``fontsdir`` option receives the font *file*
    # path rather than the containing directory -- confirm this is intended.
    subtitled = video.filter('subtitles', subs_file, fontsdir=fontfile, force_style=fontstyle)
    # Watermark is scaled to a third of the video width (height keeps the
    # aspect ratio via -1) and placed 10px from the top-left corner.
    scaled_watermark = watermark.filter('scale', iw / 3, -1)
    (
        ffmpeg.concat(subtitled, audio, v=1, a=1)
        .overlay(scaled_watermark, x='10', y='10')
        .output(filename=output_file)
        .run(quiet=True, overwrite_output=True)
    )
def str2bool(string):
    """Map the exact strings ``"True"`` / ``"False"`` to their boolean values.

    Raises:
        ValueError: If ``string`` is anything other than those two spellings.
    """
    known = {"True": True, "False": False}
    if string not in known:
        raise ValueError(
            f"Expected one of {set(known.keys())}, got {string}")
    return known[string]
def format_timestamp(seconds: float, always_include_hours: bool = False,
                     decimal_marker: str = "."):
    """Format a duration in seconds as ``[H:]MM:SS<marker>mmm``.

    Args:
        seconds: Non-negative offset in seconds; rounded to the nearest
            millisecond.
        always_include_hours: When true, the hours field is emitted even if
            it is zero. Hours are not zero-padded.
        decimal_marker: Separator placed between seconds and milliseconds.
            Defaults to ``"."``; pass ``","`` for SRT-style timestamps.

    Returns:
        The formatted timestamp string.

    Raises:
        AssertionError: If ``seconds`` is negative (only when assertions
            are enabled).
    """
    assert seconds >= 0, "non-negative timestamp expected"

    total_ms = round(seconds * 1000.0)
    hours, total_ms = divmod(total_ms, 3_600_000)
    minutes, total_ms = divmod(total_ms, 60_000)
    whole_seconds, millis = divmod(total_ms, 1_000)

    hours_marker = f"{hours}:" if always_include_hours or hours > 0 else ""
    return f"{hours_marker}{minutes:02d}:{whole_seconds:02d}{decimal_marker}{millis:03d}"
def write_srt(transcript: Iterator[dict], file: TextIO):
    """Write transcript segments to ``file`` as numbered subtitle cues.

    Each segment must provide ``'start'``, ``'end'`` and ``'text'`` keys.
    Cues are numbered from 1, and any ``-->`` inside the text is replaced
    with ``->`` so it cannot be mistaken for the timing arrow. The stream is
    flushed after every cue.

    NOTE(review): timestamps are emitted exactly as ``format_timestamp``
    returns them (``H:MM:SS.mmm``); strict SRT uses ``HH:MM:SS,mmm``.
    """
    for index, cue in enumerate(transcript, start=1):
        begins = format_timestamp(cue['start'], always_include_hours=True)
        ends = format_timestamp(cue['end'], always_include_hours=True)
        caption = cue['text'].strip().replace('-->', '->')
        # print() appends one more newline, producing the blank line that
        # separates consecutive cues.
        print(
            f"{index}\n{begins} --> {ends}\n{caption}\n",
            file=file,
            flush=True,
        )
def get_srt(transcript: Iterator[dict]):
    """Return the transcript rendered as one subtitle string.

    Each segment must provide ``'start'``, ``'end'`` and ``'text'`` keys.
    Cues are numbered from 1, any ``-->`` inside the text is replaced with
    ``->``, and every cue is followed by a blank line so the result matches
    what ``write_srt`` writes to a file. An empty transcript yields ``''``.

    NOTE(review): timestamps are emitted exactly as ``format_timestamp``
    returns them (``H:MM:SS.mmm``); strict SRT uses ``HH:MM:SS,mmm``.
    """
    cues = []
    for i, segment in enumerate(transcript, start=1):
        start = format_timestamp(segment['start'], always_include_hours=True)
        end = format_timestamp(segment['end'], always_include_hours=True)
        text = segment['text'].strip().replace('-->', '->')
        # The trailing empty line terminates the cue; without it the next
        # cue's index and timing would run into this cue's text.
        cues.append(f"{i}\n{start} --> {end}\n{text}\n\n")
    # Join once instead of growing a string with += inside the loop.
    return ''.join(cues)
def filename(path):
    """Return the final component of ``path`` with its last extension removed."""
    base = os.path.basename(path)
    stem, _extension = os.path.splitext(base)
    return stem
# if __name__ == '__main__':
# meta = {
# "id": 1576155093245693954,
# "ext": 'mp4'
# }
# tempdirname = Path(f"encoding/temp/{meta['id']}")
# video_file_path = f"{meta['id']}.{meta['ext']}"
# srt_path = f"{meta['id']}.srt"
# out_path = f"{meta['id']}_translated.mp4"
# os.chdir(tempdirname)
# bake_subs(video_file_path, out_path, srt_path)
# anvil_media = anvil.media.from_file(out_path, 'video/mp4')
# print(anvil_media)