File size: 2,425 Bytes
6fd7ef3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import ffmpeg
import os
from pathlib import Path, PureWindowsPath
import anvil.media
import os
from typing import Iterator, TextIO



def bake_subs(input_file, output_file, subs_file, fontsdir):
    """Burn a subtitle file into a video using ffmpeg.

    The video stream of ``input_file`` is run through ffmpeg's ``subtitles``
    filter, then concatenated with the (unfiltered) audio stream of the same
    input and written to ``output_file``, overwriting it if it exists.

    Args:
        input_file: Path of the source video.
        output_file: Path of the video to write.
        subs_file: Path of the subtitle file rendered onto the frames.
        fontsdir: Directory (``str`` or path-like) containing additional font
            files for the subtitle renderer, e.g. an Arial font matching the
            ``FontName=Arial`` style used below.

    Raises:
        ffmpeg.Error: If the ffmpeg process fails (raised by ``run``).
    """
    print(f"Baking {subs_file} into video... {input_file} -> {output_file}")

    # The filter's ``fontsdir`` option names a *directory* of fonts, so pass
    # the directory itself rather than the path of a font file inside it.
    # os.fspath accepts both str and Path, unlike the ``/`` operator.
    fonts_directory = os.fspath(fontsdir)
    fontstyle = 'Fontsize=18,OutlineColour=&H40000000,BorderStyle=3,FontName=Arial'

    video = ffmpeg.input(input_file)
    audio = video.audio
    subtitled = video.filter(
        'subtitles',
        subs_file,
        fontsdir=fonts_directory,
        force_style=fontstyle,
    )
    (
        ffmpeg
        .concat(subtitled, audio, v=1, a=1)
        .output(filename=output_file)
        .run(quiet=True, overwrite_output=True)
    )


def str2bool(string):
    """Convert the exact text ``"True"`` or ``"False"`` to a ``bool``.

    The match is case-sensitive; any other value raises ``ValueError``
    naming the accepted spellings.
    """
    lookup = {"True": True, "False": False}
    if string not in lookup:
        raise ValueError(
            f"Expected one of {set(lookup.keys())}, got {string}")
    return lookup[string]


def format_timestamp(
    seconds: float,
    always_include_hours: bool = False,
    decimal_marker: str = ".",
) -> str:
    """Format a duration in seconds as ``[H:]MM:SS<marker>mmm``.

    Args:
        seconds: Non-negative offset in seconds; rounded to the nearest
            millisecond.
        always_include_hours: Emit the hours field even when it is zero.
        decimal_marker: Separator placed between seconds and milliseconds.

    Returns:
        The formatted timestamp. Hours are not zero-padded; minutes and
        seconds use two digits and milliseconds three.

    Raises:
        ValueError: If ``seconds`` is negative (or NaN).
    """
    # An explicit check instead of ``assert`` so the validation survives
    # ``python -O``; ``not >=`` (rather than ``<``) also rejects NaN.
    if not seconds >= 0:
        raise ValueError(f"non-negative timestamp expected, got {seconds}")

    total_ms = round(seconds * 1000.0)
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    whole_seconds, milliseconds = divmod(remainder, 1_000)

    hours_marker = f"{hours}:" if always_include_hours or hours > 0 else ""
    return (
        f"{hours_marker}{minutes:02d}:{whole_seconds:02d}"
        f"{decimal_marker}{milliseconds:03d}"
    )


def _srt_timestamp(seconds: float) -> str:
    """Format non-negative *seconds* as a SubRip ``HH:MM:SS,mmm`` timestamp."""
    if not seconds >= 0:
        raise ValueError(f"non-negative timestamp expected, got {seconds}")
    total_ms = round(seconds * 1000.0)
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    whole_seconds, milliseconds = divmod(remainder, 1_000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"


def write_srt(transcript: Iterator[dict], file: TextIO):
    """Write transcript segments to *file* as SubRip (SRT) cues.

    Args:
        transcript: Iterable of segments, each a mapping with ``'start'`` and
            ``'end'`` (seconds) and ``'text'`` keys.
        file: Writable text stream receiving the cues.

    Each cue is numbered from 1, followed by its time range, its text and a
    blank line. Timestamps use the SubRip layout (two-digit hours and a comma
    before the milliseconds). Any ``-->`` inside the text is rewritten to
    ``->`` so it cannot be mistaken for the time-range separator.

    Raises:
        ValueError: If a segment has a negative start or end time.
    """
    for index, segment in enumerate(transcript, start=1):
        start = _srt_timestamp(segment['start'])
        end = _srt_timestamp(segment['end'])
        text = segment['text'].strip().replace('-->', '->')
        # The string's trailing "\n" plus print's own newline yields the blank
        # line that separates cues. Flushing per cue keeps partially written
        # output visible while segments are still being produced.
        print(
            f"{index}\n"
            f"{start} --> {end}\n"
            f"{text}\n",
            file=file,
            flush=True,
        )


def filename(path):
    """Return the final component of *path* with its last extension removed."""
    base_name = os.path.basename(path)
    stem, _extension = os.path.splitext(base_name)
    return stem



# if __name__ == '__main__':
#   meta = {
#     "id": 1576155093245693954,
#     "ext": 'mp4'
#   }
#   tempdirname = Path(f"encoding/temp/{meta['id']}")
#   video_file_path = f"{meta['id']}.{meta['ext']}"
#   srt_path = f"{meta['id']}.srt"
#   out_path = f"{meta['id']}_translated.mp4"
#   os.chdir(tempdirname)
#   # NOTE: stale call - bake_subs also requires a `fontsdir` argument.
#   bake_subs(video_file_path, out_path, srt_path)
#   anvil_media = anvil.media.from_file(out_path, 'video/mp4')
#   print(anvil_media)