# marquesafonso's picture
# add validation for other video formats; double cache size; adapt to device_type change in transcriber
# fc6dd1b
from moviepy.editor import VideoFileClip, CompositeVideoClip, TextClip
import os, json
def parse_srt(srt_string):
    """Parse an SRT document into a list of ``(start, end, text)`` tuples.

    A cue is recognised as a line made only of digits (the cue index)
    immediately followed by a timing line of the form ``start --> end``.
    Every following line up to the next blank line (or the end of input)
    belongs to the cue text; multiple text lines are joined with ``"\\n"``.

    Args:
        srt_string: Full contents of an SRT file. ``\\r\\n`` line endings and
            a leading byte-order mark are tolerated.

    Returns:
        A list of ``(start, end, text)`` tuples in document order. ``start``
        and ``end`` are the raw timestamp strings with surrounding whitespace
        removed. Malformed or truncated cues and cues without any text are
        skipped instead of raising.
    """
    # A BOM would otherwise glue itself to the first index line and make
    # ``isdigit()`` fail, hiding the first cue.
    lines = srt_string.lstrip("\ufeff").split("\n")
    total = len(lines)
    subtitles = []
    i = 0
    while i < total:
        # A cue header needs an index line AND a line after it.
        if not lines[i].strip().isdigit() or i + 1 >= total:
            i += 1
            continue

        start, arrow, end = lines[i + 1].partition("-->")
        start = start.strip()
        end = end.strip()
        if not arrow or not start or not end:
            # Digits that are not followed by a timing line are not a header.
            i += 1
            continue

        # Consume text lines until the blank separator rather than assuming
        # exactly one line of text per cue.
        i += 2
        text_lines = []
        while i < total and lines[i].strip():
            text_lines.append(lines[i].strip())
            i += 1

        if text_lines:
            subtitles.append((start, end, "\n".join(text_lines)))
    return subtitles
def filter_caption_width(device_type: str):
    """Return the caption layout ratios for a target device type.

    Args:
        device_type: Either ``'desktop'`` or ``'mobile'``.

    Returns:
        A ``(caption_width_ratio, caption_height_ratio)`` tuple of floats.

    Raises:
        ValueError: If ``device_type`` is not one of the supported values.
    """
    ratios_by_device = {
        'desktop': (0.5, 0.8),
        'mobile': (0.2, 0.7),
    }
    try:
        return ratios_by_device[device_type]
    except (KeyError, TypeError):
        # Fail with an explicit message instead of the UnboundLocalError the
        # if/elif chain produced for unknown values.
        supported = ", ".join(repr(name) for name in ratios_by_device)
        raise ValueError(
            f"Unsupported device_type {device_type!r}; expected one of: {supported}"
        ) from None
def _load_highlight_data(srt_json):
    """Decode the word-level timing payload without executing it."""
    if isinstance(srt_json, dict):
        return srt_json
    try:
        return json.loads(srt_json)
    except (TypeError, ValueError):
        # SECURITY: this used to be ``eval(srt_json)``, which runs arbitrary
        # code embedded in the payload. ``ast.literal_eval`` still accepts a
        # Python-literal dict (single-quoted keys, etc.) but only literals.
        import ast
        # The JSON round trip is kept from the original so the result has
        # plain JSON types (lists, string keys).
        return json.loads(json.dumps(ast.literal_eval(srt_json)))


def _build_highlight_clips(srt_data, frame_width, y_position, fontsize, font,
                           bg_color, text_color, highlight_color):
    """Build one full-line clip per line plus one highlight clip per word."""
    clips = []
    for line in srt_data.get("lines", []):
        base_clip = TextClip(line["text"], fontsize=fontsize, font=font,
                             color=text_color, bg_color=bg_color, method='label')
        base_clip = base_clip.set_start(float(line["start"])).set_end(float(line["end"]))

        # Centre the full line horizontally in the frame.
        x_center = (frame_width - base_clip.w) // 2
        clips.append(base_clip.set_position((x_center, y_position)))

        # Words are laid out left to right from the line's left edge; each
        # word clip's own width advances the cursor.
        current_x = x_center
        for word_info in line.get("words", []):
            # The word is re-rendered with the highlight colour as background
            # and shown only for the word's own time span.
            word_clip = TextClip(word_info["word"] + " ", fontsize=fontsize,
                                 color=text_color, font=font,
                                 method='label', bg_color=highlight_color)
            word_clip = word_clip.set_start(float(word_info["start"]))
            word_clip = word_clip.set_end(float(word_info["end"]))
            # NOTE(review): the fixed 7.5 px left shift is kept from the
            # original; presumably it compensates for label padding - confirm.
            word_clip = word_clip.set_position((current_x - 7.5, y_position))
            clips.append(word_clip)
            current_x += word_clip.w
    return clips


def _build_caption_clips(srt_string, frame_width, y_position, caption_width_ratio,
                         fontsize, font, bg_color, text_color):
    """Build one horizontally centred caption clip per SRT cue."""
    text_position = ('center', y_position)
    clips = []
    for start, end, text in parse_srt(srt_string):
        txt_clip = TextClip(text,
                            fontsize=fontsize,
                            color=text_color,
                            font=font,
                            method='caption',
                            bg_color=bg_color,
                            align='center',
                            size=(frame_width * caption_width_ratio, None))
        clips.append(txt_clip.set_start(start).set_end(end).set_position(text_position))
    return clips


def subtitler(video_file: str,
              srt_string: str,
              srt_json: str,
              output_file: str,
              fontsize: int,
              font: str,
              bg_color: str,
              text_color: str,
              highlight_mode: bool,
              highlight_color: str,
              device_type: str,
              temp_dir: str
              ):
    """Add subtitles to a video, with optional word-level highlighting.

    Args:
        video_file: Path of the input video.
        srt_string: SRT document; used when ``highlight_mode`` is false.
        srt_json: Word-level timing payload; used when ``highlight_mode`` is
            true. It is read as a mapping with a ``"lines"`` list whose items
            carry ``"start"``, ``"end"``, ``"text"`` and a ``"words"`` list of
            ``{"word", "start", "end"}`` entries.
        output_file: Path of the rendered video.
        fontsize: Font size passed to every text clip.
        font: Font name passed to every text clip.
        bg_color: Background colour of the subtitle text.
        text_color: Colour of the subtitle text.
        highlight_mode: Select word-level highlighting instead of plain
            captions.
        highlight_color: Background colour of the currently highlighted word.
        device_type: Layout preset forwarded to ``filter_caption_width``.
        temp_dir: Directory for the temporary audio file used while encoding.

    Returns:
        None. The result is written to ``output_file``.
    """
    video_file = os.path.abspath(video_file)
    output_file = os.path.abspath(output_file)
    temp_audiofile = os.path.join(temp_dir, "temp_audio_file.mp4")

    clip = VideoFileClip(filename=video_file, target_resolution=None)
    try:
        caption_width_ratio, caption_height_ratio = filter_caption_width(device_type)
        subtitle_y_position = clip.h * caption_height_ratio

        if highlight_mode:
            subtitle_clips = _build_highlight_clips(
                _load_highlight_data(srt_json), clip.w, subtitle_y_position,
                fontsize, font, bg_color, text_color, highlight_color)
        else:
            subtitle_clips = _build_caption_clips(
                srt_string, clip.w, subtitle_y_position, caption_width_ratio,
                fontsize, font, bg_color, text_color)

        # The former ``video.set_audio(temp_audiofile)`` call was dropped: its
        # return value was discarded and it was given a path rather than an
        # audio clip, so it never affected the output.
        video = CompositeVideoClip(size=None, clips=[clip] + subtitle_clips)
        video.write_videofile(output_file, codec='libx264', audio_codec='aac',
                              temp_audiofile=temp_audiofile)
    finally:
        # Release the source reader even when rendering fails.
        clip.close()