from moviepy.editor import *
from gtts import gTTS
from pydub import AudioSegment
import tempfile
import os

# NOTE(review): this is assigned after moviepy has already been imported;
# moviepy appears to resolve its ffmpeg binary at import time, so confirm
# that this setting actually takes effect where it is needed.
os.environ["IMAGEIO_FFMPEG_EXE"] = "/usr/local/bin/ffmpeg"
# os.environ["IMAGEMAGICK_BINARY"] = "/usr/local/bin/convert"


def _add_subtitles(clip, subtitles):
    """Return ``clip`` composited with one caption per subtitle entry."""
    txt_clips = []
    for (start_time, end_time), text in subtitles:
        txt_clip = (
            TextClip(
                text,
                fontsize=clip.w // 20,
                color='white',
                font='bold.ttf',
                method='caption',
                size=(clip.w * 0.9, None),
            )
            .set_duration(end_time - start_time)
            .set_position(('center', 4 * clip.h // 5))
            .set_start(start_time)
        )
        txt_clips.append(txt_clip)
    return CompositeVideoClip([clip, *txt_clips])


def _synthesize_narration(index, start_time, end_time, text, audio_dir):
    """Create a spoken audio clip for one subtitle, positioned at its start."""
    mp3_path = os.path.join(audio_dir, f'subtitle_{index}.mp3')
    gTTS(text=text, lang='zh').save(mp3_path)

    # Re-export as WAV; the decoded segment also gives the speech length.
    segment = AudioSegment.from_mp3(mp3_path)
    wav_path = os.path.join(audio_dir, f'subtitle_{index}_adjusted.wav')
    segment.export(wav_path, format="wav").close()

    # Never let the speech run past the end of its subtitle window.
    spoken_length = min(end_time - start_time, segment.duration_seconds)
    return (
        AudioFileClip(wav_path)
        .set_start(start_time)
        .set_duration(spoken_length)
    )


def gen_audio(subtitles, tmpdir):
    """Burn subtitles and synthesized narration into ``tmpdir/output.mp4``.

    The subtitles are first passed through ``merge_subtitles``; each merged
    entry is rendered as an on-screen caption and as a text-to-speech clip
    starting at the same time. The audio of the written file consists of the
    narration clips only.

    Args:
        subtitles: List of ``[[start_seconds, end_seconds], text]`` entries.
        tmpdir: Directory that contains ``output.mp4``; the result is written
            next to it.

    Returns:
        Path of the written file, ``tmpdir + '/output_with_subtitles.mp4'``.
    """
    subtitles = merge_subtitles(subtitles)
    print("===> Subtitles:")
    for subtitle in subtitles:
        print(subtitle)

    output_path = tmpdir + '/output_with_subtitles.mp4'

    # The intermediate audio files are read while the video is being written,
    # so the scratch directory must outlive write_videofile(); it is removed
    # afterwards instead of being left behind.
    with tempfile.TemporaryDirectory() as audio_dir:
        source_clip = VideoFileClip(tmpdir + "/output.mp4")
        narration_clips = []
        try:
            video = _add_subtitles(source_clip, subtitles)

            for index, ((start_time, end_time), text) in enumerate(subtitles):
                narration_clips.append(
                    _synthesize_narration(
                        index, start_time, end_time, text, audio_dir
                    )
                )

            if narration_clips:
                video = video.set_audio(CompositeAudioClip(narration_clips))
            else:
                # No subtitles means no narration: write a silent track rather
                # than building a composite from an empty clip list.
                video = video.set_audio(None)

            video.write_videofile(output_path, fps=24)
        finally:
            # Release the readers before the scratch directory is deleted.
            for narration_clip in narration_clips:
                narration_clip.close()
            source_clip.close()

    return output_path
def merge_subtitles(subtitles, min_duration=3):
    """Merge short, back-to-back subtitles and normalise their text.

    Entries are processed in order. While the current entry is shorter than
    ``min_duration``, the next entry is merged into it if it starts exactly
    where the current one ends. If no such entry is available, the end time
    is pushed out to ``start + min_duration``; this may overlap a later entry
    that starts before the new end time.

    Before the fixed prefix is added, every occurrence of "細節" is removed
    from the text and duplicate phrases are dropped, keeping the order in
    which each phrase first appeared.

    Args:
        subtitles: List of ``[[start, end], text]`` entries.
        min_duration: Minimum length of a resulting entry, in the same unit
            as the time stamps. Defaults to 3.

    Returns:
        A new list of ``[[start, end], text]`` entries; the input is not
        modified.
    """
    merged_subtitles = []
    index = 0
    total = len(subtitles)

    while index < total:
        current_start, current_end = subtitles[index][0]
        current_content = subtitles[index][1]
        index += 1

        while current_end - current_start < min_duration:
            if index < total:
                next_start, next_end = subtitles[index][0]
                if next_start == current_end:
                    # Directly adjacent: absorb the next entry and re-check.
                    current_end = next_end
                    current_content += "、" + subtitles[index][1]
                    index += 1
                    continue
            # Nothing adjacent to absorb: pad to the minimum length. The
            # explicit break avoids relying on float arithmetic to make the
            # loop condition false.
            current_end = current_start + min_duration
            break

        phrases = current_content.replace("細節", "").split('、')
        # dict.fromkeys de-duplicates while preserving first-seen order, so
        # the generated text is the same on every run.
        unique_phrases = list(dict.fromkeys(phrases))
        merged_subtitles.append(
            [[current_start, current_end], '這裡請注意' + '、'.join(unique_phrases)]
        )

    return merged_subtitles