File size: 4,261 Bytes
2e5e07d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import os
import ast
from IPython.display import Audio
import nltk  # we'll use this to split into sentences
import numpy as np

from bark import generate_audio, SAMPLE_RATE
from moviepy.editor import concatenate_videoclips, concatenate_audioclips
from moviepy.editor import VideoFileClip, AudioFileClip, AudioClip, CompositeAudioClip
import librosa
import soundfile as sf
import math


def make_audio(en_prompt_file, output_dir):
    """Narrate every video fragment description and save one file per fragment.

    The prompt file must contain a Python literal (parsed with
    ``ast.literal_eval``) that iterates over mappings, each carrying a
    ``"video fragment description"`` entry. Each description is split into
    sentences, every sentence is synthesised with Bark using a fixed speaker
    preset, and a quarter second of silence is appended after each sentence.

    Args:
        en_prompt_file: Path to the UTF-8 prompt file described above.
        output_dir: Directory that receives ``0.wav``, ``1.wav``, ... in
            fragment order; it is created when it does not exist.
    """
    print("Begin to make the aside!")
    with open(en_prompt_file, 'r', encoding='utf-8') as prompt_fh:
        fragments = ast.literal_eval(prompt_fh.read())
        descriptions = [fragment["video fragment description"] for fragment in fragments]

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for index, description in enumerate(descriptions):
        # Synthesising sentence by sentence keeps each Bark call short.
        sentences = nltk.sent_tokenize(description)
        voice = "v2/en_speaker_1"
        pause = np.zeros(int(0.25 * SAMPLE_RATE))  # quarter second of silence

        chunks = []
        for sentence in sentences:
            chunks.append(generate_audio(sentence, history_prompt=voice))
            chunks.append(pause.copy())

        # IPython's Audio object is used only as an encoder here: its ``data``
        # attribute is what gets written to the .wav file.
        clip = Audio(np.concatenate(chunks), rate=SAMPLE_RATE)
        wav_path = os.path.join(output_dir, f"{index}.wav")
        with open(wav_path, 'w+b') as wav_fh:
            wav_fh.write(clip.data)


def merge_video_audio(video_dir, audio_dir, output_dir):
    """Attach each narration track to its video clip and export the results.

    Files in ``video_dir`` and ``audio_dir`` are paired by the integer that
    precedes the first ``.`` in their names. For every pair the narration is
    fitted to the video length: narration longer than the video is
    time-stretched (sped up) to the video duration, shorter narration is
    padded with silence. Each dubbed clip is written to ``output_dir`` and
    the fitted narrations are also concatenated into ``result.wav`` inside
    ``audio_dir``.

    Args:
        video_dir: Directory with the input videos named ``<int>.<ext>``.
            Names starting with ``result`` are ignored.
        audio_dir: Directory with the narration files named ``<int>.<ext>``.
            Names starting with ``result`` or ``fast`` are ignored. The
            stretched copies (``fast_<i>.wav``) and ``result.wav`` are
            written here.
        output_dir: Directory that receives ``<i>.mp4``; created if missing.

    Raises:
        AssertionError: If the number of videos and narrations differ.
        ValueError: If there is nothing to merge, or a file name does not
            start with an integer.
    """
    video_fnames = [
        fname for fname in os.listdir(video_dir)
        if not fname.startswith("result")
    ]
    audio_fnames = [
        fname for fname in os.listdir(audio_dir)
        if not fname.startswith(("result", "fast"))
    ]
    video_fnames.sort(key=lambda x: int(x.split('.')[0]))
    audio_fnames.sort(key=lambda x: int(x.split('.')[0]))
    assert len(video_fnames) == len(audio_fnames), 'The number of videos is not equal to audios.'
    if not video_fnames:
        # Fail early with a readable message instead of an obscure error from
        # concatenating an empty clip list.
        raise ValueError(
            f"No clips to merge: {video_dir!r} and {audio_dir!r} contain no input files."
        )
    os.makedirs(output_dir, exist_ok=True)

    audios = []        # fitted narration per clip, in order, for result.wav
    source_clips = []  # file-backed audio clips that must stay open until the end
    try:
        for i, (video_fname, audio_fname) in enumerate(zip(video_fnames, audio_fnames)):
            audio_path = os.path.join(audio_dir, audio_fname)
            video = VideoFileClip(os.path.join(video_dir, video_fname))
            try:
                audio = AudioFileClip(audio_path)
                source_clips.append(audio)

                video_duration = video.duration
                audio_duration = audio.duration
                if audio_duration > video_duration:
                    # The raw narration is replaced by a stretched copy, so its
                    # reader is no longer needed.
                    source_clips.pop().close()
                    # sr=None keeps the file's native sample rate instead of
                    # resampling to librosa's default.
                    y, sr = librosa.load(audio_path, sr=None)
                    speed_change = audio_duration / video_duration
                    y_stretched = librosa.effects.time_stretch(y, rate=speed_change)
                    # One file per clip: the clips queued in ``audios`` are read
                    # again when result.wav is written, so a shared file name
                    # would be overwritten by later iterations before that.
                    fast_path = os.path.join(audio_dir, "fast_" + str(i) + ".wav")
                    sf.write(fast_path, y_stretched, sr)
                    audio = AudioFileClip(fast_path)
                    source_clips.append(audio)
                else:
                    fps = audio.fps
                    # Round up to a whole number of samples so the silence is
                    # never shorter than the video.
                    silence_len = math.ceil(video_duration * fps) / fps
                    # Bind the channel count now; ``audio`` is rebound below and
                    # on every later iteration.
                    silence = AudioClip(
                        lambda t, n=audio.nchannels: [0] * n,
                        duration=silence_len,
                        fps=fps,
                    )
                    audio = CompositeAudioClip([audio, silence])

                audios.append(audio)
                dubbed = video.set_audio(audio)
                dubbed.write_videofile(os.path.join(output_dir, str(i) + ".mp4"))
            finally:
                # Close the source clip (not ``dubbed``) so the narration
                # attached to the dubbed copy stays readable.
                video.close()

        final_audio = concatenate_audioclips(audios)
        final_audio.write_audiofile(os.path.join(audio_dir, "result" + ".wav"))
    finally:
        for clip in source_clips:
            clip.close()



def concatenate_videos(video_dir, output_dir=None):
    """Join the numbered clips of a directory into a single ``result.mp4``.

    Clips are ordered by the integer that precedes the first ``.`` in their
    file names. Files whose names start with ``result`` or ``audio`` are
    ignored.

    Args:
        video_dir: Directory with the clips named ``<int>.<ext>``.
        output_dir: Directory that receives ``result.mp4``; created if
            missing. Defaults to ``video_dir``.

    Raises:
        ValueError: If there are no clips to concatenate, or a file name does
            not start with an integer.
    """
    if output_dir is None:
        output_dir = video_dir
    video_fnames = [
        fname for fname in os.listdir(video_dir)
        if not fname.startswith(("result", "audio"))
    ]
    video_fnames.sort(key=lambda x: int(x.split('.')[0]))
    if not video_fnames:
        # Fail early with a readable message instead of an obscure error from
        # concatenating an empty clip list.
        raise ValueError(f"No video clips to concatenate in {video_dir!r}.")
    os.makedirs(output_dir, exist_ok=True)

    video_clips = []
    try:
        for video_fname in video_fnames:
            video_clips.append(VideoFileClip(os.path.join(video_dir, video_fname)))

        final_clip = concatenate_videoclips(video_clips, method="compose")

        audio_clips = [clip.audio for clip in video_clips]
        # Replace the audio with a back-to-back concatenation only when every
        # clip has a track; a missing (None) track cannot be concatenated, so
        # in that case keep whatever audio concatenate_videoclips produced.
        if all(track is not None for track in audio_clips):
            final_clip = final_clip.set_audio(concatenate_audioclips(audio_clips))

        final_clip.write_videofile(os.path.join(output_dir, "result.mp4"))
    finally:
        # Release the readers opened for every source clip.
        for clip in video_clips:
            clip.close()