llinahosna committed on
Commit
3716ece
1 Parent(s): 9fb8c08

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +104 -0
main.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import cv2
4
+
5
+ from dall_e import DalleImageGenerator
6
+ from download_from_youtube import download_transcription, download_mp3, get_video_name
7
+ from utils import clean_str, get_sqrt, read_and_preprocess_transcript, put_subtitles_on_frame
8
+
9
+
10
def main():
    """Generate a Dall-e video clip for the song at a YouTube URL.

    Parses the command line, downloads the song's audio and transcript,
    asks ``get_frames`` for one sequence of frames per transcript line,
    writes the frames to ``data/<song_name>/frames.avi`` (and each frame as
    a PNG under ``data/<song_name>/frames``), then uses ffmpeg to trim the
    audio to the video's duration and mux both into
    ``data/<song_name>/final.avi``.

    Raises:
        subprocess.CalledProcessError: if either ffmpeg invocation exits
            with a non-zero status.
    """
    # Local import so this function stays self-contained; the module header
    # does not import subprocess.
    import subprocess

    parser = argparse.ArgumentParser(description='Generate a Dall-e video clip from a youtube url')
    parser.add_argument('url', help='URL of the a song to get the audio from')
    parser.add_argument('--token', help='A Replicate API token', default=None)
    parser.add_argument('--song_name', help='The name of the song / output files', default=None)
    # Explicit types: without them any value given on the command line is a
    # str, which breaks the frame arithmetic and cv2.VideoWriter.
    parser.add_argument('--fps', type=float,
                        help='FPS higher then 1 / sec_per_img to better control over timing', default=10)
    parser.add_argument('--sec_per_img', type=float, help='How long to show each image', default=3)
    parser.add_argument('--n_lines', type=int,
                        help='Limit number of generated lines in the video to faster testing', default=None)
    args = parser.parse_args()

    dalle = DalleImageGenerator(token=args.token)
    img_dim = 256  # Dall-e's output dim
    resize_factor = 2  # upscale factor for frames

    if args.song_name is None:
        args.song_name = clean_str(get_video_name(args.url))

    # Set paths
    outputs_dir = f"data/{args.song_name}"
    frames_dir = f"{outputs_dir}/frames"
    os.makedirs(outputs_dir, exist_ok=True)
    os.makedirs(frames_dir, exist_ok=True)
    mp3_path = f"{outputs_dir}/audio.mp3"
    tmp_mp3_path = f"{outputs_dir}/tmp.mp3"
    transcript_path = f"{outputs_dir}/transcript.json"
    vid_path = f"{outputs_dir}/frames.avi"
    final_vid_path = f"{outputs_dir}/final.avi"

    print("Getting audio file and transcript from youtube")
    # Download data
    download_transcription(args.url, transcript_path)
    download_mp3(args.url, mp3_path)

    transcript = read_and_preprocess_transcript(transcript_path, args.song_name, args.n_lines)

    frames, video_duration = get_frames(dalle, transcript, resize_factor, args)

    # Write video
    frame_size = (img_dim * resize_factor, img_dim * resize_factor)
    video = cv2.VideoWriter(vid_path, 0, args.fps, frame_size)
    try:
        for i, frame in enumerate(frames):
            cv2.imwrite(f"{frames_dir}/frame-{i}.png", frame)
            video.write(frame)
    finally:
        # Always release the writer, even if writing a frame fails.
        # No GUI window is ever opened here, so there is nothing to destroy.
        video.release()

    # Mix video clip with audio.
    # Arguments are passed as a list (no shell), so paths containing quotes
    # or other shell metacharacters are handled verbatim.
    # First trim the audio to the length of the generated video ...
    subprocess.run(
        ["ffmpeg", "-ss", "00:00:00", "-t", str(video_duration), "-i", mp3_path,
         "-map", "0:a", "-acodec", "libmp3lame", tmp_mp3_path],
        check=True,
    )
    # ... then mux the frames video with the trimmed audio.
    subprocess.run(
        ["ffmpeg", "-i", vid_path, "-i", tmp_mp3_path,
         "-map", "0", "-map", "1:a", "-c:v", "copy", "-shortest", final_vid_path],
        check=True,
    )
    print(f"Final video available at: {final_vid_path}")
58
+
59
+
60
def get_frames(dalle, transcript, resize_factor, args):
    """Build the list of video frames for every line of the transcript.

    For each transcript line, dall-e mini is prompted with the line's text
    and every returned image is repeated for an equal share of the line's
    frames; any leftover frames are filled with the last image.

    Args:
        dalle: object exposing ``generate_images(text, grid_size, text_adherence=...)``.
        transcript: iterable of dicts with ``'text'``, ``'start'`` and
            ``'duration'`` keys.
        resize_factor: forwarded to ``put_subtitles_on_frame``.
        args: namespace providing ``fps`` and ``sec_per_img``.

    Returns:
        A ``(frames, video_duration)`` tuple: the flat list of frames in
        playback order and the total number of frames written divided by
        ``args.fps`` (presumably seconds, matching the transcript's
        ``duration`` unit -- confirm against the transcript source).
    """
    print("Building video-clip")
    frames = []
    video_duration = 0
    for line in transcript:
        text = clean_str(line['text'])
        start = line['start']
        duration = line['duration']

        # Dall-e generates grid_size**2 images
        grid_size = max(get_sqrt(duration / args.sec_per_img), 1)

        print(f"({start:.1f} - {start + duration:.1f}):")

        print(f"* Generating {grid_size**2} images with prompt: '{text}'")
        # Generate images
        images = dalle.generate_images(text, grid_size, text_adherence=3)
        if len(images) == 0:
            # Nothing to show for this line; avoids dividing by zero below.
            print("* No images were generated, skipping this line")
            continue

        # Count frames as integers instead of accumulating 1 / fps as a
        # float: accumulated rounding error could otherwise drop a filler frame.
        total_frames = int(duration * args.fps)
        frames_per_image = total_frames // len(images)

        # Write frames
        for j, image in enumerate(images):
            frame = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
            frame = put_subtitles_on_frame(frame, text, resize_factor)
            print(f"* Writing image - {j} as {frames_per_image} frames")
            frames.extend([frame] * frames_per_image)

        # Write more frames from last image to fill the gap left by the
        # integer division above.
        n_frames = total_frames - frames_per_image * len(images)
        if n_frames > 0:
            print(f"* Writing image - {j} for {n_frames} frames")
            frames.extend([frame] * n_frames)

        video_duration += total_frames / args.fps

    return frames, video_duration
101
+
102
+
103
# Run the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()