jbilcke-hf (HF staff) committed
Commit ccd48b8
1 Parent(s): e4e0e54
.gitignore CHANGED
@@ -4,4 +4,5 @@ node_modules
  .DS_Store
  .venv
  *.mp4
- sandbox
+ sandbox
+ scripts

src/index.mts CHANGED
@@ -95,6 +95,7 @@ app.get("/:ownerId/:videoId\.mp4", async (req, res) => {
   */

  const ownerId = req.params.ownerId
+ console.log("downloading..")

  if (!uuidValidate(ownerId)) {
    console.error("invalid owner id")

src/llm/enrichVideoSpecsUsingLLM.mts CHANGED
@@ -5,6 +5,7 @@ import { generateYAML } from "./openai/generateYAML.mts"
  import { HallucinatedVideoRequest, OpenAIErrorResponse } from "./types.mts"
  import { getQueryChatMessages } from "../preproduction/prompts.mts"
  import { getValidNumber } from "../utils/getValidNumber.mts"
+ import { parseShotRequest } from "../utils/parseShotRequest.mts"


  export const enrichVideoSpecsUsingLLM = async (video: Video): Promise<Video> => {
@@ -13,15 +14,9 @@ export const enrichVideoSpecsUsingLLM = async (video: Video): Promise<Video> =>

    const defaultValue = {} as unknown as HallucinatedVideoRequest

-   // console.log("enrichVideoSpecsUsingLLM: messages = ", messages)
-
    let hallucinatedVideo: HallucinatedVideoRequest
-
-
-   const referenceShot = video.shots[0]
    video.shots = []
-   // console.log("referenceShot:", referenceShot)
-
+
    try {
      hallucinatedVideo = await generateYAML<HallucinatedVideoRequest>(
        messages,
@@ -64,12 +59,13 @@ export const enrichVideoSpecsUsingLLM = async (video: Video): Promise<Video> =>


    for (const hallucinatedShot of hallucinatedShots) {
-     const shot = JSON.parse(JSON.stringify(referenceShot))
-     shot.shotPrompt = hallucinatedShot.shotPrompt || shot.shotPrompt
-     shot.environmentPrompt = hallucinatedShot.environmentPrompt || shot.environmentPrompt
-     shot.photographyPrompt = hallucinatedShot.photographyPrompt || shot.photographyPrompt
-     shot.actionPrompt = hallucinatedShot.actionPrompt || shot.actionPrompt
-     shot.foregroundAudioPrompt = hallucinatedShot.foregroundAudioPrompt || shot.foregroundAudioPrompt
+     const shot = await parseShotRequest(video, {
+       shotPrompt: hallucinatedShot.shotPrompt,
+       environmentPrompt: hallucinatedShot.environmentPrompt,
+       photographyPrompt: hallucinatedShot.photographyPrompt,
+       actionPrompt: hallucinatedShot.actionPrompt,
+       foregroundAudioPrompt: hallucinatedShot.foregroundAudioPrompt
+     })
      video.shots.push(shot)
    }

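Note: the shape of HallucinatedVideoRequest is defined in src/llm/types.mts and is not part of this commit; the following is only a hypothetical partial sketch, inferred from the fields this diff reads from each hallucinated shot.

// hypothetical partial sketch, inferred from the field accesses in the diff above
interface HallucinatedVideoShot {
  shotPrompt?: string
  environmentPrompt?: string
  photographyPrompt?: string
  actionPrompt?: string
  foregroundAudioPrompt?: string
}

interface HallucinatedVideoRequest {
  // the enrichment loop iterates over a list of hallucinated shots
  shots?: HallucinatedVideoShot[]
  // other fields omitted, since they do not appear in this diff
}
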
src/preproduction/prompts.mts CHANGED
@@ -30,12 +30,16 @@ The format expected by the API must be in YAML. The TypeScript schema for this Y
    }>
  }
  \`\`\`
- # Guidelines
- You will generate 3 shots by default, unless more or less are specified.
- Is it crucial to repeat the elements consituting a sequence of multiple shots verbatim from one shot to another.
+ # Guidelines for number of shots and their duration
+ You will generate 1 shot by default, unless more or fewer are specified.
+ A shot can only last 3 seconds max.
+ So if you are asked to generate a 6-second video, you need 2 shots; for a 9-second video, 3 shots, and so on.
+ If you are asked to generate, for instance, an 11-second video, you need three 3-second shots plus one 2-second shot.
+ # Guidelines for writing descriptions
+ It is crucial to repeat the elements constituting a sequence of multiple shots verbatim from one shot to another.
  For instance, you will have to repeat exactly what a character or background look like, how they are dressed etc.
  This will ensure consistency from one scene to another.
- ## Creating a movie
+ ## Choosing the right words
  Here are some guidelines regarding film-making:
  - The distance your subject is to the camera impacts how the audience feels about them.
  - Subject will appear largest in a close-up or choker shot and smallest in a wide or long shot.

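The shot-count rule in the new prompt is simple integer arithmetic; a small illustrative helper (not part of this commit, names are made up) that splits a requested duration into shots of at most 3 seconds:

// illustrative only: split a requested duration into shots of at most 3 seconds
const MAX_SHOT_DURATION_SEC = 3

function splitIntoShotDurations(totalDurationSec: number): number[] {
  const durations: number[] = []
  let remaining = Math.max(1, Math.round(totalDurationSec))
  while (remaining > 0) {
    const shotDuration = Math.min(MAX_SHOT_DURATION_SEC, remaining)
    durations.push(shotDuration)
    remaining -= shotDuration
  }
  return durations
}

// splitIntoShotDurations(6)  -> [3, 3]        (2 shots)
// splitIntoShotDurations(9)  -> [3, 3, 3]     (3 shots)
// splitIntoShotDurations(11) -> [3, 3, 3, 2]  (three 3-second shots plus one 2-second shot)
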
src/production/assembleShots.mts CHANGED
@@ -4,6 +4,7 @@ import concat from 'ffmpeg-concat'

  import { VideoShot } from '../types.mts'
  import { pendingFilesDirFilePath } from "../config.mts"
+ import { normalizePendingVideoToTmpFilePath } from "./normalizePendingVideoToTmpFilePath.mts"

  export const assembleShots = async (shots: VideoShot[], fileName: string) => {

@@ -26,6 +27,7 @@ export const assembleShots = async (shots: VideoShot[], fileName: string) => {
      // pass custom params to a transition
      params: { direction: [1, -1] },
    },
+
    /*
    {
      name: 'squaresWire',
@@ -36,10 +38,12 @@ export const assembleShots = async (shots: VideoShot[], fileName: string) => {

  const videoFilePath = path.join(pendingFilesDirFilePath, fileName)

- const shotFilesPaths = shots.map(shot => path.join(
-   pendingFilesDirFilePath,
-   shot.fileName
- ))
+ // before performing assembly, we must normalize images
+ const shotFilesPaths: string[] = []
+ for (let shot of shots) {
+   const normalizedShotFilePath = await normalizePendingVideoToTmpFilePath(shot.fileName)
+   shotFilesPaths.push(normalizedShotFilePath)
+ }

  await concat({
    output: videoFilePath,

src/production/generateShot.mts DELETED
@@ -1,209 +0,0 @@
- import path from "node:path"
-
- import { v4 as uuidv4 } from "uuid"
- import tmpDir from "temp-dir"
-
- import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
- import { generateAudio } from "./generateAudio.mts"
- import { generateVideo } from "./generateVideo.mts"
- import { upscaleVideo } from "./upscaleVideo.mts"
- import { generateVoice } from "./generateVoice.mts"
- import { generateSeed } from "../utils/generateSeed.mts"
- import { mergeAudio } from "./mergeAudio.mts"
- import { addAudioToVideo } from "./addAudioToVideo.mts"
- import { interpolateVideo } from "./interpolateVideo.mts"
- import { postInterpolation } from "./postInterpolation.mts"
-
-
- export const generateShot = async ({
-   seed = 0,
-   shotId = "",
-   actorPrompt = "",
-   shotPrompt = "",
-   backgroundAudioPrompt = "",
-   foregroundAudioPrompt = "",
-   actorDialoguePrompt = "",
-   actorVoicePrompt = "",
-   duration = 2,
-   nbFrames = 24,
-   resolution = 576,
-   nbSteps = 35,
-   upscale = true,
-   interpolate = true,
-   noise = true,
- }: {
-   seed?: number;
-   shotId?: string;
-   actorPrompt?: string;
-   shotPrompt?: string;
-   backgroundAudioPrompt?: string;
-   foregroundAudioPrompt?: string;
-   actorDialoguePrompt?: string;
-   actorVoicePrompt?: string;
-   duration?: number; // 2 seconds
-   nbFrames?: number; // 24 FPS
-   resolution?: number; // 256, 320, 512, 576, 720, 1080..
-   nbSteps?: number;
-   upscale?: boolean;
-   interpolate?: boolean;
-   noise?: boolean;
- }) => {
-   seed = seed || generateSeed()
-   shotId = shotId || uuidv4()
-
-   const shotFileName = `${shotId}.mp4`
-
-   console.log("generating video shot:", {
-     seed,
-     shotId,
-     actorPrompt,
-     shotPrompt,
-     backgroundAudioPrompt,
-     foregroundAudioPrompt,
-     actorDialoguePrompt,
-     actorVoicePrompt,
-     duration,
-     nbFrames,
-     resolution,
-     nbSteps,
-     upscale,
-     interpolate,
-     noise,
-   })
-
-
-   if (actorPrompt) {
-     console.log("generating actor..")
-     const actorIdentityFileName = `actor_${Date.now()}.png`
-     // await generateActor(actorPrompt, actorIdentityFileName, seed)
-   }
-
-   console.log("generating base video ..")
-   let generatedVideoUrl = ""
-
-   // currenty the base model is incapable of generating more than 24 FPS,
-   // because otherwise the upscaler will have trouble
-
-   // so for now, we fix it to 24 frames
-   // const nbFramesForBaseModel = Math.min(3, Math.max(1, Math.round(duration))) * 8
-   const nbFramesForBaseModel = 24
-
-   try {
-     generatedVideoUrl = await generateVideo(shotPrompt, {
-       seed,
-       nbFrames: nbFramesForBaseModel,
-       nbSteps
-     })
-   } catch (err) {
-     // upscaling can be finicky, if it fails we try again
-     console.log('- trying again to generate base shot..')
-     generatedVideoUrl = await generateVideo(shotPrompt, {
-       seed,
-       nbFrames: nbFramesForBaseModel,
-       nbSteps
-     })
-   }
-
-   console.log("downloading video..")
-
-   const videoFileName = await downloadFileToTmp(generatedVideoUrl, shotFileName)
-
-   if (upscale) {
-     console.log("upscaling video..")
-     try {
-       await upscaleVideo(videoFileName, shotPrompt)
-     } catch (err) {
-       // upscaling can be finicky, if it fails we try again
-       console.log('- trying again to upscale shot..')
-       await upscaleVideo(videoFileName, shotPrompt)
-     }
-   }
-
-   if (interpolate) {
-     console.log("upscaling video..")
-     // ATTENTION 1:
-     // the interpolation step always create a SLOW MOTION video
-     // it means it can last a lot longer (eg. 2x, 3x, 4x.. longer)
-     // than the duration generated by the original video model
-
-     // ATTENTION 2:
-     // the interpolation step generates videos in 910x512!
-
-     // ATTENTION 3:
-     // the interpolation step parameters are currently not passed to the space,
-     // so changing those two variables below will have no effect!
-     const interpolationSteps = 3
-     const interpolatedFramesPerSecond = 24
-     await interpolateVideo(
-       video,
-       interpolationSteps,
-       interpolatedFramesPerSecond
-     )
-     console.log('creating slow-mo video (910x512 @ 24 FPS)')
-
-     // with our current interpolation settings, the 3 seconds video generated by the model
-     // become a 7 seconds video, at 24 FPS
-
-     // so we want to scale it back to the desired duration length
-     // also, as a last trick we want to upscale it (without AI) and add some FXs
-     console.log('performing final scaling (1280x720 @ 24 FPS)')
-     await postInterpolation(videoFileName, duration, nbFrames)
-   }
-
-   let backgroundAudioFileName = ''
-   if (backgroundAudioPrompt) {
-     console.log("generating background audio..")
-     backgroundAudioFileName = await generateAudio(backgroundAudioPrompt, `shot_${shotId}_audio_${uuidv4}.m4a`)
-   }
-
-   let foregroundAudioFileName = ''
-   if (foregroundAudioPrompt) {
-     console.log("generating foreground audio..")
-     foregroundAudioFileName = await generateAudio(foregroundAudioPrompt, `shot_${shotId}_audio_${uuidv4()}.m4a`)
-   }
-
-
-   let voiceAudioFileName = ''
-   if (actorDialoguePrompt) {
-     console.log("configuring dialogue..")
-     if (actorVoicePrompt) {
-       console.log("configuring voice..")
-       // well.. that's a TODO!
-       // for now let's always use the same voice model
-
-       console.log('TODO this should be done in the sequence, not the prompt!')
-       voiceAudioFileName = await generateVoice(actorDialoguePrompt, `shot_${shotId}_voice_${uuidv4()}.m4a`)
-     }
-   }
-
-   console.log('merging audio with video..')
-   if (backgroundAudioFileName || foregroundAudioFileName) {
-     let audioFileName = ''
-
-     // we have both background and foreground
-     if (backgroundAudioFileName && foregroundAudioFileName) {
-       audioFileName = await mergeAudio({
-         input1FileName: backgroundAudioFileName,
-         input1Volume: 0.2, // 20% volume
-         input2FileName: foregroundAudioFileName,
-         input2Volume: 0.7, // 70% volume
-       })
-     } else if (backgroundAudioFileName) {
-       audioFileName = backgroundAudioFileName
-     } else if (foregroundAudioFileName) {
-       audioFileName = foregroundAudioFileName
-     }
-
-     await addAudioToVideo(video, audioFileName)
-   }
-
-   console.log("returning result to user..")
-
-   const filePath = path.resolve(tmpDir, videoFileName)
-
-   return {
-     shotId,
-     filePath,
-     videoFileName
-   }
- }
 
src/production/normalizePendingVideoToTmpFilePath.mts ADDED
@@ -0,0 +1,33 @@
+ import path from "node:path"
+
+ import { v4 as uuidv4 } from "uuid"
+ import tmpDir from "temp-dir"
+ import ffmpeg from "fluent-ffmpeg"
+
+ import { pendingFilesDirFilePath } from "../config.mts"
+
+ export const normalizePendingVideoToTmpFilePath = async (fileName: string): Promise<string> => {
+   return new Promise((resolve,reject) => {
+
+     const tmpFileName = `${uuidv4()}.mp4`
+
+     const filePath = path.join(pendingFilesDirFilePath, fileName)
+     const tmpFilePath = path.join(tmpDir, tmpFileName)
+
+     ffmpeg.ffprobe(filePath, function(err,) {
+       if (err) { reject(err); return; }
+
+       ffmpeg(filePath)
+
+         .size("1280x720")
+
+         .save(tmpFilePath)
+         .on("end", async () => {
+           resolve(tmpFilePath)
+         })
+         .on("error", (err) => {
+           reject(err)
+         })
+     })
+   })
+ }
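The helper above is consumed by assembleShots (see the change to src/production/assembleShots.mts earlier in this commit); a minimal usage sketch, assuming only the pending file names of the shots are needed (the wrapper name is illustrative and not part of the repository):

// minimal usage sketch (illustrative wrapper, not part of this commit)
import concat from "ffmpeg-concat"
import { normalizePendingVideoToTmpFilePath } from "./normalizePendingVideoToTmpFilePath.mts"

const assembleNormalizedShots = async (shotFileNames: string[], outputFilePath: string) => {
  // re-encode every pending shot to a common 1280x720 format before concatenation
  const shotFilesPaths: string[] = []
  for (const fileName of shotFileNames) {
    shotFilesPaths.push(await normalizePendingVideoToTmpFilePath(fileName))
  }
  await concat({
    output: outputFilePath,
    videos: shotFilesPaths,
  })
}
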
src/production/postInterpolation.mts CHANGED
@@ -4,13 +4,14 @@ import { v4 as uuidv4 } from "uuid"
  import tmpDir from "temp-dir"
  import ffmpeg from "fluent-ffmpeg"
  import { moveFileFromTmpToPending } from "../utils/moveFileFromTmpToPending.mts"
+ import { pendingFilesDirFilePath } from "../config.mts"

  export const postInterpolation = async (fileName: string, durationMs: number, nbFrames: number, noiseAmount: number): Promise<string> => {
    return new Promise((resolve,reject) => {

      const tmpFileName = `${uuidv4()}.mp4`

-     const filePath = path.join(tmpDir, fileName)
+     const filePath = path.join(pendingFilesDirFilePath, fileName)
      const tmpFilePath = path.join(tmpDir, tmpFileName)

      ffmpeg.ffprobe(filePath, function(err, metadata) {
@@ -34,7 +35,7 @@ export const postInterpolation = async (fileName: string, durationMs: number, nb
        .size("1280x720")

        .videoFilters([
-         `setpts=${durationRatio}*PTS`, // we make the video faster
+         `setpts=0.5*PTS`, // we make the video faster
          //'scale=-1:576:lanczos',
          // 'unsharp=5:5:0.2:5:5:0.2', // not recommended, this make the video more "pixely"
          `noise=c0s=${noiseAmount}:c0f=t+u` // add a movie grain noise

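For context on the setpts change above: the filter rescales presentation timestamps, so a factor below 1 speeds the clip up and a factor above 1 slows it down. The removed line derived the factor from a duration ratio; the sketch below only illustrates that idea and is not the actual implementation of this file:

// illustrative only: derive a setpts factor that maps an actual clip duration
// onto a target duration (0.5 means "play twice as fast")
function computeSetptsFilter(actualDurationSec: number, targetDurationSec: number): string {
  const factor = targetDurationSec / actualDurationSec
  return `setpts=${factor}*PTS`
}

// e.g. a 7-second slow-motion clip that should end up lasting 3.5 seconds:
// computeSetptsFilter(7, 3.5) -> "setpts=0.5*PTS"
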
src/scheduler/processVideo.mts CHANGED
@@ -1,21 +1,20 @@
  import { v4 as uuidv4 } from "uuid"

- import { Video } from "../types.mts"
+ import { Video, VideoShot } from "../types.mts"

  import { generateVideo } from "../production/generateVideo.mts"
  import { upscaleVideo } from "../production/upscaleVideo.mts"
  import { interpolateVideo } from "../production/interpolateVideo.mts"
  import { postInterpolation } from "../production/postInterpolation.mts"
- import { assembleShots } from "../production/assembleShots.mts"
  import { generateAudio } from "../production/generateAudio.mts"
  import { addAudioToVideo } from "../production/addAudioToVideo.mts"

  import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
  import { copyVideoFromTmpToPending } from "../utils/copyVideoFromTmpToPending.mts"
- import { copyVideoFromPendingToCompleted } from "../utils/copyVideoFromPendingToCompleted.mts"

  import { saveAndCheckIfNeedToStop } from "./saveAndCheckIfNeedToStop.mts"
  import { enrichVideoSpecsUsingLLM } from "../llm/enrichVideoSpecsUsingLLM.mts"
+ import { updateShotPreview } from "./updateShotPreview.mts"

  export const processVideo = async (video: Video) => {

@@ -35,14 +34,23 @@ export const processVideo = async (video: Video) => {
    let nbCompletedSteps = 0

    if (!video.hasGeneratedSpecs) {
-     await enrichVideoSpecsUsingLLM(video)
+     try {
+       await enrichVideoSpecsUsingLLM(video)
+     } catch (err) {
+       console.error(`LLM error: ${err}`)
+       video.error = `LLM error: ${err}`
+       video.status = "delete"
+       if (await saveAndCheckIfNeedToStop(video)) { return }
+     }

      nbCompletedSteps++
+     video.hasGeneratedSpecs = true
      video.progressPercent = Math.round((nbCompletedSteps / nbTotalSteps) * 100)

      if (await saveAndCheckIfNeedToStop(video)) { return }
    }

+
    for (const shot of video.shots) {
      nbCompletedSteps += shot.nbCompletedSteps

@@ -53,7 +61,6 @@ export const processVideo = async (video: Video) => {

      console.log(`need to complete shot ${shot.id}`)

-
      // currenty we cannot generate too many frames at once,
      // otherwise the upscaler will have trouble

@@ -61,7 +68,6 @@ export const processVideo = async (video: Video) => {
      // const nbFramesForBaseModel = Math.min(3, Math.max(1, Math.round(duration))) * 8
      const nbFramesForBaseModel = 24

-     if (await saveAndCheckIfNeedToStop(video)) { return }

      if (!shot.hasGeneratedPreview) {
        console.log("generating a preview of the final result..")
@@ -80,25 +86,34 @@ export const processVideo = async (video: Video) => {

        await copyVideoFromTmpToPending(shot.fileName)

-       await copyVideoFromPendingToCompleted(shot.fileName, video.fileName)
-
        shot.hasGeneratedPreview = true
        shot.nbCompletedSteps++
        nbCompletedSteps++
        shot.progressPercent = Math.round((shot.nbCompletedSteps / shot.nbTotalSteps) * 100)
        video.progressPercent = Math.round((nbCompletedSteps / nbTotalSteps) * 100)
-
+
+       await updateShotPreview(video, shot)
+
        if (await saveAndCheckIfNeedToStop(video)) { return }
      } catch (err) {
        console.error(`failed to generate preview for shot ${shot.id} (${err})`)
        // something is wrong, let's put the whole thing back into the queue
        video.error = `failed to generate preview for shot ${shot.id} (will try again later)`
        if (await saveAndCheckIfNeedToStop(video)) { return }
-       break
+
+       // always try to yield whenever possible
+       return
      }

    }

+   const notAllShotsHavePreview = video.shots.some(s => !s.hasGeneratedPreview)
+
+   if (notAllShotsHavePreview) {
+     console.log(`step 2 isn't unlocked yet, because not all videos have generated preview`)
+     continue
+   }
+
    if (!shot.hasGeneratedVideo) {
      console.log("generating primordial pixel soup (raw video)..")
      let generatedVideoUrl = ""
@@ -125,7 +140,7 @@ export const processVideo = async (video: Video) => {
        shot.progressPercent = Math.round((shot.nbCompletedSteps / shot.nbTotalSteps) * 100)
        video.progressPercent = Math.round((nbCompletedSteps / nbTotalSteps) * 100)

-       await copyVideoFromPendingToCompleted(shot.fileName, video.fileName)
+       await updateShotPreview(video, shot)

        if (await saveAndCheckIfNeedToStop(video)) { return }
      } catch (err) {
@@ -149,9 +164,9 @@ export const processVideo = async (video: Video) => {
        nbCompletedSteps++
        shot.progressPercent = Math.round((shot.nbCompletedSteps / shot.nbTotalSteps) * 100)
        video.progressPercent = Math.round((nbCompletedSteps / nbTotalSteps) * 100)
-
-       await copyVideoFromPendingToCompleted(shot.fileName, video.fileName)
-
+
+       await updateShotPreview(video, shot)
+
        if (await saveAndCheckIfNeedToStop(video)) { return }

      } catch (err) {
@@ -177,9 +192,9 @@ export const processVideo = async (video: Video) => {
      // ATTENTION 3:
      // the interpolation step parameters are currently not passed to the space,
      // so changing those two variables below will have no effect!
-     const interpolationSteps = 3
-     const interpolatedFramesPerSecond = 24
-     console.log('creating slow-mo video (910x512 @ 24 FPS)')
+     const interpolationSteps = 2
+     const interpolatedFramesPerSecond = 30
+     console.log('creating slow-mo video (910x512 @ 30 FPS)')
      try {
        await interpolateVideo(
          shot.fileName,
@@ -193,46 +208,35 @@ export const processVideo = async (video: Video) => {
        shot.progressPercent = Math.round((shot.nbCompletedSteps / shot.nbTotalSteps) * 100)
        video.progressPercent = Math.round((nbCompletedSteps / nbTotalSteps) * 100)

-       await copyVideoFromPendingToCompleted(shot.fileName, video.fileName)

-       if (await saveAndCheckIfNeedToStop(video)) { return }
-
-     } catch (err) {
-       console.error(`failed to interpolate shot ${shot.id} (${err})`)
-       // something is wrong, let's put the whole thing back into the queue
-       video.error = `failed to interpolate shot ${shot.id} (will try again later)`
-       if (await saveAndCheckIfNeedToStop(video)) { return }
-       break
-     }
-   }
-
-   if (!shot.hasPostProcessedVideo) {
-     console.log("post-processing video..")
-
-     // with our current interpolation settings, the 3 seconds video generated by the model
-     // become a 7 seconds video, at 24 FPS
-
-     // so we want to scale it back to the desired duration length
-     // also, as a last trick we want to upscale it (without AI) and add some FXs
-     console.log('performing final scaling (1280x720 @ 24 FPS)')
-
-     try {
-       await postInterpolation(shot.fileName, shot.durationMs, shot.fps, shot.noiseAmount)
-
-       shot.hasPostProcessedVideo = true
-       shot.nbCompletedSteps++
-       nbCompletedSteps++
-       shot.progressPercent = Math.round((shot.nbCompletedSteps / shot.nbTotalSteps) * 100)
-       video.progressPercent = Math.round((nbCompletedSteps / nbTotalSteps) * 100)
-
-       await copyVideoFromPendingToCompleted(shot.fileName, video.fileName)
-
-       if (await saveAndCheckIfNeedToStop(video)) { return }
+       // note: showing the intermediary result isn't very interesting here
+
+       // with our current interpolation settings, the 3 seconds video generated by the model
+       // become a 7 seconds video, at 30 FPS
+
+       // so we want to scale it back to the desired duration length
+       // also, as a last trick we want to upscale it (without AI) and add some FXs
+       console.log('performing final scaling (1280x720 @ 30 FPS)')
+
+       try {
+         await postInterpolation(shot.fileName, shot.durationMs, shot.fps, shot.noiseAmount)
+
+         shot.hasPostProcessedVideo = true
+         shot.nbCompletedSteps++
+         nbCompletedSteps++
+         shot.progressPercent = Math.round((shot.nbCompletedSteps / shot.nbTotalSteps) * 100)
+         video.progressPercent = Math.round((nbCompletedSteps / nbTotalSteps) * 100)
+
+         await updateShotPreview(video, shot)
+
+         if (await saveAndCheckIfNeedToStop(video)) { return }
+       } catch (err) {
+         throw err
+       }
      } catch (err) {
-       console.error(`failed to post-process shot ${shot.id} (${err})`)
+       console.error(`failed to interpolate and post-process shot ${shot.id} (${err})`)
        // something is wrong, let's put the whole thing back into the queue
-       video.error = `failed to post-process shot ${shot.id} (will try again later)`
+       video.error = `failed to interpolate and shot ${shot.id} (will try again later)`
        if (await saveAndCheckIfNeedToStop(video)) { return }
        break
      }
@@ -256,7 +260,7 @@ export const processVideo = async (video: Video) => {

      await addAudioToVideo(shot.fileName, foregroundAudioFileName)

-     await copyVideoFromPendingToCompleted(shot.fileName, video.fileName)
+     await updateShotPreview(video, shot)

      if (await saveAndCheckIfNeedToStop(video)) { return }

@@ -292,12 +296,19 @@ export const processVideo = async (video: Video) => {

    // now time to check the end game

+
    if (video.nbCompletedShots === video.shots.length) {
-     console.log(`we have generated each individual shot!`)
-     console.log(`assembling the fonal..`)
+     console.log(`we have finished each individual shot!`)

      if (!video.hasAssembledVideo) {
-
+       video.hasAssembledVideo = true
+     }
+     /*
+     console.log(`assembling the final..`)
+     console.log(`note: this might be redundant..`)
+
+     if (!video.hasAssembledVideo) {
+       video.hasAssembledVideo = true
        if (video.shots.length === 1) {
          console.log(`we only have one shot, so this gonna be easy`)
          video.hasAssembledVideo = true
@@ -322,10 +333,10 @@ export const processVideo = async (video: Video) => {
          // something is wrong, let's put the whole thing back into the queue
          video.error = `failed to assemble the shots together (will try again later)`
          if (await saveAndCheckIfNeedToStop(video)) { return }
-         return
        }
      }
    }
+   */

    nbCompletedSteps++
    video.completed = true

src/scheduler/saveAndCheckIfNeedToStop.mts CHANGED
@@ -63,6 +63,7 @@ export const saveAndCheckIfNeedToStop = async (video: Video): Promise<boolean> =

    await updatePendingVideo(video)

-   // tell the loop if it should be aborted or not
-   return mustStop
+   // if we return "true", it means we will yield, which can be an interesting thing
+   // for us, to increase parallelism
+   return true
  }

src/scheduler/updateShotPreview.mts ADDED
@@ -0,0 +1,27 @@
+ import { assembleShots } from "../production/assembleShots.mts"
+ import { Video, VideoShot } from "../types.mts"
+ import { copyVideoFromPendingToCompleted } from "../utils/copyVideoFromPendingToCompleted.mts"
+
+ export const updateShotPreview = async (video: Video, shot: VideoShot) => {
+   // copy the individual shot
+   await copyVideoFromPendingToCompleted(shot.fileName)
+
+   // now let's create the latest version of the assembly
+   const shotsThatCanBeAssembled = video.shots.filter(sh => sh.hasGeneratedPreview)
+
+   // if we have multiple shots with at least a minimal image, we assemble them
+   if (shotsThatCanBeAssembled.length === 1) {
+     // copy the individual shot to become the final video
+     await copyVideoFromPendingToCompleted(shot.fileName, video.fileName)
+   } else if (shotsThatCanBeAssembled.length > 1) {
+     try {
+       // create an updated assembly
+       await assembleShots(shotsThatCanBeAssembled, video.fileName)
+
+       // copy the assembly to become the final video
+       await copyVideoFromPendingToCompleted(video.fileName)
+     } catch (err) {
+       console.error(`failed to create the temporary assembly: ${err}`)
+     }
+   }
+ }
src/utils/parseShotRequest.mts CHANGED
@@ -57,7 +57,7 @@ export const parseShotRequest = async (sequence: VideoSequence, maybeShotMeta: P
    steps: getValidNumber(maybeShotMeta.steps || sequence.steps, 10, 50, 45),

    // a video sequence MUST HAVE consistent frames per second
-   fps: getValidNumber(sequence.fps, 8, 60, 24),
+   fps: getValidNumber(sequence.fps, 8, 60, 30),

    // a video sequence MUST HAVE a consistent resolution
    resolution: sequence.resolution,

src/utils/parseVideoRequest.mts CHANGED
@@ -64,7 +64,7 @@ export const parseVideoRequest = async (ownerId: string, request: VideoAPIReques

    steps: getValidNumber(request.sequence.steps, 10, 50, 45),

-   fps: getValidNumber(request.sequence.fps, 8, 60, 24),
+   fps: getValidNumber(request.sequence.fps, 8, 60, 30),

    resolution: getValidResolution(request.sequence.resolution),

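getValidNumber is called throughout this commit with the pattern (input, min, max, defaultValue); its implementation is not part of this diff, so the following is only a plausible sketch of such a clamp-with-default helper:

// plausible sketch only; the real helper lives in src/utils/getValidNumber.mts
export const getValidNumber = (
  input: unknown,
  minValue: number,
  maxValue: number,
  defaultValue: number
): number => {
  const value = Number(input)
  if (!Number.isFinite(value)) { return defaultValue }
  return Math.min(maxValue, Math.max(minValue, value))
}

// e.g. getValidNumber(request.sequence.fps, 8, 60, 30)
// -> 30 when fps is missing or not a number, otherwise fps clamped to the 8..60 range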