jbilcke-hf (HF staff) committed
Commit ccd48b8
1 Parent(s): e4e0e54
.gitignore CHANGED
@@ -4,4 +4,5 @@ node_modules
  .DS_Store
  .venv
  *.mp4
- sandbox
+ sandbox
+ scripts

src/index.mts CHANGED
@@ -95,6 +95,7 @@ app.get("/:ownerId/:videoId\.mp4", async (req, res) => {
   */

  const ownerId = req.params.ownerId
+ console.log("downloading..")

  if (!uuidValidate(ownerId)) {
    console.error("invalid owner id")

src/llm/enrichVideoSpecsUsingLLM.mts CHANGED
@@ -5,6 +5,7 @@ import { generateYAML } from "./openai/generateYAML.mts"
  import { HallucinatedVideoRequest, OpenAIErrorResponse } from "./types.mts"
  import { getQueryChatMessages } from "../preproduction/prompts.mts"
  import { getValidNumber } from "../utils/getValidNumber.mts"
+ import { parseShotRequest } from "../utils/parseShotRequest.mts"


  export const enrichVideoSpecsUsingLLM = async (video: Video): Promise<Video> => {
@@ -13,15 +14,9 @@ export const enrichVideoSpecsUsingLLM = async (video: Video): Promise<Video> =>

    const defaultValue = {} as unknown as HallucinatedVideoRequest

-   // console.log("enrichVideoSpecsUsingLLM: messages = ", messages)
-
    let hallucinatedVideo: HallucinatedVideoRequest
-
-
-   const referenceShot = video.shots[0]
    video.shots = []
-   // console.log("referenceShot:", referenceShot)
-
+
    try {
      hallucinatedVideo = await generateYAML<HallucinatedVideoRequest>(
        messages,
@@ -64,12 +59,13 @@ export const enrichVideoSpecsUsingLLM = async (video: Video): Promise<Video> =>


    for (const hallucinatedShot of hallucinatedShots) {
-     const shot = JSON.parse(JSON.stringify(referenceShot))
-     shot.shotPrompt = hallucinatedShot.shotPrompt || shot.shotPrompt
-     shot.environmentPrompt = hallucinatedShot.environmentPrompt || shot.environmentPrompt
-     shot.photographyPrompt = hallucinatedShot.photographyPrompt || shot.photographyPrompt
-     shot.actionPrompt = hallucinatedShot.actionPrompt || shot.actionPrompt
-     shot.foregroundAudioPrompt = hallucinatedShot.foregroundAudioPrompt || shot.foregroundAudioPrompt
+     const shot = await parseShotRequest(video, {
+       shotPrompt: hallucinatedShot.shotPrompt,
+       environmentPrompt: hallucinatedShot.environmentPrompt,
+       photographyPrompt: hallucinatedShot.photographyPrompt,
+       actionPrompt: hallucinatedShot.actionPrompt,
+       foregroundAudioPrompt: hallucinatedShot.foregroundAudioPrompt
+     })
      video.shots.push(shot)
    }

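Note: the shape of HallucinatedVideoRequest is defined in src/llm/types.mts and is not part of this commit; the following is only a hypothetical partial sketch, inferred from the fields this diff reads from each hallucinated shot.

// hypothetical partial sketch, inferred from the field accesses in the diff above
interface HallucinatedVideoShot {
  shotPrompt?: string
  environmentPrompt?: string
  photographyPrompt?: string
  actionPrompt?: string
  foregroundAudioPrompt?: string
}

interface HallucinatedVideoRequest {
  // the enrichment loop iterates over a list of hallucinated shots
  shots?: HallucinatedVideoShot[]
  // other fields omitted, since they do not appear in this diff
}
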
src/preproduction/prompts.mts CHANGED
@@ -30,12 +30,16 @@ The format expected by the API must be in YAML. The TypeScript schema for this Y
    }>
  }
  \`\`\`
- # Guidelines
- You will generate 3 shots by default, unless more or less are specified.
- Is it crucial to repeat the elements consituting a sequence of multiple shots verbatim from one shot to another.
+ # Guidelines for number of shots and their duration
+ You will generate 1 shot by default, unless more or fewer are specified.
+ A shot can only last 3 seconds max.
+ So if you are asked to generate a 6-second video, you need 2 shots; for a 9-second video, 3 shots, and so on.
+ If you are asked to generate, for instance, an 11-second video, you need three 3-second shots plus one 2-second shot.
+ # Guidelines for writing descriptions
+ It is crucial to repeat the elements constituting a sequence of multiple shots verbatim from one shot to another.
  For instance, you will have to repeat exactly what a character or background look like, how they are dressed etc.
  This will ensure consistency from one scene to another.
- ## Creating a movie
+ ## Choosing the right words
  Here are some guidelines regarding film-making:
  - The distance your subject is to the camera impacts how the audience feels about them.
  - Subject will appear largest in a close-up or choker shot and smallest in a wide or long shot.

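The shot-count rule in the new prompt is simple integer arithmetic; a small illustrative helper (not part of this commit, names are made up) that splits a requested duration into shots of at most 3 seconds:

// illustrative only: split a requested duration into shots of at most 3 seconds
const MAX_SHOT_DURATION_SEC = 3

function splitIntoShotDurations(totalDurationSec: number): number[] {
  const durations: number[] = []
  let remaining = Math.max(1, Math.round(totalDurationSec))
  while (remaining > 0) {
    const shotDuration = Math.min(MAX_SHOT_DURATION_SEC, remaining)
    durations.push(shotDuration)
    remaining -= shotDuration
  }
  return durations
}

// splitIntoShotDurations(6)  -> [3, 3]        (2 shots)
// splitIntoShotDurations(9)  -> [3, 3, 3]     (3 shots)
// splitIntoShotDurations(11) -> [3, 3, 3, 2]  (three 3-second shots plus one 2-second shot)
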
src/production/assembleShots.mts CHANGED
@@ -4,6 +4,7 @@ import concat from 'ffmpeg-concat'

  import { VideoShot } from '../types.mts'
  import { pendingFilesDirFilePath } from "../config.mts"
+ import { normalizePendingVideoToTmpFilePath } from "./normalizePendingVideoToTmpFilePath.mts"

  export const assembleShots = async (shots: VideoShot[], fileName: string) => {

@@ -26,6 +27,7 @@ export const assembleShots = async (shots: VideoShot[], fileName: string) => {
      // pass custom params to a transition
      params: { direction: [1, -1] },
    },
+
    /*
    {
      name: 'squaresWire',
@@ -36,10 +38,12 @@ export const assembleShots = async (shots: VideoShot[], fileName: string) => {

  const videoFilePath = path.join(pendingFilesDirFilePath, fileName)

- const shotFilesPaths = shots.map(shot => path.join(
-   pendingFilesDirFilePath,
-   shot.fileName
- ))
+ // before performing assembly, we must normalize images
+ const shotFilesPaths: string[] = []
+ for (let shot of shots) {
+   const normalizedShotFilePath = await normalizePendingVideoToTmpFilePath(shot.fileName)
+   shotFilesPaths.push(normalizedShotFilePath)
+ }

  await concat({
    output: videoFilePath,

src/production/generateShot.mts DELETED
@@ -1,209 +0,0 @@
- import path from "node:path"
-
- import { v4 as uuidv4 } from "uuid"
- import tmpDir from "temp-dir"
-
- import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
- import { generateAudio } from "./generateAudio.mts"
- import { generateVideo } from "./generateVideo.mts"
- import { upscaleVideo } from "./upscaleVideo.mts"
- import { generateVoice } from "./generateVoice.mts"
- import { generateSeed } from "../utils/generateSeed.mts"
- import { mergeAudio } from "./mergeAudio.mts"
- import { addAudioToVideo } from "./addAudioToVideo.mts"
- import { interpolateVideo } from "./interpolateVideo.mts"
- import { postInterpolation } from "./postInterpolation.mts"
-
-
- export const generateShot = async ({
-   seed = 0,
-   shotId = "",
-   actorPrompt = "",
-   shotPrompt = "",
-   backgroundAudioPrompt = "",
-   foregroundAudioPrompt = "",
-   actorDialoguePrompt = "",
-   actorVoicePrompt = "",
-   duration = 2,
-   nbFrames = 24,
-   resolution = 576,
-   nbSteps = 35,
-   upscale = true,
-   interpolate = true,
-   noise = true,
- }: {
-   seed?: number;
-   shotId?: string;
-   actorPrompt?: string;
-   shotPrompt?: string;
-   backgroundAudioPrompt?: string;
-   foregroundAudioPrompt?: string;
-   actorDialoguePrompt?: string;
-   actorVoicePrompt?: string;
-   duration?: number; // 2 seconds
-   nbFrames?: number; // 24 FPS
-   resolution?: number; // 256, 320, 512, 576, 720, 1080..
-   nbSteps?: number;
-   upscale?: boolean;
-   interpolate?: boolean;
-   noise?: boolean;
- }) => {
-   seed = seed || generateSeed()
-   shotId = shotId || uuidv4()
-
-   const shotFileName = `${shotId}.mp4`
-
-   console.log("generating video shot:", {
-     seed,
-     shotId,
-     actorPrompt,
-     shotPrompt,
-     backgroundAudioPrompt,
-     foregroundAudioPrompt,
-     actorDialoguePrompt,
-     actorVoicePrompt,
-     duration,
-     nbFrames,
-     resolution,
-     nbSteps,
-     upscale,
-     interpolate,
-     noise,
-   })
-
-
-   if (actorPrompt) {
-     console.log("generating actor..")
-     const actorIdentityFileName = `actor_${Date.now()}.png`
-     // await generateActor(actorPrompt, actorIdentityFileName, seed)
-   }
-
-   console.log("generating base video ..")
-   let generatedVideoUrl = ""
-
-   // currenty the base model is incapable of generating more than 24 FPS,
-   // because otherwise the upscaler will have trouble
-
-   // so for now, we fix it to 24 frames
-   // const nbFramesForBaseModel = Math.min(3, Math.max(1, Math.round(duration))) * 8
-   const nbFramesForBaseModel = 24
-
-   try {
-     generatedVideoUrl = await generateVideo(shotPrompt, {
-       seed,
-       nbFrames: nbFramesForBaseModel,
-       nbSteps
-     })
-   } catch (err) {
-     // upscaling can be finicky, if it fails we try again
-     console.log('- trying again to generate base shot..')
-     generatedVideoUrl = await generateVideo(shotPrompt, {
-       seed,
-       nbFrames: nbFramesForBaseModel,
-       nbSteps
-     })
-   }
-
-   console.log("downloading video..")
-
-   const videoFileName = await downloadFileToTmp(generatedVideoUrl, shotFileName)
-
-   if (upscale) {
-     console.log("upscaling video..")
-     try {
-       await upscaleVideo(videoFileName, shotPrompt)
-     } catch (err) {
-       // upscaling can be finicky, if it fails we try again
-       console.log('- trying again to upscale shot..')
-       await upscaleVideo(videoFileName, shotPrompt)
-     }
-   }
-
-   if (interpolate) {
-     console.log("upscaling video..")
-     // ATTENTION 1:
-     // the interpolation step always create a SLOW MOTION video
-     // it means it can last a lot longer (eg. 2x, 3x, 4x.. longer)
-     // than the duration generated by the original video model
-
-     // ATTENTION 2:
-     // the interpolation step generates videos in 910x512!
-
-     // ATTENTION 3:
-     // the interpolation step parameters are currently not passed to the space,
-     // so changing those two variables below will have no effect!
-     const interpolationSteps = 3
-     const interpolatedFramesPerSecond = 24
-     await interpolateVideo(
-       video,
-       interpolationSteps,
-       interpolatedFramesPerSecond
-     )
-     console.log('creating slow-mo video (910x512 @ 24 FPS)')
-
-     // with our current interpolation settings, the 3 seconds video generated by the model
-     // become a 7 seconds video, at 24 FPS
-
-     // so we want to scale it back to the desired duration length
-     // also, as a last trick we want to upscale it (without AI) and add some FXs
-     console.log('performing final scaling (1280x720 @ 24 FPS)')
-     await postInterpolation(videoFileName, duration, nbFrames)
-   }
-
-   let backgroundAudioFileName = ''
-   if (backgroundAudioPrompt) {
-     console.log("generating background audio..")
-     backgroundAudioFileName = await generateAudio(backgroundAudioPrompt, `shot_${shotId}_audio_${uuidv4}.m4a`)
-   }
-
-   let foregroundAudioFileName = ''
-   if (foregroundAudioPrompt) {
-     console.log("generating foreground audio..")
-     foregroundAudioFileName = await generateAudio(foregroundAudioPrompt, `shot_${shotId}_audio_${uuidv4()}.m4a`)
-   }
-
-
-   let voiceAudioFileName = ''
-   if (actorDialoguePrompt) {
-     console.log("configuring dialogue..")
-     if (actorVoicePrompt) {
-       console.log("configuring voice..")
-       // well.. that's a TODO!
-       // for now let's always use the same voice model
-
-       console.log('TODO this should be done in the sequence, not the prompt!')
-       voiceAudioFileName = await generateVoice(actorDialoguePrompt, `shot_${shotId}_voice_${uuidv4()}.m4a`)
-     }
-   }
-
-   console.log('merging audio with video..')
-   if (backgroundAudioFileName || foregroundAudioFileName) {
-     let audioFileName = ''
-
-     // we have both background and foreground
-     if (backgroundAudioFileName && foregroundAudioFileName) {
-       audioFileName = await mergeAudio({
-         input1FileName: backgroundAudioFileName,
-         input1Volume: 0.2, // 20% volume
-         input2FileName: foregroundAudioFileName,
-         input2Volume: 0.7, // 70% volume
-       })
-     } else if (backgroundAudioFileName) {
-       audioFileName = backgroundAudioFileName
-     } else if (foregroundAudioFileName) {
-       audioFileName = foregroundAudioFileName
-     }
-
-     await addAudioToVideo(video, audioFileName)
-   }
-
-   console.log("returning result to user..")
-
-   const filePath = path.resolve(tmpDir, videoFileName)
-
-   return {
-     shotId,
-     filePath,
-     videoFileName
-   }
- }
 
src/production/normalizePendingVideoToTmpFilePath.mts ADDED
@@ -0,0 +1,33 @@
+ import path from "node:path"
+
+ import { v4 as uuidv4 } from "uuid"
+ import tmpDir from "temp-dir"
+ import ffmpeg from "fluent-ffmpeg"
+
+ import { pendingFilesDirFilePath } from "../config.mts"
+
+ export const normalizePendingVideoToTmpFilePath = async (fileName: string): Promise<string> => {
+   return new Promise((resolve,reject) => {
+
+     const tmpFileName = `${uuidv4()}.mp4`
+
+     const filePath = path.join(pendingFilesDirFilePath, fileName)
+     const tmpFilePath = path.join(tmpDir, tmpFileName)
+
+     ffmpeg.ffprobe(filePath, function(err,) {
+       if (err) { reject(err); return; }
+
+       ffmpeg(filePath)
+
+         .size("1280x720")
+
+         .save(tmpFilePath)
+         .on("end", async () => {
+           resolve(tmpFilePath)
+         })
+         .on("error", (err) => {
+           reject(err)
+         })
+     })
+   })
+ }
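The helper above is consumed by assembleShots (see the change to src/production/assembleShots.mts earlier in this commit); a minimal usage sketch, assuming only the pending file names of the shots are needed (the wrapper name is illustrative and not part of the repository):

// minimal usage sketch (illustrative wrapper, not part of this commit)
import concat from "ffmpeg-concat"
import { normalizePendingVideoToTmpFilePath } from "./normalizePendingVideoToTmpFilePath.mts"

const assembleNormalizedShots = async (shotFileNames: string[], outputFilePath: string) => {
  // re-encode every pending shot to a common 1280x720 format before concatenation
  const shotFilesPaths: string[] = []
  for (const fileName of shotFileNames) {
    shotFilesPaths.push(await normalizePendingVideoToTmpFilePath(fileName))
  }
  await concat({
    output: outputFilePath,
    videos: shotFilesPaths,
  })
}
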
src/production/postInterpolation.mts CHANGED
@@ -4,13 +4,14 @@ import { v4 as uuidv4 } from "uuid"
  import tmpDir from "temp-dir"
  import ffmpeg from "fluent-ffmpeg"
  import { moveFileFromTmpToPending } from "../utils/moveFileFromTmpToPending.mts"
+ import { pendingFilesDirFilePath } from "../config.mts"

  export const postInterpolation = async (fileName: string, durationMs: number, nbFrames: number, noiseAmount: number): Promise<string> => {
    return new Promise((resolve,reject) => {

      const tmpFileName = `${uuidv4()}.mp4`

-     const filePath = path.join(tmpDir, fileName)
+     const filePath = path.join(pendingFilesDirFilePath, fileName)
      const tmpFilePath = path.join(tmpDir, tmpFileName)

      ffmpeg.ffprobe(filePath, function(err, metadata) {
@@ -34,7 +35,7 @@ export const postInterpolation = async (fileName: string, durationMs: number, nb
        .size("1280x720")

        .videoFilters([
-         `setpts=${durationRatio}*PTS`, // we make the video faster
+         `setpts=0.5*PTS`, // we make the video faster
          //'scale=-1:576:lanczos',
          // 'unsharp=5:5:0.2:5:5:0.2', // not recommended, this make the video more "pixely"
          `noise=c0s=${noiseAmount}:c0f=t+u` // add a movie grain noise

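For context on the setpts change above: the filter rescales presentation timestamps, so a factor below 1 speeds the clip up and a factor above 1 slows it down. The removed line derived the factor from a duration ratio; the sketch below only illustrates that idea and is not the actual implementation of this file:

// illustrative only: derive a setpts factor that maps an actual clip duration
// onto a target duration (0.5 means "play twice as fast")
function computeSetptsFilter(actualDurationSec: number, targetDurationSec: number): string {
  const factor = targetDurationSec / actualDurationSec
  return `setpts=${factor}*PTS`
}

// e.g. a 7-second slow-motion clip that should end up lasting 3.5 seconds:
// computeSetptsFilter(7, 3.5) -> "setpts=0.5*PTS"
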
src/scheduler/processVideo.mts CHANGED
@@ -1,21 +1,20 @@
  import { v4 as uuidv4 } from "uuid"

- import { Video } from "../types.mts"
+ import { Video, VideoShot } from "../types.mts"

  import { generateVideo } from "../production/generateVideo.mts"
  import { upscaleVideo } from "../production/upscaleVideo.mts"
  import { interpolateVideo } from "../production/interpolateVideo.mts"
  import { postInterpolation } from "../production/postInterpolation.mts"
- import { assembleShots } from "../production/assembleShots.mts"
  import { generateAudio } from "../production/generateAudio.mts"
  import { addAudioToVideo } from "../production/addAudioToVideo.mts"

  import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
  import { copyVideoFromTmpToPending } from "../utils/copyVideoFromTmpToPending.mts"
- import { copyVideoFromPendingToCompleted } from "../utils/copyVideoFromPendingToCompleted.mts"

  import { saveAndCheckIfNeedToStop } from "./saveAndCheckIfNeedToStop.mts"
  import { enrichVideoSpecsUsingLLM } from "../llm/enrichVideoSpecsUsingLLM.mts"
+ import { updateShotPreview } from "./updateShotPreview.mts"

  export const processVideo = async (video: Video) => {

@@ -35,14 +34,23 @@ export const processVideo = async (video: Video) => {
    let nbCompletedSteps = 0

    if (!video.hasGeneratedSpecs) {
-     await enrichVideoSpecsUsingLLM(video)
+     try {
+       await enrichVideoSpecsUsingLLM(video)
+     } catch (err) {
+       console.error(`LLM error: ${err}`)
+       video.error = `LLM error: ${err}`
+       video.status = "delete"
+       if (await saveAndCheckIfNeedToStop(video)) { return }
+     }

      nbCompletedSteps++
+     video.hasGeneratedSpecs = true
      video.progressPercent = Math.round((nbCompletedSteps / nbTotalSteps) * 100)

      if (await saveAndCheckIfNeedToStop(video)) { return }
    }

+
    for (const shot of video.shots) {
      nbCompletedSteps += shot.nbCompletedSteps

@@ -53,7 +61,6 @@ export const processVideo = async (video: Video) => {

      console.log(`need to complete shot ${shot.id}`)

-
      // currenty we cannot generate too many frames at once,
      // otherwise the upscaler will have trouble

@@ -61,7 +68,6 @@ export const processVideo = async (video: Video) => {
      // const nbFramesForBaseModel = Math.min(3, Math.max(1, Math.round(duration))) * 8
      const nbFramesForBaseModel = 24

-     if (await saveAndCheckIfNeedToStop(video)) { return }

      if (!shot.hasGeneratedPreview) {
        console.log("generating a preview of the final result..")
@@ -80,25 +86,34 @@ export const processVideo = async (video: Video) => {

        await copyVideoFromTmpToPending(shot.fileName)

-       await copyVideoFromPendingToCompleted(shot.fileName, video.fileName)
-
        shot.hasGeneratedPreview = true
        shot.nbCompletedSteps++
        nbCompletedSteps++
        shot.progressPercent = Math.round((shot.nbCompletedSteps / shot.nbTotalSteps) * 100)
        video.progressPercent = Math.round((nbCompletedSteps / nbTotalSteps) * 100)
-
+
+       await updateShotPreview(video, shot)
+
        if (await saveAndCheckIfNeedToStop(video)) { return }
      } catch (err) {
        console.error(`failed to generate preview for shot ${shot.id} (${err})`)
        // something is wrong, let's put the whole thing back into the queue
        video.error = `failed to generate preview for shot ${shot.id} (will try again later)`
        if (await saveAndCheckIfNeedToStop(video)) { return }
-       break
+
+       // always try to yield whenever possible
+       return
      }

    }

+   const notAllShotsHavePreview = video.shots.some(s => !s.hasGeneratedPreview)
+
+   if (notAllShotsHavePreview) {
+     console.log(`step 2 isn't unlocked yet, because not all videos have generated preview`)
+     continue
+   }
+
    if (!shot.hasGeneratedVideo) {
      console.log("generating primordial pixel soup (raw video)..")
      let generatedVideoUrl = ""
@@ -125,7 +140,7 @@ export const processVideo = async (video: Video) => {
        shot.progressPercent = Math.round((shot.nbCompletedSteps / shot.nbTotalSteps) * 100)
        video.progressPercent = Math.round((nbCompletedSteps / nbTotalSteps) * 100)

-       await copyVideoFromPendingToCompleted(shot.fileName, video.fileName)
+       await updateShotPreview(video, shot)

        if (await saveAndCheckIfNeedToStop(video)) { return }
      } catch (err) {
@@ -149,9 +164,9 @@ export const processVideo = async (video: Video) => {
        nbCompletedSteps++
        shot.progressPercent = Math.round((shot.nbCompletedSteps / shot.nbTotalSteps) * 100)
        video.progressPercent = Math.round((nbCompletedSteps / nbTotalSteps) * 100)
-
-       await copyVideoFromPendingToCompleted(shot.fileName, video.fileName)
-
+
+       await updateShotPreview(video, shot)
+
        if (await saveAndCheckIfNeedToStop(video)) { return }

      } catch (err) {
@@ -177,9 +192,9 @@ export const processVideo = async (video: Video) => {
      // ATTENTION 3:
      // the interpolation step parameters are currently not passed to the space,
      // so changing those two variables below will have no effect!
-     const interpolationSteps = 3
-     const interpolatedFramesPerSecond = 24
-     console.log('creating slow-mo video (910x512 @ 24 FPS)')
+     const interpolationSteps = 2
+     const interpolatedFramesPerSecond = 30
+     console.log('creating slow-mo video (910x512 @ 30 FPS)')
      try {
        await interpolateVideo(
          shot.fileName,
@@ -193,46 +208,35 @@ export const processVideo = async (video: Video) => {
        shot.progressPercent = Math.round((shot.nbCompletedSteps / shot.nbTotalSteps) * 100)
        video.progressPercent = Math.round((nbCompletedSteps / nbTotalSteps) * 100)

-       await copyVideoFromPendingToCompleted(shot.fileName, video.fileName)

-       if (await saveAndCheckIfNeedToStop(video)) { return }
-
-     } catch (err) {
-       console.error(`failed to interpolate shot ${shot.id} (${err})`)
-       // something is wrong, let's put the whole thing back into the queue
-       video.error = `failed to interpolate shot ${shot.id} (will try again later)`
-       if (await saveAndCheckIfNeedToStop(video)) { return }
-       break
-     }
-   }
-
-   if (!shot.hasPostProcessedVideo) {
-     console.log("post-processing video..")
-
-     // with our current interpolation settings, the 3 seconds video generated by the model
-     // become a 7 seconds video, at 24 FPS
-
-     // so we want to scale it back to the desired duration length
-     // also, as a last trick we want to upscale it (without AI) and add some FXs
-     console.log('performing final scaling (1280x720 @ 24 FPS)')
-
-     try {
-       await postInterpolation(shot.fileName, shot.durationMs, shot.fps, shot.noiseAmount)
-
-       shot.hasPostProcessedVideo = true
-       shot.nbCompletedSteps++
-       nbCompletedSteps++
-       shot.progressPercent = Math.round((shot.nbCompletedSteps / shot.nbTotalSteps) * 100)
-       video.progressPercent = Math.round((nbCompletedSteps / nbTotalSteps) * 100)
-
-       await copyVideoFromPendingToCompleted(shot.fileName, video.fileName)
-
-       if (await saveAndCheckIfNeedToStop(video)) { return }
+       // note: showing the intermediary result isn't very interesting here
+
+       // with our current interpolation settings, the 3 seconds video generated by the model
+       // become a 7 seconds video, at 30 FPS
+
+       // so we want to scale it back to the desired duration length
+       // also, as a last trick we want to upscale it (without AI) and add some FXs
+       console.log('performing final scaling (1280x720 @ 30 FPS)')
+
+       try {
+         await postInterpolation(shot.fileName, shot.durationMs, shot.fps, shot.noiseAmount)
+
+         shot.hasPostProcessedVideo = true
+         shot.nbCompletedSteps++
+         nbCompletedSteps++
+         shot.progressPercent = Math.round((shot.nbCompletedSteps / shot.nbTotalSteps) * 100)
+         video.progressPercent = Math.round((nbCompletedSteps / nbTotalSteps) * 100)
+
+         await updateShotPreview(video, shot)
+
+         if (await saveAndCheckIfNeedToStop(video)) { return }
+       } catch (err) {
+         throw err
+       }
      } catch (err) {
-       console.error(`failed to post-process shot ${shot.id} (${err})`)
+       console.error(`failed to interpolate and post-process shot ${shot.id} (${err})`)
        // something is wrong, let's put the whole thing back into the queue
-       video.error = `failed to post-process shot ${shot.id} (will try again later)`
+       video.error = `failed to interpolate and shot ${shot.id} (will try again later)`
        if (await saveAndCheckIfNeedToStop(video)) { return }
        break
      }
@@ -256,7 +260,7 @@ export const processVideo = async (video: Video) => {

      await addAudioToVideo(shot.fileName, foregroundAudioFileName)

-     await copyVideoFromPendingToCompleted(shot.fileName, video.fileName)
+     await updateShotPreview(video, shot)

      if (await saveAndCheckIfNeedToStop(video)) { return }

@@ -292,12 +296,19 @@ export const processVideo = async (video: Video) => {

    // now time to check the end game

+
    if (video.nbCompletedShots === video.shots.length) {
-     console.log(`we have generated each individual shot!`)
-     console.log(`assembling the fonal..`)
+     console.log(`we have finished each individual shot!`)

      if (!video.hasAssembledVideo) {
-
+       video.hasAssembledVideo = true
+     }
+     /*
+     console.log(`assembling the final..`)
+     console.log(`note: this might be redundant..`)
+
+     if (!video.hasAssembledVideo) {
+       video.hasAssembledVideo = true
        if (video.shots.length === 1) {
          console.log(`we only have one shot, so this gonna be easy`)
          video.hasAssembledVideo = true
@@ -322,10 +333,10 @@ export const processVideo = async (video: Video) => {
          // something is wrong, let's put the whole thing back into the queue
          video.error = `failed to assemble the shots together (will try again later)`
          if (await saveAndCheckIfNeedToStop(video)) { return }
-         return
        }
      }
    }
+   */

    nbCompletedSteps++
    video.completed = true

src/scheduler/saveAndCheckIfNeedToStop.mts CHANGED
@@ -63,6 +63,7 @@ export const saveAndCheckIfNeedToStop = async (video: Video): Promise<boolean> =

    await updatePendingVideo(video)

-   // tell the loop if it should be aborted or not
-   return mustStop
+   // if we return "true", it means we will yield, which can be an interesting thing
+   // for us, to increase parallelism
+   return true
  }

src/scheduler/updateShotPreview.mts ADDED
@@ -0,0 +1,27 @@
+ import { assembleShots } from "../production/assembleShots.mts"
+ import { Video, VideoShot } from "../types.mts"
+ import { copyVideoFromPendingToCompleted } from "../utils/copyVideoFromPendingToCompleted.mts"
+
+ export const updateShotPreview = async (video: Video, shot: VideoShot) => {
+   // copy the individual shot
+   await copyVideoFromPendingToCompleted(shot.fileName)
+
+   // now let's create the latest version of the assembly
+   const shotsThatCanBeAssembled = video.shots.filter(sh => sh.hasGeneratedPreview)
+
+   // if we have multiple shots with at least a minimal image, we assemble them
+   if (shotsThatCanBeAssembled.length === 1) {
+     // copy the individual shot to become the final video
+     await copyVideoFromPendingToCompleted(shot.fileName, video.fileName)
+   } else if (shotsThatCanBeAssembled.length > 1) {
+     try {
+       // create an updated assembly
+       await assembleShots(shotsThatCanBeAssembled, video.fileName)
+
+       // copy the assembly to become the final video
+       await copyVideoFromPendingToCompleted(video.fileName)
+     } catch (err) {
+       console.error(`failed to create the temporary assembly: ${err}`)
+     }
+   }
+ }
src/utils/parseShotRequest.mts CHANGED
@@ -57,7 +57,7 @@ export const parseShotRequest = async (sequence: VideoSequence, maybeShotMeta: P
    steps: getValidNumber(maybeShotMeta.steps || sequence.steps, 10, 50, 45),

    // a video sequence MUST HAVE consistent frames per second
-   fps: getValidNumber(sequence.fps, 8, 60, 24),
+   fps: getValidNumber(sequence.fps, 8, 60, 30),

    // a video sequence MUST HAVE a consistent resolution
    resolution: sequence.resolution,

src/utils/parseVideoRequest.mts CHANGED
@@ -64,7 +64,7 @@ export const parseVideoRequest = async (ownerId: string, request: VideoAPIReques

    steps: getValidNumber(request.sequence.steps, 10, 50, 45),

-   fps: getValidNumber(request.sequence.fps, 8, 60, 24),
+   fps: getValidNumber(request.sequence.fps, 8, 60, 30),

    resolution: getValidResolution(request.sequence.resolution),

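getValidNumber is called throughout this commit with the pattern (input, min, max, defaultValue); its implementation is not part of this diff, so the following is only a plausible sketch of such a clamp-with-default helper:

// plausible sketch only; the real helper lives in src/utils/getValidNumber.mts
export const getValidNumber = (
  input: unknown,
  minValue: number,
  maxValue: number,
  defaultValue: number
): number => {
  const value = Number(input)
  if (!Number.isFinite(value)) { return defaultValue }
  return Math.min(maxValue, Math.max(minValue, value))
}

// e.g. getValidNumber(request.sequence.fps, 8, 60, 30)
// -> 30 when fps is missing or not a number, otherwise fps clamped to the 8..60 range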