Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
•
ec194c9
1
Parent(s):
8aa943e
wip
Browse files- package-lock.json +12 -0
- package.json +1 -0
- src/production/renderVideo.mts +1 -1
- src/providers/lip-syncing/generateLipSyncVideo.mts +7 -0
- src/providers/music-generation/generateMusicWithReplicate.mts +1 -0
- src/providers/video-generation/defaultPrompts.mts +49 -0
- src/providers/video-generation/generateVideoWithHotshotGradioAPI.mts +63 -0
- src/providers/video-generation/generateVideoWithHotshotReplicate.mts +135 -0
- src/providers/video-generation/generateVideoWithShow.mts +15 -0
- src/providers/video-generation/generateVideoWithZeroscope.mts +49 -0
- src/providers/video-generation/types.mts +62 -0
- src/providers/video-interpolation/interpolateVideoWithReplicate.mts +104 -0
- src/providers/{video-generation/generateVideo.mts → video-transformation/transformVideoWithHotshotReplicate.mts} +1 -3
- src/scheduler/processVideo.mts +1 -1
- src/types.mts +14 -0
package-lock.json
CHANGED
@@ -26,6 +26,7 @@
|
|
26 |
"nodejs-whisper": "^0.1.4",
|
27 |
"openai": "^3.3.0",
|
28 |
"puppeteer": "^20.8.0",
|
|
|
29 |
"resize-base64": "^1.0.12",
|
30 |
"sharp": "^0.32.4",
|
31 |
"temp-dir": "^3.0.0",
|
@@ -4401,6 +4402,17 @@
|
|
4401 |
"node": ">=0.10"
|
4402 |
}
|
4403 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4404 |
"node_modules/request": {
|
4405 |
"version": "2.88.2",
|
4406 |
"resolved": "https://registry.npmjs.org/request/-/request-2.88.2.tgz",
|
|
|
26 |
"nodejs-whisper": "^0.1.4",
|
27 |
"openai": "^3.3.0",
|
28 |
"puppeteer": "^20.8.0",
|
29 |
+
"replicate": "^0.20.1",
|
30 |
"resize-base64": "^1.0.12",
|
31 |
"sharp": "^0.32.4",
|
32 |
"temp-dir": "^3.0.0",
|
|
|
4402 |
"node": ">=0.10"
|
4403 |
}
|
4404 |
},
|
4405 |
+
"node_modules/replicate": {
|
4406 |
+
"version": "0.20.1",
|
4407 |
+
"resolved": "https://registry.npmjs.org/replicate/-/replicate-0.20.1.tgz",
|
4408 |
+
"integrity": "sha512-QVyI1rowGsSfNuDrRmumYPdCHa/fN/RkI3NHpcK0i5hSSiWK69URAyheAC/0MIAiS3oUs4kD56PB9zEI4oHENw==",
|
4409 |
+
"engines": {
|
4410 |
+
"git": ">=2.11.0",
|
4411 |
+
"node": ">=18.0.0",
|
4412 |
+
"npm": ">=7.19.0",
|
4413 |
+
"yarn": ">=1.7.0"
|
4414 |
+
}
|
4415 |
+
},
|
4416 |
"node_modules/request": {
|
4417 |
"version": "2.88.2",
|
4418 |
"resolved": "https://registry.npmjs.org/request/-/request-2.88.2.tgz",
|
package.json
CHANGED
@@ -33,6 +33,7 @@
|
|
33 |
"nodejs-whisper": "^0.1.4",
|
34 |
"openai": "^3.3.0",
|
35 |
"puppeteer": "^20.8.0",
|
|
|
36 |
"resize-base64": "^1.0.12",
|
37 |
"sharp": "^0.32.4",
|
38 |
"temp-dir": "^3.0.0",
|
|
|
33 |
"nodejs-whisper": "^0.1.4",
|
34 |
"openai": "^3.3.0",
|
35 |
"puppeteer": "^20.8.0",
|
36 |
+
"replicate": "^0.20.1",
|
37 |
"resize-base64": "^1.0.12",
|
38 |
"sharp": "^0.32.4",
|
39 |
"temp-dir": "^3.0.0",
|
src/production/renderVideo.mts
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import { RenderedScene, RenderRequest } from "../types.mts"
|
2 |
-
import { generateVideo } from "../providers/video-generation/
|
3 |
|
4 |
export async function renderVideo(
|
5 |
request: RenderRequest,
|
|
|
1 |
import { RenderedScene, RenderRequest } from "../types.mts"
|
2 |
+
import { generateVideo } from "../providers/video-generation/generateVideoWithZeroscope.mts"
|
3 |
|
4 |
export async function renderVideo(
|
5 |
request: RenderRequest,
|
src/providers/lip-syncing/generateLipSyncVideo.mts
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
// TODO: we should use
|
4 |
+
|
5 |
+
|
6 |
+
// or we can use Video ReTalking but it requires a video where the person is already talking I believe:
|
7 |
+
// https://twitter.com/camenduru/status/1713570931342237852
|
src/providers/music-generation/generateMusicWithReplicate.mts
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
// TODO use https://replicate.com/sakemin/musicgen-fine-tuner
|
src/providers/video-generation/defaultPrompts.mts
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// should we really have default prompts in here?
|
2 |
+
// I think they should probably be defined at the applicative software layer (ie. in the client)
|
3 |
+
|
4 |
+
/**
 * Append keywords to a comma-separated prompt, skipping the ones that are
 * already present.
 *
 * Comparison is case-insensitive and ignores surrounding whitespace.
 * Blank keywords are ignored, and a blank prompt is left out of the result,
 * so the output never contains dangling separators such as ", crisp" or
 * "a cat, , crisp".
 *
 * @param prompt - comma-separated prompt (defaults to an empty string)
 * @param words - keywords to append when missing (defaults to none)
 * @returns the prompt followed by the missing keywords, joined with ", "
 */
export function addWordsIfNotPartOfThePrompt(prompt: string = "", words: string[] = []): string {
  // normalized keywords already present in the prompt
  const seen = new Set<string>(
    prompt
      .split(",")
      .map(w => w.trim().toLocaleLowerCase())
      .filter(w => w.length > 0)
  )

  // a blank prompt must not contribute an empty leading segment
  const parts: string[] = prompt.trim().length > 0 ? [prompt] : []

  for (const word of words) {
    const cleaned = word.trim()
    const key = cleaned.toLocaleLowerCase()

    // skip blank keywords (e.g. an empty trigger word) and duplicates
    if (key.length === 0 || seen.has(key)) {
      continue
    }

    seen.add(key)
    parts.push(cleaned)
  }

  return parts.join(", ")
}
|
13 |
+
|
14 |
+
/**
 * Build the positive prompt: the user prompt, the optional LoRA trigger word,
 * and a fixed set of quality keywords (each added only when missing).
 *
 * @param prompt - user-provided positive prompt
 * @param triggerWord - LoRA trigger word; when blank it is not added at all
 * @returns the enriched, comma-separated prompt
 */
export function getPositivePrompt(prompt: string, triggerWord = "") {
  const qualityWords = ["crisp", "sharp", "beautiful", "4K", "hd"]

  // a blank trigger word must not be forwarded, otherwise the prompt would
  // receive an empty ", ," segment
  const words = triggerWord.trim().length > 0
    ? [triggerWord, ...qualityWords]
    : qualityWords

  return addWordsIfNotPartOfThePrompt(prompt, words)
}
|
24 |
+
|
25 |
+
/**
 * Build the negative prompt: the user-provided negative prompt followed by a
 * fixed list of unwanted traits (each added only when missing).
 *
 * @param prompt - user-provided negative prompt
 * @returns the enriched, comma-separated negative prompt
 */
export function getNegativePrompt(prompt: string) {
  const unwantedTraits: string[] = [
    "cropped",
    // "underexposed" and "overexposed" are intentionally left out:
    // both can be a desired style
    "logo",
    "censored",
    "watermark",
    "watermarked",
    "extra digit",
    "fewer digits",
    "bad fingers",
    "bad quality",
    "worst quality",
    "low quality",
    "low resolution",
    "glitch", // open question: could be a desired style
    "deformed",
    "mutated",
    "ugly",
    "disfigured",
    "3D render", // open question: could be a desired style
    "signature",
  ]

  return addWordsIfNotPartOfThePrompt(prompt, unwantedTraits)
}
|
src/providers/video-generation/generateVideoWithHotshotGradioAPI.mts
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { client } from "@gradio/client"
|
2 |
+
|
3 |
+
import { VideoGenerationOptions } from "./types.mts"
|
4 |
+
import { getNegativePrompt, getPositivePrompt } from "./defaultPrompts.mts"
|
5 |
+
import { generateSeed } from "../../utils/misc/generateSeed.mts"
|
6 |
+
|
7 |
+
// Pool of Hotshot-XL Gradio Space endpoints, read from the environment.
// Replicas are not used yet, because their hostname is not easy to obtain.
// Unset or empty entries are dropped.
const instances: string[] = [
  process.env.VC_HOTSHOT_XL_GRADIO_SPACE_API_URL_1,
]
  .map(url => `${url || ""}`)
  .filter(url => url.length > 0)

// shared secret sent along with each request to the Space
const secretToken: string = process.env.VC_MICROSERVICE_SECRET_TOKEN || ""
|
13 |
+
|
14 |
+
/**
 * Generate a video with Hotshot-XL through a Gradio Space.
 *
 * Instances are used in round-robin order: the first one of the pool is
 * taken and moved to the back of the queue.
 *
 * @returns URL of the generated file, served by the selected instance
 * @throws Error when no instance is configured or when the Space response
 *         does not contain a file name
 */
export const generateVideo = async ({
  positivePrompt,
  negativePrompt = "",
  seed,
  nbFrames = 8, // for now the only values that make sense are 1 (for a jpg) or 8 (for a video)
  videoDuration = 1000, // for now Hotshot doesn't really support anything else
  nbSteps = 30, // when rendering a final video, we want a value like 50 or 70 here
  size = "768x320",

  // for jbilcke-hf/sdxl-cinematic-2 it is "cinematic-2"
  triggerWord = "cinematic-2",

  huggingFaceLora = "jbilcke-hf/sdxl-cinematic-2",
}: VideoGenerationOptions) => {

  // round-robin over the configured instances
  const instance = instances.shift()
  if (!instance) {
    // without this guard, `undefined` would be pushed back into the pool
    throw new Error(`you need to configure your VC_HOTSHOT_XL_GRADIO_SPACE_API_URL_1`)
  }
  instances.push(instance)

  const api = await client(instance, {
    hf_token: `${process.env.VC_HF_API_TOKEN}` as any
  })

  // enrich the prompts with the default keywords
  const fullPositivePrompt = getPositivePrompt(positivePrompt, triggerWord)
  const fullNegativePrompt = getNegativePrompt(negativePrompt)

  // fall back to a generated seed when none (or an invalid one) is provided
  const actualSeed = typeof seed === "number" && Number.isFinite(seed) ? seed : generateSeed()

  const rawResponse = await api.predict('/run', [
    secretToken,
    fullPositivePrompt, // string in 'Prompt' Textbox component
    fullNegativePrompt || "",
    // string in 'Public LoRA ID' Textbox component: this must be the LoRA id
    // itself, not its length
    huggingFaceLora?.length ? huggingFaceLora : undefined,
    size || '512x512', // string (Option from: [('320x768', '320x768'), ('384x672', '384x672'), ('416x608', '416x608'), ('512x512', '512x512'), ('608x416', '608x416'), ('672x384', '672x384'), ('768x320', '768x320')]) in 'Size' Dropdown component
    actualSeed, // number (numeric value between -1 and 423538377342) in 'Seed' Slider component, -1 to set to random
    nbSteps || 30,
    nbFrames || 8,
    videoDuration || 1000,
  ]) as any

  console.log("data:", rawResponse?.data)

  // fail with an explicit message instead of a destructuring TypeError
  const name = rawResponse?.data?.[0]?.[0]?.name
  if (typeof name !== "string" || name.length === 0) {
    throw new Error(`the Hotshot Gradio Space did not return a file name`)
  }

  return `${instance}/file=${name}`
}
|
src/providers/video-generation/generateVideoWithHotshotReplicate.mts
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"use server"
|
2 |
+
|
3 |
+
import Replicate from "replicate"
|
4 |
+
|
5 |
+
import { generateSeed } from "../../utils/misc/generateSeed.mts"
|
6 |
+
import { sleep } from "../../utils/misc/sleep.mts"
|
7 |
+
import { getNegativePrompt, getPositivePrompt } from "./defaultPrompts.mts"
|
8 |
+
import { VideoGenerationOptions } from "./types.mts"
|
9 |
+
|
10 |
+
// Replicate credentials and Hotshot-XL model coordinates, read from the environment
const replicateToken: string = process.env.AUTH_REPLICATE_API_TOKEN || ""
const replicateModel: string = process.env.HOTSHOT_XL_REPLICATE_MODEL || ""
const replicateModelVersion: string = process.env.HOTSHOT_XL_REPLICATE_MODEL_VERSION || ""

// the token is mandatory: fail as soon as the module is loaded
if (replicateToken.length === 0) {
  throw new Error(`you need to configure your AUTH_REPLICATE_API_TOKEN`)
}

// client shared by all the calls made from this module
const replicate = new Replicate({ auth: replicateToken })
|
19 |
+
|
20 |
+
/**
 * Generate a video with Hotshot-XL through Replicate.
 *
 * Note that if nbFrames == 1, then it will generate a jpg.
 *
 * The prediction is created, then polled until it reaches a final state.
 *
 * @returns the output URL of the prediction
 * @throws Error when the prompt is empty, when the model is not configured,
 *         when the prediction fails or is canceled, or when polling times out
 */
export async function generateVideoWithHotshotReplicate({
  positivePrompt,
  negativePrompt = "",
  seed,
  nbFrames = 8, // for now the only values that make sense are 1 (for a jpg) or 8 (for a video)
  videoDuration = 1000, // in milliseconds; for now Hotshot doesn't really support anything else
  nbSteps = 30, // when rendering a final video, we want a value like 50 or 70 here
  size = "768x320",

  // for a replicate LoRa this is always the same ("In the style of TOK")
  // triggerWord = "In the style of TOK",

  // for jbilcke-hf/sdxl-cinematic-2 it is "cinematic-2"
  triggerWord = "cinematic-2",

  huggingFaceLora = "jbilcke-hf/sdxl-cinematic-2",

  // url to the weight
  replicateLora,
}: VideoGenerationOptions): Promise<string> {

  if (!positivePrompt?.length) {
    throw new Error(`prompt is too short!`)
  }

  if (!replicateModel) {
    throw new Error(`you need to configure your HOTSHOT_XL_REPLICATE_MODEL`)
  }

  if (!replicateModelVersion) {
    throw new Error(`you need to configure your HOTSHOT_XL_REPLICATE_MODEL_VERSION`)
  }

  // enrich the prompts with the default keywords
  const fullPositivePrompt = getPositivePrompt(positivePrompt, triggerWord)
  const fullNegativePrompt = getNegativePrompt(negativePrompt)

  const [width, height] = size.split("x").map(x => Number(x))

  // The two LoRA sources are mutually exclusive. Explicit Replicate weights
  // take precedence; otherwise the Hugging Face LoRA (which has a non-empty
  // default) is used. Deciding from a single flag guarantees that exactly one
  // of them is sent whenever one is available.
  const useReplicateLora = Boolean(replicateLora?.length)

  // see an example here:
  // https://replicate.com/p/incraplbv23g3zv6woinhgdira
  // for params and doc see https://replicate.com/cloneofsimo/hotshot-xl-lora-controlnet
  const prediction = await replicate.predictions.create({
    version: replicateModelVersion,
    input: {
      prompt: fullPositivePrompt,
      negative_prompt: fullNegativePrompt,

      // this is not a URL but a model name
      hf_lora_url: useReplicateLora ? undefined : (huggingFaceLora || undefined),

      // this is a URL to the .tar (we can get it from the "trainings" page)
      replicate_weights_url: useReplicateLora ? replicateLora : undefined,

      width,
      height,

      // those are used to create an upsampling or downsampling
      // original_width: width,
      // original_height: height,
      // target_width: width,
      // target_height: height,

      steps: nbSteps,

      // note: right now it only makes sense to use either 1 (a jpg) or 8 (a video)
      video_length: nbFrames, // nb frames

      video_duration: videoDuration, // video duration in ms

      // fall back to a generated seed when none (or an invalid one) is provided
      seed: typeof seed === "number" && Number.isFinite(seed) ? seed : generateSeed()
    }
  })

  // initial delay before the first poll (the original code states that
  // Replicate needs at least 30 seconds)
  await sleep(30000)

  // To prevent indefinite polling we stop after 30 attempts,
  // i.e. about 2 and a half minutes at one attempt every 5 seconds
  const maxPollingAttempts = 30

  for (let attempt = 0; attempt < maxPollingAttempts; attempt++) {
    // Check every 5 seconds
    await sleep(5000)

    const res = await fetch(`https://api.replicate.com/v1/predictions/${prediction.id}`, {
      method: "GET",
      headers: {
        Authorization: `Token ${replicateToken}`,
      },
      cache: 'no-store',
    })

    // a non-200 answer is treated as transient: try again on the next tick
    if (res.status !== 200) {
      continue
    }

    const response = (await res.json()) as any

    const error = `${response?.error || ""}`
    if (error) {
      throw new Error(error)
    }

    if (response?.status === "succeeded") {
      // NOTE(review): the output is assumed to be a URL, or a list of URLs
      // whose first entry is the video. Confirm against the model schema.
      const output = Array.isArray(response.output) ? response.output[0] : response.output
      if (typeof output !== "string" || output.length === 0) {
        throw new Error(`the prediction succeeded but returned no output`)
      }
      return output
    }

    if (response?.status === "failed" || response?.status === "canceled") {
      throw new Error(`the prediction ended with status "${response.status}"`)
    }
  }

  throw new Error('Request time out.')
}
|
src/providers/video-generation/generateVideoWithShow.mts
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
// Pool of Space endpoints, read from the environment.
// Replicas are not used yet, because their hostname is not easy to obtain.
// Unset or empty entries are dropped.
const instances: string[] = [
  process.env.VC_ZEROSCOPE_SPACE_API_URL_1,
]
  .map(url => `${url || ""}`)
  .filter(url => url.length > 0)

// shared secret sent along with each request to the Space
const secretToken: string = process.env.VC_MICROSERVICE_SECRET_TOKEN || ""
|
8 |
+
|
9 |
+
/**
 * Placeholder for a video generation provider that is not available yet.
 *
 * @param prompt - text prompt (currently unused)
 * @param options - generation settings (currently unused)
 * @returns a promise that always rejects with "Not implemented yet"
 */
export const generateVideo = (prompt: string, options?: {
  seed: number;
  nbFrames: number;
  nbSteps: number;
}) => Promise.reject(new Error("Not implemented yet"))
|
src/providers/video-generation/generateVideoWithZeroscope.mts
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { client } from "@gradio/client"
|
2 |
+
|
3 |
+
import { generateSeed } from "../../utils/misc/generateSeed.mts"
|
4 |
+
import { VideoGenerationOptions } from "./types.mts"
|
5 |
+
import { getPositivePrompt } from "./defaultPrompts.mts"
|
6 |
+
|
7 |
+
// Pool of Zeroscope Gradio Space endpoints, read from the environment.
// Replicas are not used yet, because their hostname is not easy to obtain.
// Unset or empty entries are dropped.
const instances: string[] = [
  process.env.VC_ZEROSCOPE_SPACE_API_URL_1,
  process.env.VC_ZEROSCOPE_SPACE_API_URL_2,
  process.env.VC_ZEROSCOPE_SPACE_API_URL_3,
  process.env.VC_ZEROSCOPE_SPACE_API_URL_4,
]
  .map(url => `${url || ""}`)
  .filter(url => url.length > 0)

// shared secret sent along with each request to the Space
const secretToken: string = process.env.VC_MICROSERVICE_SECRET_TOKEN || ""
|
16 |
+
|
17 |
+
/**
 * Generate a video with Zeroscope through a Gradio Space.
 *
 * Instances are used in round-robin order: the first one of the pool is
 * taken and moved to the back of the queue.
 *
 * @returns URL of the generated file, served by the selected instance
 * @throws Error when no instance is configured or when the Space response
 *         does not contain a file name
 */
export const generateVideo = async ({
  positivePrompt,
  seed,
  nbFrames = 8, // for now the only values that make sense are 1 (for a jpg) or 8 (for a video)
  nbSteps = 30, // when rendering a final video, we want a value like 50 or 70 here
}: VideoGenerationOptions) => {

  // round-robin over the configured instances
  const instance = instances.shift()
  if (!instance) {
    // without this guard, `undefined` would be pushed back into the pool
    throw new Error(`you need to configure at least one VC_ZEROSCOPE_SPACE_API_URL_*`)
  }
  instances.push(instance)

  const api = await client(instance, {
    hf_token: `${process.env.VC_HF_API_TOKEN}` as any
  })

  // we treat 0 (as well as missing or invalid values) as meaning "random seed"
  const actualSeed =
    typeof seed === "number" && Number.isFinite(seed) && seed > 0
      ? seed
      : generateSeed()

  const rawResponse = await api.predict('/run', [
    getPositivePrompt(positivePrompt), // string in 'Prompt' Textbox component
    actualSeed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
    nbFrames || 24, // NOTE(review): the original comment suspects this is a frames-per-second value; confirm
    nbSteps || 35, // (numeric value between 10 and 50) in 'Number of inference steps' Slider component
    secretToken,
  ]) as any

  // fail with an explicit message instead of a destructuring TypeError
  const name = rawResponse?.data?.[0]?.[0]?.name
  if (typeof name !== "string" || name.length === 0) {
    throw new Error(`the Zeroscope Gradio Space did not return a file name`)
  }

  return `${instance}/file=${name}`
}
|
src/providers/video-generation/types.mts
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { HotshotImageInferenceSize } from "../../types.mts"
|
2 |
+
|
3 |
+
/**
 * Options shared by the video generation providers.
 *
 * Not every provider reads every field.
 */
export type VideoGenerationOptions = {
  /**
   * Positive prompt to use
   */
  positivePrompt: string

  /**
   * Negative prompt to use
   */
  negativePrompt?: string

  /**
   * Seed.
   *
   * Depending on the vendor, a negative value (e.g. -1) should give you
   * a random value on each call
   */
  seed?: number

  /**
   * Number of frames to generate
   */
  nbFrames?: number

  /**
   * Duration of the video, in milliseconds
   * (the providers default to 1000, i.e. one second)
   */
  videoDuration?: number

  /**
   * Number of inference steps (for final rendering use 70)
   */
  nbSteps?: number

  /**
   * Image size (which is actually a ratio)
   *
   * Note that Hotshot wasn't trained on all possible combinations,
   * and in particular by default it is supposed to only support 512x512 well
   */
  size?: HotshotImageInferenceSize

  /**
   * Trigger word
   *
   * for a replicate LoRa this is always the same ("In the style of TOK")
   * for jbilcke-hf/sdxl-cinematic-2 it is "cinematic-2"
   */
  triggerWord?: string

  /**
   * Owner + repo name of the Hugging Face LoRA
   */
  huggingFaceLora?: string

  /**
   * URL to the weights .tar (those can be hosted anywhere, it doesn't have to be on Replicate.com)
   */
  replicateLora?: string
}
|
src/providers/video-interpolation/interpolateVideoWithReplicate.mts
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
// this looks really great!
|
3 |
+
// https://replicate.com/zsxkib/st-mfnet?prediction=bufijj3b45cjoe43pzloqkcghy
|
4 |
+
|
5 |
+
"use server"
|
6 |
+
|
7 |
+
import Replicate from "replicate"
|
8 |
+
|
9 |
+
import { sleep } from "../../utils/misc/sleep.mts"
|
10 |
+
|
11 |
+
// Replicate credentials and ST-MFNet model coordinates, read from the environment
const replicateToken: string = process.env.AUTH_REPLICATE_API_TOKEN || ""
const replicateModel: string = process.env.STMFNET_REPLICATE_MODEL || ""
const replicateModelVersion: string = process.env.STMFNET_REPLICATE_MODEL_VERSION || ""

// the token is mandatory: fail as soon as the module is loaded
if (replicateToken.length === 0) {
  throw new Error(`you need to configure your AUTH_REPLICATE_API_TOKEN`)
}

// client shared by all the calls made from this module
const replicate = new Replicate({ auth: replicateToken })
|
20 |
+
|
21 |
+
/**
 * Interpolate a video using Replicate
 *
 * Important note: the video will lose its sound, if any!
 *
 * With the current settings, the duration of the original video is preserved
 * (but we could make slow-mo too)
 *
 * The prediction is created, then polled until it reaches a final state.
 *
 * @returns the output URL of the prediction
 * @throws Error when no video is provided, when the model is not configured,
 *         when the prediction fails or is canceled, or when polling times out
 */
export async function interpolateVideoWithReplicate({
  video,

  // so arguably 60 would look smoother, but we are trying to reach for a "movie" kind of feel here
  nbFrames = 24,
}: {
  video: string

  /**
   * Target frame rate, sent to the model as `custom_fps`
   * (the duration of the original video is preserved)
   */
  nbFrames?: number // min 1, max: 240
}): Promise<string> {

  if (!video) {
    throw new Error(`no video provided`)
  }

  if (!replicateModel) {
    throw new Error(`you need to configure your STMFNET_REPLICATE_MODEL`)
  }

  if (!replicateModelVersion) {
    throw new Error(`you need to configure your STMFNET_REPLICATE_MODEL_VERSION`)
  }

  // for params and doc see https://replicate.com/zsxkib/st-mfnet
  const prediction = await replicate.predictions.create({
    version: replicateModelVersion,
    input: {
      mp4: video, // NOTE(review): the expected format (URL or base64 data) is unconfirmed
      framerate_multiplier: 2, // can be one of 2, 4, 8, 16, 32

      // note: for now we use the simplest setting, which is to keep the original video duration
      // if we don't keep the original duration, the video will look like a slow motion animation
      // which may be a desired effect, but let's keep it simple for now
      keep_original_duration: true, // false,
      custom_fps: nbFrames // min 1, max: 240
    }
  })

  // initial delay before the first poll (the original code states that
  // Replicate needs at least 8 seconds)
  await sleep(10000)

  // To prevent indefinite polling, we stop after a fixed number of attempts
  const maxPollingAttempts = 30

  for (let attempt = 0; attempt < maxPollingAttempts; attempt++) {
    // This is normally a fast model, so let's check every 2 seconds
    await sleep(2000)

    const res = await fetch(`https://api.replicate.com/v1/predictions/${prediction.id}`, {
      method: "GET",
      headers: {
        Authorization: `Token ${replicateToken}`,
      },
      cache: 'no-store',
    })

    // a non-200 answer is treated as transient: try again on the next tick
    if (res.status !== 200) {
      continue
    }

    const response = (await res.json()) as any

    const error = `${response?.error || ""}`
    if (error) {
      throw new Error(error)
    }

    if (response?.status === "succeeded") {
      // NOTE(review): the output is assumed to be a URL, or a list of URLs
      // whose first entry is the video. Confirm against the model schema.
      const output = Array.isArray(response.output) ? response.output[0] : response.output
      if (typeof output !== "string" || output.length === 0) {
        throw new Error(`the prediction succeeded but returned no output`)
      }
      return output
    }

    if (response?.status === "failed" || response?.status === "canceled") {
      throw new Error(`the prediction ended with status "${response.status}"`)
    }
  }

  throw new Error('Request time out.')
}
|
src/providers/{video-generation/generateVideo.mts → video-transformation/transformVideoWithHotshotReplicate.mts}
RENAMED
@@ -9,9 +9,6 @@ export const state = {
|
|
9 |
// we don't use replicas yet, because it ain't easy to get their hostname
|
10 |
const instances: string[] = [
|
11 |
`${process.env.VC_ZEROSCOPE_SPACE_API_URL_1 || ""}`,
|
12 |
-
`${process.env.VC_ZEROSCOPE_SPACE_API_URL_2 || ""}`,
|
13 |
-
`${process.env.VC_ZEROSCOPE_SPACE_API_URL_3 || ""}`,
|
14 |
-
`${process.env.VC_ZEROSCOPE_SPACE_API_URL_4 || ""}`,
|
15 |
].filter(instance => instance?.length > 0)
|
16 |
|
17 |
const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
|
@@ -21,6 +18,7 @@ export const generateVideo = async (prompt: string, options?: {
|
|
21 |
nbFrames: number;
|
22 |
nbSteps: number;
|
23 |
}) => {
|
|
|
24 |
|
25 |
/* let's disable load control, and let it use the queue */
|
26 |
/*
|
|
|
9 |
// we don't use replicas yet, because it ain't easy to get their hostname
|
10 |
const instances: string[] = [
|
11 |
`${process.env.VC_ZEROSCOPE_SPACE_API_URL_1 || ""}`,
|
|
|
|
|
|
|
12 |
].filter(instance => instance?.length > 0)
|
13 |
|
14 |
const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
|
|
|
18 |
nbFrames: number;
|
19 |
nbSteps: number;
|
20 |
}) => {
|
21 |
+
throw new Error("Not implemented yet")
|
22 |
|
23 |
/* let's disable load control, and let it use the queue */
|
24 |
/*
|
src/scheduler/processVideo.mts
CHANGED
@@ -2,7 +2,7 @@ import { v4 as uuidv4 } from "uuid"
|
|
2 |
|
3 |
import { Video, VideoShot } from "../types.mts"
|
4 |
|
5 |
-
import { generateVideo } from "../providers/video-generation/
|
6 |
import { upscaleVideo } from "../providers/video-upscaling/upscaleVideo.mts"
|
7 |
import { interpolateVideo } from "../providers/video-interpolation/interpolateVideo.mts"
|
8 |
import { postInterpolation } from "../production/postInterpolation.mts"
|
|
|
2 |
|
3 |
import { Video, VideoShot } from "../types.mts"
|
4 |
|
5 |
+
import { generateVideo } from "../providers/video-generation/generateVideoWithZeroscope.mts"
|
6 |
import { upscaleVideo } from "../providers/video-upscaling/upscaleVideo.mts"
|
7 |
import { interpolateVideo } from "../providers/video-interpolation/interpolateVideo.mts"
|
8 |
import { postInterpolation } from "../production/postInterpolation.mts"
|
src/types.mts
CHANGED
@@ -384,3 +384,17 @@ export type RenderingJob = {
|
|
384 |
|
385 |
status: 'pending' | 'completed' | 'error'
|
386 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
384 |
|
385 |
status: 'pending' | 'completed' | 'error'
|
386 |
}
|
387 |
+
|
388 |
+
// vendor-specific types
|
389 |
+
|
390 |
+
/**
 * Image sizes ("WIDTHxHEIGHT") accepted for Hotshot inference.
 */
export type HotshotImageInferenceSize =
  // standard sizes
  | '320x768'
  | '384x672'
  | '416x608'
  | '512x512'
  | '608x416'
  | '672x384'
  | '768x320'

  // custom sizes: these aren't supported / supposed to work properly
  | '1024x1024' // custom ratio
  | '1024x512' // custom panoramic ratio
  | '1024x576' // movie ratio (16:9)
|