Commit 198274c
Parent(s): 96f407e

add support for real-time

Files changed:
- .nvmrc +1 -1
- package-lock.json +0 -0
- package.json +11 -11
- src/production/renderVideo.mts +3 -20
- src/providers/language-model/openai/openai.mts +4 -6
- src/providers/video-generation/defaultPrompts.mts +14 -6
- src/providers/video-generation/generateVideoWithAnimateDiffLightning.mts +123 -0
- src/providers/video-generation/generateVideoWithAnimateLCM.mts +3 -5
- src/types.mts +4 -6
- src/utils/requests/parseRenderRequest.mts +5 -1
.nvmrc
CHANGED
@@ -1 +1 @@
-v20.10.0
+v20.10.0
package-lock.json
CHANGED
The diff for this file is too large to render. See raw diff.
package.json
CHANGED
@@ -4,11 +4,11 @@
   "description": "A service which wraps and chains video and audio spaces together",
   "main": "src/index.mts",
   "scripts": {
-    "start": "
-    "test:submitVideo": "
-    "test:checkStatus": "
-    "test:downloadFileToTmp": "
-    "test:stuff": "
+    "start": "tsx src/index.mts",
+    "test:submitVideo": "tsx src/tests/submitVideo.mts",
+    "test:checkStatus": "tsx src/tests/checkStatus.mts",
+    "test:downloadFileToTmp": "tsx src/tests/downloadFileToTmp.mts",
+    "test:stuff": "tsx src/utils/segmentImage.mts",
     "docker": "npm run docker:build && npm run docker:run",
     "docker:build": "docker build -t videochain-api .",
     "docker:run": "docker run -it -p 7860:7860 videochain-api"
@@ -21,24 +21,24 @@
     "@gradio/client": "^0.1.4",
     "@huggingface/inference": "^2.6.1",
     "@types/express": "^4.17.17",
-    "@types/
+    "@types/node": "^20.12.7",
     "@types/uuid": "^9.0.2",
     "dotenv": "^16.3.1",
     "eventsource-parser": "^1.0.0",
     "express": "^4.18.2",
-    "ffmpeg-concat": "^1.3.0",
     "fluent-ffmpeg": "^2.1.2",
     "fs-extra": "^11.1.1",
     "gpt-tokens": "^1.1.1",
     "node-fetch": "^3.3.1",
     "nodejs-whisper": "^0.1.4",
-    "openai": "^
-    "puppeteer": "^
-    "replicate": "^0.
+    "openai": "^4.38.2",
+    "puppeteer": "^22.6.5",
+    "replicate": "^0.29.1",
     "resize-base64": "^1.0.12",
     "sharp": "^0.32.4",
     "temp-dir": "^3.0.0",
-    "ts-node": "^10.9.
+    "ts-node": "^10.9.2",
+    "tsx": "^4.7.0",
     "tts-react": "^3.0.1",
     "uuid": "^9.0.0",
     "yaml": "^2.3.1"
src/production/renderVideo.mts
CHANGED
@@ -2,7 +2,8 @@ import { RenderedScene, RenderRequest, VideoGenerationParams } from "../types.mts"
 
 // import { generateVideo } from "../providers/video-generation/generateVideoWithZeroscope.mts"
 // import { generateVideo } from "../providers/video-generation/generateVideoWithHotshotGradioAPI.mts"
-import { generateVideoWithAnimateLCM } from "../providers/video-generation/generateVideoWithAnimateLCM.mts"
+// import { generateVideoWithAnimateLCM } from "../providers/video-generation/generateVideoWithAnimateLCM.mts"
+import { generateVideoWithAnimateDiffLightning } from "../providers/video-generation/generateVideoWithAnimateDiffLightning.mts"
 import { generateSeed } from "../utils/misc/generateSeed.mts"
 
 export async function renderVideo(
@@ -10,25 +11,7 @@ export async function renderVideo(
   response: RenderedScene
 ): Promise<RenderedScene> {
 
-  const params: VideoGenerationParams = {
-    prompt: request.prompt,
-    // image?: undefined, // can be empty (and thus, is empty)
-    // lora?: string // hardcoded on "3D render"
-    // style?: string // hardcoded on "3D render" for now
-    orientation: "landscape",
-    projection: "cartesian",
-    width: 512,
-    height: 256,
-
-    // ok, now what about those? they are in the gradio, are not exposed yet in the API
-    // nbFrames: request.nbFrames,
-    // nbSteps: request.nbSteps,
-
-    seed: request.seed || generateSeed(),
-    debug: true,
-  }
-
-  response.assetUrl = await generateVideoWithAnimateLCM(params)
+  response.assetUrl = await generateVideoWithAnimateDiffLightning(request, response)
 
   return response
 }
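Note: renderVideo is now a thin pass-through to the AnimateDiff Lightning provider; the hardcoded VideoGenerationParams object is gone and the raw request/response pair is forwarded instead. A hypothetical caller, for illustration only (the field values are placeholders, and the partial objects are only sketched):

import { renderVideo } from "./renderVideo.mts"
import { RenderRequest, RenderedScene } from "../types.mts"

// placeholder request: only the video-related fields are sketched here
const request = {
  prompt: "a drone shot over a snowy forest",
  seed: 42,
  nbFrames: 20, // ~2 seconds at 10 fps
  nbFPS: 10,
  nbSteps: 4,
} as RenderRequest

const response = { assetUrl: "", error: "" } as RenderedScene

// the provider fills response.assetUrl with a base64 data-uri of the video
const rendered = await renderVideo(request, response)
console.log(rendered.assetUrl.slice(0, 64))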
src/providers/language-model/openai/openai.mts
CHANGED
@@ -1,7 +1,5 @@
-import 
+import OpenAI from "openai";
 
-export const openai = new 
-
-
-})
-)
+export const openai = new OpenAI({
+  apiKey: `${process.env.VC_OPENAI_API_KEY || ""}`
+})
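Note: the truncated removed lines appear to be the pre-v4 Configuration/OpenAIApi pattern; the dependency bump to openai ^4.38.2 matches the v4 SDK, which default-exports an OpenAI class. A minimal usage sketch of the exported singleton (the model name and prompt are placeholders, not part of this commit):

import { openai } from "./openai.mts"

// v4-style chat completion call
const completion = await openai.chat.completions.create({
  model: "gpt-4-turbo", // placeholder model name
  messages: [{ role: "user", content: "Write a one-line video caption." }],
})

console.log(completion.choices[0]?.message?.content)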
src/providers/video-generation/defaultPrompts.mts
CHANGED
@@ -14,9 +14,13 @@ export function addWordsIfNotPartOfThePrompt(prompt: string = "", words: string[]
 export function getPositivePrompt(prompt: string, triggerWord = "") {
   return addWordsIfNotPartOfThePrompt(prompt, [
     triggerWord,
-    "
+    "sublime",
+    "pro quality",
     "sharp",
+    "crisp",
     "beautiful",
+    "impressive",
+    "amazing",
     "4K",
     "hd"
   ])
@@ -28,9 +32,13 @@ export function getNegativePrompt(prompt: string) {
     // "underexposed", // <-- can be a desired style
     // "overexposed", // <-- can be a desired style
     "logo",
+    "hud",
+    "ui",
     "censored",
+    "blurry",
     "watermark",
     "watermarked",
+    "copyright",
     "extra digit",
     "fewer digits",
     "bad fingers",
@@ -39,11 +47,11 @@ export function getNegativePrompt(prompt: string) {
     "low quality",
     "low resolution",
     "glitch", // <-- keep or not? could be a desired style?
-    "deformed",
-    "mutated",
-    "ugly",
-    "disfigured",
-    "3D render", // <-- keep or not? could be a desired style?
+    // "deformed",
+    // "mutated",
+    // "ugly",
+    // "disfigured",
+    // "3D render", // <-- keep or not? could be a desired style?
     "signature"
   ])
 }
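Note: addWordsIfNotPartOfThePrompt itself isn't in this diff; only its signature appears in the hunk header. A plausible sketch of its behavior, assuming it appends each word only when the prompt doesn't already contain it:

// hypothetical re-implementation, for illustration only
export function addWordsIfNotPartOfThePrompt(prompt: string = "", words: string[] = []): string {
  const lowerCasePrompt = prompt.toLowerCase()
  // keep only non-empty words that aren't already part of the prompt
  const missingWords = words.filter(word =>
    word && !lowerCasePrompt.includes(word.toLowerCase())
  )
  return [prompt, ...missingWords].filter(Boolean).join(", ")
}

// under that assumption, getPositivePrompt("a cat, sharp photo") would skip "sharp":
// -> "a cat, sharp photo, sublime, pro quality, crisp, beautiful, impressive, amazing, 4K, hd"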
src/providers/video-generation/generateVideoWithAnimateDiffLightning.mts
ADDED
@@ -0,0 +1,123 @@
+import { RenderedScene, RenderRequest } from "../../types.mts"
+import { generateSeed } from "../../utils/misc/generateSeed.mts"
+import { tryApiCalls } from "../../utils/misc/tryApiCall.mts"
+import { getValidNumber } from "../../utils/validators/getValidNumber.mts"
+
+// const gradioApi = `${process.env.AI_TUBE_MODEL_ANIMATELCM_GRADIO_URL || ""}`
+const gradioApi = "https://jbilcke-hf-ai-tube-model-animatediff-lightning.hf.space"
+const accessToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
+
+export const generateVideoWithAnimateDiffLightning = async (
+  request: RenderRequest,
+  response: RenderedScene,
+): Promise<RenderedScene> => {
+
+  const debug = false
+
+  const actualFunction = async () => {
+
+    const prompt = request.prompt || ""
+    if (!prompt) {
+      response.error = "prompt is empty"
+      return response
+    }
+
+    // seed = seed || generateSeed()
+    request.seed = request.seed || generateSeed()
+
+    // see https://huggingface.co/spaces/jbilcke-hf/ai-tube-model-animatediff-lightning/blob/main/app.py#L15-L18
+    const baseModel = "epiCRealism"
+
+    // the motion LoRA - could be useful one day
+    const motion = ""
+
+    // can be 1, 2, 4 or 8
+    // but values below 4 look bad
+    const nbSteps = getValidNumber(request.nbSteps, 1, 8, 4)
+    const width = getValidNumber(request.width, 256, 1024, 512)
+    const height = getValidNumber(request.height, 256, 1024, 256)
+
+    const nbFrames = getValidNumber(request.nbFrames, 10, 60, 20)
+    const nbFPS = getValidNumber(request.nbFPS, 10, 60, 10)
+
+    // by default AnimateDiff generates about 2 seconds of video at 10 fps
+    // the Gradio API now has some code to optional fix that using FFmpeg,
+    // but this will add some delay overhead, so use with care!
+    const durationInSec = Math.round(nbFrames / nbFPS)
+    const framesPerSec = nbFPS
+
+    try {
+      if (debug) {
+        console.log(`calling AnimateDiff Lightning API with params (some are hidden):`, {
+          baseModel,
+          motion,
+          nbSteps,
+          width,
+          height,
+          nbFrames,
+          nbFPS,
+          durationInSec,
+          framesPerSec,
+        })
+      }
+
+      const res = await fetch(gradioApi + (gradioApi.endsWith("/") ? "" : "/") + "api/predict", {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          // Authorization: `Bearer ${token}`,
+        },
+        body: JSON.stringify({
+          fn_index: 0, // <- important! it is currently 4, not 1!
+          data: [
+            accessToken,
+            prompt,
+            baseModel,
+            width,
+            height,
+            motion,
+            nbSteps,
+            durationInSec,
+            framesPerSec,
+          ],
+        }),
+        cache: "no-store",
+        // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
+        // next: { revalidate: 1 }
+      })
+
+      // console.log("res:", res)
+
+      const { data } = await res.json()
+
+      // console.log("data:", data)
+      // Recommendation: handle errors
+      if (res.status !== 200 || !Array.isArray(data)) {
+        // This will activate the closest `error.js` Error Boundary
+        throw new Error(`Failed to fetch data (status: ${res.status})`)
+      }
+      // console.log("data:", data.slice(0, 50))
+
+      const base64Content = (data?.[0] || "") as string
+
+      if (!base64Content) {
+        throw new Error(`invalid response (no content)`)
+      }
+
+      // this API already emits a data-uri with a content type
+      return base64Content // addBase64HeaderToMp4(base64Content)
+    } catch (err) {
+      if (debug) {
+        console.error(`failed to call the AnimateDiff Lightning API:`)
+        console.error(err)
+      }
+      throw err
+    }
+  }
+
+  return tryApiCalls({
+    func: actualFunction,
+    debug,
+    failureMessage: "failed to call the AnimateDiff Lightning API"
+  })
+}
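Note: the clip length is derived from the two new request fields. A worked example with the defaults above (getValidNumber(value, min, max, defaultValue) is assumed to return the default for missing values, as its call sites suggest):

// request.nbFrames and request.nbFPS left undefined:
const nbFrames = 20 // getValidNumber(undefined, 10, 60, 20)
const nbFPS = 10    // getValidNumber(undefined, 10, 60, 10)
const durationInSec = Math.round(nbFrames / nbFPS) // 20 / 10 -> 2 seconds, AnimateDiff's native output length

// asking for 60 frames at 10 fps instead yields Math.round(60 / 10) = 6 seconds,
// presumably triggering the FFmpeg-based fix-up mentioned in the comments above.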
src/providers/video-generation/generateVideoWithAnimateLCM.mts
CHANGED
@@ -16,14 +16,14 @@ export const generateVideoWithAnimateLCM = async ({
   width,
   height,
   style = "",
+  nbSteps = 4,
+  nbFrames = 20,
   seed,
   debug,
 }: VideoGenerationParams): Promise<string> => {
 
 
   const actualFunction = async () => {
-    const negPrompt = ""
-    prompt = prompt || ""
 
     // seed = seed || generateSeed()
     seed = generateSeed()
@@ -42,14 +42,12 @@ export const generateVideoWithAnimateLCM = async ({
     const lcmLoraAlpha = 0.8 // spatial_lora_slider,
 
     // label="Width", value=512, minimum=256, maximum=1024, step=64)
-    const width = 512
 
     // label="Animation length", value=16, minimum=12, maximum=20, step=1)
     const nbFrames = 16
 
     // label="Height", value=512, minimum=256, maximum=1024, step=64)
-
-
+
     // label="CFG Scale", value=1.5, minimum=1, maximum=2)
     const cfgScale = 1.5
 
src/types.mts
CHANGED
@@ -289,12 +289,9 @@ export type RenderRequest = {
   // actionnables are names of things like "chest", "key", "tree", "chair" etc
   actionnables: string[]
 
-
-
-
-  // nbFrames=16 -> 2 sec
-  // nbFrames=24 -> 3 sec
-  nbFrames: number // min: 8, max: 24
+  nbFrames: number
+
+  nbFPS: number
 
   nbSteps: number // min: 1, max: 50
 
@@ -426,6 +423,7 @@ export type VideoGenerationParams = {
   projection: VideoProjection
   width: number
   height: number
+  nbSteps: number
   seed?: number
   debug?: boolean
 }
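Note: RenderRequest now carries explicit timing fields instead of the old nbFrames-only range comment. A hypothetical request fragment using them (values are placeholders, and the import path is assumed):

import { RenderRequest } from "./types.mts"

const request: Partial<RenderRequest> = {
  prompt: "a storm over the ocean", // placeholder
  nbFrames: 20,
  nbFPS: 10, // 20 frames at 10 fps -> a 2-second clip
  nbSteps: 4, // per the provider comments, step counts below 4 look bad
}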
src/utils/requests/parseRenderRequest.mts
CHANGED
@@ -7,7 +7,11 @@ export function parseRenderRequest(request: RenderRequest) {
 
   // console.log("parseRenderRequest: "+JSON.stringify(request, null, 2))
   try {
-
+    // we are large on the values here, since each model will have their own limits
+    // we just want pseudo-valid numbers
+
+    request.nbFrames = getValidNumber(request.nbFrames, 1, 2147483647, 1)
+    request.nbFPS = getValidNumber(request.nbFPS, 1, 2147483647, 1)
 
     request.negativePrompt = request.negativePrompt || ""
 
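Note: getValidNumber is not part of this diff. A hypothetical implementation consistent with its call sites, getValidNumber(value, min, max, defaultValue) (the real one lives in src/utils/validators/getValidNumber.mts and may handle out-of-range input differently):

// hypothetical sketch, for illustration only
export function getValidNumber(
  input: unknown,
  minValue: number,
  maxValue: number,
  defaultValue: number
): number {
  const value = Number(input)
  // non-numeric or non-finite input falls back to the default
  if (isNaN(value) || !isFinite(value)) {
    return defaultValue
  }
  // assumption: out-of-range values are clamped rather than rejected
  return Math.min(maxValue, Math.max(minValue, value))
}

// parseRenderRequest then coerces nbFrames/nbFPS into [1, 2147483647] (the int32 max),
// leaving the per-model limits to each provider, as the inline comments explain.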