Commit 198274c by jbilcke-hf (1 parent: 96f407e)

add support for real-time
.nvmrc CHANGED
@@ -1 +1 @@
-v20.10.0
+v20.10.0
package-lock.json CHANGED
The diff for this file is too large to render. See raw diff
 
package.json CHANGED
@@ -4,11 +4,11 @@
   "description": "A service which wraps and chains video and audio spaces together",
   "main": "src/index.mts",
   "scripts": {
-    "start": "node --loader ts-node/esm src/index.mts",
-    "test:submitVideo": "node --loader ts-node/esm src/tests/submitVideo.mts",
-    "test:checkStatus": "node --loader ts-node/esm src/tests/checkStatus.mts",
-    "test:downloadFileToTmp": "node --loader ts-node/esm src/tests/downloadFileToTmp.mts",
-    "test:stuff": "node --loader ts-node/esm src/utils/segmentImage.mts",
+    "start": "tsx src/index.mts",
+    "test:submitVideo": "tsx src/tests/submitVideo.mts",
+    "test:checkStatus": "tsx src/tests/checkStatus.mts",
+    "test:downloadFileToTmp": "tsx src/tests/downloadFileToTmp.mts",
+    "test:stuff": "tsx src/utils/segmentImage.mts",
     "docker": "npm run docker:build && npm run docker:run",
     "docker:build": "docker build -t videochain-api .",
     "docker:run": "docker run -it -p 7860:7860 videochain-api"
@@ -21,24 +21,24 @@
     "@gradio/client": "^0.1.4",
     "@huggingface/inference": "^2.6.1",
     "@types/express": "^4.17.17",
-    "@types/ffmpeg-concat": "^1.1.2",
+    "@types/node": "^20.12.7",
     "@types/uuid": "^9.0.2",
     "dotenv": "^16.3.1",
     "eventsource-parser": "^1.0.0",
     "express": "^4.18.2",
-    "ffmpeg-concat": "^1.3.0",
     "fluent-ffmpeg": "^2.1.2",
     "fs-extra": "^11.1.1",
     "gpt-tokens": "^1.1.1",
     "node-fetch": "^3.3.1",
     "nodejs-whisper": "^0.1.4",
-    "openai": "^3.3.0",
-    "puppeteer": "^21.5.1",
-    "replicate": "^0.21.1",
+    "openai": "^4.38.2",
+    "puppeteer": "^22.6.5",
+    "replicate": "^0.29.1",
     "resize-base64": "^1.0.12",
     "sharp": "^0.32.4",
     "temp-dir": "^3.0.0",
-    "ts-node": "^10.9.1",
+    "ts-node": "^10.9.2",
+    "tsx": "^4.7.0",
     "tts-react": "^3.0.1",
     "uuid": "^9.0.0",
     "yaml": "^2.3.1"
src/production/renderVideo.mts CHANGED
@@ -2,7 +2,8 @@ import { RenderedScene, RenderRequest, VideoGenerationParams } from "../types.mt
 
 // import { generateVideo } from "../providers/video-generation/generateVideoWithZeroscope.mts"
 // import { generateVideo } from "../providers/video-generation/generateVideoWithHotshotGradioAPI.mts"
-import { generateVideoWithAnimateLCM } from "../providers/video-generation/generateVideoWithAnimateLCM.mts"
+// import { generateVideoWithAnimateLCM } from "../providers/video-generation/generateVideoWithAnimateLCM.mts"
+import { generateVideoWithAnimateDiffLightning } from "../providers/video-generation/generateVideoWithAnimateDiffLightning.mts"
 import { generateSeed } from "../utils/misc/generateSeed.mts"
 
 export async function renderVideo(
@@ -10,25 +11,7 @@ export async function renderVideo(
   response: RenderedScene
 ): Promise<RenderedScene> {
 
-  const params: VideoGenerationParams = {
-    prompt: request.prompt,
-    // image?: undefined, // can be empty (and thus, is empty)
-    // lora?: string // hardcoded on "3D render"
-    // style?: string // hardcoded on "3D render" for now
-    orientation: "landscape",
-    projection: "cartesian",
-    width: 512,
-    height: 256,
-
-    // ok, now what about those? they are in the gradio, are not exposed yet in the API
-    // nbFrames: request.nbFrames,
-    // nbSteps: request.nbSteps,
-
-    seed: request.seed || generateSeed(),
-    debug: true,
-  }
-
-  response.assetUrl = await generateVideoWithAnimateLCM(params)
+  response.assetUrl = await generateVideoWithAnimateDiffLightning(request, response)
 
   return response
 }
src/providers/language-model/openai/openai.mts CHANGED
@@ -1,7 +1,5 @@
-import { Configuration, OpenAIApi } from "openai"
-
-export const openai = new OpenAIApi(
-  new Configuration({
-    apiKey: process.env.VC_OPENAI_API_KEY
-  })
-)
+import OpenAI from "openai";
+
+export const openai = new OpenAI({
+  apiKey: `${process.env.VC_OPENAI_API_KEY || ""}`
+})
src/providers/video-generation/defaultPrompts.mts CHANGED
@@ -14,9 +14,13 @@ export function addWordsIfNotPartOfThePrompt(prompt: string = "", words: string[
 export function getPositivePrompt(prompt: string, triggerWord = "") {
   return addWordsIfNotPartOfThePrompt(prompt, [
     triggerWord,
-    "crisp",
+    "sublime",
+    "pro quality",
     "sharp",
+    "crisp",
     "beautiful",
+    "impressive",
+    "amazing",
     "4K",
     "hd"
   ])
@@ -28,9 +32,13 @@ export function getNegativePrompt(prompt: string) {
     // "underexposed", // <-- can be a desired style
     // "overexposed", // <-- can be a desired style
     "logo",
+    "hud",
+    "ui",
     "censored",
+    "blurry",
     "watermark",
     "watermarked",
+    "copyright",
     "extra digit",
     "fewer digits",
     "bad fingers",
@@ -39,11 +47,11 @@ export function getNegativePrompt(prompt: string) {
     "low quality",
     "low resolution",
     "glitch", // <-- keep or not? could be a desired style?
-    "deformed",
-    "mutated",
-    "ugly",
-    "disfigured",
-    "3D render", // <-- keep or not? could be a desired style?
+    // "deformed",
+    // "mutated",
+    // "ugly",
+    // "disfigured",
+    // "3D render", // <-- keep or not? could be a desired style?
     "signature"
   ])
 }
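Note: `addWordsIfNotPartOfThePrompt` (its signature is visible in the hunk header above) is what keeps these lists from duplicating words already present in the user's prompt. A plausible sketch of such a helper, not necessarily this file's exact implementation:

// Sketch (assumption): append each word only when the prompt does not
// already contain it, joining everything with commas.
export function addWordsIfNotPartOfThePrompt(prompt: string = "", words: string[] = []): string {
  const missing = words.filter(word =>
    word && !prompt.toLowerCase().includes(word.toLowerCase())
  )
  return [prompt, ...missing].filter(Boolean).join(", ")
}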
src/providers/video-generation/generateVideoWithAnimateDiffLightning.mts ADDED
@@ -0,0 +1,123 @@
+import { RenderedScene, RenderRequest } from "../../types.mts"
+import { generateSeed } from "../../utils/misc/generateSeed.mts"
+import { tryApiCalls } from "../../utils/misc/tryApiCall.mts"
+import { getValidNumber } from "../../utils/validators/getValidNumber.mts"
+
+// const gradioApi = `${process.env.AI_TUBE_MODEL_ANIMATELCM_GRADIO_URL || ""}`
+const gradioApi = "https://jbilcke-hf-ai-tube-model-animatediff-lightning.hf.space"
+const accessToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
+
+export const generateVideoWithAnimateDiffLightning = async (
+  request: RenderRequest,
+  response: RenderedScene,
+): Promise<RenderedScene> => {
+
+  const debug = false
+
+  const actualFunction = async () => {
+
+    const prompt = request.prompt || ""
+    if (!prompt) {
+      response.error = "prompt is empty"
+      return response
+    }
+
+    // seed = seed || generateSeed()
+    request.seed = request.seed || generateSeed()
+
+    // see https://huggingface.co/spaces/jbilcke-hf/ai-tube-model-animatediff-lightning/blob/main/app.py#L15-L18
+    const baseModel = "epiCRealism"
+
+    // the motion LoRA - could be useful one day
+    const motion = ""
+
+    // can be 1, 2, 4 or 8
+    // but values below 4 look bad
+    const nbSteps = getValidNumber(request.nbSteps, 1, 8, 4)
+    const width = getValidNumber(request.width, 256, 1024, 512)
+    const height = getValidNumber(request.height, 256, 1024, 256)
+
+    const nbFrames = getValidNumber(request.nbFrames, 10, 60, 20)
+    const nbFPS = getValidNumber(request.nbFPS, 10, 60, 10)
+
+    // by default AnimateDiff generates about 2 seconds of video at 10 fps;
+    // the Gradio API now has some code to optionally fix that using FFmpeg,
+    // but this will add some delay overhead, so use with care!
+    const durationInSec = Math.round(nbFrames / nbFPS)
+    const framesPerSec = nbFPS
+
+    try {
+      if (debug) {
+        console.log(`calling AnimateDiff Lightning API with params (some are hidden):`, {
+          baseModel,
+          motion,
+          nbSteps,
+          width,
+          height,
+          nbFrames,
+          nbFPS,
+          durationInSec,
+          framesPerSec,
+        })
+      }
+
+      const res = await fetch(gradioApi + (gradioApi.endsWith("/") ? "" : "/") + "api/predict", {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          // Authorization: `Bearer ${token}`,
+        },
+        body: JSON.stringify({
+          fn_index: 0, // <- important! it is currently 4, not 1!
+          data: [
+            accessToken,
+            prompt,
+            baseModel,
+            width,
+            height,
+            motion,
+            nbSteps,
+            durationInSec,
+            framesPerSec,
+          ],
+        }),
+        cache: "no-store",
+        // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
+        // next: { revalidate: 1 }
+      })
+
+      // console.log("res:", res)
+
+      const { data } = await res.json()
+
+      // console.log("data:", data)
+      // Recommendation: handle errors
+      if (res.status !== 200 || !Array.isArray(data)) {
+        // This will activate the closest `error.js` Error Boundary
+        throw new Error(`Failed to fetch data (status: ${res.status})`)
+      }
+      // console.log("data:", data.slice(0, 50))
+
+      const base64Content = (data?.[0] || "") as string
+
+      if (!base64Content) {
+        throw new Error(`invalid response (no content)`)
+      }
+
+      // this API already emits a data-uri with a content type
+      return base64Content // addBase64HeaderToMp4(base64Content)
+    } catch (err) {
+      if (debug) {
+        console.error(`failed to call the AnimateDiff Lightning API:`)
+        console.error(err)
+      }
+      throw err
+    }
+  }
+
+  return tryApiCalls({
+    func: actualFunction,
+    debug,
+    failureMessage: "failed to call the AnimateDiff Lightning API"
+  })
+}
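Note: Gradio's legacy `/api/predict` endpoint takes a positional `data` array, so the order of the values above must match the parameter order of the Space's predict function. The `addBase64HeaderToMp4` call is commented out because this Space already returns a full data URI; for providers that return raw base64, such a helper might look like this (a sketch; the real helper lives elsewhere in the repo and is not part of this commit):

// Sketch (assumption): prefix raw base64 MP4 data with a data-URI header,
// leaving payloads that already carry one untouched.
function addBase64HeaderToMp4(base64Data: string = ""): string {
  return base64Data.startsWith("data:video/mp4")
    ? base64Data
    : `data:video/mp4;base64,${base64Data}`
}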
src/providers/video-generation/generateVideoWithAnimateLCM.mts CHANGED
@@ -16,14 +16,14 @@ export const generateVideoWithAnimateLCM = async ({
   width,
   height,
   style = "",
+  nbSteps = 4,
+  nbFrames = 20,
   seed,
   debug,
 }: VideoGenerationParams): Promise<string> => {
 
 
   const actualFunction = async () => {
-    const negPrompt = ""
-    prompt = prompt || ""
 
     // seed = seed || generateSeed()
     seed = generateSeed()
@@ -42,14 +42,12 @@ export const generateVideoWithAnimateLCM = async ({
     const lcmLoraAlpha = 0.8 // spatial_lora_slider,
 
     // label="Width", value=512, minimum=256, maximum=1024, step=64)
-    const width = 512
 
     // label="Animation length", value=16, minimum=12, maximum=20, step=1)
     const nbFrames = 16
 
     // label="Height", value=512, minimum=256, maximum=1024, step=64)
-    const height = 256
-
+
     // label="CFG Scale", value=1.5, minimum=1, maximum=2)
     const cfgScale = 1.5
 
src/types.mts CHANGED
@@ -289,12 +289,9 @@ export type RenderRequest = {
   // actionnables are names of things like "chest", "key", "tree", "chair" etc
   actionnables: string[]
 
-  // note: this is the number of frames for Zeroscope,
-  // which is currently configured to only output 3 seconds, so:
-  // nbFrames=8 -> 1 sec
-  // nbFrames=16 -> 2 sec
-  // nbFrames=24 -> 3 sec
-  nbFrames: number // min: 8, max: 24
+  nbFrames: number
+
+  nbFPS: number
 
   nbSteps: number // min: 1, max: 50
 
@@ -426,6 +423,7 @@ export type VideoGenerationParams = {
   projection: VideoProjection
   width: number
   height: number
+  nbSteps: number
   seed?: number
   debug?: boolean
 }
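Note: the new `nbFrames`/`nbFPS` pair replaces the Zeroscope-specific frame count, and the clip duration is simply their ratio, as the AnimateDiff Lightning provider above computes:

// Worked example with the provider defaults:
const nbFrames = 20
const nbFPS = 10
const durationInSec = Math.round(nbFrames / nbFPS) // -> a ~2 second clip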
src/utils/requests/parseRenderRequest.mts CHANGED
@@ -7,7 +7,11 @@ export function parseRenderRequest(request: RenderRequest) {
 
   // console.log("parseRenderRequest: "+JSON.stringify(request, null, 2))
   try {
-    request.nbFrames = getValidNumber(request.nbFrames, 1, 24, 16)
+    // we are lax on the values here, since each model will have its own limits;
+    // we just want pseudo-valid numbers
+
+    request.nbFrames = getValidNumber(request.nbFrames, 1, 2147483647, 1)
+    request.nbFPS = getValidNumber(request.nbFPS, 1, 2147483647, 1)
 
     request.negativePrompt = request.negativePrompt || ""