jbilcke-hf HF staff commited on
Commit
ec194c9
β€’
1 Parent(s): 8aa943e
package-lock.json CHANGED
@@ -26,6 +26,7 @@
26
  "nodejs-whisper": "^0.1.4",
27
  "openai": "^3.3.0",
28
  "puppeteer": "^20.8.0",
 
29
  "resize-base64": "^1.0.12",
30
  "sharp": "^0.32.4",
31
  "temp-dir": "^3.0.0",
@@ -4401,6 +4402,17 @@
4401
  "node": ">=0.10"
4402
  }
4403
  },
 
 
 
 
 
 
 
 
 
 
 
4404
  "node_modules/request": {
4405
  "version": "2.88.2",
4406
  "resolved": "https://registry.npmjs.org/request/-/request-2.88.2.tgz",
 
26
  "nodejs-whisper": "^0.1.4",
27
  "openai": "^3.3.0",
28
  "puppeteer": "^20.8.0",
29
+ "replicate": "^0.20.1",
30
  "resize-base64": "^1.0.12",
31
  "sharp": "^0.32.4",
32
  "temp-dir": "^3.0.0",
 
4402
  "node": ">=0.10"
4403
  }
4404
  },
4405
+ "node_modules/replicate": {
4406
+ "version": "0.20.1",
4407
+ "resolved": "https://registry.npmjs.org/replicate/-/replicate-0.20.1.tgz",
4408
+ "integrity": "sha512-QVyI1rowGsSfNuDrRmumYPdCHa/fN/RkI3NHpcK0i5hSSiWK69URAyheAC/0MIAiS3oUs4kD56PB9zEI4oHENw==",
4409
+ "engines": {
4410
+ "git": ">=2.11.0",
4411
+ "node": ">=18.0.0",
4412
+ "npm": ">=7.19.0",
4413
+ "yarn": ">=1.7.0"
4414
+ }
4415
+ },
4416
  "node_modules/request": {
4417
  "version": "2.88.2",
4418
  "resolved": "https://registry.npmjs.org/request/-/request-2.88.2.tgz",
package.json CHANGED
@@ -33,6 +33,7 @@
33
  "nodejs-whisper": "^0.1.4",
34
  "openai": "^3.3.0",
35
  "puppeteer": "^20.8.0",
 
36
  "resize-base64": "^1.0.12",
37
  "sharp": "^0.32.4",
38
  "temp-dir": "^3.0.0",
 
33
  "nodejs-whisper": "^0.1.4",
34
  "openai": "^3.3.0",
35
  "puppeteer": "^20.8.0",
36
+ "replicate": "^0.20.1",
37
  "resize-base64": "^1.0.12",
38
  "sharp": "^0.32.4",
39
  "temp-dir": "^3.0.0",
src/production/renderVideo.mts CHANGED
@@ -1,5 +1,5 @@
1
  import { RenderedScene, RenderRequest } from "../types.mts"
2
- import { generateVideo } from "../providers/video-generation/generateVideo.mts"
3
 
4
  export async function renderVideo(
5
  request: RenderRequest,
 
1
  import { RenderedScene, RenderRequest } from "../types.mts"
2
+ import { generateVideo } from "../providers/video-generation/generateVideoWithZeroscope.mts"
3
 
4
  export async function renderVideo(
5
  request: RenderRequest,
src/providers/lip-syncing/generateLipSyncVideo.mts ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+
2
+
3
+ // TODO: we should use
4
+
5
+
6
+ // or we can use Video ReTalking but it requires a video where the person is already talking I believe:
7
+ // https://twitter.com/camenduru/status/1713570931342237852
src/providers/music-generation/generateMusicWithReplicate.mts ADDED
@@ -0,0 +1 @@
 
 
1
+ // TODO use https://replicate.com/sakemin/musicgen-fine-tuner
src/providers/video-generation/defaultPrompts.mts ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // should we really have default prompts in here?
2
+ // I think they should probably be defined at the applicative software layer (ie. in the client)
3
+
4
+ export function addWordsIfNotPartOfThePrompt(prompt: string = "", words: string[] = []): string {
5
+ const promptWords = prompt.split(",").map(w => w.trim().toLocaleLowerCase())
6
+
7
+ return [
8
+ prompt,
9
+ // we add our keywords, unless they are already part of the prompt
10
+ ...words.filter(w => !promptWords.includes(w.toLocaleLowerCase()))
11
+ ].join(", ")
12
+ }
13
+
14
+ export function getPositivePrompt(prompt: string, triggerWord = "") {
15
+ return addWordsIfNotPartOfThePrompt(prompt, [
16
+ triggerWord,
17
+ "crisp",
18
+ "sharp",
19
+ "beautiful",
20
+ "4K",
21
+ "hd"
22
+ ])
23
+ }
24
+
25
+ export function getNegativePrompt(prompt: string) {
26
+ return addWordsIfNotPartOfThePrompt(prompt, [
27
+ "cropped",
28
+ // "underexposed", // <-- can be a desired style
29
+ // "overexposed", // <-- can be a desired style
30
+ "logo",
31
+ "censored",
32
+ "watermark",
33
+ "watermarked",
34
+ "extra digit",
35
+ "fewer digits",
36
+ "bad fingers",
37
+ "bad quality",
38
+ "worst quality",
39
+ "low quality",
40
+ "low resolution",
41
+ "glitch", // <-- keep or not? could be a desired style?
42
+ "deformed",
43
+ "mutated",
44
+ "ugly",
45
+ "disfigured",
46
+ "3D render", // <-- keep or not? could be a desired style?
47
+ "signature"
48
+ ])
49
+ }
src/providers/video-generation/generateVideoWithHotshotGradioAPI.mts ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { client } from "@gradio/client"
2
+
3
+ import { VideoGenerationOptions } from "./types.mts"
4
+ import { getNegativePrompt, getPositivePrompt } from "./defaultPrompts.mts"
5
+ import { generateSeed } from "../../utils/misc/generateSeed.mts"
6
+
7
+ // we don't use replicas yet, because it ain't easy to get their hostname
8
+ const instances: string[] = [
9
+ `${process.env.VC_HOTSHOT_XL_GRADIO_SPACE_API_URL_1 || ""}`,
10
+ ].filter(instance => instance?.length > 0)
11
+
12
+ const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
13
+
14
+ export const generateVideo = async ({
15
+ positivePrompt,
16
+ negativePrompt = "",
17
+ seed,
18
+ nbFrames = 8, // for now the only values that make sense are 1 (for a jpg) or 8 (for a video)
19
+ videoDuration = 1000, // for now Hotshot doesn't really supports anything else
20
+ nbSteps = 30, // when rendering a final video, we want a value like 50 or 70 here
21
+ size = "768x320",
22
+
23
+ // for jbilcke-hf/sdxl-cinematic-2 it is "cinematic-2"
24
+ triggerWord = "cinematic-2",
25
+
26
+ huggingFaceLora = "jbilcke-hf/sdxl-cinematic-2",
27
+ }: VideoGenerationOptions) => {
28
+
29
+ const instance = instances.shift()
30
+ instances.push(instance)
31
+
32
+ const api = await client(instance, {
33
+ hf_token: `${process.env.VC_HF_API_TOKEN}` as any
34
+ })
35
+
36
+ // pimp the prompt
37
+ positivePrompt = getPositivePrompt(positivePrompt, triggerWord)
38
+ negativePrompt = getNegativePrompt(negativePrompt)
39
+
40
+ try {
41
+
42
+ const rawResponse = await api.predict('/run', [
43
+ secretToken,
44
+ positivePrompt, // string in 'Prompt' Textbox component
45
+ negativePrompt || "",
46
+ huggingFaceLora?.length || undefined, // string in 'Public LoRA ID' Textbox component
47
+ size || '512x512', // string (Option from: [('320x768', '320x768'), ('384x672', '384x672'), ('416x608', '416x608'), ('512x512', '512x512'), ('608x416', '608x416'), ('672x384', '672x384'), ('768x320', '768x320')]) in 'Size' Dropdown component
48
+ !isNaN(seed) && isFinite(seed) ? seed : generateSeed(), // number (numeric value between -1 and 423538377342) in 'Seed' Slider component, -1 to set to random
49
+ nbSteps || 30,
50
+ nbFrames || 8,
51
+ videoDuration || 1000,
52
+ ]) as any
53
+
54
+ // console.log("rawResponse:", rawResponse)
55
+
56
+ console.log("data:", rawResponse?.data)
57
+ const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }
58
+
59
+ return `${instance}/file=${name}`
60
+ } catch (err) {
61
+ throw err
62
+ }
63
+ }
src/providers/video-generation/generateVideoWithHotshotReplicate.mts ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use server"
2
+
3
+ import Replicate from "replicate"
4
+
5
+ import { generateSeed } from "../../utils/misc/generateSeed.mts"
6
+ import { sleep } from "../../utils/misc/sleep.mts"
7
+ import { getNegativePrompt, getPositivePrompt } from "./defaultPrompts.mts"
8
+ import { VideoGenerationOptions } from "./types.mts"
9
+
10
+ const replicateToken = `${process.env.AUTH_REPLICATE_API_TOKEN || ""}`
11
+ const replicateModel = `${process.env.HOTSHOT_XL_REPLICATE_MODEL || ""}`
12
+ const replicateModelVersion = `${process.env.HOTSHOT_XL_REPLICATE_MODEL_VERSION || ""}`
13
+
14
+ if (!replicateToken) {
15
+ throw new Error(`you need to configure your AUTH_REPLICATE_API_TOKEN`)
16
+ }
17
+
18
+ const replicate = new Replicate({ auth: replicateToken })
19
+
20
+ /**
21
+ * Generate a video with hotshot through Replicate
22
+ *
23
+ * Note that if nbFrames == 1, then it will generate a jpg
24
+ *
25
+ */
26
+ export async function generateVideoWithHotshotReplicate({
27
+ positivePrompt,
28
+ negativePrompt = "",
29
+ seed,
30
+ nbFrames = 8, // for now the only values that make sense are 1 (for a jpg) or 8 (for a video)
31
+ videoDuration = 1000, // for now Hotshot doesn't really supports anything else
32
+ nbSteps = 30, // when rendering a final video, we want a value like 50 or 70 here
33
+ size = "768x320",
34
+
35
+ // for a replicate LoRa this is always the same ("In the style of TOK")
36
+ // triggerWord = "In the style of TOK",
37
+
38
+ // for jbilcke-hf/sdxl-cinematic-2 it is "cinematic-2"
39
+ triggerWord = "cinematic-2",
40
+
41
+ huggingFaceLora = "jbilcke-hf/sdxl-cinematic-2",
42
+
43
+ // url to the weight
44
+ replicateLora,
45
+ }: VideoGenerationOptions): Promise<string> {
46
+
47
+ if (!positivePrompt?.length) {
48
+ throw new Error(`prompt is too short!`)
49
+ }
50
+
51
+ if (!replicateModel) {
52
+ throw new Error(`you need to configure your HOTSHOT_XL_REPLICATE_MODEL`)
53
+ }
54
+
55
+ if (!replicateModelVersion) {
56
+ throw new Error(`you need to configure your HOTSHOT_XL_REPLICATE_MODEL_VERSION`)
57
+ }
58
+
59
+ // pimp the prompt
60
+ positivePrompt = getPositivePrompt(positivePrompt, triggerWord)
61
+ negativePrompt = getNegativePrompt(negativePrompt)
62
+
63
+ const [width, height] = size.split("x").map(x => Number(x))
64
+
65
+ // see an example here:
66
+ // https://replicate.com/p/incraplbv23g3zv6woinhgdira
67
+ // for params and doc see https://replicate.com/cloneofsimo/hotshot-xl-lora-controlnet
68
+ const prediction = await replicate.predictions.create({
69
+ version: replicateModelVersion,
70
+ input: {
71
+ prompt: positivePrompt,
72
+ negative_prompt: negativePrompt,
73
+
74
+ // this is not a URL but a model name
75
+ hf_lora_url: replicateLora?.length ? undefined : huggingFaceLora,
76
+
77
+ // this is a URL to the .tar (we can get it from the "trainings" page)
78
+ replicate_weights_url: huggingFaceLora?.length ? undefined : replicateLora,
79
+
80
+ width,
81
+ height,
82
+
83
+ // those are used to create an upsampling or downsampling
84
+ // original_width: width,
85
+ // original_height: height,
86
+ // target_width: width,
87
+ // target_height: height,
88
+
89
+ steps: nbSteps,
90
+
91
+
92
+ // note: right now it only makes sense to use either 1 (a jpg)
93
+ video_length: nbFrames, // nb frames
94
+
95
+ video_duration: videoDuration, // video duration in ms
96
+
97
+ seed: !isNaN(seed) && isFinite(seed) ? seed : generateSeed()
98
+ }
99
+ })
100
+
101
+ // console.log("prediction:", prediction)
102
+
103
+ // Replicate requires at least 30 seconds of mandatory delay
104
+ await sleep(30000)
105
+
106
+ let res: Response
107
+ let pollingCount = 0
108
+ do {
109
+ // Check every 5 seconds
110
+ await sleep(5000)
111
+
112
+ res = await fetch(`https://api.replicate.com/v1/predictions/${prediction.id}`, {
113
+ method: "GET",
114
+ headers: {
115
+ Authorization: `Token ${replicateToken}`,
116
+ },
117
+ cache: 'no-store',
118
+ })
119
+
120
+ if (res.status === 200) {
121
+ const response = (await res.json()) as any
122
+ const error = `${response?.error || ""}`
123
+ if (error) {
124
+ throw new Error(error)
125
+ }
126
+ }
127
+
128
+ pollingCount++
129
+
130
+ // To prevent indefinite polling, we can stop after a certain number, here 30 (i.e. about 2 and half minutes)
131
+ if (pollingCount >= 30) {
132
+ throw new Error('Request time out.')
133
+ }
134
+ } while (true)
135
+ }
src/providers/video-generation/generateVideoWithShow.mts ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ // we don't use replicas yet, because it ain't easy to get their hostname
3
+ const instances: string[] = [
4
+ `${process.env.VC_ZEROSCOPE_SPACE_API_URL_1 || ""}`,
5
+ ].filter(instance => instance?.length > 0)
6
+
7
+ const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
8
+
9
+ export const generateVideo = async (prompt: string, options?: {
10
+ seed: number;
11
+ nbFrames: number;
12
+ nbSteps: number;
13
+ }) => {
14
+ throw new Error("Not implemented yet")
15
+ }
src/providers/video-generation/generateVideoWithZeroscope.mts ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { client } from "@gradio/client"
2
+
3
+ import { generateSeed } from "../../utils/misc/generateSeed.mts"
4
+ import { VideoGenerationOptions } from "./types.mts"
5
+ import { getPositivePrompt } from "./defaultPrompts.mts"
6
+
7
+ // we don't use replicas yet, because it ain't easy to get their hostname
8
+ const instances: string[] = [
9
+ `${process.env.VC_ZEROSCOPE_SPACE_API_URL_1 || ""}`,
10
+ `${process.env.VC_ZEROSCOPE_SPACE_API_URL_2 || ""}`,
11
+ `${process.env.VC_ZEROSCOPE_SPACE_API_URL_3 || ""}`,
12
+ `${process.env.VC_ZEROSCOPE_SPACE_API_URL_4 || ""}`,
13
+ ].filter(instance => instance?.length > 0)
14
+
15
+ const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
16
+
17
+ export const generateVideo = async ({
18
+ positivePrompt,
19
+ seed,
20
+ nbFrames = 8, // for now the only values that make sense are 1 (for a jpg) or 8 (for a video)
21
+ nbSteps = 30, // when rendering a final video, we want a value like 50 or 70 here
22
+ }: VideoGenerationOptions) => {
23
+ try {
24
+ const instance = instances.shift()
25
+ instances.push(instance)
26
+
27
+ const api = await client(instance, {
28
+ hf_token: `${process.env.VC_HF_API_TOKEN}` as any
29
+ })
30
+
31
+ const rawResponse = await api.predict('/run', [
32
+ getPositivePrompt(positivePrompt), // string in 'Prompt' Textbox component
33
+
34
+ // we treat 0 as meaning "random seed"
35
+ !isNaN(seed) && isFinite(seed) && seed > 0 ? seed : generateSeed(), // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
36
+ nbFrames || 24, // 24 // it is the nb of frames per seconds I think?
37
+ nbSteps || 35, // 10, (numeric value between 10 and 50) in 'Number of inference steps' Slider component
38
+ secretToken,
39
+ ]) as any
40
+
41
+ // console.log("rawResponse:", rawResponse)
42
+
43
+ const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }
44
+
45
+ return `${instance}/file=${name}`
46
+ } catch (err) {
47
+ throw err
48
+ }
49
+ }
src/providers/video-generation/types.mts ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { HotshotImageInferenceSize } from "../../types.mts"

// Options shared by the video-generation providers (Gradio Spaces, Replicate).
export type VideoGenerationOptions = {
  /**
   * Positive prompt to use
   */
  positivePrompt: string

  /**
   * Negative prompt to use
   */
  negativePrompt?: string

  /**
   * Seed.
   *
   * Depending on the vendor, if you use a negative value (eg -1) it should give you an always random value
   */
  seed?: number

  /**
   * Number of frames to generate
   */
  nbFrames?: number

  /**
   * Duration of the video, in milliseconds
   * (callers default this to 1000 and the Replicate provider forwards it
   * as "video duration in ms" — the previous "in seconds" wording was wrong)
   */
  videoDuration?: number

  /**
   * Number of inference steps (for final rendering use 70)
   */
  nbSteps?: number

  /**
   * Image size (which is actually a ratio)
   *
   * Note that Hotshot wasn't trained on all possible combinations,
   * and in particular by default it is only supposed to support 512x512 well
   */
  size?: HotshotImageInferenceSize

  /**
   * Trigger word
   *
   * for a replicate LoRa this is always the same ("In the style of TOK")
   * triggerWord = "In the style of TOK",
   * for jbilcke-hf/sdxl-cinematic-2 it is "cinematic-2"
   */
  triggerWord?: string

  /**
   * Owner + repo name of the Hugging Face LoRA
   */
  huggingFaceLora?: string

  /**
   * URL to the weights .tar (those can be hosted anywhere, it doesn't have to be on Replicate.com)
   */
  replicateLora?: string
}
src/providers/video-interpolation/interpolateVideoWithReplicate.mts ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ // this looks really great!
3
+ // https://replicate.com/zsxkib/st-mfnet?prediction=bufijj3b45cjoe43pzloqkcghy
4
+
5
+ "use server"
6
+
7
+ import Replicate from "replicate"
8
+
9
+ import { sleep } from "../../utils/misc/sleep.mts"
10
+
11
+ const replicateToken = `${process.env.AUTH_REPLICATE_API_TOKEN || ""}`
12
+ const replicateModel = `${process.env.STMFNET_REPLICATE_MODEL || ""}`
13
+ const replicateModelVersion = `${process.env.STMFNET_REPLICATE_MODEL_VERSION || ""}`
14
+
15
+ if (!replicateToken) {
16
+ throw new Error(`you need to configure your AUTH_REPLICATE_API_TOKEN`)
17
+ }
18
+
19
+ const replicate = new Replicate({ auth: replicateToken })
20
+
21
+ /**
22
+ * Interpolate a video using Replicate
23
+ *
24
+ * Important note: the video will lose its sound, if any!
25
+ *
26
+ * With the current settingd, duration of the original video will be preserved
27
+ * (but we could make slow-mo too)
28
+ */
29
+ export async function interpolateVideoWithReplicate({
30
+ video,
31
+
32
+ // so arguably 60 would look smoother, but we are tying to reach for a "movie" kind of feel here
33
+ nbFrames = 24,
34
+ }: {
35
+ video: string
36
+
37
+ /**
38
+ * Number of frame (duration of the original video will be preserved)
39
+ */
40
+ nbFrames?: number // min 1, max: 240
41
+ }): Promise<string> {
42
+
43
+ if (!video) {
44
+ throw new Error(`no video provided`)
45
+ }
46
+
47
+ if (!replicateModel) {
48
+ throw new Error(`you need to configure your STMFNET_REPLICATE_MODEL`)
49
+ }
50
+
51
+ if (!replicateModelVersion) {
52
+ throw new Error(`you need to configure your STMFNET_REPLICATE_MODEL_VERSION`)
53
+ }
54
+
55
+ // for params and doc see https://replicate.com/zsxkib/st-mfnet
56
+ const prediction = await replicate.predictions.create({
57
+ version: replicateModelVersion,
58
+ input: {
59
+ mp4: video, // I think it should be a base64 object?
60
+ framerate_multiplier: 2, // can be one of 2, 4, 8, 16, 32
61
+
62
+ // note: for now we use the simplest setting, which is to keep the original video duration
63
+ // if we don't keep the original duration, the video will look like a slow motion animation
64
+ // which may be a desired effect, but let's keep it simple for now
65
+ keep_original_duration: true, // false,
66
+ custom_fps: nbFrames // min 1, max: 240
67
+ }
68
+ })
69
+
70
+ // console.log("prediction:", prediction)
71
+
72
+ // Replicate requires at least 8 seconds of mandatory delay
73
+ await sleep(10000)
74
+
75
+ let res: Response
76
+ let pollingCount = 0
77
+ do {
78
+ // This is normally a fast model, so let's check every 2 seconds
79
+ await sleep(2000)
80
+
81
+ res = await fetch(`https://api.replicate.com/v1/predictions/${prediction.id}`, {
82
+ method: "GET",
83
+ headers: {
84
+ Authorization: `Token ${replicateToken}`,
85
+ },
86
+ cache: 'no-store',
87
+ })
88
+
89
+ if (res.status === 200) {
90
+ const response = (await res.json()) as any
91
+ const error = `${response?.error || ""}`
92
+ if (error) {
93
+ throw new Error(error)
94
+ }
95
+ }
96
+
97
+ pollingCount++
98
+
99
+ // To prevent indefinite polling, we can stop after a certain number
100
+ if (pollingCount >= 30) {
101
+ throw new Error('Request time out.')
102
+ }
103
+ } while (true)
104
+ }
src/providers/{video-generation/generateVideo.mts β†’ video-transformation/transformVideoWithHotshotReplicate.mts} RENAMED
@@ -9,9 +9,6 @@ export const state = {
9
  // we don't use replicas yet, because it ain't easy to get their hostname
10
  const instances: string[] = [
11
  `${process.env.VC_ZEROSCOPE_SPACE_API_URL_1 || ""}`,
12
- `${process.env.VC_ZEROSCOPE_SPACE_API_URL_2 || ""}`,
13
- `${process.env.VC_ZEROSCOPE_SPACE_API_URL_3 || ""}`,
14
- `${process.env.VC_ZEROSCOPE_SPACE_API_URL_4 || ""}`,
15
  ].filter(instance => instance?.length > 0)
16
 
17
  const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
@@ -21,6 +18,7 @@ export const generateVideo = async (prompt: string, options?: {
21
  nbFrames: number;
22
  nbSteps: number;
23
  }) => {
 
24
 
25
  /* let's disable load control, and let it use the queue */
26
  /*
 
9
  // we don't use replicas yet, because it ain't easy to get their hostname
10
  const instances: string[] = [
11
  `${process.env.VC_ZEROSCOPE_SPACE_API_URL_1 || ""}`,
 
 
 
12
  ].filter(instance => instance?.length > 0)
13
 
14
  const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
 
18
  nbFrames: number;
19
  nbSteps: number;
20
  }) => {
21
+ throw new Error("Not implemented yet")
22
 
23
  /* let's disable load control, and let it use the queue */
24
  /*
src/scheduler/processVideo.mts CHANGED
@@ -2,7 +2,7 @@ import { v4 as uuidv4 } from "uuid"
2
 
3
  import { Video, VideoShot } from "../types.mts"
4
 
5
- import { generateVideo } from "../providers/video-generation/generateVideo.mts"
6
  import { upscaleVideo } from "../providers/video-upscaling/upscaleVideo.mts"
7
  import { interpolateVideo } from "../providers/video-interpolation/interpolateVideo.mts"
8
  import { postInterpolation } from "../production/postInterpolation.mts"
 
2
 
3
  import { Video, VideoShot } from "../types.mts"
4
 
5
+ import { generateVideo } from "../providers/video-generation/generateVideoWithZeroscope.mts"
6
  import { upscaleVideo } from "../providers/video-upscaling/upscaleVideo.mts"
7
  import { interpolateVideo } from "../providers/video-interpolation/interpolateVideo.mts"
8
  import { postInterpolation } from "../production/postInterpolation.mts"
src/types.mts CHANGED
@@ -384,3 +384,17 @@ export type RenderingJob = {
384
 
385
  status: 'pending' | 'completed' | 'error'
386
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
 
385
  status: 'pending' | 'completed' | 'error'
386
  }
387

// vendor-specific types

// Sizes accepted by the Hotshot-XL provider ("size" is really an aspect-ratio
// preset); the last three entries are custom ratios that aren't officially
// supported and may not render properly.
export type HotshotImageInferenceSize =
| '320x768'
| '384x672'
| '416x608'
| '512x512'
| '608x416'
| '672x384'
| '768x320'
| '1024x1024' // custom ratio - this isn't supported / supposed to work properly
| '1024x512' // custom panoramic ratio - this isn't supported / supposed to work properly
| '1024x576' // movie ratio (16:9) this isn't supported / supposed to work properly