Julian BILCKE committed on
Commit
d3fb9b4
2 Parent(s): b99b820 2ef024a

Merge pull request #25 from devniel/stabilityai-image-to-video

Browse files
src/app/api/resolve/providers/stabilityai/generateVideo.ts CHANGED
@@ -1,23 +1,40 @@
1
- import { sleep } from '@/lib/utils/sleep'
2
  import { ResolveRequest } from '@aitube/clapper-services'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  export async function generateVideo(request: ResolveRequest): Promise<string> {
5
  if (!request.settings.stabilityAiApiKey) {
6
- throw new Error(
7
- `StabilityAI.generateVideo: cannot generate without a valid stabilityAiApiKey`
8
- )
9
  }
10
 
11
  if (!request.settings.videoGenerationModel) {
12
  throw new Error(
13
- `StabilityAI.generateVideo: cannot generate without a valid videoGenerationModel`
14
  )
15
  }
16
 
17
  if (!request.prompts.video.image) {
18
- throw new Error(
19
- `StabilityAI.generateVideo: cannot generate without a valid image input`
20
- )
21
  }
22
 
23
  // what's cool about the ultra model is its capacity to take in
@@ -30,14 +47,13 @@ export async function generateVideo(request: ResolveRequest): Promise<string> {
30
  // convey a sky that was blue and green, but more green than blue.
31
 
32
  const body = new FormData()
33
-
34
  // Supported Formats: jpeg, png
35
  // Supported Dimensions: 1024x576, 576x1024, 768x768
36
 
37
  // "Please ensure that the source image is in the correct format and dimensions"
38
- body.set('image', `${request.prompts.video.image || ''}`)
39
 
40
- const response = (await fetch(
41
  `https://api.stability.ai/v2beta/image-to-video`,
42
  {
43
  method: 'POST',
@@ -47,52 +63,120 @@ export async function generateVideo(request: ResolveRequest): Promise<string> {
47
  body,
48
  cache: 'no-store',
49
  }
50
- )) as unknown as { data: { id: number } }
51
-
52
- const generationId = response?.data?.id
53
- if (!generationId) {
54
- throw new Error(`StabilityAI failed to give us a valid response.data.id`)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  }
 
56
 
57
- console.log('Generation ID:', generationId)
58
-
59
- let pollingCount = 0
60
- do {
61
- // This is normally a fast model, so let's check every 4 seconds
62
- await sleep(10000)
63
-
64
- const res = await fetch(
65
- `https://api.stability.ai/v2beta/image-to-video/result/${generationId}`,
66
- {
67
- method: 'GET',
68
- headers: {
69
- Authorization: `Bearer ${request.settings.stabilityAiApiKey}`,
70
- Accept: 'video/*', // Use 'application/json' to receive base64 encoded JSON
71
- },
72
- cache: 'no-store',
73
- }
74
  )
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- if (res.status === 200) {
 
 
 
 
 
 
 
 
 
77
  try {
78
- const response = (await res.json()) as any
79
- const errors = `${response?.errors || ''}`
80
- if (errors) {
81
- throw new Error(errors)
 
 
 
 
 
 
 
 
 
 
82
  }
83
- return response.output.pop()
84
- } catch (err) {
85
- console.error('res.json() error:', err)
86
- }
87
- }
88
-
89
- pollingCount++
90
 
91
- // To prevent indefinite polling, we can stop after a certain number
92
- if (pollingCount >= 40) {
93
- throw new Error('Request timed out.')
94
- }
95
- } while (true)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
- throw new Error('finish me')
 
 
 
 
 
 
 
 
 
98
  }
 
1
+ import { base64DataUriToBlob } from '@/lib/utils/base64DataUriToBlob'
2
  import { ResolveRequest } from '@aitube/clapper-services'
3
+ import sharp from 'sharp'
4
+
5
/** Prefix placed in front of the log lines and error messages of this module. */
const TAG = 'StabilityAI.generateVideo'

/**
 * JSON body read from the response to the `POST /v2beta/image-to-video`
 * call: `id` is read when the status is 200, `errors` otherwise.
 */
interface StabilityAIVImageToVideoStartGenerationResponse {
  id: string
  name?: string
  errors?: string[]
}

/** Values the `finish_reason` field of a fetched generation is compared against. */
enum StabilityAIVImageToVideoFetchhGenerationFinishReason {
  SUCCESS = 'SUCCESS',
  CONTENT_FILTERED = 'CONTENT_FILTERED',
}

/**
 * JSON body read from `GET /v2beta/image-to-video/result/{id}` once the
 * generation is no longer pending. `video` is emitted by the caller as the
 * base64 payload of a `data:video/mp4;base64,` URI.
 */
interface StabilityAIVImageToVideoFetchGenerationResponse {
  video: string
  finish_reason: StabilityAIVImageToVideoFetchhGenerationFinishReason
  seed: number
  errors?: string[]
}
24
 
25
  export async function generateVideo(request: ResolveRequest): Promise<string> {
26
  if (!request.settings.stabilityAiApiKey) {
27
+ throw new Error(`${TAG}: cannot generate without a valid stabilityAiApiKey`)
 
 
28
  }
29
 
30
  if (!request.settings.videoGenerationModel) {
31
  throw new Error(
32
+ `${TAG}: cannot generate without a valid videoGenerationModel`
33
  )
34
  }
35
 
36
  if (!request.prompts.video.image) {
37
+ throw new Error(`${TAG}: cannot generate without a valid image input`)
 
 
38
  }
39
 
40
  // what's cool about the ultra model is its capacity to take in
 
47
  // convey a sky that was blue and green, but more green than blue.
48
 
49
  const body = new FormData()
 
50
  // Supported Formats: jpeg, png
51
  // Supported Dimensions: 1024x576, 576x1024, 768x768
52
 
53
  // "Please ensure that the source image is in the correct format and dimensions"
54
+ body.set('image', await getRequestImage(request))
55
 
56
+ const response = await fetch(
57
  `https://api.stability.ai/v2beta/image-to-video`,
58
  {
59
  method: 'POST',
 
63
  body,
64
  cache: 'no-store',
65
  }
66
+ )
67
+
68
+ if (response.status == 200) {
69
+ const { id }: StabilityAIVImageToVideoStartGenerationResponse =
70
+ await response.json()
71
+ console.log(TAG, `Generation ID: ${id}`)
72
+ const result = await pollGenerationResult(
73
+ id,
74
+ request.settings.stabilityAiApiKey
75
+ )
76
+ console.log(TAG, 'Video was successfully generated.', result.length)
77
+ return result
78
+ } else {
79
+ const { errors }: StabilityAIVImageToVideoStartGenerationResponse =
80
+ await response.json()
81
+ if (errors) {
82
+ throw new Error(`${TAG}: ${errors.join('\n')}`)
83
+ }
84
+ throw new Error(`${TAG}: Unexpected error`)
85
  }
86
+ }
87
 
88
/**
 * Extracts the source image from the request and makes sure it has one of
 * the dimensions accepted by the StabilityAI image-to-video endpoint
 * (1024x576, 576x1024 or 768x768).
 *
 * An image that already has a supported size is returned untouched.
 * Any other image is center-cropped ("cover") to 1024x576 and re-encoded
 * as JPEG, so that the bytes match the `image/jpeg` type of the returned Blob.
 *
 * @param request - resolve request whose `prompts.video.image` holds the
 *   source image as a base64 data URI
 * @returns the image as a Blob, ready to be appended to the form body
 */
async function getRequestImage(request: ResolveRequest) {
  const supportedDimensions = ['1024x576', '576x1024', '768x768']
  let imageBlob = base64DataUriToBlob(`${request.prompts.video.image || ''}`)
  const imageBuffer = Buffer.from(await imageBlob.arrayBuffer())
  const { width, height } = await sharp(imageBuffer).metadata()
  const dimensions = `${width}x${height}`

  // `includes` compares values. The `in` operator would only test the array
  // indices ("0", "1", "2"), so it could never match a dimension string.
  if (!supportedDimensions.includes(dimensions)) {
    console.log(
      TAG,
      `Unsupported dimensions ${width}x${height}, resizing to 1024x576 ...`
    )
    const resizedImageBuffer = await sharp(imageBuffer)
      .resize({
        width: 1024,
        height: 576,
        fit: 'cover',
        position: 'center',
      })
      // Without an explicit output format sharp keeps the input format,
      // which would contradict the MIME type declared on the Blob below.
      .jpeg()
      .toBuffer()
    imageBlob = new Blob([resizedImageBuffer], { type: 'image/jpeg' })
  }

  return imageBlob
}
115
 
116
/**
 * Polls the StabilityAI image-to-video result endpoint until the generation
 * completes, fails, or the polling budget is exhausted.
 *
 * Requests are issued strictly one after another (wait, then fetch), so two
 * polls can never overlap and nothing keeps running once a result is known.
 *
 * Outcome per response status:
 * - 202: the generation is still in progress, keep polling
 * - 200: resolves with the video, unless `finish_reason` is not SUCCESS
 * - other status below 500: terminal failure, rejects immediately
 * - 500 and above: logged and retried on the next attempt
 *
 * @param generationId - id returned when the generation was started
 * @param apiKey - StabilityAI API key sent as a bearer token
 * @param maxPollingCount - maximum number of polling attempts
 * @param intervalMs - delay before each polling attempt, in milliseconds
 * @returns the generated video as a `data:video/mp4;base64,...` URI
 * @throws Error when the API reports errors, when the content was filtered,
 *   when the request itself fails, or when every attempt was used up
 */
async function pollGenerationResult(
  generationId: string,
  apiKey: string,
  maxPollingCount = 40,
  intervalMs = 10000
): Promise<string> {
  console.log(TAG, `Polling generation result width id = ${generationId} ...`)

  for (let pollingCount = 0; pollingCount < maxPollingCount; pollingCount++) {
    // Wait first: the result is never available right after the start call.
    await new Promise<void>((resolve) => setTimeout(resolve, intervalMs))

    const res = await fetch(
      `https://api.stability.ai/v2beta/image-to-video/result/${generationId}`,
      {
        method: 'GET',
        headers: {
          Authorization: `Bearer ${apiKey}`,
          Accept: 'application/json; type=video/mp4', // Use 'video/*' to receive raw bytes
        },
        cache: 'no-store',
      }
    )

    // Still in progress. This attempt counts against the budget, so a
    // generation that never leaves this state ends in a timeout instead of
    // being polled forever.
    if (res.status === 202) {
      continue
    }

    try {
      const {
        video,
        errors,
        finish_reason,
      }: StabilityAIVImageToVideoFetchGenerationResponse = await res.json()

      if (res.status > 200) {
        throw new Error(errors?.join('\n'))
      }
      if (
        finish_reason !==
        StabilityAIVImageToVideoFetchhGenerationFinishReason.SUCCESS
      ) {
        throw new Error('Content filtered')
      }
      return `data:video/mp4;base64,${video}`
    } catch (err) {
      console.error(TAG, err)
      // Anything below 500 is a definitive answer; server-side errors are
      // treated as transient and retried until the budget runs out.
      if (res.status < 500) {
        throw err
      }
    }
  }

  throw new Error(`${TAG}: Request timed out.`)
}
src/app/api/resolve/providers/stabilityai/index.ts CHANGED
@@ -2,6 +2,7 @@ import { ClapSegmentCategory } from '@aitube/clap'
2
  import { TimelineSegment } from '@aitube/timeline'
3
  import { ResolveRequest } from '@aitube/clapper-services'
4
  import { generateImage } from './generateImage'
 
5
 
6
  export async function resolveSegment(
7
  request: ResolveRequest
@@ -12,11 +13,10 @@ export async function resolveSegment(
12
 
13
  const segment = request.segment
14
 
15
- // for doc see:
16
- // https://fal.ai/models/fal-ai/fast-sdxl/api
17
-
18
  if (request.segment.category === ClapSegmentCategory.STORYBOARD) {
19
  segment.assetUrl = await generateImage(request)
 
 
20
  } else {
21
  throw new Error(
22
  `Clapper doesn't support ${request.segment.category} generation for provider "Stability.ai". Please open a pull request with (working code) to solve this!`
 
2
  import { TimelineSegment } from '@aitube/timeline'
3
  import { ResolveRequest } from '@aitube/clapper-services'
4
  import { generateImage } from './generateImage'
5
+ import { generateVideo } from './generateVideo'
6
 
7
  export async function resolveSegment(
8
  request: ResolveRequest
 
13
 
14
  const segment = request.segment
15
 
 
 
 
16
  if (request.segment.category === ClapSegmentCategory.STORYBOARD) {
17
  segment.assetUrl = await generateImage(request)
18
+ } else if (request.segment.category === ClapSegmentCategory.VIDEO) {
19
+ segment.assetUrl = await generateVideo(request)
20
  } else {
21
  throw new Error(
22
  `Clapper doesn't support ${request.segment.category} generation for provider "Stability.ai". Please open a pull request with (working code) to solve this!`