jbilcke-hf HF staff commited on
Commit
1cef24b
1 Parent(s): 0176e5b

clusterize musicgen for today's release

Browse files
package-lock.json CHANGED
@@ -8,9 +8,9 @@
8
  "name": "@aitube/website",
9
  "version": "0.0.0",
10
  "dependencies": {
11
- "@aitube/clap": "0.0.16",
12
- "@aitube/client": "0.0.24",
13
- "@aitube/engine": "0.0.6",
14
  "@huggingface/hub": "0.12.3-oauth",
15
  "@huggingface/inference": "^2.6.7",
16
  "@jcoreio/async-throttle": "^1.6.0",
@@ -118,9 +118,9 @@
118
  }
119
  },
120
  "node_modules/@aitube/clap": {
121
- "version": "0.0.16",
122
- "resolved": "https://registry.npmjs.org/@aitube/clap/-/clap-0.0.16.tgz",
123
- "integrity": "sha512-EcBu4gbc8Kx58rkMmLjh0uyCWVR4cSgWqALlQ50C1GbHRg/r81gDET6faWeos14ZGrtg4B0CJv8WUTIDE54pjg==",
124
  "dependencies": {
125
  "pure-uuid": "^1.8.1",
126
  "yaml": "^2.4.1"
@@ -130,22 +130,22 @@
130
  }
131
  },
132
  "node_modules/@aitube/client": {
133
- "version": "0.0.24",
134
- "resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.24.tgz",
135
- "integrity": "sha512-9J3PhVabyc/aOcB5j7wF5Fxb4VORB6aoHTTY6Y+ciFi96zp1YcFeYBMmBpIDq/6KqucIJFtANtZDPoy5a/j1Og==",
136
  "dependencies": {
137
  "query-string": "^9.0.0"
138
  },
139
  "peerDependencies": {
140
- "@aitube/clap": "0.0.16"
141
  }
142
  },
143
  "node_modules/@aitube/engine": {
144
- "version": "0.0.6",
145
- "resolved": "https://registry.npmjs.org/@aitube/engine/-/engine-0.0.6.tgz",
146
- "integrity": "sha512-Li+yE6MCIvD32Kyn43I7GHyQkmxtc/1/lrt3xPKMuFrWuc2FP9JR1ZYRlKqodmWqrSmhWCRBOt3Z6f62QpEvIQ==",
147
  "peerDependencies": {
148
- "@aitube/clap": "0.0.16"
149
  }
150
  },
151
  "node_modules/@alloc/quick-lru": {
@@ -6081,9 +6081,9 @@
6081
  }
6082
  },
6083
  "node_modules/jose": {
6084
- "version": "5.2.4",
6085
- "resolved": "https://registry.npmjs.org/jose/-/jose-5.2.4.tgz",
6086
- "integrity": "sha512-6ScbIk2WWCeXkmzF6bRPmEuaqy1m8SbsRFMa/FLrSCkGIhj8OLVG/IH+XHVmNMx/KUo8cVWEE6oKR4dJ+S0Rkg==",
6087
  "funding": {
6088
  "url": "https://github.com/sponsors/panva"
6089
  }
 
8
  "name": "@aitube/website",
9
  "version": "0.0.0",
10
  "dependencies": {
11
+ "@aitube/clap": "0.0.17",
12
+ "@aitube/client": "0.0.25",
13
+ "@aitube/engine": "0.0.7",
14
  "@huggingface/hub": "0.12.3-oauth",
15
  "@huggingface/inference": "^2.6.7",
16
  "@jcoreio/async-throttle": "^1.6.0",
 
118
  }
119
  },
120
  "node_modules/@aitube/clap": {
121
+ "version": "0.0.17",
122
+ "resolved": "https://registry.npmjs.org/@aitube/clap/-/clap-0.0.17.tgz",
123
+ "integrity": "sha512-g/jjePX2Hz9Eo4hk+rxd6FRwoy2Hx9sadGLgN9yWSm7dGHhr9B/DVv8eLjFabu7jgW0zvZZ1FHvlsNAby4Pr/Q==",
124
  "dependencies": {
125
  "pure-uuid": "^1.8.1",
126
  "yaml": "^2.4.1"
 
130
  }
131
  },
132
  "node_modules/@aitube/client": {
133
+ "version": "0.0.25",
134
+ "resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.25.tgz",
135
+ "integrity": "sha512-gX5eJOKiigVY3xK1NcsStruUuWMQbj4o4XHTceZhUyKCgHDldC0Y15mvIWVabCtEW5FFebdmhH0EFeg+PBMCsg==",
136
  "dependencies": {
137
  "query-string": "^9.0.0"
138
  },
139
  "peerDependencies": {
140
+ "@aitube/clap": "0.0.17"
141
  }
142
  },
143
  "node_modules/@aitube/engine": {
144
+ "version": "0.0.7",
145
+ "resolved": "https://registry.npmjs.org/@aitube/engine/-/engine-0.0.7.tgz",
146
+ "integrity": "sha512-Bu3MhQ7DelO+K/lU82MGSu1ksf3IUi3L9q1E2WfdBh4bkI0Kq8hH+OgUFKp38e5t1zhwjY2lJPNCbAJau5RcBA==",
147
  "peerDependencies": {
148
+ "@aitube/clap": "0.0.17"
149
  }
150
  },
151
  "node_modules/@alloc/quick-lru": {
 
6081
  }
6082
  },
6083
  "node_modules/jose": {
6084
+ "version": "5.3.0",
6085
+ "resolved": "https://registry.npmjs.org/jose/-/jose-5.3.0.tgz",
6086
+ "integrity": "sha512-IChe9AtAE79ru084ow8jzkN2lNrG3Ntfiv65Cvj9uOCE2m5LNsdHG+9EbxWxAoWRF9TgDOqLN5jm08++owDVRg==",
6087
  "funding": {
6088
  "url": "https://github.com/sponsors/panva"
6089
  }
package.json CHANGED
@@ -10,9 +10,9 @@
10
  "lint": "next lint"
11
  },
12
  "dependencies": {
13
- "@aitube/clap": "0.0.16",
14
- "@aitube/client": "0.0.24",
15
- "@aitube/engine": "0.0.6",
16
  "@huggingface/hub": "0.12.3-oauth",
17
  "@huggingface/inference": "^2.6.7",
18
  "@jcoreio/async-throttle": "^1.6.0",
 
10
  "lint": "next lint"
11
  },
12
  "dependencies": {
13
+ "@aitube/clap": "0.0.17",
14
+ "@aitube/client": "0.0.25",
15
+ "@aitube/engine": "0.0.7",
16
  "@huggingface/hub": "0.12.3-oauth",
17
  "@huggingface/inference": "^2.6.7",
18
  "@jcoreio/async-throttle": "^1.6.0",
src/app/api/generators/music/generateMusicAsBase64.ts DELETED
@@ -1,72 +0,0 @@
1
- import { sleep } from "@/lib/utils/sleep"
2
- import { generateMusicWithMusicgen } from "./generateMusicWithMusicgen"
3
-
4
- // apparently if we ask to generate like 4 minutes of audio, it crashes
5
- const maxAudioDurationInSec = 120
6
-
7
- // generate music
8
- // this may generate multiple tracks (one after another)
9
- // if the durationInSec parameter is larger than the max audio duration
10
- export async function generateMusicAsBase64({
11
- prompt,
12
- durationInSec,
13
- hd = false,
14
- }: {
15
- prompt: string
16
- durationInSec: number
17
-
18
- // use diffusion (increases quality, but requires more RAM)
19
- hd?: boolean
20
- }): Promise<string[]> {
21
-
22
- const musicPrompt = prompt || ""
23
-
24
- if (durationInSec < 1 || !musicPrompt) { return [] }
25
-
26
- if (durationInSec > maxAudioDurationInSec) {
27
- const halfTheDuration = Math.round(durationInSec / 2)
28
-
29
- // no! we shouldn't generate them in parallel
30
- // or at least, no now, because we only have ONE music server!
31
- // const chunks = await Promise.all([
32
- // generateMusic({ video, durationInSec: halfTheDuration })
33
- //])
34
- // return chunks.reduce((acc, tracks) => ([...acc, ...tracks]), [])
35
-
36
- // instead, let's play it safe and generate them one after another
37
- let chunks: string[] = []
38
- const partA = await generateMusicAsBase64({ prompt, hd, durationInSec: halfTheDuration })
39
- if (partA) { chunks = chunks.concat(partA) }
40
-
41
- const partB = await generateMusicAsBase64({ prompt, hd, durationInSec: halfTheDuration })
42
- if (partB) { chunks = chunks.concat(partB) }
43
-
44
- return [...partA, ...partB]
45
- }
46
-
47
- let musicTracks: string[] = []
48
-
49
- const musicParams = {
50
- prompt: musicPrompt,
51
- durationInSec,
52
- hd,
53
- }
54
- try {
55
- console.log(` |- generating ${durationInSec} seconds of music..`)
56
- const musicTrack = await generateMusicWithMusicgen(musicParams)
57
- if (!musicTrack?.length) { throw new Error("audio is too short to be valid!")}
58
- musicTracks.push(musicTrack)
59
- } catch (err) {
60
- try {
61
- await sleep(4000)
62
- const musicTrack = await generateMusicWithMusicgen(musicParams)
63
- if (!musicTrack?.length) { throw new Error("audio is too short to be valid!")}
64
- musicTracks.push(musicTrack)
65
- } catch (err2) {
66
- console.error(` |- failed to generate the music (yes, we retried after a delay)`)
67
- }
68
- }
69
-
70
-
71
- return musicTracks
72
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/app/api/v1/create/index.ts CHANGED
@@ -131,49 +131,59 @@ Output: `
131
  clap.segments.push(newSegment({
132
  track: 0,
133
  startTimeInMs: currentElapsedTimeInMs,
 
134
  assetDurationInMs: defaultSegmentDurationInMs,
135
  category: ClapSegmentCategory.VIDEO,
136
  prompt: image,
137
  outputType: ClapOutputType.VIDEO,
 
138
  }))
139
 
140
  clap.segments.push(newSegment({
141
  track: 1,
142
  startTimeInMs: currentElapsedTimeInMs,
 
143
  assetDurationInMs: defaultSegmentDurationInMs,
144
  category: ClapSegmentCategory.STORYBOARD,
145
  prompt: image,
146
  outputType: ClapOutputType.IMAGE,
 
147
  }))
148
 
149
  clap.segments.push(newSegment({
150
  track: 2,
151
  startTimeInMs: currentElapsedTimeInMs,
 
152
  assetDurationInMs: defaultSegmentDurationInMs,
153
  category: ClapSegmentCategory.INTERFACE,
154
  prompt: comment,
155
  // assetUrl: `data:text/plain;base64,${btoa(comment)}`,
156
  assetUrl: comment,
157
  outputType: ClapOutputType.TEXT,
 
158
  }))
159
 
160
  clap.segments.push(newSegment({
161
  track: 3,
162
  startTimeInMs: currentElapsedTimeInMs,
 
163
  assetDurationInMs: defaultSegmentDurationInMs,
164
  category: ClapSegmentCategory.DIALOGUE,
165
  prompt: voice,
166
  outputType: ClapOutputType.AUDIO,
 
167
  }))
168
 
169
  // the presence of a camera is mandatory
170
  clap.segments.push(newSegment({
171
  track: 4,
172
  startTimeInMs: currentElapsedTimeInMs,
 
173
  assetDurationInMs: defaultSegmentDurationInMs,
174
  category: ClapSegmentCategory.CAMERA,
175
  prompt: "video",
176
  outputType: ClapOutputType.TEXT,
 
177
  }))
178
 
179
  currentElapsedTimeInMs += defaultSegmentDurationInMs
@@ -187,6 +197,21 @@ Output: `
187
  prompt,
188
  latentStory: await clapToLatentStory(clap)
189
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  } catch (err) {
191
  console.error(`[api/v1/create] failed to generate music prompts`)
192
  musicPrompts.push("lofi hiphop loop")
 
131
  clap.segments.push(newSegment({
132
  track: 0,
133
  startTimeInMs: currentElapsedTimeInMs,
134
+ endTimeInMs: currentElapsedTimeInMs + defaultSegmentDurationInMs,
135
  assetDurationInMs: defaultSegmentDurationInMs,
136
  category: ClapSegmentCategory.VIDEO,
137
  prompt: image,
138
  outputType: ClapOutputType.VIDEO,
139
+ status: "to_generate",
140
  }))
141
 
142
  clap.segments.push(newSegment({
143
  track: 1,
144
  startTimeInMs: currentElapsedTimeInMs,
145
+ endTimeInMs: currentElapsedTimeInMs + defaultSegmentDurationInMs,
146
  assetDurationInMs: defaultSegmentDurationInMs,
147
  category: ClapSegmentCategory.STORYBOARD,
148
  prompt: image,
149
  outputType: ClapOutputType.IMAGE,
150
+ status: "to_generate",
151
  }))
152
 
153
  clap.segments.push(newSegment({
154
  track: 2,
155
  startTimeInMs: currentElapsedTimeInMs,
156
+ endTimeInMs: currentElapsedTimeInMs + defaultSegmentDurationInMs,
157
  assetDurationInMs: defaultSegmentDurationInMs,
158
  category: ClapSegmentCategory.INTERFACE,
159
  prompt: comment,
160
  // assetUrl: `data:text/plain;base64,${btoa(comment)}`,
161
  assetUrl: comment,
162
  outputType: ClapOutputType.TEXT,
163
+ status: "to_generate",
164
  }))
165
 
166
  clap.segments.push(newSegment({
167
  track: 3,
168
  startTimeInMs: currentElapsedTimeInMs,
169
+ endTimeInMs: currentElapsedTimeInMs + defaultSegmentDurationInMs,
170
  assetDurationInMs: defaultSegmentDurationInMs,
171
  category: ClapSegmentCategory.DIALOGUE,
172
  prompt: voice,
173
  outputType: ClapOutputType.AUDIO,
174
+ status: "to_generate",
175
  }))
176
 
177
  // the presence of a camera is mandatory
178
  clap.segments.push(newSegment({
179
  track: 4,
180
  startTimeInMs: currentElapsedTimeInMs,
181
+ endTimeInMs: currentElapsedTimeInMs + defaultSegmentDurationInMs,
182
  assetDurationInMs: defaultSegmentDurationInMs,
183
  category: ClapSegmentCategory.CAMERA,
184
  prompt: "video",
185
  outputType: ClapOutputType.TEXT,
186
+ status: "to_generate",
187
  }))
188
 
189
  currentElapsedTimeInMs += defaultSegmentDurationInMs
 
197
  prompt,
198
  latentStory: await clapToLatentStory(clap)
199
  })
200
+ const musicPrompt = musicPrompts.at(0)
201
+ if (!musicPrompt) { throw new Error(`not enough music prompts`) }
202
+
203
+ console.log("musicPrompt:", musicPrompt)
204
+
205
+ clap.segments.push(newSegment({
206
+ track: 5,
207
+ startTimeInMs: 0,
208
+ endTimeInMs: currentElapsedTimeInMs,
209
+ assetDurationInMs: currentElapsedTimeInMs,
210
+ category: ClapSegmentCategory.MUSIC,
211
+ prompt: musicPrompt,
212
+ outputType: ClapOutputType.AUDIO,
213
+ status: "to_generate",
214
+ }))
215
  } catch (err) {
216
  console.error(`[api/v1/create] failed to generate music prompts`)
217
  musicPrompts.push("lofi hiphop loop")
src/app/api/v1/edit/entities/generateEntityPrompts.ts CHANGED
@@ -37,7 +37,7 @@ export async function generateEntityPrompts({
37
 
38
  if (!latentStory.length) { throw new Error(`please provide a story`) }
39
 
40
- console.log("generateEntityPrompts(): latentStory:", latentStory)
41
 
42
  const userPrompt = `The input story is about: ${prompt}.
43
 
 
37
 
38
  if (!latentStory.length) { throw new Error(`please provide a story`) }
39
 
40
+ // console.log("generateEntityPrompts(): latentStory:", latentStory)
41
 
42
  const userPrompt = `The input story is about: ${prompt}.
43
 
src/app/api/v1/edit/music/cluster.ts ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { sleep } from "@/lib/utils/sleep"
2
+ import { ClusterMachine } from "../../types"
3
+
4
+ export const nbClusterMachines = 3
5
+ // make sure the machines are running!!
6
+
7
+ // https://huggingface.co/spaces/jbilcke-hf/ai-tube-model-musicgen-1/settings
8
+ // https://huggingface.co/spaces/jbilcke-hf/ai-tube-model-musicgen-2/settings
9
+ // https://huggingface.co/spaces/jbilcke-hf/ai-tube-model-musicgen-3/settings
10
+
11
+ // we maintain a global cluster state
12
+
13
+ export const clusterMachines: ClusterMachine[] = []
14
+ for (let i = 0; i < nbClusterMachines; i++) {
15
+ clusterMachines.push({
16
+ id: i,
17
+ url: `https://jbilcke-hf-ai-tube-model-musicgen-${i + 1}.hf.space`,
18
+ busy: false
19
+ })
20
+ }
21
+
22
+ export async function getClusterMachine(maxWaitTimeInMs: number = 10000): Promise<ClusterMachine> {
23
+ let clusterMachine: ClusterMachine | undefined = undefined
24
+ let timeSpentWaitingInMs = 0
25
+ const intervalInMs = 500
26
+
27
+ while (true) {
28
+ clusterMachine = clusterMachines.find(m => !m.busy)
29
+ if (clusterMachine) { break }
30
+ if (timeSpentWaitingInMs > maxWaitTimeInMs) { break }
31
+ await sleep(intervalInMs)
32
+ }
33
+
34
+ if (!clusterMachine) {
35
+ throw new Error(`failed to find a cluster machine within ${maxWaitTimeInMs/10} seconds`)
36
+ }
37
+
38
+ // change the global state
39
+ clusterMachine.busy = true
40
+
41
+ return clusterMachine
42
+ }
43
+
44
+ export const token = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}`
src/app/api/v1/edit/music/generateMusic.ts CHANGED
@@ -13,7 +13,7 @@ import { getSpeechBackgroundAudioPrompt } from "@aitube/engine"
13
 
14
  import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
15
  import { getMediaInfo } from "@/app/api/utils/getMediaInfo"
16
- import { generateMusicWithMusicgen } from "@/app/api/generators/music/generateMusicWithMusicgen"
17
 
18
  export async function generateMusic({
19
  musicSegment,
@@ -32,6 +32,14 @@ export async function generateMusic({
32
  console.log(`generateMusic(): music segment is empty, so skipping music generation.`)
33
  return
34
  }
 
 
 
 
 
 
 
 
35
  // for now we do something very basic
36
  const prompt = musicSegment.prompt
37
  if (!prompt) {
@@ -39,28 +47,47 @@ export async function generateMusic({
39
  return
40
  }
41
 
 
 
 
 
42
  const assetUrl = await generateMusicWithMusicgen({
43
  prompt,
44
- durationInSec: 10,
45
  hd: false,
46
  debug: true,
47
  neverThrow: true,
48
  })
49
 
 
50
  if (!assetUrl || assetUrl?.length < 30) {
51
- console.log(`generateMusic(): generated assetUrl is empty, so music generation failed.`)
 
 
 
 
 
 
 
52
  return
53
  }
54
 
 
 
 
 
 
 
 
55
  if (mode !== ClapCompletionMode.FULL) {
56
  console.log(`generateMusic(): adding music to a new clap file`)
57
  newerClap.segments.push(newSegment({
58
  ...musicSegment,
59
- assetUrl,
60
  }))
61
  } else {
62
  console.log(`generateMusic(): overwriting the music inside the existing clap file`)
63
- // this will replace the existing clap (normally)
64
- musicSegment.assetUrl = assetUrl
65
  }
66
  }
 
13
 
14
  import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
15
  import { getMediaInfo } from "@/app/api/utils/getMediaInfo"
16
+ import { generateMusicWithMusicgen } from "@/app/api/v1/edit/music/generateMusicWithMusicgen"
17
 
18
  export async function generateMusic({
19
  musicSegment,
 
32
  console.log(`generateMusic(): music segment is empty, so skipping music generation.`)
33
  return
34
  }
35
+
36
+ // for now we do something very basic
37
+
38
+ if (musicSegment.status === "completed") {
39
+ console.log(`generateMusic(): music segment is already generated, skipping doing it twice.`)
40
+ return
41
+ }
42
+
43
  // for now we do something very basic
44
  const prompt = musicSegment.prompt
45
  if (!prompt) {
 
47
  return
48
  }
49
 
50
+ const durationInSec = 10 // musicSegment.assetDurationInMs / 1000
51
+
52
+ console.log(`generateMusic(): generating a music with:\n duration: ${durationInSec} sec\n prompt: ${prompt}`)
53
+
54
  const assetUrl = await generateMusicWithMusicgen({
55
  prompt,
56
+ durationInSec,
57
  hd: false,
58
  debug: true,
59
  neverThrow: true,
60
  })
61
 
62
+
63
  if (!assetUrl || assetUrl?.length < 30) {
64
+ console.log(`generateMusic(): the generated assetUrl is empty, so music generation failed.`)
65
+ return
66
+ }
67
+
68
+ const { durationInMs, hasAudio } = await getMediaInfo(assetUrl)
69
+
70
+ if (!hasAudio) {
71
+ console.log(`generateMusic(): the generated music waveform is silent, so music generation failed.`)
72
  return
73
  }
74
 
75
+ const newProperties: Partial<ClapSegment> = {
76
+ assetUrl,
77
+ assetDurationInMs: durationInMs,
78
+ outputGain: 1.0,
79
+ status: "completed"
80
+ }
81
+
82
  if (mode !== ClapCompletionMode.FULL) {
83
  console.log(`generateMusic(): adding music to a new clap file`)
84
  newerClap.segments.push(newSegment({
85
  ...musicSegment,
86
+ ...newProperties,
87
  }))
88
  } else {
89
  console.log(`generateMusic(): overwriting the music inside the existing clap file`)
90
+ // this will update the existing clap (normally)
91
+ Object.assign(musicSegment, newProperties)
92
  }
93
  }
src/app/api/v1/edit/music/generateMusicPrompt.ts CHANGED
@@ -26,7 +26,7 @@ export async function generateMusicPrompts({
26
 
27
  if (!latentStory.length) { throw new Error(`please provide a story`) }
28
 
29
- console.log("generateMusicPrompts(): latentStory:", latentStory)
30
 
31
  const userPrompt = `The input story is about: ${prompt}.
32
 
 
26
 
27
  if (!latentStory.length) { throw new Error(`please provide a story`) }
28
 
29
+ // console.log("generateMusicPrompts(): latentStory:", latentStory)
30
 
31
  const userPrompt = `The input story is about: ${prompt}.
32
 
src/app/api/{generators → v1/edit}/music/generateMusicWithMusicgen.ts RENAMED
@@ -1,7 +1,6 @@
1
  import { addBase64Header } from "@/lib/data/addBase64Header"
2
-
3
- import { tryApiCalls } from "../../utils/tryApiCall"
4
  import { MusicGenerationParams } from "./types"
 
5
 
6
  const gradioSpaceApiUrl = `https://jbilcke-hf-ai-tube-model-musicgen.hf.space`
7
  const huggingFaceSpace = "jbilcke-hf/ai-tube-model-musicgen"
@@ -18,16 +17,21 @@ export async function generateMusicWithMusicgen({
18
  neverThrow = false,
19
  }: MusicGenerationParams): Promise<string> {
20
 
21
- const actualFunction = async () => {
 
 
 
 
22
 
23
- const res = await fetch(gradioSpaceApiUrl + (gradioSpaceApiUrl.endsWith("/") ? "" : "/") + "api/predict", {
 
24
  method: "POST",
25
  headers: {
26
  "Content-Type": "application/json",
27
  // Authorization: `Bearer ${token}`,
28
  },
29
  body: JSON.stringify({
30
- fn_index: 0, // <- important!
31
  data: [
32
  microserviceApiKey, // string in 'Secret Token' Textbox component
33
  "facebook/musicgen-stereo-large", // string in 'Model' Radio component
@@ -55,8 +59,7 @@ export async function generateMusicWithMusicgen({
55
  if (res.status !== 200) {
56
  throw new Error('Failed to fetch data')
57
  }
58
-
59
-
60
  const { data } = await res.json()
61
 
62
  // console.log("data:", data)
@@ -66,34 +69,17 @@ export async function generateMusicWithMusicgen({
66
  throw new Error(`Failed to fetch data (status: ${res.status})`)
67
  }
68
  // console.log("data:", data.slice(0, 50))
69
-
70
  if (!data[0]) {
71
  throw new Error(`the returned music was empty`)
72
  }
73
-
74
- console.log("data:", data[0].slice(0, 60))
75
- return addBase64Header(data[0] as string, "mp3")
76
- }
77
-
78
- try {
79
- if (!prompt?.length) {
80
- throw new Error(`prompt is too short!`)
81
- }
82
 
83
- const result = await tryApiCalls({
84
- func: actualFunction,
85
- huggingFaceSpace,
86
- debug,
87
- failureMessage: "failed to generate the music"
88
- })
89
-
90
- return result
91
  } catch (err) {
92
- if (neverThrow) {
93
- console.error(`generateVoiceWithMusicgen():`, err)
94
- return ""
95
- } else {
96
- throw err
97
- }
98
  }
99
  }
 
1
  import { addBase64Header } from "@/lib/data/addBase64Header"
 
 
2
  import { MusicGenerationParams } from "./types"
3
+ import { getClusterMachine } from "./cluster"
4
 
5
  const gradioSpaceApiUrl = `https://jbilcke-hf-ai-tube-model-musicgen.hf.space`
6
  const huggingFaceSpace = "jbilcke-hf/ai-tube-model-musicgen"
 
17
  neverThrow = false,
18
  }: MusicGenerationParams): Promise<string> {
19
 
20
+ if (!prompt?.length) {
21
+ throw new Error(`prompt is too short!`)
22
+ }
23
+
24
+ const machine = await getClusterMachine()
25
 
26
+ try {
27
+ const res = await fetch(machine.url + (machine.url.endsWith("/") ? "" : "/") + "api/predict", {
28
  method: "POST",
29
  headers: {
30
  "Content-Type": "application/json",
31
  // Authorization: `Bearer ${token}`,
32
  },
33
  body: JSON.stringify({
34
+ fn_index: 1, // <- important!
35
  data: [
36
  microserviceApiKey, // string in 'Secret Token' Textbox component
37
  "facebook/musicgen-stereo-large", // string in 'Model' Radio component
 
59
  if (res.status !== 200) {
60
  throw new Error('Failed to fetch data')
61
  }
62
+
 
63
  const { data } = await res.json()
64
 
65
  // console.log("data:", data)
 
69
  throw new Error(`Failed to fetch data (status: ${res.status})`)
70
  }
71
  // console.log("data:", data.slice(0, 50))
72
+
73
  if (!data[0]) {
74
  throw new Error(`the returned music was empty`)
75
  }
 
 
 
 
 
 
 
 
 
76
 
77
+ // console.log("data:", data[0].slice(0, 60))
78
+ return addBase64Header(data[0] as string, "mp3")
 
 
 
 
 
 
79
  } catch (err) {
80
+ throw err
81
+ } finally {
82
+ // important: we need to free up the machine!
83
+ machine.busy = false
 
 
84
  }
85
  }
src/app/api/v1/edit/music/systemPrompt.ts CHANGED
@@ -3,11 +3,16 @@ You are a backend API engine, designed to generate music prompt output from a st
3
 
4
  ## Prompting guidelines
5
 
 
 
 
6
  To create a music prompt, you need to combine styles with moods, plus a few other things.
 
7
  1. Please choose a base style among those categories: "Hip Hop and Rap track", "Classic track", "Jazz track", "Electronic and dance track", "Rock'n'Roll track", "Funk track", "Dubstep track", "Afrobeats", "Orchestral track", "Pop track", "Reggae track", "Metal track", "Country track", "Blues track", "Soul track", "R'n'B track", "Disco track", "Trap track", "Ambient track", "Lofi track", "Chill track", etc.
8
  2. Then choose a vibe: "with an happy vibe", "with a sad vibe", "with an angry vibe", "with a chill vibe", "with a romantic vibe", "with an epic vibe", "with an energetic vibe", "with a dreamy vibe", "with a mysterious vibe", "with a relaxing vibe", "with a dark vibe", "with an upbeat vibe", "with a motivational vibe", "with an inspiring vibe", "with a nostalgic vibe", "with a groovy vibe", "with a cheerful vibe", "with a melancholic vibe", "with a hopeful vibe", etc.
9
  3. build up a coherent description eg.: "80s pop track with bassy drums and synth", "90s rock song with loud guitars and heavy drums", "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions bpm: 130", "A cheerful country song with acoustic guitars", "lofi slow bpm electro chill with organic samples" etc.
10
 
 
11
  ## Example of input/output
12
 
13
  Given the following input story, provided as YAML:
 
3
 
4
  ## Prompting guidelines
5
 
6
+ Be concise! don't say things like "The track should have a cheerful vibe.." instead just add "cheerful vibe".
7
+ Avoid concepts that don't translate well to music (eg use "mysterious" instead of "investigative")
8
+
9
  To create a music prompt, you need to combine styles with moods, plus a few other things.
10
+
11
  1. Please choose a base style among those categories: "Hip Hop and Rap track", "Classic track", "Jazz track", "Electronic and dance track", "Rock'n'Roll track", "Funk track", "Dubstep track", "Afrobeats", "Orchestral track", "Pop track", "Reggae track", "Metal track", "Country track", "Blues track", "Soul track", "R'n'B track", "Disco track", "Trap track", "Ambient track", "Lofi track", "Chill track", etc.
12
  2. Then choose a vibe: "with an happy vibe", "with a sad vibe", "with an angry vibe", "with a chill vibe", "with a romantic vibe", "with an epic vibe", "with an energetic vibe", "with a dreamy vibe", "with a mysterious vibe", "with a relaxing vibe", "with a dark vibe", "with an upbeat vibe", "with a motivational vibe", "with an inspiring vibe", "with a nostalgic vibe", "with a groovy vibe", "with a cheerful vibe", "with a melancholic vibe", "with a hopeful vibe", etc.
13
  3. build up a coherent description eg.: "80s pop track with bassy drums and synth", "90s rock song with loud guitars and heavy drums", "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions bpm: 130", "A cheerful country song with acoustic guitars", "lofi slow bpm electro chill with organic samples" etc.
14
 
15
+
16
  ## Example of input/output
17
 
18
  Given the following input story, provided as YAML:
src/app/api/{generators → v1/edit}/music/types.ts RENAMED
File without changes
src/app/api/v1/export/route.ts CHANGED
@@ -23,6 +23,7 @@ export async function POST(req: NextRequest, res: NextResponse) {
23
  // console.log("[api/v1/export] sending blob to ai-tube-clap-exporter.hf.space")
24
 
25
  const result = await fetch(
 
26
  `https://jbilcke-hf-ai-tube-clap-exporter.hf.space?f=${format}`,
27
  { method: "POST", body: await req.blob() }
28
  )
 
23
  // console.log("[api/v1/export] sending blob to ai-tube-clap-exporter.hf.space")
24
 
25
  const result = await fetch(
26
+ // `http://localhost:7860?f=${format}`,
27
  `https://jbilcke-hf-ai-tube-clap-exporter.hf.space?f=${format}`,
28
  { method: "POST", body: await req.blob() }
29
  )
src/app/api/v1/render/cluster.ts CHANGED
@@ -1,10 +1,6 @@
1
  import { sleep } from "@/lib/utils/sleep"
 
2
 
3
- export type ClusterMachine = {
4
- id: number
5
- url: string
6
- busy: boolean
7
- }
8
 
9
  export const nbClusterMachines = 3
10
  // make sure the machines are running!!
 
1
  import { sleep } from "@/lib/utils/sleep"
2
+ import { ClusterMachine } from "../types"
3
 
 
 
 
 
 
4
 
5
  export const nbClusterMachines = 3
6
  // make sure the machines are running!!
src/app/api/v1/types.ts CHANGED
@@ -1,5 +1,11 @@
1
  import { ClapSegmentCategory } from "@aitube/clap"
2
 
 
 
 
 
 
 
3
  export type LatentEntity = {
4
  name: string
5
  category: ClapSegmentCategory
 
1
  import { ClapSegmentCategory } from "@aitube/clap"
2
 
3
+ export type ClusterMachine = {
4
+ id: number
5
+ url: string
6
+ busy: boolean
7
+ }
8
+
9
  export type LatentEntity = {
10
  name: string
11
  category: ClapSegmentCategory