Spaces:
Sleeping
Sleeping
Commit
·
2eea766
1
Parent(s):
b58c38b
use @aitube/engine
Browse files- package-lock.json +14 -0
- package.json +1 -0
- src/app/api/v1/README.md +1 -1
- src/app/api/v1/edit/dialogues/route.ts +2 -4
- src/app/api/v1/edit/storyboards/generateStoryboard.ts +1 -1
- src/app/api/v1/edit/storyboards/route.ts +3 -2
- src/app/api/v1/edit/videos/generateVideo.ts +61 -0
- src/app/api/v1/edit/videos/route.ts +103 -0
- src/components/interface/latent-engine/core/prompts/getCharacterPrompt.ts +0 -26
- src/components/interface/latent-engine/core/prompts/getCharacterReferencePrompt.ts +0 -29
- src/components/interface/latent-engine/core/prompts/getSpeechBackgroundAudioPrompt.ts +0 -52
- src/components/interface/latent-engine/core/prompts/getSpeechForegroundAudioPrompt.ts +0 -20
- src/components/interface/latent-engine/core/prompts/getVideoPrompt.ts +0 -90
- src/components/interface/latent-engine/core/useLatentEngine.ts +2 -4
package-lock.json
CHANGED
@@ -10,6 +10,7 @@
|
|
10 |
"dependencies": {
|
11 |
"@aitube/clap": "0.0.7",
|
12 |
"@aitube/client": "0.0.7",
|
|
|
13 |
"@huggingface/hub": "0.12.3-oauth",
|
14 |
"@huggingface/inference": "^2.6.7",
|
15 |
"@jcoreio/async-throttle": "^1.6.0",
|
@@ -139,6 +140,19 @@
|
|
139 |
"typescript": "^5.4.5"
|
140 |
}
|
141 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
"node_modules/@alloc/quick-lru": {
|
143 |
"version": "5.2.0",
|
144 |
"resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz",
|
|
|
10 |
"dependencies": {
|
11 |
"@aitube/clap": "0.0.7",
|
12 |
"@aitube/client": "0.0.7",
|
13 |
+
"@aitube/engine": "^0.0.0",
|
14 |
"@huggingface/hub": "0.12.3-oauth",
|
15 |
"@huggingface/inference": "^2.6.7",
|
16 |
"@jcoreio/async-throttle": "^1.6.0",
|
|
|
140 |
"typescript": "^5.4.5"
|
141 |
}
|
142 |
},
|
143 |
+
"node_modules/@aitube/engine": {
|
144 |
+
"version": "0.0.0",
|
145 |
+
"resolved": "https://registry.npmjs.org/@aitube/engine/-/engine-0.0.0.tgz",
|
146 |
+
"integrity": "sha512-3yOAXXCUf6pehdB2t1Nt/F4CC2biC9LA3LZK1d7PwmEA4cahP5q5sb2P1633mSrX7ElJEFkXIsKdEz6hltIP2Q==",
|
147 |
+
"dependencies": {
|
148 |
+
"uuid": "^9.0.1",
|
149 |
+
"yaml": "^2.4.1"
|
150 |
+
},
|
151 |
+
"peerDependencies": {
|
152 |
+
"@aitube/clap": "0.0.7",
|
153 |
+
"typescript": "^5.4.5"
|
154 |
+
}
|
155 |
+
},
|
156 |
"node_modules/@alloc/quick-lru": {
|
157 |
"version": "5.2.0",
|
158 |
"resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz",
|
package.json
CHANGED
@@ -11,6 +11,7 @@
|
|
11 |
"dependencies": {
|
12 |
"@aitube/clap": "0.0.7",
|
13 |
"@aitube/client": "0.0.7",
|
|
|
14 |
"@huggingface/hub": "0.12.3-oauth",
|
15 |
"@huggingface/inference": "^2.6.7",
|
16 |
"@jcoreio/async-throttle": "^1.6.0",
|
|
|
11 |
"dependencies": {
|
12 |
"@aitube/clap": "0.0.7",
|
13 |
"@aitube/client": "0.0.7",
|
14 |
+
"@aitube/engine": "^0.0.0",
|
15 |
"@huggingface/hub": "0.12.3-oauth",
|
16 |
"@huggingface/inference": "^2.6.7",
|
17 |
"@jcoreio/async-throttle": "^1.6.0",
|
src/app/api/v1/README.md
CHANGED
@@ -12,7 +12,7 @@ This endpoint will generate a .clap (story only) from a prompt
|
|
12 |
|
13 |
## /edit
|
14 |
|
15 |
-
This endpoint will edit a .clap to change
|
16 |
|
17 |
## /export
|
18 |
|
|
|
12 |
|
13 |
## /edit
|
14 |
|
15 |
+
This endpoint will edit a .clap to change its entities, storyboards, videos, etc.
|
16 |
|
17 |
## /export
|
18 |
|
src/app/api/v1/edit/dialogues/route.ts
CHANGED
@@ -1,13 +1,11 @@
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
|
3 |
-
import {
|
4 |
|
5 |
import { startOfSegment1IsWithinSegment2 } from "@/lib/utils/startOfSegment1IsWithinSegment2"
|
6 |
import { getToken } from "@/app/api/auth/getToken"
|
7 |
-
|
8 |
-
import { getSpeechBackgroundAudioPrompt } from "@/components/interface/latent-engine/core/prompts/getSpeechBackgroundAudioPrompt"
|
9 |
-
import { getSpeechForegroundAudioPrompt } from "@/components/interface/latent-engine/core/prompts/getSpeechForegroundAudioPrompt"
|
10 |
import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
|
|
|
11 |
|
12 |
// a helper to generate speech for a Clap
|
13 |
export async function POST(req: NextRequest) {
|
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
|
3 |
+
import { ClapProject, ClapSegment, getClapAssetSourceType, parseClap, serializeClap } from "@aitube/clap"
|
4 |
|
5 |
import { startOfSegment1IsWithinSegment2 } from "@/lib/utils/startOfSegment1IsWithinSegment2"
|
6 |
import { getToken } from "@/app/api/auth/getToken"
|
|
|
|
|
|
|
7 |
import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
|
8 |
+
import { getSpeechBackgroundAudioPrompt } from "@aitube/engine"
|
9 |
|
10 |
// a helper to generate speech for a Clap
|
11 |
export async function POST(req: NextRequest) {
|
src/app/api/v1/edit/storyboards/generateStoryboard.ts
CHANGED
@@ -16,7 +16,7 @@ export async function generateStoryboard({
|
|
16 |
width?: number
|
17 |
height?: number
|
18 |
seed?: number
|
19 |
-
}) {
|
20 |
|
21 |
width = getValidNumber(width, 256, 8192, 512)
|
22 |
height = getValidNumber(height, 256, 8192, 288)
|
|
|
16 |
width?: number
|
17 |
height?: number
|
18 |
seed?: number
|
19 |
+
}): Promise<string> {
|
20 |
|
21 |
width = getValidNumber(width, 256, 8192, 512)
|
22 |
height = getValidNumber(height, 256, 8192, 288)
|
src/app/api/v1/edit/storyboards/route.ts
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
|
3 |
-
import { ClapProject, ClapSegment, newSegment, parseClap, serializeClap } from "@aitube/clap"
|
|
|
4 |
|
5 |
import { startOfSegment1IsWithinSegment2 } from "@/lib/utils/startOfSegment1IsWithinSegment2"
|
6 |
-
import { getVideoPrompt } from "@/components/interface/latent-engine/core/prompts/getVideoPrompt"
|
7 |
import { getToken } from "@/app/api/auth/getToken"
|
8 |
|
9 |
import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
|
@@ -81,6 +81,7 @@ export async function POST(req: NextRequest) {
|
|
81 |
width: clap.meta.width,
|
82 |
height: clap.meta.height,
|
83 |
})
|
|
|
84 |
} catch (err) {
|
85 |
console.log(`[api/generate/storyboards] failed to generate an image: ${err}`)
|
86 |
throw err
|
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
|
3 |
+
import { ClapProject, ClapSegment, getClapAssetSourceType, newSegment, parseClap, serializeClap } from "@aitube/clap"
|
4 |
+
import { getVideoPrompt } from "@aitube/engine"
|
5 |
|
6 |
import { startOfSegment1IsWithinSegment2 } from "@/lib/utils/startOfSegment1IsWithinSegment2"
|
|
|
7 |
import { getToken } from "@/app/api/auth/getToken"
|
8 |
|
9 |
import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
|
|
|
81 |
width: clap.meta.width,
|
82 |
height: clap.meta.height,
|
83 |
})
|
84 |
+
shotStoryboardSegment.assetSourceType = getClapAssetSourceType(shotStoryboardSegment.assetUrl)
|
85 |
} catch (err) {
|
86 |
console.log(`[api/generate/storyboards] failed to generate an image: ${err}`)
|
87 |
throw err
|
src/app/api/v1/edit/videos/generateVideo.ts
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { generateSeed, getValidNumber } from "@aitube/clap"
|
2 |
+
|
3 |
+
import { newRender, getRender } from "@/app/api/providers/videochain/renderWithVideoChain"
|
4 |
+
import { sleep } from "@/lib/utils/sleep"
|
5 |
+
import { getNegativePrompt, getPositivePrompt } from "@/app/api/utils/imagePrompts"
|
6 |
+
|
7 |
+
/**
 * Generate a video for a prompt through the VideoChain provider and return
 * the asset URL of the finished render.
 *
 * A render job is created with `newRender`, then its status is polled with
 * `getRender` until it is "completed", it is "error", or the polling budget
 * is exhausted.
 *
 * NOTE(review): the prompt is wrapped with `getPositivePrompt` here, and the
 * videos route also wraps it before calling this function, so the positive
 * keywords may end up applied twice — confirm which side should own this.
 *
 * @param prompt - text prompt describing the video
 * @param width - optional width, passed through `getValidNumber(width, 256, 8192, 288)`
 *   (presumably min / max / fallback — confirm against @aitube/clap)
 * @param height - optional height, passed through `getValidNumber(height, 256, 8192, 512)`
 * @param seed - optional seed; any falsy value (including 0) is replaced by `generateSeed()`
 * @returns the `assetUrl` of the completed render
 * @throws Error if the render reports the "error" status, or if it is still
 *   not completed once all status checks have been used
 */
export async function generateVideo({
  prompt,
  // negativePrompt,
  width,
  height,
  seed,
}: {
  prompt: string
  // negativePrompt?: string
  width?: number
  height?: number
  seed?: number
}): Promise<string> {

  // polling budget: number of status requests, and pause before each of them
  const maxNbPolls = 10
  const delayBetweenPollsInMs = 2000 // minimum wait time

  // we want to keep it vertical
  width = getValidNumber(width, 256, 8192, 288)
  height = getValidNumber(height, 256, 8192, 512)

  prompt = getPositivePrompt(prompt)
  const negativePrompt = getNegativePrompt()

  let render = await newRender({
    prompt,
    negativePrompt,
    nbFrames: 80,
    nbFPS: 24,
    nbSteps: 4,
    width,
    height,
    turbo: true,
    shouldRenewCache: true,
    seed: seed || generateSeed()
  })

  // The status is inspected BEFORE deciding whether to poll again, so that
  // the result of the very last `getRender` call is never thrown away
  // (previously the last poll was fetched but not checked).
  for (let nbPolls = 0; ; nbPolls++) {
    if (render.status === "completed") {
      return render.assetUrl
    }

    if (render.status === "error") {
      console.error(render.error)
      throw new Error(`failed to generate the video file ${render.error}`)
    }

    if (nbPolls >= maxNbPolls) {
      break
    }

    await sleep(delayBetweenPollsInMs)

    render = await getRender(render.renderId)
  }

  throw new Error(`failed to generate the video file (render still not completed after ${maxNbPolls} status checks)`)
}
|
src/app/api/v1/edit/videos/route.ts
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { NextResponse, NextRequest } from "next/server"
|
2 |
+
|
3 |
+
import { ClapProject, ClapSegment, getClapAssetSourceType, newSegment, parseClap, serializeClap } from "@aitube/clap"
|
4 |
+
import { getVideoPrompt } from "@aitube/engine"
|
5 |
+
|
6 |
+
import { startOfSegment1IsWithinSegment2 } from "@/lib/utils/startOfSegment1IsWithinSegment2"
|
7 |
+
import { getToken } from "@/app/api/auth/getToken"
|
8 |
+
import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
|
9 |
+
|
10 |
+
import { generateVideo } from "./generateVideo"
|
11 |
+
|
12 |
+
|
13 |
+
// a helper to generate videos for a Clap
// this is mostly used by external apps such as the Stories Factory
// this function will:
//
// - add missing videos to the shots
// - add missing video prompts
// - add missing video files
//
// The request body is parsed with parseClap, and the response body is the
// same project re-serialized with serializeClap.
// Any failure (no segments, more than 32 shots, a failed video generation)
// is thrown rather than turned into an error response.
export async function POST(req: NextRequest) {

  // NOTE(review): the token is not used below; the call is kept as-is in case
  // it is relied upon for its side effects — confirm
  const jwtToken = await getToken({ user: "anonymous" })

  const blob = await req.blob()

  const clap: ClapProject = await parseClap(blob)

  if (!clap?.segments) { throw new Error(`no segment found in the provided clap!`) }

  console.log(`[api/generate/videos] detected ${clap.segments.length} segments`)

  // each "camera" segment is treated as one shot
  const shotsSegments: ClapSegment[] = clap.segments.filter(s => s.category === "camera")
  console.log(`[api/generate/videos] detected ${shotsSegments.length} shots`)

  if (shotsSegments.length > 32) {
    throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
  }

  // shots are processed one after the other (each one waits for its video)
  for (const shotSegment of shotsSegments) {

    // all the segments selected for this shot by startOfSegment1IsWithinSegment2
    const shotSegments: ClapSegment[] = clap.segments.filter(s =>
      startOfSegment1IsWithinSegment2(s, shotSegment)
    )

    const shotVideoSegments: ClapSegment[] = shotSegments.filter(s =>
      s.category === "video"
    )

    // only the first video segment of the shot is considered
    let shotVideoSegment: ClapSegment | undefined = shotVideoSegments.at(0)

    console.log(`[api/generate/videos] shot [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}] has ${shotSegments.length} segments (${shotVideoSegments.length} videos)`)

    // TASK 1: GENERATE MISSING VIDEO SEGMENT
    if (!shotVideoSegment) {
      shotVideoSegment = newSegment({
        track: 1,
        startTimeInMs: shotSegment.startTimeInMs,
        endTimeInMs: shotSegment.endTimeInMs,
        assetDurationInMs: shotSegment.assetDurationInMs,
        category: "video",
        prompt: "",
        assetUrl: "",
        outputType: "video"
      })

      // the new segment has to be registered in the project, otherwise the
      // prompt and the video generated below are never part of the
      // serialized clap returned to the caller
      // NOTE(review): appended at the end of the list — confirm that
      // consumers do not expect the segments to be sorted
      clap.segments.push(shotVideoSegment)

      console.log(`[api/generate/videos] generated video segment [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}]`)
    }

    // TASK 2: GENERATE MISSING VIDEO PROMPT
    if (shotVideoSegment && !shotVideoSegment?.prompt) {
      // the video prompt is missing, let's generate it from the shot's segments
      shotVideoSegment.prompt = getVideoPrompt(shotSegments, clap.entityIndex, ["high quality", "crisp", "detailed"])
      console.log(`[api/generate/videos] generating video prompt: ${shotVideoSegment.prompt}`)
    }

    // TASK 3: GENERATE MISSING VIDEO FILE
    if (shotVideoSegment && !shotVideoSegment.assetUrl) {
      console.log(`[api/generate/videos] generating video file..`)

      try {
        shotVideoSegment.assetUrl = await generateVideo({
          prompt: getPositivePrompt(shotVideoSegment.prompt),
          width: clap.meta.width,
          height: clap.meta.height,
        })
        shotVideoSegment.assetSourceType = getClapAssetSourceType(shotVideoSegment.assetUrl)
      } catch (err) {
        // logged for context, then re-thrown: one failed shot fails the whole request
        console.log(`[api/generate/videos] failed to generate a video file: ${err}`)
        throw err
      }

      console.log(`[api/generate/videos] generated video files: ${shotVideoSegment?.assetUrl?.slice?.(0, 50)}...`)
    } else {
      console.log(`[api/generate/videos] there is already a video file: ${shotVideoSegment?.assetUrl?.slice?.(0, 50)}...`)
    }
  }

  console.log(`[api/generate/videos] returning the clap augmented with videos`)

  return new NextResponse(await serializeClap(clap), {
    status: 200,
    headers: new Headers({ "content-type": "application/x-gzip" }),
  })
}
|
src/components/interface/latent-engine/core/prompts/getCharacterPrompt.ts
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
import { ClapEntity } from "@aitube/clap"
|
2 |
-
|
3 |
-
export function getCharacterPrompt(entity: ClapEntity): string {
|
4 |
-
|
5 |
-
let characterPrompt = ""
|
6 |
-
if (entity.description) {
|
7 |
-
characterPrompt = [
|
8 |
-
// the label (character name) can help making the prompt more unique
|
9 |
-
// this might backfires however, if the name is
|
10 |
-
// something like "SUN", "SILVER" etc
|
11 |
-
// I'm not sure stable diffusion really needs this,
|
12 |
-
// so let's skip it for now (might still be useful for locations, though)
|
13 |
-
// we also want to avoid triggering "famous people" (BARBOSSA etc)
|
14 |
-
// entity.label,
|
15 |
-
|
16 |
-
entity.description
|
17 |
-
].join(", ")
|
18 |
-
} else {
|
19 |
-
characterPrompt = [
|
20 |
-
entity.gender !== "object" ? entity.gender : "",
|
21 |
-
entity.age ? `aged ${entity.age}yo` : '',
|
22 |
-
entity.label ? `named ${entity.label}` : '',
|
23 |
-
].map(i => i.trim()).filter(i => i).join(", ")
|
24 |
-
}
|
25 |
-
return characterPrompt
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/components/interface/latent-engine/core/prompts/getCharacterReferencePrompt.ts
DELETED
@@ -1,29 +0,0 @@
|
|
1 |
-
import { ClapEntity } from "@aitube/clap";
|
2 |
-
|
3 |
-
import { getCharacterPrompt } from "./getCharacterPrompt";
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Return a prompt for a "formal" picture, centered, neutral etc
|
7 |
-
* @param entity
|
8 |
-
* @returns
|
9 |
-
*/
|
10 |
-
export function getCharacterReferencePrompt(entity: ClapEntity) {
|
11 |
-
const characterPrompt = [
|
12 |
-
`beautiful`,
|
13 |
-
`close-up`,
|
14 |
-
`photo portrait`,
|
15 |
-
`id photo`,
|
16 |
-
getCharacterPrompt(entity),
|
17 |
-
`neutral expression`,
|
18 |
-
`neutral background`,
|
19 |
-
`frontal`,
|
20 |
-
`photo studio`,
|
21 |
-
`crisp`,
|
22 |
-
`sharp`,
|
23 |
-
`intricate details`,
|
24 |
-
`centered`,
|
25 |
-
// `aligned`
|
26 |
-
].map(i => i.trim()).filter(i => i).join(", ")
|
27 |
-
|
28 |
-
return characterPrompt
|
29 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/components/interface/latent-engine/core/prompts/getSpeechBackgroundAudioPrompt.ts
DELETED
@@ -1,52 +0,0 @@
|
|
1 |
-
import { ClapEntity, ClapSegment } from "@aitube/clap"
|
2 |
-
|
3 |
-
import { getCharacterPrompt } from "./getCharacterPrompt"
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Construct an audio background for a voice from a list of active segments
|
7 |
-
*
|
8 |
-
* @param segments
|
9 |
-
* @returns
|
10 |
-
*/
|
11 |
-
export function getSpeechBackgroundAudioPrompt(
|
12 |
-
segments: ClapSegment[] = [],
|
13 |
-
entitiesById: Record<string, ClapEntity> = {},
|
14 |
-
extraPositivePrompt: string[] = [] // "clear sound, high quality" etc
|
15 |
-
): string {
|
16 |
-
return segments
|
17 |
-
.filter(({ category, outputType }) => (
|
18 |
-
category === "dialogue" ||
|
19 |
-
category === "weather" ||
|
20 |
-
category === "location"
|
21 |
-
))
|
22 |
-
.sort((a, b) => b.label.localeCompare(a.label))
|
23 |
-
.map(segment => {
|
24 |
-
const entity: ClapEntity | undefined = entitiesById[segment?.entityId || ""] || undefined
|
25 |
-
|
26 |
-
if (segment.category === "dialogue") {
|
27 |
-
// if we can't find the entity then we are unable
|
28 |
-
// to make any assumption about the gender, age and voice timbre
|
29 |
-
if (!entity) {
|
30 |
-
return `person, speaking normally`
|
31 |
-
}
|
32 |
-
|
33 |
-
const characterPrompt = getCharacterPrompt(entity)
|
34 |
-
|
35 |
-
return `${characterPrompt}, speaking normally`
|
36 |
-
|
37 |
-
} else if (segment.category === "location") {
|
38 |
-
// the location is part of the background noise
|
39 |
-
// but this might produce unexpected results - we'll see!
|
40 |
-
return segment.prompt
|
41 |
-
} else if (segment.category === "weather") {
|
42 |
-
// the weather is part of the background noise
|
43 |
-
// here too this might produce weird and unexpected results 🍿
|
44 |
-
return segment.prompt
|
45 |
-
}
|
46 |
-
// ignore the rest
|
47 |
-
return ""
|
48 |
-
})
|
49 |
-
.filter(x => x)
|
50 |
-
.concat([ ...extraPositivePrompt ])
|
51 |
-
.join(". ")
|
52 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/components/interface/latent-engine/core/prompts/getSpeechForegroundAudioPrompt.ts
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
import { ClapSegment } from "@aitube/clap"
|
2 |
-
|
3 |
-
/**
|
4 |
-
* Construct an audio foreground for a voice from a list of active segments
|
5 |
-
*
|
6 |
-
* This is the "dialogue" prompt, ie. the actual spoken words,
|
7 |
-
* so we don't need to do anything fancy here, we only use the raw text
|
8 |
-
*
|
9 |
-
* @param segments
|
10 |
-
* @returns
|
11 |
-
*/
|
12 |
-
export function getSpeechForegroundAudioPrompt(
|
13 |
-
segments: ClapSegment[] = []
|
14 |
-
): string {
|
15 |
-
return segments
|
16 |
-
.filter(({ category }) => category === "dialogue")
|
17 |
-
.sort((a, b) => b.label.localeCompare(a.label))
|
18 |
-
.map(({ prompt }) => prompt).filter(x => x)
|
19 |
-
.join(". ")
|
20 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/components/interface/latent-engine/core/prompts/getVideoPrompt.ts
DELETED
@@ -1,90 +0,0 @@
|
|
1 |
-
import { ClapEntity, ClapSegment } from "@aitube/clap"
|
2 |
-
|
3 |
-
import { deduplicatePrompt } from "../../utils/prompting/deduplicatePrompt"
|
4 |
-
|
5 |
-
import { getCharacterPrompt } from "./getCharacterPrompt"
|
6 |
-
|
7 |
-
/**
|
8 |
-
* Construct a video prompt from a list of active segments
|
9 |
-
*
|
10 |
-
* @param segments
|
11 |
-
* @returns
|
12 |
-
*/
|
13 |
-
export function getVideoPrompt(
|
14 |
-
segments: ClapSegment[] = [],
|
15 |
-
entitiesIndex: Record<string, ClapEntity> = {},
|
16 |
-
extraPositivePrompt: string[] = []
|
17 |
-
): string {
|
18 |
-
|
19 |
-
// console.log("entitiesIndex:", entitiesIndex)
|
20 |
-
|
21 |
-
// to construct the video we need to collect all the segments describing it
|
22 |
-
// we ignore unrelated categories (music, dialogue) or non-prompt items (eg. an audio sample)
|
23 |
-
const tmp = segments
|
24 |
-
.filter(({ category, outputType }) => {
|
25 |
-
if (outputType === "audio") {
|
26 |
-
return false
|
27 |
-
}
|
28 |
-
|
29 |
-
if (
|
30 |
-
category === "character" ||
|
31 |
-
category === "location" ||
|
32 |
-
category === "time" ||
|
33 |
-
category === "era" ||
|
34 |
-
category === "lighting" ||
|
35 |
-
category === "weather" ||
|
36 |
-
category === "action" ||
|
37 |
-
category === "style" ||
|
38 |
-
category === "camera" ||
|
39 |
-
category === "generic"
|
40 |
-
) {
|
41 |
-
return true
|
42 |
-
}
|
43 |
-
return false
|
44 |
-
})
|
45 |
-
|
46 |
-
tmp.sort((a, b) => b.label.localeCompare(a.label))
|
47 |
-
|
48 |
-
let videoPrompt = tmp.map(segment => {
|
49 |
-
const entity: ClapEntity | undefined = entitiesIndex[segment?.entityId || ""] || undefined
|
50 |
-
|
51 |
-
if (segment.category === "dialogue") {
|
52 |
-
|
53 |
-
// if we can't find the entity, then we are unable
|
54 |
-
// to make any assumption about the gender, age or appearance
|
55 |
-
if (!entity) {
|
56 |
-
console.log("ERROR: this is a dialogue, but couldn't find the entity!")
|
57 |
-
return `portrait of a person speaking, blurry background, bokeh`
|
58 |
-
}
|
59 |
-
|
60 |
-
const characterTrigger = entity?.triggerName || ""
|
61 |
-
const characterLabel = entity?.label || ""
|
62 |
-
const characterDescription = entity?.description || ""
|
63 |
-
const dialogueLine = segment?.prompt || ""
|
64 |
-
|
65 |
-
const characterPrompt = getCharacterPrompt(entity)
|
66 |
-
|
67 |
-
// in the context of a video, we some something additional:
|
68 |
-
// we create a "bokeh" style
|
69 |
-
return `portrait of a person speaking, blurry background, bokeh, ${characterPrompt}`
|
70 |
-
|
71 |
-
} else if (segment.category === "location") {
|
72 |
-
|
73 |
-
// if we can't find the location's entity, we default to returning the prompt
|
74 |
-
if (!entity) {
|
75 |
-
console.log("ERROR: this is a location, but couldn't find the entity!")
|
76 |
-
return segment.prompt
|
77 |
-
}
|
78 |
-
|
79 |
-
return entity.description
|
80 |
-
} else {
|
81 |
-
return segment.prompt
|
82 |
-
}
|
83 |
-
}).filter(x => x)
|
84 |
-
|
85 |
-
videoPrompt = videoPrompt.concat([
|
86 |
-
...extraPositivePrompt
|
87 |
-
])
|
88 |
-
|
89 |
-
return deduplicatePrompt(videoPrompt.join(", "))
|
90 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/components/interface/latent-engine/core/useLatentEngine.ts
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
|
2 |
import { create } from "zustand"
|
3 |
|
4 |
-
import {
|
|
|
5 |
|
6 |
import { LatentEngineStore } from "./types"
|
7 |
import { resolveSegments } from "../resolvers/resolveSegments"
|
@@ -10,13 +11,10 @@ import { fetchLatentClap } from "./generators/fetchLatentClap"
|
|
10 |
import { InteractiveSegmenterResult, MPMask } from "@mediapipe/tasks-vision"
|
11 |
import { segmentFrame } from "@/lib/on-device-ai/segmentFrameOnClick"
|
12 |
import { drawSegmentation } from "../utils/canvas/drawSegmentation"
|
13 |
-
import { filterImage } from "@/lib/on-device-ai/filterImage"
|
14 |
import { getZIndexDepth } from "../utils/data/getZIndexDepth"
|
15 |
import { getSegmentStartAt } from "../utils/data/getSegmentStartAt"
|
16 |
-
import { getSegmentId } from "../utils/data/getSegmentId"
|
17 |
import { getElementsSortedByStartAt } from "../utils/data/getElementsSortedByStartAt"
|
18 |
import { getSegmentEndAt } from "../utils/data/getSegmentEndAt"
|
19 |
-
import { getVideoPrompt } from "./prompts/getVideoPrompt"
|
20 |
import { setZIndexDepthId } from "../utils/data/setZIndexDepth"
|
21 |
import { setSegmentStartAt } from "../utils/data/setSegmentStartAt"
|
22 |
import { setSegmentEndAt } from "../utils/data/setSegmentEndAt"
|
|
|
1 |
|
2 |
import { create } from "zustand"
|
3 |
|
4 |
+
import { ClapProject, ClapSegment, newClap, parseClap } from "@aitube/clap"
|
5 |
+
import { getVideoPrompt } from "@aitube/engine"
|
6 |
|
7 |
import { LatentEngineStore } from "./types"
|
8 |
import { resolveSegments } from "../resolvers/resolveSegments"
|
|
|
11 |
import { InteractiveSegmenterResult, MPMask } from "@mediapipe/tasks-vision"
|
12 |
import { segmentFrame } from "@/lib/on-device-ai/segmentFrameOnClick"
|
13 |
import { drawSegmentation } from "../utils/canvas/drawSegmentation"
|
|
|
14 |
import { getZIndexDepth } from "../utils/data/getZIndexDepth"
|
15 |
import { getSegmentStartAt } from "../utils/data/getSegmentStartAt"
|
|
|
16 |
import { getElementsSortedByStartAt } from "../utils/data/getElementsSortedByStartAt"
|
17 |
import { getSegmentEndAt } from "../utils/data/getSegmentEndAt"
|
|
|
18 |
import { setZIndexDepthId } from "../utils/data/setZIndexDepth"
|
19 |
import { setSegmentStartAt } from "../utils/data/setSegmentStartAt"
|
20 |
import { setSegmentEndAt } from "../utils/data/setSegmentEndAt"
|