jbilcke-hf HF staff commited on
Commit
8919651
1 Parent(s): 5513dc6

eh, not bad for a side project

Browse files
Files changed (34) hide show
  1. package-lock.json +19 -16
  2. package.json +1 -1
  3. src/app/api/generators/image/generateImageWithVideochain.ts +4 -1
  4. src/app/api/generators/search/unknownObjectToLatentSearchResults.ts +4 -4
  5. src/app/api/parsers/parseCompletionMode.ts +19 -4
  6. src/app/api/parsers/parseEntityPrompts.ts +11 -0
  7. src/app/api/parsers/parseSupportedExportFormat.ts +16 -0
  8. src/app/api/parsers/{parseString.ts → parseTrimmedString.ts} +1 -1
  9. src/app/api/v1/auth/config.ts +5 -0
  10. src/app/api/{auth → v1/auth}/getToken.ts +5 -5
  11. src/app/api/v1/auth/parseToken.ts +7 -0
  12. src/app/api/v1/auth/throwIfInvalidToken.ts +22 -0
  13. src/app/api/v1/create/index.ts +15 -11
  14. src/app/api/v1/create/route.ts +5 -2
  15. src/app/api/v1/create/systemPrompt.ts +15 -11
  16. src/app/api/v1/create/types.ts +0 -6
  17. src/app/api/v1/edit/dialogues/processShot.ts +9 -4
  18. src/app/api/v1/edit/dialogues/route.ts +4 -4
  19. src/app/api/v1/edit/entities/clapToLatentStory.ts +50 -0
  20. src/app/api/v1/edit/entities/generateEntityPrompts.ts +135 -0
  21. src/app/api/v1/edit/entities/generateImageID.ts +0 -1
  22. src/app/api/v1/edit/entities/index.ts +97 -7
  23. src/app/api/v1/edit/entities/route.ts +7 -3
  24. src/app/api/v1/edit/entities/systemPrompt.ts +62 -1
  25. src/app/api/v1/edit/storyboards/processShot.ts +10 -3
  26. src/app/api/v1/edit/storyboards/route.ts +6 -7
  27. src/app/api/v1/edit/types.ts +0 -8
  28. src/app/api/v1/edit/videos/processShot.ts +11 -3
  29. src/app/api/v1/edit/videos/route.ts +8 -10
  30. src/app/api/v1/export/route.ts +4 -9
  31. src/app/api/v1/types.ts +15 -0
  32. src/app/latent/search/page.tsx +1 -1
  33. src/app/latent/watch/page.tsx +1 -1
  34. src/types/general.ts +2 -0
package-lock.json CHANGED
@@ -9,7 +9,7 @@
9
  "version": "0.0.0",
10
  "dependencies": {
11
  "@aitube/clap": "0.0.10",
12
- "@aitube/client": "0.0.12",
13
  "@aitube/engine": "0.0.2",
14
  "@huggingface/hub": "0.12.3-oauth",
15
  "@huggingface/inference": "^2.6.7",
@@ -129,9 +129,12 @@
129
  }
130
  },
131
  "node_modules/@aitube/client": {
132
- "version": "0.0.12",
133
- "resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.12.tgz",
134
- "integrity": "sha512-b/QFTtAKwr7H5dMSco+iXhwJRpPw/sT487EGpNjDbuQamIJ3FqdlVMTC/c5jdX8meFp+m35n/dY58Iy39Lle5A==",
 
 
 
135
  "peerDependencies": {
136
  "@aitube/clap": "0.0.10"
137
  }
@@ -922,9 +925,9 @@
922
  }
923
  },
924
  "node_modules/@floating-ui/dom": {
925
- "version": "1.6.4",
926
- "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.6.4.tgz",
927
- "integrity": "sha512-0G8R+zOvQsAG1pg2Q99P21jiqxqGBW1iRe/iXHsBRBxnpXKFI8QwbB4x5KmYLggNO5m34IQgOIu9SCRfR/WWiQ==",
928
  "dependencies": {
929
  "@floating-ui/core": "^1.0.0",
930
  "@floating-ui/utils": "^0.2.0"
@@ -2958,9 +2961,9 @@
2958
  "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ=="
2959
  },
2960
  "node_modules/@types/lodash": {
2961
- "version": "4.17.0",
2962
- "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.17.0.tgz",
2963
- "integrity": "sha512-t7dhREVv6dbNj0q17X12j7yDG4bD/DHYX7o5/DbDxobP0HnGPgpRz2Ej77aL7TZT3DSw13fqUTj8J4mMnqa7WA=="
2964
  },
2965
  "node_modules/@types/lodash.debounce": {
2966
  "version": "4.0.9",
@@ -3740,9 +3743,9 @@
3740
  }
3741
  },
3742
  "node_modules/caniuse-lite": {
3743
- "version": "1.0.30001615",
3744
- "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001615.tgz",
3745
- "integrity": "sha512-1IpazM5G3r38meiae0bHRnPhz+CBQ3ZLqbQMtrg+AsTPKAXgW38JNsXkyZ+v8waCsDmPq87lmfun5Q2AGysNEQ==",
3746
  "funding": [
3747
  {
3748
  "type": "opencollective",
@@ -6354,9 +6357,9 @@
6354
  }
6355
  },
6356
  "node_modules/minipass": {
6357
- "version": "7.0.4",
6358
- "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.0.4.tgz",
6359
- "integrity": "sha512-jYofLM5Dam9279rdkWzqHozUo4ybjdZmCsDHePy5V/PbBcVMiSZR97gmAy45aqi8CK1lG2ECd356FU86avfwUQ==",
6360
  "engines": {
6361
  "node": ">=16 || 14 >=14.17"
6362
  }
 
9
  "version": "0.0.0",
10
  "dependencies": {
11
  "@aitube/clap": "0.0.10",
12
+ "@aitube/client": "0.0.15",
13
  "@aitube/engine": "0.0.2",
14
  "@huggingface/hub": "0.12.3-oauth",
15
  "@huggingface/inference": "^2.6.7",
 
129
  }
130
  },
131
  "node_modules/@aitube/client": {
132
+ "version": "0.0.15",
133
+ "resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.15.tgz",
134
+ "integrity": "sha512-lGmdsBqjNVStBxZSH+Iig/nOyPdSpqpqU6M0OvOBMTwR4rohSvIQ7TnFJGvoc4WEFciNoCc6Vg6Q5W99ovG+fg==",
135
+ "dependencies": {
136
+ "query-string": "^9.0.0"
137
+ },
138
  "peerDependencies": {
139
  "@aitube/clap": "0.0.10"
140
  }
 
925
  }
926
  },
927
  "node_modules/@floating-ui/dom": {
928
+ "version": "1.6.5",
929
+ "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.6.5.tgz",
930
+ "integrity": "sha512-Nsdud2X65Dz+1RHjAIP0t8z5e2ff/IRbei6BqFrl1urT8sDVzM1HMQ+R0XcU5ceRfyO3I6ayeqIfh+6Wb8LGTw==",
931
  "dependencies": {
932
  "@floating-ui/core": "^1.0.0",
933
  "@floating-ui/utils": "^0.2.0"
 
2961
  "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ=="
2962
  },
2963
  "node_modules/@types/lodash": {
2964
+ "version": "4.17.1",
2965
+ "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.17.1.tgz",
2966
+ "integrity": "sha512-X+2qazGS3jxLAIz5JDXDzglAF3KpijdhFxlf/V1+hEsOUc+HnWi81L/uv/EvGuV90WY+7mPGFCUDGfQC3Gj95Q=="
2967
  },
2968
  "node_modules/@types/lodash.debounce": {
2969
  "version": "4.0.9",
 
3743
  }
3744
  },
3745
  "node_modules/caniuse-lite": {
3746
+ "version": "1.0.30001616",
3747
+ "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001616.tgz",
3748
+ "integrity": "sha512-RHVYKov7IcdNjVHJFNY/78RdG4oGVjbayxv8u5IO74Wv7Hlq4PnJE6mo/OjFijjVFNy5ijnCt6H3IIo4t+wfEw==",
3749
  "funding": [
3750
  {
3751
  "type": "opencollective",
 
6357
  }
6358
  },
6359
  "node_modules/minipass": {
6360
+ "version": "7.1.0",
6361
+ "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.0.tgz",
6362
+ "integrity": "sha512-oGZRv2OT1lO2UF1zUcwdTb3wqUwI0kBGTgt/T7OdSj6M6N5m3o5uPf0AIW6lVxGGoiWUR7e2AwTE+xiwK8WQig==",
6363
  "engines": {
6364
  "node": ">=16 || 14 >=14.17"
6365
  }
package.json CHANGED
@@ -11,7 +11,7 @@
11
  },
12
  "dependencies": {
13
  "@aitube/clap": "0.0.10",
14
- "@aitube/client": "0.0.12",
15
  "@aitube/engine": "0.0.2",
16
  "@huggingface/hub": "0.12.3-oauth",
17
  "@huggingface/inference": "^2.6.7",
 
11
  },
12
  "dependencies": {
13
  "@aitube/clap": "0.0.10",
14
+ "@aitube/client": "0.0.15",
15
  "@aitube/engine": "0.0.2",
16
  "@huggingface/hub": "0.12.3-oauth",
17
  "@huggingface/inference": "^2.6.7",
src/app/api/generators/image/generateImageWithVideochain.ts CHANGED
@@ -10,6 +10,7 @@ const apiKey = `${process.env.VIDEOCHAIN_API_KEY || ""}`
10
  export async function newRender({
11
  prompt,
12
  negativePrompt,
 
13
  nbFrames,
14
  nbSteps,
15
  width,
@@ -20,6 +21,7 @@ export async function newRender({
20
  }: {
21
  prompt: string
22
  negativePrompt: string
 
23
  nbFrames: number
24
  nbSteps: number
25
  width: number
@@ -61,6 +63,7 @@ export async function newRender({
61
  body: JSON.stringify({
62
  prompt,
63
  negativePrompt,
 
64
  // nbFrames: 8 and nbSteps: 15 --> ~10 sec generation
65
  nbFrames, // when nbFrames is 1, we will only generate static images
66
  nbSteps, // 20 = fast, 30 = better, 50 = best
@@ -72,7 +75,7 @@ export async function newRender({
72
  upscalingFactor: 1, // let's disable upscaling right now
73
  turbo, // always use turbo mode (it's for images only anyway)
74
  // also what could be done iw that we could use the width and height to control this
75
- cache: shouldRenewCache ? "renew" : "use"
76
  } as Partial<RenderRequest>),
77
  cache: 'no-store',
78
  // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
 
10
  export async function newRender({
11
  prompt,
12
  negativePrompt,
13
+ identityImage,
14
  nbFrames,
15
  nbSteps,
16
  width,
 
21
  }: {
22
  prompt: string
23
  negativePrompt: string
24
+ identityImage: string
25
  nbFrames: number
26
  nbSteps: number
27
  width: number
 
63
  body: JSON.stringify({
64
  prompt,
65
  negativePrompt,
66
+ identityImage,
67
  // nbFrames: 8 and nbSteps: 15 --> ~10 sec generation
68
  nbFrames, // when nbFrames is 1, we will only generate static images
69
  nbSteps, // 20 = fast, 30 = better, 50 = best
 
75
  upscalingFactor: 1, // let's disable upscaling right now
76
  turbo, // always use turbo mode (it's for images only anyway)
77
  // also what could be done iw that we could use the width and height to control this
78
+ cache: shouldRenewCache ? "renew" : "use",
79
  } as Partial<RenderRequest>),
80
  cache: 'no-store',
81
  // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
src/app/api/generators/search/unknownObjectToLatentSearchResults.ts CHANGED
@@ -1,6 +1,6 @@
1
  import { generateSeed } from "@aitube/clap"
2
 
3
- import { parseString } from "../../parsers/parseString"
4
  import { parseStringArray } from "../../parsers/parseStringArray"
5
  import { LatentSearchResult, LatentSearchResults } from "./types"
6
 
@@ -9,9 +9,9 @@ export function unknownObjectToLatentSearchResults(something: any): LatentSearch
9
 
10
  if (Array.isArray(something)) {
11
  results = something.map(thing => ({
12
- label: parseString(thing && (thing?.label || thing?.title)),
13
- summary: parseString(thing && (thing?.summary || thing?.description || thing?.synopsis)),
14
- thumbnail: parseString(thing && (thing?.thumbnail)),
15
  tags: parseStringArray(thing && (thing?.tag)),
16
  seed: generateSeed(), // a seed is necessary for consistency between search results and viewer
17
  } as LatentSearchResult))
 
1
  import { generateSeed } from "@aitube/clap"
2
 
3
+ import { parseTrimmedString } from "../../parsers/parseTrimmedString"
4
  import { parseStringArray } from "../../parsers/parseStringArray"
5
  import { LatentSearchResult, LatentSearchResults } from "./types"
6
 
 
9
 
10
  if (Array.isArray(something)) {
11
  results = something.map(thing => ({
12
+ label: parseTrimmedString(thing && (thing?.label || thing?.title)),
13
+ summary: parseTrimmedString(thing && (thing?.summary || thing?.description || thing?.synopsis)),
14
+ thumbnail: parseTrimmedString(thing && (thing?.thumbnail)),
15
  tags: parseStringArray(thing && (thing?.tag)),
16
  seed: generateSeed(), // a seed is necessary for consistency between search results and viewer
17
  } as LatentSearchResult))
src/app/api/parsers/parseCompletionMode.ts CHANGED
@@ -1,10 +1,25 @@
1
- import { ClapCompletionMode } from "../v1/edit/types"
2
 
3
- export function parseCompletionMode(input?: any, defaultMode: ClapCompletionMode = "partial"): ClapCompletionMode {
 
 
 
4
  let mode = defaultMode
 
5
  try {
6
- let maybeMode = decodeURIComponent(`${input || ""}` || defaultMode).trim()
7
- mode = ["partial", "full"].includes(maybeMode) ? (maybeMode as ClapCompletionMode) : defaultMode
 
 
 
 
 
 
8
  } catch (err) {}
 
 
 
 
 
9
  return mode
10
  }
 
1
+ import { ClapCompletionMode } from "@aitube/client"
2
 
3
+ export function parseCompletionMode(
4
+ input?: any,
5
+ defaultMode: ClapCompletionMode = ClapCompletionMode.PARTIAL
6
+ ): ClapCompletionMode {
7
  let mode = defaultMode
8
+
9
  try {
10
+ let maybeMode = decodeURIComponent(`${input || ""}`).trim()
11
+
12
+ if (!maybeMode) {
13
+ maybeMode = defaultMode
14
+ }
15
+
16
+ mode = maybeMode as ClapCompletionMode
17
+
18
  } catch (err) {}
19
+
20
+ if (!Object.values(ClapCompletionMode).includes(mode)) {
21
+ throw new Error(`Invalid clap completion mode: "${mode}"`)
22
+ }
23
+
24
  return mode
25
  }
src/app/api/parsers/parseEntityPrompts.ts ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { ClapEntityPrompt } from "@aitube/client"
2
+ import { decode } from "js-base64"
3
+
4
+ export function parseClapEntityPrompts(input?: any): ClapEntityPrompt[] {
5
+ let basicResult = JSON.parse(decode(`${input || ""}`))
6
+ if (Array.isArray(basicResult)) {
7
+ return basicResult as ClapEntityPrompt[]
8
+ } else {
9
+ return []
10
+ }
11
+ }
src/app/api/parsers/parseSupportedExportFormat.ts ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { defaultExportFormat, SupportedExportFormat } from "@aitube/client"
2
+
3
+ export function parseSupportedExportFormat(
4
+ input?: any,
5
+ defaultFormat: SupportedExportFormat = defaultExportFormat
6
+ ): SupportedExportFormat {
7
+
8
+ let format: SupportedExportFormat = defaultFormat
9
+ try {
10
+ format = decodeURIComponent(`${input || ""}` || defaultFormat).trim() as SupportedExportFormat
11
+ if (format !== "mp4" && format !== "webm") {
12
+ format = defaultFormat
13
+ }
14
+ } catch (err) {}
15
+ return format
16
+ }
src/app/api/parsers/{parseString.ts → parseTrimmedString.ts} RENAMED
@@ -1,4 +1,4 @@
1
- export function parseString(something: any): string {
2
  let result: string = ""
3
  if (typeof something === "string") {
4
  result = `${something}`.trim()
 
1
+ export function parseTrimmedString(something: any): string {
2
  let result: string = ""
3
  if (typeof something === "string") {
4
  result = `${something}`.trim()
src/app/api/v1/auth/config.ts ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import { createSecretKey } from "node:crypto"
2
+
3
+ export const secretKey = createSecretKey(`${process.env.API_SECRET_JWT_KEY || ""}`, 'utf-8')
4
+ export const issuer = `${process.env.API_SECRET_JWT_ISSUER || ""}`
5
+ export const audience = `${process.env.API_SECRET_JWT_AUDIENCE || ""}`
src/app/api/{auth → v1/auth}/getToken.ts RENAMED
@@ -1,20 +1,20 @@
1
- import { createSecretKey } from "crypto"
2
  import { SignJWT } from "jose"
3
 
 
 
4
  // https://jmswrnr.com/blog/protecting-next-js-api-routes-query-parameters
5
 
6
  export async function getToken(data: Record<string, any> = {}): Promise<string> {
7
- const secretKey = createSecretKey(`${process.env.API_SECRET_JWT_KEY || ""}`, 'utf-8');
8
 
9
  const jwtToken = await new SignJWT(data)
10
  .setProtectedHeader({
11
  alg: 'HS256'
12
  }) // algorithm
13
  .setIssuedAt()
14
- .setIssuer(`${process.env.API_SECRET_JWT_ISSUER || ""}`) // issuer
15
- .setAudience(`${process.env.API_SECRET_JWT_AUDIENCE || ""}`) // audience
16
  .setExpirationTime("1 day") // token expiration time - to prevent hackers from re-using our URLs more than a day
17
- .sign(secretKey); // secretKey generated from previous step
18
 
19
  return jwtToken
20
  }
 
 
1
  import { SignJWT } from "jose"
2
 
3
+ import { secretKey, issuer, audience } from "./config"
4
+
5
  // https://jmswrnr.com/blog/protecting-next-js-api-routes-query-parameters
6
 
7
  export async function getToken(data: Record<string, any> = {}): Promise<string> {
 
8
 
9
  const jwtToken = await new SignJWT(data)
10
  .setProtectedHeader({
11
  alg: 'HS256'
12
  }) // algorithm
13
  .setIssuedAt()
14
+ .setIssuer(issuer) // issuer
15
+ .setAudience(audience) // audience
16
  .setExpirationTime("1 day") // token expiration time - to prevent hackers from re-using our URLs more than a day
17
+ .sign(secretKey) // secretKey generated from previous step
18
 
19
  return jwtToken
20
  }
src/app/api/v1/auth/parseToken.ts ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ export function parseToken(input?: any): string {
2
+ try {
3
+ return (decodeURIComponent(`${input || ""}`).split("Bearer").pop() || "").trim()
4
+ } catch (err) {
5
+ return ""
6
+ }
7
+ }
src/app/api/v1/auth/throwIfInvalidToken.ts ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { jwtVerify } from "jose"
2
+
3
+ import { secretKey } from "./config"
4
+ import { parseToken } from "./parseToken"
5
+
6
+ export async function throwIfInvalidToken(input?: any): Promise<boolean> {
7
+
8
+ // note: this performs a decodeURI, but I'm not sure we need to
9
+ const token = parseToken(input)
10
+
11
+ // verify token
12
+ const { payload, protectedHeader } = await jwtVerify(token, secretKey, {
13
+ issuer: `${process.env.API_SECRET_JWT_ISSUER || ""}`, // issuer
14
+ audience: `${process.env.API_SECRET_JWT_AUDIENCE || ""}`, // audience
15
+ })
16
+
17
+ // log values to console
18
+ console.log(payload)
19
+ console.log(protectedHeader)
20
+
21
+ return true
22
+ }
src/app/api/v1/create/index.ts CHANGED
@@ -2,12 +2,12 @@
2
 
3
  import { ClapProject, getValidNumber, newClap, newSegment } from "@aitube/clap"
4
 
 
5
  import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
6
  import { parseRawStringToYAML } from "@/app/api/parsers/parseRawStringToYAML"
7
- import { sleep } from "@/lib/utils/sleep"
8
 
9
  import { systemPrompt } from "./systemPrompt"
10
- import { LatentStory } from "./types"
11
 
12
  // a helper to generate Clap stories from a few sentences
13
  // this is mostly used by external apps such as the Stories Factory
@@ -20,7 +20,6 @@ export async function create(request: {
20
  width: 1024,
21
  height: 576,
22
  }): Promise<ClapProject> {
23
-
24
  const prompt = `${request?.prompt || ""}`.trim()
25
 
26
  console.log("api/v1/create(): request:", request)
@@ -30,7 +29,9 @@ export async function create(request: {
30
  const width = getValidNumber(request?.width, 256, 8192, 1024)
31
  const height = getValidNumber(request?.height, 256, 8192, 576)
32
 
33
- const userPrompt = `Video story to generate: ${prompt}`
 
 
34
 
35
  const prefix = "```yaml\n"
36
  const nbMaxNewTokens = 1400
@@ -70,12 +71,15 @@ export async function create(request: {
70
  maybeShots = parseRawStringToYAML<LatentStory[]>(rawString, [])
71
  if (!Array.isArray(maybeShots) || maybeShots.length === 0) {
72
  console.log(`api/v1/create(): failed to generate shots for the second time, which indicates an issue with the Hugging Face API`)
73
- } else {
74
- shots = maybeShots
75
- }
76
- } else {
77
  shots = maybeShots
 
 
78
  }
 
79
  console.log(`api/v1/create(): generated ${shots.length} shots`)
80
 
81
  // this is approximate - TTS generation will determine the final duration of each shot
@@ -88,8 +92,8 @@ export async function create(request: {
88
  title: "Not needed", // we don't need a title actually
89
  description: "This video has been generated using AI",
90
  synopsis: "",
91
- licence: "Non Commercial",
92
- orientation: "vertical",
93
  width,
94
  height,
95
  isInteractive: false,
@@ -160,7 +164,7 @@ export async function create(request: {
160
  startTimeInMs: currentElapsedTimeInMs,
161
  assetDurationInMs: defaultSegmentDurationInMs,
162
  category: "camera",
163
- prompt: "vertical video",
164
  outputType: "text"
165
  }))
166
 
 
2
 
3
  import { ClapProject, getValidNumber, newClap, newSegment } from "@aitube/clap"
4
 
5
+ import { sleep } from "@/lib/utils/sleep"
6
  import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
7
  import { parseRawStringToYAML } from "@/app/api/parsers/parseRawStringToYAML"
8
+ import { LatentStory } from "@/app/api/v1/types"
9
 
10
  import { systemPrompt } from "./systemPrompt"
 
11
 
12
  // a helper to generate Clap stories from a few sentences
13
  // this is mostly used by external apps such as the Stories Factory
 
20
  width: 1024,
21
  height: 576,
22
  }): Promise<ClapProject> {
 
23
  const prompt = `${request?.prompt || ""}`.trim()
24
 
25
  console.log("api/v1/create(): request:", request)
 
29
  const width = getValidNumber(request?.width, 256, 8192, 1024)
30
  const height = getValidNumber(request?.height, 256, 8192, 576)
31
 
32
+ const userPrompt = `Movie story to generate: ${prompt}
33
+
34
+ Output: `
35
 
36
  const prefix = "```yaml\n"
37
  const nbMaxNewTokens = 1400
 
71
  maybeShots = parseRawStringToYAML<LatentStory[]>(rawString, [])
72
  if (!Array.isArray(maybeShots) || maybeShots.length === 0) {
73
  console.log(`api/v1/create(): failed to generate shots for the second time, which indicates an issue with the Hugging Face API`)
74
+ }
75
+ }
76
+
77
+ if (maybeShots.length) {
78
  shots = maybeShots
79
+ } else {
80
+ throw new Error(`Hugging Face Inference API failure (the model failed to generate the shots)`)
81
  }
82
+
83
  console.log(`api/v1/create(): generated ${shots.length} shots`)
84
 
85
  // this is approximate - TTS generation will determine the final duration of each shot
 
92
  title: "Not needed", // we don't need a title actually
93
  description: "This video has been generated using AI",
94
  synopsis: "",
95
+ licence: "",
96
+ orientation: width > height ? "landscape" : height > width ? "portrait" : "square",
97
  width,
98
  height,
99
  isInteractive: false,
 
164
  startTimeInMs: currentElapsedTimeInMs,
165
  assetDurationInMs: defaultSegmentDurationInMs,
166
  category: "camera",
167
+ prompt: "video",
168
  outputType: "text"
169
  }))
170
 
src/app/api/v1/create/route.ts CHANGED
@@ -1,11 +1,14 @@
1
  import { NextResponse, NextRequest } from "next/server"
2
  import { getValidNumber, serializeClap } from "@aitube/clap"
3
 
 
 
4
  import { create } from "."
5
 
6
  // a helper to generate Clap stories from a few sentences
7
  // this is mostly used by external apps such as the Stories Factory
8
  export async function POST(req: NextRequest) {
 
9
 
10
  const request = await req.json() as {
11
  prompt: string
@@ -17,9 +20,9 @@ export async function POST(req: NextRequest) {
17
  console.log("[api/v1/create] request:", request)
18
 
19
  const clap = await create({
20
- prompt: `${request?.prompt || ""}`.trim(),
21
  width: getValidNumber(request?.width, 256, 8192, 1024),
22
- height: getValidNumber(request?.height, 256, 8192, 576)
23
  })
24
 
25
  // TODO replace by Clap file streaming
 
1
  import { NextResponse, NextRequest } from "next/server"
2
  import { getValidNumber, serializeClap } from "@aitube/clap"
3
 
4
+ import { throwIfInvalidToken } from "@/app/api/v1/auth/throwIfInvalidToken"
5
+
6
  import { create } from "."
7
 
8
  // a helper to generate Clap stories from a few sentences
9
  // this is mostly used by external apps such as the Stories Factory
10
  export async function POST(req: NextRequest) {
11
+ await throwIfInvalidToken(req.headers.get("Authorization"))
12
 
13
  const request = await req.json() as {
14
  prompt: string
 
20
  console.log("[api/v1/create] request:", request)
21
 
22
  const clap = await create({
23
+ prompt: `${request?.prompt || ""}`.trim(),
24
  width: getValidNumber(request?.width, 256, 8192, 1024),
25
+ height: getValidNumber(request?.height, 256, 8192, 576)
26
  })
27
 
28
  // TODO replace by Clap file streaming
src/app/api/v1/create/systemPrompt.ts CHANGED
@@ -1,23 +1,29 @@
1
  export const systemPrompt: string =
2
  `# Context
3
  You are a server-side function generating stories from a single synopsis/brief (a "prompt").
4
- The video are vertical, so they can be displayed on mobile.
5
- They are meant to be shared on social media platform (Instagram, TikTok, Snapchat, Twitter, YouTube Shorts etc).
6
- Each video is composed of a sequence of static panels (a dozen in average),
7
- with a voice over and text.
8
 
9
  # Task
10
- Your mission is to generate a sequence of panels that will form the final video.
11
 
12
  You will be provided a "prompt" (for the story) and max number of images
13
- Each panel is composed of:
 
 
 
 
14
  - one title (which will be displayed as an overlay over the video, so keep it short eg. max 10/12 words),
15
- - one image (you must describe it using a Stable Diffusion prompt - about ~300 characters - using simple descriptive words and adjectives. Describe facts about characters, location, lights, texture, camera orientation, colors, clothes, movements etc. But don't give your opinion, don't talk about the emotions it evokes etc.)
16
  - one voice over (should be short too, about 10 to 15 words)
17
 
 
 
 
 
 
18
  # Examples
19
 
20
- You most reply by writing/completing a YAML list of objects.
21
  Here is a short example, the prompt was "a cute puppy who misbehaves in the kitchen, in 3 parts 🐶"
22
  Note how we asked for "3 parts". Sometimes the user will talk about steps, slides etc instead (that's fine, it means the same thing),
23
  or the user might omit to give the number (that's fine too, you can use 5 by default),
@@ -34,6 +40,4 @@ but if the user asks for large numbers, it should be ignored (our limit is 32).
34
  image: "medium-shot of a puppy eating a cake, on the kitchen table, birthday cake, eating, cute, instagram, funny, messy, vertical photo"
35
  voice: "Now my dog is eating my birtday cake. Please send help."
36
  \`\`\
37
-
38
- # Your turn:
39
- `
 
1
  export const systemPrompt: string =
2
  `# Context
3
  You are a server-side function generating stories from a single synopsis/brief (a "prompt").
4
+ The videos are meant to be shared on social media platform (Instagram, TikTok, Snapchat, Twitter, YouTube Shorts etc).
5
+ Each video is composed of a sequence of shots (a dozen in average), with a voice over and text.
 
 
6
 
7
  # Task
8
+ Your mission is to generate a sequence of shots that will form the final video.
9
 
10
  You will be provided a "prompt" (for the story) and max number of images
11
+
12
+ # Output schema
13
+
14
+ Each shot is composed of:
15
+
16
  - one title (which will be displayed as an overlay over the video, so keep it short eg. max 10/12 words),
17
+ - one image (you must describe it using a Stable Diffusion prompt - about ~300 chars - using simple descriptive words and adjectives. Describe facts about characters, location, lights, texture, camera orientation, colors, clothes, movements etc. But don't give your opinion, don't talk about the emotions it evokes etc.)
18
  - one voice over (should be short too, about 10 to 15 words)
19
 
20
+ # Important
21
+
22
+ You MUST reply by writing/completing a YAML list of objects.
23
+ Copy the structure of the examples, but not their content: come up with your own original ideal, you should be creativeç
24
+
25
  # Examples
26
 
 
27
  Here is a short example, the prompt was "a cute puppy who misbehaves in the kitchen, in 3 parts 🐶"
28
  Note how we asked for "3 parts". Sometimes the user will talk about steps, slides etc instead (that's fine, it means the same thing),
29
  or the user might omit to give the number (that's fine too, you can use 5 by default),
 
40
  image: "medium-shot of a puppy eating a cake, on the kitchen table, birthday cake, eating, cute, instagram, funny, messy, vertical photo"
41
  voice: "Now my dog is eating my birtday cake. Please send help."
42
  \`\`\
43
+ `
 
 
src/app/api/v1/create/types.ts DELETED
@@ -1,6 +0,0 @@
1
-
2
- export type LatentStory = {
3
- title: string
4
- image: string
5
- voice: string
6
- }
 
 
 
 
 
 
 
src/app/api/v1/edit/dialogues/processShot.ts CHANGED
@@ -1,12 +1,17 @@
1
 
2
- import { ClapProject, ClapSegment, getClapAssetSourceType, filterSegments, ClapSegmentFilteringMode } from "@aitube/clap"
 
 
 
 
 
 
 
3
  import { getSpeechBackgroundAudioPrompt } from "@aitube/engine"
4
 
5
  import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
6
  import { getMediaInfo } from "@/app/api/utils/getMediaInfo"
7
 
8
- import { ClapCompletionMode } from "../types"
9
-
10
  export async function processShot({
11
  shotSegment,
12
  existingClap,
@@ -70,7 +75,7 @@ export async function processShot({
70
  console.log(`[api/edit/dialogues] processShot: generated dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)
71
 
72
  // if it's partial, we need to manually add it
73
- if (mode === "partial") {
74
  newerClap.segments.push(shotDialogueSegment)
75
  }
76
  } else {
 
1
 
2
+ import {
3
+ ClapProject,
4
+ ClapSegment,
5
+ getClapAssetSourceType,
6
+ filterSegments,
7
+ ClapSegmentFilteringMode
8
+ } from "@aitube/clap"
9
+ import { ClapCompletionMode } from "@aitube/client"
10
  import { getSpeechBackgroundAudioPrompt } from "@aitube/engine"
11
 
12
  import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
13
  import { getMediaInfo } from "@/app/api/utils/getMediaInfo"
14
 
 
 
15
  export async function processShot({
16
  shotSegment,
17
  existingClap,
 
75
  console.log(`[api/edit/dialogues] processShot: generated dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)
76
 
77
  // if it's partial, we need to manually add it
78
+ if (mode !== ClapCompletionMode.FULL) {
79
  newerClap.segments.push(shotDialogueSegment)
80
  }
81
  } else {
src/app/api/v1/edit/dialogues/route.ts CHANGED
@@ -2,16 +2,16 @@ import { NextResponse, NextRequest } from "next/server"
2
 
3
  import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
4
 
5
- import { getToken } from "@/app/api/auth/getToken"
6
 
7
  import { processShot } from "./processShot"
8
  import queryString from "query-string"
9
  import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
 
 
10
 
11
  // a helper to generate speech for a Clap
12
  export async function POST(req: NextRequest) {
13
-
14
- const jwtToken = await getToken({ user: "anonymous" })
15
 
16
  const qs = queryString.parseUrl(req.url || "")
17
  const query = (qs || {}).query
@@ -33,7 +33,7 @@ export async function POST(req: NextRequest) {
33
  throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
34
  }
35
 
36
- const newerClap = mode === "full" ? existingClap : newClap()
37
 
38
  // we process the shots in parallel (this will increase the queue size in the Gradio spaces)
39
  await Promise.all(shotsSegments.map(shotSegment =>
 
2
 
3
  import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
4
 
 
5
 
6
  import { processShot } from "./processShot"
7
  import queryString from "query-string"
8
  import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
9
+ import { throwIfInvalidToken } from "@/app/api/v1/auth/throwIfInvalidToken"
10
+ import { ClapCompletionMode } from "@aitube/client"
11
 
12
  // a helper to generate speech for a Clap
13
  export async function POST(req: NextRequest) {
14
+ await throwIfInvalidToken(req.headers.get("Authorization"))
 
15
 
16
  const qs = queryString.parseUrl(req.url || "")
17
  const query = (qs || {}).query
 
33
  throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
34
  }
35
 
36
+ const newerClap = mode === ClapCompletionMode.FULL ? existingClap : newClap()
37
 
38
  // we process the shots in parallel (this will increase the queue size in the Gradio spaces)
39
  await Promise.all(shotsSegments.map(shotSegment =>
src/app/api/v1/edit/entities/clapToLatentStory.ts ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { ClapProject, ClapSegmentFilteringMode, filterSegments } from "@aitube/clap"
2
+
3
+ import { LatentStory } from "@/app/api/v1/types"
4
+
5
+ /**
6
+ * Extract the latent story from a ClapProject
7
+ *
8
+ * This is useful to pass a simplified representation of a story to a LLM
9
+ *
10
+ * @param clap
11
+ * @returns
12
+ */
13
+ export async function clapToLatentStory(clap: ClapProject): Promise<LatentStory[]> {
14
+ const shots = clap.segments.filter(s => s.category === "camera")
15
+
16
+ const latentStories: LatentStory[] = []
17
+
18
+ for (const shot of shots) {
19
+ const image = filterSegments(
20
+ ClapSegmentFilteringMode.START,
21
+ shot,
22
+ clap.segments,
23
+ "storyboard"
24
+ ).at(0)
25
+
26
+ const title = filterSegments(
27
+ ClapSegmentFilteringMode.START,
28
+ shot,
29
+ clap.segments,
30
+ "interface"
31
+ ).at(0)
32
+
33
+ const voice = filterSegments(
34
+ ClapSegmentFilteringMode.START,
35
+ shot,
36
+ clap.segments,
37
+ "dialogue"
38
+ ).at(0)
39
+
40
+ const latentStory: LatentStory = {
41
+ title: title.prompt,
42
+ image: image.prompt,
43
+ voice: voice.prompt,
44
+ }
45
+
46
+ latentStories.push(latentStory)
47
+ }
48
+
49
+ return latentStories
50
+ }
src/app/api/v1/edit/entities/generateEntityPrompts.ts ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use server"
2
+
3
+ import YAML from "yaml"
4
+ import { generateSeed } from "@aitube/clap"
5
+ import { ClapEntityPrompt } from "@aitube/client"
6
+
7
+ import { sleep } from "@/lib/utils/sleep"
8
+ import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
9
+ import { parseRawStringToYAML } from "@/app/api/parsers/parseRawStringToYAML"
10
+ import { LatentEntity, LatentStory } from "@/app/api/v1/types"
11
+
12
+ import { systemPrompt } from "./systemPrompt"
13
+ import { generateImageID } from "./generateImageID"
14
+
15
// The result of hallucinating one entity:
// the prompt describing it, plus the shots in which it appears.
export type EntityPromptResult = {
  entityPrompt: ClapEntityPrompt // name, category, variant, identity image/voice..
  shots: number[] // IDs of the shots where this entity is present
}
19
+
20
+ // a helper to generate Clap stories from a few sentences
21
+ // this is mostly used by external apps such as the Stories Factory
22
+ export async function generateEntityPrompts({
23
+ prompt = "",
24
+ latentStory = []
25
+ }: {
26
+ prompt?: string
27
+ latentStory?: LatentStory[]
28
+ } = {
29
+ prompt: "",
30
+ latentStory: []
31
+ }): Promise<EntityPromptResult[]> {
32
+
33
+ if (!prompt.length) { throw new Error(`please provide a prompt`) }
34
+ console.log("generateEntityPrompts(): prompt:", prompt)
35
+
36
+
37
+ if (!latentStory.length) { throw new Error(`please provide a story`) }
38
+
39
+ console.log("generateEntityPrompts(): latentStory:", latentStory)
40
+
41
+ const userPrompt = `The input story is about: ${prompt}.
42
+
43
+ The input story timeline is:
44
+ \`\`\`yaml
45
+ ${YAML.stringify(
46
+ // we need to help the LLM by marking the shots with a simple numeric ID
47
+ latentStory.map((shot, i) => ({
48
+ shot: i,
49
+ ...shot,
50
+ }))
51
+ )}
52
+ \`\`\`
53
+
54
+ Now please generate the output entities:`
55
+
56
+ const prefix = "```yaml\n"
57
+ const nbMaxNewTokens = 1400
58
+
59
+ // TODO use streaming for the Hugging Face prediction
60
+ //
61
+ // note that a Clap file is actually a YAML stream of documents
62
+ // so technically we could stream everything from end-to-end
63
+ // (but I haven't coded the helpers to do this yet)
64
+ let rawString = await predict({
65
+ systemPrompt,
66
+ userPrompt,
67
+ nbMaxNewTokens,
68
+ prefix,
69
+ })
70
+
71
+ console.log("generateEntityPrompts(): rawString: ", rawString)
72
+
73
+ let results: EntityPromptResult[] = []
74
+
75
+ let maybeEntities = parseRawStringToYAML<LatentEntity[]>(rawString, [])
76
+
77
+ if (!Array.isArray(maybeEntities) || maybeEntities.length === 0) {
78
+ console.log(`generateEntityPrompts(): failed to generate entities.. trying again`)
79
+
80
+ await sleep(2000)
81
+
82
+ rawString = await predict({
83
+ systemPrompt,
84
+ userPrompt: userPrompt + ".", // we trick the Hugging Face cache
85
+ nbMaxNewTokens,
86
+ prefix,
87
+ })
88
+
89
+ console.log("generateEntityPrompts(): rawString: ", rawString)
90
+
91
+ maybeEntities = parseRawStringToYAML<LatentEntity[]>(rawString, [])
92
+ if (!Array.isArray(maybeEntities) || maybeEntities.length === 0) {
93
+ console.log(`generateEntityPrompts(): failed to generate shots for the second time, which indicates an issue with the Hugging Face API`)
94
+ }
95
+ }
96
+
97
+ if (maybeEntities.length) {
98
+ results = await Promise.all(maybeEntities.map(async ({
99
+ name,
100
+ category,
101
+ image,
102
+ audio,
103
+ shots,
104
+ }) => {
105
+
106
+ const entityPrompt: ClapEntityPrompt = {
107
+ name,
108
+ category,
109
+ age: "",
110
+ variant: image,
111
+ region: "",
112
+ identityImage: await generateImageID({
113
+ prompt: image,
114
+ seed: generateSeed()
115
+ }),
116
+
117
+ // TODO later
118
+ identityVoice: "" // await generateAudioID({ prompt: e.audio, seed: generateSeed() })
119
+ }
120
+
121
+ const result: EntityPromptResult = {
122
+ entityPrompt,
123
+ shots
124
+ }
125
+
126
+ return result
127
+ }))
128
+ } else {
129
+ throw new Error(`Hugging Face Inference API failure (the model failed to generate the entities)`)
130
+ }
131
+
132
+ console.log(`generateEntityPrompts(): generated ${results.length} entities with their images and voice ids`)
133
+
134
+ return results
135
+ }
src/app/api/v1/edit/entities/generateImageID.ts CHANGED
@@ -2,7 +2,6 @@
2
  import { generateSeed } from "@aitube/clap"
3
 
4
  import { sleep } from "@/lib/utils/sleep"
5
- import { getValidNumber } from "@/lib/utils/getValidNumber"
6
 
7
  import { newRender, getRender } from "@/app/api/providers/videochain/renderWithVideoChain"
8
  import { getNegativePrompt, getPositivePrompt } from "@/app/api/utils/imagePrompts"
 
2
  import { generateSeed } from "@aitube/clap"
3
 
4
  import { sleep } from "@/lib/utils/sleep"
 
5
 
6
  import { newRender, getRender } from "@/app/api/providers/videochain/renderWithVideoChain"
7
  import { getNegativePrompt, getPositivePrompt } from "@/app/api/utils/imagePrompts"
src/app/api/v1/edit/entities/index.ts CHANGED
@@ -1,23 +1,113 @@
1
 
2
- import { ClapProject, getClapAssetSourceType, newClap } from "@aitube/clap"
 
3
 
4
  import { generateImageID } from "./generateImageID"
5
  import { generateAudioID } from "./generateAudioID"
6
-
7
- import { ClapCompletionMode } from "../types"
8
 
9
  export async function editEntities({
10
  existingClap,
11
  newerClap,
12
- mode
 
13
  }: {
14
  existingClap: ClapProject
15
  newerClap: ClapProject
16
- mode: ClapCompletionMode
 
17
  }) {
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  if (!existingClap.entities.length) { throw new Error(`please provide at least one entity`) }
20
 
 
 
21
  for (const entity of existingClap.entities) {
22
 
23
  let entityHasBeenModified = false
@@ -57,13 +147,13 @@ export async function editEntities({
57
  }
58
 
59
  // in case we are doing a partial update
60
- if (mode === "partial" && entityHasBeenModified && !newerClap.entityIndex[entity.id]) {
61
  newerClap.entities.push(entity)
62
  newerClap.entityIndex[entity.id] = entity
63
  }
64
  }
65
 
66
- console.log(`[api/edit/entities] returning the newerClap`)
67
 
68
  return newerClap
69
  }
 
1
 
2
+ import { ClapProject, getClapAssetSourceType, getValidNumber, newEntity } from "@aitube/clap"
3
+ import { ClapCompletionMode, ClapEntityPrompt } from "@aitube/client"
4
 
5
  import { generateImageID } from "./generateImageID"
6
  import { generateAudioID } from "./generateAudioID"
7
+ import { generateEntityPrompts } from "./generateEntityPrompts"
8
+ import { clapToLatentStory } from "./clapToLatentStory"
9
 
10
  export async function editEntities({
11
  existingClap,
12
  newerClap,
13
+ entityPrompts = [],
14
+ mode = ClapCompletionMode.PARTIAL
15
  }: {
16
  existingClap: ClapProject
17
  newerClap: ClapProject
18
+ entityPrompts?: ClapEntityPrompt[]
19
+ mode?: ClapCompletionMode
20
  }) {
21
 
22
+ // note that we can only handle either FULL or PARTIAL
23
+ // other modes such as MERGE, REPLACE.. are irrelevant since those are client-side modes
24
+ // so from a server point of view those correspond to PARTIAL
25
+ //
26
+ // it is also worth noting that the use of FULL should be discouraged
27
+ const isFull = mode === ClapCompletionMode.FULL
28
+ const isPartial = !isFull
29
+
30
+ // if we don't have existing entities, and user passed none,
31
+ // then we need to hallucinate them
32
+ if (existingClap.entities.length === 0 && entityPrompts.length === 0) {
33
+ const entityPromptsWithShots = await generateEntityPrompts({
34
+ prompt: existingClap.meta.description,
35
+ latentStory: await clapToLatentStory(existingClap)
36
+ })
37
+
38
+ for (const {
39
+ entityPrompt: { name, category, age, variant, region, identityImage, identityVoice },
40
+ shots
41
+ } of entityPromptsWithShots) {
42
+ const newEnt = newEntity({
43
+ category,
44
+ triggerName: name,
45
+ label: name,
46
+ description: name,
47
+ author: "auto",
48
+ thumbnailUrl: "",
49
+
50
+ imagePrompt: "",
51
+ imageSourceType: getClapAssetSourceType(identityImage),
52
+ imageEngine: "SDXL Lightning",
53
+ imageId: identityImage,
54
+ audioPrompt: "",
55
+ audioSourceType: getClapAssetSourceType(identityVoice),
56
+ audioEngine: "Parler-TTS", // <- TODO: use OpenVoice 2, that way it can be personalized
57
+ audioId: identityVoice,
58
+
59
+ // note: using a numeric age should be deprecated,
60
+ // instead we should be able to specify things using text,
61
+ // eg. "8 months", "25 years old", "12th century"
62
+ age: getValidNumber(age, 0, 120, 25),
63
+
64
+ // TODO: delete gender and appearance, replace by a single concept of "variant"
65
+ gender: "",
66
+ appearance: variant,
67
+ region: region,
68
+ })
69
+
70
+ existingClap.entities.push(newEnt)
71
+ }
72
+ }
73
+
74
+ // otherwise try to add what's new
75
+ for (const { name, category, age, variant, region, identityImage, identityVoice } of entityPrompts) {
76
+ const newEnt = newEntity({
77
+ category,
78
+ triggerName: name,
79
+ label: name,
80
+ description: name,
81
+ author: "auto",
82
+ thumbnailUrl: "",
83
+
84
+ imagePrompt: "",
85
+ imageSourceType: getClapAssetSourceType(identityImage),
86
+ imageEngine: "SDXL Lightning",
87
+ imageId: identityImage,
88
+ audioPrompt: "",
89
+ audioSourceType: getClapAssetSourceType(identityVoice),
90
+ audioEngine: "Parler-TTS", // <- TODO: use OpenVoice 2, that way it can be personalized
91
+ audioId: identityVoice,
92
+
93
+ // note: using a numeric age should be deprecated,
94
+ // instead we should be able to specify things using text,
95
+ // eg. "8 months", "25 years old", "12th century"
96
+ age: getValidNumber(age, 0, 120, 25),
97
+
98
+ // TODO: delete gender and appearance, replace by a single concept of "variant"
99
+ gender: "",
100
+ appearance: variant,
101
+ region: region,
102
+ })
103
+
104
+ existingClap.entities.push(newEnt)
105
+ }
106
+
107
  if (!existingClap.entities.length) { throw new Error(`please provide at least one entity`) }
108
 
109
+ // then we try to automatically repair, edit, complete.. all the existing entities
110
+
111
  for (const entity of existingClap.entities) {
112
 
113
  let entityHasBeenModified = false
 
147
  }
148
 
149
  // in case we are doing a partial update
150
+ if (mode !== ClapCompletionMode.FULL && entityHasBeenModified && !newerClap.entityIndex[entity.id]) {
151
  newerClap.entities.push(entity)
152
  newerClap.entityIndex[entity.id] = entity
153
  }
154
  }
155
 
156
+ console.log(`api/edit/entities(): returning the newerClap`)
157
 
158
  return newerClap
159
  }
src/app/api/v1/edit/entities/route.ts CHANGED
@@ -2,12 +2,15 @@ import { NextResponse, NextRequest } from "next/server"
2
  import queryString from "query-string"
3
  import { newClap, parseClap, serializeClap } from "@aitube/clap"
4
 
5
- import { getToken } from "@/app/api/auth/getToken"
6
  import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
 
 
7
 
8
  import { editEntities } from "."
 
9
 
10
  export async function POST(req: NextRequest) {
 
11
 
12
  const qs = queryString.parseUrl(req.url || "")
13
  const query = (qs || {}).query
@@ -15,17 +18,18 @@ export async function POST(req: NextRequest) {
15
  const mode = parseCompletionMode(query?.c)
16
  // const prompt = parsePrompt(query?.p)
17
 
18
- const jwtToken = await getToken({ user: "anonymous" })
19
 
20
  const blob = await req.blob()
21
 
22
  const existingClap = await parseClap(blob)
23
 
24
- const newerClap = mode === "full" ? existingClap : newClap()
25
 
26
  await editEntities({
27
  existingClap,
28
  newerClap,
 
29
  mode
30
  })
31
 
 
2
  import queryString from "query-string"
3
  import { newClap, parseClap, serializeClap } from "@aitube/clap"
4
 
 
5
  import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
6
+ import { parseClapEntityPrompts } from "@/app/api/parsers/parseEntityPrompts"
7
+ import { throwIfInvalidToken } from "@/app/api/v1/auth/throwIfInvalidToken"
8
 
9
  import { editEntities } from "."
10
+ import { ClapCompletionMode } from "@aitube/client"
11
 
12
  export async function POST(req: NextRequest) {
13
+ await throwIfInvalidToken(req.headers.get("Authorization"))
14
 
15
  const qs = queryString.parseUrl(req.url || "")
16
  const query = (qs || {}).query
 
18
  const mode = parseCompletionMode(query?.c)
19
  // const prompt = parsePrompt(query?.p)
20
 
21
+ const entityPrompts = parseClapEntityPrompts(query?.e)
22
 
23
  const blob = await req.blob()
24
 
25
  const existingClap = await parseClap(blob)
26
 
27
+ const newerClap = mode === ClapCompletionMode.FULL ? existingClap : newClap()
28
 
29
  await editEntities({
30
  existingClap,
31
  newerClap,
32
+ entityPrompts,
33
  mode
34
  })
35
 
src/app/api/v1/edit/entities/systemPrompt.ts CHANGED
@@ -1,3 +1,64 @@
 
 
 
 
 
1
 
 
 
2
 
3
- export const systemPrompt = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export const systemPrompt: string =
2
+ `# Context
3
+ You are a server-side function generating stories from a single synopsis/brief (a "prompt").
4
+ The video are meant to be shared on social media platform (Instagram, TikTok, Snapchat, Twitter, YouTube Shorts etc).
5
+ Each video is composed of a sequence of shots (a dozen in average), with a voice over and text.
6
 
7
+ # Task
8
+ You mission is to generate a list of entities/assets (characters, locations etc) associated with each shot.
9
 
10
+ # Important
11
+
12
+ - You MUST reply by writing/completing a YAML list of objects.
13
+ - Copy the structure of the examples, but not their content: come up with your own original ideal, you should be creativeç
14
+
15
+ # Output schema:
16
+
17
+ name: name of the entity
18
+ category: can be "character" or "location"
19
+ image: a description of the entity (you must describe it using a Stable Diffusion prompt - about ~300 chars - using simple descriptive words and adjectives. Describe facts about characters, location, lights, texture, camera orientation, colors, clothes, movements etc. But don't give your opinion, don't talk about the emotions it evokes etc.)
20
+ audio: a textual description of what and how the entity sounds like
21
+ shots: an array containing the shot IDs where the entity is present
22
+
23
+ # Short example
24
+
25
+ Given the following inputs:
26
+
27
+ "A king goes to see a witch to ask if or how he can win an upcoming and challenging battle"
28
+
29
+ \`\`\`yaml
30
+ - shot: 1
31
+ title: "King Arthus seeks the witch's guidance to win his imminent battle."
32
+ image: "Establishing shot of KING ARTHUS, nervous, wet brown hair. dressed in golden armor and a colorful cape. His face reveals a mix of concern and determination. He's standing in the bright sunshine, inside a castle's courtyard, under cloudy skies. Behind him, a group of soldiers can be seen marching towards the castle gates."
33
+ voice: "Dark sorceress of the shadows, it is time for you to serve your Lord. Tell me the augur, tell me what you foreknow. Tell me how I will cleave my ennemies to the bone, and ravage them in battle to come up victorious."
34
+ - shot: 2
35
+ title: "The witch gives her counsel but warns of an unknown cost."
36
+ image: "close-up shot of THE WITCH, smiling cunningly, raising a finger while speaking. Background bokeh, dim lightning, menacing, mysterious."
37
+ voice: "Your Majesty, this will be a bloody battle, but I espy a way to victory for you. But if my advice you follow, victory I foresee, although at a great cost it will be."
38
+ - shot: 3
39
+ title: "The words of the witch are sinking in, but King Arthus tries to appear strong"
40
+ image: "close-up shot on KING ARTHUS, looking concerned, somber, false confidence"
41
+ voice: "Witch with the wicked tongue, what must be done will be done. I will do everything for my people's sake. Speak now, make know the path to glory."
42
+ \`\`\
43
+
44
+ An example YAML output from the server-side function can be:
45
+
46
+ \`\`\`yaml
47
+ - name: "Castle's Courtyard"
48
+ category: "location"
49
+ image: "A medieval castle courtyard, ashlar walls, soldiers and horses, cloudy sky"
50
+ audio: "Background noises of voices, horses, birds, wind, carriages"
51
+ shots: [1, 2, 3]
52
+ - name: "King Arthus"
53
+ category: "character"
54
+ image: 1 middle-aged king, pepper-and-salt hair, beared. Dressed in golden armor and a dark purple cape. Majestic, imposing."
55
+ label: King Arthus seeks the witch's guidance to win his imminent battle."
56
+ audio: a middle-aged man speaking clearly, with a deep voice tone, confident, imposing, calm, overpowering."
57
+ shots: [1, 3]
58
+ - name: "The Witch"
59
+ category: "character"
60
+ image: "an old witch, with a villainous face full of warts, gray hair, and a hunchback. Gypsy look. Yellowed teeth, piercing eyes. She wears a crude robe, she has wrinkled hands with long dirty nails."
61
+ audio: "a sneering old woman, speaking with a hoarse and raspy voice. She is confident, hiding something."
62
+ shots: [2]
63
+ \`\`\
64
+ `
src/app/api/v1/edit/storyboards/processShot.ts CHANGED
@@ -1,10 +1,17 @@
1
- import { ClapProject, ClapSegment, getClapAssetSourceType, newSegment, filterSegments, ClapSegmentFilteringMode } from "@aitube/clap"
 
 
 
 
 
 
 
 
2
  import { getVideoPrompt } from "@aitube/engine"
3
 
4
  import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
5
 
6
  import { generateStoryboard } from "./generateStoryboard"
7
- import { ClapCompletionMode } from "../types"
8
 
9
  export async function processShot({
10
  shotSegment,
@@ -84,7 +91,7 @@ export async function processShot({
84
 
85
  // if mode is full, newerClap already contains the ference to shotStoryboardSegment
86
  // but if it's partial, we need to manually add it
87
- if (mode === "partial") {
88
  newerClap.segments.push(shotStoryboardSegment)
89
  }
90
  } else {
 
1
+ import {
2
+ ClapProject,
3
+ ClapSegment,
4
+ getClapAssetSourceType,
5
+ newSegment,
6
+ filterSegments,
7
+ ClapSegmentFilteringMode
8
+ } from "@aitube/clap"
9
+ import { ClapCompletionMode } from "@aitube/client"
10
  import { getVideoPrompt } from "@aitube/engine"
11
 
12
  import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
13
 
14
  import { generateStoryboard } from "./generateStoryboard"
 
15
 
16
  export async function processShot({
17
  shotSegment,
 
91
 
92
  // if mode is full, newerClap already contains the ference to shotStoryboardSegment
93
  // but if it's partial, we need to manually add it
94
+ if (mode !== ClapCompletionMode.FULL) {
95
  newerClap.segments.push(shotStoryboardSegment)
96
  }
97
  } else {
src/app/api/v1/edit/storyboards/route.ts CHANGED
@@ -2,11 +2,11 @@ import { NextResponse, NextRequest } from "next/server"
2
  import queryString from "query-string"
3
  import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
4
 
5
- import { getToken } from "@/app/api/auth/getToken"
6
-
7
  import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
 
8
 
9
  import { processShot } from "./processShot"
 
10
 
11
  // a helper to generate storyboards for a Clap
12
  // this is mostly used by external apps such as the Stories Factory
@@ -16,8 +16,7 @@ import { processShot } from "./processShot"
16
  // - add missing storyboard prompts
17
  // - add missing storyboard images
18
  export async function POST(req: NextRequest) {
19
-
20
- const jwtToken = await getToken({ user: "anonymous" })
21
 
22
  const qs = queryString.parseUrl(req.url || "")
23
  const query = (qs || {}).query
@@ -30,16 +29,16 @@ export async function POST(req: NextRequest) {
30
 
31
  if (!existingClap?.segments) { throw new Error(`no segment found in the provided clap!`) }
32
 
33
- console.log(`[api/v1/edit/storyboards] detected ${existingClap.segments.length} segments`)
34
 
35
  const shotsSegments: ClapSegment[] = existingClap.segments.filter(s => s.category === "camera")
36
- console.log(`[api/v1/edit/storyboards] detected ${shotsSegments.length} shots`)
37
 
38
  if (shotsSegments.length > 32) {
39
  throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
40
  }
41
 
42
- const newerClap = mode === "full" ? existingClap : newClap()
43
 
44
  // we process the shots in parallel (this will increase the queue size in the Gradio spaces)
45
  await Promise.all(shotsSegments.map(shotSegment =>
 
2
  import queryString from "query-string"
3
  import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
4
 
 
 
5
  import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
6
+ import { throwIfInvalidToken } from "@/app/api/v1/auth/throwIfInvalidToken"
7
 
8
  import { processShot } from "./processShot"
9
+ import { ClapCompletionMode } from "@aitube/client"
10
 
11
  // a helper to generate storyboards for a Clap
12
  // this is mostly used by external apps such as the Stories Factory
 
16
  // - add missing storyboard prompts
17
  // - add missing storyboard images
18
  export async function POST(req: NextRequest) {
19
+ await throwIfInvalidToken(req.headers.get("Authorization"))
 
20
 
21
  const qs = queryString.parseUrl(req.url || "")
22
  const query = (qs || {}).query
 
29
 
30
  if (!existingClap?.segments) { throw new Error(`no segment found in the provided clap!`) }
31
 
32
+ console.log(`api/v1/edit/storyboards(): detected ${existingClap.segments.length} segments`)
33
 
34
  const shotsSegments: ClapSegment[] = existingClap.segments.filter(s => s.category === "camera")
35
+ console.log(`api/v1/edit/storyboards(): detected ${shotsSegments.length} shots`)
36
 
37
  if (shotsSegments.length > 32) {
38
  throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
39
  }
40
 
41
+ const newerClap = mode === ClapCompletionMode.FULL ? existingClap : newClap()
42
 
43
  // we process the shots in parallel (this will increase the queue size in the Gradio spaces)
44
  await Promise.all(shotsSegments.map(shotSegment =>
src/app/api/v1/edit/types.ts DELETED
@@ -1,8 +0,0 @@
1
- export type ClapCompletionMode =
2
- // the full .clap is returned, containing both previous data and also new entries
3
- // this isn't the most optimized mode, obviously
4
- | "full"
5
-
6
- // only changes are
7
- | "partial"
8
-
 
 
 
 
 
 
 
 
 
src/app/api/v1/edit/videos/processShot.ts CHANGED
@@ -1,11 +1,19 @@
1
 
2
- import { ClapProject, ClapSegment, getClapAssetSourceType, newSegment,filterSegments, ClapSegmentFilteringMode } from "@aitube/clap"
 
 
 
 
 
 
 
 
3
  import { getVideoPrompt } from "@aitube/engine"
4
 
5
  import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
6
 
7
  import { generateVideo } from "./generateVideo"
8
- import { ClapCompletionMode } from "../types"
9
 
10
  export async function processShot({
11
  shotSegment,
@@ -89,7 +97,7 @@ export async function processShot({
89
 
90
  // if mode is full, newerClap already contains the ference to shotVideoSegment
91
  // but if it's partial, we need to manually add it
92
- if (mode === "partial") {
93
  newerClap.segments.push(shotVideoSegment)
94
  }
95
 
 
1
 
2
+ import {
3
+ ClapProject,
4
+ ClapSegment,
5
+ getClapAssetSourceType,
6
+ newSegment,
7
+ filterSegments,
8
+ ClapSegmentFilteringMode
9
+ } from "@aitube/clap"
10
+ import { ClapCompletionMode } from "@aitube/client"
11
  import { getVideoPrompt } from "@aitube/engine"
12
 
13
  import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
14
 
15
  import { generateVideo } from "./generateVideo"
16
+
17
 
18
  export async function processShot({
19
  shotSegment,
 
97
 
98
  // if mode is full, newerClap already contains the ference to shotVideoSegment
99
  // but if it's partial, we need to manually add it
100
+ if (mode !== ClapCompletionMode.FULL) {
101
  newerClap.segments.push(shotVideoSegment)
102
  }
103
 
src/app/api/v1/edit/videos/route.ts CHANGED
@@ -1,13 +1,12 @@
1
  import { NextResponse, NextRequest } from "next/server"
2
  import queryString from "query-string"
3
  import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
4
-
5
- import { getToken } from "@/app/api/auth/getToken"
6
-
7
- import { processShot } from "./processShot"
8
 
9
  import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
 
10
 
 
11
 
12
  // a helper to generate videos for a Clap
13
  // this is mostly used by external apps such as the Stories Factory
@@ -17,8 +16,7 @@ import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
17
  // - add missing video prompts
18
  // - add missing video files
19
  export async function POST(req: NextRequest) {
20
-
21
- const jwtToken = await getToken({ user: "anonymous" })
22
 
23
  const qs = queryString.parseUrl(req.url || "")
24
  const query = (qs || {}).query
@@ -31,16 +29,16 @@ export async function POST(req: NextRequest) {
31
 
32
  if (!existingClap?.segments) { throw new Error(`no segment found in the provided clap!`) }
33
 
34
- console.log(`[api/edit/videos] detected ${existingClap.segments.length} segments`)
35
 
36
  const shotsSegments: ClapSegment[] = existingClap.segments.filter(s => s.category === "camera")
37
- console.log(`[api/edit/videos] detected ${shotsSegments.length} shots`)
38
 
39
  if (shotsSegments.length > 32) {
40
  throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
41
  }
42
 
43
- const newerClap = mode === "full" ? existingClap : newClap()
44
 
45
  // we process the shots in parallel (this will increase the queue size in the Gradio spaces)
46
  await Promise.all(shotsSegments.map(shotSegment =>
@@ -52,7 +50,7 @@ export async function POST(req: NextRequest) {
52
  })
53
  ))
54
 
55
- console.log(`[api/edit/videos] returning the clap augmented with videos`)
56
 
57
  return new NextResponse(await serializeClap(newerClap), {
58
  status: 200,
 
1
  import { NextResponse, NextRequest } from "next/server"
2
  import queryString from "query-string"
3
  import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
4
+ import { ClapCompletionMode } from "@aitube/client"
 
 
 
5
 
6
  import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
7
+ import { throwIfInvalidToken } from "@/app/api/v1/auth/throwIfInvalidToken"
8
 
9
+ import { processShot } from "./processShot"
10
 
11
  // a helper to generate videos for a Clap
12
  // this is mostly used by external apps such as the Stories Factory
 
16
  // - add missing video prompts
17
  // - add missing video files
18
  export async function POST(req: NextRequest) {
19
+ await throwIfInvalidToken(req.headers.get("Authorization"))
 
20
 
21
  const qs = queryString.parseUrl(req.url || "")
22
  const query = (qs || {}).query
 
29
 
30
  if (!existingClap?.segments) { throw new Error(`no segment found in the provided clap!`) }
31
 
32
+ console.log(`api/edit/videos(): detected ${existingClap.segments.length} segments`)
33
 
34
  const shotsSegments: ClapSegment[] = existingClap.segments.filter(s => s.category === "camera")
35
+ console.log(`api/edit/videos(): detected ${shotsSegments.length} shots`)
36
 
37
  if (shotsSegments.length > 32) {
38
  throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
39
  }
40
 
41
+ const newerClap = mode === ClapCompletionMode.FULL ? existingClap : newClap()
42
 
43
  // we process the shots in parallel (this will increase the queue size in the Gradio spaces)
44
  await Promise.all(shotsSegments.map(shotSegment =>
 
50
  })
51
  ))
52
 
53
+ console.log(`api/edit/videos(): returning the clap augmented with videos`)
54
 
55
  return new NextResponse(await serializeClap(newerClap), {
56
  status: 200,
src/app/api/v1/export/route.ts CHANGED
@@ -1,22 +1,17 @@
1
  import { NextResponse, NextRequest } from "next/server"
2
  import queryString from "query-string"
3
 
4
- type SupportedExportFormat = "mp4" | "webm"
5
- const defaultExportFormat = "mp4"
6
 
7
  // we hide/wrap the micro-service under a unified AiTube API
8
  export async function POST(req: NextRequest, res: NextResponse) {
 
9
 
10
  const qs = queryString.parseUrl(req.url || "")
11
  const query = (qs || {}).query
12
 
13
- let format: SupportedExportFormat = defaultExportFormat
14
- try {
15
- format = decodeURIComponent(query?.f?.toString() || defaultExportFormat).trim() as SupportedExportFormat
16
- if (format !== "mp4" && format !== "webm") {
17
- format = defaultExportFormat
18
- }
19
- } catch (err) {}
20
 
21
  // let's call our micro-service, which is currently open bar.
22
  const result = await fetch(
 
1
  import { NextResponse, NextRequest } from "next/server"
2
  import queryString from "query-string"
3
 
4
+ import { parseSupportedExportFormat } from "@/app/api/parsers/parseSupportedExportFormat"
5
+ import { throwIfInvalidToken } from "@/app/api/v1/auth/throwIfInvalidToken"
6
 
7
  // we hide/wrap the micro-service under a unified AiTube API
8
  export async function POST(req: NextRequest, res: NextResponse) {
9
+ await throwIfInvalidToken(req.headers.get("Authorization"))
10
 
11
  const qs = queryString.parseUrl(req.url || "")
12
  const query = (qs || {}).query
13
 
14
+ const format = parseSupportedExportFormat(query?.f)
 
 
 
 
 
 
15
 
16
  // let's call our micro-service, which is currently open bar.
17
  const result = await fetch(
src/app/api/v1/types.ts ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { ClapSegmentCategory } from "@aitube/clap"
2
+
3
// A simplified representation of a story entity (character, location..)
// as hallucinated by the LLM (see the entities systemPrompt for the schema).
export type LatentEntity = {
  name: string
  category: ClapSegmentCategory // per the prompt schema: "character" or "location"
  image: string // Stable Diffusion-style visual description of the entity
  audio: string // textual description of what/how the entity sounds like
  shots: number[] // IDs of the shots in which the entity is present
}
10
+
11
// A simplified view of one shot of a story,
// used to pass a compact story representation to a LLM.
export type LatentStory = {
  title: string // short title/caption of the shot
  image: string // storyboard image prompt for the shot
  voice: string // voice-over / dialogue text for the shot
}
src/app/latent/search/page.tsx CHANGED
@@ -5,7 +5,7 @@ import { LatentQueryProps } from "@/types/general"
5
 
6
  import { Main } from "../../main"
7
  import { getNewMediaInfo } from "../../api/generators/search/getNewMediaInfo"
8
- import { getToken } from "../../api/auth/getToken"
9
 
10
  // https://jmswrnr.com/blog/protecting-next-js-api-routes-query-parameters
11
 
 
5
 
6
  import { Main } from "../../main"
7
  import { getNewMediaInfo } from "../../api/generators/search/getNewMediaInfo"
8
+ import { getToken } from "../../api/v1/auth/getToken"
9
 
10
  // https://jmswrnr.com/blog/protecting-next-js-api-routes-query-parameters
11
 
src/app/latent/watch/page.tsx CHANGED
@@ -6,7 +6,7 @@ import { parseBasicSearchResult } from '@/app/api/parsers/parseBasicSearchResult
6
 
7
  import { Main } from "../../main"
8
  import { getNewMediaInfo } from "../../api/generators/search/getNewMediaInfo"
9
- import { getToken } from "../../api/auth/getToken"
10
 
11
  // https://jmswrnr.com/blog/protecting-next-js-api-routes-query-parameters
12
 
 
6
 
7
  import { Main } from "../../main"
8
  import { getNewMediaInfo } from "../../api/generators/search/getNewMediaInfo"
9
+ import { getToken } from "../../api/v1/auth/getToken"
10
 
11
  // https://jmswrnr.com/blog/protecting-next-js-api-routes-query-parameters
12
 
src/types/general.ts CHANGED
@@ -54,6 +54,8 @@ export interface RenderRequest {
54
  wait: boolean // wait until the job is completed
55
 
56
  analyze: boolean // analyze the image to generate a caption (optional)
 
 
57
  }
58
 
59
  export interface ImageSegment {
 
54
  wait: boolean // wait until the job is completed
55
 
56
  analyze: boolean // analyze the image to generate a caption (optional)
57
+
58
+ identityImage: string // reference image for the main entity
59
  }
60
 
61
  export interface ImageSegment {