jbilcke-hf HF staff committed on
Commit
e02a62b
1 Parent(s): e40bd21

work on clap file support

Browse files
src/clap/parseClap.ts CHANGED
@@ -2,7 +2,7 @@ import YAML from "yaml"
2
  import { v4 as uuidv4 } from "uuid"
3
 
4
  import { ClapHeader, ClapMeta, ClapModel, ClapProject, ClapSegment } from "./types"
5
- import { getValidNumber } from "@/lib/getValidNumber";
6
 
7
  /**
8
  * import a Clap file (from a plain text string)
@@ -50,7 +50,7 @@ export async function parseClap(inputStringOrBlob: string | Blob): Promise<ClapP
50
  width: getValidNumber(maybeClapMeta.width, 256, 8192, 1024),
51
  height: getValidNumber(maybeClapMeta.height, 256, 8192, 576),
52
  defaultVideoModel: typeof maybeClapMeta.defaultVideoModel === "string" ? maybeClapMeta.defaultVideoModel : "SVD",
53
- extraPositivePrompt: Array.isArray(maybeClapMeta.extraPositivePrompt) ? maybeClapMeta.extraPositivePrompt : [],
54
  }
55
 
56
  /*
@@ -83,31 +83,39 @@ export async function parseClap(inputStringOrBlob: string | Blob): Promise<ClapP
83
 
84
  const clapModels: ClapModel[] = maybeModels.map(({
85
  id,
86
- imageType,
87
- audioType,
88
  category,
89
  triggerName,
90
  label,
91
  description,
92
  author,
93
  thumbnailUrl,
94
- storageUrl,
95
- imagePrompt,
96
- audioPrompt,
 
 
 
 
 
 
97
  }) => ({
98
  // TODO: we should verify each of those, probably
99
  id,
100
- imageType,
101
- audioType,
102
  category,
103
  triggerName,
104
  label,
105
  description,
106
  author,
107
  thumbnailUrl,
108
- storageUrl,
109
- imagePrompt,
110
- audioPrompt,
 
 
 
 
 
 
111
  }))
112
 
113
 
@@ -148,3 +156,4 @@ export async function parseClap(inputStringOrBlob: string | Blob): Promise<ClapP
148
  segments: clapSegments
149
  }
150
  }
 
 
2
  import { v4 as uuidv4 } from "uuid"
3
 
4
  import { ClapHeader, ClapMeta, ClapModel, ClapProject, ClapSegment } from "./types"
5
+ import { getValidNumber } from "@/lib/getValidNumber"
6
 
7
  /**
8
  * import a Clap file (from a plain text string)
 
50
  width: getValidNumber(maybeClapMeta.width, 256, 8192, 1024),
51
  height: getValidNumber(maybeClapMeta.height, 256, 8192, 576),
52
  defaultVideoModel: typeof maybeClapMeta.defaultVideoModel === "string" ? maybeClapMeta.defaultVideoModel : "SVD",
53
+ extraPositivePrompt: Array.isArray(maybeClapMeta.extraPositivePrompt) ? maybeClapMeta.extraPositivePrompt : []
54
  }
55
 
56
  /*
 
83
 
84
  const clapModels: ClapModel[] = maybeModels.map(({
85
  id,
 
 
86
  category,
87
  triggerName,
88
  label,
89
  description,
90
  author,
91
  thumbnailUrl,
92
+ seed,
93
+ assetSourceType,
94
+ assetUrl,
95
+ age,
96
+ gender,
97
+ region,
98
+ appearance,
99
+ voiceVendor,
100
+ voiceId,
101
  }) => ({
102
  // TODO: we should verify each of those, probably
103
  id,
 
 
104
  category,
105
  triggerName,
106
  label,
107
  description,
108
  author,
109
  thumbnailUrl,
110
+ seed,
111
+ assetSourceType,
112
+ assetUrl,
113
+ age,
114
+ gender,
115
+ region,
116
+ appearance,
117
+ voiceVendor,
118
+ voiceId,
119
  }))
120
 
121
 
 
156
  segments: clapSegments
157
  }
158
  }
159
+
src/clap/serializeClap.ts CHANGED
@@ -14,30 +14,38 @@ export async function serializeClap({
14
  // to make sure we generate a valid clap file
15
  const clapModels: ClapModel[] = models.map(({
16
  id,
17
- imageType,
18
- audioType,
19
  category,
20
  triggerName,
21
  label,
22
  description,
23
  author,
24
  thumbnailUrl,
25
- storageUrl,
26
- imagePrompt,
27
- audioPrompt,
 
 
 
 
 
 
28
  }) => ({
29
  id,
30
- imageType,
31
- audioType,
32
  category,
33
  triggerName,
34
  label,
35
  description,
36
  author,
37
  thumbnailUrl,
38
- storageUrl,
39
- imagePrompt,
40
- audioPrompt,
 
 
 
 
 
 
41
  }))
42
 
43
  const clapSegments: ClapSegment[] = segments.map(({
 
14
  // to make sure we generate a valid clap file
15
  const clapModels: ClapModel[] = models.map(({
16
  id,
 
 
17
  category,
18
  triggerName,
19
  label,
20
  description,
21
  author,
22
  thumbnailUrl,
23
+ seed,
24
+ assetSourceType,
25
+ assetUrl,
26
+ age,
27
+ gender,
28
+ region,
29
+ appearance,
30
+ voiceVendor,
31
+ voiceId,
32
  }) => ({
33
  id,
 
 
34
  category,
35
  triggerName,
36
  label,
37
  description,
38
  author,
39
  thumbnailUrl,
40
+ seed,
41
+ assetSourceType,
42
+ assetUrl,
43
+ age,
44
+ gender,
45
+ region,
46
+ appearance,
47
+ voiceVendor,
48
+ voiceId,
49
  }))
50
 
51
  const clapSegments: ClapSegment[] = segments.map(({
src/clap/types.ts CHANGED
@@ -2,8 +2,60 @@
2
  export type ClapSegmentCategory = "render" | "preview" | "characters" | "location" | "time" | "era" | "lighting" | "weather" | "action" | "music" | "sound" | "dialogue" | "style" | "camera" | "generic"
3
  export type ClapOutputType = "text" | "movement" | "image" | "video" | "audio"
4
  export type ClapSegmentStatus = "pending" | "completed" | "error"
5
- export type ClapImageType = "reference_image" | "text_prompt" | "other"
6
- export type ClapAudioType = "reference_audio" | "text_prompt" | "other"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  export type ClapHeader = {
9
  format: "clap-0"
@@ -41,17 +93,24 @@ export type ClapSegment = {
41
 
42
  export type ClapModel = {
43
  id: string
44
- imageType: ClapImageType
45
- audioType: ClapAudioType
46
  category: ClapSegmentCategory
47
  triggerName: string
48
  label: string
49
  description: string
50
  author: string
51
  thumbnailUrl: string
52
- storageUrl: string
53
- imagePrompt: string
54
- audioPrompt: string
 
 
 
 
 
 
 
 
 
55
  }
56
 
57
  export type ClapProject = {
 
2
  export type ClapSegmentCategory = "render" | "preview" | "characters" | "location" | "time" | "era" | "lighting" | "weather" | "action" | "music" | "sound" | "dialogue" | "style" | "camera" | "generic"
3
  export type ClapOutputType = "text" | "movement" | "image" | "video" | "audio"
4
  export type ClapSegmentStatus = "pending" | "completed" | "error"
5
+
6
+ export type ClapAssetSource =
7
+ | "REMOTE" // http:// or https://
8
+
9
+ // note that "path" assets are potentially a security risk, they need to be treated with care
10
+ | "PATH" // a file path eg. /path or ./path/to/ or ../path/to/
11
+
12
+ | "DATA" // a data URI, starting with data:
13
+
14
+ | "PROMPT" // by default, a plain text prompt
15
+
16
+ | "EMPTY"
17
+
18
+ export type ClapModelGender =
19
+ | "male"
20
+ | "female"
21
+ | "person"
22
+ | "object"
23
+
24
+ export type ClapModelAppearance = "serious" | "neutral" | "friendly" | "chill"
25
+
26
+ // this is used for accent, style, etc.
27
+ export type ClapModelRegion =
28
+ | "american"
29
+ | "british"
30
+ | "australian"
31
+ | "canadian"
32
+ | "indian"
33
+ | "french"
34
+ | "italian"
35
+ | "german"
36
+ | "chinese"
37
+
38
+ // note: this is all very subjective, so please use good judgment
39
+ //
40
+ // "deep" might indicate a deeper voice tone, thicker, rich in harmonics
41
+ // in this context, it is used to indicate voices that could
42
+ // be associated with African American (AADOS) characters
43
+ //
44
+ // "high" could be used for some other regions, e.g. Asia
45
+ export type ClapModelTimbre = "high" | "neutral" | "deep"
46
+
47
+ export type ClapVoiceVendor = "ElevenLabs" | "XTTS"
48
+
49
+ export type ClapVoice = {
50
+ name: string
51
+ gender: ClapModelGender
52
+ age: number
53
+ region: ClapModelRegion
54
+ timbre: ClapModelTimbre
55
+ appearance: ClapModelAppearance
56
+ voiceVendor: ClapVoiceVendor
57
+ voiceId: string
58
+ }
59
 
60
  export type ClapHeader = {
61
  format: "clap-0"
 
93
 
94
  export type ClapModel = {
95
  id: string
 
 
96
  category: ClapSegmentCategory
97
  triggerName: string
98
  label: string
99
  description: string
100
  author: string
101
  thumbnailUrl: string
102
+ seed: number
103
+
104
+ assetSourceType: ClapAssetSource
105
+ assetUrl: string
106
+
107
+ // those are only used by certain types of models
108
+ age: number
109
+ gender: ClapModelGender
110
+ region: ClapModelRegion
111
+ appearance: ClapModelAppearance
112
+ voiceVendor: ClapVoiceVendor
113
+ voiceId: string
114
  }
115
 
116
  export type ClapProject = {
src/lib/getClapAssetSourceType.ts ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { ClapAssetSource } from "@/clap/types"
2
+
3
/**
 * Classify an asset reference by the kind of source it points to.
 *
 * The input is coerced to a string (falsy values become "") and surrounding
 * whitespace is ignored, so a URL pasted with a stray space or newline is
 * still recognised instead of being mistaken for a prompt.
 *
 * @param input - raw asset value: a URL, a file path, a data URI or a plain text prompt
 * @returns
 *  - "EMPTY"  when nothing is left once whitespace is removed
 *  - "REMOTE" for values starting with http:// or https://
 *  - "PATH"   for values starting with "/", "./" or "../"
 *  - "DATA"   for values starting with data:
 *  - "PROMPT" for anything else
 */
export function getClapAssetSourceType(input: string): ClapAssetSource {
  const str = `${input || ""}`.trim()

  if (!str) {
    return "EMPTY"
  }

  if (str.startsWith("https://") || str.startsWith("http://")) {
    return "REMOTE"
  }

  // note that "path" assets are potentially a security risk: they need to be treated with care
  if (str.startsWith("/") || str.startsWith("../") || str.startsWith("./")) {
    return "PATH"
  }

  if (str.startsWith("data:")) {
    return "DATA"
  }

  return "PROMPT"
}

/**
 * Original (misspelled) name of getClapAssetSourceType, kept so that
 * existing imports keep working.
 *
 * @deprecated use getClapAssetSourceType instead
 */
export function getClapAssetSourceSource(input: string): ClapAssetSource {
  return getClapAssetSourceType(input)
}
src/types/atoms.ts ADDED
File without changes