ai-tube / src /clap /types.ts
jbilcke's picture
update clap spec
566d763
raw
history blame
3.87 kB
export type ClapSegmentCategory = "splat" | "mesh" | "depth" | "video" | "storyboard" | "transition" | "characters" | "location" | "time" | "era" | "lighting" | "weather" | "action" | "music" | "sound" | "dialogue" | "style" | "camera" | "generic"
export type ClapOutputType = "text" | "animation" | "transition" | "image" | "video" | "audio"
export type ClapSegmentStatus =
| "to_generate"
| "to_interpolate"
| "to_upscale"
| "completed"
| "error"
export type ClapAuthor =
| "auto" // the element was edited automatically using basic if/else logical rules
| "ai" // the element was edited using a large language model
| "human" // the element was edited by a human
export type ClapAssetSource =
| "REMOTE" // http:// or https://
// note that "path" assets are potentially a security risk, they need to be treated with care
| "PATH" // a file path eg. /path or ./path/to/ or ../path/to/
| "DATA" // a data URI, starting with data:
| "PROMPT" // by default, a plain text prompt
| "EMPTY"
export type ClapModelGender =
| "male"
| "female"
| "person"
| "object"
export type ClapModelAppearance = "serious" | "neutral" | "friendly" | "chill"
// this is used for accent, style..
export type ClapModelRegion =
| "american"
| "british"
| "australian"
| "canadian"
| "indian"
| "french"
| "italian"
| "german"
| "chinese"
// note: this is all very subjective, so please use good judgment
//
// "deep" might indicate a deeper voice tone, thicker, rich in harmonics
// in this context, it is used to indicate voices that could
// be associated with African American (AADOS) characters
//
// "high" could be used for some other countries, eg. asia
export type ClapModelTimbre = "high" | "neutral" | "deep"
export type ClapVoiceVendor = "ElevenLabs" | "XTTS"
export type ClapVoice = {
name: string
gender: ClapModelGender
age: number
region: ClapModelRegion
timbre: ClapModelTimbre
appearance: ClapModelAppearance
voiceVendor: ClapVoiceVendor
voiceId: string
}
export type ClapHeader = {
format: "clap-0"
numberOfModels: number
numberOfScenes: number
numberOfSegments: number
}
export type ClapMeta = {
id: string
title: string
description: string
licence: string
orientation: string
width: number
height: number
defaultVideoModel: string
extraPositivePrompt: string[]
screenplay: string
}
export type ClapSceneEvent = {
id: string
type: "description" | "dialogue" | "action"
character?: string
description: string
behavior: string
startAtLine: number
endAtLine: number
}
export type ClapScene = {
id: string
scene: string
line: string
rawLine: string
sequenceFullText: string
sequenceStartAtLine: number
sequenceEndAtLine: number
startAtLine: number
endAtLine: number
events: ClapSceneEvent[]
}
export type ClapSegment = {
id: string
track: number
startTimeInMs: number
endTimeInMs: number
category: ClapSegmentCategory
modelId: string
sceneId: string
prompt: string
label: string
outputType: ClapOutputType
renderId: string
status: ClapSegmentStatus
assetUrl: string
assetDurationInMs: number
createdBy: ClapAuthor
editedBy: ClapAuthor
outputGain: number
seed: number
}
export type ClapModel = {
id: string
category: ClapSegmentCategory
triggerName: string
label: string
description: string
author: string
thumbnailUrl: string
seed: number
assetSourceType: ClapAssetSource
assetUrl: string
// those are only used by certain types of models
age: number
gender: ClapModelGender
region: ClapModelRegion
appearance: ClapModelAppearance
voiceVendor: ClapVoiceVendor
voiceId: string
}
export type ClapProject = {
meta: ClapMeta
models: ClapModel[]
scenes: ClapScene[]
segments: ClapSegment[]
// let's keep room for other stuff (screenplay etc)
}