// VideoChain-API — src/types.mts
// Shared type definitions for the VideoChain API.
// (last upstream change: "use PuLIB for the turbo mode", commit 1373ff5)
// Lifecycle status of a video job.
// Note that 'abort', 'delete' and 'pause' are *orders* rather than settled
// states: a worker task may still be processing the video when they are set.
export type VideoStatus =
| 'pending'
| 'abort' // this is an order (the video might still be processed by a task)
| 'delete' // this is an order (the video might still be processed by a task)
| 'pause' // this is an order (the video might still be processed by a task)
| 'completed'
| 'unknown'
// All supported transition effects between shots.
// The names appear to mirror GL-Transitions effect names, lowercased —
// TODO(review): confirm the renderer maps them 1:1 to GL-Transitions shaders.
export type VideoTransition =
| 'dissolve'
| 'bookflip'
| 'bounce'
| 'bowtiehorizontal'
| 'bowtievertical'
| 'bowtiewithparameter'
| 'butterflywavescrawler'
| 'circlecrop'
| 'colourdistance'
| 'crazyparametricfun'
| 'crosszoom'
| 'directional'
| 'directionalscaled'
| 'doomscreentransition'
| 'dreamy'
| 'dreamyzoom'
| 'edgetransition'
| 'filmburn'
| 'filmburnglitchdisplace'
| 'glitchmemories'
| 'gridflip'
| 'horizontalclose'
| 'horizontalopen'
| 'invertedpagecurl'
| 'leftright'
| 'linearblur'
| 'mosaic'
| 'overexposure'
| 'polkadotscurtain'
| 'radial'
| 'rectangle'
| 'rectanglecrop'
| 'rolls'
| 'rotatescalevanish'
| 'simplezoom'
| 'simplezoomout'
| 'slides'
| 'staticfade'
| 'stereoviewer'
| 'swirl'
| 'tvstatic'
| 'topbottom'
| 'verticalclose'
| 'verticalopen'
| 'waterdrop'
| 'waterdropzoomincircles'
| 'zoomleftwipe'
| 'zoomrigthwipe' // NOTE(review): "rigth" looks like a typo for "right", but renaming the literal would break existing clients — confirm against the renderer before changing
| 'angular'
| 'burn'
| 'cannabisleaf'
| 'circle'
| 'circleopen'
| 'colorphase'
| 'coordfromin'
| 'crosshatch'
| 'crosswarp'
| 'cube'
| 'directionaleasing'
| 'directionalwarp'
| 'directionalwipe'
| 'displacement'
| 'doorway'
| 'fade'
| 'fadecolor'
| 'fadegrayscale'
| 'flyeye'
| 'heart'
| 'hexagonalize'
| 'kaleidoscope'
| 'luma'
| 'luminance_melt'
| 'morph'
| 'mosaic_transition'
| 'multiply_blend'
| 'perlin'
| 'pinwheel'
| 'pixelize'
| 'polar_function'
| 'powerkaleido'
| 'randomnoisex'
| 'randomsquares'
| 'ripple'
| 'rotatetransition'
| 'rotate_scale_fade'
| 'scalein'
| 'squareswire'
| 'squeeze'
| 'static_wipe'
| 'swap'
| 'tangentmotionblur'
| 'undulatingburnout'
| 'wind'
| 'windowblinds'
| 'windowslice'
| 'wipedown'
| 'wipeleft'
| 'wiperight'
| 'wipeup'
| 'x_axistranslation'
// Creative/generation parameters for a single shot — the user-facing half of
// a shot. Engine-managed bookkeeping lives in VideoShotData; the two are
// combined into VideoShot.
export type VideoShotMeta = {
shotPrompt: string
// inputVideo?: string
// background, weather, lights, time of the day, etc
environmentPrompt: string
// camera parameters, angle, type of shot etc
photographyPrompt: string
// dynamic elements of the scene, movement etc
actionPrompt: string
// describe the background audio (crowd, birds, wind, sea etc..)
backgroundAudioPrompt: string
// describe the foreground audio (cars revving, footsteps, objects breaking, explosion etc)
foregroundAudioPrompt: string
// describe the main actor visible in the shot (optional)
actorPrompt: string
// describe the main actor voice (man, woman, old, young, amused, annoyed.. etc)
actorVoicePrompt: string
// describe the main actor dialogue line
actorDialoguePrompt: string
// generation seed — presumably for reproducible output; confirm with the renderer
seed: number
noise: boolean // add movie noise
noiseAmount: number // noise strength (default is 2, and 10 is very visible)
durationMs: number // in milliseconds
// number of generation steps — the sequence-level field documents 10-50; TODO confirm the same range applies here
steps: number
fps: number // 8, 12, 24, 30, 60
resolution: string // {width}x{height} (256, 512, 576, 720, 1080)
introTransition: VideoTransition
introDurationMs: number // in milliseconds
}
// Engine-managed state for a shot: identity, pipeline progress flags,
// and lifecycle bookkeeping. Complements VideoShotMeta.
export type VideoShotData = {
// must be unique
id: string
sequenceId: string
ownerId: string
fileName: string
// used to check compatibility
version: number
// for internal use
// pipeline stage flags — presumably flipped to true as each stage completes (TODO confirm)
hasGeneratedPreview: boolean
hasGeneratedVideo: boolean
hasUpscaledVideo: boolean
hasGeneratedBackgroundAudio: boolean
hasGeneratedForegroundAudio: boolean
hasGeneratedActor: boolean
hasInterpolatedVideo: boolean
hasAddedAudio: boolean
hasPostProcessedVideo: boolean
// progress bookkeeping
nbCompletedSteps: number
nbTotalSteps: number
progressPercent: number
// timestamps — string format not shown here; presumably ISO 8601 (TODO confirm against producer)
createdAt: string
completedAt: string
completed: boolean
error: string
}
export type VideoShot = VideoShotMeta & VideoShotData
// Creative/generation parameters that apply to the whole movie (sequence).
// Several fields intentionally mirror VideoShotMeta, acting as sequence-wide
// defaults/settings. Engine-managed state lives in VideoSequenceData.
export type VideoSequenceMeta = {
// describe the whole movie
videoPrompt: string
// describe the background audio (crowd, birds, wind, sea etc..)
backgroundAudioPrompt: string
// describe the foreground audio (cars revving, footsteps, objects breaking, explosion etc)
foregroundAudioPrompt: string
// describe the main actor visible in the shot (optional)
actorPrompt: string
// describe the main actor voice (man, woman, old, young, amused, annoyed.. etc)
actorVoicePrompt: string
// describe the main actor dialogue line
actorDialoguePrompt: string
seed: number
noise: boolean // add movie noise
noiseAmount: number // noise strength (default is 2, and 10 is very visible)
steps: number // between 10 and 50
fps: number // 8, 12, 24, 30, 60
resolution: string // 256, 512, 576, 720, 1080
outroTransition: VideoTransition
outroDurationMs: number // in milliseconds
}
// Engine-managed state for a sequence: identity, status, and progress.
// Complements VideoSequenceMeta.
export type VideoSequenceData = {
// must be unique
id: string
ownerId: string
fileName: string
// used to check compatibility
version: number
status: VideoStatus
// pipeline stage flags
hasGeneratedSpecs: boolean
hasAssembledVideo: boolean
// progress bookkeeping (note: no nbTotalShots here — total presumably comes from the shots list)
nbCompletedShots: number
progressPercent: number
// timestamps — presumably ISO 8601 strings (TODO confirm against producer)
createdAt: string
completedAt: string
completed: boolean
error: string
}
// A complete sequence: creative parameters plus engine-managed state.
export type VideoSequence = VideoSequenceMeta & VideoSequenceData
// Request body to change a video's status (e.g. to order an abort/delete/pause).
export type VideoStatusRequest = {
status: VideoStatus
}
// Minimal success/error envelope shared by API endpoints.
export type GenericAPIResponse = {
success?: boolean
error?: string
}
// Request payload for video creation: every field is optional,
// and shot/sequence parameters may themselves be partial.
export type VideoAPIRequest = Partial<{
prompt: string
sequence: Partial<VideoSequenceMeta>
shots: Array<Partial<VideoShotMeta>>
}>
// A full video: the sequence plus all of its shots.
export type Video = VideoSequence & {
shots: VideoShot[]
}
// Coordinate projection used for rendering.
export type ProjectionMode = 'cartesian' | 'spherical'
// Render cache policy: reuse an entry, recompute and store, or bypass the cache.
export type CacheMode = "use" | "renew" | "ignore"
// Parameters for a single render job (image or short clip — see nbFrames).
export type RenderRequest = {
prompt: string
// unused for now
negativePrompt: string
// image used for the consistent identity of the main entity (optional)
// NOTE(review): encoding not shown here — other image fields in this file are base64; confirm
identityImage: string
// whether to use video segmentation
// disabled (default)
// firstframe: we only analyze the first frame
// allframes: we analyze all the frames
segmentation: 'disabled' | 'firstframe' | 'allframes'
// segmentation will only be executed if we have a non-empty list of actionnables
// actionnables are names of things like "chest", "key", "tree", "chair" etc
actionnables: string[]
nbFrames: number
nbFPS: number
nbSteps: number // min: 1, max: 50
seed: number
width: number
height: number
// upscaling factor
// 0: no upscaling
// 1: no upscaling
// 2: 2x larger
// 3: 3x larger
// 4: 4x larger, up to 4096x4096 (warning: a PNG of this size can be 50 Mb!)
upscalingFactor: number
projection: ProjectionMode
/**
* Use turbo mode
*
* At the time of writing this will use SSD-1B + LCM
* https://huggingface.co/spaces/jbilcke-hf/fast-image-server
*/
turbo: boolean
cache: CacheMode
wait: boolean // wait until the job is completed
analyze: boolean // analyze the image to generate a caption (optional)
}
// Request to caption/analyze an image with a guiding prompt.
export type ImageAnalysisRequest = {
image: string // in base64
prompt: string
}
// Analysis result; error is set (presumably instead of result) on failure — TODO confirm
export type ImageAnalysisResponse = {
result: string
error?: string
}
// Request to analyze an audio clip with a guiding prompt.
export type SoundAnalysisRequest = {
sound: string // in base64
prompt: string
}
export type SoundAnalysisResponse = {
result: string
error?: string
}
// Request to segment an image by a list of keywords (see `actionnables` in RenderRequest).
export type ImageSegmentationRequest = {
image: string // in base64
keywords: string[]
}
// One detected segment of an image.
// NOTE(review): box/color layouts are not shown here — presumably box is [x1, y1, x2, y2]
// and color an RGB(A) tuple; confirm against the segmentation backend.
export type ImageSegment = {
id: number
box: number[]
color: number[]
label: string
score: number
}
// Lifecycle of a render: pending until the backend finishes (or fails).
export type RenderedSceneStatus = 'pending' | 'completed' | 'error'

// The output of a render job.
export type RenderedScene = {
  renderId: string
  status: RenderedSceneStatus
  assetUrl: string
  alt: string
  error: string
  maskUrl: string
  segments: ImageSegment[]
}

// A cached render, keyed by a hash (presumably of the request — TODO confirm).
export type RenderCache = {
  id: string
  hash: string
  scene: RenderedScene
}

// note: for video generation we are always going to have slow jobs,
// because we need multiple seconds, minutes, hours.. of video + audio
// but for rendering we aim at shorter delays, less than 45 seconds
// so the goal of rendering "jobs" is mostly to give the illusion that
// things go faster, by already providing some things like the background image,
// before we send the final result
export type RenderingJob = {
  scene: RenderRequest
  result: RenderedScene
  // reuse the named alias instead of repeating 'pending' | 'completed' | 'error'
  // inline, so the two status types cannot drift apart (type-identical change)
  status: RenderedSceneStatus
}
// vendor-specific types
// Frame sizes accepted by the Hotshot-XL image/video inference backend.
// Only the first seven (all 512*512-pixel-budget ratios) are officially supported.
export type HotshotImageInferenceSize =
| '320x768'
| '384x672'
| '416x608'
| '512x512'
| '608x416'
| '672x384'
| '768x320'
| '1024x1024' // custom ratio - this isn't supported / supposed to work properly
| '1024x512' // custom panoramic ratio - this isn't supported / supposed to work properly
| '1024x576' // movie ratio (16:9) this isn't supported / supposed to work properly
| '576x1024' // tiktok ratio (9:16) this isn't supported / supposed to work properly
// Aspect-ratio family of the generated video.
export type VideoOrientation =
| "portrait"
| "landscape"
| "square"
// Projection of the generated video frames.
// NOTE(review): this overlaps with ProjectionMode ('cartesian' | 'spherical') defined
// earlier in this file — presumably 'equirectangular' here plays the 'spherical' role
// for a different backend; confirm before unifying.
export type VideoProjection =
| "cartesian" // this is the default
| "equirectangular"
// Parameters for a (vendor-specific) video generation call.
export type VideoGenerationParams = {
prompt: string // can be empty
image?: string // can be empty
// optional LoRA — presumably a model identifier/path; confirm with the backend
lora?: string
style?: string
orientation: VideoOrientation
projection: VideoProjection
width: number
height: number
nbSteps: number
seed?: number
debug?: boolean
}