// src/services/io/parseFileIntoSegments.ts
'use client'
import {
ClapAssetSource,
ClapOutputType,
ClapSegmentCategory,
ClapSegmentStatus,
isValidNumber,
newSegment,
UUID,
} from '@aitube/clap'
import {
  clapSegmentToTimelineSegment,
  SegmentVisibility,
  TimelineSegment,
  useTimeline,
  TimelineStore,
  DEFAULT_DURATION_IN_MS_PER_STEP,
} from '@aitube/timeline'
import { blobToBase64DataUri } from '@/lib/utils/blobToBase64DataUri'
import { analyzeAudio } from '../audio/analyzeAudio'
import { ResourceCategory, ResourceType } from '@aitube/clapper-services'
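
/**
 * Parses a file dropped by the user into one or more timeline segments.
 *
 * Images produce two segments: an empty video placeholder plus a storyboard
 * image on the track just below it. Audio files produce a single music
 * segment carrying the decoded AudioBuffer. Other file types are not
 * handled yet.
 */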
export async function parseFileIntoSegments({
file,
track,
startTimeInMs: maybeStartTimeInMs,
endTimeInMs: maybeEndTimeInMs,
}: {
/**
* The file to import
*/
file: File
  /**
   * Optional track row index to place the new segment(s) on
   */
  track?: number
  /**
   * Optional start time; defaults to the current timeline cursor position
   */
  startTimeInMs?: number
  /**
   * Optional end time; defaults to startTimeInMs plus the asset duration
   */
  endTimeInMs?: number
}): Promise<TimelineSegment[]> {
const timeline: TimelineStore = useTimeline.getState()
const { cursorTimestampAtInMs } = timeline
// console.log(`parseFileIntoSegments(): filename = ${file.name}`)
// console.log(`parseFileIntoSegments(): file size = ${file.size} bytes`)
// console.log(`parseFileIntoSegments(): file type = ${file.type}`)
const extension = file.name.split('.').pop()?.toLowerCase()
  // TODO: open a popup to ask if this is a voice character sample,
  // dialogue, music etc
const newSegments: TimelineSegment[] = []
switch (file.type) {
case 'image/jpeg':
case 'image/png':
case 'image/avif':
case 'image/heic':
case 'image/webp': {
      // note: `type` and `resourceCategory` are not used in this branch yet;
      // they are meant for the resource upload flow (see the commented-out
      // code at the bottom of this file)
      const type: ResourceType = 'image'
      const resourceCategory: ResourceCategory = 'control_image'
      const category = ClapSegmentCategory.STORYBOARD
const assetUrl = await blobToBase64DataUri(file)
const startTimeInMs = isValidNumber(maybeStartTimeInMs)
? maybeStartTimeInMs!
: cursorTimestampAtInMs
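      // when no end time is given, the image gets a default duration of
      // 4 timeline steps (4 × DEFAULT_DURATION_IN_MS_PER_STEP milliseconds)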
const durationInSteps = 4
const durationInMs = durationInSteps * DEFAULT_DURATION_IN_MS_PER_STEP
const endTimeInMs = isValidNumber(maybeEndTimeInMs)
? maybeEndTimeInMs!
: startTimeInMs + durationInMs
const partialVideo: Partial<TimelineSegment> = {
category: ClapSegmentCategory.VIDEO,
startTimeInMs,
endTimeInMs,
prompt: 'movie',
label: 'movie', // `${file.name.split(".")[0] || "Untitled"}`, // a short label to name the segment (optional, can be human or LLM-defined)
outputType: ClapOutputType.VIDEO,
status: ClapSegmentStatus.TO_GENERATE,
assetUrl: '',
assetDurationInMs: durationInMs,
assetSourceType: ClapAssetSource.EMPTY,
assetFileFormat: undefined,
        track: isValidNumber(track) ? track : undefined,
}
const video = await clapSegmentToTimelineSegment(newSegment(partialVideo))
if (isValidNumber(track)) {
video.track = track
}
video.outputType = ClapOutputType.VIDEO
// we assume we want it to be immediately visible
video.visibility = SegmentVisibility.VISIBLE
newSegments.push(video)
const partialStoryboard: Partial<TimelineSegment> = {
prompt: 'Storyboard', // note: this can be set later with an automatic captioning worker
startTimeInMs, // start time of the segment
endTimeInMs, // end time of the segment (startTimeInMs + durationInMs)
status: ClapSegmentStatus.COMPLETED,
// track: findFreeTrack({ segments, startTimeInMs, endTimeInMs }), // track row index
label: `${file.name}`, // a short label to name the segment (optional, can be human or LLM-defined)
category,
assetUrl,
assetDurationInMs: durationInMs,
assetSourceType: ClapAssetSource.DATA,
assetFileFormat: `${file.type}`,
        // important: we place the storyboard on the track just below the video
        track: isValidNumber(track) ? track + 1 : undefined,
}
const storyboard = await clapSegmentToTimelineSegment(
newSegment(partialStoryboard)
)
      if (isValidNumber(track)) {
        // keep the storyboard on the row below the video placeholder
        storyboard.track = track + 1
      }
storyboard.outputType = ClapOutputType.IMAGE
// we assume we want it to be immediately visible
storyboard.visibility = SegmentVisibility.VISIBLE
newSegments.push(storyboard)
break
}
case 'audio/mpeg': // this is the "official" one
case 'audio/mp3': // this is just an alias
case 'audio/wav':
case 'audio/mp4':
    case 'audio/x-mp4': // should be rare, normally it is audio/mp4
case 'audio/m4a': // shouldn't exist
    case 'audio/x-m4a': // should be rare, normally it is audio/mp4
case 'audio/webm': {
      // for a background track, an inspiration track, a voice etc
const type: ResourceType = 'audio'
const resourceCategory: ResourceCategory = 'background_music'
// TODO: add caption analysis
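      // analyzeAudio decodes the file to measure its duration (in ms and in
      // timeline steps) and estimate its tempo (BPM); the decoded AudioBuffer
      // is kept so it can be attached to the timeline segment below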
const { durationInMs, durationInSteps, bpm, audioBuffer } =
await analyzeAudio(file)
console.log('User dropped an audio sample:', {
bpm,
durationInMs,
durationInSteps,
})
      // use the provided drop time when available, otherwise fall back to
      // the current cursor position (same behavior as the image branch)
      const startTimeInMs = isValidNumber(maybeStartTimeInMs)
        ? maybeStartTimeInMs!
        : cursorTimestampAtInMs
const endTimeInMs = isValidNumber(maybeEndTimeInMs)
? maybeEndTimeInMs!
: startTimeInMs + durationInMs
      // ok, let's stop for a minute here:
      // if someone drops a .mp3, and assuming we don't yet have the UI to
      // select the category, should it default to SOUND, VOICE or MUSIC?
      // I expect people will use AI service providers for sound and voice,
      // maybe in some cases for music too, but many people will also want
      // to use their own track, e.g. to create a music video
const category = ClapSegmentCategory.MUSIC
const assetUrl = await blobToBase64DataUri(file)
const newSegmentData: Partial<TimelineSegment> = {
prompt: 'audio track',
startTimeInMs, // start time of the segment
endTimeInMs, // end time of the segment (startTimeInMs + durationInMs)
status: ClapSegmentStatus.COMPLETED,
track,
// track: findFreeTrack({ segments, startTimeInMs, endTimeInMs }), // track row index
label: `${file.name} (${Math.round(durationInMs / 1000)}s @ ${Math.round(bpm * 100) / 100} BPM)`, // a short label to name the segment (optional, can be human or LLM-defined)
category,
assetUrl,
        assetDurationInMs: durationInMs,
assetSourceType: ClapAssetSource.DATA,
assetFileFormat: `${file.type}`,
}
const timelineSegment = await clapSegmentToTimelineSegment(
newSegment(newSegmentData)
)
if (isValidNumber(track)) {
timelineSegment.track = track
}
timelineSegment.outputType = ClapOutputType.AUDIO
timelineSegment.outputGain = 1.0
timelineSegment.audioBuffer = audioBuffer
// we assume we want it to be immediately visible
timelineSegment.visibility = SegmentVisibility.VISIBLE
newSegments.push(timelineSegment)
break
}
    case 'text/plain': {
      // for dialogue, prompts...
      // TODO: not handled yet; `type` and `resourceCategory` are placeholders
      // for the future text import flow
      const type: ResourceType = 'text'
      const resourceCategory: ResourceCategory = 'text_prompt'
break
}
default: {
console.log(`unrecognized file type "${file.type}"`)
break
}
}
  // note: ideally we would always upload the file, because even if it is an
  // unhandled format (eg. a PDF) it can still be part of the project as a
  // resource for humans (inspiration, guidelines etc)
/*
const id = UUID()
const fileName = `${id}.${extension}`
const storage = `resources`
const filePath = `${type}/${fileName}`
const { data, error } = await supabase
.storage
.from('avatars')
.upload(filePath, file, {
cacheControl: '3600',
upsert: false
})
*/
  // Note: uploading is optional, some file types don't need it (eg. a text prompt)
return newSegments
}
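
// Usage sketch (hypothetical, not part of this module): a drag-and-drop
// handler could call parseFileIntoSegments() for each dropped file and add
// the resulting segments to the timeline, e.g.:
//
//   for (const file of droppedFiles) {
//     const segments = await parseFileIntoSegments({
//       file,
//       track: 1,
//       startTimeInMs: dropTimeInMs,
//     })
//     // ...then register `segments` with the timeline store
//   }
//
// Here `droppedFiles` and `dropTimeInMs` are assumed to come from the drop
// event; the exact integration depends on the caller.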