clapper / src /services /io /extractScenesFromVideo.ts
jbilcke-hf's picture
jbilcke-hf HF staff
first version of the video analyzer
history blame
No virus
9.37 kB
'use client'
import { FFmpeg } from '@ffmpeg/ffmpeg'
import { toBlobURL } from '@ffmpeg/util'
import mediaInfoFactory, { VideoTrack, AudioTrack } from 'mediainfo.js'
import { fileDataToBase64 } from './fileDataToBase64'
/**
 * Options accepted by extractScenesFromVideo and forwarded to its helpers.
 */
interface ExtractorOptions {
// Image format of extracted frames; processScene matches frame files by the `.${frameFormat}` extension.
frameFormat: 'png' | 'jpg'
// Upper bound on output width, used as `min(maxWidth,iw)` in the ffmpeg scale filter.
maxWidth: number
// Upper bound on output height, used together with `ih` in the ffmpeg scale filter.
maxHeight: number
// Desired number of frames per scene; processScene divides the scene duration by it to get a frame interval.
framesPerScene: number
// Called after each scene is handled with a rounded percentage: ((i + 1) / sceneCount) * 100.
onProgress?: (progress: number) => void
// When true, additional diagnostic console.log output is emitted.
debug?: boolean
// When true, detectCropParameters runs and any detected `crop=` filter is prepended to the filter chain.
autoCrop?: boolean
// Threshold for the `select='gt(scene,…)'` filter; falls back to 0.2 when unset (or 0, because `||` is used).
sceneThreshold?: number
// Minimum scene length; multiplied by 1000 before being compared with millisecond gaps, so it is in seconds.
// Falls back to 1 when unset (or 0, because `||` is used).
minSceneDuration?: number
/**
 * One detected scene, as returned by processScene.
 */
interface SceneData {
// Index of the scene, as passed to processScene.
sceneIndex: number
// Scene start, in milliseconds (rounded with Math.round in processScene).
startTimeInMs: number
// Scene end, in milliseconds (rounded with Math.round in processScene).
endTimeInMs: number
// The scene clip as a `data:video/mp4;base64,…` URI.
video: string
// Extracted frames as strings — presumably base64-encoded via fileDataToBase64; TODO confirm exact format.
frames: string[]
/**
 * Detects scene changes in a video using ffmpeg.wasm and processes each scene
 * through processScene.
 *
 * Steps visible here: read the duration with getVideoDuration, load FFmpeg,
 * write the blob to the in-memory file `input.mp4`, optionally detect crop
 * parameters, build the filter chain, detect scene timestamps, then process
 * the scenes one by one while reporting progress.
 *
 * NOTE(review): several lines of this function (closing braces, call
 * arguments, the place where results are added to `scenes`) are missing from
 * this view of the file; the comments below only describe what is visible.
 *
 * @param videoBlob - the video to analyze
 * @param options - extraction settings (see ExtractorOptions)
 * @returns the `scenes` array
 * @throws Error when the duration cannot be determined; other errors are
 *   logged and rethrown by the outer catch
 */
export async function extractScenesFromVideo(
videoBlob: Blob,
options: ExtractorOptions
): Promise<SceneData[]> {
const ffmpeg = new FFmpeg()
// With an empty base URL the core script below is requested as `/ffmpeg-core.js`.
const baseURL = ''
try {
console.log(`getting duration..`)
// NOTE(review): options.debug is not forwarded, so getVideoDuration runs with its default (debug = false).
const duration = await getVideoDuration(videoBlob)
// A duration of 0 is falsy and therefore treated as a failure.
if (!duration) {
throw new Error(`couldn't get the video duration`)
if (options.debug) {
console.log('Video duration in seconds:', duration)
console.log(`loading FFmpeg..`)
await ffmpeg.load({
coreURL: await toBlobURL(`${baseURL}/ffmpeg-core.js`, 'text/javascript'),
wasmURL: await toBlobURL(
if (options.debug) {
console.log('FFmpeg loaded')
// Copy the whole blob into FFmpeg's virtual file system under a fixed name.
const videoUint8Array = new Uint8Array(await videoBlob.arrayBuffer())
await ffmpeg.writeFile('input.mp4', videoUint8Array)
// NOTE(review): this message is logged before the autoCrop check below.
console.log(`detecting crop parameters..`)
let cropParams = ''
if (options.autoCrop) {
cropParams = await detectCropParameters(ffmpeg, options)
// `||` means an explicit 0 also falls back to the default.
const sceneThreshold = options.sceneThreshold || 0.2
const minSceneDuration = options.minSceneDuration || 1
// Keep only frames whose scene-change score exceeds the threshold.
const sceneDetectionFilter = `select='gt(scene,${sceneThreshold})'`
// NOTE(review): the second quote looks misplaced (`:min'(` here vs `'min(` for the width) — confirm against
// the ffmpeg scale filter syntax before relying on the height clamp.
const scaleFilter = `scale='min(${options.maxWidth},iw)':min'(${options.maxHeight},ih)':force_original_aspect_ratio=decrease`
let filterChain = `${sceneDetectionFilter},${scaleFilter}`
// When crop parameters were detected, the crop filter runs first in the chain.
if (cropParams) {
filterChain = `crop=${cropParams},${filterChain}`
console.log(`detecting scenes..`)
const sceneTimestamps = await detectScenes(
console.log(`detected ${sceneTimestamps.length} scenes`)
const scenes: SceneData[] = []
// Timestamps are in milliseconds; each scene runs from its own timestamp to the next one.
for (let i = 0; i < sceneTimestamps.length; i++) {
const startTime = sceneTimestamps[i]
// The last entry has no successor, so it ends at the total duration (seconds converted to ms).
// NOTE(review): detectScenes already appends `duration * 1000`; if that entry survives its filter,
// the final iteration would have startTime === endTime — verify.
const endTime =
i < sceneTimestamps.length - 1
? sceneTimestamps[i + 1]
: duration * 1000
const sceneDuration = endTime - startTime
console.log(`processing scene ${i}`)
// A failure in one scene is logged here rather than rethrown.
try {
const sceneData = await processScene(
} catch (error) {
console.error(`Error processing scene ${i}:`, error)
// Report progress as a rounded percentage of timestamps handled so far.
options.onProgress?.(Math.round(((i + 1) / sceneTimestamps.length) * 100))
if (options.debug) {
console.log(`Total scenes processed: ${scenes.length}`)
return scenes
} catch (error) {
// NOTE(review): the logged name differs from this function's name (extractScenesFromVideo).
console.error('Error in extractFramesAndScenesFromVideo:', error)
throw error
} finally {
// Always attempt to shut FFmpeg down; a failure to terminate is only logged.
try {
await ffmpeg.terminate()
} catch (error) {
console.error('Error terminating FFmpeg:', error)
/**
 * Reads the duration of a video blob with mediainfo.js by inspecting its
 * Audio and Video tracks.
 *
 * NOTE(review): parts of this function are missing from this view (the
 * expression tested with `instanceof ArrayBuffer`, the argument of
 * `new Uint8Array(`, the iterable of the track loop, and the last operand of
 * the final condition); they are left untouched here.
 *
 * @param videoBlob - the video to inspect
 * @param debug - when true, logs the analysis call, its result and each track
 * @returns the `duration` variable, which starts at 0 and is overwritten by a
 *   track duration that passes the final check. The caller treats the value
 *   as seconds (it logs it as seconds and multiplies by 1000 for ms).
 */
async function getVideoDuration(
videoBlob: Blob,
debug: boolean = false
): Promise<number> {
// Initialize MediaInfo
const mediaInfo = await mediaInfoFactory({
format: 'object',
// The wasm module is served from a fixed absolute path.
locateFile: () => {
return '/wasm/MediaInfoModule.wasm'
// Get video duration using MediaInfo
// analyzeData is given the total size and a callback that reads one chunk of the blob.
const getSize = () => videoBlob.size
const readChunk = (chunkSize: number, offset: number) =>
new Promise<Uint8Array>((resolve, reject) => {
// Read only the requested slice of the blob through a FileReader.
const reader = new FileReader()
reader.onload = (event) => {
if ( instanceof ArrayBuffer) {
resolve(new Uint8Array(
} else {
reject(new Error('Failed to read chunk'))
reader.onerror = (error) => reject(error)
reader.readAsArrayBuffer(videoBlob.slice(offset, offset + chunkSize))
if (debug) {
console.log('calling await mediaInfo.analyzeData(getSize, readChunk)')
const result = await mediaInfo.analyzeData(getSize, readChunk)
if (debug) {
console.log('result = ', result)
let duration: number = 0
// The `|| []` fallback makes the loop a no-op when the track list is absent.
for (const track of || []) {
if (debug) {
console.log('track = ', track)
let maybeDuration: number = 0
// Only Audio and Video tracks are considered; a missing or falsy Duration yields 0.
if (track['@type'] === 'Audio') {
const audioTrack = track as AudioTrack
// Duration is stringified and re-parsed with parseFloat.
maybeDuration = audioTrack.Duration
? parseFloat(`${audioTrack.Duration || 0}`)
: 0
} else if (track['@type'] === 'Video') {
const videoTrack = track as VideoTrack
maybeDuration = videoTrack.Duration
? parseFloat(`${videoTrack.Duration || 0}`)
: 0
// Accept the candidate only if it is a finite number (a further condition is not visible here).
if (
typeof maybeDuration === 'number' &&
isFinite(maybeDuration) &&
) {
duration = maybeDuration
return duration
/**
 * Runs an ffmpeg crop-detection command and extracts the crop parameters
 * from FFmpeg's log output.
 *
 * NOTE(review): the contents of `cropDetectCommand` and the debug log call
 * are missing from this view of the file.
 *
 * @param ffmpeg - a loaded FFmpeg instance
 * @param options - only `options.debug` is read in the visible code
 * @returns the last `W:H:X:Y` value matched in the logs, or '' when no
 *   `crop=` line was seen
 */
async function detectCropParameters(
ffmpeg: FFmpeg,
options: ExtractorOptions
): Promise<string> {
const cropDetectCommand = [
if (options.debug) {
'Executing crop detection command:',
cropDetectCommand.join(' ')
let cropParams = ''
// Watch FFmpeg's log stream for `crop=W:H:X:Y`; every match overwrites the previous one.
// NOTE(review): no matching removal of this listener is visible here — confirm it does not
// accumulate across calls.
ffmpeg.on('log', ({ message }) => {
const cropMatch = message.match(/crop=(\d+:\d+:\d+:\d+)/)
if (cropMatch) {
cropParams = cropMatch[1]
// The listener above fills cropParams while this command runs.
await ffmpeg.exec(cropDetectCommand)
if (options.debug) {
console.log('Detected crop parameters:', cropParams)
return cropParams
/**
 * Runs the scene-detection ffmpeg command and turns the metadata it writes
 * to `scenes.txt` into a list of timestamps in milliseconds.
 *
 * NOTE(review): the contents of `extractScenesCommand`, the debug log call
 * and the step that splits the metadata into lines are missing from this
 * view of the file.
 *
 * @param ffmpeg - a loaded FFmpeg instance
 * @param filterChain - ffmpeg filter chain built by the caller
 * @param options - `debug` and `minSceneDuration` are read in the visible code
 * @param duration - total video duration; multiplied by 1000 below, so in seconds
 * @returns the timestamps (ms) that pass the minimum-duration filter
 */
async function detectScenes(
ffmpeg: FFmpeg,
filterChain: string,
options: ExtractorOptions,
duration: number
): Promise<number[]> {
const extractScenesCommand = [
if (options.debug) {
'Executing scene detection command:',
extractScenesCommand.join(' ')
await ffmpeg.exec(extractScenesCommand)
// The command is expected to have written its metadata to `scenes.txt`.
const scenesMetadata = await ffmpeg.readFile('scenes.txt')
const decodedMetadata = new TextDecoder().decode(scenesMetadata as Uint8Array)
if (options.debug) {
console.log('Scenes metadata:', decodedMetadata)
// Keep the lines carrying a `pts_time` value and parse the number that follows `pts_time:`.
const sceneTimestamps = decodedMetadata
.filter((line) => line.includes('pts_time'))
.map((line) => parseFloat(line.split('pts_time:')[1]) * 1000) // Convert to milliseconds
// Add start and end timestamps
// NOTE(review): only the end timestamp is appended in the visible code.
sceneTimestamps.push(duration * 1000)
// Filter out scenes that are too short
// The first timestamp is always kept. Every other one is compared with its predecessor in the
// unfiltered array (`array` is the original list), not with the last timestamp that was kept.
const filteredScenes = sceneTimestamps.filter((timestamp, index, array) => {
if (index === 0) return true
const sceneDuration = timestamp - array[index - 1]
// `||` means a minSceneDuration of 0 also falls back to 1 second.
return sceneDuration >= (options.minSceneDuration || 1) * 1000
return filteredScenes
async function processScene(
ffmpeg: FFmpeg,
index: number,
startTime: number,
endTime: number,
duration: number,
options: ExtractorOptions
): Promise<SceneData> {
const extractSceneCommand = [
(startTime / 1000).toString(),
(duration / 1000).toString(),
// console.log(`calling ffmpeg.exec(extractSceneCommand)`, extractSceneCommand)
await ffmpeg.exec(extractSceneCommand)
// Calculate frame interval to get the desired number of frames
const frameInterval = Math.max(
Math.floor(duration / (1000 * options.framesPerScene))
const extractFramesCommand = [
// console.log(`calling ffmpeg.exec(extractFramesCommand)`, extractFramesCommand)
await ffmpeg.exec(extractFramesCommand)
const sceneVideo = await ffmpeg.readFile(`scene_${index}.mp4`)
const frameFiles = (await ffmpeg.listDir('/')).filter(
(file) =>`scene_${index}_frame_`) &&`.${options.frameFormat}`)
const frames: string[] = []
for (const frameFile of frameFiles) {
const frameData = await ffmpeg.readFile(
const base64Frame = fileDataToBase64(frameData)
const base64Video = fileDataToBase64(sceneVideo)
return {
sceneIndex: index,
startTimeInMs: Math.round(startTime),
endTimeInMs: Math.round(endTime),
video: `data:video/mp4;base64,${base64Video}`,