Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
•
ae8cf98
1
Parent(s):
2272f3d
preparation work for the video-to-clap project
Browse files
- next.config.js +9 -0
- package-lock.json +261 -0
- package.json +2 -0
- public/workers/captioning.worker.js +54 -0
- src/services/io/extractCaptionFromFrame.ts +68 -0
- src/services/io/extractFramesFromVideo.ts +141 -0
- src/services/io/fix-xenova-transformers.d.ts +1 -0
- src/services/io/imageCaptioning.ts +49 -0
next.config.js
CHANGED
@@ -15,6 +15,15 @@ const nextConfig = {
|
|
15 |
// The image may be corrupted or an unsupported format.
|
16 |
unoptimized: true,
|
17 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
async headers() {
|
19 |
return [
|
20 |
{
|
|
|
15 |
// The image may be corrupted or an unsupported format.
|
16 |
unoptimized: true,
|
17 |
},
|
18 |
+
// workaround for transformers.js issues
|
19 |
+
webpack: (config) => {
|
20 |
+
config.resolve.alias = {
|
21 |
+
...config.resolve.alias,
|
22 |
+
"sharp$": false,
|
23 |
+
"onnxruntime-node$": false,
|
24 |
+
}
|
25 |
+
return config;
|
26 |
+
},
|
27 |
async headers() {
|
28 |
return [
|
29 |
{
|
package-lock.json
CHANGED
@@ -57,6 +57,7 @@
|
|
57 |
"@types/dom-speech-recognition": "^0.0.4",
|
58 |
"@upstash/ratelimit": "^1.1.3",
|
59 |
"@upstash/redis": "^1.31.1",
|
|
|
60 |
"autoprefixer": "10.4.19",
|
61 |
"class-variance-authority": "^0.7.0",
|
62 |
"clsx": "^2.1.1",
|
@@ -68,6 +69,7 @@
|
|
68 |
"fs-extra": "^11.2.0",
|
69 |
"is-hotkey": "^0.2.0",
|
70 |
"lucide-react": "^0.396.0",
|
|
|
71 |
"mlt-xml": "^2.0.2",
|
72 |
"monaco-editor": "^0.50.0",
|
73 |
"next": "^14.2.5",
|
@@ -2253,6 +2255,14 @@
|
|
2253 |
"resolved": "https://registry.npmjs.org/@huggingface/tasks/-/tasks-0.11.2.tgz",
|
2254 |
"integrity": "sha512-vlwUJsj/QJcR/oLXvV+JBKheaVk9pqfAPYiS136cjHEDTeTW5/+ePpM6uKOc56oxqwrUjh5T0JylHJU8vyqr1A=="
|
2255 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2256 |
"node_modules/@huggingface/tasks": {
|
2257 |
"version": "0.10.22",
|
2258 |
"resolved": "https://registry.npmjs.org/@huggingface/tasks/-/tasks-0.10.22.tgz",
|
@@ -2864,6 +2874,18 @@
|
|
2864 |
"url": "https://github.com/chalk/strip-ansi?sponsor=1"
|
2865 |
}
|
2866 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2867 |
"node_modules/@jridgewell/gen-mapping": {
|
2868 |
"version": "0.3.5",
|
2869 |
"resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.5.tgz",
|
@@ -3406,6 +3428,60 @@
|
|
3406 |
"url": "https://opencollective.com/preact"
|
3407 |
}
|
3408 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3409 |
"node_modules/@radix-ui/number": {
|
3410 |
"version": "1.1.0",
|
3411 |
"resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.0.tgz",
|
@@ -6176,6 +6252,19 @@
|
|
6176 |
"url": "https://opencollective.com/vitest"
|
6177 |
}
|
6178 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6179 |
"node_modules/abort-controller": {
|
6180 |
"version": "3.0.0",
|
6181 |
"resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
|
@@ -6991,6 +7080,15 @@
|
|
6991 |
"node": ">= 6"
|
6992 |
}
|
6993 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6994 |
"node_modules/class-variance-authority": {
|
6995 |
"version": "0.7.0",
|
6996 |
"resolved": "https://registry.npmjs.org/class-variance-authority/-/class-variance-authority-0.7.0.tgz",
|
@@ -9045,6 +9143,11 @@
|
|
9045 |
"node": "^10.12.0 || >=12.0.0"
|
9046 |
}
|
9047 |
},
|
|
|
|
|
|
|
|
|
|
|
9048 |
"node_modules/flatted": {
|
9049 |
"version": "3.3.1",
|
9050 |
"resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz",
|
@@ -9592,6 +9695,11 @@
|
|
9592 |
"node": ">=12.0.0"
|
9593 |
}
|
9594 |
},
|
|
|
|
|
|
|
|
|
|
|
9595 |
"node_modules/has-bigints": {
|
9596 |
"version": "1.0.2",
|
9597 |
"resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.0.2.tgz",
|
@@ -10772,6 +10880,11 @@
|
|
10772 |
"url": "https://github.com/sponsors/sindresorhus"
|
10773 |
}
|
10774 |
},
|
|
|
|
|
|
|
|
|
|
|
10775 |
"node_modules/loose-envify": {
|
10776 |
"version": "1.4.0",
|
10777 |
"resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
|
@@ -10851,6 +10964,20 @@
|
|
10851 |
"is-buffer": "~1.1.6"
|
10852 |
}
|
10853 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10854 |
"node_modules/merge-stream": {
|
10855 |
"version": "2.0.0",
|
10856 |
"resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz",
|
@@ -10946,6 +11073,52 @@
|
|
10946 |
"node": ">=16 || 14 >=14.17"
|
10947 |
}
|
10948 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10949 |
"node_modules/ml-array-mean": {
|
10950 |
"version": "1.1.6",
|
10951 |
"resolved": "https://registry.npmjs.org/ml-array-mean/-/ml-array-mean-1.1.6.tgz",
|
@@ -11540,6 +11713,40 @@
|
|
11540 |
"url": "https://github.com/sponsors/sindresorhus"
|
11541 |
}
|
11542 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11543 |
"node_modules/openai": {
|
11544 |
"version": "4.52.7",
|
11545 |
"resolved": "https://registry.npmjs.org/openai/-/openai-4.52.7.tgz",
|
@@ -11868,6 +12075,11 @@
|
|
11868 |
"node": ">= 6"
|
11869 |
}
|
11870 |
},
|
|
|
|
|
|
|
|
|
|
|
11871 |
"node_modules/playwright": {
|
11872 |
"version": "1.45.2",
|
11873 |
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.45.2.tgz",
|
@@ -12230,6 +12442,29 @@
|
|
12230 |
"resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz",
|
12231 |
"integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ=="
|
12232 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12233 |
"node_modules/psl": {
|
12234 |
"version": "1.9.0",
|
12235 |
"resolved": "https://registry.npmjs.org/psl/-/psl-1.9.0.tgz",
|
@@ -13704,6 +13939,32 @@
|
|
13704 |
"node": ">=6"
|
13705 |
}
|
13706 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13707 |
"node_modules/text-table": {
|
13708 |
"version": "0.2.0",
|
13709 |
"resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",
|
|
|
57 |
"@types/dom-speech-recognition": "^0.0.4",
|
58 |
"@upstash/ratelimit": "^1.1.3",
|
59 |
"@upstash/redis": "^1.31.1",
|
60 |
+
"@xenova/transformers": "github:xenova/transformers.js#v3",
|
61 |
"autoprefixer": "10.4.19",
|
62 |
"class-variance-authority": "^0.7.0",
|
63 |
"clsx": "^2.1.1",
|
|
|
69 |
"fs-extra": "^11.2.0",
|
70 |
"is-hotkey": "^0.2.0",
|
71 |
"lucide-react": "^0.396.0",
|
72 |
+
"mediainfo.js": "^0.3.2",
|
73 |
"mlt-xml": "^2.0.2",
|
74 |
"monaco-editor": "^0.50.0",
|
75 |
"next": "^14.2.5",
|
|
|
2255 |
"resolved": "https://registry.npmjs.org/@huggingface/tasks/-/tasks-0.11.2.tgz",
|
2256 |
"integrity": "sha512-vlwUJsj/QJcR/oLXvV+JBKheaVk9pqfAPYiS136cjHEDTeTW5/+ePpM6uKOc56oxqwrUjh5T0JylHJU8vyqr1A=="
|
2257 |
},
|
2258 |
+
"node_modules/@huggingface/jinja": {
|
2259 |
+
"version": "0.2.2",
|
2260 |
+
"resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.2.2.tgz",
|
2261 |
+
"integrity": "sha512-/KPde26khDUIPkTGU82jdtTW9UAuvUTumCAbFs/7giR0SxsvZC4hru51PBvpijH6BVkHcROcvZM/lpy5h1jRRA==",
|
2262 |
+
"engines": {
|
2263 |
+
"node": ">=18"
|
2264 |
+
}
|
2265 |
+
},
|
2266 |
"node_modules/@huggingface/tasks": {
|
2267 |
"version": "0.10.22",
|
2268 |
"resolved": "https://registry.npmjs.org/@huggingface/tasks/-/tasks-0.10.22.tgz",
|
|
|
2874 |
"url": "https://github.com/chalk/strip-ansi?sponsor=1"
|
2875 |
}
|
2876 |
},
|
2877 |
+
"node_modules/@isaacs/fs-minipass": {
|
2878 |
+
"version": "4.0.1",
|
2879 |
+
"resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz",
|
2880 |
+
"integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==",
|
2881 |
+
"optional": true,
|
2882 |
+
"dependencies": {
|
2883 |
+
"minipass": "^7.0.4"
|
2884 |
+
},
|
2885 |
+
"engines": {
|
2886 |
+
"node": ">=18.0.0"
|
2887 |
+
}
|
2888 |
+
},
|
2889 |
"node_modules/@jridgewell/gen-mapping": {
|
2890 |
"version": "0.3.5",
|
2891 |
"resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.5.tgz",
|
|
|
3428 |
"url": "https://opencollective.com/preact"
|
3429 |
}
|
3430 |
},
|
3431 |
+
"node_modules/@protobufjs/aspromise": {
|
3432 |
+
"version": "1.1.2",
|
3433 |
+
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
|
3434 |
+
"integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ=="
|
3435 |
+
},
|
3436 |
+
"node_modules/@protobufjs/base64": {
|
3437 |
+
"version": "1.1.2",
|
3438 |
+
"resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
|
3439 |
+
"integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg=="
|
3440 |
+
},
|
3441 |
+
"node_modules/@protobufjs/codegen": {
|
3442 |
+
"version": "2.0.4",
|
3443 |
+
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
|
3444 |
+
"integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg=="
|
3445 |
+
},
|
3446 |
+
"node_modules/@protobufjs/eventemitter": {
|
3447 |
+
"version": "1.1.0",
|
3448 |
+
"resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
|
3449 |
+
"integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q=="
|
3450 |
+
},
|
3451 |
+
"node_modules/@protobufjs/fetch": {
|
3452 |
+
"version": "1.1.0",
|
3453 |
+
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
|
3454 |
+
"integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
|
3455 |
+
"dependencies": {
|
3456 |
+
"@protobufjs/aspromise": "^1.1.1",
|
3457 |
+
"@protobufjs/inquire": "^1.1.0"
|
3458 |
+
}
|
3459 |
+
},
|
3460 |
+
"node_modules/@protobufjs/float": {
|
3461 |
+
"version": "1.0.2",
|
3462 |
+
"resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
|
3463 |
+
"integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ=="
|
3464 |
+
},
|
3465 |
+
"node_modules/@protobufjs/inquire": {
|
3466 |
+
"version": "1.1.0",
|
3467 |
+
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
|
3468 |
+
"integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q=="
|
3469 |
+
},
|
3470 |
+
"node_modules/@protobufjs/path": {
|
3471 |
+
"version": "1.1.2",
|
3472 |
+
"resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
|
3473 |
+
"integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA=="
|
3474 |
+
},
|
3475 |
+
"node_modules/@protobufjs/pool": {
|
3476 |
+
"version": "1.1.0",
|
3477 |
+
"resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
|
3478 |
+
"integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw=="
|
3479 |
+
},
|
3480 |
+
"node_modules/@protobufjs/utf8": {
|
3481 |
+
"version": "1.1.0",
|
3482 |
+
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
|
3483 |
+
"integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw=="
|
3484 |
+
},
|
3485 |
"node_modules/@radix-ui/number": {
|
3486 |
"version": "1.1.0",
|
3487 |
"resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.0.tgz",
|
|
|
6252 |
"url": "https://opencollective.com/vitest"
|
6253 |
}
|
6254 |
},
|
6255 |
+
"node_modules/@xenova/transformers": {
|
6256 |
+
"version": "3.0.0-alpha.0",
|
6257 |
+
"resolved": "git+ssh://git@github.com/xenova/transformers.js.git#1b4d2428225ef8f63be94bfa38a0d7fd81ac7c0c",
|
6258 |
+
"license": "Apache-2.0",
|
6259 |
+
"dependencies": {
|
6260 |
+
"@huggingface/jinja": "^0.2.2",
|
6261 |
+
"onnxruntime-web": "^1.18.0",
|
6262 |
+
"sharp": "^0.33.2"
|
6263 |
+
},
|
6264 |
+
"optionalDependencies": {
|
6265 |
+
"onnxruntime-node": "^1.18.0"
|
6266 |
+
}
|
6267 |
+
},
|
6268 |
"node_modules/abort-controller": {
|
6269 |
"version": "3.0.0",
|
6270 |
"resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
|
|
|
7080 |
"node": ">= 6"
|
7081 |
}
|
7082 |
},
|
7083 |
+
"node_modules/chownr": {
|
7084 |
+
"version": "3.0.0",
|
7085 |
+
"resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz",
|
7086 |
+
"integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==",
|
7087 |
+
"optional": true,
|
7088 |
+
"engines": {
|
7089 |
+
"node": ">=18"
|
7090 |
+
}
|
7091 |
+
},
|
7092 |
"node_modules/class-variance-authority": {
|
7093 |
"version": "0.7.0",
|
7094 |
"resolved": "https://registry.npmjs.org/class-variance-authority/-/class-variance-authority-0.7.0.tgz",
|
|
|
9143 |
"node": "^10.12.0 || >=12.0.0"
|
9144 |
}
|
9145 |
},
|
9146 |
+
"node_modules/flatbuffers": {
|
9147 |
+
"version": "1.12.0",
|
9148 |
+
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
|
9149 |
+
"integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ=="
|
9150 |
+
},
|
9151 |
"node_modules/flatted": {
|
9152 |
"version": "3.3.1",
|
9153 |
"resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz",
|
|
|
9695 |
"node": ">=12.0.0"
|
9696 |
}
|
9697 |
},
|
9698 |
+
"node_modules/guid-typescript": {
|
9699 |
+
"version": "1.0.9",
|
9700 |
+
"resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
|
9701 |
+
"integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ=="
|
9702 |
+
},
|
9703 |
"node_modules/has-bigints": {
|
9704 |
"version": "1.0.2",
|
9705 |
"resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.0.2.tgz",
|
|
|
10880 |
"url": "https://github.com/sponsors/sindresorhus"
|
10881 |
}
|
10882 |
},
|
10883 |
+
"node_modules/long": {
|
10884 |
+
"version": "5.2.3",
|
10885 |
+
"resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz",
|
10886 |
+
"integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q=="
|
10887 |
+
},
|
10888 |
"node_modules/loose-envify": {
|
10889 |
"version": "1.4.0",
|
10890 |
"resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
|
|
|
10964 |
"is-buffer": "~1.1.6"
|
10965 |
}
|
10966 |
},
|
10967 |
+
"node_modules/mediainfo.js": {
|
10968 |
+
"version": "0.3.2",
|
10969 |
+
"resolved": "https://registry.npmjs.org/mediainfo.js/-/mediainfo.js-0.3.2.tgz",
|
10970 |
+
"integrity": "sha512-SC8z72ESV1z2lq2zfheoo9zgcoqjeBn0mzq6MhIn3aqlkh3RV84FwOiMtYA0HWY7mi1igM89Jcll4r2sk/yyZA==",
|
10971 |
+
"dependencies": {
|
10972 |
+
"yargs": "^17.7.2"
|
10973 |
+
},
|
10974 |
+
"bin": {
|
10975 |
+
"mediainfo.js": "dist/esm/cli.js"
|
10976 |
+
},
|
10977 |
+
"engines": {
|
10978 |
+
"node": ">=18.0.0"
|
10979 |
+
}
|
10980 |
+
},
|
10981 |
"node_modules/merge-stream": {
|
10982 |
"version": "2.0.0",
|
10983 |
"resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz",
|
|
|
11073 |
"node": ">=16 || 14 >=14.17"
|
11074 |
}
|
11075 |
},
|
11076 |
+
"node_modules/minizlib": {
|
11077 |
+
"version": "3.0.1",
|
11078 |
+
"resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.1.tgz",
|
11079 |
+
"integrity": "sha512-umcy022ILvb5/3Djuu8LWeqUa8D68JaBzlttKeMWen48SjabqS3iY5w/vzeMzMUNhLDifyhbOwKDSznB1vvrwg==",
|
11080 |
+
"optional": true,
|
11081 |
+
"dependencies": {
|
11082 |
+
"minipass": "^7.0.4",
|
11083 |
+
"rimraf": "^5.0.5"
|
11084 |
+
},
|
11085 |
+
"engines": {
|
11086 |
+
"node": ">= 18"
|
11087 |
+
}
|
11088 |
+
},
|
11089 |
+
"node_modules/minizlib/node_modules/rimraf": {
|
11090 |
+
"version": "5.0.9",
|
11091 |
+
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.9.tgz",
|
11092 |
+
"integrity": "sha512-3i7b8OcswU6CpU8Ej89quJD4O98id7TtVM5U4Mybh84zQXdrFmDLouWBEEaD/QfO3gDDfH+AGFCGsR7kngzQnA==",
|
11093 |
+
"optional": true,
|
11094 |
+
"dependencies": {
|
11095 |
+
"glob": "^10.3.7"
|
11096 |
+
},
|
11097 |
+
"bin": {
|
11098 |
+
"rimraf": "dist/esm/bin.mjs"
|
11099 |
+
},
|
11100 |
+
"engines": {
|
11101 |
+
"node": "14 >=14.20 || 16 >=16.20 || >=18"
|
11102 |
+
},
|
11103 |
+
"funding": {
|
11104 |
+
"url": "https://github.com/sponsors/isaacs"
|
11105 |
+
}
|
11106 |
+
},
|
11107 |
+
"node_modules/mkdirp": {
|
11108 |
+
"version": "3.0.1",
|
11109 |
+
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz",
|
11110 |
+
"integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==",
|
11111 |
+
"optional": true,
|
11112 |
+
"bin": {
|
11113 |
+
"mkdirp": "dist/cjs/src/bin.js"
|
11114 |
+
},
|
11115 |
+
"engines": {
|
11116 |
+
"node": ">=10"
|
11117 |
+
},
|
11118 |
+
"funding": {
|
11119 |
+
"url": "https://github.com/sponsors/isaacs"
|
11120 |
+
}
|
11121 |
+
},
|
11122 |
"node_modules/ml-array-mean": {
|
11123 |
"version": "1.1.6",
|
11124 |
"resolved": "https://registry.npmjs.org/ml-array-mean/-/ml-array-mean-1.1.6.tgz",
|
|
|
11713 |
"url": "https://github.com/sponsors/sindresorhus"
|
11714 |
}
|
11715 |
},
|
11716 |
+
"node_modules/onnxruntime-common": {
|
11717 |
+
"version": "1.18.0",
|
11718 |
+
"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.18.0.tgz",
|
11719 |
+
"integrity": "sha512-lufrSzX6QdKrktAELG5x5VkBpapbCeS3dQwrXbN0eD9rHvU0yAWl7Ztju9FvgAKWvwd/teEKJNj3OwM6eTZh3Q=="
|
11720 |
+
},
|
11721 |
+
"node_modules/onnxruntime-node": {
|
11722 |
+
"version": "1.18.0",
|
11723 |
+
"resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.18.0.tgz",
|
11724 |
+
"integrity": "sha512-iTnFcxKpmywCatx8ov4GTbECe3tJk2Bp1OA2mWRJde78q+7tpPYBhKMnwhlaoKy9oKQcy4UoEuuhoy2PSD13ww==",
|
11725 |
+
"hasInstallScript": true,
|
11726 |
+
"optional": true,
|
11727 |
+
"os": [
|
11728 |
+
"win32",
|
11729 |
+
"darwin",
|
11730 |
+
"linux"
|
11731 |
+
],
|
11732 |
+
"dependencies": {
|
11733 |
+
"onnxruntime-common": "1.18.0",
|
11734 |
+
"tar": "^7.0.1"
|
11735 |
+
}
|
11736 |
+
},
|
11737 |
+
"node_modules/onnxruntime-web": {
|
11738 |
+
"version": "1.18.0",
|
11739 |
+
"resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.18.0.tgz",
|
11740 |
+
"integrity": "sha512-o1UKj4ABIj1gmG7ae0RKJ3/GT+3yoF0RRpfDfeoe0huzRW4FDRLfbkDETmdFAvnJEXuYDE0YT+hhkia0352StQ==",
|
11741 |
+
"dependencies": {
|
11742 |
+
"flatbuffers": "^1.12.0",
|
11743 |
+
"guid-typescript": "^1.0.9",
|
11744 |
+
"long": "^5.2.3",
|
11745 |
+
"onnxruntime-common": "1.18.0",
|
11746 |
+
"platform": "^1.3.6",
|
11747 |
+
"protobufjs": "^7.2.4"
|
11748 |
+
}
|
11749 |
+
},
|
11750 |
"node_modules/openai": {
|
11751 |
"version": "4.52.7",
|
11752 |
"resolved": "https://registry.npmjs.org/openai/-/openai-4.52.7.tgz",
|
|
|
12075 |
"node": ">= 6"
|
12076 |
}
|
12077 |
},
|
12078 |
+
"node_modules/platform": {
|
12079 |
+
"version": "1.3.6",
|
12080 |
+
"resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
|
12081 |
+
"integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg=="
|
12082 |
+
},
|
12083 |
"node_modules/playwright": {
|
12084 |
"version": "1.45.2",
|
12085 |
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.45.2.tgz",
|
|
|
12442 |
"resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz",
|
12443 |
"integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ=="
|
12444 |
},
|
12445 |
+
"node_modules/protobufjs": {
|
12446 |
+
"version": "7.3.2",
|
12447 |
+
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.3.2.tgz",
|
12448 |
+
"integrity": "sha512-RXyHaACeqXeqAKGLDl68rQKbmObRsTIn4TYVUUug1KfS47YWCo5MacGITEryugIgZqORCvJWEk4l449POg5Txg==",
|
12449 |
+
"hasInstallScript": true,
|
12450 |
+
"dependencies": {
|
12451 |
+
"@protobufjs/aspromise": "^1.1.2",
|
12452 |
+
"@protobufjs/base64": "^1.1.2",
|
12453 |
+
"@protobufjs/codegen": "^2.0.4",
|
12454 |
+
"@protobufjs/eventemitter": "^1.1.0",
|
12455 |
+
"@protobufjs/fetch": "^1.1.0",
|
12456 |
+
"@protobufjs/float": "^1.0.2",
|
12457 |
+
"@protobufjs/inquire": "^1.1.0",
|
12458 |
+
"@protobufjs/path": "^1.1.2",
|
12459 |
+
"@protobufjs/pool": "^1.1.0",
|
12460 |
+
"@protobufjs/utf8": "^1.1.0",
|
12461 |
+
"@types/node": ">=13.7.0",
|
12462 |
+
"long": "^5.0.0"
|
12463 |
+
},
|
12464 |
+
"engines": {
|
12465 |
+
"node": ">=12.0.0"
|
12466 |
+
}
|
12467 |
+
},
|
12468 |
"node_modules/psl": {
|
12469 |
"version": "1.9.0",
|
12470 |
"resolved": "https://registry.npmjs.org/psl/-/psl-1.9.0.tgz",
|
|
|
13939 |
"node": ">=6"
|
13940 |
}
|
13941 |
},
|
13942 |
+
"node_modules/tar": {
|
13943 |
+
"version": "7.4.0",
|
13944 |
+
"resolved": "https://registry.npmjs.org/tar/-/tar-7.4.0.tgz",
|
13945 |
+
"integrity": "sha512-XQs0S8fuAkQWuqhDeCdMlJXDX80D7EOVLDPVFkna9yQfzS+PHKgfxcei0jf6/+QAWcjqrnC8uM3fSAnrQl+XYg==",
|
13946 |
+
"optional": true,
|
13947 |
+
"dependencies": {
|
13948 |
+
"@isaacs/fs-minipass": "^4.0.0",
|
13949 |
+
"chownr": "^3.0.0",
|
13950 |
+
"minipass": "^7.1.2",
|
13951 |
+
"minizlib": "^3.0.1",
|
13952 |
+
"mkdirp": "^3.0.1",
|
13953 |
+
"yallist": "^5.0.0"
|
13954 |
+
},
|
13955 |
+
"engines": {
|
13956 |
+
"node": ">=18"
|
13957 |
+
}
|
13958 |
+
},
|
13959 |
+
"node_modules/tar/node_modules/yallist": {
|
13960 |
+
"version": "5.0.0",
|
13961 |
+
"resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz",
|
13962 |
+
"integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==",
|
13963 |
+
"optional": true,
|
13964 |
+
"engines": {
|
13965 |
+
"node": ">=18"
|
13966 |
+
}
|
13967 |
+
},
|
13968 |
"node_modules/text-table": {
|
13969 |
"version": "0.2.0",
|
13970 |
"resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",
|
package.json
CHANGED
@@ -68,6 +68,7 @@
|
|
68 |
"@types/dom-speech-recognition": "^0.0.4",
|
69 |
"@upstash/ratelimit": "^1.1.3",
|
70 |
"@upstash/redis": "^1.31.1",
|
|
|
71 |
"autoprefixer": "10.4.19",
|
72 |
"class-variance-authority": "^0.7.0",
|
73 |
"clsx": "^2.1.1",
|
@@ -79,6 +80,7 @@
|
|
79 |
"fs-extra": "^11.2.0",
|
80 |
"is-hotkey": "^0.2.0",
|
81 |
"lucide-react": "^0.396.0",
|
|
|
82 |
"mlt-xml": "^2.0.2",
|
83 |
"monaco-editor": "^0.50.0",
|
84 |
"next": "^14.2.5",
|
|
|
68 |
"@types/dom-speech-recognition": "^0.0.4",
|
69 |
"@upstash/ratelimit": "^1.1.3",
|
70 |
"@upstash/redis": "^1.31.1",
|
71 |
+
"@xenova/transformers": "github:xenova/transformers.js#v3",
|
72 |
"autoprefixer": "10.4.19",
|
73 |
"class-variance-authority": "^0.7.0",
|
74 |
"clsx": "^2.1.1",
|
|
|
80 |
"fs-extra": "^11.2.0",
|
81 |
"is-hotkey": "^0.2.0",
|
82 |
"lucide-react": "^0.396.0",
|
83 |
+
"mediainfo.js": "^0.3.2",
|
84 |
"mlt-xml": "^2.0.2",
|
85 |
"monaco-editor": "^0.50.0",
|
86 |
"next": "^14.2.5",
|
public/workers/captioning.worker.js
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
importScripts('https://cdn.jsdelivr.net/npm/@xenova/transformers@3.0.0');
|
2 |
+
|
3 |
+
const { AutoProcessor, AutoTokenizer, Moondream1ForConditionalGeneration, RawImage } = transformers;
|
4 |
+
|
5 |
+
let processor;
|
6 |
+
let tokenizer;
|
7 |
+
let model;
|
8 |
+
|
9 |
+
// In-flight (or completed) model load. Sharing it means that several messages
// arriving before the first load has finished all wait on the same work
// instead of each starting their own load.
let initializationPromise = null;

/**
 * Loads the processor, tokenizer and model for `Xenova/moondream2` and stores
 * them in the module-level `processor`, `tokenizer` and `model` variables.
 *
 * The three artifacts are requested in parallel and the module-level variables
 * are only assigned once all of them have loaded, so callers never observe a
 * partially initialised state.
 *
 * @returns {Promise<void>} Resolves once the three variables are assigned.
 */
async function loadMoondream() {
  const model_id = 'Xenova/moondream2';
  const [loadedProcessor, loadedTokenizer, loadedModel] = await Promise.all([
    AutoProcessor.from_pretrained(model_id),
    AutoTokenizer.from_pretrained(model_id),
    Moondream1ForConditionalGeneration.from_pretrained(model_id, {
      dtype: {
        embed_tokens: 'fp16',
        vision_encoder: 'fp16',
        decoder_model_merged: 'q4',
      },
      device: 'webgpu',
    }),
  ]);

  processor = loadedProcessor;
  tokenizer = loadedTokenizer;
  model = loadedModel;
}

/**
 * Ensures the captioning model is loaded.
 *
 * The first call starts the load; calls made while it is in flight, or after it
 * has succeeded, wait on the same promise rather than loading again. If the
 * load fails, the cached promise is cleared so that a later call can retry,
 * and the failure is propagated to every waiting caller.
 *
 * @returns {Promise<void>} Resolves once `processor`, `tokenizer` and `model` are set.
 */
async function initializeModel() {
  if (initializationPromise === null) {
    initializationPromise = loadMoondream().catch((error) => {
      initializationPromise = null;
      throw error;
    });
  }
  await initializationPromise;
}
|
22 |
+
|
23 |
+
/**
 * Asks the model to describe an image and returns the decoded text.
 *
 * If any of the module-level `processor`, `tokenizer` or `model` is not set
 * yet, `initializeModel()` is awaited first.
 *
 * @param {string} imageDataUrl - Image location handed unchanged to
 *   `RawImage.fromURL` (the name suggests a data URL).
 * @returns {Promise<string>} The first sequence returned by
 *   `tokenizer.batch_decode` (special tokens skipped), trimmed.
 */
async function captionImage(imageDataUrl) {
  const isReady = Boolean(processor && tokenizer && model);
  if (!isReady) {
    await initializeModel();
  }

  // Text side of the request: a fixed question wrapped in the prompt template.
  const question = 'Describe this image.';
  const textInputs = tokenizer(`<image>\n\nQuestion: ${question}\n\nAnswer:`);

  // Vision side of the request.
  const rawImage = await RawImage.fromURL(imageDataUrl);
  const visionInputs = await processor(rawImage);

  // Greedy decoding (no sampling), capped at 64 new tokens.
  const generated = await model.generate({
    ...textInputs,
    ...visionInputs,
    do_sample: false,
    max_new_tokens: 64,
  });

  const sequences = tokenizer.batch_decode(generated, { skip_special_tokens: true });
  return sequences[0].trim();
}
|
45 |
+
|
46 |
+
// Handles caption requests posted to this worker.
// Expects `event.data` to be an object with an `imageDataUrl` property and
// replies with either `{ caption }` on success or `{ error }` (a string) on failure.
self.addEventListener('message', async (event) => {
  try {
    // Read the payload inside the try block so that a malformed message
    // (for instance `null` data) produces an `{ error }` reply instead of an
    // unhandled rejection that leaves the sender without any answer.
    const { imageDataUrl } = event.data;
    const caption = await captionImage(imageDataUrl);
    self.postMessage({ caption });
  } catch (error) {
    // Anything can be thrown, not only Error instances; always reply with a
    // non-empty string so the receiver can reliably detect the failure.
    const message =
      (error instanceof Error && error.message) || String(error) || 'Unknown error';
    self.postMessage({ error: message });
  }
});
|
src/services/io/extractCaptionFromFrame.ts
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import {
|
2 |
+
AutoProcessor,
|
3 |
+
AutoTokenizer,
|
4 |
+
Moondream1ForConditionalGeneration,
|
5 |
+
RawImage,
|
6 |
+
} from '@xenova/transformers'
|
7 |
+
|
8 |
+
/**
 * Generates a textual description of one image with the `Xenova/moondream2`
 * model running on WebGPU.
 *
 * The processor, tokenizer and model are loaded on every call.
 *
 * @param imageInBase64DataUri - Image to describe; handed unchanged to
 *   `RawImage.fromURL` (the name indicates a base64 data URI).
 * @returns The first decoded sequence, or an empty string when there is none.
 *   Decoding keeps special tokens (`skip_special_tokens: false`), so, as the
 *   sample output in the body shows, the text still contains the prompt and
 *   the `<|endoftext|>` markers.
 * @throws Error with setup instructions when WebGPU is not available.
 */
export async function extractCaptionFromFrame(
  imageInBase64DataUri: string
): Promise<string> {
  // `navigator` is not defined in every JavaScript runtime; check for it first
  // so that callers get the explanatory error below rather than a
  // ReferenceError.
  const hasWebGPU =
    typeof navigator !== 'undefined' &&
    Boolean((navigator as Navigator & { gpu?: unknown }).gpu)

  if (!hasWebGPU) {
    throw new Error(`Please enable WebGPU to analyze video frames:

1. You need a modern browser such as Google Chrome 113+, Microsoft Edge 113+, Safari 18 (macOS 15), Firefox Nightly

2. You need to enable WebGPU (depends on your browser, see below)

2.1 For Chrome: Perform the following operations in the Chrome / Microsoft Edge address bar
The chrome://flags/#enable-unsafe-webgpu flag must be enabled (not enable-webgpu-developer-features). Linux experimental support also requires launching the browser with --enable-features=Vulkan.

2.2 For Safari 18 (macOS 15): WebGPU is enabled by default

2.3 For Firefox Nightly: Type about:config in the address bar and set 'dom.webgpu.enabled' to true
`)
  }

  // Load processor, tokenizer and model
  const model_id = 'Xenova/moondream2'
  const processor = await AutoProcessor.from_pretrained(model_id)
  const tokenizer = await AutoTokenizer.from_pretrained(model_id)
  const model = await Moondream1ForConditionalGeneration.from_pretrained(
    model_id,
    {
      dtype: {
        embed_tokens: 'fp16', // or 'fp32'
        vision_encoder: 'fp16', // or 'q8'
        decoder_model_merged: 'q4', // or 'q4f16' or 'q8'
      },
      device: 'webgpu',
    }
  )

  // Prepare text inputs
  const prompt = 'Describe this image.'
  const text = `<image>\n\nQuestion: ${prompt}\n\nAnswer:`
  const text_inputs = tokenizer(text)

  // Prepare vision inputs
  const image = await RawImage.fromURL(imageInBase64DataUri)
  const vision_inputs = await processor(image)

  // Generate response (greedy decoding, at most 177 new tokens)
  const output = await model.generate({
    ...text_inputs,
    ...vision_inputs,
    do_sample: false,
    max_new_tokens: 177,
  })
  const decoded = tokenizer.batch_decode(output, { skip_special_tokens: false })
  console.log(decoded)
  // Sample output:
  // [
  //     '<|endoftext|><image>\n\n' +
  //     'Question: Describe this image.\n\n' +
  //     'Answer: A hand is holding a white book titled "The Little Book of Deep Learning" against a backdrop of a balcony with a railing and a view of a building and trees.<|endoftext|>'
  // ]

  return `${decoded[0] || ''}`
}
|
src/services/io/extractFramesFromVideo.ts
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { FFmpeg } from '@ffmpeg/ffmpeg'
|
2 |
+
import { toBlobURL } from '@ffmpeg/util'
|
3 |
+
import mediaInfoFactory, {
|
4 |
+
Track,
|
5 |
+
GeneralTrack,
|
6 |
+
VideoTrack,
|
7 |
+
AudioTrack,
|
8 |
+
TextTrack,
|
9 |
+
ImageTrack,
|
10 |
+
MenuTrack,
|
11 |
+
OtherTrack,
|
12 |
+
} from 'mediainfo.js'
|
13 |
+
|
14 |
+
/**
 * Settings accepted by `extractFramesFromVideo`.
 */
interface FrameExtractorOptions {
  /** Image format of the extracted frames; also used as the output file extension. */
  format: 'png' | 'jpg'

  /** Upper bound for the frame width, used by the scale filter. */
  maxWidth: number

  /** Upper bound for the frame height, used by the scale filter. */
  maxHeight: number

  /** Percentage of additional frames between scene changes (0-100). */
  sceneSamplingRate: number

  /** Optional callback invoked with progress updates (a value capped at 100). */
  onProgress?: (progress: number) => void
}
|
21 |
+
|
22 |
+
/**
 * Reads the longest audio/video track duration of a media blob with MediaInfo.
 *
 * The value is returned in whatever unit MediaInfo reports `Duration` in
 * (presumably seconds: the caller compares it with ffmpeg's `time=` output
 * converted to seconds — confirm against the MediaInfo typings).
 *
 * @param videoBlob - The media file to analyze.
 * @returns The largest finite, positive duration found, or 0 when none is reported.
 */
async function detectMediaDuration(videoBlob: Blob): Promise<number> {
  const mediaInfo = await mediaInfoFactory({ format: 'object' })

  try {
    const getSize = () => videoBlob.size
    const readChunk = async (chunkSize: number, offset: number) =>
      new Uint8Array(
        await videoBlob.slice(offset, offset + chunkSize).arrayBuffer()
      )

    const result = await mediaInfo.analyzeData(getSize, readChunk)

    let duration = 0
    for (const track of result.media?.track || []) {
      // '@type': "General" | "Video" | "Audio" | "Text" | "Image" | "Menu" | "Other"
      // Only audio and video tracks are considered; other track types must not
      // reset a duration that was already found.
      if (track['@type'] !== 'Audio' && track['@type'] !== 'Video') {
        continue
      }
      const trackDuration = Number((track as AudioTrack | VideoTrack).Duration)
      if (Number.isFinite(trackDuration) && trackDuration > duration) {
        duration = trackDuration
      }
    }
    return duration
  } finally {
    // Release the MediaInfo instance whether or not the analysis succeeded.
    mediaInfo.close()
  }
}

/**
 * Base64-encodes a byte array.
 *
 * The bytes are converted in fixed-size slices because passing a whole image
 * to `String.fromCharCode.apply` can exceed the engine's argument limit.
 *
 * @param bytes - Raw binary data.
 * @returns The base64 representation of `bytes`.
 */
function encodeBytesAsBase64(bytes: Uint8Array): string {
  const sliceSize = 0x8000
  let binary = ''
  for (let start = 0; start < bytes.length; start += sliceSize) {
    binary += String.fromCharCode.apply(
      null,
      bytes.subarray(start, start + sliceSize) as unknown as number[]
    )
  }
  return btoa(binary)
}

/**
 * Extracts still frames from a video with FFmpeg and returns them as
 * base64 data URLs.
 *
 * Frames are selected by a scene-change filter (`scene > 0.4`), thinned by a
 * modulo filter derived from `options.sceneSamplingRate`, and scaled to fit
 * within `options.maxWidth` x `options.maxHeight` while keeping the aspect
 * ratio. Progress is reported through `options.onProgress`: 0-90 while FFmpeg
 * runs, 90-100 while the generated frames are read back.
 *
 * @param videoBlob - The video file; it is written to FFmpeg's file system as `input.mp4`.
 * @param options - Output format, maximum dimensions, sampling rate and optional progress callback.
 * @returns One `data:image/<format>;base64,...` URL per extracted frame, ordered by frame file name.
 * @throws Error when the duration cannot be determined (or is 0), or when FFmpeg exits with a non-zero code.
 */
async function extractFramesFromVideo(
  videoBlob: Blob,
  options: FrameExtractorOptions
): Promise<string[]> {
  // The duration is only needed to turn FFmpeg's `time=` logs into a percentage.
  const duration = await detectMediaDuration(videoBlob)

  if (!duration) {
    throw new Error('Could not determine video duration (or it is length 0)')
  }

  // Initialize FFmpeg
  const ffmpeg = new FFmpeg()
  const baseURL = 'https://unpkg.com/@ffmpeg/core@0.12.6/dist/umd'

  try {
    await ffmpeg.load({
      coreURL: await toBlobURL(`${baseURL}/ffmpeg-core.js`, 'text/javascript'),
      wasmURL: await toBlobURL(`${baseURL}/ffmpeg-core.wasm`, 'application/wasm'),
    })

    // Write video file to FFmpeg's file system
    await ffmpeg.writeFile(
      'input.mp4',
      new Uint8Array(await videoBlob.arrayBuffer())
    )

    // The modulo divisor must be a positive integer: a rate above 100 would
    // floor to 0 and a rate of 0 would give Infinity. An invalid rate falls
    // back to 1, i.e. no thinning.
    const samplingRate = Number(options.sceneSamplingRate)
    const frameStep =
      Number.isFinite(samplingRate) && samplingRate > 0
        ? Math.max(1, Math.floor(100 / samplingRate))
        : 1

    // Prepare FFmpeg command
    const sceneFilter = `select='gt(scene,0.4)'`
    const additionalFramesFilter = `select='not(mod(n,${frameStep}))'`
    // Commas inside min() must reach FFmpeg as `\,`, otherwise the filtergraph
    // parser treats them as filter separators. That needs `\\,` in a template literal.
    const fitRatio = `min(${options.maxWidth}/iw\\,${options.maxHeight}/ih)`
    const scaleFilter = `scale=iw*${fitRatio}:ih*${fitRatio}`

    let lastProgress = 0
    ffmpeg.on('log', ({ message }) => {
      const timeMatch = message.match(/time=(\d{2}):(\d{2}):(\d{2}\.\d{2})/)
      if (!timeMatch) {
        return
      }
      const [, hours, minutes, seconds] = timeMatch
      const currentTime =
        parseInt(hours, 10) * 3600 +
        parseInt(minutes, 10) * 60 +
        parseFloat(seconds)
      // The FFmpeg pass covers 0-90 so the read-back phase below (90-100)
      // never moves the reported progress backwards.
      const progress = Math.min(90, Math.round((currentTime / duration) * 90))
      if (progress > lastProgress) {
        lastProgress = progress
        options.onProgress?.(progress)
      }
    })

    const exitCode = await ffmpeg.exec([
      '-i',
      'input.mp4',
      '-vf',
      `${sceneFilter},${additionalFramesFilter},${scaleFilter}`,
      '-vsync',
      '0',
      '-q:v',
      '2',
      `frames_%03d.${options.format}`,
    ])

    if (exitCode !== 0) {
      throw new Error(`FFmpeg exited with code ${exitCode}`)
    }

    // Read generated frames
    const files = await ffmpeg.listDir('/')
    const frameFiles = files
      .filter(
        (file) =>
          file.name.startsWith('frames_') &&
          file.name.endsWith(`.${options.format}`)
      )
      // Directory listing order is not relied upon; a numeric-aware sort also
      // keeps frames in order once the counter outgrows three digits.
      .sort((a, b) => a.name.localeCompare(b.name, undefined, { numeric: true }))

    const frames: string[] = []
    for (let i = 0; i < frameFiles.length; i++) {
      const frameData = await ffmpeg.readFile(frameFiles[i].name)
      if (typeof frameData === 'string') {
        throw new Error(`Unexpected text content for frame ${frameFiles[i].name}`)
      }
      frames.push(
        `data:image/${options.format};base64,${encodeBytesAsBase64(frameData)}`
      )

      // Update progress for frame processing (from 90% to 100%)
      options.onProgress?.(90 + Math.round(((i + 1) / frameFiles.length) * 10))
    }

    return frames
  } finally {
    // Stop the FFmpeg worker so its in-memory file system is released.
    ffmpeg.terminate()
  }
}
|
140 |
+
|
141 |
+
export default extractFramesFromVideo
|
src/services/io/fix-xenova-transformers.d.ts
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
// Shorthand ambient module declaration: it has no body, so everything
// imported from '@xenova/transformers' is typed as `any`.
declare module '@xenova/transformers'
|
src/services/io/imageCaptioning.ts
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/** Lazily created captioning worker; `null` until first use and after termination. */
let worker: Worker | null = null

/**
 * Creates the captioning worker on first use.
 *
 * Does nothing when `window` is undefined (e.g. during server-side rendering)
 * or when a worker already exists.
 */
function initializeWorker() {
  if (typeof window !== 'undefined' && !worker) {
    // The script is added at public/workers/captioning.worker.js, so it is
    // served under /workers/, not at the site root.
    worker = new Worker('/workers/captioning.worker.js')
  }
}
|
8 |
+
|
9 |
+
/**
 * Captions a list of images with the shared captioning worker.
 *
 * Images are sent to the worker one at a time; the next image is posted only
 * after the previous reply arrived, so each reply is matched to its request
 * by order.
 *
 * @param imageDataUrls - Images to caption, posted to the worker as `{ imageDataUrl }`.
 * @returns The captions, in the same order as `imageDataUrls`.
 * @throws Error when the worker cannot be created, when the worker replies
 *   with an `error` field, or when the worker itself fails.
 */
export async function captionImages(
  imageDataUrls: string[]
): Promise<string[]> {
  initializeWorker()

  if (!worker) {
    throw new Error(
      'Worker could not be initialized. Are you running in a browser environment?'
    )
  }

  // Keep a local reference: the module-level variable may be reset to null
  // by terminateWorker() while a request is still in flight.
  const activeWorker = worker
  const captions: string[] = []

  for (const imageDataUrl of imageDataUrls) {
    const caption = await new Promise<string>((resolve, reject) => {
      const detach = () => {
        activeWorker.removeEventListener('message', onMessage)
        activeWorker.removeEventListener('error', onError)
        activeWorker.removeEventListener('messageerror', onMessageError)
      }

      const onMessage = (event: MessageEvent) => {
        detach()
        if (event.data.error) {
          reject(new Error(event.data.error))
        } else {
          resolve(event.data.caption)
        }
      }

      // Without these handlers a worker that fails to load or throws would
      // leave the promise pending forever.
      const onError = (event: ErrorEvent) => {
        detach()
        reject(new Error(event.message || 'Captioning worker failed'))
      }

      const onMessageError = () => {
        detach()
        reject(new Error('Captioning worker sent an unreadable message'))
      }

      activeWorker.addEventListener('message', onMessage)
      activeWorker.addEventListener('error', onError)
      activeWorker.addEventListener('messageerror', onMessageError)
      activeWorker.postMessage({ imageDataUrl })
    })

    captions.push(caption)
  }

  return captions
}
|
42 |
+
|
43 |
+
// Optionally, you can provide a cleanup function
|
44 |
+
/**
 * Stops the shared captioning worker, if one exists, and clears the
 * module-level reference to it.
 */
export function terminateWorker() {
  if (!worker) {
    return
  }

  worker.terminate()
  worker = null
}
|