jbilcke-hf HF staff committed on
Commit
ae8cf98
1 Parent(s): 2272f3d

preparation work for the video-to-clap project

Browse files
next.config.js CHANGED
@@ -15,6 +15,15 @@ const nextConfig = {
15
  // The image may be corrupted or an unsupported format.
16
  unoptimized: true,
17
  },
 
 
 
 
 
 
 
 
 
18
  async headers() {
19
  return [
20
  {
 
15
  // The image may be corrupted or an unsupported format.
16
  unoptimized: true,
17
  },
18
+ // workaround for transformers.js issues
19
+ webpack: (config) => {
20
+ config.resolve.alias = {
21
+ ...config.resolve.alias,
22
+ "sharp$": false,
23
+ "onnxruntime-node$": false,
24
+ }
25
+ return config;
26
+ },
27
  async headers() {
28
  return [
29
  {
package-lock.json CHANGED
@@ -57,6 +57,7 @@
57
  "@types/dom-speech-recognition": "^0.0.4",
58
  "@upstash/ratelimit": "^1.1.3",
59
  "@upstash/redis": "^1.31.1",
 
60
  "autoprefixer": "10.4.19",
61
  "class-variance-authority": "^0.7.0",
62
  "clsx": "^2.1.1",
@@ -68,6 +69,7 @@
68
  "fs-extra": "^11.2.0",
69
  "is-hotkey": "^0.2.0",
70
  "lucide-react": "^0.396.0",
 
71
  "mlt-xml": "^2.0.2",
72
  "monaco-editor": "^0.50.0",
73
  "next": "^14.2.5",
@@ -2253,6 +2255,14 @@
2253
  "resolved": "https://registry.npmjs.org/@huggingface/tasks/-/tasks-0.11.2.tgz",
2254
  "integrity": "sha512-vlwUJsj/QJcR/oLXvV+JBKheaVk9pqfAPYiS136cjHEDTeTW5/+ePpM6uKOc56oxqwrUjh5T0JylHJU8vyqr1A=="
2255
  },
 
 
 
 
 
 
 
 
2256
  "node_modules/@huggingface/tasks": {
2257
  "version": "0.10.22",
2258
  "resolved": "https://registry.npmjs.org/@huggingface/tasks/-/tasks-0.10.22.tgz",
@@ -2864,6 +2874,18 @@
2864
  "url": "https://github.com/chalk/strip-ansi?sponsor=1"
2865
  }
2866
  },
 
 
 
 
 
 
 
 
 
 
 
 
2867
  "node_modules/@jridgewell/gen-mapping": {
2868
  "version": "0.3.5",
2869
  "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.5.tgz",
@@ -3406,6 +3428,60 @@
3406
  "url": "https://opencollective.com/preact"
3407
  }
3408
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3409
  "node_modules/@radix-ui/number": {
3410
  "version": "1.1.0",
3411
  "resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.0.tgz",
@@ -6176,6 +6252,19 @@
6176
  "url": "https://opencollective.com/vitest"
6177
  }
6178
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
6179
  "node_modules/abort-controller": {
6180
  "version": "3.0.0",
6181
  "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
@@ -6991,6 +7080,15 @@
6991
  "node": ">= 6"
6992
  }
6993
  },
 
 
 
 
 
 
 
 
 
6994
  "node_modules/class-variance-authority": {
6995
  "version": "0.7.0",
6996
  "resolved": "https://registry.npmjs.org/class-variance-authority/-/class-variance-authority-0.7.0.tgz",
@@ -9045,6 +9143,11 @@
9045
  "node": "^10.12.0 || >=12.0.0"
9046
  }
9047
  },
 
 
 
 
 
9048
  "node_modules/flatted": {
9049
  "version": "3.3.1",
9050
  "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz",
@@ -9592,6 +9695,11 @@
9592
  "node": ">=12.0.0"
9593
  }
9594
  },
 
 
 
 
 
9595
  "node_modules/has-bigints": {
9596
  "version": "1.0.2",
9597
  "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.0.2.tgz",
@@ -10772,6 +10880,11 @@
10772
  "url": "https://github.com/sponsors/sindresorhus"
10773
  }
10774
  },
 
 
 
 
 
10775
  "node_modules/loose-envify": {
10776
  "version": "1.4.0",
10777
  "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
@@ -10851,6 +10964,20 @@
10851
  "is-buffer": "~1.1.6"
10852
  }
10853
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10854
  "node_modules/merge-stream": {
10855
  "version": "2.0.0",
10856
  "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz",
@@ -10946,6 +11073,52 @@
10946
  "node": ">=16 || 14 >=14.17"
10947
  }
10948
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10949
  "node_modules/ml-array-mean": {
10950
  "version": "1.1.6",
10951
  "resolved": "https://registry.npmjs.org/ml-array-mean/-/ml-array-mean-1.1.6.tgz",
@@ -11540,6 +11713,40 @@
11540
  "url": "https://github.com/sponsors/sindresorhus"
11541
  }
11542
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11543
  "node_modules/openai": {
11544
  "version": "4.52.7",
11545
  "resolved": "https://registry.npmjs.org/openai/-/openai-4.52.7.tgz",
@@ -11868,6 +12075,11 @@
11868
  "node": ">= 6"
11869
  }
11870
  },
 
 
 
 
 
11871
  "node_modules/playwright": {
11872
  "version": "1.45.2",
11873
  "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.45.2.tgz",
@@ -12230,6 +12442,29 @@
12230
  "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz",
12231
  "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ=="
12232
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12233
  "node_modules/psl": {
12234
  "version": "1.9.0",
12235
  "resolved": "https://registry.npmjs.org/psl/-/psl-1.9.0.tgz",
@@ -13704,6 +13939,32 @@
13704
  "node": ">=6"
13705
  }
13706
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13707
  "node_modules/text-table": {
13708
  "version": "0.2.0",
13709
  "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",
 
57
  "@types/dom-speech-recognition": "^0.0.4",
58
  "@upstash/ratelimit": "^1.1.3",
59
  "@upstash/redis": "^1.31.1",
60
+ "@xenova/transformers": "github:xenova/transformers.js#v3",
61
  "autoprefixer": "10.4.19",
62
  "class-variance-authority": "^0.7.0",
63
  "clsx": "^2.1.1",
 
69
  "fs-extra": "^11.2.0",
70
  "is-hotkey": "^0.2.0",
71
  "lucide-react": "^0.396.0",
72
+ "mediainfo.js": "^0.3.2",
73
  "mlt-xml": "^2.0.2",
74
  "monaco-editor": "^0.50.0",
75
  "next": "^14.2.5",
 
2255
  "resolved": "https://registry.npmjs.org/@huggingface/tasks/-/tasks-0.11.2.tgz",
2256
  "integrity": "sha512-vlwUJsj/QJcR/oLXvV+JBKheaVk9pqfAPYiS136cjHEDTeTW5/+ePpM6uKOc56oxqwrUjh5T0JylHJU8vyqr1A=="
2257
  },
2258
+ "node_modules/@huggingface/jinja": {
2259
+ "version": "0.2.2",
2260
+ "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.2.2.tgz",
2261
+ "integrity": "sha512-/KPde26khDUIPkTGU82jdtTW9UAuvUTumCAbFs/7giR0SxsvZC4hru51PBvpijH6BVkHcROcvZM/lpy5h1jRRA==",
2262
+ "engines": {
2263
+ "node": ">=18"
2264
+ }
2265
+ },
2266
  "node_modules/@huggingface/tasks": {
2267
  "version": "0.10.22",
2268
  "resolved": "https://registry.npmjs.org/@huggingface/tasks/-/tasks-0.10.22.tgz",
 
2874
  "url": "https://github.com/chalk/strip-ansi?sponsor=1"
2875
  }
2876
  },
2877
+ "node_modules/@isaacs/fs-minipass": {
2878
+ "version": "4.0.1",
2879
+ "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz",
2880
+ "integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==",
2881
+ "optional": true,
2882
+ "dependencies": {
2883
+ "minipass": "^7.0.4"
2884
+ },
2885
+ "engines": {
2886
+ "node": ">=18.0.0"
2887
+ }
2888
+ },
2889
  "node_modules/@jridgewell/gen-mapping": {
2890
  "version": "0.3.5",
2891
  "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.5.tgz",
 
3428
  "url": "https://opencollective.com/preact"
3429
  }
3430
  },
3431
+ "node_modules/@protobufjs/aspromise": {
3432
+ "version": "1.1.2",
3433
+ "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
3434
+ "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ=="
3435
+ },
3436
+ "node_modules/@protobufjs/base64": {
3437
+ "version": "1.1.2",
3438
+ "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
3439
+ "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg=="
3440
+ },
3441
+ "node_modules/@protobufjs/codegen": {
3442
+ "version": "2.0.4",
3443
+ "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
3444
+ "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg=="
3445
+ },
3446
+ "node_modules/@protobufjs/eventemitter": {
3447
+ "version": "1.1.0",
3448
+ "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
3449
+ "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q=="
3450
+ },
3451
+ "node_modules/@protobufjs/fetch": {
3452
+ "version": "1.1.0",
3453
+ "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
3454
+ "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
3455
+ "dependencies": {
3456
+ "@protobufjs/aspromise": "^1.1.1",
3457
+ "@protobufjs/inquire": "^1.1.0"
3458
+ }
3459
+ },
3460
+ "node_modules/@protobufjs/float": {
3461
+ "version": "1.0.2",
3462
+ "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
3463
+ "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ=="
3464
+ },
3465
+ "node_modules/@protobufjs/inquire": {
3466
+ "version": "1.1.0",
3467
+ "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
3468
+ "integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q=="
3469
+ },
3470
+ "node_modules/@protobufjs/path": {
3471
+ "version": "1.1.2",
3472
+ "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
3473
+ "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA=="
3474
+ },
3475
+ "node_modules/@protobufjs/pool": {
3476
+ "version": "1.1.0",
3477
+ "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
3478
+ "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw=="
3479
+ },
3480
+ "node_modules/@protobufjs/utf8": {
3481
+ "version": "1.1.0",
3482
+ "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
3483
+ "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw=="
3484
+ },
3485
  "node_modules/@radix-ui/number": {
3486
  "version": "1.1.0",
3487
  "resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.0.tgz",
 
6252
  "url": "https://opencollective.com/vitest"
6253
  }
6254
  },
6255
+ "node_modules/@xenova/transformers": {
6256
+ "version": "3.0.0-alpha.0",
6257
+ "resolved": "git+ssh://git@github.com/xenova/transformers.js.git#1b4d2428225ef8f63be94bfa38a0d7fd81ac7c0c",
6258
+ "license": "Apache-2.0",
6259
+ "dependencies": {
6260
+ "@huggingface/jinja": "^0.2.2",
6261
+ "onnxruntime-web": "^1.18.0",
6262
+ "sharp": "^0.33.2"
6263
+ },
6264
+ "optionalDependencies": {
6265
+ "onnxruntime-node": "^1.18.0"
6266
+ }
6267
+ },
6268
  "node_modules/abort-controller": {
6269
  "version": "3.0.0",
6270
  "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
 
7080
  "node": ">= 6"
7081
  }
7082
  },
7083
+ "node_modules/chownr": {
7084
+ "version": "3.0.0",
7085
+ "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz",
7086
+ "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==",
7087
+ "optional": true,
7088
+ "engines": {
7089
+ "node": ">=18"
7090
+ }
7091
+ },
7092
  "node_modules/class-variance-authority": {
7093
  "version": "0.7.0",
7094
  "resolved": "https://registry.npmjs.org/class-variance-authority/-/class-variance-authority-0.7.0.tgz",
 
9143
  "node": "^10.12.0 || >=12.0.0"
9144
  }
9145
  },
9146
+ "node_modules/flatbuffers": {
9147
+ "version": "1.12.0",
9148
+ "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
9149
+ "integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ=="
9150
+ },
9151
  "node_modules/flatted": {
9152
  "version": "3.3.1",
9153
  "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz",
 
9695
  "node": ">=12.0.0"
9696
  }
9697
  },
9698
+ "node_modules/guid-typescript": {
9699
+ "version": "1.0.9",
9700
+ "resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
9701
+ "integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ=="
9702
+ },
9703
  "node_modules/has-bigints": {
9704
  "version": "1.0.2",
9705
  "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.0.2.tgz",
 
10880
  "url": "https://github.com/sponsors/sindresorhus"
10881
  }
10882
  },
10883
+ "node_modules/long": {
10884
+ "version": "5.2.3",
10885
+ "resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz",
10886
+ "integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q=="
10887
+ },
10888
  "node_modules/loose-envify": {
10889
  "version": "1.4.0",
10890
  "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
 
10964
  "is-buffer": "~1.1.6"
10965
  }
10966
  },
10967
+ "node_modules/mediainfo.js": {
10968
+ "version": "0.3.2",
10969
+ "resolved": "https://registry.npmjs.org/mediainfo.js/-/mediainfo.js-0.3.2.tgz",
10970
+ "integrity": "sha512-SC8z72ESV1z2lq2zfheoo9zgcoqjeBn0mzq6MhIn3aqlkh3RV84FwOiMtYA0HWY7mi1igM89Jcll4r2sk/yyZA==",
10971
+ "dependencies": {
10972
+ "yargs": "^17.7.2"
10973
+ },
10974
+ "bin": {
10975
+ "mediainfo.js": "dist/esm/cli.js"
10976
+ },
10977
+ "engines": {
10978
+ "node": ">=18.0.0"
10979
+ }
10980
+ },
10981
  "node_modules/merge-stream": {
10982
  "version": "2.0.0",
10983
  "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz",
 
11073
  "node": ">=16 || 14 >=14.17"
11074
  }
11075
  },
11076
+ "node_modules/minizlib": {
11077
+ "version": "3.0.1",
11078
+ "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.1.tgz",
11079
+ "integrity": "sha512-umcy022ILvb5/3Djuu8LWeqUa8D68JaBzlttKeMWen48SjabqS3iY5w/vzeMzMUNhLDifyhbOwKDSznB1vvrwg==",
11080
+ "optional": true,
11081
+ "dependencies": {
11082
+ "minipass": "^7.0.4",
11083
+ "rimraf": "^5.0.5"
11084
+ },
11085
+ "engines": {
11086
+ "node": ">= 18"
11087
+ }
11088
+ },
11089
+ "node_modules/minizlib/node_modules/rimraf": {
11090
+ "version": "5.0.9",
11091
+ "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.9.tgz",
11092
+ "integrity": "sha512-3i7b8OcswU6CpU8Ej89quJD4O98id7TtVM5U4Mybh84zQXdrFmDLouWBEEaD/QfO3gDDfH+AGFCGsR7kngzQnA==",
11093
+ "optional": true,
11094
+ "dependencies": {
11095
+ "glob": "^10.3.7"
11096
+ },
11097
+ "bin": {
11098
+ "rimraf": "dist/esm/bin.mjs"
11099
+ },
11100
+ "engines": {
11101
+ "node": "14 >=14.20 || 16 >=16.20 || >=18"
11102
+ },
11103
+ "funding": {
11104
+ "url": "https://github.com/sponsors/isaacs"
11105
+ }
11106
+ },
11107
+ "node_modules/mkdirp": {
11108
+ "version": "3.0.1",
11109
+ "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz",
11110
+ "integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==",
11111
+ "optional": true,
11112
+ "bin": {
11113
+ "mkdirp": "dist/cjs/src/bin.js"
11114
+ },
11115
+ "engines": {
11116
+ "node": ">=10"
11117
+ },
11118
+ "funding": {
11119
+ "url": "https://github.com/sponsors/isaacs"
11120
+ }
11121
+ },
11122
  "node_modules/ml-array-mean": {
11123
  "version": "1.1.6",
11124
  "resolved": "https://registry.npmjs.org/ml-array-mean/-/ml-array-mean-1.1.6.tgz",
 
11713
  "url": "https://github.com/sponsors/sindresorhus"
11714
  }
11715
  },
11716
+ "node_modules/onnxruntime-common": {
11717
+ "version": "1.18.0",
11718
+ "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.18.0.tgz",
11719
+ "integrity": "sha512-lufrSzX6QdKrktAELG5x5VkBpapbCeS3dQwrXbN0eD9rHvU0yAWl7Ztju9FvgAKWvwd/teEKJNj3OwM6eTZh3Q=="
11720
+ },
11721
+ "node_modules/onnxruntime-node": {
11722
+ "version": "1.18.0",
11723
+ "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.18.0.tgz",
11724
+ "integrity": "sha512-iTnFcxKpmywCatx8ov4GTbECe3tJk2Bp1OA2mWRJde78q+7tpPYBhKMnwhlaoKy9oKQcy4UoEuuhoy2PSD13ww==",
11725
+ "hasInstallScript": true,
11726
+ "optional": true,
11727
+ "os": [
11728
+ "win32",
11729
+ "darwin",
11730
+ "linux"
11731
+ ],
11732
+ "dependencies": {
11733
+ "onnxruntime-common": "1.18.0",
11734
+ "tar": "^7.0.1"
11735
+ }
11736
+ },
11737
+ "node_modules/onnxruntime-web": {
11738
+ "version": "1.18.0",
11739
+ "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.18.0.tgz",
11740
+ "integrity": "sha512-o1UKj4ABIj1gmG7ae0RKJ3/GT+3yoF0RRpfDfeoe0huzRW4FDRLfbkDETmdFAvnJEXuYDE0YT+hhkia0352StQ==",
11741
+ "dependencies": {
11742
+ "flatbuffers": "^1.12.0",
11743
+ "guid-typescript": "^1.0.9",
11744
+ "long": "^5.2.3",
11745
+ "onnxruntime-common": "1.18.0",
11746
+ "platform": "^1.3.6",
11747
+ "protobufjs": "^7.2.4"
11748
+ }
11749
+ },
11750
  "node_modules/openai": {
11751
  "version": "4.52.7",
11752
  "resolved": "https://registry.npmjs.org/openai/-/openai-4.52.7.tgz",
 
12075
  "node": ">= 6"
12076
  }
12077
  },
12078
+ "node_modules/platform": {
12079
+ "version": "1.3.6",
12080
+ "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
12081
+ "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg=="
12082
+ },
12083
  "node_modules/playwright": {
12084
  "version": "1.45.2",
12085
  "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.45.2.tgz",
 
12442
  "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz",
12443
  "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ=="
12444
  },
12445
+ "node_modules/protobufjs": {
12446
+ "version": "7.3.2",
12447
+ "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.3.2.tgz",
12448
+ "integrity": "sha512-RXyHaACeqXeqAKGLDl68rQKbmObRsTIn4TYVUUug1KfS47YWCo5MacGITEryugIgZqORCvJWEk4l449POg5Txg==",
12449
+ "hasInstallScript": true,
12450
+ "dependencies": {
12451
+ "@protobufjs/aspromise": "^1.1.2",
12452
+ "@protobufjs/base64": "^1.1.2",
12453
+ "@protobufjs/codegen": "^2.0.4",
12454
+ "@protobufjs/eventemitter": "^1.1.0",
12455
+ "@protobufjs/fetch": "^1.1.0",
12456
+ "@protobufjs/float": "^1.0.2",
12457
+ "@protobufjs/inquire": "^1.1.0",
12458
+ "@protobufjs/path": "^1.1.2",
12459
+ "@protobufjs/pool": "^1.1.0",
12460
+ "@protobufjs/utf8": "^1.1.0",
12461
+ "@types/node": ">=13.7.0",
12462
+ "long": "^5.0.0"
12463
+ },
12464
+ "engines": {
12465
+ "node": ">=12.0.0"
12466
+ }
12467
+ },
12468
  "node_modules/psl": {
12469
  "version": "1.9.0",
12470
  "resolved": "https://registry.npmjs.org/psl/-/psl-1.9.0.tgz",
 
13939
  "node": ">=6"
13940
  }
13941
  },
13942
+ "node_modules/tar": {
13943
+ "version": "7.4.0",
13944
+ "resolved": "https://registry.npmjs.org/tar/-/tar-7.4.0.tgz",
13945
+ "integrity": "sha512-XQs0S8fuAkQWuqhDeCdMlJXDX80D7EOVLDPVFkna9yQfzS+PHKgfxcei0jf6/+QAWcjqrnC8uM3fSAnrQl+XYg==",
13946
+ "optional": true,
13947
+ "dependencies": {
13948
+ "@isaacs/fs-minipass": "^4.0.0",
13949
+ "chownr": "^3.0.0",
13950
+ "minipass": "^7.1.2",
13951
+ "minizlib": "^3.0.1",
13952
+ "mkdirp": "^3.0.1",
13953
+ "yallist": "^5.0.0"
13954
+ },
13955
+ "engines": {
13956
+ "node": ">=18"
13957
+ }
13958
+ },
13959
+ "node_modules/tar/node_modules/yallist": {
13960
+ "version": "5.0.0",
13961
+ "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz",
13962
+ "integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==",
13963
+ "optional": true,
13964
+ "engines": {
13965
+ "node": ">=18"
13966
+ }
13967
+ },
13968
  "node_modules/text-table": {
13969
  "version": "0.2.0",
13970
  "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",
package.json CHANGED
@@ -68,6 +68,7 @@
68
  "@types/dom-speech-recognition": "^0.0.4",
69
  "@upstash/ratelimit": "^1.1.3",
70
  "@upstash/redis": "^1.31.1",
 
71
  "autoprefixer": "10.4.19",
72
  "class-variance-authority": "^0.7.0",
73
  "clsx": "^2.1.1",
@@ -79,6 +80,7 @@
79
  "fs-extra": "^11.2.0",
80
  "is-hotkey": "^0.2.0",
81
  "lucide-react": "^0.396.0",
 
82
  "mlt-xml": "^2.0.2",
83
  "monaco-editor": "^0.50.0",
84
  "next": "^14.2.5",
 
68
  "@types/dom-speech-recognition": "^0.0.4",
69
  "@upstash/ratelimit": "^1.1.3",
70
  "@upstash/redis": "^1.31.1",
71
+ "@xenova/transformers": "github:xenova/transformers.js#v3",
72
  "autoprefixer": "10.4.19",
73
  "class-variance-authority": "^0.7.0",
74
  "clsx": "^2.1.1",
 
80
  "fs-extra": "^11.2.0",
81
  "is-hotkey": "^0.2.0",
82
  "lucide-react": "^0.396.0",
83
+ "mediainfo.js": "^0.3.2",
84
  "mlt-xml": "^2.0.2",
85
  "monaco-editor": "^0.50.0",
86
  "next": "^14.2.5",
public/workers/captioning.worker.js ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ importScripts('https://cdn.jsdelivr.net/npm/@xenova/transformers@3.0.0');
2
+
3
+ const { AutoProcessor, AutoTokenizer, Moondream1ForConditionalGeneration, RawImage } = transformers;
4
+
5
// Inference components shared by every captioning request; all three are
// undefined until initializeModel() has completed successfully.
let processor;
let tokenizer;
let model;

// Promise of the initialisation currently running (or already finished).
// Keeping it lets overlapping callers wait on one load instead of starting another.
let modelReadyPromise = null;

/**
 * Loads the processor, tokenizer and model for `Xenova/moondream2`.
 *
 * The three components are published to the module-level variables only after
 * all of them loaded, so a failure part-way through never leaves a mix of old
 * and new state behind.
 *
 * @returns {Promise<void>}
 */
async function loadModelComponents() {
  const model_id = 'Xenova/moondream2';
  const loadedProcessor = await AutoProcessor.from_pretrained(model_id);
  const loadedTokenizer = await AutoTokenizer.from_pretrained(model_id);
  const loadedModel = await Moondream1ForConditionalGeneration.from_pretrained(model_id, {
    dtype: {
      embed_tokens: 'fp16',
      vision_encoder: 'fp16',
      decoder_model_merged: 'q4',
    },
    device: 'webgpu',
  });

  processor = loadedProcessor;
  tokenizer = loadedTokenizer;
  model = loadedModel;
}

/**
 * Ensures `processor`, `tokenizer` and `model` are initialised.
 *
 * Calls made while a load is in progress, or after it succeeded, reuse the
 * same promise, so the model is not loaded a second time when several messages
 * arrive before the first load has finished. If the load fails the cached
 * promise is cleared so that a later call can try again.
 *
 * @returns {Promise<void>} Settles when the shared load settles.
 */
async function initializeModel() {
  if (!modelReadyPromise) {
    modelReadyPromise = loadModelComponents().catch((error) => {
      modelReadyPromise = null;
      throw error;
    });
  }
  return modelReadyPromise;
}
22
+
23
/**
 * Produces a short textual description of an image.
 *
 * @param {string} imageDataUrl URL (typically a base64 data URL) handed to `RawImage.fromURL`.
 * @returns {Promise<string>} The model's answer, trimmed, without the prompt text.
 * @throws {TypeError} If `imageDataUrl` is not a non-empty string.
 */
async function captionImage(imageDataUrl) {
  if (typeof imageDataUrl !== 'string' || imageDataUrl.length === 0) {
    throw new TypeError('captionImage expects a non-empty image URL string');
  }

  // Load the model lazily on the first request.
  if (!processor || !tokenizer || !model) {
    await initializeModel();
  }

  const prompt = 'Describe this image.';
  const text = `<image>\n\nQuestion: ${prompt}\n\nAnswer:`;
  const text_inputs = tokenizer(text);

  const image = await RawImage.fromURL(imageDataUrl);
  const vision_inputs = await processor(image);

  const output = await model.generate({
    ...text_inputs,
    ...vision_inputs,
    do_sample: false,
    max_new_tokens: 64,
  });

  const decoded = tokenizer.batch_decode(output, { skip_special_tokens: true });
  const fullText = `${decoded[0] ?? ''}`;

  // The decoded sequence echoes the prompt before the generated text, so only
  // what follows the first "Answer:" marker is the caption. If the marker is
  // missing the whole text is returned unchanged.
  const answerMarker = 'Answer:';
  const markerIndex = fullText.indexOf(answerMarker);
  const answer = markerIndex === -1 ? fullText : fullText.slice(markerIndex + answerMarker.length);
  return answer.trim();
}
45
+
46
// Worker protocol: each incoming `{ imageDataUrl }` message is answered with
// exactly one message, either `{ caption }` on success or `{ error }` on failure.
self.addEventListener('message', async (event) => {
  // Tolerate messages without a payload; captionImage reports the bad input.
  const { imageDataUrl } = event.data ?? {};
  try {
    const caption = await captionImage(imageDataUrl);
    self.postMessage({ caption });
  } catch (error) {
    // Always send a non-empty string: a receiver that tests `data.error` for
    // truthiness would otherwise mistake a failure for a success when the
    // thrown value is not an Error or carries an empty message.
    const description = error instanceof Error ? error.message : String(error);
    self.postMessage({ error: description || 'Unknown captioning error' });
  }
});
src/services/io/extractCaptionFromFrame.ts ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {
2
+ AutoProcessor,
3
+ AutoTokenizer,
4
+ Moondream1ForConditionalGeneration,
5
+ RawImage,
6
+ } from '@xenova/transformers'
7
+
8
/**
 * Loads the processor, tokenizer and model for `Xenova/moondream2`.
 */
async function loadCaptioningComponents() {
  const model_id = 'Xenova/moondream2'
  const processor = await AutoProcessor.from_pretrained(model_id)
  const tokenizer = await AutoTokenizer.from_pretrained(model_id)
  const model = await Moondream1ForConditionalGeneration.from_pretrained(
    model_id,
    {
      dtype: {
        embed_tokens: 'fp16', // or 'fp32'
        vision_encoder: 'fp16', // or 'q8'
        decoder_model_merged: 'q4', // or 'q4f16' or 'q8'
      },
      device: 'webgpu',
    }
  )
  return { processor, tokenizer, model }
}

// Shared load, so that captioning many frames does not reload the model per frame.
let captioningComponents: ReturnType<typeof loadCaptioningComponents> | null =
  null

/**
 * Returns the shared components, starting the load on first use.
 * A failed load is forgotten so that the next call can retry.
 */
function getCaptioningComponents() {
  if (!captioningComponents) {
    captioningComponents = loadCaptioningComponents().catch(
      (error: unknown) => {
        captioningComponents = null
        throw error
      }
    )
  }
  return captioningComponents
}

/**
 * Describes a single video frame with the Moondream model running on WebGPU.
 *
 * @param imageInBase64DataUri - The frame, as a URL accepted by `RawImage.fromURL`
 *   (a base64 data URI, going by the parameter name).
 * @returns The model's answer to "Describe this image.", trimmed and without
 *   the prompt or special tokens; an empty string if nothing was decoded.
 * @throws Error with setup instructions when `navigator.gpu` is unavailable,
 *   including when no `navigator` exists at all (e.g. server-side rendering).
 */
export async function extractCaptionFromFrame(
  imageInBase64DataUri: string
): Promise<string> {
  const hasWebGpu =
    typeof navigator !== 'undefined' &&
    Boolean((navigator as Navigator & { gpu?: unknown }).gpu)

  if (!hasWebGpu) {
    throw new Error(`Please enable WebGPU to analyze video frames:

1. You need a modern browser such as Google Chrome 113+, Microsoft Edge 113+, Safari 18 (macOS 15), Firefox Nightly

2. You need to enable WebGPU (depends on your browser, see below)

2.1 For Chrome: Perform the following operations in the Chrome / Microsoft Edge address bar
The chrome://flags/#enable-unsafe-webgpu flag must be enabled (not enable-webgpu-developer-features). Linux experimental support also requires launching the browser with --enable-features=Vulkan.

2.2 For Safari 18 (macOS 15): WebGPU is enabled by default

2.3 For Firefox Nightly: Type about:config in the address bar and set 'dom.webgpu.enabled' to true
`)
  }

  const { processor, tokenizer, model } = await getCaptioningComponents()

  // Prepare text inputs
  const prompt = 'Describe this image.'
  const text = `<image>\n\nQuestion: ${prompt}\n\nAnswer:`
  const text_inputs = tokenizer(text)

  // Prepare vision inputs
  const image = await RawImage.fromURL(imageInBase64DataUri)
  const vision_inputs = await processor(image)

  // Generate response
  const output = await model.generate({
    ...text_inputs,
    ...vision_inputs,
    do_sample: false,
    max_new_tokens: 177,
  })
  const decoded = tokenizer.batch_decode(output, { skip_special_tokens: true })

  // The decoded sequence repeats the prompt before the generated text, e.g.
  //   '<image>\n\nQuestion: Describe this image.\n\nAnswer: A hand is holding a white book ...'
  // so the caption is whatever follows the first "Answer:" marker.
  const fullText = `${decoded[0] || ''}`
  const answerMarker = 'Answer:'
  const markerIndex = fullText.indexOf(answerMarker)
  const answer =
    markerIndex === -1
      ? fullText
      : fullText.slice(markerIndex + answerMarker.length)

  return answer.trim()
}
src/services/io/extractFramesFromVideo.ts ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { FFmpeg } from '@ffmpeg/ffmpeg'
2
+ import { toBlobURL } from '@ffmpeg/util'
3
+ import mediaInfoFactory, {
4
+ Track,
5
+ GeneralTrack,
6
+ VideoTrack,
7
+ AudioTrack,
8
+ TextTrack,
9
+ ImageTrack,
10
+ MenuTrack,
11
+ OtherTrack,
12
+ } from 'mediainfo.js'
13
+
14
/** Options controlling which frames are extracted and how they are encoded. */
interface FrameExtractorOptions {
  /** Image format of the returned frames. */
  format: 'png' | 'jpg'
  /** Width of the box the frames are scaled to fit in (aspect ratio is kept). Must be > 0. */
  maxWidth: number
  /** Height of the box the frames are scaled to fit in (aspect ratio is kept). Must be > 0. */
  maxHeight: number
  /**
   * Percentage (0-100) of additional frames kept on top of the detected scene
   * changes: every `floor(100 / sceneSamplingRate)`-th frame is kept as well.
   * 0 keeps scene changes only. Values outside 0-100 are clamped.
   */
  sceneSamplingRate: number
  /** Called with an increasing progress value between 0 and 100. */
  onProgress?: (progress: number) => void
}

/** Scene score above which FFmpeg's `select` filter treats a frame as a scene change. */
const SCENE_CHANGE_THRESHOLD = 0.4

/** Portion of the progress range given to the FFmpeg run; the rest covers reading the frames back. */
const EXTRACTION_PROGRESS_SHARE = 90

/** Number of bytes converted per `String.fromCharCode` call, kept small to stay within argument limits. */
const BASE64_CHUNK_SIZE = 0x8000

/** Reads the longest audio/video track duration of the blob with MediaInfo (0 if none is reported). */
async function probeDuration(videoBlob: Blob): Promise<number> {
  const mediaInfo = await mediaInfoFactory({ format: 'object' })
  try {
    const getSize = () => videoBlob.size
    const readChunk = async (chunkSize: number, offset: number) =>
      new Uint8Array(
        await videoBlob.slice(offset, offset + chunkSize).arrayBuffer()
      )

    const result = await mediaInfo.analyzeData(getSize, readChunk)

    let duration = 0
    for (const track of result.media?.track || []) {
      let candidate: unknown
      if (track['@type'] === 'Audio') {
        candidate = (track as AudioTrack).Duration
      } else if (track['@type'] === 'Video') {
        candidate = (track as VideoTrack).Duration
      }
      // Keep the longest valid value, so a later track without a duration
      // cannot reset what an earlier track provided.
      if (
        typeof candidate === 'number' &&
        Number.isFinite(candidate) &&
        candidate > duration
      ) {
        duration = candidate
      }
    }
    return duration
  } finally {
    mediaInfo.close()
  }
}

/** Builds the `-vf` filter graph (frame selection followed by scaling) from the options. */
function buildFrameFilter(options: FrameExtractorOptions): string {
  const { maxWidth, maxHeight, sceneSamplingRate } = options
  if (
    !Number.isFinite(maxWidth) ||
    !Number.isFinite(maxHeight) ||
    maxWidth <= 0 ||
    maxHeight <= 0
  ) {
    throw new RangeError('maxWidth and maxHeight must be positive numbers')
  }

  const rate = Number.isFinite(sceneSamplingRate)
    ? Math.min(100, Math.max(0, sceneSamplingRate))
    : 0

  // "+" combines the conditions as a logical OR inside one select filter.
  // Chaining two select filters would instead keep only frames passing both.
  let selection = `gt(scene,${SCENE_CHANGE_THRESHOLD})`
  if (rate > 0) {
    const step = Math.max(1, Math.floor(100 / rate))
    selection += `+not(mod(n,${step}))`
  }

  // The expressions are wrapped in single quotes so that their commas are not
  // read as filter separators by FFmpeg's filter graph parser.
  const fit = `min(${maxWidth}/iw,${maxHeight}/ih)`
  return `select='${selection}',scale='iw*${fit}':'ih*${fit}'`
}

/** Base64-encodes bytes in chunks, avoiding one huge `String.fromCharCode` argument list. */
function bytesToBase64(bytes: Uint8Array): string {
  let binary = ''
  for (let offset = 0; offset < bytes.length; offset += BASE64_CHUNK_SIZE) {
    binary += String.fromCharCode(
      ...bytes.subarray(offset, offset + BASE64_CHUNK_SIZE)
    )
  }
  return btoa(binary)
}

/**
 * Extracts representative frames from a video using FFmpeg compiled to WebAssembly.
 *
 * Frames at scene changes are kept, plus a regular sample controlled by
 * `options.sceneSamplingRate`. Every frame is scaled to fit inside
 * `maxWidth` x `maxHeight` and returned as a base64 data URI.
 *
 * @param videoBlob - The video file contents.
 * @param options - Sampling, scaling, format and progress settings.
 * @returns The extracted frames as data URIs, in frame order.
 * @throws RangeError if `maxWidth` or `maxHeight` is not a positive number.
 * @throws Error if the duration cannot be determined or FFmpeg reports a failure.
 */
async function extractFramesFromVideo(
  videoBlob: Blob,
  options: FrameExtractorOptions
): Promise<string[]> {
  // Validate the options before doing any expensive work.
  const filterGraph = buildFrameFilter(options)

  // NOTE(review): assumes MediaInfo reports Duration in seconds, the same unit
  // as the hh:mm:ss timestamps parsed from FFmpeg's log below — confirm.
  const duration = await probeDuration(videoBlob)
  if (!duration) {
    throw new Error('Could not determine video duration (or it is length 0)')
  }

  // Only ever report increasing values, clamped to 0-100.
  let lastProgress = 0
  const reportProgress = (value: number) => {
    const progress = Math.min(100, Math.max(0, Math.round(value)))
    if (progress > lastProgress) {
      lastProgress = progress
      options.onProgress?.(progress)
    }
  }

  const ffmpeg = new FFmpeg()
  try {
    const baseURL = 'https://unpkg.com/@ffmpeg/core@0.12.6/dist/umd'
    await ffmpeg.load({
      coreURL: await toBlobURL(`${baseURL}/ffmpeg-core.js`, 'text/javascript'),
      wasmURL: await toBlobURL(
        `${baseURL}/ffmpeg-core.wasm`,
        'application/wasm'
      ),
    })

    // Write video file to FFmpeg's file system
    await ffmpeg.writeFile(
      'input.mp4',
      new Uint8Array(await videoBlob.arrayBuffer())
    )

    // Derive progress from the "time=hh:mm:ss.cc" stamps in FFmpeg's log output.
    ffmpeg.on('log', ({ message }) => {
      const timeMatch = message.match(/time=(\d{2}):(\d{2}):(\d{2}\.\d{2})/)
      if (!timeMatch) {
        return
      }
      const currentTime =
        Number(timeMatch[1]) * 3600 +
        Number(timeMatch[2]) * 60 +
        Number(timeMatch[3])
      reportProgress((currentTime / duration) * EXTRACTION_PROGRESS_SHARE)
    })

    const exitCode = await ffmpeg.exec([
      '-i',
      'input.mp4',
      '-vf',
      filterGraph,
      '-vsync',
      '0',
      '-q:v',
      '2',
      `frames_%03d.${options.format}`,
    ])
    if (exitCode !== 0) {
      throw new Error(
        `FFmpeg exited with code ${exitCode} while extracting frames`
      )
    }

    // Read generated frames, ordered by their numeric suffix
    // (plain string order breaks once the counter exceeds three digits).
    const extension = `.${options.format}`
    const frameNames = (await ffmpeg.listDir('/'))
      .map((file) => file.name)
      .filter((name) => name.startsWith('frames_') && name.endsWith(extension))
      .sort((a, b) => a.localeCompare(b, undefined, { numeric: true }))

    // "jpg" is a file extension; the registered MIME type is image/jpeg.
    const mimeType = options.format === 'jpg' ? 'image/jpeg' : 'image/png'

    const frames: string[] = []
    for (let i = 0; i < frameNames.length; i++) {
      const frameData = await ffmpeg.readFile(frameNames[i] as string)
      const bytes =
        typeof frameData === 'string'
          ? new TextEncoder().encode(frameData)
          : frameData
      frames.push(`data:${mimeType};base64,${bytesToBase64(bytes)}`)

      reportProgress(
        EXTRACTION_PROGRESS_SHARE +
          ((i + 1) / frameNames.length) * (100 - EXTRACTION_PROGRESS_SHARE)
      )
    }

    reportProgress(100)
    return frames
  } finally {
    // Stops the FFmpeg worker, which also discards its in-memory files.
    ffmpeg.terminate()
  }
}

export default extractFramesFromVideo
src/services/io/fix-xenova-transformers.d.ts ADDED
@@ -0,0 +1 @@
 
 
1
+ declare module '@xenova/transformers'
src/services/io/imageCaptioning.ts ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// URL of the worker script. The script lives at
// `public/workers/captioning.worker.js`, and files under `public/` are served
// from the site root, hence the `/workers/` prefix.
const CAPTIONING_WORKER_URL = '/workers/captioning.worker.js'

/** Shared captioning worker; `null` until first use and after termination. */
let worker: Worker | null = null

/**
 * Creates the shared captioning worker if it does not exist yet.
 *
 * Does nothing when `window` is undefined (e.g. during server-side rendering),
 * in which case `worker` stays `null`.
 */
function initializeWorker() {
  if (typeof window !== 'undefined' && !worker) {
    worker = new Worker(CAPTIONING_WORKER_URL)
  }
}
8
+
9
// Tail of the chain of captioning jobs. The worker protocol carries no request
// id, so replies can only be matched to requests if one request is in flight
// at a time; every call to captionImages is therefore queued behind this.
let captioningQueue: Promise<unknown> = Promise.resolve()

/**
 * Sends one image to the worker and waits for the single reply it produces.
 * Rejects if the worker replies with an error or raises an `error` event
 * (for example when its script cannot be loaded).
 */
function requestCaption(
  target: Worker,
  imageDataUrl: string
): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const detach = () => {
      target.removeEventListener('message', handleMessage)
      target.removeEventListener('error', handleError)
    }

    const handleMessage = (event: MessageEvent) => {
      detach()
      if (event.data.error) {
        reject(new Error(event.data.error))
      } else {
        resolve(event.data.caption)
      }
    }

    // Without this the promise would never settle if the worker itself fails.
    const handleError = (event: ErrorEvent) => {
      detach()
      reject(new Error(event.message || 'The captioning worker failed'))
    }

    target.addEventListener('message', handleMessage)
    target.addEventListener('error', handleError)
    target.postMessage({ imageDataUrl })
  })
}

/**
 * Captions a list of images using the shared captioning worker.
 *
 * Images are processed one after another, and overlapping calls are queued so
 * that their replies cannot be mixed up.
 *
 * @param imageDataUrls - Image URLs (data URLs, going by the name) to describe.
 * @returns One caption per input, in the same order.
 * @throws Error if no worker could be created, or if the worker reports or
 *   raises an error for any image.
 */
export async function captionImages(
  imageDataUrls: string[]
): Promise<string[]> {
  const job = async (): Promise<string[]> => {
    initializeWorker()

    if (!worker) {
      throw new Error(
        'Worker could not be initialized. Are you running in a browser environment?'
      )
    }
    const activeWorker = worker

    const captions: string[] = []
    for (const imageDataUrl of imageDataUrls) {
      captions.push(await requestCaption(activeWorker, imageDataUrl))
    }
    return captions
  }

  // Start after the previous job settles, whether it succeeded or failed.
  const result = captioningQueue.then(job, job)
  // Keep the queue itself from rejecting; the caller still receives the error.
  captioningQueue = result.catch(() => undefined)
  return result
}
42
+
43
/**
 * Optional cleanup: stops the shared captioning worker, if there is one, and
 * clears the reference so that `initializeWorker` can create a new worker later.
 */
export function terminateWorker() {
  worker?.terminate()
  worker = null
}