jbilcke-hf HF staff committed on
Commit
1cea837
1 Parent(s): 0df1259

rename from model to entity

Browse files
.env CHANGED
@@ -28,6 +28,8 @@ AUTH_OPENAI_API_KEY=""
28
  VIDEOCHAIN_API_URL=""
29
  VIDEOCHAIN_API_KEY=""
30
 
 
 
31
  # ----------- CENSORSHIP -------
32
  ENABLE_CENSORSHIP=
33
  FINGERPRINT_KEY=
 
28
  VIDEOCHAIN_API_URL=""
29
  VIDEOCHAIN_API_KEY=""
30
 
31
+ MICROSERVICE_API_SECRET_TOKEN=""
32
+
33
  # ----------- CENSORSHIP -------
34
  ENABLE_CENSORSHIP=
35
  FINGERPRINT_KEY=
package-lock.json CHANGED
@@ -1,14 +1,15 @@
1
  {
2
- "name": "ai-tube",
3
  "version": "0.0.0",
4
  "lockfileVersion": 3,
5
  "requires": true,
6
  "packages": {
7
  "": {
8
- "name": "ai-tube",
9
  "version": "0.0.0",
10
  "dependencies": {
11
- "@aitube/clap": "^0.0.6",
 
12
  "@huggingface/hub": "0.12.3-oauth",
13
  "@huggingface/inference": "^2.6.7",
14
  "@jcoreio/async-throttle": "^1.6.0",
@@ -60,6 +61,7 @@
60
  "eslint": "8.45.0",
61
  "eslint-config-next": "13.4.10",
62
  "fastest-levenshtein": "^1.0.16",
 
63
  "gsplat": "^1.2.4",
64
  "hash-wasm": "^4.11.0",
65
  "jose": "^5.2.4",
@@ -103,6 +105,7 @@
103
  "zustand": "^4.4.7"
104
  },
105
  "devDependencies": {
 
106
  "@types/proper-lockfile": "^4.1.2",
107
  "@types/qs": "^6.9.7",
108
  "@types/react-copy-to-clipboard": "^5.0.7",
@@ -111,18 +114,10 @@
111
  "daisyui": "^3.7.4"
112
  }
113
  },
114
- "node_modules/@aashutoshrathi/word-wrap": {
115
- "version": "1.2.6",
116
- "resolved": "https://registry.npmjs.org/@aashutoshrathi/word-wrap/-/word-wrap-1.2.6.tgz",
117
- "integrity": "sha512-1Yjs2SvM8TflER/OD3cOjhWWOZb58A2t7wpE2S9XfBYTiIl+XFhQG2bjy4Pu1I+EAlCNUzRDYDdFwFYUKvXcIA==",
118
- "engines": {
119
- "node": ">=0.10.0"
120
- }
121
- },
122
  "node_modules/@aitube/clap": {
123
- "version": "0.0.6",
124
- "resolved": "https://registry.npmjs.org/@aitube/clap/-/clap-0.0.6.tgz",
125
- "integrity": "sha512-SPo90RBnOJCmp+DqzxllNOcp38AbHSzqkAbYEudRiubqWHDF1GGqYi25gCdG7bFIWH+8evjSiiwsjkzedpbhoA==",
126
  "dependencies": {
127
  "pure-uuid": "^1.8.1",
128
  "yaml": "^2.4.1"
@@ -131,6 +126,19 @@
131
  "typescript": "^5.4.5"
132
  }
133
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  "node_modules/@alloc/quick-lru": {
135
  "version": "5.2.0",
136
  "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz",
@@ -901,28 +909,28 @@
901
  }
902
  },
903
  "node_modules/@floating-ui/core": {
904
- "version": "1.6.0",
905
- "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.6.0.tgz",
906
- "integrity": "sha512-PcF++MykgmTj3CIyOQbKA/hDzOAiqI3mhuoN44WRCopIs1sgoDoU4oty4Jtqaj/y3oDU6fnVSm4QG0a3t5i0+g==",
907
  "dependencies": {
908
- "@floating-ui/utils": "^0.2.1"
909
  }
910
  },
911
  "node_modules/@floating-ui/dom": {
912
- "version": "1.6.3",
913
- "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.6.3.tgz",
914
- "integrity": "sha512-RnDthu3mzPlQ31Ss/BTwQ1zjzIhr3lk1gZB1OC56h/1vEtaXkESrOqL5fQVMfXpwGtRwX+YsZBdyHtJMQnkArw==",
915
  "dependencies": {
916
  "@floating-ui/core": "^1.0.0",
917
  "@floating-ui/utils": "^0.2.0"
918
  }
919
  },
920
  "node_modules/@floating-ui/react-dom": {
921
- "version": "2.0.8",
922
- "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.0.8.tgz",
923
- "integrity": "sha512-HOdqOt3R3OGeTKidaLvJKcgg75S6tibQ3Tif4eyd91QnIJWr0NLvoXFpJA/j8HqkFSL68GDca9AuyWEHlhyClw==",
924
  "dependencies": {
925
- "@floating-ui/dom": "^1.6.1"
926
  },
927
  "peerDependencies": {
928
  "react": ">=16.8.0",
@@ -930,9 +938,9 @@
930
  }
931
  },
932
  "node_modules/@floating-ui/utils": {
933
- "version": "0.2.1",
934
- "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.1.tgz",
935
- "integrity": "sha512-9TANp6GPoMtYzQdt54kfAyMmz1+osLlXdg2ENroU7zzrtflTLrrC/lgrIfaSe+Wu0b89GKccT7vxXA0MoAIO+Q=="
936
  },
937
  "node_modules/@huggingface/hub": {
938
  "version": "0.12.3-oauth",
@@ -1507,9 +1515,9 @@
1507
  }
1508
  },
1509
  "node_modules/@mediapipe/tasks-vision": {
1510
- "version": "0.10.13-rc.20240426",
1511
- "resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.13-rc.20240426.tgz",
1512
- "integrity": "sha512-YyickIMLXr2/pEOZ00bHYWfWmCAwC8uRv0Ek6haQvnzahwfiw4Evlka3XRa8SxL4X7p432puS558xAm5h0SJTA=="
1513
  },
1514
  "node_modules/@next/env": {
1515
  "version": "14.2.3",
@@ -2930,6 +2938,15 @@
2930
  "resolved": "https://registry.npmjs.org/@types/cookie/-/cookie-0.4.1.tgz",
2931
  "integrity": "sha512-XW/Aa8APYr6jSVVA1y/DEIZX0/GMKLEVekNG727R8cs56ahETkRAy/3DR7+fJyh7oUgGwNQaRfXCun0+KbWY7Q=="
2932
  },
 
 
 
 
 
 
 
 
 
2933
  "node_modules/@types/json5": {
2934
  "version": "0.0.29",
2935
  "resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz",
@@ -3479,6 +3496,11 @@
3479
  "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.8.tgz",
3480
  "integrity": "sha512-OH/2E5Fg20h2aPrbe+QL8JZQFko0YZaF+j4mnQ7BGhfavO7OpSLa8a0y9sBwomHdSbkhTS8TQNayBfnW5DwbvQ=="
3481
  },
 
 
 
 
 
3482
  "node_modules/asynckit": {
3483
  "version": "0.4.0",
3484
  "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
@@ -3713,9 +3735,9 @@
3713
  }
3714
  },
3715
  "node_modules/caniuse-lite": {
3716
- "version": "1.0.30001612",
3717
- "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001612.tgz",
3718
- "integrity": "sha512-lFgnZ07UhaCcsSZgWW0K5j4e69dK1u/ltrL9lTUiFOwNHs12S3UMIEYgBV0Z6C6hRDev7iRnMzzYmKabYdXF9g==",
3719
  "funding": [
3720
  {
3721
  "type": "opencollective",
@@ -5116,6 +5138,29 @@
5116
  "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz",
5117
  "integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw=="
5118
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5119
  "node_modules/follow-redirects": {
5120
  "version": "1.15.6",
5121
  "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
@@ -6655,16 +6700,16 @@
6655
  }
6656
  },
6657
  "node_modules/optionator": {
6658
- "version": "0.9.3",
6659
- "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.3.tgz",
6660
- "integrity": "sha512-JjCoypp+jKn1ttEFExxhetCKeJt9zhAgAve5FXHixTvFDW/5aEktX9bufBKLRRMdU7bNtpLfcGu94B3cdEJgjg==",
6661
  "dependencies": {
6662
- "@aashutoshrathi/word-wrap": "^1.2.3",
6663
  "deep-is": "^0.1.3",
6664
  "fast-levenshtein": "^2.0.6",
6665
  "levn": "^0.4.1",
6666
  "prelude-ls": "^1.2.1",
6667
- "type-check": "^0.4.0"
 
6668
  },
6669
  "engines": {
6670
  "node": ">= 0.8.0"
@@ -6759,9 +6804,9 @@
6759
  }
6760
  },
6761
  "node_modules/path-scurry/node_modules/lru-cache": {
6762
- "version": "10.2.1",
6763
- "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.2.1.tgz",
6764
- "integrity": "sha512-tS24spDe/zXhWbNPErCHs/AGOzbKGHT+ybSBqmdLm8WZ1xXLWvH8Qn71QPAlqVhd0qUTWjy+Kl9JmISgDdEjsA==",
6765
  "engines": {
6766
  "node": "14 || >=16.14"
6767
  }
@@ -8248,9 +8293,9 @@
8248
  }
8249
  },
8250
  "node_modules/type-fest": {
8251
- "version": "4.17.0",
8252
- "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.17.0.tgz",
8253
- "integrity": "sha512-9flrz1zkfLRH3jO3bLflmTxryzKMxVa7841VeMgBaNQGY6vH4RCcpN/sQLB7mQQYh1GZ5utT2deypMuCy4yicw==",
8254
  "engines": {
8255
  "node": ">=16"
8256
  },
@@ -8640,6 +8685,14 @@
8640
  "url": "https://github.com/sponsors/ljharb"
8641
  }
8642
  },
 
 
 
 
 
 
 
 
8643
  "node_modules/wrap-ansi": {
8644
  "version": "8.1.0",
8645
  "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz",
@@ -8766,9 +8819,9 @@
8766
  "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A=="
8767
  },
8768
  "node_modules/yaml": {
8769
- "version": "2.4.1",
8770
- "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.4.1.tgz",
8771
- "integrity": "sha512-pIXzoImaqmfOrL7teGUBt/T7ZDnyeGBWyXQBvOVhLkWLN37GXv8NMLK406UY6dS51JfcQHsmcW5cJ441bHg6Lg==",
8772
  "bin": {
8773
  "yaml": "bin.mjs"
8774
  },
 
1
  {
2
+ "name": "@aitube/website",
3
  "version": "0.0.0",
4
  "lockfileVersion": 3,
5
  "requires": true,
6
  "packages": {
7
  "": {
8
+ "name": "@aitube/website",
9
  "version": "0.0.0",
10
  "dependencies": {
11
+ "@aitube/clap": "0.0.7",
12
+ "@aitube/client": "0.0.7",
13
  "@huggingface/hub": "0.12.3-oauth",
14
  "@huggingface/inference": "^2.6.7",
15
  "@jcoreio/async-throttle": "^1.6.0",
 
61
  "eslint": "8.45.0",
62
  "eslint-config-next": "13.4.10",
63
  "fastest-levenshtein": "^1.0.16",
64
+ "fluent-ffmpeg": "^2.1.2",
65
  "gsplat": "^1.2.4",
66
  "hash-wasm": "^4.11.0",
67
  "jose": "^5.2.4",
 
105
  "zustand": "^4.4.7"
106
  },
107
  "devDependencies": {
108
+ "@types/fluent-ffmpeg": "^2.1.24",
109
  "@types/proper-lockfile": "^4.1.2",
110
  "@types/qs": "^6.9.7",
111
  "@types/react-copy-to-clipboard": "^5.0.7",
 
114
  "daisyui": "^3.7.4"
115
  }
116
  },
 
 
 
 
 
 
 
 
117
  "node_modules/@aitube/clap": {
118
+ "version": "0.0.7",
119
+ "resolved": "https://registry.npmjs.org/@aitube/clap/-/clap-0.0.7.tgz",
120
+ "integrity": "sha512-0muPu4G1sRsNqSVZ/ICBCc4QibZ9OT33ORbahPP1+h3GYcD/7K+ZLYJjdbQwJWVEcpKDosDVaQKeNYdab0S0LA==",
121
  "dependencies": {
122
  "pure-uuid": "^1.8.1",
123
  "yaml": "^2.4.1"
 
126
  "typescript": "^5.4.5"
127
  }
128
  },
129
+ "node_modules/@aitube/client": {
130
+ "version": "0.0.7",
131
+ "resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.7.tgz",
132
+ "integrity": "sha512-s6vxst7pkLt7tI96JS508gfk4EgdLJy5Itr76ej/zvtMRMgnKgAlfB6Bb8/1u7L5CToz4Wgk6h4kz8T+yEbEeg==",
133
+ "dependencies": {
134
+ "uuid": "^9.0.1",
135
+ "yaml": "^2.4.1"
136
+ },
137
+ "peerDependencies": {
138
+ "@aitube/clap": "0.0.7",
139
+ "typescript": "^5.4.5"
140
+ }
141
+ },
142
  "node_modules/@alloc/quick-lru": {
143
  "version": "5.2.0",
144
  "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz",
 
909
  }
910
  },
911
  "node_modules/@floating-ui/core": {
912
+ "version": "1.6.1",
913
+ "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.6.1.tgz",
914
+ "integrity": "sha512-42UH54oPZHPdRHdw6BgoBD6cg/eVTmVrFcgeRDM3jbO7uxSoipVcmcIGFcA5jmOHO5apcyvBhkSKES3fQJnu7A==",
915
  "dependencies": {
916
+ "@floating-ui/utils": "^0.2.0"
917
  }
918
  },
919
  "node_modules/@floating-ui/dom": {
920
+ "version": "1.6.4",
921
+ "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.6.4.tgz",
922
+ "integrity": "sha512-0G8R+zOvQsAG1pg2Q99P21jiqxqGBW1iRe/iXHsBRBxnpXKFI8QwbB4x5KmYLggNO5m34IQgOIu9SCRfR/WWiQ==",
923
  "dependencies": {
924
  "@floating-ui/core": "^1.0.0",
925
  "@floating-ui/utils": "^0.2.0"
926
  }
927
  },
928
  "node_modules/@floating-ui/react-dom": {
929
+ "version": "2.0.9",
930
+ "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.0.9.tgz",
931
+ "integrity": "sha512-q0umO0+LQK4+p6aGyvzASqKbKOJcAHJ7ycE9CuUvfx3s9zTHWmGJTPOIlM/hmSBfUfg/XfY5YhLBLR/LHwShQQ==",
932
  "dependencies": {
933
+ "@floating-ui/dom": "^1.0.0"
934
  },
935
  "peerDependencies": {
936
  "react": ">=16.8.0",
 
938
  }
939
  },
940
  "node_modules/@floating-ui/utils": {
941
+ "version": "0.2.2",
942
+ "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.2.tgz",
943
+ "integrity": "sha512-J4yDIIthosAsRZ5CPYP/jQvUAQtlZTTD/4suA08/FEnlxqW3sKS9iAhgsa9VYLZ6vDHn/ixJgIqRQPotoBjxIw=="
944
  },
945
  "node_modules/@huggingface/hub": {
946
  "version": "0.12.3-oauth",
 
1515
  }
1516
  },
1517
  "node_modules/@mediapipe/tasks-vision": {
1518
+ "version": "0.10.13-rc.20240428",
1519
+ "resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.13-rc.20240428.tgz",
1520
+ "integrity": "sha512-YMOshYcwxzLNNNEKSs4hWVTRjtuX+irWIjsbENrOee491t/oM1a9bnhggMdWLq0FBQ7xuCfvp1diu/JeZFoE0A=="
1521
  },
1522
  "node_modules/@next/env": {
1523
  "version": "14.2.3",
 
2938
  "resolved": "https://registry.npmjs.org/@types/cookie/-/cookie-0.4.1.tgz",
2939
  "integrity": "sha512-XW/Aa8APYr6jSVVA1y/DEIZX0/GMKLEVekNG727R8cs56ahETkRAy/3DR7+fJyh7oUgGwNQaRfXCun0+KbWY7Q=="
2940
  },
2941
+ "node_modules/@types/fluent-ffmpeg": {
2942
+ "version": "2.1.24",
2943
+ "resolved": "https://registry.npmjs.org/@types/fluent-ffmpeg/-/fluent-ffmpeg-2.1.24.tgz",
2944
+ "integrity": "sha512-g5oQO8Jgi2kFS3tTub7wLvfLztr1s8tdXmRd8PiL/hLMLzTIAyMR2sANkTggM/rdEDAg3d63nYRRVepwBiCw5A==",
2945
+ "dev": true,
2946
+ "dependencies": {
2947
+ "@types/node": "*"
2948
+ }
2949
+ },
2950
  "node_modules/@types/json5": {
2951
  "version": "0.0.29",
2952
  "resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz",
 
3496
  "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.8.tgz",
3497
  "integrity": "sha512-OH/2E5Fg20h2aPrbe+QL8JZQFko0YZaF+j4mnQ7BGhfavO7OpSLa8a0y9sBwomHdSbkhTS8TQNayBfnW5DwbvQ=="
3498
  },
3499
+ "node_modules/async": {
3500
+ "version": "3.2.5",
3501
+ "resolved": "https://registry.npmjs.org/async/-/async-3.2.5.tgz",
3502
+ "integrity": "sha512-baNZyqaaLhyLVKm/DlvdW051MSgO6b8eVfIezl9E5PqWxFgzLm/wQntEW4zOytVburDEr0JlALEpdOFwvErLsg=="
3503
+ },
3504
  "node_modules/asynckit": {
3505
  "version": "0.4.0",
3506
  "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
 
3735
  }
3736
  },
3737
  "node_modules/caniuse-lite": {
3738
+ "version": "1.0.30001614",
3739
+ "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001614.tgz",
3740
+ "integrity": "sha512-jmZQ1VpmlRwHgdP1/uiKzgiAuGOfLEJsYFP4+GBou/QQ4U6IOJCB4NP1c+1p9RGLpwObcT94jA5/uO+F1vBbog==",
3741
  "funding": [
3742
  {
3743
  "type": "opencollective",
 
5138
  "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz",
5139
  "integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw=="
5140
  },
5141
+ "node_modules/fluent-ffmpeg": {
5142
+ "version": "2.1.2",
5143
+ "resolved": "https://registry.npmjs.org/fluent-ffmpeg/-/fluent-ffmpeg-2.1.2.tgz",
5144
+ "integrity": "sha512-IZTB4kq5GK0DPp7sGQ0q/BWurGHffRtQQwVkiqDgeO6wYJLLV5ZhgNOQ65loZxxuPMKZKZcICCUnaGtlxBiR0Q==",
5145
+ "dependencies": {
5146
+ "async": ">=0.2.9",
5147
+ "which": "^1.1.1"
5148
+ },
5149
+ "engines": {
5150
+ "node": ">=0.8.0"
5151
+ }
5152
+ },
5153
+ "node_modules/fluent-ffmpeg/node_modules/which": {
5154
+ "version": "1.3.1",
5155
+ "resolved": "https://registry.npmjs.org/which/-/which-1.3.1.tgz",
5156
+ "integrity": "sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ==",
5157
+ "dependencies": {
5158
+ "isexe": "^2.0.0"
5159
+ },
5160
+ "bin": {
5161
+ "which": "bin/which"
5162
+ }
5163
+ },
5164
  "node_modules/follow-redirects": {
5165
  "version": "1.15.6",
5166
  "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
 
6700
  }
6701
  },
6702
  "node_modules/optionator": {
6703
+ "version": "0.9.4",
6704
+ "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
6705
+ "integrity": "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==",
6706
  "dependencies": {
 
6707
  "deep-is": "^0.1.3",
6708
  "fast-levenshtein": "^2.0.6",
6709
  "levn": "^0.4.1",
6710
  "prelude-ls": "^1.2.1",
6711
+ "type-check": "^0.4.0",
6712
+ "word-wrap": "^1.2.5"
6713
  },
6714
  "engines": {
6715
  "node": ">= 0.8.0"
 
6804
  }
6805
  },
6806
  "node_modules/path-scurry/node_modules/lru-cache": {
6807
+ "version": "10.2.2",
6808
+ "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.2.2.tgz",
6809
+ "integrity": "sha512-9hp3Vp2/hFQUiIwKo8XCeFVnrg8Pk3TYNPIR7tJADKi5YfcF7vEaK7avFHTlSy3kOKYaJQaalfEo6YuXdceBOQ==",
6810
  "engines": {
6811
  "node": "14 || >=16.14"
6812
  }
 
8293
  }
8294
  },
8295
  "node_modules/type-fest": {
8296
+ "version": "4.18.0",
8297
+ "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.18.0.tgz",
8298
+ "integrity": "sha512-+dbmiyliDY/2TTcjCS7NpI9yV2iEFlUDk5TKnsbkN7ZoRu5s7bT+zvYtNFhFXC2oLwURGT2frACAZvbbyNBI+w==",
8299
  "engines": {
8300
  "node": ">=16"
8301
  },
 
8685
  "url": "https://github.com/sponsors/ljharb"
8686
  }
8687
  },
8688
+ "node_modules/word-wrap": {
8689
+ "version": "1.2.5",
8690
+ "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz",
8691
+ "integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==",
8692
+ "engines": {
8693
+ "node": ">=0.10.0"
8694
+ }
8695
+ },
8696
  "node_modules/wrap-ansi": {
8697
  "version": "8.1.0",
8698
  "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz",
 
8819
  "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A=="
8820
  },
8821
  "node_modules/yaml": {
8822
+ "version": "2.4.2",
8823
+ "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.4.2.tgz",
8824
+ "integrity": "sha512-B3VqDZ+JAg1nZpaEmWtTXUlBneoGx6CPM9b0TENK6aoSu5t73dItudwdgmi6tHlIZZId4dZ9skcAQ2UbcyAeVA==",
8825
  "bin": {
8826
  "yaml": "bin.mjs"
8827
  },
package.json CHANGED
@@ -9,7 +9,8 @@
9
  "lint": "next lint"
10
  },
11
  "dependencies": {
12
- "@aitube/clap": "^0.0.6",
 
13
  "@huggingface/hub": "0.12.3-oauth",
14
  "@huggingface/inference": "^2.6.7",
15
  "@jcoreio/async-throttle": "^1.6.0",
@@ -61,6 +62,7 @@
61
  "eslint": "8.45.0",
62
  "eslint-config-next": "13.4.10",
63
  "fastest-levenshtein": "^1.0.16",
 
64
  "gsplat": "^1.2.4",
65
  "hash-wasm": "^4.11.0",
66
  "jose": "^5.2.4",
@@ -104,6 +106,7 @@
104
  "zustand": "^4.4.7"
105
  },
106
  "devDependencies": {
 
107
  "@types/proper-lockfile": "^4.1.2",
108
  "@types/qs": "^6.9.7",
109
  "@types/react-copy-to-clipboard": "^5.0.7",
 
9
  "lint": "next lint"
10
  },
11
  "dependencies": {
12
+ "@aitube/clap": "0.0.7",
13
+ "@aitube/client": "0.0.7",
14
  "@huggingface/hub": "0.12.3-oauth",
15
  "@huggingface/inference": "^2.6.7",
16
  "@jcoreio/async-throttle": "^1.6.0",
 
62
  "eslint": "8.45.0",
63
  "eslint-config-next": "13.4.10",
64
  "fastest-levenshtein": "^1.0.16",
65
+ "fluent-ffmpeg": "^2.1.2",
66
  "gsplat": "^1.2.4",
67
  "hash-wasm": "^4.11.0",
68
  "jose": "^5.2.4",
 
106
  "zustand": "^4.4.7"
107
  },
108
  "devDependencies": {
109
+ "@types/fluent-ffmpeg": "^2.1.24",
110
  "@types/proper-lockfile": "^4.1.2",
111
  "@types/qs": "^6.9.7",
112
  "@types/react-copy-to-clipboard": "^5.0.7",
src/app/api/generators/speech/generateVoiceWithElevenLabs.txt ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { getMediaInfo } from "../../utils/getMediaInfo"
2
+ import { readMp3FileToBase64 } from "../../utils/readMp3FileToBase64"
3
+
4
+ export async function generateSpeechWithElevenLabs({
5
+ text,
6
+ audioId,
7
+ debug = false,
8
+ }: {
9
+ text: string
10
+ audioId: string
11
+ debug?: boolean
12
+ }): Promise<{
13
+ filePath: string
14
+ fileName: string
15
+ format: string // "mp3"
16
+ base64: string // data uri
17
+ durationInSec: number
18
+ durationInMs: number
19
+ }> {
20
+ const api = await ElevenLabs()
21
+
22
+ // Converts text to speech, saves the file to the output folder and returns the relative path to the file.
23
+ // Output file is in the following format: TTS_date-time.mp3
24
+ // Returns an object with the following structure: { code: CODE, message: "STATUS_MESSAGE" }
25
+ const result = await api.tts(
26
+ text,
27
+ audioId
28
+ )
29
+
30
+ // ...really? that's the API?
31
+ let relativeOutputPath = result.message.split("File written successfully:").pop().trim()
32
+
33
+ // we remove the ./ at the beginning, so we get something like:
34
+ // "/../../../../var/folders/x4/2w7-------------------"
35
+ // then we remove relative navifation to only keep this:
36
+ // "/var/folders/x4/2w7-------------------"
37
+ const filePath = relativeOutputPath.slice(1).replaceAll("/..", "")
38
+
39
+ const fileName = filePath.split("/").pop()
40
+
41
+ const format = fileName.split(".").pop()
42
+
43
+ const { durationInSec, durationInMs } = await getMediaInfo(filePath)
44
+
45
+ const base64 = await readMp3FileToBase64(filePath)
46
+
47
+ return {
48
+ filePath,
49
+ fileName,
50
+ format,
51
+ base64,
52
+ durationInSec,
53
+ durationInMs,
54
+ }
55
+ }
src/app/api/generators/speech/generateVoiceWithParlerTTS.ts ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { addBase64Header } from "@/lib/data/addBase64Header"
2
+ import { tryApiCalls } from "../../utils/tryApiCall"
3
+
4
+ const gradioSpaceApiUrl = `https://jbilcke-hf-ai-tube-model-parler-tts-mini.hf.space`
5
+ const huggingFaceSpace = "jbilcke-hf/ai-tube-model-parler-tts-mini"
6
+ const apiKey = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}`
7
+
8
+ export async function generateSpeechWithParlerTTS({
9
+ text,
10
+ audioId,
11
+ debug = false,
12
+ neverThrow = false,
13
+ }: {
14
+ text: string
15
+ audioId: string
16
+ debug?: boolean
17
+ neverThrow?: boolean
18
+ }): Promise<string> {
19
+
20
+ const result = {
21
+ filePath: "",
22
+ fileName: "",
23
+ format: "mp3",
24
+ base64: "",
25
+ durationInSec: 5,
26
+ durationInMs: 5000
27
+ }
28
+
29
+
30
+ const actualFunction = async () => {
31
+
32
+ const res = await fetch(gradioSpaceApiUrl + (gradioSpaceApiUrl.endsWith("/") ? "" : "/") + "api/predict", {
33
+ method: "POST",
34
+ headers: {
35
+ "Content-Type": "application/json",
36
+ // Authorization: `Bearer ${token}`,
37
+ },
38
+ body: JSON.stringify({
39
+ fn_index: 0, // <- important!
40
+ data: [
41
+ apiKey,
42
+ text,
43
+ audioId,
44
+ ],
45
+ }),
46
+ cache: "no-store",
47
+ // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
48
+ // next: { revalidate: 1 }
49
+ })
50
+
51
+ if (res.status !== 200) {
52
+ throw new Error('Failed to fetch data')
53
+ }
54
+
55
+ const rawJson = await res.json()
56
+
57
+ console.log("rawJson:", rawJson)
58
+
59
+ // TODO: add the base64 header with the right header type
60
+
61
+ return ""
62
+ }
63
+
64
+ try {
65
+ if (!text?.length) {
66
+ throw new Error(`text is too short!`)
67
+ }
68
+
69
+ const result = await tryApiCalls({
70
+ func: actualFunction,
71
+ huggingFaceSpace,
72
+ debug,
73
+ failureMessage: "failed to generate the audio"
74
+ })
75
+ return result
76
+ } catch (err) {
77
+ if (neverThrow) {
78
+ console.error(`generateVoiceWithParlerTTS():`, err)
79
+ return ""
80
+ } else {
81
+ throw err
82
+ }
83
+ }
84
+ }
src/app/api/generators/speech/generateVoiceWithXTTS2.txt ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import { StoryLine } from "../../types/structures.mts"
3
+ import { tryApiCalls } from "../../utils/tryApiCalls.mts"
4
+ import { promptToGenerateAudioStory } from "../prompts/prompts.mts"
5
+ import { microserviceApiKey } from "../../config.mts"
6
+ import { addBase64Header } from "../../base64/addBase64.mts"
7
+
8
+ // TODO delete this? we don't need an env var for this I think?
9
+ const aiStoryServerApiUrl = `https://jbilcke-hf-ai-story-server.hf.space`
10
+ const huggingFaceSpace = "jbilcke-hf/ai-story-server"
11
+
12
+ export async function generateAudioStory({
13
+ prompt,
14
+ voice,
15
+ // maxLines,
16
+ neverThrow,
17
+ debug,
18
+ }: {
19
+ prompt: string
20
+ voice?: string
21
+ // maxLines: number
22
+ neverThrow?: boolean
23
+ debug?: boolean
24
+ }): Promise<StoryLine[]> {
25
+ const actualFunction = async () => {
26
+
27
+ const cropped = prompt.slice(0, 30)
28
+ // console.log(`user requested "${cropped}${cropped !== prompt ? "..." : ""}"`)
29
+
30
+ // positivePrompt = filterOutBadWords(positivePrompt)
31
+
32
+ const res = await fetch(aiStoryServerApiUrl + (aiStoryServerApiUrl.endsWith("/") ? "" : "/") + "api/predict", {
33
+ method: "POST",
34
+ headers: {
35
+ "Content-Type": "application/json",
36
+ // Authorization: `Bearer ${token}`,
37
+ },
38
+ body: JSON.stringify({
39
+ fn_index: 0, // <- important!
40
+ data: [
41
+ microserviceApiKey,
42
+ promptToGenerateAudioStory,
43
+ prompt,
44
+
45
+ // TODO: add support for custom wav
46
+ voice === "Julian" ? "Julian" : "Cloée",
47
+
48
+ // maxLines,
49
+ ],
50
+ }),
51
+ cache: "no-store",
52
+ // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
53
+ // next: { revalidate: 1 }
54
+ })
55
+
56
+
57
+ const rawJson = await res.json()
58
+ const data = rawJson.data as StoryLine[][]
59
+
60
+ const stories = data?.[0] || []
61
+
62
+ if (res.status !== 200) {
63
+ throw new Error('Failed to fetch data')
64
+ }
65
+
66
+ return stories.map(line => ({
67
+ text: line.text.replaceAll(" .", ".").replaceAll(" ?", "?").replaceAll(" !", "!").trim(),
68
+ audio: addBase64Header(line.audio, "mp4")
69
+ }))
70
+ }
71
+
72
+ try {
73
+ if (!prompt?.length) {
74
+ throw new Error(`prompt is too short!`)
75
+ }
76
+
77
+ const result = await tryApiCalls({
78
+ func: actualFunction,
79
+ huggingFaceSpace,
80
+ debug,
81
+ failureMessage: "failed to generate the audio story"
82
+ })
83
+ return result
84
+ } catch (err) {
85
+ if (neverThrow) {
86
+ console.error(`generateAudioStory():`, err)
87
+ return []
88
+ } else {
89
+ throw err
90
+ }
91
+ }
92
+ }
src/app/api/utils/addBase64.ts ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export function addBase64Header(
2
+ image?: string,
3
+ format?:
4
+ | "jpeg" | "jpg" | "png" | "webp" | "heic"
5
+ | "mp3" | "wav"
6
+ | "mp4" | "webm"
7
+ | string
8
+ ) {
9
+
10
+ if (!image || typeof image !== "string" || image.length < 60) {
11
+ return ""
12
+ }
13
+
14
+ const ext = (`${format || ""}`.split(".").pop() || "").toLowerCase().trim()
15
+
16
+ let mime = ""
17
+ if (
18
+ ext === "jpeg" ||
19
+ ext === "jpg") {
20
+ mime = "image/jpeg"
21
+ } else if (
22
+ ext === "webp"
23
+ ) {
24
+ mime = "image/webp"
25
+ } else if (
26
+ ext === "png") {
27
+ mime = "image/png"
28
+ } else if (ext === "heic") {
29
+ mime = "image/heic"
30
+ } else if (ext === "mp3") {
31
+ mime = "audio/mp3"
32
+ } else if (ext === "mp4") {
33
+ mime = "video/mp4"
34
+ } else if (ext === "webm") {
35
+ mime = "video/webm"
36
+ } else if (ext === "wav") {
37
+ mime = "audio/wav"
38
+ } else {
39
+ throw new Error(`addBase64Header failed (unsupported format: ${format})`)
40
+ }
41
+
42
+ if (image.startsWith('data:')) {
43
+ if (image.startsWith(`data:${mime};base64,`)) {
44
+ return image
45
+ } else {
46
+ throw new Error(`addBase64Header failed (input string is NOT a ${mime} image)`)
47
+ }
48
+ } else {
49
+ return `data:${mime};base64,${image}`
50
+ }
51
+ }
src/app/api/utils/getHuggingFaceSpaceStatus.ts ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ /** Actually `hf_${string}`, but for convenience, using the string type */
3
+ type AccessToken = string;
4
+
5
+ interface Credentials {
6
+ accessToken: AccessToken;
7
+ }
8
+
9
+ type SpaceHardwareFlavor =
10
+ | "cpu-basic"
11
+ | "cpu-upgrade"
12
+ | "t4-small"
13
+ | "t4-medium"
14
+ | "a10g-small"
15
+ | "a10g-large"
16
+ | "a100-large";
17
+
18
+ type SpaceSdk = "streamlit" | "gradio" | "docker" | "static";
19
+
20
+ type SpaceStage =
21
+ | "NO_APP_FILE"
22
+ | "CONFIG_ERROR"
23
+ | "BUILDING"
24
+ | "BUILD_ERROR"
25
+ | "RUNNING"
26
+ | "RUNNING_BUILDING"
27
+ | "RUNTIME_ERROR"
28
+ | "DELETING"
29
+ | "PAUSED"
30
+ | "SLEEPING";
31
+
32
+ type AccessTokenRole = "admin" | "write" | "contributor" | "read";
33
+
34
+ type AuthType = "access_token" | "app_token" | "app_token_as_user";
35
+
36
+
37
+ interface SpaceRuntime {
38
+ stage: SpaceStage;
39
+ sdk?: SpaceSdk;
40
+ sdkVersion?: string;
41
+ errorMessage?: string;
42
+ hardware?: {
43
+ current: SpaceHardwareFlavor | null;
44
+ currentPrettyName?: string;
45
+ requested: SpaceHardwareFlavor | null;
46
+ requestedPrettyName?: string;
47
+ };
48
+ /** when calling /spaces, those props are only fetched if ?full=true */
49
+ resources?: SpaceResourceConfig;
50
+ /** in seconds */
51
+ gcTimeout?: number | null;
52
+ }
53
+
54
+ interface SpaceResourceRequirement {
55
+ cpu?: string;
56
+ memory?: string;
57
+ gpu?: string;
58
+ gpuModel?: string;
59
+ ephemeral?: string;
60
+ }
61
+
62
+ interface SpaceResourceConfig {
63
+ requests: SpaceResourceRequirement;
64
+ limits: SpaceResourceRequirement;
65
+ replicas?: number;
66
+ throttled?: boolean;
67
+ is_custom?: boolean;
68
+ }
69
+
70
+ export interface HFSpaceStatus {
71
+ _id: string
72
+ id: string
73
+ author: string
74
+ sha: string
75
+ lastModified: string
76
+ private: boolean
77
+ gated: boolean
78
+ disabled: boolean
79
+ host: string
80
+ subdomain: string
81
+ tags: string[]
82
+ likes: number
83
+ sdk: string
84
+ runtime: SpaceRuntime
85
+ createdAt: string
86
+ }
87
+
88
+ export async function getHuggingFaceSpaceStatus({
89
+ space,
90
+ // userName,
91
+ // spaceName,
92
+ }: {
93
+ space: string // a joined "user_name/space_name"
94
+ // userName: string
95
+ // spaceName: string
96
+ }): Promise<HFSpaceStatus> {
97
+ const res = await fetch(`https://huggingface.co/api/spaces/${space}`, {
98
+ method: "GET",
99
+ headers: {
100
+ Authorization: `Bearer ${process.env.ADMIN_HUGGING_FACE_API_TOKEN || ""}`
101
+ }
102
+ })
103
+
104
+ if (res.status !== 200) {
105
+ throw new Error("failed to get the space data")
106
+ }
107
+
108
+ try {
109
+ const data = await res.json() as HFSpaceStatus
110
+ return data
111
+ } catch (err) {
112
+ throw new Error(`failed to parse space data: ${err}`)
113
+ }
114
+ }
src/app/api/utils/getMediaInfo.ts ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ffmpeg from "fluent-ffmpeg";
2
+
3
+ import { tmpdir } from "node:os";
4
+ import { promises as fs } from "node:fs";
5
+ import { join } from "node:path";
6
+
7
+ export type MediaMetadata = {
8
+ durationInSec: number;
9
+ durationInMs: number;
10
+ hasAudio: boolean;
11
+ };
12
+
13
+ /**
14
+ * Get the media info of a base64 data URI or a file path
15
+ * @param input
16
+ * @returns
17
+ */
18
+ export async function getMediaInfo(input: string): Promise<MediaMetadata> {
19
+ // If the input is a base64 string
20
+ if (input.startsWith("data:")) {
21
+ // Extract the base64 content
22
+ const base64Content = input.split(";base64,").pop();
23
+ if (!base64Content) {
24
+ throw new Error("Invalid base64 data");
25
+ }
26
+
27
+ // Decode the base64 content to a buffer
28
+ const buffer = Buffer.from(base64Content, 'base64');
29
+
30
+ // Generate a temporary file name
31
+ const tempFileName = join(tmpdir(), `temp-media-${Date.now()}`);
32
+
33
+ // Write the buffer to a temporary file
34
+ await fs.writeFile(tempFileName, buffer);
35
+
36
+ // Get metadata from the temporary file then delete the file
37
+ try {
38
+ return await getMetaDataFromPath(tempFileName);
39
+ } finally {
40
+ await fs.rm(tempFileName);
41
+ }
42
+ }
43
+
44
+ // If the input is a path to the file
45
+ return await getMetaDataFromPath(input);
46
+ }
47
+
48
+ async function getMetaDataFromPath(filePath: string): Promise<MediaMetadata> {
49
+ return new Promise((resolve, reject) => {
50
+ ffmpeg.ffprobe(filePath, (err, metadata) => {
51
+
52
+ let results = {
53
+ durationInSec: 0,
54
+ durationInMs: 0,
55
+ hasAudio: false,
56
+ }
57
+
58
+ if (err) {
59
+ console.error("getMediaInfo(): failed to analyze the source (might happen with empty files)")
60
+ // reject(err);
61
+ resolve(results);
62
+ return;
63
+ }
64
+
65
+ try {
66
+ results.durationInSec = metadata?.format?.duration || 0;
67
+ results.durationInMs = results.durationInSec * 1000;
68
+ results.hasAudio = (metadata?.streams || []).some((stream) => stream.codec_type === 'audio');
69
+
70
+ } catch (err) {
71
+ console.error(`getMediaInfo(): failed to analyze the source (might happen with empty files)`)
72
+ results.durationInSec = 0
73
+ results.durationInMs = 0
74
+ results.hasAudio = false
75
+ }
76
+ resolve(results);
77
+ });
78
+ });
79
+ }
src/app/api/utils/makeSureSpaceIsRunning.ts ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { getHuggingFaceSpaceStatus } from "./getHuggingFaceSpaceStatus"
2
+ import { sleep } from "./sleep"
3
+
4
+
5
/**
 * Make sure a Hugging Face space is running, restarting it when it is not.
 *
 * Does nothing when no space is given or when the space already reports
 * "RUNNING". Otherwise a restart is requested and the status is polled
 * until the space is running, fails, or the wait budget is used up.
 *
 * @param space a joined "user_name/space_name"
 * @param maxWaitTimeInSec polling budget, in seconds (only the time spent sleeping is counted)
 * @param statusUpdateFrequencyInSec pause between two status checks, in seconds
 * @throws when the restart request does not answer with status 200, when the space
 *         reaches a stage other than RUNNING / BUILDING / RUNNING_BUILDING, or when
 *         it is still BUILDING once the budget is exhausted
 */
export async function makeSureSpaceIsRunning({
  space,
  maxWaitTimeInSec = 15 * 60, // some spaces are ultra slow to cold boot (eg. data dl at runtime)
  statusUpdateFrequencyInSec = 5,
}: {
  space?: string // a joined "user_name/space_name"

  maxWaitTimeInSec?: number

  statusUpdateFrequencyInSec?: number
}): Promise<void> {
  if (!space) { return }

  // fast path: nothing to do when the space is already up
  try {
    const { runtime: { stage: initialStage } } = await getHuggingFaceSpaceStatus({ space })
    if (initialStage === "RUNNING") {
      return
    }
  } catch (err) {
    // a failed status check is deliberately ignored: we still try to restart the space
  }

  const restartResponse = await fetch(`https://huggingface.co/api/spaces/${space}/restart`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${process.env.ADMIN_HUGGING_FACE_API_TOKEN || ""}`
    }
  })

  if (restartResponse.status !== 200) {
    process.stdout.write(`failure!\nwe couldn't trigger the restart of space "${space}"\n`)

    throw new Error(`failed to trigger the restart of space "${space}" (status is not 200)`)
  }

  let waitedInSec = 0

  process.stdout.write(`trying to restart space "${space}"`)

  for (;;) {
    // one dot per status check
    process.stdout.write(".")
    const { runtime: { stage: currentStage } } = await getHuggingFaceSpaceStatus({ space })

    if (currentStage === "RUNNING") {
      process.stdout.write(`success!\nspace "${space}" is ${currentStage} (took ${waitedInSec} sec)\n`)
      return
    }

    // any stage that is neither running nor building is treated as a failure
    if (currentStage !== "BUILDING" && currentStage !== "RUNNING_BUILDING") {
      process.stdout.write(`failure!\nspace "${space}" is ${currentStage} (after ${waitedInSec} sec)\n`)
      throw new Error(`failed to build space ${space} (reason: space is ${currentStage})`)
    }

    // still building: let's wait more
    await sleep(statusUpdateFrequencyInSec * 1000)
    waitedInSec += statusUpdateFrequencyInSec

    if (waitedInSec < maxWaitTimeInSec) {
      continue
    }

    process.stdout.write(`failure!\nspace "${space}" is still ${currentStage} (after ${waitedInSec} sec)\n`)

    if (currentStage === "BUILDING") {
      throw new Error(`failed to start space ${space} (reason: space is ${currentStage}, but we reached the ${maxWaitTimeInSec} sec timeout)`)
    }

    // "RUNNING_BUILDING" at the deadline is tolerated and treated as good enough
    return
  }
}
src/app/api/utils/readMp3FileToBase64.ts ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { readFile } from "node:fs/promises"
2
+
3
/**
 * Load a file from disk and return it as an `audio/mp3` base64 data URI.
 *
 * @param filePath path of the file to read
 * @returns a string of the form `data:audio/mp3;base64,<payload>`
 * @throws the read error (e.g. file not found, no permissions), after logging it
 */
export async function readMp3FileToBase64(filePath: string): Promise<string> {
  let encodedContent: string

  try {
    const rawContent = await readFile(filePath)
    encodedContent = rawContent.toString('base64')
  } catch (error) {
    // log, then let the caller decide what to do
    console.error(error)
    throw error
  }

  return `data:audio/mp3;base64,${encodedContent}`
}
src/app/api/utils/sleep.ts ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
/**
 * Pause for the given duration.
 *
 * @param durationInMs how long to wait, in milliseconds
 * @returns a promise resolving to `true` once the timer has fired
 */
export const sleep = async (durationInMs: number) =>
  new Promise((wakeUp) => {
    // the extra argument is forwarded to the callback, i.e. wakeUp(true)
    setTimeout(wakeUp, durationInMs, true)
  })
src/app/api/utils/timeout.ts ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Race a promise against a timer.
 *
 * @param promise the promise to wait for
 * @param ms how long to wait before giving up, in milliseconds
 * @param timeoutError the error used to reject when the timer fires first
 * @returns a promise settling like `promise`, or rejecting with `timeoutError`
 *          when `promise` has not settled after `ms` milliseconds
 */
export function timeout<T>(
  promise: Promise<T>,
  ms: number,
  timeoutError = new Error('Promise timed out')
): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;

  // create a promise that rejects in milliseconds
  const promiseWithTimeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => {
      reject(timeoutError);
    }, ms);
  });

  // returns a race between timeout and the passed promise;
  // the timer is cleared once the race is settled, otherwise it would stay
  // pending (and keep the event loop alive) for the full `ms` duration
  return Promise.race<T>([promise, promiseWithTimeout]).finally(() => {
    clearTimeout(timer);
  });
}
src/app/api/utils/tryApiCall.ts ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { makeSureSpaceIsRunning } from "./makeSureSpaceIsRunning"
2
+ import { sleep } from "./sleep"
3
+ import { timeout } from "./timeout"
4
+
5
const sec = 1000
const min = 60 * sec

/**
 * Call an async function, retrying when it fails or takes too long.
 *
 * One attempt is made per entry of `delays`. Before each attempt the
 * Hugging Face space is (optionally) woken up, and the call itself is
 * raced against a timeout.
 *
 * Waiting policy between attempts:
 * - the first retry is immediate (`delays[0]` is never waited on),
 * - after a later failure we wait `delays[i]` before trying again,
 * - after the last failure we don't wait at all: nothing is left to retry,
 *   so sleeping would only delay the error.
 *
 * @param func factory creating the promise to await (called once per attempt)
 * @param huggingFaceSpace optional name of the space to wake up, also used in the timeout message
 * @param debug when true, the error of each failed attempt is printed
 * @param failureMessage prefix of the error thrown once all the attempts have failed
 * @param autostart when true, `makeSureSpaceIsRunning` is called before each attempt
 * @param timeoutInSec maximum duration of a single attempt, in seconds
 * @param delays wait times in milliseconds; the length is the number of attempts
 * @returns the value resolved by the first successful attempt
 * @throws an Error (`<failureMessage> after <n> attempts`) when every attempt failed
 */
export async function tryApiCalls<T>({
  func,
  huggingFaceSpace,
  debug = false,
  failureMessage = "failed to call the endpoint",
  autostart = true,

  // wait up to 10 min
  timeoutInSec = 10 * 60,

  delays = [
    5 * sec,
    15 * sec,
    40 * sec,

    // at this stage, if it is so slow it means we are probably waking up a model
    // which is a slow operation (takes ~5 min)

    2 * min,
    1 * min,
    1 * min,
  ]
}: {
  func: () => Promise<T>

  // optional: the name of the hugging face space
  // this will be used to "wake up" the space if necessary
  huggingFaceSpace?: string

  debug?: boolean
  failureMessage?: string
  autostart?: boolean
  timeoutInSec?: number
  delays?: number[]
}) {

  for (let i = 0; i < delays.length; i++) {
    try {
      if (autostart) {
        await makeSureSpaceIsRunning({ space: huggingFaceSpace })
      }

      // due to an error with the Gradio client, sometimes calling the api.predict
      // will never throw an error
      const result = await timeout(
        func(), // grab the promise
        timeoutInSec * 1000,
        new Error(`call to ${huggingFaceSpace || "the API"} failed after ${timeoutInSec} seconds`)
      )
      return result
    } catch (err) {
      if (debug) { console.error(err) }
      process.stdout.write(".")

      const isLastAttempt = i === delays.length - 1

      // no pause after the very first failure, and none after the last one
      if (i > 0 && !isLastAttempt) {
        await sleep(delays[i])
      }
    }
  }

  throw new Error(`${failureMessage} after ${delays.length} attempts`)
}
src/app/api/v1/edit/dialogues/route.ts ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { NextResponse, NextRequest } from "next/server"
2
+
3
+ import { ClapEntity, ClapProject, ClapSegment, getClapAssetSourceType, newSegment, parseClap, serializeClap } from "@aitube/clap"
4
+
5
+ import { startOfSegment1IsWithinSegment2 } from "@/lib/utils/startOfSegment1IsWithinSegment2"
6
+ import { getToken } from "@/app/api/auth/getToken"
7
+
8
+ import { getSpeechBackgroundAudioPrompt } from "@/components/interface/latent-engine/core/prompts/getSpeechBackgroundAudioPrompt"
9
+ import { getSpeechForegroundAudioPrompt } from "@/components/interface/latent-engine/core/prompts/getSpeechForegroundAudioPrompt"
10
+ import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
11
+
12
/**
 * A helper to generate speech for a Clap.
 *
 * Reads a serialized clap from the request body and, for every "camera"
 * segment (a shot), generates audio for the first dialogue segment starting
 * within that shot when it has no asset yet. Responds with the updated clap,
 * serialized again.
 *
 * Throws when the clap has no segments, when it holds more than 32 shots,
 * or when the speech generation fails.
 */
export async function POST(req: NextRequest) {

  // the token is requested here, but it is not used further down in this handler
  const jwtToken = await getToken({ user: "anonymous" })

  const payload = await req.blob()

  const clap: ClapProject = await parseClap(payload)

  if (!clap?.segments) { throw new Error(`no segment found in the provided clap!`) }

  console.log(`[api/generate/dialogues] detected ${clap.segments.length} segments`)

  const cameraShots: ClapSegment[] = clap.segments.filter(({ category }) => category === "camera")
  console.log(`[api/generate/dialogues] detected ${cameraShots.length} shots`)

  if (cameraShots.length > 32) {
    throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
  }

  for (const shot of cameraShots) {

    // every segment starting within the current shot
    const segmentsInShot: ClapSegment[] = clap.segments.filter(candidate =>
      startOfSegment1IsWithinSegment2(candidate, shot)
    )

    const dialoguesInShot: ClapSegment[] = segmentsInShot.filter(({ category }) =>
      category === "dialogue"
    )

    // only the first dialogue of the shot is handled
    const dialogue: ClapSegment | undefined = dialoguesInShot.at(0)

    console.log(`[api/generate/dialogues] shot [${shot.startTimeInMs}:${shot.endTimeInMs}] has ${segmentsInShot.length} segments (${dialoguesInShot.length} dialogues)`)

    // nothing to generate: either there is no dialogue, or it already has an asset
    if (!dialogue || dialogue.assetUrl) {
      console.log(`[api/generate/dialogues] there is already a dialogue audio: ${dialogue?.assetUrl?.slice?.(0, 50)}...`)
      continue
    }

    console.log(`[api/generate/dialogues] generating audio..`)

    try {
      dialogue.assetUrl = await generateSpeechWithParlerTTS({
        text: dialogue.prompt,
        audioId: getSpeechBackgroundAudioPrompt(segmentsInShot, clap.entityIndex, ["high quality", "crisp", "detailed"]),
        debug: true,
      })
      dialogue.assetSourceType = getClapAssetSourceType(dialogue.assetUrl)

      console.log("TODO julian: properly set the asset type format")

    } catch (err) {
      console.log(`[api/generate/dialogues] failed to generate audio: ${err}`)
      throw err
    }

    console.log(`[api/generate/dialogues] generated dialogue audio: ${dialogue?.assetUrl?.slice?.(0, 50)}...`)
  }

  console.log(`[api/generate/dialogues] returning the clap augmented with dialogues`)

  return new NextResponse(await serializeClap(clap), {
    status: 200,
    headers: new Headers({ "content-type": "application/x-gzip" }),
  })
}
src/app/api/v1/edit/{models → entities}/generateAudioID.ts RENAMED
File without changes
src/app/api/v1/edit/{models → entities}/generateImageID.ts RENAMED
File without changes
src/app/api/v1/edit/{models → entities}/route.ts RENAMED
@@ -1,7 +1,7 @@
1
  import { NextResponse, NextRequest } from "next/server"
2
  import queryString from "query-string"
3
 
4
- import { parseClap, serializeClap, ClapModel } from "@aitube/clap"
5
  import { getToken } from "@/app/api/auth/getToken"
6
 
7
  import { generateImageID } from "./generateImageID"
@@ -25,7 +25,7 @@ export async function POST(req: NextRequest) {
25
  if (!prompt.length) { throw new Error(`please provide a prompt`) }
26
  */
27
 
28
- console.log("[api/generate/models] request:", prompt)
29
 
30
  const jwtToken = await getToken({ user: "anonymous" })
31
 
@@ -33,40 +33,42 @@ export async function POST(req: NextRequest) {
33
 
34
  const clap = await parseClap(blob)
35
 
36
- if (!clap.models.length) { throw new Error(`please provide at least one model`) }
37
 
38
- for (const model of clap.models) {
39
 
40
  // TASK 1: GENERATE THE IMAGE PROMPT IF MISSING
41
- if (!model.imagePrompt) {
42
- model.imagePrompt = "a man with a beard"
43
  }
44
 
45
  // TASK 2: GENERATE THE IMAGE ID IF MISSING
46
- if (!model.imageId) {
47
- model.imageId = await generateImageID({
48
- prompt: model.imagePrompt,
49
- seed: model.seed
50
  })
 
51
  }
52
 
53
  // TASK 3: GENERATE THE AUDIO PROMPT IF MISSING
54
- if (!model.audioPrompt) {
55
- model.audioPrompt = "a man with a beard"
56
  }
57
 
58
  // TASK 4: GENERATE THE AUDIO ID IF MISSING
59
 
60
  // TODO here: call Parler-TTS or a generic audio generator
61
- if (!model.audioId) {
62
- model.audioId = await generateAudioID({
63
- prompt: model.audioPrompt,
64
- seed: model.seed
65
  })
 
66
  }
67
  }
68
 
69
- console.log(`[api/generate/models] returning the clap extended with the model`)
70
 
71
  return new NextResponse(await serializeClap(clap), {
72
  status: 200,
 
1
  import { NextResponse, NextRequest } from "next/server"
2
  import queryString from "query-string"
3
 
4
+ import { getClapAssetSourceType, parseClap, serializeClap } from "@aitube/clap"
5
  import { getToken } from "@/app/api/auth/getToken"
6
 
7
  import { generateImageID } from "./generateImageID"
 
25
  if (!prompt.length) { throw new Error(`please provide a prompt`) }
26
  */
27
 
28
+ console.log("[api/generate/entities] request:", prompt)
29
 
30
  const jwtToken = await getToken({ user: "anonymous" })
31
 
 
33
 
34
  const clap = await parseClap(blob)
35
 
36
+ if (!clap.entities.length) { throw new Error(`please provide at least one entity`) }
37
 
38
+ for (const entity of clap.entities) {
39
 
40
  // TASK 1: GENERATE THE IMAGE PROMPT IF MISSING
41
+ if (!entity.imagePrompt) {
42
+ entity.imagePrompt = "a man with a beard"
43
  }
44
 
45
  // TASK 2: GENERATE THE IMAGE ID IF MISSING
46
+ if (!entity.imageId) {
47
+ entity.imageId = await generateImageID({
48
+ prompt: entity.imagePrompt,
49
+ seed: entity.seed
50
  })
51
+ entity.imageSourceType = getClapAssetSourceType(entity.imageId)
52
  }
53
 
54
  // TASK 3: GENERATE THE AUDIO PROMPT IF MISSING
55
+ if (!entity.audioPrompt) {
56
+ entity.audioPrompt = "a man with a beard"
57
  }
58
 
59
  // TASK 4: GENERATE THE AUDIO ID IF MISSING
60
 
61
  // TODO here: call Parler-TTS or a generic audio generator
62
+ if (!entity.audioId) {
63
+ entity.audioId = await generateAudioID({
64
+ prompt: entity.audioPrompt,
65
+ seed: entity.seed
66
  })
67
+ entity.audioSourceType = getClapAssetSourceType(entity.audioId)
68
  }
69
  }
70
 
71
+ console.log(`[api/generate/entities] returning the clap extended with the entities`)
72
 
73
  return new NextResponse(await serializeClap(clap), {
74
  status: 200,
src/app/api/v1/edit/{models → entities}/systemPrompt.ts RENAMED
File without changes
src/app/api/v1/edit/storyboards/route.ts CHANGED
@@ -6,7 +6,6 @@ import { startOfSegment1IsWithinSegment2 } from "@/lib/utils/startOfSegment1IsWi
6
  import { getVideoPrompt } from "@/components/interface/latent-engine/core/prompts/getVideoPrompt"
7
  import { getToken } from "@/app/api/auth/getToken"
8
 
9
- import { newRender, getRender } from "@/app/api/providers/videochain/renderWithVideoChain"
10
  import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
11
  import { generateStoryboard } from "./generateStoryboard"
12
 
@@ -68,7 +67,7 @@ export async function POST(req: NextRequest) {
68
  // TASK 2: GENERATE MISSING STORYBOARD PROMPT
69
  if (shotStoryboardSegment && !shotStoryboardSegment?.prompt) {
70
  // storyboard is missing, let's generate it
71
- shotStoryboardSegment.prompt = getVideoPrompt(shotSegments, {}, [])
72
  console.log(`[api/generate/storyboards] generating storyboard prompt: ${shotStoryboardSegment.prompt}`)
73
  }
74
 
 
6
  import { getVideoPrompt } from "@/components/interface/latent-engine/core/prompts/getVideoPrompt"
7
  import { getToken } from "@/app/api/auth/getToken"
8
 
 
9
  import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
10
  import { generateStoryboard } from "./generateStoryboard"
11
 
 
67
  // TASK 2: GENERATE MISSING STORYBOARD PROMPT
68
  if (shotStoryboardSegment && !shotStoryboardSegment?.prompt) {
69
  // storyboard is missing, let's generate it
70
+ shotStoryboardSegment.prompt = getVideoPrompt(shotSegments, clap.entityIndex, ["high quality", "crisp", "detailed"])
71
  console.log(`[api/generate/storyboards] generating storyboard prompt: ${shotStoryboardSegment.prompt}`)
72
  }
73
 
src/components/interface/latent-engine/core/prompts/getCharacterPrompt.ts CHANGED
@@ -1,9 +1,9 @@
1
- import { ClapModel } from "@aitube/clap"
2
 
3
- export function getCharacterPrompt(model: ClapModel): string {
4
 
5
  let characterPrompt = ""
6
- if (model.description) {
7
  characterPrompt = [
8
  // the label (character name) can help making the prompt more unique
9
  // this might backfires however, if the name is
@@ -11,15 +11,15 @@ export function getCharacterPrompt(model: ClapModel): string {
11
  // I'm not sure stable diffusion really needs this,
12
  // so let's skip it for now (might still be useful for locations, though)
13
  // we also want to avoid triggering "famous people" (BARBOSSA etc)
14
- // model.label,
15
 
16
- model.description
17
  ].join(", ")
18
  } else {
19
  characterPrompt = [
20
- model.gender !== "object" ? model.gender : "",
21
- model.age ? `aged ${model.age}yo` : '',
22
- model.label ? `named ${model.label}` : '',
23
  ].map(i => i.trim()).filter(i => i).join(", ")
24
  }
25
  return characterPrompt
 
1
+ import { ClapEntity } from "@aitube/clap"
2
 
3
+ export function getCharacterPrompt(entity: ClapEntity): string {
4
 
5
  let characterPrompt = ""
6
+ if (entity.description) {
7
  characterPrompt = [
8
  // the label (character name) can help making the prompt more unique
9
  // this might backfires however, if the name is
 
11
  // I'm not sure stable diffusion really needs this,
12
  // so let's skip it for now (might still be useful for locations, though)
13
  // we also want to avoid triggering "famous people" (BARBOSSA etc)
14
+ // entity.label,
15
 
16
+ entity.description
17
  ].join(", ")
18
  } else {
19
  characterPrompt = [
20
+ entity.gender !== "object" ? entity.gender : "",
21
+ entity.age ? `aged ${entity.age}yo` : '',
22
+ entity.label ? `named ${entity.label}` : '',
23
  ].map(i => i.trim()).filter(i => i).join(", ")
24
  }
25
  return characterPrompt
src/components/interface/latent-engine/core/prompts/getSpeechBackgroundAudioPrompt.ts ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { ClapEntity, ClapSegment } from "@aitube/clap"
2
+
3
+ import { getCharacterPrompt } from "./getCharacterPrompt"
4
+
5
/**
 * Build the prompt describing the audio context of a voice
 * from a list of active segments.
 *
 * Only "dialogue", "weather" and "location" segments are used, ordered by
 * label (descending). A dialogue contributes a description of the speaker
 * (a generic one when its entity cannot be found), a location or a weather
 * contributes its own prompt. Empty parts are dropped.
 *
 * @param segments the active segments
 * @param entitiesById entities indexed by id, used to describe the speakers
 * @param extraPositivePrompt extra terms appended at the end ("clear sound, high quality" etc)
 * @returns all the parts joined with ". "
 */
export function getSpeechBackgroundAudioPrompt(
  segments: ClapSegment[] = [],
  entitiesById: Record<string, ClapEntity> = {},
  extraPositivePrompt: string[] = []
): string {
  const usedCategories: string[] = ["dialogue", "weather", "location"]

  const relevantSegments = segments.filter(segment => usedCategories.includes(segment.category))
  relevantSegments.sort((left, right) => right.label.localeCompare(left.label))

  const parts: string[] = []

  for (const segment of relevantSegments) {
    let part = ""

    switch (segment.category) {
      case "dialogue": {
        const speaker: ClapEntity | undefined = entitiesById[segment?.entityId || ""] || undefined

        // if we can't find the entity then we are unable
        // to make any assumption about the gender, age and voice timbre
        part = speaker
          ? `${getCharacterPrompt(speaker)}, speaking normally`
          : `person, speaking normally`
        break
      }

      // the location and the weather are part of the background noise
      // (this might produce unexpected results - we'll see!)
      case "location":
      case "weather":
        part = segment.prompt
        break
    }

    if (part) {
      parts.push(part)
    }
  }

  return [...parts, ...extraPositivePrompt].join(". ")
}
src/components/interface/latent-engine/core/prompts/getSpeechForegroundAudioPrompt.ts ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { ClapSegment } from "@aitube/clap"
2
+
3
/**
 * Build the foreground prompt of a voice from a list of active segments.
 *
 * The foreground is the "dialogue" itself, ie. the actual spoken words:
 * nothing fancy is needed, the raw text of each dialogue segment is used.
 * Dialogues are ordered by label (descending) and empty prompts are skipped.
 *
 * @param segments the active segments
 * @returns the dialogue lines joined with ". "
 */
export function getSpeechForegroundAudioPrompt(
  segments: ClapSegment[] = []
): string {
  const dialogues = segments.filter(segment => segment.category === "dialogue")
  dialogues.sort((left, right) => right.label.localeCompare(left.label))

  const spokenLines: string[] = []
  for (const dialogue of dialogues) {
    if (dialogue.prompt) {
      spokenLines.push(dialogue.prompt)
    }
  }

  return spokenLines.join(". ")
}
src/components/interface/latent-engine/core/prompts/getVideoPrompt.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { ClapModel, ClapSegment } from "@aitube/clap"
2
 
3
  import { deduplicatePrompt } from "../../utils/prompting/deduplicatePrompt"
4
 
@@ -11,12 +11,12 @@ import { getCharacterPrompt } from "./getCharacterPrompt"
11
  * @returns
12
  */
13
  export function getVideoPrompt(
14
- segments: ClapSegment[],
15
- modelsById: Record<string, ClapModel>,
16
- extraPositivePrompt: string[]
17
  ): string {
18
 
19
- // console.log("modelsById:", modelsById)
20
 
21
  // to construct the video we need to collect all the segments describing it
22
  // we ignore unrelated categories (music, dialogue) or non-prompt items (eg. an audio sample)
@@ -60,23 +60,23 @@ export function getVideoPrompt(
60
  tmp.sort((a, b) => b.label.localeCompare(a.label))
61
 
62
  let videoPrompt = tmp.map(segment => {
63
- const model: ClapModel | undefined = modelsById[segment?.modelId || ""] || undefined
64
 
65
  if (segment.category === "dialogue") {
66
 
67
- // if we can't find the model, then we are unable
68
  // to make any assumption about the gender, age or appearance
69
- if (!model) {
70
- console.log("ERROR: this is a dialogue, but couldn't find the model!")
71
  return `portrait of a person speaking, blurry background, bokeh`
72
  }
73
 
74
- const characterTrigger = model?.triggerName || ""
75
- const characterLabel = model?.label || ""
76
- const characterDescription = model?.description || ""
77
  const dialogueLine = segment?.prompt || ""
78
 
79
- const characterPrompt = getCharacterPrompt(model)
80
 
81
  // in the context of a video, we some something additional:
82
  // we create a "bokeh" style
@@ -84,13 +84,13 @@ export function getVideoPrompt(
84
 
85
  } else if (segment.category === "location") {
86
 
87
- // if we can't find the location's model, we default to returning the prompt
88
- if (!model) {
89
- console.log("ERROR: this is a location, but couldn't find the model!")
90
  return segment.prompt
91
  }
92
 
93
- return model.description
94
  } else {
95
  return segment.prompt
96
  }
 
1
+ import { ClapEntity, ClapSegment } from "@aitube/clap"
2
 
3
  import { deduplicatePrompt } from "../../utils/prompting/deduplicatePrompt"
4
 
 
11
  * @returns
12
  */
13
  export function getVideoPrompt(
14
+ segments: ClapSegment[] = [],
15
+ entitiesIndex: Record<string, ClapEntity> = {},
16
+ extraPositivePrompt: string[] = []
17
  ): string {
18
 
19
+ // console.log("entitiesIndex:", entitiesIndex)
20
 
21
  // to construct the video we need to collect all the segments describing it
22
  // we ignore unrelated categories (music, dialogue) or non-prompt items (eg. an audio sample)
 
60
  tmp.sort((a, b) => b.label.localeCompare(a.label))
61
 
62
  let videoPrompt = tmp.map(segment => {
63
+ const entity: ClapEntity | undefined = entitiesIndex[segment?.entityId || ""] || undefined
64
 
65
  if (segment.category === "dialogue") {
66
 
67
+ // if we can't find the entity, then we are unable
68
  // to make any assumption about the gender, age or appearance
69
+ if (!entity) {
70
+ console.log("ERROR: this is a dialogue, but couldn't find the entity!")
71
  return `portrait of a person speaking, blurry background, bokeh`
72
  }
73
 
74
+ const characterTrigger = entity?.triggerName || ""
75
+ const characterLabel = entity?.label || ""
76
+ const characterDescription = entity?.description || ""
77
  const dialogueLine = segment?.prompt || ""
78
 
79
+ const characterPrompt = getCharacterPrompt(entity)
80
 
81
  // in the context of a video, we some something additional:
82
  // we create a "bokeh" style
 
84
 
85
  } else if (segment.category === "location") {
86
 
87
+ // if we can't find the location's entity, we default to returning the prompt
88
+ if (!entity) {
89
+ console.log("ERROR: this is a location, but couldn't find the entity!")
90
  return segment.prompt
91
  }
92
 
93
+ return entity.description
94
  } else {
95
  return segment.prompt
96
  }
src/components/interface/latent-engine/core/useLatentEngine.ts CHANGED
@@ -1,7 +1,7 @@
1
 
2
  import { create } from "zustand"
3
 
4
- import { ClapModel, ClapProject, ClapSegment, newClap, parseClap } from "@aitube/clap"
5
 
6
  import { LatentEngineStore } from "./types"
7
  import { resolveSegments } from "../resolvers/resolveSegments"
@@ -409,9 +409,7 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
409
  //
410
  // yes: I know the code is complex and not intuitive - sorry about that
411
 
412
- // TODO Julian: use the Clap project to fill in those
413
- const modelsById: Record<string, ClapModel> = {}
414
- const extraPositivePrompt: string[] = []
415
 
416
  let bufferAheadOfCurrentPositionInMs = positionInMs
417
 
@@ -427,7 +425,7 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
427
 
428
  bufferAheadOfCurrentPositionInMs += videoDurationInMs
429
 
430
- const prompt = getVideoPrompt(shotSegmentsToPreload, modelsById, extraPositivePrompt)
431
 
432
  console.log(`video prompt: ${prompt}`)
433
  // could also be the camera
 
1
 
2
  import { create } from "zustand"
3
 
4
+ import { ClapEntity, ClapProject, ClapSegment, newClap, parseClap } from "@aitube/clap"
5
 
6
  import { LatentEngineStore } from "./types"
7
  import { resolveSegments } from "../resolvers/resolveSegments"
 
409
  //
410
  // yes: I know the code is complex and not intuitive - sorry about that
411
 
412
+ const extraPositivePrompt: string[] = ["high quality", "crisp", "detailed"]
 
 
413
 
414
  let bufferAheadOfCurrentPositionInMs = positionInMs
415
 
 
425
 
426
  bufferAheadOfCurrentPositionInMs += videoDurationInMs
427
 
428
+ const prompt = getVideoPrompt(shotSegmentsToPreload, clap.entityIndex, extraPositivePrompt)
429
 
430
  console.log(`video prompt: ${prompt}`)
431
  // could also be the camera
src/lib/business/getClapAssetSourceType.ts DELETED
@@ -1,25 +0,0 @@
1
- import { ClapAssetSource } from "@aitube/clap"
2
-
3
/**
 * Classify an asset string by looking at how it starts.
 *
 * The input is trimmed first, then:
 * - nothing left: "EMPTY"
 * - starts with "https://" or "http://": "REMOTE"
 * - starts with "/", "../" or "./": "PATH"
 * - starts with "data:": "DATA"
 * - anything else: "PROMPT"
 *
 * @param input the asset string to classify
 * @returns the detected kind of source
 */
export function getClapAssetSourceSource(input: string = ""): ClapAssetSource {

  const normalized = `${input || ""}`.trim()

  const startsWithAnyOf = (...prefixes: string[]): boolean =>
    prefixes.some(prefix => normalized.startsWith(prefix))

  if (normalized.length === 0) {
    return "EMPTY"
  }

  if (startsWithAnyOf("https://", "http://")) {
    return "REMOTE"
  }

  // note that "path" assets are potentially a security risk, they need to be treated with care
  if (startsWithAnyOf("/", "../", "./")) {
    return "PATH"
  }

  if (startsWithAnyOf("data:")) {
    return "DATA"
  }

  return "PROMPT"
}