Spaces:

jbilcke-hf
/

ai-tube

Running

App Files Files Community

jbilcke-hf HF staff committed on Apr 29, 2024

Commit

1cea837

1 Parent(s): 0df1259

rename from model to entity

Browse files

Files changed (26) hide show

.env +2 -0
package-lock.json +101 -48
package.json +4 -1
src/app/api/generators/speech/generateVoiceWithElevenLabs.txt +55 -0
src/app/api/generators/speech/generateVoiceWithParlerTTS.ts +84 -0
src/app/api/generators/speech/generateVoiceWithXTTS2.txt +92 -0
src/app/api/utils/addBase64.ts +51 -0
src/app/api/utils/getHuggingFaceSpaceStatus.ts +114 -0
src/app/api/utils/getMediaInfo.ts +79 -0
src/app/api/utils/makeSureSpaceIsRunning.ts +77 -0
src/app/api/utils/readMp3FileToBase64.ts +17 -0
src/app/api/utils/sleep.ts +6 -0
src/app/api/utils/timeout.ts +15 -0
src/app/api/utils/tryApiCall.ts +69 -0
src/app/api/v1/edit/dialogues/route.ts +77 -0
src/app/api/v1/edit/{models → entities}/generateAudioID.ts +0 -0
src/app/api/v1/edit/{models → entities}/generateImageID.ts +0 -0
src/app/api/v1/edit/{models → entities}/route.ts +19 -17
src/app/api/v1/edit/{models → entities}/systemPrompt.ts +0 -0
src/app/api/v1/edit/storyboards/route.ts +1 -2
src/components/interface/latent-engine/core/prompts/getCharacterPrompt.ts +8 -8
src/components/interface/latent-engine/core/prompts/getSpeechBackgroundAudioPrompt.ts +52 -0
src/components/interface/latent-engine/core/prompts/getSpeechForegroundAudioPrompt.ts +20 -0
src/components/interface/latent-engine/core/prompts/getVideoPrompt.ts +17 -17
src/components/interface/latent-engine/core/useLatentEngine.ts +3 -5
src/lib/business/getClapAssetSourceType.ts +0 -25

.env CHANGED Viewed

@@ -28,6 +28,8 @@ AUTH_OPENAI_API_KEY=""
 VIDEOCHAIN_API_URL=""
 VIDEOCHAIN_API_KEY=""
 # ----------- CENSORSHIP -------
 ENABLE_CENSORSHIP=
 FINGERPRINT_KEY=

 VIDEOCHAIN_API_URL=""
 VIDEOCHAIN_API_KEY=""
+MICROSERVICE_API_SECRET_TOKEN=""
 # ----------- CENSORSHIP -------
 ENABLE_CENSORSHIP=
 FINGERPRINT_KEY=

package-lock.json CHANGED Viewed

@@ -1,14 +1,15 @@
 {
-  "name": "ai-tube",
   "version": "0.0.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
-      "name": "ai-tube",
       "version": "0.0.0",
       "dependencies": {
-        "@aitube/clap": "^0.0.6",
         "@huggingface/hub": "0.12.3-oauth",
         "@huggingface/inference": "^2.6.7",
         "@jcoreio/async-throttle": "^1.6.0",
@@ -60,6 +61,7 @@
         "eslint": "8.45.0",
         "eslint-config-next": "13.4.10",
         "fastest-levenshtein": "^1.0.16",
         "gsplat": "^1.2.4",
         "hash-wasm": "^4.11.0",
         "jose": "^5.2.4",
@@ -103,6 +105,7 @@
         "zustand": "^4.4.7"
       },
       "devDependencies": {
         "@types/proper-lockfile": "^4.1.2",
         "@types/qs": "^6.9.7",
         "@types/react-copy-to-clipboard": "^5.0.7",
@@ -111,18 +114,10 @@
         "daisyui": "^3.7.4"
       }
     },
-    "node_modules/@aashutoshrathi/word-wrap": {
-      "version": "1.2.6",
-      "resolved": "https://registry.npmjs.org/@aashutoshrathi/word-wrap/-/word-wrap-1.2.6.tgz",
-      "integrity": "sha512-1Yjs2SvM8TflER/OD3cOjhWWOZb58A2t7wpE2S9XfBYTiIl+XFhQG2bjy4Pu1I+EAlCNUzRDYDdFwFYUKvXcIA==",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
     "node_modules/@aitube/clap": {
-      "version": "0.0.6",
-      "resolved": "https://registry.npmjs.org/@aitube/clap/-/clap-0.0.6.tgz",
-      "integrity": "sha512-SPo90RBnOJCmp+DqzxllNOcp38AbHSzqkAbYEudRiubqWHDF1GGqYi25gCdG7bFIWH+8evjSiiwsjkzedpbhoA==",
       "dependencies": {
         "pure-uuid": "^1.8.1",
         "yaml": "^2.4.1"
@@ -131,6 +126,19 @@
         "typescript": "^5.4.5"
       }
     },
     "node_modules/@alloc/quick-lru": {
       "version": "5.2.0",
       "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz",
@@ -901,28 +909,28 @@
       }
     },
     "node_modules/@floating-ui/core": {
-      "version": "1.6.0",
-      "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.6.0.tgz",
-      "integrity": "sha512-PcF++MykgmTj3CIyOQbKA/hDzOAiqI3mhuoN44WRCopIs1sgoDoU4oty4Jtqaj/y3oDU6fnVSm4QG0a3t5i0+g==",
       "dependencies": {
-        "@floating-ui/utils": "^0.2.1"
       }
     },
     "node_modules/@floating-ui/dom": {
-      "version": "1.6.3",
-      "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.6.3.tgz",
-      "integrity": "sha512-RnDthu3mzPlQ31Ss/BTwQ1zjzIhr3lk1gZB1OC56h/1vEtaXkESrOqL5fQVMfXpwGtRwX+YsZBdyHtJMQnkArw==",
       "dependencies": {
         "@floating-ui/core": "^1.0.0",
         "@floating-ui/utils": "^0.2.0"
       }
     },
     "node_modules/@floating-ui/react-dom": {
-      "version": "2.0.8",
-      "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.0.8.tgz",
-      "integrity": "sha512-HOdqOt3R3OGeTKidaLvJKcgg75S6tibQ3Tif4eyd91QnIJWr0NLvoXFpJA/j8HqkFSL68GDca9AuyWEHlhyClw==",
       "dependencies": {
-        "@floating-ui/dom": "^1.6.1"
       },
       "peerDependencies": {
         "react": ">=16.8.0",
@@ -930,9 +938,9 @@
       }
     },
     "node_modules/@floating-ui/utils": {
-      "version": "0.2.1",
-      "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.1.tgz",
-      "integrity": "sha512-9TANp6GPoMtYzQdt54kfAyMmz1+osLlXdg2ENroU7zzrtflTLrrC/lgrIfaSe+Wu0b89GKccT7vxXA0MoAIO+Q=="
     },
     "node_modules/@huggingface/hub": {
       "version": "0.12.3-oauth",
@@ -1507,9 +1515,9 @@
       }
     },
     "node_modules/@mediapipe/tasks-vision": {
-      "version": "0.10.13-rc.20240426",
-      "resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.13-rc.20240426.tgz",
-      "integrity": "sha512-YyickIMLXr2/pEOZ00bHYWfWmCAwC8uRv0Ek6haQvnzahwfiw4Evlka3XRa8SxL4X7p432puS558xAm5h0SJTA=="
     },
     "node_modules/@next/env": {
       "version": "14.2.3",
@@ -2930,6 +2938,15 @@
       "resolved": "https://registry.npmjs.org/@types/cookie/-/cookie-0.4.1.tgz",
       "integrity": "sha512-XW/Aa8APYr6jSVVA1y/DEIZX0/GMKLEVekNG727R8cs56ahETkRAy/3DR7+fJyh7oUgGwNQaRfXCun0+KbWY7Q=="
     },
     "node_modules/@types/json5": {
       "version": "0.0.29",
       "resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz",
@@ -3479,6 +3496,11 @@
       "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.8.tgz",
       "integrity": "sha512-OH/2E5Fg20h2aPrbe+QL8JZQFko0YZaF+j4mnQ7BGhfavO7OpSLa8a0y9sBwomHdSbkhTS8TQNayBfnW5DwbvQ=="
     },
     "node_modules/asynckit": {
       "version": "0.4.0",
       "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
@@ -3713,9 +3735,9 @@
       }
     },
     "node_modules/caniuse-lite": {
-      "version": "1.0.30001612",
-      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001612.tgz",
-      "integrity": "sha512-lFgnZ07UhaCcsSZgWW0K5j4e69dK1u/ltrL9lTUiFOwNHs12S3UMIEYgBV0Z6C6hRDev7iRnMzzYmKabYdXF9g==",
       "funding": [
         {
           "type": "opencollective",
@@ -5116,6 +5138,29 @@
       "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz",
       "integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw=="
     },
     "node_modules/follow-redirects": {
       "version": "1.15.6",
       "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
@@ -6655,16 +6700,16 @@
       }
     },
     "node_modules/optionator": {
-      "version": "0.9.3",
-      "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.3.tgz",
-      "integrity": "sha512-JjCoypp+jKn1ttEFExxhetCKeJt9zhAgAve5FXHixTvFDW/5aEktX9bufBKLRRMdU7bNtpLfcGu94B3cdEJgjg==",
       "dependencies": {
-        "@aashutoshrathi/word-wrap": "^1.2.3",
         "deep-is": "^0.1.3",
         "fast-levenshtein": "^2.0.6",
         "levn": "^0.4.1",
         "prelude-ls": "^1.2.1",
-        "type-check": "^0.4.0"
       },
       "engines": {
         "node": ">= 0.8.0"
@@ -6759,9 +6804,9 @@
       }
     },
     "node_modules/path-scurry/node_modules/lru-cache": {
-      "version": "10.2.1",
-      "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.2.1.tgz",
-      "integrity": "sha512-tS24spDe/zXhWbNPErCHs/AGOzbKGHT+ybSBqmdLm8WZ1xXLWvH8Qn71QPAlqVhd0qUTWjy+Kl9JmISgDdEjsA==",
       "engines": {
         "node": "14 || >=16.14"
       }
@@ -8248,9 +8293,9 @@
       }
     },
     "node_modules/type-fest": {
-      "version": "4.17.0",
-      "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.17.0.tgz",
-      "integrity": "sha512-9flrz1zkfLRH3jO3bLflmTxryzKMxVa7841VeMgBaNQGY6vH4RCcpN/sQLB7mQQYh1GZ5utT2deypMuCy4yicw==",
       "engines": {
         "node": ">=16"
       },
@@ -8640,6 +8685,14 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
     "node_modules/wrap-ansi": {
       "version": "8.1.0",
       "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz",
@@ -8766,9 +8819,9 @@
       "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A=="
     },
     "node_modules/yaml": {
-      "version": "2.4.1",
-      "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.4.1.tgz",
-      "integrity": "sha512-pIXzoImaqmfOrL7teGUBt/T7ZDnyeGBWyXQBvOVhLkWLN37GXv8NMLK406UY6dS51JfcQHsmcW5cJ441bHg6Lg==",
       "bin": {
         "yaml": "bin.mjs"
       },

 {
+  "name": "@aitube/website",
   "version": "0.0.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
+      "name": "@aitube/website",
       "version": "0.0.0",
       "dependencies": {
+        "@aitube/clap": "0.0.7",
+        "@aitube/client": "0.0.7",
         "@huggingface/hub": "0.12.3-oauth",
         "@huggingface/inference": "^2.6.7",
         "@jcoreio/async-throttle": "^1.6.0",
         "eslint": "8.45.0",
         "eslint-config-next": "13.4.10",
         "fastest-levenshtein": "^1.0.16",
+        "fluent-ffmpeg": "^2.1.2",
         "gsplat": "^1.2.4",
         "hash-wasm": "^4.11.0",
         "jose": "^5.2.4",
         "zustand": "^4.4.7"
       },
       "devDependencies": {
+        "@types/fluent-ffmpeg": "^2.1.24",
         "@types/proper-lockfile": "^4.1.2",
         "@types/qs": "^6.9.7",
         "@types/react-copy-to-clipboard": "^5.0.7",
         "daisyui": "^3.7.4"
       }
     },
     "node_modules/@aitube/clap": {
+      "version": "0.0.7",
+      "resolved": "https://registry.npmjs.org/@aitube/clap/-/clap-0.0.7.tgz",
+      "integrity": "sha512-0muPu4G1sRsNqSVZ/ICBCc4QibZ9OT33ORbahPP1+h3GYcD/7K+ZLYJjdbQwJWVEcpKDosDVaQKeNYdab0S0LA==",
       "dependencies": {
         "pure-uuid": "^1.8.1",
         "yaml": "^2.4.1"
         "typescript": "^5.4.5"
       }
     },
+    "node_modules/@aitube/client": {
+      "version": "0.0.7",
+      "resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.7.tgz",
+      "integrity": "sha512-s6vxst7pkLt7tI96JS508gfk4EgdLJy5Itr76ej/zvtMRMgnKgAlfB6Bb8/1u7L5CToz4Wgk6h4kz8T+yEbEeg==",
+      "dependencies": {
+        "uuid": "^9.0.1",
+        "yaml": "^2.4.1"
+      },
+      "peerDependencies": {
+        "@aitube/clap": "0.0.7",
+        "typescript": "^5.4.5"
+      }
+    },
     "node_modules/@alloc/quick-lru": {
       "version": "5.2.0",
       "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz",
       }
     },
     "node_modules/@floating-ui/core": {
+      "version": "1.6.1",
+      "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.6.1.tgz",
+      "integrity": "sha512-42UH54oPZHPdRHdw6BgoBD6cg/eVTmVrFcgeRDM3jbO7uxSoipVcmcIGFcA5jmOHO5apcyvBhkSKES3fQJnu7A==",
       "dependencies": {
+        "@floating-ui/utils": "^0.2.0"
       }
     },
     "node_modules/@floating-ui/dom": {
+      "version": "1.6.4",
+      "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.6.4.tgz",
+      "integrity": "sha512-0G8R+zOvQsAG1pg2Q99P21jiqxqGBW1iRe/iXHsBRBxnpXKFI8QwbB4x5KmYLggNO5m34IQgOIu9SCRfR/WWiQ==",
       "dependencies": {
         "@floating-ui/core": "^1.0.0",
         "@floating-ui/utils": "^0.2.0"
       }
     },
     "node_modules/@floating-ui/react-dom": {
+      "version": "2.0.9",
+      "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.0.9.tgz",
+      "integrity": "sha512-q0umO0+LQK4+p6aGyvzASqKbKOJcAHJ7ycE9CuUvfx3s9zTHWmGJTPOIlM/hmSBfUfg/XfY5YhLBLR/LHwShQQ==",
       "dependencies": {
+        "@floating-ui/dom": "^1.0.0"
       },
       "peerDependencies": {
         "react": ">=16.8.0",
       }
     },
     "node_modules/@floating-ui/utils": {
+      "version": "0.2.2",
+      "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.2.tgz",
+      "integrity": "sha512-J4yDIIthosAsRZ5CPYP/jQvUAQtlZTTD/4suA08/FEnlxqW3sKS9iAhgsa9VYLZ6vDHn/ixJgIqRQPotoBjxIw=="
     },
     "node_modules/@huggingface/hub": {
       "version": "0.12.3-oauth",
       }
     },
     "node_modules/@mediapipe/tasks-vision": {
+      "version": "0.10.13-rc.20240428",
+      "resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.13-rc.20240428.tgz",
+      "integrity": "sha512-YMOshYcwxzLNNNEKSs4hWVTRjtuX+irWIjsbENrOee491t/oM1a9bnhggMdWLq0FBQ7xuCfvp1diu/JeZFoE0A=="
     },
     "node_modules/@next/env": {
       "version": "14.2.3",
       "resolved": "https://registry.npmjs.org/@types/cookie/-/cookie-0.4.1.tgz",
       "integrity": "sha512-XW/Aa8APYr6jSVVA1y/DEIZX0/GMKLEVekNG727R8cs56ahETkRAy/3DR7+fJyh7oUgGwNQaRfXCun0+KbWY7Q=="
     },
+    "node_modules/@types/fluent-ffmpeg": {
+      "version": "2.1.24",
+      "resolved": "https://registry.npmjs.org/@types/fluent-ffmpeg/-/fluent-ffmpeg-2.1.24.tgz",
+      "integrity": "sha512-g5oQO8Jgi2kFS3tTub7wLvfLztr1s8tdXmRd8PiL/hLMLzTIAyMR2sANkTggM/rdEDAg3d63nYRRVepwBiCw5A==",
+      "dev": true,
+      "dependencies": {
+        "@types/node": "*"
+      }
+    },
     "node_modules/@types/json5": {
       "version": "0.0.29",
       "resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz",
       "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.8.tgz",
       "integrity": "sha512-OH/2E5Fg20h2aPrbe+QL8JZQFko0YZaF+j4mnQ7BGhfavO7OpSLa8a0y9sBwomHdSbkhTS8TQNayBfnW5DwbvQ=="
     },
+    "node_modules/async": {
+      "version": "3.2.5",
+      "resolved": "https://registry.npmjs.org/async/-/async-3.2.5.tgz",
+      "integrity": "sha512-baNZyqaaLhyLVKm/DlvdW051MSgO6b8eVfIezl9E5PqWxFgzLm/wQntEW4zOytVburDEr0JlALEpdOFwvErLsg=="
+    },
     "node_modules/asynckit": {
       "version": "0.4.0",
       "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
       }
     },
     "node_modules/caniuse-lite": {
+      "version": "1.0.30001614",
+      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001614.tgz",
+      "integrity": "sha512-jmZQ1VpmlRwHgdP1/uiKzgiAuGOfLEJsYFP4+GBou/QQ4U6IOJCB4NP1c+1p9RGLpwObcT94jA5/uO+F1vBbog==",
       "funding": [
         {
           "type": "opencollective",
       "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz",
       "integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw=="
     },
+    "node_modules/fluent-ffmpeg": {
+      "version": "2.1.2",
+      "resolved": "https://registry.npmjs.org/fluent-ffmpeg/-/fluent-ffmpeg-2.1.2.tgz",
+      "integrity": "sha512-IZTB4kq5GK0DPp7sGQ0q/BWurGHffRtQQwVkiqDgeO6wYJLLV5ZhgNOQ65loZxxuPMKZKZcICCUnaGtlxBiR0Q==",
+      "dependencies": {
+        "async": ">=0.2.9",
+        "which": "^1.1.1"
+      },
+      "engines": {
+        "node": ">=0.8.0"
+      }
+    },
+    "node_modules/fluent-ffmpeg/node_modules/which": {
+      "version": "1.3.1",
+      "resolved": "https://registry.npmjs.org/which/-/which-1.3.1.tgz",
+      "integrity": "sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ==",
+      "dependencies": {
+        "isexe": "^2.0.0"
+      },
+      "bin": {
+        "which": "bin/which"
+      }
+    },
     "node_modules/follow-redirects": {
       "version": "1.15.6",
       "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
       }
     },
     "node_modules/optionator": {
+      "version": "0.9.4",
+      "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
+      "integrity": "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==",
       "dependencies": {
         "deep-is": "^0.1.3",
         "fast-levenshtein": "^2.0.6",
         "levn": "^0.4.1",
         "prelude-ls": "^1.2.1",
+        "type-check": "^0.4.0",
+        "word-wrap": "^1.2.5"
       },
       "engines": {
         "node": ">= 0.8.0"
       }
     },
     "node_modules/path-scurry/node_modules/lru-cache": {
+      "version": "10.2.2",
+      "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.2.2.tgz",
+      "integrity": "sha512-9hp3Vp2/hFQUiIwKo8XCeFVnrg8Pk3TYNPIR7tJADKi5YfcF7vEaK7avFHTlSy3kOKYaJQaalfEo6YuXdceBOQ==",
       "engines": {
         "node": "14 || >=16.14"
       }
       }
     },
     "node_modules/type-fest": {
+      "version": "4.18.0",
+      "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.18.0.tgz",
+      "integrity": "sha512-+dbmiyliDY/2TTcjCS7NpI9yV2iEFlUDk5TKnsbkN7ZoRu5s7bT+zvYtNFhFXC2oLwURGT2frACAZvbbyNBI+w==",
       "engines": {
         "node": ">=16"
       },
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/word-wrap": {
+      "version": "1.2.5",
+      "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz",
+      "integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
     "node_modules/wrap-ansi": {
       "version": "8.1.0",
       "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz",
       "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A=="
     },
     "node_modules/yaml": {
+      "version": "2.4.2",
+      "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.4.2.tgz",
+      "integrity": "sha512-B3VqDZ+JAg1nZpaEmWtTXUlBneoGx6CPM9b0TENK6aoSu5t73dItudwdgmi6tHlIZZId4dZ9skcAQ2UbcyAeVA==",
       "bin": {
         "yaml": "bin.mjs"
       },

package.json CHANGED Viewed

@@ -9,7 +9,8 @@
     "lint": "next lint"
   },
   "dependencies": {
-    "@aitube/clap": "^0.0.6",
     "@huggingface/hub": "0.12.3-oauth",
     "@huggingface/inference": "^2.6.7",
     "@jcoreio/async-throttle": "^1.6.0",
@@ -61,6 +62,7 @@
     "eslint": "8.45.0",
     "eslint-config-next": "13.4.10",
     "fastest-levenshtein": "^1.0.16",
     "gsplat": "^1.2.4",
     "hash-wasm": "^4.11.0",
     "jose": "^5.2.4",
@@ -104,6 +106,7 @@
     "zustand": "^4.4.7"
   },
   "devDependencies": {
     "@types/proper-lockfile": "^4.1.2",
     "@types/qs": "^6.9.7",
     "@types/react-copy-to-clipboard": "^5.0.7",

     "lint": "next lint"
   },
   "dependencies": {
+    "@aitube/clap": "0.0.7",
+    "@aitube/client": "0.0.7",
     "@huggingface/hub": "0.12.3-oauth",
     "@huggingface/inference": "^2.6.7",
     "@jcoreio/async-throttle": "^1.6.0",
     "eslint": "8.45.0",
     "eslint-config-next": "13.4.10",
     "fastest-levenshtein": "^1.0.16",
+    "fluent-ffmpeg": "^2.1.2",
     "gsplat": "^1.2.4",
     "hash-wasm": "^4.11.0",
     "jose": "^5.2.4",
     "zustand": "^4.4.7"
   },
   "devDependencies": {
+    "@types/fluent-ffmpeg": "^2.1.24",
     "@types/proper-lockfile": "^4.1.2",
     "@types/qs": "^6.9.7",
     "@types/react-copy-to-clipboard": "^5.0.7",

src/app/api/generators/speech/generateVoiceWithElevenLabs.txt ADDED Viewed

	@@ -0,0 +1,55 @@

+import { getMediaInfo } from "../../utils/getMediaInfo"
+import { readMp3FileToBase64 } from "../../utils/readMp3FileToBase64"
+/**
+ * Synthesizes speech for `text` through the ElevenLabs client, then loads the
+ * audio file the client wrote to disk.
+ *
+ * The client only reports where the file was written inside a status message,
+ * so the path is recovered by parsing that message.
+ *
+ * @param text - text to convert to speech
+ * @param audioId - second argument given to `tts()` (presumably the voice id — TODO confirm)
+ * @param debug - accepted but not used by this function
+ * @returns the file path, file name, extension, the output of `readMp3FileToBase64`
+ *          and the duration (in seconds and in milliseconds) reported by `getMediaInfo`
+ */
+export async function generateSpeechWithElevenLabs({
+  text,
+  audioId,
+  debug = false,
+}: {
+  text: string
+  audioId: string
+  debug?: boolean
+}): Promise<{
+  filePath: string
+  fileName: string
+  format: string // "mp3"
+  base64: string // data uri
+  durationInSec: number
+  durationInMs: number
+}> {
+  const client = await ElevenLabs()
+  // tts() converts the text to speech and saves it to the output folder,
+  // in a file named like TTS_date-time.mp3.
+  // It resolves to { code: CODE, message: "STATUS_MESSAGE" }: the relative path
+  // of the file is only available as the tail of the status message.
+  const response = await client.tts(text, audioId)
+  const reportedPath = response.message.split("File written successfully:").pop().trim()
+  // Drop the leading "." to get something like:
+  // "/../../../../var/folders/x4/2w7-------------------"
+  // then strip the relative navigation so that only this remains:
+  // "/var/folders/x4/2w7-------------------"
+  const filePath = reportedPath.slice(1).replaceAll("/..", "")
+  const fileName = filePath.split("/").pop()
+  const format = fileName.split(".").pop()
+  const mediaInfo = await getMediaInfo(filePath)
+  const base64 = await readMp3FileToBase64(filePath)
+  return {
+    filePath,
+    fileName,
+    format,
+    base64,
+    durationInSec: mediaInfo.durationInSec,
+    durationInMs: mediaInfo.durationInMs,
+  }
+}

src/app/api/generators/speech/generateVoiceWithParlerTTS.ts ADDED Viewed

	@@ -0,0 +1,84 @@

+import { addBase64Header } from "@/lib/data/addBase64Header"
+import { tryApiCalls } from "../../utils/tryApiCall"
+const gradioSpaceApiUrl = `https://jbilcke-hf-ai-tube-model-parler-tts-mini.hf.space` // base URL; "api/predict" is appended when calling it
+const huggingFaceSpace = "jbilcke-hf/ai-tube-model-parler-tts-mini" // handed to tryApiCalls() together with the request function
+const apiKey = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}` // sent as the first element of the request payload; "" when the env var is unset
+/**
+ * Sends a speech synthesis request to the Parler-TTS Space.
+ *
+ * NOTE: work in progress — the response is only logged for now and the request
+ * callback always yields an empty string (see the TODO in the body).
+ *
+ * @param text - text to speak; must be non-empty
+ * @param audioId - forwarded as the third element of the request payload
+ * @param debug - forwarded to `tryApiCalls`
+ * @param neverThrow - when true, failures are logged and "" is returned instead of being thrown
+ * @returns whatever `tryApiCalls` resolves to for the request callback
+ * @throws when `text` is empty or the call fails, unless `neverThrow` is true
+ */
+export async function generateSpeechWithParlerTTS({
+  text,
+  audioId,
+  debug = false,
+  neverThrow = false,
+}: {
+  text: string
+  audioId: string
+  debug?: boolean
+  neverThrow?: boolean
+}): Promise<string> {
+  // The actual HTTP call, wrapped in a closure so that tryApiCalls() can invoke it
+  const actualFunction = async (): Promise<string> => {
+    const res = await fetch(gradioSpaceApiUrl + (gradioSpaceApiUrl.endsWith("/") ? "" : "/") + "api/predict", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        // Authorization: `Bearer ${token}`,
+      },
+      body: JSON.stringify({
+        fn_index: 0, // <- important!
+        data: [
+          apiKey,
+          text,
+          audioId,
+        ],
+      }),
+      cache: "no-store",
+      // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
+      // next: { revalidate: 1 }
+    })
+    if (res.status !== 200) {
+      throw new Error('Failed to fetch data')
+    }
+    const rawJson = await res.json()
+    console.log("rawJson:", rawJson)
+    // TODO: addBase64 with the right header type
+    return ""
+  }
+  try {
+    if (!text?.length) {
+      throw new Error(`text is too short!`)
+    }
+    // awaited inside the try block so that a rejection reaches the catch below
+    return await tryApiCalls({
+      func: actualFunction,
+      huggingFaceSpace,
+      debug,
+      failureMessage: "failed to generate the audio"
+    })
+  } catch (err) {
+    if (neverThrow) {
+      console.error(`generateVoiceWithParlerTTS():`, err)
+      return ""
+    } else {
+      throw err
+    }
+  }
+}

src/app/api/generators/speech/generateVoiceWithXTTS2.txt ADDED Viewed

	@@ -0,0 +1,92 @@

+import { StoryLine } from "../../types/structures.mts"
+import { tryApiCalls } from "../../utils/tryApiCalls.mts"
+import { promptToGenerateAudioStory } from "../prompts/prompts.mts"
+import { microserviceApiKey } from "../../config.mts"
+import { addBase64Header } from "../../base64/addBase64.mts"
+// TODO delete this? we don't need an env var for this I think?
+const aiStoryServerApiUrl = `https://jbilcke-hf-ai-story-server.hf.space` // base URL; "api/predict" is appended when calling it
+const huggingFaceSpace = "jbilcke-hf/ai-story-server" // handed to tryApiCalls() together with the request function
+/**
+ * Asks the AI story server Space to turn a prompt into a narrated story.
+ *
+ * @param prompt - user prompt; must be non-empty
+ * @param voice - "Julian" selects that voice, any other value falls back to "Cloée"
+ * @param neverThrow - when true, failures are logged and an empty list is returned
+ * @param debug - forwarded to `tryApiCalls`
+ * @returns the story lines, with tidied punctuation and the audio passed through
+ *          `addBase64Header(…, "mp4")`
+ * @throws when the prompt is empty or the request fails, unless `neverThrow` is true
+ */
+export async function generateAudioStory({
+  prompt,
+  voice,
+  // maxLines,
+  neverThrow,
+  debug,
+}: {
+  prompt: string
+  voice?: string
+  // maxLines: number
+  neverThrow?: boolean
+  debug?: boolean
+}): Promise<StoryLine[]> {
+  // The actual HTTP call, wrapped in a closure so that tryApiCalls() can invoke it
+  const actualFunction = async () => {
+    // positivePrompt = filterOutBadWords(positivePrompt)
+    const res = await fetch(aiStoryServerApiUrl + (aiStoryServerApiUrl.endsWith("/") ? "" : "/") + "api/predict", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        // Authorization: `Bearer ${token}`,
+      },
+      body: JSON.stringify({
+        fn_index: 0, // <- important!
+        data: [
+          microserviceApiKey,
+          promptToGenerateAudioStory,
+          prompt,
+          // TODO: add support for custom wav
+          voice === "Julian" ? "Julian" : "Cloée",
+          // maxLines,
+        ],
+      }),
+      cache: "no-store",
+      // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
+      // next: { revalidate: 1 }
+    })
+    // Check the status before reading the body: an error response is not
+    // guaranteed to be JSON, and parsing it first would hide the real failure
+    // behind a parse error.
+    if (res.status !== 200) {
+      throw new Error('Failed to fetch data')
+    }
+    const rawJson = await res.json()
+    const data = rawJson.data as StoryLine[][]
+    const stories = data?.[0] || []
+    return stories.map(line => ({
+      // remove the space left before sentence-ending punctuation
+      text: line.text.replaceAll(" .", ".").replaceAll(" ?", "?").replaceAll(" !", "!").trim(),
+      audio: addBase64Header(line.audio, "mp4")
+    }))
+  }
+  try {
+    if (!prompt?.length) {
+      throw new Error(`prompt is too short!`)
+    }
+    // awaited inside the try block so that a rejection reaches the catch below
+    return await tryApiCalls({
+      func: actualFunction,
+      huggingFaceSpace,
+      debug,
+      failureMessage: "failed to generate the audio story"
+    })
+  } catch (err) {
+    if (neverThrow) {
+      console.error(`generateAudioStory():`, err)
+      return []
+    } else {
+      throw err
+    }
+  }
+}

src/app/api/utils/addBase64.ts ADDED Viewed

	@@ -0,0 +1,51 @@

+/**
+ * Turns a raw base64 payload into a data URI (`data:<mime>;base64,<payload>`).
+ *
+ * @param image - base64 payload, or an already complete data URI. Despite its
+ *                name it may also hold audio or video content.
+ * @param format - file extension, or a file name whose last dot-separated segment
+ *                 is the extension; matched case-insensitively and trimmed
+ * @returns "" when `image` is missing, not a string or shorter than 60 characters;
+ *          `image` unchanged when it already carries the expected header;
+ *          the payload prefixed with the header otherwise
+ * @throws Error when the format is not supported, or when `image` is a data URI
+ *         whose header does not match the requested format
+ */
+export function addBase64Header(
+  image?: string,
+  format?:
+    | "jpeg" | "jpg" | "png" | "webp" | "heic"
+    | "mp3" | "wav"
+    | "mp4" | "webm"
+    | string
+) {
+  // Payloads that are too short are ignored, before the format is even inspected
+  if (typeof image !== "string" || image.length < 60) {
+    return ""
+  }
+  const mimeByExtension = new Map<string, string>([
+    ["jpeg", "image/jpeg"],
+    ["jpg", "image/jpeg"],
+    ["webp", "image/webp"],
+    ["png", "image/png"],
+    ["heic", "image/heic"],
+    ["mp3", "audio/mp3"],
+    ["mp4", "video/mp4"],
+    ["webm", "video/webm"],
+    ["wav", "audio/wav"],
+  ])
+  const extension = (`${format || ""}`.split(".").pop() || "").toLowerCase().trim()
+  const mime = mimeByExtension.get(extension)
+  if (mime === undefined) {
+    throw new Error(`addBase64Header failed (unsupported format: ${format})`)
+  }
+  const header = `data:${mime};base64,`
+  if (!image.startsWith('data:')) {
+    return header + image
+  }
+  // Already a data URI: accept it only when it declares the expected MIME type
+  if (!image.startsWith(header)) {
+    throw new Error(`addBase64Header failed (input string is NOT a ${mime} image)`)
+  }
+  return image
+}

src/app/api/utils/getHuggingFaceSpaceStatus.ts ADDED Viewed

	@@ -0,0 +1,114 @@

+/** Actually `hf_${string}`, but for convenience, using the string type */
+type AccessToken = string;
+interface Credentials { // not referenced by the code visible in this file
+	accessToken: AccessToken;
+}
+type SpaceHardwareFlavor = // identifiers used by SpaceRuntime.hardware (current / requested)
+	| "cpu-basic"
+	| "cpu-upgrade"
+	| "t4-small"
+	| "t4-medium"
+	| "a10g-small"
+	| "a10g-large"
+	| "a100-large";
+type SpaceSdk = "streamlit" | "gradio" | "docker" | "static"; // possible values of SpaceRuntime.sdk
+type SpaceStage = // possible values of SpaceRuntime.stage
+	| "NO_APP_FILE"
+	| "CONFIG_ERROR"
+	| "BUILDING"
+	| "BUILD_ERROR"
+	| "RUNNING"
+	| "RUNNING_BUILDING"
+	| "RUNTIME_ERROR"
+	| "DELETING"
+	| "PAUSED"
+	| "SLEEPING";
+type AccessTokenRole = "admin" | "write" | "contributor" | "read"; // not referenced by the code visible in this file
+type AuthType = "access_token" | "app_token" | "app_token_as_user"; // not referenced by the code visible in this file
+interface SpaceRuntime { // type of HFSpaceStatus.runtime
+	stage: SpaceStage;
+	sdk?: SpaceSdk;
+	sdkVersion?: string;
+	errorMessage?: string;
+	hardware?: {
+		current: SpaceHardwareFlavor | null;
+		currentPrettyName?: string;
+		requested: SpaceHardwareFlavor | null;
+		requestedPrettyName?: string;
+	};
+	/** when calling /spaces, those props are only fetched if ?full=true */
+	resources?: SpaceResourceConfig;
+	/** in seconds */
+	gcTimeout?: number | null;
+}
+interface SpaceResourceRequirement { // shared by the `requests` and `limits` fields of SpaceResourceConfig
+	cpu?: string;
+	memory?: string;
+	gpu?: string;
+	gpuModel?: string;
+	ephemeral?: string;
+}
+interface SpaceResourceConfig { // type of SpaceRuntime.resources
+	requests: SpaceResourceRequirement;
+	limits: SpaceResourceRequirement;
+	replicas?: number;
+	throttled?: boolean;
+	is_custom?: boolean;
+}
+export interface HFSpaceStatus { // shape that getHuggingFaceSpaceStatus() casts the API response to (no runtime validation)
+  _id: string
+  id: string
+  author: string
+  sha: string
+  lastModified: string
+  private: boolean
+  gated: boolean
+  disabled: boolean
+  host: string
+  subdomain: string
+  tags: string[]
+  likes: number
+  sdk: string
+  runtime: SpaceRuntime // carries the `stage` of the Space, among other fields
+  createdAt: string
+}
+/**
+ * Fetches the description of a Space from `https://huggingface.co/api/spaces/<space>`,
+ * authenticating with the `ADMIN_HUGGING_FACE_API_TOKEN` environment variable.
+ *
+ * @param space - a joined "user_name/space_name"
+ * @returns the parsed response body, cast to `HFSpaceStatus` without validation
+ * @throws Error when the response status is not 200 or the body cannot be parsed
+ */
+export async function getHuggingFaceSpaceStatus({
+  space,
+  // userName,
+  // spaceName,
+}: {
+  space: string // a joined "user_name/space_name"
+  // userName: string
+  // spaceName: string
+}): Promise<HFSpaceStatus> {
+  const endpoint = `https://huggingface.co/api/spaces/${space}`
+  const authorization = `Bearer ${process.env.ADMIN_HUGGING_FACE_API_TOKEN || ""}`
+  const response = await fetch(endpoint, {
+    method: "GET",
+    headers: { Authorization: authorization },
+  })
+  if (response.status !== 200) {
+    throw new Error("failed to get the space data")
+  }
+  try {
+    return (await response.json()) as HFSpaceStatus
+  } catch (parseError) {
+    throw new Error(`failed to parse space data: ${parseError}`)
+  }
+}

src/app/api/utils/getMediaInfo.ts ADDED Viewed

	@@ -0,0 +1,79 @@

+import ffmpeg from "fluent-ffmpeg";
+import { tmpdir } from "node:os";
+import { promises as fs } from "node:fs";
+import { join } from "node:path";
+export type MediaMetadata = { // result type of getMediaInfo()
+  durationInSec: number; // 0 when the media could not be analyzed
+  durationInMs: number; // durationInSec * 1000
+  hasAudio: boolean; // true when at least one stream has codec_type "audio"
+};
+/**
+ * Get the media info of a base64 data URI or of a file path.
+ *
+ * A data URI is decoded and written to a temporary file so that it can be
+ * analyzed like any other file; the temporary file is removed afterwards.
+ *
+ * @param input - either a string starting with "data:" or a path to a media file
+ * @returns the duration and audio presence reported by getMetaDataFromPath()
+ * @throws Error("Invalid base64 data") when a data URI carries no content
+ */
+export async function getMediaInfo(input: string): Promise<MediaMetadata> {
+  // If the input is a path to the file, it can be analyzed directly
+  if (!input.startsWith("data:")) {
+    return await getMetaDataFromPath(input);
+  }
+  // Otherwise the input is a data URI: extract the base64 content
+  const base64Content = input.split(";base64,").pop();
+  if (!base64Content) {
+    throw new Error("Invalid base64 data");
+  }
+  // Decode the base64 content to a buffer
+  const buffer = Buffer.from(base64Content, 'base64');
+  // Use a private temporary directory per call: a name based only on Date.now()
+  // collides when two calls happen within the same millisecond, in which case one
+  // call could overwrite or delete the file the other one is still reading.
+  const tempDir = await fs.mkdtemp(join(tmpdir(), "temp-media-"));
+  try {
+    const tempFileName = join(tempDir, "media");
+    await fs.writeFile(tempFileName, buffer);
+    return await getMetaDataFromPath(tempFileName);
+  } finally {
+    // Always clean up, even when the write failed; `force` avoids throwing
+    // from the cleanup itself when there is nothing left to remove.
+    await fs.rm(tempDir, { recursive: true, force: true });
+  }
+}
+/**
+ * Runs ffprobe on a file and extracts its duration and whether it has audio.
+ *
+ * This never rejects: when the file cannot be analyzed, the failure is logged
+ * and zeroed metadata (no duration, no audio) is returned instead.
+ */
+async function getMetaDataFromPath(filePath: string): Promise<MediaMetadata> {
+  return new Promise((resolve) => {
+    ffmpeg.ffprobe(filePath, (probeError, metadata) => {
+      // Defaults, returned as-is whenever the analysis fails
+      const info = {
+        durationInSec: 0,
+        durationInMs: 0,
+        hasAudio: false,
+      }
+      if (probeError) {
+        console.error("getMediaInfo(): failed to analyze the source (might happen with empty files)")
+        resolve(info);
+        return;
+      }
+      try {
+        // Compute everything first, so that a failure leaves the defaults untouched
+        const seconds = metadata?.format?.duration || 0;
+        const audioFound = (metadata?.streams || []).some((stream) => stream.codec_type === 'audio');
+        info.durationInSec = seconds;
+        info.durationInMs = seconds * 1000;
+        info.hasAudio = audioFound;
+      } catch {
+        console.error(`getMediaInfo(): failed to analyze the source (might happen with empty files)`)
+      }
+      resolve(info);
+    });
+  });
+}

src/app/api/utils/makeSureSpaceIsRunning.ts ADDED Viewed

	@@ -0,0 +1,77 @@

+import { getHuggingFaceSpaceStatus } from "./getHuggingFaceSpaceStatus"
+import { sleep } from "./sleep"
+/**
+ * Make sure a Hugging Face Space is running, asking for a restart if needed.
+ *
+ * If the space does not already report the "RUNNING" stage, a restart is
+ * requested through the Hugging Face API, then the status is polled until
+ * the space is running, another stage is reported, or the timeout is reached.
+ *
+ * Note: the elapsed time only accounts for the pauses between two status
+ * checks, not for the duration of the status requests themselves.
+ *
+ * @param space a joined "user_name/space_name" (nothing happens when it is missing)
+ * @param maxWaitTimeInSec how long we accept to wait while the space is building
+ * @param statusUpdateFrequencyInSec pause between two status checks
+ * @throws if the restart request is not answered with a 200, if the space
+ * reports a stage other than RUNNING / BUILDING / RUNNING_BUILDING, or if it
+ * is still BUILDING once the timeout is reached
+ */
+export async function makeSureSpaceIsRunning({
+  space,
+  maxWaitTimeInSec = 15 * 60, // some spaces are ultra slow to cold boot (eg. data dl at runtime)
+  statusUpdateFrequencyInSec = 5,
+  // userName,
+  // spaceName,
+}: {
+  space?: string // a joined "user_name/space_name"
+  maxWaitTimeInSec?: number
+  statusUpdateFrequencyInSec?: number
+  // userName: string
+  // spaceName: string
+}): Promise<void> {
+  if (!space) { return }
+  try {
+    const { runtime: { stage } } = await getHuggingFaceSpaceStatus({ space })
+    if (stage === "RUNNING") {
+      // nothing to do, the space is already up
+      return
+    }
+  } catch (err) {
+    // best effort: if we cannot read the status, we still try to restart the space
+  }
+  // announce the restart first, so that the "failure!" / "success!" suffixes
+  // written below always complete this line
+  process.stdout.write(`trying to restart space "${space}"`)
+  const res = await fetch(`https://huggingface.co/api/spaces/${space}/restart`, {
+    method: "POST",
+    headers: {
+      Authorization: `Bearer ${process.env.ADMIN_HUGGING_FACE_API_TOKEN || ""}`
+    }
+  })
+  if (res.status !== 200) {
+    process.stdout.write(`failure!\nwe couldn't trigger the restart of space "${space}"\n`)
+    throw new Error(`failed to trigger the restart of space "${space}" (status is not 200)`)
+  }
+  let elapsedTime = 0
+  while (true) {
+    // one dot per status check
+    process.stdout.write(".")
+    const { runtime: { stage } } = await getHuggingFaceSpaceStatus({ space })
+    if (stage === "RUNNING") {
+      process.stdout.write(`success!\nspace "${space}" is ${stage} (took ${elapsedTime} sec)\n`)
+      return
+    } else if (stage === "BUILDING" || stage === "RUNNING_BUILDING") {
+      // let's wait more
+      await sleep(statusUpdateFrequencyInSec * 1000)
+      elapsedTime += statusUpdateFrequencyInSec
+      if (elapsedTime >= maxWaitTimeInSec) {
+        process.stdout.write(`failure!\nspace "${space}" is still ${stage} (after ${elapsedTime} sec)\n`)
+        if (stage === "BUILDING") {
+          throw new Error(`failed to start space ${space} (reason: space is ${stage}, but we reached the ${maxWaitTimeInSec} sec timeout)`)
+        } else {
+          // if we are "RUNNING_BUILDING" we assume it is.. okay? I guess?
+          return
+        }
+      }
+    } else {
+      // any other stage is treated as a failure
+      process.stdout.write(`failure!\nspace "${space}" is ${stage} (after ${elapsedTime} sec)\n`)
+      throw new Error(`failed to build space ${space} (reason: space is ${stage})`)
+    }
+  }
+}

src/app/api/utils/readMp3FileToBase64.ts ADDED Viewed

	@@ -0,0 +1,17 @@

+import { readFile } from "node:fs/promises"
+/**
+ * Read a file from disk and return its content as an "audio/mp3" data URI.
+ *
+ * @param filePath path of the file to read
+ * @returns a string of the form "data:audio/mp3;base64,<content>"
+ * @throws the read error (file not found, no permissions, etc.) after logging it
+ */
+export async function readMp3FileToBase64(filePath: string): Promise<string> {
+  let encoded: string;
+  try {
+    // load the raw bytes, then encode them in base64
+    const bytes = await readFile(filePath);
+    encoded = bytes.toString('base64');
+  } catch (error) {
+    // log the problem, then let the caller deal with it
+    console.error(error);
+    throw error;
+  }
+  return `data:audio/mp3;base64,${encoded}`;
+}

src/app/api/utils/sleep.ts ADDED Viewed

	@@ -0,0 +1,6 @@

+/**
+ * Pause for a given duration.
+ *
+ * @param durationInMs how long to wait, in milliseconds
+ * @returns a promise resolved with `true` once the delay has elapsed
+ */
+export const sleep = async (durationInMs: number) =>
+  new Promise((wakeUp) => {
+    setTimeout(() => wakeUp(true), durationInMs)
+  })

src/app/api/utils/timeout.ts ADDED Viewed

	@@ -0,0 +1,15 @@

+/**
+ * Race a promise against a timer.
+ *
+ * Note that the wrapped promise is not cancelled when the timer wins:
+ * only the returned promise is rejected.
+ *
+ * @param promise the promise to wait for
+ * @param ms the maximum time to wait, in milliseconds
+ * @param timeoutError the error used to reject when the timer wins the race
+ * @returns a promise settling like `promise`, or rejecting with `timeoutError`
+ * if `promise` has not settled after `ms` milliseconds
+ */
+export function timeout<T>(
+  promise: Promise<T>,
+  ms: number,
+  timeoutError = new Error('Promise timed out')
+): Promise<T> {
+  let timer: ReturnType<typeof setTimeout> | undefined;
+  // create a promise that rejects in milliseconds
+  const promiseWithTimeout = new Promise<never>((_, reject) => {
+    timer = setTimeout(() => {
+      reject(timeoutError);
+    }, ms);
+  });
+  // returns a race between timeout and the passed promise; once the race is
+  // settled the timer is cleared, otherwise it would stay pending (and keep
+  // the event loop busy) until `ms` has elapsed
+  return Promise.race<T>([promise, promiseWithTimeout]).finally(() => {
+    clearTimeout(timer);
+  });
+}

src/app/api/utils/tryApiCall.ts ADDED Viewed

	@@ -0,0 +1,69 @@

+import { makeSureSpaceIsRunning } from "./makeSureSpaceIsRunning"
+import { sleep } from "./sleep"
+import { timeout } from "./timeout"
+// durations expressed in milliseconds
+const sec = 1000
+const min = 60 *sec
+/**
+ * Call an API function, trying again after a pause whenever an attempt fails.
+ *
+ * The number of attempts is the length of `delays`; `delays[i]` is the pause
+ * observed after the failure of attempt `i`, before the next attempt. No pause
+ * is observed after the last attempt, since nothing is tried afterwards.
+ *
+ * @param func the function performing the actual API call
+ * @param huggingFaceSpace optional name of the Hugging Face space, used to
+ * "wake up" the space if necessary
+ * @param debug when true, the error of each failed attempt is logged
+ * @param failureMessage prefix of the error thrown once all attempts failed
+ * @param autostart when true, we make sure the space is running before each attempt
+ * @param timeoutInSec maximum duration of a single attempt
+ * @param delays pauses (in milliseconds) between two attempts
+ * @returns the result of the first successful call to `func`
+ * @throws if every attempt failed or timed out
+ */
+export async function tryApiCalls<T>({
+  func,
+  huggingFaceSpace,
+  debug = false,
+  failureMessage = "failed to call the endpoint",
+  autostart = true,
+  // wait up to 10 min
+  timeoutInSec = 10 * 60,
+  delays = [
+    5 *sec,
+    15 *sec,
+    40 *sec, // total 1 min wait time
+    //at this stage, if it is so slow it means we are probably waking up a model
+    // which is a slow operation (takes ~5 min)
+    2 *min, //     ~ 3 min ~
+    1 *min, //     ~ 4 min ~
+    1 *min, //     ~ 5 min ~
+  ]
+}: {
+  func: () => Promise<T>
+  // optional: the name of the hugging face space
+  // this will be used to "wake up" the space if necessary
+  huggingFaceSpace?: string
+  debug?: boolean
+  failureMessage?: string
+  autostart?: boolean
+  timeoutInSec?: number
+  delays?: number[]
+}) {
+  for (let i = 0; i < delays.length; i++) {
+    try {
+      if (autostart) {
+        await makeSureSpaceIsRunning({ space: huggingFaceSpace })
+      }
+      // due to an error with the Gradio client, sometimes calling the api.predict
+      // will never throw an error
+      const result = await timeout(
+        func(), // grab the promise
+        timeoutInSec * 1000,
+        new Error(`call to ${huggingFaceSpace || "the API"} failed after ${timeoutInSec} seconds`)
+      )
+      return result
+    } catch (err) {
+      if (debug) { console.error(err) }
+      process.stdout.write(".")
+      // pause before the next attempt (including after the very first failure),
+      // but do not waste time waiting when there is no attempt left
+      const isLastAttempt = i === delays.length - 1
+      if (!isLastAttempt) {
+        await sleep(delays[i])
+      }
+    }
+  }
+  throw new Error(`${failureMessage} after ${delays.length} attempts`)
+}

src/app/api/v1/edit/dialogues/route.ts ADDED Viewed

	@@ -0,0 +1,77 @@

+import { NextResponse, NextRequest } from "next/server"
+import { ClapEntity, ClapProject, ClapSegment, getClapAssetSourceType, newSegment, parseClap, serializeClap } from "@aitube/clap"
+import { startOfSegment1IsWithinSegment2 } from "@/lib/utils/startOfSegment1IsWithinSegment2"
+import { getToken } from "@/app/api/auth/getToken"
+import { getSpeechBackgroundAudioPrompt } from "@/components/interface/latent-engine/core/prompts/getSpeechBackgroundAudioPrompt"
+import { getSpeechForegroundAudioPrompt } from "@/components/interface/latent-engine/core/prompts/getSpeechForegroundAudioPrompt"
+import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
+/**
+ * A helper to generate speech for a Clap.
+ *
+ * The request body is parsed as a Clap project. For each "camera" segment
+ * (a shot), the first "dialogue" segment starting within that shot gets an
+ * audio asset generated with Parler-TTS, unless it already has an asset URL.
+ * The updated Clap is then serialized and returned.
+ *
+ * @param req the incoming request, whose body is a serialized Clap
+ * @returns the serialized Clap, augmented with the generated dialogues
+ * @throws if the Clap has no segments, if it has more than 32 shots,
+ * or if the generation of an audio fails
+ */
+export async function POST(req: NextRequest) {
+  const jwtToken = await getToken({ user: "anonymous" })
+  const blob = await req.blob()
+  const clap: ClapProject = await parseClap(blob)
+  if (!clap?.segments) { throw new Error(`no segment found in the provided clap!`) }
+  console.log(`[api/generate/dialogues] detected ${clap.segments.length} segments`)
+  const shotsSegments: ClapSegment[] = clap.segments.filter(s => s.category === "camera")
+  console.log(`[api/generate/dialogues] detected ${shotsSegments.length} shots`)
+  if (shotsSegments.length > 32) {
+    throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
+  }
+  for (const shotSegment of shotsSegments) {
+    // all the segments starting within the current shot
+    const shotSegments: ClapSegment[] = clap.segments.filter(s =>
+      startOfSegment1IsWithinSegment2(s, shotSegment)
+    )
+    const shotDialogueSegments: ClapSegment[] = shotSegments.filter(s =>
+      s.category === "dialogue"
+    )
+    // only the first dialogue of the shot is handled
+    const shotDialogueSegment: ClapSegment | undefined = shotDialogueSegments.at(0)
+    console.log(`[api/generate/dialogues] shot [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}] has ${shotSegments.length} segments (${shotDialogueSegments.length} dialogues)`)
+    if (!shotDialogueSegment) {
+      // a shot without dialogue is not the same thing as a shot whose
+      // dialogue already has an audio, so we log it differently
+      console.log(`[api/generate/dialogues] there is no dialogue in this shot, skipping..`)
+      continue
+    }
+    if (shotDialogueSegment.assetUrl) {
+      console.log(`[api/generate/dialogues] there is already a dialogue audio: ${shotDialogueSegment.assetUrl?.slice?.(0, 50)}...`)
+      continue
+    }
+    console.log(`[api/generate/dialogues] generating audio..`)
+    try {
+      shotDialogueSegment.assetUrl = await generateSpeechWithParlerTTS({
+        text: shotDialogueSegment.prompt,
+        audioId: getSpeechBackgroundAudioPrompt(shotSegments, clap.entityIndex, ["high quality", "crisp", "detailed"]),
+        debug: true,
+      })
+      shotDialogueSegment.assetSourceType = getClapAssetSourceType(shotDialogueSegment.assetUrl)
+      console.log("TODO julian: properly set the asset type format")
+    } catch (err) {
+      console.log(`[api/generate/dialogues] failed to generate audio: ${err}`)
+      throw err
+    }
+    console.log(`[api/generate/dialogues] generated dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)
+  }
+  console.log(`[api/generate/dialogues] returning the clap augmented with dialogues`)
+  return new NextResponse(await serializeClap(clap), {
+    status: 200,
+    headers: new Headers({ "content-type": "application/x-gzip" }),
+  })
+}

src/app/api/v1/edit/{models → entities}/generateAudioID.ts RENAMED Viewed

File without changes

src/app/api/v1/edit/{models → entities}/generateImageID.ts RENAMED Viewed

File without changes

src/app/api/v1/edit/{models → entities}/route.ts RENAMED Viewed

@@ -1,7 +1,7 @@
 import { NextResponse, NextRequest } from "next/server"
 import queryString from "query-string"
-import { parseClap, serializeClap, ClapModel } from "@aitube/clap"
 import { getToken } from "@/app/api/auth/getToken"
 import { generateImageID } from "./generateImageID"
@@ -25,7 +25,7 @@ export async function POST(req: NextRequest) {
   if (!prompt.length) { throw new Error(`please provide a prompt`) }
   */
-  console.log("[api/generate/models] request:", prompt)
   const jwtToken = await getToken({ user: "anonymous" })
@@ -33,40 +33,42 @@ export async function POST(req: NextRequest) {
   const clap = await parseClap(blob)
-  if (!clap.models.length) { throw new Error(`please provide at least one model`) }
-  for (const model of clap.models) {
     // TASK 1: GENERATE THE IMAGE PROMPT IF MISSING
-    if (!model.imagePrompt) {
-      model.imagePrompt = "a man with a beard"
     }
     // TASK 2: GENERATE THE IMAGE ID IF MISSING
-    if (!model.imageId) {
-      model.imageId = await generateImageID({
-        prompt: model.imagePrompt,
-        seed: model.seed
       })
     }
     // TASK 3: GENERATE THE AUDIO PROMPT IF MISSING
-    if (!model.audioPrompt) {
-      model.audioPrompt = "a man with a beard"
     }
     // TASK 4: GENERATE THE AUDIO ID IF MISSING
     // TODO here: call Parler-TTS or a generic audio generator
-    if (!model.audioId) {
-      model.audioId = await generateAudioID({
-        prompt: model.audioPrompt,
-        seed: model.seed
       })
     }
   }
-  console.log(`[api/generate/models] returning the clap extended with the model`)
   return new NextResponse(await serializeClap(clap), {
     status: 200,

 import { NextResponse, NextRequest } from "next/server"
 import queryString from "query-string"
+import { getClapAssetSourceType, parseClap, serializeClap } from "@aitube/clap"
 import { getToken } from "@/app/api/auth/getToken"
 import { generateImageID } from "./generateImageID"
   if (!prompt.length) { throw new Error(`please provide a prompt`) }
   */
+  console.log("[api/generate/entities] request:", prompt)
   const jwtToken = await getToken({ user: "anonymous" })
   const clap = await parseClap(blob)
+  if (!clap.entities.length) { throw new Error(`please provide at least one entity`) }
+  for (const entity of clap.entities) {
     // TASK 1: GENERATE THE IMAGE PROMPT IF MISSING
+    if (!entity.imagePrompt) {
+      entity.imagePrompt = "a man with a beard"
     }
     // TASK 2: GENERATE THE IMAGE ID IF MISSING
+    if (!entity.imageId) {
+      entity.imageId = await generateImageID({
+        prompt: entity.imagePrompt,
+        seed: entity.seed
       })
+      entity.imageSourceType = getClapAssetSourceType(entity.imageId)
     }
     // TASK 3: GENERATE THE AUDIO PROMPT IF MISSING
+    if (!entity.audioPrompt) {
+      entity.audioPrompt = "a man with a beard"
     }
     // TASK 4: GENERATE THE AUDIO ID IF MISSING
     // TODO here: call Parler-TTS or a generic audio generator
+    if (!entity.audioId) {
+      entity.audioId = await generateAudioID({
+        prompt: entity.audioPrompt,
+        seed: entity.seed
       })
+      entity.audioSourceType = getClapAssetSourceType(entity.audioId)
     }
   }
+  console.log(`[api/generate/entities] returning the clap extended with the entities`)
   return new NextResponse(await serializeClap(clap), {
     status: 200,

src/app/api/v1/edit/{models → entities}/systemPrompt.ts RENAMED Viewed

File without changes

src/app/api/v1/edit/storyboards/route.ts CHANGED Viewed

@@ -6,7 +6,6 @@ import { startOfSegment1IsWithinSegment2 } from "@/lib/utils/startOfSegment1IsWi
 import { getVideoPrompt } from "@/components/interface/latent-engine/core/prompts/getVideoPrompt"
 import { getToken } from "@/app/api/auth/getToken"
-import { newRender, getRender } from "@/app/api/providers/videochain/renderWithVideoChain"
 import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
 import { generateStoryboard } from "./generateStoryboard"
@@ -68,7 +67,7 @@ export async function POST(req: NextRequest) {
     // TASK 2: GENERATE MISSING STORYBOARD PROMPT
     if (shotStoryboardSegment && !shotStoryboardSegment?.prompt) {
       // storyboard is missing, let's generate it
-      shotStoryboardSegment.prompt = getVideoPrompt(shotSegments, {}, [])
       console.log(`[api/generate/storyboards] generating storyboard prompt: ${shotStoryboardSegment.prompt}`)
     }

 import { getVideoPrompt } from "@/components/interface/latent-engine/core/prompts/getVideoPrompt"
 import { getToken } from "@/app/api/auth/getToken"
 import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
 import { generateStoryboard } from "./generateStoryboard"
     // TASK 2: GENERATE MISSING STORYBOARD PROMPT
     if (shotStoryboardSegment && !shotStoryboardSegment?.prompt) {
       // storyboard is missing, let's generate it
+      shotStoryboardSegment.prompt = getVideoPrompt(shotSegments, clap.entityIndex, ["high quality", "crisp", "detailed"])
       console.log(`[api/generate/storyboards] generating storyboard prompt: ${shotStoryboardSegment.prompt}`)
     }

src/components/interface/latent-engine/core/prompts/getCharacterPrompt.ts CHANGED Viewed

@@ -1,9 +1,9 @@
-import { ClapModel } from "@aitube/clap"
-export function getCharacterPrompt(model: ClapModel): string {
   let characterPrompt = ""
-  if (model.description) {
     characterPrompt = [
       // the label (character name) can help making the prompt more unique
       // this might backfires however, if the name is
@@ -11,15 +11,15 @@ export function getCharacterPrompt(model: ClapModel): string {
       // I'm not sure stable diffusion really needs this,
       // so let's skip it for now (might still be useful for locations, though)
       // we also want to avoid triggering "famous people" (BARBOSSA etc)
-      // model.label,
-      model.description
     ].join(", ")
   } else {
     characterPrompt = [
-      model.gender !== "object" ? model.gender : "",
-      model.age ? `aged ${model.age}yo` : '',
-      model.label ? `named ${model.label}` : '',
     ].map(i => i.trim()).filter(i => i).join(", ")
   }
   return characterPrompt

+import { ClapEntity } from "@aitube/clap"
+export function getCharacterPrompt(entity: ClapEntity): string {
   let characterPrompt = ""
+  if (entity.description) {
     characterPrompt = [
       // the label (character name) can help making the prompt more unique
       // this might backfires however, if the name is
       // I'm not sure stable diffusion really needs this,
       // so let's skip it for now (might still be useful for locations, though)
       // we also want to avoid triggering "famous people" (BARBOSSA etc)
+      // entity.label,
+      entity.description
     ].join(", ")
   } else {
     characterPrompt = [
+      entity.gender !== "object" ? entity.gender : "",
+      entity.age ? `aged ${entity.age}yo` : '',
+      entity.label ? `named ${entity.label}` : '',
     ].map(i => i.trim()).filter(i => i).join(", ")
   }
   return characterPrompt

src/components/interface/latent-engine/core/prompts/getSpeechBackgroundAudioPrompt.ts ADDED Viewed

	@@ -0,0 +1,52 @@

+import { ClapEntity, ClapSegment } from "@aitube/clap"
+import { getCharacterPrompt } from "./getCharacterPrompt"
+/**
+ * Build the prompt describing the audio background of a voice,
+ * from a list of active segments.
+ *
+ * Only the "dialogue", "weather" and "location" segments are used. They are
+ * sorted by label (in descending order), turned into prompt fragments, then
+ * joined with ". " together with the extra positive prompts.
+ *
+ * @param segments the active segments
+ * @param entitiesById the entities, indexed by their id
+ * @param extraPositivePrompt fragments appended at the end ("clear sound, high quality" etc)
+ * @returns the prompt
+ */
+export function getSpeechBackgroundAudioPrompt(
+  segments: ClapSegment[] = [],
+  entitiesById: Record<string, ClapEntity> = {},
+  extraPositivePrompt: string[] = [] // "clear sound, high quality" etc
+): string {
+  const usefulSegments = segments.filter(segment =>
+    segment.category === "dialogue" ||
+    segment.category === "weather" ||
+    segment.category === "location"
+  )
+  usefulSegments.sort((first, second) => second.label.localeCompare(first.label))
+  const fragments: string[] = []
+  for (const segment of usefulSegments) {
+    let fragment = ""
+    switch (segment.category) {
+      case "dialogue": {
+        const entity: ClapEntity | undefined = entitiesById[segment?.entityId || ""] || undefined
+        // without an entity we cannot assume anything about
+        // the gender, the age or the timbre of the voice
+        fragment = entity
+          ? `${getCharacterPrompt(entity)}, speaking normally`
+          : `person, speaking normally`
+        break
+      }
+      case "location":
+      case "weather":
+        // the location and the weather are part of the background noise
+        // (this might produce weird and unexpected results - we'll see!)
+        fragment = segment.prompt
+        break
+    }
+    // empty fragments are ignored
+    if (fragment) {
+      fragments.push(fragment)
+    }
+  }
+  return [...fragments, ...extraPositivePrompt].join(". ")
+}

src/components/interface/latent-engine/core/prompts/getSpeechForegroundAudioPrompt.ts ADDED Viewed

	@@ -0,0 +1,20 @@

+import {  ClapSegment } from "@aitube/clap"
+/**
+ * Build the prompt describing the audio foreground of a voice,
+ * from a list of active segments.
+ *
+ * The foreground is the "dialogue" itself, ie. the actual spoken words:
+ * nothing fancy is needed here, the raw text of the dialogue segments
+ * (sorted by label, in descending order) is joined with ". ".
+ *
+ * @param segments the active segments
+ * @returns the spoken text
+ */
+export function getSpeechForegroundAudioPrompt(
+  segments: ClapSegment[] = []
+): string {
+  const dialogues = segments.filter(segment => segment.category === "dialogue")
+  dialogues.sort((first, second) => second.label.localeCompare(first.label))
+  const spokenLines: string[] = []
+  for (const dialogue of dialogues) {
+    // dialogues without any text are ignored
+    if (dialogue.prompt) {
+      spokenLines.push(dialogue.prompt)
+    }
+  }
+  return spokenLines.join(". ")
+}

src/components/interface/latent-engine/core/prompts/getVideoPrompt.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ClapModel, ClapSegment } from "@aitube/clap"
 import { deduplicatePrompt } from "../../utils/prompting/deduplicatePrompt"
@@ -11,12 +11,12 @@ import { getCharacterPrompt } from "./getCharacterPrompt"
  * @returns
  */
 export function getVideoPrompt(
-  segments: ClapSegment[],
-  modelsById: Record<string, ClapModel>,
-  extraPositivePrompt: string[]
 ): string {
-  // console.log("modelsById:", modelsById)
   // to construct the video we need to collect all the segments describing it
   // we ignore unrelated categories (music, dialogue) or non-prompt items (eg. an audio sample)
@@ -60,23 +60,23 @@ export function getVideoPrompt(
   tmp.sort((a, b) => b.label.localeCompare(a.label))
   let videoPrompt = tmp.map(segment => {
-    const model: ClapModel | undefined = modelsById[segment?.modelId || ""] || undefined
     if (segment.category === "dialogue") {
-      // if we can't find the model, then we are unable
       // to make any assumption about the gender, age or appearance
-      if (!model) {
-        console.log("ERROR: this is a dialogue, but couldn't find the model!")
         return `portrait of a person speaking, blurry background, bokeh`
       }
-      const characterTrigger = model?.triggerName || ""
-      const characterLabel = model?.label || ""
-      const characterDescription = model?.description || ""
       const dialogueLine = segment?.prompt || ""
-      const characterPrompt = getCharacterPrompt(model)
       // in the context of a video, we some something additional:
       // we create a "bokeh" style
@@ -84,13 +84,13 @@ export function getVideoPrompt(
     } else if (segment.category === "location") {
-      // if we can't find the location's model, we default to returning the prompt
-      if (!model) {
-        console.log("ERROR: this is a location, but couldn't find the model!")
         return segment.prompt
       }
-      return model.description
     } else {
       return segment.prompt
     }

+import { ClapEntity, ClapSegment } from "@aitube/clap"
 import { deduplicatePrompt } from "../../utils/prompting/deduplicatePrompt"
  * @returns
  */
 export function getVideoPrompt(
+  segments: ClapSegment[] = [],
+  entitiesIndex: Record<string, ClapEntity> = {},
+  extraPositivePrompt: string[] = []
 ): string {
+  // console.log("entitiesIndex:", entitiesIndex)
   // to construct the video we need to collect all the segments describing it
   // we ignore unrelated categories (music, dialogue) or non-prompt items (eg. an audio sample)
   tmp.sort((a, b) => b.label.localeCompare(a.label))
   let videoPrompt = tmp.map(segment => {
+    const entity: ClapEntity | undefined = entitiesIndex[segment?.entityId || ""] || undefined
     if (segment.category === "dialogue") {
+      // if we can't find the entity, then we are unable
       // to make any assumption about the gender, age or appearance
+      if (!entity) {
+        console.log("ERROR: this is a dialogue, but couldn't find the entity!")
         return `portrait of a person speaking, blurry background, bokeh`
       }
+      const characterTrigger = entity?.triggerName || ""
+      const characterLabel = entity?.label || ""
+      const characterDescription = entity?.description || ""
       const dialogueLine = segment?.prompt || ""
+      const characterPrompt = getCharacterPrompt(entity)
       // in the context of a video, we some something additional:
       // we create a "bokeh" style
     } else if (segment.category === "location") {
+      // if we can't find the location's entity, we default to returning the prompt
+      if (!entity) {
+        console.log("ERROR: this is a location, but couldn't find the entity!")
         return segment.prompt
       }
+      return entity.description
     } else {
       return segment.prompt
     }

src/components/interface/latent-engine/core/useLatentEngine.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { create } from "zustand"
-import { ClapModel, ClapProject, ClapSegment, newClap, parseClap } from "@aitube/clap"
 import { LatentEngineStore } from "./types"
 import { resolveSegments } from "../resolvers/resolveSegments"
@@ -409,9 +409,7 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
     //
     // yes: I know the code is complex and not intuitive - sorry about that
-    // TODO Julian: use the Clap project to fill in those
-    const modelsById: Record<string, ClapModel> = {}
-    const extraPositivePrompt: string[] = []
     let bufferAheadOfCurrentPositionInMs = positionInMs
@@ -427,7 +425,7 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
       bufferAheadOfCurrentPositionInMs += videoDurationInMs
-      const prompt = getVideoPrompt(shotSegmentsToPreload, modelsById, extraPositivePrompt)
       console.log(`video prompt: ${prompt}`)
       // could also be the camera

 import { create } from "zustand"
+import { ClapEntity, ClapProject, ClapSegment, newClap, parseClap } from "@aitube/clap"
 import { LatentEngineStore } from "./types"
 import { resolveSegments } from "../resolvers/resolveSegments"
     //
     // yes: I know the code is complex and not intuitive - sorry about that
+    const extraPositivePrompt: string[] = ["high quality", "crisp", "detailed"]
     let bufferAheadOfCurrentPositionInMs = positionInMs
       bufferAheadOfCurrentPositionInMs += videoDurationInMs
+      const prompt = getVideoPrompt(shotSegmentsToPreload, clap.entityIndex, extraPositivePrompt)
       console.log(`video prompt: ${prompt}`)
       // could also be the camera

src/lib/business/getClapAssetSourceType.ts DELETED Viewed

@@ -1,25 +0,0 @@
-import { ClapAssetSource } from "@aitube/clap"
-export function getClapAssetSourceSource(input: string = ""): ClapAssetSource {
-  const str = `${input || ""}`.trim()
-  if (!str || !str.length) {
-    return "EMPTY"
-  }
-  if (str.startsWith("https://") || str.startsWith("http://")) {
-    return "REMOTE"
-  }
-  // note that "path" assets are potentially a security risk, they need to be treated with care
-  if (str.startsWith("/") || str.startsWith("../") || str.startsWith("./")) {
-    return "PATH"
-  }
-  if (str.startsWith("data:")) {
-    return "DATA"
-  }
-  return "PROMPT"
-}