arible
/

narrator

Inference Endpoints

Model card Files and versions Community

sim04ful committed on Apr 23

Commit

8a5d43b

•

1 Parent(s): 5e4143f

type casting

Browse files

Files changed (2) hide show

arible_schema_power.json +78 -0
handler.py +6 -6

arible_schema_power.json ADDED Viewed

	@@ -0,0 +1,78 @@

+{
+    "title": "AI Voice Cloner",
+    "description": "Clone a voice using AI",
+    "inputs": [
+        {
+            "name": "text",
+            "type": "text",
+            "description": "Text to be narrated",
+            "area": true,
+            "options": {
+                "min": 100,
+                "max": 50000
+            },
+            "title": "Content"
+        },
+        {
+            "name": "audio_urls",
+            "type": "constant",
+            "value": [
+                "https://pub-93685b189ac24b30839990a7d9a14391.r2.dev/attenborough_short.wav"
+            ]
+        },
+        {
+            "name": "gpt_cond_len",
+            "type": "number",
+            "description": "Length of audio used for GPT latents.",
+            "title": "GPT Conditioning Length",
+            "options": {
+                "min": 6,
+                "max": 60
+            },
+            "slider_step": 0.5
+        },
+        {
+            "name": "gpt_cond_chunk_len",
+            "type": "number",
+            "description": "Length of audio chunks used for GPT latents.",
+            "title": "GPT Conditioning Chunk Length",
+            "options": {
+                "min": 6,
+                "max": 60
+            },
+            "slider_step": 0.5
+        },
+        {
+            "name": "max_ref_length",
+            "type": "constant",
+            "value": 30
+        },
+        {
+            "name": "temperature",
+            "type": "number",
+            "description": "Temperature for sampling.",
+            "title": "Temperature",
+            "options": {
+                "min": 0.0,
+                "max": 1.0
+            },
+            "slider_step": 0.1
+        },
+        {
+            "name": "repetition_penalty",
+            "type": "number",
+            "description": "Penalty for repetition.",
+            "title": "Repetition Penalty",
+            "options": {
+                "min": 1.0,
+                "max": 10.0
+            },
+            "slider_step": 0.1
+        },
+        {
+            "name": "language",
+            "type": "constant",
+            "value": "en"
+        }
+    ]
+}

handler.py CHANGED Viewed

@@ -69,9 +69,9 @@ class EndpointHandler:
             speaker_embedding,
         ) = self.model.get_conditioning_latents(
             audio_path=audio_paths,
-            gpt_cond_len=model_input["gpt_cond_len"],
-            gpt_cond_chunk_len=model_input["gpt_cond_chunk_len"],
-            max_ref_length=model_input["max_ref_length"],
         )
         print("Generating audio")
@@ -81,10 +81,10 @@ class EndpointHandler:
             text=model_input["text"],
             speaker_embedding=speaker_embedding,
             gpt_cond_latent=gpt_cond_latent,
-            temperature=model_input["temperature"],
-            repetition_penalty=model_input["repetition_penalty"],
             language=model_input["language"],
-            enable_text_splitting=True,
         )
         audio_file = io.BytesIO()
         torchaudio.save(

             speaker_embedding,
         ) = self.model.get_conditioning_latents(
             audio_path=audio_paths,
+            gpt_cond_len=int(model_input["gpt_cond_len"]),
+            gpt_cond_chunk_len=int(model_input["gpt_cond_chunk_len"]),
+            max_ref_length=int(model_input["max_ref_length"]),
         )
         print("Generating audio")
             text=model_input["text"],
             speaker_embedding=speaker_embedding,
             gpt_cond_latent=gpt_cond_latent,
+            temperature=float(model_input["temperature"]),
+            repetition_penalty=float(model_input["repetition_penalty"]),
             language=model_input["language"],
+            enable_text_splitting=False,
         )
         audio_file = io.BytesIO()
         torchaudio.save(