Update for new arena
app.py CHANGED
@@ -36,10 +36,8 @@ with open('ja_sentences.txt') as f:
 # Constants
 ####################################
 
-
-
-SPK3 = os.getenv('KOTOBA_SPK3')
-SPK4 = os.getenv('KOTOBA_SPK4')
+# Configure the API TTS URL here
+KOTOBA_API_URL = os.getenv('KOTOBA_API_URL', 'https://api.example.com/tts')
 
 AVAILABLE_MODELS = {
     # 'XTTSv2': 'xtts',
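The per-speaker `KOTOBA_SPK3`/`KOTOBA_SPK4` variables give way to a single `KOTOBA_API_URL`, read from the environment with `https://api.example.com/tts` as a placeholder default. A minimal sketch of a startup guard a deployment might add (not part of the diff; the check and warning are assumptions):

```python
import os

# Hypothetical guard: warn when the Space is still pointing at the placeholder
# endpoint instead of a configured KOTOBA_API_URL value.
KOTOBA_API_URL = os.getenv('KOTOBA_API_URL', 'https://api.example.com/tts')
if 'example.com' in KOTOBA_API_URL:
    print('Warning: KOTOBA_API_URL is not configured; kotoba-tts requests will fail.')
```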
@@ -57,10 +55,7 @@ AVAILABLE_MODELS = {
     # 'Parler TTS': 'parler'
     'MOE-VITS': 'moe-vits',
     'BARK': 'bark',
-
-    f'KOTOBA-SPEECH-{SPK2.upper()}': f'kotoba-speech-{SPK2.lower()}',
-    f'KOTOBA-SPEECH-{SPK3.upper()}': f'kotoba-speech-{SPK3.lower()}',
-    f'KOTOBA-SPEECH-{SPK4.upper()}': f'kotoba-speech-{SPK4.lower()}',
+    'KOTOBA-TTS': 'kotoba-tts',
     #'BLANE-TTS': 'blane-tts',
     'AMITARO-VITS': 'amitaro-vits',
     'GOOGLE-TTS': 'google-tts',
@@ -130,60 +125,12 @@ def get_db():
 
 def get_tts_file(text: str, model: str):
     url = {
-        f"kotoba-speech-{SPK1.lower()}": "https://kotoba-tech-kotoba-speech.hf.space/gradio_api/call/tts",
-        f"kotoba-speech-{SPK2.lower()}": "https://kotoba-tech-kotoba-speech.hf.space/gradio_api/call/tts",
-        f"kotoba-speech-{SPK3.lower()}": "https://kotoba-tech-kotoba-speech.hf.space/gradio_api/call/tts",
-        f"kotoba-speech-{SPK4.lower()}": "https://kotoba-tech-kotoba-speech.hf.space/gradio_api/call/tts",
         "blane-tts": "https://blane187-blane-tts.hf.space/call/get_audio_file"
     }
     headers = {
         "Content-Type": "application/json"
     }
     data = {
-        f"kotoba-speech-{SPK1.lower()}": {
-            "data": [
-                text,
-                5,
-                5,
-                "Preset voices",
-                SPK1,
-                {"path": "fam/ui/voice01_A.mp3"},
-                {"path": "fam/ui/voice01_A.mp3"}
-            ]
-        },
-        f"kotoba-speech-{SPK2.lower()}": {
-            "data": [
-                text,
-                5,
-                5,
-                "Preset voices",
-                SPK2,
-                {"path": "fam/ui/voice01_A.mp3"},
-                {"path": "fam/ui/voice01_A.mp3"}
-            ]
-        },
-        f"kotoba-speech-{SPK3.lower()}": {
-            "data": [
-                text,
-                5,
-                5,
-                "Preset voices",
-                SPK3,
-                {"path": "fam/ui/voice01_A.mp3"},
-                {"path": "fam/ui/voice01_A.mp3"}
-            ]
-        },
-        f"kotoba-speech-{SPK4.lower()}": {
-            "data": [
-                text,
-                5,
-                5,
-                "Preset voices",
-                SPK4,
-                {"path": "fam/ui/voice01_A.mp3"},
-                {"path": "fam/ui/voice01_A.mp3"}
-            ]
-        },
         "blane-tts": {
             "data": [
                 text,
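With the four Kotoba Speech speakers removed, `get_tts_file` keeps only the `blane-tts` entries in its lookup tables. The rest of the function is not shown in this hunk; based on the dictionaries above it presumably posts the per-model payload as JSON and then fetches the generated file. A minimal sketch under that assumption (`get_tts_file_sketch` and its single-element payload are illustrative, not the real implementation):

```python
import requests

def get_tts_file_sketch(text: str, model: str):
    # Lookup tables shaped like the ones in the diff, reduced to the remaining model.
    url = {"blane-tts": "https://blane187-blane-tts.hf.space/call/get_audio_file"}
    headers = {"Content-Type": "application/json"}
    data = {"blane-tts": {"data": [text]}}

    # POST the per-model payload; the real function goes on to retrieve the
    # generated audio file from whatever job information comes back.
    response = requests.post(url[model], headers=headers, json=data[model])
    response.raise_for_status()
    return response.json()
```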
@@ -398,10 +345,7 @@ model_names = {
     # 'metavoice': 'MetaVoice-1B',
     'BARK': 'BARK',
     'MOE-VITS': 'MOE-VITS',
-
-    f'KOTOBA-SPEECH-{SPK2.upper()}': 'KOTOBA-SPEECH-SPK2',
-    f'KOTOBA-SPEECH-{SPK3.upper()}': 'KOTOBA-SPEECH-SPK3',
-    f'KOTOBA-SPEECH-{SPK4.upper()}': 'KOTOBA-SPEECH-SPK4',
+    'KOTOBA-TTS': 'kotoba-tts',
     'BLANE-TTS': 'BLANE-TTS',
     'AMITARO-VITS': 'AMITARO-VITS',
     'GOOGLE-TTS': 'GOOGLE-TTS',
@@ -456,10 +400,7 @@ model_links = {
     # 'metavoice': 'https://github.com/metavoiceio/metavoice-src',
     'bark': 'https://suno-bark.hf.space/',
     'moe-vits': 'skytnt/moe-tts',
-
-    f'kotoba-speech-{SPK2.lower()}': 'https://kotoba-tech-kotoba-speech.hf.space/gradio_api/',
-    f'kotoba-speech-{SPK3.lower()}': 'https://kotoba-tech-kotoba-speech.hf.space/gradio_api/',
-    f'kotoba-speech-{SPK4.lower()}': 'https://kotoba-tech-kotoba-speech.hf.space/gradio_api/',
+    'kotoba-tts': KOTOBA_API_URL,
     'blane-tts': 'https://blane187-blane-tts.hf.space/',
     'amitaro-vits': 'https://lycoris53-vits-tts-japanese-only-amitaro.hf.space/'
 }
@@ -706,6 +647,49 @@ def doresample(path_to_wav):
 # 2x speedup (hopefully) #
 ##########################
 
+def get_kotoba_tts(text):
+    """
+    Call the Kotoba TTS API to generate speech from text.
+
+    Args:
+        text (str): The text to convert to speech
+        voice (str): The voice to use (e.g., "Newscaster (man)")
+
+    Returns:
+        str: Path to the generated audio file
+    """
+    # Request headers
+    headers = {
+        "Content-Type": "application/json"
+    }
+
+    # Request payload
+    data = {
+        "text": text,
+    }
+
+    # Create a temporary file to save the audio
+    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
+        output_path = temp_file.name
+
+    # Make the POST request and save the response directly to the file
+    response = requests.post(
+        KOTOBA_API_URL,
+        headers=headers,
+        json=data,
+        stream=True
+    )
+
+    # Check if the request was successful
+    response.raise_for_status()
+
+    # Save the response content to the output file
+    with open(output_path, 'wb') as f:
+        for chunk in response.iter_content(chunk_size=8192):
+            f.write(chunk)
+
+    return output_path
+
 def synthandreturn(text, retry=0):
     text = text.strip()
     if len(text) > MAX_SAMPLE_TXT_LENGTH:
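The new `get_kotoba_tts` helper posts the text to `KOTOBA_API_URL`, streams the response into a temporary WAV file, and returns that file's path. A short usage sketch, assuming the endpoint is reachable and that app.py's module-level `requests` and `tempfile` imports are in place (the sample text is illustrative):

```python
import os

wav_path = get_kotoba_tts("こんにちは、今日はいい天気ですね。")
print(f"Generated audio saved to: {wav_path}")

# The helper creates the file with NamedTemporaryFile(delete=False), so the
# caller is responsible for removing it once it is no longer needed.
os.remove(wav_path)
```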
@@ -759,7 +743,11 @@ def synthandreturn(text, retry=0):
     elif model == "openai-tts":
         local_filename = '/tmp/' + str(mkuuid(None)) + '.wav'
         result = get_openai_tts(text, local_filename=local_filename)
+    elif model == "kotoba-tts":
+        result = get_kotoba_tts(text)
+        print(f"API TTS audio file: {result}")
     else:
+        # For other models that use the original approach
         result = get_tts_file(text, model)
         # URL to download the file from
         url = f"{model_links[model]}file={result}"
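One note on this branch, hedged because the diff viewer does not preserve indentation: `get_kotoba_tts` already returns a local file path, while the `get_tts_file` path still needs the `model_links`-based download URL built afterwards. A rough sketch of that split, reusing the helpers shown above (the wrapper function and the exact indentation are reconstructions, not code from the commit):

```python
def resolve_audio(text: str, model: str, model_links: dict) -> str:
    if model == "kotoba-tts":
        # Already a local WAV path written by get_kotoba_tts.
        return get_kotoba_tts(text)
    # Other models return a remote file reference that still has to be
    # downloaded from the model's Space.
    remote_path = get_tts_file(text, model)
    return f"{model_links[model]}file={remote_path}"
```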