kamahori committed
Commit 11f8a08 · 1 Parent(s): e308dcc

Update for new arena

Files changed (1)
  1. app.py +52 -64
app.py CHANGED
@@ -36,10 +36,8 @@ with open('ja_sentences.txt') as f:
 # Constants
 ####################################

- SPK1 = os.getenv('KOTOBA_SPK1')
- SPK2 = os.getenv('KOTOBA_SPK2')
- SPK3 = os.getenv('KOTOBA_SPK3')
- SPK4 = os.getenv('KOTOBA_SPK4')
+ # Configure the API TTS URL here
+ KOTOBA_API_URL = os.getenv('KOTOBA_API_URL', 'https://api.example.com/tts')

 AVAILABLE_MODELS = {
     # 'XTTSv2': 'xtts',
@@ -57,10 +55,7 @@ AVAILABLE_MODELS = {
     # 'Parler TTS': 'parler'
     'MOE-VITS': 'moe-vits',
     'BARK': 'bark',
-     f'KOTOBA-SPEECH-{SPK1.upper()}': f'kotoba-speech-{SPK1.lower()}',
-     f'KOTOBA-SPEECH-{SPK2.upper()}': f'kotoba-speech-{SPK2.lower()}',
-     f'KOTOBA-SPEECH-{SPK3.upper()}': f'kotoba-speech-{SPK3.lower()}',
-     f'KOTOBA-SPEECH-{SPK4.upper()}': f'kotoba-speech-{SPK4.lower()}',
+     'KOTOBA-TTS': 'kotoba-tts',
     #'BLANE-TTS': 'blane-tts',
     'AMITARO-VITS': 'amitaro-vits',
     'GOOGLE-TTS': 'google-tts',
@@ -130,60 +125,12 @@ def get_db():

 def get_tts_file(text: str, model: str):
     url = {
-         f"kotoba-speech-{SPK1.lower()}": "https://kotoba-tech-kotoba-speech.hf.space/gradio_api/call/tts",
-         f"kotoba-speech-{SPK2.lower()}": "https://kotoba-tech-kotoba-speech.hf.space/gradio_api/call/tts",
-         f"kotoba-speech-{SPK3.lower()}": "https://kotoba-tech-kotoba-speech.hf.space/gradio_api/call/tts",
-         f"kotoba-speech-{SPK4.lower()}": "https://kotoba-tech-kotoba-speech.hf.space/gradio_api/call/tts",
         "blane-tts": "https://blane187-blane-tts.hf.space/call/get_audio_file"
     }
     headers = {
         "Content-Type": "application/json"
     }
     data = {
-         f"kotoba-speech-{SPK1.lower()}": {
-             "data": [
-                 text,
-                 5,
-                 5,
-                 "Preset voices",
-                 SPK1,
-                 {"path": "fam/ui/voice01_A.mp3"},
-                 {"path": "fam/ui/voice01_A.mp3"}
-             ]
-         },
-         f"kotoba-speech-{SPK2.lower()}": {
-             "data": [
-                 text,
-                 5,
-                 5,
-                 "Preset voices",
-                 SPK2,
-                 {"path": "fam/ui/voice01_A.mp3"},
-                 {"path": "fam/ui/voice01_A.mp3"}
-             ]
-         },
-         f"kotoba-speech-{SPK3.lower()}": {
-             "data": [
-                 text,
-                 5,
-                 5,
-                 "Preset voices",
-                 SPK3,
-                 {"path": "fam/ui/voice01_A.mp3"},
-                 {"path": "fam/ui/voice01_A.mp3"}
-             ]
-         },
-         f"kotoba-speech-{SPK4.lower()}": {
-             "data": [
-                 text,
-                 5,
-                 5,
-                 "Preset voices",
-                 SPK4,
-                 {"path": "fam/ui/voice01_A.mp3"},
-                 {"path": "fam/ui/voice01_A.mp3"}
-             ]
-         },
         "blane-tts": {
             "data": [
                 text,
@@ -398,10 +345,7 @@ model_names = {
     # 'metavoice': 'MetaVoice-1B',
     'BARK': 'BARK',
     'MOE-VITS': 'MOE-VITS',
-     f'KOTOBA-SPEECH-{SPK1.upper()}': 'KOTOBA-SPEECH-SPK1',
-     f'KOTOBA-SPEECH-{SPK2.upper()}': 'KOTOBA-SPEECH-SPK2',
-     f'KOTOBA-SPEECH-{SPK3.upper()}': 'KOTOBA-SPEECH-SPK3',
-     f'KOTOBA-SPEECH-{SPK4.upper()}': 'KOTOBA-SPEECH-SPK4',
+     'KOTOBA-TTS': 'kotoba-tts',
     'BLANE-TTS': 'BLANE-TTS',
     'AMITARO-VITS': 'AMITARO-VITS',
     'GOOGLE-TTS': 'GOOGLE-TTS',
@@ -456,10 +400,7 @@ model_links = {
     # 'metavoice': 'https://github.com/metavoiceio/metavoice-src',
     'bark': 'https://suno-bark.hf.space/',
     'moe-vits': 'skytnt/moe-tts',
-     f'kotoba-speech-{SPK1.lower()}': 'https://kotoba-tech-kotoba-speech.hf.space/gradio_api/',
-     f'kotoba-speech-{SPK2.lower()}': 'https://kotoba-tech-kotoba-speech.hf.space/gradio_api/',
-     f'kotoba-speech-{SPK3.lower()}': 'https://kotoba-tech-kotoba-speech.hf.space/gradio_api/',
-     f'kotoba-speech-{SPK4.lower()}': 'https://kotoba-tech-kotoba-speech.hf.space/gradio_api/',
+     'kotoba-tts': KOTOBA_API_URL,
     'blane-tts': 'https://blane187-blane-tts.hf.space/',
     'amitaro-vits': 'https://lycoris53-vits-tts-japanese-only-amitaro.hf.space/'
 }
@@ -706,6 +647,49 @@ def doresample(path_to_wav):
 # 2x speedup (hopefully) #
 ##########################

+ def get_kotoba_tts(text):
+     """
+     Call the Kotoba TTS API to generate speech from text.
+
+     Args:
+         text (str): The text to convert to speech
+         voice (str): The voice to use (e.g., "Newscaster (man)")
+
+     Returns:
+         str: Path to the generated audio file
+     """
+     # Request headers
+     headers = {
+         "Content-Type": "application/json"
+     }
+
+     # Request payload
+     data = {
+         "text": text,
+     }
+
+     # Create a temporary file to save the audio
+     with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
+         output_path = temp_file.name
+
+     # Make the POST request and save the response directly to the file
+     response = requests.post(
+         KOTOBA_API_URL,
+         headers=headers,
+         json=data,
+         stream=True
+     )
+
+     # Check if the request was successful
+     response.raise_for_status()
+
+     # Save the response content to the output file
+     with open(output_path, 'wb') as f:
+         for chunk in response.iter_content(chunk_size=8192):
+             f.write(chunk)
+
+     return output_path
+
 def synthandreturn(text, retry=0):
     text = text.strip()
     if len(text) > MAX_SAMPLE_TXT_LENGTH:
@@ -759,7 +743,11 @@ def synthandreturn(text, retry=0):
     elif model == "openai-tts":
         local_filename = '/tmp/' + str(mkuuid(None)) + '.wav'
         result = get_openai_tts(text, local_filename=local_filename)
+     elif model == "kotoba-tts":
+         result = get_kotoba_tts(text)
+         print(f"API TTS audio file: {result}")
     else:
+         # For other models that use the original approach
         result = get_tts_file(text, model)
         # URL to download the file from
         url = f"{model_links[model]}file={result}"
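
The new get_kotoba_tts helper implies a simple contract for the endpoint behind KOTOBA_API_URL: a POST with a JSON body containing a single text field, answered with streamed WAV bytes. A minimal stand-alone sketch that exercises that assumed contract outside the arena is shown below; the fallback URL, the save_tts name, and the sample sentence are placeholders, not part of app.py.

import os
import tempfile

import requests

# Same env var the arena reads; the fallback URL is only a placeholder.
KOTOBA_API_URL = os.getenv("KOTOBA_API_URL", "https://api.example.com/tts")

def save_tts(text: str) -> str:
    """POST text to the assumed TTS endpoint and stream the WAV reply to a temp file."""
    response = requests.post(
        KOTOBA_API_URL,
        headers={"Content-Type": "application/json"},
        json={"text": text},
        stream=True,
        timeout=60,
    )
    response.raise_for_status()
    # Write the streamed audio to a temporary .wav file and return its path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
        return f.name

if __name__ == "__main__":
    # Hypothetical sample sentence; any short Japanese text works the same way.
    print(save_tts("こんにちは、音声合成のテストです。"))

Unlike get_kotoba_tts in app.py, this sketch passes a timeout and writes inside the NamedTemporaryFile context, but the request and response shape it assumes is the same.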