Pendrokar committed
Commit 234e026
1 Parent(s): eb6a051

param fixes; openvoicev2

Files changed (1):
  app.py +111 -25
app.py CHANGED
@@ -35,24 +35,101 @@ AVAILABLE_MODELS = {
     # 'WhisperSpeech': 'whisperspeech',
     # 'ElevenLabs': 'eleven',
     # 'OpenVoice': 'openvoice',
-    # 'Pheme': 'pheme',
-    # 'MetaVoice': 'metavoice'
+    # 'OpenVoice V2': 'openvoicev2',
+    # 'Play.HT 2.0': 'playht',
+    # 'MetaVoice': 'metavoice',
     # 'MeloTTS': 'melo',
     # 'StyleTTS 2': 'styletts2',
+    # 'GPT-SoVITS': 'sovits',
+    # 'Vokan TTS': 'vokan',
+    # 'VoiceCraft 2.0': 'voicecraft',
+    # 'Parler TTS': 'parler'
+
+    'coqui/xtts': 'coqui/xtts',
+    'collabora/WhisperSpeech': 'collabora/WhisperSpeech',
+    # 'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice',
+    'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2',
+    'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1',
+    'Pendrokar/xVASynth': 'Pendrokar/xVASynth',
+    # 'coqui/CoquiTTS': 'coqui/CoquiTTS',
+    'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS',
 
-    # '<Space>': <function>#<return-index-of-audio-param>
-    'coqui/xtts': '1#1',
-    'collabora/WhisperSpeech': '/whisper_speech_demo#0',
-    'myshell-ai/OpenVoice': '1#1',
-    'mrfakename/MetaVoice-1B-v0.1': '/tts#0',
+    # TTS w issues
+    # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
+    # 'amphion/Text-to-Speech': '/predict#0', # old running space, takes a whole minute to synthesize
+    # 'suno/bark': '3#0', # Hallucinates
+    # 'shivammehta25/Matcha-TTS': '5#0', # seems to require multiple requests for setup
+    # 'styletts2/styletts2': '0#0', # API disabled
+    # 'Manmay/tortoise-tts': '/predict#0', # Cannot skip text-from-file parameter
+    # 'pytorch/Tacotron2': '0#0', # old gradio
+    # 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # old gradio - ValueError: Unsupported protocol: sse_v3
+    # 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # old gradio - ValueError: Unsupported protocol: sse_v3
+}
 
+HF_SPACES = {
+    # XTTS v2
+    'coqui/xtts': {
+        'name': 'coqui/xtts',
+        'function': '1',
+        'text_param_index': 0,
+        'return_audio_index': 1,
+    },
+    # WhisperSpeech
+    'collabora/WhisperSpeech': {
+        'name': 'collabora/WhisperSpeech',
+        'function': '/whisper_speech_demo',
+        'text_param_index': 0,
+        'return_audio_index': 0,
+    },
+    # OpenVoice (MyShell.ai)
+    'myshell-ai/OpenVoice': {
+        'name': 'myshell-ai/OpenVoice',
+        'function': '1',
+        'text_param_index': 0,
+        'return_audio_index': 1,
+    },
+    # OpenVoice v2 (MyShell.ai)
+    'myshell-ai/OpenVoiceV2': {
+        'name': 'myshell-ai/OpenVoiceV2',
+        'function': '1',
+        'text_param_index': 0,
+        'return_audio_index': 1,
+    },
+    # MetaVoice
+    'mrfakename/MetaVoice-1B-v0.1': {
+        'name': 'mrfakename/MetaVoice-1B-v0.1',
+        'function': '/tts',
+        'text_param_index': 0,
+        'return_audio_index': 0,
+    },
     # xVASynth (CPU)
-    'Pendrokar/xVASynth': '/predict#0',
-
+    'Pendrokar/xVASynth': {
+        'name': 'Pendrokar/xVASynth',
+        'function': '/predict',
+        'text_param_index': 0,
+        'return_audio_index': 0,
+    },
     # CoquiTTS (CPU)
-    # 'coqui/CoquiTTS': '0#0',
-
-    'LeeSangHoon/HierSpeech_TTS': '/predict#0',
+    'coqui/CoquiTTS': {
+        'name': 'coqui/CoquiTTS',
+        'function': '0',
+        'text_param_index': 0,
+        'return_audio_index': 0,
+    },
+    # HierSpeech_TTS
+    'LeeSangHoon/HierSpeech_TTS': {
+        'name': 'LeeSangHoon/HierSpeech_TTS',
+        'function': '/predict',
+        'text_param_index': 0,
+        'return_audio_index': 0,
+    },
+    # MeloTTS (MyShell.ai)
+    'mrfakename/MeloTTS': {
+        'name': 'mrfakename/MeloTTS',
+        'function': '/synthesize',
+        'text_param_index': 1,
+        'return_audio_index': 0,
+    },
 
     # TTS w issues
     # 'PolyAI/pheme': '/predict#0', #sleepy HF Space
@@ -60,7 +137,6 @@ AVAILABLE_MODELS = {
     # 'suno/bark': '3#0', # Hallucinates
     # 'shivammehta25/Matcha-TTS': '5#0', #seems to require multiple requests for setup
     # 'styletts2/styletts2': '0#0', #API disabled
-    # 'mrfakename/MelloTTS': '0#0', #API disabled
     # 'Manmay/tortoise-tts': '/predict#0', #Cannot skip text-from-file parameter
     # 'pytorch/Tacotron2': '0#0', #old gradio
 }
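The hunks above replace the packed `'<function>#<return-index-of-audio-param>'` strings with explicit `HF_SPACES` records. A minimal sketch, not part of the commit, of what the legacy format packed into one string (`parse_legacy` is a hypothetical helper for illustration):

```python
# Hypothetical helper (illustration only, not in app.py): unpack the legacy
# '<function>#<return-index-of-audio-param>' string, e.g. '/tts#0' or '1#1'.
def parse_legacy(spec: str) -> dict:
    function, _, audio_idx = spec.rpartition('#')
    return {
        'function': function,            # '/name' = named endpoint, digit = fn_index
        'text_param_index': 0,           # the legacy code hard-coded text as param 0
        'return_audio_index': int(audio_idx),
    }

assert parse_legacy('/tts#0') == {'function': '/tts', 'text_param_index': 0, 'return_audio_index': 0}
assert parse_legacy('1#1')['return_audio_index'] == 1
```

The explicit `text_param_index` is what the packed format could not express; the new `mrfakename/MeloTTS` entry relies on it, since that Space takes its text as parameter 1 rather than 0.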
@@ -86,6 +162,10 @@ OVERRIDE_INPUTS = {
         1: 'default', # style
         2: DEFAULT_VOICE_SAMPLE, # voice sample
     },
+    'myshell-ai/OpenVoiceV2': {
+        1: 'en_default', # style
+        2: DEFAULT_VOICE_SAMPLE, # voice sample
+    },
     'PolyAI/pheme': {
         1: 'YOU1000000044_S0000798', # voice
         2: 210,
@@ -116,6 +196,11 @@ OVERRIDE_INPUTS = {
         3: None,
         4: 'No',
     },
+    'mrfakename/MeloTTS': {
+        0: 'EN-US', # speaker
+        2: 1,
+        3: 'EN', # language
+    },
 }
 
 SPACE_ID = os.getenv('SPACE_ID')
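`OVERRIDE_INPUTS` maps positional parameter indices to pinned values per Space. `_override_params` itself is outside this diff, so the following is a hedged sketch of how such overrides are presumably layered over an endpoint's example parameters (`apply_overrides` is a hypothetical stand-in):

```python
# Hedged sketch; the real _override_params is not shown in this commit, and this
# stand-alone version takes the override dict directly instead of a model key.
def apply_overrides(example_inputs: list, overrides: dict) -> list:
    for index, value in overrides.items():
        example_inputs[index] = value   # pin voice/style/language parameters
    return example_inputs

# e.g. the new OpenVoiceV2 entry pins style and voice sample, leaving
# parameter 0 free for the synthesized text:
inputs = apply_overrides([None, None, None], {1: 'en_default', 2: 'voice_sample.wav'})
assert inputs == [None, 'en_default', 'voice_sample.wav']
```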
@@ -686,43 +771,44 @@ def synthandreturn(text):
     log_text(text)
     print("[debug] Using", mdl1, mdl2)
     def predict_and_update_result(text, model, result_storage):
+        print(model)
         try:
             if model in AVAILABLE_MODELS:
                 if '/' in model:
                     # Use public HF Space
                     mdl_space = Client(model, hf_token=hf_token)
                     # assume the index is one of the first 9 return params
-                    return_audio_index = int(AVAILABLE_MODELS[model][-1])
+                    return_audio_index = int(HF_SPACES[model]['return_audio_index'])
                     endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')
 
                     api_name = None
                     fn_index = None
+                    end_parameters = None
                     # has named endpoint
-                    if '/' == AVAILABLE_MODELS[model][:1]:
-                        # assume the index is one of the first 9 params
-                        api_name = AVAILABLE_MODELS[model][:-2]
+                    if '/' == HF_SPACES[model]['function'][0]:
+                        # audio sync function name
+                        api_name = HF_SPACES[model]['function']
 
-                        space_inputs = _get_param_examples(
+                        end_parameters = _get_param_examples(
                             endpoints['named_endpoints'][api_name]['parameters']
                         )
                     # has unnamed endpoint
                     else:
                         # endpoint index is the first character
-                        fn_index = int(AVAILABLE_MODELS[model][0])
+                        fn_index = int(HF_SPACES[model]['function'])
 
-                        space_inputs = _get_param_examples(
+                        end_parameters = _get_param_examples(
                             endpoints['unnamed_endpoints'][str(fn_index)]['parameters']
                         )
 
-                    space_inputs = _override_params(space_inputs, model)
+                    space_inputs = _override_params(end_parameters, model)
 
                     # force text
-                    space_inputs[0] = text
+                    space_inputs[HF_SPACES[model]['text_param_index']] = text
 
                     results = mdl_space.predict(*space_inputs, api_name=api_name, fn_index=fn_index)
 
                     # return path to audio
-                    print(results)
                     result = results[return_audio_index] if (not isinstance(results, str)) else results
                 else:
                     # Use the private HF Space
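With the new fields, the dispatch in `predict_and_update_result` reduces to table lookups. A condensed sketch of that flow using `gradio_client`, shown with the MetaVoice record from `HF_SPACES`; the three-element parameter list is a stand-in, since the real code derives it from `view_api` via `_get_param_examples`:

```python
from gradio_client import Client

space = {                                   # HF_SPACES['mrfakename/MetaVoice-1B-v0.1']
    'name': 'mrfakename/MetaVoice-1B-v0.1',
    'function': '/tts',
    'text_param_index': 0,
    'return_audio_index': 0,
}

client = Client(space['name'])

# A leading '/' marks a named endpoint; a bare digit is an unnamed fn_index.
api_name = space['function'] if space['function'].startswith('/') else None
fn_index = None if api_name else int(space['function'])

params = [None, None, None]                 # stand-in for _get_param_examples(...)
params[space['text_param_index']] = 'Hello world'

results = client.predict(*params, api_name=api_name, fn_index=fn_index)
audio = results[space['return_audio_index']] if not isinstance(results, str) else results
```

Whether there are three parameters (and which ones) is Space-specific; the actual code builds the list from the endpoint's example parameters rather than hard-coding it.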
@@ -803,7 +889,7 @@ def synthandreturn(text):
     print(f"Sending models {mdl1k} and {mdl2k} to API")
     thread1 = threading.Thread(target=predict_and_update_result, args=(text, mdl1k, results))
     thread2 = threading.Thread(target=predict_and_update_result, args=(text, mdl2k, results))
-
+
     thread1.start()
     thread2.start()
     thread1.join()
@@ -818,7 +904,7 @@ def synthandreturn(text):
     # print(sr)
     #debug
     # outputs = [text, btn, r2, model1, model2, aud1, aud2, abetter, bbetter, prevmodel1, prevmodel2, nxtroundbtn]
-
+
     print(f"Retrieving models {mdl1k} and {mdl2k} from API")
     return (
         text,