barreloflube committed on
Commit
89b8fd1
1 Parent(s): aab35a3

Refactor gen_audio function to use CosyVoice TTS instead of SFT

Browse files
tabs/audios/events.py CHANGED
@@ -168,8 +168,9 @@ def gen_audio(text, mode, sft_speaker = None, speaker_audio = None, voice_instru
168
  if not speaker_audio_file:
169
  raise gr.Error('Please upload an audio')
170
 
171
- for i, j in enumerate(cv_sft.inference_zero_shot(
172
  tts_text=text,
 
173
  prompt_speech_16k=prompt_speech_16k,
174
  )):
175
  torchaudio.save(
@@ -181,7 +182,7 @@ def gen_audio(text, mode, sft_speaker = None, speaker_audio = None, voice_instru
181
  if not speaker_audio_file:
182
  raise gr.Error('Please upload an audio')
183
 
184
- for i, j in enumerate(cv_sft.inference_cross_lingual(
185
  tts_text=text,
186
  prompt_speech_16k=prompt_speech_16k,
187
  )):
 
168
  if not speaker_audio_file:
169
  raise gr.Error('Please upload an audio')
170
 
171
+ for i, j in enumerate(cv_vc.inference_zero_shot(
172
  tts_text=text,
173
+ prompt_text=voice_instructions,
174
  prompt_speech_16k=prompt_speech_16k,
175
  )):
176
  torchaudio.save(
 
182
  if not speaker_audio_file:
183
  raise gr.Error('Please upload an audio')
184
 
185
+ for i, j in enumerate(cv_vc.inference_cross_lingual(
186
  tts_text=text,
187
  prompt_speech_16k=prompt_speech_16k,
188
  )):
tabs/audios/load_models.py CHANGED
@@ -23,16 +23,16 @@ def init_sys():
23
  # Add `tabs/audios/modules/CosyVoice/third_party/Matcha-TTS` to your `PYTHONPATH`
24
  os.environ['PYTHONPATH'] = f'{os.path.dirname(__file__)}/modules/CosyVoice/third_party/Matcha-TTS:{os.environ.get("PYTHONPATH", "")}'
25
 
26
- # Load CosyVoice TTS
27
- cv_base = CosyVoice('pretrained_models/CosyVoice-300M')
28
-
29
  # Load CosyVoice SFT
30
  cv_sft = CosyVoice('pretrained_models/CosyVoice-300M-SFT')
31
  sft_speakers = cv_sft.list_avaliable_spks()
32
 
 
 
 
33
  # Load CosyVoice Instruct
34
  cv_instruct = CosyVoice('pretrained_models/CosyVoice-300M-Instruct')
35
 
36
- return device, df_model, df_state, cv_base, cv_sft, sft_speakers, cv_instruct
37
 
38
- device, df_model, df_state, cv_base, cv_sft, sft_speakers, cv_instruct = init_sys()
 
23
  # Add `tabs/audios/modules/CosyVoice/third_party/Matcha-TTS` to your `PYTHONPATH`
24
  os.environ['PYTHONPATH'] = f'{os.path.dirname(__file__)}/modules/CosyVoice/third_party/Matcha-TTS:{os.environ.get("PYTHONPATH", "")}'
25
 
 
 
 
26
  # Load CosyVoice SFT
27
  cv_sft = CosyVoice('pretrained_models/CosyVoice-300M-SFT')
28
  sft_speakers = cv_sft.list_avaliable_spks()
29
 
30
+ # Load CosyVoice TTS
31
+ cv_vc = CosyVoice('pretrained_models/CosyVoice-300M')
32
+
33
  # Load CosyVoice Instruct
34
  cv_instruct = CosyVoice('pretrained_models/CosyVoice-300M-Instruct')
35
 
36
+ return device, df_model, df_state, cv_vc, cv_sft, sft_speakers, cv_instruct
37
 
38
+ device, df_model, df_state, cv_vc, cv_sft, sft_speakers, cv_instruct = init_sys()