John Langley committed on
Commit
4bae818
1 Parent(s): 4979540

change to cpu

Browse files
Files changed (3) hide show
  1. app.py +15 -1
  2. requirements.txt +1 -1
  3. utils.py +32 -2
app.py CHANGED
@@ -61,7 +61,21 @@ os.environ["COQUI_TOS_AGREED"] = "1"
61
  ##print(m)
62
  #m = model_name
63
 
64
- xtts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
 
67
 
 
61
  ##print(m)
62
  #m = model_name
63
 
64
+ #xtts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
65
+
66
+ device = "cpu"
67
+ model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
68
+ print("⏳Downloading model")
69
+ ModelManager().download_model(model_name)
70
+ model_path = os.path.join(
71
+ get_user_data_dir("tts"), model_name.replace("/", "--")
72
+ )
73
+
74
+ config = XttsConfig()
75
+ config.load_json(os.path.join(model_path, "config.json"))
76
+ xtts_model = Xtts.init_from_config(config)
77
+ xtts_model.load_checkpoint(config, checkpoint_dir=model_path, eval=True)
78
+ xtts_model.to(device)
79
 
80
 
81
 
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  # Preinstall requirements from TTS
2
- TTS @ git+https://github.com/coqui-ai/TTS@v0.20.6
3
  pydantic==1.10.13
4
  python-multipart==0.0.6
5
  typing-extensions>=4.8.0
 
1
  # Preinstall requirements from TTS
2
+ TTS @ git+https://github.com/coqui-ai/TTS@fa28f99f1508b5b5366539b2149963edcb80ba62
3
  pydantic==1.10.13
4
  python-multipart==0.0.6
5
  typing-extensions>=4.8.0
utils.py CHANGED
@@ -8,6 +8,7 @@ import textwrap
8
  import time
9
  import uuid
10
  import wave
 
11
 
12
  import emoji
13
  import gradio as gr
@@ -81,8 +82,37 @@ def detect_language(prompt, xtts_supported_languages=None):
81
 
82
  return language
83
 
84
- def get_voice_streaming(prompt, language, chatbot_voice, xtts_model, suffix="0"):
85
- gpt_cond_latent, speaker_embedding = get_latents(chatbot_voice, xtts_model)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  try:
87
  t0 = time.time()
88
  chunks = xtts_model.inference_stream(
 
8
  import time
9
  import uuid
10
  import wave
11
+ import torch
12
 
13
  import emoji
14
  import gradio as gr
 
82
 
83
  return language
84
 
85
+ def get_voice_streaming(self,prompt, language, chatbot_voice, xtts_model, suffix="0"):
86
+
87
+ speaker = {
88
+ "speaker_embedding": xtts_model.speaker_manager.speakers["Claribel Dervla"][
89
+ "speaker_embedding"
90
+ ]
91
+ .cpu()
92
+ .squeeze()
93
+ .half()
94
+ .tolist(),
95
+ "gpt_cond_latent": xtts_model.speaker_manager.speakers["Claribel Dervla"][
96
+ "gpt_cond_latent"
97
+ ]
98
+ .cpu()
99
+ .squeeze()
100
+ .half()
101
+ .tolist(),
102
+ }
103
+
104
+ speaker_embedding = (
105
+ torch.tensor(speaker.get("speaker_embedding"))
106
+ .unsqueeze(0)
107
+ .unsqueeze(-1)
108
+ )
109
+
110
+ gpt_cond_latent = (
111
+ torch.tensor(speaker.get("gpt_cond_latent"))
112
+ .reshape((-1, 1024))
113
+ .unsqueeze(0)
114
+ )
115
+ #gpt_cond_latent, speaker_embedding = get_latents(chatbot_voice, xtts_model)
116
  try:
117
  t0 = time.time()
118
  chunks = xtts_model.inference_stream(