Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -38,6 +38,11 @@ class TTS_Interface:
|
|
38 |
def __init__(self):
|
39 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
40 |
self.utterance_cloner = UtteranceCloner(model_id="Meta", device=self.device)
|
|
|
|
|
|
|
|
|
|
|
41 |
self.utterance_cloner.tts.set_language("de")
|
42 |
self.acoustic_model = Aligner()
|
43 |
self.acoustic_model.load_state_dict(torch.load("Models/Aligner/aligner.pt", map_location='cpu')["asr_model"])
|
@@ -46,7 +51,6 @@ class TTS_Interface:
|
|
46 |
self.text = "Quellen hattest du ihm, hattest dem Flüchtigen, kühle Schatten geschenkt, und die Gestade sahen, all ihm nach, und es bebte, aus den Wellen ihr lieblich Bild."
|
47 |
reference_audio = "reference_audios/2.wav"
|
48 |
self.duration, self.pitch, self.energy, _, _ = self.utterance_cloner.extract_prosody(self.text, reference_audio, lang="de", on_line_fine_tune=True)
|
49 |
-
self.utterance_cloner.tts.text2phone.use_word_boundaries = False
|
50 |
self.phones = self.utterance_cloner.tts.text2phone.get_phone_string(self.text)
|
51 |
|
52 |
#######
|
|
|
38 |
def __init__(self):
|
39 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
40 |
self.utterance_cloner = UtteranceCloner(model_id="Meta", device=self.device)
|
41 |
+
|
42 |
+
# For simplicity, word boundaries are turned off: this demo uses an oracle, and we have seen enough German data to get by without them.
|
43 |
+
self.utterance_cloner.tf.use_word_boundaries = False
|
44 |
+
self.utterance_cloner.tts.text2phone.use_word_boundaries = False
|
45 |
+
|
46 |
self.utterance_cloner.tts.set_language("de")
|
47 |
self.acoustic_model = Aligner()
|
48 |
self.acoustic_model.load_state_dict(torch.load("Models/Aligner/aligner.pt", map_location='cpu')["asr_model"])
|
|
|
51 |
self.text = "Quellen hattest du ihm, hattest dem Flüchtigen, kühle Schatten geschenkt, und die Gestade sahen, all ihm nach, und es bebte, aus den Wellen ihr lieblich Bild."
|
52 |
reference_audio = "reference_audios/2.wav"
|
53 |
self.duration, self.pitch, self.energy, _, _ = self.utterance_cloner.extract_prosody(self.text, reference_audio, lang="de", on_line_fine_tune=True)
|
|
|
54 |
self.phones = self.utterance_cloner.tts.text2phone.get_phone_string(self.text)
|
55 |
|
56 |
#######
|