Flux9665 committed on
Commit
5489b55
1 Parent(s): e30b9ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -1
app.py CHANGED
@@ -38,6 +38,11 @@ class TTS_Interface:
38
  def __init__(self):
39
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
40
  self.utterance_cloner = UtteranceCloner(model_id="Meta", device=self.device)
 
 
 
 
 
41
  self.utterance_cloner.tts.set_language("de")
42
  self.acoustic_model = Aligner()
43
  self.acoustic_model.load_state_dict(torch.load("Models/Aligner/aligner.pt", map_location='cpu')["asr_model"])
@@ -46,7 +51,6 @@ class TTS_Interface:
46
  self.text = "Quellen hattest du ihm, hattest dem Flüchtigen, kühle Schatten geschenkt, und die Gestade sahen, all ihm nach, und es bebte, aus den Wellen ihr lieblich Bild."
47
  reference_audio = "reference_audios/2.wav"
48
  self.duration, self.pitch, self.energy, _, _ = self.utterance_cloner.extract_prosody(self.text, reference_audio, lang="de", on_line_fine_tune=True)
49
- self.utterance_cloner.tts.text2phone.use_word_boundaries = False
50
  self.phones = self.utterance_cloner.tts.text2phone.get_phone_string(self.text)
51
 
52
  #######
 
38
  def __init__(self):
39
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
40
  self.utterance_cloner = UtteranceCloner(model_id="Meta", device=self.device)
41
+
42
+ # for simplicity, since we are using an oracle for this demo, and we have seen enough German data to get by without word boundaries
43
+ self.utterance_cloner.tf.use_word_boundaries = False
44
+ self.utterance_cloner.tts.text2phone.use_word_boundaries = False
45
+
46
  self.utterance_cloner.tts.set_language("de")
47
  self.acoustic_model = Aligner()
48
  self.acoustic_model.load_state_dict(torch.load("Models/Aligner/aligner.pt", map_location='cpu')["asr_model"])
 
51
  self.text = "Quellen hattest du ihm, hattest dem Flüchtigen, kühle Schatten geschenkt, und die Gestade sahen, all ihm nach, und es bebte, aus den Wellen ihr lieblich Bild."
52
  reference_audio = "reference_audios/2.wav"
53
  self.duration, self.pitch, self.energy, _, _ = self.utterance_cloner.extract_prosody(self.text, reference_audio, lang="de", on_line_fine_tune=True)
 
54
  self.phones = self.utterance_cloner.tts.text2phone.get_phone_string(self.text)
55
 
56
  #######