Flux9665 committed on
Commit
a8bfe3a
•
1 Parent(s): fda9e93

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -3
app.py CHANGED
@@ -1,12 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import numpy as np
3
  import torch
4
  import math
5
  import os
6
- from Preprocessing.ArticulatoryCombinedTextFrontend import ArticulatoryCombinedTextFrontend
7
  from TrainingInterfaces.Text_to_Spectrogram.AutoAligner.Aligner import Aligner
8
  from TrainingInterfaces.Text_to_Spectrogram.FastSpeech2.DurationCalculator import DurationCalculator
9
- from run_utterance_cloner import UtteranceCloner
10
 
11
  def float2pcm(sig, dtype='int16'):
12
  """
@@ -28,7 +44,7 @@ class TTS_Interface:
28
 
29
  def __init__(self):
30
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
31
- self.utterance_cloner = UtteranceCloner(device=self.device)
32
  self.utterance_cloner.tts.set_language("de")
33
  self.acoustic_model = Aligner()
34
  self.acoustic_model.load_state_dict(torch.load("Models/Aligner/aligner.pt", map_location='cpu')["asr_model"])
 
1
+ import os
2
+
3
+ import gradio as gr
4
+ import numpy as np
5
+ import soundfile as sf
6
+ import torch
7
+
8
+ os.system("git clone --branch v2.3 https://github.com/DigitalPhonetics/IMS-Toucan.git toucan_codebase")
9
+ os.system("mv toucan_codebase/* .")
10
+
11
+ from run_model_downloader import download_models
12
+
13
+ download_models()
14
+
15
+
16
+
17
  import gradio as gr
18
  import numpy as np
19
  import torch
20
  import math
21
  import os
22
+ from Preprocessing.TextFrontend import ArticulatoryCombinedTextFrontend
23
  from TrainingInterfaces.Text_to_Spectrogram.AutoAligner.Aligner import Aligner
24
  from TrainingInterfaces.Text_to_Spectrogram.FastSpeech2.DurationCalculator import DurationCalculator
25
+ from InferenceInterfaces.UtteranceCloner import UtteranceCloner
26
 
27
  def float2pcm(sig, dtype='int16'):
28
  """
 
44
 
45
  def __init__(self):
46
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
47
+ self.utterance_cloner = UtteranceCloner(model_id="Meta", device=self.device)
48
  self.utterance_cloner.tts.set_language("de")
49
  self.acoustic_model = Aligner()
50
  self.acoustic_model.load_state_dict(torch.load("Models/Aligner/aligner.pt", map_location='cpu')["asr_model"])