Spaces:

robinhad
/

ukrainian-tts

Running

Yurii Paniv committed on Dec 10, 2022

Commit

a9c23eb

•

1 Parent(s): 6449e88

Add speed control

Files changed (2) hide show

app.py CHANGED Viewed

@@ -57,7 +57,7 @@ print(f"CUDA available? {is_available()}")
 ukr_tts = TTS()
-def tts(text: str, voice: str, stress: str):
     print("============================")
     print("Original text:", text)
     print("Voice", voice)
@@ -87,7 +87,7 @@ def tts(text: str, voice: str, stress: str):
         log_queue.put([text, speaker_name, stress_selected, str(datetime.utcnow())])
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        _, text = ukr_tts.tts(text, speaker_name, stress_selected, fp)
         return fp.name, text
@@ -113,6 +113,13 @@ iface = gr.Interface(
             choices=[option.value for option in StressOption],
             value=StressOption.AutomaticStress.value,
         ),
     ],
     outputs=[
         gr.components.Audio(label="Output"),

 ukr_tts = TTS()
+def tts(text: str, voice: str, stress: str, speed: float):
     print("============================")
     print("Original text:", text)
     print("Voice", voice)
         log_queue.put([text, speaker_name, stress_selected, str(datetime.utcnow())])
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+        _, text = ukr_tts.tts(text, speaker_name, stress_selected, fp, speed)
         return fp.name, text
             choices=[option.value for option in StressOption],
             value=StressOption.AutomaticStress.value,
         ),
+        gr.components.Slider(
+            label="Швидкість",
+            minimum=0.5,
+            maximum=2,
+            value=1,
+            step=0.1
+        )
     ],
     outputs=[
         gr.components.Audio(label="Output"),

ukrainian_tts/tts.py CHANGED Viewed

@@ -67,19 +67,11 @@ class TTS:
         text = preprocess_text(text, stress)
         text = sentence_to_stress(text, stress_with_model if stress else stress_dict)
-        self.synthesizer = Text2Speech(
-            train_config="config.yaml",
-            model_file="model.pth",
-            device=self.device,
-            speed_control_alpha=1 / speed,
-            # Only for VITS
-            noise_scale=0.333,
-            noise_scale_dur=0.333,
-        )
         # synthesis
         with no_grad():
             start = time.time()
-            wav = self.synthesizer(text, sids=np.array(voice))["wav"]
         rtf = (time.time() - start) / (len(wav) / self.synthesizer.fs)
         print(f"RTF = {rtf:5f}")
@@ -112,6 +104,15 @@ class TTS:
         self.__download(model_link, model_path)
         self.__download(config_link, config_path)
     def __download(self, url, file_name):
         """Downloads file from `url` into local `file_name` file."""
         if not exists(file_name):

         text = preprocess_text(text, stress)
         text = sentence_to_stress(text, stress_with_model if stress else stress_dict)
         # synthesis
         with no_grad():
             start = time.time()
+            wav = self.synthesizer(text, sids=np.array(voice), decode_conf={"alpha": 1/speed})["wav"]
         rtf = (time.time() - start) / (len(wav) / self.synthesizer.fs)
         print(f"RTF = {rtf:5f}")
         self.__download(model_link, model_path)
         self.__download(config_link, config_path)
+        self.synthesizer = Text2Speech(
+            train_config="config.yaml",
+            model_file="model.pth",
+            device=self.device,
+            # Only for VITS
+            noise_scale=0.333,
+            noise_scale_dur=0.333,
+        )
     def __download(self, url, file_name):
         """Downloads file from `url` into local `file_name` file."""
         if not exists(file_name):