test-rtechs committed on
Commit
8d827df
1 Parent(s): 54accfc

Update soni_translate/text_to_speech.py

Browse files
Files changed (1) hide show
  1. soni_translate/text_to_speech.py +12 -11
soni_translate/text_to_speech.py CHANGED
@@ -15,6 +15,7 @@ from .utils import (
15
  remove_directory_contents,
16
  remove_files,
17
  run_command,
 
18
  )
19
  import numpy as np
20
  from typing import Any, Dict
@@ -59,7 +60,7 @@ def error_handling_in_tts(error, segment, TRANSLATE_AUDIO_TO, filename):
59
  # Read audio data from the TemporaryFile using soundfile
60
  audio_data, samplerate = sf.read(f)
61
  f.close() # Close the TemporaryFile
62
- sf.write(
63
  filename, audio_data, samplerate, format="ogg", subtype="vorbis"
64
  )
65
 
@@ -73,7 +74,7 @@ def error_handling_in_tts(error, segment, TRANSLATE_AUDIO_TO, filename):
73
  sample_rate_aux = 22050
74
  duration = float(segment["end"]) - float(segment["start"])
75
  data = np.zeros(int(sample_rate_aux * duration)).astype(np.float32)
76
- sf.write(
77
  filename, data, sample_rate_aux, format="ogg", subtype="vorbis"
78
  )
79
  logger.error("Audio will be replaced -> [silent audio].")
@@ -181,7 +182,7 @@ def segments_egde_tts(filtered_edge_segments, TRANSLATE_AUDIO_TO, is_gui):
181
  # os.remove(temp_file)
182
 
183
  # Save file
184
- sf.write(
185
  file=filename,
186
  samplerate=sample_rate,
187
  data=data,
@@ -256,7 +257,7 @@ def segments_bark_tts(
256
  speech_output.cpu().numpy().squeeze().astype(np.float32),
257
  sampling_rate,
258
  )
259
- sf.write(
260
  file=filename,
261
  samplerate=sampling_rate,
262
  data=data_tts,
@@ -299,7 +300,7 @@ def uromanize(input_string):
299
  stderr=subprocess.PIPE,
300
  )
301
  stdout, stderr = process.communicate()
302
- script_path = os.path.join("./uroman", "bin", "uroman.pl")
303
 
304
  command = ["perl", script_path]
305
 
@@ -362,7 +363,7 @@ def segments_vits_tts(filtered_vits_segments, TRANSLATE_AUDIO_TO):
362
  sampling_rate,
363
  )
364
  # Save file
365
- sf.write(
366
  file=filename,
367
  samplerate=sampling_rate,
368
  data=data_tts,
@@ -667,7 +668,7 @@ def segments_coqui_tts(
667
  sampling_rate,
668
  )
669
  # Save file
670
- sf.write(
671
  file=filename,
672
  samplerate=sampling_rate,
673
  data=data_tts,
@@ -855,7 +856,7 @@ def segments_vits_onnx_tts(filtered_onnx_vits_segments, TRANSLATE_AUDIO_TO):
855
  sampling_rate,
856
  )
857
  # Save file
858
- sf.write(
859
  file=filename,
860
  samplerate=sampling_rate,
861
  data=data_tts,
@@ -925,7 +926,7 @@ def segments_openai_tts(
925
  sampling_rate,
926
  )
927
 
928
- sf.write(
929
  file=filename,
930
  samplerate=sampling_rate,
931
  data=data_tts,
@@ -1509,7 +1510,7 @@ def toneconverter_freevc(
1509
  target_wav=original_wav_audio_segment,
1510
  )
1511
 
1512
- sf.write(
1513
  file=save_path,
1514
  samplerate=tts.voice_converter.vc_config.audio.output_sample_rate,
1515
  data=wav,
@@ -1571,4 +1572,4 @@ if __name__ == "__main__":
1571
  tts_voice03="en-GB-SoniaNeural-Female",
1572
  tts_voice04="en-NZ-MitchellNeural-Male",
1573
  tts_voice05="en-GB-MaisieNeural-Female",
1574
- )
 
15
  remove_directory_contents,
16
  remove_files,
17
  run_command,
18
+ write_chunked,
19
  )
20
  import numpy as np
21
  from typing import Any, Dict
 
60
  # Read audio data from the TemporaryFile using soundfile
61
  audio_data, samplerate = sf.read(f)
62
  f.close() # Close the TemporaryFile
63
+ write_chunked(
64
  filename, audio_data, samplerate, format="ogg", subtype="vorbis"
65
  )
66
 
 
74
  sample_rate_aux = 22050
75
  duration = float(segment["end"]) - float(segment["start"])
76
  data = np.zeros(int(sample_rate_aux * duration)).astype(np.float32)
77
+ write_chunked(
78
  filename, data, sample_rate_aux, format="ogg", subtype="vorbis"
79
  )
80
  logger.error("Audio will be replaced -> [silent audio].")
 
182
  # os.remove(temp_file)
183
 
184
  # Save file
185
+ write_chunked(
186
  file=filename,
187
  samplerate=sample_rate,
188
  data=data,
 
257
  speech_output.cpu().numpy().squeeze().astype(np.float32),
258
  sampling_rate,
259
  )
260
+ write_chunked(
261
  file=filename,
262
  samplerate=sampling_rate,
263
  data=data_tts,
 
300
  stderr=subprocess.PIPE,
301
  )
302
  stdout, stderr = process.communicate()
303
+ script_path = os.path.join("./uroman", "uroman", "uroman.pl")
304
 
305
  command = ["perl", script_path]
306
 
 
363
  sampling_rate,
364
  )
365
  # Save file
366
+ write_chunked(
367
  file=filename,
368
  samplerate=sampling_rate,
369
  data=data_tts,
 
668
  sampling_rate,
669
  )
670
  # Save file
671
+ write_chunked(
672
  file=filename,
673
  samplerate=sampling_rate,
674
  data=data_tts,
 
856
  sampling_rate,
857
  )
858
  # Save file
859
+ write_chunked(
860
  file=filename,
861
  samplerate=sampling_rate,
862
  data=data_tts,
 
926
  sampling_rate,
927
  )
928
 
929
+ write_chunked(
930
  file=filename,
931
  samplerate=sampling_rate,
932
  data=data_tts,
 
1510
  target_wav=original_wav_audio_segment,
1511
  )
1512
 
1513
+ write_chunked(
1514
  file=save_path,
1515
  samplerate=tts.voice_converter.vc_config.audio.output_sample_rate,
1516
  data=wav,
 
1572
  tts_voice03="en-GB-SoniaNeural-Female",
1573
  tts_voice04="en-NZ-MitchellNeural-Male",
1574
  tts_voice05="en-GB-MaisieNeural-Female",
1575
+ )