Spaces:

fffiloni
/

instant-TTS-Bark-cloning

Paused

fffiloni committed on Aug 22, 2023

Commit

8302c0f

•

1 Parent(s): 3c401eb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,6 +22,8 @@ model.load_checkpoint(config, checkpoint_dir="checkpoints/bark", eval=True)
 def infer(prompt, input_wav_file):
     # Path to your WAV file
     source_path = input_wav_file
@@ -42,6 +44,7 @@ def infer(prompt, input_wav_file):
     text = prompt
     # with random speaker
     #output_dict = model.synthesize(text, config, speaker_id="random", voice_dirs=None)
@@ -57,7 +60,7 @@ def infer(prompt, input_wav_file):
     print(output_dict)
     sample_rate = 24000  # Replace with the actual sample rate
     wavfile.write(
         'output.wav',
         sample_rate,
@@ -74,6 +77,7 @@ def infer(prompt, input_wav_file):
     return "output.wav", f"bark_voices/{file_name}/{contents[1]}", gr.update(visible=False), gr.update(visible=True)
 def infer_with_npz(prompt, input_wav_file):
     # Path to your WAV file
     source_path = input_wav_file
     # Extract the file name without the extension
@@ -83,8 +87,12 @@ def infer_with_npz(prompt, input_wav_file):
     # Print the contents
     for item in contents:
         print(item)
-    os.remove(contents[0])
     # cloning a speaker.
     text = prompt
     # It assumes that you have a speaker file in `bark_voices/speaker_n/speaker.npz`
@@ -97,8 +105,10 @@ def infer_with_npz(prompt, input_wav_file):
     print(output_dict)
-    sample_rate = 24000  # Replace with the actual sample rate
     wavfile.write(
         'output.wav',
         sample_rate,

 def infer(prompt, input_wav_file):
+    print("SAVING THE AUDIO FILE TO WHERE IT BELONGS")
     # Path to your WAV file
     source_path = input_wav_file
     text = prompt
+    print("SYNTHETIZING...")
     # with random speaker
     #output_dict = model.synthesize(text, config, speaker_id="random", voice_dirs=None)
     print(output_dict)
     sample_rate = 24000  # Replace with the actual sample rate
+    print("WRITING WAVE FILE")
     wavfile.write(
         'output.wav',
         sample_rate,
     return "output.wav", f"bark_voices/{file_name}/{contents[1]}", gr.update(visible=False), gr.update(visible=True)
 def infer_with_npz(prompt, input_wav_file):
+    print("NEW GENERATION WITH EXISTING .NPZ")
     # Path to your WAV file
     source_path = input_wav_file
     # Extract the file name without the extension
     # Print the contents
     for item in contents:
         print(item)
+    first_item = contents[0]  # Index 0 corresponds to the first item
+    item_path = os.path.join(f"bark_voices/{file_name}", first_item)
+    os.remove(item_path)
+    print("BEGINNING GENERATION")
     # cloning a speaker.
     text = prompt
     # It assumes that you have a speaker file in `bark_voices/speaker_n/speaker.npz`
     print(output_dict)
+    print("WRITING WAVE FILE")
+    sample_rate = 24000  # Replace with the actual sample rate
     wavfile.write(
         'output.wav',
         sample_rate,