Spaces:

fffiloni
/

instant-TTS-Bark-cloning

Paused

App Files Files Community

fffiloni committed on Aug 22, 2023

Commit

adf3e3f

•

1 Parent(s): 5a679fb

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -75

app.py CHANGED Viewed

@@ -2,10 +2,10 @@ import gradio as gr
 import os
 import shutil
-from huggingface_hub import snapshot_download
 import numpy as np
 from scipy.io import wavfile
 model_ids = [
     'suno/bark',
 ]
@@ -13,7 +13,7 @@ model_ids = [
 for model_id in model_ids:
     model_name = model_id.split('/')[-1]
     snapshot_download(model_id, local_dir=f'checkpoints/{model_name}')
 #from TTS.tts.configs.bark_config import BarkConfig
 #from TTS.tts.models.bark import Bark
@@ -89,57 +89,9 @@ def infer(prompt, input_wav_file):
     for item in contents:
         print(item)
-    return "output.wav", f"bark_voices/{file_name}/{contents[1]}", gr.update(visible=False), gr.update(visible=True)
-def infer_with_npz(prompt, input_wav_file):
-    print("NEW GENERATION WITH EXISTING .NPZ")
-    # Path to your WAV file
-    source_path = input_wav_file
-    # Extract the file name without the extension
-    file_name = os.path.splitext(os.path.basename(source_path))[0]
-    # List all the files and subdirectories in the given directory
-    contents = os.listdir(f"bark_voices/{file_name}")
-    # Print the contents
-    for item in contents:
-        print(item)
-    first_item = contents[0]  # Index 0 corresponds to the first item
-    item_path = os.path.join(f"bark_voices/{file_name}", first_item)
-    os.remove(item_path)
-    """
-    print("BEGINNING GENERATION")
-    # cloning a speaker.
-    text = prompt
-    # It assumes that you have a speaker file in `bark_voices/speaker_n/speaker.npz`
-    output_dict = model.synthesize(
-        text,
-        config,
-        speaker_id=f"{file_name}",
-        voice_dirs="bark_voices/"
-    )
-    print(output_dict)
-    print("WRITING WAVE FILE")
-    sample_rate = 24000  # Replace with the actual sample rate
-    wavfile.write(
-        'output.wav',
-        sample_rate,
-        output_dict['wav']
-    )
-    """
-    # Print again the contents
-    contents = os.listdir(f"bark_voices/{file_name}")
-    for item in contents:
-        print(item)
-    return 'output.wav'
-def uploaded_audio():
-    return gr.update(visible=True), gr.update(visible=False)
 css = """
 #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
 """
@@ -180,29 +132,7 @@ with gr.Blocks(css=css) as demo:
         ],
         outputs = [
             cloned_out,
-            npz_file,
-            submit_btn,
-            submit_with_npz_btn
-        ]
-    )
-    submit_with_npz_btn.click(
-        fn = infer_with_npz,
-        inputs = [
-            prompt,
-            audio_in
-        ],
-        outputs = [
-            cloned_out
-        ]
-    )
-    audio_in.upload(
-        fn=uploaded_audio,
-        inputs=[],
-        outputs=[
-            submit_btn,
-            submit_with_npz_btn
         ]
     )

 import os
 import shutil
+#from huggingface_hub import snapshot_download
 import numpy as np
 from scipy.io import wavfile
+"""
 model_ids = [
     'suno/bark',
 ]
 for model_id in model_ids:
     model_name = model_id.split('/')[-1]
     snapshot_download(model_id, local_dir=f'checkpoints/{model_name}')
+"""
 #from TTS.tts.configs.bark_config import BarkConfig
 #from TTS.tts.models.bark import Bark
     for item in contents:
         print(item)
+    return "output.wav", f"bark_voices/{file_name}/{contents[1]}"
 css = """
 #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
 """
         ],
         outputs = [
             cloned_out,
+            npz_file
         ]
     )