Update app.py
Browse files
app.py
CHANGED
@@ -71,8 +71,48 @@ def infer(prompt, input_wav_file):
|
|
71 |
for item in contents:
|
72 |
print(item)
|
73 |
|
74 |
-
return "output.wav", f"bark_voices/{file_name}/{contents[1]}"
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
css = """
|
77 |
#col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
|
78 |
"""
|
@@ -95,6 +135,7 @@ with gr.Blocks(css=css) as demo:
|
|
95 |
)
|
96 |
|
97 |
submit_btn = gr.Button("Submit")
|
|
|
98 |
|
99 |
cloned_out = gr.Audio(
|
100 |
label="Text to speech output"
|
@@ -112,7 +153,29 @@ with gr.Blocks(css=css) as demo:
|
|
112 |
],
|
113 |
outputs = [
|
114 |
cloned_out,
|
115 |
-
npz_file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
]
|
117 |
)
|
118 |
|
|
|
71 |
for item in contents:
|
72 |
print(item)
|
73 |
|
74 |
+
return "output.wav", f"bark_voices/{file_name}/{contents[1]}", gr.update(visible=False), gr.update(visible=True)
|
75 |
|
76 |
+
def infer_with_npz(prompt, input_wav_file):
    """Synthesize speech for `prompt` reusing a previously cloned voice.

    Assumes a voice directory `bark_voices/<file_name>/` already exists
    (created by a prior cloning pass) containing a `speaker.npz` file that
    `model.synthesize` looks up via `speaker_id` + `voice_dirs`.

    Args:
        prompt: Text to synthesize.
        input_wav_file: Path to the originally uploaded WAV; only its base
            name (without extension) is used to locate the voice directory.

    Returns:
        Path to the generated audio file, 'output.wav'.
    """
    source_path = input_wav_file
    # Voice directory is keyed by the upload's base name without extension.
    file_name = os.path.splitext(os.path.basename(source_path))[0]
    voice_dir = f"bark_voices/{file_name}"

    contents = os.listdir(voice_dir)
    for item in contents:
        print(item)

    # BUG FIX: os.listdir returns bare file names, so the original
    # os.remove(contents[0]) resolved against the current working directory,
    # not the voice directory — raising FileNotFoundError (or deleting an
    # unrelated file). Join with the directory before removing.
    # NOTE(review): relying on listdir order to pick the file to delete is
    # fragile — presumably this removes the copied source WAV so only the
    # .npz remains; confirm against the directory layout.
    if contents:
        os.remove(os.path.join(voice_dir, contents[0]))

    # Second-pass synthesis: no re-cloning, just reuse the stored speaker.
    text = prompt
    output_dict = model.synthesize(
        text,
        config,
        speaker_id=f"{file_name}",
        voice_dirs="bark_voices/"
    )

    print(output_dict)

    sample_rate = 24000  # Bark's output rate — TODO confirm against model config.

    wavfile.write(
        'output.wav',
        sample_rate,
        output_dict['wav']
    )

    # Print the (pre-removal) directory listing again as a debug aid.
    for item in contents:
        print(item)

    return 'output.wav'
|
113 |
+
|
114 |
+
def uploaded_audio():
    """Reset button visibility after a fresh audio upload.

    A new upload needs a full cloning pass first, so the primary "Submit"
    button is shown again and the npz-based "Submit 2" button is hidden.
    """
    show_submit = gr.update(visible=True)
    hide_submit_npz = gr.update(visible=False)
    return show_submit, hide_submit_npz
|
116 |
css = """
|
117 |
#col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
|
118 |
"""
|
|
|
135 |
)
|
136 |
|
137 |
submit_btn = gr.Button("Submit")
|
138 |
+
submit_with_npz_btn = gr.Button("Submit 2", visible=False)
|
139 |
|
140 |
cloned_out = gr.Audio(
|
141 |
label="Text to speech output"
|
|
|
153 |
],
|
154 |
outputs = [
|
155 |
cloned_out,
|
156 |
+
npz_file,
|
157 |
+
submit_btn,
|
158 |
+
submit_with_npz_btn
|
159 |
+
]
|
160 |
+
)
|
161 |
+
|
162 |
+
# Second-pass synthesis: reuse the already-extracted .npz voice
# instead of re-running the cloning step.
submit_with_npz_btn.click(
    fn=infer_with_npz,
    inputs=[prompt, audio_in],
    outputs=[cloned_out],
)
|
172 |
+
|
173 |
+
# A fresh upload invalidates any previously extracted voice, so restore
# the original "Submit" button and hide the npz shortcut.
audio_in.upload(
    fn=uploaded_audio,
    inputs=[],
    outputs=[submit_btn, submit_with_npz_btn],
)
|
181 |
|