kevinwang676 committed
Commit: 76e808f
1 Parent(s): e1f204c

Update app.py
app.py CHANGED
@@ -68,8 +68,10 @@ def pad_buffer(audio):
     return audio
 
 
-def generate_voice(text, voice):
-
+def generate_voice(api_key, text, voice):
+    client = ElevenLabs(
+        api_key=api_key, # Defaults to ELEVEN_API_KEY
+    )
     audio = client.generate(text=text, voice=voice) #response.voices[0]
     audio = b"".join(audio)
     with open("output.mp3", "wb") as f:
@@ -101,7 +103,7 @@ html_denoise = """
 </html>
 """
 
-def convert(text, tgt, voice, save_path):
+def convert(api_key, text, tgt, voice, save_path):
     model = "FreeVC (24kHz)"
     with torch.no_grad():
         # tgt
@@ -123,7 +125,7 @@ def convert(text, tgt, voice, save_path):
             hps.data.mel_fmax
         )
         # src
-        src = generate_voice(text, voice)
+        src = generate_voice(api_key, text, voice)
         wav_src, _ = librosa.load(src, sr=hps.data.sampling_rate)
         wav_src = torch.from_numpy(wav_src).unsqueeze(0).to(device)
         c = cmodel(wav_src).last_hidden_state.transpose(1, 2).to(device)
@@ -304,9 +306,6 @@ import shutil
 
 def convert_from_srt(api_key, filename, audio_full, voice, multilingual):
 
-    client = ElevenLabs(
-        api_key=api_key, # Defaults to ELEVEN_API_KEY
-    )
     subtitle_list = read_srt(filename)
 
     #audio_data, sr = librosa.load(audio_full, sr=44100)
@@ -322,7 +321,7 @@ def convert_from_srt(api_key, filename, audio_full, voice, multilingual):
                 trim_audio([[i.start_time, i.end_time]], audio_full, f"sliced_audio_{i.index}")
                 print(f"正在合成第{i.index}条语音")
                 print(f"语音内容:{i.text}")
-                convert(i.text, f"sliced_audio_{i.index}_0.wav", voice, i.text + " " + str(i.index))
+                convert(api_key, i.text, f"sliced_audio_{i.index}_0.wav", voice, i.text + " " + str(i.index))
             except Exception:
                 pass
     else:
@@ -332,7 +331,7 @@ def convert_from_srt(api_key, filename, audio_full, voice, multilingual):
                 trim_audio([[i.start_time, i.end_time]], audio_full, f"sliced_audio_{i.index}")
                 print(f"正在合成第{i.index}条语音")
                 print(f"语音内容:{i.text.splitlines()[1]}")
-                convert(i.text.splitlines()[1], f"sliced_audio_{i.index}_0.wav", voice, i.text.splitlines()[1] + " " + str(i.index))
+                convert(api_key, i.text.splitlines()[1], f"sliced_audio_{i.index}_0.wav", voice, i.text.splitlines()[1] + " " + str(i.index))
             except Exception:
                 pass
     merge_audios("output")