Spaces:
Running
Running
修复上传干声采样率不是44100导致的语速问题 (Fix the speech-speed problem caused by uploaded dry vocals whose sample rate is not 44100 Hz)
Browse files
app.py
CHANGED
@@ -38,10 +38,11 @@ def create_fn(model, spk):
|
|
38 |
return 0, None
|
39 |
sr, audio = input_audio
|
40 |
audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
|
|
|
41 |
if len(audio.shape) > 1:
|
42 |
audio = librosa.to_mono(audio.transpose(1, 0))
|
43 |
temp_path = "temp.wav"
|
44 |
-
soundfile.write(temp_path, audio, sr, format="wav")
|
45 |
model.hubert_model = hubert_model
|
46 |
out_audio = model.slice_inference(raw_audio_path=temp_path,
|
47 |
spk=spk,
|
@@ -54,7 +55,7 @@ def create_fn(model, spk):
|
|
54 |
auto_predict_f0=auto_f0)
|
55 |
model.clear_empty()
|
56 |
os.remove(temp_path)
|
57 |
-
return sr, out_audio
|
58 |
|
59 |
async def tts_fn(input_text, gender, tts_rate, vc_transform, auto_f0, f0p):
|
60 |
if input_text == '':
|
@@ -75,7 +76,7 @@ def create_fn(model, spk):
|
|
75 |
temp_path = "temp.wav"
|
76 |
wavfile.write(temp_path, sampling_rate, (audio * np.iinfo(np.int16).max).astype(np.int16))
|
77 |
sr, audio = gr_pu.audio_from_file(temp_path)
|
78 |
-
input_audio = (sr, audio)
|
79 |
return svc_fn(input_audio, vc_transform, auto_f0, f0p)
|
80 |
|
81 |
return svc_fn, tts_fn
|
|
|
38 |
return 0, None
|
39 |
sr, audio = input_audio
|
40 |
audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
|
41 |
+
audio = librosa.resample(audio, orig_sr=sr, target_sr=sampling_rate)
|
42 |
if len(audio.shape) > 1:
|
43 |
audio = librosa.to_mono(audio.transpose(1, 0))
|
44 |
temp_path = "temp.wav"
|
45 |
+
soundfile.write(temp_path, audio, sampling_rate, format="wav")
|
46 |
model.hubert_model = hubert_model
|
47 |
out_audio = model.slice_inference(raw_audio_path=temp_path,
|
48 |
spk=spk,
|
|
|
55 |
auto_predict_f0=auto_f0)
|
56 |
model.clear_empty()
|
57 |
os.remove(temp_path)
|
58 |
+
return sampling_rate, out_audio
|
59 |
|
60 |
async def tts_fn(input_text, gender, tts_rate, vc_transform, auto_f0, f0p):
|
61 |
if input_text == '':
|
|
|
76 |
temp_path = "temp.wav"
|
77 |
wavfile.write(temp_path, sampling_rate, (audio * np.iinfo(np.int16).max).astype(np.int16))
|
78 |
sr, audio = gr_pu.audio_from_file(temp_path)
|
79 |
+
input_audio = (sampling_rate, audio)
|
80 |
return svc_fn(input_audio, vc_transform, auto_f0, f0p)
|
81 |
|
82 |
return svc_fn, tts_fn
|