Katock committed on
Commit
719a597
1 Parent(s): 2aac707

修复上传干声采样率不是44100导致的语速问题

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -38,10 +38,11 @@ def create_fn(model, spk):
38
  return 0, None
39
  sr, audio = input_audio
40
  audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
 
41
  if len(audio.shape) > 1:
42
  audio = librosa.to_mono(audio.transpose(1, 0))
43
  temp_path = "temp.wav"
44
- soundfile.write(temp_path, audio, sr, format="wav")
45
  model.hubert_model = hubert_model
46
  out_audio = model.slice_inference(raw_audio_path=temp_path,
47
  spk=spk,
@@ -54,7 +55,7 @@ def create_fn(model, spk):
54
  auto_predict_f0=auto_f0)
55
  model.clear_empty()
56
  os.remove(temp_path)
57
- return sr, out_audio
58
 
59
  async def tts_fn(input_text, gender, tts_rate, vc_transform, auto_f0, f0p):
60
  if input_text == '':
@@ -75,7 +76,7 @@ def create_fn(model, spk):
75
  temp_path = "temp.wav"
76
  wavfile.write(temp_path, sampling_rate, (audio * np.iinfo(np.int16).max).astype(np.int16))
77
  sr, audio = gr_pu.audio_from_file(temp_path)
78
- input_audio = (sr, audio)
79
  return svc_fn(input_audio, vc_transform, auto_f0, f0p)
80
 
81
  return svc_fn, tts_fn
 
38
  return 0, None
39
  sr, audio = input_audio
40
  audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
41
+ audio = librosa.resample(audio, orig_sr=sr, target_sr=sampling_rate)
42
  if len(audio.shape) > 1:
43
  audio = librosa.to_mono(audio.transpose(1, 0))
44
  temp_path = "temp.wav"
45
+ soundfile.write(temp_path, audio, sampling_rate, format="wav")
46
  model.hubert_model = hubert_model
47
  out_audio = model.slice_inference(raw_audio_path=temp_path,
48
  spk=spk,
 
55
  auto_predict_f0=auto_f0)
56
  model.clear_empty()
57
  os.remove(temp_path)
58
+ return sampling_rate, out_audio
59
 
60
  async def tts_fn(input_text, gender, tts_rate, vc_transform, auto_f0, f0p):
61
  if input_text == '':
 
76
  temp_path = "temp.wav"
77
  wavfile.write(temp_path, sampling_rate, (audio * np.iinfo(np.int16).max).astype(np.int16))
78
  sr, audio = gr_pu.audio_from_file(temp_path)
79
+ input_audio = (sampling_rate, audio)
80
  return svc_fn(input_audio, vc_transform, auto_f0, f0p)
81
 
82
  return svc_fn, tts_fn