kevinwang676 committed on
Commit
76e808f
1 Parent(s): e1f204c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -68,8 +68,10 @@ def pad_buffer(audio):
68
  return audio
69
 
70
 
71
- def generate_voice(text, voice):
72
-
 
 
73
  audio = client.generate(text=text, voice=voice) #response.voices[0]
74
  audio = b"".join(audio)
75
  with open("output.mp3", "wb") as f:
@@ -101,7 +103,7 @@ html_denoise = """
101
  </html>
102
  """
103
 
104
- def convert(text, tgt, voice, save_path):
105
  model = "FreeVC (24kHz)"
106
  with torch.no_grad():
107
  # tgt
@@ -123,7 +125,7 @@ def convert(text, tgt, voice, save_path):
123
  hps.data.mel_fmax
124
  )
125
  # src
126
- src = generate_voice(text, voice)
127
  wav_src, _ = librosa.load(src, sr=hps.data.sampling_rate)
128
  wav_src = torch.from_numpy(wav_src).unsqueeze(0).to(device)
129
  c = cmodel(wav_src).last_hidden_state.transpose(1, 2).to(device)
@@ -304,9 +306,6 @@ import shutil
304
 
305
  def convert_from_srt(api_key, filename, audio_full, voice, multilingual):
306
 
307
- client = ElevenLabs(
308
- api_key=api_key, # Defaults to ELEVEN_API_KEY
309
- )
310
  subtitle_list = read_srt(filename)
311
 
312
  #audio_data, sr = librosa.load(audio_full, sr=44100)
@@ -322,7 +321,7 @@ def convert_from_srt(api_key, filename, audio_full, voice, multilingual):
322
  trim_audio([[i.start_time, i.end_time]], audio_full, f"sliced_audio_{i.index}")
323
  print(f"正在合成第{i.index}条语音")
324
  print(f"语音内容:{i.text}")
325
- convert(i.text, f"sliced_audio_{i.index}_0.wav", voice, i.text + " " + str(i.index))
326
  except Exception:
327
  pass
328
  else:
@@ -332,7 +331,7 @@ def convert_from_srt(api_key, filename, audio_full, voice, multilingual):
332
  trim_audio([[i.start_time, i.end_time]], audio_full, f"sliced_audio_{i.index}")
333
  print(f"正在合成第{i.index}条语音")
334
  print(f"语音内容:{i.text.splitlines()[1]}")
335
- convert(i.text.splitlines()[1], f"sliced_audio_{i.index}_0.wav", voice, i.text.splitlines()[1] + " " + str(i.index))
336
  except Exception:
337
  pass
338
  merge_audios("output")
 
68
  return audio
69
 
70
 
71
+ def generate_voice(api_key, text, voice):
72
+ client = ElevenLabs(
73
+ api_key=api_key, # Defaults to ELEVEN_API_KEY
74
+ )
75
  audio = client.generate(text=text, voice=voice) #response.voices[0]
76
  audio = b"".join(audio)
77
  with open("output.mp3", "wb") as f:
 
103
  </html>
104
  """
105
 
106
+ def convert(api_key, text, tgt, voice, save_path):
107
  model = "FreeVC (24kHz)"
108
  with torch.no_grad():
109
  # tgt
 
125
  hps.data.mel_fmax
126
  )
127
  # src
128
+ src = generate_voice(api_key, text, voice)
129
  wav_src, _ = librosa.load(src, sr=hps.data.sampling_rate)
130
  wav_src = torch.from_numpy(wav_src).unsqueeze(0).to(device)
131
  c = cmodel(wav_src).last_hidden_state.transpose(1, 2).to(device)
 
306
 
307
  def convert_from_srt(api_key, filename, audio_full, voice, multilingual):
308
 
 
 
 
309
  subtitle_list = read_srt(filename)
310
 
311
  #audio_data, sr = librosa.load(audio_full, sr=44100)
 
321
  trim_audio([[i.start_time, i.end_time]], audio_full, f"sliced_audio_{i.index}")
322
  print(f"正在合成第{i.index}条语音")
323
  print(f"语音内容:{i.text}")
324
+ convert(api_key, i.text, f"sliced_audio_{i.index}_0.wav", voice, i.text + " " + str(i.index))
325
  except Exception:
326
  pass
327
  else:
 
331
  trim_audio([[i.start_time, i.end_time]], audio_full, f"sliced_audio_{i.index}")
332
  print(f"正在合成第{i.index}条语音")
333
  print(f"语音内容:{i.text.splitlines()[1]}")
334
+ convert(api_key, i.text.splitlines()[1], f"sliced_audio_{i.index}_0.wav", voice, i.text.splitlines()[1] + " " + str(i.index))
335
  except Exception:
336
  pass
337
  merge_audios("output")