Kevin676 committed on
Commit
f08a872
·
1 Parent(s): 3ea5aaa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -3
app.py CHANGED
@@ -31,12 +31,23 @@ import torch
31
  import torchaudio
32
  from speechbrain.pretrained import SpectralMaskEnhancement
33
 
 
 
 
 
 
34
  enhance_model = SpectralMaskEnhancement.from_hparams(
35
  source="speechbrain/metricgan-plus-voicebank",
36
  savedir="pretrained_models/metricgan-plus-voicebank",
37
  run_opts={"device":"cuda"},
38
  )
39
 
 
 
 
 
 
 
40
  from TTS.tts.utils.synthesis import synthesis
41
  from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
42
  try:
@@ -114,8 +125,43 @@ def compute_spec(ref_file):
114
 
115
 
116
 
117
- def greet(Text,Voicetoclone,VoiceMicrophone):
118
- text= "%s" % (Text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  if Voicetoclone is not None:
120
  reference_files= "%s" % (Voicetoclone)
121
  print("path url")
@@ -181,7 +227,7 @@ def greet(Text,Voicetoclone,VoiceMicrophone):
181
 
182
  demo = gr.Interface(
183
  fn=greet,
184
- inputs=[gr.inputs.Textbox(label='What would you like the voice to say? (max. 2000 characters per request)'),gr.Audio(type="filepath", source="upload",label='Please upload a voice to clone (max. 30mb)'),gr.Audio(source="microphone", type="filepath", streaming=True)],
185
  outputs="audio",
186
  title="Bilal's Voice Cloning Tool"
187
  )
 
31
  import torchaudio
32
  from speechbrain.pretrained import SpectralMaskEnhancement
33
 
34
+ import whisper
35
+ model = whisper.load_model("small")
36
+
37
+ import openai
38
+
39
  enhance_model = SpectralMaskEnhancement.from_hparams(
40
  source="speechbrain/metricgan-plus-voicebank",
41
  savedir="pretrained_models/metricgan-plus-voicebank",
42
  run_opts={"device":"cuda"},
43
  )
44
 
45
+ mes = [
46
+ {"role": "system", "content": "You are my personal assistant. Try to be helpful."}
47
+ ]
48
+
49
+ res = []
50
+
51
  from TTS.tts.utils.synthesis import synthesis
52
  from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
53
  try:
 
125
 
126
 
127
 
128
+ def greet(Text, audio, Voicetoclone,VoiceMicrophone):
129
+
130
+ openai.api_key = Text
131
+
132
+ # load audio and pad/trim it to fit 30 seconds
133
+ audio = whisper.load_audio(audio)
134
+ audio = whisper.pad_or_trim(audio)
135
+
136
+ # make log-Mel spectrogram and move to the same device as the model
137
+ mel = whisper.log_mel_spectrogram(audio).to(model.device)
138
+
139
+ # detect the spoken language
140
+ _, probs = model.detect_language(mel)
141
+ print(f"Detected language: {max(probs, key=probs.get)}")
142
+
143
+ # decode the audio
144
+ options = whisper.DecodingOptions()
145
+ result = whisper.decode(model, mel, options)
146
+ res.append(result.text)
147
+
148
+ messages = mes
149
+
150
+ # chatgpt
151
+ n = len(res)
152
+ content = res[n-1]
153
+ messages.append({"role": "user", "content": content})
154
+
155
+ completion = openai.ChatCompletion.create(
156
+ model = "gpt-3.5-turbo",
157
+ messages = messages
158
+ )
159
+
160
+ chat_response = completion.choices[0].message.content
161
+
162
+ messages.append({"role": "assistant", "content": chat_response})
163
+
164
+ text= "%s" % (chat_response)
165
  if Voicetoclone is not None:
166
  reference_files= "%s" % (Voicetoclone)
167
  print("path url")
 
227
 
228
  demo = gr.Interface(
229
  fn=greet,
230
+ inputs=[gr.inputs.Textbox(label='请输入您的openai.api_key'),gr.Audio(type="filepath", source="upload",label='Please upload a voice to clone (max. 30mb)'),gr.Audio(source="microphone", type="filepath", streaming=True)],
231
  outputs="audio",
232
  title="Bilal's Voice Cloning Tool"
233
  )