Kevin676 committed on
Commit
ba5d078
1 Parent(s): 3e5beb6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -1
app.py CHANGED
@@ -8,6 +8,15 @@ from voicefixer import VoiceFixer
8
  voicefixer = VoiceFixer()
9
  import gradio as gr
10
  import openai
 
 
 
 
 
 
 
 
 
11
 
12
  mes1 = [
13
  {"role": "system", "content": "You are a TOEFL examiner. Help me improve my oral Englsih and give me feedback."}
@@ -72,7 +81,16 @@ def transcribe(apikey, upload, audio, choice1):
72
  cuda=True, # whether to use gpu acceleration
73
  mode = 0) # You can try out mode 0, 1 to find out the best result
74
 
75
- return [result.text, chat_response, "audio1.wav"]
 
 
 
 
 
 
 
 
 
76
 
77
  output_1 = gr.Textbox(label="Speech to Text")
78
  output_2 = gr.Textbox(label="ChatGPT Output")
 
8
  voicefixer = VoiceFixer()
9
  import gradio as gr
10
  import openai
11
+ import torch
12
+ import torchaudio
13
+ from speechbrain.pretrained import SpectralMaskEnhancement
14
+
15
+ enhance_model = SpectralMaskEnhancement.from_hparams(
16
+ source="speechbrain/metricgan-plus-voicebank",
17
+ savedir="pretrained_models/metricgan-plus-voicebank",
18
+ run_opts={"device":"cuda"},
19
+ )
20
 
21
  mes1 = [
22
  {"role": "system", "content": "You are a TOEFL examiner. Help me improve my oral Englsih and give me feedback."}
 
81
  cuda=True, # whether to use gpu acceleration
82
  mode = 0) # You can try out mode 0, 1 to find out the best result
83
 
84
+
85
+
86
+ noisy = enhance_model.load_audio(
87
+ "audio1.wav"
88
+ ).unsqueeze(0)
89
+
90
+ enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
91
+ torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
92
+
93
+ return [result.text, chat_response, "enhanced.wav"]
94
 
95
  output_1 = gr.Textbox(label="Speech to Text")
96
  output_2 = gr.Textbox(label="ChatGPT Output")