Spaces:
Runtime error
Runtime error
File size: 4,410 Bytes
9901016 38c3668 33b0017 38c3668 33b0017 9901016 116ce39 9901016 02e4605 9901016 4724e77 33b0017 38c3668 33b0017 38c3668 9901016 33b0017 9901016 00655ab 9df23f1 00655ab e094cdd 00655ab 9df23f1 8d5c4ae 00655ab 9901016 9df23f1 9901016 e094cdd 8d5c4ae 9df23f1 8d5c4ae 00655ab 8d5c4ae b14a966 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
# --- Model setup -----------------------------------------------------------
# Everything below runs at import time, in this order. The statement order
# matters: the voicefixer package is pip-installed before it is imported.

# Text-to-speech: load the "your_tts" multilingual model with gpu=True.
from TTS.api import TTS
tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
# Speech-to-text: load the Whisper "small" checkpoint.
import whisper
model = whisper.load_model("small")
import os
# Install/upgrade voicefixer through the shell on every start, right before
# importing it. NOTE(review): presumably the package is not preinstalled in
# the runtime image - confirm; the exit status of the command is not checked.
os.system('pip install voicefixer --upgrade')
from voicefixer import VoiceFixer
voicefixer = VoiceFixer()
import gradio as gr
import openai
import torch
import torchaudio
# Speech enhancement: pretrained SpeechBrain spectral-mask model fetched from
# `source`, stored under `savedir`, and run on the "cuda" device.
from speechbrain.pretrained import SpectralMaskEnhancement
enhance_model = SpectralMaskEnhancement.from_hparams(
source="speechbrain/metricgan-plus-voicebank",
savedir="pretrained_models/metricgan-plus-voicebank",
run_opts={"device":"cuda"},
)
# Conversation histories, one per persona. Each starts with only its system
# prompt; `transcribe` appends the user and assistant turns to the list that
# matches the selected persona, so these lists grow while the process runs.
mes1 = [
    {
        "role": "system",
        "content": "You are a TOEFL examiner. Help me improve my oral English and give me feedback.",
    }
]
mes2 = [
    {
        "role": "system",
        "content": "You are a mental health therapist. Your name is Tina.",
    }
]
mes3 = [
    {
        "role": "system",
        "content": "You are my personal assistant. Your name is Alice.",
    }
]

# Transcripts produced by `transcribe`, in the order they were recognised.
res = []
def transcribe(apikey, upload, audio, choice1):
    """Run one voice-chat turn: speech -> text -> ChatGPT -> synthesised speech.

    Args:
        apikey: OpenAI API key; assigned to ``openai.api_key``.
        upload: File path of the reference voice sample, passed to the TTS
            model as ``speaker_wav``.
        audio: File path of the recorded user speech to transcribe.
        choice1: Persona name: ``"TOEFL"``, ``"Therapist"`` or ``"Alice"``.

    Returns:
        A list ``[transcript, chat_response, "enhanced.wav"]`` where the last
        item is the path of the generated audio file.

    Raises:
        ValueError: If ``audio`` or ``upload`` is missing, or ``choice1`` is
            not one of the known personas.
    """
    # Validate before any model runs, so a missing input fails immediately
    # with a clear message instead of deep inside a third-party library.
    if not audio:
        raise ValueError("No speech recording was provided.")
    if not upload:
        raise ValueError("No reference voice sample was provided.")

    # Pick the persona's module-level conversation history. An unselected
    # radio button arrives as None and is rejected here rather than leaving
    # `messages` unbound further down.
    if choice1 == "TOEFL":
        messages = mes1
    elif choice1 == "Therapist":
        messages = mes2
    elif choice1 == "Alice":
        messages = mes3
    else:
        raise ValueError(
            f"Unknown persona {choice1!r}; choose TOEFL, Therapist or Alice."
        )

    openai.api_key = apikey

    # Load the recording and pad/trim it to fit 30 seconds.
    samples = whisper.load_audio(audio)
    samples = whisper.pad_or_trim(samples)

    # Make the log-Mel spectrogram and move it to the same device as the model.
    mel = whisper.log_mel_spectrogram(samples).to(model.device)

    # Detect the spoken language (logged only).
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode the audio with default options.
    result = whisper.decode(model, mel, whisper.DecodingOptions())
    transcript = result.text
    res.append(transcript)

    # Ask ChatGPT. The new turns are added to the shared history only after
    # the request succeeds, so a failed call does not leave a user message
    # without a reply in the history.
    user_turn = {"role": "user", "content": transcript}
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages + [user_turn],
    )
    chat_response = completion.choices[0].message.content
    messages.extend(
        [user_turn, {"role": "assistant", "content": chat_response}]
    )

    # Synthesise the reply using the reference voice sample.
    # NOTE(review): the three output paths below are fixed file names, so
    # overlapping requests would write to the same files - confirm whether
    # the app can serve requests concurrently.
    tts.tts_to_file(
        chat_response,
        speaker_wav=upload,
        language="en",
        file_path="output.wav",
    )

    # Restore the synthesised audio; mode may be 0, 1 or 2.
    voicefixer.restore(
        input="output.wav",   # input wav file path
        output="audio1.wav",  # output wav file path
        cuda=True,            # use GPU acceleration
        mode=0,
    )

    # Enhance the restored audio as a batch of one; the relative length 1.0
    # marks the whole signal as valid.
    noisy = enhance_model.load_audio("audio1.wav").unsqueeze(0)
    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
    torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)

    return [transcript, chat_response, "enhanced.wav"]
def _build_chat_interface(voice_source, voice_label):
    """Build one chat tab whose reference-voice input uses `voice_source`.

    Args:
        voice_source: Gradio audio source for the reference voice input
            ("upload" or "microphone").
        voice_label: Label shown on the reference voice input.

    Returns:
        A ``gr.Interface`` wired to ``transcribe``.
    """
    # Fresh component instances are created on every call so the two tabs do
    # not share widgets. The input order matches the parameter order of
    # `transcribe`: API key, reference voice, recorded speech, persona.
    input_widgets = [
        gr.Textbox(lines=1, label="请填写您的OpenAI-API-key"),
        gr.Audio(source=voice_source, label=voice_label, type="filepath"),
        gr.Audio(source="microphone", label="和您的专属AI聊天吧!", type="filepath"),
        gr.Radio(
            ["TOEFL", "Therapist", "Alice"],
            label="TOEFL Examiner, Therapist Tina, or Assistant Alice?",
        ),
    ]
    # The output order matches the list returned by `transcribe`.
    output_widgets = [
        gr.Textbox(label="Speech to Text"),
        gr.Textbox(label="ChatGPT Output"),
        gr.Audio(label="Audio with Custom Voice"),
    ]
    return gr.Interface(
        fn=transcribe,
        inputs=input_widgets,
        outputs=output_widgets,
        description="🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",
    )


# Tab 1: the reference voice is an uploaded wav file.
c1 = _build_chat_interface("upload", "请上传您喜欢的声音(wav文件)")
# Tab 2: the reference voice is recorded with the microphone.
c2 = _build_chat_interface("microphone", "请上传您喜欢的声音,并尽量避免噪音")

demo = gr.TabbedInterface(
    [c1, c2],
    ["wav文件上传", "麦克风上传"],
    title='🥳💬💕 - TalktoAI,随时随地,谈天说地!',
)
demo.launch()