# Page residue from the hosting site (not part of the program), kept as comments:
# neuralleap's picture
# Update app.py
# b859ab8 verified
import gradio as gr
import random
import time
import requests
import soundfile as sf
from pydub import AudioSegment
import os
# Text-to-speech pipeline, loaded once at import time:
#   * ``spec_generator`` (FastPitch) turns text into a spectrogram;
#   * ``model`` (HiFi-GAN vocoder) turns that spectrogram into audio.
from nemo.collections.tts.models import FastPitchModel, HifiGanModel

spec_generator = FastPitchModel.from_pretrained("nvidia/tts_en_fastpitch")
model = HifiGanModel.from_pretrained(model_name="nvidia/tts_hifigan")

# Disabled alternative backend (Coqui XTTS). The setup below is wrapped in a
# bare string literal, so none of it is executed.
"""
os.environ["COQUI_TOS_AGREED"] = "1"
from TTS.api import TTS
import torch
from TTS.api import TTS
# Get device
device = "cuda" if torch.cuda.is_available() else "cpu"
# List available 🐸TTS models
print(TTS().list_models())
# Init TTS
xtts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device) """
# Endpoint every chat turn is POSTed to.
url = 'https://neuralleap-sdr-v2-api.hf.space/get_sdr_response/'

# Conversation state shared by the chat callbacks, which rebind both names
# through ``global`` statements of their own:
#   * ``id``         - number of turns taken in the current chat (sent as "idf");
#   * ``chat_chain`` - transcript so far, seeded with the SDR's opening line.
# NOTE: ``id`` shadows the builtin of the same name; the name is kept because
# the callbacks refer to it.
id = 0
chat_chain = "SDR : hi is this (customer name)?"
with gr.Blocks() as demo:
    # Opening line shown in the chat window (initially and after "Clear").
    _GREETING = "Hi is this (customer name)?"
    # (connect, read) timeouts in seconds so a stalled API cannot hang a turn.
    _REQUEST_TIMEOUT = (10, 120)
    # File the synthesized reply is written to and served from.
    _SPEECH_FILE = "speech.wav"
    # Sample rate handed to soundfile.
    # NOTE(review): presumably the vocoder's output rate - confirm.
    _SPEECH_SAMPLE_RATE = 22050

    # ------------------------------------------------------------------ layout
    chatbot = gr.Chatbot(
        [[None, _GREETING]],
        avatar_images=(
            "https://cdnl.iconscout.com/lottie/premium/thumb/user-profile-5568736-4644453.gif",
            "https://cdn.dribbble.com/users/77598/screenshots/16399264/media/d86ceb1ad552398787fb76f343080aa6.gif",
        ),
        height=400,
        show_label=False,
        show_copy_button=True,
        show_share_button=True,
        likeable=True,
        layout="panel",
    )
    # NOTE(review): the original indentation was lost, so which components sit
    # inside the row is an assumption (textbox + speed slider) - confirm.
    with gr.Row():
        msg = gr.Textbox()
        slider = gr.Slider(1, 3, value=1.2, label="Speed", info="voice speed")
    clear = gr.Button("Clear")
    audio = gr.Audio(autoplay=True)

    # Run the TTS pipeline once on the scripted opening pitch. The result is
    # not used anywhere in this file.
    # NOTE(review): presumably a model warm-up - confirm before removing.
    starting_text = " This is [SDR’s name] with Neural Leap. I saw you schedule a call with us for [Insert day and time] to learn more about our AI engineering services. Does that ring a bell?"
    parsed = spec_generator.parse(starting_text)
    spectrogram = spec_generator.generate_spectrogram(tokens=parsed)
    audio_tts = model.convert_spectrogram_to_audio(spec=spectrogram)

    # --------------------------------------------------------------- callbacks
    def new_chat():
        """Reset the shared conversation state and the chat window.

        Returns:
            A ``(chatbot_value, textbox_value)`` pair: the chat history holding
            only the SDR greeting, and an empty string for the textbox.
        """
        global chat_chain, id
        chat_chain = "SDR : hi is this (customer name)?"
        id = 0
        print("\n==================new chat started==================")
        return [[None, _GREETING]], ""

    def user(user_message, history):
        """Append the user's message to the chat history as an unanswered turn.

        Args:
            user_message: Text the user submitted.
            history: Current chatbot value (list of ``[user, bot]`` pairs); a
                ``None`` value is treated as an empty history.

        Returns:
            A new history list ending with ``[user_message, None]``.
        """
        return (history or []) + [[user_message, None]]

    def _stream_sdr_reply(conversation, turn):
        """Yield the SDR API's reply to ``conversation`` as decoded text pieces.

        Raises:
            requests.RequestException: on connection problems, timeouts or a
                non-2xx HTTP status.
        """
        params = {"userText": conversation, "idf": str(turn)}
        with requests.post(
            url, params=params, stream=True, timeout=_REQUEST_TIMEOUT
        ) as response:
            response.raise_for_status()
            # Decode incrementally: decoding each raw chunk on its own fails
            # whenever a multi-byte character straddles a chunk boundary.
            response.encoding = "utf-8"
            yield from response.iter_content(chunk_size=None, decode_unicode=True)

    def bot(history, msg, slider):
        """Stream the SDR's reply into the chat, then speak it.

        The shared transcript plus the new prospect message is sent to the SDR
        API, the streamed answer is appended to the last history entry piece
        by piece, and the finished answer is synthesized to ``speech.wav``.

        NOTE: ``chat_chain`` and ``id`` are module-level globals, so all users
        of this process share one conversation.

        Args:
            history: Chat history whose last entry is the pending user turn.
            msg: Text of the prospect's message.
            slider: Voice-speed setting. It is currently only logged; the
                playback-speed step is disabled.

        Yields:
            ``(history, "", audio)`` triples for the chatbot, the textbox
            (cleared) and the audio player; the last one carries the path of
            the synthesized speech file.

        Raises:
            gr.Error: if the request to the SDR API fails.
        """
        global chat_chain, id
        print(float(slider))

        # Work on local copies and commit them only after the reply arrived,
        # so a failed request does not leave a dangling prospect line behind.
        turn = id + 1
        conversation = chat_chain + "\nProspect: " + msg + "\n\n"

        history[-1][1] = ""
        pieces = []
        try:
            for piece in _stream_sdr_reply(conversation, turn):
                history[-1][1] += piece
                pieces.append(piece)
                if turn == 1:
                    time.sleep(0.01)
                else:
                    print(piece)
                yield history, "", gr.Audio(autoplay=True)
        except requests.RequestException as exc:
            raise gr.Error(f"SDR service request failed: {exc}") from exc

        # The last character of the reply is dropped, as in the original code.
        # NOTE(review): presumably a trailing delimiter sent by the API - confirm.
        full_text = "".join(pieces)[:-1]
        id = turn
        chat_chain = conversation + "SDR : " + full_text
        print(chat_chain)

        # Remove placeholders/labels that should not be read aloud.
        speech_text = full_text.replace("[SDR’s name] with", "").replace("SDR:", "")
        if not speech_text.strip():
            # Nothing to speak: still refresh the UI, but skip synthesis.
            yield history, "", gr.Audio(autoplay=True)
            return

        parsed = spec_generator.parse(speech_text)
        spectrogram = spec_generator.generate_spectrogram(tokens=parsed)
        audio_tts = model.convert_spectrogram_to_audio(spec=spectrogram)
        sf.write(
            _SPEECH_FILE,
            audio_tts.to("cpu").detach().numpy()[0],
            _SPEECH_SAMPLE_RATE,
        )
        yield history, "", _SPEECH_FILE

    # ------------------------------------------------------------------ wiring
    # Submitting the textbox first records the user turn, then streams the reply.
    msg.submit(user, [msg, chatbot], [chatbot], queue=False).then(
        bot, [chatbot, msg, slider], [chatbot, msg, audio]
    )
    clear.click(new_chat, outputs=[chatbot, msg])
# Turn on Gradio's event queue, then start serving the app.
demo.queue().launch()