# NOTE(review): removed non-Python scraper residue ("Spaces: Sleeping Sleeping",
# a HuggingFace Spaces status header) that preceded the code and broke parsing.
"""Gradio SDR chat demo.

Streams replies for a sales-development-rep (SDR) conversation from a remote
API and speaks them with NVIDIA FastPitch (spectrogram) + HiFi-GAN (vocoder).
"""

import os
import random
import time

import gradio as gr
import requests
import soundfile as sf
from pydub import AudioSegment

# Load the FastPitch spectrogram generator and the HiFi-GAN vocoder once at
# startup; both models are reused for every synthesized reply.
from nemo.collections.tts.models import FastPitchModel, HifiGanModel

spec_generator = FastPitchModel.from_pretrained("nvidia/tts_en_fastpitch")
model = HifiGanModel.from_pretrained(model_name="nvidia/tts_hifigan")

# --- Alternative Coqui XTTS backend (disabled) -------------------------------
# os.environ["COQUI_TOS_AGREED"] = "1"
# from TTS.api import TTS
# import torch
# device = "cuda" if torch.cuda.is_available() else "cpu"
# print(TTS().list_models())
# xtts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
# -----------------------------------------------------------------------------

# Remote endpoint that streams the SDR's next reply for a given transcript.
url = 'https://neuralleap-sdr-v2-api.hf.space/get_sdr_response/'

# Conversation state shared by the Gradio event handlers defined below.
# NOTE(review): `id` shadows the builtin; the name is kept because the
# handlers reference it via `global id`.
chat_chain = "SDR : hi is this (customer name)?"
id = 0
with gr.Blocks() as demo:
    # Chat window pre-seeded with the SDR's scripted opening line.
    chatbot = gr.Chatbot(
        [[None, "Hi is this (customer name)?"]],
        avatar_images=(
            "https://cdnl.iconscout.com/lottie/premium/thumb/user-profile-5568736-4644453.gif",
            "https://cdn.dribbble.com/users/77598/screenshots/16399264/media/d86ceb1ad552398787fb76f343080aa6.gif",
        ),
        height=400,
        show_label=False,
        show_copy_button=True,
        show_share_button=True,
        likeable=True,
        layout="panel",
    )
    with gr.Row():
        msg = gr.Textbox()
        slider = gr.Slider(1, 3, value=1.2, label="Speed", info="voice speed")
    clear = gr.Button("Clear")
    audio = gr.Audio(autoplay=True)

    # Synthesize the scripted opener once at startup (also warms the models).
    starting_text = (
        " This is [SDR’s name] with Neural Leap. I saw you schedule a call "
        "with us for [Insert day and time] to learn more about our AI "
        "engineering services. Does that ring a bell?"
    )
    parsed = spec_generator.parse(starting_text)
    spectrogram = spec_generator.generate_spectrogram(tokens=parsed)
    audio_tts = model.convert_spectrogram_to_audio(spec=spectrogram)

    def new_chat():
        """Reset the shared transcript and turn counter for a fresh call.

        Returns the initial chatbot history and an empty textbox value.
        """
        global chat_chain, id
        chat_chain = "SDR : hi is this (customer name)?"
        id = 0
        print("\n==================new chat started==================")
        return [[None, "Hi is this (customer name)?"]], ""

    def user(user_message, history):
        """Append the user's message to the history with the bot cell pending."""
        return history + [[user_message, None]]

    def bot(history, msg, slider):
        """Stream the SDR's reply into the chat, then speak it.

        Generator: yields ``(history, textbox_value, audio_value)`` so Gradio
        updates the chat incrementally while the remote API streams, and
        finally yields the path of the synthesized speech file.
        """
        print(float(slider))
        global chat_chain, id
        id += 1
        chat_chain = chat_chain + "\nProspect: " + msg + "\n\n"
        params = {
            "userText": chat_chain,
            "idf": str(id),
        }
        response = requests.post(url, params=params, stream=True)

        full_text = ""
        history[-1][1] = ""
        for chunk in response:
            processed_chunk = chunk.decode('utf-8')
            history[-1][1] += processed_chunk
            full_text += processed_chunk
            if id == 1:
                # First turn only: pace the stream slightly (typing effect).
                time.sleep(0.01)
            else:
                print(processed_chunk)
            yield history, "", gr.Audio(autoplay=True)

        # The API appends one trailing character; drop it, then log the
        # full transcript so far.
        full_text = full_text[:-1]
        chat_chain = chat_chain + "SDR : " + full_text
        print(chat_chain)

        # Strip template fragments that should not be spoken aloud.
        full_text = full_text.replace("[SDR’s name] with", "")
        full_text = full_text.replace("SDR:", "")

        # FastPitch: text -> spectrogram; HiFi-GAN: spectrogram -> waveform.
        parsed = spec_generator.parse(full_text)
        spectrogram = spec_generator.generate_spectrogram(tokens=parsed)
        audio_tts = model.convert_spectrogram_to_audio(spec=spectrogram)
        # Save at FastPitch's native 22.05 kHz sample rate.
        sf.write("speech.wav", audio_tts.to('cpu').detach().numpy()[0], 22050)
        yield history, "", "speech.wav"

    msg.submit(user, [msg, chatbot], [chatbot], queue=False).then(
        bot, [chatbot, msg, slider], [chatbot, msg, audio]
    )
    clear.click(new_chat, outputs=[chatbot, msg])

demo.queue()
demo.launch()