"""Gradio chat demo: streams SDR replies from a remote API and speaks them.

Each prospect message is appended to a running transcript, the transcript is
posted to the SDR API, the streamed reply is shown in the chatbot as it
arrives, and the finished reply is synthesized with FastPitch + HiFi-GAN and
played back through a ``gr.Audio`` component.

NOTE(review): the transcript (``chat_chain``) and turn counter (``id``) are
module globals, so they are shared by every browser session served by this
process, and every reply is written to the same ``speech.wav`` file.
"""

import codecs
import os
import random
import time

import gradio as gr
import requests
import soundfile as sf
from pydub import AudioSegment

# Load FastPitch (text -> spectrogram).
from nemo.collections.tts.models import FastPitchModel

# Load vocoder (spectrogram -> waveform).
from nemo.collections.tts.models import HifiGanModel

spec_generator = FastPitchModel.from_pretrained("nvidia/tts_en_fastpitch")
model = HifiGanModel.from_pretrained(model_name="nvidia/tts_hifigan")

# NOTE: an earlier Coqui XTTS backend ("tts_models/multilingual/multi-dataset/
# xtts_v2", with pydub-based speed-up driven by the "Speed" slider) used to
# live here as commented-out code; FastPitch + HiFi-GAN replaced it.

# Endpoint that streams the SDR's reply for a given transcript.
url = 'https://neuralleap-sdr-v2-api.hf.space/get_sdr_response/'

# (connect, read) timeouts in seconds so a stalled backend cannot hang the UI
# handler forever. The read timeout applies between received chunks.
REQUEST_TIMEOUT = (10, 120)

# Sample rate passed to soundfile when writing the synthesized reply.
TTS_SAMPLE_RATE = 22050
SPEECH_PATH = "speech.wav"

# Opening line as stored in the transcript and as shown in the chatbot.
INITIAL_CHAIN = "SDR : hi is this (customer name)?"
INITIAL_GREETING = "Hi is this (customer name)?"

# Conversation state shared by the handlers below (see module docstring).
# `id` shadows the builtin; the name is kept so existing references still work.
chat_chain = INITIAL_CHAIN
id = 0


def _synthesize_to_wav(text, path=SPEECH_PATH):
    """Synthesize *text* with FastPitch + HiFi-GAN and write it to *path*."""
    tokens = spec_generator.parse(text)
    mel = spec_generator.generate_spectrogram(tokens=tokens)
    waveform = model.convert_spectrogram_to_audio(spec=mel)
    # Index [0] selects the first item of the returned batch.
    sf.write(path, waveform.to('cpu').detach().numpy()[0], TTS_SAMPLE_RATE)
    return path


def new_chat():
    """Reset the shared transcript and turn counter.

    Returns:
        A ``(history, textbox_value)`` pair: the chatbot history containing
        only the opening greeting, and an empty string for the message box.
    """
    global chat_chain, id
    chat_chain = INITIAL_CHAIN
    id = 0
    print("\n==================new chat started==================")
    return [[None, INITIAL_GREETING]], ""


def user(user_message, history):
    """Return *history* extended with the user's message and no reply yet."""
    return history + [[user_message, None]]


def bot(history, msg, slider):
    """Stream the SDR reply into the chat, then speak the full reply.

    Args:
        history: Chatbot history; the last entry is the pending user turn and
            its reply slot is filled in place as chunks arrive.
        msg: The prospect's message text.
        slider: Value of the "Speed" slider. It is only logged; the current
            TTS path does not use it.

    Yields:
        ``(history, "", audio)`` triples for the chatbot, the message box and
        the audio component. While streaming, ``audio`` is a fresh
        ``gr.Audio(autoplay=True)``; the final triple carries the path of the
        synthesized WAV file.

    Raises:
        requests.exceptions.RequestException: if the API cannot be reached or
            stops responding within ``REQUEST_TIMEOUT``.
    """
    global chat_chain, id
    print(float(slider))

    id = id + 1
    turn = id
    chat_chain = chat_chain + "\nProspect: " + msg + "\n\n"
    params = {
        "userText": chat_chain,
        "idf": str(turn),
    }

    # Raw chunks can end in the middle of a multi-byte UTF-8 character, so
    # decode incrementally instead of decoding each chunk on its own.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    pieces = []
    history[-1][1] = ""

    with requests.post(
        url, params=params, stream=True, timeout=REQUEST_TIMEOUT
    ) as response:
        for raw_chunk in response:
            piece = decoder.decode(raw_chunk)
            if not piece:
                # Only part of a character has arrived so far.
                continue
            history[-1][1] += piece
            pieces.append(piece)
            if turn == 1:
                time.sleep(0.01)
            else:
                print(piece)
            yield history, "", gr.Audio(autoplay=True)

    # Flush whatever the decoder is still holding (a truncated final char).
    tail = decoder.decode(b"", final=True)
    if tail:
        history[-1][1] += tail
        pieces.append(tail)

    # Drop the last character of the reply.
    # NOTE(review): presumably a trailing delimiter sent by the API - confirm.
    full_text = "".join(pieces)[:-1]
    chat_chain = chat_chain + "SDR : " + full_text
    print(chat_chain)

    # Strip template fragments that should not be read aloud.
    full_text = full_text.replace("[SDR’s name] with", "")
    full_text = full_text.replace("SDR:", "")

    if not full_text.strip():
        # Nothing to speak; still push the final chat state to the UI.
        yield history, "", gr.Audio(autoplay=True)
        return

    # Save the audio to disk in a file called speech.wav
    yield history, "", _synthesize_to_wav(full_text)


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [[None, INITIAL_GREETING]],
        avatar_images=(
            "https://cdnl.iconscout.com/lottie/premium/thumb/user-profile-5568736-4644453.gif",
            "https://cdn.dribbble.com/users/77598/screenshots/16399264/media/d86ceb1ad552398787fb76f343080aa6.gif",
        ),
        height=400,
        show_label=False,
        show_copy_button=True,
        show_share_button=True,
        likeable=True,
        layout="panel",
    )
    with gr.Row():
        msg = gr.Textbox()
        slider = gr.Slider(1, 3, value=1.2, label="Speed", info="voice speed")
    clear = gr.Button("Clear")
    audio = gr.Audio(autoplay=True)

    starting_text = " This is [SDR’s name] with Neural Leap. I saw you schedule a call with us for [Insert day and time] to learn more about our AI engineering services. Does that ring a bell?"
    # The result of this start-up synthesis is not used anywhere below.
    # NOTE(review): possibly kept as a model warm-up - confirm before removing.
    parsed = spec_generator.parse(starting_text)
    spectrogram = spec_generator.generate_spectrogram(tokens=parsed)
    audio_tts = model.convert_spectrogram_to_audio(spec=spectrogram)

    # First record the user's turn (unqueued), then stream the bot's reply.
    msg.submit(user, [msg, chatbot], [chatbot], queue=False).then(
        bot, [chatbot, msg, slider], [chatbot, msg, audio]
    )
    clear.click(new_chat, outputs=[chatbot, msg])

demo.queue()
demo.launch()