SDR_v2_streaming_tts

Sleeping

App Files Files Community

SDR_v2_streaming_tts / app.py

neuralleap

Update app.py

b859ab8 verified about 1 month ago

raw history blame contribute delete

No virus

5.37 kB

	import gradio as gr
	import random
	import time
	import requests
	import soundfile as sf
	from pydub import AudioSegment
	import os

	# Text-to-speech models from NVIDIA NeMo, loaded once at startup.
	from nemo.collections.tts.models import FastPitchModel, HifiGanModel

	# FastPitch: used in this file through parse() and generate_spectrogram().
	spec_generator = FastPitchModel.from_pretrained("nvidia/tts_en_fastpitch")

	# HiFi-GAN vocoder: used in this file through convert_spectrogram_to_audio().
	model = HifiGanModel.from_pretrained(model_name="nvidia/tts_hifigan")

	# Disabled Coqui XTTS setup. It sits inside a bare string literal, so none of
	# the statements below are executed.
	"""
	os.environ["COQUI_TOS_AGREED"] = "1"
	from TTS.api import TTS
	import torch
	from TTS.api import TTS

	# Get device
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# List available 🐸TTS models
	print(TTS().list_models())

	# Init TTS
	xtts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device) """

	# Endpoint the running transcript is POSTed to in order to get the SDR reply.
	url = 'https://neuralleap-sdr-v2-api.hf.space/get_sdr_response/'

	# Conversation state. new_chat() and bot() rebind these through `global`.
	# They live at module level, so there is exactly one copy per process.
	# (The former module-level `global chat_chain,id` statement had no effect at
	# this scope and was removed.)
	chat_chain = "SDR : hi is this (customer name)?"  # transcript sent as "userText"
	id = 0  # turn counter sent as "idf"; NOTE: this name shadows the builtin id()



	# Page layout: chat transcript, text input with speed slider, clear button,
	# and an autoplaying audio player for the synthesized reply.
	with gr.Blocks() as demo:
	    chatbot = gr.Chatbot(
	        [[None, "Hi is this (customer name)?"]],
	        avatar_images=(
	            "https://cdnl.iconscout.com/lottie/premium/thumb/user-profile-5568736-4644453.gif",
	            "https://cdn.dribbble.com/users/77598/screenshots/16399264/media/d86ceb1ad552398787fb76f343080aa6.gif",
	        ),
	        height=400,
	        show_label=False,
	        show_copy_button=True,
	        show_share_button=True,
	        likeable=True,
	        layout="panel",
	    )
	    # NOTE(review): the source indentation was lost, so the extent of this Row
	    # is an assumption (textbox + slider side by side) -- confirm against the
	    # deployed layout.
	    with gr.Row():
	        msg = gr.Textbox()
	        slider = gr.Slider(1, 3, value=1.2, label="Speed", info="voice speed")
	    clear = gr.Button("Clear")
	    audio = gr.Audio(autoplay=True)
	#xtts.tts_to_file("this is testing audio sample",speaker_wav="1.wav",language="en",file_path="output.wav")
	# Opening line of the SDR script (placeholders in brackets are left as-is).
	starting_text = " This is [SDR’s name] with Neural Leap. I saw you schedule a call with us for [Insert day and time] to learn more about our AI engineering services. Does that ring a bell?"
	#xtts.tts_to_file(starting_text,speaker_wav="1.wav",language="en",file_path="output.wav")
	# Run the opening line once through the full pipeline at startup:
	# text -> tokens -> spectrogram (spec_generator) -> waveform (model).
	# NOTE(review): the resulting audio_tts is never written to disk or played in
	# this file -- presumably this is a model warm-up; confirm before removing.
	parsed = spec_generator.parse(starting_text)
	spectrogram = spec_generator.generate_spectrogram(tokens=parsed)
	audio_tts = model.convert_spectrogram_to_audio(spec=spectrogram)



	def new_chat():
	    """Reset the conversation state and return the initial UI values.

	    Rebinds the module-level transcript and turn counter to their starting
	    values and logs a separator line.

	    Returns:
	        A ``(history, textbox_value)`` tuple: a chat history holding only the
	        bot's greeting, and an empty string for the message box.
	    """
	    global chat_chain, id
	    chat_chain, id = "SDR : hi is this (customer name)?", 0
	    print("\n==================new chat started==================")
	    greeting_history = [[None, "Hi is this (customer name)?"]]
	    return greeting_history, ""

	def user(user_message, history):
	    """Return a new history with the user's message appended as an open turn.

	    The appended entry is ``[user_message, None]``; the ``None`` slot is the
	    not-yet-written bot reply. The ``history`` list passed in is not modified.
	    """
	    pending_turn = [user_message, None]
	    return [*history, pending_turn]

	def _iter_decoded_text(byte_chunks):
	    """Yield non-empty text pieces decoded from an iterable of UTF-8 byte chunks.

	    An incremental decoder is used so that a multi-byte character split across
	    two chunks is reassembled instead of raising ``UnicodeDecodeError``.
	    Undecodable bytes are replaced with U+FFFD rather than aborting the stream.
	    """
	    import codecs  # local import: only this helper needs it

	    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
	    for raw in byte_chunks:
	        text = decoder.decode(raw)
	        if text:  # empty when the chunk ended in the middle of a character
	            yield text
	    # Flush anything still buffered (e.g. a truncated trailing sequence).
	    tail = decoder.decode(b"", final=True)
	    if tail:
	        yield tail


	def bot(history, msg, slider):
	    """Stream the SDR reply for the latest prospect message, then speak it.

	    Generator event handler. It appends the prospect's message to the
	    module-level transcript, POSTs the transcript to ``url``, streams the
	    reply into the last chat turn, and finally synthesizes the complete reply
	    to ``speech.wav`` with FastPitch + HiFi-GAN.

	    Args:
	        history: Chat history as a list of ``[user, bot]`` pairs. The bot slot
	            of the last pair is overwritten and filled in place.
	        msg: Text of the prospect's message.
	        slider: Value of the speed slider. It is only logged; the code that
	            applied a playback speed-up is disabled, so it does not affect
	            the audio.

	    Yields:
	        ``(history, "", audio)`` triples for the chatbot, the message box and
	        the audio player. While streaming, ``audio`` is a fresh
	        ``gr.Audio(autoplay=True)``; the final triple carries ``"speech.wav"``.

	    Raises:
	        requests.RequestException: On connection failure, timeout, or a
	            non-success HTTP status.

	    NOTE(review): ``chat_chain`` and ``id`` are module globals and
	    ``speech.wav`` is a fixed path, so this state is not per-session.
	    """
	    global chat_chain, id
	    print(float(slider))
	    id = id + 1
	    chat_chain = chat_chain + "\nProspect: " + msg + "\n\n"
	    params = {
	        "userText": chat_chain,
	        "idf": str(id),
	    }

	    full_text = ""
	    # `with` releases the connection even if this generator is abandoned
	    # mid-stream; the (connect, read) timeout keeps a dead backend from
	    # blocking the handler forever.
	    with requests.post(url, params=params, stream=True, timeout=(10, 300)) as response:
	        # Do not treat an HTTP error page as an SDR reply.
	        response.raise_for_status()
	        history[-1][1] = ""
	        # Iterating the response yields raw byte chunks whose boundaries can
	        # fall inside a multi-byte UTF-8 character, hence the helper.
	        for processed_chunk in _iter_decoded_text(response):
	            history[-1][1] += processed_chunk
	            full_text = full_text + processed_chunk
	            if id == 1:
	                # First turn: short pause between chunks, no logging.
	                time.sleep(0.01)
	            else:
	                print(processed_chunk)
	            yield history, "", gr.Audio(autoplay=True)

	    # Drops the final character of the reply -- presumably a trailing
	    # delimiter sent by the API; TODO confirm.
	    full_text = full_text[:-1]
	    chat_chain = chat_chain + "SDR : " + full_text
	    print(chat_chain)

	    # Strip script placeholders/labels that should not be spoken aloud.
	    full_text = full_text.replace("[SDR’s name] with", "")
	    full_text = full_text.replace("SDR:", "")

	    # NOTE: an earlier per-sentence XTTS + pydub speed-up path was disabled;
	    # the whole reply is synthesized in one pass instead.
	    parsed = spec_generator.parse(full_text)
	    spectrogram = spec_generator.generate_spectrogram(tokens=parsed)
	    audio_tts = model.convert_spectrogram_to_audio(spec=spectrogram)
	    # Save the first item of the batch as a 22050 Hz WAV file.
	    sf.write("speech.wav", audio_tts.to('cpu').detach().numpy()[0], 22050)
	    yield history, "", "speech.wav"

	# Event listeners must be registered while the Blocks context is active, so
	# enter `demo` for the wiring.
	with demo:
	    # Submitting the textbox first echoes the user's message (unqueued), then
	    # streams the bot reply into the chat, clears the textbox and feeds audio.
	    submit_event = msg.submit(user, [msg, chatbot], [chatbot], queue=False)
	    submit_event.then(bot, [chatbot, msg, slider], [chatbot, msg, audio])
	    # "Clear" resets the conversation and empties the textbox.
	    clear.click(new_chat, outputs=[chatbot, msg])

	# Enable the request queue (needed for generator handlers to stream) and
	# start the server.
	demo.queue().launch()