artificial-styletts2 / live_demo.py
Dionyssos's picture
freq = 16000
2d0e2b6
raw
history blame
2.08 kB
import argparse
import os
import requests
import subprocess
def command_line_args() -> argparse.ArgumentParser:
    """Build the command-line parser for the live demo client.

    Options:
        --affective: boolean switch registered with ``store_false``; the
            parsed value is ``True`` by default and becomes ``False`` when
            the flag is given.
        --device: device identifier string (default ``'cpu'``).
        --text: text to be synthesized (default ``'How is hoowl'``).

    Returns:
        The configured parser. Parsing is left to the caller.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        '--affective',
        # store_false: the attribute defaults to True and the flag turns it
        # off, so the help text has to say that passing the flag DISABLES
        # the emotional variant.
        help=(
            "Pass this flag to select the non-emotional variant of the "
            "voice; the emotional (affective) variant is used otherwise. "
            "Available voices: https://audeering.github.io/shift/"
        ),
        action='store_false',
    )
    parser.add_argument(
        '--device',
        help="Device ID",
        type=str,
        default='cpu',
    )
    parser.add_argument(
        '--text',
        help="Text to be synthesized.",
        default='How is hoowl',
        type=str,
    )
    return parser
def send_to_server(args, url="http://192.168.88.209:5000", timeout=(10, 600)):
    """POST the prompt to the synthesis server and return the raw response.

    Args:
        args: Namespace-like object with a ``text`` attribute and,
            optionally, a ``scene`` attribute. When ``scene`` is absent
            (the command-line parser does not define it) ``text`` is sent
            for both form fields.
        url: Endpoint that receives the form-encoded request.
        timeout: ``(connect, read)`` timeout in seconds handed to
            ``requests.post`` so an unreachable server cannot block the
            client forever. The read timeout is deliberately generous
            because the server has to generate audio before replying.

    Returns:
        The ``requests.Response`` as received. The HTTP status is NOT
        checked here; callers should inspect ``response.ok`` /
        ``response.status_code`` before using ``response.content``.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    payload = {
        'text': args.text,
        'scene': getattr(args, 'scene', args.text),
    }
    # Form fields whose value is None are not transmitted, so they never
    # show up in the dict the server receives.
    return requests.post(url, data=payload, timeout=timeout)
def cli():
    """Run the interactive loop: prompt, request audio, save it, play it.

    Each prompt of at least 4 characters is sent to the server via
    ``send_to_server``; the response body is written to ``_gen_.wav`` in the
    current directory and played with ``paplay``. Shorter inputs are only
    echoed back. The output path is chosen by the client and is not sent to
    the server.

    The loop ends cleanly on end-of-input (Ctrl-D) or Ctrl-C at the prompt.
    Request failures, non-success HTTP statuses and a missing ``paplay``
    binary are reported and the loop continues instead of aborting.
    """
    parser = command_line_args()
    args = parser.parse_args()
    os.system('cls' if os.name == 'nt' else 'clear')

    out_file = '_gen_.wav'
    while True:
        try:
            args.text = input("\n\n\n\nDescribe Any Sound: \n\n\n\n")
        except (EOFError, KeyboardInterrupt):
            # Leave the session without a traceback.
            print()
            break

        # The scene description currently mirrors the prompt text.
        args.scene = args.text

        if len(args.text) < 4:
            print(f'__\n{args.text}\n')
            continue

        try:
            response = send_to_server(args)
        except requests.RequestException as err:
            print(f'Request failed: {err}')
            continue

        if not response.ok:
            # Never write an error page into the .wav file and try to play it.
            print(f'Server returned status {response.status_code}: '
                  f'{response.text[:200]}')
            continue

        with open(out_file, 'wb') as f:
            f.write(response.content)

        # Play only after the file is closed so the audio is fully flushed.
        try:
            subprocess.run(["paplay", out_file])
        except FileNotFoundError:
            print(f"'paplay' was not found; audio saved to {out_file}")
# Script entry point: start the interactive prompt loop only when this file
# is executed directly, not when it is imported as a module.
if __name__ == '__main__':
    cli()