Spaces:

clr
/

pce

Running

pce / scripts /tapi.py

catiR

force align tts, add voices

366ecce over 1 year ago

1.58 kB

	import json, os, requests, warnings, wave
	warnings.filterwarnings("ignore")


	# synthesise speech with the Tiro TTS service
	# save it as a 16 kHz mono wav file
	# return the absolute path to the wav file
	# saving word alignment timestamps is deprecated
	def tiro(text, voice, save='./', tiroalign=False, timeout=60):
	    """Synthesise `text` with the Tiro TTS service and save it as a WAV file.

	    The audio is requested as raw PCM and wrapped in a WAV container
	    (1 channel, 16000 Hz, 2 bytes per sample) at ``save + voice + '.wav'``.

	    Args:
	        text: Text to synthesise (the request uses LanguageCode "is-IS").
	        voice: Tiro voice id; also used as the output file's base name.
	        save: Path *prefix* that is concatenated directly with the voice
	            name, so a directory must include its trailing separator.
	        tiroalign: If true, also request word-level speech marks and write
	            them to ``save + voice + '.json'``. Deprecated: Tiro no longer
	            intends to support this, and only supports it for 2 voices.
	        timeout: Seconds to wait for each HTTP request before giving up.

	    Returns:
	        Absolute path to the written WAV file.

	    Raises:
	        requests.HTTPError: If the service answers with an error status.
	        requests.Timeout: If a request exceeds `timeout`.
	    """
	    # endpoint working 2023
	    url = 'https://tts.tiro.is/v0/speech'
	    headers = {'Content-Type': 'application/json'}

	    # synthesis
	    payload_tts = {
	        "Engine": "standard",
	        "LanguageCode": "is-IS",
	        "OutputFormat": "pcm",
	        "SampleRate": "16000",
	        "Text": text,
	        "VoiceId": voice,
	    }

	    wname = save + voice + '.wav'
	    # NOTE(review): verify=False disables TLS certificate verification.
	    # Kept to preserve existing behaviour; confirm whether it is still needed.
	    tts_data = requests.post(
	        url, headers=headers, json=payload_tts, verify=False, timeout=timeout
	    )
	    # Fail loudly instead of writing an error response body into the WAV file.
	    tts_data.raise_for_status()

	    # The response is headerless PCM, so the WAV parameters are set here
	    # to match what was requested above.
	    with wave.open(wname, 'wb') as f:
	        f.setnchannels(1)
	        f.setframerate(16000)
	        f.setsampwidth(2)
	        f.writeframes(tts_data.content)

	    # word time alignments
	    # SKIP by default:
	    # tiro no longer intends to support this
	    # and only does support it for 2 voices anyway
	    if tiroalign:
	        payload_aln = {
	            "Engine": "standard",
	            "LanguageCode": "is-IS",
	            "OutputFormat": "json",
	            "SpeechMarkTypes": ["word"],
	            "Text": text,
	            "VoiceId": voice,
	        }
	        aname = save + voice + '.json'
	        aln_data = requests.post(
	            url, headers=headers, json=payload_aln, verify=False, timeout=timeout
	        )
	        aln_data.raise_for_status()
	        # The response presumably holds one JSON object per line; commas are
	        # inserted between them and the whole is wrapped in a single object.
	        # Written as UTF-8 so non-ASCII text does not depend on the locale.
	        with open(aname, 'w', encoding='utf-8') as f:
	            f.write('{"alignments": [')
	            f.write(aln_data.content.decode('utf-8').replace('}\n{', '},\n {'))
	            f.write(']}')

	    return os.path.abspath(wname)