pce / scripts /tapi.py
catiR
force align tts, add voices
366ecce
import json, os, requests, warnings, wave
warnings.filterwarnings("ignore")
# synthesise speech
# save a 16 kHz mono wav file
# return the path to the wav file
# saving word alignment timestamps is deprecated
def tiro(text, voice, save='./', tiroalign=False, timeout=None):
    """Synthesise ``text`` with the Tiro TTS service and save it as a WAV file.

    The service is asked for raw PCM at 16000 Hz, which is then wrapped in a
    mono, 2-bytes-per-sample WAV container.

    Args:
        text: Text to synthesise (sent with LanguageCode "is-IS").
        voice: Tiro VoiceId; also used as the stem of the output file names.
        save: Prefix prepended verbatim to the output file names. Include a
            trailing path separator when it names a directory.
        tiroalign: When true, additionally request word-level speech marks
            and write them to ``save + voice + '.json'``. Per the original
            author's note, Tiro no longer intends to support this and only
            supports it for two voices, so it is off by default.
        timeout: Optional timeout (seconds) forwarded to ``requests.post``.
            ``None`` waits indefinitely, which is the historical behaviour.

    Returns:
        Absolute path of the written WAV file. The alignment file path is
        not returned.

    Raises:
        requests.HTTPError: If the service answers either request with an
            HTTP error status. Nothing is written for the failed request.
    """
    # endpoint working 2023
    url = 'https://tts.tiro.is/v0/speech'
    headers = {'Content-Type': 'application/json'}

    # synthesis
    payload_tts = {
        "Engine": "standard",
        "LanguageCode": "is-IS",
        "OutputFormat": "pcm",
        "SampleRate": "16000",
        "Text": text,
        "VoiceId": voice,
    }
    wname = save + voice + '.wav'
    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept to preserve existing behaviour; confirm whether it is still needed.
    tts_data = requests.post(
        url, headers=headers, json=payload_tts, verify=False, timeout=timeout
    )
    # Fail before touching the disk so an error body is never saved as audio.
    tts_data.raise_for_status()
    with wave.open(wname, 'wb') as f:
        f.setnchannels(1)
        f.setframerate(16000)
        f.setsampwidth(2)
        f.writeframes(tts_data.content)

    # word time alignments (skipped unless explicitly requested)
    if tiroalign:
        payload_aln = {
            "Engine": "standard",
            "LanguageCode": "is-IS",
            "OutputFormat": "json",
            "SpeechMarkTypes": ["word"],
            "Text": text,
            "VoiceId": voice,
        }
        aname = save + voice + '.json'
        aln_data = requests.post(
            url, headers=headers, json=payload_aln, verify=False, timeout=timeout
        )
        aln_data.raise_for_status()
        # The response is presumably one JSON object per line; commas are
        # inserted between consecutive objects and the whole sequence is
        # wrapped in an "alignments" array.
        marks = aln_data.content.decode().replace('}\n{', '},\n {')
        # Write as UTF-8 explicitly: the text was decoded as UTF-8 above and
        # the locale default encoding may not be able to represent it.
        with open(aname, 'w', encoding='utf-8') as f:
            f.write('{"alignments": [')
            f.write(marks)
            f.write(']}')

    return os.path.abspath(wname)