import json
import os
import warnings
import wave

import requests

# Silences every warning category for the whole process.
# NOTE(review): presumably intended to hide the InsecureRequestWarning raised
# for the verify=False requests made in tiro() -- confirm, and consider
# narrowing the filter to that category.
warnings.filterwarnings("ignore")
# Synthesise speech with the Tiro TTS service:
# save a 16 kHz mono WAV file
# and word-level timestamps,
# then return the paths to the WAV and alignment files.
def tiro(text, voice, save='./', timeout=60):
    """Synthesise ``text`` with the Tiro TTS service; save audio and word timings.

    Two requests are sent to the same endpoint: one asking for raw PCM audio
    and one asking for word-level speech marks. The audio is wrapped in a
    16 kHz, mono, 2-bytes-per-sample WAV file and the speech marks are stored
    as a JSON document of the form ``{"alignments": [...]}``.

    Args:
        text: Text to synthesise (requested with language code ``is-IS``).
        voice: Voice id sent as ``VoiceId``; also used as the output file stem,
            so repeated calls with the same voice overwrite earlier output.
        save: Prefix prepended verbatim to the output file names. Include a
            trailing path separator to target a directory.
        timeout: Seconds passed to ``requests.post`` for each request.
            ``None`` waits indefinitely.

    Returns:
        Tuple ``(wav_path, alignment_path)`` of absolute paths.

    Raises:
        requests.HTTPError: If either request returns an error status.
        requests.RequestException: On connection problems or timeouts.
        ValueError: If the speech-mark response is not valid JSON.
    """
    # endpoint working 2023
    url = 'https://tts.tiro.is/v0/speech'
    headers = {'Content-Type': 'application/json'}
    sample_rate = 16000

    # synthesis
    payload_tts = {
        "Engine": "standard",
        "LanguageCode": "is-IS",
        "OutputFormat": "pcm",
        "SampleRate": str(sample_rate),
        "Text": text,
        "VoiceId": voice,
    }
    # word time alignments
    payload_aln = {
        "Engine": "standard",
        "LanguageCode": "is-IS",
        "OutputFormat": "json",
        "SpeechMarkTypes": ["word"],
        "Text": text,
        "VoiceId": voice,
    }

    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept to preserve existing behaviour; enable verification if the
    # endpoint's certificate chain validates.
    tts_response = requests.post(
        url, headers=headers, json=payload_tts, verify=False, timeout=timeout
    )
    # Fail loudly instead of writing an error body out as audio.
    tts_response.raise_for_status()
    aln_response = requests.post(
        url, headers=headers, json=payload_aln, verify=False, timeout=timeout
    )
    aln_response.raise_for_status()

    # Parse before touching the filesystem so that a malformed response does
    # not leave a half-written pair of files behind.
    marks = _parse_speech_marks(aln_response.content.decode('utf-8'))

    # Files are named after the voice (a text-based naming scheme was
    # previously tried and disabled).
    wname = save + voice + '.wav'
    aname = save + voice + '.json'

    with wave.open(wname, 'wb') as f:
        f.setnchannels(1)
        f.setframerate(sample_rate)
        f.setsampwidth(2)
        f.writeframes(tts_response.content)

    # json.dump escapes non-ASCII characters by default, so the file reads
    # back identically whatever encoding the consumer opens it with.
    with open(aname, 'w', encoding='utf-8') as f:
        json.dump({"alignments": marks}, f)

    return os.path.abspath(wname), os.path.abspath(aname)


def _parse_speech_marks(raw):
    """Decode a stream of whitespace-separated JSON values into a list."""
    decoder = json.JSONDecoder()
    marks = []
    pos = 0
    end = len(raw)
    while True:
        # raw_decode does not accept leading whitespace, so skip it here.
        while pos < end and raw[pos].isspace():
            pos += 1
        if pos >= end:
            return marks
        mark, pos = decoder.raw_decode(raw, pos)
        marks.append(mark)
# Example usage:
# sentence = "Hæ hæ hæ hæ! Ég heiti Gervimaður Finnland, en þú?"
# voice = "Alfur"
# wf, af = tiro(sentence, voice)
# print(wf, af)