|
import os, unicodedata, string, random |
|
from scripts.ctcalign import aligner, wav16m |
|
from scripts.tapi import tiro |
|
from scripts.reaper2pass import estimate_pitch, save_pitch |
|
import scripts.clusterprosody as cl |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run(sentence, voices, start_end_word_ix):
    """Run the whole comparison pipeline for one sentence.

    Looks up the corpus recordings of the normalised sentence (computing any
    missing alignment / pitch files on the way), prepares TTS data for every
    requested voice, and hands everything over to ``cl.cluster``.

    Args:
        sentence: Sentence text as entered. Tabs are replaced by spaces
            before the text is passed on to TTS and clustering.
        voices: Sequence of TTS voice names. May be empty, in which case no
            TTS data is prepared.
        start_end_word_ix: Word-range selector, forwarded untouched to
            ``cl.cluster``.

    Returns:
        The eight values produced by ``cl.cluster``, in the order it
        returns them.
    """
    # Fixed deployment locations of the human corpus and its derived data.
    corpus_meta = '/home/user/app/human_data/SQL1adult10s_metadata.tsv'
    speech_dir = '/home/user/app/human_data/audio/squeries/'
    playable_dir = 'https://huggingface.co/spaces/clr/pce/resolve/main/human_data/audio/squeries/'
    speech_aligns = '/home/user/app/human_data/align/squeries/'
    speech_f0 = '/home/user/app/human_data/f0/squeries/'
    align_model_path = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"

    tts_dir = '/home/user/app/tts_data/'

    # Normalise first: the corpus lookup uses the punctuation-free form,
    # while TTS and clustering receive the (tab-free) display form.
    norm_sentence = snorm(sentence)
    sentence = sentence.replace('\t', ' ')

    human_rec_ids = get_samromur_queries(
        norm_sentence, corpus_meta, speech_dir, speech_aligns,
        align_model_path, speech_f0,
    )

    # Bind a default so an empty voice selection no longer dies with an
    # UnboundLocalError at the cluster call below.
    # NOTE(review): assumes cl.cluster tolerates tts_sent_dir=None when
    # `voices` is empty - confirm against scripts.clusterprosody.
    tts_sent_dir = None
    if voices:
        _last_tts_sample, tts_sent_dir = get_tts(
            sentence, voices, tts_dir, align_model_path,
        )

    (f0_fig_c0, f0_fig_c1, f0_fig_c2,
     en_fig_c0, en_fig_c1, en_fig_c2,
     html, tts_results) = cl.cluster(
        norm_sentence, sentence, human_rec_ids, speech_aligns, speech_f0,
        speech_dir, playable_dir, tts_sent_dir, voices, start_end_word_ix,
    )

    return (f0_fig_c0, f0_fig_c1, f0_fig_c2,
            en_fig_c0, en_fig_c1, en_fig_c2,
            html, tts_results)
|
|
|
|
|
|
|
|
|
|
|
|
|
def snorm(s):
    """Normalise a sentence for corpus lookup and forced alignment.

    Lower-cases every character, drops all characters whose Unicode general
    category starts with ``P`` (punctuation), and collapses runs of spaces
    into a single space. Leading/trailing spaces are not stripped.

    Args:
        s: Sentence text.

    Returns:
        The normalised sentence.
    """
    s = ''.join(
        c.lower() for c in s
        if not unicodedata.category(c).startswith('P')
    )
    # Removing punctuation can leave doubled spaces ("a - b" -> "a  b").
    # The search pattern must be TWO spaces: testing for a single space
    # and replacing it with itself would never terminate.
    while '  ' in s:
        s = s.replace('  ', ' ')
    return s
|
|
|
|
|
|
|
def create_temp_sent_list(corpusdb='/home/user/app/human_data/SQL1adult10s_metadata.tsv'):
    """Return the sorted, de-duplicated sentences listed in the corpus metadata.

    Args:
        corpusdb: Path to the tab-separated metadata file. The first line is
            treated as a header and skipped; the sentence is read from the
            fourth column (index 3). Defaults to the deployed corpus file.

    Returns:
        Sorted list of the distinct values found in column 3.

    Raises:
        IndexError: If a data line has fewer than four tab-separated fields.
    """
    # Explicit encoding so the result does not depend on the host locale.
    # NOTE(review): assumes the metadata file is UTF-8 - confirm.
    with open(corpusdb, 'r', encoding='utf-8') as handle:
        rows = handle.read().splitlines()
    return sorted({row.split('\t')[3] for row in rows[1:]})
|
|
|
|
|
|
|
def align_file(wav_path, output_path, norm_sentence, word_aligner = None, model_path = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"):
    """Align one recording to its sentence and save the result as TSV.

    Args:
        wav_path: Audio file to align; loaded through ``wav16m``.
        output_path: Destination file. One line is written per alignment
            row, with the row's items stringified and tab-separated.
        norm_sentence: Already-normalised transcript (the aligner is called
            with ``is_normed=True``).
        word_aligner: Previously built aligner to reuse. When falsy, a new
            one is created from ``model_path``.
        model_path: Model identifier/path used only when a new aligner has
            to be created.

    Returns:
        The aligner that was used, so callers can pass it to the next call
        instead of loading the model again.
    """
    if not word_aligner:
        print('initiating forced alignment, can take some time...')
        # '|' and '[PAD]' are passed as the model's word-separator and
        # blank tokens.
        word_aligner = aligner(model_path, '|', '[PAD]')

    alignment = word_aligner(wav16m(wav_path), norm_sentence, is_normed=True)

    # Render every row before touching the output file, so a failing
    # alignment never leaves an empty file behind.
    rendered_rows = []
    for row in alignment:
        cells = [str(item) for item in row]
        rendered_rows.append('\t'.join(cells) + '\n')

    with open(output_path, 'w') as out_file:
        out_file.write(''.join(rendered_rows))

    return word_aligner
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_samromur_queries(sentence, corpusdb, speech_dir, align_dir, align_model_path, f0_dir, reaper_path = "REAPER/build/reaper"):
    """Collect the corpus recordings of one sentence and ensure their features exist.

    Reads the tab-separated metadata file (first line skipped as header),
    keeps the rows whose fifth column equals ``sentence``, and - when there
    are at least ten such rows - makes sure every recording has an alignment
    file in ``align_dir`` and a pitch file in ``f0_dir``, creating whichever
    are missing.

    Args:
        sentence: Normalised sentence to look for (compared to column 4).
        corpusdb: Path of the metadata TSV.
        speech_dir: Directory prefix prepended to the wav file name
            (column 2); plain string concatenation is used.
        align_dir: Directory prefix for ``.tsv`` alignment files.
        align_model_path: Model passed to ``align_file`` when an alignment
            has to be computed.
        f0_dir: Directory prefix for ``.f0`` pitch files.
        reaper_path: Passed through to ``estimate_pitch``.

    Returns:
        Sorted list of recording ids (wav file name up to ``.wav``), or an
        empty list when fewer than ten recordings match.
    """
    with open(corpusdb, 'r') as corpus_file:
        corpus_lines = corpus_file.read().splitlines()

    matching = []
    for line in corpus_lines[1:]:
        fields = line.split('\t')
        if fields[4] == sentence:
            matching.append(fields)

    n_found = len(matching)
    if n_found < 10:
        if n_found < 1:
            print('This sentence does not exist in the corpus')
        else:
            print('Under 10 copies of the sentence: skipping.')
        return []

    print(f'{n_found} recordings of sentence <{sentence}>')

    for out_dir in (align_dir, f0_dir):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    # Built lazily by align_file on first use, then reused for every
    # further recording.
    word_aligner = None
    for fields in matching:
        wav_name = fields[2]
        wav_path = f'{speech_dir}{wav_name}'

        align_path = align_dir + wav_name.replace('.wav', '.tsv')
        if not os.path.exists(align_path):
            word_aligner = align_file(
                wav_path, align_path, fields[4],
                word_aligner = word_aligner, model_path = align_model_path,
            )

        pitch_path = f0_dir + wav_name.replace('.wav', '.f0')
        if not os.path.exists(pitch_path):
            save_pitch(estimate_pitch(wav_path, reaper_path), pitch_path)

    rec_ids = [fields[2].split('.wav')[0] for fields in matching]
    rec_ids.sort()
    return rec_ids
|
|
|
|
|
|
|
|
|
|
|
def get_tts(sentence,voices,ttsdir,align_model_path,reaper_path = "REAPER/build/reaper"):
    """Make sure audio, alignment and pitch files exist for each TTS voice.

    For every voice the files ``<voice>.wav``, ``<voice>.tsv`` and
    ``<voice>.f0`` are expected inside the sentence's directory (as returned
    by ``setup_tts_sent``). Whatever is missing is produced: audio through
    ``tiro``, alignment through ``align_file``, pitch through
    ``estimate_pitch`` / ``save_pitch``.

    Args:
        sentence: Sentence text to synthesise.
        voices: Iterable of voice names.
        ttsdir: Root directory of the TTS data.
        align_model_path: Model used when an alignment has to be computed.
        reaper_path: Passed through to ``estimate_pitch``.

    Returns:
        ``(sample_path, sentence_dir)`` where ``sample_path`` is the wav path
        of the last voice processed, or ``None`` when ``voices`` is empty.
    """
    dpath = setup_tts_sent(sentence, ttsdir)

    # The normalised text is the same for every voice; compute it once.
    norm_sentence = snorm(sentence)

    # Built lazily by align_file on first use and reused across voices.
    word_aligner = None

    # Stays None for an empty voice list instead of raising
    # UnboundLocalError at the return statement.
    temp_sample_path = None

    for v in voices:
        wpath = f'{dpath}/{v}.wav'
        apath = f'{dpath}/{v}.tsv'
        fpath = f'{dpath}/{v}.f0'

        if not os.path.exists(wpath):
            # Called for its side effect; presumably writes
            # <dpath>/<voice>.wav - verify against scripts.tapi.tiro.
            tiro(sentence, v, save=f'{dpath}/')

        if not os.path.exists(apath):
            word_aligner = align_file(
                wpath, apath, norm_sentence,
                word_aligner = word_aligner, model_path = align_model_path,
            )

        if not os.path.exists(fpath):
            f0_data = estimate_pitch(wpath, reaper_path)
            save_pitch(f0_data, fpath)

        temp_sample_path = wpath

    return temp_sample_path, dpath
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def setup_tts_sent(sentence,ttsdir,meta_path = 'tts_meta.tsv'):
    """Return (creating it if needed) the data directory for a TTS sentence.

    A metadata file ``<ttsdir><meta_path>`` maps directory ids to sentences,
    one ``id<TAB>sentence`` record per line. A sentence already listed there
    gets its existing id back; a new sentence gets a fresh id built from its
    first 33 characters plus a random 6-character suffix, and the mapping is
    appended to the file.

    Args:
        sentence: Sentence text. Newlines, carriage returns and tabs are
            replaced by spaces so the record always fits on one line with
            exactly one separator.
        ttsdir: Root directory for TTS data. It is joined to file names by
            plain concatenation, so it is expected to end with a path
            separator.
        meta_path: File name of the metadata file inside ``ttsdir``.

    Returns:
        Path of the sentence's directory (``<ttsdir><id>``), which exists on
        return.
    """
    os.makedirs(ttsdir, exist_ok=True)

    # Any of these inside the sentence would corrupt the line-based,
    # tab-separated metadata format.
    for ch in ('\n', '\r', '\t'):
        sentence = sentence.replace(ch, ' ')

    # 'a+' creates the file when missing and keeps writes at the end; the
    # read position has to be rewound explicitly before reading.
    with open(f'{ttsdir}{meta_path}', 'a+', encoding='utf-8') as handle:
        handle.seek(0)

        known = {}
        for line in handle.read().splitlines():
            s_id, sep, sent = line.partition('\t')
            if sep:  # silently skip blank / malformed lines
                known[sent] = s_id

        if sentence in known:
            sent_id = known[sentence]
        else:
            stem = sentence.replace(' ', '_')[:33]
            # The id becomes a directory name: path separators coming from
            # the sentence must not create nested or escaping paths.
            for path_sep in ('/', '\\'):
                stem = stem.replace(path_sep, '_')

            alphabet = string.ascii_uppercase + string.digits
            used_ids = set(known.values())
            sent_id = f"{stem}_{''.join(random.choices(alphabet, k=6))}"
            while sent_id in used_ids:
                sent_id = f"{stem}_{''.join(random.choices(alphabet, k=6))}"

            handle.write(f'{sent_id}\t{sentence}\n')

    sent_dir = f'{ttsdir}{sent_id}'
    os.makedirs(sent_dir, exist_ok=True)
    return sent_dir
|
|
|
|
|
|
|
|
|
|
|
def precompute(corpusdb, speech_dir, align_dir, align_model_path, f0_dir, reaper_path, fromi=None,toi=None):
    """Create missing alignment and pitch files for a range of corpus rows.

    Args:
        corpusdb: Path of the tab-separated metadata file; the first line is
            skipped as a header. Column 2 holds the wav file name, column 4
            the normalised sentence handed to the aligner.
        speech_dir: Directory prefix prepended to the wav file name (plain
            string concatenation).
        align_dir: Directory prefix for ``.tsv`` alignment files; created if
            missing.
        align_model_path: Model used when an alignment has to be computed.
        f0_dir: Directory prefix for ``.f0`` pitch files; created if missing.
        reaper_path: Passed through to ``estimate_pitch``.
        fromi: Index of the first data row to process (``None`` = start).
        toi: Index one past the last data row to process (``None`` = end).

    Returns:
        ``max(toi, number_of_rows_processed)`` when ``toi`` is given,
        otherwise the total number of data rows in the corpus.
    """
    # NOTE(review): assumes the metadata file is UTF-8 - confirm.
    with open(corpusdb, 'r', encoding='utf-8') as handle:
        meta = [l.split('\t') for l in handle.read().splitlines()[1:]]
    total = len(meta)

    os.makedirs(align_dir, exist_ok=True)
    os.makedirs(f0_dir, exist_ok=True)

    # Slicing accepts None bounds, so no truthiness test is needed; the old
    # `if fromi and toi` silently ignored the range whenever fromi was 0.
    meta = meta[fromi:toi]

    # Built lazily by align_file on first use, then reused.
    word_aligner = None
    for rec in meta:
        wpath = f'{speech_dir}{rec[2]}'

        apath = align_dir + rec[2].replace('.wav', '.tsv')
        if not os.path.exists(apath):
            word_aligner = align_file(
                wpath, apath, rec[4],
                word_aligner = word_aligner, model_path = align_model_path,
            )

        fpath = f0_dir + rec[2].replace('.wav', '.f0')
        if not os.path.exists(fpath):
            f0_data = estimate_pitch(wpath, reaper_path)
            save_pitch(f0_data, fpath)

    # max(None, int) raises TypeError, so the open-ended case is handled
    # separately.
    if toi is None:
        return total
    return max(toi, len(meta))
|
|
|
|
|
def localtest():
    """Developer smoke test: run the pipeline once against a local checkout.

    Uses one fixed sentence, one fixed voice and paths on the author's
    machine; returns nothing.
    """
    root = '/home/caitlinr/work/peval/pce/'
    reaper_exc = '/home/caitlinr/work/notterra/REAPER/build/reaper'
    align_model_path = "/home/caitlinr/work/models/LVL/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"
    playable_dir = 'https://huggingface.co/spaces/clr/pce/resolve/main/human_data/audio/squeries/'

    corpus_meta = f'{root}human_data/SQL1adult10s_metadata.tsv'
    speech_dir = f'{root}human_data/audio/squeries/'
    speech_aligns = f'{root}human_data/align/squeries/'
    speech_f0 = f'{root}human_data/f0/squeries/'
    tts_dir = f'{root}tts_data/'

    sentence = "Hann spyr: Hvað get ég vitað?"
    voices = ['Alfur_v2']
    start_end_word_ix = '1-3'

    norm_sentence = snorm(sentence)

    human_rec_ids = get_samromur_queries(
        norm_sentence, corpus_meta, speech_dir, speech_aligns,
        align_model_path, speech_f0, reaper_path = reaper_exc,
    )

    if voices:
        _sample, tts_sent_dir = get_tts(
            sentence, voices, tts_dir, align_model_path,
            reaper_path = reaper_exc,
        )

    # Only the first voice is handed on to clustering.
    voices = voices[:1]

    # NOTE(review): ten values are unpacked here, whereas run() unpacks
    # eight from the same cl.cluster call - one of the two is presumably
    # out of date; confirm against scripts.clusterprosody.
    cluster_output = cl.cluster(
        norm_sentence, sentence, human_rec_ids, speech_aligns, speech_f0,
        speech_dir, playable_dir, tts_sent_dir, voices, start_end_word_ix,
    )
    (score, f0_fig_c0, f0_fig_c1, f0_fig_c2,
     en_fig_c0, en_fig_c1, en_fig_c2,
     html, f0_fig_tts, en_fig_tts) = cluster_output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|