# speechEvaluation_normal / text_cloud.py
# m-kazuki
# init
# 6e77e55
import os
import pyopenjtalk
import re
# Shared transcript file; each line is split on ':' into an utterance id
# and the text handed to pyopenjtalk.
in_path = '/mnt/vol21/i21_mkazuki/text/transcripts_utf8.txt'
# File that receives the generated list at the end of the script.
out_path = './text_cloud.txt'
# Accumulates the finished output lines (each already newline-terminated).
output = list()
# Running speaker id written as the third '|'-separated field.
j = 0
# JVS speaker numbers (jvsNNN) that are skipped entirely; a skipped speaker
# does not consume a speaker id.
excluded_speakers = frozenset([
    1, 3, 5, 6, 9, 11, 12, 13, 20, 21, 22, 23, 28, 31, 32, 33, 34, 37,
    41, 42, 44, 45, 46, 47, 48, 49, 50, 52, 54, 68, 70, 71, 73, 74, 75,
    76, 77, 78, 79, 80, 81, 86, 87, 88, 89, 97, 98, 99, 100,
])
wav_root = '/mnt/nfs-mnj-archive-03/group/creative/i21_mkazuki/wav'
# (corpus subset directory, prefix placed before the utterance id in the
# emitted wav path)
jvs_subsets = (('nonpara30', ''), ('whisper10', 'whisper_'))

for i in range(1, 101):
    if i in excluded_speakers:
        continue
    speaker_dir = 'jvs' + format(i, '03')
    # Only the first retained speaker (id 0) contributes lines.  The other
    # iterations exist solely to advance j so the speaker ids used after
    # this loop keep their values; the transcripts of those speakers used
    # to be read and converted with g2p only to be thrown away, as was the
    # parallel100 pass, so that work is no longer done.
    if j == 0:
        for subset, prefix in jvs_subsets:
            # Transcripts live under jvs_ver1/, while the existence check
            # below looks at the wav tree without jvs_ver1/ (kept as is).
            transcript_path = (wav_root + '/jvs_ver1/' + speaker_dir + '/'
                               + subset + '/transcripts_utf8.txt')
            # The transcripts are UTF-8; do not depend on the locale.
            with open(transcript_path, encoding='utf-8') as f:
                for line in f:
                    # Split on the first ':' only so text containing a
                    # colon is not truncated; lines without ':' (e.g. a
                    # blank trailing line) are skipped instead of raising
                    # IndexError.
                    utt_id, sep, text = line.partition(':')
                    if not sep:
                        continue
                    wav_path = (wav_root + '/' + speaker_dir + '/' + subset
                                + '/' + utt_id + '.wav')
                    if not os.path.isfile(wav_path):
                        continue
                    # Phoneme string with pyopenjtalk's 'pau' marker
                    # replaced by ','.
                    phonemes = pyopenjtalk.g2p(text).replace('pau', ',')
                    output.append('/home/mkazuki/eval_wav/' + prefix + utt_id
                                  + '.wav|' + phonemes + '|' + str(j) + '\n')
    j += 1
# Entries for the three additional speakers.  Every speaker/style
# combination uses the same transcript lines, so the file is read and
# converted to phonemes once instead of once per combination.

# (style tag used in the wav file name, extra text appended after the
# speaker id).  NOTE(review): '|2' / '|3' are presumably a style/emotion id
# read by whatever consumes this list -- confirm.
style_variants = (('normal', ''), ('happy', '|2'), ('angry', '|3'))

# The transcript is UTF-8; do not depend on the locale.
with open(in_path, encoding='utf-8') as f:
    transcript_lines = f.readlines()

# (1-based line number, phoneme string) for every line after the 95th; the
# line number becomes the zero-padded index in the wav file name.
eval_phonemes = []
for k, line in enumerate(transcript_lines, start=1):
    if k <= 95:
        continue
    # Split on the first ':' only so text containing a colon is not
    # truncated; lines without ':' (e.g. a blank trailing line) are skipped
    # instead of raising IndexError.
    _, sep, text = line.partition(':')
    if not sep:
        continue
    # Phoneme string with pyopenjtalk's 'pau' marker replaced by ','.
    eval_phonemes.append((k, pyopenjtalk.g2p(text).replace('pau', ',')))

for position, speaker in enumerate(('tsuchiya', 'uemura', 'fujitou')):
    # Each speaker gets the next speaker id; j is advanced between
    # speakers only, so it ends on the last speaker's id as before.
    if position:
        j += 1
    # Output order is unchanged: per speaker, all 'normal' lines, then
    # 'happy', then 'angry'.
    for style, extra_field in style_variants:
        for k, phonemes in eval_phonemes:
            output.append('/home/mkazuki/eval_wav/' + speaker + '_' + style
                          + '_' + format(k, '03') + '.wav|' + phonemes
                          + '|' + str(j) + extra_field + '\n')
# Persist the collected entries; every element already ends with '\n', so
# they are concatenated without any extra separator.
with open(out_path, 'w') as out_file:
    out_file.write(''.join(output))