Spaces:

clr
/

pce

Sleeping

pce / scripts /reaper2pass.py

catiR

force align tts, add voices

366ecce over 1 year ago

2.83 kB

	import soundfile as sf
	import numpy as np
	from scipy import signal
	from pydub import AudioSegment
	import subprocess
	import os

	# Two-pass f0 estimation.
	# Ref.: Hirst, "The analysis by synthesis of speech melody: from data to models".
	# Python wrapper around REAPER for the Gradio app.


	# reaper requires wav file path input,
	# not audio data.
	# reaper does NOT require 16khz mono audio.
	def reaper_soundfile(sound_path, orig_filetype):
	    """Convert an audio file to a temporary WAV file for REAPER.

	    The input is decoded with pydub and re-exported as WAV into a
	    ``files_tmp`` directory under the current working directory (created
	    if missing).

	    Args:
	        sound_path: Path of the source audio file.
	        orig_filetype: Format/extension of the source file without the
	            leading dot (e.g. ``'mp3'``). Passed to
	            ``AudioSegment.from_file`` and used to strip the extension
	            from the file name.

	    Returns:
	        Path of the exported WAV file,
	        ``<cwd>/files_tmp/<name>.tmp.wav``.
	    """
	    aud_data = AudioSegment.from_file(sound_path, orig_filetype)

	    # os.getcwd() instead of spawning `pwd`: no subprocess, and portable.
	    tmp_dir = os.path.join(os.getcwd(), 'files_tmp')
	    # exist_ok avoids the check-then-create race of exists() + mkdir().
	    os.makedirs(tmp_dir, exist_ok=True)

	    # Strip only a trailing ".<ext>"; str.replace would also remove the
	    # extension text wherever else it occurs in the file name.
	    base_name = os.path.basename(sound_path)
	    suffix = f'.{orig_filetype}'
	    if base_name.endswith(suffix):
	        stem = base_name[:-len(suffix)]
	    else:
	        stem = base_name

	    wav_path = os.path.join(tmp_dir, f'{stem}.tmp.wav')
	    aud_data.export(wav_path, format="wav")
	    return wav_path



	# returns f0 data as list of Time, F0 if exists, voicing indicator
	def get_reaper(wav_path, reaper_path, maxf0='700', minf0='50'):
	    """Run REAPER on a WAV file and parse its ASCII f0 output.

	    REAPER is asked to write its f0 track to ``/dev/stdout`` in ASCII
	    form; everything after the ``EST_Header_End`` line is parsed.

	    Args:
	        wav_path: Path of the WAV file to analyse (passed as ``-i``).
	        reaper_path: Path of the REAPER executable.
	        maxf0: Value passed to REAPER's ``-x`` option (string or number).
	        minf0: Value passed to REAPER's ``-m`` option (string or number).

	    Returns:
	        A list of ``[time, f0, voiced]`` float triples, one per frame.

	    Raises:
	        RuntimeError: If REAPER's output contains no ``EST_Header_End``
	            marker (e.g. REAPER failed to process the file).
	    """
	    cmd = [
	        reaper_path, "-i", wav_path, '-f', '/dev/stdout',
	        # str() so numeric bounds are accepted as well as strings.
	        '-x', str(maxf0), '-m', str(minf0), '-a',
	    ]
	    proc = subprocess.run(cmd, capture_output=True)
	    output = proc.stdout.decode()

	    _header, marker, body = output.partition('EST_Header_End\n')
	    if not marker:
	        # Fail with diagnostics instead of an opaque IndexError.
	        stderr_text = proc.stderr.decode(errors='replace').strip()
	        raise RuntimeError(
	            f'REAPER produced no f0 track for {wav_path!r} '
	            f'(exit code {proc.returncode}): {stderr_text}'
	        )

	    rows = [line.split(' ') for line in body.splitlines()]
	    # Keep only 3-field lines; trailing lines in another format are dropped.
	    # Each kept line is "time voiced f0"; it is reordered to
	    # [time, f0, voiced] for callers.
	    return [
	        [float(t), float(f), float(v)]
	        for t, v, f in (row for row in rows if len(row) == 3)
	    ]



	# save simplified data format from get_reaper
	# instead of reaper's original output
	def save_pitch(f0_data, save_path, hed=False):
	    """Write f0 data to a tab-separated text file.

	    Args:
	        f0_data: Iterable of ``(time, f0, voiced)`` triples, as returned
	            by ``get_reaper``.
	        save_path: Destination file path; any existing file is overwritten.
	        hed: When true, write a ``TIME``/``F0``/``VOICED`` header line
	            before the data.
	    """
	    with open(save_path, 'w') as out:
	        if hed:
	            out.write('TIME\tF0\tVOICED\n')
	        # One line per frame: the three values separated by tabs.
	        out.writelines(
	            [f'{time}\t{pitch}\t{voiced}\n' for time, pitch, voiced in f0_data]
	        )


	# 2 pass pitch estimation
	def estimate_pitch(sound_path, reaper_path="REAPER/build/reaper"):
	    """Estimate f0 with two REAPER passes.

	    The first pass runs REAPER with its default f0 bounds. The quartiles
	    of the voiced f0 values from that pass set narrower bounds
	    (floor = 0.75 * Q1, ceiling = 1.5 * Q3) for the second pass.

	    Args:
	        sound_path: Path of the audio file. Files whose extension is not
	            ``wav`` are first converted with ``reaper_soundfile``.
	        reaper_path: Path of the REAPER executable.

	    Returns:
	        The second-pass result of ``get_reaper``. If the first pass found
	        no voiced frames, no bounds can be derived and the first-pass
	        result is returned unchanged.
	    """
	    # splitext only inspects the final path component, so dots in
	    # directory names are not mistaken for an extension.
	    orig_ftype = os.path.splitext(sound_path)[1][1:]
	    if orig_ftype.lower() == 'wav':
	        wav_path = sound_path
	    else:
	        wav_path = reaper_soundfile(sound_path, orig_ftype)

	    first_pass = get_reaper(wav_path, reaper_path)
	    voiced_f0 = [f for _t, f, v in first_pass if float(v) == 1]

	    if not voiced_f0:
	        # Quantiles of an empty sample are undefined; skip the refinement.
	        return first_pass

	    q1 = np.quantile(voiced_f0, 0.25)
	    q3 = np.quantile(voiced_f0, 0.75)

	    pfloor = 0.75 * q1
	    pceil = 1.5 * q3

	    second_pass = get_reaper(
	        wav_path,
	        reaper_path,
	        maxf0=str(round(pceil)),
	        minf0=str(round(pfloor)),
	    )

	    # TODO: the temporary WAV from reaper_soundfile is deliberately not
	    # removed here, because clustering also needs it. The plan is to call
	    # reaper2pass from inside clusterprosody, before it reads the f0 file,
	    # and clean up there.

	    return second_pass