import soundfile as sf
import numpy as np
from scipy import signal
from pydub import AudioSegment
import subprocess
import os
# 2-pass f0 estimation
# ref. Hirst, "The analysis by synthesis of speech melody: from data to models"
# Python wrapper for a Gradio app.
# reaper requires a wav file path as input,
# not in-memory audio data.
# reaper does NOT require 16 kHz mono audio.
def reaper_soundfile(sound_path, orig_filetype):
    # convert any pydub-readable audio file to a temporary wav copy for reaper
    aud_data = AudioSegment.from_file(sound_path, orig_filetype)
    curdir = os.getcwd()
    fname = sound_path.split('/')[-1].replace(orig_filetype, '')
    tmp_path = f'{curdir}/files_tmp/{fname}tmp.wav'
    if not os.path.exists(f'{curdir}/files_tmp'):
        os.mkdir(f'{curdir}/files_tmp')
    aud_data.export(tmp_path, format="wav")
    return tmp_path
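# Example (hypothetical path): convert an uploaded mp3 before running reaper.
# wav_path = reaper_soundfile('uploads/sample.mp3', 'mp3')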
# run reaper and return f0 data as a list of [time, f0, voiced] rows
def get_reaper(wav_path, reaper_path, maxf0='700', minf0='50'):
    f0_data = subprocess.run(
        [reaper_path, "-i", wav_path, '-f', '/dev/stdout', '-x', maxf0, '-m', minf0, '-a'],
        capture_output=True).stdout
    f0_data = f0_data.decode()
    f0_data = f0_data.split('EST_Header_End\n')[1].splitlines()
    f0_data = [l.split(' ') for l in f0_data]
    # drop the last line or 2 lines, which are other info in a different format
    f0_data = [l for l in f0_data if len(l) == 3]
    # reaper prints time, voicing, f0; reorder to time, f0, voicing
    f0_data = [[float(t), float(f), float(v)] for t, v, f in f0_data]
    return f0_data
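# Sketch of the ascii output parsed above (header abbreviated, values made up):
#   <EST header lines>
#   EST_Header_End
#   0.005 0 -1.000000
#   0.010 1 118.234001
# i.e. one "time voicing f0" row per analysis frame after the EST header.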
# save simplified data format from get_reaper
# instead of reaper's original output
def save_pitch(f0_data, save_path, hed=False):
    with open(save_path, 'w') as handle:
        if hed:
            handle.write('TIME\tF0\tVOICED\n')
        handle.write(''.join([f'{t}\t{f}\t{v}\n' for t, f, v in f0_data]))
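# With hed=True the saved tab-separated file looks roughly like this
# (values are illustrative, not real output):
#   TIME    F0      VOICED
#   0.01    118.234 1.0
#   0.015   -1.0    0.0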
# 2 pass pitch estimation
def estimate_pitch(sound_path, reaper_path="REAPER/build/reaper"):
    orig_ftype = sound_path.split('.')[-1]
    if orig_ftype == 'wav':
        wav_path = sound_path
    else:
        tmp_path = reaper_soundfile(sound_path, orig_ftype)
        wav_path = tmp_path
    #print('REAPER FILE PATH:', wav_path)
    # first pass with reaper's wide default range (50-700 Hz)
    first_pass = get_reaper(wav_path, reaper_path)
    first_pass = [f for t, f, v in first_pass if v == 1]
    # per Hirst, derive a speaker-specific range from the first-pass quartiles
    q1 = np.quantile(first_pass, 0.25)
    q3 = np.quantile(first_pass, 0.75)
    pfloor = 0.75 * q1
    pceil = 1.5 * q3
    # second pass with the adapted floor and ceiling
    second_pass = get_reaper(wav_path, reaper_path, maxf0=str(round(pceil)), minf0=str(round(pfloor)))
    #if orig_ftype != 'wav':
    #    subprocess.run(["rm", tmp_path])
    # don't remove the tmp file yet, it is also needed for clustering;
    # therefore reaper2pass should instead be called from inside clusterprosody
    # before it wants to read the f0 file.
    # TODO
    return second_pass
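# Minimal usage sketch; the input path is hypothetical and the reaper binary
# is assumed to sit at the default REAPER/build/reaper location.
if __name__ == '__main__':
    f0_data = estimate_pitch('files_tmp/example.wav')
    save_pitch(f0_data, 'files_tmp/example.f0.tsv', hed=True)
    print(f'estimated {len(f0_data)} f0 frames')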