import soundfile as sf
import numpy as np
from scipy import signal
from pydub import AudioSegment
import subprocess
import os

# Two-pass f0 estimation.
# Ref. Hirst, "The analysis by synthesis of speech melody: from data to models".
# Python wrapper for a Gradio app.


# REAPER requires a wav file path as input, not audio data.
# It does NOT require 16 kHz mono audio.
def reaper_soundfile(sound_path, orig_filetype):
    # Convert any format pydub can read into a temporary wav file for REAPER.
    aud_data = AudioSegment.from_file(sound_path, orig_filetype)
    tmp_dir = os.path.join(os.getcwd(), 'files_tmp')
    if not os.path.exists(tmp_dir):
        os.mkdir(tmp_dir)
    fname = os.path.splitext(os.path.basename(sound_path))[0]
    wav_path = os.path.join(tmp_dir, f'{fname}.tmp.wav')
    aud_data.export(wav_path, format="wav")

    return wav_path



# Returns f0 data as a list of [time, f0 (-1 if unvoiced), voicing indicator] frames.
def get_reaper(wav_path, reaper_path, maxf0='700', minf0='50'):

    f0_data = subprocess.run(
        [reaper_path, "-i", wav_path, '-f', '/dev/stdout', '-x', maxf0, '-m', minf0, '-a'],
        capture_output=True).stdout
    f0_data = f0_data.decode()
    f0_data = f0_data.split('EST_Header_End\n')[1].splitlines()
    f0_data = [l.split(' ') for l in f0_data]
    # Keep only the per-frame lines; any trailing lines have a different format.
    f0_data = [l for l in f0_data if len(l) == 3]
    # Frame lines are "time voicing f0"; reorder to [time, f0, voicing].
    f0_data = [[float(t), float(f), float(v)] for t, v, f in f0_data]

    return f0_data
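
# Illustrative example of the parsing above (made-up values). The code assumes
# REAPER's ASCII frame lines have the form "time voicing f0", e.g.
#   0.005 0 -1.000000
#   0.010 1 121.094000
# which get_reaper reorders into [time, f0, voicing]:
#   [[0.005, -1.0, 0.0], [0.01, 121.094, 1.0]]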
    
    

# Save the simplified data format from get_reaper
#  instead of REAPER's original output.
def save_pitch(f0_data, save_path, hed=False):
    with open(save_path, 'w') as handle:
        if hed:
            handle.write('TIME\tF0\tVOICED\n')
        handle.write(''.join([f'{t}\t{f}\t{v}\n' for t, f, v in f0_data]))
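
# For illustration (made-up values), a file saved with hed=True is tab-separated:
#   TIME    F0        VOICED
#   0.005   -1.0      0.0
#   0.01    121.094   1.0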


# Two-pass pitch estimation:
# run REAPER once with default limits, then re-run it with speaker-specific
# limits derived from the first pass (floor = 0.75 * Q1, ceiling = 1.5 * Q3 of
# the voiced f0 values; cf. the Hirst reference above).
def estimate_pitch(sound_path, reaper_path="REAPER/build/reaper"):

    orig_ftype = sound_path.split('.')[-1]
    if orig_ftype == 'wav':
        wav_path = sound_path
    else:
        wav_path = reaper_soundfile(sound_path, orig_ftype)

    first_pass = get_reaper(wav_path, reaper_path)
    # Keep only f0 values from voiced frames.
    first_pass = [f for t, f, v in first_pass if float(v) == 1]

    q1 = np.quantile(first_pass, 0.25)
    q3 = np.quantile(first_pass, 0.75)

    pfloor = 0.75 * q1
    pceil = 1.5 * q3

    second_pass = get_reaper(wav_path, reaper_path, maxf0=str(round(pceil)), minf0=str(round(pfloor)))

    # Don't remove the temporary wav file yet; it is still needed for clustering.
    # TODO: call the two-pass estimation from inside clusterprosody, before it
    # reads the f0 file, and clean up the temporary file there.

    return second_pass
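

# Minimal usage sketch. Assumptions: REAPER is built at REAPER/build/reaper, and
# 'sample.mp3' / 'files_tmp/sample.f0' are hypothetical paths for illustration.
if __name__ == '__main__':
    f0_frames = estimate_pitch('sample.mp3', reaper_path="REAPER/build/reaper")
    save_pitch(f0_frames, 'files_tmp/sample.f0', hed=True)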