# dlaiu
# output as json with processing
# 176ce75
import gradio as gr
import parselmouth
from parselmouth.praat import call
import numpy as np
import pandas as pd
def find_rises_and_peaks_gradient(data, threshold=4, min_successive_rise=3):
    """Flag where sustained pitch rises start and where they end.

    A rise is a run of consecutive samples whose ``np.gradient`` value is at
    least ``threshold``. Runs shorter than ``min_successive_rise`` samples are
    ignored. For each qualifying run, the sample just before the run is
    flagged in ``rise_point`` and the last sample of the run is flagged in
    ``peak_point``.

    NaN pitch values act as gaps: they reset the detector, and the first
    valid sample after a gap (as well as the sample at position 1) is skipped
    before detection resumes.

    Args:
        data: DataFrame with a numeric ``pitch`` column. It is modified in
            place: integer ``rise_point`` and ``peak_point`` columns (0/1)
            are added or overwritten.
        threshold: Minimum gradient (pitch units per sample) for a sample to
            count as rising.
        min_successive_rise: Minimum number of consecutive rising samples
            for a run to be reported.

    Returns:
        The same ``data`` object, with the two flag columns populated.
    """
    pitch_values = data['pitch'].to_numpy(dtype=float)
    n_samples = len(pitch_values)

    # Flags are collected positionally and assigned as whole columns, so the
    # result is correct even when ``data`` does not use a default RangeIndex
    # (label-based ``.at`` writes would hit the wrong rows or append new ones).
    rise_flags = np.zeros(n_samples, dtype=np.int64)
    peak_flags = np.zeros(n_samples, dtype=np.int64)

    # np.gradient needs at least two samples; shorter input has no rises.
    if n_samples >= 2:
        gradients = np.gradient(pitch_values)

        in_rise = False
        rise_start = 0
        successive_rise_count = 0
        checking_rise = False  # becomes True one sample after a gap

        for i in range(1, n_samples):
            if np.isnan(pitch_values[i]):
                # Gap: drop any partial state and wait for valid samples.
                checking_rise = False
                in_rise = False
                successive_rise_count = 0
                continue

            if not checking_rise:
                # First valid sample after a gap only re-arms the detector.
                checking_rise = True
                continue

            if gradients[i] >= threshold:
                if not in_rise:
                    in_rise = True
                    rise_start = i - 1
                successive_rise_count += 1
            elif in_rise:
                # A NaN gradient (sample next to a gap) also lands here,
                # which is what closes a rise that runs into a gap.
                if successive_rise_count >= min_successive_rise:
                    rise_flags[rise_start] = 1
                    peak_flags[i - 1] = 1
                in_rise = False
                successive_rise_count = 0

        # A rise still in progress at the final sample would otherwise be
        # dropped; close it with the last sample as its peak.
        if in_rise and successive_rise_count >= min_successive_rise:
            rise_flags[rise_start] = 1
            peak_flags[n_samples - 1] = 1

    data['rise_point'] = rise_flags
    data['peak_point'] = peak_flags
    return data
def get_pitch(audio_data):
    """Extract a pitch track from audio and return it as a JSON string.

    Args:
        audio_data: ``(sample_rate, samples)`` tuple as supplied by the
            Gradio audio input, or ``None`` when no audio was submitted.
            ``samples`` may be one-dimensional (mono) or two-dimensional
            with channels on axis 1.

    Returns:
        A JSON array (``orient='records'``) with one object per pitch frame
        holding ``time``, ``pitch``, ``rise_point`` and ``peak_point``.
        Frames whose frequency was reported as 0 carry a NaN pitch. On
        failure, a string starting with ``"Error in pitch extraction: "``
        is returned instead of raising.
    """
    # Gradio passes None when the form is submitted without any audio;
    # unpacking it would raise before the error handling below could run.
    if audio_data is None:
        return "Error in pitch extraction: no audio provided"

    rate, data = audio_data
    try:
        if data.ndim > 1:
            # Down-mix stereo / multi-channel audio by averaging channels.
            data = np.mean(data, axis=1)
        # Parselmouth expects floating-point samples.
        data = data.astype('float64')
        sound = parselmouth.Sound(values=data, sampling_frequency=rate)

        # Praat "To Pitch" arguments: time step (0.0 = automatic),
        # pitch floor 75 Hz, pitch ceiling 500 Hz.
        pitch = call(sound, "To Pitch", 0.0, 75, 500)
        pitch_values = pitch.selected_array['frequency']
        # Treat zero-frequency frames as gaps so rise detection skips them.
        pitch_values[pitch_values == 0] = np.nan

        df_pitch = pd.DataFrame(
            np.column_stack([pitch.xs(), pitch_values]),
            columns=['time', 'pitch'],
        )
        df_pitch = find_rises_and_peaks_gradient(df_pitch)
        return df_pitch.to_json(orient='records')
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing.
        return "Error in pitch extraction: " + str(e)
# Minimal Gradio UI: an audio input feeds get_pitch, and the returned string
# (JSON on success, an error message otherwise) is shown as plain text.
demo = gr.Interface(
    fn=get_pitch,
    inputs="audio",
    outputs="text",
)
demo.launch()