# Spaces: Sleeping (hosting-page status banner captured with the source; not program code)
import gradio as gr | |
import parselmouth | |
from parselmouth.praat import call | |
import numpy as np | |
import pandas as pd | |
def find_rises_and_peaks_gradient(data, threshold=4, min_successive_rise=3):
    """Flag the start and the peak of sustained pitch rises.

    Walks the ``pitch`` column frame by frame and looks for runs of
    consecutive frames whose numerical gradient (``np.gradient``) is at
    least ``threshold``.  A run that lasts ``min_successive_rise`` frames or
    more is recorded by setting ``rise_point`` to 1 on the frame just before
    the run and ``peak_point`` to 1 on the last frame of the run.

    Args:
        data: DataFrame with a numeric ``pitch`` column; NaN marks frames
            without a pitch value.  The frame is modified in place (the two
            flag columns are added or overwritten).
        threshold: Minimum per-frame gradient that counts as rising.
        min_successive_rise: Minimum number of consecutive rising frames
            required for a run to be reported.

    Returns:
        The same ``data`` object, with integer ``rise_point`` and
        ``peak_point`` columns (0 or 1).
    """
    pitch_values = data['pitch'].to_numpy(dtype=float)
    n_frames = len(pitch_values)

    # Flags are collected positionally and assigned as whole columns at the
    # end, so the result does not depend on the DataFrame's index labels.
    rise_flags = np.zeros(n_frames, dtype=np.int64)
    peak_flags = np.zeros(n_frames, dtype=np.int64)

    # np.gradient needs at least two samples; a shorter track has no rise.
    if n_frames >= 2:
        gradients = np.gradient(pitch_values)
        in_rise = False
        rise_start = 0
        successive_rise_count = 0
        checking_rise = False  # False until one valid frame follows a NaN

        for i in range(1, n_frames):
            if np.isnan(pitch_values[i]):
                # A gap in the track aborts any rise in progress.
                checking_rise = False
                in_rise = False
                successive_rise_count = 0
                continue

            if not checking_rise:
                # Skip the first valid frame after a gap (or after frame 0).
                checking_rise = True
                continue

            if gradients[i] >= threshold:
                if not in_rise:
                    in_rise = True
                    rise_start = i - 1
                successive_rise_count += 1
                continue

            # Gradient fell below the threshold (or is NaN next to a gap):
            # close the current run and report it if it was long enough.
            if in_rise and successive_rise_count >= min_successive_rise:
                rise_flags[rise_start] = 1
                peak_flags[i - 1] = 1
            in_rise = False
            successive_rise_count = 0

        # A run that is still rising on the final frame has no later frame
        # to close it, so flush it here with the last frame as its peak.
        if in_rise and successive_rise_count >= min_successive_rise:
            rise_flags[rise_start] = 1
            peak_flags[n_frames - 1] = 1

    data['rise_point'] = rise_flags
    data['peak_point'] = peak_flags
    return data
def get_pitch(audio_data) -> str:
    """Extract a pitch track from recorded audio and return it as JSON text.

    Args:
        audio_data: ``(sample_rate, samples)`` pair as supplied by the
            "audio" input of the interface, or ``None`` when nothing was
            recorded or uploaded.  ``samples`` is a NumPy array; arrays with
            more than one dimension are averaged over axis 1 to get mono.

    Returns:
        A JSON string (``orient='records'``) with ``time``, ``pitch``,
        ``rise_point`` and ``peak_point`` for every analysis frame, or a
        message starting with ``"Error in pitch extraction: "`` if anything
        goes wrong.  The function never raises, so the UI always receives
        text to display.
    """
    # The UI calls this with None when the form is submitted without audio.
    if audio_data is None:
        return "Error in pitch extraction: no audio provided"

    try:
        rate, data = audio_data
        if data.ndim > 1:  # stereo or multi-channel
            data = np.mean(data, axis=1)  # down-mix by averaging channels
        # Parselmouth expects floating-point samples.
        # NOTE(review): integer PCM is cast without rescaling to [-1, 1];
        # confirm whether amplitude normalisation is wanted here.
        data = data.astype('float64')
        sound = parselmouth.Sound(values=data, sampling_frequency=rate)

        # Praat "To Pitch" arguments: time step (0.0 = automatic),
        # pitch floor 75 Hz, pitch ceiling 500 Hz.
        pitch = call(sound, "To Pitch", 0.0, 75, 500)
        pitch_values = pitch.selected_array['frequency']
        # Frames reported as 0 Hz (presumably unvoiced) become NaN so that
        # downstream gradient analysis treats them as gaps.
        pitch_values[pitch_values == 0] = np.nan

        df_pitch = pd.DataFrame(
            np.column_stack([pitch.xs(), pitch_values]),
            columns=['time', 'pitch'],
        )
        df_pitch = find_rises_and_peaks_gradient(df_pitch)
        return df_pitch.to_json(orient='records')
    except Exception as e:
        # Top-level UI boundary: report the failure as text instead of
        # letting the callback crash.
        return "Error in pitch extraction: " + str(e)
# Web UI: a single audio input wired to get_pitch, with the result shown as text.
# `demo` stays at module level so it can be imported or picked up by tooling.
demo = gr.Interface(fn=get_pitch, inputs="audio", outputs="text")

# Start the server only when run as a script, so importing this module
# (for example from a test) does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()