import io

import gradio as gr
import torch
import torchaudio
import torchaudio.functional as F
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt


def plot_kaldi_pitch(mic_used, audio_path, mic_file):
    """Compute Kaldi pitch features for an audio clip and plot them.

    Parameters
    ----------
    mic_used : bool
        True to analyze the microphone recording, False for the uploaded file.
    audio_path : str
        Filesystem path of the uploaded audio file.
    mic_file : str
        Filesystem path of the microphone recording.

    Returns
    -------
    (PIL.Image.Image, str)
        The rendered plot and the path of the audio that was analyzed.
    """
    target_audio = mic_file if mic_used else audio_path

    waveform, sample_rate = torchaudio.load(target_audio)
    pitch_feature = F.compute_kaldi_pitch(waveform, sample_rate)
    pitch, nfcc = pitch_feature[..., 0], pitch_feature[..., 1]

    figure, axis = plt.subplots(1, 1)
    axis.set_title("Kaldi Pitch Feature")
    axis.grid(True)

    end_time = waveform.shape[1] / sample_rate

    # Faint waveform underlay for visual context.
    time_axis = torch.linspace(0, end_time, waveform.shape[1])
    axis.plot(time_axis, waveform[0], linewidth=1, color='gray', alpha=0.3)

    # Pitch track on the left y-axis (clamped to the waveform's amplitude range).
    time_axis = torch.linspace(0, end_time, pitch.shape[1])
    ln1 = axis.plot(time_axis, pitch[0], linewidth=2, label='Pitch', color='green')
    axis.set_ylim((-1.3, 1.3))

    # NFCC on a twin right y-axis so the two feature scales don't fight.
    axis2 = axis.twinx()
    time_axis = torch.linspace(0, end_time, nfcc.shape[1])
    ln2 = axis2.plot(
        time_axis, nfcc[0], linewidth=2, label='NFCC', color='blue', linestyle='--')

    # Merge the legends of both axes into one box.
    lns = ln1 + ln2
    axis.legend(lns, [line.get_label() for line in lns], loc=0)

    # Render to an in-memory buffer instead of a shared "test.png" on disk:
    # a fixed filename is clobbered by concurrent requests and leaves a
    # stray file behind.
    buffer = io.BytesIO()
    figure.savefig(buffer, format="png")
    # Close the figure explicitly; otherwise pyplot keeps every figure alive
    # and the server leaks memory on each request.
    plt.close(figure)
    buffer.seek(0)
    image = Image.open(buffer)
    image.load()  # fully decode now so the buffer may be garbage-collected
    return image, target_audio


inputs = [
    gr.Checkbox(label="使用麥克風錄音嗎?"),
    gr.Audio(source="upload", type="filepath"),
    gr.Audio(source="microphone", type="filepath"),
]
outputs = [gr.Image(type="pil"), gr.Audio(type="filepath")]

demo = gr.Interface(fn=plot_kaldi_pitch, inputs=inputs, outputs=outputs)
demo.launch(debug=False)