# analysisvoice / app.py
# Hugging Face Space by james0430 (commit 7a99afe, 1.66 kB).
# Kaldi pitch-feature visualization demo built with Gradio and torchaudio.
import io

import gradio as gr
import matplotlib
import matplotlib.pyplot as plt
import torch
import torchaudio
import torchaudio.functional as F
from PIL import Image
def plot_kaldi_pitch(mic_used, audio_path, mic_file):
    """Plot the Kaldi pitch feature of an audio clip.

    Parameters
    ----------
    mic_used : bool
        If True, analyze the microphone recording; otherwise the uploaded file.
    audio_path : str
        Filepath of the uploaded clip (may be None when the mic is used).
    mic_file : str
        Filepath of the microphone recording (may be None when unused).

    Returns
    -------
    (PIL.Image.Image, str)
        The rendered pitch/NFCC plot and the path of the analyzed audio.
    """
    # Pick whichever input the user asked to analyze.
    target_audio = mic_file if mic_used else audio_path

    waveform, sample_rate = torchaudio.load(target_audio)
    # compute_kaldi_pitch returns (..., frames, 2); per torchaudio docs,
    # [..., 0] is pitch and [..., 1] is NFCC (a voicing indicator).
    pitch_feature = F.compute_kaldi_pitch(waveform, sample_rate)
    pitch, nfcc = pitch_feature[..., 0], pitch_feature[..., 1]

    figure, axis = plt.subplots(1, 1)
    axis.set_title("Kaldi Pitch Feature")
    axis.grid(True)

    end_time = waveform.shape[1] / sample_rate

    # Faint waveform in the background for context.
    time_axis = torch.linspace(0, end_time, waveform.shape[1])
    axis.plot(time_axis, waveform[0], linewidth=1, color='gray', alpha=0.3)

    # Pitch curve on the left axis.
    time_axis = torch.linspace(0, end_time, pitch.shape[1])
    ln1 = axis.plot(time_axis, pitch[0], linewidth=2, label='Pitch', color='green')
    axis.set_ylim((-1.3, 1.3))

    # NFCC on a twin right axis (its scale differs from the pitch scale).
    axis2 = axis.twinx()
    time_axis = torch.linspace(0, end_time, nfcc.shape[1])
    ln2 = axis2.plot(
        time_axis, nfcc[0], linewidth=2, label='NFCC', color='blue', linestyle='--')

    # Merge the legends of both axes into one box.
    lns = ln1 + ln2
    labels = [l.get_label() for l in lns]
    axis.legend(lns, labels, loc=0)

    # Render in memory instead of a shared "test.png" on disk: avoids a race
    # between concurrent requests and leaves no stray file behind.
    buffer = io.BytesIO()
    figure.savefig(buffer, format="png")
    plt.close(figure)  # pyplot keeps figures alive otherwise -> memory leak
    buffer.seek(0)
    image = Image.open(buffer)
    image.load()  # fully decode now, before the buffer goes out of scope

    return image, target_audio
# Build the Gradio UI: a checkbox selects between the uploaded clip and the
# microphone recording; the app returns the rendered plot plus the audio.
mic_toggle = gr.Checkbox(label="使用麥克風錄音嗎?")
uploaded_clip = gr.Audio(source="upload", type="filepath")
mic_clip = gr.Audio(source="microphone", type="filepath")

inputs = [mic_toggle, uploaded_clip, mic_clip]
outputs = [gr.Image(type="pil"), gr.Audio(type="filepath")]

demo = gr.Interface(fn=plot_kaldi_pitch, inputs=inputs, outputs=outputs)
demo.launch(debug=False)