analysisvoice / app.py
james0430's picture
Update app.py
7a99afe
raw
history blame contribute delete
No virus
1.66 kB
import io

import gradio as gr
import matplotlib
import matplotlib.pyplot as plt
import torch
import torchaudio
import torchaudio.functional as F
from PIL import Image
def plot_kaldi_pitch(mic_used, audio_path, mic_file):
    """Plot the Kaldi pitch feature of an uploaded or microphone-recorded clip.

    Args:
        mic_used: True to analyse the microphone recording, False for the upload.
        audio_path: path of the uploaded audio file (used when mic_used is False).
        mic_file: path of the microphone recording (used when mic_used is True).

    Returns:
        tuple[PIL.Image.Image, str]: the rendered plot and the path of the
        audio that was analysed (echoed back so Gradio can play it).
    """
    target_audio = mic_file if mic_used else audio_path
    waveform, sample_rate = torchaudio.load(target_audio)
    # Channel 0 of the feature is the pitch track; channel 1 is the NCCF
    # (normalized cross-correlation function), per torchaudio's docs.
    pitch_feature = F.compute_kaldi_pitch(waveform, sample_rate)
    pitch, nccf = pitch_feature[..., 0], pitch_feature[..., 1]

    figure, axis = plt.subplots(1, 1)
    axis.set_title("Kaldi Pitch Feature")
    axis.grid(True)

    end_time = waveform.shape[1] / sample_rate

    # Faint waveform in the background for visual context.
    time_axis = torch.linspace(0, end_time, waveform.shape[1])
    axis.plot(time_axis, waveform[0], linewidth=1, color='gray', alpha=0.3)

    time_axis = torch.linspace(0, end_time, pitch.shape[1])
    ln1 = axis.plot(time_axis, pitch[0], linewidth=2, label='Pitch', color='green')
    axis.set_ylim((-1.3, 1.3))

    # NCCF goes on a secondary y-axis since its scale differs from the pitch's.
    axis2 = axis.twinx()
    time_axis = torch.linspace(0, end_time, nccf.shape[1])
    ln2 = axis2.plot(
        time_axis, nccf[0], linewidth=2, label='NCCF', color='blue', linestyle='--')

    # Merge both axes' line handles into a single legend.
    lns = ln1 + ln2
    labels = [l.get_label() for l in lns]
    axis.legend(lns, labels, loc=0)

    # Render in-memory rather than to a shared "test.png" on disk: a fixed
    # filename races when concurrent Gradio requests run, and leaves a stray
    # file behind. The returned value (a PIL image) is unchanged.
    buffer = io.BytesIO()
    figure.savefig(buffer, format="png")
    plt.close(figure)  # pyplot keeps figures alive otherwise -> memory leak
    buffer.seek(0)
    image = Image.open(buffer)
    image.load()  # force a full decode so the buffer can be released
    return image, target_audio
# UI wiring: a checkbox choosing the audio source, plus the two possible
# sources themselves (file upload and microphone), both passed as file paths.
inputs = [
    gr.Checkbox(label="使用麥克風錄音嗎?"),
    gr.Audio(source="upload", type="filepath"),
    gr.Audio(source="microphone", type="filepath"),
]
# The app returns the rendered pitch plot and echoes back the analysed audio.
outputs = [
    gr.Image(type="pil"),
    gr.Audio(type="filepath"),
]

demo = gr.Interface(fn=plot_kaldi_pitch, inputs=inputs, outputs=outputs)
demo.launch(debug=False)