james0430 committed on
Commit
97dbdfb
1 Parent(s): 1500a49

initial version

Browse files
Files changed (1) hide show
  1. app.py +45 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch,torchaudio
3
+ from PIL import Image
4
+ import matplotlib
5
+ import matplotlib.pyplot as plt
6
+ import torchaudio.functional as F
7
def _select_audio_path(mic_used, audio_path, mic_file):
    """Return the audio path for the source the user selected.

    Raises:
        ValueError: if the selected source (microphone or upload) is empty.
    """
    chosen = mic_file if mic_used else audio_path
    if chosen is None:
        source = "microphone recording" if mic_used else "uploaded file"
        raise ValueError(f"No audio provided: the {source} input is empty.")
    return chosen


def plot_kaldi_pitch(mic_used, audio_path, mic_file):
    """Plot the Kaldi pitch feature of an audio clip over its waveform.

    Args:
        mic_used: truthy to analyse ``mic_file``, falsy to analyse
            ``audio_path``.
        audio_path: file path of the uploaded audio, or None.
        mic_file: file path of the microphone recording, or None.

    Returns:
        A ``(image, target_audio)`` tuple: the rendered plot as a PIL image
        and the path of the audio that was analysed.

    Raises:
        ValueError: if the selected audio source is empty.
    """
    # Local import so this block does not depend on edits to the file header.
    import io

    target_audio = _select_audio_path(mic_used, audio_path, mic_file)

    waveform, sample_rate = torchaudio.load(target_audio)
    pitch_feature = F.compute_kaldi_pitch(waveform, sample_rate)
    # NOTE(review): the +/-1.3 y-limits on the "Pitch" axis only make sense
    # for a normalized quantity; confirm which column of the feature holds
    # the pitch and which the NCCF.
    pitch, nccf = pitch_feature[..., 0], pitch_feature[..., 1]

    figure, axis = plt.subplots(1, 1)
    try:
        axis.set_title("Kaldi Pitch Feature")
        axis.grid(True)

        # Faint waveform (first channel only) as a backdrop.
        end_time = waveform.shape[1] / sample_rate
        time_axis = torch.linspace(0, end_time, waveform.shape[1])
        axis.plot(time_axis, waveform[0], linewidth=1, color='gray', alpha=0.3)

        # The features have fewer points than the waveform, so each one gets
        # its own time axis stretched over the same duration.
        time_axis = torch.linspace(0, end_time, pitch.shape[1])
        pitch_lines = axis.plot(
            time_axis, pitch[0], linewidth=2, label='Pitch', color='green')
        axis.set_ylim((-1.3, 1.3))

        axis2 = axis.twinx()
        time_axis = torch.linspace(0, end_time, nccf.shape[1])
        nccf_lines = axis2.plot(
            time_axis, nccf[0], linewidth=2, label='NCCF', color='blue',
            linestyle='--')

        # One combined legend for the lines living on both y-axes.
        lines = pitch_lines + nccf_lines
        axis.legend(lines, [line.get_label() for line in lines], loc=0)

        # Render in memory instead of a shared "test.png" so concurrent
        # requests cannot overwrite each other's plot.
        buffer = io.BytesIO()
        figure.savefig(buffer, format="png")
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # a long-running server leaks one figure per request.
        plt.close(figure)

    buffer.seek(0)
    image = Image.open(buffer)
    image.load()  # force decoding now, while the buffer is at hand
    return image, target_audio
38
# Input widgets, in the positional order plot_kaldi_pitch receives them:
# the microphone toggle, the uploaded file, then the microphone recording.
inputs = [
    gr.Checkbox(label="使用麥克風錄音嗎?"),
    gr.Audio(source="upload", type="filepath"),
    gr.Audio(source="microphone", type="filepath"),
]

# Output widgets matching the (image, audio path) tuple the function returns.
outputs = [
    gr.Image(type="pil"),
    gr.Audio(type="filepath"),
]

demo = gr.Interface(fn=plot_kaldi_pitch, inputs=inputs, outputs=outputs)

demo.launch(debug=False)