File size: 1,663 Bytes
97dbdfb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
969ec62
 
7a99afe
978b553
7a99afe
 
 
 
978b553
7a99afe
705299c
97dbdfb
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import gradio as gr
import torch,torchaudio
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import torchaudio.functional as F
def plot_kaldi_pitch(mic_used, audio_path, mic_file):
  """Compute the Kaldi pitch feature of an audio clip and plot it.

  Parameters
  ----------
  mic_used : bool
      True when the microphone recording should be analyzed instead of
      the uploaded file.
  audio_path : str
      Filesystem path of the uploaded audio clip (used when mic_used is False).
  mic_file : str
      Filesystem path of the microphone recording (used when mic_used is True).

  Returns
  -------
  (PIL.Image.Image, str)
      The rendered plot image and the path of the audio that was analyzed
      (echoed back so Gradio can play it).
  """
  import io  # local import so the module's top-level import block is untouched

  # Pick the microphone recording only when the checkbox was ticked.
  target_audio = mic_file if mic_used else audio_path

  waveform, sample_rate = torchaudio.load(target_audio)

  # compute_kaldi_pitch yields (..., frames, 2): channel 0 = pitch, 1 = NFCC.
  pitch_feature = F.compute_kaldi_pitch(waveform, sample_rate)
  pitch, nfcc = pitch_feature[..., 0], pitch_feature[..., 1]

  figure, axis = plt.subplots(1, 1)
  axis.set_title("Kaldi Pitch Feature")
  axis.grid(True)

  end_time = waveform.shape[1] / sample_rate

  # Faint raw waveform in the background for visual context.
  time_axis = torch.linspace(0, end_time, waveform.shape[1])
  axis.plot(time_axis, waveform[0], linewidth=1, color='gray', alpha=0.3)

  time_axis = torch.linspace(0, end_time, pitch.shape[1])
  ln1 = axis.plot(time_axis, pitch[0], linewidth=2, label='Pitch', color='green')
  axis.set_ylim((-1.3, 1.3))

  # NFCC gets its own y-scale on a twin axis.
  axis2 = axis.twinx()
  time_axis = torch.linspace(0, end_time, nfcc.shape[1])
  ln2 = axis2.plot(
      time_axis, nfcc[0], linewidth=2, label='NFCC', color='blue', linestyle='--')

  # Merge the line handles from both axes into a single legend.
  lns = ln1 + ln2
  labels = [l.get_label() for l in lns]
  axis.legend(lns, labels, loc=0)

  # Render into an in-memory buffer instead of a fixed "test.png" on disk:
  # with a shared filename, concurrent requests would overwrite each other's
  # plot (race) and litter the working directory.
  buffer = io.BytesIO()
  figure.savefig(buffer, format="png")
  # Close the figure explicitly; otherwise every request leaks a live
  # matplotlib figure in this long-running server process.
  plt.close(figure)
  buffer.seek(0)
  image = Image.open(buffer)
  image.load()  # force a full decode before the buffer goes out of scope
  return image, target_audio
# --- Gradio UI wiring -------------------------------------------------------
# Checkbox label asks (in Chinese) whether the microphone recording is used.
use_mic_checkbox = gr.Checkbox(label="使用麥克風錄音嗎?")
upload_audio = gr.Audio(source="upload", type="filepath")
mic_audio = gr.Audio(source="microphone", type="filepath")

demo = gr.Interface(
    fn=plot_kaldi_pitch,
    inputs=[use_mic_checkbox, upload_audio, mic_audio],
    outputs=[gr.Image(type="pil"), gr.Audio(type="filepath")],
)

demo.launch(debug=False)