HoneyTian committed on
Commit
98f9bc1
1 Parent(s): 340eeda
Dockerfile CHANGED
@@ -11,6 +11,7 @@ RUN pip install --upgrade pip
11
 
12
  RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
13
 
 
14
  RUN export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/python3.8/site-packages/k2/lib/
15
 
16
  # Set up a new user named "user" with user ID 1000
 
11
 
12
  RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
13
 
14
+ # libk2_torch_api.so
15
  RUN export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/python3.8/site-packages/k2/lib/
16
 
17
  # Set up a new user named "user" with user ID 1000
examples/wenet/infer.py CHANGED
@@ -8,9 +8,14 @@ import sys
8
  pwd = os.path.abspath(os.path.dirname(__file__))
9
  sys.path.append(os.path.join(pwd, "../../"))
10
 
 
 
11
  import sherpa
 
 
 
12
 
13
- from project_settings import project_path
14
 
15
 
16
  def get_args():
@@ -20,6 +25,12 @@ def get_args():
20
  default=(project_path / "pretrained_models/huggingface/csukuangfj/wenet-chinese-model").as_posix(),
21
  type=str
22
  )
 
 
 
 
 
 
23
  args = parser.parse_args()
24
  return args
25
 
@@ -32,7 +43,7 @@ def main():
32
  tokens_filename = model_dir / "units.txt"
33
 
34
  feat_config = sherpa.FeatureConfig(normalize_samples=False)
35
- feat_config.fbank_opts.frame_opts.samp_freq = 16000
36
  feat_config.fbank_opts.mel_opts.num_bins = 80
37
  feat_config.fbank_opts.frame_opts.dither = 0
38
 
@@ -46,7 +57,28 @@ def main():
46
  )
47
 
48
  recognizer = sherpa.OfflineRecognizer(config)
49
- print(recognizer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  return
51
 
52
 
 
8
  pwd = os.path.abspath(os.path.dirname(__file__))
9
  sys.path.append(os.path.join(pwd, "../../"))
10
 
11
+ import librosa
12
+ import numpy as np
13
  import sherpa
14
+ from scipy.io import wavfile
15
+ import torch
16
+ import torchaudio
17
 
18
+ from project_settings import project_path, temp_directory
19
 
20
 
21
  def get_args():
 
25
  default=(project_path / "pretrained_models/huggingface/csukuangfj/wenet-chinese-model").as_posix(),
26
  type=str
27
  )
28
+ parser.add_argument(
29
+ "--filename",
30
+ default=(project_path / "data/test_wavs/paraformer-zh/四川话.wav").as_posix(),
31
+ type=str
32
+ )
33
+ parser.add_argument("--sample_rate", default=16000, type=int)
34
  args = parser.parse_args()
35
  return args
36
 
 
43
  tokens_filename = model_dir / "units.txt"
44
 
45
  feat_config = sherpa.FeatureConfig(normalize_samples=False)
46
+ feat_config.fbank_opts.frame_opts.samp_freq = args.sample_rate
47
  feat_config.fbank_opts.mel_opts.num_bins = 80
48
  feat_config.fbank_opts.frame_opts.dither = 0
49
 
 
57
  )
58
 
59
  recognizer = sherpa.OfflineRecognizer(config)
60
+
61
+ signal, sample_rate = librosa.load(args.filename, sr=args.sample_rate)
62
+ signal *= 32768.0
63
+ signal = np.array(signal, dtype=np.int16)
64
+
65
+ temp_file = temp_directory / "temp.wav"
66
+ wavfile.write(
67
+ temp_file.as_posix(),
68
+ rate=args.sample_rate,
69
+ data=signal
70
+ )
71
+
72
+ s = recognizer.create_stream()
73
+
74
+ s.accept_wave_file(
75
+ temp_file.as_posix()
76
+ )
77
+ recognizer.decode_stream(s)
78
+
79
+ text = s.result.text.strip()
80
+ text = text.lower()
81
+ print("text: {}".format(text))
82
  return
83
 
84
 
project_settings.py CHANGED
@@ -7,6 +7,9 @@ from pathlib import Path
7
  project_path = os.path.abspath(os.path.dirname(__file__))
8
  project_path = Path(project_path)
9
 
 
 
 
10
 
11
  if __name__ == '__main__':
12
  pass
 
7
  project_path = os.path.abspath(os.path.dirname(__file__))
8
  project_path = Path(project_path)
9
 
10
+ temp_directory = project_path / "temp"
11
+ temp_directory.mkdir(parents=True, exist_ok=True)
12
+
13
 
14
  if __name__ == '__main__':
15
  pass
requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
  gradio==4.29.0
2
  torch==1.13.1
3
  torchaudio==0.13.1
 
4
 
5
  data/wheels/k2-1.23.4.dev20230130+cpu.torch1.13.1-cp38-cp38-linux_x86_64.whl
6
  data/wheels/k2_sherpa-1.1-cp38-cp38-linux_x86_64.whl
 
1
  gradio==4.29.0
2
  torch==1.13.1
3
  torchaudio==0.13.1
4
+ librosa==0.8.1
5
 
6
  data/wheels/k2-1.23.4.dev20230130+cpu.torch1.13.1-cp38-cp38-linux_x86_64.whl
7
  data/wheels/k2_sherpa-1.1-cp38-cp38-linux_x86_64.whl