csukuangfj committed on
Commit
c244ef2
1 Parent(s): dbc27cd

add export script

Browse files
96/export-for-ncnn-bilingual-small.sh ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
#
# Export the small bilingual (zh-en) streaming zipformer for ncnn.
#
# Steps performed:
#   1. Make sure ./bilingual-small contains
#        epoch-99.pt -> <model dir>/exp/pretrained.pt
#        data        -> <model dir>/data
#   2. Run ./pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py
#      with --exp-dir ./bilingual-small.
#   3. Run pnnx on the encoder/decoder/joiner *_jit_trace-pnnx.pt files
#      inside ./bilingual-small.
#
# Usage:
#   Run from the directory that contains
#   pruned_transducer_stateless7_streaming/. `pnnx` must be on PATH.
#
# Environment:
#   ICEFALL_MODEL_DIR  Absolute path of the downloaded model repository.
#                      Default:
#                      $HOME/open-source/icefall-models/k2fsa-zipformer-bilingual-zh-en-t
#
# Please download the pretrained model (exp/pretrained.pt and data/) from
# https://huggingface.co/pfluo/k2fsa-zipformer-bilingual-zh-en-t

set -euo pipefail

readonly exp_dir=bilingual-small
readonly model_dir="${ICEFALL_MODEL_DIR:-${HOME}/open-source/icefall-models/k2fsa-zipformer-bilingual-zh-en-t}"

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Create a symlink unless something already exists at the link path.
# Arguments: $1 - absolute path the link should point to
#            $2 - path of the link to create
# Returns:   0 on success; exits via die() if the target is unusable
#######################################
link_if_missing() {
  local target=$1
  local link=$2

  # -L is needed as well: -e reports a dangling symlink as missing, and
  # `ln -s` would then fail because the name is already taken.
  if [[ -e "${link}" || -L "${link}" ]]; then
    return 0
  fi

  # The link lives in a different directory than the caller's cwd, so a
  # relative target would resolve against the wrong directory.
  [[ "${target}" == /* ]] \
    || die "ICEFALL_MODEL_DIR must be an absolute path (got: ${model_dir})"

  # Fail now instead of leaving a dangling link for the exporter to trip over.
  [[ -e "${target}" ]] \
    || die "${target} not found. Download the model or set ICEFALL_MODEL_DIR."

  ln -s -- "${target}" "${link}"
}

mkdir -p "${exp_dir}"
link_if_missing "${model_dir}/exp/pretrained.pt" "${exp_dir}/epoch-99.pt"
link_if_missing "${model_dir}/data" "${exp_dir}/data"

./pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py \
  --lang-dir "./${exp_dir}/data/lang_char_bpe" \
  --exp-dir "./${exp_dir}" \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --decode-chunk-len 96 \
  --num-encoder-layers "2,2,2,2,2" \
  --feedforward-dims "768,768,768,768,768" \
  --nhead "4,4,4,4,4" \
  --encoder-dims "256,256,256,256,256" \
  --attention-dims "192,192,192,192,192" \
  --encoder-unmasked-dims "192,192,192,192,192" \
  --zipformer-downsampling-factors "1,2,4,8,2" \
  --cnn-module-kernels "31,31,31,31,31" \
  --decoder-dim 512 \
  --joiner-dim 512

cd "${exp_dir}" || die "cannot cd to ${exp_dir}"

for module in encoder decoder joiner; do
  pnnx "${module}_jit_trace-pnnx.pt"
done

# After pnnx finishes, modify encoder_jit_trace-pnnx.ncnn.param by hand to
# support sherpa-ncnn. The following is the diff:
#
#   $ diff -uN ./encoder_jit_trace-pnnx.ncnn.param-before encoder_jit_trace-pnnx.ncnn.param
#   --- ./encoder_jit_trace-pnnx.ncnn.param-before 2023-02-16 10:40:18.000000000 +0800
#   +++ encoder_jit_trace-pnnx.ncnn.param 2023-02-16 10:43:21.000000000 +0800
#   @@ -1,5 +1,6 @@
#    7767517
#   -1423 1762
#   +1424 1762
#   +SherpaMetaData sherpa_meta_data1 0 0 0=2 1=96 2=4 3=7 -23316=5,2,2,2,2,2 -23317=5,256,256,256,256,256 -23318=5,192,192,192,192,192 -23319=5,1,2,4,8,2 -23320=5,31,31,31,31,31
#    Input in0 0 1 in0
#    Input in1 0 1 in1
#    Split splitncnn_0 1 2 in1 2 3
#
#------
# Explanation:
#
# (1) 1423 is changed to 1424 because an extra layer, SherpaMetaData, is added.
# (2) SherpaMetaData is the layer type.
# (3) sherpa_meta_data1 is the name of this layer. It must be sherpa_meta_data1.
# (4) 0 0 means this layer has no input or output.
#     NOTE(review): the 0=2 field that follows is not explained in the original
#     notes; presumably it is the model-type tag sherpa-ncnn expects. TODO confirm.
# (5) 1=96, attribute 1; 96 is the value of --decode-chunk-len.
# (6) 2=4, attribute 2; 4 is the value of --num-left-chunks.
#     NOTE(review): this script does not pass --num-left-chunks, so 4 is
#     presumably the exporter's default; verify against export-for-ncnn-zh.py.
# (7) 3=7, attribute 3; 7 is the pad length. The first subsampling layer uses
#     (x_len - 7) // 2, so we use 7 here.
# (8) -23316=5,2,2,2,2,2, attribute 16. This is an array attribute; it is
#     attribute 16 since -23300 - (-23316) = 16. The first element of the array
#     is the length of the array, which is 5 in our case.
#     2,2,2,2,2 is the value of --num-encoder-layers.
# (9) -23317=5,256,256,256,256,256, attribute 17.
#     256,256,256,256,256 is the value of --encoder-dims.
# (10) -23318=5,192,192,192,192,192, attribute 18.
#      192,192,192,192,192 is the value of --attention-dims.
# (11) -23319=5,1,2,4,8,2, attribute 19.
#      1,2,4,8,2 is the value of --zipformer-downsampling-factors.
# (12) -23320=5,31,31,31,31,31, attribute 20.
#      31,31,31,31,31 is the value of --cnn-module-kernels.