HelloWorld2307 committed on
Commit
995e681
·
verified ·
1 Parent(s): 1b0cd2e

Upload 2flow folder

Browse files
Files changed (32) hide show
  1. 2flow/Dockerfile +3 -0
  2. 2flow/models/.gitattributes +35 -0
  3. 2flow/models/README.md +20 -0
  4. 2flow/models/downloads/F5TTS_Base/model_1200000.pt +3 -0
  5. 2flow/models/downloads/F5TTS_Base/model_1200000.safetensors +3 -0
  6. 2flow/models/downloads/F5TTS_Base/vocab.txt +2545 -0
  7. 2flow/models/downloads/F5TTS_Base_bigvgan/model_1250000.pt +3 -0
  8. 2flow/models/downloads/F5TTS_v1_Base/model_1250000.safetensors +3 -0
  9. 2flow/models/downloads/F5TTS_v1_Base/vocab.txt +2545 -0
  10. 2flow/models/downloads/F5TTS_v1_Base_no_zero_init/model_1250000.safetensors +3 -0
  11. 2flow/patch/__init__.py +196 -0
  12. 2flow/patch/f5tts/model.py +222 -0
  13. 2flow/patch/f5tts/modules.py +447 -0
  14. 2flow/requirements.txt +5 -0
  15. 2flow/scripts/build.sh +2 -0
  16. 2flow/scripts/f5/build_engine.sh +5 -0
  17. 2flow/scripts/f5/fix_lib.py +32 -0
  18. 2flow/scripts/f5/pre_build_engine.sh +4 -0
  19. 2flow/scripts/init.sh +6 -0
  20. 2flow/scripts/vocoder/build_engine.sh +3 -0
  21. 2flow/scripts/vocoder/export_vocos_trt.sh +43 -0
  22. 2flow/scripts/vocoder/pre_build_engine.sh +3 -0
  23. 2flow/services/triton/f5_tts_triton_server/f5_tts/1/f5_tts_trtllm.py +486 -0
  24. 2flow/services/triton/f5_tts_triton_server/f5_tts/1/model.py +278 -0
  25. 2flow/services/triton/f5_tts_triton_server/f5_tts/config.pbtxt +81 -0
  26. 2flow/services/triton/f5_tts_triton_server/vocoder/1/.gitkeep +0 -0
  27. 2flow/services/triton/f5_tts_triton_server/vocoder/config.pbtxt +32 -0
  28. 2flow/utils/tts/__pycache__/convert_checkpoint.cpython-310.pyc +0 -0
  29. 2flow/utils/tts/__pycache__/convert_checkpoint.cpython-312.pyc +0 -0
  30. 2flow/utils/tts/__pycache__/export_vocoder_to_onnx.cpython-312.pyc +0 -0
  31. 2flow/utils/tts/convert_checkpoint.py +378 -0
  32. 2flow/utils/tts/export_vocoder_to_onnx.py +138 -0
2flow/Dockerfile ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ FROM nvcr.io/nvidia/tritonserver:25.04-py3
2
+ WORKDIR /workspace/2flow
3
+ COPY . .
2flow/models/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
2flow/models/README.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-4.0
3
+ pipeline_tag: text-to-speech
4
+ library_name: f5-tts
5
+ datasets:
6
+ - amphion/Emilia-Dataset
7
+ ---
8
+
9
+ Download the [F5-TTS](https://huggingface.co/SWivid/F5-TTS/tree/main/F5TTS_Base) or [E2 TTS](https://huggingface.co/SWivid/E2-TTS/tree/main/E2TTS_Base) checkpoints and place them under `ckpts/`:
10
+ ```
11
+ ckpts/
12
+ F5TTS_v1_Base/
13
+ model_1250000.safetensors
14
+ F5TTS_Base/
15
+ model_1200000.safetensors
16
+ E2TTS_Base/
17
+ model_1200000.safetensors
18
+ ```
19
+ GitHub: https://github.com/SWivid/F5-TTS
20
+ Paper: [F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching](https://huggingface.co/papers/2410.06885)
2flow/models/downloads/F5TTS_Base/model_1200000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2f1bcbe1582a04468920abf227aa75f18faf57d24d5b141195eb4e55f39bc03
3
+ size 1348767810
2flow/models/downloads/F5TTS_Base/model_1200000.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4180310f91d592cee4bc14998cd37c781f779cf105e8ca8744d9bd48ca7046ae
3
+ size 1348645281
2flow/models/downloads/F5TTS_Base/vocab.txt ADDED
@@ -0,0 +1,2545 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ !
3
+ "
4
+ #
5
+ $
6
+ %
7
+ &
8
+ '
9
+ (
10
+ )
11
+ *
12
+ +
13
+ ,
14
+ -
15
+ .
16
+ /
17
+ 0
18
+ 1
19
+ 2
20
+ 3
21
+ 4
22
+ 5
23
+ 6
24
+ 7
25
+ 8
26
+ 9
27
+ :
28
+ ;
29
+ =
30
+ >
31
+ ?
32
+ @
33
+ A
34
+ B
35
+ C
36
+ D
37
+ E
38
+ F
39
+ G
40
+ H
41
+ I
42
+ J
43
+ K
44
+ L
45
+ M
46
+ N
47
+ O
48
+ P
49
+ Q
50
+ R
51
+ S
52
+ T
53
+ U
54
+ V
55
+ W
56
+ X
57
+ Y
58
+ Z
59
+ [
60
+ \
61
+ ]
62
+ _
63
+ a
64
+ a1
65
+ ai1
66
+ ai2
67
+ ai3
68
+ ai4
69
+ an1
70
+ an3
71
+ an4
72
+ ang1
73
+ ang2
74
+ ang4
75
+ ao1
76
+ ao2
77
+ ao3
78
+ ao4
79
+ b
80
+ ba
81
+ ba1
82
+ ba2
83
+ ba3
84
+ ba4
85
+ bai1
86
+ bai2
87
+ bai3
88
+ bai4
89
+ ban1
90
+ ban2
91
+ ban3
92
+ ban4
93
+ bang1
94
+ bang2
95
+ bang3
96
+ bang4
97
+ bao1
98
+ bao2
99
+ bao3
100
+ bao4
101
+ bei
102
+ bei1
103
+ bei2
104
+ bei3
105
+ bei4
106
+ ben1
107
+ ben2
108
+ ben3
109
+ ben4
110
+ beng
111
+ beng1
112
+ beng2
113
+ beng3
114
+ beng4
115
+ bi1
116
+ bi2
117
+ bi3
118
+ bi4
119
+ bian1
120
+ bian2
121
+ bian3
122
+ bian4
123
+ biao1
124
+ biao2
125
+ biao3
126
+ bie1
127
+ bie2
128
+ bie3
129
+ bie4
130
+ bin1
131
+ bin4
132
+ bing1
133
+ bing2
134
+ bing3
135
+ bing4
136
+ bo
137
+ bo1
138
+ bo2
139
+ bo3
140
+ bo4
141
+ bu2
142
+ bu3
143
+ bu4
144
+ c
145
+ ca1
146
+ cai1
147
+ cai2
148
+ cai3
149
+ cai4
150
+ can1
151
+ can2
152
+ can3
153
+ can4
154
+ cang1
155
+ cang2
156
+ cao1
157
+ cao2
158
+ cao3
159
+ ce4
160
+ cen1
161
+ cen2
162
+ ceng1
163
+ ceng2
164
+ ceng4
165
+ cha1
166
+ cha2
167
+ cha3
168
+ cha4
169
+ chai1
170
+ chai2
171
+ chan1
172
+ chan2
173
+ chan3
174
+ chan4
175
+ chang1
176
+ chang2
177
+ chang3
178
+ chang4
179
+ chao1
180
+ chao2
181
+ chao3
182
+ che1
183
+ che2
184
+ che3
185
+ che4
186
+ chen1
187
+ chen2
188
+ chen3
189
+ chen4
190
+ cheng1
191
+ cheng2
192
+ cheng3
193
+ cheng4
194
+ chi1
195
+ chi2
196
+ chi3
197
+ chi4
198
+ chong1
199
+ chong2
200
+ chong3
201
+ chong4
202
+ chou1
203
+ chou2
204
+ chou3
205
+ chou4
206
+ chu1
207
+ chu2
208
+ chu3
209
+ chu4
210
+ chua1
211
+ chuai1
212
+ chuai2
213
+ chuai3
214
+ chuai4
215
+ chuan1
216
+ chuan2
217
+ chuan3
218
+ chuan4
219
+ chuang1
220
+ chuang2
221
+ chuang3
222
+ chuang4
223
+ chui1
224
+ chui2
225
+ chun1
226
+ chun2
227
+ chun3
228
+ chuo1
229
+ chuo4
230
+ ci1
231
+ ci2
232
+ ci3
233
+ ci4
234
+ cong1
235
+ cong2
236
+ cou4
237
+ cu1
238
+ cu4
239
+ cuan1
240
+ cuan2
241
+ cuan4
242
+ cui1
243
+ cui3
244
+ cui4
245
+ cun1
246
+ cun2
247
+ cun4
248
+ cuo1
249
+ cuo2
250
+ cuo4
251
+ d
252
+ da
253
+ da1
254
+ da2
255
+ da3
256
+ da4
257
+ dai1
258
+ dai2
259
+ dai3
260
+ dai4
261
+ dan1
262
+ dan2
263
+ dan3
264
+ dan4
265
+ dang1
266
+ dang2
267
+ dang3
268
+ dang4
269
+ dao1
270
+ dao2
271
+ dao3
272
+ dao4
273
+ de
274
+ de1
275
+ de2
276
+ dei3
277
+ den4
278
+ deng1
279
+ deng2
280
+ deng3
281
+ deng4
282
+ di1
283
+ di2
284
+ di3
285
+ di4
286
+ dia3
287
+ dian1
288
+ dian2
289
+ dian3
290
+ dian4
291
+ diao1
292
+ diao3
293
+ diao4
294
+ die1
295
+ die2
296
+ die4
297
+ ding1
298
+ ding2
299
+ ding3
300
+ ding4
301
+ diu1
302
+ dong1
303
+ dong3
304
+ dong4
305
+ dou1
306
+ dou2
307
+ dou3
308
+ dou4
309
+ du1
310
+ du2
311
+ du3
312
+ du4
313
+ duan1
314
+ duan2
315
+ duan3
316
+ duan4
317
+ dui1
318
+ dui4
319
+ dun1
320
+ dun3
321
+ dun4
322
+ duo1
323
+ duo2
324
+ duo3
325
+ duo4
326
+ e
327
+ e1
328
+ e2
329
+ e3
330
+ e4
331
+ ei2
332
+ en1
333
+ en4
334
+ er
335
+ er2
336
+ er3
337
+ er4
338
+ f
339
+ fa1
340
+ fa2
341
+ fa3
342
+ fa4
343
+ fan1
344
+ fan2
345
+ fan3
346
+ fan4
347
+ fang1
348
+ fang2
349
+ fang3
350
+ fang4
351
+ fei1
352
+ fei2
353
+ fei3
354
+ fei4
355
+ fen1
356
+ fen2
357
+ fen3
358
+ fen4
359
+ feng1
360
+ feng2
361
+ feng3
362
+ feng4
363
+ fo2
364
+ fou2
365
+ fou3
366
+ fu1
367
+ fu2
368
+ fu3
369
+ fu4
370
+ g
371
+ ga1
372
+ ga2
373
+ ga3
374
+ ga4
375
+ gai1
376
+ gai2
377
+ gai3
378
+ gai4
379
+ gan1
380
+ gan2
381
+ gan3
382
+ gan4
383
+ gang1
384
+ gang2
385
+ gang3
386
+ gang4
387
+ gao1
388
+ gao2
389
+ gao3
390
+ gao4
391
+ ge1
392
+ ge2
393
+ ge3
394
+ ge4
395
+ gei2
396
+ gei3
397
+ gen1
398
+ gen2
399
+ gen3
400
+ gen4
401
+ geng1
402
+ geng3
403
+ geng4
404
+ gong1
405
+ gong3
406
+ gong4
407
+ gou1
408
+ gou2
409
+ gou3
410
+ gou4
411
+ gu
412
+ gu1
413
+ gu2
414
+ gu3
415
+ gu4
416
+ gua1
417
+ gua2
418
+ gua3
419
+ gua4
420
+ guai1
421
+ guai2
422
+ guai3
423
+ guai4
424
+ guan1
425
+ guan2
426
+ guan3
427
+ guan4
428
+ guang1
429
+ guang2
430
+ guang3
431
+ guang4
432
+ gui1
433
+ gui2
434
+ gui3
435
+ gui4
436
+ gun3
437
+ gun4
438
+ guo1
439
+ guo2
440
+ guo3
441
+ guo4
442
+ h
443
+ ha1
444
+ ha2
445
+ ha3
446
+ hai1
447
+ hai2
448
+ hai3
449
+ hai4
450
+ han1
451
+ han2
452
+ han3
453
+ han4
454
+ hang1
455
+ hang2
456
+ hang4
457
+ hao1
458
+ hao2
459
+ hao3
460
+ hao4
461
+ he1
462
+ he2
463
+ he4
464
+ hei1
465
+ hen2
466
+ hen3
467
+ hen4
468
+ heng1
469
+ heng2
470
+ heng4
471
+ hong1
472
+ hong2
473
+ hong3
474
+ hong4
475
+ hou1
476
+ hou2
477
+ hou3
478
+ hou4
479
+ hu1
480
+ hu2
481
+ hu3
482
+ hu4
483
+ hua1
484
+ hua2
485
+ hua4
486
+ huai2
487
+ huai4
488
+ huan1
489
+ huan2
490
+ huan3
491
+ huan4
492
+ huang1
493
+ huang2
494
+ huang3
495
+ huang4
496
+ hui1
497
+ hui2
498
+ hui3
499
+ hui4
500
+ hun1
501
+ hun2
502
+ hun4
503
+ huo
504
+ huo1
505
+ huo2
506
+ huo3
507
+ huo4
508
+ i
509
+ j
510
+ ji1
511
+ ji2
512
+ ji3
513
+ ji4
514
+ jia
515
+ jia1
516
+ jia2
517
+ jia3
518
+ jia4
519
+ jian1
520
+ jian2
521
+ jian3
522
+ jian4
523
+ jiang1
524
+ jiang2
525
+ jiang3
526
+ jiang4
527
+ jiao1
528
+ jiao2
529
+ jiao3
530
+ jiao4
531
+ jie1
532
+ jie2
533
+ jie3
534
+ jie4
535
+ jin1
536
+ jin2
537
+ jin3
538
+ jin4
539
+ jing1
540
+ jing2
541
+ jing3
542
+ jing4
543
+ jiong3
544
+ jiu1
545
+ jiu2
546
+ jiu3
547
+ jiu4
548
+ ju1
549
+ ju2
550
+ ju3
551
+ ju4
552
+ juan1
553
+ juan2
554
+ juan3
555
+ juan4
556
+ jue1
557
+ jue2
558
+ jue4
559
+ jun1
560
+ jun4
561
+ k
562
+ ka1
563
+ ka2
564
+ ka3
565
+ kai1
566
+ kai2
567
+ kai3
568
+ kai4
569
+ kan1
570
+ kan2
571
+ kan3
572
+ kan4
573
+ kang1
574
+ kang2
575
+ kang4
576
+ kao1
577
+ kao2
578
+ kao3
579
+ kao4
580
+ ke1
581
+ ke2
582
+ ke3
583
+ ke4
584
+ ken3
585
+ keng1
586
+ kong1
587
+ kong3
588
+ kong4
589
+ kou1
590
+ kou2
591
+ kou3
592
+ kou4
593
+ ku1
594
+ ku2
595
+ ku3
596
+ ku4
597
+ kua1
598
+ kua3
599
+ kua4
600
+ kuai3
601
+ kuai4
602
+ kuan1
603
+ kuan2
604
+ kuan3
605
+ kuang1
606
+ kuang2
607
+ kuang4
608
+ kui1
609
+ kui2
610
+ kui3
611
+ kui4
612
+ kun1
613
+ kun3
614
+ kun4
615
+ kuo4
616
+ l
617
+ la
618
+ la1
619
+ la2
620
+ la3
621
+ la4
622
+ lai2
623
+ lai4
624
+ lan2
625
+ lan3
626
+ lan4
627
+ lang1
628
+ lang2
629
+ lang3
630
+ lang4
631
+ lao1
632
+ lao2
633
+ lao3
634
+ lao4
635
+ le
636
+ le1
637
+ le4
638
+ lei
639
+ lei1
640
+ lei2
641
+ lei3
642
+ lei4
643
+ leng1
644
+ leng2
645
+ leng3
646
+ leng4
647
+ li
648
+ li1
649
+ li2
650
+ li3
651
+ li4
652
+ lia3
653
+ lian2
654
+ lian3
655
+ lian4
656
+ liang2
657
+ liang3
658
+ liang4
659
+ liao1
660
+ liao2
661
+ liao3
662
+ liao4
663
+ lie1
664
+ lie2
665
+ lie3
666
+ lie4
667
+ lin1
668
+ lin2
669
+ lin3
670
+ lin4
671
+ ling2
672
+ ling3
673
+ ling4
674
+ liu1
675
+ liu2
676
+ liu3
677
+ liu4
678
+ long1
679
+ long2
680
+ long3
681
+ long4
682
+ lou1
683
+ lou2
684
+ lou3
685
+ lou4
686
+ lu1
687
+ lu2
688
+ lu3
689
+ lu4
690
+ luan2
691
+ luan3
692
+ luan4
693
+ lun1
694
+ lun2
695
+ lun4
696
+ luo1
697
+ luo2
698
+ luo3
699
+ luo4
700
+ lv2
701
+ lv3
702
+ lv4
703
+ lve3
704
+ lve4
705
+ m
706
+ ma
707
+ ma1
708
+ ma2
709
+ ma3
710
+ ma4
711
+ mai2
712
+ mai3
713
+ mai4
714
+ man1
715
+ man2
716
+ man3
717
+ man4
718
+ mang2
719
+ mang3
720
+ mao1
721
+ mao2
722
+ mao3
723
+ mao4
724
+ me
725
+ mei2
726
+ mei3
727
+ mei4
728
+ men
729
+ men1
730
+ men2
731
+ men4
732
+ meng
733
+ meng1
734
+ meng2
735
+ meng3
736
+ meng4
737
+ mi1
738
+ mi2
739
+ mi3
740
+ mi4
741
+ mian2
742
+ mian3
743
+ mian4
744
+ miao1
745
+ miao2
746
+ miao3
747
+ miao4
748
+ mie1
749
+ mie4
750
+ min2
751
+ min3
752
+ ming2
753
+ ming3
754
+ ming4
755
+ miu4
756
+ mo1
757
+ mo2
758
+ mo3
759
+ mo4
760
+ mou1
761
+ mou2
762
+ mou3
763
+ mu2
764
+ mu3
765
+ mu4
766
+ n
767
+ n2
768
+ na1
769
+ na2
770
+ na3
771
+ na4
772
+ nai2
773
+ nai3
774
+ nai4
775
+ nan1
776
+ nan2
777
+ nan3
778
+ nan4
779
+ nang1
780
+ nang2
781
+ nang3
782
+ nao1
783
+ nao2
784
+ nao3
785
+ nao4
786
+ ne
787
+ ne2
788
+ ne4
789
+ nei3
790
+ nei4
791
+ nen4
792
+ neng2
793
+ ni1
794
+ ni2
795
+ ni3
796
+ ni4
797
+ nian1
798
+ nian2
799
+ nian3
800
+ nian4
801
+ niang2
802
+ niang4
803
+ niao2
804
+ niao3
805
+ niao4
806
+ nie1
807
+ nie4
808
+ nin2
809
+ ning2
810
+ ning3
811
+ ning4
812
+ niu1
813
+ niu2
814
+ niu3
815
+ niu4
816
+ nong2
817
+ nong4
818
+ nou4
819
+ nu2
820
+ nu3
821
+ nu4
822
+ nuan3
823
+ nuo2
824
+ nuo4
825
+ nv2
826
+ nv3
827
+ nve4
828
+ o
829
+ o1
830
+ o2
831
+ ou1
832
+ ou2
833
+ ou3
834
+ ou4
835
+ p
836
+ pa1
837
+ pa2
838
+ pa4
839
+ pai1
840
+ pai2
841
+ pai3
842
+ pai4
843
+ pan1
844
+ pan2
845
+ pan4
846
+ pang1
847
+ pang2
848
+ pang4
849
+ pao1
850
+ pao2
851
+ pao3
852
+ pao4
853
+ pei1
854
+ pei2
855
+ pei4
856
+ pen1
857
+ pen2
858
+ pen4
859
+ peng1
860
+ peng2
861
+ peng3
862
+ peng4
863
+ pi1
864
+ pi2
865
+ pi3
866
+ pi4
867
+ pian1
868
+ pian2
869
+ pian4
870
+ piao1
871
+ piao2
872
+ piao3
873
+ piao4
874
+ pie1
875
+ pie2
876
+ pie3
877
+ pin1
878
+ pin2
879
+ pin3
880
+ pin4
881
+ ping1
882
+ ping2
883
+ po1
884
+ po2
885
+ po3
886
+ po4
887
+ pou1
888
+ pu1
889
+ pu2
890
+ pu3
891
+ pu4
892
+ q
893
+ qi1
894
+ qi2
895
+ qi3
896
+ qi4
897
+ qia1
898
+ qia3
899
+ qia4
900
+ qian1
901
+ qian2
902
+ qian3
903
+ qian4
904
+ qiang1
905
+ qiang2
906
+ qiang3
907
+ qiang4
908
+ qiao1
909
+ qiao2
910
+ qiao3
911
+ qiao4
912
+ qie1
913
+ qie2
914
+ qie3
915
+ qie4
916
+ qin1
917
+ qin2
918
+ qin3
919
+ qin4
920
+ qing1
921
+ qing2
922
+ qing3
923
+ qing4
924
+ qiong1
925
+ qiong2
926
+ qiu1
927
+ qiu2
928
+ qiu3
929
+ qu1
930
+ qu2
931
+ qu3
932
+ qu4
933
+ quan1
934
+ quan2
935
+ quan3
936
+ quan4
937
+ que1
938
+ que2
939
+ que4
940
+ qun2
941
+ r
942
+ ran2
943
+ ran3
944
+ rang1
945
+ rang2
946
+ rang3
947
+ rang4
948
+ rao2
949
+ rao3
950
+ rao4
951
+ re2
952
+ re3
953
+ re4
954
+ ren2
955
+ ren3
956
+ ren4
957
+ reng1
958
+ reng2
959
+ ri4
960
+ rong1
961
+ rong2
962
+ rong3
963
+ rou2
964
+ rou4
965
+ ru2
966
+ ru3
967
+ ru4
968
+ ruan2
969
+ ruan3
970
+ rui3
971
+ rui4
972
+ run4
973
+ ruo4
974
+ s
975
+ sa1
976
+ sa2
977
+ sa3
978
+ sa4
979
+ sai1
980
+ sai4
981
+ san1
982
+ san2
983
+ san3
984
+ san4
985
+ sang1
986
+ sang3
987
+ sang4
988
+ sao1
989
+ sao2
990
+ sao3
991
+ sao4
992
+ se4
993
+ sen1
994
+ seng1
995
+ sha1
996
+ sha2
997
+ sha3
998
+ sha4
999
+ shai1
1000
+ shai2
1001
+ shai3
1002
+ shai4
1003
+ shan1
1004
+ shan3
1005
+ shan4
1006
+ shang
1007
+ shang1
1008
+ shang3
1009
+ shang4
1010
+ shao1
1011
+ shao2
1012
+ shao3
1013
+ shao4
1014
+ she1
1015
+ she2
1016
+ she3
1017
+ she4
1018
+ shei2
1019
+ shen1
1020
+ shen2
1021
+ shen3
1022
+ shen4
1023
+ sheng1
1024
+ sheng2
1025
+ sheng3
1026
+ sheng4
1027
+ shi
1028
+ shi1
1029
+ shi2
1030
+ shi3
1031
+ shi4
1032
+ shou1
1033
+ shou2
1034
+ shou3
1035
+ shou4
1036
+ shu1
1037
+ shu2
1038
+ shu3
1039
+ shu4
1040
+ shua1
1041
+ shua2
1042
+ shua3
1043
+ shua4
1044
+ shuai1
1045
+ shuai3
1046
+ shuai4
1047
+ shuan1
1048
+ shuan4
1049
+ shuang1
1050
+ shuang3
1051
+ shui2
1052
+ shui3
1053
+ shui4
1054
+ shun3
1055
+ shun4
1056
+ shuo1
1057
+ shuo4
1058
+ si1
1059
+ si2
1060
+ si3
1061
+ si4
1062
+ song1
1063
+ song3
1064
+ song4
1065
+ sou1
1066
+ sou3
1067
+ sou4
1068
+ su1
1069
+ su2
1070
+ su4
1071
+ suan1
1072
+ suan4
1073
+ sui1
1074
+ sui2
1075
+ sui3
1076
+ sui4
1077
+ sun1
1078
+ sun3
1079
+ suo
1080
+ suo1
1081
+ suo2
1082
+ suo3
1083
+ t
1084
+ ta1
1085
+ ta2
1086
+ ta3
1087
+ ta4
1088
+ tai1
1089
+ tai2
1090
+ tai4
1091
+ tan1
1092
+ tan2
1093
+ tan3
1094
+ tan4
1095
+ tang1
1096
+ tang2
1097
+ tang3
1098
+ tang4
1099
+ tao1
1100
+ tao2
1101
+ tao3
1102
+ tao4
1103
+ te4
1104
+ teng2
1105
+ ti1
1106
+ ti2
1107
+ ti3
1108
+ ti4
1109
+ tian1
1110
+ tian2
1111
+ tian3
1112
+ tiao1
1113
+ tiao2
1114
+ tiao3
1115
+ tiao4
1116
+ tie1
1117
+ tie2
1118
+ tie3
1119
+ tie4
1120
+ ting1
1121
+ ting2
1122
+ ting3
1123
+ tong1
1124
+ tong2
1125
+ tong3
1126
+ tong4
1127
+ tou
1128
+ tou1
1129
+ tou2
1130
+ tou4
1131
+ tu1
1132
+ tu2
1133
+ tu3
1134
+ tu4
1135
+ tuan1
1136
+ tuan2
1137
+ tui1
1138
+ tui2
1139
+ tui3
1140
+ tui4
1141
+ tun1
1142
+ tun2
1143
+ tun4
1144
+ tuo1
1145
+ tuo2
1146
+ tuo3
1147
+ tuo4
1148
+ u
1149
+ v
1150
+ w
1151
+ wa
1152
+ wa1
1153
+ wa2
1154
+ wa3
1155
+ wa4
1156
+ wai1
1157
+ wai3
1158
+ wai4
1159
+ wan1
1160
+ wan2
1161
+ wan3
1162
+ wan4
1163
+ wang1
1164
+ wang2
1165
+ wang3
1166
+ wang4
1167
+ wei1
1168
+ wei2
1169
+ wei3
1170
+ wei4
1171
+ wen1
1172
+ wen2
1173
+ wen3
1174
+ wen4
1175
+ weng1
1176
+ weng4
1177
+ wo1
1178
+ wo2
1179
+ wo3
1180
+ wo4
1181
+ wu1
1182
+ wu2
1183
+ wu3
1184
+ wu4
1185
+ x
1186
+ xi1
1187
+ xi2
1188
+ xi3
1189
+ xi4
1190
+ xia1
1191
+ xia2
1192
+ xia4
1193
+ xian1
1194
+ xian2
1195
+ xian3
1196
+ xian4
1197
+ xiang1
1198
+ xiang2
1199
+ xiang3
1200
+ xiang4
1201
+ xiao1
1202
+ xiao2
1203
+ xiao3
1204
+ xiao4
1205
+ xie1
1206
+ xie2
1207
+ xie3
1208
+ xie4
1209
+ xin1
1210
+ xin2
1211
+ xin4
1212
+ xing1
1213
+ xing2
1214
+ xing3
1215
+ xing4
1216
+ xiong1
1217
+ xiong2
1218
+ xiu1
1219
+ xiu3
1220
+ xiu4
1221
+ xu
1222
+ xu1
1223
+ xu2
1224
+ xu3
1225
+ xu4
1226
+ xuan1
1227
+ xuan2
1228
+ xuan3
1229
+ xuan4
1230
+ xue1
1231
+ xue2
1232
+ xue3
1233
+ xue4
1234
+ xun1
1235
+ xun2
1236
+ xun4
1237
+ y
1238
+ ya
1239
+ ya1
1240
+ ya2
1241
+ ya3
1242
+ ya4
1243
+ yan1
1244
+ yan2
1245
+ yan3
1246
+ yan4
1247
+ yang1
1248
+ yang2
1249
+ yang3
1250
+ yang4
1251
+ yao1
1252
+ yao2
1253
+ yao3
1254
+ yao4
1255
+ ye1
1256
+ ye2
1257
+ ye3
1258
+ ye4
1259
+ yi
1260
+ yi1
1261
+ yi2
1262
+ yi3
1263
+ yi4
1264
+ yin1
1265
+ yin2
1266
+ yin3
1267
+ yin4
1268
+ ying1
1269
+ ying2
1270
+ ying3
1271
+ ying4
1272
+ yo1
1273
+ yong1
1274
+ yong2
1275
+ yong3
1276
+ yong4
1277
+ you1
1278
+ you2
1279
+ you3
1280
+ you4
1281
+ yu1
1282
+ yu2
1283
+ yu3
1284
+ yu4
1285
+ yuan1
1286
+ yuan2
1287
+ yuan3
1288
+ yuan4
1289
+ yue1
1290
+ yue4
1291
+ yun1
1292
+ yun2
1293
+ yun3
1294
+ yun4
1295
+ z
1296
+ za1
1297
+ za2
1298
+ za3
1299
+ zai1
1300
+ zai3
1301
+ zai4
1302
+ zan1
1303
+ zan2
1304
+ zan3
1305
+ zan4
1306
+ zang1
1307
+ zang4
1308
+ zao1
1309
+ zao2
1310
+ zao3
1311
+ zao4
1312
+ ze2
1313
+ ze4
1314
+ zei2
1315
+ zen3
1316
+ zeng1
1317
+ zeng4
1318
+ zha1
1319
+ zha2
1320
+ zha3
1321
+ zha4
1322
+ zhai1
1323
+ zhai2
1324
+ zhai3
1325
+ zhai4
1326
+ zhan1
1327
+ zhan2
1328
+ zhan3
1329
+ zhan4
1330
+ zhang1
1331
+ zhang2
1332
+ zhang3
1333
+ zhang4
1334
+ zhao1
1335
+ zhao2
1336
+ zhao3
1337
+ zhao4
1338
+ zhe
1339
+ zhe1
1340
+ zhe2
1341
+ zhe3
1342
+ zhe4
1343
+ zhen1
1344
+ zhen2
1345
+ zhen3
1346
+ zhen4
1347
+ zheng1
1348
+ zheng2
1349
+ zheng3
1350
+ zheng4
1351
+ zhi1
1352
+ zhi2
1353
+ zhi3
1354
+ zhi4
1355
+ zhong1
1356
+ zhong2
1357
+ zhong3
1358
+ zhong4
1359
+ zhou1
1360
+ zhou2
1361
+ zhou3
1362
+ zhou4
1363
+ zhu1
1364
+ zhu2
1365
+ zhu3
1366
+ zhu4
1367
+ zhua1
1368
+ zhua2
1369
+ zhua3
1370
+ zhuai1
1371
+ zhuai3
1372
+ zhuai4
1373
+ zhuan1
1374
+ zhuan2
1375
+ zhuan3
1376
+ zhuan4
1377
+ zhuang1
1378
+ zhuang4
1379
+ zhui1
1380
+ zhui4
1381
+ zhun1
1382
+ zhun2
1383
+ zhun3
1384
+ zhuo1
1385
+ zhuo2
1386
+ zi
1387
+ zi1
1388
+ zi2
1389
+ zi3
1390
+ zi4
1391
+ zong1
1392
+ zong2
1393
+ zong3
1394
+ zong4
1395
+ zou1
1396
+ zou2
1397
+ zou3
1398
+ zou4
1399
+ zu1
1400
+ zu2
1401
+ zu3
1402
+ zuan1
1403
+ zuan3
1404
+ zuan4
1405
+ zui2
1406
+ zui3
1407
+ zui4
1408
+ zun1
1409
+ zuo
1410
+ zuo1
1411
+ zuo2
1412
+ zuo3
1413
+ zuo4
1414
+ {
1415
+ ~
1416
+ ¡
1417
+ ¢
1418
+ £
1419
+ ¥
1420
+ §
1421
+ ¨
1422
+ ©
1423
+ «
1424
+ ®
1425
+ ¯
1426
+ °
1427
+ ±
1428
+ ²
1429
+ ³
1430
+ ´
1431
+ µ
1432
+ ·
1433
+ ¹
1434
+ º
1435
+ »
1436
+ ¼
1437
+ ½
1438
+ ¾
1439
+ ¿
1440
+ À
1441
+ Á
1442
+ Â
1443
+ Ã
1444
+ Ä
1445
+ Å
1446
+ Æ
1447
+ Ç
1448
+ È
1449
+ É
1450
+ Ê
1451
+ Í
1452
+ Î
1453
+ Ñ
1454
+ Ó
1455
+ Ö
1456
+ ×
1457
+ Ø
1458
+ Ú
1459
+ Ü
1460
+ Ý
1461
+ Þ
1462
+ ß
1463
+ à
1464
+ á
1465
+ â
1466
+ ã
1467
+ ä
1468
+ å
1469
+ æ
1470
+ ç
1471
+ è
1472
+ é
1473
+ ê
1474
+ ë
1475
+ ì
1476
+ í
1477
+ î
1478
+ ï
1479
+ ð
1480
+ ñ
1481
+ ò
1482
+ ó
1483
+ ô
1484
+ õ
1485
+ ö
1486
+ ø
1487
+ ù
1488
+ ú
1489
+ û
1490
+ ü
1491
+ ý
1492
+ Ā
1493
+ ā
1494
+ ă
1495
+ ą
1496
+ ć
1497
+ Č
1498
+ č
1499
+ Đ
1500
+ đ
1501
+ ē
1502
+ ė
1503
+ ę
1504
+ ě
1505
+ ĝ
1506
+ ğ
1507
+ ħ
1508
+ ī
1509
+ į
1510
+ İ
1511
+ ı
1512
+ Ł
1513
+ ł
1514
+ ń
1515
+ ņ
1516
+ ň
1517
+ ŋ
1518
+ Ō
1519
+ ō
1520
+ ő
1521
+ œ
1522
+ ř
1523
+ Ś
1524
+ ś
1525
+ Ş
1526
+ ş
1527
+ Š
1528
+ š
1529
+ Ť
1530
+ ť
1531
+ ũ
1532
+ ū
1533
+ ź
1534
+ Ż
1535
+ ż
1536
+ Ž
1537
+ ž
1538
+ ơ
1539
+ ư
1540
+ ǎ
1541
+ ǐ
1542
+ ǒ
1543
+ ǔ
1544
+ ǚ
1545
+ ș
1546
+ ț
1547
+ ɑ
1548
+ ɔ
1549
+ ɕ
1550
+ ə
1551
+ ɛ
1552
+ ɜ
1553
+ ɡ
1554
+ ɣ
1555
+ ɪ
1556
+ ɫ
1557
+ ɴ
1558
+ ɹ
1559
+ ɾ
1560
+ ʃ
1561
+ ʊ
1562
+ ʌ
1563
+ ʒ
1564
+ ʔ
1565
+ ʰ
1566
+ ʷ
1567
+ ʻ
1568
+ ʾ
1569
+ ʿ
1570
+ ˈ
1571
+ ː
1572
+ ˙
1573
+ ˜
1574
+ ˢ
1575
+ ́
1576
+ ̅
1577
+ Α
1578
+ Β
1579
+ Δ
1580
+ Ε
1581
+ Θ
1582
+ Κ
1583
+ Λ
1584
+ Μ
1585
+ Ξ
1586
+ Π
1587
+ Σ
1588
+ Τ
1589
+ Φ
1590
+ Χ
1591
+ Ψ
1592
+ Ω
1593
+ ά
1594
+ έ
1595
+ ή
1596
+ ί
1597
+ α
1598
+ β
1599
+ γ
1600
+ δ
1601
+ ε
1602
+ ζ
1603
+ η
1604
+ θ
1605
+ ι
1606
+ κ
1607
+ λ
1608
+ μ
1609
+ ν
1610
+ ξ
1611
+ ο
1612
+ π
1613
+ ρ
1614
+ ς
1615
+ σ
1616
+ τ
1617
+ υ
1618
+ φ
1619
+ χ
1620
+ ψ
1621
+ ω
1622
+ ϊ
1623
+ ό
1624
+ ύ
1625
+ ώ
1626
+ ϕ
1627
+ ϵ
1628
+ Ё
1629
+ А
1630
+ Б
1631
+ В
1632
+ Г
1633
+ Д
1634
+ Е
1635
+ Ж
1636
+ З
1637
+ И
1638
+ Й
1639
+ К
1640
+ Л
1641
+ М
1642
+ Н
1643
+ О
1644
+ П
1645
+ Р
1646
+ С
1647
+ Т
1648
+ У
1649
+ Ф
1650
+ Х
1651
+ Ц
1652
+ Ч
1653
+ Ш
1654
+ Щ
1655
+ Ы
1656
+ Ь
1657
+ Э
1658
+ Ю
1659
+ Я
1660
+ а
1661
+ б
1662
+ в
1663
+ г
1664
+ д
1665
+ е
1666
+ ж
1667
+ з
1668
+ и
1669
+ й
1670
+ к
1671
+ л
1672
+ м
1673
+ н
1674
+ о
1675
+ п
1676
+ р
1677
+ с
1678
+ т
1679
+ у
1680
+ ф
1681
+ х
1682
+ ц
1683
+ ч
1684
+ ш
1685
+ щ
1686
+ ъ
1687
+ ы
1688
+ ь
1689
+ э
1690
+ ю
1691
+ я
1692
+ ё
1693
+ і
1694
+ ְ
1695
+ ִ
1696
+ ֵ
1697
+ ֶ
1698
+ ַ
1699
+ ָ
1700
+ ֹ
1701
+ ּ
1702
+ ־
1703
+ ׁ
1704
+ א
1705
+ ב
1706
+ ג
1707
+ ד
1708
+ ה
1709
+ ו
1710
+ ז
1711
+ ח
1712
+ ט
1713
+ י
1714
+ כ
1715
+ ל
1716
+ ם
1717
+ מ
1718
+ ן
1719
+ נ
1720
+ ס
1721
+ ע
1722
+ פ
1723
+ ק
1724
+ ר
1725
+ ש
1726
+ ת
1727
+ أ
1728
+ ب
1729
+ ة
1730
+ ت
1731
+ ج
1732
+ ح
1733
+ د
1734
+ ر
1735
+ ز
1736
+ س
1737
+ ص
1738
+ ط
1739
+ ع
1740
+ ق
1741
+ ك
1742
+ ل
1743
+ م
1744
+ ن
1745
+ ه
1746
+ و
1747
+ ي
1748
+ َ
1749
+ ُ
1750
+ ِ
1751
+ ْ
1752
+
1753
+
1754
+
1755
+
1756
+
1757
+
1758
+
1759
+
1760
+
1761
+
1762
+
1763
+
1764
+
1765
+
1766
+
1767
+
1768
+
1769
+
1770
+
1771
+
1772
+
1773
+
1774
+
1775
+
1776
+
1777
+
1778
+
1779
+
1780
+
1781
+
1782
+
1783
+
1784
+
1785
+
1786
+
1787
+
1788
+
1789
+
1790
+
1791
+
1792
+
1793
+
1794
+
1795
+
1796
+
1797
+
1798
+
1799
+
1800
+ ế
1801
+
1802
+
1803
+
1804
+
1805
+
1806
+
1807
+
1808
+
1809
+
1810
+
1811
+
1812
+
1813
+
1814
+
1815
+
1816
+
1817
+
1818
+
1819
+
1820
+
1821
+
1822
+
1823
+
1824
+
1825
+
1826
+
1827
+
1828
+
1829
+
1830
+
1831
+
1832
+
1833
+
1834
+
1835
+
1836
+
1837
+
1838
+
1839
+
1840
+
1841
+
1842
+
1843
+
1844
+
1845
+
1846
+
1847
+
1848
+
1849
+
1850
+
1851
+
1852
+
1853
+
1854
+
1855
+
1856
+
1857
+
1858
+
1859
+
1860
+
1861
+
1862
+
1863
+
1864
+
1865
+
1866
+
1867
+
1868
+
1869
+
1870
+
1871
+
1872
+
1873
+
1874
+
1875
+
1876
+
1877
+
1878
+
1879
+
1880
+
1881
+
1882
+
1883
+
1884
+
1885
+
1886
+
1887
+
1888
+
1889
+
1890
+
1891
+
1892
+
1893
+
1894
+
1895
+
1896
+
1897
+
1898
+
1899
+
1900
+
1901
+
1902
+
1903
+
1904
+
1905
+
1906
+
1907
+
1908
+
1909
+
1910
+
1911
+
1912
+
1913
+
1914
+
1915
+
1916
+
1917
+
1918
+
1919
+
1920
+
1921
+
1922
+
1923
+
1924
+
1925
+
1926
+
1927
+
1928
+
1929
+
1930
+
1931
+
1932
+
1933
+
1934
+
1935
+
1936
+
1937
+
1938
+
1939
+
1940
+
1941
+
1942
+
1943
+
1944
+
1945
+
1946
+
1947
+
1948
+
1949
+
1950
+
1951
+
1952
+
1953
+
1954
+
1955
+
1956
+
1957
+
1958
+
1959
+
1960
+
1961
+
1962
+
1963
+
1964
+
1965
+
1966
+
1967
+
1968
+
1969
+
1970
+
1971
+
1972
+
1973
+
1974
+
1975
+
1976
+
1977
+
1978
+
1979
+
1980
+
1981
+
1982
+
1983
+
1984
+
1985
+
1986
+
1987
+
1988
+
1989
+
1990
+
1991
+
1992
+
1993
+
1994
+
1995
+
1996
+
1997
+
1998
+
1999
+
2000
+
2001
+
2002
+
2003
+
2004
+
2005
+
2006
+
2007
+
2008
+
2009
+
2010
+
2011
+
2012
+
2013
+
2014
+
2015
+
2016
+
2017
+
2018
+
2019
+
2020
+
2021
+
2022
+
2023
+
2024
+
2025
+
2026
+
2027
+
2028
+
2029
+
2030
+
2031
+
2032
+
2033
+
2034
+
2035
+
2036
+
2037
+
2038
+
2039
+
2040
+
2041
+
2042
+
2043
+
2044
+
2045
+
2046
+
2047
+
2048
+
2049
+
2050
+
2051
+
2052
+
2053
+
2054
+
2055
+
2056
+
2057
+
2058
+
2059
+
2060
+
2061
+
2062
+
2063
+
2064
+
2065
+
2066
+
2067
+
2068
+
2069
+
2070
+
2071
+
2072
+
2073
+
2074
+
2075
+
2076
+
2077
+
2078
+
2079
+
2080
+
2081
+
2082
+
2083
+
2084
+
2085
+
2086
+
2087
+
2088
+
2089
+
2090
+
2091
+
2092
+
2093
+
2094
+
2095
+
2096
+
2097
+
2098
+
2099
+
2100
+
2101
+
2102
+
2103
+
2104
+
2105
+
2106
+
2107
+
2108
+
2109
+
2110
+
2111
+
2112
+
2113
+
2114
+
2115
+
2116
+
2117
+
2118
+
2119
+
2120
+
2121
+
2122
+
2123
+
2124
+
2125
+
2126
+
2127
+
2128
+
2129
+
2130
+
2131
+
2132
+
2133
+
2134
+
2135
+
2136
+
2137
+
2138
+
2139
+
2140
+
2141
+
2142
+
2143
+
2144
+
2145
+
2146
+
2147
+
2148
+
2149
+
2150
+
2151
+
2152
+
2153
+
2154
+
2155
+
2156
+
2157
+
2158
+
2159
+
2160
+
2161
+
2162
+
2163
+
2164
+
2165
+
2166
+
2167
+
2168
+
2169
+
2170
+
2171
+
2172
+
2173
+
2174
+
2175
+
2176
+
2177
+
2178
+
2179
+
2180
+
2181
+
2182
+
2183
+
2184
+
2185
+
2186
+
2187
+
2188
+
2189
+
2190
+
2191
+
2192
+
2193
+
2194
+
2195
+
2196
+
2197
+
2198
+
2199
+
2200
+
2201
+
2202
+
2203
+
2204
+
2205
+
2206
+
2207
+
2208
+
2209
+
2210
+
2211
+
2212
+
2213
+
2214
+
2215
+
2216
+
2217
+
2218
+
2219
+
2220
+
2221
+
2222
+
2223
+
2224
+
2225
+
2226
+
2227
+
2228
+
2229
+
2230
+
2231
+
2232
+
2233
+
2234
+
2235
+
2236
+
2237
+
2238
+
2239
+
2240
+
2241
+
2242
+
2243
+
2244
+
2245
+
2246
+
2247
+
2248
+
2249
+
2250
+
2251
+
2252
+
2253
+
2254
+
2255
+
2256
+
2257
+
2258
+
2259
+
2260
+
2261
+
2262
+
2263
+
2264
+
2265
+
2266
+
2267
+
2268
+
2269
+
2270
+
2271
+
2272
+
2273
+
2274
+
2275
+
2276
+
2277
+
2278
+
2279
+
2280
+
2281
+
2282
+
2283
+
2284
+
2285
+
2286
+
2287
+
2288
+
2289
+
2290
+
2291
+
2292
+
2293
+
2294
+
2295
+
2296
+
2297
+
2298
+
2299
+
2300
+
2301
+
2302
+
2303
+
2304
+
2305
+
2306
+
2307
+
2308
+
2309
+
2310
+
2311
+
2312
+
2313
+
2314
+
2315
+
2316
+
2317
+
2318
+
2319
+
2320
+
2321
+
2322
+
2323
+
2324
+
2325
+
2326
+
2327
+
2328
+
2329
+
2330
+
2331
+
2332
+
2333
+
2334
+
2335
+
2336
+
2337
+
2338
+
2339
+
2340
+
2341
+
2342
+
2343
+
2344
+
2345
+
2346
+
2347
+
2348
+
2349
+
2350
+
2351
+
2352
+
2353
+
2354
+
2355
+
2356
+
2357
+
2358
+
2359
+
2360
+
2361
+
2362
+
2363
+
2364
+
2365
+
2366
+
2367
+
2368
+
2369
+
2370
+
2371
+
2372
+
2373
+
2374
+
2375
+
2376
+
2377
+
2378
+
2379
+
2380
+
2381
+
2382
+
2383
+
2384
+
2385
+
2386
+
2387
+
2388
+
2389
+
2390
+
2391
+
2392
+
2393
+
2394
+
2395
+
2396
+
2397
+
2398
+
2399
+
2400
+
2401
+
2402
+
2403
+
2404
+
2405
+
2406
+
2407
+
2408
+
2409
+
2410
+
2411
+
2412
+
2413
+
2414
+
2415
+
2416
+
2417
+
2418
+
2419
+
2420
+
2421
+
2422
+
2423
+
2424
+
2425
+
2426
+
2427
+
2428
+
2429
+
2430
+
2431
+
2432
+
2433
+
2434
+
2435
+
2436
+
2437
+
2438
+
2439
+
2440
+
2441
+
2442
+
2443
+
2444
+
2445
+
2446
+
2447
+
2448
+
2449
+
2450
+
2451
+
2452
+
2453
+
2454
+
2455
+
2456
+
2457
+
2458
+
2459
+
2460
+
2461
+
2462
+
2463
+
2464
+
2465
+
2466
+
2467
+
2468
+
2469
+
2470
+
2471
+
2472
+
2473
+
2474
+
2475
+
2476
+
2477
+
2478
+
2479
+
2480
+
2481
+
2482
+
2483
+
2484
+
2485
+
2486
+
2487
+
2488
+
2489
+
2490
+
2491
+
2492
+
2493
+
2494
+
2495
+
2496
+
2497
+
2498
+
2499
+
2500
+
2501
+
2502
+
2503
+
2504
+
2505
+
2506
+
2507
+
2508
+
2509
+
2510
+
2511
+
2512
+
2513
+
2514
+
2515
+
2516
+
2517
+
2518
+
2519
+
2520
+
2521
+
2522
+
2523
+
2524
+
2525
+
2526
+
2527
+
2528
+
2529
+
2530
+
2531
+
2532
+
2533
+
2534
+
2535
+
2536
+
2537
+
2538
+
2539
+
2540
+
2541
+
2542
+
2543
+
2544
+
2545
+ 𠮶
2flow/models/downloads/F5TTS_Base_bigvgan/model_1250000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdab3e92fc2b77447aa8c46aac77531d970822b191ca198e5ab94aef99265df9
3
+ size 1348555394
2flow/models/downloads/F5TTS_v1_Base/model_1250000.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:670900fd14e6c458b95da6e9ed317cdb20dbaf7a1c02ac06a05475a9d32b6a38
3
+ size 1348435761
2flow/models/downloads/F5TTS_v1_Base/vocab.txt ADDED
@@ -0,0 +1,2545 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ !
3
+ "
4
+ #
5
+ $
6
+ %
7
+ &
8
+ '
9
+ (
10
+ )
11
+ *
12
+ +
13
+ ,
14
+ -
15
+ .
16
+ /
17
+ 0
18
+ 1
19
+ 2
20
+ 3
21
+ 4
22
+ 5
23
+ 6
24
+ 7
25
+ 8
26
+ 9
27
+ :
28
+ ;
29
+ =
30
+ >
31
+ ?
32
+ @
33
+ A
34
+ B
35
+ C
36
+ D
37
+ E
38
+ F
39
+ G
40
+ H
41
+ I
42
+ J
43
+ K
44
+ L
45
+ M
46
+ N
47
+ O
48
+ P
49
+ Q
50
+ R
51
+ S
52
+ T
53
+ U
54
+ V
55
+ W
56
+ X
57
+ Y
58
+ Z
59
+ [
60
+ \
61
+ ]
62
+ _
63
+ a
64
+ a1
65
+ ai1
66
+ ai2
67
+ ai3
68
+ ai4
69
+ an1
70
+ an3
71
+ an4
72
+ ang1
73
+ ang2
74
+ ang4
75
+ ao1
76
+ ao2
77
+ ao3
78
+ ao4
79
+ b
80
+ ba
81
+ ba1
82
+ ba2
83
+ ba3
84
+ ba4
85
+ bai1
86
+ bai2
87
+ bai3
88
+ bai4
89
+ ban1
90
+ ban2
91
+ ban3
92
+ ban4
93
+ bang1
94
+ bang2
95
+ bang3
96
+ bang4
97
+ bao1
98
+ bao2
99
+ bao3
100
+ bao4
101
+ bei
102
+ bei1
103
+ bei2
104
+ bei3
105
+ bei4
106
+ ben1
107
+ ben2
108
+ ben3
109
+ ben4
110
+ beng
111
+ beng1
112
+ beng2
113
+ beng3
114
+ beng4
115
+ bi1
116
+ bi2
117
+ bi3
118
+ bi4
119
+ bian1
120
+ bian2
121
+ bian3
122
+ bian4
123
+ biao1
124
+ biao2
125
+ biao3
126
+ bie1
127
+ bie2
128
+ bie3
129
+ bie4
130
+ bin1
131
+ bin4
132
+ bing1
133
+ bing2
134
+ bing3
135
+ bing4
136
+ bo
137
+ bo1
138
+ bo2
139
+ bo3
140
+ bo4
141
+ bu2
142
+ bu3
143
+ bu4
144
+ c
145
+ ca1
146
+ cai1
147
+ cai2
148
+ cai3
149
+ cai4
150
+ can1
151
+ can2
152
+ can3
153
+ can4
154
+ cang1
155
+ cang2
156
+ cao1
157
+ cao2
158
+ cao3
159
+ ce4
160
+ cen1
161
+ cen2
162
+ ceng1
163
+ ceng2
164
+ ceng4
165
+ cha1
166
+ cha2
167
+ cha3
168
+ cha4
169
+ chai1
170
+ chai2
171
+ chan1
172
+ chan2
173
+ chan3
174
+ chan4
175
+ chang1
176
+ chang2
177
+ chang3
178
+ chang4
179
+ chao1
180
+ chao2
181
+ chao3
182
+ che1
183
+ che2
184
+ che3
185
+ che4
186
+ chen1
187
+ chen2
188
+ chen3
189
+ chen4
190
+ cheng1
191
+ cheng2
192
+ cheng3
193
+ cheng4
194
+ chi1
195
+ chi2
196
+ chi3
197
+ chi4
198
+ chong1
199
+ chong2
200
+ chong3
201
+ chong4
202
+ chou1
203
+ chou2
204
+ chou3
205
+ chou4
206
+ chu1
207
+ chu2
208
+ chu3
209
+ chu4
210
+ chua1
211
+ chuai1
212
+ chuai2
213
+ chuai3
214
+ chuai4
215
+ chuan1
216
+ chuan2
217
+ chuan3
218
+ chuan4
219
+ chuang1
220
+ chuang2
221
+ chuang3
222
+ chuang4
223
+ chui1
224
+ chui2
225
+ chun1
226
+ chun2
227
+ chun3
228
+ chuo1
229
+ chuo4
230
+ ci1
231
+ ci2
232
+ ci3
233
+ ci4
234
+ cong1
235
+ cong2
236
+ cou4
237
+ cu1
238
+ cu4
239
+ cuan1
240
+ cuan2
241
+ cuan4
242
+ cui1
243
+ cui3
244
+ cui4
245
+ cun1
246
+ cun2
247
+ cun4
248
+ cuo1
249
+ cuo2
250
+ cuo4
251
+ d
252
+ da
253
+ da1
254
+ da2
255
+ da3
256
+ da4
257
+ dai1
258
+ dai2
259
+ dai3
260
+ dai4
261
+ dan1
262
+ dan2
263
+ dan3
264
+ dan4
265
+ dang1
266
+ dang2
267
+ dang3
268
+ dang4
269
+ dao1
270
+ dao2
271
+ dao3
272
+ dao4
273
+ de
274
+ de1
275
+ de2
276
+ dei3
277
+ den4
278
+ deng1
279
+ deng2
280
+ deng3
281
+ deng4
282
+ di1
283
+ di2
284
+ di3
285
+ di4
286
+ dia3
287
+ dian1
288
+ dian2
289
+ dian3
290
+ dian4
291
+ diao1
292
+ diao3
293
+ diao4
294
+ die1
295
+ die2
296
+ die4
297
+ ding1
298
+ ding2
299
+ ding3
300
+ ding4
301
+ diu1
302
+ dong1
303
+ dong3
304
+ dong4
305
+ dou1
306
+ dou2
307
+ dou3
308
+ dou4
309
+ du1
310
+ du2
311
+ du3
312
+ du4
313
+ duan1
314
+ duan2
315
+ duan3
316
+ duan4
317
+ dui1
318
+ dui4
319
+ dun1
320
+ dun3
321
+ dun4
322
+ duo1
323
+ duo2
324
+ duo3
325
+ duo4
326
+ e
327
+ e1
328
+ e2
329
+ e3
330
+ e4
331
+ ei2
332
+ en1
333
+ en4
334
+ er
335
+ er2
336
+ er3
337
+ er4
338
+ f
339
+ fa1
340
+ fa2
341
+ fa3
342
+ fa4
343
+ fan1
344
+ fan2
345
+ fan3
346
+ fan4
347
+ fang1
348
+ fang2
349
+ fang3
350
+ fang4
351
+ fei1
352
+ fei2
353
+ fei3
354
+ fei4
355
+ fen1
356
+ fen2
357
+ fen3
358
+ fen4
359
+ feng1
360
+ feng2
361
+ feng3
362
+ feng4
363
+ fo2
364
+ fou2
365
+ fou3
366
+ fu1
367
+ fu2
368
+ fu3
369
+ fu4
370
+ g
371
+ ga1
372
+ ga2
373
+ ga3
374
+ ga4
375
+ gai1
376
+ gai2
377
+ gai3
378
+ gai4
379
+ gan1
380
+ gan2
381
+ gan3
382
+ gan4
383
+ gang1
384
+ gang2
385
+ gang3
386
+ gang4
387
+ gao1
388
+ gao2
389
+ gao3
390
+ gao4
391
+ ge1
392
+ ge2
393
+ ge3
394
+ ge4
395
+ gei2
396
+ gei3
397
+ gen1
398
+ gen2
399
+ gen3
400
+ gen4
401
+ geng1
402
+ geng3
403
+ geng4
404
+ gong1
405
+ gong3
406
+ gong4
407
+ gou1
408
+ gou2
409
+ gou3
410
+ gou4
411
+ gu
412
+ gu1
413
+ gu2
414
+ gu3
415
+ gu4
416
+ gua1
417
+ gua2
418
+ gua3
419
+ gua4
420
+ guai1
421
+ guai2
422
+ guai3
423
+ guai4
424
+ guan1
425
+ guan2
426
+ guan3
427
+ guan4
428
+ guang1
429
+ guang2
430
+ guang3
431
+ guang4
432
+ gui1
433
+ gui2
434
+ gui3
435
+ gui4
436
+ gun3
437
+ gun4
438
+ guo1
439
+ guo2
440
+ guo3
441
+ guo4
442
+ h
443
+ ha1
444
+ ha2
445
+ ha3
446
+ hai1
447
+ hai2
448
+ hai3
449
+ hai4
450
+ han1
451
+ han2
452
+ han3
453
+ han4
454
+ hang1
455
+ hang2
456
+ hang4
457
+ hao1
458
+ hao2
459
+ hao3
460
+ hao4
461
+ he1
462
+ he2
463
+ he4
464
+ hei1
465
+ hen2
466
+ hen3
467
+ hen4
468
+ heng1
469
+ heng2
470
+ heng4
471
+ hong1
472
+ hong2
473
+ hong3
474
+ hong4
475
+ hou1
476
+ hou2
477
+ hou3
478
+ hou4
479
+ hu1
480
+ hu2
481
+ hu3
482
+ hu4
483
+ hua1
484
+ hua2
485
+ hua4
486
+ huai2
487
+ huai4
488
+ huan1
489
+ huan2
490
+ huan3
491
+ huan4
492
+ huang1
493
+ huang2
494
+ huang3
495
+ huang4
496
+ hui1
497
+ hui2
498
+ hui3
499
+ hui4
500
+ hun1
501
+ hun2
502
+ hun4
503
+ huo
504
+ huo1
505
+ huo2
506
+ huo3
507
+ huo4
508
+ i
509
+ j
510
+ ji1
511
+ ji2
512
+ ji3
513
+ ji4
514
+ jia
515
+ jia1
516
+ jia2
517
+ jia3
518
+ jia4
519
+ jian1
520
+ jian2
521
+ jian3
522
+ jian4
523
+ jiang1
524
+ jiang2
525
+ jiang3
526
+ jiang4
527
+ jiao1
528
+ jiao2
529
+ jiao3
530
+ jiao4
531
+ jie1
532
+ jie2
533
+ jie3
534
+ jie4
535
+ jin1
536
+ jin2
537
+ jin3
538
+ jin4
539
+ jing1
540
+ jing2
541
+ jing3
542
+ jing4
543
+ jiong3
544
+ jiu1
545
+ jiu2
546
+ jiu3
547
+ jiu4
548
+ ju1
549
+ ju2
550
+ ju3
551
+ ju4
552
+ juan1
553
+ juan2
554
+ juan3
555
+ juan4
556
+ jue1
557
+ jue2
558
+ jue4
559
+ jun1
560
+ jun4
561
+ k
562
+ ka1
563
+ ka2
564
+ ka3
565
+ kai1
566
+ kai2
567
+ kai3
568
+ kai4
569
+ kan1
570
+ kan2
571
+ kan3
572
+ kan4
573
+ kang1
574
+ kang2
575
+ kang4
576
+ kao1
577
+ kao2
578
+ kao3
579
+ kao4
580
+ ke1
581
+ ke2
582
+ ke3
583
+ ke4
584
+ ken3
585
+ keng1
586
+ kong1
587
+ kong3
588
+ kong4
589
+ kou1
590
+ kou2
591
+ kou3
592
+ kou4
593
+ ku1
594
+ ku2
595
+ ku3
596
+ ku4
597
+ kua1
598
+ kua3
599
+ kua4
600
+ kuai3
601
+ kuai4
602
+ kuan1
603
+ kuan2
604
+ kuan3
605
+ kuang1
606
+ kuang2
607
+ kuang4
608
+ kui1
609
+ kui2
610
+ kui3
611
+ kui4
612
+ kun1
613
+ kun3
614
+ kun4
615
+ kuo4
616
+ l
617
+ la
618
+ la1
619
+ la2
620
+ la3
621
+ la4
622
+ lai2
623
+ lai4
624
+ lan2
625
+ lan3
626
+ lan4
627
+ lang1
628
+ lang2
629
+ lang3
630
+ lang4
631
+ lao1
632
+ lao2
633
+ lao3
634
+ lao4
635
+ le
636
+ le1
637
+ le4
638
+ lei
639
+ lei1
640
+ lei2
641
+ lei3
642
+ lei4
643
+ leng1
644
+ leng2
645
+ leng3
646
+ leng4
647
+ li
648
+ li1
649
+ li2
650
+ li3
651
+ li4
652
+ lia3
653
+ lian2
654
+ lian3
655
+ lian4
656
+ liang2
657
+ liang3
658
+ liang4
659
+ liao1
660
+ liao2
661
+ liao3
662
+ liao4
663
+ lie1
664
+ lie2
665
+ lie3
666
+ lie4
667
+ lin1
668
+ lin2
669
+ lin3
670
+ lin4
671
+ ling2
672
+ ling3
673
+ ling4
674
+ liu1
675
+ liu2
676
+ liu3
677
+ liu4
678
+ long1
679
+ long2
680
+ long3
681
+ long4
682
+ lou1
683
+ lou2
684
+ lou3
685
+ lou4
686
+ lu1
687
+ lu2
688
+ lu3
689
+ lu4
690
+ luan2
691
+ luan3
692
+ luan4
693
+ lun1
694
+ lun2
695
+ lun4
696
+ luo1
697
+ luo2
698
+ luo3
699
+ luo4
700
+ lv2
701
+ lv3
702
+ lv4
703
+ lve3
704
+ lve4
705
+ m
706
+ ma
707
+ ma1
708
+ ma2
709
+ ma3
710
+ ma4
711
+ mai2
712
+ mai3
713
+ mai4
714
+ man1
715
+ man2
716
+ man3
717
+ man4
718
+ mang2
719
+ mang3
720
+ mao1
721
+ mao2
722
+ mao3
723
+ mao4
724
+ me
725
+ mei2
726
+ mei3
727
+ mei4
728
+ men
729
+ men1
730
+ men2
731
+ men4
732
+ meng
733
+ meng1
734
+ meng2
735
+ meng3
736
+ meng4
737
+ mi1
738
+ mi2
739
+ mi3
740
+ mi4
741
+ mian2
742
+ mian3
743
+ mian4
744
+ miao1
745
+ miao2
746
+ miao3
747
+ miao4
748
+ mie1
749
+ mie4
750
+ min2
751
+ min3
752
+ ming2
753
+ ming3
754
+ ming4
755
+ miu4
756
+ mo1
757
+ mo2
758
+ mo3
759
+ mo4
760
+ mou1
761
+ mou2
762
+ mou3
763
+ mu2
764
+ mu3
765
+ mu4
766
+ n
767
+ n2
768
+ na1
769
+ na2
770
+ na3
771
+ na4
772
+ nai2
773
+ nai3
774
+ nai4
775
+ nan1
776
+ nan2
777
+ nan3
778
+ nan4
779
+ nang1
780
+ nang2
781
+ nang3
782
+ nao1
783
+ nao2
784
+ nao3
785
+ nao4
786
+ ne
787
+ ne2
788
+ ne4
789
+ nei3
790
+ nei4
791
+ nen4
792
+ neng2
793
+ ni1
794
+ ni2
795
+ ni3
796
+ ni4
797
+ nian1
798
+ nian2
799
+ nian3
800
+ nian4
801
+ niang2
802
+ niang4
803
+ niao2
804
+ niao3
805
+ niao4
806
+ nie1
807
+ nie4
808
+ nin2
809
+ ning2
810
+ ning3
811
+ ning4
812
+ niu1
813
+ niu2
814
+ niu3
815
+ niu4
816
+ nong2
817
+ nong4
818
+ nou4
819
+ nu2
820
+ nu3
821
+ nu4
822
+ nuan3
823
+ nuo2
824
+ nuo4
825
+ nv2
826
+ nv3
827
+ nve4
828
+ o
829
+ o1
830
+ o2
831
+ ou1
832
+ ou2
833
+ ou3
834
+ ou4
835
+ p
836
+ pa1
837
+ pa2
838
+ pa4
839
+ pai1
840
+ pai2
841
+ pai3
842
+ pai4
843
+ pan1
844
+ pan2
845
+ pan4
846
+ pang1
847
+ pang2
848
+ pang4
849
+ pao1
850
+ pao2
851
+ pao3
852
+ pao4
853
+ pei1
854
+ pei2
855
+ pei4
856
+ pen1
857
+ pen2
858
+ pen4
859
+ peng1
860
+ peng2
861
+ peng3
862
+ peng4
863
+ pi1
864
+ pi2
865
+ pi3
866
+ pi4
867
+ pian1
868
+ pian2
869
+ pian4
870
+ piao1
871
+ piao2
872
+ piao3
873
+ piao4
874
+ pie1
875
+ pie2
876
+ pie3
877
+ pin1
878
+ pin2
879
+ pin3
880
+ pin4
881
+ ping1
882
+ ping2
883
+ po1
884
+ po2
885
+ po3
886
+ po4
887
+ pou1
888
+ pu1
889
+ pu2
890
+ pu3
891
+ pu4
892
+ q
893
+ qi1
894
+ qi2
895
+ qi3
896
+ qi4
897
+ qia1
898
+ qia3
899
+ qia4
900
+ qian1
901
+ qian2
902
+ qian3
903
+ qian4
904
+ qiang1
905
+ qiang2
906
+ qiang3
907
+ qiang4
908
+ qiao1
909
+ qiao2
910
+ qiao3
911
+ qiao4
912
+ qie1
913
+ qie2
914
+ qie3
915
+ qie4
916
+ qin1
917
+ qin2
918
+ qin3
919
+ qin4
920
+ qing1
921
+ qing2
922
+ qing3
923
+ qing4
924
+ qiong1
925
+ qiong2
926
+ qiu1
927
+ qiu2
928
+ qiu3
929
+ qu1
930
+ qu2
931
+ qu3
932
+ qu4
933
+ quan1
934
+ quan2
935
+ quan3
936
+ quan4
937
+ que1
938
+ que2
939
+ que4
940
+ qun2
941
+ r
942
+ ran2
943
+ ran3
944
+ rang1
945
+ rang2
946
+ rang3
947
+ rang4
948
+ rao2
949
+ rao3
950
+ rao4
951
+ re2
952
+ re3
953
+ re4
954
+ ren2
955
+ ren3
956
+ ren4
957
+ reng1
958
+ reng2
959
+ ri4
960
+ rong1
961
+ rong2
962
+ rong3
963
+ rou2
964
+ rou4
965
+ ru2
966
+ ru3
967
+ ru4
968
+ ruan2
969
+ ruan3
970
+ rui3
971
+ rui4
972
+ run4
973
+ ruo4
974
+ s
975
+ sa1
976
+ sa2
977
+ sa3
978
+ sa4
979
+ sai1
980
+ sai4
981
+ san1
982
+ san2
983
+ san3
984
+ san4
985
+ sang1
986
+ sang3
987
+ sang4
988
+ sao1
989
+ sao2
990
+ sao3
991
+ sao4
992
+ se4
993
+ sen1
994
+ seng1
995
+ sha1
996
+ sha2
997
+ sha3
998
+ sha4
999
+ shai1
1000
+ shai2
1001
+ shai3
1002
+ shai4
1003
+ shan1
1004
+ shan3
1005
+ shan4
1006
+ shang
1007
+ shang1
1008
+ shang3
1009
+ shang4
1010
+ shao1
1011
+ shao2
1012
+ shao3
1013
+ shao4
1014
+ she1
1015
+ she2
1016
+ she3
1017
+ she4
1018
+ shei2
1019
+ shen1
1020
+ shen2
1021
+ shen3
1022
+ shen4
1023
+ sheng1
1024
+ sheng2
1025
+ sheng3
1026
+ sheng4
1027
+ shi
1028
+ shi1
1029
+ shi2
1030
+ shi3
1031
+ shi4
1032
+ shou1
1033
+ shou2
1034
+ shou3
1035
+ shou4
1036
+ shu1
1037
+ shu2
1038
+ shu3
1039
+ shu4
1040
+ shua1
1041
+ shua2
1042
+ shua3
1043
+ shua4
1044
+ shuai1
1045
+ shuai3
1046
+ shuai4
1047
+ shuan1
1048
+ shuan4
1049
+ shuang1
1050
+ shuang3
1051
+ shui2
1052
+ shui3
1053
+ shui4
1054
+ shun3
1055
+ shun4
1056
+ shuo1
1057
+ shuo4
1058
+ si1
1059
+ si2
1060
+ si3
1061
+ si4
1062
+ song1
1063
+ song3
1064
+ song4
1065
+ sou1
1066
+ sou3
1067
+ sou4
1068
+ su1
1069
+ su2
1070
+ su4
1071
+ suan1
1072
+ suan4
1073
+ sui1
1074
+ sui2
1075
+ sui3
1076
+ sui4
1077
+ sun1
1078
+ sun3
1079
+ suo
1080
+ suo1
1081
+ suo2
1082
+ suo3
1083
+ t
1084
+ ta1
1085
+ ta2
1086
+ ta3
1087
+ ta4
1088
+ tai1
1089
+ tai2
1090
+ tai4
1091
+ tan1
1092
+ tan2
1093
+ tan3
1094
+ tan4
1095
+ tang1
1096
+ tang2
1097
+ tang3
1098
+ tang4
1099
+ tao1
1100
+ tao2
1101
+ tao3
1102
+ tao4
1103
+ te4
1104
+ teng2
1105
+ ti1
1106
+ ti2
1107
+ ti3
1108
+ ti4
1109
+ tian1
1110
+ tian2
1111
+ tian3
1112
+ tiao1
1113
+ tiao2
1114
+ tiao3
1115
+ tiao4
1116
+ tie1
1117
+ tie2
1118
+ tie3
1119
+ tie4
1120
+ ting1
1121
+ ting2
1122
+ ting3
1123
+ tong1
1124
+ tong2
1125
+ tong3
1126
+ tong4
1127
+ tou
1128
+ tou1
1129
+ tou2
1130
+ tou4
1131
+ tu1
1132
+ tu2
1133
+ tu3
1134
+ tu4
1135
+ tuan1
1136
+ tuan2
1137
+ tui1
1138
+ tui2
1139
+ tui3
1140
+ tui4
1141
+ tun1
1142
+ tun2
1143
+ tun4
1144
+ tuo1
1145
+ tuo2
1146
+ tuo3
1147
+ tuo4
1148
+ u
1149
+ v
1150
+ w
1151
+ wa
1152
+ wa1
1153
+ wa2
1154
+ wa3
1155
+ wa4
1156
+ wai1
1157
+ wai3
1158
+ wai4
1159
+ wan1
1160
+ wan2
1161
+ wan3
1162
+ wan4
1163
+ wang1
1164
+ wang2
1165
+ wang3
1166
+ wang4
1167
+ wei1
1168
+ wei2
1169
+ wei3
1170
+ wei4
1171
+ wen1
1172
+ wen2
1173
+ wen3
1174
+ wen4
1175
+ weng1
1176
+ weng4
1177
+ wo1
1178
+ wo2
1179
+ wo3
1180
+ wo4
1181
+ wu1
1182
+ wu2
1183
+ wu3
1184
+ wu4
1185
+ x
1186
+ xi1
1187
+ xi2
1188
+ xi3
1189
+ xi4
1190
+ xia1
1191
+ xia2
1192
+ xia4
1193
+ xian1
1194
+ xian2
1195
+ xian3
1196
+ xian4
1197
+ xiang1
1198
+ xiang2
1199
+ xiang3
1200
+ xiang4
1201
+ xiao1
1202
+ xiao2
1203
+ xiao3
1204
+ xiao4
1205
+ xie1
1206
+ xie2
1207
+ xie3
1208
+ xie4
1209
+ xin1
1210
+ xin2
1211
+ xin4
1212
+ xing1
1213
+ xing2
1214
+ xing3
1215
+ xing4
1216
+ xiong1
1217
+ xiong2
1218
+ xiu1
1219
+ xiu3
1220
+ xiu4
1221
+ xu
1222
+ xu1
1223
+ xu2
1224
+ xu3
1225
+ xu4
1226
+ xuan1
1227
+ xuan2
1228
+ xuan3
1229
+ xuan4
1230
+ xue1
1231
+ xue2
1232
+ xue3
1233
+ xue4
1234
+ xun1
1235
+ xun2
1236
+ xun4
1237
+ y
1238
+ ya
1239
+ ya1
1240
+ ya2
1241
+ ya3
1242
+ ya4
1243
+ yan1
1244
+ yan2
1245
+ yan3
1246
+ yan4
1247
+ yang1
1248
+ yang2
1249
+ yang3
1250
+ yang4
1251
+ yao1
1252
+ yao2
1253
+ yao3
1254
+ yao4
1255
+ ye1
1256
+ ye2
1257
+ ye3
1258
+ ye4
1259
+ yi
1260
+ yi1
1261
+ yi2
1262
+ yi3
1263
+ yi4
1264
+ yin1
1265
+ yin2
1266
+ yin3
1267
+ yin4
1268
+ ying1
1269
+ ying2
1270
+ ying3
1271
+ ying4
1272
+ yo1
1273
+ yong1
1274
+ yong2
1275
+ yong3
1276
+ yong4
1277
+ you1
1278
+ you2
1279
+ you3
1280
+ you4
1281
+ yu1
1282
+ yu2
1283
+ yu3
1284
+ yu4
1285
+ yuan1
1286
+ yuan2
1287
+ yuan3
1288
+ yuan4
1289
+ yue1
1290
+ yue4
1291
+ yun1
1292
+ yun2
1293
+ yun3
1294
+ yun4
1295
+ z
1296
+ za1
1297
+ za2
1298
+ za3
1299
+ zai1
1300
+ zai3
1301
+ zai4
1302
+ zan1
1303
+ zan2
1304
+ zan3
1305
+ zan4
1306
+ zang1
1307
+ zang4
1308
+ zao1
1309
+ zao2
1310
+ zao3
1311
+ zao4
1312
+ ze2
1313
+ ze4
1314
+ zei2
1315
+ zen3
1316
+ zeng1
1317
+ zeng4
1318
+ zha1
1319
+ zha2
1320
+ zha3
1321
+ zha4
1322
+ zhai1
1323
+ zhai2
1324
+ zhai3
1325
+ zhai4
1326
+ zhan1
1327
+ zhan2
1328
+ zhan3
1329
+ zhan4
1330
+ zhang1
1331
+ zhang2
1332
+ zhang3
1333
+ zhang4
1334
+ zhao1
1335
+ zhao2
1336
+ zhao3
1337
+ zhao4
1338
+ zhe
1339
+ zhe1
1340
+ zhe2
1341
+ zhe3
1342
+ zhe4
1343
+ zhen1
1344
+ zhen2
1345
+ zhen3
1346
+ zhen4
1347
+ zheng1
1348
+ zheng2
1349
+ zheng3
1350
+ zheng4
1351
+ zhi1
1352
+ zhi2
1353
+ zhi3
1354
+ zhi4
1355
+ zhong1
1356
+ zhong2
1357
+ zhong3
1358
+ zhong4
1359
+ zhou1
1360
+ zhou2
1361
+ zhou3
1362
+ zhou4
1363
+ zhu1
1364
+ zhu2
1365
+ zhu3
1366
+ zhu4
1367
+ zhua1
1368
+ zhua2
1369
+ zhua3
1370
+ zhuai1
1371
+ zhuai3
1372
+ zhuai4
1373
+ zhuan1
1374
+ zhuan2
1375
+ zhuan3
1376
+ zhuan4
1377
+ zhuang1
1378
+ zhuang4
1379
+ zhui1
1380
+ zhui4
1381
+ zhun1
1382
+ zhun2
1383
+ zhun3
1384
+ zhuo1
1385
+ zhuo2
1386
+ zi
1387
+ zi1
1388
+ zi2
1389
+ zi3
1390
+ zi4
1391
+ zong1
1392
+ zong2
1393
+ zong3
1394
+ zong4
1395
+ zou1
1396
+ zou2
1397
+ zou3
1398
+ zou4
1399
+ zu1
1400
+ zu2
1401
+ zu3
1402
+ zuan1
1403
+ zuan3
1404
+ zuan4
1405
+ zui2
1406
+ zui3
1407
+ zui4
1408
+ zun1
1409
+ zuo
1410
+ zuo1
1411
+ zuo2
1412
+ zuo3
1413
+ zuo4
1414
+ {
1415
+ ~
1416
+ ¡
1417
+ ¢
1418
+ £
1419
+ ¥
1420
+ §
1421
+ ¨
1422
+ ©
1423
+ «
1424
+ ®
1425
+ ¯
1426
+ °
1427
+ ±
1428
+ ²
1429
+ ³
1430
+ ´
1431
+ µ
1432
+ ·
1433
+ ¹
1434
+ º
1435
+ »
1436
+ ¼
1437
+ ½
1438
+ ¾
1439
+ ¿
1440
+ À
1441
+ Á
1442
+ Â
1443
+ Ã
1444
+ Ä
1445
+ Å
1446
+ Æ
1447
+ Ç
1448
+ È
1449
+ É
1450
+ Ê
1451
+ Í
1452
+ Î
1453
+ Ñ
1454
+ Ó
1455
+ Ö
1456
+ ×
1457
+ Ø
1458
+ Ú
1459
+ Ü
1460
+ Ý
1461
+ Þ
1462
+ ß
1463
+ à
1464
+ á
1465
+ â
1466
+ ã
1467
+ ä
1468
+ å
1469
+ æ
1470
+ ç
1471
+ è
1472
+ é
1473
+ ê
1474
+ ë
1475
+ ì
1476
+ í
1477
+ î
1478
+ ï
1479
+ ð
1480
+ ñ
1481
+ ò
1482
+ ó
1483
+ ô
1484
+ õ
1485
+ ö
1486
+ ø
1487
+ ù
1488
+ ú
1489
+ û
1490
+ ü
1491
+ ý
1492
+ Ā
1493
+ ā
1494
+ ă
1495
+ ą
1496
+ ć
1497
+ Č
1498
+ č
1499
+ Đ
1500
+ đ
1501
+ ē
1502
+ ė
1503
+ ę
1504
+ ě
1505
+ ĝ
1506
+ ğ
1507
+ ħ
1508
+ ī
1509
+ į
1510
+ İ
1511
+ ı
1512
+ Ł
1513
+ ł
1514
+ ń
1515
+ ņ
1516
+ ň
1517
+ ŋ
1518
+ Ō
1519
+ ō
1520
+ ő
1521
+ œ
1522
+ ř
1523
+ Ś
1524
+ ś
1525
+ Ş
1526
+ ş
1527
+ Š
1528
+ š
1529
+ Ť
1530
+ ť
1531
+ ũ
1532
+ ū
1533
+ ź
1534
+ Ż
1535
+ ż
1536
+ Ž
1537
+ ž
1538
+ ơ
1539
+ ư
1540
+ ǎ
1541
+ ǐ
1542
+ ǒ
1543
+ ǔ
1544
+ ǚ
1545
+ ș
1546
+ ț
1547
+ ɑ
1548
+ ɔ
1549
+ ɕ
1550
+ ə
1551
+ ɛ
1552
+ ɜ
1553
+ ɡ
1554
+ ɣ
1555
+ ɪ
1556
+ ɫ
1557
+ ɴ
1558
+ ɹ
1559
+ ɾ
1560
+ ʃ
1561
+ ʊ
1562
+ ʌ
1563
+ ʒ
1564
+ ʔ
1565
+ ʰ
1566
+ ʷ
1567
+ ʻ
1568
+ ʾ
1569
+ ʿ
1570
+ ˈ
1571
+ ː
1572
+ ˙
1573
+ ˜
1574
+ ˢ
1575
+ ́
1576
+ ̅
1577
+ Α
1578
+ Β
1579
+ Δ
1580
+ Ε
1581
+ Θ
1582
+ Κ
1583
+ Λ
1584
+ Μ
1585
+ Ξ
1586
+ Π
1587
+ Σ
1588
+ Τ
1589
+ Φ
1590
+ Χ
1591
+ Ψ
1592
+ Ω
1593
+ ά
1594
+ έ
1595
+ ή
1596
+ ί
1597
+ α
1598
+ β
1599
+ γ
1600
+ δ
1601
+ ε
1602
+ ζ
1603
+ η
1604
+ θ
1605
+ ι
1606
+ κ
1607
+ λ
1608
+ μ
1609
+ ν
1610
+ ξ
1611
+ ο
1612
+ π
1613
+ ρ
1614
+ ς
1615
+ σ
1616
+ τ
1617
+ υ
1618
+ φ
1619
+ χ
1620
+ ψ
1621
+ ω
1622
+ ϊ
1623
+ ό
1624
+ ύ
1625
+ ώ
1626
+ ϕ
1627
+ ϵ
1628
+ Ё
1629
+ А
1630
+ Б
1631
+ В
1632
+ Г
1633
+ Д
1634
+ Е
1635
+ Ж
1636
+ З
1637
+ И
1638
+ Й
1639
+ К
1640
+ Л
1641
+ М
1642
+ Н
1643
+ О
1644
+ П
1645
+ Р
1646
+ С
1647
+ Т
1648
+ У
1649
+ Ф
1650
+ Х
1651
+ Ц
1652
+ Ч
1653
+ Ш
1654
+ Щ
1655
+ Ы
1656
+ Ь
1657
+ Э
1658
+ Ю
1659
+ Я
1660
+ а
1661
+ б
1662
+ в
1663
+ г
1664
+ д
1665
+ е
1666
+ ж
1667
+ з
1668
+ и
1669
+ й
1670
+ к
1671
+ л
1672
+ м
1673
+ н
1674
+ о
1675
+ п
1676
+ р
1677
+ с
1678
+ т
1679
+ у
1680
+ ф
1681
+ х
1682
+ ц
1683
+ ч
1684
+ ш
1685
+ щ
1686
+ ъ
1687
+ ы
1688
+ ь
1689
+ э
1690
+ ю
1691
+ я
1692
+ ё
1693
+ і
1694
+ ְ
1695
+ ִ
1696
+ ֵ
1697
+ ֶ
1698
+ ַ
1699
+ ָ
1700
+ ֹ
1701
+ ּ
1702
+ ־
1703
+ ׁ
1704
+ א
1705
+ ב
1706
+ ג
1707
+ ד
1708
+ ה
1709
+ ו
1710
+ ז
1711
+ ח
1712
+ ט
1713
+ י
1714
+ כ
1715
+ ל
1716
+ ם
1717
+ מ
1718
+ ן
1719
+ נ
1720
+ ס
1721
+ ע
1722
+ פ
1723
+ ק
1724
+ ר
1725
+ ש
1726
+ ת
1727
+ أ
1728
+ ب
1729
+ ة
1730
+ ت
1731
+ ج
1732
+ ح
1733
+ د
1734
+ ر
1735
+ ز
1736
+ س
1737
+ ص
1738
+ ط
1739
+ ع
1740
+ ق
1741
+ ك
1742
+ ل
1743
+ م
1744
+ ن
1745
+ ه
1746
+ و
1747
+ ي
1748
+ َ
1749
+ ُ
1750
+ ِ
1751
+ ْ
1752
+
1753
+
1754
+
1755
+
1756
+
1757
+
1758
+
1759
+
1760
+
1761
+
1762
+
1763
+
1764
+
1765
+
1766
+
1767
+
1768
+
1769
+
1770
+
1771
+
1772
+
1773
+
1774
+
1775
+
1776
+
1777
+
1778
+
1779
+
1780
+
1781
+
1782
+
1783
+
1784
+
1785
+
1786
+
1787
+
1788
+
1789
+
1790
+
1791
+
1792
+
1793
+
1794
+
1795
+
1796
+
1797
+
1798
+
1799
+
1800
+ ế
1801
+
1802
+
1803
+
1804
+
1805
+
1806
+
1807
+
1808
+
1809
+
1810
+
1811
+
1812
+
1813
+
1814
+
1815
+
1816
+
1817
+
1818
+
1819
+
1820
+
1821
+
1822
+
1823
+
1824
+
1825
+
1826
+
1827
+
1828
+
1829
+
1830
+
1831
+
1832
+
1833
+
1834
+
1835
+
1836
+
1837
+
1838
+
1839
+
1840
+
1841
+
1842
+
1843
+
1844
+
1845
+
1846
+
1847
+
1848
+
1849
+
1850
+
1851
+
1852
+
1853
+
1854
+
1855
+
1856
+
1857
+
1858
+
1859
+
1860
+
1861
+
1862
+
1863
+
1864
+
1865
+
1866
+
1867
+
1868
+
1869
+
1870
+
1871
+
1872
+
1873
+
1874
+
1875
+
1876
+
1877
+
1878
+
1879
+
1880
+
1881
+
1882
+
1883
+
1884
+
1885
+
1886
+
1887
+
1888
+
1889
+
1890
+
1891
+
1892
+
1893
+
1894
+
1895
+
1896
+
1897
+
1898
+
1899
+
1900
+
1901
+
1902
+
1903
+
1904
+
1905
+
1906
+
1907
+
1908
+
1909
+
1910
+
1911
+
1912
+
1913
+
1914
+
1915
+
1916
+
1917
+
1918
+
1919
+
1920
+
1921
+
1922
+
1923
+
1924
+
1925
+
1926
+
1927
+
1928
+
1929
+
1930
+
1931
+
1932
+
1933
+
1934
+
1935
+
1936
+
1937
+
1938
+
1939
+
1940
+
1941
+
1942
+
1943
+
1944
+
1945
+
1946
+
1947
+
1948
+
1949
+
1950
+
1951
+
1952
+
1953
+
1954
+
1955
+
1956
+
1957
+
1958
+
1959
+
1960
+
1961
+
1962
+
1963
+
1964
+
1965
+
1966
+
1967
+
1968
+
1969
+
1970
+
1971
+
1972
+
1973
+
1974
+
1975
+
1976
+
1977
+
1978
+
1979
+
1980
+
1981
+
1982
+
1983
+
1984
+
1985
+
1986
+
1987
+
1988
+
1989
+
1990
+
1991
+
1992
+
1993
+
1994
+
1995
+
1996
+
1997
+
1998
+
1999
+
2000
+
2001
+
2002
+
2003
+
2004
+
2005
+
2006
+
2007
+
2008
+
2009
+
2010
+
2011
+
2012
+
2013
+
2014
+
2015
+
2016
+
2017
+
2018
+
2019
+
2020
+
2021
+
2022
+
2023
+
2024
+
2025
+
2026
+
2027
+
2028
+
2029
+
2030
+
2031
+
2032
+
2033
+
2034
+
2035
+
2036
+
2037
+
2038
+
2039
+
2040
+
2041
+
2042
+
2043
+
2044
+
2045
+
2046
+
2047
+
2048
+
2049
+
2050
+
2051
+
2052
+
2053
+
2054
+
2055
+
2056
+
2057
+
2058
+
2059
+
2060
+
2061
+
2062
+
2063
+
2064
+
2065
+
2066
+
2067
+
2068
+
2069
+
2070
+
2071
+
2072
+
2073
+
2074
+
2075
+
2076
+
2077
+
2078
+
2079
+
2080
+
2081
+
2082
+
2083
+
2084
+
2085
+
2086
+
2087
+
2088
+
2089
+
2090
+
2091
+
2092
+
2093
+
2094
+
2095
+
2096
+
2097
+
2098
+
2099
+
2100
+
2101
+
2102
+
2103
+
2104
+
2105
+
2106
+
2107
+
2108
+
2109
+
2110
+
2111
+
2112
+
2113
+
2114
+
2115
+
2116
+
2117
+
2118
+
2119
+
2120
+
2121
+
2122
+
2123
+
2124
+
2125
+
2126
+
2127
+
2128
+
2129
+
2130
+
2131
+
2132
+
2133
+
2134
+
2135
+
2136
+
2137
+
2138
+
2139
+
2140
+
2141
+
2142
+
2143
+
2144
+
2145
+
2146
+
2147
+
2148
+
2149
+
2150
+
2151
+
2152
+
2153
+
2154
+
2155
+
2156
+
2157
+
2158
+
2159
+
2160
+
2161
+
2162
+
2163
+
2164
+
2165
+
2166
+
2167
+
2168
+
2169
+
2170
+
2171
+
2172
+
2173
+
2174
+
2175
+
2176
+
2177
+
2178
+
2179
+
2180
+
2181
+
2182
+
2183
+
2184
+
2185
+
2186
+
2187
+
2188
+
2189
+
2190
+
2191
+
2192
+
2193
+
2194
+
2195
+
2196
+
2197
+
2198
+
2199
+
2200
+
2201
+
2202
+
2203
+
2204
+
2205
+
2206
+
2207
+
2208
+
2209
+
2210
+
2211
+
2212
+
2213
+
2214
+
2215
+
2216
+
2217
+
2218
+
2219
+
2220
+
2221
+
2222
+
2223
+
2224
+
2225
+
2226
+
2227
+
2228
+
2229
+
2230
+
2231
+
2232
+
2233
+
2234
+
2235
+
2236
+
2237
+
2238
+
2239
+
2240
+
2241
+
2242
+
2243
+
2244
+
2245
+
2246
+
2247
+
2248
+
2249
+
2250
+
2251
+
2252
+
2253
+
2254
+
2255
+
2256
+
2257
+
2258
+
2259
+
2260
+
2261
+
2262
+
2263
+
2264
+
2265
+
2266
+
2267
+
2268
+
2269
+
2270
+
2271
+
2272
+
2273
+
2274
+
2275
+
2276
+
2277
+
2278
+
2279
+
2280
+
2281
+
2282
+
2283
+
2284
+
2285
+
2286
+
2287
+
2288
+
2289
+
2290
+
2291
+
2292
+
2293
+
2294
+
2295
+
2296
+
2297
+
2298
+
2299
+
2300
+
2301
+
2302
+
2303
+
2304
+
2305
+
2306
+
2307
+
2308
+
2309
+
2310
+
2311
+
2312
+
2313
+
2314
+
2315
+
2316
+
2317
+
2318
+
2319
+
2320
+
2321
+
2322
+
2323
+
2324
+
2325
+
2326
+
2327
+
2328
+
2329
+
2330
+
2331
+
2332
+
2333
+
2334
+
2335
+
2336
+
2337
+
2338
+
2339
+
2340
+
2341
+
2342
+
2343
+
2344
+
2345
+
2346
+
2347
+
2348
+
2349
+
2350
+
2351
+
2352
+
2353
+
2354
+
2355
+
2356
+
2357
+
2358
+
2359
+
2360
+
2361
+
2362
+
2363
+
2364
+
2365
+
2366
+
2367
+
2368
+
2369
+
2370
+
2371
+
2372
+
2373
+
2374
+
2375
+
2376
+
2377
+
2378
+
2379
+
2380
+
2381
+
2382
+
2383
+
2384
+
2385
+
2386
+
2387
+
2388
+
2389
+
2390
+
2391
+
2392
+
2393
+
2394
+
2395
+
2396
+
2397
+
2398
+
2399
+
2400
+
2401
+
2402
+
2403
+
2404
+
2405
+
2406
+
2407
+
2408
+
2409
+
2410
+
2411
+
2412
+
2413
+
2414
+
2415
+
2416
+
2417
+
2418
+
2419
+
2420
+
2421
+
2422
+
2423
+
2424
+
2425
+
2426
+
2427
+
2428
+
2429
+
2430
+
2431
+
2432
+
2433
+
2434
+
2435
+
2436
+
2437
+
2438
+
2439
+
2440
+
2441
+
2442
+
2443
+
2444
+
2445
+
2446
+
2447
+
2448
+
2449
+
2450
+
2451
+
2452
+
2453
+
2454
+
2455
+
2456
+
2457
+
2458
+
2459
+
2460
+
2461
+
2462
+
2463
+
2464
+
2465
+
2466
+
2467
+
2468
+
2469
+
2470
+
2471
+
2472
+
2473
+
2474
+
2475
+
2476
+
2477
+
2478
+
2479
+
2480
+
2481
+
2482
+
2483
+
2484
+
2485
+
2486
+
2487
+
2488
+
2489
+
2490
+
2491
+
2492
+
2493
+
2494
+
2495
+
2496
+
2497
+
2498
+
2499
+
2500
+
2501
+
2502
+
2503
+
2504
+
2505
+
2506
+
2507
+
2508
+
2509
+
2510
+
2511
+
2512
+
2513
+
2514
+
2515
+
2516
+
2517
+
2518
+
2519
+
2520
+
2521
+
2522
+
2523
+
2524
+
2525
+
2526
+
2527
+
2528
+
2529
+
2530
+
2531
+
2532
+
2533
+
2534
+
2535
+
2536
+
2537
+
2538
+
2539
+
2540
+
2541
+
2542
+
2543
+
2544
+
2545
+ 𠮶
2flow/models/downloads/F5TTS_v1_Base_no_zero_init/model_1250000.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:790d5b83e2afea3cc879fabfed58b2b4da214c882ef34513adfed82684a4c47f
3
+ size 1348435761
2flow/patch/__init__.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ from .baichuan.model import BaichuanForCausalLM
16
+ from .bert.model import (
17
+ BertForQuestionAnswering,
18
+ BertForSequenceClassification,
19
+ BertModel,
20
+ RobertaForQuestionAnswering,
21
+ RobertaForSequenceClassification,
22
+ RobertaModel,
23
+ )
24
+ from .bloom.model import BloomForCausalLM, BloomModel
25
+ from .chatglm.config import ChatGLMConfig
26
+ from .chatglm.model import ChatGLMForCausalLM, ChatGLMModel
27
+ from .cogvlm.config import CogVLMConfig
28
+ from .cogvlm.model import CogVLMForCausalLM
29
+ from .commandr.model import CohereForCausalLM
30
+ from .dbrx.config import DbrxConfig
31
+ from .dbrx.model import DbrxForCausalLM
32
+ from .deepseek_v1.model import DeepseekForCausalLM
33
+ from .deepseek_v2.model import DeepseekV2ForCausalLM
34
+ from .dit.model import DiT
35
+ from .eagle.model import EagleForCausalLM
36
+ from .enc_dec.model import DecoderModel, EncoderModel, WhisperEncoder
37
+ from .f5tts.model import F5TTS
38
+ from .falcon.config import FalconConfig
39
+ from .falcon.model import FalconForCausalLM, FalconModel
40
+ from .gemma.config import GEMMA2_ARCHITECTURE, GEMMA_ARCHITECTURE, GemmaConfig
41
+ from .gemma.model import GemmaForCausalLM
42
+ from .gpt.config import GPTConfig
43
+ from .gpt.model import GPTForCausalLM, GPTModel
44
+ from .gptj.config import GPTJConfig
45
+ from .gptj.model import GPTJForCausalLM, GPTJModel
46
+ from .gptneox.model import GPTNeoXForCausalLM, GPTNeoXModel
47
+ from .grok.model import GrokForCausalLM
48
+ from .llama.config import LLaMAConfig
49
+ from .llama.model import LLaMAForCausalLM, LLaMAModel
50
+ from .mamba.model import MambaForCausalLM
51
+ from .medusa.config import MedusaConfig
52
+ from .medusa.model import MedusaForCausalLm
53
+ from .mllama.model import MLLaMAModel
54
+ from .modeling_utils import PretrainedConfig, PretrainedModel, SpeculativeDecodingMode
55
+ from .mpt.model import MPTForCausalLM, MPTModel
56
+ from .nemotron_nas.model import DeciLMForCausalLM
57
+ from .opt.model import OPTForCausalLM, OPTModel
58
+ from .phi.model import PhiForCausalLM, PhiModel
59
+ from .phi3.model import Phi3ForCausalLM, Phi3Model
60
+ from .qwen.model import QWenForCausalLM
61
+ from .recurrentgemma.model import RecurrentGemmaForCausalLM
62
+
63
+
64
# Names exported by a star-import of this package.
# Every entry must be bound by the imports at the top of this module: an entry
# that the module does not define makes ``from <package> import *`` raise
# AttributeError.  Entries that were never imported here (PhiConfig,
# Phi3Config, QWenModel, MambaConfig, SkyworkForCausalLM) have been dropped,
# and the fused "QWenConfigQWenForCausalLM" entry (a missing comma) is now the
# imported QWenForCausalLM.
__all__ = [
    "BertModel",
    "BertForQuestionAnswering",
    "BertForSequenceClassification",
    "RobertaModel",
    "RobertaForQuestionAnswering",
    "RobertaForSequenceClassification",
    "BloomModel",
    "BloomForCausalLM",
    "DiT",
    "DeepseekForCausalLM",
    "FalconConfig",
    "DeepseekV2ForCausalLM",
    "FalconForCausalLM",
    "FalconModel",
    "GPTConfig",
    "GPTModel",
    "GPTForCausalLM",
    "OPTForCausalLM",
    "OPTModel",
    "LLaMAConfig",
    "LLaMAForCausalLM",
    "LLaMAModel",
    "MedusaConfig",
    "MedusaForCausalLm",
    "GPTJConfig",
    "GPTJModel",
    "GPTJForCausalLM",
    "GPTNeoXModel",
    "GPTNeoXForCausalLM",
    "PhiModel",
    "Phi3Model",
    "PhiForCausalLM",
    "Phi3ForCausalLM",
    "ChatGLMConfig",
    "ChatGLMForCausalLM",
    "ChatGLMModel",
    "BaichuanForCausalLM",
    "QWenForCausalLM",
    "EncoderModel",
    "DecoderModel",
    "PretrainedConfig",
    "PretrainedModel",
    "WhisperEncoder",
    "MambaForCausalLM",
    "MPTForCausalLM",
    "MPTModel",
    "GemmaConfig",
    "GemmaForCausalLM",
    "DbrxConfig",
    "DbrxForCausalLM",
    "RecurrentGemmaForCausalLM",
    "CogVLMConfig",
    "CogVLMForCausalLM",
    "EagleForCausalLM",
    "SpeculativeDecodingMode",
    "CohereForCausalLM",
    "MLLaMAModel",
    "F5TTS",
]
129
+
130
# (model class, architecture names) pairs, listed in the order the names are
# inserted into MODEL_MAP.  A class may appear in several groups because the
# insertion order of the lookup table is kept exactly as it was.
_ARCHITECTURE_GROUPS = [
    (
        GPTForCausalLM,
        [
            "GPT2LMHeadModel",
            "GPT2LMHeadCustomModel",
            "GPTBigCodeForCausalLM",
            "Starcoder2ForCausalLM",
            "FuyuForCausalLM",
            "Kosmos2ForConditionalGeneration",
            "JAISLMHeadModel",
            "GPTForCausalLM",
            "NemotronForCausalLM",
        ],
    ),
    (OPTForCausalLM, ["OPTForCausalLM"]),
    (BloomForCausalLM, ["BloomForCausalLM"]),
    (FalconForCausalLM, ["RWForCausalLM", "FalconForCausalLM"]),
    (PhiForCausalLM, ["PhiForCausalLM"]),
    (
        Phi3ForCausalLM,
        [
            "Phi3ForCausalLM",
            "Phi3VForCausalLM",
            "Phi3SmallForCausalLM",
            "PhiMoEForCausalLM",
        ],
    ),
    (MambaForCausalLM, ["MambaForCausalLM"]),
    (GPTNeoXForCausalLM, ["GPTNeoXForCausalLM"]),
    (GPTJForCausalLM, ["GPTJForCausalLM"]),
    (MPTForCausalLM, ["MPTForCausalLM"]),
    (ChatGLMForCausalLM, ["GLMModel", "ChatGLMModel", "ChatGLMForCausalLM"]),
    (
        LLaMAForCausalLM,
        [
            "LlamaForCausalLM",
            "ExaoneForCausalLM",
            "MistralForCausalLM",
            "MixtralForCausalLM",
            "ArcticForCausalLM",
        ],
    ),
    (GrokForCausalLM, ["Grok1ModelForCausalLM"]),
    (LLaMAForCausalLM, ["InternLMForCausalLM", "InternLM2ForCausalLM"]),
    (MedusaForCausalLm, ["MedusaForCausalLM"]),
    (BaichuanForCausalLM, ["BaichuanForCausalLM", "BaiChuanForCausalLM"]),
    (LLaMAForCausalLM, ["SkyworkForCausalLM"]),
    (GemmaForCausalLM, [GEMMA_ARCHITECTURE, GEMMA2_ARCHITECTURE]),
    (
        QWenForCausalLM,
        [
            "QWenLMHeadModel",
            "QWenForCausalLM",
            "Qwen2ForCausalLM",
            "Qwen2MoeForCausalLM",
            "Qwen2ForSequenceClassification",
            "Qwen2VLForConditionalGeneration",
        ],
    ),
    (WhisperEncoder, ["WhisperEncoder"]),
    (EncoderModel, ["EncoderModel"]),
    (DecoderModel, ["DecoderModel"]),
    (DbrxForCausalLM, ["DbrxForCausalLM"]),
    (RecurrentGemmaForCausalLM, ["RecurrentGemmaForCausalLM"]),
    (CogVLMForCausalLM, ["CogVLMForCausalLM"]),
    (DiT, ["DiT"]),
    (DeepseekForCausalLM, ["DeepseekForCausalLM"]),
    (DeciLMForCausalLM, ["DeciLMForCausalLM"]),
    (DeepseekV2ForCausalLM, ["DeepseekV2ForCausalLM"]),
    (EagleForCausalLM, ["EagleForCausalLM"]),
    (CohereForCausalLM, ["CohereForCausalLM"]),
    (MLLaMAModel, ["MllamaForConditionalGeneration"]),
    (BertForQuestionAnswering, ["BertForQuestionAnswering"]),
    (BertForSequenceClassification, ["BertForSequenceClassification"]),
    (BertModel, ["BertModel"]),
    (RobertaModel, ["RobertaModel"]),
    (RobertaForQuestionAnswering, ["RobertaForQuestionAnswering"]),
    (RobertaForSequenceClassification, ["RobertaForSequenceClassification"]),
    (F5TTS, ["F5TTS"]),
]

# Lookup table from architecture name to the class that implements it.
MODEL_MAP = {
    architecture: model_cls
    for model_cls, architectures in _ARCHITECTURE_GROUPS
    for architecture in architectures
}
2flow/patch/f5tts/model.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ from collections import OrderedDict
6
+
7
+ import tensorrt as trt
8
+ from tensorrt_llm._common import default_net
9
+
10
+ from ..._utils import str_dtype_to_trt
11
+ from ...functional import Tensor, concat
12
+ from ...layers import Linear
13
+ from ...module import Module, ModuleList
14
+ from ...plugin import current_all_reduce_helper
15
+ from ..modeling_utils import PretrainedConfig, PretrainedModel
16
+ from .modules import AdaLayerNormZero_Final, ConvPositionEmbedding, DiTBlock, TimestepEmbedding
17
+
18
+
19
# Make this file's directory importable by appending it to sys.path.
current_file_path = os.path.abspath(__file__)
parent_dir, _ = os.path.split(current_file_path)
sys.path.append(parent_dir)
22
+
23
+
24
class InputEmbedding(Module):
    """Project the concatenated (x, cond) features and add a convolutional position term."""

    def __init__(self, mel_dim, text_dim, out_dim):
        """Create the input projection and the convolutional position embedding.

        Args:
            mel_dim: Mel feature width; counted twice in the projection input.
            text_dim: Text feature width, also part of the projection input.
            out_dim: Width of the projected output.
        """
        super().__init__()
        in_features = mel_dim * 2 + text_dim
        self.proj = Linear(in_features, out_dim)
        self.conv_pos_embed = ConvPositionEmbedding(dim=out_dim)

    def forward(self, x, cond):
        """Return ``conv_pos_embed(h) + h`` where ``h = proj(concat([x, cond]))``."""
        joined = concat([x, cond], dim=-1)
        projected = self.proj(joined)
        return self.conv_pos_embed(projected) + projected
33
+
34
+
35
class F5TTS(PretrainedModel):
    """F5-TTS denoising transformer expressed as a TensorRT-LLM network.

    The network embeds the time step and the (noise, cond) pair, runs the
    result through ``config.num_hidden_layers`` DiT blocks, and projects the
    final modulated activations to ``config.mel_dim`` channels.
    """

    def __init__(self, config: PretrainedConfig):
        """Build all sub-modules from ``config``.

        Reads ``dtype``, ``hidden_size``, ``mel_dim``, ``text_dim``,
        ``num_hidden_layers``, ``num_attention_heads``, ``dim_head``,
        ``ff_mult`` and ``dropout`` from the config.
        """
        super().__init__(config)
        self.dtype = str_dtype_to_trt(config.dtype)

        self.time_embed = TimestepEmbedding(config.hidden_size)
        self.input_embed = InputEmbedding(config.mel_dim, config.text_dim, config.hidden_size)

        self.dim = config.hidden_size
        self.depth = config.num_hidden_layers
        self.transformer_blocks = ModuleList(
            [
                DiTBlock(
                    dim=self.dim,
                    heads=config.num_attention_heads,
                    dim_head=config.dim_head,
                    ff_mult=config.ff_mult,
                    dropout=config.dropout,
                )
                for _ in range(self.depth)
            ]
        )

        self.norm_out = AdaLayerNormZero_Final(config.hidden_size)  # final modulation
        self.proj_out = Linear(config.hidden_size, config.mel_dim)

    def forward(
        self,
        noise,  # noised input audio
        cond,  # masked cond audio
        time,  # time step
        rope_cos,
        rope_sin,
        input_lengths,
        scale=1.0,
    ):
        """Run the network and mark its result as the ``"denoised"`` output.

        Args:
            noise: Noised input audio features.
            cond: Masked conditioning features, concatenated with ``noise``
                inside the input embedding.
            time: Time-step features fed to the time embedding MLP.
            rope_cos: Cosine table for the rotary position embedding.
            rope_sin: Sine table for the rotary position embedding.
            input_lengths: Per-sequence lengths forwarded to every block.
            scale: Forwarded unchanged to every DiT block.

        Returns:
            The projected output tensor, registered as a network output
            named ``"denoised"`` with this model's dtype.
        """
        t = self.time_embed(time)
        x = self.input_embed(noise, cond)
        for block in self.transformer_blocks:
            x = block(x, t, rope_cos=rope_cos, rope_sin=rope_sin, input_lengths=input_lengths, scale=scale)
        denoise = self.proj_out(self.norm_out(x, t))
        denoise.mark_output("denoised", self.dtype)
        return denoise

    def prepare_inputs(self, **kwargs):
        """Declare the network input tensors and their dynamic-shape ranges.

        Feature widths come from the model config (``mel_dim``, ``text_dim``,
        ``dim_head``), the same fields ``__init__`` uses to size the layers,
        so the declared inputs cannot drift from the layers that consume
        them.  The time-feature width (256) and the maximum sequence length
        (3000) remain fixed.

        Args:
            **kwargs: Must contain ``max_batch_size``.

        Returns:
            A dict with the keys ``noise``, ``cond``, ``time``, ``rope_cos``,
            ``rope_sin`` and ``input_lengths`` mapped to the declared tensors.
        """
        max_batch_size = kwargs["max_batch_size"]
        batch_size_range = [2, 2, max_batch_size]
        mel_size = self.config.mel_dim
        text_dim = self.config.text_dim
        head_dim = self.config.dim_head
        max_seq_len = 3000
        num_frames_range = [200, 2 * max_seq_len, max_seq_len * max_batch_size]
        # ``cond`` carries the mel features concatenated with the text features.
        concat_feature_dim = mel_size + text_dim
        freq_embed_dim = 256

        mapping = self.config.mapping
        if mapping.tp_size > 1:
            current_all_reduce_helper().set_workspace_tensor(mapping, 1)

        def declare(name, dynamic_dims, feature_name, feature_size):
            # One input tensor: every dynamic axis is -1, the last axis is fixed.
            dim_range = OrderedDict(
                [(dim_name, [dim_values]) for dim_name, dim_values in dynamic_dims]
                + [(feature_name, [feature_size])]
            )
            return Tensor(
                name=name,
                dtype=self.dtype,
                shape=[-1] * len(dynamic_dims) + [feature_size],
                dim_range=dim_range,
            )

        if default_net().plugin_config.remove_input_padding:
            # Packed layout: every tensor is [num_frames, features].
            sequence_dims = [("num_frames", num_frames_range)]
            time_dims = sequence_dims
        else:
            # Padded layout: [batch, duration, features]; ``time`` has no duration axis.
            batch_dim = ("batch_size", batch_size_range)
            sequence_dims = [batch_dim, ("max_duratuion", [100, max_seq_len // 2, max_seq_len])]
            time_dims = [batch_dim]

        noise = declare("noise", sequence_dims, "n_mels", mel_size)
        cond = declare("cond", sequence_dims, "embeded_length", concat_feature_dim)
        time = declare("time", time_dims, "freq_dim", freq_embed_dim)
        rope_cos = declare("rope_cos", sequence_dims, "head_dim", head_dim)
        rope_sin = declare("rope_sin", sequence_dims, "head_dim", head_dim)
        input_lengths = Tensor(
            name="input_lengths",
            dtype=trt.int32,
            shape=[-1],
            dim_range=OrderedDict([("batch_size", [batch_size_range])]),
        )
        return {
            "noise": noise,
            "cond": cond,
            "time": time,
            "rope_cos": rope_cos,
            "rope_sin": rope_sin,
            "input_lengths": input_lengths,
        }
2flow/patch/f5tts/modules.py ADDED
@@ -0,0 +1,447 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ from typing import Optional
5
+
6
+ import numpy as np
7
+ import torch
8
+ import torch.nn.functional as F
9
+ from tensorrt_llm._common import default_net
10
+
11
+ from ..._utils import str_dtype_to_trt, trt_dtype_to_np
12
+ from ...functional import (
13
+ Tensor,
14
+ bert_attention,
15
+ cast,
16
+ chunk,
17
+ concat,
18
+ constant,
19
+ expand,
20
+ expand_dims,
21
+ expand_dims_like,
22
+ expand_mask,
23
+ gelu,
24
+ matmul,
25
+ permute,
26
+ shape,
27
+ silu,
28
+ slice,
29
+ softmax,
30
+ squeeze,
31
+ unsqueeze,
32
+ view,
33
+ )
34
+ from ...layers import ColumnLinear, Conv1d, LayerNorm, Linear, Mish, RowLinear
35
+ from ...module import Module
36
+
37
+
38
class FeedForward(Module):
    """Two-layer MLP with a GELU between the linear layers."""

    def __init__(self, dim, dim_out=None, mult=4, dropout=0.0):
        """Create the two linear layers.

        Args:
            dim: Input width.
            dim_out: Output width; falls back to ``dim`` when ``None``.
            mult: Hidden width is ``int(dim * mult)``.
            dropout: Accepted for signature compatibility; not used here.
        """
        super().__init__()
        hidden_width = int(dim * mult)
        if dim_out is None:
            dim_out = dim

        self.project_in = Linear(dim, hidden_width)
        self.ff = Linear(hidden_width, dim_out)

    def forward(self, x):
        """Return ``ff(gelu(project_in(x)))``."""
        hidden = gelu(self.project_in(x))
        return self.ff(hidden)
49
+
50
+
51
class AdaLayerNormZero(Module):
    """Layer norm whose scale and shift are predicted from a conditioning embedding."""

    def __init__(self, dim):
        """Create the modulation projection (``dim -> 6 * dim``) and a non-affine LayerNorm."""
        super().__init__()

        self.linear = Linear(dim, dim * 6)
        self.norm = LayerNorm(dim, elementwise_affine=False, eps=1e-6)

    def forward(self, x, emb=None):
        """Normalise ``x`` and modulate it with the attention scale/shift.

        Returns:
            ``(modulated_x, gate_msa, shift_mlp, scale_mlp, gate_mlp)``; the
            last four are chunks of the projected embedding, returned as-is.
        """
        modulation = self.linear(silu(emb))
        shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = chunk(modulation, 6, dim=1)
        normed = self.norm(x)
        one = constant(np.ones(1, dtype=np.float32)).cast(normed.dtype)
        if not default_net().plugin_config.remove_input_padding:
            # Padded layout: insert an axis at position 1 on the modulation terms.
            scaled = normed * (one + unsqueeze(scale_msa, 1))
            modulated = scaled + unsqueeze(shift_msa, 1)
        else:
            modulated = normed * (one + scale_msa) + shift_msa
        return modulated, gate_msa, shift_mlp, scale_mlp, gate_mlp
68
+
69
+
70
class AdaLayerNormZero_Final(Module):
    """Final adaptive layer norm: only a scale and a shift, no gates."""

    def __init__(self, dim):
        """Create the modulation projection (``dim -> 2 * dim``) and a non-affine LayerNorm."""
        super().__init__()

        self.linear = Linear(dim, dim * 2)

        self.norm = LayerNorm(dim, elementwise_affine=False, eps=1e-6)

    def forward(self, x, emb):
        """Return ``norm(x) * (1 + scale) + shift`` with scale/shift taken from ``emb``."""
        modulation = self.linear(silu(emb))
        scale, shift = chunk(modulation, 2, dim=1)
        one = constant(np.ones(1, dtype=np.float32)).cast(x.dtype)
        if not default_net().plugin_config.remove_input_padding:
            # Padded layout: insert an axis at position 1 on the modulation terms.
            scaled = self.norm(x) * unsqueeze((one + scale), 1)
            return scaled + unsqueeze(shift, 1)
        return self.norm(x) * (one + scale) + shift
88
+
89
+
90
class ConvPositionEmbedding(Module):
    """Position embedding made of two grouped 1-D convolutions, each followed by Mish."""

    def __init__(self, dim, kernel_size=31, groups=16):
        """Create the two convolutions; ``kernel_size`` must be odd."""
        super().__init__()
        assert kernel_size % 2 != 0
        half_kernel = kernel_size // 2
        self.conv1d1 = Conv1d(dim, dim, kernel_size, groups=groups, padding=half_kernel)
        self.conv1d2 = Conv1d(dim, dim, kernel_size, groups=groups, padding=half_kernel)
        self.mish = Mish()

    def forward(self, x, mask=None):  # noqa: F722
        """Apply conv -> Mish -> conv -> Mish along the second-to-last axis of ``x``.

        With ``remove_input_padding`` enabled, a leading axis is added before
        the convolutions and removed again afterwards.  ``mask`` is unused.
        """
        packed = default_net().plugin_config.remove_input_padding
        if packed:
            x = unsqueeze(x, 0)
        # Swap the last two axes for the convolutions, then swap them back.
        channels_first = permute(x, [0, 2, 1])
        hidden = self.mish(self.conv1d1(channels_first))
        hidden = self.mish(self.conv1d2(hidden))
        out = permute(hidden, [0, 2, 1])
        if packed:
            out = squeeze(out, 0)
        return out
107
+
108
+
109
class Attention(Module):
    """Attention layer that owns the projections and delegates the math to ``processor``."""

    def __init__(
        self,
        processor: AttnProcessor,
        dim: int,
        heads: int = 16,
        dim_head: int = 64,
        dropout: float = 0.0,
        context_dim: Optional[int] = None,  # if not None -> joint attention
        context_pre_only=None,
    ):
        """Create the q/k/v/out projections and the optional context projections.

        Args:
            processor: Callable invoked by ``forward`` as ``processor(self, x, ...)``.
            dim: Input and output width of the layer.
            heads: Number of attention heads.
            dim_head: Width of one head.
            dropout: Stored on the instance; not used in this class.
            context_dim: When set, extra key/value projections for a context
                input are created.
            context_pre_only: When not ``None``, a context query projection is
                created; when additionally falsy, a context output projection
                is created too.

        Raises:
            ImportError: If ``torch.nn.functional`` lacks
                ``scaled_dot_product_attention``.
        """
        super().__init__()

        if not hasattr(F, "scaled_dot_product_attention"):
            raise ImportError("Attention equires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.")

        self.processor = processor

        self.dim = dim  # hidden_size
        self.heads = heads
        self.inner_dim = dim_head * heads
        self.dropout = dropout
        self.attention_head_size = dim_head
        self.context_dim = context_dim
        self.context_pre_only = context_pre_only
        self.tp_size = 1
        self.num_attention_heads = heads // self.tp_size
        self.num_attention_kv_heads = heads // self.tp_size
        self.dtype = str_dtype_to_trt("float32")
        self.attention_hidden_size = self.attention_head_size * self.num_attention_heads

        projected_width = self.tp_size * self.num_attention_heads * self.attention_head_size

        def input_projection():
            # q, k and v share one configuration; each call builds a separate layer.
            return ColumnLinear(
                dim,
                projected_width,
                bias=True,
                dtype=self.dtype,
                tp_group=None,
                tp_size=self.tp_size,
            )

        self.to_q = input_projection()
        self.to_k = input_projection()
        self.to_v = input_projection()

        if self.context_dim is not None:
            self.to_k_c = Linear(context_dim, self.inner_dim)
            self.to_v_c = Linear(context_dim, self.inner_dim)
            if self.context_pre_only is not None:
                self.to_q_c = Linear(context_dim, self.inner_dim)

        self.to_out = RowLinear(
            projected_width,
            dim,
            bias=True,
            dtype=self.dtype,
            tp_group=None,
            tp_size=self.tp_size,
        )

        if self.context_pre_only is not None and not self.context_pre_only:
            self.to_out_c = Linear(self.inner_dim, dim)

    def forward(
        self,
        x,  # noised input x
        rope_cos,
        rope_sin,
        input_lengths,
        c=None,  # context c
        scale=1.0,
        rope=None,
        c_rope=None,  # rotary position embedding for c
    ) -> torch.Tensor:
        """Dispatch to the processor, choosing the keyword set by whether ``c`` is given.

        Without a context the processor receives ``rope_cos``/``rope_sin``;
        with one it receives ``c``, ``rope`` and ``c_rope`` instead.
        """
        if c is None:
            return self.processor(
                self, x, rope_cos=rope_cos, rope_sin=rope_sin, input_lengths=input_lengths, scale=scale
            )
        # NOTE(review): the AttnProcessor defined in this module has no ``c`` or
        # ``c_rope`` parameters, so this path looks unsupported with it - confirm.
        return self.processor(self, x, c=c, input_lengths=input_lengths, scale=scale, rope=rope, c_rope=c_rope)
199
+
200
+
201
def rotate_every_two_3dim(tensor: Tensor) -> Tensor:
    """Map each adjacent pair ``(a, b)`` on the last axis to ``(-b, a)``.

    The input must have rank 2 when ``remove_input_padding`` is enabled and
    rank 3 otherwise; the result has the same shape as the input.
    """
    rank = tensor.ndim()
    last = rank - 1
    # Shape of one interleaved half: same as the input, last axis halved.
    half_shape = concat([shape(tensor, axis) / 2 if axis == last else shape(tensor, axis) for axis in range(rank)])
    expected_rank = 2 if default_net().plugin_config.remove_input_padding else 3
    assert tensor.ndim() == expected_rank

    origin = [0] * last
    strides = [1] * last + [2]
    # Stride-2 slices pick the even-indexed and the odd-indexed elements.
    evens = slice(tensor, origin + [0], half_shape, strides)
    odds = slice(tensor, origin + [1], half_shape, strides)
    evens = expand_dims(evens, rank)
    odds = expand_dims(odds, rank)
    zero = constant(np.ascontiguousarray(np.zeros([1], dtype=trt_dtype_to_np(tensor.dtype))))
    odds = zero - odds
    # Pair up (-odd, even) on a new trailing axis, then flatten it back.
    pairs = concat([odds, evens], rank)
    flat_shape = [shape(pairs, axis) for axis in range(last)] + [shape(pairs, last) * 2]
    out = view(pairs, concat(flat_shape))

    return out
228
+
229
+
230
+ # def apply_rotary_pos_emb_3dim(x, rope_cos, rope_sin):
231
+ # if default_net().plugin_config.remove_input_padding:
232
+ # rot_dim = shape(rope_cos, -1) # 64
233
+ # new_t_shape = concat([shape(x, 0), rot_dim]) # (-1, 64)
234
+ # x_ = slice(x, [0, 0], new_t_shape, [1, 1])
235
+ # end_dim = shape(x, -1) - shape(rope_cos, -1)
236
+ # new_t_unrotated_shape = concat([shape(x, 0), end_dim]) # (2, -1, 960)
237
+ # x_unrotated = slice(x, concat([0, rot_dim]), new_t_unrotated_shape, [1, 1])
238
+ # out = concat([x_ * rope_cos + rotate_every_two_3dim(x_) * rope_sin, x_unrotated], dim=-1)
239
+ # else:
240
+ # rot_dim = shape(rope_cos, 2) # 64
241
+ # new_t_shape = concat([shape(x, 0), shape(x, 1), rot_dim]) # (2, -1, 64)
242
+ # x_ = slice(x, [0, 0, 0], new_t_shape, [1, 1, 1])
243
+ # end_dim = shape(x, 2) - shape(rope_cos, 2)
244
+ # new_t_unrotated_shape = concat([shape(x, 0), shape(x, 1), end_dim]) # (2, -1, 960)
245
+ # x_unrotated = slice(x, concat([0, 0, rot_dim]), new_t_unrotated_shape, [1, 1, 1])
246
+ # out = concat([x_ * rope_cos + rotate_every_two_3dim(x_) * rope_sin, x_unrotated], dim=-1)
247
+ # return out
248
+
249
+
250
def apply_rotary_pos_emb_3dim(x, rope_cos, rope_sin, num_heads=16, head_dim=64):
    """Apply the rotary position embedding to every head-sized block of ``x``.

    The last axis of ``x`` is treated as ``num_heads`` consecutive blocks of
    ``head_dim`` features.  Each block is rotated with the same
    ``rope_cos``/``rope_sin`` tables and the results are concatenated back in
    order.  Works for the padded ``[B, N, D]`` layout and, when
    ``remove_input_padding`` is enabled, for the packed ``[N, D]`` layout.

    In the packed layout the tables were previously sliced at the same column
    offset as ``x``.  With one-head-wide tables that offset lies outside the
    table for every head after the first, so the tables are now applied whole
    to each block, exactly as in the padded layout.

    Args:
        x: Projected query or key tensor.
        rope_cos: Cosine table; its last axis is expected to be ``head_dim`` wide.
        rope_sin: Sine table with the same shape as ``rope_cos``.
        num_heads: Number of blocks to rotate (default 16, the former constant).
        head_dim: Offset between consecutive blocks (default 64, the former constant).

    Returns:
        A tensor holding the ``num_heads`` rotated blocks joined on the last axis.
    """
    rank = 2 if default_net().plugin_config.remove_input_padding else 3
    feature_axis = rank - 1
    # One block: all leading axes of x, with the width of the rope table.
    leading = [shape(x, axis) for axis in range(feature_axis)]
    block_shape = concat(leading + [shape(rope_cos, feature_axis)])
    unit_strides = [1] * rank

    pieces = []
    for head in range(num_heads):
        starts = [0] * feature_axis + [head * head_dim]
        x_block = slice(x, starts, block_shape, unit_strides)
        pieces.append(x_block * rope_cos + rotate_every_two_3dim(x_block) * rope_sin)

    return concat(pieces, dim=-1)
283
+
284
+
285
class AttnProcessor:
    """Computes self-attention for an ``Attention`` module, with or without the BERT plugin."""

    def __init__(self):
        pass

    @staticmethod
    def _valid_frame_mask(x, input_lengths):
        """Return an int32 [B, N] mask that is 1 where position < input length."""
        seq_len = shape(x, 1)
        batch = shape(x, 0)
        first_row = concat([1, seq_len])
        max_position_embeddings = 4096
        # create position ids
        positions = constant(np.expand_dims(np.arange(max_position_embeddings).astype(np.int32), 0))
        positions = slice(positions, starts=[0, 0], sizes=first_row)
        positions = expand(positions, concat([batch, seq_len]))  # BxL
        lengths = unsqueeze(input_lengths, 1)  # Bx1
        lengths = expand(lengths, concat([batch, seq_len]))  # BxL
        valid = positions < lengths  # BxL
        return valid.cast("int32")

    @staticmethod
    def _plugin_attention(attn, query, key, value, input_lengths, q_scaling, packed):
        """Run attention through ``bert_attention`` on the fused q/k/v tensor."""
        inner_dim = key.shape[-1]
        qkv = concat([query, key, value], dim=-1)
        # TRT plugin mode
        assert input_lengths is not None
        max_input_length = None
        if packed:
            qkv = qkv.view(concat([-1, 3 * inner_dim]))
            max_input_length = constant(np.zeros([2048], dtype=np.int32))
        return bert_attention(
            qkv,
            input_lengths,
            attn.num_attention_heads,
            attn.attention_head_size,
            q_scaling=q_scaling,
            max_input_length=max_input_length,
        )

    @staticmethod
    def _matmul_attention(attn, query, key, value, mask):
        """Run attention with explicit matmul/softmax layers (padded layout only)."""

        def per_head(t):
            # [B, N, H * Dh] -> [B, N, H, Dh]
            return t.view(concat([shape(t, 0), shape(t, 1), attn.num_attention_heads, attn.attention_head_size]))

        query = per_head(query).transpose(1, 2)
        key = per_head(key).permute([0, 2, 3, 1])
        value = per_head(value).transpose(1, 2)

        scores = matmul(query, key, use_fp32_acc=False)

        if mask is not None:
            additive_mask = expand_mask(mask, shape(query, 2))
            additive_mask = cast(additive_mask, scores.dtype)
            scores = scores + additive_mask

        probs = softmax(scores, dim=-1)

        context = matmul(probs, value, use_fp32_acc=False).transpose(1, 2)
        return context.view(concat([shape(context, 0), shape(context, 1), attn.attention_hidden_size]))

    def __call__(
        self,
        attn,
        x,  # noised input x
        rope_cos,
        rope_sin,
        input_lengths,
        scale=1.0,
        rope=None,
    ) -> torch.FloatTensor:
        """Project ``x`` to q/k/v, rotate q and k, attend, and apply ``attn.to_out``.

        In the padded layout (``remove_input_padding`` off) a length mask is
        built from ``input_lengths`` and the projected output is multiplied by
        it.  ``scale`` and ``rope`` are accepted but not used.
        """
        plugin_config = default_net().plugin_config
        packed = plugin_config.remove_input_padding

        query = attn.to_q(x)
        key = attn.to_k(x)
        value = attn.to_v(x)
        # k,v,q all (2,1226,1024)
        query = apply_rotary_pos_emb_3dim(query, rope_cos, rope_sin)
        key = apply_rotary_pos_emb_3dim(key, rope_cos, rope_sin)

        # attention
        q_scaling = 1.0 / math.sqrt(attn.attention_head_size)
        mask = None if packed else self._valid_frame_mask(x, input_lengths)

        if plugin_config.bert_attention_plugin:
            context = self._plugin_attention(attn, query, key, value, input_lengths, q_scaling, packed)
        else:
            assert not packed
            context = self._matmul_attention(attn, query, key, value, mask)

        context = attn.to_out(context)
        if mask is not None:
            keep = mask.view(concat([shape(mask, 0), shape(mask, 1), 1]))
            keep = expand_dims_like(keep, context)
            keep = cast(keep, context.dtype)
            context = context * keep
        return context
388
+
389
+
390
+ # DiT Block
391
class DiTBlock(Module):
    """One transformer block: adaptive-norm attention followed by an adaptive-norm MLP."""

    def __init__(self, dim, heads, dim_head, ff_mult=2, dropout=0.1):
        """Create the attention and feed-forward sub-layers with their norms.

        Args:
            dim: Width of the block's input and output.
            heads: Number of attention heads.
            dim_head: Width of one attention head.
            ff_mult: Hidden-width multiplier of the feed-forward layer.
            dropout: Forwarded to the attention and feed-forward constructors.
        """
        super().__init__()

        self.attn_norm = AdaLayerNormZero(dim)
        self.attn = Attention(
            processor=AttnProcessor(),
            dim=dim,
            heads=heads,
            dim_head=dim_head,
            dropout=dropout,
        )

        self.ff_norm = LayerNorm(dim, elementwise_affine=False, eps=1e-6)
        self.ff = FeedForward(dim=dim, mult=ff_mult, dropout=dropout)

    def forward(self, x, t, rope_cos, rope_sin, input_lengths, scale=1.0, rope=None):
        """Apply the gated attention and feed-forward residual updates.

        Args:
            x: Noised input activations.
            t: Time embedding that produces the modulation terms.
            rope_cos: Cosine table forwarded to the attention layer.
            rope_sin: Sine table forwarded to the attention layer.
            input_lengths: Per-sequence lengths forwarded to the attention layer.
            scale: Forwarded to the attention layer.
            rope: Unused.  The default used to be the ``ModuleNotFoundError``
                class, which looks accidental; it is now ``None``.

        Returns:
            The updated activations, same layout as ``x``.
        """
        packed = default_net().plugin_config.remove_input_padding

        def per_frame(term):
            # Padded layout: insert an axis at position 1 so the term broadcasts.
            return term if packed else unsqueeze(term, 1)

        # pre-norm & modulation for attention input
        norm, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.attn_norm(x, emb=t)
        # attention
        attn_output = self.attn(x=norm, rope_cos=rope_cos, rope_sin=rope_sin, input_lengths=input_lengths, scale=scale)

        # gated residual for the attention output
        x = x + per_frame(gate_msa) * attn_output

        # norm & modulation for the feed-forward input
        ones = constant(np.ones(1, dtype=np.float32)).cast(x.dtype)
        norm = self.ff_norm(x) * (ones + per_frame(scale_mlp)) + per_frame(shift_mlp)
        ff_output = self.ff(norm)

        # gated residual for the feed-forward output
        x = x + per_frame(gate_mlp) * ff_output

        return x
434
+
435
+
436
class TimestepEmbedding(Module):
    """Two-layer MLP (Linear -> SiLU -> Linear) applied to the time-step features."""

    def __init__(self, dim, freq_embed_dim=256, dtype=None):
        """Create the two linear layers.

        Args:
            dim: Output width of both layers.
            freq_embed_dim: Input width of the first layer.
            dtype: Forwarded to both ``Linear`` layers.
        """
        super().__init__()
        # self.time_embed = SinusPositionEmbedding(freq_embed_dim)
        self.mlp1 = Linear(freq_embed_dim, dim, bias=True, dtype=dtype)
        self.mlp2 = Linear(dim, dim, bias=True, dtype=dtype)

    def forward(self, timestep):
        """Return ``mlp2(silu(mlp1(timestep)))``."""
        hidden = silu(self.mlp1(timestep))
        return self.mlp2(hidden)
2flow/requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ conv-stft
2
+ vocos
3
+ safetensors
4
+ tensorrt_llm
5
+ onnxscript
2flow/scripts/build.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
# Build the image from the current directory and tag it tired:lastest.
docker build --tag tired:lastest .
2
+
2flow/scripts/f5/build_engine.sh ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Run trtllm-build on the converted TTS checkpoint.
CHECKPOINT_DIR=./models/pre_engine/tts
ENGINE_DIR=./models/engine/tts

trtllm-build \
    --checkpoint_dir "$CHECKPOINT_DIR" \
    --max_batch_size 8 \
    --output_dir "$ENGINE_DIR" \
    --remove_input_padding disable
2flow/scripts/f5/fix_lib.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import shutil
2
+ import tensorrt_llm
3
+ from pathlib import Path
4
+
5
def _install_patch_entry(entry, destination):
    """Copy one file or directory from the patch folder to ``destination``."""
    if entry.is_file():
        shutil.copy2(entry, destination)
        print(f" Copied: {entry.name}")
    elif entry.is_dir():
        # Replace an existing directory wholesale instead of merging into it.
        if destination.exists():
            shutil.rmtree(destination)
        shutil.copytree(entry, destination)
        print(f" Copied directory: {entry.name}")


# Locate the "models" directory inside the imported tensorrt_llm package.
trtllm_path = Path(tensorrt_llm.__file__).parent
target_dir = trtllm_path / "models"

print(f"TensorRT-LLM path: {trtllm_path}")
print(f"Target models directory: {target_dir}")

target_dir.mkdir(parents=True, exist_ok=True)

# The patch folder is resolved relative to the current working directory.
patch_dir = Path("./patch")

patch_files = list(patch_dir.glob("*"))
if not patch_files:
    print(f"⚠ No patch files found in {patch_dir}")
else:
    print(f"Copying {len(patch_files)} patch file(s) to tensorrt_llm/models")

    for patch_file in patch_files:
        _install_patch_entry(patch_file, target_dir / patch_file.name)

    print("✓ Patch files copied successfully")
2flow/scripts/f5/pre_build_engine.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ python3 -m utils.tts.convert_checkpoint \
2
+ --timm_ckpt ./models/downloads/F5TTS_v1_Base/model_1250000.safetensors \
3
+ --output_dir ./models/pre_engine/tts \
4
+ --model_name F5TTS_v1_Base
2flow/scripts/init.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
#!/bin/bash
# Start an interactive, disposable (--rm) GPU container from the image built by
# scripts/build.sh, with the project checkout bind-mounted at /workspace/2flow.
#
# The host-side checkout location can be overridden with TWOFLOW_HOST_DIR; when it
# is unset the original hard-coded path is used, so existing usage is unchanged.
TWOFLOW_HOST_DIR="${TWOFLOW_HOST_DIR:-/mnt/hoang.dinh/code/2flow}"

# The image tag is spelled "lastest" (sic) to match scripts/build.sh.
docker run -it --rm \
  --gpus all \
  -v "${TWOFLOW_HOST_DIR}:/workspace/2flow" \
  -w /workspace/2flow \
  tired:lastest \
  bash
2flow/scripts/vocoder/build_engine.sh ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ bash scripts/vocoder/export_vocos_trt.sh \
2
+ ./models/pre_engine/tts/vocos_vocoder.onnx \
3
+ ./models/engine/tts/vocos_vocoder.plan
2flow/scripts/vocoder/export_vocos_trt.sh ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
# Usage: export_vocos_trt.sh <onnx_path> <engine_path>
# Builds a TensorRT engine from the vocoder ONNX model with a dynamic
# (batch x 100 x length) "mel" input profile.

# Fail fast: stop on the first error, on unset variables and on pipeline failures.
set -euo pipefail

TRTEXEC="/usr/src/tensorrt/bin/trtexec"

if [ "$#" -lt 2 ]; then
    echo "Usage: $0 <onnx_path> <engine_path>" >&2
    exit 1
fi

ONNX_PATH=$1
ENGINE_PATH=$2
echo "ONNX_PATH: $ONNX_PATH"
echo "ENGINE_PATH: $ENGINE_PATH"
# Informational only: no precision flag is passed to trtexec below.
PRECISION="fp32"

if [ ! -f "${ONNX_PATH}" ]; then
    echo "ONNX model not found: ${ONNX_PATH}" >&2
    exit 1
fi

# Make sure the engine's destination directory exists before trtexec writes to it.
mkdir -p "$(dirname "${ENGINE_PATH}")"

MIN_BATCH_SIZE=1
OPT_BATCH_SIZE=1
MAX_BATCH_SIZE=8

MIN_INPUT_LENGTH=1
OPT_INPUT_LENGTH=1000
MAX_INPUT_LENGTH=3000

# Shape profile for the "mel" input: <batch>x100x<length>.
MEL_MIN_SHAPE="${MIN_BATCH_SIZE}x100x${MIN_INPUT_LENGTH}"
MEL_OPT_SHAPE="${OPT_BATCH_SIZE}x100x${OPT_INPUT_LENGTH}"
MEL_MAX_SHAPE="${MAX_BATCH_SIZE}x100x${MAX_INPUT_LENGTH}"

# Paths are quoted so that locations containing spaces are passed as one argument.
"${TRTEXEC}" \
    --minShapes="mel:${MEL_MIN_SHAPE}" \
    --optShapes="mel:${MEL_OPT_SHAPE}" \
    --maxShapes="mel:${MEL_MAX_SHAPE}" \
    --onnx="${ONNX_PATH}" \
    --saveEngine="${ENGINE_PATH}"
43
+
2flow/scripts/vocoder/pre_build_engine.sh ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ python3 -m utils.tts.export_vocoder_to_onnx \
2
+ --vocoder vocos \
3
+ --output-path ./models/pre_engine/tts/vocos_vocoder.onnx
2flow/services/triton/f5_tts_triton_server/f5_tts/1/f5_tts_trtllm.py ADDED
@@ -0,0 +1,486 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import os
3
+ import time
4
+ from functools import wraps
5
+ from typing import List, Optional
6
+
7
+ import safetensors.torch
8
+ import tensorrt as trt
9
+ import tensorrt_llm
10
+ import torch
11
+ import torch.nn as nn
12
+ import torch.nn.functional as F
13
+ from tensorrt_llm._utils import str_dtype_to_torch, trt_dtype_to_torch
14
+ from tensorrt_llm.logger import logger
15
+ from tensorrt_llm.runtime.session import Session
16
+
17
+
18
def remove_tensor_padding(input_tensor, input_tensor_lengths=None):
    """Drop per-row padding and pack the valid prefixes into one tensor.

    For every row ``i`` of ``input_tensor`` the first ``input_tensor_lengths[i]``
    entries along dimension 1 are kept; the kept slices are concatenated along
    dimension 0 and returned as a contiguous tensor.

    Works for both the audio case (batch, seq_len, feature_len) and the
    position-id case (batch, seq_len).
    """
    assert input_tensor_lengths is not None, "input_tensor_lengths must be provided for 3D input_tensor"

    trimmed_rows = [
        input_tensor[row, : input_tensor_lengths[row]] for row in range(input_tensor.shape[0])
    ]
    return torch.cat(trimmed_rows, dim=0).contiguous()
33
+
34
+
35
+ # class TextEmbedding(nn.Module):
36
+ # def __init__(self, text_num_embeds, text_dim, conv_layers=0, conv_mult=2, precompute_max_pos=4096):
37
+ # super().__init__()
38
+ # self.text_embed = nn.Embedding(text_num_embeds + 1, text_dim) # use 0 as filler token
39
+ # self.register_buffer("freqs_cis", precompute_freqs_cis(text_dim, precompute_max_pos), persistent=False)
40
+ # self.text_blocks = nn.Sequential(*[ConvNeXtV2Block(text_dim, text_dim * conv_mult) for _ in range(conv_layers)])
41
+
42
+ # def forward(self, text):
43
+ # # only keep tensors with value not -1
44
+ # text_mask = text != -1
45
+ # text_pad_cut_off_index = text_mask.sum(dim=1).max()
46
+
47
+ # text = text[:, :text_pad_cut_off_index]
48
+ # text = self.text_embed(text)
49
+ # text = text + self.freqs_cis[: text.shape[1], :]
50
+ # for block in self.text_blocks:
51
+ # text = block(text)
52
+ # # padding text to the original length
53
+ # # text shape: B,seq_len,C
54
+ # # pad at the second dimension
55
+ # text = F.pad(text, (0, 0, 0, text_mask.shape[1] - text.shape[1], 0, 0), value=0)
56
+ # return text
57
+
58
+
59
class TextEmbedding(nn.Module):
    """Embed padded token ids and refine them with ConvNeXt-V2 blocks.

    Token conventions read by ``forward``:
      * ``-1`` marks trailing padding that is cut off before the embedding lookup,
      * ``0``  is the filler token; its positions are forced to zero vectors.
    """

    def __init__(self, text_num_embeds, text_dim, conv_layers=0, conv_mult=2, precompute_max_pos=4096):
        super().__init__()
        self.text_embed = nn.Embedding(text_num_embeds + 1, text_dim)  # use 0 as filler token
        self.register_buffer("freqs_cis", precompute_freqs_cis(text_dim, precompute_max_pos), persistent=False)
        self.text_blocks = nn.Sequential(*[ConvNeXtV2Block(text_dim, text_dim * conv_mult) for _ in range(conv_layers)])

    def forward(self, text):
        """Return embeddings of shape (batch, original_seq_len, text_dim).

        Args:
            text: integer tensor (batch, seq_len) using the -1 / 0 conventions
                described on the class.
        """
        # only keep tensors with value not -1
        text_mask = text != -1
        text_pad_cut_off_index = text_mask.sum(dim=1).max()
        text = text[:, :text_pad_cut_off_index]

        # Build the filler mask AFTER truncation so its time dimension matches the
        # embedded sequence. Computing it on the untruncated ids made masked_fill
        # fail with a shape mismatch whenever any -1 padding had been cut off.
        text_mask_cutoff = (text == 0).unsqueeze(-1)  # (batch, kept_len, 1), broadcast over channels

        text = self.text_embed(text)
        text = text + self.freqs_cis[: text.shape[1], :]
        text = text.masked_fill(text_mask_cutoff, 0.0)
        for block in self.text_blocks:
            text = block(text)
            # Re-zero filler positions after every block so they stay zero.
            text = text.masked_fill(text_mask_cutoff, 0.0)

        # padding text back to original length
        text = F.pad(text, (0, 0, 0, text_mask.shape[1] - text.shape[1], 0, 0), value=0)

        return text
84
+
85
+
86
class GRN(nn.Module):
    """Response-normalisation layer with a learnable scale, bias and identity skip.

    ``gamma`` and ``beta`` both start at zero, so a freshly constructed layer
    returns its input unchanged.
    """

    def __init__(self, dim):
        super().__init__()
        self.gamma = nn.Parameter(torch.zeros(1, 1, dim))
        self.beta = nn.Parameter(torch.zeros(1, 1, dim))

    def forward(self, x):
        # L2 norm over dimension 1, one value per (batch, channel).
        global_norm = x.norm(p=2, dim=1, keepdim=True)
        # Divide each channel's norm by the mean norm across the last dimension.
        relative_norm = global_norm / (global_norm.mean(dim=-1, keepdim=True) + 1e-6)
        scaled = self.gamma * (x * relative_norm)
        return scaled + self.beta + x
96
+
97
+
98
class ConvNeXtV2Block(nn.Module):
    """Residual block: depthwise Conv1d -> LayerNorm -> Linear -> GELU -> GRN -> Linear.

    Input and output are laid out as (batch, seq_len, dim); the tensor is
    transposed only around the depthwise convolution.
    """

    def __init__(
        self,
        dim: int,
        intermediate_dim: int,
        dilation: int = 1,
    ):
        super().__init__()
        kernel_size = 7
        # Padding chosen so the convolution preserves the sequence length.
        same_padding = (dilation * (kernel_size - 1)) // 2
        self.dwconv = nn.Conv1d(
            dim,
            dim,
            kernel_size=kernel_size,
            padding=same_padding,
            groups=dim,  # depthwise: one filter per channel
            dilation=dilation,
        )
        self.norm = nn.LayerNorm(dim, eps=1e-6)
        # pointwise/1x1 convs, implemented with linear layers
        self.pwconv1 = nn.Linear(dim, intermediate_dim)
        self.act = nn.GELU()
        self.grn = GRN(intermediate_dim)
        self.pwconv2 = nn.Linear(intermediate_dim, dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # b n d -> b d n for the convolution, then back to b n d
        hidden = self.dwconv(x.transpose(1, 2)).transpose(1, 2)
        hidden = self.pwconv1(self.norm(hidden))
        hidden = self.grn(self.act(hidden))
        return x + self.pwconv2(hidden)
127
+
128
+
129
def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0, theta_rescale_factor=1.0):
    """Return a (end, dim) table of rotary angles as ``[cos | sin]`` halves.

    ``theta_rescale_factor`` implements the NTK-aware rescaling proposed by reddit
    user bloc97 to stretch rotary embeddings to longer sequences without
    fine-tuning:
    https://www.reddit.com/r/LocalLLaMA/comments/14lz7j5/ntkaware_scaled_rope_allows_llama_models_to_have/
    https://github.com/lucidrains/rotary-embedding-torch/blob/main/rotary_embedding_torch/rotary_embedding_torch.py
    """
    theta *= theta_rescale_factor ** (dim / (dim - 2))
    exponents = torch.arange(0, dim, 2)[: (dim // 2)].float() / dim
    inv_freq = 1.0 / (theta ** exponents)
    positions = torch.arange(end, device=inv_freq.device)  # type: ignore
    angles = torch.outer(positions, inv_freq).float()  # type: ignore
    # First half of the last dimension is the real (cos) part, second half the imaginary (sin) part.
    return torch.cat([torch.cos(angles), torch.sin(angles)], dim=-1)
141
+
142
+
143
def load_checkpoint(ckpt_path, use_ema=True):
    """Load a checkpoint and return only its text-embedding weights.

    ``.safetensors`` files are read with ``safetensors.torch.load_file``; any other
    path is read with ``torch.load(..., weights_only=True)``. With ``use_ema`` the
    EMA weights are selected and their ``ema_model_state_dict.`` / ``ema_model.``
    prefixes removed. Keys containing ``text_embed`` are returned with the
    ``transformer.text_embed.`` prefix stripped
    (``transformer.text_embed.text_embed.weight`` -> ``text_embed.weight``).
    """
    if ckpt_path.endswith('.safetensors'):
        print(f"Loading safetensors checkpoint from {ckpt_path}")
        checkpoint = safetensors.torch.load_file(ckpt_path)
        # safetensors keys are already flat; default to using them unchanged.
        dict_state = checkpoint
        if use_ema:
            if any(name.startswith("ema_model_state_dict.") for name in checkpoint.keys()):
                dict_state = {}
                for name, tensor in checkpoint.items():
                    if not name.startswith("ema_model_state_dict."):
                        continue
                    if "initted" in name or "step" in name:
                        continue
                    stripped = name.replace("ema_model_state_dict.", "").replace("ema_model.", "")
                    dict_state[stripped] = tensor
            elif any(name.startswith("ema_model.") for name in checkpoint.keys()):
                dict_state = {}
                for name, tensor in checkpoint.items():
                    if not name.startswith("ema_model."):
                        continue
                    if "initted" in name or "step" in name:
                        continue
                    dict_state[name.replace("ema_model.", "")] = tensor
    else:
        print(f"Loading PyTorch checkpoint from {ckpt_path}")
        checkpoint = torch.load(ckpt_path, weights_only=True)
        if use_ema:
            ema_weights = {}
            for name, tensor in checkpoint["ema_model_state_dict"].items():
                if name in ["initted", "step"]:
                    continue
                ema_weights[name.replace("ema_model.", "")] = tensor
            checkpoint["model_state_dict"] = ema_weights
        dict_state = checkpoint["model_state_dict"]

    # transformer.text_embed.text_embed.weight -> text_embed.weight
    return {
        name.replace("transformer.text_embed.", ""): dict_state[name]
        for name in dict_state.keys()
        if "text_embed" in name
    }
186
+
187
+
188
class F5TTS(object):
    """Runtime wrapper that drives a serialized TensorRT-LLM F5-TTS engine.

    The constructor loads ``rank{rank}.engine`` from ``tllm_model_dir``, builds the
    PyTorch ``TextEmbedding`` front-end from the checkpoint at ``model_path`` and
    precomputes the rotary tables, the sinusoidal time embeddings and the
    integration step sizes used by the ``nfe_steps``-step sampling loop.
    """

    def __init__(
        self,
        config,
        debug_mode=True,
        stream: Optional[torch.cuda.Stream] = None,
        tllm_model_dir: Optional[str] = None,
        model_path: Optional[str] = None,
        vocab_size: Optional[int] = None,
    ):
        """Load the engine and precompute all step-invariant tensors.

        Args:
            config: parsed engine ``config.json``; ``pretrained_config.dtype`` and
                ``pretrained_config.mapping`` are read.
            debug_mode: when False, the engine's I/O tensor names must match the
                expected set exactly; when True, extra tensors are recorded in
                ``self.debug_tensors`` instead.
            stream: CUDA stream to run on; a new one is created when omitted.
            tllm_model_dir: directory containing ``rank{rank}.engine``.
            model_path: checkpoint passed to ``load_checkpoint`` for the text embedding.
            vocab_size: number of text tokens (``TextEmbedding`` adds the filler slot).

        Raises:
            RuntimeError: engine tensor names differ from the expected ones
                (only when ``debug_mode`` is False).
        """
        self.dtype = config["pretrained_config"]["dtype"]

        rank = tensorrt_llm.mpi_rank()
        world_size = config["pretrained_config"]["mapping"]["world_size"]
        cp_size = config["pretrained_config"]["mapping"]["cp_size"]
        tp_size = config["pretrained_config"]["mapping"]["tp_size"]
        pp_size = config["pretrained_config"]["mapping"]["pp_size"]
        assert pp_size == 1
        self.mapping = tensorrt_llm.Mapping(
            world_size=world_size, rank=rank, cp_size=cp_size, tp_size=tp_size, pp_size=1, gpus_per_node=1
        )

        local_rank = rank % self.mapping.gpus_per_node
        self.device = torch.device(f"cuda:{local_rank}")

        torch.cuda.set_device(self.device)

        self.stream = stream
        if self.stream is None:
            self.stream = torch.cuda.Stream(self.device)
        torch.cuda.set_stream(self.stream)

        engine_file = os.path.join(tllm_model_dir, f"rank{rank}.engine")
        logger.info(f"Loading engine from {engine_file}")
        with open(engine_file, "rb") as f:
            engine_buffer = f.read()

        assert engine_buffer is not None

        self.session = Session.from_serialized_engine(engine_buffer)

        self.debug_mode = debug_mode

        self.inputs = {}
        self.outputs = {}
        self.buffer_allocated = False

        expected_tensor_names = ["noise", "cond", "time", "rope_cos", "rope_sin", "input_lengths", "denoised"]

        found_tensor_names = [self.session.engine.get_tensor_name(i) for i in range(self.session.engine.num_io_tensors)]
        if not self.debug_mode and set(expected_tensor_names) != set(found_tensor_names):
            logger.error(
                f"The following expected tensors are not found: {set(expected_tensor_names).difference(set(found_tensor_names))}"
            )
            logger.error(
                f"Those tensors in engine are not expected: {set(found_tensor_names).difference(set(expected_tensor_names))}"
            )
            logger.error(f"Expected tensor names: {expected_tensor_names}")
            logger.error(f"Found tensor names: {found_tensor_names}")
            raise RuntimeError("Tensor names in engine are not the same as expected.")
        if self.debug_mode:
            self.debug_tensors = list(set(found_tensor_names) - set(expected_tensor_names))

        self.max_mel_len = 4096
        self.text_embedding = TextEmbedding(
            text_num_embeds=vocab_size, text_dim=512, conv_layers=4, precompute_max_pos=self.max_mel_len
        ).to(self.device)
        self.text_embedding.load_state_dict(load_checkpoint(model_path), strict=True)

        self.target_audio_sample_rate = 24000
        self.target_rms = 0.15  # target rms for audio
        self.n_fft = 1024
        self.win_length = 1024
        self.hop_length = 256
        self.n_mel_channels = 100
        # self.max_mel_len = 3000
        self.head_dim = 64
        self.base_rescale_factor = 1.0
        self.interpolation_factor = 1.0

        # Rotary tables for positions [0, max_mel_len); each frequency is repeated
        # twice along the last dimension. Stored in half precision.
        base = 10000.0 * self.base_rescale_factor ** (self.head_dim / (self.head_dim - 2))
        inv_freq = 1.0 / (base ** (torch.arange(0, self.head_dim, 2).float() / self.head_dim))
        freqs = torch.outer(torch.arange(self.max_mel_len, dtype=torch.float32), inv_freq) / self.interpolation_factor
        self.freqs = freqs.repeat_interleave(2, dim=-1).unsqueeze(0)
        self.rope_cos = self.freqs.cos().half()
        self.rope_sin = self.freqs.sin().half()

        # Warped time grid over [0, 1] and the step sizes between consecutive points.
        self.nfe_steps = 16
        t = torch.linspace(0, 1, self.nfe_steps + 1, dtype=torch.float32)
        time_step = t + (-1.0) * (torch.cos(torch.pi * 0.5 * t) - 1 + t)
        delta_t = torch.diff(time_step)
        # WAR: hard coding 256 here
        tmp_dim = 256
        # Sinusoidal embedding of every time step, computed once up front.
        time_expand = torch.zeros((1, self.nfe_steps, tmp_dim), dtype=torch.float32)
        half_dim = tmp_dim // 2
        emb_factor = math.log(10000) / (half_dim - 1)
        emb_factor = 1000.0 * torch.exp(torch.arange(half_dim, dtype=torch.float32) * -emb_factor)
        for i in range(self.nfe_steps):
            emb = time_step[i] * emb_factor
            time_expand[:, i, :] = torch.cat((emb.sin(), emb.cos()), dim=-1)
        self.time_expand = time_expand.to(self.device)
        self.delta_t = torch.cat((delta_t, delta_t), dim=0).contiguous().to(self.device)

    def _tensor_dtype(self, name):
        """Return the torch dtype of the engine tensor called ``name``."""
        # return torch dtype given tensor name for convenience
        dtype = trt_dtype_to_torch(self.session.engine.get_tensor_dtype(name))
        return dtype

    def _setup(self, batch_size, seq_len):
        """Allocate an output buffer for every engine output tensor.

        The first two dimensions reported by the engine are replaced with
        ``batch_size`` and ``seq_len``.
        """
        for i in range(self.session.engine.num_io_tensors):
            name = self.session.engine.get_tensor_name(i)
            if self.session.engine.get_tensor_mode(name) == trt.TensorIOMode.OUTPUT:
                shape = list(self.session.engine.get_tensor_shape(name))
                shape[0] = batch_size
                shape[1] = seq_len
                self.outputs[name] = torch.empty(shape, dtype=self._tensor_dtype(name), device=self.device)

        self.buffer_allocated = True

    def cuda_stream_guard(func):
        """Sync external stream and set current stream to the one bound to the session. Reset on exit."""

        @wraps(func)
        def wrapper(self, *args, **kwargs):
            external_stream = torch.cuda.current_stream()
            if external_stream == self.stream:
                # Already on the session stream: nothing to switch or restore.
                return func(self, *args, **kwargs)

            external_stream.synchronize()
            torch.cuda.set_stream(self.stream)
            try:
                ret = func(self, *args, **kwargs)
                self.stream.synchronize()
            finally:
                # Restore the caller's stream even when the wrapped call raises,
                # so a failed inference does not leave the session stream installed
                # as the current stream.
                torch.cuda.set_stream(external_stream)
            return ret

        return wrapper

    @cuda_stream_guard
    def forward(
        self,
        noise: torch.Tensor,
        cond: torch.Tensor,
        time_expand: torch.Tensor,
        rope_cos: torch.Tensor,
        rope_sin: torch.Tensor,
        input_lengths: torch.Tensor,
        delta_t: torch.Tensor,
        use_perf: bool = False,
    ):
        """Run the ``nfe_steps``-step sampling loop with classifier-free guidance.

        The batch is expected to hold the conditional half followed by the
        unconditional half. Only the first half of ``noise`` is integrated; it is
        duplicated for both halves on every engine call.

        Returns:
            The integrated first-half tensor after the last step.
        """
        if use_perf:
            torch.cuda.nvtx.range_push("flow matching")
        cfg_strength = 2.0
        batch_size = noise.shape[0]
        half_batch = batch_size // 2
        noise_half = noise[:half_batch]  # Store the initial half of noise

        input_type = str_dtype_to_torch(self.dtype)

        # Keep a copy of the initial tensors
        cond = cond.to(input_type)
        rope_cos = rope_cos.to(input_type)
        rope_sin = rope_sin.to(input_type)
        input_lengths = input_lengths.to(str_dtype_to_torch("int32"))

        # Instead of iteratively updating noise within a single model context,
        # we'll do a single forward pass for each iteration with fresh context setup
        for i in range(self.nfe_steps):
            # Re-setup the buffers for clean execution
            self._setup(batch_size, noise.shape[1])
            if not self.buffer_allocated:
                raise RuntimeError("Buffer not allocated, please call setup first!")

            # Re-create combined noises for this iteration
            current_noise = torch.cat([noise_half, noise_half], dim=0).to(input_type)

            # Get time step for this iteration
            current_time = time_expand[:, i].to(input_type)

            # Create fresh input dictionary for this iteration
            current_inputs = {
                "noise": current_noise,
                "cond": cond,
                "time": current_time,
                "rope_cos": rope_cos,
                "rope_sin": rope_sin,
                "input_lengths": input_lengths,
            }

            # Update inputs and set shapes
            self.inputs.clear()  # Clear previous inputs
            self.inputs.update(**current_inputs)
            self.session.set_shapes(self.inputs)

            if use_perf:
                torch.cuda.nvtx.range_push(f"execute {i}")
            ok = self.session.run(self.inputs, self.outputs, self.stream.cuda_stream)
            assert ok, "Failed to execute model"
            # self.session.context.execute_async_v3(self.stream.cuda_stream)
            if use_perf:
                torch.cuda.nvtx.range_pop()
            # Process results
            t_scale = delta_t[i].unsqueeze(0).to(input_type)

            # Extract predictions
            pred_cond = self.outputs["denoised"][:half_batch]
            pred_uncond = self.outputs["denoised"][half_batch:]

            # Apply classifier-free guidance with safeguards
            guidance = pred_cond + (pred_cond - pred_uncond) * cfg_strength
            # Calculate update for noise
            noise_half = noise_half + guidance * t_scale
        if use_perf:
            torch.cuda.nvtx.range_pop()
        return noise_half

    def sample(
        self,
        text_pad_sequence: torch.Tensor,
        ref_mel_batch: torch.Tensor,
        ref_mel_len_batch: torch.Tensor,
        estimated_reference_target_mel_len: List[int],
        remove_input_padding: bool = False,
        use_perf: bool = False,
    ):
        """Embed the text, assemble the engine inputs and run ``forward``.

        Args:
            text_pad_sequence: padded token ids, one row per item.
            ref_mel_batch: reference mel features; dimension 1 is used as the
                maximum sequence length.
            ref_mel_len_batch: not read by this method.
            estimated_reference_target_mel_len: per-item total length, passed to
                the engine as ``input_lengths``.
            remove_input_padding: pack inputs without padding and return a list of
                per-item outputs instead of one padded tensor.
            use_perf: emit NVTX ranges around the main stages.

        Returns:
            ``(denoised, cost_time)`` where ``cost_time`` is the wall-clock time of
            the ``forward`` call in seconds.
        """
        if use_perf:
            torch.cuda.nvtx.range_push("text embedding")
        batch = text_pad_sequence.shape[0]
        max_seq_len = ref_mel_batch.shape[1]

        # Append one all-zero row: its embedding is the "text dropped" condition
        # used for the unconditional half of the batch.
        text_pad_sequence_drop = torch.cat(
            (text_pad_sequence, torch.zeros((1, text_pad_sequence.shape[1]), dtype=torch.int32).to(self.device)), dim=0
        )

        # Rows are embedded one at a time, so each row is truncated to its own
        # non-padding length inside TextEmbedding.
        text_embedding_drop_list = []
        for i in range(batch + 1):
            text_embedding_drop_list.append(self.text_embedding(text_pad_sequence_drop[i].unsqueeze(0).to(self.device)))
        text_embedding_drop_condition = torch.cat(text_embedding_drop_list, dim=0)

        text_embedding = text_embedding_drop_condition[:-1]
        # text_embedding_drop B,T,C batch should be the same
        text_embedding_drop = text_embedding_drop_condition[-1].unsqueeze(0).repeat(batch, 1, 1)

        noise = torch.randn_like(ref_mel_batch).to(self.device)
        rope_cos = self.rope_cos[:, :max_seq_len, :].float().repeat(batch, 1, 1)
        rope_sin = self.rope_sin[:, :max_seq_len, :].float().repeat(batch, 1, 1)

        cat_mel_text = torch.cat((ref_mel_batch, text_embedding), dim=-1)
        cat_mel_text_drop = torch.cat(
            (
                torch.zeros((batch, max_seq_len, self.n_mel_channels), dtype=torch.float32).to(self.device),
                text_embedding_drop,
            ),
            dim=-1,
        )

        time_expand = self.time_expand.repeat(2 * batch, 1, 1).contiguous()

        # Convert estimated_reference_target_mel_len to tensor
        input_lengths = torch.tensor(estimated_reference_target_mel_len, dtype=torch.int32)

        # combine above along the batch dimension
        inputs = {
            "noise": torch.cat((noise, noise), dim=0).contiguous(),
            "cond": torch.cat((cat_mel_text, cat_mel_text_drop), dim=0).contiguous(),
            "time_expand": time_expand,
            "rope_cos": torch.cat((rope_cos, rope_cos), dim=0).contiguous(),
            "rope_sin": torch.cat((rope_sin, rope_sin), dim=0).contiguous(),
            "input_lengths": torch.cat((input_lengths, input_lengths), dim=0).contiguous(),
            "delta_t": self.delta_t,
        }
        if use_perf and remove_input_padding:
            torch.cuda.nvtx.range_push("remove input padding")
        if remove_input_padding:
            max_seq_len = inputs["cond"].shape[1]
            inputs["noise"] = remove_tensor_padding(inputs["noise"], inputs["input_lengths"])
            inputs["cond"] = remove_tensor_padding(inputs["cond"], inputs["input_lengths"])
            # for time_expand, convert from B,D to B,T,D by repeat
            inputs["time_expand"] = inputs["time_expand"].unsqueeze(1).repeat(1, max_seq_len, 1, 1)
            inputs["time_expand"] = remove_tensor_padding(inputs["time_expand"], inputs["input_lengths"])
            inputs["rope_cos"] = remove_tensor_padding(inputs["rope_cos"], inputs["input_lengths"])
            inputs["rope_sin"] = remove_tensor_padding(inputs["rope_sin"], inputs["input_lengths"])
        if use_perf and remove_input_padding:
            torch.cuda.nvtx.range_pop()
        for key in inputs:
            inputs[key] = inputs[key].to(self.device)
        if use_perf:
            torch.cuda.nvtx.range_pop()
        start_time = time.time()
        denoised = self.forward(**inputs, use_perf=use_perf)
        cost_time = time.time() - start_time
        if use_perf and remove_input_padding:
            torch.cuda.nvtx.range_push("remove input padding output")
        if remove_input_padding:
            # Split the packed output back into one tensor per item.
            denoised_list = []
            start_idx = 0
            for i in range(batch):
                denoised_list.append(denoised[start_idx : start_idx + inputs["input_lengths"][i]])
                start_idx += inputs["input_lengths"][i]
            if use_perf and remove_input_padding:
                torch.cuda.nvtx.range_pop()
            return denoised_list, cost_time
        return denoised, cost_time
2flow/services/triton/f5_tts_triton_server/f5_tts/1/model.py ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ #
3
+ # Redistribution and use in source and binary forms, with or without
4
+ # modification, are permitted provided that the following conditions
5
+ # are met:
6
+ # * Redistributions of source code must retain the above copyright
7
+ # notice, this list of conditions and the following disclaimer.
8
+ # * Redistributions in binary form must reproduce the above copyright
9
+ # notice, this list of conditions and the following disclaimer in the
10
+ # documentation and/or other materials provided with the distribution.
11
+ # * Neither the name of NVIDIA CORPORATION nor the names of its
12
+ # contributors may be used to endorse or promote products derived
13
+ # from this software without specific prior written permission.
14
+ #
15
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16
+ # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21
+ # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23
+ # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+ import json
27
+ import os
28
+
29
+ import jieba
30
+ import torch
31
+ import torch.nn.functional as F
32
+ import torchaudio
33
+ import triton_python_backend_utils as pb_utils
34
+ from f5_tts_trtllm import F5TTS
35
+ from pypinyin import Style, lazy_pinyin
36
+ from torch.nn.utils.rnn import pad_sequence
37
+ from torch.utils.dlpack import from_dlpack, to_dlpack
38
+
39
+
40
def get_tokenizer(vocab_file_path: str):
    """Load a one-token-per-line vocabulary file.

    Each line of the UTF-8 text file is one token and its zero-based line number
    is the token id. Only the line terminator is removed, so tokens that are or
    contain whitespace (for example a single space) are preserved.

    Args:
        vocab_file_path: path to the vocab ``.txt`` file.

    Returns:
        ``(vocab_char_map, vocab_size)`` where ``vocab_char_map`` maps token -> id
        and ``vocab_size`` is the number of distinct tokens.
    """
    with open(vocab_file_path, "r", encoding="utf-8") as f:
        # removesuffix instead of line[:-1]: a final line without a trailing
        # newline would otherwise lose its last real character.
        vocab_char_map = {line.removesuffix("\n"): i for i, line in enumerate(f)}
    vocab_size = len(vocab_char_map)
    return vocab_char_map, vocab_size
56
+
57
+
58
def convert_char_to_pinyin(reference_target_texts_list, polyphone=True):
    """Turn each input text into a flat list of tokens, romanising Chinese characters.

    Every text is segmented with ``jieba.cut`` and each segment is handled by one
    of three branches, chosen from its UTF-8 byte length:
      * byte length == character count: the segment is split into single characters;
      * ``polyphone`` is set and byte length == 3 * character count: the whole
        segment is converted with ``lazy_pinyin`` (TONE3 style, tone sandhi on);
      * otherwise: characters are handled one by one.

    Args:
        reference_target_texts_list: iterable of text strings.
        polyphone: enables the whole-segment pinyin branch.

    Returns:
        A list with one token list per input text.
    """
    final_reference_target_texts_list = []
    # Normalise punctuation before tokenising.
    custom_trans = str.maketrans(
        {";": ",", "“": '"', "”": '"', "‘": "'", "’": "'"}
    )  # add custom trans here, to address oov

    def is_chinese(c):
        return "\u3100" <= c <= "\u9fff"  # common chinese characters

    for text in reference_target_texts_list:
        char_list = []
        text = text.translate(custom_trans)
        for seg in jieba.cut(text):
            seg_byte_len = len(bytes(seg, "UTF-8"))
            if seg_byte_len == len(seg):  # if pure alphabets and symbols
                # Insert a separating space before a multi-character segment unless
                # the previous token is already a space, colon or quote.
                if char_list and seg_byte_len > 1 and char_list[-1] not in " :'\"":
                    char_list.append(" ")
                char_list.extend(seg)
            elif polyphone and seg_byte_len == 3 * len(seg):  # if pure east asian characters
                # Converting the whole segment at once gives lazy_pinyin the
                # surrounding characters; presumably this is what resolves
                # polyphonic readings - confirm against the pypinyin docs.
                seg_ = lazy_pinyin(seg, style=Style.TONE3, tone_sandhi=True)
                for i, c in enumerate(seg):
                    # A space precedes each syllable that comes from a character
                    # in the is_chinese range.
                    if is_chinese(c):
                        char_list.append(" ")
                    char_list.append(seg_[i])
            else:  # if mixed characters, alphabets and symbols
                for c in seg:
                    if ord(c) < 256:
                        char_list.extend(c)
                    elif is_chinese(c):
                        char_list.append(" ")
                        char_list.extend(lazy_pinyin(c, style=Style.TONE3, tone_sandhi=True))
                    else:
                        # Any other character is kept unchanged as its own token.
                        char_list.append(c)
        final_reference_target_texts_list.append(char_list)

    return final_reference_target_texts_list
94
+
95
+
96
+ def list_str_to_idx(
97
+ text: list[str] | list[list[str]],
98
+ vocab_char_map: dict[str, int], # {char: idx}
99
+ padding_value=-1,
100
+ ): # noqa: F722
101
+ list_idx_tensors = [torch.tensor([vocab_char_map.get(c, 0) for c in t]) for t in text] # pinyin or char style
102
+ return list_idx_tensors
103
+
104
+
105
class TritonPythonModel:
    """Triton Python-backend model that serves F5-TTS.

    For every request it turns the reference audio into mel features, tokenises
    reference + target text, runs the F5-TTS engine once for the whole batch and
    sends each generated mel segment to the separate ``vocoder`` model.
    """

    def initialize(self, args):
        """Read the model parameters and build the tokenizer, engine wrapper and mel front-end.

        Args:
            args: Triton-supplied dict; ``args["model_config"]`` is the JSON model
                config whose ``parameters`` provide ``vocab_file``,
                ``reference_audio_sample_rate``, ``tllm_model_dir``, ``model_path``
                and ``vocoder``.

        Raises:
            pb_utils.TritonModelException: the ``bigvgan`` vocoder is requested,
                for which this file contains no mel-spectrogram implementation.
        """
        self.use_perf = True
        self.device = torch.device("cuda")
        self.target_audio_sample_rate = 24000
        self.target_rms = 0.15  # target rms for audio
        self.n_fft = 1024
        self.win_length = 1024
        self.hop_length = 256
        self.n_mel_channels = 100
        self.max_mel_len = 3000
        self.head_dim = 64

        parameters = json.loads(args["model_config"])["parameters"]
        for key, value in parameters.items():
            parameters[key] = value["string_value"]

        self.vocab_char_map, self.vocab_size = get_tokenizer(parameters["vocab_file"])
        self.reference_sample_rate = int(parameters["reference_audio_sample_rate"])
        self.resampler = torchaudio.transforms.Resample(self.reference_sample_rate, self.target_audio_sample_rate)

        self.tllm_model_dir = parameters["tllm_model_dir"]
        config_file = os.path.join(self.tllm_model_dir, "config.json")
        with open(config_file) as f:
            config = json.load(f)
        self.model = F5TTS(
            config,
            debug_mode=False,
            tllm_model_dir=self.tllm_model_dir,
            model_path=parameters["model_path"],
            vocab_size=self.vocab_size,
        )

        self.vocoder = parameters["vocoder"]
        assert self.vocoder in ["vocos", "bigvgan"]
        if self.vocoder == "vocos":
            self.mel_stft = torchaudio.transforms.MelSpectrogram(
                sample_rate=self.target_audio_sample_rate,
                n_fft=self.n_fft,
                win_length=self.win_length,
                hop_length=self.hop_length,
                n_mels=self.n_mel_channels,
                power=1,
                center=True,
                normalized=False,
                norm=None,
            ).to(self.device)
            self.compute_mel_fn = self.get_vocos_mel_spectrogram
        elif self.vocoder == "bigvgan":
            # This class defines no get_bigvgan_mel_spectrogram method, so binding
            # it here could only fail with an opaque AttributeError. Fail at load
            # time with an explicit message instead.
            raise pb_utils.TritonModelException(
                "vocoder 'bigvgan' is not supported by this model: no bigvgan "
                "mel-spectrogram implementation is available; use 'vocos'."
            )

    def get_vocos_mel_spectrogram(self, waveform):
        """Return the log-mel spectrogram of ``waveform`` with dimensions 1 and 2 swapped."""
        mel = self.mel_stft(waveform)
        # Clamp before the log so zero magnitudes do not become -inf.
        mel = mel.clamp(min=1e-5).log()
        return mel.transpose(1, 2)

    def forward_vocoder(self, mel):
        """Send ``mel`` to the Triton ``vocoder`` model and return its ``waveform`` on the CPU.

        Raises:
            pb_utils.TritonModelException: the vocoder response carries an error.
        """
        mel = mel.to(torch.float32).contiguous().cpu()
        input_tensor_0 = pb_utils.Tensor.from_dlpack("mel", to_dlpack(mel))

        inference_request = pb_utils.InferenceRequest(
            model_name="vocoder", requested_output_names=["waveform"], inputs=[input_tensor_0]
        )
        inference_response = inference_request.exec()
        if inference_response.has_error():
            raise pb_utils.TritonModelException(inference_response.error().message())
        else:
            waveform = pb_utils.get_output_tensor_by_name(inference_response, "waveform")
            waveform = torch.utils.dlpack.from_dlpack(waveform.to_dlpack()).cpu()

            return waveform

    def execute(self, requests):
        """Synthesise one waveform per request and return the responses in request order.

        Each request must carry ``reference_wav``, ``reference_wav_len``,
        ``reference_text`` and ``target_text``; only one waveform per request is
        supported.
        """
        (
            reference_text_list,
            target_text_list,
            reference_target_texts_list,
            estimated_reference_target_mel_len,
            reference_mel_len,
        ) = [], [], [], [], []
        mel_features_list = []
        if self.use_perf:
            torch.cuda.nvtx.range_push("preprocess")
        for request in requests:
            wav_tensor = pb_utils.get_input_tensor_by_name(request, "reference_wav")
            wav_lens = pb_utils.get_input_tensor_by_name(request, "reference_wav_len")

            reference_text = pb_utils.get_input_tensor_by_name(request, "reference_text").as_numpy()
            reference_text = reference_text[0][0].decode("utf-8")
            reference_text_list.append(reference_text)
            target_text = pb_utils.get_input_tensor_by_name(request, "target_text").as_numpy()
            target_text = target_text[0][0].decode("utf-8")
            target_text_list.append(target_text)

            text = reference_text + target_text
            reference_target_texts_list.append(text)

            wav = from_dlpack(wav_tensor.to_dlpack())
            wav_len = from_dlpack(wav_lens.to_dlpack())
            wav_len = wav_len.squeeze()
            assert wav.shape[0] == 1, "Only support batch size 1 for now."
            wav = wav[:, :wav_len]

            # Boost quiet references up to the target RMS. An all-zero (silent)
            # waveform is left untouched: dividing by an RMS of 0 would turn the
            # whole signal into NaN.
            ref_rms = torch.sqrt(torch.mean(torch.square(wav)))
            if 0 < ref_rms < self.target_rms:
                wav = wav * self.target_rms / ref_rms
            if self.reference_sample_rate != self.target_audio_sample_rate:
                wav = self.resampler(wav)
            wav = wav.to(self.device)
            if self.use_perf:
                torch.cuda.nvtx.range_push("compute_mel")
            mel_features = self.compute_mel_fn(wav)
            if self.use_perf:
                torch.cuda.nvtx.range_pop()
            mel_features_list.append(mel_features)

            reference_mel_len.append(mel_features.shape[1])
            # Estimate the total (reference + target) mel length by scaling the
            # reference length with the target/reference UTF-8 byte-length ratio.
            # NOTE(review): an empty reference_text raises ZeroDivisionError here.
            estimated_reference_target_mel_len.append(
                int(
                    mel_features.shape[1] * (1 + len(target_text.encode("utf-8")) / len(reference_text.encode("utf-8")))
                )
            )

        max_seq_len = min(max(estimated_reference_target_mel_len), self.max_mel_len)

        batch = len(requests)
        # Zero-padded batch of reference mels; frames past the reference are zero.
        mel_features = torch.zeros((batch, max_seq_len, self.n_mel_channels), dtype=torch.float16).to(self.device)
        for i, mel in enumerate(mel_features_list):
            mel_features[i, : mel.shape[1], :] = mel

        reference_mel_len_tensor = torch.LongTensor(reference_mel_len).to(self.device)

        pinyin_list = convert_char_to_pinyin(reference_target_texts_list, polyphone=True)
        text_pad_sequence = list_str_to_idx(pinyin_list, self.vocab_char_map)

        # Token id layout after this loop: real tokens >= 1, filler 0 up to the
        # item's estimated length, and -1 (added below) for batch padding.
        for i, item in enumerate(text_pad_sequence):
            text_pad_sequence[i] = F.pad(
                item, (0, estimated_reference_target_mel_len[i] - len(item)), mode="constant", value=-1
            )
            text_pad_sequence[i] += 1  # WAR: 0 is reserved for padding token, hard coding in F5-TTS
        text_pad_sequence = pad_sequence(text_pad_sequence, padding_value=-1, batch_first=True).to(self.device)
        text_pad_sequence = F.pad(
            text_pad_sequence, (0, max_seq_len - text_pad_sequence.shape[1]), mode="constant", value=-1
        )
        if self.use_perf:
            torch.cuda.nvtx.range_pop()

        denoised, cost_time = self.model.sample(
            text_pad_sequence,
            mel_features,
            reference_mel_len_tensor,
            estimated_reference_target_mel_len,
            remove_input_padding=False,
            use_perf=self.use_perf,
        )
        if self.use_perf:
            torch.cuda.nvtx.range_push("vocoder")

        responses = []
        for i in range(batch):
            ref_me_len = reference_mel_len[i]
            estimated_mel_len = estimated_reference_target_mel_len[i]
            # Keep only the generated part (after the reference) and swap
            # dimensions 1 and 2 before handing it to the vocoder.
            denoised_one_item = denoised[i, ref_me_len:estimated_mel_len, :].unsqueeze(0).transpose(1, 2)
            audio = self.forward_vocoder(denoised_one_item)
            # Same silent-audio guard as for the reference waveform.
            rms = torch.sqrt(torch.mean(torch.square(audio)))
            if 0 < rms < self.target_rms:
                audio = audio * self.target_rms / rms

            audio = pb_utils.Tensor.from_dlpack("waveform", to_dlpack(audio))
            inference_response = pb_utils.InferenceResponse(output_tensors=[audio])
            responses.append(inference_response)
        if self.use_perf:
            torch.cuda.nvtx.range_pop()
        return responses
2flow/services/triton/f5_tts_triton_server/f5_tts/config.pbtxt ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ name: "f5_tts"
16
+ backend: "python"
17
+ max_batch_size: 4
18
+ dynamic_batching {
19
+ max_queue_delay_microseconds: 1000
20
+ }
21
+ parameters [
22
+ {
23
+ key: "vocab_file"
24
+ value: { string_value: "${vocab}"}
25
+ },
26
+ {
27
+ key: "model_path",
28
+ value: {string_value:"${model}"}
29
+ },
30
+ {
31
+ key: "tllm_model_dir",
32
+ value: {string_value:"${trtllm}"}
33
+ },
34
+ {
35
+ key: "reference_audio_sample_rate",
36
+ value: {string_value:"24000"}
37
+ },
38
+ {
39
+ key: "vocoder",
40
+ value: {string_value:"${vocoder}"}
41
+ }
42
+ ]
43
+
44
+ input [
45
+ {
46
+ name: "reference_wav"
47
+ data_type: TYPE_FP32
48
+ dims: [-1]
49
+ optional: True
50
+ },
51
+ {
52
+ name: "reference_wav_len"
53
+ data_type: TYPE_INT32
54
+ dims: [1]
55
+ optional: True
56
+ },
57
+ {
58
+ name: "reference_text"
59
+ data_type: TYPE_STRING
60
+ dims: [1]
61
+ },
62
+ {
63
+ name: "target_text"
64
+ data_type: TYPE_STRING
65
+ dims: [1]
66
+ }
67
+ ]
68
+ output [
69
+ {
70
+ name: "waveform"
71
+ data_type: TYPE_FP32
72
+ dims: [ -1 ]
73
+ }
74
+ ]
75
+
76
+ instance_group [
77
+ {
78
+ count: 1
79
+ kind: KIND_GPU
80
+ }
81
+ ]
2flow/services/triton/f5_tts_triton_server/vocoder/1/.gitkeep ADDED
File without changes
2flow/services/triton/f5_tts_triton_server/vocoder/config.pbtxt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "vocoder"
2
+ backend: "tensorrt"
3
+ default_model_filename: "vocoder.plan"
4
+ max_batch_size: 4
5
+
6
+ input [
7
+ {
8
+ name: "mel"
9
+ data_type: TYPE_FP32
10
+ dims: [ 100, -1 ]
11
+ }
12
+ ]
13
+
14
+ output [
15
+ {
16
+ name: "waveform"
17
+ data_type: TYPE_FP32
18
+ dims: [ -1 ]
19
+ }
20
+ ]
21
+
22
+ dynamic_batching {
23
+ preferred_batch_size: [1, 2, 4]
24
+ max_queue_delay_microseconds: 1
25
+ }
26
+
27
+ instance_group [
28
+ {
29
+ count: 1
30
+ kind: KIND_GPU
31
+ }
32
+ ]
2flow/utils/tts/__pycache__/convert_checkpoint.cpython-310.pyc ADDED
Binary file (20.5 kB). View file
 
2flow/utils/tts/__pycache__/convert_checkpoint.cpython-312.pyc ADDED
Binary file (27.8 kB). View file
 
2flow/utils/tts/__pycache__/export_vocoder_to_onnx.cpython-312.pyc ADDED
Binary file (6.35 kB). View file
 
2flow/utils/tts/convert_checkpoint.py ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import json
3
+ import os
4
+ import re
5
+ import time
6
+ import traceback
7
+ from concurrent.futures import ThreadPoolExecutor, as_completed
8
+
9
+ import safetensors.torch
10
+ import torch
11
+ from tensorrt_llm import str_dtype_to_torch
12
+ from tensorrt_llm.mapping import Mapping
13
+ from tensorrt_llm.models.convert_utils import split, split_matrix_tp
14
+
15
+
16
def split_q_tp(v, n_head, n_hidden, tensor_parallel, rank):
    """Return the contiguous slice of ``v`` for ``rank``, cut along dim 1.

    The slicing itself is delegated to TensorRT-LLM's ``split`` helper.
    ``n_head`` and ``n_hidden`` are accepted but not used here.
    """
    return split(v, tensor_parallel, rank, dim=1).contiguous()
19
+
20
+
21
def split_q_bias_tp(v, n_head, n_hidden, tensor_parallel, rank):
    """Return the contiguous slice of ``v`` for ``rank``, cut along dim 0.

    The slicing itself is delegated to TensorRT-LLM's ``split`` helper.
    ``n_head`` and ``n_hidden`` are accepted but not used here.
    """
    return split(v, tensor_parallel, rank, dim=0).contiguous()
24
+
25
+
26
def _build_facebook_dit_name_mapping(num_blocks=22):
    """Build the ``{regex pattern: TensorRT-LLM name}`` table for checkpoint keys.

    Entries are produced in this order:
      1. time-embedding MLP and input conv position embedding (8 entries),
      2. ``attn.to_out`` weight/bias for every transformer block,
      3. feed-forward ``project_in`` and ``ff`` weight/bias for every block.
    """
    kinds = ("weight", "bias")
    table = {}

    # Non-block modules: the numeric sub-module index in the source key is
    # folded into the target attribute name (index 0 -> "...1", index 2 -> "...2").
    for src, dst in (
        ("time_embed.time_mlp", "time_embed.mlp"),
        ("input_embed.conv_pos_embed.conv1d", "input_embed.conv_pos_embed.conv1d"),
    ):
        for src_idx, dst_idx in ((0, 1), (2, 2)):
            for kind in kinds:
                table[f"^{src}.{src_idx}.{kind}$"] = f"{dst}{dst_idx}.{kind}"

    # Attention output projection: drop the ".0" sub-module index.
    for i in range(num_blocks):
        attn_out = f"transformer_blocks.{i}.attn.to_out"
        for kind in kinds:
            table[f"^{attn_out}.0.{kind}$"] = f"{attn_out}.{kind}"

    # Feed-forward: "ff.0.0" becomes "project_in", "ff.2" becomes "ff".
    for i in range(num_blocks):
        ff = f"transformer_blocks.{i}.ff"
        for src, dst in (("ff.0.0", "project_in"), ("ff.2", "ff")):
            for kind in kinds:
                table[f"^{ff}.{src}.{kind}$"] = f"{ff}.{dst}.{kind}"

    return table


# Regex pattern (matched with ``re.match``) -> TensorRT-LLM parameter name,
# covering 22 transformer blocks.
FACEBOOK_DIT_NAME_MAPPING = _build_facebook_dit_name_mapping()
168
+
169
+
170
def parse_arguments():
    """Parse the command-line options of the checkpoint conversion script.

    Returns:
        argparse.Namespace built from ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    # Source checkpoint and destination directory.
    add(
        "--model_name",
        type=str,
        default="F5TTS_Base",
        choices=["F5TTS_Base", "F5TTS_v1_Base"],
    )  # TODO: support F5TTS_v1_Base
    add("--timm_ckpt", type=str, default="./ckpts/model_1200000.pt")
    add("--output_dir", type=str, default="./tllm_checkpoint", help="The path to save the TensorRT-LLM checkpoint")

    # Model architecture.
    add("--hidden_size", type=int, default=1024, help="The hidden size of DiT")
    add("--depth", type=int, default=22, help="The number of DiTBlock layers")
    add("--num_heads", type=int, default=16, help="The number of heads of attention module")
    add("--cfg_scale", type=float, default=4.0)

    # Parallelism layout.
    add("--tp_size", type=int, default=1, help="N-way tensor parallelism size")
    add("--cp_size", type=int, default=1, help="Context parallelism size")
    add("--pp_size", type=int, default=1, help="N-way pipeline parallelism size")

    # Precision and conversion workers.
    add("--dtype", type=str, default="float16", choices=["float32", "bfloat16", "float16"])
    add("--fp8_linear", action="store_true", help="Whether use FP8 for linear layers")
    add("--workers", type=int, default=1, help="The number of workers for converting checkpoint in parallel")

    return parser.parse_args()
199
+
200
+
201
def convert_timm_dit(args, mapping, dtype="float32"):
    """Load an F5-TTS checkpoint and convert it to TensorRT-LLM weights for one rank.

    Steps:
      1. Load ``args.timm_ckpt`` (``.safetensors`` or a PyTorch pickle) and keep
         the ``ema_model.transformer*`` entries.
      2. Strip the ``ema_model.transformer.`` prefix and rename keys through
         ``FACEBOOK_DIT_NAME_MAPPING``.
      3. Cast every tensor to ``dtype``; the two conv position-embedding
         weights additionally get a trailing singleton dimension.
      4. Split the attention projections for tensor parallelism and scale the
         q/k projections by ``64 ** -0.25``.

    Args:
        args: Parsed CLI namespace; reads ``timm_ckpt``, ``num_heads`` and
            ``hidden_size``.
        mapping: Parallel mapping; reads ``tp_size`` and ``tp_rank``.
        dtype: Target dtype name, converted with ``str_dtype_to_torch``.

    Returns:
        dict mapping TensorRT-LLM parameter names to tensors.

    Raises:
        ValueError: if two checkpoint keys are renamed to the same target name.
    """
    import math

    tik = time.time()
    torch_dtype = str_dtype_to_torch(dtype)
    tensor_parallel = mapping.tp_size
    ema_prefix = "ema_model.transformer"

    # Load checkpoint based on file extension.
    if args.timm_ckpt.endswith(".safetensors"):
        print(f"Loading safetensors checkpoint from {args.timm_ckpt}")
        model_params = safetensors.torch.load_file(args.timm_ckpt)
        nested_prefix = "ema_model_state_dict." + ema_prefix
        # Keys may be stored flat or under an "ema_model_state_dict." prefix;
        # if neither layout is detected the tensors are used as loaded.
        if any(k.startswith(ema_prefix) for k in model_params):
            model_params = {k: v for k, v in model_params.items() if k.startswith(ema_prefix)}
        elif any(k.startswith(nested_prefix) for k in model_params):
            model_params = {
                k.replace("ema_model_state_dict.", ""): v
                for k, v in model_params.items()
                if k.startswith(nested_prefix)
            }
    else:
        print(f"Loading PyTorch checkpoint from {args.timm_ckpt}")
        # map_location keeps the conversion on CPU regardless of where the
        # checkpoint was saved; weights_only avoids unpickling arbitrary objects.
        checkpoint = torch.load(args.timm_ckpt, map_location="cpu", weights_only=True)
        model_params = {
            k: v for k, v in checkpoint["ema_model_state_dict"].items() if k.startswith(ema_prefix)
        }

    prefix = "ema_model.transformer."
    model_params = {
        (key[len(prefix):] if key.startswith(prefix) else key): value for key, value in model_params.items()
    }

    def get_trtllm_name(timm_name):
        # First matching pattern wins; names without a match are kept unchanged.
        for pattern, target in FACEBOOK_DIT_NAME_MAPPING.items():
            m = re.match(pattern, timm_name)
            if m is not None:
                if "*" in target:
                    target = target.replace("*", m.groups()[0])
                return target
        return timm_name

    # These two weights get an extra trailing dimension after the cast.
    conv_weight_names = (
        "input_embed.conv_pos_embed.conv1d.0.weight",
        "input_embed.conv_pos_embed.conv1d.2.weight",
    )
    weights = {}
    for name, param in model_params.items():
        tensor = param.contiguous().to(torch_dtype)
        if name in conv_weight_names:
            tensor = tensor.unsqueeze(-1)
        weights[get_trtllm_name(name)] = tensor

    if len(weights) != len(model_params):
        raise ValueError(
            f"Renaming collapsed {len(model_params)} checkpoint tensors into {len(weights)} names; "
            "two source keys map to the same TensorRT-LLM name."
        )

    # NOTE(review): 64 ** -0.25 applied to both q and k presumably folds the
    # 1/sqrt(dim_head) attention scale (dim_head = 64) into the weights - confirm.
    scale_factor = math.pow(64, -0.25)
    attn_param = re.compile(r"^transformer_blocks\.\d+\.attn\.to_(q|k|v|out)\.(weight|bias)$")
    for k, v in weights.items():
        m = attn_param.match(k)
        if m is None:
            continue
        proj, kind = m.groups()
        if proj == "out":
            # Only the output-projection weight is split; its bias is left whole.
            if kind == "weight":
                weights[k] = split_matrix_tp(v, tensor_parallel, mapping.tp_rank, dim=1)
            continue
        splitter = split_q_tp if kind == "weight" else split_q_bias_tp
        shard = splitter(v, args.num_heads, args.hidden_size, tensor_parallel, mapping.tp_rank)
        if proj in ("q", "k"):
            # Out-of-place so the tensors loaded from the checkpoint are not mutated.
            shard = shard * scale_factor
        weights[k] = shard

    tok = time.time()
    t = time.strftime("%H:%M:%S", time.gmtime(tok - tik))
    print(f"Weights loaded. Total time: {t}")
    return weights
289
+
290
+
291
def save_config(args):
    """Write the TensorRT-LLM ``config.json`` into ``args.output_dir``.

    The output directory is created if it does not exist. ``hidden_size``,
    ``num_hidden_layers`` and ``num_attention_heads`` are taken from the
    ``--hidden_size``, ``--depth`` and ``--num_heads`` options (whose defaults
    are 1024, 22 and 16), so the written config follows the command line.

    Args:
        args: Parsed CLI namespace; reads ``output_dir``, ``dtype``,
            ``hidden_size``, ``depth``, ``num_heads``, ``cp_size``,
            ``tp_size``, ``pp_size`` and ``fp8_linear``.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(args.output_dir, exist_ok=True)

    config = {
        "architecture": "F5TTS",
        "dtype": args.dtype,
        "hidden_size": args.hidden_size,
        "num_hidden_layers": args.depth,
        "num_attention_heads": args.num_heads,
        "dim_head": 64,
        "dropout": 0.1,
        "ff_mult": 2,
        "mel_dim": 100,
        "text_num_embeds": 256,
        "text_dim": 512,
        "conv_layers": 4,
        "long_skip_connection": False,
        "mapping": {
            "world_size": args.cp_size * args.tp_size * args.pp_size,
            "cp_size": args.cp_size,
            "tp_size": args.tp_size,
            "pp_size": args.pp_size,
        },
    }
    if args.fp8_linear:
        config["quantization"] = {
            "quant_algo": "FP8",
            # TODO: add support for exclude modules.
            # 'exclude_modules': "*final_layer*",
        }

    with open(os.path.join(args.output_dir, "config.json"), "w") as f:
        json.dump(config, f, indent=4)
324
+
325
+
326
def covert_and_save(args, rank):
    """Convert the checkpoint for one rank and save it as ``rank{rank}.safetensors``.

    Rank 0 additionally writes ``config.json`` through ``save_config``.

    Args:
        args: Parsed CLI namespace; reads ``output_dir``, ``dtype`` and the
            ``cp_size`` / ``tp_size`` / ``pp_size`` parallel sizes.
        rank: Index of the rank to convert.
    """
    # Every rank ensures the output directory exists. When ranks run in
    # parallel threads, a non-zero rank may reach its save before rank 0 has
    # written the config and created the directory.
    os.makedirs(args.output_dir, exist_ok=True)

    if rank == 0:
        save_config(args)

    mapping = Mapping(
        world_size=args.cp_size * args.tp_size * args.pp_size,
        rank=rank,
        cp_size=args.cp_size,
        tp_size=args.tp_size,
        pp_size=args.pp_size,
    )

    weights = convert_timm_dit(args, mapping, dtype=args.dtype)

    safetensors.torch.save_file(weights, os.path.join(args.output_dir, f"rank{rank}.safetensors"))
341
+
342
+
343
def execute(workers, func, args):
    """Call every callable in ``func`` as ``f(args, rank)``, ``rank`` being its index.

    With ``workers == 1`` the callables run sequentially in the calling thread
    and the first exception propagates unchanged. Otherwise they are submitted
    to a thread pool of ``workers`` threads; each failure's traceback is
    printed and, once all tasks have finished, a single error is raised.

    Args:
        workers: Number of worker threads (1 means run inline).
        func: Sequence of callables taking ``(args, rank)``.
        args: Object passed through to every callable.

    Raises:
        RuntimeError: in the threaded path, if at least one callable raised;
            chained to the first failure collected.
    """
    if workers == 1:
        for rank, f in enumerate(func):
            f(args, rank)
        return

    exceptions = []
    with ThreadPoolExecutor(max_workers=workers) as pool:
        futures = [pool.submit(f, args, rank) for rank, f in enumerate(func)]
        for future in as_completed(futures):
            try:
                future.result()
            except Exception as e:  # boundary: log every failure, report once below
                traceback.print_exc()
                exceptions.append(e)

    # Explicit raise instead of assert: asserts are stripped under ``python -O``,
    # which would let a failed conversion pass silently.
    if exceptions:
        raise RuntimeError("Checkpoint conversion failed, please check error log.") from exceptions[0]
358
+
359
+
360
def main():
    """Script entry point: parse options and convert the checkpoint for every rank.

    One ``covert_and_save`` task is scheduled per rank, where the number of
    ranks is ``cp_size * tp_size * pp_size``. Pipeline parallelism other than
    1 is rejected.
    """
    args = parse_arguments()

    assert args.pp_size == 1, "PP is not supported yet."

    # Nothing to convert without a source checkpoint.
    if args.timm_ckpt is None:
        return

    started = time.time()
    rank_count = args.cp_size * args.tp_size * args.pp_size
    print("start execute")
    execute(args.workers, [covert_and_save] * rank_count, args)

    elapsed = time.strftime("%H:%M:%S", time.gmtime(time.time() - started))
    print(f"Total time of converting checkpoints: {elapsed}")


if __name__ == "__main__":
    main()
2flow/utils/tts/export_vocoder_to_onnx.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import argparse
16
+
17
+ import torch
18
+ import torch.nn as nn
19
+ from conv_stft import STFT
20
+ from huggingface_hub import hf_hub_download
21
+ from vocos import Vocos
22
+
23
+
24
# ONNX opset version passed to ``torch.onnx.export`` by the exporter in this file.
opset_version = 17
25
+
26
+
27
def get_args():
    """Parse the command-line options of the vocoder export script.

    Returns:
        argparse.Namespace with ``vocoder`` and ``output_path``.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # (flag, add_argument keyword arguments), registered in this order.
    options = (
        (
            "--vocoder",
            {"type": str, "default": "vocos", "choices": ["vocos", "bigvgan"], "help": "Vocoder to export"},
        ),
        (
            "--output-path",
            {"type": str, "default": "./vocos_vocoder.onnx", "help": "Output path"},
        ),
    )
    for flag, spec in options:
        parser.add_argument(flag, **spec)
    return parser.parse_args()
43
+
44
+
45
class ISTFTHead(nn.Module):
    """Head that applies a projection and then an inverse STFT via ``conv_stft.STFT``.

    ``out`` is initialised to ``None``; the caller must assign the projection
    layer to it before ``forward`` is used.
    """

    def __init__(self, n_fft: int, hop_length: int):
        super().__init__()
        # Placeholder for the projection layer, attached from outside.
        self.out = None
        self.stft = STFT(fft_len=n_fft, win_hop=hop_length, win_len=n_fft)

    def forward(self, x: torch.Tensor):
        """Project ``x``, split it into log-magnitude and phase, and invert the STFT."""
        projected = self.out(x).transpose(1, 2)
        log_magnitude, phase = projected.chunk(2, dim=1)
        # Exponentiate, then cap the magnitude at 1e2.
        magnitude = torch.clip(torch.exp(log_magnitude), max=1e2)
        return self.stft.inverse(
            input1=magnitude * torch.cos(phase),
            input2=magnitude * torch.sin(phase),
            input_type="realimag",
        )
60
+
61
+
62
class VocosVocoder(nn.Module):
    """Wrap a Vocos model, replacing its head with an ``ISTFTHead`` for export.

    The replacement head reuses the original head's ``out`` layer and the
    ``n_fft`` / ``hop_length`` of the original head's ``istft``.
    """

    def __init__(self, vocos_vocoder):
        super().__init__()
        self.vocos_vocoder = vocos_vocoder

        original_head = self.vocos_vocoder.head
        projection = original_head.out
        export_head = ISTFTHead(original_head.istft.n_fft, original_head.istft.hop_length)
        export_head.out = projection
        # Note: this modifies the wrapped model object itself.
        self.vocos_vocoder.head = export_head

    def forward(self, mel):
        """Decode ``mel`` with the wrapped vocoder and return the result."""
        return self.vocos_vocoder.decode(mel)
76
+
77
+
78
def export_VocosVocoder(vocos_vocoder, output_path, verbose):
    """Export a Vocos vocoder to ONNX at ``output_path``.

    The model is wrapped in ``VocosVocoder``, moved to CUDA, run once on a
    random input (printing the output shape) and then exported with
    ``torch.onnx.export``. Axes 0 and 2 of ``mel`` and axes 0 and 1 of
    ``waveform`` are declared dynamic.

    Args:
        vocos_vocoder: Loaded Vocos model.
        output_path: Destination of the ONNX file.
        verbose: Forwarded to ``torch.onnx.export``.
    """
    export_model = VocosVocoder(vocos_vocoder).cuda()
    export_model.eval()

    # Example input used for the dry run and for the export call.
    example_batch, example_length = 8, 500
    example_mel = torch.randn(example_batch, 100, example_length).cuda()

    with torch.no_grad():
        print(export_model(mel=example_mel).shape)

    dynamic_axes = {
        "mel": {0: "batch_size", 2: "input_length"},
        "waveform": {0: "batch_size", 1: "output_length"},
    }
    torch.onnx.export(
        export_model,
        example_mel,
        output_path,
        opset_version=opset_version,
        do_constant_folding=True,
        input_names=["mel"],
        output_names=["waveform"],
        dynamic_axes=dynamic_axes,
        verbose=verbose,
    )

    print("Exported to {}".format(output_path))
109
+
110
+
111
def load_vocoder(vocoder_name="vocos", is_local=False, local_path="", device="cpu", hf_cache_dir=None):
    """Load a vocoder in eval mode on ``device``.

    Args:
        vocoder_name: ``"vocos"`` is supported; ``"bigvgan"`` is recognised but
            not implemented.
        is_local: When true, read ``config.yaml`` and ``pytorch_model.bin``
            from ``local_path`` instead of downloading them.
        local_path: Directory used when ``is_local`` is true.
        device: Device the model is moved to.
        hf_cache_dir: Cache directory forwarded to ``hf_hub_download``.

    Returns:
        The loaded vocoder.

    Raises:
        NotImplementedError: if ``vocoder_name`` is ``"bigvgan"``.
        ValueError: if ``vocoder_name`` is not a known vocoder.
    """
    # Reject unsupported names up front instead of failing later on an
    # unbound local variable.
    if vocoder_name == "bigvgan":
        raise NotImplementedError("BigVGAN is not supported yet")
    if vocoder_name != "vocos":
        raise ValueError(f"Unsupported vocoder: {vocoder_name!r}")

    if is_local:
        print(f"Load vocos from local path {local_path}")
        config_path = f"{local_path}/config.yaml"
        model_path = f"{local_path}/pytorch_model.bin"
    else:
        print("Download Vocos from huggingface charactr/vocos-mel-24khz")
        repo_id = "charactr/vocos-mel-24khz"
        config_path = hf_hub_download(repo_id=repo_id, cache_dir=hf_cache_dir, filename="config.yaml")
        model_path = hf_hub_download(repo_id=repo_id, cache_dir=hf_cache_dir, filename="pytorch_model.bin")

    vocoder = Vocos.from_hparams(config_path)
    state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
    vocoder.load_state_dict(state_dict)
    return vocoder.eval().to(device)
132
+
133
+
134
if __name__ == "__main__":
    # Script entry point: parse options, load the requested vocoder on CPU,
    # then export it.
    args = get_args()
    vocoder = load_vocoder(vocoder_name=args.vocoder, device="cpu", hf_cache_dir=None)
    # Only the Vocos exporter is defined in this file.
    if args.vocoder == "vocos":
        export_VocosVocoder(vocoder, args.output_path, verbose=False)