Yi Liu committed
Commit 31656c7
Parent: 75d27f9

init commit

.gitattributes CHANGED
@@ -1,35 +1,35 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,140 @@
1
+ The MooER from Moore Threads is licensed under the MIT License listed below. Copyright (c) 2023-2024 Moore Threads Technology Co., Ltd ("Moore Threads"). All rights reserved.
2
+ Terms of the MIT License
3
+ -------------------------------------------------------------------------
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
5
+
6
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
7
+
8
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
9
+ -------------------------------------------------------------------------
10
+ The following copyright statements and licenses apply to various open source software/model
11
+ packages (or portions thereof) that are distributed with this MooER. MooER that
12
+ includes this file does not necessarily use all the open source software packages referred
13
+ to below and may also only use portions of a given package. Some open source software
14
+ packages referred to below may have been modified by Moore Threads Technology Co., Ltd
15
+ -------------------------------------------------------------------------
16
+ SLAM-LLM
17
+ Copyright (c) 2024 Ziyang Ma
18
+
19
+ Permission is hereby granted, free of charge, to any person obtaining a copy
20
+ of this software and associated documentation files (the "Software"), to deal
21
+ in the Software without restriction, including without limitation the rights
22
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
23
+ copies of the Software, and to permit persons to whom the Software is
24
+ furnished to do so, subject to the following conditions:
25
+
26
+ The above copyright notice and this permission notice shall be included in all
27
+ copies or substantial portions of the Software.
28
+
29
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
34
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35
+ SOFTWARE.
36
+ -------------------------------------------------------------------------
37
+ FunASR
38
+
39
+ MIT License
40
+
41
+ Copyright (c) 2022 Alibaba
42
+
43
+ Permission is hereby granted, free of charge, to any person obtaining a copy
44
+ of this software and associated documentation files (the "Software"), to deal
45
+ in the Software without restriction, including without limitation the rights
46
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
47
+ copies of the Software, and to permit persons to whom the Software is
48
+ furnished to do so, subject to the following conditions:
49
+
50
+ The above copyright notice and this permission notice shall be included in all
51
+ copies or substantial portions of the Software.
52
+
53
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
54
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
55
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
56
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
57
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
58
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
59
+ SOFTWARE.
60
+ -------------------------------------------------------------------------
61
+ FunASR model
62
+ Copyright (C) [2023-2028] Alibaba Group. All rights reserved.
63
+
64
+ Thank you for choosing the FunASR open source models. The FunASR open source models contain a series of open-source models that allow everyone to use, modify, share, and learn from it.
65
+
66
+ To ensure better community collaboration, we have developed the following agreement and hope that you carefully read and abide by it.
67
+
68
+ 1 Definitions
69
+ In this agreement, [FunASR software] refers to the FunASR open source model, and its derivatives, including fine-tuned models. [You] refer to individuals or organizations who use, modify, share, and learn from [FunASR software].
70
+
71
+ 2 License and Restrictions
72
+
73
+ 2.1 License
74
+ You are free to use, copy, modify, and share [FunASR software] under the conditions of this agreement.
75
+
76
+ 2.2 Restrictions
77
+ You should indicate the code and model source and author information when using, copying, modifying and sharing [FunASR software]. You should keep the relevant names of models in [FunASR software].
78
+
79
+ 3 Responsibility and Risk
80
+ [FunASR software] is for reference and learning purposes only and is not responsible for any direct or indirect losses caused by your use or modification of [FunASR software]. You should take responsibility and risks for your use and modification of [FunASR software].
81
+
82
+ 4 Termination
83
+ If you violate any terms of this agreement, your license will be automatically terminated, and you must stop using, copying, modifying, and sharing [FunASR software].
84
+
85
+ 5 Revision
86
+ This agreement may be updated and revised from time to time. The revised agreement will be published in the FunASR official repository and automatically take effect. If you continue to use, copy, modify, and share [FunASR software], it means you agree to the revised agreement.
87
+
88
+ 6 Other Provisions
89
+ This agreement is subject to the laws of [Country/Region]. If any provisions are found to be illegal, invalid, or unenforceable, they shall be deemed deleted from this agreement, and the remaining provisions shall remain valid and binding.
90
+
91
+ If you have any questions or comments about this agreement, please contact us.
92
+
93
+ Copyright (c) [2023-2028] Alibaba Group. All rights reserved.
94
+
95
+ FunASR 模型开源协议
96
+
97
+ 版本号:1.0
98
+
99
+ 版权所有 (C) [2023-2028] [阿里巴巴集团]。保留所有权利。
100
+
101
+ 感谢您选择 FunASR 开源模型。FunASR 开源模型包含一系列免费且开源的工业模型,让大家可以使用、修改、分享和学习该模型。
102
+
103
+ 为了保证更好的社区合作,我们制定了以下协议,希望您仔细阅读并遵守本协议。
104
+
105
+ 1 定义
106
+ 本协议中,[FunASR 软件]指 FunASR 开源模型权重及其衍生品,包括 Finetune 后的模型;[您]指使用、修改、分享和学习[FunASR 软件]的个人或组织。
107
+
108
+ 2 许可和限制
109
+ 2.1 许可
110
+
111
+ 您可以在遵守本协议的前提下,自由地使用、复制、修改和分享[FunASR 软件]。
112
+
113
+ 2.2 限制
114
+
115
+ 您在使用、复制、修改和分享[FunASR 软件]时,必须注明出处以及作者信息,并保留[FunASR 软件]中相关模型名称。
116
+
117
+ 3 责任和风险承担
118
+ [FunASR 软件]仅作为参考和学习使用,不对您使用或修改[FunASR 软件]造成的任何直接或间接损失承担任何责任。您对[FunASR 软件]的使用和修改应该自行承担风险。
119
+
120
+ 4 终止
121
+ 如果您违反本协议的任何条款,您的许可将自动终止,您必须停止使用、复制、修改和分享[FunASR 软件]。
122
+
123
+ 5 修订
124
+ 本协议可能会不时更新和修订。修订后的协议将在[FunASR 软件]官方仓库发布,并自动生效。如果您继续使用、复制、修改和分享[FunASR 软件],即表示您同意修订后的协议。
125
+
126
+ 6 其他规定
127
+ 本协议受到[国家/地区] 的法律管辖。如果任何条款被裁定为不合法、无效或无法执行,则该条款应被视为从本协议中删除,而其余条款应继续有效并具有约束力。
128
+
129
+ 如果您对本协议有任何问题或意见,请联系我们。
130
+
131
+ 版权所有© [2023-2028] [阿里巴巴集团]。保留所有权利。
132
+ -------------------------------------------------------------------------
133
+ Qwen/Qwen2-7B-Instruct
134
+ Copyright [yyyy] [name of copyright owner]
135
+
136
+ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
137
+
138
+ http://www.apache.org/licenses/LICENSE-2.0
139
+ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
140
+
Qwen2-7B-Instruct/README.md ADDED
@@ -0,0 +1,11 @@
+ Please download Qwen2-7B-Instruct manually.
+
+ Qwen2-7B-Instruct can be downloaded from Hugging Face or ModelScope by running:
+
+ ```
+ # from huggingface
+ git clone https://huggingface.co/Qwen/Qwen2-7B-Instruct
+
+ # from modelscope
+ git clone https://modelscope.cn/models/qwen/Qwen2-7B-Instruct
+ ```
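If a scripted download is preferred over `git clone`, the same repository can also be fetched with the `huggingface_hub` Python API. This is only a sketch under the assumption that `huggingface_hub` is installed; the local directory name is arbitrary.

```python
# Sketch: download Qwen2-7B-Instruct with huggingface_hub instead of git clone.
# Assumes `pip install huggingface_hub` and enough disk space for the weights.
from huggingface_hub import snapshot_download

local_path = snapshot_download(
    repo_id="Qwen/Qwen2-7B-Instruct",   # same repo as the git clone URL above
    local_dir="Qwen2-7B-Instruct",      # place it where MooER expects the base LLM
)
print("Base LLM downloaded to:", local_path)
```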
README.md CHANGED
@@ -1,3 +1,280 @@
  ---
  license: mit
+ language:
+ - zh
+ - en
+ metrics:
+ - cer
+ - bleu
+ tags:
+ - asr
+ - automatic-speech-recognition
+ - automatic-speech-translation
+ - speech-translation
+ - speech-recognition
  ---
+
+
+ # MooER (摩耳): an LLM-based Speech Recognition and Translation Model from Moore Threads
+
+ **Online Demo**: https://mooer-speech.mthreads.com:10077/
+
+ ## 🔥 Update
+
+ We release a new model, *MooER-80K-v2*, trained on 80K hours of data. Currently, *MooER-80K-v2* supports the ASR task. The AST and multi-task models will be released soon.
+
+ ## 📖 Introduction
+
+ We introduce **MooER (摩耳)**: an LLM-based speech recognition and translation model developed by Moore Threads. With the *MooER* framework, you can transcribe speech into text (automatic speech recognition, ASR) and translate it into other languages (automatic speech translation, AST) in an end-to-end manner. The performance of *MooER* is demonstrated in the following sections; our insights into model configurations, training strategies, and more are provided in our [technical report](https://arxiv.org/abs/2408.05101).
+
+ For the usage of the model files, please refer to our [GitHub](https://github.com/MooreThreads/MooER).
+
+ <br>
+ <p align="center">
+ <img src="assets/framework.png" width="600"/>
+ </p>
+ <br>
+
+ ## 🥊 Evaluation Results
+
+ We present the training data and the evaluation results below. For more comprehensive information, please refer to our [report](https://arxiv.org/pdf/2408.05101).
+
+ ### Training data
+
+ We utilize 5k hours of data (MT5K) to train our basic *MooER-5K* model. The data sources include:
+
+ | Dataset | Duration |
+ |---------------|---------------|
+ | aishell2 | 137h |
+ | librispeech | 131h |
+ | multi_cn | 100h |
+ | wenetspeech | 1361h |
+ | in-house data | 3274h |
+
+ Note that data from the open-source datasets were randomly selected from the full training set. The in-house data, collected internally without transcriptions, were transcribed using a third-party ASR service.
+
+ Since all the above datasets were originally designed only for the speech recognition task, no translation results are available. To train our speech translation model, we used a third-party translation service to generate pseudo-labels. No data filtering techniques were applied.
+
+ At this moment, we are also developing a new model trained with 80K hours of data.
+
+ ### Speech Recognition
+
+ The performance of speech recognition is evaluated using WER/CER.
+
64
+ <table>
65
+ <tr>
66
+ <th>Language</th>
67
+ <th>Testset</th>
68
+ <th>Paraformer-large</th>
69
+ <th>SenseVoice-small</th>
70
+ <th>Qwen-audio</th>
71
+ <th>Whisper-large-v3</th>
72
+ <th>SeamlessM4T-v2</th>
73
+ <th>MooER-5K</th>
74
+ <th>MooER-80K</th>
75
+ <th>MooER-80K-v2</th>
76
+ </tr>
77
+ <tr>
78
+ <td rowspan="7">Chinese</td>
79
+ <td>aishell1</td>
80
+ <td>1.93</td>
81
+ <td>3.03</td>
82
+ <td>1.43</td>
83
+ <td>7.86</td>
84
+ <td>4.09</td>
85
+ <td>1.93</td>
86
+ <td>1.25</td>
87
+ <td>1.00</td>
88
+ </tr>
89
+ <tr>
90
+ <td>aishell2_ios</td>
91
+ <td>2.85</td>
92
+ <td>3.79</td>
93
+ <td>3.57</td>
94
+ <td>5.38</td>
95
+ <td>4.81</td>
96
+ <td>3.17</td>
97
+ <td>2.67</td>
98
+ <td>2.62</td>
99
+ </tr>
100
+ <tr>
101
+ <td>test_magicdata</td>
102
+ <td>3.66</td>
103
+ <td>3.81</td>
104
+ <td>5.31</td>
105
+ <td>8.36</td>
106
+ <td>9.69</td>
107
+ <td>3.48</td>
108
+ <td>2.52</td>
109
+ <td>2.17</td>
110
+ </tr>
111
+ <tr>
112
+ <td>test_thchs</td>
113
+ <td>3.99</td>
114
+ <td>5.17</td>
115
+ <td>4.86</td>
116
+ <td>9.06</td>
117
+ <td>7.14</td>
118
+ <td>4.11</td>
119
+ <td>3.14</td>
120
+ <td>3.00</td>
121
+ </tr>
122
+ <tr>
123
+ <td>fleurs cmn_dev</td>
124
+ <td>5.56</td>
125
+ <td>6.39</td>
126
+ <td>10.54</td>
127
+ <td>4.54</td>
128
+ <td>7.12</td>
129
+ <td>5.81</td>
130
+ <td>5.23</td>
131
+ <td>5.15</td>
132
+ </tr>
133
+ <tr>
134
+ <td>fleurs cmn_test</td>
135
+ <td>6.92</td>
136
+ <td>7.36</td>
137
+ <td>11.07</td>
138
+ <td>5.24</td>
139
+ <td>7.66</td>
140
+ <td>6.77</td>
141
+ <td>6.18</td>
142
+ <td>6.14</td>
143
+ </tr>
144
+ <tr>
145
+ <td>average</td>
146
+ <td><strong>4.15</strong></td>
147
+ <td><strong>4.93</strong></td>
148
+ <td><strong>6.13</strong></td>
149
+ <td><strong>6.74</strong></td>
150
+ <td><strong>6.75</strong></td>
151
+ <td><strong>4.21</strong></td>
152
+ <td><strong>3.50</strong></td>
153
+ <td><strong>3.35</strong></td>
154
+ </tr>
155
+ <tr>
156
+ <td rowspan="7">English</td>
157
+ <td>librispeech test_clean</td>
158
+ <td>14.15</td>
159
+ <td>4.07</td>
160
+ <td>2.15</td>
161
+ <td>3.42</td>
162
+ <td>2.77</td>
163
+ <td>7.78</td>
164
+ <td>4.11</td>
165
+ <td>3.57</td>
166
+ </tr>
167
+ <tr>
168
+ <td>librispeech test_other</td>
169
+ <td>22.99</td>
170
+ <td>8.26</td>
171
+ <td>4.68</td>
172
+ <td>5.62</td>
173
+ <td>5.25</td>
174
+ <td>15.25</td>
175
+ <td>9.99</td>
176
+ <td>9.09</td>
177
+ </tr>
178
+ <tr>
179
+ <td>fleurs eng_dev</td>
180
+ <td>24.93</td>
181
+ <td>12.92</td>
182
+ <td>22.53</td>
183
+ <td>11.63</td>
184
+ <td>11.36</td>
185
+ <td>18.89</td>
186
+ <td>13.32</td>
187
+ <td>13.12</td>
188
+ </tr>
189
+ <tr>
190
+ <td>fleurs eng_test</td>
191
+ <td>26.81</td>
192
+ <td>13.41</td>
193
+ <td>22.51</td>
194
+ <td>12.57</td>
195
+ <td>11.82</td>
196
+ <td>20.41</td>
197
+ <td>14.97</td>
198
+ <td>14.74</td>
199
+ </tr>
200
+ <tr>
201
+ <td>gigaspeech dev</td>
202
+ <td>24.23</td>
203
+ <td>19.44</td>
204
+ <td>12.96</td>
205
+ <td>19.18</td>
206
+ <td>28.01</td>
207
+ <td>23.46</td>
208
+ <td>16.92</td>
209
+ <td>17.34</td>
210
+ </tr>
211
+ <tr>
212
+ <td>gigaspeech test</td>
213
+ <td>23.07</td>
214
+ <td>16.65</td>
215
+ <td>13.26</td>
216
+ <td>22.34</td>
217
+ <td>28.65</td>
218
+ <td>22.09</td>
219
+ <td>16.64</td>
220
+ <td>16.97</td>
221
+ </tr>
222
+ <tr>
223
+ <td>average</td>
224
+ <td><strong>22.70</strong></td>
225
+ <td><strong>12.46</strong></td>
226
+ <td><strong>13.02</strong></td>
227
+ <td><strong>12.46</strong></td>
228
+ <td><strong>14.64</strong></td>
229
+ <td><strong>17.98</strong></td>
230
+ <td><strong>12.66</strong></td>
231
+ <td><strong>12.47</strong></td>
232
+ </tr>
233
+ </table>
234
+
+ ### Speech Translation (zh -> en)
+
+ For speech translation, the performance is evaluated using the BLEU score.
+
+ | Testset | Speech-LLaMA | Whisper-large-v3 | Qwen-audio | Qwen2-audio | SeamlessM4T-v2 | MooER-5K | MooER-5K-MTL |
+ |--------|-------------|-------------------|------------|-------------|-----------------|--------|--------------|
+ |CoVoST1 zh2en | - | 13.5 | 13.5 | - | 25.3 | - | **30.2** |
+ |CoVoST2 zh2en | 12.3 | 12.2 | 15.7 | 24.4 | 22.2 | 23.4 | **25.2** |
+ |CCMT2019 dev | - | 15.9 | 12.0 | - | 14.8 | - | **19.6** |
+
+
+ ## 🏁 Getting Started
+
+ Please visit our [GitHub](https://github.com/MooreThreads/MooER) for the setup and usage.
+
+
+ ## 🧾 License
+
+ Please see the [LICENSE](LICENSE).
+
+
+ ## 💖 Citation
+
+ If you find MooER useful for your research, please 🌟 this repo and cite our work using the following BibTeX:
+
+ ```bibtex
+ @article{liang2024mooer,
+ title = {MooER: an LLM-based Speech Recognition and Translation Model from Moore Threads},
+ author = {Zhenlin Liang and Junhao Xu and Yi Liu and Yichao Hu and Jian Li and Yajun Zheng and Meng Cai and Hua Wang},
+ journal = {arXiv preprint arXiv:2408.05101},
+ url = {https://arxiv.org/abs/2408.05101},
+ year = {2024}
+ }
+ ```
+
+ ## 📧 Contact
+
+ If you encounter any problems, feel free to open a discussion.
+
+ Moore Threads Website: **https://www.mthreads.com/**
+
+ <br>
+ <p align="left">
+ <img src="assets/MTLogo.png" width="300"/>
+ </p>
+ <br>
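The model card above reports CER/WER for the recognition tables and BLEU for the translation table. As an illustrative sketch only (not the scoring scripts used to produce those numbers), such metrics are commonly computed with the `jiwer` and `sacrebleu` packages:

```python
# Sketch only: a common way to score ASR (WER/CER) and AST (BLEU) outputs.
# Assumes `pip install jiwer sacrebleu`; not the evaluation code behind the tables above.
import jiwer
import sacrebleu

references = ["the cat sat on the mat"]           # ground-truth transcripts / translations
hypotheses = ["the cat sat on a mat"]             # model outputs

print("WER:", jiwer.wer(references, hypotheses))  # word error rate
print("CER:", jiwer.cer(references, hypotheses))  # character error rate

# BLEU for zh -> en translation outputs against reference translations
bleu = sacrebleu.corpus_bleu(hypotheses, [references])
print("BLEU:", bleu.score)
```

By convention, WER is reported for the English test sets and CER for the Chinese ones.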
asr/adapter_project.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c3e549b5d3b460ce64ed85341687093432264d4412f4bbd543b6823b046ecc93
+ size 37772943
asr/lora_weights/README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - PEFT 0.5.0
asr/lora_weights/adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "/jfs/zhenlin.liang/model/Qwen2-7B-Instruct",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 128,
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "k_proj",
+ "v_proj",
+ "o_proj",
+ "up_proj",
+ "gate_proj",
+ "down_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
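The config above is a standard PEFT LoRA adapter description (rank 128, alpha 16, applied to the Qwen2 attention and MLP projections). Below is a minimal sketch of how such an adapter is typically attached to the base LLM with the `peft` library; MooER's actual inference pipeline lives in the GitHub repo, and the local paths here are placeholders.

```python
# Sketch: attach the LoRA adapter described by adapter_config.json to the base LLM.
# Placeholder paths; MooER's own loading code is in the MooreThreads/MooER GitHub repo.
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "Qwen2-7B-Instruct",            # the manually downloaded base model directory
    torch_dtype=torch.bfloat16,
)
# `asr/lora_weights` holds adapter_config.json and adapter_model.bin from this commit.
model = PeftModel.from_pretrained(base, "asr/lora_weights")
model.eval()
```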
asr/lora_weights/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5ab49e11452ea2f82bdac2f7018b972518693f3ca99021494bf606b8fc163914
+ size 1291981421
assets/MTLogo.png ADDED
assets/framework.png ADDED
configuration.json ADDED
@@ -0,0 +1 @@
+ {"framework":"Pytorch","task":"auto-speech-recognition"}
paraformer_encoder/am.mvn ADDED
@@ -0,0 +1,8 @@
+ <Nnet>
+ <Splice> 560 560
+ [ 0 ]
+ <AddShift> 560 560
+ <LearnRateCoef> 0 [ -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 
-13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 ]
+ <Rescale> 560 560
+ <LearnRateCoef> 0 [ 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 
0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 ]
+ </Nnet>
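`am.mvn` appears to be a Kaldi-style cepstral mean/variance normalization (CMVN) description for the 560-dimensional LFR fbank features fed to the Paraformer encoder: the `<AddShift>` values look like negated feature means and the `<Rescale>` values like inverse standard deviations. A rough numpy sketch of how such statistics are applied (the `apply_cmvn` helper and the file parsing are hypothetical, not MooER's own front-end code):

```python
# Sketch: apply Kaldi-style CMVN from am.mvn to 560-dim LFR fbank features.
# `shift` = values from <AddShift> (negated means), `scale` = values from <Rescale>
# (inverse standard deviations); parsing the file itself is left out here.
import numpy as np

def apply_cmvn(features: np.ndarray, shift: np.ndarray, scale: np.ndarray) -> np.ndarray:
    """features: (num_frames, 560); shift and scale: (560,)."""
    return (features + shift) * scale

# toy usage with random stand-ins for the real statistics
frames = np.random.randn(10, 560).astype(np.float32)
shift = np.zeros(560, dtype=np.float32)   # would come from the <AddShift> row above
scale = np.ones(560, dtype=np.float32)    # would come from the <Rescale> row above
normalized = apply_cmvn(frames, shift, scale)
```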
paraformer_encoder/paraformer-encoder.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1fd1d80d5ef6f2d5ec4e32ba077d6e80a7a1fb4bf5cf502bfa12907cf2e75cef
+ size 632157571
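The `.pt`, `.bin`, and `.pth` entries above are Git LFS pointer files (spec version, sha256 oid, byte size). A small sketch for checking that a downloaded weight file matches its pointer, assuming the repository has been cloned and `git lfs pull` has fetched the real objects:

```python
# Sketch: verify a downloaded LFS object against the oid/size recorded in its pointer.
# Paths and values are taken from this commit; run after `git lfs pull`.
import hashlib
from pathlib import Path

def sha256_of(path: Path) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
            h.update(chunk)
    return h.hexdigest()

weight = Path("paraformer_encoder/paraformer-encoder.pth")
print("size matches:", weight.stat().st_size == 632157571)
print("oid matches:", sha256_of(weight) ==
      "1fd1d80d5ef6f2d5ec4e32ba077d6e80a7a1fb4bf5cf502bfa12907cf2e75cef")
```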