chmanoj commited on
Commit
a956abd
1 Parent(s): 321874d

Remove extra files

Browse files
Add_LM_to_model.ipynb DELETED
@@ -1,397 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "id": "d23f1f27-fbf4-4fe5-a7b4-17815b23f283",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "from transformers import AutoProcessor"
11
- ]
12
- },
13
- {
14
- "cell_type": "code",
15
- "execution_count": 2,
16
- "id": "cdefcb5e-0824-49ef-be73-8788cbb4e2a9",
17
- "metadata": {},
18
- "outputs": [],
19
- "source": [
20
- "processor = AutoProcessor.from_pretrained(\"chmanoj/xls-r-300m-te\")"
21
- ]
22
- },
23
- {
24
- "cell_type": "code",
25
- "execution_count": 3,
26
- "id": "ef78538d-ca83-4cd3-824d-1b7928f5bc4e",
27
- "metadata": {},
28
- "outputs": [],
29
- "source": [
30
- "vocab_dict = processor.tokenizer.get_vocab()\n",
31
- "sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}"
32
- ]
33
- },
34
- {
35
- "cell_type": "code",
36
- "execution_count": 4,
37
- "id": "cd355539-6dfb-4978-82a3-905c0236c6c3",
38
- "metadata": {},
39
- "outputs": [],
40
- "source": [
41
- "from pyctcdecode import build_ctcdecoder"
42
- ]
43
- },
44
- {
45
- "cell_type": "code",
46
- "execution_count": 9,
47
- "id": "34429a23-a3e5-40ca-be4e-186bf12e1ff4",
48
- "metadata": {},
49
- "outputs": [],
50
- "source": [
51
- "# !which python\n",
52
- "\n",
53
- "# !pip install https://github.com/kpu/kenlm/archive/master.zip"
54
- ]
55
- },
56
- {
57
- "cell_type": "code",
58
- "execution_count": 5,
59
- "id": "21f4fb99-1c19-4a0a-9ac0-90dd38645585",
60
- "metadata": {},
61
- "outputs": [
62
- {
63
- "name": "stderr",
64
- "output_type": "stream",
65
- "text": [
66
- "Loading the LM will be faster if you build a binary file.\n",
67
- "Reading /mnt/c/Projects/Speech/xls-R-finetuning/xls-r-300m-te/3gram_correct.arpa\n",
68
- "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n",
69
- "****************************************************************************************************\n",
70
- "Found entries of length > 1 in alphabet. This is unusual unless style is BPE, but the alphabet was not recognized as BPE type. Is this correct?\n",
71
- "Unigrams and labels don't seem to agree.\n"
72
- ]
73
- }
74
- ],
75
- "source": [
76
- "decoder = build_ctcdecoder(\n",
77
- " labels=list(sorted_vocab_dict.keys()),\n",
78
- " kenlm_model_path=\"3gram_correct.arpa\",\n",
79
- ")"
80
- ]
81
- },
82
- {
83
- "cell_type": "code",
84
- "execution_count": 6,
85
- "id": "f892aada-710c-4bc2-a11f-c9a35c00870a",
86
- "metadata": {},
87
- "outputs": [],
88
- "source": [
89
- "from transformers import Wav2Vec2ProcessorWithLM\n",
90
- "\n",
91
- "processor_with_lm = Wav2Vec2ProcessorWithLM(\n",
92
- " feature_extractor=processor.feature_extractor,\n",
93
- " tokenizer=processor.tokenizer,\n",
94
- " decoder=decoder\n",
95
- ")"
96
- ]
97
- },
98
- {
99
- "cell_type": "code",
100
- "execution_count": 7,
101
- "id": "5e29f7f7-e116-4c65-9c14-ae7e871390bb",
102
- "metadata": {},
103
- "outputs": [
104
- {
105
- "data": {
106
- "text/plain": [
107
- "'/mnt/c/Projects/Speech/xls-R-finetuning/xls-r-300m-te'"
108
- ]
109
- },
110
- "execution_count": 7,
111
- "metadata": {},
112
- "output_type": "execute_result"
113
- }
114
- ],
115
- "source": [
116
- "import os\n",
117
- "os.getcwd()"
118
- ]
119
- },
120
- {
121
- "cell_type": "code",
122
- "execution_count": 8,
123
- "id": "6f5775eb-aece-41fc-a1eb-8bf6f9b8f429",
124
- "metadata": {},
125
- "outputs": [],
126
- "source": [
127
- "processor_with_lm.save_pretrained(os.getcwd())"
128
- ]
129
- },
130
- {
131
- "cell_type": "code",
132
- "execution_count": null,
133
- "id": "0e7e4d6f-01d0-4a24-9980-a6583fb6d048",
134
- "metadata": {},
135
- "outputs": [],
136
- "source": []
137
- },
138
- {
139
- "cell_type": "code",
140
- "execution_count": 10,
141
- "id": "c5ea011b-9412-484a-b798-15fb6e338a99",
142
- "metadata": {},
143
- "outputs": [
144
- {
145
- "name": "stdout",
146
- "output_type": "stream",
147
- "text": [
148
- "Reading language_model/3gram_correct.arpa\n",
149
- "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n",
150
- "****************************************************************************************************\n",
151
- "SUCCESS\n"
152
- ]
153
- }
154
- ],
155
- "source": [
156
- "!../kenlm/build/bin/build_binary language_model/3gram_correct.arpa language_model/3gram.bin"
157
- ]
158
- },
159
- {
160
- "cell_type": "code",
161
- "execution_count": null,
162
- "id": "70c2709b-0b5c-440f-ae9f-11f8045e8fed",
163
- "metadata": {},
164
- "outputs": [],
165
- "source": []
166
- },
167
- {
168
- "cell_type": "code",
169
- "execution_count": 11,
170
- "id": "c5db962f-15f1-4b65-87e3-81e1af14e32e",
171
- "metadata": {},
172
- "outputs": [],
173
- "source": [
174
- "from huggingface_hub import Repository"
175
- ]
176
- },
177
- {
178
- "cell_type": "code",
179
- "execution_count": 15,
180
- "id": "d3801f28-cdb5-40cd-b1b9-5a00f8f24720",
181
- "metadata": {},
182
- "outputs": [],
183
- "source": [
184
- "repo = Repository(local_dir=\".\")"
185
- ]
186
- },
187
- {
188
- "cell_type": "code",
189
- "execution_count": null,
190
- "id": "c6421313-5d36-45ce-8300-3988985e7239",
191
- "metadata": {},
192
- "outputs": [],
193
- "source": []
194
- },
195
- {
196
- "cell_type": "code",
197
- "execution_count": 16,
198
- "id": "7dcfe5d2-063f-4b34-9fdd-5f025ef9f699",
199
- "metadata": {},
200
- "outputs": [
201
- {
202
- "data": {
203
- "application/vnd.jupyter.widget-view+json": {
204
- "model_id": "19e7f1d4c0ab43b6b006cb848879273d",
205
- "version_major": 2,
206
- "version_minor": 0
207
- },
208
- "text/plain": [
209
- "Upload file language_model/3gram.bin: 0%| | 32.0k/771M [00:00<?, ?B/s]"
210
- ]
211
- },
212
- "metadata": {},
213
- "output_type": "display_data"
214
- },
215
- {
216
- "data": {
217
- "application/vnd.jupyter.widget-view+json": {
218
- "model_id": "476ee7adfe4f49729541086d12535504",
219
- "version_major": 2,
220
- "version_minor": 0
221
- },
222
- "text/plain": [
223
- "Upload file language_model/unigrams.txt: 0%| | 32.0k/39.0M [00:00<?, ?B/s]"
224
- ]
225
- },
226
- "metadata": {},
227
- "output_type": "display_data"
228
- },
229
- {
230
- "name": "stderr",
231
- "output_type": "stream",
232
- "text": [
233
- "To https://huggingface.co/chmanoj/xls-r-300m-te\n",
234
- " aa77a85..dbca3b5 main -> main\n",
235
- "\n"
236
- ]
237
- },
238
- {
239
- "data": {
240
- "text/plain": [
241
- "'https://huggingface.co/chmanoj/xls-r-300m-te/commit/dbca3b5d87436c5615b2460922b94a15a878c713'"
242
- ]
243
- },
244
- "execution_count": 16,
245
- "metadata": {},
246
- "output_type": "execute_result"
247
- }
248
- ],
249
- "source": [
250
- "repo.push_to_hub(commit_message=\"Upload lm-boosted decoder\")"
251
- ]
252
- },
253
- {
254
- "cell_type": "code",
255
- "execution_count": null,
256
- "id": "c3fa2899-59f9-458b-8a23-4da3936a18a1",
257
- "metadata": {},
258
- "outputs": [],
259
- "source": []
260
- },
261
- {
262
- "cell_type": "markdown",
263
- "id": "c71ab8cb-8732-4d40-aa77-503421ac717c",
264
- "metadata": {},
265
- "source": [
266
- "## Evaluation"
267
- ]
268
- },
269
- {
270
- "cell_type": "code",
271
- "execution_count": null,
272
- "id": "738524cc-28fb-4bb3-aec5-10d1e33bae45",
273
- "metadata": {},
274
- "outputs": [],
275
- "source": []
276
- },
277
- {
278
- "cell_type": "code",
279
- "execution_count": null,
280
- "id": "99c4aac1-9fe8-4ff5-a0a2-fbe59d6ad2d2",
281
- "metadata": {},
282
- "outputs": [],
283
- "source": [
284
- "#!python eval.py --model_id=\"chmanoj/xls-r-300m-te\" --dataset=\"openslr_SLR66\" --config=\"te\" --split=\"test\" --log_outputs"
285
- ]
286
- },
287
- {
288
- "cell_type": "code",
289
- "execution_count": null,
290
- "id": "c012e9c8-cc46-48d5-a05c-99c136591c9f",
291
- "metadata": {},
292
- "outputs": [],
293
- "source": []
294
- },
295
- {
296
- "cell_type": "code",
297
- "execution_count": 10,
298
- "id": "47d37b88-cc8e-4d17-b070-4ad1cd66dae8",
299
- "metadata": {},
300
- "outputs": [],
301
- "source": [
302
- "from huggingface_hub.repocard import metadata_load"
303
- ]
304
- },
305
- {
306
- "cell_type": "code",
307
- "execution_count": 18,
308
- "id": "a56f846c-fa92-48d5-873e-3788748dd9e8",
309
- "metadata": {},
310
- "outputs": [],
311
- "source": [
312
- "x = metadata_load('README.md')"
313
- ]
314
- },
315
- {
316
- "cell_type": "code",
317
- "execution_count": 19,
318
- "id": "69d92b93-3a67-4be8-9b9b-ade6322718ae",
319
- "metadata": {},
320
- "outputs": [
321
- {
322
- "data": {
323
- "text/plain": [
324
- "{'language': ['te'],\n",
325
- " 'license': 'apache-2.0',\n",
326
- " 'tags': ['automatic-speech-recognition',\n",
327
- " 'openslr_SLR66',\n",
328
- " 'generated_from_trainer',\n",
329
- " 'robust-speech-event'],\n",
330
- " 'datasets': ['openslr', 'SLR66'],\n",
331
- " 'metrics': ['wer'],\n",
332
- " 'model-index': [{'name': 'xls-r-300m-te',\n",
333
- " 'results': [{'task': {'type': 'automatic-speech-recognition',\n",
334
- " 'name': 'Speech Recognition'},\n",
335
- " 'dataset': {'type': 'openslr', 'name': 'Open SLR', 'args': 'SLR66'},\n",
336
- " 'metrics': [{'type': 'wer',\n",
337
- " 'value': 24.695121951219512,\n",
338
- " 'name': 'Test WER'},\n",
339
- " {'type': 'cer', 'value': 4.861934182322532, 'name': 'Test CER'}]}]}]}"
340
- ]
341
- },
342
- "execution_count": 19,
343
- "metadata": {},
344
- "output_type": "execute_result"
345
- }
346
- ],
347
- "source": [
348
- "x"
349
- ]
350
- },
351
- {
352
- "cell_type": "code",
353
- "execution_count": null,
354
- "id": "06957371-efbe-4175-9a3e-4b3c6c6ff255",
355
- "metadata": {},
356
- "outputs": [],
357
- "source": []
358
- },
359
- {
360
- "cell_type": "code",
361
- "execution_count": null,
362
- "id": "b6e6034e-5962-4504-a8a3-9f144d92d37a",
363
- "metadata": {},
364
- "outputs": [],
365
- "source": []
366
- },
367
- {
368
- "cell_type": "code",
369
- "execution_count": null,
370
- "id": "ba10bc4c-5fb3-4da7-a7d0-8b51205262de",
371
- "metadata": {},
372
- "outputs": [],
373
- "source": []
374
- }
375
- ],
376
- "metadata": {
377
- "kernelspec": {
378
- "display_name": "Python 3 (ipykernel)",
379
- "language": "python",
380
- "name": "python3"
381
- },
382
- "language_info": {
383
- "codemirror_mode": {
384
- "name": "ipython",
385
- "version": 3
386
- },
387
- "file_extension": ".py",
388
- "mimetype": "text/x-python",
389
- "name": "python",
390
- "nbconvert_exporter": "python",
391
- "pygments_lexer": "ipython3",
392
- "version": "3.7.10"
393
- }
394
- },
395
- "nbformat": 4,
396
- "nbformat_minor": 5
397
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
openslr_SLR66_te_test_eval_results.txt DELETED
@@ -1,2 +0,0 @@
1
- WER: 0.24695121951219512
2
- CER: 0.04861934182322532