chmanoj committed on
Commit
1287863
·
1 Parent(s): 6a99894

Update model card with eval results

Browse files
Files changed (2) hide show
  1. Add_LM_to_model.ipynb +133 -38
  2. README.md +24 -2
Add_LM_to_model.ipynb CHANGED
@@ -176,7 +176,7 @@
176
  },
177
  {
178
  "cell_type": "code",
179
- "execution_count": 12,
180
  "id": "d3801f28-cdb5-40cd-b1b9-5a00f8f24720",
181
  "metadata": {},
182
  "outputs": [],
@@ -194,27 +194,19 @@
194
  },
195
  {
196
  "cell_type": "code",
197
- "execution_count": 14,
198
  "id": "7dcfe5d2-063f-4b34-9fdd-5f025ef9f699",
199
  "metadata": {},
200
  "outputs": [
201
- {
202
- "name": "stderr",
203
- "output_type": "stream",
204
- "text": [
205
- "Several commits (2) will be pushed upstream.\n",
206
- "The progress bars may be unreliable.\n"
207
- ]
208
- },
209
  {
210
  "data": {
211
  "application/vnd.jupyter.widget-view+json": {
212
- "model_id": "d17d7664ff97403f9f428264855729c2",
213
  "version_major": 2,
214
  "version_minor": 0
215
  },
216
  "text/plain": [
217
- "Upload file language_model/3gram_correct.arpa: 0%| | 32.0k/2.59G [00:00<?, ?B/s]"
218
  ]
219
  },
220
  "metadata": {},
@@ -223,46 +215,35 @@
223
  {
224
  "data": {
225
  "application/vnd.jupyter.widget-view+json": {
226
- "model_id": "479389ca92884367bdda025024eaa38d",
227
  "version_major": 2,
228
  "version_minor": 0
229
  },
230
  "text/plain": [
231
- "Upload file language_model/3gram.bin: 0%| | 32.0k/771M [00:00<?, ?B/s]"
232
  ]
233
  },
234
  "metadata": {},
235
  "output_type": "display_data"
236
  },
 
 
 
 
 
 
 
 
 
237
  {
238
  "data": {
239
- "application/vnd.jupyter.widget-view+json": {
240
- "model_id": "2c975b43bb9040c2b29653eede9add4c",
241
- "version_major": 2,
242
- "version_minor": 0
243
- },
244
  "text/plain": [
245
- "Upload file language_model/unigrams.txt: 0%| | 32.0k/39.0M [00:00<?, ?B/s]"
246
  ]
247
  },
 
248
  "metadata": {},
249
- "output_type": "display_data"
250
- },
251
- {
252
- "ename": "KeyboardInterrupt",
253
- "evalue": "",
254
- "output_type": "error",
255
- "traceback": [
256
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
257
- "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
258
- "\u001b[0;32m/tmp/ipykernel_7629/1986395493.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mrepo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpush_to_hub\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommit_message\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"Upload lm-boosted decoder\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
259
- "\u001b[0;32m~/miniconda3/envs/xlsr_ft/lib/python3.7/site-packages/huggingface_hub/repository.py\u001b[0m in \u001b[0;36mpush_to_hub\u001b[0;34m(self, commit_message, blocking, clean_ok, auto_lfs_prune)\u001b[0m\n\u001b[1;32m 1233\u001b[0m \u001b[0mupstream\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34mf\"origin {self.current_branch}\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1234\u001b[0m \u001b[0mblocking\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mblocking\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1235\u001b[0;31m \u001b[0mauto_lfs_prune\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mauto_lfs_prune\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1236\u001b[0m )\n\u001b[1;32m 1237\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
260
- "\u001b[0;32m~/miniconda3/envs/xlsr_ft/lib/python3.7/site-packages/huggingface_hub/repository.py\u001b[0m in \u001b[0;36mgit_push\u001b[0;34m(self, upstream, blocking, auto_lfs_prune)\u001b[0m\n\u001b[1;32m 989\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 990\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mblocking\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 991\u001b[0;31m \u001b[0mstdout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstderr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprocess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcommunicate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 992\u001b[0m \u001b[0mreturn_code\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprocess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpoll\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 993\u001b[0m \u001b[0mprocess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkill\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
261
- "\u001b[0;32m~/miniconda3/envs/xlsr_ft/lib/python3.7/subprocess.py\u001b[0m in \u001b[0;36mcommunicate\u001b[0;34m(self, input, timeout)\u001b[0m\n\u001b[1;32m 962\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 963\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 964\u001b[0;31m \u001b[0mstdout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstderr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_communicate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mendtime\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 965\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 966\u001b[0m \u001b[0;31m# https://bugs.python.org/issue25942\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
262
- "\u001b[0;32m~/miniconda3/envs/xlsr_ft/lib/python3.7/subprocess.py\u001b[0m in \u001b[0;36m_communicate\u001b[0;34m(self, input, endtime, orig_timeout)\u001b[0m\n\u001b[1;32m 1713\u001b[0m 'failed to raise TimeoutExpired.')\n\u001b[1;32m 1714\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1715\u001b[0;31m \u001b[0mready\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mselector\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mselect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1716\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_timeout\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mendtime\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morig_timeout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstdout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstderr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1717\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
263
- "\u001b[0;32m~/miniconda3/envs/xlsr_ft/lib/python3.7/selectors.py\u001b[0m in \u001b[0;36mselect\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0mready\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 415\u001b[0;31m \u001b[0mfd_event_list\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_selector\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpoll\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 416\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mInterruptedError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 417\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mready\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
264
- "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
265
- ]
266
  }
267
  ],
268
  "source": [
@@ -272,7 +253,121 @@
272
  {
273
  "cell_type": "code",
274
  "execution_count": null,
275
- "id": "a505c088-5f40-4d9a-8d75-263a07cc93a5",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  "metadata": {},
277
  "outputs": [],
278
  "source": []
 
176
  },
177
  {
178
  "cell_type": "code",
179
+ "execution_count": 15,
180
  "id": "d3801f28-cdb5-40cd-b1b9-5a00f8f24720",
181
  "metadata": {},
182
  "outputs": [],
 
194
  },
195
  {
196
  "cell_type": "code",
197
+ "execution_count": 16,
198
  "id": "7dcfe5d2-063f-4b34-9fdd-5f025ef9f699",
199
  "metadata": {},
200
  "outputs": [
 
 
 
 
 
 
 
 
201
  {
202
  "data": {
203
  "application/vnd.jupyter.widget-view+json": {
204
+ "model_id": "19e7f1d4c0ab43b6b006cb848879273d",
205
  "version_major": 2,
206
  "version_minor": 0
207
  },
208
  "text/plain": [
209
+ "Upload file language_model/3gram.bin: 0%| | 32.0k/771M [00:00<?, ?B/s]"
210
  ]
211
  },
212
  "metadata": {},
 
215
  {
216
  "data": {
217
  "application/vnd.jupyter.widget-view+json": {
218
+ "model_id": "476ee7adfe4f49729541086d12535504",
219
  "version_major": 2,
220
  "version_minor": 0
221
  },
222
  "text/plain": [
223
+ "Upload file language_model/unigrams.txt: 0%| | 32.0k/39.0M [00:00<?, ?B/s]"
224
  ]
225
  },
226
  "metadata": {},
227
  "output_type": "display_data"
228
  },
229
+ {
230
+ "name": "stderr",
231
+ "output_type": "stream",
232
+ "text": [
233
+ "To https://huggingface.co/chmanoj/xls-r-300m-te\n",
234
+ " aa77a85..dbca3b5 main -> main\n",
235
+ "\n"
236
+ ]
237
+ },
238
  {
239
  "data": {
 
 
 
 
 
240
  "text/plain": [
241
+ "'https://huggingface.co/chmanoj/xls-r-300m-te/commit/dbca3b5d87436c5615b2460922b94a15a878c713'"
242
  ]
243
  },
244
+ "execution_count": 16,
245
  "metadata": {},
246
+ "output_type": "execute_result"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  }
248
  ],
249
  "source": [
 
253
  {
254
  "cell_type": "code",
255
  "execution_count": null,
256
+ "id": "c3fa2899-59f9-458b-8a23-4da3936a18a1",
257
+ "metadata": {},
258
+ "outputs": [],
259
+ "source": []
260
+ },
261
+ {
262
+ "cell_type": "markdown",
263
+ "id": "c71ab8cb-8732-4d40-aa77-503421ac717c",
264
+ "metadata": {},
265
+ "source": [
266
+ "## Evaluation"
267
+ ]
268
+ },
269
+ {
270
+ "cell_type": "code",
271
+ "execution_count": null,
272
+ "id": "738524cc-28fb-4bb3-aec5-10d1e33bae45",
273
+ "metadata": {},
274
+ "outputs": [],
275
+ "source": []
276
+ },
277
+ {
278
+ "cell_type": "code",
279
+ "execution_count": null,
280
+ "id": "99c4aac1-9fe8-4ff5-a0a2-fbe59d6ad2d2",
281
+ "metadata": {},
282
+ "outputs": [],
283
+ "source": [
284
+ "#!python eval.py --model_id=\"chmanoj/xls-r-300m-te\" --dataset=\"openslr_SLR66\" --config=\"te\" --split=\"test\" --log_outputs"
285
+ ]
286
+ },
287
+ {
288
+ "cell_type": "code",
289
+ "execution_count": null,
290
+ "id": "c012e9c8-cc46-48d5-a05c-99c136591c9f",
291
+ "metadata": {},
292
+ "outputs": [],
293
+ "source": []
294
+ },
295
+ {
296
+ "cell_type": "code",
297
+ "execution_count": 10,
298
+ "id": "47d37b88-cc8e-4d17-b070-4ad1cd66dae8",
299
+ "metadata": {},
300
+ "outputs": [],
301
+ "source": [
302
+ "from huggingface_hub.repocard import metadata_load"
303
+ ]
304
+ },
305
+ {
306
+ "cell_type": "code",
307
+ "execution_count": 18,
308
+ "id": "a56f846c-fa92-48d5-873e-3788748dd9e8",
309
+ "metadata": {},
310
+ "outputs": [],
311
+ "source": [
312
+ "x = metadata_load('README.md')"
313
+ ]
314
+ },
315
+ {
316
+ "cell_type": "code",
317
+ "execution_count": 19,
318
+ "id": "69d92b93-3a67-4be8-9b9b-ade6322718ae",
319
+ "metadata": {},
320
+ "outputs": [
321
+ {
322
+ "data": {
323
+ "text/plain": [
324
+ "{'language': ['te'],\n",
325
+ " 'license': 'apache-2.0',\n",
326
+ " 'tags': ['automatic-speech-recognition',\n",
327
+ " 'openslr_SLR66',\n",
328
+ " 'generated_from_trainer',\n",
329
+ " 'robust-speech-event'],\n",
330
+ " 'datasets': ['openslr', 'SLR66'],\n",
331
+ " 'metrics': ['wer'],\n",
332
+ " 'model-index': [{'name': 'xls-r-300m-te',\n",
333
+ " 'results': [{'task': {'type': 'automatic-speech-recognition',\n",
334
+ " 'name': 'Speech Recognition'},\n",
335
+ " 'dataset': {'type': 'openslr', 'name': 'Open SLR', 'args': 'SLR66'},\n",
336
+ " 'metrics': [{'type': 'wer',\n",
337
+ " 'value': 24.695121951219512,\n",
338
+ " 'name': 'Test WER'},\n",
339
+ " {'type': 'cer', 'value': 4.861934182322532, 'name': 'Test CER'}]}]}]}"
340
+ ]
341
+ },
342
+ "execution_count": 19,
343
+ "metadata": {},
344
+ "output_type": "execute_result"
345
+ }
346
+ ],
347
+ "source": [
348
+ "x"
349
+ ]
350
+ },
351
+ {
352
+ "cell_type": "code",
353
+ "execution_count": null,
354
+ "id": "06957371-efbe-4175-9a3e-4b3c6c6ff255",
355
+ "metadata": {},
356
+ "outputs": [],
357
+ "source": []
358
+ },
359
+ {
360
+ "cell_type": "code",
361
+ "execution_count": null,
362
+ "id": "b6e6034e-5962-4504-a8a3-9f144d92d37a",
363
+ "metadata": {},
364
+ "outputs": [],
365
+ "source": []
366
+ },
367
+ {
368
+ "cell_type": "code",
369
+ "execution_count": null,
370
+ "id": "ba10bc4c-5fb3-4da7-a7d0-8b51205262de",
371
  "metadata": {},
372
  "outputs": [],
373
  "source": []
README.md CHANGED
@@ -1,13 +1,35 @@
1
  ---
 
 
2
  license: apache-2.0
3
  tags:
4
  - automatic-speech-recognition
5
  - openslr_SLR66
6
  - generated_from_trainer
7
  - robust-speech-event
 
 
 
 
 
8
  model-index:
9
- - name: ''
10
- results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
1
  ---
2
+ language:
3
+ - te
4
  license: apache-2.0
5
  tags:
6
  - automatic-speech-recognition
7
  - openslr_SLR66
8
  - generated_from_trainer
9
  - robust-speech-event
10
+ datasets:
11
+ - openslr
12
+ - SLR66
13
+ metrics:
14
+ - wer
15
  model-index:
16
+ - name: xls-r-300m-te
17
+ results:
18
+ - task:
19
+ type: automatic-speech-recognition
20
+ name: Speech Recognition
21
+ dataset:
22
+ type: openslr
23
+ name: Open SLR
24
+ args: SLR66
25
+ metrics:
26
+ - type: wer # Required. Example: wer
27
+ value: 24.695121951219512 # Required. Example: 20.90
28
+ name: Test WER # Optional. Example: Test WER
29
+ - type: cer
30
+ value: 4.861934182322532
31
+ name: Test CER
32
+
33
  ---
34
 
35
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You