vitouphy committed on
Commit
b60c4d7
1 Parent(s): 75e6bf2

add readme

Browse files
Files changed (2) hide show
  1. README.md +13 -0
  2. train_tr.ipynb +37 -47
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - km
4
+ license: apache-2.0
5
+ tags:
6
+ - automatic-speech-recognition
7
+ - robust-speech-event
8
+ - km
9
+ datasets:
10
+ model-index:
11
+ - name: ''
12
+ results: []
13
+ ---
train_tr.ipynb CHANGED
@@ -3,7 +3,7 @@
3
  {
4
  "cell_type": "code",
5
  "execution_count": 1,
6
- "id": "3fa7516b",
7
  "metadata": {
8
  "collapsed": true,
9
  "jupyter": {
@@ -783,20 +783,10 @@
783
  "pip install jiwer"
784
  ]
785
  },
786
- {
787
- "cell_type": "code",
788
- "execution_count": null,
789
- "id": "09a1f5ea",
790
- "metadata": {},
791
- "outputs": [],
792
- "source": [
793
- "%"
794
- ]
795
- },
796
  {
797
  "cell_type": "code",
798
  "execution_count": 2,
799
- "id": "c57800bb",
800
  "metadata": {},
801
  "outputs": [],
802
  "source": [
@@ -808,7 +798,7 @@
808
  },
809
  {
810
  "cell_type": "markdown",
811
- "id": "98314e83",
812
  "metadata": {},
813
  "source": [
814
  "### Common Voice Dataset"
@@ -817,7 +807,7 @@
817
  {
818
  "cell_type": "code",
819
  "execution_count": 3,
820
- "id": "c9e549d0",
821
  "metadata": {},
822
  "outputs": [
823
  {
@@ -837,7 +827,7 @@
837
  {
838
  "cell_type": "code",
839
  "execution_count": 4,
840
- "id": "c1ca3246",
841
  "metadata": {},
842
  "outputs": [],
843
  "source": [
@@ -848,7 +838,7 @@
848
  },
849
  {
850
  "cell_type": "markdown",
851
- "id": "7574225f",
852
  "metadata": {},
853
  "source": [
854
  "### Clean Up the Text"
@@ -857,7 +847,7 @@
857
  {
858
  "cell_type": "code",
859
  "execution_count": 5,
860
- "id": "af63fb74",
861
  "metadata": {},
862
  "outputs": [],
863
  "source": [
@@ -873,7 +863,7 @@
873
  {
874
  "cell_type": "code",
875
  "execution_count": 7,
876
- "id": "e1714034",
877
  "metadata": {},
878
  "outputs": [
879
  {
@@ -913,7 +903,7 @@
913
  {
914
  "cell_type": "code",
915
  "execution_count": 8,
916
- "id": "d348cf4a",
917
  "metadata": {},
918
  "outputs": [
919
  {
@@ -938,7 +928,7 @@
938
  },
939
  {
940
  "cell_type": "markdown",
941
- "id": "53070ed8",
942
  "metadata": {},
943
  "source": [
944
  "### Build Character"
@@ -947,7 +937,7 @@
947
  {
948
  "cell_type": "code",
949
  "execution_count": 9,
950
- "id": "dc3b3351",
951
  "metadata": {},
952
  "outputs": [
953
  {
@@ -995,7 +985,7 @@
995
  {
996
  "cell_type": "code",
997
  "execution_count": 10,
998
- "id": "ccd38414",
999
  "metadata": {},
1000
  "outputs": [],
1001
  "source": [
@@ -1006,7 +996,7 @@
1006
  {
1007
  "cell_type": "code",
1008
  "execution_count": 11,
1009
- "id": "1eb8cfd7",
1010
  "metadata": {},
1011
  "outputs": [
1012
  {
@@ -1024,7 +1014,7 @@
1024
  {
1025
  "cell_type": "code",
1026
  "execution_count": 12,
1027
- "id": "40088296",
1028
  "metadata": {},
1029
  "outputs": [
1030
  {
@@ -1051,7 +1041,7 @@
1051
  {
1052
  "cell_type": "code",
1053
  "execution_count": 13,
1054
- "id": "cc6882a1",
1055
  "metadata": {},
1056
  "outputs": [],
1057
  "source": [
@@ -1062,7 +1052,7 @@
1062
  },
1063
  {
1064
  "cell_type": "markdown",
1065
- "id": "27df2a9d",
1066
  "metadata": {},
1067
  "source": [
1068
  "# Tokenizer"
@@ -1071,7 +1061,7 @@
1071
  {
1072
  "cell_type": "code",
1073
  "execution_count": 14,
1074
- "id": "0d5cdfa3",
1075
  "metadata": {},
1076
  "outputs": [],
1077
  "source": [
@@ -1083,7 +1073,7 @@
1083
  {
1084
  "cell_type": "code",
1085
  "execution_count": 19,
1086
- "id": "fb3a2f1c",
1087
  "metadata": {},
1088
  "outputs": [
1089
  {
@@ -1103,7 +1093,7 @@
1103
  {
1104
  "cell_type": "code",
1105
  "execution_count": 26,
1106
- "id": "52203070",
1107
  "metadata": {},
1108
  "outputs": [],
1109
  "source": [
@@ -1120,7 +1110,7 @@
1120
  {
1121
  "cell_type": "code",
1122
  "execution_count": 27,
1123
- "id": "ac38e29c",
1124
  "metadata": {},
1125
  "outputs": [
1126
  {
@@ -1160,7 +1150,7 @@
1160
  {
1161
  "cell_type": "code",
1162
  "execution_count": 20,
1163
- "id": "caba15f7",
1164
  "metadata": {},
1165
  "outputs": [],
1166
  "source": [
@@ -1171,7 +1161,7 @@
1171
  {
1172
  "cell_type": "code",
1173
  "execution_count": 143,
1174
- "id": "040ba517",
1175
  "metadata": {},
1176
  "outputs": [],
1177
  "source": [
@@ -1182,7 +1172,7 @@
1182
  {
1183
  "cell_type": "code",
1184
  "execution_count": 21,
1185
- "id": "06f8aad7",
1186
  "metadata": {},
1187
  "outputs": [
1188
  {
@@ -1208,7 +1198,7 @@
1208
  {
1209
  "cell_type": "code",
1210
  "execution_count": 22,
1211
- "id": "2c485e01",
1212
  "metadata": {},
1213
  "outputs": [
1214
  {
@@ -1255,7 +1245,7 @@
1255
  {
1256
  "cell_type": "code",
1257
  "execution_count": 23,
1258
- "id": "8e1df31f",
1259
  "metadata": {},
1260
  "outputs": [],
1261
  "source": [
@@ -1277,7 +1267,7 @@
1277
  {
1278
  "cell_type": "code",
1279
  "execution_count": 24,
1280
- "id": "88b304c0",
1281
  "metadata": {},
1282
  "outputs": [
1283
  {
@@ -1311,7 +1301,7 @@
1311
  {
1312
  "cell_type": "code",
1313
  "execution_count": 41,
1314
- "id": "5a9767e7",
1315
  "metadata": {},
1316
  "outputs": [],
1317
  "source": [
@@ -1323,7 +1313,7 @@
1323
  {
1324
  "cell_type": "code",
1325
  "execution_count": 25,
1326
- "id": "df2feb3b",
1327
  "metadata": {},
1328
  "outputs": [],
1329
  "source": [
@@ -1383,7 +1373,7 @@
1383
  {
1384
  "cell_type": "code",
1385
  "execution_count": 26,
1386
- "id": "bf081d03",
1387
  "metadata": {},
1388
  "outputs": [],
1389
  "source": [
@@ -1393,7 +1383,7 @@
1393
  {
1394
  "cell_type": "code",
1395
  "execution_count": 27,
1396
- "id": "2059e2de",
1397
  "metadata": {},
1398
  "outputs": [],
1399
  "source": [
@@ -1404,7 +1394,7 @@
1404
  {
1405
  "cell_type": "code",
1406
  "execution_count": 28,
1407
- "id": "ac4bb898",
1408
  "metadata": {},
1409
  "outputs": [],
1410
  "source": [
@@ -1428,7 +1418,7 @@
1428
  {
1429
  "cell_type": "code",
1430
  "execution_count": 29,
1431
- "id": "43669353",
1432
  "metadata": {},
1433
  "outputs": [
1434
  {
@@ -1462,7 +1452,7 @@
1462
  {
1463
  "cell_type": "code",
1464
  "execution_count": 30,
1465
- "id": "f6a6f61e",
1466
  "metadata": {},
1467
  "outputs": [],
1468
  "source": [
@@ -1472,7 +1462,7 @@
1472
  {
1473
  "cell_type": "code",
1474
  "execution_count": 31,
1475
- "id": "54b7d33d",
1476
  "metadata": {},
1477
  "outputs": [],
1478
  "source": [
@@ -1499,7 +1489,7 @@
1499
  {
1500
  "cell_type": "code",
1501
  "execution_count": 32,
1502
- "id": "929dc96c",
1503
  "metadata": {},
1504
  "outputs": [
1505
  {
@@ -1527,7 +1517,7 @@
1527
  {
1528
  "cell_type": "code",
1529
  "execution_count": 33,
1530
- "id": "3460b4cd",
1531
  "metadata": {},
1532
  "outputs": [
1533
  {
@@ -1751,7 +1741,7 @@
1751
  {
1752
  "cell_type": "code",
1753
  "execution_count": null,
1754
- "id": "df904372",
1755
  "metadata": {},
1756
  "outputs": [],
1757
  "source": []
 
3
  {
4
  "cell_type": "code",
5
  "execution_count": 1,
6
+ "id": "89ce602d",
7
  "metadata": {
8
  "collapsed": true,
9
  "jupyter": {
 
783
  "pip install jiwer"
784
  ]
785
  },
 
 
 
 
 
 
 
 
 
 
786
  {
787
  "cell_type": "code",
788
  "execution_count": 2,
789
+ "id": "fcf5fa8d",
790
  "metadata": {},
791
  "outputs": [],
792
  "source": [
 
798
  },
799
  {
800
  "cell_type": "markdown",
801
+ "id": "e4e73fe4",
802
  "metadata": {},
803
  "source": [
804
  "### Common Voice Dataset"
 
807
  {
808
  "cell_type": "code",
809
  "execution_count": 3,
810
+ "id": "6151b4f2",
811
  "metadata": {},
812
  "outputs": [
813
  {
 
827
  {
828
  "cell_type": "code",
829
  "execution_count": 4,
830
+ "id": "c95a7685",
831
  "metadata": {},
832
  "outputs": [],
833
  "source": [
 
838
  },
839
  {
840
  "cell_type": "markdown",
841
+ "id": "c59e9e5f",
842
  "metadata": {},
843
  "source": [
844
  "### Clean Up the Text"
 
847
  {
848
  "cell_type": "code",
849
  "execution_count": 5,
850
+ "id": "457e9338",
851
  "metadata": {},
852
  "outputs": [],
853
  "source": [
 
863
  {
864
  "cell_type": "code",
865
  "execution_count": 7,
866
+ "id": "67fef1ed",
867
  "metadata": {},
868
  "outputs": [
869
  {
 
903
  {
904
  "cell_type": "code",
905
  "execution_count": 8,
906
+ "id": "63b5ce79",
907
  "metadata": {},
908
  "outputs": [
909
  {
 
928
  },
929
  {
930
  "cell_type": "markdown",
931
+ "id": "df14e4c1",
932
  "metadata": {},
933
  "source": [
934
  "### Build Character"
 
937
  {
938
  "cell_type": "code",
939
  "execution_count": 9,
940
+ "id": "2f1bfdb5",
941
  "metadata": {},
942
  "outputs": [
943
  {
 
985
  {
986
  "cell_type": "code",
987
  "execution_count": 10,
988
+ "id": "adf632a9",
989
  "metadata": {},
990
  "outputs": [],
991
  "source": [
 
996
  {
997
  "cell_type": "code",
998
  "execution_count": 11,
999
+ "id": "4f929bce",
1000
  "metadata": {},
1001
  "outputs": [
1002
  {
 
1014
  {
1015
  "cell_type": "code",
1016
  "execution_count": 12,
1017
+ "id": "9d7976c6",
1018
  "metadata": {},
1019
  "outputs": [
1020
  {
 
1041
  {
1042
  "cell_type": "code",
1043
  "execution_count": 13,
1044
+ "id": "4d39c58d",
1045
  "metadata": {},
1046
  "outputs": [],
1047
  "source": [
 
1052
  },
1053
  {
1054
  "cell_type": "markdown",
1055
+ "id": "94293ddc",
1056
  "metadata": {},
1057
  "source": [
1058
  "# Tokenizer"
 
1061
  {
1062
  "cell_type": "code",
1063
  "execution_count": 14,
1064
+ "id": "3cc51841",
1065
  "metadata": {},
1066
  "outputs": [],
1067
  "source": [
 
1073
  {
1074
  "cell_type": "code",
1075
  "execution_count": 19,
1076
+ "id": "d4ef2d6b",
1077
  "metadata": {},
1078
  "outputs": [
1079
  {
 
1093
  {
1094
  "cell_type": "code",
1095
  "execution_count": 26,
1096
+ "id": "45b4d37c",
1097
  "metadata": {},
1098
  "outputs": [],
1099
  "source": [
 
1110
  {
1111
  "cell_type": "code",
1112
  "execution_count": 27,
1113
+ "id": "e49f0d0d",
1114
  "metadata": {},
1115
  "outputs": [
1116
  {
 
1150
  {
1151
  "cell_type": "code",
1152
  "execution_count": 20,
1153
+ "id": "5278f944",
1154
  "metadata": {},
1155
  "outputs": [],
1156
  "source": [
 
1161
  {
1162
  "cell_type": "code",
1163
  "execution_count": 143,
1164
+ "id": "1a345bd9",
1165
  "metadata": {},
1166
  "outputs": [],
1167
  "source": [
 
1172
  {
1173
  "cell_type": "code",
1174
  "execution_count": 21,
1175
+ "id": "50a52463",
1176
  "metadata": {},
1177
  "outputs": [
1178
  {
 
1198
  {
1199
  "cell_type": "code",
1200
  "execution_count": 22,
1201
+ "id": "43330076",
1202
  "metadata": {},
1203
  "outputs": [
1204
  {
 
1245
  {
1246
  "cell_type": "code",
1247
  "execution_count": 23,
1248
+ "id": "54926718",
1249
  "metadata": {},
1250
  "outputs": [],
1251
  "source": [
 
1267
  {
1268
  "cell_type": "code",
1269
  "execution_count": 24,
1270
+ "id": "0a348aa0",
1271
  "metadata": {},
1272
  "outputs": [
1273
  {
 
1301
  {
1302
  "cell_type": "code",
1303
  "execution_count": 41,
1304
+ "id": "142e5d79",
1305
  "metadata": {},
1306
  "outputs": [],
1307
  "source": [
 
1313
  {
1314
  "cell_type": "code",
1315
  "execution_count": 25,
1316
+ "id": "310cdbb1",
1317
  "metadata": {},
1318
  "outputs": [],
1319
  "source": [
 
1373
  {
1374
  "cell_type": "code",
1375
  "execution_count": 26,
1376
+ "id": "6cff622b",
1377
  "metadata": {},
1378
  "outputs": [],
1379
  "source": [
 
1383
  {
1384
  "cell_type": "code",
1385
  "execution_count": 27,
1386
+ "id": "df12cc5b",
1387
  "metadata": {},
1388
  "outputs": [],
1389
  "source": [
 
1394
  {
1395
  "cell_type": "code",
1396
  "execution_count": 28,
1397
+ "id": "8b25005a",
1398
  "metadata": {},
1399
  "outputs": [],
1400
  "source": [
 
1418
  {
1419
  "cell_type": "code",
1420
  "execution_count": 29,
1421
+ "id": "a7ac7d14",
1422
  "metadata": {},
1423
  "outputs": [
1424
  {
 
1452
  {
1453
  "cell_type": "code",
1454
  "execution_count": 30,
1455
+ "id": "352fb742",
1456
  "metadata": {},
1457
  "outputs": [],
1458
  "source": [
 
1462
  {
1463
  "cell_type": "code",
1464
  "execution_count": 31,
1465
+ "id": "ae38b1c1",
1466
  "metadata": {},
1467
  "outputs": [],
1468
  "source": [
 
1489
  {
1490
  "cell_type": "code",
1491
  "execution_count": 32,
1492
+ "id": "d60948cc",
1493
  "metadata": {},
1494
  "outputs": [
1495
  {
 
1517
  {
1518
  "cell_type": "code",
1519
  "execution_count": 33,
1520
+ "id": "6b20f77c",
1521
  "metadata": {},
1522
  "outputs": [
1523
  {
 
1741
  {
1742
  "cell_type": "code",
1743
  "execution_count": null,
1744
+ "id": "f580e49e",
1745
  "metadata": {},
1746
  "outputs": [],
1747
  "source": []