mesolitica
/

wav2vec2-xls-r-300m-mixed

@@ -35,37 +35,37 @@ It achieves the following results on the evaluation set based on [evaluate-wav2v
 Mixed evaluation,
 ```
-CER: 0.04363189219453221
-WER: 0.12446419219809059
-CER with LM: 0.03621180629932558
-WER with LM: 0.09152993800218129
 ```
 Malay evaluation,
 ```
-CER: 0.053659683623049854
-WER: 0.22565751242221832
-CER with LM: 0.036930421149001316
-WER with LM: 0.14256712242006359
 ```
 Singlish evaluation,
 ```
-CER: 0.04174804195104746
-WER: 0.10734402150682842
-CER with LM: 0.03538238462620066
-WER with LM: 0.08103191123663189
 ```
 Mandarin evaluation,
 ```
-CER: 0.04211892733885779
-WER: 0.09817787449869257
-CER with LM: 0.040151154521006656
-WER with LM: 0.08913415903511501
 ```
 Language model from https://huggingface.co/huseinzol05/language-model-bahasa-manglish-combined

 Mixed evaluation,
 ```
+CER: 0.0481054244857041
+WER: 0.1322198446007387
+CER with LM: 0.041196586938584696
+WER with LM: 0.09880169127621556
 ```
 Malay evaluation,
 ```
+CER: 0.051636391937588406
+WER: 0.19561999547293663
+CER with LM: 0.03917689630621449
+WER with LM: 0.12710746406824835
 ```
 Singlish evaluation,
 ```
+CER: 0.0494915200071987
+WER: 0.12763802881676573
+CER with LM: 0.04271234986432335
+WER with LM: 0.09677160640413336
 ```
 Mandarin evaluation,
 ```
+CER: 0.035626554824269824
+WER: 0.07993515937860181
+CER with LM: 0.03487760945087219
+WER with LM: 0.07536807168546154
 ```
 Language model from https://huggingface.co/huseinzol05/language-model-bahasa-manglish-combined

evaluate-gpu.ipynb CHANGED Viewed

@@ -158,22 +158,22 @@
     {
      "data": {
       "text/plain": [
-       "[('malay-test/101.wav',\n",
-       "  'kenapa jews dan israelis mengejek iranian bukan agama lebih'),\n",
-       " ('singlish-test/978.wav',\n",
-       "  'but in the olympics time does not really matter what matters is winning'),\n",
-       " ('singlish-test/1189.wav',\n",
-       "  'and if joseph schooling was born with a tinier hand he might not have hit the wall first'),\n",
-       " ('singlish-test/1774.wav', 'melissa passed her number to the young man'),\n",
-       " ('singlish-test/21.wav',\n",
-       "  'but he really wanted to make satay and hung around satay sellers and memorized their ingredients'),\n",
-       " ('singlish-test/2164.wav', 'just check out their coverage'),\n",
-       " ('malay-test/397.wav', 'budaya cocorico french culture'),\n",
-       " ('mandarin-test/359.wav', 'xi shou jian deng da kai'),\n",
-       " ('singlish-test/1107.wav',\n",
-       "  'the party had a cool vibe but was for guests only'),\n",
-       " ('singlish-test/285.wav',\n",
-       "  'but in smaller towns there are not so many places and that is where they are most needed')]"
       ]
      },
      "execution_count": 8,
@@ -252,7 +252,7 @@
    "outputs": [],
    "source": [
     "model = AutoModelForCTC.from_pretrained(\n",
-    "    './wav2vec2-mixed-v3/checkpoint-97000',\n",
     "    ctc_loss_reduction=\"mean\",\n",
     "    pad_token_id=tokenizer.pad_token_id,\n",
     "    vocab_size=len(tokenizer),\n",
@@ -303,10 +303,10 @@
     {
      "data": {
       "text/plain": [
-       "['kenapa jius dan israelis mengejik iranian bukan agama lebih',\n",
-       " 'but in the olympics time does not really matter what matters is winning',\n",
-       " 'and if joseph schooling was born with a tinier hand he might not have hit the world first',\n",
-       " 'melissa passed her number to the young man']"
       ]
      },
      "execution_count": 14,
@@ -362,10 +362,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "0 kenapa jus dan israelis mengejek iranian bukan agama lebih\n",
-      "1 but in the olympics time does not really matter what matters is winning\n",
-      "2 and if joseph schooling was born with a tinier hand he might not have hit the world first\n",
-      "3 melissa passed her number to the young man\n"
      ]
     }
    ],
@@ -385,10 +385,10 @@
     {
      "data": {
       "text/plain": [
-       "['kenapa jews dan israelis mengejek iranian bukan agama lebih',\n",
-       " 'but in the olympics time does not really matter what matters is winning',\n",
-       " 'and if joseph schooling was born with a tinier hand he might not have hit the wall first',\n",
-       " 'melissa passed her number to the young man']"
       ]
      },
      "execution_count": 18,
@@ -443,7 +443,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 1240/1240 [07:43<00:00,  2.67it/s]\n"
      ]
     }
    ],
@@ -474,27 +474,25 @@
     "        cer.append(calculate_cer(batch_y[k], pred[k]))\n",
     "        \n",
     "        wer_lm.append(calculate_wer(batch_y[k], d_lm2))\n",
-    "        cer_lm.append(calculate_cer(batch_y[k], d_lm2))\n",
-    "    \n",
-    "    "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
    "id": "6c6ce8ef",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(0.12446419219809059,\n",
-       " 0.04363189219453221,\n",
-       " 0.09152993800218129,\n",
-       " 0.03621180629932558)"
       ]
      },
-     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -505,7 +503,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
    "id": "cf53914e",
    "metadata": {},
    "outputs": [],
@@ -517,20 +515,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
    "id": "b1558987",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(0.22565751242221832,\n",
-       " 0.053659683623049854,\n",
-       " 0.14256712242006359,\n",
-       " 0.036930421149001316)"
       ]
      },
-     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -541,20 +539,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
    "id": "f340cde7",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(0.10734402150682842,\n",
-       " 0.04174804195104746,\n",
-       " 0.08103191123663189,\n",
-       " 0.03538238462620066)"
       ]
      },
-     "execution_count": 29,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -565,20 +563,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
    "id": "cbc2539f",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(0.09817787449869257,\n",
-       " 0.04211892733885779,\n",
-       " 0.08913415903511501,\n",
-       " 0.040151154521006656)"
       ]
      },
-     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -589,14 +587,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
    "id": "4c543d0c",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "551516109d6a418b95be6884422d853e",
        "version_major": 2,
        "version_minor": 0
       },
@@ -606,27 +604,6 @@
      },
      "metadata": {},
      "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "remote: Enforcing permissions...        \n",
-      "remote: Allowed refs: all        \n",
-      "To https://huggingface.co/mesolitica/wav2vec2-xls-r-300m-mixed\n",
-      "   3f5d181..7799685  main -> main\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "'https://huggingface.co/mesolitica/wav2vec2-xls-r-300m-mixed/commit/77996855b40213396051061d8e23b67c2616e614'"
-      ]
-     },
-     "execution_count": 31,
-     "metadata": {},
-     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -635,42 +612,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
    "id": "05ec385e",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2022-06-01 19:14:20.564262: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2022-06-01 19:14:20.603610: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2022-06-01 19:14:20.605395: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2022-06-01 19:14:20.607506: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA\n",
-      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-      "2022-06-01 19:14:20.609495: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2022-06-01 19:14:20.610833: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2022-06-01 19:14:20.612207: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2022-06-01 19:14:20.615738: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2022-06-01 19:14:20.617302: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2022-06-01 19:14:20.618707: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2022-06-01 19:14:20.620281: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:39] Overriding allow_growth setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n",
-      "2022-06-01 19:14:20.620394: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 17119 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090 Ti, pci bus id: 0000:01:00.0, compute capability: 8.6\n",
-      "\n",
-      "TFWav2Vec2ForCTC has backpropagation operations that are NOT supported on CPU. If you wish to train/fine-tine this model, you need a GPU or a TPU\n",
-      "2022-06-01 19:14:22.857691: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8100\n",
-      "2022-06-01 19:14:24.326073: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n",
-      "2022-06-01 19:14:25.725870: I tensorflow/stream_executor/cuda/cuda_blas.cc:1760] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n",
-      "All PyTorch model weights were used when initializing TFWav2Vec2ForCTC.\n",
-      "\n",
-      "All the weights of TFWav2Vec2ForCTC were initialized from the PyTorch model.\n",
-      "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFWav2Vec2ForCTC for predictions without further training.\n"
-     ]
-    }
-   ],
    "source": [
     "model_tf = TFWav2Vec2ForCTC.from_pretrained(\n",
-    "    './wav2vec2-mixed-v3/checkpoint-97000',\n",
     "    ctc_loss_reduction=\"mean\",\n",
     "    pad_token_id=tokenizer.pad_token_id,\n",
     "    vocab_size=len(tokenizer),\n",
@@ -680,46 +628,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
    "id": "e0f3f749",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "a0e5eeee5bf4499da3d5f4adbd5bfd4f",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Upload file tf_model.h5:   0%|          | 4.00k/1.18G [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "remote: Enforcing permissions...        \n",
-      "remote: Allowed refs: all        \n",
-      "To https://huggingface.co/mesolitica/wav2vec2-xls-r-300m-mixed\n",
-      "   7799685..0b9b0fb  main -> main\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "'https://huggingface.co/mesolitica/wav2vec2-xls-r-300m-mixed/commit/0b9b0fb66dc68a4f71ab793274fb28df9f19764f'"
-      ]
-     },
-     "execution_count": 33,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
    "source": [
     "model_tf.push_to_hub('wav2vec2-xls-r-300m-mixed', organization='mesolitica')"
    ]

     {
      "data": {
       "text/plain": [
+       "[('singlish-test/3057.wav', 'the teenagers paddled hard on their boat'),\n",
+       " ('malay-test/705.wav', 'kenapa justin trudeau seperti kemaluan wanita'),\n",
+       " ('singlish-test/2631.wav',\n",
+       "  'a letter by a mans daughter pleading for leniency was submitted'),\n",
+       " ('singlish-test/659.wav', 'and theres thousands of people to meet'),\n",
+       " ('singlish-test/809.wav', 'how much lower are the prices'),\n",
+       " ('singlish-test/2040.wav',\n",
+       "  'suddenly a gun shot was fired from a distance which sent the dogs fleeing in an instant'),\n",
+       " ('singlish-test/1616.wav',\n",
+       "  'a stronger dollar pressures gold making it more expensive for holders of other currencies'),\n",
+       " ('singlish-test/1816.wav',\n",
+       "  'family as a priority has become real for me and not just a cliche'),\n",
+       " ('malay-test/147.wav',\n",
+       "  'adakah anda percaya bahawa donald trump adalah kedatangan kedua jesus christ'),\n",
+       " ('singlish-test/3468.wav',\n",
+       "  'but much of the technology required for such a fantastic instrument didnt yet exist')]"
       ]
      },
      "execution_count": 8,
    "outputs": [],
    "source": [
     "model = AutoModelForCTC.from_pretrained(\n",
+    "    './checkpoint-115000',\n",
     "    ctc_loss_reduction=\"mean\",\n",
     "    pad_token_id=tokenizer.pad_token_id,\n",
     "    vocab_size=len(tokenizer),\n",
     {
      "data": {
       "text/plain": [
+       "['the teenagers paddled hard on their boat',\n",
+       " 'kenapa justin tradio seperti kemaluan wanita',\n",
+       " 'a letter bya mans daughter pleading for lenien te was submitted',\n",
+       " 'and theres thousands of people to meet']"
       ]
      },
      "execution_count": 14,
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "0 to know more about this years budget click here\n",
+      "1 you can bake shortbread cookies just with sugar butter and flour\n",
+      "2 all good citizens should learn how to change a light bulb\n",
+      "3 as a child madam surley was constantly teased by other children over her appearance\n"
      ]
     }
    ],
     {
      "data": {
       "text/plain": [
+       "['to know more about this years budget click here',\n",
+       " 'you can bake shortbread cookies just with sugar butter and flour',\n",
+       " 'all good citizens should learn how to change a light bulb',\n",
+       " 'as a child madam shirley was constantly teased by other children over her appearance']"
       ]
      },
      "execution_count": 18,
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "100%|██████████| 1240/1240 [04:23<00:00,  4.71it/s]\n"
      ]
     }
    ],
     "        cer.append(calculate_cer(batch_y[k], pred[k]))\n",
     "        \n",
     "        wer_lm.append(calculate_wer(batch_y[k], d_lm2))\n",
+    "        cer_lm.append(calculate_cer(batch_y[k], d_lm2))"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 21,
    "id": "6c6ce8ef",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
+       "(0.1322198446007387,\n",
+       " 0.0481054244857041,\n",
+       " 0.09880169127621556,\n",
+       " 0.041196586938584696)"
       ]
      },
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 22,
    "id": "cf53914e",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 23,
    "id": "b1558987",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
+       "(0.19561999547293663,\n",
+       " 0.051636391937588406,\n",
+       " 0.12710746406824835,\n",
+       " 0.03917689630621449)"
       ]
      },
+     "execution_count": 23,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 24,
    "id": "f340cde7",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
+       "(0.12763802881676573,\n",
+       " 0.0494915200071987,\n",
+       " 0.09677160640413336,\n",
+       " 0.04271234986432335)"
       ]
      },
+     "execution_count": 24,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 26,
    "id": "cbc2539f",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
+       "(0.07993515937860181,\n",
+       " 0.035626554824269824,\n",
+       " 0.07536807168546154,\n",
+       " 0.03487760945087219)"
       ]
      },
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "4c543d0c",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7270a78ff7874222b18f538069750bc1",
        "version_major": 2,
        "version_minor": 0
       },
      },
      "metadata": {},
      "output_type": "display_data"
     }
    ],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "05ec385e",
    "metadata": {},
+   "outputs": [],
    "source": [
     "model_tf = TFWav2Vec2ForCTC.from_pretrained(\n",
+    "    './checkpoint-115000',\n",
     "    ctc_loss_reduction=\"mean\",\n",
     "    pad_token_id=tokenizer.pad_token_id,\n",
     "    vocab_size=len(tokenizer),\n",
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "e0f3f749",
    "metadata": {},
+   "outputs": [],
    "source": [
     "model_tf.push_to_hub('wav2vec2-xls-r-300m-mixed', organization='mesolitica')"
    ]