varunadityabalaji/chinese_english_asr

Model card · Files and versions · Community

Varun Aditya Balaji committed on Dec 19, 2022

Commit

a216922

•

1 Parent(s): 836ffb0

first commit

Browse files

Files changed (15) hide show

2.wav +0 -0
3.wav +0 -0
Pipeline.ipynb +68 -40
chunk0.wav +0 -0
chunk1.wav +0 -0
chunk2.wav +0 -0
chunk3.wav +0 -0
chunk4.wav +0 -0
chunk5.wav +0 -0
chunk6.wav +0 -0
combine.wav +0 -0
tmp/classifier.ckpt +1 -0
tmp/embedding_model.ckpt +1 -0
tmp/hyperparams.yaml +1 -0
tmp/label_encoder.ckpt +1 -0

2.wav ADDED Viewed

Binary file (740 kB). View file

3.wav ADDED Viewed

Binary file (436 kB). View file

Pipeline.ipynb CHANGED Viewed

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 41,
    "id": "edc2e2ff",
    "metadata": {},
    "outputs": [],
@@ -19,7 +19,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
    "id": "76f25cc3",
    "metadata": {},
    "outputs": [
@@ -27,7 +27,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "11/08/2022 14:17:47 - INFO - huggingsound.speech_recognition.model - Loading model...\n"
      ]
     },
     {
@@ -41,8 +41,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "11/08/2022 14:17:49 - WARNING - root - bos_token <s> not in provided tokens. It will be added to the list of tokens\n",
-      "11/08/2022 14:17:49 - WARNING - root - eos_token </s> not in provided tokens. It will be added to the list of tokens\n"
      ]
     }
    ],
@@ -55,7 +55,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
    "id": "3b142546",
    "metadata": {},
    "outputs": [],
@@ -82,8 +82,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
-   "id": "48bed0f8",
    "metadata": {},
    "outputs": [
     {
@@ -98,27 +98,34 @@
      "output_type": "stream",
      "text": [
       "Detected language is English\n",
-      "WITHOUT THE DATA SET THE ARTICLE IS USELESS\n"
      ]
     }
    ],
    "source": [
-    "class Speech_to_Text(Pipeline):\n",
-    "    def postprocess(self,model_outputs):\n",
-    "        if prediction[3][0] == 'zh: Chinese':\n",
-    "        "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
-   "id": "b0fae1dd",
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
       "It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
      ]
     },
@@ -126,36 +133,57 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Detected language is English\n",
-      "WITHOUT THE DATA SET THE ARTICLE IS USELESS\n"
      ]
-    }
-   ],
-   "source": [
-    "start = time.time()\n",
-    "pipeline('english.wav')\n",
-    "end = time.time()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 49,
-   "id": "1e0321b5",
-   "metadata": {},
-   "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "0.5424931049346924"
-      ]
-     },
-     "execution_count": 49,
-     "metadata": {},
-     "output_type": "execute_result"
     }
    ],
    "source": [
-    "end - start"
    ]
   },
   {

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 92,
    "id": "edc2e2ff",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 93,
    "id": "76f25cc3",
    "metadata": {},
    "outputs": [
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "12/06/2022 13:42:19 - INFO - huggingsound.speech_recognition.model - Loading model...\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "12/06/2022 13:42:23 - WARNING - root - bos_token <s> not in provided tokens. It will be added to the list of tokens\n",
+      "12/06/2022 13:42:23 - WARNING - root - eos_token </s> not in provided tokens. It will be added to the list of tokens\n"
      ]
     }
    ],
   },
   {
    "cell_type": "code",
+   "execution_count": 94,
    "id": "3b142546",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 95,
+   "id": "b0fae1dd",
    "metadata": {},
    "outputs": [
     {
      "output_type": "stream",
      "text": [
       "Detected language is English\n",
+      "NISHE JUAN FANMA HE MOVED ABOUT INVISIBLE BUT EVERYONE COULD HEAR HIM\n"
      ]
     }
    ],
    "source": [
+    "start = time.time()\n",
+    "pipeline('combine.wav')\n",
+    "end = time.time()"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 96,
+   "id": "1e0321b5",
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Detected Language is Chinese\n"
+     ]
+    },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s]\n",
       "It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
      ]
     },
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "你喜欢饭吗\n",
+      "Detected language is English\n"
      ]
+    },
     {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Detected language is English\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "HE MOVED ABOUT\n",
+      "Detected language is English\n",
+      "INVISIBLE BUT EVERYONE COULD HEAR HIM\n"
+     ]
     }
    ],
    "source": [
+    "from pydub import AudioSegment\n",
+    "from pydub.silence import split_on_silence\n",
+    "\n",
+    "sound_file = AudioSegment.from_wav(\"combine.wav\")\n",
+    "audio_chunks = split_on_silence(sound_file, \n",
+    "    min_silence_len=100,\n",
+    "    silence_thresh=-50\n",
+    ")\n",
+    "\n",
+    "for i, chunk in enumerate(audio_chunks):\n",
+    "\n",
+    "    out_file = \"./chunk{0}.wav\".format(i)\n",
+    "    chunk.export(out_file, format=\"wav\")\n",
+    "    pipeline(out_file)"
    ]
   },
   {

chunk0.wav ADDED Viewed

Binary file (196 kB). View file

chunk1.wav ADDED Viewed

Binary file (21.3 kB). View file

chunk2.wav ADDED Viewed

Binary file (96.6 kB). View file

chunk3.wav ADDED Viewed

Binary file (188 kB). View file

chunk4.wav ADDED Viewed

Binary file (79.3 kB). View file

chunk5.wav ADDED Viewed

Binary file (18.5 kB). View file

chunk6.wav ADDED Viewed

Binary file (189 kB). View file

combine.wav ADDED Viewed

Binary file (603 kB). View file

tmp/classifier.ckpt ADDED Viewed

	@@ -0,0 +1 @@


1	+ /Users/varun/.cache/huggingface/hub/models--speechbrain--lang-id-voxlingua107-ecapa/snapshots/d771b530cec097adc0088b4dbd173e242f895464/classifier.ckpt

tmp/embedding_model.ckpt ADDED Viewed

	@@ -0,0 +1 @@


1	+ /Users/varun/.cache/huggingface/hub/models--speechbrain--lang-id-voxlingua107-ecapa/snapshots/d771b530cec097adc0088b4dbd173e242f895464/embedding_model.ckpt

tmp/hyperparams.yaml ADDED Viewed

	@@ -0,0 +1 @@


1	+ /Users/varun/.cache/huggingface/hub/models--speechbrain--lang-id-voxlingua107-ecapa/snapshots/d771b530cec097adc0088b4dbd173e242f895464/hyperparams.yaml

tmp/label_encoder.ckpt ADDED Viewed

	@@ -0,0 +1 @@


1	+ /Users/varun/.cache/huggingface/hub/models--speechbrain--lang-id-voxlingua107-ecapa/snapshots/d771b530cec097adc0088b4dbd173e242f895464/label_encoder.txt