{ "cells": [ { "cell_type": "markdown", "id": "807e94de-b600-46ca-9808-372619e38e69", "metadata": {}, "source": [ "# Making kurianbenoy/faster-speech-to-text-for-malayalam with Jupyter notebooks" ] }, { "cell_type": "markdown", "id": "f0e04921-4634-4d16-940f-bf8dd20bb63b", "metadata": {}, "source": [ "## Install packages" ] }, { "cell_type": "code", "execution_count": 1, "id": "7a6257dd-ea39-44e1-b103-3f9588d6cf4d", "metadata": {}, "outputs": [], "source": [ "!pip install -Uqq nbdev gradio==3.31.0 faster-whisper==0.5.1" ] }, { "cell_type": "markdown", "id": "d7ba223d-8043-4aab-8df3-f6cf3a4ac6b2", "metadata": {}, "source": [ "## Basic inference code" ] }, { "cell_type": "code", "execution_count": 2, "id": "22e6e9c5-7a3f-4546-8039-ecf98004235b", "metadata": {}, "outputs": [], "source": [ "#|export\n", "import gradio as gr\n", "from faster_whisper import WhisperModel" ] }, { "cell_type": "code", "execution_count": 3, "id": "81691362-0c73-4af0-9f99-96ffb7dc318b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'3.31.0'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gr.__version__" ] }, { "cell_type": "code", "execution_count": 5, "id": "de8e21b9-449a-4ae3-bd64-bba334075fdd", "metadata": {}, "outputs": [], "source": [ "def t_asr(folder=\"vegam-whisper-medium-ml-fp16\", audio_file=\"vegam-whisper-medium-ml-fp16/00b38e80-80b8-4f70-babf-566e848879fc.webm\", compute_type=\"float16\", device=\"cpu\"):\n", " model = WhisperModel(folder, device=device, compute_type=compute_type)\n", " \n", " segments, info = model.transcribe(audio_file, beam_size=5)\n", " \n", " for segment in segments:\n", " print(\"[%.2fs -> %.2fs] %s\" % (segment.start, segment.end, segment.text))" ] }, { "cell_type": "code", "execution_count": 6, "id": "25e1413f-8f80-4704-a94e-26b8d9581a6a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0.00s -> 4.58s] പാലം കടുക്കുവോളം നാരായണ പാലം കടന്നാലോ കൂരായണ\n", "CPU times: user 11.2 s, sys: 2.2 s, total: 13.4 s\n", "Wall time: 6.54 s\n" ] } ], "source": [ "%%time\n", "t_asr(compute_type=\"int8\", device=\"cuda\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "b255a0f0-4987-4f04-b63c-5ca0167917b6", "metadata": {}, "outputs": [], "source": [ "#|export \n", "def transcribe_malayalam_speech(audio_file, compute_type=\"int8\", device=\"cpu\", folder=\"vegam-whisper-medium-ml-fp16\"):\n", " \n", " model = WhisperModel(folder, device=device, compute_type=compute_type)\n", " segments, info = model.transcribe(audio_file, beam_size=5)\n", "\n", " lst = []\n", " for segment in segments:\n", " # print(\"[%.2fs -> %.2fs] %s\" % (segment.start, segment.end, segment.text))\n", " lst.append(segment.text)\n", "\n", " return(\" \".join(lst))" ] }, { "cell_type": "code", "execution_count": 8, "id": "48cd4ec3-512f-49d0-87ac-3ef989e25b80", "metadata": {}, "outputs": [], "source": [ "#|export\n", "def gr_transcribe_malayalam_speech(microphone, file_upload, compute_type=\"int8\", device=\"cpu\", folder=\"vegam-whisper-medium-ml-fp16\"):\n", " warn_output = \"\"\n", " if (microphone is not None) and (file_upload is not None):\n", " warn_output = (\n", " \"WARNING: You've uploaded an audio file and used the microphone. 
\"\n", " \"The recorded file from the microphone will be used and the uploaded audio will be discarded.\\n\"\n", " )\n", "\n", " elif (microphone is None) and (file_upload is None):\n", " return \"ERROR: You have to either use the microphone or upload an audio file\"\n", "\n", " audio_file = microphone if microphone is not None else file_upload\n", " \n", " model = WhisperModel(folder, device=device, compute_type=compute_type)\n", " segments, info = model.transcribe(audio_file, beam_size=5)\n", "\n", " lst = []\n", " for segment in segments:\n", " # print(\"[%.2fs -> %.2fs] %s\" % (segment.start, segment.end, segment.text))\n", " lst.append(segment.text)\n", "\n", " return(\" \".join(lst))" ] }, { "cell_type": "code", "execution_count": 9, "id": "14fda29a-aee1-44b2-9269-048cc8b98ea8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 40.6 s, sys: 9.76 s, total: 50.3 s\n", "Wall time: 13.6 s\n" ] }, { "data": { "text/plain": [ "'പാലം കടുക്കുവോളം നാരായണ പാലം കടന്നാലോ കൂരായണ'" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "transcribe_malayalam_speech(audio_file=\"vegam-whisper-medium-ml-fp16/00b38e80-80b8-4f70-babf-566e848879fc.webm\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "bbdadecf-68d1-4183-8e43-7965c1aecf6a", "metadata": {}, "outputs": [], "source": [ "## Haha, You are burning GPUs and wasting CO2" ] }, { "cell_type": "markdown", "id": "45fade75-e0b1-4c5d-90a3-ebd7345a4d16", "metadata": {}, "source": [ "## Figure out Whisper Demo by Huggingface" ] }, { "cell_type": "code", "execution_count": null, "id": "023ffa7c-b82f-49ea-b6ca-00f84e2c8698", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "fe37c9e1-bc56-422d-9547-be94ab4e4844", "metadata": {}, "source": [ "## Make an app with Gradio" ] }, { "cell_type": "code", "execution_count": 10, "id": "9badfdcd-dd99-49ea-a318-eda88cddefb6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://0.0.0.0:6006\n", "Running on public URL: https://9fa992d2ba37b0af49.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import gradio as gr\n", "\n", "def greet(name):\n", " return \"Hello \" + name + \"!!\"\n", "\n", "iface = gr.Interface(fn=greet, inputs=\"text\", outputs=\"text\")\n", "iface.launch(share=True)" ] }, { "cell_type": "code", "execution_count": 20, "id": "81f3b241-8a6d-4ff0-bb70-d389d4d4e93a", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/gradio/inputs.py:321: UserWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your components from gradio.components\n", " warnings.warn(\n", "/opt/conda/lib/python3.10/site-packages/gradio/inputs.py:324: UserWarning: `optional` parameter is deprecated, and it has no effect\n", " super().__init__(source=source, type=type, label=label, optional=optional)\n" ] } ], "source": [ "#|export\n", "mf_transcribe = gr.Interface(\n", " fn=gr_transcribe_malayalam_speech,\n", " inputs=[\n", " gr.inputs.Audio(source=\"microphone\", type=\"filepath\", optional=True),\n", " gr.inputs.Audio(source=\"upload\", type=\"filepath\", optional=True),\n", " ],\n", " outputs=\"text\",\n", " title=\"PALLAKKU (പല്ലക്ക്)\",\n", " description=(\n", " \"Pallakku is a Malayalam speech to text demo leveraging the model-weights of [vegam-whisper-medium-ml](https://huggingface.co/kurianbenoy/vegam-whisper-medium-ml-fp16).\"\n", " ),\n", " article=\"Please note that this demo now uses CPU only and in my testing for a 5 seconds audio file it can take upto 15 seconds for results to come. If you are interested to use a GPU based API instead, feel free to contact the author @ kurian.bkk@gmail.com\",\n", " allow_flagging=\"never\",\n", ")" ] }, { "cell_type": "code", "execution_count": 24, "id": "b1e34fa5-8340-4329-a348-b641ca4db341", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Rerunning server... use `close()` to stop if you need to change `launch()` parameters.\n", "----\n", "Running on local URL: http://0.0.0.0:6010\n", "Running on public URL: https://19b32861466405ac95.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#|export\n", "mf_transcribe.launch(share=True)" ] }, { "cell_type": "markdown", "id": "7ec1f78d-d9c0-46c7-9466-0408bc6c6cdc", "metadata": {}, "source": [ "## Create a requirements.txt file" ] }, { "cell_type": "code", "execution_count": 22, "id": "7c3e753f-5051-4c3b-a5ab-fa65c7e7cae9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Writing requirements.txt\n" ] } ], "source": [ "%%writefile requirements.txt\n", "gradio==3.31.0\n", "faster-whisper==0.5.1\n", "torch" ] }, { "cell_type": "markdown", "id": "43505375-9b3d-4661-93d1-11965cd8d6b5", "metadata": {}, "source": [ "## Convert this notebook into a Gradio app" ] }, { "cell_type": "code", "execution_count": 25, "id": "fba83810-1f0f-4777-b831-aabb4cfead39", "metadata": {}, "outputs": [], "source": [ "from nbdev.export import nb_export\n", "nb_export('app.ipynb', lib_path='.', name='app')" ] }, { "cell_type": "markdown", "id": "2c7c52be-c7c4-4026-9886-ae9f71dec603", "metadata": {}, "source": [ "## Reference\n", "\n", "1. [Create A 🤗 Space From A Notebook](https://nbdev.fast.ai/blog/posts/2022-11-07-spaces/index.html)\n", "2. [Nbdev Demo](https://gist.github.com/hamelsmu/35be07d242f3f19063c3a3839127dc67)\n", "3. [Whisper-demo space by 🤗](https://huggingface.co/spaces/whisper-event/whisper-demo)" ] }, { "cell_type": "code", "execution_count": null, "id": "5384528f-9a83-4a0d-b4fd-8ed8458b0eda", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.11" } }, "nbformat": 4, "nbformat_minor": 5 }