{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "807e94de-b600-46ca-9808-372619e38e69",
   "metadata": {},
   "source": [
    "# Making kurianbenoy/faster-speech-to-text-for-malayalam with Jupyter notebooks"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f0e04921-4634-4d16-940f-bf8dd20bb63b",
   "metadata": {},
   "source": [
    "## Install packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7a6257dd-ea39-44e1-b103-3f9588d6cf4d",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -Uqq nbdev gradio==3.31.0 faster-whisper==0.5.1"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d7ba223d-8043-4aab-8df3-f6cf3a4ac6b2",
   "metadata": {},
   "source": [
    "## Basic inference code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "22e6e9c5-7a3f-4546-8039-ecf98004235b",
   "metadata": {},
   "outputs": [],
   "source": [
    "#|export\n",
    "import gradio as gr\n",
    "from faster_whisper import WhisperModel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "81691362-0c73-4af0-9f99-96ffb7dc318b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'3.31.0'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gr.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "de8e21b9-449a-4ae3-bd64-bba334075fdd",
   "metadata": {},
   "outputs": [],
   "source": [
    "def t_asr(folder=\"vegam-whisper-medium-ml-fp16\", audio_file=\"vegam-whisper-medium-ml-fp16/00b38e80-80b8-4f70-babf-566e848879fc.webm\", compute_type=\"float16\", device=\"cpu\"):\n",
    "    \"\"\"Transcribe `audio_file` and print each segment with its start/end time.\n",
    "\n",
    "    Loads the model stored in `folder` on `device` with `compute_type`, then\n",
    "    decodes `audio_file` using a beam size of 5. Nothing is returned; the\n",
    "    segments are only printed.\n",
    "\n",
    "    NOTE(review): the defaults combine compute_type=\"float16\" with\n",
    "    device=\"cpu\". Confirm that pairing is supported before relying on the\n",
    "    defaults; the timing cell in this notebook overrides both arguments.\n",
    "    \"\"\"\n",
    "    model = WhisperModel(folder, device=device, compute_type=compute_type)\n",
    "\n",
    "    # The second value returned by transcribe() is not used here.\n",
    "    segments, _ = model.transcribe(audio_file, beam_size=5)\n",
    "\n",
    "    for segment in segments:\n",
    "        print(f\"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "25e1413f-8f80-4704-a94e-26b8d9581a6a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.00s -> 4.58s] പാലം കടുക്കുവോളം നാരായണ പാലം കടന്നാലോ കൂരായണ\n",
      "CPU 
times: user 11.2 s, sys: 2.2 s, total: 13.4 s\n",
      "Wall time: 6.54 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "t_asr(compute_type=\"int8\", device=\"cuda\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "b255a0f0-4987-4f04-b63c-5ca0167917b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "#|export\n",
    "# Loaded models keyed by (folder, device, compute_type) so each one is loaded only once.\n",
    "_MODEL_CACHE = {}\n",
    "\n",
    "\n",
    "def _load_model(folder, device, compute_type):\n",
    "    \"\"\"Return the `WhisperModel` for these settings, loading it on first use only.\"\"\"\n",
    "    key = (folder, device, compute_type)\n",
    "    if key not in _MODEL_CACHE:\n",
    "        _MODEL_CACHE[key] = WhisperModel(folder, device=device, compute_type=compute_type)\n",
    "    return _MODEL_CACHE[key]\n",
    "\n",
    "\n",
    "def transcribe_malayalam_speech(audio_file, compute_type=\"int8\", device=\"cpu\", folder=\"vegam-whisper-medium-ml-fp16\"):\n",
    "    \"\"\"Transcribe `audio_file` and return the segment texts joined by single spaces.\n",
    "\n",
    "    Args:\n",
    "        audio_file: Audio input handed straight to `WhisperModel.transcribe`.\n",
    "        compute_type: Compute type passed to `WhisperModel`.\n",
    "        device: Device passed to `WhisperModel`.\n",
    "        folder: Model location passed to `WhisperModel`.\n",
    "\n",
    "    Returns:\n",
    "        The text of every segment, in order, joined with \" \".\n",
    "    \"\"\"\n",
    "    model = _load_model(folder, device, compute_type)\n",
    "\n",
    "    # The second value returned by transcribe() is not used here.\n",
    "    segments, _ = model.transcribe(audio_file, beam_size=5)\n",
    "\n",
    "    return \" \".join(segment.text for segment in segments)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "48cd4ec3-512f-49d0-87ac-3ef989e25b80",
   "metadata": {},
   "outputs": [],
   "source": [
    "#|export\n",
    "def gr_transcribe_malayalam_speech(microphone, file_upload, compute_type=\"int8\", device=\"cpu\", folder=\"vegam-whisper-medium-ml-fp16\"):\n",
    "    \"\"\"Transcribe whichever of the two audio inputs was provided.\n",
    "\n",
    "    Args:\n",
    "        microphone: Recorded audio, or None when nothing was recorded.\n",
    "        file_upload: Uploaded audio, or None when nothing was uploaded.\n",
    "        compute_type: Forwarded to `transcribe_malayalam_speech`.\n",
    "        device: Forwarded to `transcribe_malayalam_speech`.\n",
    "        folder: Forwarded to `transcribe_malayalam_speech`.\n",
    "\n",
    "    Returns:\n",
    "        The transcription as a string. When both inputs are given the\n",
    "        microphone recording is used and the text is prefixed with a warning\n",
    "        line; when neither is given an error message is returned instead.\n",
    "    \"\"\"\n",
    "    if microphone is None and file_upload is None:\n",
    "        return \"ERROR: You have to either use the microphone or upload an audio file\"\n",
    "\n",
    "    warn_output = \"\"\n",
    "    if microphone is not None and file_upload is not None:\n",
    "        warn_output = (\n",
    "            \"WARNING: You've uploaded an audio file and used the microphone. \"\n",
    "            \"The recorded file from the microphone will be used and the uploaded audio will be discarded.\\n\"\n",
    "        )\n",
    "\n",
    "    # The microphone recording takes precedence over the uploaded file.\n",
    "    audio_file = microphone if microphone is not None else file_upload\n",
    "\n",
    "    transcript = transcribe_malayalam_speech(\n",
    "        audio_file, compute_type=compute_type, device=device, folder=folder\n",
    "    )\n",
    "\n",
    "    # The warning used to be built and then dropped; prepend it so the user actually sees it.\n",
    "    return warn_output + transcript"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "14fda29a-aee1-44b2-9269-048cc8b98ea8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 40.6 s, sys: 9.76 s, total: 50.3 s\n",
      "Wall time: 13.6 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'പാലം കടുക്കുവോളം നാരായണ പാലം കടന്നാലോ കൂരായണ'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "transcribe_malayalam_speech(audio_file=\"vegam-whisper-medium-ml-fp16/00b38e80-80b8-4f70-babf-566e848879fc.webm\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "bbdadecf-68d1-4183-8e43-7965c1aecf6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Haha, You are burning GPUs and wasting CO2"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "45fade75-e0b1-4c5d-90a3-ebd7345a4d16",
   "metadata": {},
   "source": [
    "## Figure out Whisper Demo by Huggingface"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "023ffa7c-b82f-49ea-b6ca-00f84e2c8698",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "fe37c9e1-bc56-422d-9547-be94ab4e4844",
   "metadata": {},
   "source": [
    "## Make an app with Gradio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "9badfdcd-dd99-49ea-a318-eda88cddefb6",
   "metadata": {},
   "outputs": [
{ "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://0.0.0.0:6006\n", "Running on public URL: https://9fa992d2ba37b0af49.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "