kurianbenoy committed on
Commit
502cdb1
1 Parent(s): ecc934a

update gradio demo

Browse files
Files changed (1) hide show
  1. app.ipynb +106 -298
app.ipynb CHANGED
@@ -69,35 +69,12 @@
69
  },
70
  {
71
  "cell_type": "code",
72
- "execution_count": 10,
73
- "id": "5f4d3586-a6b9-4d3e-b02a-9f25f5068dbe",
74
- "metadata": {},
75
- "outputs": [
76
- {
77
- "ename": "AttributeError",
78
- "evalue": "module 'faster_whisper' has no attribute '__version__'",
79
- "output_type": "error",
80
- "traceback": [
81
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
82
- "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
83
- "Cell \u001b[0;32mIn[10], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mfaster_whisper\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mfaster_whisper\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__version__\u001b[49m\n",
84
- "\u001b[0;31mAttributeError\u001b[0m: module 'faster_whisper' has no attribute '__version__'"
85
- ]
86
- }
87
- ],
88
- "source": [
89
- "# import faster_whisper\n",
90
- "# faster_whisper.__version__"
91
- ]
92
- },
93
- {
94
- "cell_type": "code",
95
- "execution_count": 33,
96
  "id": "de8e21b9-449a-4ae3-bd64-bba334075fdd",
97
  "metadata": {},
98
  "outputs": [],
99
  "source": [
100
- "def t_asr(folder=\"vegam-whisper-medium-ml-fp16\", audio_file=\"00b38e80-80b8-4f70-babf-566e848879fc.webm\", compute_type=\"float16\", device=\"cpu\"):\n",
101
  " model = WhisperModel(folder, device=device, compute_type=compute_type)\n",
102
  " \n",
103
  " segments, info = model.transcribe(audio_file, beam_size=5)\n",
@@ -108,8 +85,8 @@
108
  },
109
  {
110
  "cell_type": "code",
111
- "execution_count": 31,
112
- "id": "87c58dd2-7d3d-4fb3-821c-cdac673fee0d",
113
  "metadata": {},
114
  "outputs": [
115
  {
@@ -117,68 +94,58 @@
117
  "output_type": "stream",
118
  "text": [
119
  "[0.00s -> 4.58s] പാലം കടുക്കുവോളം നാരായണ പാലം കടന്നാലോ കൂരായണ\n",
120
- "CPU times: user 42.2 s, sys: 9.58 s, total: 51.8 s\n",
121
- "Wall time: 13.5 s\n"
122
- ]
123
- }
124
- ],
125
- "source": [
126
- "%%time\n",
127
- "t_asr(compute_type=\"int8\")"
128
- ]
129
- },
130
- {
131
- "cell_type": "code",
132
- "execution_count": 28,
133
- "id": "a5624cf5-b3b8-4ae3-aa82-ee19505bb42d",
134
- "metadata": {},
135
- "outputs": [
136
- {
137
- "name": "stdout",
138
- "output_type": "stream",
139
- "text": [
140
- "Detected language 'ta' with probability 0.372757\n",
141
- "[0.00s -> 4.74s] പാലം കടുക്കുവോളം നാരായണ പാലം കടന്നാലൊ കൂരായണ\n",
142
- "CPU times: user 36.5 s, sys: 9.52 s, total: 46.1 s\n",
143
- "Wall time: 12.3 s\n"
144
  ]
145
  }
146
  ],
147
  "source": [
148
  "%%time\n",
149
- "t_asr(folder=\"vegam-whisper-medium-ml\", compute_type=\"int8\")"
150
  ]
151
  },
152
  {
153
  "cell_type": "code",
154
- "execution_count": 34,
155
- "id": "25e1413f-8f80-4704-a94e-26b8d9581a6a",
156
  "metadata": {},
157
- "outputs": [
158
- {
159
- "name": "stdout",
160
- "output_type": "stream",
161
- "text": [
162
- "[0.00s -> 4.58s] പാലം കടുക്കുവോളം നാരായണ പാലം കടന്നാലോ കൂരായണ\n",
163
- "CPU times: user 9.39 s, sys: 792 ms, total: 10.2 s\n",
164
- "Wall time: 4.51 s\n"
165
- ]
166
- }
167
- ],
168
  "source": [
169
- "%%time\n",
170
- "t_asr(compute_type=\"int8\", device=\"cuda\")"
 
 
 
 
 
 
 
 
 
 
171
  ]
172
  },
173
  {
174
  "cell_type": "code",
175
- "execution_count": 4,
176
  "id": "48cd4ec3-512f-49d0-87ac-3ef989e25b80",
177
  "metadata": {},
178
  "outputs": [],
179
  "source": [
180
  "#|export\n",
181
- "def transcribe_malayalam_speech(audio_file, compute_type=\"int8\", device=\"cpu\", folder=\"vegam-whisper-medium-ml-fp16\"):\n",
 
 
 
 
 
 
 
 
 
 
 
 
182
  " model = WhisperModel(folder, device=device, compute_type=compute_type)\n",
183
  " segments, info = model.transcribe(audio_file, beam_size=5)\n",
184
  "\n",
@@ -192,7 +159,7 @@
192
  },
193
  {
194
  "cell_type": "code",
195
- "execution_count": 5,
196
  "id": "14fda29a-aee1-44b2-9269-048cc8b98ea8",
197
  "metadata": {},
198
  "outputs": [
@@ -200,8 +167,8 @@
200
  "name": "stdout",
201
  "output_type": "stream",
202
  "text": [
203
- "CPU times: user 43.1 s, sys: 12.3 s, total: 55.4 s\n",
204
- "Wall time: 14.8 s\n"
205
  ]
206
  },
207
  {
@@ -210,14 +177,14 @@
210
  "'പാലം കടുക്കുവോളം നാരായണ പാലം കടന്നാലോ കൂരായണ'"
211
  ]
212
  },
213
- "execution_count": 5,
214
  "metadata": {},
215
  "output_type": "execute_result"
216
  }
217
  ],
218
  "source": [
219
  "%%time\n",
220
- "transcribe_malayalam_speech(audio_file=\"00b38e80-80b8-4f70-babf-566e848879fc.webm\")"
221
  ]
222
  },
223
  {
@@ -230,14 +197,6 @@
230
  "## Haha, You are burning GPUs and wasting CO2"
231
  ]
232
  },
233
- {
234
- "cell_type": "code",
235
- "execution_count": null,
236
- "id": "bf706a0a-c3a2-489c-a1fe-df4fbf700d9c",
237
- "metadata": {},
238
- "outputs": [],
239
- "source": []
240
- },
241
  {
242
  "cell_type": "markdown",
243
  "id": "45fade75-e0b1-4c5d-90a3-ebd7345a4d16",
@@ -246,203 +205,6 @@
246
  "## Figure out Whisper Demo by Huggingface"
247
  ]
248
  },
249
- {
250
- "cell_type": "code",
251
- "execution_count": 36,
252
- "id": "fa06f8a6-87b7-45af-b36b-fb5ebe362455",
253
- "metadata": {},
254
- "outputs": [
255
- {
256
- "data": {
257
- "application/vnd.jupyter.widget-view+json": {
258
- "model_id": "e437727ccbcd40838a43a0c1bbb00143",
259
- "version_major": 2,
260
- "version_minor": 0
261
- },
262
- "text/plain": [
263
- "Downloading (…)lve/main/config.json: 0%| | 0.00/1.97k [00:00<?, ?B/s]"
264
- ]
265
- },
266
- "metadata": {},
267
- "output_type": "display_data"
268
- },
269
- {
270
- "data": {
271
- "application/vnd.jupyter.widget-view+json": {
272
- "model_id": "2f654c303e24413cb73990bdd9d99907",
273
- "version_major": 2,
274
- "version_minor": 0
275
- },
276
- "text/plain": [
277
- "Downloading pytorch_model.bin: 0%| | 0.00/967M [00:00<?, ?B/s]"
278
- ]
279
- },
280
- "metadata": {},
281
- "output_type": "display_data"
282
- },
283
- {
284
- "data": {
285
- "application/vnd.jupyter.widget-view+json": {
286
- "model_id": "16386d3b586d475fa021ea8d6f925161",
287
- "version_major": 2,
288
- "version_minor": 0
289
- },
290
- "text/plain": [
291
- "Downloading (…)neration_config.json: 0%| | 0.00/3.51k [00:00<?, ?B/s]"
292
- ]
293
- },
294
- "metadata": {},
295
- "output_type": "display_data"
296
- },
297
- {
298
- "data": {
299
- "application/vnd.jupyter.widget-view+json": {
300
- "model_id": "bf5964b1ba024ce685a04127f21f78d0",
301
- "version_major": 2,
302
- "version_minor": 0
303
- },
304
- "text/plain": [
305
- "Downloading (…)okenizer_config.json: 0%| | 0.00/842 [00:00<?, ?B/s]"
306
- ]
307
- },
308
- "metadata": {},
309
- "output_type": "display_data"
310
- },
311
- {
312
- "data": {
313
- "application/vnd.jupyter.widget-view+json": {
314
- "model_id": "038082a393084da998eed2085960e634",
315
- "version_major": 2,
316
- "version_minor": 0
317
- },
318
- "text/plain": [
319
- "Downloading (…)olve/main/vocab.json: 0%| | 0.00/1.04M [00:00<?, ?B/s]"
320
- ]
321
- },
322
- "metadata": {},
323
- "output_type": "display_data"
324
- },
325
- {
326
- "data": {
327
- "application/vnd.jupyter.widget-view+json": {
328
- "model_id": "105ef799439d4c1ea0e3d2cbbfbcaf5d",
329
- "version_major": 2,
330
- "version_minor": 0
331
- },
332
- "text/plain": [
333
- "Downloading (…)/main/tokenizer.json: 0%| | 0.00/2.20M [00:00<?, ?B/s]"
334
- ]
335
- },
336
- "metadata": {},
337
- "output_type": "display_data"
338
- },
339
- {
340
- "data": {
341
- "application/vnd.jupyter.widget-view+json": {
342
- "model_id": "e3369330ed9a4a9f8208ba6f160210bf",
343
- "version_major": 2,
344
- "version_minor": 0
345
- },
346
- "text/plain": [
347
- "Downloading (…)olve/main/merges.txt: 0%| | 0.00/494k [00:00<?, ?B/s]"
348
- ]
349
- },
350
- "metadata": {},
351
- "output_type": "display_data"
352
- },
353
- {
354
- "data": {
355
- "application/vnd.jupyter.widget-view+json": {
356
- "model_id": "4c3a9c73c84245b0b88e42980d65abdf",
357
- "version_major": 2,
358
- "version_minor": 0
359
- },
360
- "text/plain": [
361
- "Downloading (…)main/normalizer.json: 0%| | 0.00/52.7k [00:00<?, ?B/s]"
362
- ]
363
- },
364
- "metadata": {},
365
- "output_type": "display_data"
366
- },
367
- {
368
- "data": {
369
- "application/vnd.jupyter.widget-view+json": {
370
- "model_id": "8d61551d78914036a2b6475a6d840663",
371
- "version_major": 2,
372
- "version_minor": 0
373
- },
374
- "text/plain": [
375
- "Downloading (…)in/added_tokens.json: 0%| | 0.00/2.08k [00:00<?, ?B/s]"
376
- ]
377
- },
378
- "metadata": {},
379
- "output_type": "display_data"
380
- },
381
- {
382
- "data": {
383
- "application/vnd.jupyter.widget-view+json": {
384
- "model_id": "262213d3b6364b4e8648180c903c3008",
385
- "version_major": 2,
386
- "version_minor": 0
387
- },
388
- "text/plain": [
389
- "Downloading (…)cial_tokens_map.json: 0%| | 0.00/2.08k [00:00<?, ?B/s]"
390
- ]
391
- },
392
- "metadata": {},
393
- "output_type": "display_data"
394
- },
395
- {
396
- "data": {
397
- "application/vnd.jupyter.widget-view+json": {
398
- "model_id": "258a3b7a9eb94dcdb8355c09c1b683b3",
399
- "version_major": 2,
400
- "version_minor": 0
401
- },
402
- "text/plain": [
403
- "Downloading (…)rocessor_config.json: 0%| | 0.00/185k [00:00<?, ?B/s]"
404
- ]
405
- },
406
- "metadata": {},
407
- "output_type": "display_data"
408
- }
409
- ],
410
- "source": [
411
- "import torch\n",
412
- "from transformers import pipeline\n",
413
- "from huggingface_hub import model_info\n",
414
- "\n",
415
- "MODEL_NAME = \"openai/whisper-small\" #this always needs to stay in line 8 :D sorry for the hackiness\n",
416
- "lang = \"en\"\n",
417
- "\n",
418
- "device = 0 if torch.cuda.is_available() else \"cpu\"\n",
419
- "pipe = pipeline(\n",
420
- " task=\"automatic-speech-recognition\",\n",
421
- " model=MODEL_NAME,\n",
422
- " chunk_length_s=30,\n",
423
- " device=device,\n",
424
- ")\n",
425
- "\n",
426
- "pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task=\"transcribe\")\n",
427
- "\n",
428
- "def transcribe(microphone, file_upload):\n",
429
- " warn_output = \"\"\n",
430
- " if (microphone is not None) and (file_upload is not None):\n",
431
- " warn_output = (\n",
432
- " \"WARNING: You've uploaded an audio file and used the microphone. \"\n",
433
- " \"The recorded file from the microphone will be used and the uploaded audio will be discarded.\\n\"\n",
434
- " )\n",
435
- "\n",
436
- " elif (microphone is None) and (file_upload is None):\n",
437
- " return \"ERROR: You have to either use the microphone or upload an audio file\"\n",
438
- "\n",
439
- " file = microphone if microphone is not None else file_upload\n",
440
- "\n",
441
- " text = pipe(file)[\"text\"]\n",
442
- "\n",
443
- " return warn_output + text"
444
- ]
445
- },
446
  {
447
  "cell_type": "code",
448
  "execution_count": null,
@@ -461,7 +223,7 @@
461
  },
462
  {
463
  "cell_type": "code",
464
- "execution_count": 38,
465
  "id": "9badfdcd-dd99-49ea-a318-eda88cddefb6",
466
  "metadata": {},
467
  "outputs": [
@@ -469,8 +231,8 @@
469
  "name": "stdout",
470
  "output_type": "stream",
471
  "text": [
472
- "Running on local URL: http://0.0.0.0:6007\n",
473
- "Running on public URL: https://537af5b5b55ed185f5.gradio.live\n",
474
  "\n",
475
  "This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces\n"
476
  ]
@@ -478,7 +240,7 @@
478
  {
479
  "data": {
480
  "text/html": [
481
- "<div><iframe src=\"https://537af5b5b55ed185f5.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
482
  ],
483
  "text/plain": [
484
  "<IPython.core.display.HTML object>"
@@ -491,7 +253,7 @@
491
  "data": {
492
  "text/plain": []
493
  },
494
- "execution_count": 38,
495
  "metadata": {},
496
  "output_type": "execute_result"
497
  }
@@ -508,35 +270,80 @@
508
  },
509
  {
510
  "cell_type": "code",
511
- "execution_count": 41,
512
  "id": "81f3b241-8a6d-4ff0-bb70-d389d4d4e93a",
513
  "metadata": {},
514
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
515
  "source": [
 
516
  "mf_transcribe = gr.Interface(\n",
517
- " fn=transcribe,\n",
518
  " inputs=[\n",
519
  " gr.inputs.Audio(source=\"microphone\", type=\"filepath\", optional=True),\n",
520
  " gr.inputs.Audio(source=\"upload\", type=\"filepath\", optional=True),\n",
521
  " ],\n",
522
  " outputs=\"text\",\n",
523
- " title=\"Whisper Demo: Transcribe Audio\",\n",
524
  " description=(\n",
525
- " \"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the the fine-tuned\"\n",
526
- " f\" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files\"\n",
527
- " \" of arbitrary length.\"\n",
528
  " ),\n",
 
529
  " allow_flagging=\"never\",\n",
530
  ")"
531
  ]
532
  },
533
  {
534
  "cell_type": "code",
535
- "execution_count": null,
536
  "id": "b1e34fa5-8340-4329-a348-b641ca4db341",
537
  "metadata": {},
538
- "outputs": [],
539
- "source": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
  },
541
  {
542
  "cell_type": "markdown",
@@ -548,7 +355,7 @@
548
  },
549
  {
550
  "cell_type": "code",
551
- "execution_count": 14,
552
  "id": "7c3e753f-5051-4c3b-a5ab-fa65c7e7cae9",
553
  "metadata": {},
554
  "outputs": [
@@ -556,14 +363,15 @@
556
  "name": "stdout",
557
  "output_type": "stream",
558
  "text": [
559
- "Overwriting requirements.txt\n"
560
  ]
561
  }
562
  ],
563
  "source": [
564
  "%%writefile requirements.txt\n",
565
  "gradio==3.31.0\n",
566
- "faster-whisper==0.5.1"
 
567
  ]
568
  },
569
  {
@@ -576,7 +384,7 @@
576
  },
577
  {
578
  "cell_type": "code",
579
- "execution_count": 59,
580
  "id": "fba83810-1f0f-4777-b831-aabb4cfead39",
581
  "metadata": {},
582
  "outputs": [],
 
69
  },
70
  {
71
  "cell_type": "code",
72
+ "execution_count": 5,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  "id": "de8e21b9-449a-4ae3-bd64-bba334075fdd",
74
  "metadata": {},
75
  "outputs": [],
76
  "source": [
77
+ "def t_asr(folder=\"vegam-whisper-medium-ml-fp16\", audio_file=\"vegam-whisper-medium-ml-fp16/00b38e80-80b8-4f70-babf-566e848879fc.webm\", compute_type=\"float16\", device=\"cpu\"):\n",
78
  " model = WhisperModel(folder, device=device, compute_type=compute_type)\n",
79
  " \n",
80
  " segments, info = model.transcribe(audio_file, beam_size=5)\n",
 
85
  },
86
  {
87
  "cell_type": "code",
88
+ "execution_count": 6,
89
+ "id": "25e1413f-8f80-4704-a94e-26b8d9581a6a",
90
  "metadata": {},
91
  "outputs": [
92
  {
 
94
  "output_type": "stream",
95
  "text": [
96
  "[0.00s -> 4.58s] പാലം കടുക്കുവോളം നാരായണ പാലം കടന്നാലോ കൂരായണ\n",
97
+ "CPU times: user 11.2 s, sys: 2.2 s, total: 13.4 s\n",
98
+ "Wall time: 6.54 s\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  ]
100
  }
101
  ],
102
  "source": [
103
  "%%time\n",
104
+ "t_asr(compute_type=\"int8\", device=\"cuda\")"
105
  ]
106
  },
107
  {
108
  "cell_type": "code",
109
+ "execution_count": 7,
110
+ "id": "b255a0f0-4987-4f04-b63c-5ca0167917b6",
111
  "metadata": {},
112
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
113
  "source": [
114
+ "#|export \n",
115
+ "def transcribe_malayalam_speech(audio_file, compute_type=\"int8\", device=\"cpu\", folder=\"vegam-whisper-medium-ml-fp16\"):\n",
116
+ " \n",
117
+ " model = WhisperModel(folder, device=device, compute_type=compute_type)\n",
118
+ " segments, info = model.transcribe(audio_file, beam_size=5)\n",
119
+ "\n",
120
+ " lst = []\n",
121
+ " for segment in segments:\n",
122
+ " # print(\"[%.2fs -> %.2fs] %s\" % (segment.start, segment.end, segment.text))\n",
123
+ " lst.append(segment.text)\n",
124
+ "\n",
125
+ " return(\" \".join(lst))"
126
  ]
127
  },
128
  {
129
  "cell_type": "code",
130
+ "execution_count": 8,
131
  "id": "48cd4ec3-512f-49d0-87ac-3ef989e25b80",
132
  "metadata": {},
133
  "outputs": [],
134
  "source": [
135
  "#|export\n",
136
+ "def gr_transcribe_malayalam_speech(microphone, file_upload, compute_type=\"int8\", device=\"cpu\", folder=\"vegam-whisper-medium-ml-fp16\"):\n",
137
+ " warn_output = \"\"\n",
138
+ " if (microphone is not None) and (file_upload is not None):\n",
139
+ " warn_output = (\n",
140
+ " \"WARNING: You've uploaded an audio file and used the microphone. \"\n",
141
+ " \"The recorded file from the microphone will be used and the uploaded audio will be discarded.\\n\"\n",
142
+ " )\n",
143
+ "\n",
144
+ " elif (microphone is None) and (file_upload is None):\n",
145
+ " return \"ERROR: You have to either use the microphone or upload an audio file\"\n",
146
+ "\n",
147
+ " audio_file = microphone if microphone is not None else file_upload\n",
148
+ " \n",
149
  " model = WhisperModel(folder, device=device, compute_type=compute_type)\n",
150
  " segments, info = model.transcribe(audio_file, beam_size=5)\n",
151
  "\n",
 
159
  },
160
  {
161
  "cell_type": "code",
162
+ "execution_count": 9,
163
  "id": "14fda29a-aee1-44b2-9269-048cc8b98ea8",
164
  "metadata": {},
165
  "outputs": [
 
167
  "name": "stdout",
168
  "output_type": "stream",
169
  "text": [
170
+ "CPU times: user 40.6 s, sys: 9.76 s, total: 50.3 s\n",
171
+ "Wall time: 13.6 s\n"
172
  ]
173
  },
174
  {
 
177
  "'പാലം കടുക്കുവോളം നാരായണ പാലം കടന്നാലോ കൂരായണ'"
178
  ]
179
  },
180
+ "execution_count": 9,
181
  "metadata": {},
182
  "output_type": "execute_result"
183
  }
184
  ],
185
  "source": [
186
  "%%time\n",
187
+ "transcribe_malayalam_speech(audio_file=\"vegam-whisper-medium-ml-fp16/00b38e80-80b8-4f70-babf-566e848879fc.webm\")"
188
  ]
189
  },
190
  {
 
197
  "## Haha, You are burning GPUs and wasting CO2"
198
  ]
199
  },
 
 
 
 
 
 
 
 
200
  {
201
  "cell_type": "markdown",
202
  "id": "45fade75-e0b1-4c5d-90a3-ebd7345a4d16",
 
205
  "## Figure out Whisper Demo by Huggingface"
206
  ]
207
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  {
209
  "cell_type": "code",
210
  "execution_count": null,
 
223
  },
224
  {
225
  "cell_type": "code",
226
+ "execution_count": 10,
227
  "id": "9badfdcd-dd99-49ea-a318-eda88cddefb6",
228
  "metadata": {},
229
  "outputs": [
 
231
  "name": "stdout",
232
  "output_type": "stream",
233
  "text": [
234
+ "Running on local URL: http://0.0.0.0:6006\n",
235
+ "Running on public URL: https://9fa992d2ba37b0af49.gradio.live\n",
236
  "\n",
237
  "This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces\n"
238
  ]
 
240
  {
241
  "data": {
242
  "text/html": [
243
+ "<div><iframe src=\"https://9fa992d2ba37b0af49.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
244
  ],
245
  "text/plain": [
246
  "<IPython.core.display.HTML object>"
 
253
  "data": {
254
  "text/plain": []
255
  },
256
+ "execution_count": 10,
257
  "metadata": {},
258
  "output_type": "execute_result"
259
  }
 
270
  },
271
  {
272
  "cell_type": "code",
273
+ "execution_count": 20,
274
  "id": "81f3b241-8a6d-4ff0-bb70-d389d4d4e93a",
275
  "metadata": {},
276
+ "outputs": [
277
+ {
278
+ "name": "stderr",
279
+ "output_type": "stream",
280
+ "text": [
281
+ "/opt/conda/lib/python3.10/site-packages/gradio/inputs.py:321: UserWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your components from gradio.components\n",
282
+ " warnings.warn(\n",
283
+ "/opt/conda/lib/python3.10/site-packages/gradio/inputs.py:324: UserWarning: `optional` parameter is deprecated, and it has no effect\n",
284
+ " super().__init__(source=source, type=type, label=label, optional=optional)\n"
285
+ ]
286
+ }
287
+ ],
288
  "source": [
289
+ "#|export\n",
290
  "mf_transcribe = gr.Interface(\n",
291
+ " fn=gr_transcribe_malayalam_speech,\n",
292
  " inputs=[\n",
293
  " gr.inputs.Audio(source=\"microphone\", type=\"filepath\", optional=True),\n",
294
  " gr.inputs.Audio(source=\"upload\", type=\"filepath\", optional=True),\n",
295
  " ],\n",
296
  " outputs=\"text\",\n",
297
+ " title=\"PALLAKKU (പല്ലക്ക്)\",\n",
298
  " description=(\n",
299
+ " \"Pallakku is a Malayalam speech to text demo leveraging the model-weights of [vegam-whisper-medium-ml](https://huggingface.co/kurianbenoy/vegam-whisper-medium-ml-fp16).\"\n",
 
 
300
  " ),\n",
301
+ " article=\"Please note that this demo now uses CPU only and in my testing for a 5 seconds audio file it can take upto 15 seconds for results to come. If you are interested to use a GPU based API instead, feel free to contact the author @ kurian.bkk@gmail.com\",\n",
302
  " allow_flagging=\"never\",\n",
303
  ")"
304
  ]
305
  },
306
  {
307
  "cell_type": "code",
308
+ "execution_count": 21,
309
  "id": "b1e34fa5-8340-4329-a348-b641ca4db341",
310
  "metadata": {},
311
+ "outputs": [
312
+ {
313
+ "name": "stdout",
314
+ "output_type": "stream",
315
+ "text": [
316
+ "Running on local URL: http://0.0.0.0:6010\n",
317
+ "Running on public URL: https://19b32861466405ac95.gradio.live\n",
318
+ "\n",
319
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces\n"
320
+ ]
321
+ },
322
+ {
323
+ "data": {
324
+ "text/html": [
325
+ "<div><iframe src=\"https://19b32861466405ac95.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
326
+ ],
327
+ "text/plain": [
328
+ "<IPython.core.display.HTML object>"
329
+ ]
330
+ },
331
+ "metadata": {},
332
+ "output_type": "display_data"
333
+ },
334
+ {
335
+ "data": {
336
+ "text/plain": []
337
+ },
338
+ "execution_count": 21,
339
+ "metadata": {},
340
+ "output_type": "execute_result"
341
+ }
342
+ ],
343
+ "source": [
344
+ "#|export\n",
345
+ "mf_transcribe.launch(share=True)"
346
+ ]
347
  },
348
  {
349
  "cell_type": "markdown",
 
355
  },
356
  {
357
  "cell_type": "code",
358
+ "execution_count": 22,
359
  "id": "7c3e753f-5051-4c3b-a5ab-fa65c7e7cae9",
360
  "metadata": {},
361
  "outputs": [
 
363
  "name": "stdout",
364
  "output_type": "stream",
365
  "text": [
366
+ "Writing requirements.txt\n"
367
  ]
368
  }
369
  ],
370
  "source": [
371
  "%%writefile requirements.txt\n",
372
  "gradio==3.31.0\n",
373
+ "faster-whisper==0.5.1\n",
374
+ "torch"
375
  ]
376
  },
377
  {
 
384
  },
385
  {
386
  "cell_type": "code",
387
+ "execution_count": 23,
388
  "id": "fba83810-1f0f-4777-b831-aabb4cfead39",
389
  "metadata": {},
390
  "outputs": [],