avfranco committed on
Commit
3597c88
1 Parent(s): e5a6460

audio transcriber

Browse files

Initial demo release of Microsoft Teams meeting recording transcriber using Gradio.

This demo is powered by 🤗's ASR models, primarily Whisper by OpenAI.
The optimisations are adapted from the Vaibhavs10/insanely-fast-whisper project.

Files changed (1) hide show
  1. transcriberUI.py +61 -0
transcriberUI.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import time
3
+ import os
4
+ from pydub import AudioSegment
5
+
6
def audio_converter(audio_file: str):
    """Convert an M4A recording to WAV and return the new file's path.

    The WAV file is written next to the input, using the same base name
    with the extension replaced by ``.wav``.

    Args:
        audio_file: Path to the source recording; it is decoded as M4A.

    Returns:
        Path of the exported WAV file.
    """
    base_path, _extension = os.path.splitext(audio_file)
    wav_path = base_path + ".wav"

    # Decode explicitly as M4A, then re-encode the same audio as WAV.
    recording = AudioSegment.from_file(audio_file, format="m4a")
    recording.export(wav_path, format="wav")

    return wav_path
13
+
14
# Loaded ASR pipeline, kept between uploads so the Whisper checkpoint is only
# loaded once per process instead of on every transcription request.
_ASR_PIPELINE = None


def _select_device():
    """Return the torch device string to run on: CUDA, then MPS, then CPU."""
    import torch

    if torch.cuda.is_available():
        return "cuda:0"
    # Older torch builds have no MPS backend module at all.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return "mps"
    return "cpu"


def _get_asr_pipeline():
    """Build the Whisper ASR pipeline on first use and reuse it afterwards."""
    global _ASR_PIPELINE
    if _ASR_PIPELINE is not None:
        return _ASR_PIPELINE

    # Imported lazily so the UI module can be loaded without paying the
    # transformers/torch import cost up front.
    from transformers import pipeline
    import torch

    device_id = _select_device()
    # Half precision is meant for accelerators; CPU kernels may not support
    # float16, so fall back to full precision there.
    dtype = torch.float32 if device_id == "cpu" else torch.float16

    pipe = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-large-v3",
        torch_dtype=dtype,
        device=device_id,
    )
    if device_id == "mps":
        torch.mps.empty_cache()
    else:
        try:
            pipe.model = pipe.model.to_bettertransformer()
        except ImportError:
            # BetterTransformer needs the optional `optimum` package; it is
            # only an optimisation, so continue with the stock model.
            pass

    _ASR_PIPELINE = pipe
    return pipe


def asr_transcriber(audio_file):
    """Transcribe an uploaded M4A recording and return the recognised text.

    The recording is converted to WAV with ``audio_converter``, run through
    the Whisper large-v3 pipeline in 30-second chunks, and the temporary WAV
    file is removed afterwards.

    Args:
        audio_file: Path of the uploaded M4A file.

    Returns:
        The full transcription text produced by the pipeline.
    """
    audio_file_wav = audio_converter(audio_file)
    try:
        json_output = _get_asr_pipeline()(
            audio_file_wav,
            chunk_length_s=30,
            batch_size=2,
            # language=None lets the model detect the spoken language.
            generate_kwargs={"task": "transcribe", "language": None},
            return_timestamps=True,
        )
    finally:
        # The WAV copy is only an intermediate artefact; never delete the
        # caller's own file should both paths happen to coincide.
        if audio_file_wav != audio_file:
            try:
                os.remove(audio_file_wav)
            except OSError:
                # Cleanup is best-effort and must not mask a pipeline error.
                pass

    return json_output["text"]
49
+
50
# Page layout: a short greeting, a single-file upload restricted to M4A
# recordings, and a text box that receives the transcription.
with gr.Blocks() as transcriberUI:
    gr.Markdown(
        """
        # Ola Xara & Solange!
        Clicar no botao abaixo para selecionar o Audio a ser transcrito!
        Ambiente de Teste: pode demorar um pouco. Nao fiquem nervosos :-)
        """)
    # Gradio expects file extensions with a leading dot; a bare "m4a" is
    # treated as a MIME category rather than an extension.
    inp = gr.File(label="Arquivo de Audio", show_label=True, file_count="single", file_types=[".m4a"])
    transcribe = gr.Textbox(label="Transcricao", show_label=True, show_copy_button=True)
    # Start transcribing as soon as the upload finishes.
    inp.upload(asr_transcriber, inp, transcribe)

# share=True asks Gradio for a temporary public link in addition to localhost.
transcriberUI.launch(share=True)