avfranco committed on
Commit
87f602f
1 Parent(s): 554a124

ASR Transcriber optimisation for CPU

Browse files

- Model changed from openai/whisper-large-v3 to openai/whisper-small
- Enabled BetterTransformer on the pipeline model (`pipe.model.to_bettertransformer()`)

Files changed (1) hide show
  1. app.py +24 -11
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import gradio as gr
2
- import time
3
  import os
4
  from pydub import AudioSegment
5
 
@@ -12,27 +11,41 @@ def audio_converter(audio_file:str):
12
  return audio_wav_filename
13
 
14
  def asr_transcriber(audio_file):
15
- from transformers import pipeline
16
  import torch
17
- import random
18
 
19
  audio_file_wav = audio_converter(audio_file)
20
-
21
  # Check for CUDA availability (GPU)
22
  if torch.cuda.is_available():
23
  device_id = torch.device('cuda')
24
  else:
25
  device_id = torch.device('cpu')
26
-
 
 
 
 
 
 
 
 
 
 
27
  # Initialize the ASR pipeline
28
  pipe = pipeline(
29
  "automatic-speech-recognition",
30
- model="openai/whisper-large-v3",
31
- torch_dtype=torch.float32,
32
- device=device_id
33
  )
34
-
35
- ts = True
 
 
 
 
36
  language = None
37
  task = "transcribe"
38
 
@@ -51,7 +64,7 @@ with gr.Blocks() as transcriberUI:
51
  """
52
  # Ola Xara & Solange!
53
  Clicar no botao abaixo para selecionar o Audio a ser transcrito!
54
- Ambiente Demo disponivel 24x7.
55
  """)
56
  inp = gr.File(label="Arquivo de Audio", show_label=True, file_count="single", file_types=["m4a"])
57
  transcribe = gr.Textbox(label="Transcricao", show_label=True, show_copy_button=True)
 
1
  import gradio as gr
 
2
  import os
3
  from pydub import AudioSegment
4
 
 
11
  return audio_wav_filename
12
 
13
  def asr_transcriber(audio_file):
14
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
15
  import torch
16
+ import optimum
17
 
18
  audio_file_wav = audio_converter(audio_file)
19
+
20
  # Check for CUDA availability (GPU)
21
  if torch.cuda.is_available():
22
  device_id = torch.device('cuda')
23
  else:
24
  device_id = torch.device('cpu')
25
+
26
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
27
+
28
+ #device_id = "mps" for Mac only
29
+ #torch_dtype = float16
30
+ flash = False
31
+ ts = True
32
+
33
+ #Try to optimize when CPU and float32
34
+ model_id = "openai/whisper-small"
35
+
36
  # Initialize the ASR pipeline
37
  pipe = pipeline(
38
  "automatic-speech-recognition",
39
+ model=model_id,
40
+ torch_dtype=torch_dtype,
41
+ device=device_id,
42
  )
43
+
44
+ if device_id == "mps":
45
+ torch.mps.empty_cache()
46
+ elif not flash:
47
+ pipe.model = pipe.model.to_bettertransformer()
48
+
49
  language = None
50
  task = "transcribe"
51
 
 
64
  """
65
  # Ola Xara & Solange!
66
  Clicar no botao abaixo para selecionar o Audio a ser transcrito!
67
+ Ambiente de Teste: pode demorar um pouco. Nao fiquem nervosos :-)
68
  """)
69
  inp = gr.File(label="Arquivo de Audio", show_label=True, file_count="single", file_types=["m4a"])
70
  transcribe = gr.Textbox(label="Transcricao", show_label=True, show_copy_button=True)