camparchimedes committed
Commit e898bd8 · verified · 1 Parent(s): af8451f

Update app.py

Files changed (1)
  1. app.py +7 -9
app.py CHANGED
@@ -9,6 +9,7 @@
 import time
 import os
 import spaces
+import contextlib
 import warnings
 warnings.filterwarnings("ignore")
 from pydub import AudioSegment
@@ -21,34 +22,31 @@ def convert_to_wav(audio_file):
     return wav_file
 
 import torch
-from transformers import pipeline, AutoProcessor # AutoModelForSpeechSeq2Seq
+from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq
 
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-torch_dtype = torch.float32
-
-pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", device=device, torch_dtype=torch_dtype)
-
-@spaces.GPU(queue=True)
 
 # Initialize processor and pipeline
 processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large")
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 torch_dtype = torch.float32
 
+pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", device=device, torch_dtype=torch_dtype)
+
 language = "no"
 task = "transcribe"
 
+@spaces.GPU(queue=True)
 def transcribe_audio(audio_file):
     if audio_file.endswith(".m4a"):
         audio_file = convert_to_wav(audio_file)
 
     start_time = time.time()
 
-    # forced_decoder_ids@the correct context
+    # forced_decoder_ids in the correct context
    forced_decoder_ids = processor.get_decoder_prompt_ids(language=language, task=task)
 
     with torch.no_grad():
-        # CUDA@function -->
+        # CUDA within the function
         with torch.cuda.device(device) if torch.cuda.is_available() else contextlib.nullcontext():
             output = pipe(audio_file, chunk_length_s=30, generate_kwargs={"forced_decoder_ids": forced_decoder_ids})
 
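Review note: the decorator move is the substantive fix here. In the old layout, @spaces.GPU(queue=True) was followed by an assignment (processor = ...) rather than a function definition, which Python rejects with a SyntaxError, since a decorator must immediately precede a def or class. A minimal sketch of the rule, with a hypothetical no-op gpu decorator standing in for spaces.GPU(queue=True):

def gpu(fn):  # hypothetical stand-in for spaces.GPU(queue=True)
    return fn

# Old layout: decorator followed by an assignment -> SyntaxError
# @gpu
# processor = AutoProcessor.from_pretrained(...)

# Patched layout: the decorator sits directly above the function it wraps
@gpu
def transcribe_audio(audio_file):
    return audio_file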
 
 
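The new import contextlib exists to support the device guard inside transcribe_audio: when CUDA is absent, contextlib.nullcontext() acts as a no-op context manager, so the same with statement runs on CPU-only hosts. A stripped-down sketch of the pattern, with a placeholder work callable standing in for the pipe(...) invocation:

import contextlib
import torch

def run_in_device_context(work):
    # Enter the CUDA device context when a GPU is present; otherwise use a
    # no-op context manager so the with-block still executes on CPU.
    ctx = torch.cuda.device(0) if torch.cuda.is_available() else contextlib.nullcontext()
    with ctx:
        return work()

# Placeholder usage; in app.py the callable would be the pipeline call.
result = run_in_device_context(lambda: "transcript placeholder")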