Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,7 @@
|
|
9 |
import time
|
10 |
import os
|
11 |
import spaces
|
|
|
12 |
import warnings
|
13 |
warnings.filterwarnings("ignore")
|
14 |
from pydub import AudioSegment
|
@@ -21,34 +22,31 @@ def convert_to_wav(audio_file):
|
|
21 |
return wav_file
|
22 |
|
23 |
import torch
|
24 |
-
from transformers import pipeline, AutoProcessor
|
25 |
|
26 |
-
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
27 |
-
torch_dtype = torch.float32
|
28 |
-
|
29 |
-
pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", device=device, torch_dtype=torch_dtype)
|
30 |
-
|
31 |
-
@spaces.GPU(queue=True)
|
32 |
|
33 |
# Initialize processor and pipeline
|
34 |
processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large")
|
35 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
36 |
torch_dtype = torch.float32
|
37 |
|
|
|
|
|
38 |
language = "no"
|
39 |
task = "transcribe"
|
40 |
|
|
|
41 |
def transcribe_audio(audio_file):
|
42 |
if audio_file.endswith(".m4a"):
|
43 |
audio_file = convert_to_wav(audio_file)
|
44 |
|
45 |
start_time = time.time()
|
46 |
|
47 |
-
# forced_decoder_ids
|
48 |
forced_decoder_ids = processor.get_decoder_prompt_ids(language=language, task=task)
|
49 |
|
50 |
with torch.no_grad():
|
51 |
-
# CUDA
|
52 |
with torch.cuda.device(device) if torch.cuda.is_available() else contextlib.nullcontext():
|
53 |
output = pipe(audio_file, chunk_length_s=30, generate_kwargs={"forced_decoder_ids": forced_decoder_ids})
|
54 |
|
|
|
9 |
import time
|
10 |
import os
|
11 |
import spaces
|
12 |
+
import contextlib
|
13 |
import warnings
|
14 |
warnings.filterwarnings("ignore")
|
15 |
from pydub import AudioSegment
|
|
|
22 |
return wav_file
|
23 |
|
24 |
import torch
|
25 |
+
from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
# Initialize processor and pipeline
|
29 |
processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large")
|
30 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
31 |
torch_dtype = torch.float32
|
32 |
|
33 |
+
pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", device=device, torch_dtype=torch_dtype)
|
34 |
+
|
35 |
language = "no"
|
36 |
task = "transcribe"
|
37 |
|
38 |
+
@spaces.GPU(queue=True)
|
39 |
def transcribe_audio(audio_file):
|
40 |
if audio_file.endswith(".m4a"):
|
41 |
audio_file = convert_to_wav(audio_file)
|
42 |
|
43 |
start_time = time.time()
|
44 |
|
45 |
+
# forced_decoder_ids in the correct context
|
46 |
forced_decoder_ids = processor.get_decoder_prompt_ids(language=language, task=task)
|
47 |
|
48 |
with torch.no_grad():
|
49 |
+
# CUDA within the function
|
50 |
with torch.cuda.device(device) if torch.cuda.is_available() else contextlib.nullcontext():
|
51 |
output = pipe(audio_file, chunk_length_s=30, generate_kwargs={"forced_decoder_ids": forced_decoder_ids})
|
52 |
|