Fixed the `verbose` option (quoted it as a dict key) and added timing of model loading and transcription
Browse files- handler.py +13 -2
handler.py
CHANGED
@@ -3,6 +3,7 @@ from transformers.pipelines.audio_utils import ffmpeg_read
|
|
3 |
import whisper
|
4 |
import torch
|
5 |
import pytube
|
|
|
6 |
|
7 |
|
8 |
class EndpointHandler():
|
@@ -13,7 +14,12 @@ class EndpointHandler():
|
|
13 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
14 |
print(f'whisper will use: {device}')
|
15 |
|
|
|
16 |
whisper_model = whisper.load_model(MODEL_NAME).to(device)
|
|
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
|
@@ -33,14 +39,19 @@ class EndpointHandler():
|
|
33 |
# Realized this by running in verbose mode and seeing how much time
|
34 |
# was spent on the decoding language step
|
35 |
"language":"en",
|
36 |
-
verbose: True
|
37 |
}
|
38 |
yt = pt.YouTube(video_url)
|
39 |
stream = yt.streams.filter(only_audio=True)[0]
|
40 |
path_to_audio = f"{yt.video_id}.mp3"
|
41 |
stream.download(filename=path_to_audio)
|
42 |
-
|
43 |
transcript = self.model.transcribe(path_to_audio, **decode_options)
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
# postprocess the prediction
|
46 |
return {"transcript": transcript}
|
|
|
3 |
import whisper
|
4 |
import torch
|
5 |
import pytube
|
6 |
+
import time
|
7 |
|
8 |
|
9 |
class EndpointHandler():
|
|
|
14 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
15 |
print(f'whisper will use: {device}')
|
16 |
|
17 |
+
t0 = time.time()
|
18 |
whisper_model = whisper.load_model(MODEL_NAME).to(device)
|
19 |
+
t1 = time.time()
|
20 |
+
|
21 |
+
total = t1-t0
|
22 |
+
print(f'Finished loading model in {total} seconds')
|
23 |
|
24 |
|
25 |
def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
|
|
|
39 |
# Realized this by running in verbose mode and seeing how much time
|
40 |
# was spent on the decoding language step
|
41 |
"language":"en",
|
42 |
+
"verbose": True
|
43 |
}
|
44 |
yt = pt.YouTube(video_url)
|
45 |
stream = yt.streams.filter(only_audio=True)[0]
|
46 |
path_to_audio = f"{yt.video_id}.mp3"
|
47 |
stream.download(filename=path_to_audio)
|
48 |
+
t0 = time.time()
|
49 |
transcript = self.model.transcribe(path_to_audio, **decode_options)
|
50 |
+
t1 = time.time()
|
51 |
+
|
52 |
+
total = t1-t0
|
53 |
+
print(f'Finished transcription in {total} seconds')
|
54 |
+
|
55 |
|
56 |
# postprocess the prediction
|
57 |
return {"transcript": transcript}
|