Fixed the `verbose` option (quoted it as a dict key) and added timing of model loading and transcription
Browse files- handler.py +13 -2
handler.py
CHANGED
@@ -3,6 +3,7 @@ from transformers.pipelines.audio_utils import ffmpeg_read
|
|
3 |
import whisper
|
4 |
import torch
|
5 |
import pytube
|
|
|
6 |
|
7 |
|
8 |
class EndpointHandler():
|
@@ -13,7 +14,12 @@ class EndpointHandler():
|
|
13 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
14 |
print(f'whisper will use: {device}')
|
15 |
|
|
|
16 |
whisper_model = whisper.load_model(MODEL_NAME).to(device)
|
|
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
|
@@ -33,14 +39,19 @@ class EndpointHandler():
|
|
33 |
# Realized this by running in verbose mode and seeing how much time
|
34 |
# was spent on the decoding language step
|
35 |
"language":"en",
|
36 |
-
verbose: True
|
37 |
}
|
38 |
yt = pt.YouTube(video_url)
|
39 |
stream = yt.streams.filter(only_audio=True)[0]
|
40 |
path_to_audio = f"{yt.video_id}.mp3"
|
41 |
stream.download(filename=path_to_audio)
|
42 |
-
|
43 |
transcript = self.model.transcribe(path_to_audio, **decode_options)
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
# postprocess the prediction
|
46 |
return {"transcript": transcript}
|
|
|
3 |
import whisper
|
4 |
import torch
|
5 |
import pytube
|
6 |
+
import time
|
7 |
|
8 |
|
9 |
class EndpointHandler():
|
|
|
14 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
15 |
print(f'whisper will use: {device}')
|
16 |
|
17 |
+
t0 = time.time()
|
18 |
whisper_model = whisper.load_model(MODEL_NAME).to(device)
|
19 |
+
t1 = time.time()
|
20 |
+
|
21 |
+
total = t1-t0
|
22 |
+
print(f'Finished loading model in {total} seconds')
|
23 |
|
24 |
|
25 |
def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
|
|
|
39 |
# Realized this by running in verbose mode and seeing how much time
|
40 |
# was spent on the decoding language step
|
41 |
"language":"en",
|
42 |
+
"verbose": True
|
43 |
}
|
44 |
yt = pt.YouTube(video_url)
|
45 |
stream = yt.streams.filter(only_audio=True)[0]
|
46 |
path_to_audio = f"{yt.video_id}.mp3"
|
47 |
stream.download(filename=path_to_audio)
|
48 |
+
t0 = time.time()
|
49 |
transcript = self.model.transcribe(path_to_audio, **decode_options)
|
50 |
+
t1 = time.time()
|
51 |
+
|
52 |
+
total = t1-t0
|
53 |
+
print(f'Finished transcription in {total} seconds')
|
54 |
+
|
55 |
|
56 |
# postprocess the prediction
|
57 |
return {"transcript": transcript}
|