S-Fry
/

large

Automatic Speech Recognition

hf-asr-leaderboard

Inference Endpoints

Model card Files Files and versions Community

S-Fry committed on Apr 25, 2023

Commit

c5153e0

•

1 Parent(s): d352bb6

Update handler.py

Files changed (1) hide show

handler.py +12 -10

handler.py CHANGED Viewed

@@ -8,20 +8,21 @@ SAMPLE_RATE = 16000
 MODEL_NAME = "openai/whisper-large" #this always needs to stay in line 8 :D sorry for the hackiness
 lang = "dk"
-device = 0 if torch.cuda.is_available() else "cpu"
-pipe = pipeline(
-    task="automatic-speech-recognition",
-    model=MODEL_NAME,
-    chunk_length_s=30,
-    device=device,
-)
-pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
 class EndpointHandler():
     def __init__(self, path=""):
         # load the model
-        self.model = whisper.load_model("medium")
     def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
@@ -37,8 +38,9 @@ class EndpointHandler():
         audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)
         audio_tensor= torch.from_numpy(audio_nparray)
-        # run inference pipeline
         result = self.model.transcribe(audio_nparray)
         # postprocess the prediction
         return {"tekst": result["text"]}

 MODEL_NAME = "openai/whisper-large" #this always needs to stay in line 8 :D sorry for the hackiness
 lang = "dk"
 class EndpointHandler():
     def __init__(self, path=""):
+        pipe = pipeline(
+            task="automatic-speech-recognition",
+            model=MODEL_NAME,
+            chunk_length_s=30,
+            device=device,
+        )
         # load the model
+        #self.model = whisper.load_model("large")
+        self.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
     def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
         audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)
         audio_tensor= torch.from_numpy(audio_nparray)
+        # run inference pipeline
         result = self.model.transcribe(audio_nparray)
         # postprocess the prediction
         return {"tekst": result["text"]}