spellingdragon
/

whisper-large-v3-handler

Automatic Speech Recognition

hf-asr-leaderboard

Inference Endpoints

Model card Files Files and versions Community

spellingdragon committed on Nov 26, 2023

Commit

e8d6e13

•

1 Parent(s): 67b6099

Update handler.py

Files changed (1) hide show

handler.py +16 -16

handler.py CHANGED Viewed

@@ -5,8 +5,8 @@ from transformers import WhisperProcessor, AutoModelForSpeechSeq2Seq, AutoProces
 class EndpointHandler():
     def __init__(self, path=""):
-        #device = "cuda:0" if torch.cuda.is_available() else "cpu"
-        #torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
         model_id = "openai/whisper-large-v3"
         #model = AutoModelForSpeechSeq2Seq.from_pretrained(
@@ -14,22 +14,22 @@ class EndpointHandler():
         #)
         #model.to(device)
-        #processor = AutoProcessor.from_pretrained(model_id)
         #processor = WhisperProcessor.from_pretrained(model_id)
-        #self.pipeline = pipeline(
-        #    "automatic-speech-recognition",
-        #    model=model,
-        #    tokenizer=processor.tokenizer,
-        #    feature_extractor=processor.feature_extractor,
-        #    max_new_tokens=128,
-        #    chunk_length_s=30,
-        #    batch_size=16,
-        #    return_timestamps=True,
-        #    torch_dtype=torch_dtype,
-        #    device=device,
-        #)
-        #self.model = model
     def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:

 class EndpointHandler():
     def __init__(self, path=""):
+        device = "cuda:0" if torch.cuda.is_available() else "cpu"
+        torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
         model_id = "openai/whisper-large-v3"
         #model = AutoModelForSpeechSeq2Seq.from_pretrained(
         #)
         #model.to(device)
+        processor = AutoProcessor.from_pretrained(model_id)
         #processor = WhisperProcessor.from_pretrained(model_id)
+        self.pipeline = pipeline(
+            "automatic-speech-recognition",
+            model=model_id,
+            tokenizer=processor.tokenizer,
+            feature_extractor=processor.feature_extractor,
+            max_new_tokens=128,
+            chunk_length_s=30,
+            batch_size=16,
+            return_timestamps=True,
+            torch_dtype=torch_dtype,
+            device=device,
+        )
+        self.model = model
     def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]: