File size: 1,304 Bytes

44c0e8a
 
 
2b4b5f3
 
 
 
44c0e8a
 
 
 
 
 
 
 
 
 
 
 
 
 
2b4b5f3
 
44c0e8a
 
 
 
 
 
 
 
 
 
 
343abdc
2b4b5f3
 
44c0e8a
343abdc
 
44c0e8a
343abdc
44c0e8a

from typing import Dict, List, Any

import torch as torch
from transformers import pipeline, WhisperProcessor

from scipy.io.wavfile import read



class EndpointHandler:
    """Callable request handler that transcribes audio with Whisper.

    A speech-recognition pipeline is built once at construction time and
    reused for every request. Decoding is pinned to Dutch transcription
    (``language="nl"``, ``task="transcribe"``).
    """

    def __init__(self, path=""):
        """Build the speech-recognition pipeline.

        Args:
            path: Accepted for interface compatibility but not used; the
                model is always loaded by the fixed id
                ``"openai/whisper-large"``.
        """
        # Use the first CUDA device when one is available, else the CPU.
        device = 0 if torch.cuda.is_available() else "cpu"
        self.pipe = pipeline(
            task="automatic-speech-recognition",
            model="openai/whisper-large",
            chunk_length_s=30,
            device=device,
        )
        # Force the decoder prompt so the model transcribes Dutch.
        processor = WhisperProcessor.from_pretrained("openai/whisper-large")
        self.pipe.model.config.forced_decoder_ids = (
            processor.get_decoder_prompt_ids(language="nl", task="transcribe")
        )

    def __call__(self, data: Dict[str, Any]) -> str:
        """Transcribe the audio carried by a request payload.

        Args:
            data: Request payload. The audio is taken from (and removed
                from) the ``"inputs"`` key; when that key is absent the
                whole payload is handed to the pipeline instead.

        Returns:
            The ``"text"`` field of the pipeline result.
        """
        print("request")
        # Take the audio from the request body; fall back to the payload itself.
        inputs = data.pop("inputs", data)
        # Log only the payload type: a dict has no ``.inputs`` attribute, and
        # dumping raw audio into the log would be unreadable anyway.
        print(type(inputs).__name__)
        print("here comes text")
        # Run inference exactly once and reuse the result for logging
        # and for the return value.
        result = self.pipe(inputs)
        print(result)
        text = result["text"]
        print(text)
        return text