gautamtata
/

colab_test_model

Inference Endpoints

Model card Files Files and versions Community

gautamtata committed on Dec 13, 2023

Commit

38c2b04

·

1 Parent(s): d415c99

Create handler.py

Files changed (1) hide show

handler.py +55 -0

handler.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import torchaudio
+import torch
+from transformers import Wav2Vec2Processor, Wav2Vec2ForSpeechClassification, AutoConfig
+from torch.nn.functional import softmax
+from typing import Dict, List, Any
+# Suppose this handler is for a speech classification model
+class EndpointHandler():
+    def __init__(self, path="."):
+        # Assuming that the path contains all the necessary files for model and processor.
+        config = AutoConfig.from_pretrained(path)
+        self.processor = Wav2Vec2Processor.from_pretrained(path)
+        self.model = Wav2Vec2ForSpeechClassification.from_pretrained(path)
+        self.sampling_rate = self.processor.feature_extractor.sampling_rate
+        self.model.to('cuda' if torch.cuda.is_available() else 'cpu')
+    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """
+        Overriding call method to handle speech input and return classification result.
+        """
+        # Extract 'inputs' key from the data dictionary. This should be a path to the audio file.
+        audio_path = data.get('inputs', None)
+        if audio_path is None:
+            raise ValueError("Invalid input, 'inputs' key with path to the audio file is required.")
+        # Load and preprocess the audio file, and run prediction
+        outputs = self.predict(audio_path)
+        return outputs
+    def predict(self, path):
+        """
+        Runs prediction on the provided audio file path.
+        """
+        # Load audio file
+        speech_array, _sampling_rate = torchaudio.load(path)
+        # Resample if necessary
+        if _sampling_rate != self.sampling_rate:
+            resampler = torchaudio.transforms.Resample(_sampling_rate, self.sampling_rate)
+            speech_array = resampler(speech_array)
+        speech_array = speech_array.squeeze().numpy()
+        # Preprocess audio input
+        inputs = self.processor(speech_array, sampling_rate=self.sampling_rate, return_tensors="pt", padding=True)
+        input_values = inputs.input_values.to('cuda' if torch.cuda.is_available() else 'cpu')
+        attention_mask = inputs.attention_mask.to('cuda' if torch.cuda.is_available() else 'cpu')
+        # Model inference
+        with torch.no_grad():
+            logits = self.model(input_values, attention_mask=attention_mask).logits
+        # Postprocessing
+        scores = softmax(logits, dim=1).detach().cpu().numpy()[0]
+        predictions = [{"label": self.config.id2label[i], "score": float(score)} for i, score in enumerate(scores)]
+        return predictions