import os
from typing import List, Tuple

import numpy as np
from asteroid import separate
from asteroid.models import BaseModel


class PreTrainedPipeline:
    def __init__(self, path=""):
        """Preload everything needed at inference time.

        Load the model (and any processors or tokenizers) here: this
        method is called only once, so all heavy I/O belongs in it.
        """
        self.model = BaseModel.from_pretrained(os.path.join(path, "pytorch_model.bin"))
        self.sampling_rate = self.model.sample_rate
    def __call__(self, inputs: np.ndarray) -> Tuple[np.ndarray, int, List[str]]:
        """
        Args:
            inputs (:obj:`np.ndarray`):
                The raw audio waveform, sampled at `self.sampling_rate` by
                default. The shape of this array is `(T,)`, where `T` is the
                time axis.

        Return:
            A :obj:`tuple` containing:
              - :obj:`np.ndarray`:
                  The separated sources, with shape `(C', T')`.
              - :obj:`int`: the sampling rate in Hz.
              - :obj:`List[str]`: one annotation per output channel, e.g. the
                instrument names for audio source separation or descriptive
                labels for speech enhancement. The length must be `C'`.
        """
        # numpy_separate expects a (batch, channels, time) array.
        separated = separate.numpy_separate(self.model, inputs.reshape((1, 1, -1)))
        # Drop the batch dimension: `out` has shape (n_sources, time).
        out = separated[0]
        labels = [f"label_{i}" for i in range(out.shape[0])]
        return out, int(self.sampling_rate), labels
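

# A minimal usage sketch, not part of the Inference API contract above:
# "model_dir" is a hypothetical directory containing a "pytorch_model.bin"
# checkpoint loadable by `BaseModel.from_pretrained`, and one second of
# random noise stands in for real audio.
if __name__ == "__main__":
    pipeline = PreTrainedPipeline(path="model_dir")  # hypothetical path
    dummy_audio = np.random.randn(int(pipeline.sampling_rate)).astype(np.float32)
    sources, rate, labels = pipeline(dummy_audio)
    # e.g. (2, 8000) 8000 ['label_0', 'label_1'] for a 2-source 8 kHz model
    print(sources.shape, rate, labels)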