File size: 1,644 Bytes
693bcba
d00a1b3
 
693bcba
 
b00ee09
d00a1b3
 
 
 
 
 
 
 
b00ee09
d00a1b3
 
29fc826
d00a1b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from typing import Dict, List, Tuple

import numpy as np
from asteroid import separate
from asteroid.models import BaseModel
import os


class PreTrainedPipeline():
    def __init__(self, path=""):
        """Preload the separation model once at pipeline construction.

        Args:
            path (:obj:`str`):
                Directory containing the serialized weights
                (``pytorch_model.bin``) loadable by asteroid's
                :obj:`BaseModel.from_pretrained`.
        """
        self.model = BaseModel.from_pretrained(os.path.join(path, "pytorch_model.bin"))
        # Cache the model's native rate; callers are expected to resample
        # inputs to this rate before invoking the pipeline.
        self.sampling_rate = self.model.sample_rate

    def __call__(self, inputs: np.ndarray) -> Tuple[np.ndarray, int, List[str]]:
        """
        Args:
            inputs (:obj:`np.ndarray`):
                The raw waveform of audio received. By default sampled at `self.sampling_rate`.
                The shape of this array is `T`, where `T` is the time axis
        Return:
            A :obj:`tuple` containing:
              - :obj:`np.ndarray`:
                 The return shape of the array must be `C'`x`T'`
              - a :obj:`int`: the sampling rate as an int in Hz.
              - a :obj:`List[str]`: the annotation for each out channel.
                    This can be the name of the instruments for audio source separation
                    or some annotation for speech enhancement. The length must be `C'`.
        """
        # numpy_separate expects a (batch, channels, time) array; wrap the
        # 1-D waveform in singleton batch/channel dimensions.
        separated = separate.numpy_separate(self.model, inputs.reshape((1, 1, -1)))
        # Single batch item -> (C', T') array of estimated sources.
        sources = separated[0]
        # Generic per-source labels: the model does not expose source names.
        labels = [f"label_{i}" for i in range(sources.shape[0])]
        return sources, int(self.sampling_rate), labels