# File size: 2,435 Bytes

import logging
from speechbrain.pretrained import EncoderClassifier
from typing import Dict, List, Any
import requests
from pydub import AudioSegment
from io import BytesIO
import tempfile
import os


def save_chunks_to_temp_files(url, chunk_length=5000, timeout=30.0):
    """Split the audio at *url* into fixed-length chunks saved as temp MP3 files.

    The audio is read either from the local filesystem (``file://`` URLs) or
    downloaded over HTTP(S) with ``requests``. It is then cut into consecutive
    windows of ``chunk_length`` milliseconds. When there is more than one
    chunk, the final chunk is replaced by the trailing ``chunk_length``
    milliseconds of the audio, so it has full length and may overlap the
    previous chunk instead of being a short remainder.

    Args:
        url: ``file://<path>`` for a local file, otherwise a URL to download.
        chunk_length: Chunk duration in milliseconds; must be positive.
        timeout: Timeout in seconds passed to ``requests.get`` so a stalled
            server cannot block the caller forever.

    Returns:
        A list of paths to the temporary ``.mp3`` files, in chunk order. The
        files are not deleted automatically; the caller must remove them.

    Raises:
        ValueError: If ``chunk_length`` is not positive, or the response's
            Content-Type header is missing or does not mention "audio".
        requests.HTTPError: If the download returns an error status.
    """
    # Fail fast, before any download or decoding work is done.
    if chunk_length <= 0:
        raise ValueError(
            f"chunk_length must be a positive number of milliseconds, got {chunk_length!r}"
        )

    if url.startswith("file://"):
        # Strip the scheme and let pydub read the local path directly.
        audio_segment = AudioSegment.from_file(url[len("file://"):])
    else:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # A missing header is treated the same as a non-audio one.
        content_type = response.headers.get("Content-Type", "")
        if "audio" not in content_type:
            raise ValueError("URL does not seem to be an audio file")

        # Decode from an in-memory file-like object; nothing is written to disk yet.
        audio_segment = AudioSegment.from_file(BytesIO(response.content))

    # Slice into consecutive windows of chunk_length milliseconds.
    chunks = [
        audio_segment[start : start + chunk_length]
        for start in range(0, len(audio_segment), chunk_length)
    ]

    # Swap a (possibly short) final remainder for the last full-length window.
    if len(chunks) > 1:
        chunks[-1] = audio_segment[-chunk_length:]

    temp_files = []
    try:
        for idx, chunk in enumerate(chunks):
            # Reserve a unique path, then release our handle before exporting:
            # re-opening a still-open NamedTemporaryFile by name fails on Windows.
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=f"_chunk{idx}.mp3"
            ) as temp_file:
                temp_path = temp_file.name
            # Track the path before exporting so a failed export is cleaned up too.
            temp_files.append(temp_path)
            # export() returns the open output handle; close it explicitly.
            chunk.export(temp_path, format="mp3").close()
    except Exception:
        # Do not leak the files already written if a later chunk fails.
        for leftover in temp_files:
            try:
                os.remove(leftover)
            except OSError:
                pass  # best effort: the original error is the one worth raising
        raise

    return temp_files


class EndpointHandler:
    """Callable handler that runs language identification on an audio URL.

    The audio is split into chunks by ``save_chunks_to_temp_files`` and each
    chunk is classified independently with a SpeechBrain ``EncoderClassifier``.
    """

    def __init__(self, path=""):
        """Load the ``speechbrain/lang-id-voxlingua107-ecapa`` classifier.

        Args:
            path: Accepted for interface compatibility; not used here, since
                the model source is the fixed identifier below.
        """
        self.model = EncoderClassifier.from_hparams(
            "speechbrain/lang-id-voxlingua107-ecapa"
        )
        print("model loaded")
        logging.info("model loaded")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Classify the language of every chunk of the audio named in *data*.

        Args:
            data: Request payload. The audio URL is taken (and removed) from
                the ``"inputs"`` key; ``file://`` URLs are read locally.

        Returns:
            One dict per chunk, in chunk order, with keys ``"prediction"``
            (a float) and ``"language"`` (the predicted label).

        Raises:
            ValueError: If ``"inputs"`` is missing or is not a non-empty string.
        """
        url = data.pop("inputs", data)

        print("audio_url", url)
        logging.info("audio_url %s", url)

        # Without an "inputs" key the pop() above falls back to the payload
        # dict itself; reject that (and any other non-URL value) explicitly
        # instead of failing later with an opaque AttributeError.
        if not isinstance(url, str) or not url:
            raise ValueError(
                'request payload must contain a non-empty string under "inputs"'
            )

        response = []

        temp_filepaths = save_chunks_to_temp_files(url)
        total = len(temp_filepaths)
        try:
            for i, path in enumerate(temp_filepaths):
                logging.info("processing chunk %s / %s", i, total)
                output = self.model.classify_file(path)

                # NOTE(review): assumes classify_file returns
                # (out_prob, score, index, text_lab) with score in the log
                # domain, hence exp() to get a probability - confirm against
                # the installed SpeechBrain version.
                response.append(
                    {
                        "prediction": float(output[1].exp()[0]),
                        "language": output[3][0],
                    }
                )
        finally:
            # Remove every chunk file, including unprocessed ones when a
            # classification fails part-way through.
            for path in temp_filepaths:
                try:
                    os.remove(path)
                except OSError:
                    logging.warning("could not remove temp file %s", path)

        return response