Spaces:

jonmatthis
/

skellychat

Sleeping

File size: 2,003 Bytes

d20eb01

import warnings

import whisper

from utilities.azure_utils import AzureVoiceData
from utilities.polly_utils import PollyVoiceData

# Default feature toggles. Their consumers are not in this file; presumably
# they are the initial UI values for forced translation and GPT-4 usage
# (confirm against callers).
FORCE_TRANSLATE_DEFAULT = True
USE_GPT4_DEFAULT = True

# Voice-data helpers, constructed once at import time. In this file
# POLLY_VOICE_DATA is used to map a language name to a Whisper language code.
POLLY_VOICE_DATA = PollyVoiceData()
AZURE_VOICE_DATA = AzureVoiceData()

# Pertains to WHISPER functionality
# Sentinel language choice: when the requested language equals this value,
# transcribe() does not force a language and lets Whisper decide.
WHISPER_DETECT_LANG = "Detect language"

# WHISPER is currently enabled (comment out the lines below to disable it).
# NOTE: this installs a blanket "ignore" filter, so every Python warning is
# suppressed from here on, not only Whisper's.
warnings.filterwarnings("ignore")
# Loaded once at import time so transcribe() can reuse the same model object.
WHISPER_MODEL = whisper.load_model("large")
print("WHISPER_MODEL", WHISPER_MODEL)


# WHISPER transcription is currently enabled (comment out to disable WHISPER)
def transcribe(aud_inp, whisper_lang):
    """Transcribe an audio file to text with the module-level Whisper model.

    Args:
        aud_inp: Audio input as accepted by ``whisper.load_audio`` (a file
            path), or ``None`` when no audio was provided.
        whisper_lang: Either ``WHISPER_DETECT_LANG`` to let Whisper choose the
            language, or a language name that
            ``POLLY_VOICE_DATA.get_whisper_lang_code`` maps to a Whisper
            language code.

    Returns:
        The decoded text, or ``""`` when ``aud_inp`` is ``None`` or the
        decoder produced no text.
    """
    if aud_inp is None:
        return ""

    # pad_or_trim fits the waveform to the fixed-length window decode expects.
    aud = whisper.pad_or_trim(whisper.load_audio(aud_inp))
    # Use the mel-bin count of the loaded checkpoint rather than the library
    # default: "large" may resolve to a model whose encoder expects a
    # different number of bins, and a mismatch fails inside the encoder.
    mel = whisper.log_mel_spectrogram(
        aud, n_mels=WHISPER_MODEL.dims.n_mels
    ).to(WHISPER_MODEL.device)

    if whisper_lang == WHISPER_DETECT_LANG:
        # No language forced: whisper.decode runs language detection itself,
        # so a separate detect_language() pass here would be redundant work.
        options = whisper.DecodingOptions()
    else:
        whisper_lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
        options = whisper.DecodingOptions(language=whisper_lang_code)

    result = whisper.decode(WHISPER_MODEL, mel, options)
    print("result.text", result.text)
    # Normalise a missing/empty transcription to the empty string.
    return result.text or ""


# TEMPORARY FOR TESTING
def transcribe_dummy(aud_inp_tb, whisper_lang):
    """Stand-in for ``transcribe`` that skips the Whisper pipeline entirely.

    Prints which language handling Whisper *would* use for ``whisper_lang``
    and then echoes the input back unchanged.

    Args:
        aud_inp_tb: Value to echo back, or ``None`` when nothing was provided.
        whisper_lang: Either ``WHISPER_DETECT_LANG`` or a language name that
            ``POLLY_VOICE_DATA.get_whisper_lang_code`` maps to a language code.

    Returns:
        ``""`` when ``aud_inp_tb`` is ``None``; otherwise ``aud_inp_tb`` itself.
    """
    if aud_inp_tb is None:
        return ""

    if whisper_lang == WHISPER_DETECT_LANG:
        message = "Whisper will detect language"
    else:
        lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
        message = f"Whisper will use lang code: {lang_code}"

    print("result_text", message)
    # The dummy returns its input, not the message printed above.
    return aud_inp_tb