import warnings

import whisper

from utilities.azure_utils import AzureVoiceData
from utilities.polly_utils import PollyVoiceData

FORCE_TRANSLATE_DEFAULT = True
USE_GPT4_DEFAULT = True

POLLY_VOICE_DATA = PollyVoiceData()
AZURE_VOICE_DATA = AzureVoiceData()

# Pertains to WHISPER functionality.
# Sentinel value of the language selector meaning "let Whisper pick the language".
WHISPER_DETECT_LANG = "Detect language"

# Whisper setup: runs at import time. Comment out this section (and
# `transcribe`) to run without Whisper.
# NOTE: this silences *all* warnings process-wide, not only Whisper's.
warnings.filterwarnings("ignore")
WHISPER_MODEL = whisper.load_model("large")
print("WHISPER_MODEL", WHISPER_MODEL)


def transcribe(aud_inp, whisper_lang):
    """Transcribe an audio input to text with the module-level Whisper model.

    Args:
        aud_inp: Audio input handed to ``whisper.load_audio`` (presumably a
            file path -- confirm against the caller). ``None`` means no audio.
        whisper_lang: Either ``WHISPER_DETECT_LANG`` or a language name that
            ``POLLY_VOICE_DATA.get_whisper_lang_code`` can map to a Whisper
            language code.

    Returns:
        The decoded text, or ``""`` when there is no audio or no text.
    """
    if aud_inp is None:
        return ""

    # pad_or_trim fits the audio to the fixed-length window the decoder expects.
    aud = whisper.pad_or_trim(whisper.load_audio(aud_inp))

    # Take the mel-bin count from the loaded checkpoint rather than relying on
    # the library default: the "large" alias may resolve to a model with a
    # different number of mel bins, which would otherwise fail on shape.
    mel = whisper.log_mel_spectrogram(
        aud, n_mels=WHISPER_MODEL.dims.n_mels
    ).to(WHISPER_MODEL.device)

    # With no language set, whisper.decode detects the language itself, so no
    # separate detect_language() pass is needed here.
    if whisper_lang != WHISPER_DETECT_LANG:
        whisper_lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
        options = whisper.DecodingOptions(language=whisper_lang_code)
    else:
        options = whisper.DecodingOptions()

    result = whisper.decode(WHISPER_MODEL, mel, options)
    print("result.text", result.text)

    return result.text or ""


# TEMPORARY FOR TESTING
def transcribe_dummy(aud_inp_tb, whisper_lang):
    """Stand-in for ``transcribe`` that never runs the Whisper model.

    Prints which language handling would have been used and returns the input
    unchanged.

    Args:
        aud_inp_tb: Input value; echoed back as the result. ``None`` yields ``""``.
        whisper_lang: Either ``WHISPER_DETECT_LANG`` or a language name that
            ``POLLY_VOICE_DATA.get_whisper_lang_code`` can map to a code.

    Returns:
        ``""`` when ``aud_inp_tb`` is ``None``, otherwise ``aud_inp_tb`` itself.
    """
    if aud_inp_tb is None:
        return ""

    if whisper_lang != WHISPER_DETECT_LANG:
        whisper_lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
        result_text = f"Whisper will use lang code: {whisper_lang_code}"
    else:
        result_text = "Whisper will detect language"
    print("result_text", result_text)

    return aud_inp_tb