Spaces:
Sleeping
Sleeping
| import warnings | |
| import whisper | |
| from utilities.azure_utils import AzureVoiceData | |
| from utilities.polly_utils import PollyVoiceData | |
# Default values for feature toggles.
USE_GPT4_DEFAULT = True
FORCE_TRANSLATE_DEFAULT = True

# Value of the language selector that means "no explicit language chosen";
# the transcribe functions compare their whisper_lang argument against it.
WHISPER_DETECT_LANG = "Detect language"

# Shared voice-data instances, created once at import time.
POLLY_VOICE_DATA = PollyVoiceData()
AZURE_VOICE_DATA = AzureVoiceData()

# --- Whisper setup: begin (section can be commented out to skip Whisper) ---
# NOTE: this silences every warning category for the whole process.
warnings.filterwarnings("ignore")
WHISPER_MODEL = whisper.load_model("large")
print("WHISPER_MODEL", WHISPER_MODEL)
# --- Whisper setup: end ---
def transcribe(aud_inp, whisper_lang):
    """Transcribe audio to text with the module-level Whisper model.

    Args:
        aud_inp: Audio input handed to ``whisper.load_audio`` (presumably a
            file path -- confirm against the caller), or ``None`` when no
            audio was supplied.
        whisper_lang: ``WHISPER_DETECT_LANG`` to let Whisper identify the
            spoken language, otherwise a language name that
            ``POLLY_VOICE_DATA.get_whisper_lang_code`` translates into a
            Whisper language code.

    Returns:
        The decoded text, or ``""`` when there is no audio or nothing was
        decoded.
    """
    if aud_inp is None:
        return ""

    # whisper.decode operates on a single fixed-length segment, so the
    # waveform is padded or trimmed before the spectrogram is computed.
    aud = whisper.pad_or_trim(whisper.load_audio(aud_inp))

    # Take the mel-bin count from the loaded checkpoint: the default (80)
    # does not match every model (e.g. large-v3 uses 128), and a mismatch
    # makes decoding fail.
    mel = whisper.log_mel_spectrogram(
        aud, n_mels=WHISPER_MODEL.dims.n_mels
    ).to(WHISPER_MODEL.device)

    # Half precision is only usable off-CPU; whisper's own transcribe()
    # applies the same fallback, but decode() does not.
    decode_kwargs = {"fp16": WHISPER_MODEL.device.type != "cpu"}
    if whisper_lang != WHISPER_DETECT_LANG:
        decode_kwargs["language"] = POLLY_VOICE_DATA.get_whisper_lang_code(
            whisper_lang
        )
    # With no language set, decode() detects the language itself, so a
    # separate detect_language() pass is not needed.
    options = whisper.DecodingOptions(**decode_kwargs)

    result = whisper.decode(WHISPER_MODEL, mel, options)
    result_text = result.text if result and result.text else ""
    print("result.text", result_text)
    return result_text
| # TEMPORARY FOR TESTING | |
def transcribe_dummy(aud_inp_tb, whisper_lang):
    """Stand-in for ``transcribe`` that never runs the Whisper model.

    Prints which language handling would be used for ``whisper_lang`` and
    hands the input straight back.

    Args:
        aud_inp_tb: Value to echo back, or ``None`` when nothing was supplied.
        whisper_lang: ``WHISPER_DETECT_LANG`` or a language name understood by
            ``POLLY_VOICE_DATA.get_whisper_lang_code``.

    Returns:
        ``""`` when ``aud_inp_tb`` is ``None``; otherwise ``aud_inp_tb``
        unchanged.
    """
    if aud_inp_tb is None:
        return ""

    if whisper_lang == WHISPER_DETECT_LANG:
        message = "Whisper will detect language"
    else:
        lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
        message = f"Whisper will use lang code: {lang_code}"

    print("result_text", message)
    return aud_inp_tb