Spaces:
Sleeping
Sleeping
File size: 2,003 Bytes
d20eb01 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import warnings
import whisper
from utilities.azure_utils import AzureVoiceData
from utilities.polly_utils import PollyVoiceData
# Default values for two feature flags. Their consumers are not visible in this
# file (presumably UI toggles — confirm against the caller).
FORCE_TRANSLATE_DEFAULT = True
USE_GPT4_DEFAULT = True

# Voice metadata helpers. POLLY_VOICE_DATA is also used by the transcribe
# functions below to map a language selection to a Whisper language code.
POLLY_VOICE_DATA = PollyVoiceData()
AZURE_VOICE_DATA = AzureVoiceData()

# Pertains to WHISPER functionality
# Sentinel language choice: when it is selected, no explicit language code is
# passed to Whisper.
WHISPER_DETECT_LANG = "Detect language"

# Whisper is enabled: the model is loaded once, at import time.
# NOTE(review): filterwarnings("ignore") silences every warning in the process,
# not only the ones raised while loading Whisper.
warnings.filterwarnings("ignore")
WHISPER_MODEL = whisper.load_model("large")
print("WHISPER_MODEL", WHISPER_MODEL)
# End of Whisper setup
def transcribe(aud_inp, whisper_lang):
    """Transcribe an audio file to text with the module-level Whisper model.

    Args:
        aud_inp: Audio file reference handed to ``whisper.load_audio``, or
            ``None`` when no audio was supplied.
        whisper_lang: Language choice. ``WHISPER_DETECT_LANG`` lets Whisper
            detect the language; any other value is mapped to a Whisper
            language code via ``POLLY_VOICE_DATA.get_whisper_lang_code``.

    Returns:
        The decoded text, or ``""`` when there is no audio or nothing was
        decoded.
    """
    if aud_inp is None:
        return ""

    # whisper.decode() operates on a single fixed-length window, so the audio
    # is padded/trimmed before the spectrogram is computed.
    aud = whisper.pad_or_trim(whisper.load_audio(aud_inp))

    # The number of mel bins depends on the checkpoint (large-v3 uses 128, not
    # the default 80), so take it from the loaded model instead of relying on
    # the library default.
    mel = whisper.log_mel_spectrogram(
        aud, n_mels=WHISPER_MODEL.dims.n_mels
    ).to(WHISPER_MODEL.device)

    if whisper_lang != WHISPER_DETECT_LANG:
        whisper_lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
        options = whisper.DecodingOptions(language=whisper_lang_code)
    else:
        # With no language set, decode() performs language detection itself,
        # so a separate detect_language() pass would only duplicate work.
        options = whisper.DecodingOptions()

    result = whisper.decode(WHISPER_MODEL, mel, options)

    # Guard before touching .text so an empty result yields "" rather than an
    # AttributeError in the debug print.
    result_text = result.text if result and result.text else ""
    print("result.text", result_text)
    return result_text
# TEMPORARY FOR TESTING
def transcribe_dummy(aud_inp_tb, whisper_lang):
    """Stand-in for ``transcribe`` that never runs the Whisper model.

    Prints which language handling Whisper *would* use for ``whisper_lang``
    and hands the input straight back.

    Args:
        aud_inp_tb: Input value to echo back, or ``None``.
        whisper_lang: Language choice; ``WHISPER_DETECT_LANG`` means
            auto-detection, anything else is looked up through
            ``POLLY_VOICE_DATA.get_whisper_lang_code``.

    Returns:
        ``""`` when ``aud_inp_tb`` is ``None``, otherwise ``aud_inp_tb``
        unchanged.
    """
    if aud_inp_tb is None:
        return ""

    if whisper_lang != WHISPER_DETECT_LANG:
        lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
        message = f"Whisper will use lang code: {lang_code}"
    else:
        message = "Whisper will detect language"

    print("result_text", message)
    return aud_inp_tb
|