qanastek committed on
Commit
020e681
•
1 Parent(s): 992c7e6
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -28,12 +28,6 @@ models_paths = {
28
  "el-GR": "jonatasgrosman/wav2vec2-large-xlsr-53-greek",
29
  }
30
 
31
- # Initialize the models
32
- for lang_code in models_paths.keys():
33
- models[lang_code] = {}
34
- models[lang_code]["processor"] = Wav2Vec2Processor.from_pretrained(models_paths[lang_code])
35
- models[lang_code]["model"] = Wav2Vec2ForCTC.from_pretrained(models_paths[lang_code])
36
-
37
  # Classifier Intent
38
  model_name = 'qanastek/XLMRoberta-Alexa-Intents-Classification'
39
  tokenizer_intent = AutoTokenizer.from_pretrained(model_name)
@@ -59,6 +53,11 @@ examples = [[e, e.split("=")[0].split("/")[-1]] for e in examples]
59
  def transcribe(audio_path, lang_code):
60
 
61
  speech_array, sampling_rate = librosa.load(audio_path, sr=16_000)
 
 
 
 
 
62
 
63
  # Load model
64
  processor_asr = models[lang_code]["processor"]
@@ -97,6 +96,7 @@ def getUniform(text):
97
  def predict(wav_file, lang_code):
98
 
99
  if lang_code not in models_paths.keys():
 
100
  return {
101
  "The language code is unknown!"
102
  }
 
28
  "el-GR": "jonatasgrosman/wav2vec2-large-xlsr-53-greek",
29
  }
30
 
 
 
 
 
 
 
31
  # Classifier Intent
32
  model_name = 'qanastek/XLMRoberta-Alexa-Intents-Classification'
33
  tokenizer_intent = AutoTokenizer.from_pretrained(model_name)
 
53
  def transcribe(audio_path, lang_code):
54
 
55
  speech_array, sampling_rate = librosa.load(audio_path, sr=16_000)
56
+
57
+ if lang_code not in models:
58
+ models[lang_code] = {}
59
+ models[lang_code]["processor"] = Wav2Vec2Processor.from_pretrained(models_paths[lang_code])
60
+ models[lang_code]["model"] = Wav2Vec2ForCTC.from_pretrained(models_paths[lang_code])
61
 
62
  # Load model
63
  processor_asr = models[lang_code]["processor"]
 
96
  def predict(wav_file, lang_code):
97
 
98
  if lang_code not in models_paths.keys():
99
+
100
  return {
101
  "The language code is unknown!"
102
  }