lucio committed on
Commit
028ff01
1 Parent(s): 7fd4a61

Fix Spanish ASR

Browse files
Files changed (2) hide show
  1. app.py +8 -4
  2. requirements.txt +2 -1
app.py CHANGED
@@ -10,7 +10,7 @@ from stt import Model
10
 
11
  import torch
12
  from transformers import pipeline
13
-
14
  import torchaudio
15
  from speechbrain.pretrained import EncoderClassifier
16
 
@@ -43,20 +43,24 @@ def client(audio_data: np.array, sample_rate: int, default_lang: str):
43
 
44
  output_audio.seek(0)
45
  fin = wave.open(output_audio, 'rb')
46
- audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
 
 
 
47
 
48
  fin.close()
49
  print(default_lang, text_lab)
50
 
51
  if text_lab == 'Spanish':
52
  text_lab = 'español'
 
53
  asr_pipeline = STT_MODELS['español']
54
- result = asr_pipeline(audio, chunk_length_s=5, stride_length_s=1)['text']
55
 
56
  else:
57
  text_lab = default_lang
58
  ds = STT_MODELS[default_lang]
59
- result = ds.stt(audio)
60
 
61
  return f"{text_lab}: {result}"
62
 
 
10
 
11
  import torch
12
  from transformers import pipeline
13
+ import librosa
14
  import torchaudio
15
  from speechbrain.pretrained import EncoderClassifier
16
 
 
43
 
44
  output_audio.seek(0)
45
  fin = wave.open(output_audio, 'rb')
46
+ coqui_audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
47
+
48
+ output_audio.seek(0)
49
+ hf_audio, _ = librosa.load(output_audio)
50
 
51
  fin.close()
52
  print(default_lang, text_lab)
53
 
54
  if text_lab == 'Spanish':
55
  text_lab = 'español'
56
+
57
  asr_pipeline = STT_MODELS['español']
58
+ result = asr_pipeline(hf_audio, chunk_length_s=5, stride_length_s=1)['text']
59
 
60
  else:
61
  text_lab = default_lang
62
  ds = STT_MODELS[default_lang]
63
+ result = ds.stt(coqui_audio)
64
 
65
  return f"{text_lab}: {result}"
66
 
requirements.txt CHANGED
@@ -3,4 +3,5 @@ STT==1.0.0
3
  pydub==0.25.1
4
  speechbrain==0.5.10
5
  torchaudio
6
- transformers
 
 
3
  pydub==0.25.1
4
  speechbrain==0.5.10
5
  torchaudio
6
+ transformers
7
+ librosa