ales committed on
Commit
1c6b627
1 Parent(s): 6b93fd2

optimized model loading, added file upload widget

Browse files
Files changed (1) hide show
  1. app.py +37 -12
app.py CHANGED
@@ -13,17 +13,30 @@ HF_HUB_URL = 'ales/wav2vec2-cv-be'
13
  LM_HUB_FP = 'language_model/cv8be_5gram.bin'
14
  MODEL_SAMPLING_RATE = 16_000 # 16kHz
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- def main(audio_fp: str):
18
  # read audio file
19
  inputs = librosa.load(audio_fp, sr=MODEL_SAMPLING_RATE, mono=True)[0]
20
 
21
- # download Language Model from HF Hub
22
- lm_fp = hf_hub_download(repo_id=HF_HUB_URL, filename=LM_HUB_FP)
23
-
24
- # init pipeline
25
- pipeline = PreTrainedPipeline(model_path=HF_HUB_URL, language_model_fp=lm_fp)
26
-
27
  # recognize speech
28
  pipeline_res = pipeline(inputs=inputs)
29
  text = pipeline_res['text'][0] # unpack batch of size 1
@@ -31,6 +44,10 @@ def main(audio_fp: str):
31
  # add technical information to the output
32
  tech_data = pipeline_res
33
  del tech_data['text']
 
 
 
 
34
  tech_data['model_sampling_rate'] = MODEL_SAMPLING_RATE
35
  tech_data['inputs_shape'] = inputs.shape
36
  tech_data['inputs_max'] = inputs.max().item()
@@ -49,10 +66,18 @@ The model used can be found here: [ales/wav2vec2-cv-be](https://huggingface.co/a
49
 
50
  iface = gr.Interface(
51
  fn=main,
52
- inputs=gr.inputs.Audio(
53
- source='microphone', type='filepath',
54
- label='Запішыце аўдыяфайл, каб распазнаць маўленьне'
55
- ),
 
 
 
 
 
 
 
 
56
  outputs=[
57
  gr.outputs.Textbox(type='str', label='Распазнаны тэкст'),
58
  gr.outputs.Textbox(type='str', label='Тэхнічная інфармацыя')
@@ -61,7 +86,7 @@ iface = gr.Interface(
61
  description=('Мадэль распазнаваньня беларускага маўленьня, навучаная на датсэце Common Voice 8.\n'
62
  'Акустычная мадэль + моўная мадэль.'
63
  ),
64
- article=article,
65
  )
66
 
67
  iface.launch(enable_queue=True)
 
13
  LM_HUB_FP = 'language_model/cv8be_5gram.bin'
14
  MODEL_SAMPLING_RATE = 16_000 # 16kHz
15
 
16
+ # download Language Model from HF Hub
17
+ lm_fp = hf_hub_download(repo_id=HF_HUB_URL, filename=LM_HUB_FP)
18
+
19
+ # init pipeline
20
+ pipeline = PreTrainedPipeline(model_path=HF_HUB_URL, language_model_fp=lm_fp)
21
+
22
+
23
+ def main(recorded_audio_fp: str, uploaded_audio_fp: str):
24
+ audio_fp = None
25
+ if recorded_audio_fp is not None:
26
+ audio_fp = recorded_audio_fp
27
+ used_audiofile = 'recorded'
28
+ elif uploaded_audio_fp is not None:
29
+ audio_fp = uploaded_audio_fp
30
+ used_audiofile = 'uploaded'
31
+ else:
32
+ return (
33
+ 'Памылка! Вы мусіце альбо запісаць, альбо запампаваць аўдыяфайл.',
34
+ 'Error! You have to either record or upload an audiofile.'
35
+ )
36
 
 
37
  # read audio file
38
  inputs = librosa.load(audio_fp, sr=MODEL_SAMPLING_RATE, mono=True)[0]
39
 
 
 
 
 
 
 
40
  # recognize speech
41
  pipeline_res = pipeline(inputs=inputs)
42
  text = pipeline_res['text'][0] # unpack batch of size 1
 
44
  # add technical information to the output
45
  tech_data = pipeline_res
46
  del tech_data['text']
47
+ tech_data['used_audiofile'] = used_audiofile
48
+ tech_data['recorded_file_present'] = recorded_audio_fp is not None
49
+ tech_data['uploaded_file_present'] = uploaded_audio_fp is not None
50
+ tech_data['audiofile_path'] = audio_fp
51
  tech_data['model_sampling_rate'] = MODEL_SAMPLING_RATE
52
  tech_data['inputs_shape'] = inputs.shape
53
  tech_data['inputs_max'] = inputs.max().item()
 
66
 
67
  iface = gr.Interface(
68
  fn=main,
69
+ inputs=[
70
+ gr.inputs.Audio(
71
+ source='microphone', type='filepath',
72
+ label='Запішыце аўдыяфайл, каб распазнаць маўленьне',
73
+ optional=True,
74
+ ),
75
+ gr.inputs.Audio(
76
+ source='upload', type='filepath',
77
+ label='Альбо загрузіце ўжо запісаны аўдыяфайл сюды',
78
+ optional=True
79
+ ),
80
+ ],
81
  outputs=[
82
  gr.outputs.Textbox(type='str', label='Распазнаны тэкст'),
83
  gr.outputs.Textbox(type='str', label='Тэхнічная інфармацыя')
 
86
  description=('Мадэль распазнаваньня беларускага маўленьня, навучаная на датсэце Common Voice 8.\n'
87
  'Акустычная мадэль + моўная мадэль.'
88
  ),
89
+ article=article
90
  )
91
 
92
  iface.launch(enable_queue=True)