ales committed on
Commit
44daa8d
1 Parent(s): aca9f3d

updated outputs and description

Browse files
Files changed (1) hide show
  1. app.py +22 -14
app.py CHANGED
@@ -39,30 +39,38 @@ def main(audio_fp: str):
39
  pipeline = PreTrainedPipeline(model_path=HF_HUB_URL, language_model_fp=lm_fp)
40
 
41
  # recognize speech
42
- res = pipeline(inputs=inputs)
 
43
 
44
- # add additional information to the output
45
- res['text'] = res['text'][0] # unpack batch of size 1
46
- res['sampling_rate_orig'] = sampling_rate
47
- res['init_audio_shape'] = init_audio_shape
48
- res['converted_to_mono'] = converted_to_mono
49
- res['resampled_audio_shape'] = audio_resampled.shape
50
- res['inputs_shape'] = inputs.shape
51
- res['inputs_max'] = np.max(inputs).item()
52
- res['inputs_min'] = np.min(inputs).item()
 
53
 
54
- res_str = pformat(res)
55
 
56
- return res_str
57
 
58
 
59
  iface = gr.Interface(
60
  fn=main,
61
  inputs=gr.inputs.Audio(
62
  source='microphone', type='filepath',
63
- label='Запішыце аўдыяфайл, каб распазнаваць маўленне'
64
  ),
65
- outputs='text'
 
 
 
 
 
 
66
  )
67
 
68
  iface.launch()
 
39
  pipeline = PreTrainedPipeline(model_path=HF_HUB_URL, language_model_fp=lm_fp)
40
 
41
  # recognize speech
42
+ pipeline_res = pipeline(inputs=inputs)
43
+ text = pipeline_res['text'][0] # unpack batch of size 1
44
 
45
+ # add technical information to the output
46
+ tech_data = pipeline_res
47
+ del tech_data['text']
48
+ tech_data['sampling_rate_orig'] = sampling_rate
49
+ tech_data['init_audio_shape'] = init_audio_shape
50
+ tech_data['converted_to_mono'] = converted_to_mono
51
+ tech_data['resampled_audio_shape'] = audio_resampled.shape
52
+ tech_data['inputs_shape'] = inputs.shape
53
+ tech_data['inputs_max'] = np.max(inputs).item()
54
+ tech_data['inputs_min'] = np.min(inputs).item()
55
 
56
+ tech_data_str = pformat(tech_data)
57
 
58
+ return text, tech_data_str
59
 
60
 
61
  iface = gr.Interface(
62
  fn=main,
63
  inputs=gr.inputs.Audio(
64
  source='microphone', type='filepath',
65
+ label='Запішыце аўдыяфайл, каб распазнаваць маўленьне'
66
  ),
67
+ outputs=[
68
+ gr.outputs.Textbox(type='text', label='Распазнаны тэкст'),
69
+ gr.outputs.Textbox(type='text', label='Тэхнічная інфармацыя')
70
+ ],
71
+ title='wav2vec2 fine-tuned on CommonVoice 8 Be + Language Model',
72
+ description=('Мадэль распазнаваньня беларускага маўленьня, навучаная на датсэце Common Voice 8. '
73
+ 'Акустычная мадэль + моўная мадэль.')
74
  )
75
 
76
  iface.launch()