Spaces:

ales
/

wav2vec2-cv-be-lm

Running

App Files Community

ales committed on Apr 13, 2022

Commit

44daa8d

•

1 Parent(s): aca9f3d

updated outputs and description

Browse files

Files changed (1) hide show

app.py +22 -14

app.py CHANGED Viewed

@@ -39,30 +39,38 @@ def main(audio_fp: str):
     pipeline = PreTrainedPipeline(model_path=HF_HUB_URL, language_model_fp=lm_fp)
     # recognize speech
-    res = pipeline(inputs=inputs)
-    # add additional information to the output
-    res['text'] = res['text'][0]  # unpack batch of size 1
-    res['sampling_rate_orig'] = sampling_rate
-    res['init_audio_shape'] = init_audio_shape
-    res['converted_to_mono'] = converted_to_mono
-    res['resampled_audio_shape'] = audio_resampled.shape
-    res['inputs_shape'] = inputs.shape
-    res['inputs_max'] = np.max(inputs).item()
-    res['inputs_min'] = np.min(inputs).item()
-    res_str = pformat(res)
-    return res_str
 iface = gr.Interface(
     fn=main,
     inputs=gr.inputs.Audio(
         source='microphone', type='filepath',
-        label='Запішыце аўдыяфайл, каб распазнаваць маўленне'
     ),
-    outputs='text'
 )
 iface.launch()

     pipeline = PreTrainedPipeline(model_path=HF_HUB_URL, language_model_fp=lm_fp)
     # recognize speech
+    pipeline_res = pipeline(inputs=inputs)
+    text = pipeline_res['text'][0]  # unpack batch of size 1
+    # add technical information to the output
+    tech_data = pipeline_res
+    del tech_data['text']
+    tech_data['sampling_rate_orig'] = sampling_rate
+    tech_data['init_audio_shape'] = init_audio_shape
+    tech_data['converted_to_mono'] = converted_to_mono
+    tech_data['resampled_audio_shape'] = audio_resampled.shape
+    tech_data['inputs_shape'] = inputs.shape
+    tech_data['inputs_max'] = np.max(inputs).item()
+    tech_data['inputs_min'] = np.min(inputs).item()
+    tech_data_str = pformat(tech_data)
+    return text, tech_data_str
 iface = gr.Interface(
     fn=main,
     inputs=gr.inputs.Audio(
         source='microphone', type='filepath',
+        label='Запішыце аўдыяфайл, каб распазнаваць маўленьне'
     ),
+    outputs=[
+        gr.outputs.Textbox(type='text', label='Распазнаны тэкст'),
+        gr.outputs.Textbox(type='text', label='Тэхнічная інфармацыя')
+    ],
+    title='wav2vec2 fine-tuned on CommonVoice 8 Be + Language Model',
+    description=('Мадэль распазнаваньня беларускага маўленьня, навучаная на датсэце Common Voice 8. '
+                'Акустычная мадэль + моўная мадэль.')
 )
 iface.launch()