cahya committed on
Commit
0d80de1
1 Parent(s): 37c396e
Files changed (2) hide show
  1. app.py +7 -10
  2. requirements.txt +1 -1
app.py CHANGED
@@ -3,7 +3,7 @@ import torch
3
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
4
  from pyctcdecode import build_ctcdecoder
5
  import gradio as gr
6
- import sox
7
  import os
8
  from multiprocessing import Pool
9
 
@@ -30,12 +30,12 @@ class KenLM:
30
  text = [KenLM.lm_postprocess(x) for x in text]
31
  return text
32
 
 
33
  def convert(inputfile, outfile):
34
- sox_tfm = sox.Transformer()
35
- sox_tfm.set_output_format(
36
- file_type="wav", channels=1, encoding="signed-integer", rate=16000, bits=16
37
- )
38
- sox_tfm.build(inputfile, outfile)
39
 
40
 
41
  api_token = os.getenv("API_TOKEN")
@@ -62,13 +62,10 @@ input_ = gr.inputs.Audio(source="microphone", type="file")
62
 
63
  gr.Interface(parse_transcription, inputs=input_, outputs=[output],
64
  analytics_enabled=False,
65
- show_tips=False,
66
- theme='huggingface',
67
- layout='vertical',
68
  title="Automatic Speech Recognition for Luganda",
69
  description="Speech Recognition Live Demo for Luganda",
70
  article="This demo was built for the "
71
  "<a href='https://zindi.africa/competitions/mozilla-luganda-automatic-speech-recognition' target='_blank'>Mozilla Luganda Automatic Speech Recognition Competition</a>. "
72
  "It uses the <a href='https://huggingface.co/indonesian-nlp/wav2vec2-luganda' target='_blank'>indonesian-nlp/wav2vec2-luganda</a> model "
73
  "which was fine-tuned on Luganda Common Voice speech datasets.",
74
- enable_queue=True).launch( inline=False)
3
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
4
  from pyctcdecode import build_ctcdecoder
5
  import gradio as gr
6
+ import librosa
7
  import os
8
  from multiprocessing import Pool
9
 
30
  text = [KenLM.lm_postprocess(x) for x in text]
31
  return text
32
 
33
+
34
  def convert(inputfile, outfile):
35
+ target_sr = 16000
36
+ data, sample_rate = librosa.load(inputfile)
37
+ data = librosa.resample(data, orig_sr=sample_rate, target_sr=target_sr)
38
+ sf.write(outfile, data, target_sr)
 
39
 
40
 
41
  api_token = os.getenv("API_TOKEN")
62
 
63
  gr.Interface(parse_transcription, inputs=input_, outputs=[output],
64
  analytics_enabled=False,
 
 
 
65
  title="Automatic Speech Recognition for Luganda",
66
  description="Speech Recognition Live Demo for Luganda",
67
  article="This demo was built for the "
68
  "<a href='https://zindi.africa/competitions/mozilla-luganda-automatic-speech-recognition' target='_blank'>Mozilla Luganda Automatic Speech Recognition Competition</a>. "
69
  "It uses the <a href='https://huggingface.co/indonesian-nlp/wav2vec2-luganda' target='_blank'>indonesian-nlp/wav2vec2-luganda</a> model "
70
  "which was fine-tuned on Luganda Common Voice speech datasets.",
71
+ enable_queue=True).launch(inline=False, server_name="0.0.0.0", show_tips=False, enable_queue=True)
requirements.txt CHANGED
@@ -2,7 +2,7 @@ gradio
2
  soundfile
3
  torch
4
  transformers
5
- sox
6
  sentencepiece
7
  pyctcdecode==0.3.0
8
  kenlm @ https://github.com/kpu/kenlm/archive/master.zip
2
  soundfile
3
  torch
4
  transformers
5
+ librosa
6
  sentencepiece
7
  pyctcdecode==0.3.0
8
  kenlm @ https://github.com/kpu/kenlm/archive/master.zip