datnth1709 committed on
Commit
19ac608
1 Parent(s): 812d36d
Files changed (2) hide show
  1. app.py +6 -18
  2. requirements.txt +8 -13
app.py CHANGED
@@ -1,18 +1,15 @@
1
  import gradio as gr
2
  import nltk
3
  import librosa
4
- from optimum.onnxruntime import ORTModelForSeq2SeqLM
5
-
6
- from transformers import pipeline, TranslationPipeline, AutoTokenizer, TranslationPipeline
7
- from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, Wav2Vec2Tokenizer
8
  from transformers.file_utils import cached_path, hf_bucket_url
9
  import os, zipfile
 
10
  from datasets import load_dataset
11
  import torch
12
  import kenlm
13
  import torchaudio
14
  from pyctcdecode import Alphabet, BeamSearchDecoderCTC, LanguageModel
15
- device = torch.device(0 if torch.cuda.is_available() else "cpu")
16
 
17
  """Vietnamese speech2text"""
18
  cache_dir = './cache/'
@@ -126,17 +123,8 @@ def speech2text_en(input_file):
126
  """Machine translation"""
127
  vien_model_checkpoint = "datnth1709/finetuned_HelsinkiNLP-opus-mt-vi-en_PhoMT"
128
  envi_model_checkpoint = "datnth1709/finetuned_HelsinkiNLP-opus-mt-en-vi_PhoMT"
129
- # vien_translator = pipeline("translation", model=vien_model_checkpoint)
130
- # envi_translator = pipeline("translation", model=envi_model_checkpoint)
131
-
132
- vien_tokenizer = AutoTokenizer.from_pretrained(vien_model_checkpoint, return_tensors="pt")
133
- vien_model = ORTModelForSeq2SeqLM.from_pretrained(vien_model_checkpoint)
134
- vien_translator = TranslationPipeline(model=vien_model, tokenizer=vien_tokenizer,clean_up_tokenization_spaces=True, device=device)
135
-
136
- envi_tokenizer = AutoTokenizer.from_pretrained(envi_model_checkpoint, return_tensors="pt")
137
- envi_model = ORTModelForSeq2SeqLM.from_pretrained(envi_model_checkpoint)
138
- envi_translator = TranslationPipeline(model=envi_model, tokenizer=envi_tokenizer,clean_up_tokenization_spaces=True, device=device)
139
-
140
 
141
  def translate_vi2en(Vietnamese):
142
  return vien_translator(Vietnamese)[0]['translation_text']
@@ -188,9 +176,9 @@ def transcribe_en(audio, state_en="", state_vi=""):
188
  transcription = eng_tokenizer.decode(predicted_ids[0])
189
  # Output is all upper case
190
  transcription = correct_casing(transcription.lower())
191
- state_en += transcription + "+"
192
  vi_text = translate_en2vi(transcription)
193
- state_vi += vi_text + "+"
194
  return state_en, state_vi
195
 
196
  """Gradio demo"""
 
1
  import gradio as gr
2
  import nltk
3
  import librosa
4
+ from transformers import pipeline
 
 
 
5
  from transformers.file_utils import cached_path, hf_bucket_url
6
  import os, zipfile
7
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, Wav2Vec2Tokenizer
8
  from datasets import load_dataset
9
  import torch
10
  import kenlm
11
  import torchaudio
12
  from pyctcdecode import Alphabet, BeamSearchDecoderCTC, LanguageModel
 
13
 
14
  """Vietnamese speech2text"""
15
  cache_dir = './cache/'
 
123
  """Machine translation"""
124
  vien_model_checkpoint = "datnth1709/finetuned_HelsinkiNLP-opus-mt-vi-en_PhoMT"
125
  envi_model_checkpoint = "datnth1709/finetuned_HelsinkiNLP-opus-mt-en-vi_PhoMT"
126
+ vien_translator = pipeline("translation", model=vien_model_checkpoint)
127
+ envi_translator = pipeline("translation", model=envi_model_checkpoint)
 
 
 
 
 
 
 
 
 
128
 
129
  def translate_vi2en(Vietnamese):
130
  return vien_translator(Vietnamese)[0]['translation_text']
 
176
  transcription = eng_tokenizer.decode(predicted_ids[0])
177
  # Output is all upper case
178
  transcription = correct_casing(transcription.lower())
179
+ state_en += transcription + " "
180
  vi_text = translate_en2vi(transcription)
181
+ state_vi += vi_text + " "
182
  return state_en, state_vi
183
 
184
  """Gradio demo"""
requirements.txt CHANGED
@@ -1,22 +1,17 @@
1
- transformers==4.21.1
2
- transformers[sentencepiece]
3
- transformers[onnx]
4
- datasets==2.4.0
5
- optimum==1.3.0
6
- sacremoses==0.0.53
7
- onnxruntime==1.12.1
8
- onnx==1.12.0
9
- optimum[graphcore]
10
- torch
11
- torchaudio
12
  pyctcdecode==v0.1.0
13
  speechbrain
14
  pydub
15
  kenlm
 
16
  soundfile
17
  ffmpeg-python
18
  gradio
19
  nltk
20
  librosa
21
- https://github.com/kpu/kenlm/archive/master.zip
22
-
 
 
1
+ torch==1.9.0
2
+ torchaudio==0.9.0
3
+ transformers==4.9.2
4
+ datasets==1.11.0
 
 
 
 
 
 
 
5
  pyctcdecode==v0.1.0
6
  speechbrain
7
  pydub
8
  kenlm
9
+ pyctcdecode
10
  soundfile
11
  ffmpeg-python
12
  gradio
13
  nltk
14
  librosa
15
+ transformers
16
+ transformers[sentencepiece]
17
+ https://github.com/kpu/kenlm/archive/master.zip