johntsi committed on
Commit
62a4416
1 Parent(s): eafabee

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +13 -6
README.md CHANGED
@@ -255,6 +255,13 @@ This version of ZeroSwot is trained with ASR data from CommonVoice, and adapting
255
 
256
  ## Usage
257
 
 
 
 
 
 
 
 
258
  ```python
259
  from transformers import Wav2Vec2Processor, NllbTokenizer, AutoModel, AutoModelForSeq2SeqLM
260
  import torchaudio
@@ -271,21 +278,21 @@ processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60
271
  tokenizer = NllbTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
272
 
273
  # Load ZeroSwot Encoder
274
- commit_hash = "1d38f5dbf4f89adefe06961e4ec344b21f74ebae"
275
  zeroswot_encoder = AutoModel.from_pretrained(
276
  "johntsi/ZeroSwot-Medium_asr-cv_en-to-200", trust_remote_code=True, revision=commit_hash,
277
  )
278
- model.eval()
279
- model.to("cuda")
280
 
281
  # Load NLLB Model
282
  nllb_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
283
  nllb_model.eval()
284
  nllb_model.to("cuda")
285
 
286
- # Load sample .wav
287
- audio = load_and_resample_audio("resources/sample.wav")
288
- input_values = processor(audio, sampling_rate=16000, return_tensors="pt").cuda()
289
 
290
  # translation to German
291
  compressed_embeds, attention_mask = zeroswot_encoder(**input_values)
 
255
 
256
  ## Usage
257
 
258
+ The usage example below was tested with Python 3.9.16 and Transformers v4.41.2. Also install torchaudio and sentencepiece, which are needed for processing.
259
+
260
+ ```bash
261
+ pip install transformers torchaudio sentencepiece
262
+ ```
263
+
264
+
265
  ```python
266
  from transformers import Wav2Vec2Processor, NllbTokenizer, AutoModel, AutoModelForSeq2SeqLM
267
  import torchaudio
 
278
  tokenizer = NllbTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
279
 
280
  # Load ZeroSwot Encoder
281
+ commit_hash = "eafabee295ea1c8b45483d1fd26bd747d9a7d937"
282
  zeroswot_encoder = AutoModel.from_pretrained(
283
  "johntsi/ZeroSwot-Medium_asr-cv_en-to-200", trust_remote_code=True, revision=commit_hash,
284
  )
285
+ zeroswot_encoder.eval()
286
+ zeroswot_encoder.to("cuda")
287
 
288
  # Load NLLB Model
289
  nllb_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
290
  nllb_model.eval()
291
  nllb_model.to("cuda")
292
 
293
+ # Load audio file
294
+ audio = load_and_resample_audio(path_to_audio_file)
295
+ input_values = processor(audio, sampling_rate=16000, return_tensors="pt").to("cuda")
296
 
297
  # translation to German
298
  compressed_embeds, attention_mask = zeroswot_encoder(**input_values)