ychenNLP committed
Commit 7fb5df0 · 1 Parent(s): a2b9149

Update README.md

Files changed (1):
  1. README.md +27 -0
README.md CHANGED
@@ -1,3 +1,30 @@
 ---
 license: mit
 ---
+
+```python
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+from tqdm import tqdm
+import torch
+
+tokenizer = AutoTokenizer.from_pretrained(
+    "facebook/nllb-200-distilled-600M", src_lang="eng_Latn")
+
+print("Loading model")
+model = AutoModelForSeq2SeqLM.from_pretrained("ychenNLP/nllb-200-3.3b-ep")
+model.cuda()
+
+input_chunks = ["A translator always risks inadvertently introducing source-language words, grammar, or syntax into the target-language rendering."]
+batch_size = 8  # batch size for translation; choose to fit your GPU memory
+
+print("Start translation...")
+output_result = []
+for idx in tqdm(range(0, len(input_chunks), batch_size)):
+    start_idx = idx
+    end_idx = idx + batch_size
+    # Tokenize the current batch and move it to the GPU
+    inputs = tokenizer(input_chunks[start_idx: end_idx], padding=True, truncation=True, max_length=128, return_tensors="pt").to('cuda')
+
+    with torch.no_grad():
+        # Force the decoder to start with the target-language code (Simplified Chinese)
+        translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id["zho_Hans"],
+                                           max_length=128, num_beams=5, num_return_sequences=1, early_stopping=True)
+
+    output = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
+    output_result.extend(output)
+```