nhanv committed
Commit 9264e4f
1 Parent(s): a2ffe41

Update README.md

Files changed (1)
  1. README.md +21 -3
README.md CHANGED
@@ -22,11 +22,29 @@ and first released at [this page](https://openai.com/blog/better-language-models
 # How to use the model
 
 ~~~~
-from transformers import GPT2Tokenizer, AutoModelForCausalLM
-
-tokenizer = GPT2Tokenizer.from_pretrained("NlpHUST/gpt2-vietnamese")
-
-model = AutoModelForCausalLM.from_pretrained("NlpHUST/gpt2-vietnamese")
+import torch
+from transformers import GPT2Tokenizer, GPT2LMHeadModel
+
+tokenizer = GPT2Tokenizer.from_pretrained('NlpHUST/gpt2-vietnamese')
+model = GPT2LMHeadModel.from_pretrained('NlpHUST/gpt2-vietnamese')
+
+text = "Albert Einstein là nhà vật lý học tạo ra thuyết lượng tử"
+input_ids = tokenizer.encode(text, return_tensors='pt')
+max_length = 100
+
+sample_outputs = model.generate(input_ids, pad_token_id=tokenizer.eos_token_id,
+                                do_sample=True,
+                                max_length=max_length,
+                                min_length=max_length,
+                                top_k=40,
+                                num_beams=5,
+                                early_stopping=True,
+                                no_repeat_ngram_size=2,
+                                num_return_sequences=3)
+
+for i, sample_output in enumerate(sample_outputs):
+    print(">> Generated text {}\n\n{}".format(i+1, tokenizer.decode(sample_output.tolist())))
+    print('\n---')
 ~~~~
 
 # Model architecture
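
The snippet added by this commit loads the checkpoint directly with GPT2Tokenizer and GPT2LMHeadModel and calls model.generate. As a lighter-weight alternative, the same checkpoint should also work through the standard transformers text-generation pipeline; the sketch below is an illustrative assumption (parameter values chosen for demonstration), not part of the committed README.

~~~~
# Minimal sketch: text generation via the transformers pipeline API (assumed alternative).
from transformers import pipeline

generator = pipeline('text-generation', model='NlpHUST/gpt2-vietnamese')

# Vietnamese prompt: "Albert Einstein is the physicist who created quantum theory"
outputs = generator(
    "Albert Einstein là nhà vật lý học tạo ra thuyết lượng tử",
    max_length=100,          # illustrative value, mirrors the README example
    do_sample=True,
    top_k=40,
    num_return_sequences=3,
)

for i, out in enumerate(outputs, start=1):
    print(">> Generated text {}\n\n{}".format(i, out['generated_text']))
~~~~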