p-s committed on
Commit
12d1d79
β€’
1 Parent(s): 0419fec

Added AutoTokenizer support

Browse files
Files changed (2) hide show
  1. README.md +4 -6
  2. tokenizer_config.json +2 -1
README.md CHANGED
@@ -26,12 +26,11 @@ Tokenizer requirements:
26
 
27
  #### Simple FillMaskPipeline
28
  ```python
29
- from transformers import AutoModelForSeq2SeqLM, pipeline
30
- from tokenization_bart_japanese import BartJapaneseTokenizer
31
 
32
  model_name = "Formzu/bart-base-japanese"
33
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
34
- tokenizer = BartJapaneseTokenizer.from_pretrained(model_name)
35
 
36
  masked_text = "ε€©ζ°—γŒ<mask>から散歩しましょう。"
37
 
@@ -46,15 +45,14 @@ print(out)
46
  ```
47
  #### Text Generation
48
  ```python
49
- from transformers import AutoModelForSeq2SeqLM
50
- from tokenization_bart_japanese import BartJapaneseTokenizer
51
  import torch
52
 
53
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
54
 
55
  model_name = "Formzu/bart-base-japanese"
56
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
57
- tokenizer = BartJapaneseTokenizer.from_pretrained(model_name)
58
 
59
  masked_text = "ε€©ζ°—γŒ<mask>から散歩しましょう。"
60
 
 
26
 
27
  #### Simple FillMaskPipeline
28
  ```python
29
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
 
30
 
31
  model_name = "Formzu/bart-base-japanese"
32
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
33
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
34
 
35
  masked_text = "ε€©ζ°—γŒ<mask>から散歩しましょう。"
36
 
 
45
  ```
46
  #### Text Generation
47
  ```python
48
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
49
  import torch
50
 
51
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
52
 
53
  model_name = "Formzu/bart-base-japanese"
54
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
55
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
56
 
57
  masked_text = "ε€©ζ°—γŒ<mask>から散歩しましょう。"
58
 
tokenizer_config.json CHANGED
@@ -18,5 +18,6 @@
18
  "tgt_lang": null,
19
  "tokenizer_class": "BartJapaneseTokenizer",
20
  "tokenizer_file": null,
21
- "unk_token": "<unk>"
 
22
  }
 
18
  "tgt_lang": null,
19
  "tokenizer_class": "BartJapaneseTokenizer",
20
  "tokenizer_file": null,
21
+ "unk_token": "<unk>",
22
+ "auto_map": {"AutoTokenizer": ["tokenization_bart_japanese.BartJapaneseTokenizer", null]}
23
  }