Update README.md
Browse files
README.md
CHANGED
@@ -31,7 +31,7 @@ def gen_text(batch_size: int = 5000):
|
|
31 |
|
32 |
tokenizer.train_from_iterator(
|
33 |
gen_text(),
|
34 |
-
vocab_size=50265,
|
35 |
min_frequency=2,
|
36 |
special_tokens=[
|
37 |
"<s>",
|
|
|
31 |
|
32 |
tokenizer.train_from_iterator(
|
33 |
gen_text(),
|
34 |
+
vocab_size=50265, # roberta-base와 같은 크기
|
35 |
min_frequency=2,
|
36 |
special_tokens=[
|
37 |
"<s>",
|