Spaces:
Paused
Paused
nroggendorff
committed on
Commit
•
304de92
1
Parent(s):
cb4059a
Update train.py
Browse files
train.py
CHANGED
@@ -10,8 +10,8 @@ from tokenizers import ByteLevelBPETokenizer
|
|
10 |
MAX_SEQ_LENGTH = 128
|
11 |
BATCH_SIZE = 16
|
12 |
EPOCHS = 1
|
13 |
-
LEARNING_RATE = 1e-
|
14 |
-
FACTOR =
|
15 |
VOCAB_SIZE = 3200
|
16 |
INPUT_DATASET = "nroggendorff/elephant"
|
17 |
OUTPUT_REPO = "smallama"
|
@@ -54,9 +54,9 @@ def create_model(tokenizer):
|
|
54 |
config = LlamaConfig(
|
55 |
vocab_size=tokenizer.vocab_size,
|
56 |
hidden_size=FACTOR,
|
57 |
-
intermediate_size=FACTOR *
|
58 |
-
num_hidden_layers=
|
59 |
-
num_attention_heads=
|
60 |
max_position_embeddings=MAX_SEQ_LENGTH,
|
61 |
rms_norm_eps=1e-6,
|
62 |
initializer_range=0.02,
|
|
|
10 |
MAX_SEQ_LENGTH = 128
|
11 |
BATCH_SIZE = 16
|
12 |
EPOCHS = 1
|
13 |
+
LEARNING_RATE = 1e-5
|
14 |
+
FACTOR = 600
|
15 |
VOCAB_SIZE = 3200
|
16 |
INPUT_DATASET = "nroggendorff/elephant"
|
17 |
OUTPUT_REPO = "smallama"
|
|
|
54 |
config = LlamaConfig(
|
55 |
vocab_size=tokenizer.vocab_size,
|
56 |
hidden_size=FACTOR,
|
57 |
+
intermediate_size=FACTOR * 4,
|
58 |
+
num_hidden_layers=FACTOR // 32,
|
59 |
+
num_attention_heads=FACTOR // 64,
|
60 |
max_position_embeddings=MAX_SEQ_LENGTH,
|
61 |
rms_norm_eps=1e-6,
|
62 |
initializer_range=0.02,
|