Spaces:
Paused
Paused
nroggendorff
committed on
Commit
•
c62bc4a
1
Parent(s):
240511e
Update train.py
Browse files
train.py
CHANGED
@@ -36,7 +36,7 @@ class Config:
|
|
36 |
self.INSTRUCT_FINETUNE_BOOL = False
|
37 |
|
38 |
# Training steps and warmup
|
39 |
-
self.FACTOR = 12 ** 3 //
|
40 |
self.TOTAL_STEPS = (self.SHARD_SIZE * self.EPOCHS) // (self.BATCH_SIZE * self.GRADIENT_ACCUMULATION_STEPS)
|
41 |
self.WARMUP_STEPS = int(self.TOTAL_STEPS * 0.1)
|
42 |
|
@@ -160,11 +160,11 @@ def create_model(tokenizer):
|
|
160 |
vocab_size=tokenizer.vocab_size,
|
161 |
hidden_size=config.FACTOR,
|
162 |
intermediate_size=config.FACTOR * 4,
|
163 |
-
num_hidden_layers=
|
164 |
-
num_attention_heads=
|
165 |
max_position_embeddings=config.MAX_SEQ_LENGTH,
|
166 |
rms_norm_eps=1e-5,
|
167 |
-
initializer_range=
|
168 |
use_cache=True,
|
169 |
pad_token_id=tokenizer.pad_token_id,
|
170 |
bos_token_id=tokenizer.bos_token_id,
|
|
|
36 |
self.INSTRUCT_FINETUNE_BOOL = False
|
37 |
|
38 |
# Training steps and warmup
|
39 |
+
self.FACTOR = 12 ** 3 // 2
|
40 |
self.TOTAL_STEPS = (self.SHARD_SIZE * self.EPOCHS) // (self.BATCH_SIZE * self.GRADIENT_ACCUMULATION_STEPS)
|
41 |
self.WARMUP_STEPS = int(self.TOTAL_STEPS * 0.1)
|
42 |
|
|
|
160 |
vocab_size=tokenizer.vocab_size,
|
161 |
hidden_size=config.FACTOR,
|
162 |
intermediate_size=config.FACTOR * 4,
|
163 |
+
num_hidden_layers=config.FACTOR // 2 ** 4,
|
164 |
+
num_attention_heads=config.FACTOR // 2 ** 5,
|
165 |
max_position_embeddings=config.MAX_SEQ_LENGTH,
|
166 |
rms_norm_eps=1e-5,
|
167 |
+
initializer_range=2e-2,
|
168 |
use_cache=True,
|
169 |
pad_token_id=tokenizer.pad_token_id,
|
170 |
bos_token_id=tokenizer.bos_token_id,
|