nroggendorff committed on
Commit
5123979
1 Parent(s): 40853aa

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +4 -4
train.py CHANGED
@@ -12,17 +12,17 @@ from torch.cuda.amp import autocast, GradScaler
12
  from itertools import islice
13
 
14
  BATCH_SIZE = 8
15
- EPOCHS = 1
16
  LEARNING_RATE = 1e-4
17
- FACTOR = 768
18
- MAX_SEQ_LENGTH = 128
19
  VOCAB_SIZE = 32000
20
  INPUT_DATASET = "HuggingFaceTB/smollm-corpus"
21
  INSTRUCT_DATASET = "nroggendorff/elephant"
22
  OUTPUT_REPO = "nroggendorff/smallama"
23
  INSTRUCT_FINETUNE_BOOL = False
24
  INIT = 0#/3
25
- SHARD_SIZE = int(2e+6)
26
  FP16 = True
27
  WARMUP_STEPS = 1000
28
  WEIGHT_DECAY = 0.01
 
12
  from itertools import islice
13
 
14
  BATCH_SIZE = 8
15
+ EPOCHS = 3
16
  LEARNING_RATE = 1e-4
17
+ FACTOR = 12 ** 3 // 3
18
+ MAX_SEQ_LENGTH = 512
19
  VOCAB_SIZE = 32000
20
  INPUT_DATASET = "HuggingFaceTB/smollm-corpus"
21
  INSTRUCT_DATASET = "nroggendorff/elephant"
22
  OUTPUT_REPO = "nroggendorff/smallama"
23
  INSTRUCT_FINETUNE_BOOL = False
24
  INIT = 0#/3
25
+ SHARD_SIZE = int(5e+5)
26
  FP16 = True
27
  WARMUP_STEPS = 1000
28
  WEIGHT_DECAY = 0.01