nroggendorff committed on
Commit
239454d
1 Parent(s): cfa89e3

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +3 -3
train.py CHANGED
@@ -12,12 +12,12 @@ from huggingface_hub import HfApi
12
  from torch.utils.data import DataLoader
13
  from itertools import islice
14
 
15
- BATCH_SIZE = 8
16
  EPOCHS = 1
17
  LEARNING_RATE = 2e-4
18
  FACTOR = 12 ** 3 // 3
19
  MAX_SEQ_LENGTH = 128
20
- VOCAB_SIZE = 52000
21
  INPUT_DATASET = "HuggingFaceTB/smollm-corpus"
22
  INSTRUCT_DATASET = "nroggendorff/elephant"
23
  OUTPUT_REPO = "nroggendorff/smallama"
@@ -27,7 +27,7 @@ SHARD_SIZE = int(2e+6)
27
  FP16 = True
28
  WARMUP_STEPS = 50
29
  WEIGHT_DECAY = 1e-3
30
- GRADIENT_ACCUMULATION_STEPS = 4
31
  PUSH_TO_HUB = True
32
 
33
  class Space:
 
12
  from torch.utils.data import DataLoader
13
  from itertools import islice
14
 
15
+ BATCH_SIZE = 16
16
  EPOCHS = 1
17
  LEARNING_RATE = 2e-4
18
  FACTOR = 12 ** 3 // 3
19
  MAX_SEQ_LENGTH = 128
20
+ VOCAB_SIZE = 32000
21
  INPUT_DATASET = "HuggingFaceTB/smollm-corpus"
22
  INSTRUCT_DATASET = "nroggendorff/elephant"
23
  OUTPUT_REPO = "nroggendorff/smallama"
 
27
  FP16 = True
28
  WARMUP_STEPS = 50
29
  WEIGHT_DECAY = 1e-3
30
+ GRADIENT_ACCUMULATION_STEPS = 2
31
  PUSH_TO_HUB = True
32
 
33
  class Space: