nroggendorff committed on
Commit
304de92
1 Parent(s): cb4059a

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +5 -5
train.py CHANGED
@@ -10,8 +10,8 @@ from tokenizers import ByteLevelBPETokenizer
10
  MAX_SEQ_LENGTH = 128
11
  BATCH_SIZE = 16
12
  EPOCHS = 1
13
- LEARNING_RATE = 1e-4
14
- FACTOR = 2 ** 9 + 2 ** 7
15
  VOCAB_SIZE = 3200
16
  INPUT_DATASET = "nroggendorff/elephant"
17
  OUTPUT_REPO = "smallama"
@@ -54,9 +54,9 @@ def create_model(tokenizer):
54
  config = LlamaConfig(
55
  vocab_size=tokenizer.vocab_size,
56
  hidden_size=FACTOR,
57
- intermediate_size=FACTOR * 2,
58
- num_hidden_layers=max(1, FACTOR // 64),
59
- num_attention_heads=max(1, FACTOR // 64),
60
  max_position_embeddings=MAX_SEQ_LENGTH,
61
  rms_norm_eps=1e-6,
62
  initializer_range=0.02,
 
10
  MAX_SEQ_LENGTH = 128
11
  BATCH_SIZE = 16
12
  EPOCHS = 1
13
+ LEARNING_RATE = 1e-5
14
+ FACTOR = 600
15
  VOCAB_SIZE = 3200
16
  INPUT_DATASET = "nroggendorff/elephant"
17
  OUTPUT_REPO = "smallama"
 
54
  config = LlamaConfig(
55
  vocab_size=tokenizer.vocab_size,
56
  hidden_size=FACTOR,
57
+ intermediate_size=FACTOR * 4,
58
+ num_hidden_layers=FACTOR // 32,
59
+ num_attention_heads=FACTOR // 64,
60
  max_position_embeddings=MAX_SEQ_LENGTH,
61
  rms_norm_eps=1e-6,
62
  initializer_range=0.02,