HaileyStorm committed
Commit 070a898
1 Parent(s): b32cef0

Upload chess-mamba-vs-xformer/config/Mamba/50M.py with huggingface_hub

chess-mamba-vs-xformer/config/Mamba/50M.py CHANGED
@@ -17,14 +17,14 @@ max_seq_len = 1536
 base_batch_size = 256
 
 batch_size = 50
-gradient_accumulation_steps = 2
+gradient_accumulation_steps = 2 #25
 effective_batch_size = batch_size * gradient_accumulation_steps
 
 always_save_checkpoint = True
-eval_interval = 250
-eval_iters = 33
-log_interval = 50
-train_file_update_interval = 10 # 23 was original ... 7 definitely crashes (maybe try 10 on Lambda)
+eval_interval = 60
+eval_iters = 1.5
+log_interval = 0.01
+train_file_update_interval = 1 # 23 was original ... 7 definitely crashes (maybe try 10 on Lambda)
 
 warmup_iters = 500 # not super necessary potentially
 learning_rate = 1.5e-3 # tested 1.5e-3 from 112k-156k, before that 3.5e-3 #8e-3
@@ -64,7 +64,7 @@ d_state = 32
 dt_rank = 56
 move_num_in_gamestate = False
 
-init_from = 'scratch'
+init_from = 'resume'
 
 device = 'cuda' # run on cpu only
 compile = False # do not torch compile the model
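
The one piece of arithmetic in this config is the effective-batch computation: with batch_size = 50 and gradient_accumulation_steps = 2, each optimizer step averages gradients over 100 sequences. The config keys mirror nanoGPT conventions, so a minimal sketch of how a trainer of that style typically consumes these values is shown below; this is an illustration, not the repo's actual loop, and model, optimizer, and get_batch are hypothetical stand-ins.

# Values from this config; the loop itself is a hypothetical nanoGPT-style sketch.
batch_size = 50
gradient_accumulation_steps = 2
effective_batch_size = batch_size * gradient_accumulation_steps  # 100 sequences per step

def train_step(model, optimizer, get_batch):
    """One optimizer step built from `gradient_accumulation_steps` micro-batches."""
    optimizer.zero_grad(set_to_none=True)
    for _ in range(gradient_accumulation_steps):
        x, y = get_batch('train')  # each micro-batch holds `batch_size` sequences
        _, loss = model(x, y)
        # Scale so the accumulated gradient equals the gradient of the mean loss
        # over the effective batch; the learning rate then needs no rescaling.
        (loss / gradient_accumulation_steps).backward()
    optimizer.step()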
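
The substantive change is init_from flipping from 'scratch' to 'resume', which in nanoGPT-style trainers selects continuing from the latest saved checkpoint rather than reinitializing weights. A hedged sketch of what that branch usually looks like; the out_dir path, checkpoint filename, key names, and build_model constructor are assumptions from that convention and are not shown in this diff.

import os
import torch

init_from = 'resume'  # value set by this commit
device = 'cuda'
out_dir = 'out'       # assumed checkpoint directory; not part of this diff

if init_from == 'scratch':
    model = build_model()  # hypothetical constructor for the 50M Mamba
    iter_num = 0
elif init_from == 'resume':
    # Load the saved checkpoint and continue counting iterations from it.
    ckpt = torch.load(os.path.join(out_dir, 'ckpt.pt'), map_location=device)
    model = build_model(**ckpt.get('model_args', {}))
    model.load_state_dict(ckpt['model'])
    iter_num = ckpt['iter_num']

Because always_save_checkpoint = True, a checkpoint is written at every eval interval regardless of validation loss, so a 'resume' run of this kind picks up from the most recent eval point.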