HaileyStorm committed on
Commit
4719b37
·
verified ·
1 Parent(s): 5472f94

Update chess-gpt-eval-contrastive/mamba_module.py

Browse files
chess-gpt-eval-contrastive/mamba_module.py CHANGED
@@ -126,7 +126,7 @@ class MambaPlayer:
126
  tensor_output = output
127
  seq_len = tensor_output.shape[1]
128
  bucket = next(b for b in self.move_buckets if self.move_num <= b)
129
- self.activations_sum[layer_idx][bucket]["current"][:, :8, :] += tensor_output.detach().cpu().numpy()[:, :max(self.seq_len, 8), :][:, -8:, :]
130
  self.activations_count[layer_idx][bucket]["current"] += 1
131
 
132
  self.hooks.append(layer.register_forward_hook(hook))
@@ -325,8 +325,8 @@ class MambaPlayer:
325
  def get_lr(it):
326
  warmup_iters = 0 #300 * 43
327
  lr_decay_iters = 3000 * 43
328
- learning_rate = 0.0002
329
- min_lr = 0.00001
330
  # 1) linear warmup for warmup_iters steps
331
  if it < warmup_iters:
332
  return learning_rate * it / warmup_iters
 
126
  tensor_output = output
127
  seq_len = tensor_output.shape[1]
128
  bucket = next(b for b in self.move_buckets if self.move_num <= b)
129
+ self.activations_sum[layer_idx][bucket]["current"][:, :min(8, self.seq_len), :] += tensor_output.detach().cpu().numpy()[:, :self.seq_len, :][:, -8:, :]
130
  self.activations_count[layer_idx][bucket]["current"] += 1
131
 
132
  self.hooks.append(layer.register_forward_hook(hook))
 
325
  def get_lr(it):
326
  warmup_iters = 0 #300 * 43
327
  lr_decay_iters = 3000 * 43
328
+ learning_rate = 0.000075
329
+ min_lr = 0.0000075
330
  # 1) linear warmup for warmup_iters steps
331
  if it < warmup_iters:
332
  return learning_rate * it / warmup_iters