HaileyStorm
/

chess-mamba-vs-xformer

HaileyStorm committed on Apr 28, 2024

Commit

432e67d

verified ·

1 Parent(s): e8aba5c

Update chess-gpt-eval-contrastive/mamba_module.py

Files changed (1) hide show

chess-gpt-eval-contrastive/mamba_module.py CHANGED Viewed

@@ -137,7 +137,7 @@ class MambaPlayer:
                 self.linear_probe_targets = {i: {bucket: {'q_value': [], 'q_value_delta': [], 'material_balance': []} for bucket in self.move_buckets} for i in self.linear_probes}
                 self.linear_optimizers = {
                     layer_idx: {
-                        probe_type: optim.Adam(self.linear_probes[layer_idx][probe_type].parameters(), lr=lr)
                         for probe_type in ['q_value', 'q_value_delta', 'material_balance']
                     }
                     for layer_idx in self.linear_probes
@@ -312,7 +312,7 @@ class MambaPlayer:
             self.linear_probe_targets[layer_idx][bucket]['q_value_delta'].append(q_value_delta)
             self.linear_probe_targets[layer_idx][bucket]['material_balance'].append(material_bal)
-    def train_linear_probes(self, lr=0.01):
         criterion = nn.MSELoss()
         for layer_idx in self.linear_probes:

                 self.linear_probe_targets = {i: {bucket: {'q_value': [], 'q_value_delta': [], 'material_balance': []} for bucket in self.move_buckets} for i in self.linear_probes}
                 self.linear_optimizers = {
                     layer_idx: {
+                        probe_type: optim.Adam(self.linear_probes[layer_idx][probe_type].parameters(), lr=0.01)
                         for probe_type in ['q_value', 'q_value_delta', 'material_balance']
                     }
                     for layer_idx in self.linear_probes
             self.linear_probe_targets[layer_idx][bucket]['q_value_delta'].append(q_value_delta)
             self.linear_probe_targets[layer_idx][bucket]['material_balance'].append(material_bal)
+    def train_linear_probes(self):
         criterion = nn.MSELoss()
         for layer_idx in self.linear_probes: