ASzecsenyi committed on
Commit
1878fb0
·
verified ·
1 Parent(s): cf2e317

Upload stage_3_mtetahwr/meta_001123.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. stage_3_mtetahwr/meta_001123.json +10 -7
stage_3_mtetahwr/meta_001123.json CHANGED
@@ -2,7 +2,7 @@
2
  "step": 1123,
3
  "val_bpb": 2.2258310665885443,
4
  "model_config": {
5
- "run": "rva_6x256/stage_3",
6
  "wandb_group": "rva_6x256",
7
  "seed": 42,
8
  "device_type": "cuda",
@@ -62,7 +62,7 @@
62
  "warmup_ratio": 0.05,
63
  "warmdown_ratio": 0.4,
64
  "final_lr_frac": 0.0,
65
- "resume_from_step": -1,
66
  "eval_every": 250,
67
  "eval_tokens": 10485760,
68
  "core_metric_every": -1,
@@ -74,14 +74,15 @@
74
  "profile_step": 2,
75
  "profile_micro_step": 0,
76
  "memory_history_max_entries": 10000,
77
- "model_tag": "rva_6x256/stage_3",
 
78
  "n_layer": 6,
79
  "n_head": 16,
80
  "n_kv_head": 16,
81
  "n_embd": 256
82
  },
83
  "user_config": {
84
- "run": "rva_6x256/stage_3",
85
  "wandb_group": "rva_6x256",
86
  "seed": 42,
87
  "device_type": "cuda",
@@ -141,7 +142,7 @@
141
  "warmup_ratio": 0.05,
142
  "warmdown_ratio": 0.4,
143
  "final_lr_frac": 0.0,
144
- "resume_from_step": -1,
145
  "eval_every": 250,
146
  "eval_tokens": 10485760,
147
  "core_metric_every": -1,
@@ -153,13 +154,15 @@
153
  "profile_step": 2,
154
  "profile_micro_step": 0,
155
  "memory_history_max_entries": 10000,
156
- "model_tag": "rva_6x256/stage_3"
 
157
  },
 
158
  "device_batch_size": 8,
159
  "sequence_len": 1024,
160
  "dataloader_state_dict": {
161
  "pq_idx": 2,
162
- "rg_idx": 16
163
  },
164
  "loop_state": {
165
  "min_val_bpb": 2.2258310665885443,
 
2
  "step": 1123,
3
  "val_bpb": 2.2258310665885443,
4
  "model_config": {
5
+ "run": "rva_6x256/stage_3_mtetahwr",
6
  "wandb_group": "rva_6x256",
7
  "seed": 42,
8
  "device_type": "cuda",
 
62
  "warmup_ratio": 0.05,
63
  "warmdown_ratio": 0.4,
64
  "final_lr_frac": 0.0,
65
+ "resume_from_step": 1123,
66
  "eval_every": 250,
67
  "eval_tokens": 10485760,
68
  "core_metric_every": -1,
 
74
  "profile_step": 2,
75
  "profile_micro_step": 0,
76
  "memory_history_max_entries": 10000,
77
+ "model_tag": "rva_6x256/stage_3_mtetahwr",
78
+ "stage": 3,
79
  "n_layer": 6,
80
  "n_head": 16,
81
  "n_kv_head": 16,
82
  "n_embd": 256
83
  },
84
  "user_config": {
85
+ "run": "rva_6x256/stage_3_mtetahwr",
86
  "wandb_group": "rva_6x256",
87
  "seed": 42,
88
  "device_type": "cuda",
 
142
  "warmup_ratio": 0.05,
143
  "warmdown_ratio": 0.4,
144
  "final_lr_frac": 0.0,
145
+ "resume_from_step": 1123,
146
  "eval_every": 250,
147
  "eval_tokens": 10485760,
148
  "core_metric_every": -1,
 
154
  "profile_step": 2,
155
  "profile_micro_step": 0,
156
  "memory_history_max_entries": 10000,
157
+ "model_tag": "rva_6x256/stage_3_mtetahwr",
158
+ "stage": 3
159
  },
160
+ "stage": 3,
161
  "device_batch_size": 8,
162
  "sequence_len": 1024,
163
  "dataloader_state_dict": {
164
  "pq_idx": 2,
165
+ "rg_idx": 20
166
  },
167
  "loop_state": {
168
  "min_val_bpb": 2.2258310665885443,