hlzhang109 commited on
Commit
ebc1546
1 Parent(s): a5dd613

Upload folder using huggingface_hub

Browse files
models/books_tau=64_1b/config.yaml ADDED
@@ -0,0 +1,789 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ method: full
2
+ score: rho
3
+ score_combination: max
4
+ fix_learner: false
5
+ granularity: sequence
6
+ select_random: false
7
+ select_frac: 1.0
8
+ just_score_reference: false
9
+ collect_learner_score: false
10
+ collect_reference_score: false
11
+ sft: false
12
+ sft_use_label: false
13
+ sft_dataset: null
14
+ reference_models: null
15
+ update_reference: false
16
+ learner_model: null
17
+ train_online: false
18
+ fix_reference: false
19
+ data_start_step: null
20
+ run_name: olmo_35013764_3
21
+ seed: 1
22
+ epoch: null
23
+ dry_run: false
24
+ model:
25
+ d_model: 2048
26
+ n_heads: 32
27
+ n_kv_heads: null
28
+ clip_qkv: null
29
+ n_layers: 24
30
+ mlp_ratio: 4
31
+ mlp_hidden_size: 8192
32
+ activation_type: gelu
33
+ block_type: sequential
34
+ block_group_size: 1
35
+ alibi: false
36
+ alibi_bias_max: 8.0
37
+ rope: true
38
+ rope_full_precision: true
39
+ flash_attention: false
40
+ attention_dropout: 0.0
41
+ multi_query_attention: false
42
+ attention_layer_norm: true
43
+ residual_dropout: 0.0
44
+ embedding_dropout: 0.0
45
+ layer_norm_type: default
46
+ layer_norm_with_affine: true
47
+ attention_layer_norm_with_affine: true
48
+ max_sequence_length: 512
49
+ include_bias: false
50
+ bias_for_layer_norm: false
51
+ scale_logits: false
52
+ vocab_size: 50280
53
+ embedding_size: 50304
54
+ weight_tying: false
55
+ eos_token_id: 0
56
+ pad_token_id: 1
57
+ init_device: meta
58
+ init_fn: mitchell
59
+ init_std: 0.02
60
+ init_cutoff_factor: null
61
+ precision: amp_bf16
62
+ optimizer:
63
+ name: adamw
64
+ learning_rate: 0.001
65
+ weight_decay: 0.0
66
+ betas:
67
+ - 0.9
68
+ - 0.95
69
+ eps: 1.0e-15
70
+ no_decay_norm_and_bias: null
71
+ decay_norm_and_bias: false
72
+ decay_embeddings: false
73
+ metrics_log_interval: 100
74
+ scheduler:
75
+ name: cosine_with_warmup
76
+ units: steps
77
+ t_warmup: 8000
78
+ t_max: null
79
+ alpha_f: 0.1
80
+ grad_clip_warmup_steps: null
81
+ grad_clip_warmup_factor: null
82
+ warmup_min_lr: null
83
+ data:
84
+ paths:
85
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-00-00001.npy
86
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-01-00000.npy
87
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-01-00001.npy
88
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-02-00000.npy
89
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-02-00001.npy
90
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-02-00002.npy
91
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-02-00003.npy
92
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-03-00000.npy
93
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-03-00001.npy
94
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-04-00000.npy
95
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-04-00001.npy
96
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-05-00000.npy
97
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-05-00001.npy
98
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-05-00002.npy
99
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-05-00003.npy
100
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-06-00000.npy
101
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-06-00001.npy
102
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-07-00000.npy
103
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-07-00001.npy
104
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-08-00000.npy
105
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-08-00001.npy
106
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-08-00002.npy
107
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-08-00003.npy
108
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-09-00000.npy
109
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-09-00001.npy
110
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-10-00000.npy
111
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-10-00001.npy
112
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-11-00000.npy
113
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-11-00001.npy
114
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-11-00002.npy
115
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-11-00003.npy
116
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-12-00000.npy
117
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-12-00001.npy
118
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-13-00000.npy
119
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-13-00001.npy
120
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-14-00000.npy
121
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-14-00001.npy
122
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-14-00002.npy
123
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-14-00003.npy
124
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-15-00000.npy
125
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-15-00001.npy
126
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-16-00000.npy
127
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-16-00001.npy
128
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-17-00000.npy
129
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-17-00001.npy
130
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-17-00002.npy
131
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-17-00003.npy
132
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-18-00000.npy
133
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-18-00001.npy
134
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-19-00000.npy
135
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-19-00001.npy
136
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-20-00000.npy
137
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-20-00001.npy
138
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-20-00002.npy
139
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-20-00003.npy
140
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-21-00000.npy
141
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-21-00001.npy
142
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-22-00000.npy
143
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-22-00001.npy
144
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-23-00000.npy
145
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-23-00001.npy
146
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-23-00002.npy
147
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-24-00000.npy
148
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-24-00001.npy
149
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-25-00000.npy
150
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-25-00001.npy
151
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-26-00000.npy
152
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-26-00001.npy
153
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-26-00002.npy
154
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-26-00003.npy
155
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-27-00000.npy
156
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-27-00001.npy
157
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-28-00000.npy
158
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-28-00001.npy
159
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-29-00000.npy
160
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-29-00001.npy
161
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-29-00002.npy
162
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-29-00003.npy
163
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-30-00000.npy
164
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-30-00001.npy
165
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-31-00000.npy
166
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-31-00001.npy
167
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-31-00002.npy
168
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-31-00003.npy
169
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-32-00000.npy
170
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-32-00001.npy
171
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-33-00000.npy
172
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-33-00001.npy
173
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-34-00000.npy
174
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-34-00001.npy
175
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-34-00002.npy
176
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-34-00003.npy
177
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-35-00000.npy
178
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-35-00001.npy
179
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-36-00000.npy
180
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-36-00001.npy
181
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-37-00000.npy
182
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-37-00001.npy
183
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-37-00002.npy
184
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-37-00003.npy
185
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-38-00000.npy
186
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-38-00001.npy
187
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-39-00000.npy
188
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-39-00001.npy
189
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-40-00000.npy
190
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-40-00001.npy
191
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-40-00002.npy
192
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-40-00003.npy
193
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-41-00000.npy
194
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-41-00001.npy
195
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-42-00000.npy
196
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-42-00001.npy
197
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-43-00000.npy
198
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-43-00001.npy
199
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-43-00002.npy
200
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-43-00003.npy
201
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-44-00000.npy
202
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-44-00001.npy
203
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-45-00000.npy
204
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-45-00001.npy
205
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-46-00000.npy
206
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-46-00001.npy
207
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-46-00002.npy
208
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-46-00003.npy
209
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-47-00000.npy
210
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-47-00001.npy
211
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-48-00000.npy
212
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-48-00001.npy
213
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-49-00000.npy
214
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-49-00001.npy
215
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-49-00002.npy
216
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-49-00003.npy
217
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-50-00000.npy
218
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-50-00001.npy
219
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-51-00000.npy
220
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-51-00001.npy
221
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-52-00000.npy
222
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-52-00001.npy
223
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-52-00002.npy
224
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-52-00003.npy
225
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-53-00000.npy
226
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-53-00001.npy
227
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-54-00000.npy
228
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-54-00001.npy
229
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-55-00000.npy
230
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-55-00001.npy
231
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-55-00002.npy
232
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-55-00003.npy
233
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-56-00000.npy
234
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-56-00001.npy
235
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-57-00000.npy
236
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-57-00001.npy
237
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-58-00000.npy
238
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-58-00001.npy
239
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-58-00002.npy
240
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-58-00003.npy
241
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-59-00000.npy
242
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-59-00001.npy
243
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-60-00000.npy
244
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-60-00001.npy
245
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-61-00000.npy
246
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-61-00001.npy
247
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-61-00002.npy
248
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-61-00003.npy
249
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-62-00000.npy
250
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-62-00001.npy
251
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-63-00000.npy
252
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-63-00001.npy
253
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-63-00002.npy
254
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-63-00003.npy
255
+ datasets: null
256
+ label_mask_paths: null
257
+ pad_direction: right
258
+ generate_attention_mask: false
259
+ num_workers: 16
260
+ drop_last: true
261
+ pin_memory: true
262
+ prefetch_factor: 16
263
+ persistent_workers: true
264
+ timeout: 0
265
+ seed: null
266
+ extra_data_paths: null
267
+ extra_data_key: null
268
+ load_extra_data_to_ram: false
269
+ index_path: /n/holyscratch01/sham_lab/data-olmo/data/35006380_3/selected_indices.npy
270
+ restore_dataloader: true
271
+ fast_forward_batches: null
272
+ evaluators:
273
+ - label: books
274
+ type: lm
275
+ data:
276
+ paths: null
277
+ datasets:
278
+ books_val:
279
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/books_val/books_val.npy
280
+ label_mask_paths: null
281
+ pad_direction: right
282
+ generate_attention_mask: false
283
+ num_workers: 0
284
+ drop_last: true
285
+ pin_memory: false
286
+ prefetch_factor: null
287
+ persistent_workers: false
288
+ timeout: 0
289
+ seed: null
290
+ extra_data_paths: null
291
+ extra_data_key: null
292
+ load_extra_data_to_ram: false
293
+ index_path: null
294
+ device_eval_batch_size: null
295
+ subset_num_batches: null
296
+ sft_use_label: false
297
+ sft: false
298
+ - label: c4
299
+ type: lm
300
+ data:
301
+ paths: null
302
+ datasets:
303
+ c4_val:
304
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4_val/part-00-00000.npy
305
+ label_mask_paths: null
306
+ pad_direction: right
307
+ generate_attention_mask: false
308
+ num_workers: 0
309
+ drop_last: true
310
+ pin_memory: false
311
+ prefetch_factor: null
312
+ persistent_workers: false
313
+ timeout: 0
314
+ seed: null
315
+ extra_data_paths: null
316
+ extra_data_key: null
317
+ load_extra_data_to_ram: false
318
+ index_path: null
319
+ device_eval_batch_size: null
320
+ subset_num_batches: null
321
+ sft_use_label: false
322
+ sft: false
323
+ - label: other
324
+ type: lm
325
+ data:
326
+ paths: null
327
+ datasets:
328
+ wiki-en-simple_val:
329
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/wiki-en-simple_val/wiki-en-simple_val.npy
330
+ stack-code_val:
331
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/stack-code_val/part-00-00000.npy
332
+ cc_en_head_val:
333
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/cc_en_head_val/part-00-00000.npy
334
+ peS2o_val:
335
+ - /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/peS2o_val/part-01-00000.npy
336
+ label_mask_paths: null
337
+ pad_direction: right
338
+ generate_attention_mask: false
339
+ num_workers: 0
340
+ drop_last: true
341
+ pin_memory: false
342
+ prefetch_factor: null
343
+ persistent_workers: false
344
+ timeout: 0
345
+ seed: null
346
+ extra_data_paths: null
347
+ extra_data_key: null
348
+ load_extra_data_to_ram: false
349
+ index_path: null
350
+ device_eval_batch_size: null
351
+ subset_num_batches: null
352
+ sft_use_label: false
353
+ sft: false
354
+ - label: piqa_train
355
+ type: downstream
356
+ data:
357
+ paths: null
358
+ datasets: null
359
+ label_mask_paths: null
360
+ pad_direction: right
361
+ generate_attention_mask: false
362
+ num_workers: 0
363
+ drop_last: false
364
+ pin_memory: false
365
+ prefetch_factor: null
366
+ persistent_workers: false
367
+ timeout: 0
368
+ seed: null
369
+ extra_data_paths: null
370
+ extra_data_key: null
371
+ load_extra_data_to_ram: false
372
+ index_path: null
373
+ device_eval_batch_size: null
374
+ subset_num_batches: null
375
+ sft_use_label: false
376
+ sft: false
377
+ - label: openbook_qa_train
378
+ type: downstream
379
+ data:
380
+ paths: null
381
+ datasets: null
382
+ label_mask_paths: null
383
+ pad_direction: right
384
+ generate_attention_mask: false
385
+ num_workers: 0
386
+ drop_last: false
387
+ pin_memory: false
388
+ prefetch_factor: null
389
+ persistent_workers: false
390
+ timeout: 0
391
+ seed: null
392
+ extra_data_paths: null
393
+ extra_data_key: null
394
+ load_extra_data_to_ram: false
395
+ index_path: null
396
+ device_eval_batch_size: null
397
+ subset_num_batches: null
398
+ sft_use_label: false
399
+ sft: false
400
+ - label: hellaswag_train
401
+ type: downstream
402
+ data:
403
+ paths: null
404
+ datasets: null
405
+ label_mask_paths: null
406
+ pad_direction: right
407
+ generate_attention_mask: false
408
+ num_workers: 0
409
+ drop_last: false
410
+ pin_memory: false
411
+ prefetch_factor: null
412
+ persistent_workers: false
413
+ timeout: 0
414
+ seed: null
415
+ extra_data_paths: null
416
+ extra_data_key: null
417
+ load_extra_data_to_ram: false
418
+ index_path: null
419
+ device_eval_batch_size: null
420
+ subset_num_batches: null
421
+ sft_use_label: false
422
+ sft: false
423
+ - label: winogrande_train
424
+ type: downstream
425
+ data:
426
+ paths: null
427
+ datasets: null
428
+ label_mask_paths: null
429
+ pad_direction: right
430
+ generate_attention_mask: false
431
+ num_workers: 0
432
+ drop_last: false
433
+ pin_memory: false
434
+ prefetch_factor: null
435
+ persistent_workers: false
436
+ timeout: 0
437
+ seed: null
438
+ extra_data_paths: null
439
+ extra_data_key: null
440
+ load_extra_data_to_ram: false
441
+ index_path: null
442
+ device_eval_batch_size: null
443
+ subset_num_batches: null
444
+ sft_use_label: false
445
+ sft: false
446
+ - label: arc_easy_train
447
+ type: downstream
448
+ data:
449
+ paths: null
450
+ datasets: null
451
+ label_mask_paths: null
452
+ pad_direction: right
453
+ generate_attention_mask: false
454
+ num_workers: 0
455
+ drop_last: false
456
+ pin_memory: false
457
+ prefetch_factor: null
458
+ persistent_workers: false
459
+ timeout: 0
460
+ seed: null
461
+ extra_data_paths: null
462
+ extra_data_key: null
463
+ load_extra_data_to_ram: false
464
+ index_path: null
465
+ device_eval_batch_size: null
466
+ subset_num_batches: null
467
+ sft_use_label: false
468
+ sft: false
469
+ - label: arc_challenge_train
470
+ type: downstream
471
+ data:
472
+ paths: null
473
+ datasets: null
474
+ label_mask_paths: null
475
+ pad_direction: right
476
+ generate_attention_mask: false
477
+ num_workers: 0
478
+ drop_last: false
479
+ pin_memory: false
480
+ prefetch_factor: null
481
+ persistent_workers: false
482
+ timeout: 0
483
+ seed: null
484
+ extra_data_paths: null
485
+ extra_data_key: null
486
+ load_extra_data_to_ram: false
487
+ index_path: null
488
+ device_eval_batch_size: null
489
+ subset_num_batches: null
490
+ sft_use_label: false
491
+ sft: false
492
+ - label: boolq_train
493
+ type: downstream
494
+ data:
495
+ paths: null
496
+ datasets: null
497
+ label_mask_paths: null
498
+ pad_direction: right
499
+ generate_attention_mask: false
500
+ num_workers: 0
501
+ drop_last: false
502
+ pin_memory: false
503
+ prefetch_factor: null
504
+ persistent_workers: false
505
+ timeout: 0
506
+ seed: null
507
+ extra_data_paths: null
508
+ extra_data_key: null
509
+ load_extra_data_to_ram: false
510
+ index_path: null
511
+ device_eval_batch_size: null
512
+ subset_num_batches: null
513
+ sft_use_label: false
514
+ sft: false
515
+ - label: sciq_train
516
+ type: downstream
517
+ data:
518
+ paths: null
519
+ datasets: null
520
+ label_mask_paths: null
521
+ pad_direction: right
522
+ generate_attention_mask: false
523
+ num_workers: 0
524
+ drop_last: false
525
+ pin_memory: false
526
+ prefetch_factor: null
527
+ persistent_workers: false
528
+ timeout: 0
529
+ seed: null
530
+ extra_data_paths: null
531
+ extra_data_key: null
532
+ load_extra_data_to_ram: false
533
+ index_path: null
534
+ device_eval_batch_size: null
535
+ subset_num_batches: null
536
+ sft_use_label: false
537
+ sft: false
538
+ - label: piqa_test
539
+ type: downstream
540
+ data:
541
+ paths: null
542
+ datasets: null
543
+ label_mask_paths: null
544
+ pad_direction: right
545
+ generate_attention_mask: false
546
+ num_workers: 0
547
+ drop_last: false
548
+ pin_memory: false
549
+ prefetch_factor: null
550
+ persistent_workers: false
551
+ timeout: 0
552
+ seed: null
553
+ extra_data_paths: null
554
+ extra_data_key: null
555
+ load_extra_data_to_ram: false
556
+ index_path: null
557
+ device_eval_batch_size: null
558
+ subset_num_batches: 1000
559
+ sft_use_label: false
560
+ sft: false
561
+ - label: openbook_qa_test
562
+ type: downstream
563
+ data:
564
+ paths: null
565
+ datasets: null
566
+ label_mask_paths: null
567
+ pad_direction: right
568
+ generate_attention_mask: false
569
+ num_workers: 0
570
+ drop_last: false
571
+ pin_memory: false
572
+ prefetch_factor: null
573
+ persistent_workers: false
574
+ timeout: 0
575
+ seed: null
576
+ extra_data_paths: null
577
+ extra_data_key: null
578
+ load_extra_data_to_ram: false
579
+ index_path: null
580
+ device_eval_batch_size: null
581
+ subset_num_batches: 1000
582
+ sft_use_label: false
583
+ sft: false
584
+ - label: hellaswag_test
585
+ type: downstream
586
+ data:
587
+ paths: null
588
+ datasets: null
589
+ label_mask_paths: null
590
+ pad_direction: right
591
+ generate_attention_mask: false
592
+ num_workers: 0
593
+ drop_last: false
594
+ pin_memory: false
595
+ prefetch_factor: null
596
+ persistent_workers: false
597
+ timeout: 0
598
+ seed: null
599
+ extra_data_paths: null
600
+ extra_data_key: null
601
+ load_extra_data_to_ram: false
602
+ index_path: null
603
+ device_eval_batch_size: null
604
+ subset_num_batches: 1000
605
+ sft_use_label: false
606
+ sft: false
607
+ - label: winogrande_test
608
+ type: downstream
609
+ data:
610
+ paths: null
611
+ datasets: null
612
+ label_mask_paths: null
613
+ pad_direction: right
614
+ generate_attention_mask: false
615
+ num_workers: 0
616
+ drop_last: false
617
+ pin_memory: false
618
+ prefetch_factor: null
619
+ persistent_workers: false
620
+ timeout: 0
621
+ seed: null
622
+ extra_data_paths: null
623
+ extra_data_key: null
624
+ load_extra_data_to_ram: false
625
+ index_path: null
626
+ device_eval_batch_size: null
627
+ subset_num_batches: 1000
628
+ sft_use_label: false
629
+ sft: false
630
+ - label: arc_easy_test
631
+ type: downstream
632
+ data:
633
+ paths: null
634
+ datasets: null
635
+ label_mask_paths: null
636
+ pad_direction: right
637
+ generate_attention_mask: false
638
+ num_workers: 0
639
+ drop_last: false
640
+ pin_memory: false
641
+ prefetch_factor: null
642
+ persistent_workers: false
643
+ timeout: 0
644
+ seed: null
645
+ extra_data_paths: null
646
+ extra_data_key: null
647
+ load_extra_data_to_ram: false
648
+ index_path: null
649
+ device_eval_batch_size: null
650
+ subset_num_batches: 1000
651
+ sft_use_label: false
652
+ sft: false
653
+ - label: arc_challenge_test
654
+ type: downstream
655
+ data:
656
+ paths: null
657
+ datasets: null
658
+ label_mask_paths: null
659
+ pad_direction: right
660
+ generate_attention_mask: false
661
+ num_workers: 0
662
+ drop_last: false
663
+ pin_memory: false
664
+ prefetch_factor: null
665
+ persistent_workers: false
666
+ timeout: 0
667
+ seed: null
668
+ extra_data_paths: null
669
+ extra_data_key: null
670
+ load_extra_data_to_ram: false
671
+ index_path: null
672
+ device_eval_batch_size: null
673
+ subset_num_batches: 1000
674
+ sft_use_label: false
675
+ sft: false
676
+ - label: boolq_test
677
+ type: downstream
678
+ data:
679
+ paths: null
680
+ datasets: null
681
+ label_mask_paths: null
682
+ pad_direction: right
683
+ generate_attention_mask: false
684
+ num_workers: 0
685
+ drop_last: false
686
+ pin_memory: false
687
+ prefetch_factor: null
688
+ persistent_workers: false
689
+ timeout: 0
690
+ seed: null
691
+ extra_data_paths: null
692
+ extra_data_key: null
693
+ load_extra_data_to_ram: false
694
+ index_path: null
695
+ device_eval_batch_size: null
696
+ subset_num_batches: 1000
697
+ sft_use_label: false
698
+ sft: false
699
+ - label: sciq_test
700
+ type: downstream
701
+ data:
702
+ paths: null
703
+ datasets: null
704
+ label_mask_paths: null
705
+ pad_direction: right
706
+ generate_attention_mask: false
707
+ num_workers: 0
708
+ drop_last: false
709
+ pin_memory: false
710
+ prefetch_factor: null
711
+ persistent_workers: false
712
+ timeout: 0
713
+ seed: null
714
+ extra_data_paths: null
715
+ extra_data_key: null
716
+ load_extra_data_to_ram: false
717
+ index_path: null
718
+ device_eval_batch_size: null
719
+ subset_num_batches: 1000
720
+ sft_use_label: false
721
+ sft: false
722
+ eval_interval: 2000
723
+ tokenizer:
724
+ identifier: allenai/eleuther-ai-gpt-neox-20b-pii-special
725
+ truncate_direction: right
726
+ save_folder: /n/holyscratch01/sham_lab/data-olmo/ckpts/35013764_3/
727
+ remote_save_folder: null
728
+ canceled_check_interval: 50
729
+ save_interval: 5000
730
+ save_interval_unsharded: 100000
731
+ save_interval_ephemeral: null
732
+ save_num_checkpoints_to_keep: 1
733
+ save_num_unsharded_checkpoints_to_keep: 1
734
+ save_overwrite: true
735
+ force_save_unsharded: false
736
+ no_pre_train_checkpoint: false
737
+ load_path: null
738
+ load_path_sharded_checkpointer: null
739
+ reset_optimizer_state: false
740
+ reset_trainer_state: false
741
+ sharded_checkpointer: torch_legacy
742
+ new_style_checkpoints: null
743
+ max_duration: 192000
744
+ global_train_batch_size: 256
745
+ device_train_batch_size: 64
746
+ device_train_microbatch_size: 32
747
+ device_eval_batch_size: 64
748
+ eval_subset_num_batches: 100
749
+ eval_on_load: false
750
+ device_train_grad_accum: 2
751
+ max_grad_norm: 1.0
752
+ max_grad_norm_ratio: null
753
+ precision: amp_bf16
754
+ wandb:
755
+ project: data-olmo
756
+ entity: harvardml
757
+ group: frozen-150-global-1b-5
758
+ name: olmo_35013764_3
759
+ tags:
760
+ - watching
761
+ log_artifacts: false
762
+ rank_zero_only: true
763
+ log_interval: 10
764
+ speed_monitor:
765
+ window_size: 1
766
+ gpu_flops_available: null
767
+ console_log_interval: 10
768
+ gen1_gc_interval: 1
769
+ compile:
770
+ mode: default
771
+ fullgraph: false
772
+ backend: inductor
773
+ fsdp:
774
+ use_orig_params: true
775
+ sharding_strategy: FULL_SHARD
776
+ wrapping_strategy: null
777
+ precision: mixed
778
+ hybrid_sharding_num_model_replicas: null
779
+ softmax_auxiliary_loss: true
780
+ time_limit: 2100000.0
781
+ extra_steps_after_cancel: 10
782
+ early_stopping_factor: null
783
+ save_data_indices: true
784
+ python_profiling: false
785
+ torch_profiling: false
786
+ stop_at: null
787
+ stop_after: null
788
+ activation_checkpointing: null
789
+ fused_loss: null
models/books_tau=64_1b/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a54eeb295e7a626b70bd98d5ccc58782697ec16fcf7461070d3975b890f656e
3
+ size 5656891654
models/books_tau=64_1b/optim.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0983cff8536de3da0a1cd3354ed8b7a2638a2f8d162ce65a2d6d484b7a3bb7b
3
+ size 11313806634
models/books_tau=64_1b/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35ae6cb3153ff56dbe88eb04ccafac8cbe19c16151cfc67774a09e31829c851d
3
+ size 14604