Tristan commited on
Commit
e82b954
1 Parent(s): ff89a1c

Training in progress, epoch 0

Browse files
eval_job_output.txt CHANGED
@@ -1,4 +1,4 @@
1
- slurm submission log: 2024-05-20 23:23:25.682813
2
  created following sbatch script:
3
 
4
  ###############################
@@ -7,13 +7,255 @@ created following sbatch script:
7
 
8
  #SBATCH --account=nlp
9
  #SBATCH --cpus-per-task=16
10
- #SBATCH --dependency=afterok:7637742
11
  #SBATCH --gres=gpu:1
12
- #SBATCH --job-name=tthrush-job-1473087
13
  #SBATCH --mem=60G
14
  #SBATCH --nodelist=sphinx1
15
  #SBATCH --open-mode=append
16
- #SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_clipped_scaled/llms/pythia-70m_sciq_1/eval_job_output.txt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  #SBATCH --partition=sphinx
18
  #SBATCH --time=14-0
19
 
@@ -24,7 +266,7 @@ created following sbatch script:
24
  cd .
25
 
26
  # launch commands
27
- srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_clipped_scaled/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks xnli_en,xnli_fr,sciq,piqa,lambada,arc_easy --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_clipped_scaled/llms/pythia-70m_sciq_1/perf'
28
 
29
  ###############################
30
 
@@ -34,7 +276,7 @@ submission to slurm complete!
34
  ###############################
35
  slurm submission output
36
 
37
- Submitted batch job 7637743
38
 
39
 
40
 
 
1
+ slurm submission log: 2024-05-22 17:07:25.162212
2
  created following sbatch script:
3
 
4
  ###############################
 
7
 
8
  #SBATCH --account=nlp
9
  #SBATCH --cpus-per-task=16
10
+ #SBATCH --dependency=afterok:7642740
11
  #SBATCH --gres=gpu:1
12
+ #SBATCH --job-name=tthrush-job-2791360
13
  #SBATCH --mem=60G
14
  #SBATCH --nodelist=sphinx1
15
  #SBATCH --open-mode=append
16
+ #SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1/eval_job_output.txt
17
+ #SBATCH --partition=sphinx
18
+ #SBATCH --time=14-0
19
+
20
+ # activate your desired anaconda environment
21
+ . /nlp/scr/tthrush/miniconda3/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection
22
+
23
+ # cd to working directory
24
+ cd .
25
+
26
+ # launch commands
27
+ srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks xnli_en,xnli_fr,sciq,piqa,lambada,arc_easy --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1/perf'
28
+
29
+ ###############################
30
+
31
+ submission to slurm complete!
32
+
33
+
34
+ ###############################
35
+ slurm submission output
36
+
37
+ Submitted batch job 7642741
38
+
39
+
40
+
41
+ ###############################
42
+
43
+ slurm submission log: 2024-05-22 17:23:51.579657
44
+ created following sbatch script:
45
+
46
+ ###############################
47
+
48
+ #!/bin/bash
49
+
50
+ #SBATCH --account=nlp
51
+ #SBATCH --cpus-per-task=16
52
+ #SBATCH --dependency=afterok:7642780
53
+ #SBATCH --gres=gpu:1
54
+ #SBATCH --job-name=tthrush-job-1978619
55
+ #SBATCH --mem=60G
56
+ #SBATCH --nodelist=sphinx1
57
+ #SBATCH --open-mode=append
58
+ #SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1/eval_job_output.txt
59
+ #SBATCH --partition=sphinx
60
+ #SBATCH --time=14-0
61
+
62
+ # activate your desired anaconda environment
63
+ . /nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection
64
+
65
+ # cd to working directory
66
+ cd .
67
+
68
+ # launch commands
69
+ srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks xnli_en,xnli_fr,sciq,piqa,lambada,arc_easy --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1/perf'
70
+
71
+ ###############################
72
+
73
+ submission to slurm complete!
74
+
75
+
76
+ ###############################
77
+ slurm submission output
78
+
79
+ Submitted batch job 7642781
80
+
81
+
82
+
83
+ ###############################
84
+
85
+ slurm submission log: 2024-05-22 17:29:15.965569
86
+ created following sbatch script:
87
+
88
+ ###############################
89
+
90
+ #!/bin/bash
91
+
92
+ #SBATCH --account=nlp
93
+ #SBATCH --cpus-per-task=16
94
+ #SBATCH --dependency=afterok:7642805
95
+ #SBATCH --gres=gpu:1
96
+ #SBATCH --job-name=tthrush-job-2597090
97
+ #SBATCH --mem=60G
98
+ #SBATCH --nodelist=sphinx1
99
+ #SBATCH --open-mode=append
100
+ #SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1/eval_job_output.txt
101
+ #SBATCH --partition=sphinx
102
+ #SBATCH --time=14-0
103
+
104
+ # activate your desired anaconda environment
105
+ . /nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection
106
+
107
+ # cd to working directory
108
+ cd .
109
+
110
+ # launch commands
111
+ srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks xnli_en,xnli_fr,sciq,piqa,lambada,arc_easy --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1/perf'
112
+
113
+ ###############################
114
+
115
+ submission to slurm complete!
116
+
117
+
118
+ ###############################
119
+ slurm submission output
120
+
121
+ Submitted batch job 7642806
122
+
123
+
124
+
125
+ ###############################
126
+
127
+ /var/lib/slurm/slurmd/job7642806/slurm_script: line 16: /nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/etc/profile.d/conda.sh: No such file or directory
128
+
129
+ CommandNotFoundError: Your shell has not been properly configured to use 'conda activate'.
130
+ To initialize your shell, run
131
+
132
+ $ conda init <SHELL_NAME>
133
+
134
+ Currently supported shells are:
135
+ - bash
136
+ - fish
137
+ - tcsh
138
+ - xonsh
139
+ - zsh
140
+ - powershell
141
+
142
+ See 'conda init --help' for more information and options.
143
+
144
+ IMPORTANT: You may need to close and restart your shell after running 'conda init'.
145
+
146
+
147
+ ###############################
148
+ start time: 2024-05-22 17:31:53.790973
149
+ machine: sphinx1
150
+ conda env: pretraining-coreset-selection
151
+ ###############################
152
+ running following processes
153
+
154
+ lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks xnli_en,xnli_fr,sciq,piqa,lambada,arc_easy --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1/perf
155
+
156
+
157
+ ###############################
158
+ command outputs:
159
+
160
+
161
+ 2024-05-22:17:31:59,220 INFO [utils.py:145] Note: detected 255 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
162
+ 2024-05-22:17:31:59,220 INFO [utils.py:148] Note: NumExpr detected 255 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
163
+ 2024-05-22:17:31:59,220 INFO [utils.py:160] NumExpr defaulting to 8 threads.
164
+ 2024-05-22:17:31:59,893 INFO [config.py:58] PyTorch version 2.2.2 available.
165
+ 2024-05-22:17:32:04,203 INFO [__main__.py:156] Verbosity set to INFO
166
+ 2024-05-22:17:32:14,116 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.
167
+ /nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/lib/python3.10/site-packages/datasets/load.py:1429: FutureWarning: The repository for hails/mmlu_no_train contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/hails/mmlu_no_train
168
+ You can avoid this message in future by passing the argument `trust_remote_code=True`.
169
+ Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.
170
+ warnings.warn(
171
+ 2024-05-22:17:33:33,299 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.
172
+ 2024-05-22:17:33:33,305 INFO [__main__.py:229] Selected Tasks: ['arc_easy', 'lambada', 'piqa', 'sciq', 'xnli_en', 'xnli_fr']
173
+ 2024-05-22:17:33:33,714 INFO [huggingface.py:148] Using device 'cuda'
174
+ Traceback (most recent call last):
175
+ File "/nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/bin/lm_eval", line 8, in <module>
176
+ sys.exit(cli_evaluate())
177
+ File "/sailhome/tthrush/lm-evaluation-harness/lm_eval/__main__.py", line 231, in cli_evaluate
178
+ results = evaluator.simple_evaluate(
179
+ File "/sailhome/tthrush/lm-evaluation-harness/lm_eval/utils.py", line 415, in _wrapper
180
+ return fn(*args, **kwargs)
181
+ File "/sailhome/tthrush/lm-evaluation-harness/lm_eval/evaluator.py", line 98, in simple_evaluate
182
+ lm = lm_eval.api.registry.get_model(model).create_from_arg_string(
183
+ File "/sailhome/tthrush/lm-evaluation-harness/lm_eval/api/model.py", line 134, in create_from_arg_string
184
+ return cls(**args, **args2)
185
+ File "/sailhome/tthrush/lm-evaluation-harness/lm_eval/models/huggingface.py", line 174, in __init__
186
+ self._get_config(
187
+ File "/sailhome/tthrush/lm-evaluation-harness/lm_eval/models/huggingface.py", line 420, in _get_config
188
+ self._config = transformers.AutoConfig.from_pretrained(
189
+ File "/nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/lib/python3.10/site-packages/transformers/models/auto/configuration_auto.py", line 928, in from_pretrained
190
+ config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
191
+ File "/nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/lib/python3.10/site-packages/transformers/configuration_utils.py", line 631, in get_config_dict
192
+ config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
193
+ File "/nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/lib/python3.10/site-packages/transformers/configuration_utils.py", line 686, in _get_config_dict
194
+ resolved_config_file = cached_file(
195
+ File "/nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/lib/python3.10/site-packages/transformers/utils/hub.py", line 369, in cached_file
196
+ raise EnvironmentError(
197
+ OSError: /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1 does not appear to have a file named config.json. Checkout 'https://huggingface.co//juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1/tree/main' for available files.
198
+ ###############################
199
+ end time: 2024-05-22 17:33:43.911118
200
+ elapsed time: 0:01:50.120145
201
+ slurm submission log: 2024-05-22 17:41:39.457215
202
+ created following sbatch script:
203
+
204
+ ###############################
205
+
206
+ #!/bin/bash
207
+
208
+ #SBATCH --account=nlp
209
+ #SBATCH --cpus-per-task=16
210
+ #SBATCH --dependency=afterok:7642834
211
+ #SBATCH --gres=gpu:1
212
+ #SBATCH --job-name=tthrush-job-24240
213
+ #SBATCH --mem=60G
214
+ #SBATCH --nodelist=sphinx1
215
+ #SBATCH --open-mode=append
216
+ #SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1/eval_job_output.txt
217
+ #SBATCH --partition=sphinx
218
+ #SBATCH --time=14-0
219
+
220
+ # activate your desired anaconda environment
221
+ . /nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection
222
+
223
+ # cd to working directory
224
+ cd .
225
+
226
+ # launch commands
227
+ srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks xnli_en,xnli_fr,sciq,piqa,lambada,arc_easy --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1/perf'
228
+
229
+ ###############################
230
+
231
+ submission to slurm complete!
232
+
233
+
234
+ ###############################
235
+ slurm submission output
236
+
237
+ Submitted batch job 7642835
238
+
239
+
240
+
241
+ ###############################
242
+
243
+ slurm submission log: 2024-05-22 19:52:23.060911
244
+ created following sbatch script:
245
+
246
+ ###############################
247
+
248
+ #!/bin/bash
249
+
250
+ #SBATCH --account=nlp
251
+ #SBATCH --cpus-per-task=16
252
+ #SBATCH --dependency=afterok:7643057
253
+ #SBATCH --gres=gpu:1
254
+ #SBATCH --job-name=tthrush-job-4137796
255
+ #SBATCH --mem=60G
256
+ #SBATCH --nodelist=sphinx1
257
+ #SBATCH --open-mode=append
258
+ #SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1/eval_job_output.txt
259
  #SBATCH --partition=sphinx
260
  #SBATCH --time=14-0
261
 
 
266
  cd .
267
 
268
  # launch commands
269
+ srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks xnli_en,xnli_fr,sciq,piqa,lambada,arc_easy --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained/llms/pythia-70m_sciq_1/perf'
270
 
271
  ###############################
272
 
 
276
  ###############################
277
  slurm submission output
278
 
279
+ Submitted batch job 7643058
280
 
281
 
282
 
logs/events.out.tfevents.1716462340.sphinx2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ceb8dfead4db2cd215bac5aca5134227d88e37cd309967443488c1220bdead2f
3
+ size 95428
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09d84270040ceca82dc15a4388c4b665b21b56b4351768ba347f58081b53e3fd
3
  size 281715176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d1d0b5f6fe2fd1b3e778eb06905f9ff0b46cd859dd0d2696a8fdd8dfb3af932
3
  size 281715176
train_job_output.txt CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce8c68572ae640438186e4bbb1f34f00a7686c8c0261f06d0c6d518ab3ccf1f5
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42604c6e8628be2643fe1460d9bd416ac8e71af2f5f0e7182a340f5cb4e9907f
3
  size 5176