Tristan committed on
Commit
a78c772
1 Parent(s): 9dc37be

Training in progress, epoch 2

Browse files
eval_job_output.txt CHANGED
@@ -1,4 +1,4 @@
1
- slurm submission log: 2024-05-24 23:54:02.801635
2
  created following sbatch script:
3
 
4
  ###############################
@@ -7,13 +7,13 @@ created following sbatch script:
7
 
8
  #SBATCH --account=nlp
9
  #SBATCH --cpus-per-task=16
10
- #SBATCH --dependency=afterok:7649441
11
  #SBATCH --gres=gpu:1
12
- #SBATCH --job-name=tthrush-job-1955160
13
  #SBATCH --mem=60G
14
  #SBATCH --nodelist=sphinx1
15
  #SBATCH --open-mode=append
16
- #SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_diff/llms/pythia-70m_sciq_1/eval_job_output.txt
17
  #SBATCH --partition=sphinx
18
  #SBATCH --time=14-0
19
 
@@ -24,7 +24,7 @@ created following sbatch script:
24
  cd .
25
 
26
  # launch commands
27
- srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_diff/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks xnli_en,xnli_fr,sciq,piqa,lambada,arc_easy --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_diff/llms/pythia-70m_sciq_1/perf'
28
 
29
  ###############################
30
 
@@ -34,7 +34,175 @@ submission to slurm complete!
34
  ###############################
35
  slurm submission output
36
 
37
- Submitted batch job 7649442
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
 
40
 
 
1
+ slurm submission log: 2024-05-25 22:01:16.904818
2
  created following sbatch script:
3
 
4
  ###############################
 
7
 
8
  #SBATCH --account=nlp
9
  #SBATCH --cpus-per-task=16
10
+ #SBATCH --dependency=afterok:7651389
11
  #SBATCH --gres=gpu:1
12
+ #SBATCH --job-name=tthrush-job-1695127
13
  #SBATCH --mem=60G
14
  #SBATCH --nodelist=sphinx1
15
  #SBATCH --open-mode=append
16
+ #SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_upsample/llms/pythia-70m_sciq_1/eval_job_output.txt
17
  #SBATCH --partition=sphinx
18
  #SBATCH --time=14-0
19
 
 
24
  cd .
25
 
26
  # launch commands
27
+ srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_upsample/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks piqa,arc_easy,xnli_en,xnli_fr,xnli_de,xnli_es,sciq,lambada --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_upsample/llms/pythia-70m_sciq_1/perf'
28
 
29
  ###############################
30
 
 
34
  ###############################
35
  slurm submission output
36
 
37
+ Submitted batch job 7651390
38
+
39
+
40
+
41
+ ###############################
42
+
43
+ slurm submission log: 2024-05-25 22:02:26.073999
44
+ created following sbatch script:
45
+
46
+ ###############################
47
+
48
+ #!/bin/bash
49
+
50
+ #SBATCH --account=nlp
51
+ #SBATCH --cpus-per-task=16
52
+ #SBATCH --dependency=afterok:7651420
53
+ #SBATCH --gres=gpu:1
54
+ #SBATCH --job-name=tthrush-job-1839927
55
+ #SBATCH --mem=60G
56
+ #SBATCH --nodelist=sphinx1
57
+ #SBATCH --open-mode=append
58
+ #SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_upsample/llms/pythia-70m_sciq_1/eval_job_output.txt
59
+ #SBATCH --partition=sphinx
60
+ #SBATCH --time=14-0
61
+
62
+ # activate your desired anaconda environment
63
+ . /nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection
64
+
65
+ # cd to working directory
66
+ cd .
67
+
68
+ # launch commands
69
+ srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_upsample/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks piqa,arc_easy,xnli_en,xnli_fr,xnli_de,xnli_es,sciq,lambada --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_upsample/llms/pythia-70m_sciq_1/perf'
70
+
71
+ ###############################
72
+
73
+ submission to slurm complete!
74
+
75
+
76
+ ###############################
77
+ slurm submission output
78
+
79
+ Submitted batch job 7651421
80
+
81
+
82
+
83
+ ###############################
84
+
85
+ slurm submission log: 2024-05-25 22:12:51.200849
86
+ created following sbatch script:
87
+
88
+ ###############################
89
+
90
+ #!/bin/bash
91
+
92
+ #SBATCH --account=nlp
93
+ #SBATCH --cpus-per-task=16
94
+ #SBATCH --dependency=afterok:7651461
95
+ #SBATCH --gres=gpu:1
96
+ #SBATCH --job-name=tthrush-job-1012098
97
+ #SBATCH --mem=60G
98
+ #SBATCH --nodelist=sphinx1
99
+ #SBATCH --open-mode=append
100
+ #SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_upsample/llms/pythia-70m_sciq_1/eval_job_output.txt
101
+ #SBATCH --partition=sphinx
102
+ #SBATCH --time=14-0
103
+
104
+ # activate your desired anaconda environment
105
+ . /nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection
106
+
107
+ # cd to working directory
108
+ cd .
109
+
110
+ # launch commands
111
+ srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_upsample/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks piqa,arc_easy,xnli_en,xnli_fr,xnli_de,xnli_es,sciq,lambada --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_upsample/llms/pythia-70m_sciq_1/perf'
112
+
113
+ ###############################
114
+
115
+ submission to slurm complete!
116
+
117
+
118
+ ###############################
119
+ slurm submission output
120
+
121
+ Submitted batch job 7651462
122
+
123
+
124
+
125
+ ###############################
126
+
127
+ slurm submission log: 2024-05-25 22:15:56.896223
128
+ created following sbatch script:
129
+
130
+ ###############################
131
+
132
+ #!/bin/bash
133
+
134
+ #SBATCH --account=nlp
135
+ #SBATCH --cpus-per-task=16
136
+ #SBATCH --dependency=afterok:7651489
137
+ #SBATCH --gres=gpu:1
138
+ #SBATCH --job-name=tthrush-job-4600543
139
+ #SBATCH --mem=60G
140
+ #SBATCH --nodelist=sphinx1
141
+ #SBATCH --open-mode=append
142
+ #SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_upsample/llms/pythia-70m_sciq_1/eval_job_output.txt
143
+ #SBATCH --partition=sphinx
144
+ #SBATCH --time=14-0
145
+
146
+ # activate your desired anaconda environment
147
+ . /nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection
148
+
149
+ # cd to working directory
150
+ cd .
151
+
152
+ # launch commands
153
+ srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_upsample/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks piqa,arc_easy,xnli_en,xnli_fr,xnli_de,xnli_es,sciq,lambada --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_upsample/llms/pythia-70m_sciq_1/perf'
154
+
155
+ ###############################
156
+
157
+ submission to slurm complete!
158
+
159
+
160
+ ###############################
161
+ slurm submission output
162
+
163
+ Submitted batch job 7651490
164
+
165
+
166
+
167
+ ###############################
168
+
169
+ slurm submission log: 2024-05-25 22:18:15.756710
170
+ created following sbatch script:
171
+
172
+ ###############################
173
+
174
+ #!/bin/bash
175
+
176
+ #SBATCH --account=nlp
177
+ #SBATCH --cpus-per-task=16
178
+ #SBATCH --dependency=afterok:7651519
179
+ #SBATCH --gres=gpu:1
180
+ #SBATCH --job-name=tthrush-job-3920714
181
+ #SBATCH --mem=60G
182
+ #SBATCH --nodelist=sphinx1
183
+ #SBATCH --open-mode=append
184
+ #SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_upsample/llms/pythia-70m_sciq_1/eval_job_output.txt
185
+ #SBATCH --partition=sphinx
186
+ #SBATCH --time=14-0
187
+
188
+ # activate your desired anaconda environment
189
+ . /nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection
190
+
191
+ # cd to working directory
192
+ cd .
193
+
194
+ # launch commands
195
+ srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_upsample/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks piqa,arc_easy,xnli_en,xnli_fr,xnli_de,xnli_es,sciq,lambada --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_constrained_big_upsample/llms/pythia-70m_sciq_1/perf'
196
+
197
+ ###############################
198
+
199
+ submission to slurm complete!
200
+
201
+
202
+ ###############################
203
+ slurm submission output
204
+
205
+ Submitted batch job 7651520
206
 
207
 
208
 
logs/events.out.tfevents.1716743892.sphinx2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2a4ab904c93db3e2dc78df6a2787874886893722b77bee0b3a17c7976223a7f
3
+ size 26117
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50a4be07c6352aa48a831fbaa2979ed124784b2d84cc1e2713b4539378a37177
3
  size 281715176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40a48589a587776162b95fb4619f4e69b08b408c2a3410afe0313e4acd34333c
3
  size 281715176
train_job_output.txt CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46a1093854b1dc085cb47307a8ccf603f67215e6816d7415bbf2835bd797f3b5
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:209af55204bf74f64a94dcd9521d29859d618933c50d5cfa614440e26a8531f0
3
  size 5240