Tristan committed on
Commit
5097648
1 Parent(s): a3bdea5

Training in progress, epoch 0

Browse files
eval_job_output.txt CHANGED
@@ -1,4 +1,4 @@
1
- slurm submission log: 2024-05-20 10:11:56.252376
2
  created following sbatch script:
3
 
4
  ###############################
@@ -7,13 +7,13 @@ created following sbatch script:
7
 
8
  #SBATCH --account=nlp
9
  #SBATCH --cpus-per-task=16
10
- #SBATCH --dependency=afterok:7635778
11
  #SBATCH --gres=gpu:1
12
- #SBATCH --job-name=tthrush-job-1072320
13
  #SBATCH --mem=60G
14
  #SBATCH --nodelist=sphinx1
15
  #SBATCH --open-mode=append
16
- #SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/70m_llms_more_data_test_normalized/pythia-70m_sciq_1/eval_job_output.txt
17
  #SBATCH --partition=sphinx
18
  #SBATCH --time=14-0
19
 
@@ -24,7 +24,7 @@ created following sbatch script:
24
  cd .
25
 
26
  # launch commands
27
- srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/70m_llms_more_data_test_normalized/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks xnli_en,xnli_fr,sciq,piqa,lambada,arc_easy,default --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/70m_llms_more_data_test_normalized/pythia-70m_sciq_1/perf'
28
 
29
  ###############################
30
 
@@ -34,7 +34,7 @@ submission to slurm complete!
34
  ###############################
35
  slurm submission output
36
 
37
- Submitted batch job 7635779
38
 
39
 
40
 
 
1
+ slurm submission log: 2024-05-20 23:23:34.884664
2
  created following sbatch script:
3
 
4
  ###############################
 
7
 
8
  #SBATCH --account=nlp
9
  #SBATCH --cpus-per-task=16
10
+ #SBATCH --dependency=afterok:7637764
11
  #SBATCH --gres=gpu:1
12
+ #SBATCH --job-name=tthrush-job-2829433
13
  #SBATCH --mem=60G
14
  #SBATCH --nodelist=sphinx1
15
  #SBATCH --open-mode=append
16
+ #SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_clipped_scaled_mean/llms/pythia-70m_sciq_1/eval_job_output.txt
17
  #SBATCH --partition=sphinx
18
  #SBATCH --time=14-0
19
 
 
24
  cd .
25
 
26
  # launch commands
27
+ srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_clipped_scaled_mean/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks xnli_en,xnli_fr,sciq,piqa,lambada,arc_easy --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_clipped_scaled_mean/llms/pythia-70m_sciq_1/perf'
28
 
29
  ###############################
30
 
 
34
  ###############################
35
  slurm submission output
36
 
37
+ Submitted batch job 7637765
38
 
39
 
40
 
logs/events.out.tfevents.1716287531.sphinx2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d8f550274083b5512e4197ff7960204b39bc914e5b37e046b3a8fa2be5ed59d
3
+ size 95663
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4509028149dff3e1b88e2bab4cd52bdd67a9ce0645d8b24f3c99a8c69652bc1
3
  size 281715176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a5aa6c4a6b1993c46624d7051989d2340b9c9130cb3bde31922f9ff1ff9b77a
3
  size 281715176
train_job_output.txt CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f4c4c68bb8f3a8788182f506cd09b4211ed9eb4229ace5ef242cc8be2d4b684
3
- size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbf984f6f8e5351ae5507a52b7524a2aec7fd2eb23073a8cae95e8c3d977584b
3
+ size 5240