Muennighoff committed
Commit 83391d7
1 Parent(s): ff75c09
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. .gitattributes +48 -0
  2. 4b284b12bc4/3431499.err +0 -0
  3. 4b284b12bc4/3431499.out +0 -0
  4. 4b284b12bc4/latest +1 -0
  5. 4b284b12bc4/sbatch_4b284b12bval.sh +168 -0
  6. 4b284b12bc4/tensorboard_4b284b12bc4val/events.out.tfevents.1682762126.nid006361.45223.0 +3 -0
  7. 4b284b17bc4/3431498.err +0 -0
  8. 4b284b17bc4/3431498.out +0 -0
  9. 4b284b17bc4/latest +1 -0
  10. 4b284b17bc4/sbatch_4b284b17bval.sh +168 -0
  11. 4b284b17bc4/tensorboard_4b284b17bc4val/events.out.tfevents.1682762116.nid006180.66890.0 +3 -0
  12. 4b284b1b9c4/3449314.err +0 -0
  13. 4b284b1b9c4/3449314.out +0 -0
  14. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_0.jsonl +0 -0
  15. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_1.jsonl +0 -0
  16. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_2.jsonl +0 -0
  17. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_3.jsonl +0 -0
  18. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_4.jsonl +0 -0
  19. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_5.jsonl +0 -0
  20. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_0.jsonl +0 -0
  21. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_1.jsonl +0 -0
  22. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_2.jsonl +0 -0
  23. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_3.jsonl +0 -0
  24. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_4.jsonl +0 -0
  25. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_5.jsonl +0 -0
  26. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl +0 -0
  27. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl +0 -0
  28. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl +0 -0
  29. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl +0 -0
  30. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl +0 -0
  31. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl +0 -0
  32. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_0.jsonl +0 -0
  33. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_1.jsonl +0 -0
  34. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_2.jsonl +0 -0
  35. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_3.jsonl +0 -0
  36. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_4.jsonl +0 -0
  37. 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_5.jsonl +0 -0
  38. 4b284b1b9c4/evaluation/generation/merged.csv +1 -0
  39. 4b284b1b9c4/evaluation/generation/merged.json +1 -0
  40. 4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_0.csv +21 -0
  41. 4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_0.json +87 -0
  42. 4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_1.csv +21 -0
  43. 4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_1.json +87 -0
  44. 4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_2.csv +21 -0
  45. 4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_2.json +87 -0
  46. 4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_3.csv +21 -0
  47. 4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_3.json +87 -0
  48. 4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_4.csv +21 -0
  49. 4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_4.json +87 -0
  50. 4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_5.csv +21 -0
.gitattributes CHANGED
@@ -3116,3 +3116,51 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  4b284b84bc4v2/evaluation/generation/examples.4b284b84bc4v2_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
  4b284b84bc4v2/evaluation/generation/examples.4b284b84bc4v2_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
  4b284b84bc4v2/evaluation/generation/examples.4b284b84bc4v2_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 4b284b6bc4/evaluation/generation/examples.4b284b6bc4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
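Note: the .gitattributes entries added above route the new evaluation JSONL files through Git LFS. As a minimal sketch (not part of this commit), a line of this form is what git-lfs appends when a path or pattern is tracked:

# Hypothetical example: track the new generation outputs with LFS, then stage
# the .gitattributes file that `git lfs track` rewrites.
git lfs track "4b284b1b9c4/evaluation/generation/examples.*.jsonl"
git add .gitattributes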
4b284b12bc4/3431499.err ADDED
The diff for this file is too large to render. See raw diff
 
4b284b12bc4/3431499.out ADDED
The diff for this file is too large to render. See raw diff
 
4b284b12bc4/latest ADDED
@@ -0,0 +1 @@
+ global_step80108
4b284b12bc4/sbatch_4b284b12bval.sh ADDED
@@ -0,0 +1,168 @@
+ #!/bin/bash
+ #SBATCH --exclude=nid007571,nid007112,nid006774,nid007502,nid007506,nid007507,nid005145,nid006692,nid007218,nid007123,nid006124,nid006123,nid007496,nid007237,nid006852,nid007206,nid006947,nid007212,nid006977,nid007222,nid005444,nid007219,nid007493,nid007221,nid005300,nid005619,nid006118,nid005203,nid006113,nid006481,nid007077,nid005208,nid005207,nid005879,nid005901
+ #SBATCH --nodes=32
+ #SBATCH --ntasks-per-node=1
+ #SBATCH --cpus-per-task=40
+ #SBATCH --mem=256G
+ #SBATCH -p standard-g
+ #SBATCH -t 48:00:00
+ #SBATCH --gpus-per-node=mi250:8
+ #SBATCH --exclusive=user
+ #SBATCH --hint=nomultithread
+ #SBATCH --account=project_462000119
+ #SBATCH -o logs/%j.out
+ #SBATCH -e logs/%j.err
+
+ VARIANT=4b284b12bc4val
+ VARIANT_CKPT=lm1-4b2-84b-c4-repetitions/4b284b12bc4
+
+ # if run without sbatch, invoke here
+ if [ -z $SLURM_JOB_ID ]; then
+ mkdir -p logs
+ sbatch "$0"
+ exit
+ fi
+
+ set -euo pipefail
+
+ # symlink logs/latest.out and logs/latest.err
+ ln -f -s $SLURM_JOB_ID.out logs/latest.out
+ ln -f -s $SLURM_JOB_ID.err logs/latest.err
+
+ KILL_SWITCH_PATH=kill-switch-$VARIANT
+ CHECKPOINT_PATH=$VARIANT_CKPT
+ TENSORBOARD_PATH=tensorboard_$VARIANT
+
+ # Data
+ VOCAB_FILE="gpt2/vocab.json"
+ MERGE_FILE="gpt2/merges.txt"
+ #DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document"
+ TRAIN_DATA_PATH=train1b5.txt
+ # "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_12B_text_document"
+ VALID_DATA_PATH=val.txt
+ # "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document"
+
+ PP_SIZE=1
+ TP_SIZE=2
+
+ MICRO_BATCH_SIZE=2
+ GRADIENT_ACCUMULATION_STEPS=2
+ WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES))
+ GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS))
+
+ # Model parameters
+ source model_params.sh
+ MODEL_PARAM=("${PARAM_4516M[@]}")
+ NHIDDEN=${MODEL_PARAM[0]}
+ FFN_HIDDEN_SIZE=${MODEL_PARAM[1]}
+ KV_SIZE=${MODEL_PARAM[2]}
+ NHEADS=${MODEL_PARAM[3]}
+ NLAYERS=${MODEL_PARAM[4]}
+ SEQ_LEN=2048
+
+ echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS"
+
+ SAVE_INTERVAL=1000
+
+ # Tokens: 25611230000
+ # -> Samples: 12505484
+ TRAIN_SAMPLES=1
+
+ OPTIMIZER_ARGS=" \
+ --optimizer adam \
+ --adam-beta1 0.9 \
+ --adam-beta2 0.999 \
+ --adam-eps 1e-8 \
+ --lr 2e-4 \
+ --min-lr 2e-5 \
+ --lr-decay-style cosine \
+ --lr-decay-samples $TRAIN_SAMPLES \
+ --lr-warmup-samples 0 \
+ --clip-grad 1.0 \
+ --weight-decay 1e-1 \
+ --override-lr-scheduler \
+ --reset-progress \
+ --no-load-optim \
+ "
+
+ GPT_ARGS=" \
+ --num-layers $NLAYERS \
+ --hidden-size $NHIDDEN \
+ --num-attention-heads $NHEADS \
+ --kv-channels $KV_SIZE \
+ --ffn-hidden-size $FFN_HIDDEN_SIZE \
+ --seq-length $SEQ_LEN \
+ --max-position-embeddings $SEQ_LEN \
+ --micro-batch-size $MICRO_BATCH_SIZE \
+ --global-batch-size $GLOBAL_BATCH_SIZE \
+ --train-samples $TRAIN_SAMPLES \
+ --vocab-file $VOCAB_FILE \
+ --merge-file $MERGE_FILE \
+ --clip-grad 1.0 \
+ --kill-switch-path $KILL_SWITCH_PATH \
+ --bf16 \
+ $OPTIMIZER_ARGS \
+ "
+
+ OUTPUT_ARGS=" \
+ --log-interval 10 \
+ --save-interval $SAVE_INTERVAL \
+ --eval-interval 1 \
+ --eval-iters 100 \
+ --eval-only true \
+ --tensorboard-dir $TENSORBOARD_PATH \
+ --tensorboard-queue-size 5 \
+ --log-timers-to-tensorboard \
+ --log-batch-size-to-tensorboard \
+ --log-validation-ppl-to-tensorboard \
+ "
+
+ ZERO_STAGE=0
+
+ mkdir -p ds_configs
+ DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json"
+
+ cat <<EOF > $DS_CONFIG_PATH
+ {
+ "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE,
+ "train_batch_size": $GLOBAL_BATCH_SIZE,
+ "gradient_clipping": 1.0,
+ "zero_optimization": {
+ "stage": $ZERO_STAGE
+ },
+ "bf16": {
+ "enabled": true
+ },
+ "steps_per_print": 2000,
+ "wall_clock_breakdown": false
+ }
+ EOF
+
+ DEEPSPEED_ARGS=" \
+ --deepspeed \
+ --deepspeed_config $DS_CONFIG_PATH \
+ --zero-stage $ZERO_STAGE \
+ "
+
+ CMD=" \
+ Megatron-DeepSpeed/pretrain_gpt.py \
+ --tensor-model-parallel-size $TP_SIZE \
+ --pipeline-model-parallel-size $PP_SIZE \
+ $GPT_ARGS \
+ $OUTPUT_ARGS \
+ --save $CHECKPOINT_PATH \
+ --load $CHECKPOINT_PATH \
+ --train-weighted-split-paths-path $TRAIN_DATA_PATH \
+ --valid-weighted-split-paths-path $VALID_DATA_PATH \
+ --data-impl mmap \
+ $DEEPSPEED_ARGS \
+ "
+
+ echo $CMD
+
+ echo "START $SLURM_JOBID: $(date)"
+
+ # bash launch_srun_32.sh $CMD
+ srun --label launch.sh $CMD
+
+ echo "END $SLURM_JOBID: $(date)"
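For reference, the batch-size arithmetic in the script above works out as follows (a sketch assuming the resources requested by the #SBATCH directives, i.e. SLURM_JOB_NUM_NODES=32 and SLURM_GPUS_ON_NODE=8):

# WORLD_SIZE = GPUs per node * number of nodes
WORLD_SIZE=$((8 * 32))                      # 256 GPUs
# GLOBAL_BATCH_SIZE = micro batch * world size * gradient accumulation steps
GLOBAL_BATCH_SIZE=$((2 * WORLD_SIZE * 2))   # 2 * 256 * 2 = 1024 samples per step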
4b284b12bc4/tensorboard_4b284b12bc4val/events.out.tfevents.1682762126.nid006361.45223.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f919c1294214557e109fa48f1ef5c77c27e44762991f870399ece229ffdc44bb
+ size 980
4b284b17bc4/3431498.err ADDED
The diff for this file is too large to render. See raw diff
 
4b284b17bc4/3431498.out ADDED
The diff for this file is too large to render. See raw diff
 
4b284b17bc4/latest ADDED
@@ -0,0 +1 @@
+ global_step80108
4b284b17bc4/sbatch_4b284b17bval.sh ADDED
@@ -0,0 +1,168 @@
+ #!/bin/bash
+ #SBATCH --exclude=nid007571,nid007112,nid006774,nid007502,nid007506,nid007507,nid005145,nid006692,nid007218,nid007123,nid006124,nid006123,nid007496,nid007237,nid006852,nid007206,nid006947,nid007212,nid006977,nid007222,nid005444,nid007219,nid007493,nid007221,nid005300,nid005619,nid006118,nid005203,nid006113,nid006481,nid007077,nid005208,nid005207,nid005879,nid005901
+ #SBATCH --nodes=32
+ #SBATCH --ntasks-per-node=1
+ #SBATCH --cpus-per-task=40
+ #SBATCH --mem=256G
+ #SBATCH -p standard-g
+ #SBATCH -t 48:00:00
+ #SBATCH --gpus-per-node=mi250:8
+ #SBATCH --exclusive=user
+ #SBATCH --hint=nomultithread
+ #SBATCH --account=project_462000119
+ #SBATCH -o logs/%j.out
+ #SBATCH -e logs/%j.err
+
+ VARIANT=4b284b17bc4val
+ VARIANT_CKPT=lm1-4b2-84b-c4-repetitions/4b284b17bc4
+
+ # if run without sbatch, invoke here
+ if [ -z $SLURM_JOB_ID ]; then
+ mkdir -p logs
+ sbatch "$0"
+ exit
+ fi
+
+ set -euo pipefail
+
+ # symlink logs/latest.out and logs/latest.err
+ ln -f -s $SLURM_JOB_ID.out logs/latest.out
+ ln -f -s $SLURM_JOB_ID.err logs/latest.err
+
+ KILL_SWITCH_PATH=kill-switch-$VARIANT
+ CHECKPOINT_PATH=$VARIANT_CKPT
+ TENSORBOARD_PATH=tensorboard_$VARIANT
+
+ # Data
+ VOCAB_FILE="gpt2/vocab.json"
+ MERGE_FILE="gpt2/merges.txt"
+ #DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document"
+ TRAIN_DATA_PATH=train1b5.txt
+ # "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_12B_text_document"
+ VALID_DATA_PATH=val.txt
+ # "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document"
+
+ PP_SIZE=1
+ TP_SIZE=2
+
+ MICRO_BATCH_SIZE=2
+ GRADIENT_ACCUMULATION_STEPS=2
+ WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES))
+ GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS))
+
+ # Model parameters
+ source model_params.sh
+ MODEL_PARAM=("${PARAM_4516M[@]}")
+ NHIDDEN=${MODEL_PARAM[0]}
+ FFN_HIDDEN_SIZE=${MODEL_PARAM[1]}
+ KV_SIZE=${MODEL_PARAM[2]}
+ NHEADS=${MODEL_PARAM[3]}
+ NLAYERS=${MODEL_PARAM[4]}
+ SEQ_LEN=2048
+
+ echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS"
+
+ SAVE_INTERVAL=1000
+
+ # Tokens: 25611230000
+ # -> Samples: 12505484
+ TRAIN_SAMPLES=1
+
+ OPTIMIZER_ARGS=" \
+ --optimizer adam \
+ --adam-beta1 0.9 \
+ --adam-beta2 0.999 \
+ --adam-eps 1e-8 \
+ --lr 2e-4 \
+ --min-lr 2e-5 \
+ --lr-decay-style cosine \
+ --lr-decay-samples $TRAIN_SAMPLES \
+ --lr-warmup-samples 0 \
+ --clip-grad 1.0 \
+ --weight-decay 1e-1 \
+ --override-lr-scheduler \
+ --reset-progress \
+ --no-load-optim \
+ "
+
+ GPT_ARGS=" \
+ --num-layers $NLAYERS \
+ --hidden-size $NHIDDEN \
+ --num-attention-heads $NHEADS \
+ --kv-channels $KV_SIZE \
+ --ffn-hidden-size $FFN_HIDDEN_SIZE \
+ --seq-length $SEQ_LEN \
+ --max-position-embeddings $SEQ_LEN \
+ --micro-batch-size $MICRO_BATCH_SIZE \
+ --global-batch-size $GLOBAL_BATCH_SIZE \
+ --train-samples $TRAIN_SAMPLES \
+ --vocab-file $VOCAB_FILE \
+ --merge-file $MERGE_FILE \
+ --clip-grad 1.0 \
+ --kill-switch-path $KILL_SWITCH_PATH \
+ --bf16 \
+ $OPTIMIZER_ARGS \
+ "
+
+ OUTPUT_ARGS=" \
+ --log-interval 10 \
+ --save-interval $SAVE_INTERVAL \
+ --eval-interval 1 \
+ --eval-iters 100 \
+ --eval-only true \
+ --tensorboard-dir $TENSORBOARD_PATH \
+ --tensorboard-queue-size 5 \
+ --log-timers-to-tensorboard \
+ --log-batch-size-to-tensorboard \
+ --log-validation-ppl-to-tensorboard \
+ "
+
+ ZERO_STAGE=0
+
+ mkdir -p ds_configs
+ DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json"
+
+ cat <<EOF > $DS_CONFIG_PATH
+ {
+ "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE,
+ "train_batch_size": $GLOBAL_BATCH_SIZE,
+ "gradient_clipping": 1.0,
+ "zero_optimization": {
+ "stage": $ZERO_STAGE
+ },
+ "bf16": {
+ "enabled": true
+ },
+ "steps_per_print": 2000,
+ "wall_clock_breakdown": false
+ }
+ EOF
+
+ DEEPSPEED_ARGS=" \
+ --deepspeed \
+ --deepspeed_config $DS_CONFIG_PATH \
+ --zero-stage $ZERO_STAGE \
+ "
+
+ CMD=" \
+ Megatron-DeepSpeed/pretrain_gpt.py \
+ --tensor-model-parallel-size $TP_SIZE \
+ --pipeline-model-parallel-size $PP_SIZE \
+ $GPT_ARGS \
+ $OUTPUT_ARGS \
+ --save $CHECKPOINT_PATH \
+ --load $CHECKPOINT_PATH \
+ --train-weighted-split-paths-path $TRAIN_DATA_PATH \
+ --valid-weighted-split-paths-path $VALID_DATA_PATH \
+ --data-impl mmap \
+ $DEEPSPEED_ARGS \
+ "
+
+ echo $CMD
+
+ echo "START $SLURM_JOBID: $(date)"
+
+ # bash launch_srun_32.sh $CMD
+ srun --label launch.sh $CMD
+
+ echo "END $SLURM_JOBID: $(date)"
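For the values set in this script (MICRO_BATCH_SIZE=2, GLOBAL_BATCH_SIZE=1024 on 32 nodes of 8 GPUs, ZERO_STAGE=0), the heredoc above writes a DeepSpeed config along these lines (a sketch of the substituted output, shown as comments):

cat ds_configs/"$SLURM_JOB_ID".json
# {
#     "train_micro_batch_size_per_gpu": 2,
#     "train_batch_size": 1024,
#     "gradient_clipping": 1.0,
#     "zero_optimization": { "stage": 0 },
#     "bf16": { "enabled": true },
#     "steps_per_print": 2000,
#     "wall_clock_breakdown": false
# }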
4b284b17bc4/tensorboard_4b284b17bc4val/events.out.tfevents.1682762116.nid006180.66890.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:711b6f20e54a3aae0a6a3ecd72666bad45ea2eef635b9b819efa891222c43896
+ size 980
4b284b1b9c4/3449314.err ADDED
The diff for this file is too large to render. See raw diff
 
4b284b1b9c4/3449314.out ADDED
The diff for this file is too large to render. See raw diff
 
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_0.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_1.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_2.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_3.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_4.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-web_nlg_en_PALM_prompt_5.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_0.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_1.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_2.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_3.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_4.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_GEM-wiki_lingua_en_tldr_en_5.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_0.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_1.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_2.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_3.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_4.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/examples.4b284b1b9c4_gem_xsum_article_DOC_summary_5.jsonl ADDED
File without changes
4b284b1b9c4/evaluation/generation/merged.csv ADDED
@@ -0,0 +1 @@
+ dataset,fewshots,prompt,metric,value
4b284b1b9c4/evaluation/generation/merged.json ADDED
@@ -0,0 +1 @@
+ {}
4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_0.csv ADDED
@@ -0,0 +1,21 @@
+ task,metric,value,err,version
+ anli_r1,acc,0.318,0.014734079309311901,0
+ anli_r2,acc,0.354,0.015129868238451773,0
+ anli_r3,acc,0.3458333333333333,0.013736245342311012,0
+ arc_challenge,acc,0.23890784982935154,0.012461071376316617,0
+ arc_challenge,acc_norm,0.2593856655290102,0.012808273573927104,0
+ arc_easy,acc,0.49284511784511786,0.010258733022446362,0
+ arc_easy,acc_norm,0.45791245791245794,0.010223371342195902,0
+ boolq,acc,0.5229357798165137,0.008735849459018513,1
+ cb,acc,0.42857142857142855,0.06672848092813058,1
+ cb,f1,0.22644574398960363,,1
+ copa,acc,0.72,0.04512608598542127,0
+ hellaswag,acc,0.40141406094403503,0.004891826692722825,0
+ hellaswag,acc_norm,0.5024895439155547,0.004989719559439899,0
+ piqa,acc,0.704570184983678,0.010644731559342462,0
+ piqa,acc_norm,0.7083786724700761,0.010604441527428793,0
+ rte,acc,0.5523465703971119,0.02993107036293953,0
+ sciq,acc,0.708,0.014385511563477345,0
+ sciq,acc_norm,0.647,0.015120172605483692,0
+ storycloze_2016,acc,0.6707642971672902,0.010867199207548979,0
+ winogrande,acc,0.5090765588003157,0.014050170094497707,0
4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_0.json ADDED
@@ -0,0 +1,87 @@
+ {
+ "results": {
+ "anli_r1": {
+ "acc": 0.318,
+ "acc_stderr": 0.014734079309311901
+ },
+ "anli_r2": {
+ "acc": 0.354,
+ "acc_stderr": 0.015129868238451773
+ },
+ "anli_r3": {
+ "acc": 0.3458333333333333,
+ "acc_stderr": 0.013736245342311012
+ },
+ "cb": {
+ "acc": 0.42857142857142855,
+ "acc_stderr": 0.06672848092813058,
+ "f1": 0.22644574398960363
+ },
+ "copa": {
+ "acc": 0.72,
+ "acc_stderr": 0.04512608598542127
+ },
+ "hellaswag": {
+ "acc": 0.40141406094403503,
+ "acc_stderr": 0.004891826692722825,
+ "acc_norm": 0.5024895439155547,
+ "acc_norm_stderr": 0.004989719559439899
+ },
+ "rte": {
+ "acc": 0.5523465703971119,
+ "acc_stderr": 0.02993107036293953
+ },
+ "winogrande": {
+ "acc": 0.5090765588003157,
+ "acc_stderr": 0.014050170094497707
+ },
+ "storycloze_2016": {
+ "acc": 0.6707642971672902,
+ "acc_stderr": 0.010867199207548979
+ },
+ "boolq": {
+ "acc": 0.5229357798165137,
+ "acc_stderr": 0.008735849459018513
+ },
+ "arc_easy": {
+ "acc": 0.49284511784511786,
+ "acc_stderr": 0.010258733022446362,
+ "acc_norm": 0.45791245791245794,
+ "acc_norm_stderr": 0.010223371342195902
+ },
+ "arc_challenge": {
+ "acc": 0.23890784982935154,
+ "acc_stderr": 0.012461071376316617,
+ "acc_norm": 0.2593856655290102,
+ "acc_norm_stderr": 0.012808273573927104
+ },
+ "sciq": {
+ "acc": 0.708,
+ "acc_stderr": 0.014385511563477345,
+ "acc_norm": 0.647,
+ "acc_norm_stderr": 0.015120172605483692
+ },
+ "piqa": {
+ "acc": 0.704570184983678,
+ "acc_stderr": 0.010644731559342462,
+ "acc_norm": 0.7083786724700761,
+ "acc_norm_stderr": 0.010604441527428793
+ }
+ },
+ "versions": {
+ "anli_r1": 0,
+ "anli_r2": 0,
+ "anli_r3": 0,
+ "cb": 1,
+ "copa": 0,
+ "hellaswag": 0,
+ "rte": 0,
+ "winogrande": 0,
+ "storycloze_2016": 0,
+ "boolq": 1,
+ "arc_easy": 0,
+ "arc_challenge": 0,
+ "sciq": 0,
+ "piqa": 0
+ }
+ }
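The rankeval CSV and JSON files carry the same numbers in two layouts. A minimal sketch of flattening one of the JSON files into the CSV columns (assuming jq is available; illustration only, not the tooling used to produce these files):

# Hypothetical one-liner: emit task,metric,value,err,version rows from the JSON results.
f=4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_0.json
echo "task,metric,value,err,version"
jq -r '.versions as $v
    | .results | to_entries[] | .key as $task | .value as $m
    | $m | to_entries[] | select(.key | endswith("_stderr") | not)
    | [$task, .key, .value, ($m[(.key + "_stderr")] // ""), $v[$task]]
    | @csv' "$f"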
4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_1.csv ADDED
@@ -0,0 +1,21 @@
+ task,metric,value,err,version
+ anli_r1,acc,0.343,0.015019206922356951,0
+ anli_r2,acc,0.356,0.015149042659306621,0
+ anli_r3,acc,0.3433333333333333,0.01371263383046586,0
+ arc_challenge,acc,0.23890784982935154,0.012461071376316614,0
+ arc_challenge,acc_norm,0.26535836177474403,0.012902554762313967,0
+ arc_easy,acc,0.49284511784511786,0.010258733022446367,0
+ arc_easy,acc_norm,0.4696969696969697,0.01024092360872654,0
+ boolq,acc,0.5333333333333333,0.008725599880049204,1
+ cb,acc,0.4107142857142857,0.06633634150359541,1
+ cb,f1,0.19658119658119658,,1
+ copa,acc,0.73,0.0446196043338474,0
+ hellaswag,acc,0.4047002589125672,0.004898308167211846,0
+ hellaswag,acc_norm,0.4994025094602669,0.004989777848791008,0
+ piqa,acc,0.7110990206746464,0.010575111841364905,0
+ piqa,acc_norm,0.7072905331882481,0.010616044462393094,0
+ rte,acc,0.5018050541516246,0.030096267148976633,0
+ sciq,acc,0.724,0.014142984975740668,0
+ sciq,acc_norm,0.688,0.014658474370509001,0
+ storycloze_2016,acc,0.6563335114911811,0.010982724236255945,0
+ winogrande,acc,0.5193370165745856,0.014041972733712974,0
4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_1.json ADDED
@@ -0,0 +1,87 @@
+ {
+ "results": {
+ "anli_r1": {
+ "acc": 0.343,
+ "acc_stderr": 0.015019206922356951
+ },
+ "anli_r2": {
+ "acc": 0.356,
+ "acc_stderr": 0.015149042659306621
+ },
+ "anli_r3": {
+ "acc": 0.3433333333333333,
+ "acc_stderr": 0.01371263383046586
+ },
+ "cb": {
+ "acc": 0.4107142857142857,
+ "acc_stderr": 0.06633634150359541,
+ "f1": 0.19658119658119658
+ },
+ "copa": {
+ "acc": 0.73,
+ "acc_stderr": 0.0446196043338474
+ },
+ "hellaswag": {
+ "acc": 0.4047002589125672,
+ "acc_stderr": 0.004898308167211846,
+ "acc_norm": 0.4994025094602669,
+ "acc_norm_stderr": 0.004989777848791008
+ },
+ "rte": {
+ "acc": 0.5018050541516246,
+ "acc_stderr": 0.030096267148976633
+ },
+ "winogrande": {
+ "acc": 0.5193370165745856,
+ "acc_stderr": 0.014041972733712974
+ },
+ "storycloze_2016": {
+ "acc": 0.6563335114911811,
+ "acc_stderr": 0.010982724236255945
+ },
+ "boolq": {
+ "acc": 0.5333333333333333,
+ "acc_stderr": 0.008725599880049204
+ },
+ "arc_easy": {
+ "acc": 0.49284511784511786,
+ "acc_stderr": 0.010258733022446367,
+ "acc_norm": 0.4696969696969697,
+ "acc_norm_stderr": 0.01024092360872654
+ },
+ "arc_challenge": {
+ "acc": 0.23890784982935154,
+ "acc_stderr": 0.012461071376316614,
+ "acc_norm": 0.26535836177474403,
+ "acc_norm_stderr": 0.012902554762313967
+ },
+ "sciq": {
+ "acc": 0.724,
+ "acc_stderr": 0.014142984975740668,
+ "acc_norm": 0.688,
+ "acc_norm_stderr": 0.014658474370509001
+ },
+ "piqa": {
+ "acc": 0.7110990206746464,
+ "acc_stderr": 0.010575111841364905,
+ "acc_norm": 0.7072905331882481,
+ "acc_norm_stderr": 0.010616044462393094
+ }
+ },
+ "versions": {
+ "anli_r1": 0,
+ "anli_r2": 0,
+ "anli_r3": 0,
+ "cb": 1,
+ "copa": 0,
+ "hellaswag": 0,
+ "rte": 0,
+ "winogrande": 0,
+ "storycloze_2016": 0,
+ "boolq": 1,
+ "arc_easy": 0,
+ "arc_challenge": 0,
+ "sciq": 0,
+ "piqa": 0
+ }
+ }
4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_2.csv ADDED
@@ -0,0 +1,21 @@
+ task,metric,value,err,version
+ anli_r1,acc,0.322,0.014782913600996652,0
+ anli_r2,acc,0.361,0.015195720118175113,0
+ anli_r3,acc,0.33916666666666667,0.013672343491681815,0
+ arc_challenge,acc,0.24829351535836178,0.012624912868089758,0
+ arc_challenge,acc_norm,0.2815699658703072,0.013143376735009026,0
+ arc_easy,acc,0.494949494949495,0.01025926010256586,0
+ arc_easy,acc_norm,0.4730639730639731,0.010244884740620094,0
+ boolq,acc,0.5281345565749236,0.008731199646681927,1
+ cb,acc,0.4642857142857143,0.06724777654937658,1
+ cb,f1,0.21666666666666667,,1
+ copa,acc,0.72,0.04512608598542127,0
+ hellaswag,acc,0.4039036048595897,0.00489675785702255,0
+ hellaswag,acc_norm,0.5044811790479984,0.004989581008163205,0
+ piqa,acc,0.7067464635473341,0.010621818421101926,0
+ piqa,acc_norm,0.7165397170837867,0.010515057791152041,0
+ rte,acc,0.5054151624548736,0.030094698123239966,0
+ sciq,acc,0.753,0.013644675781314133,0
+ sciq,acc_norm,0.715,0.014282120955200471,0
+ storycloze_2016,acc,0.6547300908605024,0.010994860223187675,0
+ winogrande,acc,0.5303867403314917,0.014026510839428734,0
4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_2.json ADDED
@@ -0,0 +1,87 @@
+ {
+ "results": {
+ "anli_r1": {
+ "acc": 0.322,
+ "acc_stderr": 0.014782913600996652
+ },
+ "anli_r2": {
+ "acc": 0.361,
+ "acc_stderr": 0.015195720118175113
+ },
+ "anli_r3": {
+ "acc": 0.33916666666666667,
+ "acc_stderr": 0.013672343491681815
+ },
+ "cb": {
+ "acc": 0.4642857142857143,
+ "acc_stderr": 0.06724777654937658,
+ "f1": 0.21666666666666667
+ },
+ "copa": {
+ "acc": 0.72,
+ "acc_stderr": 0.04512608598542127
+ },
+ "hellaswag": {
+ "acc": 0.4039036048595897,
+ "acc_stderr": 0.00489675785702255,
+ "acc_norm": 0.5044811790479984,
+ "acc_norm_stderr": 0.004989581008163205
+ },
+ "rte": {
+ "acc": 0.5054151624548736,
+ "acc_stderr": 0.030094698123239966
+ },
+ "winogrande": {
+ "acc": 0.5303867403314917,
+ "acc_stderr": 0.014026510839428734
+ },
+ "storycloze_2016": {
+ "acc": 0.6547300908605024,
+ "acc_stderr": 0.010994860223187675
+ },
+ "boolq": {
+ "acc": 0.5281345565749236,
+ "acc_stderr": 0.008731199646681927
+ },
+ "arc_easy": {
+ "acc": 0.494949494949495,
+ "acc_stderr": 0.01025926010256586,
+ "acc_norm": 0.4730639730639731,
+ "acc_norm_stderr": 0.010244884740620094
+ },
+ "arc_challenge": {
+ "acc": 0.24829351535836178,
+ "acc_stderr": 0.012624912868089758,
+ "acc_norm": 0.2815699658703072,
+ "acc_norm_stderr": 0.013143376735009026
+ },
+ "sciq": {
+ "acc": 0.753,
+ "acc_stderr": 0.013644675781314133,
+ "acc_norm": 0.715,
+ "acc_norm_stderr": 0.014282120955200471
+ },
+ "piqa": {
+ "acc": 0.7067464635473341,
+ "acc_stderr": 0.010621818421101926,
+ "acc_norm": 0.7165397170837867,
+ "acc_norm_stderr": 0.010515057791152041
+ }
+ },
+ "versions": {
+ "anli_r1": 0,
+ "anli_r2": 0,
+ "anli_r3": 0,
+ "cb": 1,
+ "copa": 0,
+ "hellaswag": 0,
+ "rte": 0,
+ "winogrande": 0,
+ "storycloze_2016": 0,
+ "boolq": 1,
+ "arc_easy": 0,
+ "arc_challenge": 0,
+ "sciq": 0,
+ "piqa": 0
+ }
+ }
4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_3.csv ADDED
@@ -0,0 +1,21 @@
+ task,metric,value,err,version
+ anli_r1,acc,0.323,0.014794927843348639,0
+ anli_r2,acc,0.36,0.015186527932040122,0
+ anli_r3,acc,0.3425,0.013704669762934725,0
+ arc_challenge,acc,0.24914675767918087,0.012639407111926437,0
+ arc_challenge,acc_norm,0.26109215017064846,0.012835523909473855,0
+ arc_easy,acc,0.4936868686868687,0.01025896566804444,0
+ arc_easy,acc_norm,0.47558922558922556,0.010247548905242267,0
+ boolq,acc,0.5357798165137615,0.008722635482201088,1
+ cb,acc,0.48214285714285715,0.0673769750864465,1
+ cb,f1,0.2195121951219512,,1
+ copa,acc,0.69,0.04648231987117316,0
+ hellaswag,acc,0.4055964947221669,0.004900036261309041,0
+ hellaswag,acc_norm,0.5056761601274646,0.0049894598716091814,0
+ piqa,acc,0.7132752992383025,0.010551314503108056,0
+ piqa,acc_norm,0.7165397170837867,0.010515057791152051,0
+ rte,acc,0.5054151624548736,0.030094698123239966,0
+ sciq,acc,0.752,0.013663187134877658,0
+ sciq,acc_norm,0.713,0.014312087053809961,0
+ storycloze_2016,acc,0.6568679850347408,0.010978648097499872,0
+ winogrande,acc,0.5240726124704025,0.014036189665395132,0
4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_3.json ADDED
@@ -0,0 +1,87 @@
+ {
+ "results": {
+ "anli_r1": {
+ "acc": 0.323,
+ "acc_stderr": 0.014794927843348639
+ },
+ "anli_r2": {
+ "acc": 0.36,
+ "acc_stderr": 0.015186527932040122
+ },
+ "anli_r3": {
+ "acc": 0.3425,
+ "acc_stderr": 0.013704669762934725
+ },
+ "cb": {
+ "acc": 0.48214285714285715,
+ "acc_stderr": 0.0673769750864465,
+ "f1": 0.2195121951219512
+ },
+ "copa": {
+ "acc": 0.69,
+ "acc_stderr": 0.04648231987117316
+ },
+ "hellaswag": {
+ "acc": 0.4055964947221669,
+ "acc_stderr": 0.004900036261309041,
+ "acc_norm": 0.5056761601274646,
+ "acc_norm_stderr": 0.0049894598716091814
+ },
+ "rte": {
+ "acc": 0.5054151624548736,
+ "acc_stderr": 0.030094698123239966
+ },
+ "winogrande": {
+ "acc": 0.5240726124704025,
+ "acc_stderr": 0.014036189665395132
+ },
+ "storycloze_2016": {
+ "acc": 0.6568679850347408,
+ "acc_stderr": 0.010978648097499872
+ },
+ "boolq": {
+ "acc": 0.5357798165137615,
+ "acc_stderr": 0.008722635482201088
+ },
+ "arc_easy": {
+ "acc": 0.4936868686868687,
+ "acc_stderr": 0.01025896566804444,
+ "acc_norm": 0.47558922558922556,
+ "acc_norm_stderr": 0.010247548905242267
+ },
+ "arc_challenge": {
+ "acc": 0.24914675767918087,
+ "acc_stderr": 0.012639407111926437,
+ "acc_norm": 0.26109215017064846,
+ "acc_norm_stderr": 0.012835523909473855
+ },
+ "sciq": {
+ "acc": 0.752,
+ "acc_stderr": 0.013663187134877658,
+ "acc_norm": 0.713,
+ "acc_norm_stderr": 0.014312087053809961
+ },
+ "piqa": {
+ "acc": 0.7132752992383025,
+ "acc_stderr": 0.010551314503108056,
+ "acc_norm": 0.7165397170837867,
+ "acc_norm_stderr": 0.010515057791152051
+ }
+ },
+ "versions": {
+ "anli_r1": 0,
+ "anli_r2": 0,
+ "anli_r3": 0,
+ "cb": 1,
+ "copa": 0,
+ "hellaswag": 0,
+ "rte": 0,
+ "winogrande": 0,
+ "storycloze_2016": 0,
+ "boolq": 1,
+ "arc_easy": 0,
+ "arc_challenge": 0,
+ "sciq": 0,
+ "piqa": 0
+ }
+ }
4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_4.csv ADDED
@@ -0,0 +1,21 @@
+ task,metric,value,err,version
+ anli_r1,acc,0.337,0.014955087918653607,0
+ anli_r2,acc,0.362,0.015204840912919498,0
+ anli_r3,acc,0.36583333333333334,0.013910212062701167,0
+ arc_challenge,acc,0.24146757679180889,0.012506564839739434,0
+ arc_challenge,acc_norm,0.26621160409556316,0.012915774781523214,0
+ arc_easy,acc,0.4903198653198653,0.010257860554461127,0
+ arc_easy,acc_norm,0.47474747474747475,0.010246690042583842,0
+ boolq,acc,0.5409785932721712,0.008715635308774413,1
+ cb,acc,0.5178571428571429,0.06737697508644647,1
+ cb,f1,0.27628205128205124,,1
+ copa,acc,0.67,0.04725815626252606,0
+ hellaswag,acc,0.40619398526190004,0.004901178917900842,0
+ hellaswag,acc_norm,0.5022903804023103,0.004989729059957431,0
+ piqa,acc,0.7072905331882481,0.010616044462393092,0
+ piqa,acc_norm,0.7072905331882481,0.010616044462393094,0
+ rte,acc,0.5090252707581228,0.030091559826331334,0
+ sciq,acc,0.761,0.013493000446937591,0
+ sciq,acc_norm,0.726,0.014111099288259588,0
+ storycloze_2016,acc,0.6606092998396579,0.010949682016358629,0
+ winogrande,acc,0.5280189423835833,0.01403040421340578,0
4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_4.json ADDED
@@ -0,0 +1,87 @@
+ {
+ "results": {
+ "anli_r1": {
+ "acc": 0.337,
+ "acc_stderr": 0.014955087918653607
+ },
+ "anli_r2": {
+ "acc": 0.362,
+ "acc_stderr": 0.015204840912919498
+ },
+ "anli_r3": {
+ "acc": 0.36583333333333334,
+ "acc_stderr": 0.013910212062701167
+ },
+ "cb": {
+ "acc": 0.5178571428571429,
+ "acc_stderr": 0.06737697508644647,
+ "f1": 0.27628205128205124
+ },
+ "copa": {
+ "acc": 0.67,
+ "acc_stderr": 0.04725815626252606
+ },
+ "hellaswag": {
+ "acc": 0.40619398526190004,
+ "acc_stderr": 0.004901178917900842,
+ "acc_norm": 0.5022903804023103,
+ "acc_norm_stderr": 0.004989729059957431
+ },
+ "rte": {
+ "acc": 0.5090252707581228,
+ "acc_stderr": 0.030091559826331334
+ },
+ "winogrande": {
+ "acc": 0.5280189423835833,
+ "acc_stderr": 0.01403040421340578
+ },
+ "storycloze_2016": {
+ "acc": 0.6606092998396579,
+ "acc_stderr": 0.010949682016358629
+ },
+ "boolq": {
+ "acc": 0.5409785932721712,
+ "acc_stderr": 0.008715635308774413
+ },
+ "arc_easy": {
+ "acc": 0.4903198653198653,
+ "acc_stderr": 0.010257860554461127,
+ "acc_norm": 0.47474747474747475,
+ "acc_norm_stderr": 0.010246690042583842
+ },
+ "arc_challenge": {
+ "acc": 0.24146757679180889,
+ "acc_stderr": 0.012506564839739434,
+ "acc_norm": 0.26621160409556316,
+ "acc_norm_stderr": 0.012915774781523214
+ },
+ "sciq": {
+ "acc": 0.761,
+ "acc_stderr": 0.013493000446937591,
+ "acc_norm": 0.726,
+ "acc_norm_stderr": 0.014111099288259588
+ },
+ "piqa": {
+ "acc": 0.7072905331882481,
+ "acc_stderr": 0.010616044462393092,
+ "acc_norm": 0.7072905331882481,
+ "acc_norm_stderr": 0.010616044462393094
+ }
+ },
+ "versions": {
+ "anli_r1": 0,
+ "anli_r2": 0,
+ "anli_r3": 0,
+ "cb": 1,
+ "copa": 0,
+ "hellaswag": 0,
+ "rte": 0,
+ "winogrande": 0,
+ "storycloze_2016": 0,
+ "boolq": 1,
+ "arc_easy": 0,
+ "arc_challenge": 0,
+ "sciq": 0,
+ "piqa": 0
+ }
+ }
4b284b1b9c4/evaluation/rankeval/4b284b1b9c4_5.csv ADDED
@@ -0,0 +1,21 @@
+ task,metric,value,err,version
+ anli_r1,acc,0.36,0.015186527932040126,0
+ anli_r2,acc,0.354,0.015129868238451773,0
+ anli_r3,acc,0.3625,0.013883037874225516,0
+ arc_challenge,acc,0.25853242320819114,0.012794553754288682,0
+ arc_challenge,acc_norm,0.2696245733788396,0.012968040686869147,0
+ arc_easy,acc,0.4877946127946128,0.010256726235129018,0
+ arc_easy,acc_norm,0.4713804713804714,0.010242962617927192,0
+ boolq,acc,0.5324159021406728,0.008726657178723137,1
+ cb,acc,0.5178571428571429,0.06737697508644647,1
+ cb,f1,0.25430894308943086,,1
+ copa,acc,0.71,0.04560480215720683,0
+ hellaswag,acc,0.40400318661621193,0.004896952378506924,0
+ hellaswag,acc_norm,0.5022903804023103,0.004989729059957428,0
+ piqa,acc,0.7067464635473341,0.010621818421101928,0
+ piqa,acc_norm,0.70620239390642,0.01062757408051481,0
+ rte,acc,0.48014440433212996,0.0300727231673172,0
+ sciq,acc,0.766,0.01339490288966001,0
+ sciq,acc_norm,0.734,0.01397996564514515,0
+ storycloze_2016,acc,0.6563335114911811,0.010982724236255948,0
+ winogrande,acc,0.5224940805051302,0.014038257824059874,0