henhenhahi111112 committed
Commit af6e330
Parent(s): fc6e88a

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See raw diff.
- .gitattributes +2 -0
- AVERAGE.md +93 -0
- LICENSE +21 -0
- MANIFEST.in +1 -0
- MOE.md +114 -0
- Makefile +24 -0
- README.md +290 -0
- clear_memo.py +15 -0
- data/alpaca_train.json +3 -0
- data/alpaca_valid.json +0 -0
- environment-tests.yml +9 -0
- environment.yml +9 -0
- eval/eval_openlm_ckpt.py +133 -0
- eval/in_memory_hf_eval.yaml +38 -0
- eval/local_data/.gitignore +1 -0
- eval/local_data/arc_challenge.jsonl +0 -0
- eval/local_data/arc_easy.jsonl +0 -0
- eval/local_data/boolq.jsonl +0 -0
- eval/local_data/copa.jsonl +100 -0
- eval/local_data/hellaswag.jsonl +0 -0
- eval/local_data/jeopardy_all.jsonl +0 -0
- eval/local_data/lambada_openai.jsonl +0 -0
- eval/local_data/mmlu.jsonl +0 -0
- eval/local_data/piqa.jsonl +0 -0
- eval/local_data/triviaqa.jsonl +0 -0
- eval/local_data/winograd_wsc.jsonl +273 -0
- eval/local_data/winogrande.jsonl +0 -0
- logs/test_alpaca_7b_1p25_240612/checkpoints/epoch_1.pt +3 -0
- logs/test_alpaca_7b_1p25_240612/checkpoints/results.jsonl +5 -0
- logs/test_alpaca_7b_1p25_240612/checkpoints/stats_1.pt +3 -0
- logs/test_alpaca_7b_1p25_240612/out.log +264 -0
- logs/test_alpaca_7b_1p25_240612/params.txt +129 -0
- logs/test_alpaca_7b_1p25_240612/tensorboard.tar +3 -0
- logs/test_alpaca_7b_1p25_240612/tensorboard/events.out.tfevents.1718191631.10-16-20-78.280596.0 +3 -0
- open_lm/__init__.py +0 -0
- open_lm/__pycache__/__init__.cpython-310.pyc +0 -0
- open_lm/__pycache__/__init__.cpython-39.pyc +0 -0
- open_lm/__pycache__/attention.cpython-310.pyc +0 -0
- open_lm/__pycache__/data.cpython-310.pyc +0 -0
- open_lm/__pycache__/data.cpython-39.pyc +0 -0
- open_lm/__pycache__/distributed.cpython-310.pyc +0 -0
- open_lm/__pycache__/evaluate.cpython-310.pyc +0 -0
- open_lm/__pycache__/file_utils.cpython-310.pyc +0 -0
- open_lm/__pycache__/logger.cpython-310.pyc +0 -0
- open_lm/__pycache__/losses.cpython-310.pyc +0 -0
- open_lm/__pycache__/main.cpython-310.pyc +0 -0
- open_lm/__pycache__/main.cpython-39.pyc +0 -0
- open_lm/__pycache__/meters.cpython-310.pyc +0 -0
- open_lm/__pycache__/model.cpython-310.pyc +0 -0
- open_lm/__pycache__/norms.cpython-310.pyc +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data/alpaca_train.json filter=lfs diff=lfs merge=lfs -text
+open_lm/data/train_data.jsonl filter=lfs diff=lfs merge=lfs -text
AVERAGE.md
ADDED
@@ -0,0 +1,93 @@
# Instruction tuning and weight averaging

Note that some of these steps may be out of date, but the general flow should remain.

We downloaded the data from https://huggingface.co/datasets/timdettmers/openassistant-guanaco then ran `python datapreprocess/make_assistant_data.py --input-files /fsx/home-mitchellw/openassistant_best_replies_train.jsonl --output-dir /fsx/home-mitchellw/tmp --num-workers 1 --num-consumers 1`. Note that we changed the shard size so there would be at least 8 shards.

```
torchrun --nproc-per-node 8 -m open_lm.main \
    --train-data "pipe:aws s3 cp s3://<bucket>/lmdata/assistant_data/train/shard-{0000000..0000008}.tar -" \
    --train-num-samples 4382720 \
    --workers 1 \
    --precision amp_bfloat16 \
    --batch-size 8 \
    --grad-checkpointing \
    --log-every-n-steps 1 \
    --grad-clip-norm 1 \
    --lr 2e-5 \
    --model g3b_neox \
    --fsdp --fsdp-amp \
    --warmup 100 \
    --wd 0.1 \
    --beta2 0.95 \
    --epochs 6 \
    --disable-buffer \
    --lr-cooldown-end 5e-6 \
    --report-to wandb \
    --wandb-project-name lmtune \
    --pretrained /fsx/home-mitchellw/experimetns/lm/1p5T-bigdata-neox-g3b_neox-10-1e-3-0.1-nodes48-bs10-v0/checkpoints/epoch_24.pt \
    --name instruction-tune-3b-2e-5-6 \
    --logs /fsx/home-mitchellw/experimetns/lmtune
```

Now we want to interpolate between the base and fine-tuned model with different coefficients alpha. We can do so with this bash script.

```
BASEMODEL=/fsx/home-mitchellw/experimetns/lm/1p5T-bigdata-neox-g3b_neox-10-1e-3-0.1-nodes48-bs10-v0/checkpoints/epoch_24.pt
FINALMODEL=/fsx/home-mitchellw/experimetns/lmtune/instruction-tune-3b-2e-5-6/checkpoints/epoch_6.pt
MODEL=g3b_neox

for alpha in $(seq 0 0.05 1)
do
    #echo $model
    save_path_1="$(dirname $FINALMODEL)/chat-eval-interpolate-$alpha-$(basename $FINALMODEL)"
    save_path_2="$(dirname $FINALMODEL)/base-eval-interpolate-$alpha-$(basename $FINALMODEL)"

    echo $save_path_1
    echo $save_path_2

    if [ -f "$save_path_1" ]; then
        echo "$save_path_1 exists."
    else
        # first do the chat eval.
        torchrun --nproc-per-node 4 -m open_lm.main \
            --val-data "pipe:aws s3 cp s3://<bucket>/lmdata/assistant_data/val.tar -" \
            --workers 6 \
            --precision amp_bfloat16 \
            --batch-size 8 \
            --grad-checkpointing \
            --log-every-n-steps 1 \
            --model $MODEL \
            --fsdp --fsdp-amp \
            --train-num-samples 1000000000 \
            --name $RANDOM \
            --average $BASEMODEL $FINALMODEL \
            --average-coefficients $alpha $(echo "1-$alpha" | bc -l) \
            --logs /fsx/home-mitchellw/experimetns/lmdebug > $save_path_1

        # now do the base eval
        torchrun --nproc-per-node 4 -m open_lm.main \
            --val-data "pipe:aws s3 cp s3://<bucket>/lmdata/validation_data_tokenized/open_lm//shard_00000000.tar -" \
            --workers 6 \
            --precision amp_bfloat16 \
            --batch-size 8 \
            --grad-checkpointing \
            --log-every-n-steps 1 \
            --model $MODEL \
            --data-key json \
            --fsdp --fsdp-amp \
            --train-num-samples 1000000000 \
            --name $RANDOM \
            --average $BASEMODEL $FINALMODEL \
            --average-coefficients $alpha $(echo "1-$alpha" | bc -l) \
            --logs /fsx/home-mitchellw/experimetns/lmdebug > $save_path_2
    fi
done
```
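For intuition, the `--average`/`--average-coefficients` step above amounts to a per-parameter linear interpolation of the two checkpoints. A minimal Python sketch, assuming both checkpoints store their weights under a `state_dict` key (the paths and function name are placeholders, not open_lm internals):

```python
import torch

def interpolate_checkpoints(base_path, tuned_path, alpha):
    # theta = alpha * theta_base + (1 - alpha) * theta_finetuned, applied per tensor.
    base = torch.load(base_path, map_location="cpu")["state_dict"]
    tuned = torch.load(tuned_path, map_location="cpu")["state_dict"]
    return {k: alpha * base[k] + (1 - alpha) * tuned[k] for k in base}
```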

Then you can make a plot with `python plots/interpolation.py` which results in the following plot.

![](plots/interpolation.png)
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 mlfoundations

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
MANIFEST.in
ADDED
@@ -0,0 +1 @@
include open_lm/model_configs/*.json
MOE.md
ADDED
@@ -0,0 +1,114 @@
# Mixture of Experts Language Models

## Dependencies

Our implementation of mixture of experts depends on [megablocks](https://github.com/stanford-futuredata/megablocks) and the version of xformers that is compatible with torch 2.1:

```
pip install megablocks
pip3 install -U xformers --index-url https://download.pytorch.org/whl/cu121
```

## Train MoE

To train an MoE, add the MoE-related (`--moe-*`) arguments to the training command:

```
torchrun --nproc-per-node 8 -m open_lm.main \
    --train-num-samples 10000000000 \
    --workers 2 \
    --dataset-manifest "s3://laion-west/rpj_tokenized_upsampled_eleutherai/manifest.jsonl" "s3://laion-west/2T_no_rpj_tokenized_upsampled_25k_shards/manifest.jsonl" \
    --train-data-mix-weights 0.725 0.275 \
    --precision amp_bfloat16 \
    --batch-size 8 \
    --accum-freq 4 \
    --log-every-n-steps 20 \
    --grad-clip-norm 1 \
    --lr 5e-4 \
    --warmup 200 \
    --model open_lm_41m \
    --wd 0.1 \
    --beta2 0.95 \
    --epochs 50 \
    --report-to wandb \
    --moe-freq 2 \
    --moe-num-experts 8 \
    --moe-top-k 2 \
    --moe-capacity-factor 1.25 --moe-loss-weight 0.1 \
    --disable-meta-device \
    --wandb-project-name moe \
    --name test$RANDOM \
    --logs /fsx/home-$USER/experiments/moe \
    --resume latest \
    --seed 124 \
    --data-key 'json' \
    --fsdp --fsdp-amp \
    --model-norm gain_only_layer_norm \
    --lr-scheduler cosine \
    --lr-cooldown-end 0.00001
```

The above command will add an MoE FFN layer to every other Transformer block. You can use an arbitrary number of experts; you are only limited by total RAM across all GPUs.
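For intuition, here is a minimal sketch of the top-k routing that `--moe-num-experts` and `--moe-top-k` control. This is illustrative only, not the megablocks implementation; all names and shapes are assumptions.

```python
import torch
import torch.nn.functional as F

def top_k_route(x, router, top_k=2):
    # x: [num_tokens, d_model], router: [d_model, num_experts]
    logits = x @ router                                     # [num_tokens, num_experts]
    probs = F.softmax(logits, dim=-1)
    weights, expert_ids = probs.topk(top_k, dim=-1)         # each token picks its top-k experts
    weights = weights / weights.sum(dim=-1, keepdim=True)   # renormalize the combine weights
    return weights, expert_ids

x = torch.randn(16, 64)
router = torch.randn(64, 8)                                 # 8 experts, as in --moe-num-experts 8
weights, expert_ids = top_k_route(x, router)
print(expert_ids.shape)                                     # torch.Size([16, 2])
```

Each token is dispatched to its top-k experts and the expert outputs are combined with the router weights; `--moe-capacity-factor` roughly bounds how many tokens each expert may receive, and `--moe-loss-weight` scales the auxiliary load-balancing loss.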

You can also add `moe_expert_model_parallelism`, which will distribute experts across different GPUs. However, if the number of GPUs is larger than the number of experts, an additional num_gpu/num_expert-way tensor parallelism is applied. Currently this is not eval-friendly, so I would not recommend using it yet.

You can evaluate the MoE in the same way as dense models:

```
torchrun --nproc-per-node 8 -m open_lm.main \
    --val-data "pipe:aws s3 cp s3://laion-west/lmdata/validation_data_tokenized/open_lm//shard_00000000.tar -" \
    --workers 6 \
    --precision amp_bfloat16 \
    --batch-size 8 \
    --log-every-n-steps 1 \
    --model open_lm_41m \
    --fsdp --fsdp-amp \
    --moe-num-experts 64 --moe-freq 2 \
    --data-key json \
    --train-num-samples 1000000000 \
    --model-norm gain_only_layer_norm \
    --name $RANDOM \
    --resume /fsx/home-suching/experiments/mix_wo/test8086/checkpoints/epoch_1.pt \
    --logs /fsx/home-$USER/experiments/eval
```

## Benchmarking

To benchmark your results, here are the perplexities we obtain with our implementation across a number of compute budgets and model sizes on our A100 cluster:

### Compute budgets

| Compute type | 41M | 87M | 160M | 410M | 830M |
|--------------|------|------|------|------|------|
| Number of nodes | 1 | 1 | 1 | 2 | 4 |
| Number of tokens | 20.0B | 20.0B | 20.0B | 20.0B | 20.0B |

### Perplexity

| Number of Experts | 41M | 87M | 160M | 410M | 830M |
|--------------|------|------|------|------|------|
| 1 | 27.61 | 18.68 | 14.87 | 10.54 | 9.39 |
| 8 | 19.85 | 14.66 | 12.26 | 9.82 | 8.84 |
| 32 | 20.55 | 15.28 | 14.62 | | |

### Tokens/sec/GPU

| Number of Experts | 41M | 87M | 160M | 410M | 830M |
|--------------|------|------|------|------|------|
| 1 | 141.2K | 106.0K | 95.5K | 30.3K | 16.0K |
| 8 | 69.5K | 66.6K | 66.2K | 18.5K | 9.2K |

### Training Parameters

| Number of Experts | 41M | 87M | 160M | 410M | 830M |
|--------------|------|------|------|------|------|
| 8 experts | 68.9M | 165.4M | 360.6M | 1.1B | 2.4B |
| 32 experts | 164.5M | 439.9M | 1.0B | 3.5B | 7.9B |

### Inference Parameters

| Number of Experts | 41M | 87M | 160M | 410M | 830M |
|--------------|------|------|------|------|------|
| 2 experts | 45.0M | 96.8M | 190.7M | 509.2M | 1.1B |
Makefile
ADDED
@@ -0,0 +1,24 @@
install: ## [Local development] Upgrade pip, install requirements, install package.
	python -m pip install -U pip
	python -m pip install -e .

install-dev: ## [Local development] Install test requirements
	python -m pip install -r requirements_test.txt

lint: ## [Local development] Run mypy, pylint and black
	python -m black --check -l 120 .

black: ## [Local development] Auto-format python code using black
	python3 -m black -l 120 .

TEST_ARGS = tests ## set default to run all tests
test: ## [Local development] Run unit tests
	python -m pytest -x -s -v $(TEST_ARGS) -m "not gpu and not s3"

test-gpu: ## [Local development] Run GPU unit tests
	python -m pytest -x -s -v $(TEST_ARGS) -m gpu

.PHONY: help

help: # Run `make help` to get help on the make commands
	@grep -E '^[0-9a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
README.md
ADDED
@@ -0,0 +1,290 @@
# OpenLM

![](/plots/logo.png)

OpenLM is a minimal but performant language modeling (LM) repository, aimed at facilitating research on medium-sized LMs. We have verified the performance of OpenLM up to 7B parameters and 256 GPUs.
In contrast with other repositories such as Megatron, we depend only on PyTorch, XFormers, or Triton for our core modeling code.

# Contents
- [Release Notes](#release-notes)
- [Quickstart](#quickstart)
- [Setup](#setup)
- [Process training data](#process-training-data)
- [Run training](#run-training)
- [Evaluate Model](#evaluate-model)
- [Generate Text](#generate-text)
- [Pretrained Models](#pretrained-models)
- [Team and Acknowledgements](#team-and-acknowledgements)

# Release Notes
- 09/26/23: Public release and featured on the [Laion Blog](https://laion.ai/blog/open-lm/)
- 08/18/23: Updated README.md

# Quickstart
Here we'll go over a basic example where we start from a fresh install, download and preprocess some training data, and train a model.

## Setup
We require Python >= 3.9 and a current installation of PyTorch, as well as several other packages. The full list of requirements is contained in `requirements.txt` and can be installed in your Python environment via
```>>> pip install -r requirements.txt```
Next, to access `open_lm` everywhere in your virtual environment, install it using pip (from within the top-level GitHub repo)
```>>> pip install --editable . ```
Some considerations:
- We like [WandB](https://wandb.ai/) and [tensorboard](https://www.tensorflow.org/tensorboard) for logging. We specify how to use these during training below.

## Process Training Data
Next you must specify a collection of tokenized data. For the purposes of this example, we will use a recent dump of English Wikipedia, available on HuggingFace. To download this locally, we've included a script located at [open_lm/datapreprocess/wiki_download.py](open_lm/datapreprocess/wiki_download.py). All you have to do is specify an output directory for where the raw data should be stored:
```
python open_lm/datapreprocess/wiki_download.py --output-dir path/to/raw_data
```

Next we process our training data by running it through a BPE tokenizer and splitting it into chunks of appropriate length. By default we use the tokenizer attached to [GPT-NeoX-20B](https://github.com/EleutherAI/gpt-neox). To do this, use the script `datapreprocess/make_2048.py`:
```
>>> python open_lm/datapreprocess/make_2048.py \
    --input-files path_to_raw_data/*.jsonl \
    --output-dir preproc_data \
    --num-workers 32 \
    --num-consumers 1
```
Here, `input-files` passes all of its (possibly many) arguments through the Python `glob` module, allowing for wildcards. Optionally, data can be stored in S3 by setting the environment variable `S3_BASE` and passing the flag `--upload-to-s3` to the script. This saves sharded data to the given bucket with a prefix of `S3_BASE`. E.g.
```
>>> export S3_BASE=preproc_data-v1/
>>> python open_lm/datapreprocess/make_2048.py --upload-to-s3 ... # same arguments as before
```

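For reference, the default GPT-NeoX-20B tokenizer can be loaded directly through Hugging Face `transformers` to inspect its output. A quick sketch (the sample sentence is arbitrary):

```python
from transformers import GPTNeoXTokenizerFast

# Load the tokenizer that the preprocessing script uses by default.
tok = GPTNeoXTokenizerFast.from_pretrained("EleutherAI/gpt-neox-20b")
ids = tok("OpenLM chunks tokenized text into sequences of 2048 tokens.")["input_ids"]
print(len(ids), ids[:5])
```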
## Run Training
Tokenized data can now be passed to the main training script, `open_lm/main.py`. Distributed computation is handled via `torchrun`, and hyperparameters are specified by a variety of keyword arguments. We highlight several of the most important ones here:
- `train-data`: location of the sharded tokenized training data. If locally generated and stored, this will point to a directory containing files like `preproc_data/2048-v1/0/XXXXXXX.tar`. Data are processed using the [webdataset](https://github.com/webdataset/webdataset) package, where wildcards are supported, like `preproc_data/2048-v1/0/{0000000..0000099}.tar` to select the first 100 .tar files (see the sketch after this list).
- `model`: which model to use. See the table below for valid options and parameter sizes for each.
- `train-num-samples`: how many samples to use from the specified training dataset.
- `name`: name of this particular training run for logging purposes.
- `report-to`: if present, can be `wandb`, `tensorboard`, or `all` to stash logging information on WandB or Tensorboard.

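To see how such a brace pattern expands into concrete shard names, here is a small sketch using the `braceexpand` package (which webdataset relies on for this expansion); the path is illustrative:

```python
from braceexpand import braceexpand

# Expand a webdataset-style shard pattern into individual file names.
shards = list(braceexpand("preproc_data/2048-v1/0/{0000000..0000099}.tar"))
print(len(shards))  # 100
print(shards[0])    # preproc_data/2048-v1/0/0000000.tar
```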
Model choices are contained in the following table, where, for instance, `11m` indicates an 11 million parameter model and `1b` indicates a 1 billion parameter model.

<center>

| Model Name |
|---------------|
| `open_lm_11m` |
| `open_lm_25m` |
| `open_lm_87m` |
| `open_lm_160m`|
| `open_lm_411m`|
| `open_lm_830m`|
| `open_lm_1b` |
| `open_lm_3b` |
| `open_lm_7b` |

</center>

An example training run can be called as follows:
```
>>> export CUDA_VISIBLE_DEVICES=0,1,2,3
>>> torchrun --nproc-per-node 4 -m open_lm.main \
    --model open_lm_3b \
    --train-data /preproc_data/shard-{0000000..0000099}.tar \
    --train-num-samples 1000000000 \
    --workers 8 \
    --dataset-resampled \
    --precision amp_bfloat16 \
    --batch-size 8 \
    --grad-checkpointing \
    --log-every-n-steps 100 \
    --grad-clip-norm 1 \
    --data-key txt \
    --lr 3e-4 \
    --fsdp --fsdp-amp \
    --warmup 2000 \
    --wd 0.1 \
    --beta2 0.95 \
    --epochs 100 \
    --report-to wandb \
    --wandb-project-name open_lm_example \
    --name open_lm_ex_$RANDOM \
    --resume latest \
    --logs path/to/logging/dir/
```
Checkpoints and final model weights will be saved to the specified logs directory.

During training, the above command will pick shards to train on via sampling with replacement. Training can also be done by picking shards via sampling without replacement. To do this, the input dataset(s) must first be preprocessed using the following command:
```
python -m open_lm.utils.make_wds_manifest --data-dir /preproc_data/
```
This will create a file called ```manifest.jsonl``` under ```/preproc_data```. Training can then be done by sampling without replacement via the following example command:
```
>>> export CUDA_VISIBLE_DEVICES=0,1,2,3
>>> torchrun --nproc-per-node 4 -m open_lm.main \
    --model open_lm_3b \
    --dataset-manifest /preproc_data/manifest.jsonl \
    --train-num-samples 1000000000 \
    --workers 8 \
    --precision amp_bfloat16 \
    --batch-size 8 \
    --grad-checkpointing \
    --log-every-n-steps 100 \
    --grad-clip-norm 1 \
    --data-key txt \
    --lr 3e-4 \
    --fsdp --fsdp-amp \
    --warmup 2000 \
    --wd 0.1 \
    --beta2 0.95 \
    --epochs 100 \
    --report-to wandb \
    --wandb-project-name open_lm_example \
    --name open_lm_ex_$RANDOM \
    --resume latest \
    --logs path/to/logging/dir/
```

### Dataset manifest

The manifest created with `open_lm/utils/make_wds_manifest.py` is a `jsonl` file describing the dataset. Each line in this file corresponds to a shard of the dataset and is a `json` object containing two fields:

- `"shard"`: the name of a shard in the dataset.
- `"num_sequences"`: the number of sequences contained in the shard. Each sequence contains a fixed number of tokens.

This manifest file provides auxiliary information about the dataset and is assumed to be found within the same directory as the shards.

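As a quick sanity check, the manifest can be read with a few lines of Python (the path is illustrative):

```python
import json

# Count the shards and sequences listed in a manifest.jsonl (fields described above).
with open("/preproc_data/manifest.jsonl") as f:
    shards = [json.loads(line) for line in f]
print(len(shards), "shards,", sum(s["num_sequences"] for s in shards), "sequences")
```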
## Evaluate Model
Once trained, we can evaluate the model. This requires [LLM Foundry](https://github.com/mosaicml/llm-foundry), which can be installed via `pip install llm-foundry`. Next, some configurations are required to pass to the evaluator: a skeleton of these parameters is located at [eval/in_memory_hf_eval.yaml](eval/in_memory_hf_eval.yaml). Then just run the following script, making sure to point it at the checkpoint of your trained model (and its corresponding config .json file):
```
cd eval

python eval_openlm_ckpt.py \
    --eval-yaml in_memory_hf_eval.yaml \
    --model open_lm_1b \
    --checkpoint /path/to/openlm_checkpoint.pt \
    --positional-embedding-type head_rotary
```
Note that `--positional-embedding-type head_rotary` is only necessary if using the pretrained `open_lm_1b` model hosted below. See the discussion in the next section about this.

## Generate Text
One can also use a trained model to generate text. This is accessible via the script located at [scripts/generate.py](scripts/generate.py). The parameters are similar to those used in evaluation:
```
cd scripts

python generate.py \
    --model open_lm_1b \
    --checkpoint /path/to/openlm_checkpoint.pt \
    --positional-embedding-type head_rotary \
    --input-text "Please give me a recipe for chocolate chip cookies"
```

Again, note that `--positional-embedding-type head_rotary` is only necessary for the pretrained `open_lm_1b` model hosted below.

# Pretrained Models

## [OpenLM 1B](https://huggingface.co/mlfoundations/open_lm_1B)
OpenLM 1B is a ~1 billion parameter model trained on a 1.6T token dataset consisting of a mix of RedPajama, Pile, S2ORC, The Pile of Law, Deepmind Math, and RealNews (the full mixture of training data is described in [more detail here](https://docs.google.com/spreadsheets/d/1YW-_1vGsSPmVtEt2oeeJOecH6dYX2SuEuhOwZyGwy4k/edit?usp=sharing)).
The model checkpoint can be downloaded from [HuggingFace here](https://huggingface.co/mlfoundations/open_lm_1B/tree/main).
The script used to train this model (for config-copying purposes) is [located here](https://github.com/mlfoundations/open_lm/blob/main/scripts/train_example.sh).
Once this checkpoint has been downloaded, you can evaluate it by following the directions in the [Evaluate Model](#evaluate-model) section above and passing `--positional-embedding-type head_rotary` or setting `"positional_embedding_type": "head_rotary"` in the model config (see the note below).

Note: We trained this model with rotary embeddings applied to the _head_ dimension, which is the default in xformers as of 09/01/2023. Since these models were trained, we have updated openlm to correctly apply the rotary embeddings to the sequence dimension (see [this issue](https://github.com/mlfoundations/open_lm/issues/4) and [this issue](https://github.com/facebookresearch/xformers/issues/841) for details). To evaluate these models, ensure you set `"positional_embedding_type": "head_rotary"` in the model config.

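For example, a minimal sketch of patching a downloaded model config accordingly (the filename is a placeholder):

```python
import json

# Force the legacy rotary behavior for checkpoints trained before the fix.
with open("open_lm_1b.json") as f:
    cfg = json.load(f)
cfg["positional_embedding_type"] = "head_rotary"
with open("open_lm_1b.json", "w") as f:
    json.dump(cfg, f, indent=2)
```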
| **OpenLM-1B** | **250B Tokens** | **500B tokens** | **750B tokens** | **1T Tokens** | **1.25T Tokens** | **1.5T Tokens** | **1.6T Tokens** |
|----------------|-----------------|-----------------|-----------------|---------------|------------------|-----------------|-----------------|
| | | | | | | | |
| arc_challenge | 0.27 | 0.28 | 0.29 | 0.28 | 0.29 | 0.31 | 0.31 |
| arc_easy | 0.49 | 0.50 | 0.51 | 0.53 | 0.54 | 0.56 | 0.56 |
| boolq | 0.60 | 0.61 | 0.62 | 0.62 | 0.65 | 0.64 | 0.65 |
| copa | 0.71 | 0.70 | 0.70 | 0.78 | 0.71 | 0.73 | 0.70 |
| hellaswag | 0.50 | 0.54 | 0.54 | 0.57 | 0.59 | 0.61 | 0.61 |
| lambada_openai | 0.56 | 0.57 | 0.61 | 0.61 | 0.65 | 0.65 | 0.66 |
| piqa | 0.70 | 0.70 | 0.71 | 0.72 | 0.73 | 0.74 | 0.74 |
| triviaqa | | | | | | | |
| winogrande | 0.55 | 0.57 | 0.58 | 0.59 | 0.61 | 0.60 | 0.60 |
| MMLU | 0.24 | 0.24 | 0.24 | 0.23 | 0.26 | 0.24 | 0.25 |
| Jeopardy | 0.01 | 0.02 | 0.01 | 0.01 | 0.04 | 0.09 | 0.10 |
| Winograd | 0.75 | 0.77 | 0.77 | 0.79 | 0.81 | 0.80 | 0.79 |
| | | | | | | | |
| **Average** | **0.49** | **0.50** | **0.51** | **0.52** | **0.53** | **0.54** | **0.54** |


| **1B Baselines** | **OPT-1.3B** | **Pythia-1B** | **Neox-1.3B** | **OPT-IML-1.3B** |
|------------------|-------------:|--------------:|--------------:|-----------------:|
| arc_challenge | 0.27 | 0.26 | 0.26 | 0.30 |
| arc_easy | 0.49 | 0.51 | 0.47 | 0.58 |
| boolq | 0.58 | 0.61 | 0.62 | 0.72 |
| copa | 0.75 | 0.68 | 0.72 | 0.73 |
| hellaswag | 0.54 | 0.49 | 0.48 | 0.54 |
| lambada_openai | 0.59 | 0.58 | 0.57 | 0.57 |
| piqa | 0.72 | 0.70 | 0.72 | 0.73 |
| triviaqa | | | | |
| winogrande | 0.59 | 0.53 | 0.55 | 0.59 |
| MMLU | 0.25 | 0.26 | 0.26 | 0.30 |
| Jeopardy | 0.01 | 0.00 | 0.00 | 0.12 |
| Winograd | 0.74 | 0.71 | 0.75 | 0.73 |
| **Average** | **0.50** | **0.48** | **0.49** | **0.54** |


## [OpenLM 7B](https://huggingface.co/mlfoundations/open_lm_7B_1.25T)
OpenLM 7B is not yet done training, but we've released a checkpoint at 1.25T tokens. The details are the same as for OpenLM-1B above, including those pertaining to rotary embeddings.

| **OpenLM-7B** | **275B Tokens** | **500B tokens** | **675B tokens** | **775B tokens** | **1T Tokens** | **1.25T Tokens** | **1.5T Tokens** | **1.6T Tokens** | **LLAMA-7B** | **MPT-7B** |
|-----------------|-----------------|-----------------|-----------------|-----------------|---------------|------------------|-----------------|-----------------|--------------|------------|
| arc_challenge | 0.35 | 0.35 | 0.36 | 0.37 | 0.39 | 0.39 | | | 0.41 | 0.39 |
| arc_easy | 0.60 | 0.61 | 0.62 | 0.62 | 0.63 | 0.66 | | | 0.65 | 0.67 |
| boolq | 0.67 | 0.66 | 0.69 | 0.69 | 0.70 | 0.70 | | | 0.77 | 0.75 |
| copa | 0.75 | 0.79 | 0.75 | 0.80 | 0.80 | 0.78 | | | 0.78 | 0.81 |
| hellaswag | 0.64 | 0.67 | 0.68 | 0.68 | 0.69 | 0.70 | | | 0.75 | 0.76 |
| lambada_openai | 0.67 | 0.68 | 0.69 | 0.70 | 0.70 | 0.70 | | | 0.74 | 0.70 |
| piqa | 0.75 | 0.76 | 0.76 | 0.76 | 0.77 | 0.77 | | | 0.79 | 0.80 |
| triviaqa | | | | | | | | | | |
| winogrande | 0.62 | 0.65 | 0.65 | 0.65 | 0.67 | 0.67 | | | 0.68 | 0.68 |
| MMLU-0 shot | 0.25 | 0.25 | 0.27 | 0.27 | 0.28 | 0.30 | | | 0.30 | 0.30 |
| Jeopardy | 0.15 | 0.18 | 0.23 | 0.22 | 0.16 | 0.21 | | | 0.33 | 0.31 |
| Winograd | 0.82 | 0.81 | 0.84 | 0.84 | 0.85 | 0.86 | | | 0.81 | 0.88 |
| | | | | | | | | | | |
| **Average** | **0.57** | **0.58** | **0.60** | **0.60** | **0.60** | **0.61** | | | **0.64** | **0.64** |
| **MMLU-5 shot** | | | | | | **0.34** | | | **0.34** | |

# Unit tests

For unit tests we use `pytest`. Either
```
pip install pytest
```
or create the `open_lm_tests` conda environment by running
```
conda env create --file environment-tests.yml
```

Tests live in the `tests/` folder.

To run tests, make sure you are on a machine with a GPU and run:
```
pytest tests/
```

# Team and acknowledgements

Team (so far; * = equal contribution): Suchin Gururangan*, Mitchell Wortsman*, Samir Yitzhak Gadre*, Achal Dave*, Maciej Kilian, Weijia Shi, Jean Mercat, Georgios Smyrnis, Gabriel Ilharco, Matt Jordan, Reinhard Heckel, Alex Dimakis, Ali Farhadi, Vaishaal Shankar*, Ludwig Schmidt.

Code is based heavily on [open-clip](https://github.com/mlfoundations/open_clip), developed by a team including Ross Wightman, Romain Beaumont, Cade Gordon, Mehdi Cherti, and Jenia Jitsev, and [open-flamingo](https://github.com/mlfoundations/open_flamingo), developed by a team including Anas Awadalla and Irena Gao. Additional inspiration is from [lit-llama](https://github.com/Lightning-AI/lit-llama).
We are grateful to stability.ai for resource support.
OpenLM is developed by researchers from various affiliations including the [RAIVN Lab](https://raivn.cs.washington.edu/) at the University of Washington, [UWNLP](https://nlp.washington.edu/), [Toyota Research Institute](https://www.tri.global/), [Columbia University](https://www.columbia.edu/), and more.

Citation
--------

If you use this model in your work, please use the following BibTeX citation:
```bibtex
@misc{open_lm,
  author = {Gururangan, Suchin and Wortsman, Mitchell and Gadre, Samir Yitzhak and Dave, Achal and Kilian, Maciej and Shi, Weijia and Mercat, Jean and Smyrnis, Georgios and Ilharco, Gabriel and Jordan, Matt and Heckel, Reinhard and Dimakis, Alex and Farhadi, Ali and Shankar, Vaishaal and Schmidt, Ludwig},
  title = {{open_lm}: a minimal but performative language modeling (LM) repository},
  year = {2023},
  note = {GitHub repository},
  url = {https://github.com/mlfoundations/open_lm/}
}
```
clear_memo.py
ADDED
@@ -0,0 +1,15 @@
import glob
import os


def cleanup_shared_memory():
    # Remove stale PyTorch shared-memory files (e.g. left behind by crashed dataloader workers).
    shm_files = glob.glob("/dev/shm/torch_*")
    for shm_file in shm_files:
        try:
            os.remove(shm_file)
        except OSError as e:
            print(f"Error removing shared memory file {shm_file}: {e}")


# Call this at the start or end of the main program.
cleanup_shared_memory()
data/alpaca_train.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:db0d266491fc794c5ba4aa1d8ffcfc029c7cfb6f892e3f87c61978a4f142793d
size 20108854
data/alpaca_valid.json
ADDED
The diff for this file is too large to render.
See raw diff
environment-tests.yml
ADDED
@@ -0,0 +1,9 @@
name: open_lm_tests
channels:
  - defaults
dependencies:
  - python=3.10
  - pip
  - pip:
    - -r requirements.txt
    - pytest
environment.yml
ADDED
@@ -0,0 +1,9 @@
name: open_lm
channels:
  - defaults
dependencies:
  - python=3.10
  - pip
  - pip:
    - -r requirements.txt
    - -e .
eval/eval_openlm_ckpt.py
ADDED
@@ -0,0 +1,133 @@
import argparse
import builtins as __builtin__
import time
from typing import List

import torch
from composer.loggers import InMemoryLogger, LoggerDestination
from composer.trainer import Trainer
from composer.utils import dist, get_device, reproducibility

try:
    from llmfoundry.utils.builders import build_icl_evaluators, build_logger
except ImportError:
    import logging

    logging.warning("llmfoundry not installed. Please install llmfoundry `pip install llm-foundry` to run this script.")

from omegaconf import OmegaConf as om
from transformers import GPTNeoXTokenizerFast, LlamaTokenizerFast

from open_lm.model import create_params
from open_lm.params import add_model_args
from open_lm.utils.llm_foundry_wrapper import SimpleComposerOpenLMCausalLM
from open_lm.utils.transformers.hf_config import OpenLMConfig
from open_lm.utils.transformers.hf_model import OpenLMforCausalLM

builtin_print = __builtin__.print


def setup_for_distributed(is_master):
    # Override the built-in print so that only the master rank prints by default.
    def print(*args, **kwargs):
        force = kwargs.pop("force", False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print


@torch.no_grad()
def evaluate(model, tokenizer, cfg):
    cfg.dist_timeout = cfg.get("dist_timeout", 600.0)

    reproducibility.seed_all(cfg.seed)
    dist.initialize_dist(get_device(None), timeout=cfg.dist_timeout)
    setup_for_distributed(dist.get_global_rank() == 0)

    composer_model = SimpleComposerOpenLMCausalLM(model, tokenizer)

    evaluators, logger_keys = build_icl_evaluators(
        cfg.icl_tasks, tokenizer, cfg.max_seq_len, cfg.device_eval_batch_size
    )

    in_memory_logger = InMemoryLogger()  # track metrics in the in_memory_logger
    loggers: List[LoggerDestination] = [
        build_logger(name, logger_cfg) for name, logger_cfg in (cfg.get("loggers") or {}).items()
    ]
    loggers.append(in_memory_logger)

    fsdp_config = cfg.get("fsdp_config", None)
    fsdp_config = om.to_container(fsdp_config, resolve=True) if fsdp_config is not None else None

    load_path = cfg.get("load_path", None)

    trainer = Trainer(
        model=composer_model,
        loggers=loggers,
        precision=cfg.precision,
        fsdp_config=fsdp_config,  # type: ignore
        load_path=load_path,
        load_weights_only=True,
        progress_bar=False,
        log_to_console=True,
        dist_timeout=cfg.dist_timeout,
    )

    if torch.cuda.is_available():
        torch.cuda.synchronize()
    a = time.time()
    trainer.eval(eval_dataloader=evaluators)
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    b = time.time()

    print(f"Ran eval in: {b-a} seconds")

    for key in logger_keys:
        if key in in_memory_logger.data:
            result = in_memory_logger.data[key][0][1].item()
            print(f"{key}: {result}")


def main():
    """
    Usage:
        python eval_openlm_ckpt.py --checkpoint <path_to_openlm_checkpoint> --model <name_of_model_config> --eval-yaml <path_to_eval_yaml> --tokenizer <tokenizer_name_or_path>
    example:
        cd eval
        python eval_openlm_ckpt.py --checkpoint ../checkpoints/llama2_7b.pt --model llama2_7b.json --eval-yaml in_memory_hf_eval.yaml --tokenizer <path_to_tokenizer>
    multi-gpu example:
        cd eval
        torchrun --nproc_per_node 3 eval_openlm_ckpt.py --checkpoint ../checkpoints/llama2_7b.pt --model llama2_7b.json --eval-yaml in_memory_hf_eval.yaml --tokenizer <path_to_tokenizer>
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--checkpoint")
    parser.add_argument("--model", type=str, default="m1b_neox", help="Name of the model to use.")
    parser.add_argument("--eval-yaml")
    parser.add_argument("--tokenizer", type=str, default="EleutherAI/gpt-neox-20b")
    add_model_args(parser)
    args = parser.parse_args()

    with open(args.eval_yaml) as f:
        eval_cfg = om.load(f)

    print("Loading checkpoint from disk")
    checkpoint = torch.load(args.checkpoint)

    print("Loading model into the right classes")
    open_lm = OpenLMforCausalLM(OpenLMConfig(create_params(args)))
    if "gpt-neox-20b" in args.tokenizer:
        tokenizer = GPTNeoXTokenizerFast.from_pretrained("EleutherAI/gpt-neox-20b")
    elif "llama" in args.tokenizer:
        tokenizer = LlamaTokenizerFast.from_pretrained(args.tokenizer)

    # Distributed checkpoints prefix parameter names with "module."; strip it before loading.
    state_dict = checkpoint["state_dict"]
    state_dict = {x.replace("module.", ""): y for x, y in state_dict.items()}
    open_lm.model.load_state_dict(state_dict)
    open_lm.model.eval()

    evaluate(open_lm, tokenizer, eval_cfg)


if __name__ == "__main__":
    main()
eval/in_memory_hf_eval.yaml
ADDED
@@ -0,0 +1,38 @@
epoch: 1.25T
dataset: bigdata
num_params: 1B
max_seq_len: 2048
seed: 1
precision: fp32

# Tokenizer
tokenizer:
  # name: [Add name from memory]
  pretrained_model_name_or_path:
  kwargs:
    model_max_length: 2048

model:
  name: open_lm
  # pretrained_model_name_or_path: [add name from memory]
  init_device: cpu
  pretrained: true

load_path: # Add your (optional) Composer checkpoint path here!

device_eval_batch_size: 8

# FSDP config for model sharding
fsdp_config:
  sharding_strategy: FULL_SHARD
  mixed_precision: FULL

icl_tasks:
  -
    label: mmlu
    dataset_uri: local_data/mmlu.jsonl # ADD YOUR OWN DATASET URI
    num_fewshot: [0]
    icl_task_type: multiple_choice
    continuation_delimiter: 'Answer: ' # this separates questions from answers
    has_categories: true
eval/local_data/.gitignore
ADDED
@@ -0,0 +1 @@
!*
eval/local_data/arc_challenge.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
eval/local_data/arc_easy.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
eval/local_data/boolq.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
eval/local_data/copa.jsonl
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"query": "The man turned on the faucet, therefore", "gold": 1, "choices": ["the toilet filled with water.", "water flowed from the spout."]}
|
2 |
+
{"query": "The girl found a bug in her cereal, therefore", "gold": 1, "choices": ["she poured milk in the bowl.", "she lost her appetite."]}
|
3 |
+
{"query": "The woman retired, therefore", "gold": 0, "choices": ["she received her pension.", "she paid off her mortgage."]}
|
4 |
+
{"query": "I wanted to conserve energy, therefore", "gold": 1, "choices": ["i swept the floor in the unoccupied room.", "i shut off the light in the unoccupied room."]}
|
5 |
+
{"query": "The hamburger meat browned, because", "gold": 1, "choices": ["the cook froze it.", "the cook grilled it."]}
|
6 |
+
{"query": "I doubted the salesman's pitch, therefore", "gold": 0, "choices": ["i turned his offer down.", "he persuaded me to buy the product."]}
|
7 |
+
{"query": "I decided to stay home for the night, because", "gold": 0, "choices": ["the forecast called for storms.", "my friends urged me to go out."]}
|
8 |
+
{"query": "My eyes became red and puffy, because", "gold": 0, "choices": ["i was sobbing.", "i was laughing."]}
|
9 |
+
{"query": "The flame on the candle went out, because", "gold": 0, "choices": ["i blew on the wick.", "i put a match to the wick."]}
|
10 |
+
{"query": "The man drank heavily at the party, therefore", "gold": 0, "choices": ["he had a headache the next day.", "he had a runny nose the next day."]}
|
11 |
+
{"query": "The bowling ball knocked over the bowling pins, because", "gold": 0, "choices": ["the man rolled the bowling ball down the alley.", "the man dropped the bowling ball on his foot."]}
|
12 |
+
{"query": "The community learned of the man's death, because", "gold": 1, "choices": ["his family buried him in the cemetery.", "his obituary appeared in the newspaper."]}
|
13 |
+
{"query": "My computer crashed, therefore", "gold": 1, "choices": ["i installed new speakers.", "i lost all my data."]}
|
14 |
+
{"query": "The woman resigned from her job, because", "gold": 1, "choices": ["she aspired to hold an executive position in the firm.", "she believed her superiors were acting unethically."]}
|
15 |
+
{"query": "The player caught the ball, because", "gold": 0, "choices": ["her teammate threw it to her.", "her opponent tried to intercept it."]}
|
16 |
+
{"query": "The judge pounded the gavel, because", "gold": 0, "choices": ["the courtroom broke into uproar.", "the jury announced its verdict."]}
|
17 |
+
{"query": "The woman banished the children from her property, because", "gold": 1, "choices": ["the children hit a ball into her yard.", "the children trampled through her garden."]}
|
18 |
+
{"query": "The kidnappers released the hostage, because", "gold": 0, "choices": ["they accepted ransom money.", "they escaped from jail."]}
|
19 |
+
{"query": "The cook's eyes watered, because", "gold": 1, "choices": ["he ran out of onions.", "he cut an onion."]}
|
20 |
+
{"query": "The woman ran her finger under cold water, because", "gold": 0, "choices": ["she burned her finger on the toaster.", "she put a diamond ring on her finger."]}
|
21 |
+
{"query": "The student misspelled the word, therefore", "gold": 0, "choices": ["the teacher corrected her.", "the teacher dismissed her."]}
|
22 |
+
{"query": "I regained composure from my fit of anger, because", "gold": 1, "choices": ["my heart pounded.", "i took deep breaths."]}
|
23 |
+
{"query": "I put my hands under the running faucet, therefore", "gold": 0, "choices": ["the soap rinsed off my hands.", "the water splashed in my face."]}
|
24 |
+
{"query": "The man dressed in his best suit, because", "gold": 0, "choices": ["he scheduled a meeting with an important client.", "his wife bought him a new tie."]}
|
25 |
+
{"query": "The man confessed his love for the woman, therefore", "gold": 0, "choices": ["the woman rejected him.", "the woman envied him."]}
|
26 |
+
{"query": "The driver got a flat tire, because", "gold": 1, "choices": ["he went over the speed limit.", "he ran over a nail."]}
|
27 |
+
{"query": "My view of the movie screen was blocked, because", "gold": 1, "choices": ["the couple behind me was whispering.", "a tall person was sitting in front of me."]}
|
28 |
+
{"query": "The driver turned on the car's headlights, because", "gold": 1, "choices": ["he heard thunder.", "the sun went down."]}
|
29 |
+
{"query": "The girl refused to eat her vegetables, therefore", "gold": 1, "choices": ["her father told her to drink her milk.", "her father took away her dessert."]}
|
30 |
+
{"query": "The woman covered her mouth with her hand, because", "gold": 1, "choices": ["she exhaled.", "she sneezed."]}
|
31 |
+
{"query": "The secretary put the caller on hold, therefore", "gold": 1, "choices": ["the caller's phone lost reception.", "the caller waited on the line."]}
|
32 |
+
{"query": "The woman walked with crutches, because", "gold": 1, "choices": ["she shaved her legs.", "she broke her leg."]}
|
33 |
+
{"query": "I coughed, because", "gold": 0, "choices": ["i inhaled smoke.", "i lowered my voice."]}
|
34 |
+
{"query": "The clock chimed, because", "gold": 0, "choices": ["it was the top of the hour.", "the hour seemed to drag on."]}
|
35 |
+
{"query": "The chef hit the egg on the side of the bowl, therefore", "gold": 0, "choices": ["the egg cracked.", "the egg rotted."]}
|
36 |
+
{"query": "The police searched the offender's car, because", "gold": 1, "choices": ["they were trying to elicit a confession.", "they were looking for illegal drugs."]}
|
37 |
+
{"query": "The couple travelled south for the winter, because", "gold": 0, "choices": ["they were retired.", "they were separated."]}
|
38 |
+
{"query": "The man felt obligated to attend the event, because", "gold": 1, "choices": ["he turned down his friend's invitation to go.", "he promised his friend that he would go."]}
|
39 |
+
{"query": "The bride got cold feet before the wedding, therefore", "gold": 1, "choices": ["the wedding guests brought gifts.", "she called the wedding off."]}
|
40 |
+
{"query": "The man grew old, therefore", "gold": 0, "choices": ["his hair turned gray.", "he sold his belongings."]}
|
41 |
+
{"query": "The friends decided to share the hamburger, therefore", "gold": 0, "choices": ["they cut the hamburger in half.", "they ordered fries with the hamburger."]}
|
42 |
+
{"query": "I twisted the cap off the soda bottle, therefore", "gold": 0, "choices": ["the soda fizzed.", "the soda leaked out."]}
|
43 |
+
{"query": "The pair of students came under scrutiny by the teacher, because", "gold": 1, "choices": ["the students both received excellent grades.", "their responses on the assignment were identical."]}
|
44 |
+
{"query": "The student was in a rush to get to school on time, therefore", "gold": 0, "choices": ["he left his assignment at home.", "he brought his lunch to school."]}
|
45 |
+
{"query": "The journalist wrote a biography about the humanitarian's life, because", "gold": 1, "choices": ["the humanitarian was difficult for the journalist to interview.", "the journalist was intrigued by the humanitarian's work."]}
|
46 |
+
{"query": "The man defied the authorities of the church, therefore", "gold": 1, "choices": ["he donated money to the church.", "he was excommunicated from the church."]}
|
47 |
+
{"query": "The woman's hair fell in her face, therefore", "gold": 0, "choices": ["she pulled her hair back with a clip.", "she lathered shampoo into her hair."]}
|
48 |
+
{"query": "The ring on my finger got stuck, because", "gold": 0, "choices": ["my finger swelled.", "i chipped my fingernail."]}
|
49 |
+
{"query": "I pulled the rubber band, therefore", "gold": 1, "choices": ["it flung across the room.", "it stretched."]}
|
50 |
+
{"query": "I pressed my hand into the wet cement, therefore", "gold": 0, "choices": ["my handprint dried in the cement.", "cracks emerged in the cement."]}
|
51 |
+
{"query": "My skin broke out into a rash, because", "gold": 0, "choices": ["i brushed against poison ivy in my yard.", "i eradicated the poison ivy from my yard."]}
|
52 |
+
{"query": "My subscription to the magazine expired, therefore", "gold": 1, "choices": ["i discarded the new issue.", "i stopped receiving new issues."]}
|
53 |
+
{"query": "The detective revealed an anomaly in the case, therefore", "gold": 1, "choices": ["he finalized his theory.", "he scrapped his theory."]}
|
54 |
+
{"query": "The boy threw a temper tantrum, because", "gold": 0, "choices": ["his brother took his toys from him.", "he shared his toys with his brother."]}
|
55 |
+
{"query": "The child learned how to read, because", "gold": 0, "choices": ["he began attending school.", "he skipped a grade in school."]}
|
56 |
+
{"query": "The boy skipped dinner, because", "gold": 1, "choices": ["his mother cooked his favorite meal.", "he ate a big lunch."]}
|
57 |
+
{"query": "The woman lavished her friend with flattery, because", "gold": 0, "choices": ["she wanted to ask her friend for a favor.", "she was irritated with her friend's whining."]}
|
58 |
+
{"query": "The key was missing from my pants pocket, because", "gold": 0, "choices": ["the pocket had a hole.", "the pants were new."]}
|
59 |
+
{"query": "The man fainted, because", "gold": 1, "choices": ["he took a nap.", "he ran a marathon."]}
|
60 |
+
{"query": "The man lost the competition, because", "gold": 0, "choices": ["the competition was sabotaged.", "he intimidated his competitors."]}
|
61 |
+
{"query": "The mother called an ambulance, because", "gold": 1, "choices": ["her son lost his cat.", "her son fell out of his bed."]}
|
62 |
+
{"query": "The driver slammed on his brakes, because", "gold": 0, "choices": ["a deer appeared on the road.", "the car radio shut off."]}
|
63 |
+
{"query": "The lock opened, because", "gold": 0, "choices": ["i turned the key in the lock.", "i made a duplicate of the key."]}
|
64 |
+
{"query": "I put rubber gloves on, because", "gold": 1, "choices": ["i was preparing to wash my hands.", "i was preparing to clean the bathroom."]}
|
65 |
+
{"query": "The animal species became endangered, because", "gold": 0, "choices": ["their habitat was destroyed.", "their predators went extinct."]}
|
66 |
+
{"query": "The man perceived that the woman looked different, because", "gold": 0, "choices": ["the woman got her hair cut.", "the woman wore a bracelet."]}
|
67 |
+
{"query": "The student forgot to do her assignment, therefore", "gold": 0, "choices": ["she made up an excuse to tell the teacher.", "the teacher promoted her to the next grade."]}
|
68 |
+
{"query": "The dog barked, because", "gold": 1, "choices": ["the cat lounged on the couch.", "a knock sounded at the door."]}
|
69 |
+
{"query": "Plans were announced to replace a local park with a shopping mall, therefore", "gold": 0, "choices": ["environmentalists started a petition.", "environmentalists produced a documentary."]}
|
70 |
+
{"query": "The couple was happy to see each other, therefore", "gold": 0, "choices": ["they kissed.", "they rested."]}
|
71 |
+
{"query": "The woman asked the man to leave, because", "gold": 0, "choices": ["he insulted her.", "he thanked her."]}
|
72 |
+
{"query": "The tree branch landed in the river, therefore", "gold": 0, "choices": ["the branch moved downstream.", "the river's current became stronger."]}
|
73 |
+
{"query": "The teacher assigned homework to the students, therefore", "gold": 1, "choices": ["the students passed notes.", "the students groaned."]}
|
74 |
+
{"query": "The seasons changed from summer to autumn, therefore", "gold": 1, "choices": ["people evacuated their homes.", "leaves fell from the trees."]}
|
75 |
+
{"query": "The politician was convicted of fraud, therefore", "gold": 1, "choices": ["he campaigned for re-election.", "he was removed from office."]}
|
76 |
+
{"query": "I pushed the wagon, therefore", "gold": 1, "choices": ["the objects in the wagon fell out.", "the wagon wheels spun forward."]}
|
77 |
+
{"query": "The lobbyist persuaded the legislature to support the bill, therefore", "gold": 1, "choices": ["the president vetoed the bill.", "the legislature passed the bill."]}
|
78 |
+
{"query": "My closet was messy, therefore", "gold": 0, "choices": ["i organized it.", "i decorated it."]}
|
79 |
+
{"query": "I stayed up late, therefore", "gold": 1, "choices": ["i had vivid dreams that night.", "i was tired in the morning."]}
|
80 |
+
{"query": "The man's pocket jingled as he walked, because", "gold": 0, "choices": ["his pocket was filled with coins.", "he sewed the hole in his pocket."]}
|
81 |
+
{"query": "Everyone in the class turned to stare at the student, because", "gold": 0, "choices": ["the student's phone rang.", "the student took notes."]}
|
82 |
+
{"query": "The horse bucked, because", "gold": 0, "choices": ["a fly bit the horse.", "the rider stroked the horse."]}
|
83 |
+
{"query": "The jewelry thieves were caught, therefore", "gold": 0, "choices": ["the stolen jewelry was returned to its owners.", "the cost of the stolen jewelry was calculated."]}
|
84 |
+
{"query": "Political violence broke out in the nation, therefore", "gold": 1, "choices": ["many citizens relocated to the capitol.", "many citizens took refuge in other territories."]}
{"query": "The woman was arrested, because", "gold": 1, "choices": ["she checked into rehab.", "she committed assault."]}
{"query": "The woman read the newspaper, therefore", "gold": 0, "choices": ["she discovered the outcome of the election.", "she casted a vote in the election."]}
{"query": "The sick child coughed on his friend, therefore", "gold": 0, "choices": ["his friend got sick.", "his friend sneezed."]}
{"query": "The couple got engaged, therefore", "gold": 0, "choices": ["they planned a wedding.", "they took some time apart."]}
{"query": "The woman contacted the real estate agent, because", "gold": 0, "choices": ["the woman planned to buy a condo.", "the woman needed to clean her house."]}
{"query": "The man won the lottery, therefore", "gold": 0, "choices": ["he became rich.", "he owed money."]}
{"query": "I lit the candle, therefore", "gold": 0, "choices": ["wax dripped off the candle.", "the wax on the candle hardened."]}
{"query": "I spent the day at the pool, therefore", "gold": 1, "choices": ["i sprained my ankle.", "my face got sunburned."]}
{"query": "The man received a parking ticket, because", "gold": 1, "choices": ["he parallel parked on the street.", "the parking meter expired."]}
{"query": "The woman became famous, therefore", "gold": 0, "choices": ["photographers followed her.", "her family avoided her."]}
{"query": "The girl wanted to wear earrings, therefore", "gold": 0, "choices": ["she got her ears pierced.", "she got a tattoo."]}
{"query": "My ears were ringing, because", "gold": 1, "choices": ["i went to a museum.", "i went to a concert."]}
{"query": "I tidied up my house, because", "gold": 1, "choices": ["i was swamped with work.", "i was expecting company."]}
{"query": "The airline mishandled my luggage, therefore", "gold": 0, "choices": ["they offered me compensation.", "they cancelled my flight."]}
{"query": "The computer was expensive to fix, therefore", "gold": 1, "choices": ["i got it repaired.", "i bought a new one."]}
{"query": "The woman was in a bad mood, therefore", "gold": 1, "choices": ["she engaged in small talk with her friend.", "she told her friend to leave her alone."]}
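Note: each copa.jsonl record above pairs a "query" with candidate "choices" and a "gold" index; the winograd_wsc.jsonl records further down instead vary the context ("context_options") against a fixed "continuation". A minimal sketch of how either record shape could be scored, assuming a caller-supplied loglikelihood function (a hypothetical stand-in, not an API from this repo; the repo's own evaluation entry point appears to be eval/eval_openlm_ckpt.py):

import json

def predict(record, loglikelihood):
    """Return the argmax index for one eval record under a text scorer.

    loglikelihood is assumed to return a model's log-probability of a
    string; it is an assumption of this sketch, not part of this repo.
    """
    if "choices" in record:  # copa-style: fixed query, varying continuations
        texts = [record["query"] + " " + c for c in record["choices"]]
    else:  # winograd-style: varying contexts, fixed continuation
        texts = [c + " " + record["continuation"] for c in record["context_options"]]
    scores = [loglikelihood(t) for t in texts]
    return max(range(len(scores)), key=scores.__getitem__)

rec = json.loads('{"query": "The lock opened, because", "gold": 0, "choices": ["i turned the key in the lock.", "i made a duplicate of the key."]}')
print(predict(rec, lambda t: -len(t)) == rec["gold"])  # toy length-based scorer, demo only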
eval/local_data/hellaswag.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
eval/local_data/jeopardy_all.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
eval/local_data/lambada_openai.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
eval/local_data/mmlu.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
eval/local_data/piqa.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
eval/local_data/triviaqa.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
eval/local_data/winograd_wsc.jsonl
ADDED
@@ -0,0 +1,273 @@
{"context_options": ["The city councilmen refused the demonstrators a permit because the city councilmen", "The city councilmen refused the demonstrators a permit because the demonstrators"], "continuation": "feared violence.", "gold": 0}
{"context_options": ["The city councilmen refused the demonstrators a permit because the city councilmen", "The city councilmen refused the demonstrators a permit because the demonstrators"], "continuation": "advocated violence.", "gold": 1}
{"context_options": ["The trophy doesn't fit into the brown suitcase because the trophy", "The trophy doesn't fit into the brown suitcase because the suitcase"], "continuation": "is too large.", "gold": 0}
{"context_options": ["The trophy doesn't fit into the brown suitcase because the trophy", "The trophy doesn't fit into the brown suitcase because the suitcase"], "continuation": "is too small.", "gold": 1}
{"context_options": ["Joan made sure to thank Susan for all the help Joan", "Joan made sure to thank Susan for all the help Susan"], "continuation": "had recieved.", "gold": 0}
{"context_options": ["Joan made sure to thank Susan for all the help Joan", "Joan made sure to thank Susan for all the help Susan"], "continuation": "had given.", "gold": 1}
{"context_options": ["Paul tried to call George on the phone, but Paul", "Paul tried to call George on the phone, but George"], "continuation": "wasn't successful.", "gold": 0}
{"context_options": ["Paul tried to call George on the phone, but Paul", "Paul tried to call George on the phone, but George"], "continuation": "wasn't available.", "gold": 1}
{"context_options": ["The lawyer asked the witness a question, but the lawyer", "The lawyer asked the witness a question, but the witness"], "continuation": "was reluctant to repeat it.", "gold": 0}
{"context_options": ["The lawyer asked the witness a question, but the lawyer", "The lawyer asked the witness a question, but the witness"], "continuation": "was reluctant to answer it.", "gold": 1}
{"context_options": ["The delivery truck zoomed by the school bus because the delivery truck", "The delivery truck zoomed by the school bus because the school bus"], "continuation": "was going so fast.", "gold": 0}
{"context_options": ["The delivery truck zoomed by the school bus because the delivery truck", "The delivery truck zoomed by the school bus because the school bus"], "continuation": "was going so slow.", "gold": 1}
{"context_options": ["Frank felt vindicated when his longtime rival Bill revealed that Frank", "Frank felt vindicated when his longtime rival Bill revealed that Bill"], "continuation": "was the winner of the competition.", "gold": 0}
{"context_options": ["Frank felt crushed when his longtime rival Bill revealed that Frank", "Frank felt crushed when his longtime rival Bill revealed that Bill"], "continuation": "was the winner of the competition.", "gold": 1}
{"context_options": ["The man couldn't lift his son because the man", "The man couldn't lift his son because the son"], "continuation": "was so weak.", "gold": 0}
{"context_options": ["The man couldn't lift his son because the man", "The man couldn't lift his son because the son"], "continuation": "was so heavy.", "gold": 1}
{"context_options": ["The large ball crashed right through the table because the large ball", "The large ball crashed right through the table because the table"], "continuation": "was made of steel.", "gold": 0}
{"context_options": ["The large ball crashed right through the table because the large ball", "The large ball crashed right through the table because the table"], "continuation": "was made of styrofoam.", "gold": 1}
{"context_options": ["John couldn't see the stage with Billy in front of him because John", "John couldn't see the stage with Billy in front of him because Billy"], "continuation": "is so short.", "gold": 0}
{"context_options": ["John couldn't see the stage with Billy in front of him because John", "John couldn't see the stage with Billy in front of him because Billy"], "continuation": "is so tall.", "gold": 1}
{"context_options": ["Tom threw his schoolbag down to Ray after Tom", "Tom threw his schoolbag down to Ray after Ray"], "continuation": "reached the top of the stairs.", "gold": 0}
{"context_options": ["Tom threw his schoolbag down to Ray after Tom", "Tom threw his schoolbag down to Ray after Ray"], "continuation": "reached the bottom of the stairs.", "gold": 1}
{"context_options": ["Although they ran at about the same speed, Sue beat Sally because Sue", "Although they ran at about the same speed, Sue beat Sally because Sally"], "continuation": "had such a good start.", "gold": 0}
{"context_options": ["Although they ran at about the same speed, Sue beat Sally because Sue", "Although they ran at about the same speed, Sue beat Sally because Sally"], "continuation": "had such a bad start.", "gold": 1}
{"context_options": ["The sculpture rolled off the shelf because the sculpture", "The sculpture rolled off the shelf because the shelf"], "continuation": "wasn't anchored.", "gold": 0}
{"context_options": ["The sculpture rolled off the shelf because the sculpture", "The sculpture rolled off the shelf because the shelf"], "continuation": "wasn't level.", "gold": 1}
{"context_options": ["Sam's drawing was hung just above Tina's and Sam's drawing", "Sam's drawing was hung just above Tina's and Tina's drawing"], "continuation": "did look much better with another one below it.", "gold": 0}
{"context_options": ["Sam's drawing was hung just above Tina's and Sam's drawing", "Sam's drawing was hung just above Tina's and Tina's drawing"], "continuation": "did look much better with another one above it.", "gold": 1}
{"context_options": ["Anna did a lot better than her good friend Lucy on the test because Anna", "Anna did a lot better than her good friend Lucy on the test because Lucy"], "continuation": "had studied so hard.", "gold": 0}
{"context_options": ["Anna did a lot worse than her good friend Lucy on the test because Anna", "Anna did a lot worse than her good friend Lucy on the test because Lucy"], "continuation": "had studied so hard.", "gold": 1}
{"context_options": ["The firemen arrived after the police because the firemen", "The firemen arrived after the police because the police"], "continuation": "were coming from so far away.", "gold": 0}
{"context_options": ["The firemen arrived before the police because the firemen", "The firemen arrived before the police because the police"], "continuation": "were coming from so far away.", "gold": 1}
{"context_options": ["Frank was upset with Tom because the toaster Frank", "Frank was upset with Tom because the toaster Tom"], "continuation": "had bought from him didn't work.", "gold": 0}
{"context_options": ["Frank was upset with Tom because the toaster Frank", "Frank was upset with Tom because the toaster Tom"], "continuation": "had sold him didn't work.", "gold": 1}
{"context_options": ["Jim yelled at Kevin because Jim", "Jim yelled at Kevin because Kevin"], "continuation": "was so upset.", "gold": 0}
{"context_options": ["Jim comforted Kevin because Jim", "Jim comforted Kevin because Kevin"], "continuation": "was so upset.", "gold": 1}
{"context_options": ["The sack of potatoes had been placed above the bag of flour, so the sack of potatoes", "The sack of potatoes had been placed above the bag of flour, so the bag of flour"], "continuation": "had to be moved first.", "gold": 0}
{"context_options": ["The sack of potatoes had been placed below the bag of flour, so the sack of potatoes", "The sack of potatoes had been placed below the bag of flour, so the bag of flour"], "continuation": "had to be moved first.", "gold": 1}
{"context_options": ["Pete envies Martin although Pete", "Pete envies Martin although Martin"], "continuation": "is very successful.", "gold": 0}
{"context_options": ["Pete envies Martin because Pete", "Pete envies Martin because Martin"], "continuation": "is very successful.", "gold": 1}
{"context_options": ["The older students were bullying the younger ones, so we punished the older students", "The older students were bullying the younger ones, so we punished the younger students"], "continuation": ".", "gold": 0}
{"context_options": ["The older students were bullying the younger ones, so we rescued the older students", "The older students were bullying the younger ones, so we rescued the younger students"], "continuation": ".", "gold": 1}
{"context_options": ["I poured water from the bottle into the cup until the bottle", "I poured water from the bottle into the cup until the cup"], "continuation": "was empty.", "gold": 0}
{"context_options": ["I poured water from the bottle into the cup until the bottle", "I poured water from the bottle into the cup until the cup"], "continuation": "was full.", "gold": 1}
{"context_options": ["Susan knows all about Ann's personal problems because Susan", "Susan knows all about Ann's personal problems because Ann"], "continuation": "is nosy.", "gold": 0}
{"context_options": ["Susan knows all about Ann's personal problems because Susan", "Susan knows all about Ann's personal problems because Ann"], "continuation": "is indiscreet.", "gold": 1}
{"context_options": ["Sid explained his theory to Mark but Sid", "Sid explained his theory to Mark but Mark"], "continuation": "couldn't convince him.", "gold": 0}
{"context_options": ["Sid explained his theory to Mark but Sid", "Sid explained his theory to Mark but Mark"], "continuation": "couldn't understand him.", "gold": 1}
{"context_options": ["Susan knew that Ann's son had been in a car accident, so Susan", "Susan knew that Ann's son had been in a car accident, so Ann"], "continuation": "told her about it.", "gold": 0}
{"context_options": ["Susan knew that Ann's son had been in a car accident, because Susan", "Susan knew that Ann's son had been in a car accident, because Ann"], "continuation": "told her about it.", "gold": 1}
{"context_options": ["Joe's uncle can still beat him at tennis, even though Joe", "Joe's uncle can still beat him at tennis, even though Joe's uncle"], "continuation": "is 30 years younger.", "gold": 0}
{"context_options": ["Joe's uncle can still beat him at tennis, even though Joe", "Joe's uncle can still beat him at tennis, even though Joe's uncle"], "continuation": "is 30 years older.", "gold": 1}
{"context_options": ["The painting in Mark's living room shows an oak tree. The painting", "The painting in Mark's living room shows an oak tree. The oak tree"], "continuation": "is to the right of the bookcase.", "gold": 0}
{"context_options": ["The painting in Mark's living room shows an oak tree. The painting", "The painting in Mark's living room shows an oak tree. The oak tree"], "continuation": "is to the right of a house.", "gold": 1}
{"context_options": ["There is a gap in the wall. You can see the garden through the gap", "There is a gap in the wall. You can see the garden through the wall"], "continuation": ".", "gold": 0}
{"context_options": ["There is a gap in the wall. You can see the garden behind the gap", "There is a gap in the wall. You can see the garden behind the wall"], "continuation": ".", "gold": 1}
{"context_options": ["The drain is clogged with hair. The drain", "The drain is clogged with hair. The hair"], "continuation": "has to be cleaned.", "gold": 0}
{"context_options": ["The drain is clogged with hair. The drain", "The drain is clogged with hair. The hair"], "continuation": "has to be removed.", "gold": 1}
{"context_options": ["My meeting started at 4:00 and I needed to catch the train at 4:30, so there wasn't much time. Luckily, the meeting", "My meeting started at 4:00 and I needed to catch the train at 4:30, so there wasn't much time. Luckily, the train"], "continuation": "was short, so it worked out.", "gold": 0}
{"context_options": ["My meeting started at 4:00 and I needed to catch the train at 4:30, so there wasn't much time. Luckily, the meeting", "My meeting started at 4:00 and I needed to catch the train at 4:30, so there wasn't much time. Luckily, the train"], "continuation": "was delayed, so it worked out.", "gold": 1}
{"context_options": ["There is a pillar between me and the stage, and I can't see around the pillar", "There is a pillar between me and the stage, and I can't see around the stage"], "continuation": ".", "gold": 0}
{"context_options": ["There is a pillar between me and the stage, and I can't see the pillar", "There is a pillar between me and the stage, and I can't see the stage"], "continuation": ".", "gold": 1}
{"context_options": ["They broadcast an announcement, but a subway came into the station and I couldn't hear the announcement", "They broadcast an announcement, but a subway came into the station and I couldn't hear the subway"], "continuation": ".", "gold": 0}
{"context_options": ["They broadcast an announcement, but a subway came into the station and I couldn't hear over the announcement", "They broadcast an announcement, but a subway came into the station and I couldn't hear over the subway"], "continuation": ".", "gold": 1}
{"context_options": ["In the middle of the outdoor concert, the rain started falling, but the concert", "In the middle of the outdoor concert, the rain started falling, but the rain"], "continuation": "continued until 10.", "gold": 0}
{"context_options": ["In the middle of the outdoor concert, the rain started falling, and the concert", "In the middle of the outdoor concert, the rain started falling, and the rain"], "continuation": "continued until 10.", "gold": 1}
{"context_options": ["I used an old rag to clean the knife, and then I put the rag", "I used an old rag to clean the knife, and then I put the knife"], "continuation": "in the trash.", "gold": 0}
{"context_options": ["I used an old rag to clean the knife, and then I put the rag", "I used an old rag to clean the knife, and then I put the knife"], "continuation": "in the drawer.", "gold": 1}
{"context_options": ["Ann asked Mary what time the library closes, because Ann", "Ann asked Mary what time the library closes, because Mary"], "continuation": "had forgotten.", "gold": 0}
{"context_options": ["Ann asked Mary what time the library closes, but Ann", "Ann asked Mary what time the library closes, but Mary"], "continuation": "had forgotten.", "gold": 1}
{"context_options": ["I took the water bottle out of the backpack so that the water bottle", "I took the water bottle out of the backpack so that the backpack"], "continuation": "would be handy.", "gold": 0}
{"context_options": ["I took the water bottle out of the backpack so that the water bottle", "I took the water bottle out of the backpack so that the backpack"], "continuation": "would be lighter.", "gold": 1}
{"context_options": ["I couldn't put the pot on the shelf because the pot", "I couldn't put the pot on the shelf because the shelf"], "continuation": "was too tall.", "gold": 0}
{"context_options": ["I couldn't put the pot on the shelf because the pot", "I couldn't put the pot on the shelf because the shelf"], "continuation": "was too high.", "gold": 1}
{"context_options": ["I'm sure that my map will show this building; the map", "I'm sure that my map will show this building; the building"], "continuation": "is very good.", "gold": 0}
{"context_options": ["I'm sure that my map will show this building; the map", "I'm sure that my map will show this building; the building"], "continuation": "is very famous.", "gold": 1}
{"context_options": ["Bob paid for Charlie's college education. Bob", "Bob paid for Charlie's college education. Charlie"], "continuation": "is very generous.", "gold": 0}
{"context_options": ["Bob paid for Charlie's college education. Bob", "Bob paid for Charlie's college education. Charlie"], "continuation": "is very grateful.", "gold": 1}
{"context_options": ["Bob paid for Charlie's college education, but now Charlie acts as though it never happened. Bob", "Bob paid for Charlie's college education, but now Charlie acts as though it never happened. Charlie"], "continuation": "is very hurt.", "gold": 0}
{"context_options": ["Bob paid for Charlie's college education, but now Charlie acts as though it never happened. Bob", "Bob paid for Charlie's college education, but now Charlie acts as though it never happened. Charlie"], "continuation": "is very ungrateful.", "gold": 1}
{"context_options": ["Bob was playing cards with Adam and was way ahead. If Adam hadn't had a sudden run of good luck, Bob", "Bob was playing cards with Adam and was way ahead. If Adam hadn't had a sudden run of good luck, Adam"], "continuation": "would have won.", "gold": 0}
{"context_options": ["Bob was playing cards with Adam and was way ahead. If Adam hadn't had a sudden run of good luck, Bob", "Bob was playing cards with Adam and was way ahead. If Adam hadn't had a sudden run of good luck, Adam"], "continuation": "would have lost.", "gold": 1}
{"context_options": ["Adam can't leave work here until Bob arrives to replace him. If Bob had left home for work on time, Adam", "Adam can't leave work here until Bob arrives to replace him. If Bob had left home for work on time, Bob"], "continuation": "would be gone by this time.", "gold": 0}
{"context_options": ["Adam can't leave work here until Bob arrives to replace him. If Bob had left home for work on time, Adam", "Adam can't leave work here until Bob arrives to replace him. If Bob had left home for work on time, Bob"], "continuation": "would be here by this time.", "gold": 1}
{"context_options": ["If the con artist has succeeded in fooling Sam, the con artist", "If the con artist has succeeded in fooling Sam, Sam"], "continuation": "would have gotten a lot of money.", "gold": 0}
{"context_options": ["If the con artist has succeeded in fooling Sam, the con artist", "If the con artist has succeeded in fooling Sam, Sam"], "continuation": "would have lost a lot of money.", "gold": 1}
{"context_options": ["It was a summer afternoon, and the dog was sitting in the middle of the lawn. After a while, it got up and moved to a spot under the tree, because the dog", "It was a summer afternoon, and the dog was sitting in the middle of the lawn. After a while, it got up and moved to a spot under the tree, because the spot under the tree"], "continuation": "was hot.", "gold": 0}
{"context_options": ["It was a summer afternoon, and the dog was sitting in the middle of the lawn. After a while, it got up and moved to a spot under the tree, because the dog", "It was a summer afternoon, and the dog was sitting in the middle of the lawn. After a while, it got up and moved to a spot under the tree, because the spot under the tree"], "continuation": "was cooler.", "gold": 1}
{"context_options": ["The cat was lying by the mouse hole waiting for the mouse, but the cat", "The cat was lying by the mouse hole waiting for the mouse, but the mouse"], "continuation": "was too impatient.", "gold": 0}
{"context_options": ["The cat was lying by the mouse hole waiting for the mouse, but the cat", "The cat was lying by the mouse hole waiting for the mouse, but the mouse"], "continuation": "was too cautious.", "gold": 1}
{"context_options": ["Anne gave birth to a daughter last month. Anne", "Anne gave birth to a daughter last month. Anne's daughter"], "continuation": "is a very charming woman.", "gold": 0}
{"context_options": ["Anne gave birth to a daughter last month. Anne", "Anne gave birth to a daughter last month. Anne's daughter"], "continuation": "is a very charming baby.", "gold": 1}
{"context_options": ["Alice tried frantically to stop her daughter from chatting at the party, leaving us to wonder why Alice", "Alice tried frantically to stop her daughter from chatting at the party, leaving us to wonder why Alice's daughter"], "continuation": "was behaving so strangely.", "gold": 0}
{"context_options": ["Alice tried frantically to stop her daughter from barking at the party, leaving us to wonder why Alice", "Alice tried frantically to stop her daughter from barking at the party, leaving us to wonder why Alice's daughter"], "continuation": "was behaving so strangely.", "gold": 1}
{"context_options": ["I saw Jim yelling at some guy in a military uniform with a huge red beard. I don't know why Jim", "I saw Jim yelling at some guy in a military uniform with a huge red beard. I don't know why the guy in uniform"], "continuation": "was, but he looked very unhappy.", "gold": 0}
{"context_options": ["I saw Jim yelling at some guy in a military uniform with a huge red beard. I don't know who Jim", "I saw Jim yelling at some guy in a military uniform with a huge red beard. I don't know who the guy in uniform"], "continuation": "was, but he looked very unhappy.", "gold": 1}
{"context_options": ["The fish ate the worm. The fish", "The fish ate the worm. The worm"], "continuation": "was hungry.", "gold": 0}
{"context_options": ["The fish ate the worm. The fish", "The fish ate the worm. The worm"], "continuation": "was tasty.", "gold": 1}
{"context_options": ["I was trying to open the lock with the key, but someone had filled the keyhole with chewing gum, and I couldn't get the key", "I was trying to open the lock with the key, but someone had filled the keyhole with chewing gum, and I couldn't get the chewing gum"], "continuation": "in.", "gold": 0}
{"context_options": ["I was trying to open the lock with the key, but someone had filled the keyhole with chewing gum, and I couldn't get the key", "I was trying to open the lock with the key, but someone had filled the keyhole with chewing gum, and I couldn't get the chewing gum"], "continuation": "out.", "gold": 1}
{"context_options": ["The dog chased the cat, which ran up a tree. The dog", "The dog chased the cat, which ran up a tree. The cat"], "continuation": "waited at the bottom.", "gold": 0}
{"context_options": ["The dog chased the cat, which ran up a tree. The dog", "The dog chased the cat, which ran up a tree. The cat"], "continuation": "waited at the top.", "gold": 1}
{"context_options": ["In the storm, the tree fell down and crashed through the roof of my house. Now, I have to get the tree", "In the storm, the tree fell down and crashed through the roof of my house. Now, I have to get the roof"], "continuation": "removed.", "gold": 0}
{"context_options": ["In the storm, the tree fell down and crashed through the roof of my house. Now, I have to get the tree", "In the storm, the tree fell down and crashed through the roof of my house. Now, I have to get the roof"], "continuation": "repaired.", "gold": 1}
{"context_options": ["The customer walked into the bank and stabbed one of the tellers. The customer", "The customer walked into the bank and stabbed one of the tellers. The teller"], "continuation": "was immediately taken to the police station.", "gold": 0}
{"context_options": ["The customer walked into the bank and stabbed one of the tellers. The customer", "The customer walked into the bank and stabbed one of the tellers. The teller"], "continuation": "was immediately taken to the hospital.", "gold": 1}
{"context_options": ["John was doing research in the library when he heard a man humming and whistling. John", "John was doing research in the library when he heard a man humming and whistling. The man"], "continuation": "was very annoyed.", "gold": 0}
{"context_options": ["John was doing research in the library when he heard a man humming and whistling. John", "John was doing research in the library when he heard a man humming and whistling. The man"], "continuation": "was very annoying.", "gold": 1}
{"context_options": ["John was jogging through the park when he saw a man juggling watermelons. John", "John was jogging through the park when he saw a man juggling watermelons. The juggler"], "continuation": "was very impressed.", "gold": 0}
{"context_options": ["John was jogging through the park when he saw a man juggling watermelons. John", "John was jogging through the park when he saw a man juggling watermelons. The juggler"], "continuation": "was very impressive.", "gold": 1}
{"context_options": ["Bob collapsed on the sidewalk. Soon he saw Carl coming to help. Bob", "Bob collapsed on the sidewalk. Soon he saw Carl coming to help. Carl"], "continuation": "was very ill.", "gold": 0}
{"context_options": ["Bob collapsed on the sidewalk. Soon he saw Carl coming to help. Bob", "Bob collapsed on the sidewalk. Soon he saw Carl coming to help. Carl"], "continuation": "was very concerned.", "gold": 1}
{"context_options": ["Sam and Amy are passionately in love, but Amy's parents are unhappy about it, because Sam and Amy", "Sam and Amy are passionately in love, but Amy's parents are unhappy about it, because Amy's parents"], "continuation": "are fifteen.", "gold": 0}
{"context_options": ["Sam and Amy are passionately in love, but Amy's parents are unhappy about it, because Sam and Amy", "Sam and Amy are passionately in love, but Amy's parents are unhappy about it, because Amy's parents"], "continuation": "are snobs.", "gold": 1}
{"context_options": ["Mark told Pete many lies about himself, which Pete included in his book. Mark", "Mark told Pete many lies about himself, which Pete included in his book. Pete"], "continuation": "should have been more truthful.", "gold": 0}
{"context_options": ["Mark told Pete many lies about himself, which Pete included in his book. Mark", "Mark told Pete many lies about himself, which Pete included in his book. Pete"], "continuation": "should have been more skeptical.", "gold": 1}
{"context_options": ["Joe has sold his house and bought a new one a few miles away. He will be moving out of the old house", "Joe has sold his house and bought a new one a few miles away. He will be moving out of the new house"], "continuation": "on Thursday.", "gold": 0}
{"context_options": ["Joe has sold his house and bought a new one a few miles away. He will be moving into the old house", "Joe has sold his house and bought a new one a few miles away. He will be moving into the new house"], "continuation": "on Thursday.", "gold": 1}
{"context_options": ["Many people start to read Paul's books and can't put them down. People", "Many people start to read Paul's books and can't put them down. Paul's books"], "continuation": "are gripped because Paul writes so well.", "gold": 0}
{"context_options": ["Many people start to read Paul's books and can't put them down. People", "Many people start to read Paul's books and can't put them down. Paul's books"], "continuation": "are popular because Paul writes so well.", "gold": 1}
{"context_options": ["Mary took out her flute and played one of her favorite pieces. She has had the flute", "Mary took out her flute and played one of her favorite pieces. She has had the piece"], "continuation": "since she was a child.", "gold": 0}
{"context_options": ["Mary took out her flute and played one of her favorite pieces. She has loved the flute", "Mary took out her flute and played one of her favorite pieces. She has loved the piece"], "continuation": "since she was a child.", "gold": 1}
{"context_options": ["Sam pulled up a chair to the piano, but the chair", "Sam pulled up a chair to the piano, but the piano"], "continuation": "was broken, so he had to stand instead.", "gold": 0}
{"context_options": ["Sam pulled up a chair to the piano, but the chair", "Sam pulled up a chair to the piano, but the piano"], "continuation": "was broken, so he had to sing instead.", "gold": 1}
{"context_options": ["Since it was raining, I carried the newspaper in my backpack to keep the newspaper", "Since it was raining, I carried the newspaper in my backpack to keep the backpack"], "continuation": "dry.", "gold": 0}
{"context_options": ["Since it was raining, I carried the newspaper over my backpack to keep the newspaper", "Since it was raining, I carried the newspaper over my backpack to keep the backpack"], "continuation": "dry.", "gold": 1}
{"context_options": ["Sara borrowed the book from the library because she needs it for an article she is working on. She reads the book", "Sara borrowed the book from the library because she needs it for an article she is working on. She reads the article"], "continuation": "when she gets home from work.", "gold": 0}
{"context_options": ["Sara borrowed the book from the library because she needs it for an article she is working on. She writes the book", "Sara borrowed the book from the library because she needs it for an article she is working on. She writes the article"], "continuation": "when she gets home from work.", "gold": 1}
{"context_options": ["This morning, Joey built a sand castle on the beach, and put a toy flag in the highest tower, but this afternoon the tide knocked the sand castle", "This morning, Joey built a sand castle on the beach, and put a toy flag in the highest tower, but this afternoon the tide knocked the flag"], "continuation": "down.", "gold": 0}
{"context_options": ["This morning, Joey built a sand castle on the beach, and put a toy flag in the highest tower, but this afternoon the wind knocked the sand castle", "This morning, Joey built a sand castle on the beach, and put a toy flag in the highest tower, but this afternoon the wind knocked the flag"], "continuation": "down.", "gold": 1}
{"context_options": ["Jane knocked on Susan's door, but there was no answer. Jane", "Jane knocked on Susan's door, but there was no answer. Susan"], "continuation": "was disappointed.", "gold": 0}
{"context_options": ["Jane knocked on Susan's door, but there was no answer. Jane", "Jane knocked on Susan's door, but there was no answer. Susan"], "continuation": "was out.", "gold": 1}
{"context_options": ["Jane knocked on the door, and Susan answered it. Jane", "Jane knocked on the door, and Susan answered it. Susan"], "continuation": "invited her to come out.", "gold": 0}
{"context_options": ["Jane knocked on the door, and Susan answered it. Jane", "Jane knocked on the door, and Susan answered it. Susan"], "continuation": "invited her to come in.", "gold": 1}
{"context_options": ["Sam took French classes from Adam, because Sam", "Sam took French classes from Adam, because Adam"], "continuation": "was eager to speak it fluently.", "gold": 0}
{"context_options": ["Sam took French classes from Adam, because Sam", "Sam took French classes from Adam, because Adam"], "continuation": "was known to speak it fluently.", "gold": 1}
{"context_options": ["The path to the lake was blocked, so we couldn't use the path", "The path to the lake was blocked, so we couldn't use the lake"], "continuation": ".", "gold": 0}
{"context_options": ["The path to the lake was blocked, so we couldn't reach the path", "The path to the lake was blocked, so we couldn't reach the lake"], "continuation": ".", "gold": 1}
{"context_options": ["The sun was covered by a thick cloud all morning, but luckily, by the time the picnic started, the sun", "The sun was covered by a thick cloud all morning, but luckily, by the time the picnic started, the cloud"], "continuation": "was out.", "gold": 0}
{"context_options": ["The sun was covered by a thick cloud all morning, but luckily, by the time the picnic started, the sun", "The sun was covered by a thick cloud all morning, but luckily, by the time the picnic started, the cloud"], "continuation": "was gone.", "gold": 1}
{"context_options": ["We went to the lake, because a shark had been seen at the ocean beach, so the lake", "We went to the lake, because a shark had been seen at the ocean beach, so the ocean beach"], "continuation": "was a safer place to swim.", "gold": 0}
{"context_options": ["We went to the lake, because a shark had been seen at the ocean beach, so the lake", "We went to the lake, because a shark had been seen at the ocean beach, so the ocean beach"], "continuation": "was a dangerous place to swim.", "gold": 1}
{"context_options": ["Sam tried to paint a picture of shepherds with sheep, but the shepherds", "Sam tried to paint a picture of shepherds with sheep, but the sheep"], "continuation": "ended up looking more like golfers.", "gold": 0}
{"context_options": ["Sam tried to paint a picture of shepherds with sheep, but the shepherds", "Sam tried to paint a picture of shepherds with sheep, but the sheep"], "continuation": "ended up looking more like dogs.", "gold": 1}
{"context_options": ["Mary tucked her daughter Anne into bed, so that Mary", "Mary tucked her daughter Anne into bed, so that Mary's daughter"], "continuation": "could work.", "gold": 0}
{"context_options": ["Mary tucked her daughter Anne into bed, so that Mary", "Mary tucked her daughter Anne into bed, so that Mary's daughter"], "continuation": "could sleep.", "gold": 1}
{"context_options": ["Fred and Alice had very warm down coats, but Fred and Alice", "Fred and Alice had very warm down coats, but coats"], "continuation": "were not prepared for the cold in Alaska.", "gold": 0}
{"context_options": ["Fred and Alice had very warm down coats, but Fred and Alice", "Fred and Alice had very warm down coats, but coats"], "continuation": "were not enough for the cold in Alaska.", "gold": 1}
{"context_options": ["Thomson visited Cooper's grave in 1765. At that date Thomson", "Thomson visited Cooper's grave in 1765. At that date Cooper"], "continuation": "had been travelling for five years.", "gold": 0}
{"context_options": ["Thomson visited Cooper's grave in 1765. At that date Thomson", "Thomson visited Cooper's grave in 1765. At that date Cooper"], "continuation": "had been dead for five years.", "gold": 1}
{"context_options": ["Jackson was greatly influenced by Arnold, though Jackson", "Jackson was greatly influenced by Arnold, though Arnold"], "continuation": "lived two centuries later.", "gold": 0}
{"context_options": ["Jackson was greatly influenced by Arnold, though Jackson", "Jackson was greatly influenced by Arnold, though Arnold"], "continuation": "lived two centuries earlier.", "gold": 1}
{"context_options": ["I can't cut that tree down with that axe; the tree", "I can't cut that tree down with that axe; the axe"], "continuation": "is too thick.", "gold": 0}
{"context_options": ["I can't cut that tree down with that axe; the tree", "I can't cut that tree down with that axe; the axe"], "continuation": "is too small.", "gold": 1}
{"context_options": ["The foxes are getting in at night and attacking the chickens. I shall have to kill the foxes", "The foxes are getting in at night and attacking the chickens. I shall have to kill the chickens"], "continuation": ".", "gold": 0}
{"context_options": ["The foxes are getting in at night and attacking the chickens. I shall have to guard the foxes", "The foxes are getting in at night and attacking the chickens. I shall have to guard the chickens"], "continuation": ".", "gold": 1}
{"context_options": ["The foxes are getting in at night and attacking the chickens. The foxes", "The foxes are getting in at night and attacking the chickens. The chickens"], "continuation": "have gotten very bold.", "gold": 0}
{"context_options": ["The foxes are getting in at night and attacking the chickens. The foxes", "The foxes are getting in at night and attacking the chickens. The chickens"], "continuation": "have gotten very nervous.", "gold": 1}
{"context_options": ["Fred covered his eyes with his hands, because the wind was blowing sand around. He opened his eyes", "Fred covered his eyes with his hands, because the wind was blowing sand around. He opened his hands"], "continuation": "when the wind stopped.", "gold": 0}
{"context_options": ["Fred covered his eyes with his hands, because the wind was blowing sand around. He lowered his eyes", "Fred covered his eyes with his hands, because the wind was blowing sand around. He lowered his hands"], "continuation": "when the wind stopped.", "gold": 1}
{"context_options": ["The actress used to be named Terpsichore, but she changed it to Tina a few years ago, because she figured Terpsichore", "The actress used to be named Terpsichore, but she changed it to Tina a few years ago, because she figured Tina"], "continuation": "was too hard to pronounce.", "gold": 0}
{"context_options": ["The actress used to be named Terpsichore, but she changed it to Tina a few years ago, because she figured Terpsichore", "The actress used to be named Terpsichore, but she changed it to Tina a few years ago, because she figured Tina"], "continuation": "was easier to pronounce.", "gold": 1}
{"context_options": ["Fred watched TV while George went out to buy groceries. After an hour Fred", "Fred watched TV while George went out to buy groceries. After an hour George"], "continuation": "got up.", "gold": 0}
{"context_options": ["Fred watched TV while George went out to buy groceries. After an hour Fred", "Fred watched TV while George went out to buy groceries. After an hour George"], "continuation": "got back.", "gold": 1}
{"context_options": ["Fred was supposed to run the dishwasher, but he put it off, because he wanted to watch TV. But the show turned out to be boring, so he changed his mind and turned the dishwasher", "Fred was supposed to run the dishwasher, but he put it off, because he wanted to watch TV. But the show turned out to be boring, so he changed his mind and turned the TV"], "continuation": "on.", "gold": 0}
{"context_options": ["Fred was supposed to run the dishwasher, but he put it off, because he wanted to watch TV. But the show turned out to be boring, so he changed his mind and turned the dishwasher", "Fred was supposed to run the dishwasher, but he put it off, because he wanted to watch TV. But the show turned out to be boring, so he changed his mind and turned the TV"], "continuation": "off.", "gold": 1}
{"context_options": ["Fred is the only man still alive who remembers my great-grandfather. Fred", "Fred is the only man still alive who remembers my great-grandfather. My great-grandfather"], "continuation": "is a remarkable man.", "gold": 0}
{"context_options": ["Fred is the only man still alive who remembers my great-grandfather. Fred", "Fred is the only man still alive who remembers my great-grandfather. My great-grandfather"], "continuation": "was a remarkable man.", "gold": 1}
{"context_options": ["Fred is the only man alive who still remembers my father as an infant. When Fred first saw my father, Fred", "Fred is the only man alive who still remembers my father as an infant. When Fred first saw my father, my father"], "continuation": "was twelve years old.", "gold": 0}
{"context_options": ["Fred is the only man alive who still remembers my father as an infant. When Fred first saw my father, Fred", "Fred is the only man alive who still remembers my father as an infant. When Fred first saw my father, my father"], "continuation": "was twelve months old.", "gold": 1}
{"context_options": ["In July, Kamtchatka declared war on Yakutsk. Since Yakutsk's army was much better equipped and ten times larger, Kamchatka", "In July, Kamtchatka declared war on Yakutsk. Since Yakutsk's army was much better equipped and ten times larger, Yakutsk"], "continuation": "were defeated within weeks.", "gold": 0}
{"context_options": ["In July, Kamtchatka declared war on Yakutsk. Since Yakutsk's army was much better equipped and ten times larger, Kamchatka", "In July, Kamtchatka declared war on Yakutsk. Since Yakutsk's army was much better equipped and ten times larger, Yakutsk"], "continuation": "were victorious within weeks.", "gold": 1}
{"context_options": ["Look! There is a minnow swimming right below that duck! the minnow", "Look! There is a minnow swimming right below that duck! the duck"], "continuation": "had better get away to safety fast!", "gold": 0}
{"context_options": ["Look! There is a shark swimming right below that duck! the shark", "Look! There is a shark swimming right below that duck! the duck"], "continuation": "had better get away to safety fast!", "gold": 1}
{"context_options": ["Archaeologists have concluded that humans lived in Laputa 20,000 years ago. Archaeologists", "Archaeologists have concluded that humans lived in Laputa 20,000 years ago. Prehistoric humans"], "continuation": "hunted for evidence on the river banks.", "gold": 0}
{"context_options": ["Archaeologists have concluded that humans lived in Laputa 20,000 years ago. Archaeologists", "Archaeologists have concluded that humans lived in Laputa 20,000 years ago. Prehistoric humans"], "continuation": "hunted for deer on the river banks.", "gold": 1}
{"context_options": ["The scientists are studying three species of fish that have recently been found living in the Indian Ocean. The scientists", "The scientists are studying three species of fish that have recently been found living in the Indian Ocean. The fish"], "continuation": "began two years ago.", "gold": 0}
{"context_options": ["The scientists are studying three species of fish that have recently been found living in the Indian Ocean. The scientists", "The scientists are studying three species of fish that have recently been found living in the Indian Ocean. The fish"], "continuation": "appeared two years ago.", "gold": 1}
{"context_options": ["The journalists interviewed the stars of the new movie. The journalists", "The journalists interviewed the stars of the new movie. The stars"], "continuation": "were very persistent, so the interview lasted for a long time.", "gold": 0}
{"context_options": ["The journalists interviewed the stars of the new movie. The journalists", "The journalists interviewed the stars of the new movie. The stars"], "continuation": "were very cooperative, so the interview lasted for a long time.", "gold": 1}
{"context_options": ["The police arrested all of the gang members. The police", "The police arrested all of the gang members. The gang members"], "continuation": "were trying to stop the drug trade in the neighborhood.", "gold": 0}
{"context_options": ["The police arrested all of the gang members. The police", "The police arrested all of the gang members. The gang members"], "continuation": "were trying to run the drug trade in the neighborhood.", "gold": 1}
{"context_options": ["I put the cake away in the refrigerator. The cake", "I put the cake away in the refrigerator. The refrigerator"], "continuation": "has a lot of butter in it.", "gold": 0}
{"context_options": ["I put the cake away in the refrigerator. The cake", "I put the cake away in the refrigerator. The refrigerator"], "continuation": "has a lot of leftovers in it.", "gold": 1}
{"context_options": ["Sam broke both his ankles and he's walking with crutches. But a month or so from now the ankles", "Sam broke both his ankles and he's walking with crutches. But a month or so from now the crutches"], "continuation": "should be better.", "gold": 0}
{"context_options": ["Sam broke both his ankles and he's walking with crutches. But a month or so from now the ankles", "Sam broke both his ankles and he's walking with crutches. But a month or so from now the crutches"], "continuation": "should be unnecessary.", "gold": 1}
{"context_options": ["When the sponsors of the bill got to the town hall, they were surprised to find that the room was full of opponents. The sponsors", "When the sponsors of the bill got to the town hall, they were surprised to find that the room was full of opponents. The opponents"], "continuation": "were very much in the minority.", "gold": 0}
{"context_options": ["When the sponsors of the bill got to the town hall, they were surprised to find that the room was full of opponents. The sponsors", "When the sponsors of the bill got to the town hall, they were surprised to find that the room was full of opponents. The opponents"], "continuation": "were very much in the majority.", "gold": 1}
{"context_options": ["Everyone really loved the oatmeal cookies; only a few people liked the chocolate chip cookies. Next time, we should make more of the oatmeal cookies", "Everyone really loved the oatmeal cookies; only a few people liked the chocolate chip cookies. Next time, we should make more of the chocolate chip cookies"], "continuation": ".", "gold": 0}
{"context_options": ["Everyone really loved the oatmeal cookies; only a few people liked the chocolate chip cookies. Next time, we should make fewer of the oatmeal cookies", "Everyone really loved the oatmeal cookies; only a few people liked the chocolate chip cookies. Next time, we should make fewer of the chocolate chip cookies"], "continuation": ".", "gold": 1}
{"context_options": ["We had hoped to place copies of our newsletter on all the chairs in the auditorium, but there were simply not enough of copies of the newsletter", "We had hoped to place copies of our newsletter on all the chairs in the auditorium, but there were simply not enough of chairs"], "continuation": ".", "gold": 0}
{"context_options": ["We had hoped to place copies of our newsletter on all the chairs in the auditorium, but there were simply too many of copies of the newsletter", "We had hoped to place copies of our newsletter on all the chairs in the auditorium, but there were simply too many of chairs"], "continuation": ".", "gold": 1}
{"context_options": ["I stuck a pin through a carrot. When I pulled the pin out, the pin", "I stuck a pin through a carrot. When I pulled the pin out, the carrot"], "continuation": "left a hole.", "gold": 0}
{"context_options": ["I stuck a pin through a carrot. When I pulled the pin out, the pin", "I stuck a pin through a carrot. When I pulled the pin out, the carrot"], "continuation": "had a hole.", "gold": 1}
{"context_options": ["I couldn't find a spoon, so I tried using a pen to stir my coffee. But that turned out to be a bad idea, because the pen", "I couldn't find a spoon, so I tried using a pen to stir my coffee. But that turned out to be a bad idea, because the coffee"], "continuation": "got full of coffee.", "gold": 0}
{"context_options": ["I couldn't find a spoon, so I tried using a pen to stir my coffee. But that turned out to be a bad idea, because the pen", "I couldn't find a spoon, so I tried using a pen to stir my coffee. But that turned out to be a bad idea, because the coffee"], "continuation": "got full of ink.", "gold": 1}
{"context_options": ["Steve follows Fred's example in everything. Steve", "Steve follows Fred's example in everything. Fred"], "continuation": "admires him hugely.", "gold": 0}
{"context_options": ["Steve follows Fred's example in everything. Steve", "Steve follows Fred's example in everything. Fred"], "continuation": "influences him hugely.", "gold": 1}
{"context_options": ["The table won't fit through the doorway because the table", "The table won't fit through the doorway because the doorway"], "continuation": "is too wide.", "gold": 0}
{"context_options": ["The table won't fit through the doorway because the table", "The table won't fit through the doorway because the doorway"], "continuation": "is too narrow.", "gold": 1}
{"context_options": ["Grace was happy to trade me her sweater for my jacket. She thinks the sweater", "Grace was happy to trade me her sweater for my jacket. She thinks the jacket"], "continuation": "looks dowdy on her.", "gold": 0}
{"context_options": ["Grace was happy to trade me her sweater for my jacket. She thinks the sweater", "Grace was happy to trade me her sweater for my jacket. She thinks the jacket"], "continuation": "looks great on her.", "gold": 1}
{"context_options": ["John hired Bill to take care of John", "John hired Bill to take care of Bill"], "continuation": ".", "gold": 0}
{"context_options": ["John hired himself out to Bill to take care of John", "John hired himself out to Bill to take care of Bill"], "continuation": ".", "gold": 1}
{"context_options": ["John promised Bill to leave, so an hour later John", "John promised Bill to leave, so an hour later Bill"], "continuation": "left.", "gold": 0}
{"context_options": ["John ordered Bill to leave, so an hour later John", "John ordered Bill to leave, so an hour later Bill"], "continuation": "left.", "gold": 1}
{"context_options": ["Sam Goodman's biography of the Spartan general Xenophanes conveys a vivid sense of the difficulties Goodman", "Sam Goodman's biography of the Spartan general Xenophanes conveys a vivid sense of the difficulties Xenophanes"], "continuation": "faced in his research.", "gold": 0}
{"context_options": ["Sam Goodman's biography of the Spartan general Xenophanes conveys a vivid sense of the difficulties Goodman", "Sam Goodman's biography of the Spartan general Xenophanes conveys a vivid sense of the difficulties Xenophanes"], "continuation": "faced in his childhood.", "gold": 1}
{"context_options": ["Emma's mother had died long ago, and Emma's", "Emma's mother had died long ago, and Emma's mother's"], "continuation": "education had been managed by an excellent woman as governess.", "gold": 0}
{"context_options": ["Emma's mother had died long ago, and Emma's", "Emma's mother had died long ago, and Emma's mother's"], "continuation": "place had been taken by an excellent woman as governess.", "gold": 1}
{"context_options": ["Jane knocked on Susan's door but Jane", "Jane knocked on Susan's door but Susan"], "continuation": "did not get an answer.", "gold": 0}
{"context_options": ["Jane knocked on Susan's door but Jane", "Jane knocked on Susan's door but Susan"], "continuation": "did not answer.", "gold": 1}
{"context_options": ["Joe paid the detective after Joe", "Joe paid the detective after the detective"], "continuation": "received the final report on the case.", "gold": 0}
{"context_options": ["Joe paid the detective after Joe", "Joe paid the detective after the detective"], "continuation": "delivered the final report on the case.", "gold": 1}
{"context_options": ["Beth didn't get angry with Sally, who had cut her off, because Beth", "Beth didn't get angry with Sally, who had cut her off, because Sally"], "continuation": "stopped and counted to ten.", "gold": 0}
{"context_options": ["Beth didn't get angry with Sally, who had cut her off, because Beth", "Beth didn't get angry with Sally, who had cut her off, because Sally"], "continuation": "stopped and apologized.", "gold": 1}
{"context_options": ["Jim signaled the barman and gestured toward Jim's", "Jim signaled the barman and gestured toward the barman's"], "continuation": "empty glass", "gold": 0}
{"context_options": ["Jim signaled the barman and gestured toward Jim's", "Jim signaled the barman and gestured toward the barman's"], "continuation": "bathroom key.", "gold": 1}
{"context_options": ["Dan took the rear seat while Bill claimed the front because Dan's", "Dan took the rear seat while Bill claimed the front because Bill's"], "continuation": "\"Dibs!\" was slow.", "gold": 0}
{"context_options": ["Dan took the rear seat while Bill claimed the front because Dan's", "Dan took the rear seat while Bill claimed the front because Bill's"], "continuation": "\"Dibs!\" was quicker.", "gold": 1}
|
221 |
+
{"context_options": ["Tom said \"Check\" to Ralph as he moved Tom's", "Tom said \"Check\" to Ralph as he moved Ralph's"], "continuation": "bishop.", "gold": 0}
|
222 |
+
{"context_options": ["Tom said \"Check\" to Ralph as he took Tom's", "Tom said \"Check\" to Ralph as he took Ralph's"], "continuation": "bishop.", "gold": 1}
|
223 |
+
{"context_options": ["As Andrea in the crop duster passed over Susan, Andrea", "As Andrea in the crop duster passed over Susan, Susan"], "continuation": "could see the landing strip.", "gold": 0}
|
224 |
+
{"context_options": ["As Andrea in the crop duster passed over Susan, Andrea", "As Andrea in the crop duster passed over Susan, Susan"], "continuation": "could see the landing gear.", "gold": 1}
|
225 |
+
{"context_options": ["Tom gave Ralph a lift to school so Tom", "Tom gave Ralph a lift to school so Ralph"], "continuation": "wouldn't have to drive alone.", "gold": 0}
|
226 |
+
{"context_options": ["Tom gave Ralph a lift to school so Tom", "Tom gave Ralph a lift to school so Ralph"], "continuation": "wouldn't have to walk.", "gold": 1}
|
227 |
+
{"context_options": ["Bill passed the half-empty plate to John because Bill", "Bill passed the half-empty plate to John because John"], "continuation": "was full.", "gold": 0}
|
228 |
+
{"context_options": ["Bill passed the half-empty plate to John because Bill", "Bill passed the half-empty plate to John because John"], "continuation": "was hungry.", "gold": 1}
|
229 |
+
{"context_options": ["Bill passed the gameboy to John because Bill's", "Bill passed the gameboy to John because John's"], "continuation": "turn was over.", "gold": 0}
|
230 |
+
{"context_options": ["Bill passed the gameboy to John because Bill's", "Bill passed the gameboy to John because John's"], "continuation": "turn was next.", "gold": 1}
|
231 |
+
{"context_options": ["The man lifted the boy onto the man's", "The man lifted the boy onto the boy's"], "continuation": "shoulders.", "gold": 0}
|
232 |
+
{"context_options": ["The man lifted the boy onto the man's", "The man lifted the boy onto the boy's"], "continuation": "bunk bed.", "gold": 1}
|
233 |
+
{"context_options": ["Stretching the woman's", "Stretching the girl's"], "continuation": "back, the woman smiled at the girl.", "gold": 0}
|
234 |
+
{"context_options": ["Patting the woman's", "Patting the girl's"], "continuation": "back, the woman smiled at the girl.", "gold": 1}
|
235 |
+
{"context_options": ["Billy cried because Toby wouldn't accept Billy's", "Billy cried because Toby wouldn't accept Toby's"], "continuation": "toy.", "gold": 0}
|
236 |
+
{"context_options": ["Billy cried because Toby wouldn't share Billy's", "Billy cried because Toby wouldn't share Toby's"], "continuation": "toy.", "gold": 1}
|
237 |
+
{"context_options": ["Lily spoke to Donna, breaking Lily's", "Lily spoke to Donna, breaking Donna's"], "continuation": "silence.", "gold": 0}
|
238 |
+
{"context_options": ["Lily spoke to Donna, breaking Lily's", "Lily spoke to Donna, breaking Donna's"], "continuation": "concentration.", "gold": 1}
|
239 |
+
{"context_options": ["When Tommy dropped his ice cream, Timmy giggled, so father gave Tommy", "When Tommy dropped his ice cream, Timmy giggled, so father gave Timmy"], "continuation": "a sympathetic look.", "gold": 0}
|
240 |
+
{"context_options": ["When Tommy dropped his ice cream, Timmy giggled, so father gave Tommy", "When Tommy dropped his ice cream, Timmy giggled, so father gave Timmy"], "continuation": "a stern look.", "gold": 1}
|
241 |
+
{"context_options": ["As Ollie carried Tommy up the long winding steps, Ollie's", "As Ollie carried Tommy up the long winding steps, Tommy's"], "continuation": "legs ached.", "gold": 0}
|
242 |
+
{"context_options": ["As Ollie carried Tommy up the long winding steps, Ollie's", "As Ollie carried Tommy up the long winding steps, Tommy's"], "continuation": "legs dangled.", "gold": 1}
|
243 |
+
{"context_options": ["The father carried the sleeping boy in the father's", "The father carried the sleeping boy in the boy's"], "continuation": "arms", "gold": 0}
|
244 |
+
{"context_options": ["The father carried the sleeping boy in the father's", "The father carried the sleeping boy in the boy's"], "continuation": "bassinet.", "gold": 1}
|
245 |
+
{"context_options": ["The woman held the girl against the woman's", "The woman held the girl against the girl's"], "continuation": "chest", "gold": 0}
|
246 |
+
{"context_options": ["The woman held the girl against the woman's", "The woman held the girl against the girl's"], "continuation": "will.", "gold": 1}
|
247 |
+
{"context_options": ["Pam's parents came home and found her having sex with her boyfriend, Paul. Pam's parents", "Pam's parents came home and found her having sex with her boyfriend, Paul. Pam and Paul"], "continuation": "were furious about it.", "gold": 0}
|
248 |
+
{"context_options": ["Pam's parents came home and found her having sex with her boyfriend, Paul. Pam's parents", "Pam's parents came home and found her having sex with her boyfriend, Paul. Pam and Paul"], "continuation": "were embarrassed about it.", "gold": 1}
|
249 |
+
{"context_options": ["Dr. Adams informed Kate that Dr. Adams", "Dr. Adams informed Kate that Kate"], "continuation": "had retired and presented several options for future treatment.", "gold": 0}
|
250 |
+
{"context_options": ["Dr. Adams informed Kate that Dr. Adams", "Dr. Adams informed Kate that Kate"], "continuation": "had cancer and presented several options for future treatment.", "gold": 1}
|
251 |
+
{"context_options": ["Dan had to stop Bill from toying with the injured bird. Dan", "Dan had to stop Bill from toying with the injured bird. Bill"], "continuation": "is very compassionate.", "gold": 0}
|
252 |
+
{"context_options": ["Dan had to stop Bill from toying with the injured bird. Dan", "Dan had to stop Bill from toying with the injured bird. Bill"], "continuation": "is very cruel.", "gold": 1}
|
253 |
+
{"context_options": ["George got free tickets to the play, but he gave them to Eric, even though George", "George got free tickets to the play, but he gave them to Eric, even though Eric"], "continuation": "was particularly eager to see it.", "gold": 0}
|
254 |
+
{"context_options": ["George got free tickets to the play, but he gave them to Eric, because George", "George got free tickets to the play, but he gave them to Eric, because Eric"], "continuation": "was particularly eager to see it.", "gold": 1}
|
255 |
+
{"context_options": ["George got free tickets to the play, but he gave them to Eric, because George", "George got free tickets to the play, but he gave them to Eric, because Eric"], "continuation": "was not particularly eager to see it.", "gold": 0}
|
256 |
+
{"context_options": ["Jane gave Joan candy because Jane", "Jane gave Joan candy because Joan"], "continuation": "wasn't hungry.", "gold": 0}
|
257 |
+
{"context_options": ["Jane gave Joan candy because Jane", "Jane gave Joan candy because Joan"], "continuation": "was hungry.", "gold": 1}
|
258 |
+
{"context_options": ["I tried to paint a picture of an orchard, with lemons in the lemon trees, but lemons", "I tried to paint a picture of an orchard, with lemons in the lemon trees, but lemon trees"], "continuation": "came out looking more like light bulbs.", "gold": 0}
|
259 |
+
{"context_options": ["I tried to paint a picture of an orchard, with lemons in the lemon trees, but lemons", "I tried to paint a picture of an orchard, with lemons in the lemon trees, but lemon trees"], "continuation": "came out looking more like telephone poles.", "gold": 1}
|
260 |
+
{"context_options": ["James asked Robert for a favor but James", "James asked Robert for a favor but Robert"], "continuation": "was refused.", "gold": 0}
|
261 |
+
{"context_options": ["James asked Robert for a favor but James", "James asked Robert for a favor but Robert"], "continuation": "refused.", "gold": 1}
|
262 |
+
{"context_options": ["Kirilov ceded the presidency to Shatov because Kirilov", "Kirilov ceded the presidency to Shatov because Shatov"], "continuation": "was less popular.", "gold": 0}
|
263 |
+
{"context_options": ["Kirilov ceded the presidency to Shatov because Kirilov", "Kirilov ceded the presidency to Shatov because Shatov"], "continuation": "was more popular.", "gold": 1}
|
264 |
+
{"context_options": ["Emma did not pass the ball to Janie although Emma", "Emma did not pass the ball to Janie although Janie"], "continuation": "saw that she was open.", "gold": 0}
|
265 |
+
{"context_options": ["Emma did not pass the ball to Janie although Emma", "Emma did not pass the ball to Janie although Janie"], "continuation": "was open.", "gold": 1}
|
266 |
+
{"context_options": ["I put the butterfly wing on the table and the butterfly wing", "I put the butterfly wing on the table and the table"], "continuation": "broke.", "gold": 0}
|
267 |
+
{"context_options": ["I put the heavy book on the table and the heavy book", "I put the heavy book on the table and the table"], "continuation": "broke.", "gold": 1}
|
268 |
+
{"context_options": ["Madonna fired her trainer because Madonna", "Madonna fired her trainer because the trainer"], "continuation": "couldn't stand her boyfriend.", "gold": 0}
|
269 |
+
{"context_options": ["Madonna fired her trainer because Madonna", "Madonna fired her trainer because the trainer"], "continuation": "slept with her boyfriend.", "gold": 1}
|
270 |
+
{"context_options": ["Madonna fired her trainer because she slept with Madonna's", "Madonna fired her trainer because she slept with the trainer's"], "continuation": "boyfriend.", "gold": 0}
|
271 |
+
{"context_options": ["Madonna fired her trainer because she couldn't stand Madonna's", "Madonna fired her trainer because she couldn't stand the trainer's"], "continuation": "boyfriend.", "gold": 1}
|
272 |
+
{"context_options": ["Carol believed that Rebecca suspected that Carol", "Carol believed that Rebecca suspected that Rebecca"], "continuation": "had stolen the watch.", "gold": 0}
|
273 |
+
{"context_options": ["Carol believed that Rebecca regretted that Carol", "Carol believed that Rebecca regretted that Rebecca"], "continuation": "had stolen the watch.", "gold": 1}
|
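
For orientation: every record in this file pairs two `context_options` with one shared `continuation`, and `gold` is the index of the context under which the continuation is coherent, so a model is scored by which context assigns the continuation higher likelihood. Below is a minimal scoring sketch under that reading; it assumes a generic Hugging Face causal LM (the tokenizer name matches the encoder logged in `out.log`, but the 20B download is heavy, so substitute any causal checkpoint to test), and none of it is open_lm's own eval code.

```python
# Hypothetical scorer for winograd_wsc.jsonl-style records. Sketch only.
import json

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "EleutherAI/gpt-neox-20b"  # placeholder; any causal LM works
tok = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
model.eval()


def continuation_logprob(context: str, continuation: str) -> float:
    """Log-probability of the continuation tokens, conditioned on the context."""
    ctx = tok(context, return_tensors="pt").input_ids
    cont = tok(" " + continuation, return_tensors="pt").input_ids
    ids = torch.cat([ctx, cont], dim=1)
    with torch.no_grad():
        logprobs = torch.log_softmax(model(ids).logits[0, :-1], dim=-1)
    # Row i predicts token i+1, so the continuation span starts at ctx_len - 1.
    rows = logprobs[ctx.shape[1] - 1 :]
    return rows.gather(1, cont[0].unsqueeze(1)).sum().item()


correct = total = 0
with open("eval/local_data/winograd_wsc.jsonl") as f:
    for line in f:
        rec = json.loads(line)
        scores = [continuation_logprob(c, rec["continuation"]) for c in rec["context_options"]]
        correct += int(scores.index(max(scores)) == rec["gold"])
        total += 1
print(f"accuracy: {correct}/{total} = {correct / total:.3f}")
```
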
eval/local_data/winogrande.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
logs/test_alpaca_7b_1p25_240612/checkpoints/epoch_1.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2989032a64a7b2afb7b9445ee7fcee09e85531d5d9483eb99d6d5009f03a564c
+size 27558851338
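
(Each three-line `version` / `oid sha256:...` / `size` stanza in this commit is a Git LFS pointer file, not the artifact itself; `size` is the payload in bytes, so this checkpoint resolves to roughly 27.6 GB once the LFS object is fetched.)
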
logs/test_alpaca_7b_1p25_240612/checkpoints/results.jsonl
ADDED
@@ -0,0 +1,5 @@
+[{"loss": 1.3897356633096933, "data_time": 0.002783033065497875, "batch_time": 1.1955944942310452, "samples_per_second": 109587.79834876416, "samples_per_second_per_gpu": 13698.47479359552, "loss_sequences_lower_95": -1.0, "loss_sequences_upper_95": -1.0, "loss_tokens_lower_95": -1.0, "loss_tokens_upper_95": -1.0, "sequences": 2048, "tokens": 125144, "train_tokens": 2048000, "checkpoint_path": null, "val_data": ["/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm/data/val_data.jsonl"], "model": "open_lm_7b"}]
+[{"loss": 1.4330836422741413, "data_time": 0.002671465277671814, "batch_time": 1.1965740416198969, "samples_per_second": 109495.47954659315, "samples_per_second_per_gpu": 13686.934943324144, "loss_sequences_lower_95": -1.0, "loss_sequences_upper_95": -1.0, "loss_tokens_lower_95": -1.0, "loss_tokens_upper_95": -1.0, "sequences": 2048, "tokens": 125144, "train_tokens": 4096000, "checkpoint_path": null, "val_data": ["/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm/data/val_data.jsonl"], "model": "open_lm_7b"}]
+[{"loss": 1.7271701656281948, "data_time": 0.002651987597346306, "batch_time": 1.1973171858116984, "samples_per_second": 109427.82850018333, "samples_per_second_per_gpu": 13678.478562522916, "loss_sequences_lower_95": -1.0, "loss_sequences_upper_95": -1.0, "loss_tokens_lower_95": -1.0, "loss_tokens_upper_95": -1.0, "sequences": 2048, "tokens": 125144, "train_tokens": 6144000, "checkpoint_path": null, "val_data": ["/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm/data/val_data.jsonl"], "model": "open_lm_7b"}]
+[{"loss": 2.06808203458786, "data_time": 0.0027167657390236855, "batch_time": 1.197363798506558, "samples_per_second": 109425.08167824618, "samples_per_second_per_gpu": 13678.135209780772, "loss_sequences_lower_95": -1.0, "loss_sequences_upper_95": -1.0, "loss_tokens_lower_95": -1.0, "loss_tokens_upper_95": -1.0, "sequences": 2048, "tokens": 125144, "train_tokens": 8192000, "checkpoint_path": null, "val_data": ["/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm/data/val_data.jsonl"], "model": "open_lm_7b"}]
+[{"loss": 2.0682402066886425, "data_time": 0.0026804842054843903, "batch_time": 1.1899999463930726, "samples_per_second": 110103.57829482725, "samples_per_second_per_gpu": 13762.947286853407, "loss_sequences_lower_95": -1.0, "loss_sequences_upper_95": -1.0, "loss_tokens_lower_95": -1.0, "loss_tokens_upper_95": -1.0, "sequences": 2048, "tokens": 125144, "train_tokens": 10240000, "checkpoint_path": null, "val_data": ["/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm/data/val_data.jsonl"], "model": "open_lm_7b"}]
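
Each line of `results.jsonl` above is a single-element JSON array wrapping one stats dict; note that the recorded validation loss climbs from about 1.39 to about 2.07 across the five evals while the training loss in `out.log` keeps falling. A minimal sketch for pulling that trajectory back out (plain file parsing, not open_lm API):

```python
# Read the per-eval records out of results.jsonl; each line is a JSON array
# holding one stats dict (see the five lines above).
import json

path = "logs/test_alpaca_7b_1p25_240612/checkpoints/results.jsonl"
with open(path) as f:
    records = [json.loads(line)[0] for line in f]

for r in records:
    print(f"train_tokens={r['train_tokens']:>9}  val_loss={r['loss']:.4f}")
```
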
logs/test_alpaca_7b_1p25_240612/checkpoints/stats_1.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35b4b318a8b4a4019c45b1e8a0ba3110e2a091e8061bf6059f14f1dca23aa95f
+size 1568
logs/test_alpaca_7b_1p25_240612/out.log
ADDED
@@ -0,0 +1,264 @@
+2024-06-12,19:26:04 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 8.
+2024-06-12,19:26:06 | INFO | Model (has 861209600 parameters):
+2024-06-12,19:26:06 | INFO | FullyShardedDataParallel(
+  (_fsdp_wrapped_module): Transformer(
+    (post_embed_norm): Identity()
+    (tok_embeddings): Embedding(50432, 4096)
+    (layers): ModuleList(
+      (0-31): 32 x FullyShardedDataParallel(
+        (_fsdp_wrapped_module): Block(
+          (attention): CustomAttn(
+            (in_proj): Linear(in_features=4096, out_features=12288, bias=False)
+            (out_proj): Linear(in_features=4096, out_features=4096, bias=False)
+            (pos_embed): RotaryWithCast()
+            (q_norm): Identity()
+            (k_norm): Identity()
+          )
+          (feed_forward): SwiGLU(
+            (w12): Linear(in_features=4096, out_features=22016, bias=False)
+            (w3): Linear(in_features=11008, out_features=4096, bias=False)
+          )
+          (attention_norm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
+          (ffn_norm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
+        )
+      )
+    )
+    (norm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
+    (output): Linear(in_features=4096, out_features=50432, bias=False)
+  )
+)
+2024-06-12,19:26:06 | INFO | Params:
+2024-06-12,19:26:06 | INFO | accum_freq: 1
+2024-06-12,19:26:06 | INFO | attn_activation: None
+2024-06-12,19:26:06 | INFO | attn_name: auto
+2024-06-12,19:26:06 | INFO | attn_seq_scalar: None
+2024-06-12,19:26:06 | INFO | attn_seq_scalar_alpha: None
+2024-06-12,19:26:06 | INFO | average: None
+2024-06-12,19:26:06 | INFO | average_coefficients: None
+2024-06-12,19:26:06 | INFO | averagers: None
+2024-06-12,19:26:06 | INFO | beta1: 0.9
+2024-06-12,19:26:06 | INFO | beta2: 0.95
+2024-06-12,19:26:06 | INFO | checkpoint_path: ./logs/test_alpaca_7b_1p25_240612/checkpoints
+2024-06-12,19:26:06 | INFO | copy_codebase: False
+2024-06-12,19:26:06 | INFO | data_key: jsonl
+2024-06-12,19:26:06 | INFO | data_tolerate_error_p: 0.09
+2024-06-12,19:26:06 | INFO | data_tolerate_num_ckpts: 0
+2024-06-12,19:26:06 | INFO | dataset_manifest: None
+2024-06-12,19:26:06 | INFO | dataset_resampled: True
+2024-06-12,19:26:06 | INFO | dataset_type: jsonl
+2024-06-12,19:26:06 | INFO | ddp_static_graph: False
+2024-06-12,19:26:06 | INFO | debug: False
+2024-06-12,19:26:06 | INFO | delete_previous_checkpoint: False
+2024-06-12,19:26:06 | INFO | device: cuda:0
+2024-06-12,19:26:06 | INFO | disable_buffer: False
+2024-06-12,19:26:06 | INFO | dist_backend: nccl
+2024-06-12,19:26:06 | INFO | dist_url: env://
+2024-06-12,19:26:06 | INFO | distill_model: None
+2024-06-12,19:26:06 | INFO | distill_pretrained: None
+2024-06-12,19:26:06 | INFO | distributed: True
+2024-06-12,19:26:06 | INFO | epochs: 4
+2024-06-12,19:26:06 | INFO | epochs_cooldown: None
+2024-06-12,19:26:06 | INFO | eps: 1e-08
+2024-06-12,19:26:06 | INFO | experimental_meta_device: False
+2024-06-12,19:26:06 | INFO | failed_checkpoint_path: ./logs/test_alpaca_7b_1p25_240612/checkpoints_failed
+2024-06-12,19:26:06 | INFO | ffn_type: swiglu
+2024-06-12,19:26:06 | INFO | force_distributed: False
+2024-06-12,19:26:06 | INFO | force_min_lr: 0.0
+2024-06-12,19:26:06 | INFO | fsdp: True
+2024-06-12,19:26:06 | INFO | fsdp_amp: True
+2024-06-12,19:26:06 | INFO | fsdp_backward_prefetch: False
+2024-06-12,19:26:06 | INFO | fsdp_checkpoint: False
+2024-06-12,19:26:06 | INFO | fsdp_cpu_offload: False
+2024-06-12,19:26:06 | INFO | fsdp_hybrid: False
+2024-06-12,19:26:06 | INFO | fsdp_hybrid_o2: False
+2024-06-12,19:26:06 | INFO | fsdp_limit_all_gathers: False
+2024-06-12,19:26:06 | INFO | fsdp_pure_bf16: False
+2024-06-12,19:26:06 | INFO | fsdp_use_orig_params: False
+2024-06-12,19:26:06 | INFO | global_batch_size: 64
+2024-06-12,19:26:06 | INFO | global_val_batch_size: 64
+2024-06-12,19:26:06 | INFO | grad_checkpointing: True
+2024-06-12,19:26:06 | INFO | grad_clip_norm: 1.0
+2024-06-12,19:26:06 | INFO | hf_fsdp_block: None
+2024-06-12,19:26:06 | INFO | hf_model: None
+2024-06-12,19:26:06 | INFO | hf_seq_len: None
+2024-06-12,19:26:06 | INFO | ignore_parse_errors: False
+2024-06-12,19:26:06 | INFO | load_pretrained_state: False
+2024-06-12,19:26:06 | INFO | local_rank: 0
+2024-06-12,19:26:06 | INFO | log_avg_model_training_loss: 0
+2024-06-12,19:26:06 | INFO | log_every_n_steps: 40
+2024-06-12,19:26:06 | INFO | log_level: 20
+2024-06-12,19:26:06 | INFO | log_local: False
+2024-06-12,19:26:06 | INFO | log_logit_mean: False
+2024-06-12,19:26:06 | INFO | log_path: ./logs/test_alpaca_7b_1p25_240612/out.log
+2024-06-12,19:26:06 | INFO | logs: ./logs/
+2024-06-12,19:26:06 | INFO | lr: 1e-05
+2024-06-12,19:26:06 | INFO | lr_cooldown_end: 0.0
+2024-06-12,19:26:06 | INFO | lr_cooldown_power: 1.0
+2024-06-12,19:26:06 | INFO | lr_scheduler: cosine
+2024-06-12,19:26:06 | INFO | model: open_lm_7b
+2024-06-12,19:26:06 | INFO | model_norm: default_layer_norm
+2024-06-12,19:26:06 | INFO | moe_capacity_factor: 1.25
+2024-06-12,19:26:06 | INFO | moe_expert_model_parallelism: False
+2024-06-12,19:26:06 | INFO | moe_freq: 0
+2024-06-12,19:26:06 | INFO | moe_loss_weight: 0.1
+2024-06-12,19:26:06 | INFO | moe_num_experts: None
+2024-06-12,19:26:06 | INFO | moe_top_k: 2
+2024-06-12,19:26:06 | INFO | moe_weight_parallelism: False
+2024-06-12,19:26:06 | INFO | multiple_data_passes: False
+2024-06-12,19:26:06 | INFO | name: test_alpaca_7b_1p25_240612
+2024-06-12,19:26:06 | INFO | no_set_device_rank: False
+2024-06-12,19:26:06 | INFO | optimizer: adamw
+2024-06-12,19:26:06 | INFO | per_gpu_batch_size: 8
+2024-06-12,19:26:06 | INFO | per_gpu_val_batch_size: 8
+2024-06-12,19:26:06 | INFO | positional_embedding_type: rotary
+2024-06-12,19:26:06 | INFO | precision: amp_bfloat16
+2024-06-12,19:26:06 | INFO | preset_world_size: None
+2024-06-12,19:26:06 | INFO | pretrained: /home/ubuntu/model_sft/ch/open_lm/open_lm_7b_1p25/open_lm_7b_1p25T.pt
+2024-06-12,19:26:06 | INFO | qk_norm: False
+2024-06-12,19:26:06 | INFO | rank: 0
+2024-06-12,19:26:06 | INFO | remote_sync: None
+2024-06-12,19:26:06 | INFO | remote_sync_frequency: 300
+2024-06-12,19:26:06 | INFO | remote_sync_protocol: s3
+2024-06-12,19:26:06 | INFO | report_to: tensorboard
+2024-06-12,19:26:06 | INFO | resume: None
+2024-06-12,19:26:06 | INFO | save_frequency: 1
+2024-06-12,19:26:06 | INFO | save_most_recent: False
+2024-06-12,19:26:06 | INFO | seed: 0
+2024-06-12,19:26:06 | INFO | seq_len: 2048
+2024-06-12,19:26:06 | INFO | skip_scheduler: False
+2024-06-12,19:26:06 | INFO | squash_mask_left: False
+2024-06-12,19:26:06 | INFO | target_mask_individual: None
+2024-06-12,19:26:06 | INFO | target_mask_left: None
+2024-06-12,19:26:06 | INFO | tensorboard: True
+2024-06-12,19:26:06 | INFO | tensorboard_path: ./logs/test_alpaca_7b_1p25_240612/tensorboard
+2024-06-12,19:26:06 | INFO | torchcompile: False
+2024-06-12,19:26:06 | INFO | torchscript: False
+2024-06-12,19:26:06 | INFO | trace: False
+2024-06-12,19:26:06 | INFO | train_data: ['/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm/data/train_data.jsonl']
+2024-06-12,19:26:06 | INFO | train_data_mix_weights: None
+2024-06-12,19:26:06 | INFO | train_data_upsampling_factors: None
+2024-06-12,19:26:06 | INFO | train_num_samples: None
+2024-06-12,19:26:06 | INFO | use_bn_sync: False
+2024-06-12,19:26:06 | INFO | use_bnb_linear: None
+2024-06-12,19:26:06 | INFO | val_data: ['/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm/data/val_data.jsonl']
+2024-06-12,19:26:06 | INFO | val_data_key: ['jsonl']
+2024-06-12,19:26:06 | INFO | val_frequency: 1
+2024-06-12,19:26:06 | INFO | val_iter_ci: 10000
+2024-06-12,19:26:06 | INFO | val_max_pop_ci: None
+2024-06-12,19:26:06 | INFO | val_num_samples: None
+2024-06-12,19:26:06 | INFO | val_seq_ci: False
+2024-06-12,19:26:06 | INFO | val_tok_ci: False
+2024-06-12,19:26:06 | INFO | vocab_size: 50432
+2024-06-12,19:26:06 | INFO | wandb: False
+2024-06-12,19:26:06 | INFO | wandb_notes:
+2024-06-12,19:26:06 | INFO | wandb_project_name: open-lm
+2024-06-12,19:26:06 | INFO | warmup: 400
+2024-06-12,19:26:06 | INFO | wd: 0.1
+2024-06-12,19:26:06 | INFO | workers: 1
+2024-06-12,19:26:06 | INFO | world_size: 8
+2024-06-12,19:26:06 | INFO | z_loss_coefficient: 0.0
+2024-06-12,19:26:06 | INFO | => loading from a pre-trained model.
+2024-06-12,19:26:20 | INFO | Resuming a checkpoint without checking that the seed match. This means that training might not be reproducible.
+2024-06-12,19:26:31 | INFO | => resuming checkpoint '/home/ubuntu/model_sft/ch/open_lm/open_lm_7b_1p25/open_lm_7b_1p25T.pt' (epoch 50)(step None)
+2024-06-12,19:26:33 | INFO | current encoder name is:EleutherAI/gpt-neox-20b
+2024-06-12,19:26:33 | INFO | padding side is :right
+2024-06-12,19:27:11 | INFO | total steps required by training is 3128
+2024-06-12,19:27:11 | INFO | Start epoch 0
+2024-06-12,19:27:11 | INFO | => epoch 0, training on ['/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm/data/train_data.jsonl']
+2024-06-12,19:27:23 | INFO | Train Epoch: 0 [ 64/50048 (0%)] Loss: 3.330 Data (t): 0.160 Batch (t): 6.458, 20286.3/s, 2535.79/s/gpu LR: 0.000000
+2024-06-12,19:30:32 | INFO | Train Epoch: 0 [ 2624/50048 (5%)] Loss: 2.764 Data (t): 0.047 Batch (t): 4.721, 27705.5/s, 3463.19/s/gpu LR: 0.000001
+2024-06-12,19:33:41 | INFO | Train Epoch: 0 [ 5184/50048 (10%)] Loss: 1.989 Data (t): 0.047 Batch (t): 4.734, 27695.6/s, 3461.95/s/gpu LR: 0.000002
+2024-06-12,19:36:50 | INFO | Train Epoch: 0 [ 7744/50048 (15%)] Loss: 1.654 Data (t): 0.048 Batch (t): 4.729, 27726.9/s, 3465.87/s/gpu LR: 0.000003
+2024-06-12,19:39:59 | INFO | Train Epoch: 0 [10304/50048 (21%)] Loss: 1.537 Data (t): 0.048 Batch (t): 4.727, 27718.4/s, 3464.80/s/gpu LR: 0.000004
+2024-06-12,19:43:08 | INFO | Train Epoch: 0 [12864/50048 (26%)] Loss: 1.521 Data (t): 0.047 Batch (t): 4.728, 27736.8/s, 3467.10/s/gpu LR: 0.000005
+2024-06-12,19:46:17 | INFO | Train Epoch: 0 [15424/50048 (31%)] Loss: 1.484 Data (t): 0.047 Batch (t): 4.730, 27701.4/s, 3462.67/s/gpu LR: 0.000006
+2024-06-12,19:49:27 | INFO | Train Epoch: 0 [17984/50048 (36%)] Loss: 1.448 Data (t): 0.047 Batch (t): 4.728, 27696.5/s, 3462.06/s/gpu LR: 0.000007
+2024-06-12,19:52:36 | INFO | Train Epoch: 0 [20544/50048 (41%)] Loss: 1.448 Data (t): 0.047 Batch (t): 4.731, 27705.0/s, 3463.12/s/gpu LR: 0.000008
+2024-06-12,19:55:45 | INFO | Train Epoch: 0 [23104/50048 (46%)] Loss: 1.439 Data (t): 0.047 Batch (t): 4.732, 27699.3/s, 3462.42/s/gpu LR: 0.000009
+2024-06-12,19:58:54 | INFO | Train Epoch: 0 [25664/50048 (51%)] Loss: 1.419 Data (t): 0.048 Batch (t): 4.733, 27718.3/s, 3464.79/s/gpu LR: 0.000010
+2024-06-12,20:02:04 | INFO | Train Epoch: 0 [28224/50048 (56%)] Loss: 1.409 Data (t): 0.047 Batch (t): 4.732, 27680.8/s, 3460.09/s/gpu LR: 0.000010
+2024-06-12,20:05:13 | INFO | Train Epoch: 0 [30784/50048 (62%)] Loss: 1.411 Data (t): 0.048 Batch (t): 4.730, 27710.5/s, 3463.82/s/gpu LR: 0.000010
+2024-06-12,20:08:22 | INFO | Train Epoch: 0 [33344/50048 (67%)] Loss: 1.421 Data (t): 0.047 Batch (t): 4.730, 27688.0/s, 3461.00/s/gpu LR: 0.000010
+2024-06-12,20:11:31 | INFO | Train Epoch: 0 [35904/50048 (72%)] Loss: 1.386 Data (t): 0.048 Batch (t): 4.731, 27727.2/s, 3465.90/s/gpu LR: 0.000010
+2024-06-12,20:14:41 | INFO | Train Epoch: 0 [38464/50048 (77%)] Loss: 1.397 Data (t): 0.047 Batch (t): 4.732, 27714.5/s, 3464.31/s/gpu LR: 0.000010
+2024-06-12,20:17:50 | INFO | Train Epoch: 0 [41024/50048 (82%)] Loss: 1.392 Data (t): 0.048 Batch (t): 4.732, 27672.8/s, 3459.10/s/gpu LR: 0.000010
+2024-06-12,20:20:59 | INFO | Train Epoch: 0 [43584/50048 (87%)] Loss: 1.375 Data (t): 0.047 Batch (t): 4.730, 27655.0/s, 3456.87/s/gpu LR: 0.000010
+2024-06-12,20:24:08 | INFO | Train Epoch: 0 [46144/50048 (92%)] Loss: 1.395 Data (t): 0.047 Batch (t): 4.732, 27640.4/s, 3455.05/s/gpu LR: 0.000010
+2024-06-12,20:27:18 | INFO | Train Epoch: 0 [48704/50048 (97%)] Loss: 1.366 Data (t): 0.048 Batch (t): 4.732, 27700.8/s, 3462.61/s/gpu LR: 0.000010
+2024-06-12,20:28:52 | INFO | Process the eval step
+2024-06-12,20:32:19 | INFO | Start epoch 1
+2024-06-12,20:32:19 | INFO | => epoch 1, training on ['/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm/data/train_data.jsonl']
+2024-06-12,20:32:24 | INFO | Train Epoch: 1 [ 64/50048 (0%)] Loss: 0.958 Data (t): 0.259 Batch (t): 4.949, 26469.4/s, 3308.68/s/gpu LR: 0.000010
+2024-06-12,20:35:33 | INFO | Train Epoch: 1 [ 2624/50048 (5%)] Loss: 1.015 Data (t): 0.047 Batch (t): 4.714, 27772.4/s, 3471.56/s/gpu LR: 0.000009
+2024-06-12,20:38:42 | INFO | Train Epoch: 1 [ 5184/50048 (10%)] Loss: 1.000 Data (t): 0.047 Batch (t): 4.727, 27709.5/s, 3463.69/s/gpu LR: 0.000009
+2024-06-12,20:41:51 | INFO | Train Epoch: 1 [ 7744/50048 (15%)] Loss: 1.017 Data (t): 0.048 Batch (t): 4.729, 27704.6/s, 3463.07/s/gpu LR: 0.000009
+2024-06-12,20:45:00 | INFO | Train Epoch: 1 [10304/50048 (21%)] Loss: 1.016 Data (t): 0.048 Batch (t): 4.728, 27646.3/s, 3455.79/s/gpu LR: 0.000009
+2024-06-12,20:48:09 | INFO | Train Epoch: 1 [12864/50048 (26%)] Loss: 1.037 Data (t): 0.048 Batch (t): 4.733, 27718.9/s, 3464.86/s/gpu LR: 0.000009
+2024-06-12,20:51:18 | INFO | Train Epoch: 1 [15424/50048 (31%)] Loss: 1.007 Data (t): 0.047 Batch (t): 4.731, 27667.1/s, 3458.38/s/gpu LR: 0.000009
+2024-06-12,20:54:28 | INFO | Train Epoch: 1 [17984/50048 (36%)] Loss: 1.019 Data (t): 0.048 Batch (t): 4.732, 27662.5/s, 3457.81/s/gpu LR: 0.000009
+2024-06-12,20:57:37 | INFO | Train Epoch: 1 [20544/50048 (41%)] Loss: 1.013 Data (t): 0.047 Batch (t): 4.735, 27636.9/s, 3454.61/s/gpu LR: 0.000008
+2024-06-12,21:00:47 | INFO | Train Epoch: 1 [23104/50048 (46%)] Loss: 1.010 Data (t): 0.047 Batch (t): 4.735, 27686.3/s, 3460.79/s/gpu LR: 0.000008
+2024-06-12,21:03:56 | INFO | Train Epoch: 1 [25664/50048 (51%)] Loss: 1.030 Data (t): 0.048 Batch (t): 4.734, 27636.0/s, 3454.50/s/gpu LR: 0.000008
+2024-06-12,21:07:05 | INFO | Train Epoch: 1 [28224/50048 (56%)] Loss: 1.012 Data (t): 0.047 Batch (t): 4.736, 27701.3/s, 3462.67/s/gpu LR: 0.000008
+2024-06-12,21:10:15 | INFO | Train Epoch: 1 [30784/50048 (62%)] Loss: 1.034 Data (t): 0.048 Batch (t): 4.737, 27631.4/s, 3453.93/s/gpu LR: 0.000008
+2024-06-12,21:13:24 | INFO | Train Epoch: 1 [33344/50048 (67%)] Loss: 1.017 Data (t): 0.047 Batch (t): 4.737, 27669.5/s, 3458.68/s/gpu LR: 0.000008
+2024-06-12,21:16:34 | INFO | Train Epoch: 1 [35904/50048 (72%)] Loss: 1.027 Data (t): 0.047 Batch (t): 4.734, 27666.3/s, 3458.28/s/gpu LR: 0.000007
+2024-06-12,21:19:43 | INFO | Train Epoch: 1 [38464/50048 (77%)] Loss: 1.035 Data (t): 0.048 Batch (t): 4.732, 27655.7/s, 3456.96/s/gpu LR: 0.000007
+2024-06-12,21:22:52 | INFO | Train Epoch: 1 [41024/50048 (82%)] Loss: 1.027 Data (t): 0.048 Batch (t): 4.735, 27662.3/s, 3457.79/s/gpu LR: 0.000007
+2024-06-12,21:26:02 | INFO | Train Epoch: 1 [43584/50048 (87%)] Loss: 1.031 Data (t): 0.048 Batch (t): 4.735, 27704.0/s, 3463.00/s/gpu LR: 0.000007
+2024-06-12,21:29:11 | INFO | Train Epoch: 1 [46144/50048 (92%)] Loss: 1.021 Data (t): 0.047 Batch (t): 4.736, 27615.9/s, 3451.99/s/gpu LR: 0.000006
+2024-06-12,21:32:21 | INFO | Train Epoch: 1 [48704/50048 (97%)] Loss: 1.016 Data (t): 0.048 Batch (t): 4.735, 27664.0/s, 3458.01/s/gpu LR: 0.000006
+2024-06-12,21:33:56 | INFO | Process the eval step
+2024-06-12,21:37:11 | INFO | Start epoch 2
+2024-06-12,21:37:11 | INFO | => epoch 2, training on ['/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm/data/train_data.jsonl']
+2024-06-12,21:37:16 | INFO | Train Epoch: 2 [ 64/50048 (0%)] Loss: 0.604 Data (t): 0.298 Batch (t): 4.984, 26287.8/s, 3285.98/s/gpu LR: 0.000006
+2024-06-12,21:40:25 | INFO | Train Epoch: 2 [ 2624/50048 (5%)] Loss: 0.532 Data (t): 0.047 Batch (t): 4.718, 27712.2/s, 3464.02/s/gpu LR: 0.000006
+2024-06-12,21:43:34 | INFO | Train Epoch: 2 [ 5184/50048 (10%)] Loss: 0.520 Data (t): 0.047 Batch (t): 4.727, 27735.6/s, 3466.95/s/gpu LR: 0.000006
+2024-06-12,21:46:43 | INFO | Train Epoch: 2 [ 7744/50048 (15%)] Loss: 0.505 Data (t): 0.047 Batch (t): 4.729, 27730.2/s, 3466.28/s/gpu LR: 0.000005
+2024-06-12,21:49:53 | INFO | Train Epoch: 2 [10304/50048 (21%)] Loss: 0.489 Data (t): 0.048 Batch (t): 4.733, 27681.1/s, 3460.14/s/gpu LR: 0.000005
+2024-06-12,21:53:02 | INFO | Train Epoch: 2 [12864/50048 (26%)] Loss: 0.509 Data (t): 0.047 Batch (t): 4.733, 27632.0/s, 3454.00/s/gpu LR: 0.000005
+2024-06-12,21:56:11 | INFO | Train Epoch: 2 [15424/50048 (31%)] Loss: 0.497 Data (t): 0.047 Batch (t): 4.731, 27690.1/s, 3461.27/s/gpu LR: 0.000005
+2024-06-12,21:59:21 | INFO | Train Epoch: 2 [17984/50048 (36%)] Loss: 0.506 Data (t): 0.047 Batch (t): 4.732, 27743.6/s, 3467.96/s/gpu LR: 0.000005
+2024-06-12,22:02:30 | INFO | Train Epoch: 2 [20544/50048 (41%)] Loss: 0.514 Data (t): 0.048 Batch (t): 4.730, 27669.5/s, 3458.69/s/gpu LR: 0.000004
+2024-06-12,22:05:39 | INFO | Train Epoch: 2 [23104/50048 (46%)] Loss: 0.520 Data (t): 0.047 Batch (t): 4.733, 27669.9/s, 3458.73/s/gpu LR: 0.000004
+2024-06-12,22:08:48 | INFO | Train Epoch: 2 [25664/50048 (51%)] Loss: 0.536 Data (t): 0.048 Batch (t): 4.735, 27708.8/s, 3463.60/s/gpu LR: 0.000004
+2024-06-12,22:11:58 | INFO | Train Epoch: 2 [28224/50048 (56%)] Loss: 0.525 Data (t): 0.048 Batch (t): 4.734, 27709.5/s, 3463.68/s/gpu LR: 0.000004
+2024-06-12,22:15:07 | INFO | Train Epoch: 2 [30784/50048 (62%)] Loss: 0.512 Data (t): 0.048 Batch (t): 4.737, 27680.0/s, 3460.00/s/gpu LR: 0.000003
+2024-06-12,22:18:17 | INFO | Train Epoch: 2 [33344/50048 (67%)] Loss: 0.490 Data (t): 0.048 Batch (t): 4.734, 27709.7/s, 3463.72/s/gpu LR: 0.000003
+2024-06-12,22:21:26 | INFO | Train Epoch: 2 [35904/50048 (72%)] Loss: 0.519 Data (t): 0.047 Batch (t): 4.733, 27682.1/s, 3460.26/s/gpu LR: 0.000003
+2024-06-12,22:24:35 | INFO | Train Epoch: 2 [38464/50048 (77%)] Loss: 0.503 Data (t): 0.048 Batch (t): 4.737, 27660.6/s, 3457.58/s/gpu LR: 0.000003
+2024-06-12,22:27:45 | INFO | Train Epoch: 2 [41024/50048 (82%)] Loss: 0.505 Data (t): 0.047 Batch (t): 4.735, 27652.6/s, 3456.58/s/gpu LR: 0.000003
+2024-06-12,22:30:54 | INFO | Train Epoch: 2 [43584/50048 (87%)] Loss: 0.495 Data (t): 0.048 Batch (t): 4.735, 27668.6/s, 3458.58/s/gpu LR: 0.000002
+2024-06-12,22:34:04 | INFO | Train Epoch: 2 [46144/50048 (92%)] Loss: 0.509 Data (t): 0.048 Batch (t): 4.736, 27663.3/s, 3457.92/s/gpu LR: 0.000002
+2024-06-12,22:37:13 | INFO | Train Epoch: 2 [48704/50048 (97%)] Loss: 0.525 Data (t): 0.048 Batch (t): 4.735, 27669.3/s, 3458.66/s/gpu LR: 0.000002
+2024-06-12,22:38:49 | INFO | Process the eval step
+2024-06-12,22:42:03 | INFO | Start epoch 3
+2024-06-12,22:42:03 | INFO | => epoch 3, training on ['/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm/data/train_data.jsonl']
+2024-06-12,22:42:08 | INFO | Train Epoch: 3 [ 64/50048 (0%)] Loss: 0.243 Data (t): 0.321 Batch (t): 5.018, 26108.8/s, 3263.60/s/gpu LR: 0.000002
+2024-06-12,22:45:17 | INFO | Train Epoch: 3 [ 2624/50048 (5%)] Loss: 0.228 Data (t): 0.047 Batch (t): 4.722, 27652.9/s, 3456.61/s/gpu LR: 0.000002
+2024-06-12,22:48:26 | INFO | Train Epoch: 3 [ 5184/50048 (10%)] Loss: 0.213 Data (t): 0.047 Batch (t): 4.732, 27708.0/s, 3463.49/s/gpu LR: 0.000002
+2024-06-12,22:51:35 | INFO | Train Epoch: 3 [ 7744/50048 (15%)] Loss: 0.221 Data (t): 0.048 Batch (t): 4.735, 27646.6/s, 3455.83/s/gpu LR: 0.000001
+2024-06-12,22:54:45 | INFO | Train Epoch: 3 [10304/50048 (21%)] Loss: 0.208 Data (t): 0.047 Batch (t): 4.733, 27655.2/s, 3456.90/s/gpu LR: 0.000001
+2024-06-12,22:57:54 | INFO | Train Epoch: 3 [12864/50048 (26%)] Loss: 0.228 Data (t): 0.047 Batch (t): 4.736, 27673.3/s, 3459.16/s/gpu LR: 0.000001
+2024-06-12,23:01:03 | INFO | Train Epoch: 3 [15424/50048 (31%)] Loss: 0.220 Data (t): 0.047 Batch (t): 4.734, 27666.0/s, 3458.25/s/gpu LR: 0.000001
+2024-06-12,23:04:13 | INFO | Train Epoch: 3 [17984/50048 (36%)] Loss: 0.232 Data (t): 0.047 Batch (t): 4.733, 27680.1/s, 3460.02/s/gpu LR: 0.000001
+2024-06-12,23:07:22 | INFO | Train Epoch: 3 [20544/50048 (41%)] Loss: 0.221 Data (t): 0.047 Batch (t): 4.733, 27732.4/s, 3466.55/s/gpu LR: 0.000001
+2024-06-12,23:10:31 | INFO | Train Epoch: 3 [23104/50048 (46%)] Loss: 0.221 Data (t): 0.048 Batch (t): 4.733, 27667.3/s, 3458.41/s/gpu LR: 0.000001
+2024-06-12,23:13:41 | INFO | Train Epoch: 3 [25664/50048 (51%)] Loss: 0.219 Data (t): 0.047 Batch (t): 4.734, 27663.4/s, 3457.92/s/gpu LR: 0.000000
+2024-06-12,23:16:50 | INFO | Train Epoch: 3 [28224/50048 (56%)] Loss: 0.222 Data (t): 0.047 Batch (t): 4.735, 27666.1/s, 3458.26/s/gpu LR: 0.000000
+2024-06-12,23:20:00 | INFO | Train Epoch: 3 [30784/50048 (62%)] Loss: 0.222 Data (t): 0.047 Batch (t): 4.734, 27687.2/s, 3460.90/s/gpu LR: 0.000000
+2024-06-12,23:23:09 | INFO | Train Epoch: 3 [33344/50048 (67%)] Loss: 0.224 Data (t): 0.047 Batch (t): 4.734, 27710.5/s, 3463.81/s/gpu LR: 0.000000
+2024-06-12,23:26:18 | INFO | Train Epoch: 3 [35904/50048 (72%)] Loss: 0.217 Data (t): 0.047 Batch (t): 4.733, 27647.9/s, 3455.99/s/gpu LR: 0.000000
+2024-06-12,23:29:28 | INFO | Train Epoch: 3 [38464/50048 (77%)] Loss: 0.221 Data (t): 0.047 Batch (t): 4.733, 27636.6/s, 3454.58/s/gpu LR: 0.000000
+2024-06-12,23:32:37 | INFO | Train Epoch: 3 [41024/50048 (82%)] Loss: 0.208 Data (t): 0.048 Batch (t): 4.733, 27687.3/s, 3460.91/s/gpu LR: 0.000000
+2024-06-12,23:35:46 | INFO | Train Epoch: 3 [43584/50048 (87%)] Loss: 0.212 Data (t): 0.048 Batch (t): 4.732, 27663.4/s, 3457.93/s/gpu LR: 0.000000
+2024-06-12,23:38:55 | INFO | Train Epoch: 3 [46144/50048 (92%)] Loss: 0.216 Data (t): 0.047 Batch (t): 4.734, 27707.0/s, 3463.38/s/gpu LR: 0.000000
+2024-06-12,23:42:05 | INFO | Train Epoch: 3 [48704/50048 (97%)] Loss: 0.220 Data (t): 0.047 Batch (t): 4.733, 27664.7/s, 3458.09/s/gpu LR: 0.000000
+2024-06-12,23:43:40 | INFO | Process the eval step
+2024-06-12,23:46:54 | INFO | Start epoch 4
+2024-06-12,23:46:54 | INFO | => epoch 4, training on ['/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm/data/train_data.jsonl']
+2024-06-12,23:46:59 | INFO | Train Epoch: 4 [ 64/50048 (0%)] Loss: 0.188 Data (t): 0.389 Batch (t): 5.103, 25671.5/s, 3208.93/s/gpu LR: 0.000000
+2024-06-12,23:47:08 | INFO | Train Epoch: 4 [ 192/50048 (0%)] Loss: 0.137 Data (t): 0.048 Batch (t): 4.688, 27946.7/s, 3493.34/s/gpu LR: 0.000000
+2024-06-12,23:47:13 | WARNING | step: 3128 has reached/exceeded total_steps: 3128. ending training.
+2024-06-12,23:47:15 | INFO | Process the eval step
+2024-06-12,23:50:31 | INFO | Model has seen the desired number of tokens. Ending training.
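
A sanity check worth doing when reproducing this run: the step budget the trainer announces (`total steps required by training is 3128`) follows directly from the logged values, since 50048 samples per epoch at a global batch size of 64 is 782 optimizer steps per epoch, times 4 epochs. A few lines of arithmetic on those values (nothing here is open_lm API):

```python
# Reconstruct the trainer's step budget from the values logged above.
samples_per_epoch = 50048  # "Train Epoch: N [ .../50048 ...]"
global_batch_size = 64     # global_batch_size: 64
epochs = 4                 # epochs: 4
seq_len = 2048             # seq_len: 2048

steps_per_epoch = samples_per_epoch // global_batch_size
total_steps = steps_per_epoch * epochs
tokens_per_step = global_batch_size * seq_len

print(steps_per_epoch, total_steps, tokens_per_step)  # 782 3128 131072
```
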
logs/test_alpaca_7b_1p25_240612/params.txt
ADDED
@@ -0,0 +1,129 @@
+accum_freq: 1
+attn_activation: None
+attn_name: auto
+attn_seq_scalar: None
+attn_seq_scalar_alpha: None
+average: None
+average_coefficients: None
+averagers: None
+beta1: 0.9
+beta2: 0.95
+checkpoint_path: ./logs/test_alpaca_7b_1p25_240612/checkpoints
+copy_codebase: False
+data_key: jsonl
+data_tolerate_error_p: 0.09
+data_tolerate_num_ckpts: 0
+dataset_manifest: None
+dataset_resampled: True
+dataset_type: jsonl
+ddp_static_graph: False
+debug: False
+delete_previous_checkpoint: False
+device: cuda:0
+disable_buffer: False
+dist_backend: nccl
+dist_url: env://
+distill_model: None
+distill_pretrained: None
+distributed: True
+epochs: 4
+epochs_cooldown: None
+eps: 1e-08
+experimental_meta_device: False
+failed_checkpoint_path: ./logs/test_alpaca_7b_1p25_240612/checkpoints_failed
+ffn_type: swiglu
+force_distributed: False
+force_min_lr: 0.0
+fsdp: True
+fsdp_amp: True
+fsdp_backward_prefetch: False
+fsdp_checkpoint: False
+fsdp_cpu_offload: False
+fsdp_hybrid: False
+fsdp_hybrid_o2: False
+fsdp_limit_all_gathers: False
+fsdp_pure_bf16: False
+fsdp_use_orig_params: False
+global_batch_size: 64
+global_val_batch_size: 64
+grad_checkpointing: True
+grad_clip_norm: 1.0
+hf_fsdp_block: None
+hf_model: None
+hf_seq_len: None
+ignore_parse_errors: False
+load_pretrained_state: False
+local_rank: 0
+log_avg_model_training_loss: 0
+log_every_n_steps: 40
+log_level: 20
+log_local: False
+log_logit_mean: False
+log_path: ./logs/test_alpaca_7b_1p25_240612/out.log
+logs: ./logs/
+lr: 1e-05
+lr_cooldown_end: 0.0
+lr_cooldown_power: 1.0
+lr_scheduler: cosine
+model: open_lm_7b
+model_norm: default_layer_norm
+moe_capacity_factor: 1.25
+moe_expert_model_parallelism: False
+moe_freq: 0
+moe_loss_weight: 0.1
+moe_num_experts: None
+moe_top_k: 2
+moe_weight_parallelism: False
+multiple_data_passes: False
+name: test_alpaca_7b_1p25_240612
+no_set_device_rank: False
+optimizer: adamw
+per_gpu_batch_size: 8
+per_gpu_val_batch_size: 8
+positional_embedding_type: rotary
+precision: amp_bfloat16
+preset_world_size: None
+pretrained: /home/ubuntu/model_sft/ch/open_lm/open_lm_7b_1p25/open_lm_7b_1p25T.pt
+qk_norm: False
+rank: 0
+remote_sync: None
+remote_sync_frequency: 300
+remote_sync_protocol: s3
+report_to: tensorboard
+resume: None
+save_frequency: 1
+save_most_recent: False
+seed: 0
+seq_len: 2048
+skip_scheduler: False
+squash_mask_left: False
+target_mask_individual: None
+target_mask_left: None
+tensorboard: True
+tensorboard_path: ./logs/test_alpaca_7b_1p25_240612/tensorboard
+torchcompile: False
+torchscript: False
+trace: False
+train_data: ['/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm/data/train_data.jsonl']
+train_data_mix_weights: None
+train_data_upsampling_factors: None
+train_num_samples: None
+use_bn_sync: False
+use_bnb_linear: None
+val_data: ['/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm/data/val_data.jsonl']
+val_data_key: ['jsonl']
+val_frequency: 1
+val_iter_ci: 10000
+val_max_pop_ci: None
+val_num_samples: None
+val_seq_ci: False
+val_tok_ci: False
+vocab_size: 50432
+wandb: False
+wandb_notes:
+wandb_project_name: open-lm
+warmup: 400
+wd: 0.1
+workers: 1
+world_size: 8
+z_loss_coefficient: 0.0
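
params.txt is a flat `key: value` dump of the same argparse namespace echoed at the top of out.log; if you want it back as a dict (e.g. to diff two runs), a small sketch, assuming one entry per line as above:

```python
# Parse a params.txt-style "key: value" dump into a dict of strings.
def load_params(path: str) -> dict[str, str]:
    params = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.partition(":")
            params[key.strip()] = value.strip()
    return params


params = load_params("logs/test_alpaca_7b_1p25_240612/params.txt")
print(params["lr"], params["global_batch_size"])  # 1e-05 64
```
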
logs/test_alpaca_7b_1p25_240612/tensorboard.tar
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8cc427b44915dfb8a961876fc6ac58d43fc5ca3fb7e919809ac9e69de4ed2c0
+size 81920
logs/test_alpaca_7b_1p25_240612/tensorboard/events.out.tfevents.1718191631.10-16-20-78.280596.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f45e560601dffa7bf9ffc039e1236afef2b395d50b242d93521bc2239890f0b
+size 71110
open_lm/__init__.py
ADDED
File without changes
open_lm/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (160 Bytes).
open_lm/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (154 Bytes).
open_lm/__pycache__/attention.cpython-310.pyc
ADDED
Binary file (5.84 kB).
open_lm/__pycache__/data.cpython-310.pyc
ADDED
Binary file (24.3 kB).
open_lm/__pycache__/data.cpython-39.pyc
ADDED
Binary file (24.3 kB).
open_lm/__pycache__/distributed.cpython-310.pyc
ADDED
Binary file (2.96 kB).
open_lm/__pycache__/evaluate.cpython-310.pyc
ADDED
Binary file (4.08 kB).
open_lm/__pycache__/file_utils.cpython-310.pyc
ADDED
Binary file (14.8 kB).
open_lm/__pycache__/logger.cpython-310.pyc
ADDED
Binary file (788 Bytes).
open_lm/__pycache__/losses.cpython-310.pyc
ADDED
Binary file (1.17 kB).
open_lm/__pycache__/main.cpython-310.pyc
ADDED
Binary file (23.9 kB).
open_lm/__pycache__/main.cpython-39.pyc
ADDED
Binary file (23.9 kB).
open_lm/__pycache__/meters.cpython-310.pyc
ADDED
Binary file (3.46 kB).
open_lm/__pycache__/model.cpython-310.pyc
ADDED
Binary file (14.6 kB).
open_lm/__pycache__/norms.cpython-310.pyc
ADDED
Binary file (5.05 kB).