Commit
β’
0e7d81d
1
Parent(s):
87113b9
End of training
Browse files- all_results.json +9 -9
- eval_results.json +6 -6
- runs/Mar26_17-11-01_sanchit--v100/events.out.tfevents.1648367457.sanchit--v100.2600125.2 +3 -0
- train_results.json +4 -4
- trainer_state.json +0 -0
- wandb/run-20220326_171130-bdf5nvyg/files/output.log +334 -0
- wandb/run-20220326_171130-bdf5nvyg/files/wandb-summary.json +0 -0
- wandb/run-20220326_171130-bdf5nvyg/logs/debug-internal.log +2 -2
- wandb/run-20220326_171130-bdf5nvyg/run-bdf5nvyg.wandb +2 -2
all_results.json
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_loss": 0.
|
4 |
-
"eval_runtime":
|
5 |
"eval_samples": 2642,
|
6 |
-
"eval_samples_per_second": 3.
|
7 |
-
"eval_steps_per_second": 0.
|
8 |
-
"eval_wer": 0.
|
9 |
-
"train_loss":
|
10 |
-
"train_runtime":
|
11 |
"train_samples": 28538,
|
12 |
-
"train_samples_per_second": 5.
|
13 |
"train_steps_per_second": 0.043
|
14 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_loss": 0.3577839434146881,
|
4 |
+
"eval_runtime": 722.44,
|
5 |
"eval_samples": 2642,
|
6 |
+
"eval_samples_per_second": 3.657,
|
7 |
+
"eval_steps_per_second": 0.458,
|
8 |
+
"eval_wer": 0.09315747719159063,
|
9 |
+
"train_loss": 1.290548439615828,
|
10 |
+
"train_runtime": 51756.4072,
|
11 |
"train_samples": 28538,
|
12 |
+
"train_samples_per_second": 5.514,
|
13 |
"train_steps_per_second": 0.043
|
14 |
}
|
eval_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_loss": 0.
|
4 |
-
"eval_runtime":
|
5 |
"eval_samples": 2642,
|
6 |
-
"eval_samples_per_second": 3.
|
7 |
-
"eval_steps_per_second": 0.
|
8 |
-
"eval_wer": 0.
|
9 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_loss": 0.3577839434146881,
|
4 |
+
"eval_runtime": 722.44,
|
5 |
"eval_samples": 2642,
|
6 |
+
"eval_samples_per_second": 3.657,
|
7 |
+
"eval_steps_per_second": 0.458,
|
8 |
+
"eval_wer": 0.09315747719159063
|
9 |
}
|
runs/Mar26_17-11-01_sanchit--v100/events.out.tfevents.1648367457.sanchit--v100.2600125.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c76168e22789cb080ab7a8b8f00b0c8e9fb59f13d65464944fed71a9e51310d
|
3 |
+
size 358
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"train_loss":
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 28538,
|
6 |
-
"train_samples_per_second": 5.
|
7 |
"train_steps_per_second": 0.043
|
8 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"train_loss": 1.290548439615828,
|
4 |
+
"train_runtime": 51756.4072,
|
5 |
"train_samples": 28538,
|
6 |
+
"train_samples_per_second": 5.514,
|
7 |
"train_steps_per_second": 0.043
|
8 |
}
|
trainer_state.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
wandb/run-20220326_171130-bdf5nvyg/files/output.log
CHANGED
@@ -27431,3 +27431,337 @@ Upload file wandb/run-20220326_171130-bdf5nvyg/run-bdf5nvyg.wandb: 100%|ββ
|
|
27431 |
Upload file wandb/run-20220326_171130-bdf5nvyg/run-bdf5nvyg.wandb: 100%|βββββββββββββ| 434M/434M [00:58<00:00, 16.8MB/s]g-point operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27432 |
03/27/2022 07:38:16 - WARNING - huggingface_hub.repository - To https://huggingface.co/sanchit-gandhi/wav2vec2-2-bart-large-cnn
|
27433 |
Upload file runs/Mar26_17-11-01_sanchit--v100/events.out.tfevents.1648314690.sanchit--v100.2600125.0: 100%|β| 352k/352k g-point operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27431 |
Upload file wandb/run-20220326_171130-bdf5nvyg/run-bdf5nvyg.wandb: 100%|βββββββββββββ| 434M/434M [00:58<00:00, 16.8MB/s]g-point operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27432 |
03/27/2022 07:38:16 - WARNING - huggingface_hub.repository - To https://huggingface.co/sanchit-gandhi/wav2vec2-2-bart-large-cnn
|
27433 |
Upload file runs/Mar26_17-11-01_sanchit--v100/events.out.tfevents.1648314690.sanchit--v100.2600125.0: 100%|β| 352k/352k g-point operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27434 |
+
[INFO|modelcard.py:460] 2022-03-27 07:38:19,288 >> Dropping the following result as it does not have all the necessary fields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27435 |
+
[INFO|modelcard.py:460] 2022-03-27 07:38:19,288 >> Dropping the following result as it does not have all the necessary fields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27436 |
+
[INFO|modelcard.py:460] 2022-03-27 07:38:19,288 >> Dropping the following result as it does not have all the necessary fields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27437 |
+
[INFO|modelcard.py:460] 2022-03-27 07:38:19,288 >> Dropping the following result as it does not have all the necessary fields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27438 |
+
[INFO|modelcard.py:460] 2022-03-27 07:38:19,288 >> Dropping the following result as it does not have all the necessary fields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27439 |
+
[INFO|modelcard.py:460] 2022-03-27 07:38:19,288 >> Dropping the following result as it does not have all the necessary fields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27440 |
+
[INFO|modelcard.py:460] 2022-03-27 07:38:19,288 >> Dropping the following result as it does not have all the necessary fields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27441 |
+
[INFO|modelcard.py:460] 2022-03-27 07:38:19,288 >> Dropping the following result as it does not have all the necessary fields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27442 |
+
[INFO|modelcard.py:460] 2022-03-27 07:38:19,288 >> Dropping the following result as it does not have all the necessary fields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27443 |
+
[INFO|modelcard.py:460] 2022-03-27 07:38:19,288 >> Dropping the following result as it does not have all the necessary fields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27444 |
+
[INFO|modelcard.py:460] 2022-03-27 07:38:19,288 >> Dropping the following result as it does not have all the necessary fields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27445 |
+
[INFO|modelcard.py:460] 2022-03-27 07:38:19,288 >> Dropping the following result as it does not have all the necessary fields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27446 |
+
[INFO|modelcard.py:460] 2022-03-27 07:38:19,288 >> Dropping the following result as it does not have all the necessary fields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27447 |
+
[INFO|modelcard.py:460] 2022-03-27 07:38:19,288 >> Dropping the following result as it does not have all the necessary fields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27448 |
+
[INFO|modelcard.py:460] 2022-03-27 07:38:19,288 >> Dropping the following result as it does not have all the necessary fields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27449 |
+
03/27/2022 07:38:51 - WARNING - huggingface_hub.repository - To https://huggingface.co/sanchit-gandhi/wav2vec2-2-bart-large-cnn
|
27450 |
+
Upload file wandb/run-20220326_171130-bdf5nvyg/run-bdf5nvyg.wandb: 100%|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27451 |
+
Upload file wandb/run-20220326_171130-bdf5nvyg/run-bdf5nvyg.wandb: 100%|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27452 |
+
***** train metrics *****
|
27453 |
+
epoch = 10.0
|
27454 |
+
train_loss = 1.2905
|
27455 |
+
train_runtime = 14:22:36.40
|
27456 |
+
train_samples = 28538
|
27457 |
+
train_samples_per_second = 5.514
|
27458 |
+
train_steps_per_second = 0.043
|
27459 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27460 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27461 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27462 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27463 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27464 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27465 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27466 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27467 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27468 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27469 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27470 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27471 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27472 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27473 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27474 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27475 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27476 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27477 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27478 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27479 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27480 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27481 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27482 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27483 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27484 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27485 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27486 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27487 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27488 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27489 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27490 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27491 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27492 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27493 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27494 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27495 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27496 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27497 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27498 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27499 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27500 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27501 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27502 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27503 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27504 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27505 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27506 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27507 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27508 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27509 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27510 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27511 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27512 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27513 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27514 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27515 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27516 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27517 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27518 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27519 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27520 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27521 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27522 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27523 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27524 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27525 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27526 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27527 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27528 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27529 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27530 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27531 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27532 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27533 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27534 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27535 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27536 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27537 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27538 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27539 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27540 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27541 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27542 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27543 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27544 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27545 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27546 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27547 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27548 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27549 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27550 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27551 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27552 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27553 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27554 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27555 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27556 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27557 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27558 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27559 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27560 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27561 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27562 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27563 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27564 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27565 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27566 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27567 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27568 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27569 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27570 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27571 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27572 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27573 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27574 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27575 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27576 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27577 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27578 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27579 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27580 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27581 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27582 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27583 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27584 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27585 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27586 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27587 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27588 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27589 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27590 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27591 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27592 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27593 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27594 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27595 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27596 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27597 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27598 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27599 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27600 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27601 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27602 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27603 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27604 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27605 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27606 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27607 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27608 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27609 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27610 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27611 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27612 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27613 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27614 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27615 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27616 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27617 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27618 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27619 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27620 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27621 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27622 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27623 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27624 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27625 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27626 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27627 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27628 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27629 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27630 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27631 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27632 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27633 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27634 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27635 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27636 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27637 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27638 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27639 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27640 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27641 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27642 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27643 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27644 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27645 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27646 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27647 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27648 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27649 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27650 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27651 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27652 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27653 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27654 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27655 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27656 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27657 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27658 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27659 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27660 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27661 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27662 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27663 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββοΏ½οΏ½οΏ½βββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27664 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27665 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27666 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27667 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27668 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27669 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27670 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27671 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27672 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27673 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27674 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27675 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27676 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27677 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27678 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27679 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27680 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27681 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27682 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27683 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27684 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27685 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27686 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27687 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27688 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27689 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27690 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27691 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27692 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27693 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27694 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27695 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27696 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27697 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27698 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27699 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27700 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27701 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27702 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27703 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27704 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27705 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27706 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27707 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27708 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27709 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27710 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27711 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27712 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27713 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27714 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27715 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27716 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27717 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27718 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27719 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27720 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27721 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27722 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27723 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27724 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27725 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27726 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27727 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27728 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27729 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27730 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27731 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27732 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27733 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27734 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27735 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27736 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27737 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27738 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27739 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27740 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27741 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27742 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27743 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27744 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27745 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27746 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27747 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27748 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27749 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27750 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27751 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27752 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27753 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27754 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27755 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27756 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27757 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
27758 |
+
03/27/2022 07:50:57 - INFO - datasets.metric - Removing /home/sanchit_huggingface_co/.cache/huggingface/metrics/wer/default/default_experiment-1-0.arrow
|
27759 |
+
***** eval metrics *****
|
27760 |
+
epoch = 10.0
|
27761 |
+
eval_loss = 0.3578
|
27762 |
+
eval_runtime = 0:12:02.44
|
27763 |
+
eval_samples = 2642
|
27764 |
+
eval_samples_per_second = 3.657
|
27765 |
+
eval_steps_per_second = 0.458
|
27766 |
+
eval_wer = 0.0932
|
27767 |
+
[INFO|trainer.py:2366] 2022-03-27 07:38:54,563 >> Num examples = 2642|βββββββββββββ| 434M/434M [00:25<00:00, 18.1MB/s]ields:t operations will not be computed-27 07:27:04,529 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
|
wandb/run-20220326_171130-bdf5nvyg/files/wandb-summary.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
wandb/run-20220326_171130-bdf5nvyg/logs/debug-internal.log
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9390843eb9e29cbff8081fd9c955aa5e548c4aadf12a40b738c2dccf6b263b4
|
3 |
+
size 11827180
|
wandb/run-20220326_171130-bdf5nvyg/run-bdf5nvyg.wandb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07b8089a65e54817bb1f82c0125e890f0820c2f064b6baa83ff71388e0349784
|
3 |
+
size 455282886
|