dat
commited on
Commit
β’
be9ccf6
1
Parent(s):
7398222
Saving weights and logs of step 135000
Browse files- Load data & train tokenizer.ipynb +16 -17
- checkpoint_135000 +3 -0
- config.json +0 -2
- flax_model.msgpack +1 -1
- wandb/run-20210715_185845-dq8uirtg/files/output.log +461 -0
- wandb/run-20210715_185845-dq8uirtg/files/wandb-summary.json +1 -1
- wandb/run-20210715_185845-dq8uirtg/logs/debug-internal.log +0 -0
- wandb/run-20210715_185845-dq8uirtg/run-dq8uirtg.wandb +0 -0
Load data & train tokenizer.ipynb
CHANGED
@@ -85,29 +85,28 @@
|
|
85 |
},
|
86 |
{
|
87 |
"cell_type": "code",
|
88 |
-
"execution_count":
|
89 |
-
"id": "
|
90 |
"metadata": {},
|
91 |
-
"outputs": [
|
92 |
-
{
|
93 |
-
"data": {
|
94 |
-
"text/plain": [
|
95 |
-
"{'input_ids': [4, 3620], 'attention_mask': [1, 1]}"
|
96 |
-
]
|
97 |
-
},
|
98 |
-
"execution_count": 45,
|
99 |
-
"metadata": {},
|
100 |
-
"output_type": "execute_result"
|
101 |
-
}
|
102 |
-
],
|
103 |
"source": [
|
104 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
]
|
106 |
},
|
107 |
{
|
108 |
"cell_type": "code",
|
109 |
"execution_count": null,
|
110 |
-
"id": "
|
111 |
"metadata": {},
|
112 |
"outputs": [],
|
113 |
"source": [
|
@@ -117,7 +116,7 @@
|
|
117 |
{
|
118 |
"cell_type": "code",
|
119 |
"execution_count": 46,
|
120 |
-
"id": "
|
121 |
"metadata": {},
|
122 |
"outputs": [
|
123 |
{
|
|
|
85 |
},
|
86 |
{
|
87 |
"cell_type": "code",
|
88 |
+
"execution_count": 1,
|
89 |
+
"id": "6b89ed82",
|
90 |
"metadata": {},
|
91 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
"source": [
|
93 |
+
"import transformers"
|
94 |
+
]
|
95 |
+
},
|
96 |
+
{
|
97 |
+
"cell_type": "code",
|
98 |
+
"execution_count": null,
|
99 |
+
"id": "5ad59ec0",
|
100 |
+
"metadata": {},
|
101 |
+
"outputs": [],
|
102 |
+
"source": [
|
103 |
+
"x = transformers.FillMaskPipeline"
|
104 |
]
|
105 |
},
|
106 |
{
|
107 |
"cell_type": "code",
|
108 |
"execution_count": null,
|
109 |
+
"id": "4cb13b65",
|
110 |
"metadata": {},
|
111 |
"outputs": [],
|
112 |
"source": [
|
|
|
116 |
{
|
117 |
"cell_type": "code",
|
118 |
"execution_count": 46,
|
119 |
+
"id": "617073b7",
|
120 |
"metadata": {},
|
121 |
"outputs": [
|
122 |
{
|
checkpoint_135000
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a080463686525430cb78ca56cad979b53fe60d205f42470d294073bc5f5016d
|
3 |
+
size 1530270447
|
config.json
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": ".",
|
3 |
"architectures": [
|
4 |
"BigBirdForMaskedLM"
|
5 |
],
|
@@ -24,7 +23,6 @@
|
|
24 |
"position_embedding_type": "absolute",
|
25 |
"rescale_embeddings": false,
|
26 |
"sep_token_id": 66,
|
27 |
-
"torch_dtype": "float32",
|
28 |
"transformers_version": "4.9.0.dev0",
|
29 |
"type_vocab_size": 2,
|
30 |
"use_bias": true,
|
|
|
1 |
{
|
|
|
2 |
"architectures": [
|
3 |
"BigBirdForMaskedLM"
|
4 |
],
|
|
|
23 |
"position_embedding_type": "absolute",
|
24 |
"rescale_embeddings": false,
|
25 |
"sep_token_id": 66,
|
|
|
26 |
"transformers_version": "4.9.0.dev0",
|
27 |
"type_vocab_size": 2,
|
28 |
"use_bias": true,
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510090043
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29ed0773269721507328c69a2ccd94dd845503587ae4a89525ec4f204ab05f0d
|
3 |
size 510090043
|
wandb/run-20210715_185845-dq8uirtg/files/output.log
CHANGED
@@ -3610,3 +3610,464 @@ Training...: 31248it [3:41:07, 2.74it/s]βββββββββββββ
|
|
3610 |
|
3611 |
|
3612 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3610 |
|
3611 |
|
3612 |
|
3613 |
+
Training...: 31298it [3:41:27, 2.75it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3614 |
+
|
3615 |
+
|
3616 |
+
|
3617 |
+
|
3618 |
+
|
3619 |
+
Training...: 31348it [3:41:47, 2.70it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3620 |
+
|
3621 |
+
|
3622 |
+
|
3623 |
+
|
3624 |
+
|
3625 |
+
Training...: 31398it [3:42:07, 2.72it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3626 |
+
|
3627 |
+
|
3628 |
+
|
3629 |
+
|
3630 |
+
|
3631 |
+
Training...: 31448it [3:42:27, 2.69it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3632 |
+
|
3633 |
+
|
3634 |
+
|
3635 |
+
|
3636 |
+
Training...: 31498it [3:42:47, 2.63it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3637 |
+
|
3638 |
+
|
3639 |
+
|
3640 |
+
|
3641 |
+
Training...: 31548it [3:43:07, 2.69it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3642 |
+
|
3643 |
+
|
3644 |
+
|
3645 |
+
|
3646 |
+
Training...: 31598it [3:43:27, 2.68it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3647 |
+
|
3648 |
+
|
3649 |
+
|
3650 |
+
|
3651 |
+
|
3652 |
+
Training...: 31648it [3:43:47, 2.74it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3653 |
+
|
3654 |
+
|
3655 |
+
|
3656 |
+
|
3657 |
+
|
3658 |
+
Training...: 31698it [3:44:07, 2.66it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3659 |
+
|
3660 |
+
|
3661 |
+
|
3662 |
+
|
3663 |
+
Training...: 31748it [3:44:27, 2.74it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3664 |
+
|
3665 |
+
|
3666 |
+
|
3667 |
+
|
3668 |
+
Training...: 31798it [3:44:48, 2.68it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3669 |
+
|
3670 |
+
|
3671 |
+
|
3672 |
+
|
3673 |
+
Training...: 31848it [3:45:08, 2.68it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3674 |
+
|
3675 |
+
|
3676 |
+
|
3677 |
+
|
3678 |
+
|
3679 |
+
Training...: 31898it [3:45:28, 2.70it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3680 |
+
|
3681 |
+
|
3682 |
+
|
3683 |
+
|
3684 |
+
|
3685 |
+
Training...: 31948it [3:45:48, 2.70it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3686 |
+
|
3687 |
+
|
3688 |
+
|
3689 |
+
|
3690 |
+
|
3691 |
+
Training...: 31998it [3:46:08, 2.72it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3692 |
+
Training...: 31998it [3:46:21, 2.72it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3693 |
+
|
3694 |
+
|
3695 |
+
|
3696 |
+
|
3697 |
+
|
3698 |
+
|
3699 |
+
|
3700 |
+
|
3701 |
+
|
3702 |
+
|
3703 |
+
|
3704 |
+
|
3705 |
+
|
3706 |
+
|
3707 |
+
|
3708 |
+
|
3709 |
+
|
3710 |
+
|
3711 |
+
|
3712 |
+
|
3713 |
+
|
3714 |
+
|
3715 |
+
|
3716 |
+
|
3717 |
+
|
3718 |
+
|
3719 |
+
|
3720 |
+
|
3721 |
+
|
3722 |
+
|
3723 |
+
|
3724 |
+
|
3725 |
+
|
3726 |
+
|
3727 |
+
Training...: 32048it [3:47:32, 2.68it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3728 |
+
|
3729 |
+
|
3730 |
+
|
3731 |
+
|
3732 |
+
Training...: 32098it [3:47:52, 2.71it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3733 |
+
|
3734 |
+
|
3735 |
+
|
3736 |
+
|
3737 |
+
|
3738 |
+
Training...: 32149it [3:48:25, 4.01s/it]ββββββοΏ½οΏ½οΏ½βββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3739 |
+
|
3740 |
+
|
3741 |
+
|
3742 |
+
|
3743 |
+
Training...: 32198it [3:48:32, 2.70it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3744 |
+
|
3745 |
+
|
3746 |
+
|
3747 |
+
|
3748 |
+
Training...: 32248it [3:48:52, 2.69it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3749 |
+
|
3750 |
+
|
3751 |
+
|
3752 |
+
|
3753 |
+
Training...: 32298it [3:49:12, 2.68it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3754 |
+
|
3755 |
+
|
3756 |
+
|
3757 |
+
|
3758 |
+
Training...: 32348it [3:49:32, 2.69it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3759 |
+
|
3760 |
+
|
3761 |
+
|
3762 |
+
|
3763 |
+
Training...: 32398it [3:49:52, 2.68it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3764 |
+
|
3765 |
+
|
3766 |
+
|
3767 |
+
|
3768 |
+
Training...: 32448it [3:50:12, 2.70it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3769 |
+
|
3770 |
+
|
3771 |
+
|
3772 |
+
|
3773 |
+
Training...: 32498it [3:50:32, 2.68it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3774 |
+
|
3775 |
+
|
3776 |
+
|
3777 |
+
|
3778 |
+
Training...: 32548it [3:50:52, 2.70it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3779 |
+
|
3780 |
+
|
3781 |
+
|
3782 |
+
|
3783 |
+
|
3784 |
+
Training...: 32599it [3:51:25, 4.01s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3785 |
+
|
3786 |
+
|
3787 |
+
|
3788 |
+
|
3789 |
+
Training...: 32648it [3:51:32, 2.69it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3790 |
+
|
3791 |
+
|
3792 |
+
|
3793 |
+
|
3794 |
+
Training...: 32698it [3:51:52, 2.74it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3795 |
+
|
3796 |
+
|
3797 |
+
|
3798 |
+
|
3799 |
+
Training...: 32748it [3:52:12, 2.69it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3800 |
+
|
3801 |
+
|
3802 |
+
|
3803 |
+
|
3804 |
+
Training...: 32798it [3:52:32, 2.72it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3805 |
+
|
3806 |
+
|
3807 |
+
|
3808 |
+
|
3809 |
+
|
3810 |
+
Training...: 32849it [3:53:05, 4.01s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3811 |
+
|
3812 |
+
|
3813 |
+
|
3814 |
+
|
3815 |
+
|
3816 |
+
Training...: 32901it [3:53:26, 2.24s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3817 |
+
|
3818 |
+
|
3819 |
+
|
3820 |
+
|
3821 |
+
|
3822 |
+
Training...: 32951it [3:53:46, 2.24s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3823 |
+
|
3824 |
+
|
3825 |
+
|
3826 |
+
|
3827 |
+
|
3828 |
+
Training...: 33003it [3:54:06, 1.39s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3829 |
+
|
3830 |
+
|
3831 |
+
|
3832 |
+
|
3833 |
+
|
3834 |
+
Training...: 33053it [3:54:26, 1.40s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3835 |
+
|
3836 |
+
|
3837 |
+
|
3838 |
+
|
3839 |
+
|
3840 |
+
Training...: 33105it [3:54:46, 1.09it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3841 |
+
|
3842 |
+
|
3843 |
+
|
3844 |
+
|
3845 |
+
|
3846 |
+
Training...: 33153it [3:55:06, 1.40s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3847 |
+
|
3848 |
+
|
3849 |
+
|
3850 |
+
|
3851 |
+
|
3852 |
+
Training...: 33205it [3:55:26, 1.08it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3853 |
+
|
3854 |
+
|
3855 |
+
|
3856 |
+
|
3857 |
+
|
3858 |
+
Training...: 33255it [3:55:46, 1.09it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3859 |
+
|
3860 |
+
|
3861 |
+
|
3862 |
+
|
3863 |
+
|
3864 |
+
Training...: 33305it [3:56:06, 1.08it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3865 |
+
|
3866 |
+
|
3867 |
+
|
3868 |
+
|
3869 |
+
|
3870 |
+
Training...: 33357it [3:56:26, 1.58it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3871 |
+
|
3872 |
+
|
3873 |
+
|
3874 |
+
|
3875 |
+
|
3876 |
+
Training...: 33401it [3:56:46, 2.24s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3877 |
+
|
3878 |
+
|
3879 |
+
|
3880 |
+
|
3881 |
+
|
3882 |
+
Training...: 33451it [3:57:06, 2.26s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3883 |
+
|
3884 |
+
|
3885 |
+
|
3886 |
+
|
3887 |
+
|
3888 |
+
Training...: 33501it [3:57:26, 2.25s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3889 |
+
|
3890 |
+
|
3891 |
+
|
3892 |
+
|
3893 |
+
|
3894 |
+
Training...: 33553it [3:57:46, 1.40s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3895 |
+
|
3896 |
+
|
3897 |
+
|
3898 |
+
|
3899 |
+
|
3900 |
+
Training...: 33607it [3:58:07, 1.58it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3901 |
+
|
3902 |
+
|
3903 |
+
|
3904 |
+
|
3905 |
+
|
3906 |
+
Training...: 33657it [3:58:27, 1.58it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3907 |
+
|
3908 |
+
|
3909 |
+
|
3910 |
+
|
3911 |
+
|
3912 |
+
Training...: 33701it [3:58:46, 2.24s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3913 |
+
|
3914 |
+
|
3915 |
+
|
3916 |
+
|
3917 |
+
|
3918 |
+
Training...: 33753it [3:59:07, 1.39s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3919 |
+
|
3920 |
+
|
3921 |
+
|
3922 |
+
|
3923 |
+
|
3924 |
+
Training...: 33803it [3:59:27, 1.39s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3925 |
+
|
3926 |
+
|
3927 |
+
|
3928 |
+
|
3929 |
+
|
3930 |
+
Training...: 33853it [3:59:47, 1.40s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3931 |
+
|
3932 |
+
|
3933 |
+
|
3934 |
+
|
3935 |
+
|
3936 |
+
Training...: 33905it [4:00:07, 1.09it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3937 |
+
|
3938 |
+
|
3939 |
+
|
3940 |
+
|
3941 |
+
|
3942 |
+
Training...: 33957it [4:00:27, 1.59it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3943 |
+
|
3944 |
+
|
3945 |
+
|
3946 |
+
|
3947 |
+
|
3948 |
+
Training...: 34007it [4:00:47, 1.58it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββοΏ½οΏ½β| 500/500 [00:59<00:00, 7.90it/s]
|
3949 |
+
|
3950 |
+
|
3951 |
+
|
3952 |
+
|
3953 |
+
|
3954 |
+
Training...: 34059it [4:01:07, 2.26it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3955 |
+
|
3956 |
+
|
3957 |
+
|
3958 |
+
|
3959 |
+
|
3960 |
+
Training...: 34109it [4:01:27, 2.23it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3961 |
+
|
3962 |
+
|
3963 |
+
|
3964 |
+
|
3965 |
+
|
3966 |
+
Training...: 34152it [4:01:47, 1.61s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3967 |
+
|
3968 |
+
|
3969 |
+
|
3970 |
+
|
3971 |
+
|
3972 |
+
Training...: 34203it [4:02:07, 1.38s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3973 |
+
|
3974 |
+
|
3975 |
+
|
3976 |
+
|
3977 |
+
|
3978 |
+
Training...: 34255it [4:02:27, 1.09it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3979 |
+
|
3980 |
+
|
3981 |
+
|
3982 |
+
|
3983 |
+
|
3984 |
+
Training...: 34304it [4:02:47, 1.01s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3985 |
+
|
3986 |
+
|
3987 |
+
|
3988 |
+
|
3989 |
+
|
3990 |
+
Training...: 34353it [4:03:07, 1.40s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3991 |
+
|
3992 |
+
|
3993 |
+
|
3994 |
+
|
3995 |
+
|
3996 |
+
Training...: 34405it [4:03:27, 1.09it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
3997 |
+
|
3998 |
+
|
3999 |
+
|
4000 |
+
|
4001 |
+
|
4002 |
+
Training...: 34457it [4:03:47, 1.59it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
4003 |
+
|
4004 |
+
|
4005 |
+
|
4006 |
+
|
4007 |
+
|
4008 |
+
Training...: 34507it [4:04:08, 1.89it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
4009 |
+
|
4010 |
+
|
4011 |
+
|
4012 |
+
|
4013 |
+
|
4014 |
+
Training...: 34553it [4:04:27, 1.40s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
4015 |
+
|
4016 |
+
|
4017 |
+
|
4018 |
+
|
4019 |
+
|
4020 |
+
Training...: 34603it [4:04:47, 1.40s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
4021 |
+
|
4022 |
+
|
4023 |
+
|
4024 |
+
|
4025 |
+
|
4026 |
+
Training...: 34654it [4:05:08, 1.05s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
4027 |
+
|
4028 |
+
|
4029 |
+
|
4030 |
+
|
4031 |
+
|
4032 |
+
Training...: 34705it [4:05:28, 1.09it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
4033 |
+
|
4034 |
+
|
4035 |
+
|
4036 |
+
|
4037 |
+
|
4038 |
+
Training...: 34756it [4:05:48, 1.39it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
4039 |
+
|
4040 |
+
|
4041 |
+
|
4042 |
+
|
4043 |
+
|
4044 |
+
Training...: 34809it [4:06:08, 2.24it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
4045 |
+
|
4046 |
+
|
4047 |
+
|
4048 |
+
|
4049 |
+
|
4050 |
+
Training...: 34853it [4:06:28, 1.39s/it]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
4051 |
+
|
4052 |
+
|
4053 |
+
|
4054 |
+
|
4055 |
+
|
4056 |
+
Training...: 34907it [4:06:48, 1.59it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
4057 |
+
|
4058 |
+
|
4059 |
+
|
4060 |
+
|
4061 |
+
|
4062 |
+
Training...: 34957it [4:07:08, 1.59it/s]ββββββββββββββββββββββββββββββββββββββοΏ½οΏ½οΏ½βββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
4063 |
+
|
4064 |
+
|
4065 |
+
|
4066 |
+
|
4067 |
+
|
4068 |
+
Training...: 34998it [4:07:27, 2.70it/s]ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 500/500 [00:59<00:00, 7.90it/s]
|
4069 |
+
|
4070 |
+
tcmalloc: large alloc 2715181056 bytes == 0x3dd038000 @ 0x7f7011c37680 0x7f7011c57bdd 0x7f6ff81f120d 0x7f6ff81ff340 0x7f6ff81fee87 0x7f6ff81fee87 0x7f6ff81fee87 0x7f6ff81fee87 0x7f6ff81fee87 0x7f6ff81fee87 0x7f6ff81fee87 0x7f6ff81fee87 0x7f6ff81fee87 0x7f6ff81fee87 0x7f6ff81fee87 0x7f6ff81fee87 0x7f6ff81fabd3 0x7f6ff81fb1fe 0x504d56 0x56acb6 0x568d9a 0x5f5b33 0x56bc9b 0x5f5956 0x56aadf 0x5f5956 0x56fb87 0x568d9a 0x5f5b33 0x56bc9b 0x568d9a
|
4071 |
+
[23:12:05] - INFO - absl - Saved checkpoint at checkpoint_135000
|
4072 |
+
[23:12:06] - INFO - huggingface_hub.repository - git version 2.25.1
|
4073 |
+
git-lfs/2.9.2 (GitHub; linux amd64; go 1.13.5)
|
wandb/run-20210715_185845-dq8uirtg/files/wandb-summary.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"training_step":
|
|
|
1 |
+
{"training_step": 135000, "learning_rate": 2.4608441890450194e-05, "train_loss": 2.3477954864501953, "_runtime": 15192, "_timestamp": 1626390717, "_step": 705, "eval_step": 132000, "eval_accuracy": 0.5815154314041138, "eval_loss": 2.216282367706299}
|
wandb/run-20210715_185845-dq8uirtg/logs/debug-internal.log
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
wandb/run-20210715_185845-dq8uirtg/run-dq8uirtg.wandb
CHANGED
Binary files a/wandb/run-20210715_185845-dq8uirtg/run-dq8uirtg.wandb and b/wandb/run-20210715_185845-dq8uirtg/run-dq8uirtg.wandb differ
|
|