diff --git "a/combsub/log_info.txt" "b/combsub/log_info.txt" new file mode 100644--- /dev/null +++ "b/combsub/log_info.txt" @@ -0,0 +1,1148 @@ +--- model size --- +model: 4,000,518 +======= start training ======= +epoch: 0 | 0/ 87 | exp/pretrain/combsub | batch/s: 0.03 | loss: 6.296 | time: 0:00:31.0 | step: 1 +epoch: 0 | 1/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 6.121 | time: 0:00:58.0 | step: 2 +epoch: 0 | 2/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 5.764 | time: 0:01:25.3 | step: 3 +epoch: 0 | 3/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 5.618 | time: 0:01:52.2 | step: 4 +epoch: 0 | 4/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 5.360 | time: 0:02:19.5 | step: 5 +epoch: 0 | 5/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 5.446 | time: 0:02:46.5 | step: 6 +epoch: 0 | 6/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 5.077 | time: 0:03:13.5 | step: 7 +epoch: 0 | 7/ 87 | exp/pretrain/combsub | batch/s: 0.03 | loss: 5.051 | time: 0:03:42.5 | step: 8 +epoch: 0 | 8/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 4.876 | time: 0:04:09.4 | step: 9 +epoch: 0 | 9/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 4.780 | time: 0:04:36.4 | step: 10 +epoch: 0 | 10/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 4.681 | time: 0:05:03.3 | step: 11 +epoch: 0 | 11/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 4.598 | time: 0:05:30.4 | step: 12 +epoch: 0 | 12/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 4.509 | time: 0:05:57.0 | step: 13 +epoch: 0 | 13/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 4.406 | time: 0:06:23.9 | step: 14 +epoch: 0 | 14/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 4.313 | time: 0:06:50.5 | step: 15 +epoch: 0 | 15/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 4.096 | time: 0:07:17.3 | step: 16 +epoch: 0 | 16/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 4.011 | time: 0:07:43.4 | step: 17 +epoch: 0 | 17/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 3.913 | time: 0:08:09.4 | step: 18 +epoch: 0 | 18/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 3.878 | time: 0:08:35.5 | step: 19 +epoch: 0 | 19/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 3.814 | time: 0:09:02.5 | step: 20 +epoch: 0 | 20/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 3.666 | time: 0:09:28.8 | step: 21 +epoch: 0 | 21/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 3.588 | time: 0:09:54.7 | step: 22 +epoch: 0 | 22/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 3.576 | time: 0:10:20.8 | step: 23 +epoch: 0 | 23/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 3.413 | time: 0:10:46.8 | step: 24 +epoch: 0 | 24/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 3.348 | time: 0:11:12.8 | step: 25 +epoch: 0 | 25/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 3.339 | time: 0:11:38.9 | step: 26 +epoch: 0 | 26/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 3.253 | time: 0:12:04.7 | step: 27 +epoch: 0 | 27/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 3.125 | time: 0:12:31.2 | step: 28 +epoch: 0 | 28/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 3.072 | time: 0:12:57.7 | step: 29 +epoch: 0 | 29/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.985 | time: 0:13:24.9 | step: 30 +epoch: 0 | 30/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.954 | time: 0:13:51.2 | step: 31 +epoch: 0 | 31/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.965 | time: 0:14:18.9 | step: 32 +epoch: 0 | 32/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.861 | time: 0:14:45.0 | step: 33 +epoch: 0 | 33/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.894 | time: 0:15:11.3 | step: 34 +epoch: 0 | 34/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.774 | time: 0:15:38.0 | step: 35 +epoch: 0 | 35/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.758 | time: 0:16:04.4 | step: 36 +epoch: 0 | 36/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.723 | time: 0:16:30.7 | step: 37 +epoch: 0 | 37/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.684 | time: 0:16:56.5 | step: 38 +epoch: 0 | 38/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.646 | time: 0:17:22.3 | step: 39 +epoch: 0 | 39/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.599 | time: 0:17:48.5 | step: 40 +epoch: 0 | 40/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.541 | time: 0:18:14.2 | step: 41 +epoch: 0 | 41/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.511 | time: 0:18:40.1 | step: 42 +epoch: 0 | 42/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.513 | time: 0:19:05.9 | step: 43 +epoch: 0 | 43/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.445 | time: 0:19:32.4 | step: 44 +epoch: 0 | 44/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.474 | time: 0:19:58.1 | step: 45 +epoch: 0 | 45/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.458 | time: 0:20:23.8 | step: 46 +epoch: 0 | 46/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.462 | time: 0:20:49.5 | step: 47 +epoch: 0 | 47/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.452 | time: 0:21:15.0 | step: 48 +epoch: 0 | 48/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.417 | time: 0:21:40.5 | step: 49 +epoch: 0 | 49/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.389 | time: 0:22:06.0 | step: 50 +epoch: 0 | 50/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.335 | time: 0:22:31.5 | step: 51 +epoch: 0 | 51/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.316 | time: 0:22:57.1 | step: 52 +epoch: 0 | 52/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.287 | time: 0:23:22.4 | step: 53 +epoch: 0 | 53/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.281 | time: 0:23:47.8 | step: 54 +epoch: 0 | 54/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.288 | time: 0:24:13.3 | step: 55 +epoch: 0 | 55/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.250 | time: 0:24:38.9 | step: 56 +epoch: 0 | 56/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.224 | time: 0:25:04.1 | step: 57 +epoch: 0 | 57/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.221 | time: 0:25:29.4 | step: 58 +epoch: 0 | 58/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.181 | time: 0:25:54.8 | step: 59 +epoch: 0 | 59/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.174 | time: 0:26:20.0 | step: 60 +epoch: 0 | 60/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.186 | time: 0:26:45.4 | step: 61 +epoch: 0 | 61/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.171 | time: 0:27:10.7 | step: 62 +epoch: 0 | 62/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.124 | time: 0:27:36.1 | step: 63 +epoch: 0 | 63/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.112 | time: 0:28:01.3 | step: 64 +epoch: 0 | 64/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.090 | time: 0:28:26.5 | step: 65 +epoch: 0 | 65/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.101 | time: 0:28:52.0 | step: 66 +epoch: 0 | 66/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.047 | time: 0:29:17.1 | step: 67 +epoch: 0 | 67/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.083 | time: 0:29:42.5 | step: 68 +epoch: 0 | 68/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.085 | time: 0:30:08.8 | step: 69 +epoch: 0 | 69/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.026 | time: 0:30:34.1 | step: 70 +epoch: 0 | 70/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.004 | time: 0:30:59.7 | step: 71 +epoch: 0 | 71/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.047 | time: 0:31:25.3 | step: 72 +epoch: 0 | 72/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.037 | time: 0:31:51.0 | step: 73 +epoch: 0 | 73/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.008 | time: 0:32:16.7 | step: 74 +epoch: 0 | 74/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.018 | time: 0:32:42.2 | step: 75 +epoch: 0 | 75/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.989 | time: 0:33:07.9 | step: 76 +epoch: 0 | 76/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.979 | time: 0:33:33.4 | step: 77 +epoch: 0 | 77/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.992 | time: 0:33:59.1 | step: 78 +epoch: 0 | 78/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.969 | time: 0:34:24.3 | step: 79 +epoch: 0 | 79/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.966 | time: 0:34:49.6 | step: 80 +epoch: 0 | 80/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.939 | time: 0:35:15.4 | step: 81 +epoch: 0 | 81/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.990 | time: 0:35:41.4 | step: 82 +epoch: 0 | 82/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.949 | time: 0:36:07.0 | step: 83 +epoch: 0 | 83/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.962 | time: 0:36:32.8 | step: 84 +epoch: 0 | 84/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.971 | time: 0:36:58.3 | step: 85 +epoch: 0 | 85/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.918 | time: 0:37:24.0 | step: 86 +epoch: 0 | 86/ 87 | exp/pretrain/combsub | batch/s: 0.09 | loss: 1.962 | time: 0:37:34.9 | step: 87 + --- --- +loss: 2.086. +epoch: 1 | 0/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.915 | time: 0:38:00.3 | step: 88 +epoch: 1 | 1/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.917 | time: 0:38:25.5 | step: 89 +epoch: 1 | 2/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.929 | time: 0:38:50.6 | step: 90 +epoch: 1 | 3/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.936 | time: 0:39:15.8 | step: 91 +epoch: 1 | 4/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.934 | time: 0:39:41.2 | step: 92 +epoch: 1 | 5/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.908 | time: 0:40:06.5 | step: 93 +epoch: 1 | 6/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.914 | time: 0:40:31.9 | step: 94 +epoch: 1 | 7/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.907 | time: 0:40:57.8 | step: 95 +epoch: 1 | 8/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.912 | time: 0:41:23.3 | step: 96 +epoch: 1 | 9/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.909 | time: 0:41:49.3 | step: 97 +epoch: 1 | 10/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.896 | time: 0:42:14.7 | step: 98 +epoch: 1 | 11/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.859 | time: 0:42:40.2 | step: 99 +epoch: 1 | 12/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.887 | time: 0:43:06.0 | step: 100 +epoch: 1 | 13/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.862 | time: 0:43:31.2 | step: 101 +epoch: 1 | 14/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.867 | time: 0:43:56.2 | step: 102 +epoch: 1 | 15/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.865 | time: 0:44:21.5 | step: 103 +epoch: 1 | 16/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.864 | time: 0:44:46.9 | step: 104 +epoch: 1 | 17/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.828 | time: 0:45:12.2 | step: 105 +epoch: 1 | 18/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.904 | time: 0:45:37.6 | step: 106 +epoch: 1 | 19/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.953 | time: 0:46:03.1 | step: 107 +epoch: 1 | 20/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.857 | time: 0:46:28.7 | step: 108 +epoch: 1 | 21/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.827 | time: 0:46:54.2 | step: 109 +epoch: 1 | 22/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.903 | time: 0:47:19.8 | step: 110 +epoch: 1 | 23/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.830 | time: 0:47:45.2 | step: 111 +epoch: 1 | 24/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.853 | time: 0:48:10.5 | step: 112 +epoch: 1 | 25/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.797 | time: 0:48:35.5 | step: 113 +epoch: 1 | 26/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.836 | time: 0:49:01.0 | step: 114 +epoch: 1 | 27/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.770 | time: 0:49:26.1 | step: 115 +epoch: 1 | 28/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.744 | time: 0:49:51.4 | step: 116 +epoch: 1 | 29/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.732 | time: 0:50:16.8 | step: 117 +epoch: 1 | 30/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.723 | time: 0:50:42.7 | step: 118 +epoch: 1 | 31/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.851 | time: 0:51:08.1 | step: 119 +epoch: 1 | 32/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 2.078 | time: 0:51:33.6 | step: 120 +epoch: 1 | 33/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.823 | time: 0:51:59.2 | step: 121 +epoch: 1 | 34/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.940 | time: 0:52:24.5 | step: 122 +epoch: 1 | 35/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.803 | time: 0:52:49.8 | step: 123 +epoch: 1 | 36/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.741 | time: 0:53:15.1 | step: 124 +epoch: 1 | 37/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.836 | time: 0:53:40.5 | step: 125 +epoch: 1 | 38/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.819 | time: 0:54:05.9 | step: 126 +epoch: 1 | 39/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.747 | time: 0:54:31.5 | step: 127 +epoch: 1 | 40/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.801 | time: 0:54:56.8 | step: 128 +epoch: 1 | 41/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.765 | time: 0:55:21.8 | step: 129 +epoch: 1 | 42/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.758 | time: 0:55:47.1 | step: 130 +epoch: 1 | 43/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.740 | time: 0:56:12.5 | step: 131 +epoch: 1 | 44/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.729 | time: 0:56:38.5 | step: 132 +epoch: 1 | 45/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.718 | time: 0:57:03.8 | step: 133 +epoch: 1 | 46/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.674 | time: 0:57:29.1 | step: 134 +epoch: 1 | 47/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.688 | time: 0:57:54.3 | step: 135 +epoch: 1 | 48/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.670 | time: 0:58:19.8 | step: 136 +epoch: 1 | 49/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.665 | time: 0:58:45.0 | step: 137 +epoch: 1 | 50/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.661 | time: 0:59:10.2 | step: 138 +epoch: 1 | 51/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.632 | time: 0:59:35.3 | step: 139 +epoch: 1 | 52/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.611 | time: 1:00:00.5 | step: 140 +epoch: 1 | 53/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.602 | time: 1:00:25.8 | step: 141 +epoch: 1 | 54/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.589 | time: 1:00:51.0 | step: 142 +epoch: 1 | 55/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.644 | time: 1:01:16.3 | step: 143 +epoch: 1 | 56/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.605 | time: 1:01:41.7 | step: 144 +epoch: 1 | 57/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.569 | time: 1:02:07.3 | step: 145 +epoch: 1 | 58/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.673 | time: 1:02:32.3 | step: 146 +epoch: 1 | 59/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.579 | time: 1:02:57.0 | step: 147 +epoch: 1 | 60/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.557 | time: 1:03:22.0 | step: 148 +epoch: 1 | 61/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.604 | time: 1:03:47.0 | step: 149 +epoch: 1 | 62/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.591 | time: 1:04:12.6 | step: 150 +epoch: 1 | 63/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.553 | time: 1:04:37.8 | step: 151 +epoch: 1 | 64/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.554 | time: 1:05:03.1 | step: 152 +epoch: 1 | 65/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.557 | time: 1:05:28.6 | step: 153 +epoch: 1 | 66/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.525 | time: 1:05:53.9 | step: 154 +epoch: 1 | 67/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.549 | time: 1:06:19.6 | step: 155 +epoch: 1 | 68/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.604 | time: 1:06:44.6 | step: 156 +epoch: 1 | 69/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.574 | time: 1:07:10.4 | step: 157 +epoch: 1 | 70/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.504 | time: 1:07:38.8 | step: 158 +epoch: 1 | 71/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.495 | time: 1:08:04.3 | step: 159 +epoch: 1 | 72/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.529 | time: 1:08:29.5 | step: 160 +epoch: 1 | 73/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.566 | time: 1:08:54.9 | step: 161 +epoch: 1 | 74/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.484 | time: 1:09:19.8 | step: 162 +epoch: 1 | 75/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.524 | time: 1:09:45.0 | step: 163 +epoch: 1 | 76/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.595 | time: 1:10:10.5 | step: 164 +epoch: 1 | 77/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.468 | time: 1:10:36.0 | step: 165 +epoch: 1 | 78/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.592 | time: 1:11:01.6 | step: 166 +epoch: 1 | 79/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.494 | time: 1:11:27.1 | step: 167 +epoch: 1 | 80/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.508 | time: 1:11:52.6 | step: 168 +epoch: 1 | 81/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.558 | time: 1:12:18.1 | step: 169 +epoch: 1 | 82/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.524 | time: 1:12:44.1 | step: 170 +epoch: 1 | 83/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.563 | time: 1:13:11.5 | step: 171 +epoch: 1 | 84/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.495 | time: 1:13:37.0 | step: 172 +epoch: 1 | 85/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.517 | time: 1:14:02.8 | step: 173 +epoch: 1 | 86/ 87 | exp/pretrain/combsub | batch/s: 0.09 | loss: 1.501 | time: 1:14:13.2 | step: 174 + --- --- +loss: 1.437. +epoch: 2 | 0/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.490 | time: 1:14:38.8 | step: 175 +epoch: 2 | 1/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.492 | time: 1:15:04.1 | step: 176 +epoch: 2 | 2/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.470 | time: 1:15:29.6 | step: 177 +epoch: 2 | 3/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.473 | time: 1:15:55.2 | step: 178 +epoch: 2 | 4/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.452 | time: 1:16:20.8 | step: 179 +epoch: 2 | 5/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.474 | time: 1:16:46.0 | step: 180 +epoch: 2 | 6/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.472 | time: 1:17:11.0 | step: 181 +epoch: 2 | 7/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.451 | time: 1:17:36.3 | step: 182 +epoch: 2 | 8/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.463 | time: 1:18:01.9 | step: 183 +epoch: 2 | 9/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.451 | time: 1:18:27.5 | step: 184 +epoch: 2 | 10/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.486 | time: 1:18:53.3 | step: 185 +epoch: 2 | 11/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.436 | time: 1:19:19.2 | step: 186 +epoch: 2 | 12/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.469 | time: 1:19:44.7 | step: 187 +epoch: 2 | 13/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.474 | time: 1:20:10.5 | step: 188 +epoch: 2 | 14/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.441 | time: 1:20:36.1 | step: 189 +epoch: 2 | 15/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.444 | time: 1:21:01.1 | step: 190 +epoch: 2 | 16/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.467 | time: 1:21:26.6 | step: 191 +epoch: 2 | 17/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.440 | time: 1:21:52.3 | step: 192 +epoch: 2 | 18/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.440 | time: 1:22:18.6 | step: 193 +epoch: 2 | 19/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.439 | time: 1:22:44.2 | step: 194 +epoch: 2 | 20/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.433 | time: 1:23:10.0 | step: 195 +epoch: 2 | 21/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.449 | time: 1:23:35.5 | step: 196 +epoch: 2 | 22/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.423 | time: 1:24:02.4 | step: 197 +epoch: 2 | 23/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.460 | time: 1:24:28.8 | step: 198 +epoch: 2 | 24/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.393 | time: 1:24:54.1 | step: 199 +epoch: 2 | 25/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.391 | time: 1:25:19.9 | step: 200 +epoch: 2 | 26/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.418 | time: 1:25:45.3 | step: 201 +epoch: 2 | 27/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.459 | time: 1:26:10.6 | step: 202 +epoch: 2 | 28/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.528 | time: 1:26:35.8 | step: 203 +epoch: 2 | 29/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.431 | time: 1:27:00.8 | step: 204 +epoch: 2 | 30/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.411 | time: 1:27:25.9 | step: 205 +epoch: 2 | 31/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.425 | time: 1:27:51.1 | step: 206 +epoch: 2 | 32/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.463 | time: 1:28:16.2 | step: 207 +epoch: 2 | 33/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.407 | time: 1:28:41.5 | step: 208 +epoch: 2 | 34/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.429 | time: 1:29:07.3 | step: 209 +epoch: 2 | 35/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.469 | time: 1:29:32.6 | step: 210 +epoch: 2 | 36/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.403 | time: 1:29:57.7 | step: 211 +epoch: 2 | 37/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.422 | time: 1:30:23.0 | step: 212 +epoch: 2 | 38/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.442 | time: 1:30:48.1 | step: 213 +epoch: 2 | 39/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.433 | time: 1:31:13.3 | step: 214 +epoch: 2 | 40/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.407 | time: 1:31:38.4 | step: 215 +epoch: 2 | 41/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.484 | time: 1:32:03.4 | step: 216 +epoch: 2 | 42/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.445 | time: 1:32:28.4 | step: 217 +epoch: 2 | 43/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.405 | time: 1:32:53.5 | step: 218 +epoch: 2 | 44/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.472 | time: 1:33:18.8 | step: 219 +epoch: 2 | 45/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.412 | time: 1:33:44.0 | step: 220 +epoch: 2 | 46/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.436 | time: 1:34:09.5 | step: 221 +epoch: 2 | 47/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.446 | time: 1:34:35.5 | step: 222 +epoch: 2 | 48/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.434 | time: 1:35:01.6 | step: 223 +epoch: 2 | 49/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.429 | time: 1:35:27.0 | step: 224 +epoch: 2 | 50/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.495 | time: 1:35:51.7 | step: 225 +epoch: 2 | 51/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.430 | time: 1:36:17.0 | step: 226 +epoch: 2 | 52/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.433 | time: 1:36:42.3 | step: 227 +epoch: 2 | 53/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.447 | time: 1:37:07.5 | step: 228 +epoch: 2 | 54/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.403 | time: 1:37:32.2 | step: 229 +epoch: 2 | 55/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.405 | time: 1:37:57.1 | step: 230 +epoch: 2 | 56/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.435 | time: 1:38:22.1 | step: 231 +epoch: 2 | 57/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.401 | time: 1:38:47.4 | step: 232 +epoch: 2 | 58/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.436 | time: 1:39:12.9 | step: 233 +epoch: 2 | 59/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.418 | time: 1:39:38.3 | step: 234 +epoch: 2 | 60/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.416 | time: 1:40:04.3 | step: 235 +epoch: 2 | 61/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.422 | time: 1:40:30.2 | step: 236 +epoch: 2 | 62/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.454 | time: 1:40:54.9 | step: 237 +epoch: 2 | 63/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.419 | time: 1:41:19.7 | step: 238 +epoch: 2 | 64/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.416 | time: 1:41:45.0 | step: 239 +epoch: 2 | 65/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.448 | time: 1:42:10.0 | step: 240 +epoch: 2 | 66/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.448 | time: 1:42:35.4 | step: 241 +epoch: 2 | 67/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.432 | time: 1:43:00.9 | step: 242 +epoch: 2 | 68/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.403 | time: 1:43:26.6 | step: 243 +epoch: 2 | 69/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.408 | time: 1:43:52.2 | step: 244 +epoch: 2 | 70/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.423 | time: 1:44:17.9 | step: 245 +epoch: 2 | 71/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.455 | time: 1:44:43.3 | step: 246 +epoch: 2 | 72/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.457 | time: 1:45:08.7 | step: 247 +epoch: 2 | 73/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.431 | time: 1:45:34.3 | step: 248 +epoch: 2 | 74/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.436 | time: 1:46:00.8 | step: 249 +epoch: 2 | 75/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.445 | time: 1:46:26.2 | step: 250 +epoch: 2 | 76/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.417 | time: 1:46:51.5 | step: 251 +epoch: 2 | 77/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.436 | time: 1:47:17.1 | step: 252 +epoch: 2 | 78/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.404 | time: 1:47:42.5 | step: 253 +epoch: 2 | 79/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.418 | time: 1:48:07.6 | step: 254 +epoch: 2 | 80/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.430 | time: 1:48:33.1 | step: 255 +epoch: 2 | 81/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.403 | time: 1:48:58.6 | step: 256 +epoch: 2 | 82/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.400 | time: 1:49:24.2 | step: 257 +epoch: 2 | 83/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.421 | time: 1:49:49.5 | step: 258 +epoch: 2 | 84/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.447 | time: 1:50:14.3 | step: 259 +epoch: 2 | 85/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.418 | time: 1:50:39.1 | step: 260 +epoch: 2 | 86/ 87 | exp/pretrain/combsub | batch/s: 0.09 | loss: 1.418 | time: 1:50:49.5 | step: 261 + --- --- +loss: 1.340. +epoch: 3 | 0/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.389 | time: 1:51:15.1 | step: 262 +epoch: 3 | 1/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.399 | time: 1:51:40.0 | step: 263 +epoch: 3 | 2/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.392 | time: 1:52:04.2 | step: 264 +epoch: 3 | 3/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.418 | time: 1:52:28.6 | step: 265 +epoch: 3 | 4/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.425 | time: 1:52:54.0 | step: 266 +epoch: 3 | 5/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.382 | time: 1:53:18.3 | step: 267 +epoch: 3 | 6/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.416 | time: 1:53:42.7 | step: 268 +epoch: 3 | 7/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.381 | time: 1:54:07.4 | step: 269 +epoch: 3 | 8/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.410 | time: 1:54:32.0 | step: 270 +epoch: 3 | 9/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.424 | time: 1:54:56.3 | step: 271 +epoch: 3 | 10/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.398 | time: 1:55:20.8 | step: 272 +epoch: 3 | 11/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.391 | time: 1:55:45.6 | step: 273 +epoch: 3 | 12/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.443 | time: 1:56:10.3 | step: 274 +epoch: 3 | 13/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.490 | time: 1:56:35.0 | step: 275 +epoch: 3 | 14/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.430 | time: 1:57:00.3 | step: 276 +epoch: 3 | 15/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.385 | time: 1:57:25.0 | step: 277 +epoch: 3 | 16/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.444 | time: 1:57:49.6 | step: 278 +epoch: 3 | 17/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.430 | time: 1:58:14.4 | step: 279 +epoch: 3 | 18/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.405 | time: 1:58:38.9 | step: 280 +epoch: 3 | 19/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.401 | time: 1:59:03.5 | step: 281 +epoch: 3 | 20/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.397 | time: 1:59:28.3 | step: 282 +epoch: 3 | 21/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.407 | time: 1:59:53.0 | step: 283 +epoch: 3 | 22/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.403 | time: 2:00:17.5 | step: 284 +epoch: 3 | 23/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.391 | time: 2:00:42.0 | step: 285 +epoch: 3 | 24/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.454 | time: 2:01:06.8 | step: 286 +epoch: 3 | 25/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.414 | time: 2:01:31.5 | step: 287 +epoch: 3 | 26/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.421 | time: 2:01:56.1 | step: 288 +epoch: 3 | 27/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.386 | time: 2:02:20.8 | step: 289 +epoch: 3 | 28/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.405 | time: 2:02:45.5 | step: 290 +epoch: 3 | 29/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.429 | time: 2:03:10.0 | step: 291 +epoch: 3 | 30/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.404 | time: 2:03:34.6 | step: 292 +epoch: 3 | 31/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.422 | time: 2:03:59.4 | step: 293 +epoch: 3 | 32/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.447 | time: 2:04:24.2 | step: 294 +epoch: 3 | 33/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.421 | time: 2:04:48.8 | step: 295 +epoch: 3 | 34/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.380 | time: 2:05:13.4 | step: 296 +epoch: 3 | 35/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.369 | time: 2:05:38.1 | step: 297 +epoch: 3 | 36/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.376 | time: 2:06:02.5 | step: 298 +epoch: 3 | 37/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.370 | time: 2:06:27.1 | step: 299 +epoch: 3 | 38/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.358 | time: 2:06:51.7 | step: 300 +epoch: 3 | 39/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.405 | time: 2:07:16.2 | step: 301 +epoch: 3 | 40/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.413 | time: 2:07:41.1 | step: 302 +epoch: 3 | 41/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.402 | time: 2:08:05.5 | step: 303 +epoch: 3 | 42/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.359 | time: 2:08:30.1 | step: 304 +epoch: 3 | 43/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.390 | time: 2:08:54.7 | step: 305 +epoch: 3 | 44/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.379 | time: 2:09:19.1 | step: 306 +epoch: 3 | 45/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.357 | time: 2:09:43.8 | step: 307 +epoch: 3 | 46/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.369 | time: 2:10:08.2 | step: 308 +epoch: 3 | 47/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.379 | time: 2:10:32.7 | step: 309 +epoch: 3 | 48/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.399 | time: 2:10:57.4 | step: 310 +epoch: 3 | 49/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.345 | time: 2:11:21.8 | step: 311 +epoch: 3 | 50/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.365 | time: 2:11:46.2 | step: 312 +epoch: 3 | 51/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.388 | time: 2:12:10.9 | step: 313 +epoch: 3 | 52/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.439 | time: 2:12:35.3 | step: 314 +epoch: 3 | 53/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.474 | time: 2:12:59.6 | step: 315 +epoch: 3 | 54/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.443 | time: 2:13:24.3 | step: 316 +epoch: 3 | 55/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.399 | time: 2:13:48.7 | step: 317 +epoch: 3 | 56/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.480 | time: 2:14:13.3 | step: 318 +epoch: 3 | 57/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.413 | time: 2:14:37.9 | step: 319 +epoch: 3 | 58/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.410 | time: 2:15:02.4 | step: 320 +epoch: 3 | 59/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.425 | time: 2:15:26.8 | step: 321 +epoch: 3 | 60/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.431 | time: 2:15:51.3 | step: 322 +epoch: 3 | 61/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.415 | time: 2:16:16.0 | step: 323 +epoch: 3 | 62/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.440 | time: 2:16:40.5 | step: 324 +epoch: 3 | 63/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.439 | time: 2:17:04.9 | step: 325 +epoch: 3 | 64/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.392 | time: 2:17:29.4 | step: 326 +epoch: 3 | 65/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.406 | time: 2:17:54.1 | step: 327 +epoch: 3 | 66/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.399 | time: 2:18:18.8 | step: 328 +epoch: 3 | 67/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.388 | time: 2:18:43.3 | step: 329 +epoch: 3 | 68/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.385 | time: 2:19:07.9 | step: 330 +epoch: 3 | 69/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.404 | time: 2:19:32.5 | step: 331 +epoch: 3 | 70/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.394 | time: 2:19:56.9 | step: 332 +epoch: 3 | 71/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.435 | time: 2:20:21.5 | step: 333 +epoch: 3 | 72/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.387 | time: 2:20:45.9 | step: 334 +epoch: 3 | 73/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.431 | time: 2:21:11.3 | step: 335 +epoch: 3 | 74/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.480 | time: 2:21:35.8 | step: 336 +epoch: 3 | 75/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.403 | time: 2:22:00.5 | step: 337 +epoch: 3 | 76/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.406 | time: 2:22:25.0 | step: 338 +epoch: 3 | 77/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.430 | time: 2:22:49.6 | step: 339 +epoch: 3 | 78/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.422 | time: 2:23:14.3 | step: 340 +epoch: 3 | 79/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.437 | time: 2:23:39.7 | step: 341 +epoch: 3 | 80/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.396 | time: 2:24:04.4 | step: 342 +epoch: 3 | 81/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.393 | time: 2:24:29.3 | step: 343 +epoch: 3 | 82/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.394 | time: 2:24:53.9 | step: 344 +epoch: 3 | 83/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.407 | time: 2:25:18.6 | step: 345 +epoch: 3 | 84/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.372 | time: 2:25:42.9 | step: 346 +epoch: 3 | 85/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.377 | time: 2:26:08.1 | step: 347 +epoch: 3 | 86/ 87 | exp/pretrain/combsub | batch/s: 0.10 | loss: 1.364 | time: 2:26:18.0 | step: 348 + --- --- +loss: 1.351. +epoch: 4 | 0/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.385 | time: 2:26:42.5 | step: 349 +epoch: 4 | 1/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.396 | time: 2:27:06.8 | step: 350 +epoch: 4 | 2/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.404 | time: 2:27:31.6 | step: 351 +epoch: 4 | 3/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.396 | time: 2:27:56.4 | step: 352 +epoch: 4 | 4/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.366 | time: 2:28:21.5 | step: 353 +epoch: 4 | 5/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.388 | time: 2:28:46.7 | step: 354 +epoch: 4 | 6/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.383 | time: 2:29:12.0 | step: 355 +epoch: 4 | 7/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.507 | time: 2:29:37.4 | step: 356 +epoch: 4 | 8/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.423 | time: 2:30:02.6 | step: 357 +epoch: 4 | 9/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.408 | time: 2:30:28.0 | step: 358 +epoch: 4 | 10/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.398 | time: 2:30:54.2 | step: 359 +epoch: 4 | 11/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.413 | time: 2:31:19.5 | step: 360 +epoch: 4 | 12/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.428 | time: 2:31:44.2 | step: 361 +epoch: 4 | 13/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.425 | time: 2:32:09.3 | step: 362 +epoch: 4 | 14/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.396 | time: 2:32:34.6 | step: 363 +epoch: 4 | 15/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.386 | time: 2:33:00.1 | step: 364 +epoch: 4 | 16/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.421 | time: 2:33:25.4 | step: 365 +epoch: 4 | 17/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.406 | time: 2:33:50.8 | step: 366 +epoch: 4 | 18/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.405 | time: 2:34:16.2 | step: 367 +epoch: 4 | 19/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.407 | time: 2:34:41.1 | step: 368 +epoch: 4 | 20/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.415 | time: 2:35:06.0 | step: 369 +epoch: 4 | 21/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.409 | time: 2:35:32.0 | step: 370 +epoch: 4 | 22/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.398 | time: 2:35:57.9 | step: 371 +epoch: 4 | 23/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.433 | time: 2:36:23.6 | step: 372 +epoch: 4 | 24/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.374 | time: 2:36:49.4 | step: 373 +epoch: 4 | 25/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.363 | time: 2:37:15.1 | step: 374 +epoch: 4 | 26/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.401 | time: 2:37:40.8 | step: 375 +epoch: 4 | 27/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.400 | time: 2:38:07.2 | step: 376 +epoch: 4 | 28/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.375 | time: 2:38:32.7 | step: 377 +epoch: 4 | 29/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.410 | time: 2:38:58.5 | step: 378 +epoch: 4 | 30/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.355 | time: 2:39:24.3 | step: 379 +epoch: 4 | 31/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.369 | time: 2:39:50.0 | step: 380 +epoch: 4 | 32/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.376 | time: 2:40:14.9 | step: 381 +epoch: 4 | 33/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.395 | time: 2:40:39.6 | step: 382 +epoch: 4 | 34/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.403 | time: 2:41:04.3 | step: 383 +epoch: 4 | 35/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.393 | time: 2:41:29.1 | step: 384 +epoch: 4 | 36/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.395 | time: 2:41:54.2 | step: 385 +epoch: 4 | 37/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.356 | time: 2:42:19.1 | step: 386 +epoch: 4 | 38/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.359 | time: 2:42:45.1 | step: 387 +epoch: 4 | 39/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.392 | time: 2:43:11.1 | step: 388 +epoch: 4 | 40/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.373 | time: 2:43:37.2 | step: 389 +epoch: 4 | 41/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.394 | time: 2:44:02.8 | step: 390 +epoch: 4 | 42/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.394 | time: 2:44:28.6 | step: 391 +epoch: 4 | 43/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.356 | time: 2:44:54.5 | step: 392 +epoch: 4 | 44/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.391 | time: 2:45:20.3 | step: 393 +epoch: 4 | 45/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.410 | time: 2:45:46.2 | step: 394 +epoch: 4 | 46/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.391 | time: 2:46:12.2 | step: 395 +epoch: 4 | 47/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.369 | time: 2:46:38.5 | step: 396 +epoch: 4 | 48/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.380 | time: 2:47:03.4 | step: 397 +epoch: 4 | 49/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.419 | time: 2:47:28.2 | step: 398 +epoch: 4 | 50/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.358 | time: 2:47:54.1 | step: 399 +epoch: 4 | 51/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.416 | time: 2:48:20.0 | step: 400 +epoch: 4 | 52/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.395 | time: 2:48:46.0 | step: 401 +epoch: 4 | 53/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.368 | time: 2:49:11.9 | step: 402 +epoch: 4 | 54/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.419 | time: 2:49:38.0 | step: 403 +epoch: 4 | 55/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.364 | time: 2:50:04.1 | step: 404 +epoch: 4 | 56/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.371 | time: 2:50:30.3 | step: 405 +epoch: 4 | 57/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.405 | time: 2:50:56.3 | step: 406 +epoch: 4 | 58/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.364 | time: 2:51:22.2 | step: 407 +epoch: 4 | 59/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.418 | time: 2:51:48.1 | step: 408 +epoch: 4 | 60/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.380 | time: 2:52:14.4 | step: 409 +epoch: 4 | 61/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.353 | time: 2:52:40.3 | step: 410 +epoch: 4 | 62/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.398 | time: 2:53:06.1 | step: 411 +epoch: 4 | 63/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.357 | time: 2:53:32.0 | step: 412 +epoch: 4 | 64/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.371 | time: 2:53:57.8 | step: 413 +epoch: 4 | 65/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.388 | time: 2:54:23.8 | step: 414 +epoch: 4 | 66/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.366 | time: 2:54:49.7 | step: 415 +epoch: 4 | 67/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.411 | time: 2:55:15.7 | step: 416 +epoch: 4 | 68/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.374 | time: 2:55:41.0 | step: 417 +epoch: 4 | 69/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.422 | time: 2:56:07.0 | step: 418 +epoch: 4 | 70/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.388 | time: 2:56:33.8 | step: 419 +epoch: 4 | 71/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.374 | time: 2:57:00.6 | step: 420 +epoch: 4 | 72/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.409 | time: 2:57:27.0 | step: 421 +epoch: 4 | 73/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.430 | time: 2:57:52.6 | step: 422 +epoch: 4 | 74/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.423 | time: 2:58:18.6 | step: 423 +epoch: 4 | 75/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.404 | time: 2:58:44.1 | step: 424 +epoch: 4 | 76/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.441 | time: 2:59:09.2 | step: 425 +epoch: 4 | 77/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.381 | time: 2:59:34.7 | step: 426 +epoch: 4 | 78/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.405 | time: 2:59:59.9 | step: 427 +epoch: 4 | 79/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.370 | time: 3:00:24.7 | step: 428 +epoch: 4 | 80/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.426 | time: 3:00:49.3 | step: 429 +epoch: 4 | 81/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.383 | time: 3:01:14.0 | step: 430 +epoch: 4 | 82/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.394 | time: 3:01:40.1 | step: 431 +epoch: 4 | 83/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.378 | time: 3:02:05.8 | step: 432 +epoch: 4 | 84/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.407 | time: 3:02:30.8 | step: 433 +epoch: 4 | 85/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.374 | time: 3:02:56.2 | step: 434 +epoch: 4 | 86/ 87 | exp/pretrain/combsub | batch/s: 0.10 | loss: 1.349 | time: 3:03:06.4 | step: 435 + --- --- +loss: 1.346. +epoch: 5 | 0/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.396 | time: 3:03:31.9 | step: 436 +epoch: 5 | 1/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.365 | time: 3:03:57.3 | step: 437 +epoch: 5 | 2/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.399 | time: 3:04:23.0 | step: 438 +epoch: 5 | 3/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.402 | time: 3:04:48.1 | step: 439 +epoch: 5 | 4/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.361 | time: 3:05:13.7 | step: 440 +epoch: 5 | 5/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.392 | time: 3:05:38.9 | step: 441 +epoch: 5 | 6/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.375 | time: 3:06:04.1 | step: 442 +epoch: 5 | 7/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.371 | time: 3:06:29.4 | step: 443 +epoch: 5 | 8/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.376 | time: 3:06:55.2 | step: 444 +epoch: 5 | 9/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.398 | time: 3:07:20.7 | step: 445 +epoch: 5 | 10/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.376 | time: 3:07:46.2 | step: 446 +epoch: 5 | 11/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.367 | time: 3:08:11.9 | step: 447 +epoch: 5 | 12/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.381 | time: 3:08:37.5 | step: 448 +epoch: 5 | 13/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.362 | time: 3:09:03.0 | step: 449 +epoch: 5 | 14/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.385 | time: 3:09:28.6 | step: 450 +epoch: 5 | 15/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.354 | time: 3:09:54.6 | step: 451 +epoch: 5 | 16/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.374 | time: 3:10:20.3 | step: 452 +epoch: 5 | 17/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.375 | time: 3:10:46.1 | step: 453 +epoch: 5 | 18/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.365 | time: 3:11:11.1 | step: 454 +epoch: 5 | 19/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.396 | time: 3:11:36.9 | step: 455 +epoch: 5 | 20/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.348 | time: 3:12:02.7 | step: 456 +epoch: 5 | 21/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.389 | time: 3:12:28.7 | step: 457 +epoch: 5 | 22/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.363 | time: 3:12:54.7 | step: 458 +epoch: 5 | 23/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.348 | time: 3:13:20.3 | step: 459 +epoch: 5 | 24/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.370 | time: 3:13:46.5 | step: 460 +epoch: 5 | 25/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.340 | time: 3:14:12.3 | step: 461 +epoch: 5 | 26/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.376 | time: 3:14:38.2 | step: 462 +epoch: 5 | 27/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.357 | time: 3:15:04.0 | step: 463 +epoch: 5 | 28/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.375 | time: 3:15:29.7 | step: 464 +epoch: 5 | 29/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.353 | time: 3:15:55.5 | step: 465 +epoch: 5 | 30/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.374 | time: 3:16:21.2 | step: 466 +epoch: 5 | 31/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.366 | time: 3:16:47.4 | step: 467 +epoch: 5 | 32/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.369 | time: 3:17:13.2 | step: 468 +epoch: 5 | 33/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.465 | time: 3:17:39.3 | step: 469 +epoch: 5 | 34/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.381 | time: 3:18:04.9 | step: 470 +epoch: 5 | 35/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.367 | time: 3:18:31.0 | step: 471 +epoch: 5 | 36/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.350 | time: 3:18:58.1 | step: 472 +epoch: 5 | 37/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.343 | time: 3:19:23.9 | step: 473 +epoch: 5 | 38/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.338 | time: 3:19:50.5 | step: 474 +epoch: 5 | 39/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.354 | time: 3:20:17.5 | step: 475 +epoch: 5 | 40/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.341 | time: 3:20:44.0 | step: 476 +epoch: 5 | 41/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.395 | time: 3:21:10.1 | step: 477 +epoch: 5 | 42/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.353 | time: 3:21:36.2 | step: 478 +epoch: 5 | 43/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.424 | time: 3:22:02.6 | step: 479 +epoch: 5 | 44/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.378 | time: 3:22:29.3 | step: 480 +epoch: 5 | 45/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.363 | time: 3:22:55.6 | step: 481 +epoch: 5 | 46/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.367 | time: 3:23:21.7 | step: 482 +epoch: 5 | 47/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.370 | time: 3:23:48.1 | step: 483 +epoch: 5 | 48/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.392 | time: 3:24:15.1 | step: 484 +epoch: 5 | 49/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.372 | time: 3:24:40.9 | step: 485 +epoch: 5 | 50/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.364 | time: 3:25:06.6 | step: 486 +epoch: 5 | 51/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.365 | time: 3:25:32.4 | step: 487 +epoch: 5 | 52/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.386 | time: 3:25:58.3 | step: 488 +epoch: 5 | 53/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.371 | time: 3:26:24.1 | step: 489 +epoch: 5 | 54/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.353 | time: 3:26:49.9 | step: 490 +epoch: 5 | 55/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.367 | time: 3:27:15.7 | step: 491 +epoch: 5 | 56/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.343 | time: 3:27:41.9 | step: 492 +epoch: 5 | 57/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.399 | time: 3:28:07.8 | step: 493 +epoch: 5 | 58/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.383 | time: 3:28:33.9 | step: 494 +epoch: 5 | 59/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.391 | time: 3:29:00.4 | step: 495 +epoch: 5 | 60/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.335 | time: 3:29:26.3 | step: 496 +epoch: 5 | 61/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.338 | time: 3:29:51.7 | step: 497 +epoch: 5 | 62/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.379 | time: 3:30:17.5 | step: 498 +epoch: 5 | 63/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.356 | time: 3:30:43.7 | step: 499 +epoch: 5 | 64/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.358 | time: 3:31:10.1 | step: 500 +epoch: 5 | 65/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.338 | time: 3:31:36.0 | step: 501 +epoch: 5 | 66/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.403 | time: 3:32:02.6 | step: 502 +epoch: 5 | 67/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.326 | time: 3:32:28.6 | step: 503 +epoch: 5 | 68/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.368 | time: 3:32:54.9 | step: 504 +epoch: 5 | 69/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.379 | time: 3:33:20.9 | step: 505 +epoch: 5 | 70/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.399 | time: 3:33:47.0 | step: 506 +epoch: 5 | 71/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.353 | time: 3:34:13.2 | step: 507 +epoch: 5 | 72/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.461 | time: 3:34:39.9 | step: 508 +epoch: 5 | 73/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.395 | time: 3:35:06.0 | step: 509 +epoch: 5 | 74/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.417 | time: 3:35:31.9 | step: 510 +epoch: 5 | 75/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.384 | time: 3:35:58.4 | step: 511 +epoch: 5 | 76/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.386 | time: 3:36:24.3 | step: 512 +epoch: 5 | 77/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.382 | time: 3:36:50.5 | step: 513 +epoch: 5 | 78/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.365 | time: 3:37:16.4 | step: 514 +epoch: 5 | 79/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.349 | time: 3:37:42.2 | step: 515 +epoch: 5 | 80/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.382 | time: 3:38:07.9 | step: 516 +epoch: 5 | 81/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.367 | time: 3:38:34.1 | step: 517 +epoch: 5 | 82/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.363 | time: 3:39:00.1 | step: 518 +epoch: 5 | 83/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.392 | time: 3:39:25.9 | step: 519 +epoch: 5 | 84/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.329 | time: 3:39:52.0 | step: 520 +epoch: 5 | 85/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.338 | time: 3:40:17.8 | step: 521 +epoch: 5 | 86/ 87 | exp/pretrain/combsub | batch/s: 0.09 | loss: 1.361 | time: 3:40:28.3 | step: 522 + --- --- +loss: 1.304. +epoch: 6 | 0/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.324 | time: 3:40:53.9 | step: 523 +epoch: 6 | 1/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.334 | time: 3:41:18.3 | step: 524 +epoch: 6 | 2/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.334 | time: 3:41:43.8 | step: 525 +epoch: 6 | 3/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.343 | time: 3:42:09.3 | step: 526 +epoch: 6 | 4/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.345 | time: 3:42:35.2 | step: 527 +epoch: 6 | 5/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.377 | time: 3:43:00.9 | step: 528 +epoch: 6 | 6/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.339 | time: 3:43:26.4 | step: 529 +epoch: 6 | 7/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.380 | time: 3:43:52.4 | step: 530 +epoch: 6 | 8/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.371 | time: 3:44:18.1 | step: 531 +epoch: 6 | 9/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.352 | time: 3:44:43.7 | step: 532 +epoch: 6 | 10/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.371 | time: 3:45:09.5 | step: 533 +epoch: 6 | 11/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.348 | time: 3:45:35.4 | step: 534 +epoch: 6 | 12/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.346 | time: 3:46:01.3 | step: 535 +epoch: 6 | 13/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.348 | time: 3:46:26.4 | step: 536 +epoch: 6 | 14/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.375 | time: 3:46:52.2 | step: 537 +epoch: 6 | 15/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.359 | time: 3:47:18.1 | step: 538 +epoch: 6 | 16/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.351 | time: 3:47:43.5 | step: 539 +epoch: 6 | 17/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.331 | time: 3:48:09.5 | step: 540 +epoch: 6 | 18/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.335 | time: 3:48:35.0 | step: 541 +epoch: 6 | 19/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.320 | time: 3:49:00.8 | step: 542 +epoch: 6 | 20/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.329 | time: 3:49:26.5 | step: 543 +epoch: 6 | 21/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.341 | time: 3:49:52.3 | step: 544 +epoch: 6 | 22/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.363 | time: 3:50:17.9 | step: 545 +epoch: 6 | 23/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.353 | time: 3:50:44.5 | step: 546 +epoch: 6 | 24/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.428 | time: 3:51:10.3 | step: 547 +epoch: 6 | 25/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.346 | time: 3:51:35.6 | step: 548 +epoch: 6 | 26/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.347 | time: 3:52:01.1 | step: 549 +epoch: 6 | 27/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.391 | time: 3:52:26.6 | step: 550 +epoch: 6 | 28/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.353 | time: 3:52:52.1 | step: 551 +epoch: 6 | 29/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.358 | time: 3:53:17.6 | step: 552 +epoch: 6 | 30/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.396 | time: 3:53:42.9 | step: 553 +epoch: 6 | 31/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.351 | time: 3:54:08.3 | step: 554 +epoch: 6 | 32/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.360 | time: 3:54:35.1 | step: 555 +epoch: 6 | 33/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.339 | time: 3:55:00.6 | step: 556 +epoch: 6 | 34/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.412 | time: 3:55:26.2 | step: 557 +epoch: 6 | 35/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.387 | time: 3:55:51.7 | step: 558 +epoch: 6 | 36/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.325 | time: 3:56:17.3 | step: 559 +epoch: 6 | 37/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.373 | time: 3:56:43.3 | step: 560 +epoch: 6 | 38/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.343 | time: 3:57:08.2 | step: 561 +epoch: 6 | 39/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.335 | time: 3:57:33.6 | step: 562 +epoch: 6 | 40/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.360 | time: 3:57:59.0 | step: 563 +epoch: 6 | 41/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.339 | time: 3:58:24.8 | step: 564 +epoch: 6 | 42/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.340 | time: 3:58:50.4 | step: 565 +epoch: 6 | 43/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.347 | time: 3:59:15.7 | step: 566 +epoch: 6 | 44/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.365 | time: 3:59:41.2 | step: 567 +epoch: 6 | 45/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.351 | time: 4:00:07.1 | step: 568 +epoch: 6 | 46/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.328 | time: 4:00:33.1 | step: 569 +epoch: 6 | 47/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.369 | time: 4:00:58.9 | step: 570 +epoch: 6 | 48/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.337 | time: 4:01:24.4 | step: 571 +epoch: 6 | 49/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.346 | time: 4:01:50.1 | step: 572 +epoch: 6 | 50/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.334 | time: 4:02:16.0 | step: 573 +epoch: 6 | 51/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.341 | time: 4:02:41.4 | step: 574 +epoch: 6 | 52/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.383 | time: 4:03:07.1 | step: 575 +epoch: 6 | 53/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.324 | time: 4:03:32.9 | step: 576 +epoch: 6 | 54/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.367 | time: 4:03:58.8 | step: 577 +epoch: 6 | 55/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.330 | time: 4:04:23.8 | step: 578 +epoch: 6 | 56/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.389 | time: 4:04:49.8 | step: 579 +epoch: 6 | 57/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.409 | time: 4:05:15.4 | step: 580 +epoch: 6 | 58/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.390 | time: 4:05:40.5 | step: 581 +epoch: 6 | 59/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.365 | time: 4:06:05.9 | step: 582 +epoch: 6 | 60/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.403 | time: 4:06:31.3 | step: 583 +epoch: 6 | 61/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.367 | time: 4:06:56.8 | step: 584 +epoch: 6 | 62/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.338 | time: 4:07:22.8 | step: 585 +epoch: 6 | 63/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.367 | time: 4:07:48.6 | step: 586 +epoch: 6 | 64/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.340 | time: 4:08:14.3 | step: 587 +epoch: 6 | 65/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.414 | time: 4:08:39.9 | step: 588 +epoch: 6 | 66/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.357 | time: 4:09:05.7 | step: 589 +epoch: 6 | 67/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.340 | time: 4:09:31.2 | step: 590 +epoch: 6 | 68/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.378 | time: 4:09:57.0 | step: 591 +epoch: 6 | 69/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.346 | time: 4:10:22.2 | step: 592 +epoch: 6 | 70/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.383 | time: 4:10:47.6 | step: 593 +epoch: 6 | 71/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.353 | time: 4:11:13.4 | step: 594 +epoch: 6 | 72/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.348 | time: 4:11:39.6 | step: 595 +epoch: 6 | 73/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.349 | time: 4:12:05.4 | step: 596 +epoch: 6 | 74/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.388 | time: 4:12:31.7 | step: 597 +epoch: 6 | 75/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.365 | time: 4:12:58.1 | step: 598 +epoch: 6 | 76/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.360 | time: 4:13:24.4 | step: 599 +epoch: 6 | 77/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.346 | time: 4:13:51.8 | step: 600 +epoch: 6 | 78/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.408 | time: 4:14:17.1 | step: 601 +epoch: 6 | 79/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.395 | time: 4:14:42.3 | step: 602 +epoch: 6 | 80/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.341 | time: 4:15:08.9 | step: 603 +epoch: 6 | 81/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.359 | time: 4:15:34.4 | step: 604 +epoch: 6 | 82/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.355 | time: 4:16:00.4 | step: 605 +epoch: 6 | 83/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.342 | time: 4:16:26.3 | step: 606 +epoch: 6 | 84/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.362 | time: 4:16:52.2 | step: 607 +epoch: 6 | 85/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.349 | time: 4:17:16.8 | step: 608 +epoch: 6 | 86/ 87 | exp/pretrain/combsub | batch/s: 0.10 | loss: 1.376 | time: 4:17:27.0 | step: 609 + --- --- +loss: 1.277. +epoch: 7 | 0/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.341 | time: 4:17:52.5 | step: 610 +epoch: 7 | 1/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.368 | time: 4:18:18.1 | step: 611 +epoch: 7 | 2/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.354 | time: 4:18:44.0 | step: 612 +epoch: 7 | 3/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.396 | time: 4:19:09.3 | step: 613 +epoch: 7 | 4/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.361 | time: 4:19:34.5 | step: 614 +epoch: 7 | 5/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.402 | time: 4:19:59.6 | step: 615 +epoch: 7 | 6/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.352 | time: 4:20:25.2 | step: 616 +epoch: 7 | 7/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.377 | time: 4:20:51.1 | step: 617 +epoch: 7 | 8/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.374 | time: 4:21:16.4 | step: 618 +epoch: 7 | 9/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.405 | time: 4:21:41.5 | step: 619 +epoch: 7 | 10/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.361 | time: 4:22:07.3 | step: 620 +epoch: 7 | 11/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.358 | time: 4:22:31.9 | step: 621 +epoch: 7 | 12/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.370 | time: 4:22:57.3 | step: 622 +epoch: 7 | 13/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.373 | time: 4:23:23.3 | step: 623 +epoch: 7 | 14/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.362 | time: 4:23:48.5 | step: 624 +epoch: 7 | 15/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.325 | time: 4:24:14.6 | step: 625 +epoch: 7 | 16/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.339 | time: 4:24:39.8 | step: 626 +epoch: 7 | 17/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.357 | time: 4:25:06.2 | step: 627 +epoch: 7 | 18/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.341 | time: 4:25:31.4 | step: 628 +epoch: 7 | 19/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.347 | time: 4:25:56.6 | step: 629 +epoch: 7 | 20/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.376 | time: 4:26:21.3 | step: 630 +epoch: 7 | 21/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.349 | time: 4:26:45.9 | step: 631 +epoch: 7 | 22/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.323 | time: 4:27:11.3 | step: 632 +epoch: 7 | 23/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.360 | time: 4:27:37.1 | step: 633 +epoch: 7 | 24/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.348 | time: 4:28:02.6 | step: 634 +epoch: 7 | 25/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.368 | time: 4:28:28.3 | step: 635 +epoch: 7 | 26/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.324 | time: 4:28:53.4 | step: 636 +epoch: 7 | 27/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.326 | time: 4:29:18.8 | step: 637 +epoch: 7 | 28/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.338 | time: 4:29:44.6 | step: 638 +epoch: 7 | 29/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.319 | time: 4:30:09.5 | step: 639 +epoch: 7 | 30/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.349 | time: 4:30:35.4 | step: 640 +epoch: 7 | 31/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.336 | time: 4:31:00.4 | step: 641 +epoch: 7 | 32/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.313 | time: 4:31:25.6 | step: 642 +epoch: 7 | 33/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.343 | time: 4:31:50.2 | step: 643 +epoch: 7 | 34/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.335 | time: 4:32:15.2 | step: 644 +epoch: 7 | 35/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.346 | time: 4:32:39.8 | step: 645 +epoch: 7 | 36/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.346 | time: 4:33:04.5 | step: 646 +epoch: 7 | 37/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.398 | time: 4:33:29.8 | step: 647 +epoch: 7 | 38/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.365 | time: 4:33:55.4 | step: 648 +epoch: 7 | 39/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.332 | time: 4:34:21.1 | step: 649 +epoch: 7 | 40/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.364 | time: 4:34:46.7 | step: 650 +epoch: 7 | 41/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.350 | time: 4:35:12.2 | step: 651 +epoch: 7 | 42/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.313 | time: 4:35:37.2 | step: 652 +epoch: 7 | 43/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.429 | time: 4:36:02.6 | step: 653 +epoch: 7 | 44/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.378 | time: 4:36:28.2 | step: 654 +epoch: 7 | 45/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.334 | time: 4:36:53.4 | step: 655 +epoch: 7 | 46/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.360 | time: 4:37:19.2 | step: 656 +epoch: 7 | 47/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.354 | time: 4:37:44.2 | step: 657 +epoch: 7 | 48/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.347 | time: 4:38:09.9 | step: 658 +epoch: 7 | 49/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.382 | time: 4:38:35.3 | step: 659 +epoch: 7 | 50/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.352 | time: 4:39:01.1 | step: 660 +epoch: 7 | 51/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.336 | time: 4:39:26.1 | step: 661 +epoch: 7 | 52/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.393 | time: 4:39:52.5 | step: 662 +epoch: 7 | 53/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.343 | time: 4:40:18.4 | step: 663 +epoch: 7 | 54/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.374 | time: 4:40:44.2 | step: 664 +epoch: 7 | 55/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.362 | time: 4:41:09.8 | step: 665 +epoch: 7 | 56/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.340 | time: 4:41:35.5 | step: 666 +epoch: 7 | 57/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.358 | time: 4:42:01.4 | step: 667 +epoch: 7 | 58/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.344 | time: 4:42:27.4 | step: 668 +epoch: 7 | 59/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.382 | time: 4:42:52.3 | step: 669 +epoch: 7 | 60/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.353 | time: 4:43:18.1 | step: 670 +epoch: 7 | 61/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.338 | time: 4:43:43.4 | step: 671 +epoch: 7 | 62/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.350 | time: 4:44:09.0 | step: 672 +epoch: 7 | 63/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.321 | time: 4:44:34.7 | step: 673 +epoch: 7 | 64/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.329 | time: 4:45:00.2 | step: 674 +epoch: 7 | 65/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.336 | time: 4:45:25.8 | step: 675 +epoch: 7 | 66/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.308 | time: 4:45:51.0 | step: 676 +epoch: 7 | 67/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.342 | time: 4:46:15.8 | step: 677 +epoch: 7 | 68/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.324 | time: 4:46:40.9 | step: 678 +epoch: 7 | 69/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.320 | time: 4:47:05.7 | step: 679 +epoch: 7 | 70/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.344 | time: 4:47:30.8 | step: 680 +epoch: 7 | 71/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.318 | time: 4:47:55.7 | step: 681 +epoch: 7 | 72/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.306 | time: 4:48:21.0 | step: 682 +epoch: 7 | 73/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.338 | time: 4:48:45.8 | step: 683 +epoch: 7 | 74/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.333 | time: 4:49:11.5 | step: 684 +epoch: 7 | 75/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.326 | time: 4:49:36.2 | step: 685 +epoch: 7 | 76/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.348 | time: 4:50:01.8 | step: 686 +epoch: 7 | 77/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.357 | time: 4:50:27.5 | step: 687 +epoch: 7 | 78/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.335 | time: 4:50:53.0 | step: 688 +epoch: 7 | 79/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.331 | time: 4:51:18.2 | step: 689 +epoch: 7 | 80/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.391 | time: 4:51:43.2 | step: 690 +epoch: 7 | 81/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.364 | time: 4:52:08.0 | step: 691 +epoch: 7 | 82/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.368 | time: 4:52:32.7 | step: 692 +epoch: 7 | 83/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.326 | time: 4:52:57.7 | step: 693 +epoch: 7 | 84/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.345 | time: 4:53:22.8 | step: 694 +epoch: 7 | 85/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.358 | time: 4:53:47.8 | step: 695 +epoch: 7 | 86/ 87 | exp/pretrain/combsub | batch/s: 0.10 | loss: 1.336 | time: 4:53:57.9 | step: 696 + --- --- +loss: 1.241. +epoch: 8 | 0/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.331 | time: 4:54:22.8 | step: 697 +epoch: 8 | 1/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.352 | time: 4:54:47.5 | step: 698 +epoch: 8 | 2/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.329 | time: 4:55:12.4 | step: 699 +epoch: 8 | 3/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.354 | time: 4:55:38.1 | step: 700 +epoch: 8 | 4/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.359 | time: 4:56:03.3 | step: 701 +epoch: 8 | 5/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.330 | time: 4:56:28.5 | step: 702 +epoch: 8 | 6/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.356 | time: 4:56:53.3 | step: 703 +epoch: 8 | 7/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.376 | time: 4:57:19.1 | step: 704 +epoch: 8 | 8/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.334 | time: 4:57:44.3 | step: 705 +epoch: 8 | 9/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.321 | time: 4:58:09.4 | step: 706 +epoch: 8 | 10/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.328 | time: 4:58:35.1 | step: 707 +epoch: 8 | 11/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.346 | time: 4:59:00.1 | step: 708 +epoch: 8 | 12/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.325 | time: 4:59:25.2 | step: 709 +epoch: 8 | 13/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.363 | time: 4:59:50.8 | step: 710 +epoch: 8 | 14/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.342 | time: 5:00:16.0 | step: 711 +epoch: 8 | 15/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.359 | time: 5:00:41.5 | step: 712 +epoch: 8 | 16/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.311 | time: 5:01:06.1 | step: 713 +epoch: 8 | 17/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.351 | time: 5:01:31.6 | step: 714 +epoch: 8 | 18/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.328 | time: 5:01:57.1 | step: 715 +epoch: 8 | 19/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.354 | time: 5:02:22.2 | step: 716 +epoch: 8 | 20/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.318 | time: 5:02:47.2 | step: 717 +epoch: 8 | 21/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.338 | time: 5:03:12.2 | step: 718 +epoch: 8 | 22/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.307 | time: 5:03:37.1 | step: 719 +epoch: 8 | 23/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.323 | time: 5:04:02.3 | step: 720 +epoch: 8 | 24/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.353 | time: 5:04:27.6 | step: 721 +epoch: 8 | 25/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.317 | time: 5:04:52.4 | step: 722 +epoch: 8 | 26/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.307 | time: 5:05:17.4 | step: 723 +epoch: 8 | 27/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.383 | time: 5:05:43.1 | step: 724 +epoch: 8 | 28/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.363 | time: 5:06:08.6 | step: 725 +epoch: 8 | 29/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.346 | time: 5:06:34.3 | step: 726 +epoch: 8 | 30/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.365 | time: 5:07:00.1 | step: 727 +epoch: 8 | 31/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.357 | time: 5:07:25.6 | step: 728 +epoch: 8 | 32/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.340 | time: 5:07:51.8 | step: 729 +epoch: 8 | 33/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.350 | time: 5:08:17.6 | step: 730 +epoch: 8 | 34/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.350 | time: 5:08:42.8 | step: 731 +epoch: 8 | 35/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.366 | time: 5:09:08.0 | step: 732 +epoch: 8 | 36/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.345 | time: 5:09:33.5 | step: 733 +epoch: 8 | 37/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.347 | time: 5:09:58.7 | step: 734 +epoch: 8 | 38/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.334 | time: 5:10:24.7 | step: 735 +epoch: 8 | 39/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.337 | time: 5:10:50.4 | step: 736 +epoch: 8 | 40/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.325 | time: 5:11:15.6 | step: 737 +epoch: 8 | 41/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.327 | time: 5:11:41.5 | step: 738 +epoch: 8 | 42/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.318 | time: 5:12:06.6 | step: 739 +epoch: 8 | 43/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.344 | time: 5:12:32.3 | step: 740 +epoch: 8 | 44/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.314 | time: 5:12:58.3 | step: 741 +epoch: 8 | 45/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.346 | time: 5:13:24.1 | step: 742 +epoch: 8 | 46/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.318 | time: 5:13:50.0 | step: 743 +epoch: 8 | 47/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.343 | time: 5:14:15.9 | step: 744 +epoch: 8 | 48/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.351 | time: 5:14:41.8 | step: 745 +epoch: 8 | 49/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.307 | time: 5:15:06.4 | step: 746 +epoch: 8 | 50/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.316 | time: 5:15:32.2 | step: 747 +epoch: 8 | 51/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.321 | time: 5:15:57.8 | step: 748 +epoch: 8 | 52/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.326 | time: 5:16:23.1 | step: 749 +epoch: 8 | 53/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.311 | time: 5:16:48.7 | step: 750 +epoch: 8 | 54/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.321 | time: 5:17:13.9 | step: 751 +epoch: 8 | 55/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.322 | time: 5:17:39.7 | step: 752 +epoch: 8 | 56/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.325 | time: 5:18:05.5 | step: 753 +epoch: 8 | 57/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.317 | time: 5:18:30.7 | step: 754 +epoch: 8 | 58/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.347 | time: 5:18:55.8 | step: 755 +epoch: 8 | 59/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.385 | time: 5:19:21.1 | step: 756 +epoch: 8 | 60/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.316 | time: 5:19:46.6 | step: 757 +epoch: 8 | 61/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.344 | time: 5:20:11.8 | step: 758 +epoch: 8 | 62/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.357 | time: 5:20:37.3 | step: 759 +epoch: 8 | 63/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.321 | time: 5:21:02.6 | step: 760 +epoch: 8 | 64/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.341 | time: 5:21:28.1 | step: 761 +epoch: 8 | 65/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.346 | time: 5:21:54.0 | step: 762 +epoch: 8 | 66/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.331 | time: 5:22:19.5 | step: 763 +epoch: 8 | 67/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.329 | time: 5:22:44.7 | step: 764 +epoch: 8 | 68/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.322 | time: 5:23:09.6 | step: 765 +epoch: 8 | 69/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.360 | time: 5:23:34.6 | step: 766 +epoch: 8 | 70/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.383 | time: 5:23:59.9 | step: 767 +epoch: 8 | 71/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.328 | time: 5:24:25.2 | step: 768 +epoch: 8 | 72/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.336 | time: 5:24:50.5 | step: 769 +epoch: 8 | 73/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.316 | time: 5:25:16.3 | step: 770 +epoch: 8 | 74/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.314 | time: 5:25:41.7 | step: 771 +epoch: 8 | 75/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.326 | time: 5:26:07.2 | step: 772 +epoch: 8 | 76/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.315 | time: 5:26:32.2 | step: 773 +epoch: 8 | 77/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.369 | time: 5:26:57.7 | step: 774 +epoch: 8 | 78/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.316 | time: 5:27:22.2 | step: 775 +epoch: 8 | 79/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.306 | time: 5:27:46.7 | step: 776 +epoch: 8 | 80/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.335 | time: 5:28:12.2 | step: 777 +epoch: 8 | 81/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.355 | time: 5:28:36.6 | step: 778 +epoch: 8 | 82/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.302 | time: 5:29:01.6 | step: 779 +epoch: 8 | 83/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.351 | time: 5:29:26.5 | step: 780 +epoch: 8 | 84/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.363 | time: 5:29:51.6 | step: 781 +epoch: 8 | 85/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.305 | time: 5:30:17.4 | step: 782 +epoch: 8 | 86/ 87 | exp/pretrain/combsub | batch/s: 0.10 | loss: 1.339 | time: 5:30:27.7 | step: 783 + --- --- +loss: 1.226. +epoch: 9 | 0/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.342 | time: 5:30:53.1 | step: 784 +epoch: 9 | 1/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.399 | time: 5:31:19.4 | step: 785 +epoch: 9 | 2/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.317 | time: 5:31:45.1 | step: 786 +epoch: 9 | 3/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.347 | time: 5:32:10.2 | step: 787 +epoch: 9 | 4/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.351 | time: 5:32:35.6 | step: 788 +epoch: 9 | 5/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.305 | time: 5:33:01.6 | step: 789 +epoch: 9 | 6/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.370 | time: 5:33:26.5 | step: 790 +epoch: 9 | 7/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.344 | time: 5:33:52.4 | step: 791 +epoch: 9 | 8/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.327 | time: 5:34:17.5 | step: 792 +epoch: 9 | 9/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.393 | time: 5:34:42.7 | step: 793 +epoch: 9 | 10/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.358 | time: 5:35:07.8 | step: 794 +epoch: 9 | 11/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.347 | time: 5:35:33.4 | step: 795 +epoch: 9 | 12/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.350 | time: 5:35:58.4 | step: 796 +epoch: 9 | 13/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.333 | time: 5:36:24.2 | step: 797 +epoch: 9 | 14/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.319 | time: 5:36:49.7 | step: 798 +epoch: 9 | 15/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.336 | time: 5:37:15.4 | step: 799 +epoch: 9 | 16/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.357 | time: 5:37:41.5 | step: 800 +epoch: 9 | 17/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.333 | time: 5:38:07.4 | step: 801 +epoch: 9 | 18/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.335 | time: 5:38:33.9 | step: 802 +epoch: 9 | 19/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.337 | time: 5:39:00.3 | step: 803 +epoch: 9 | 20/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.310 | time: 5:39:26.5 | step: 804 +epoch: 9 | 21/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.320 | time: 5:39:52.5 | step: 805 +epoch: 9 | 22/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.352 | time: 5:40:18.6 | step: 806 +epoch: 9 | 23/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.326 | time: 5:40:44.7 | step: 807 +epoch: 9 | 24/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.343 | time: 5:41:10.8 | step: 808 +epoch: 9 | 25/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.375 | time: 5:41:36.8 | step: 809 +epoch: 9 | 26/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.344 | time: 5:42:02.9 | step: 810 +epoch: 9 | 27/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.302 | time: 5:42:29.0 | step: 811 +epoch: 9 | 28/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.358 | time: 5:42:55.0 | step: 812 +epoch: 9 | 29/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.319 | time: 5:43:21.4 | step: 813 +epoch: 9 | 30/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.314 | time: 5:43:46.9 | step: 814 +epoch: 9 | 31/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.329 | time: 5:44:12.9 | step: 815 +epoch: 9 | 32/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.341 | time: 5:44:38.9 | step: 816 +epoch: 9 | 33/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.354 | time: 5:45:04.9 | step: 817 +epoch: 9 | 34/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.341 | time: 5:45:30.9 | step: 818 +epoch: 9 | 35/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.310 | time: 5:45:57.3 | step: 819 +epoch: 9 | 36/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.313 | time: 5:46:22.6 | step: 820 +epoch: 9 | 37/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.392 | time: 5:46:48.2 | step: 821 +epoch: 9 | 38/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.332 | time: 5:47:15.2 | step: 822 +epoch: 9 | 39/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.309 | time: 5:47:40.7 | step: 823 +epoch: 9 | 40/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.325 | time: 5:48:06.0 | step: 824 +epoch: 9 | 41/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.305 | time: 5:48:31.7 | step: 825 +epoch: 9 | 42/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.297 | time: 5:48:57.5 | step: 826 +epoch: 9 | 43/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.356 | time: 5:49:23.8 | step: 827 +epoch: 9 | 44/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.311 | time: 5:49:48.8 | step: 828 +epoch: 9 | 45/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.370 | time: 5:50:14.5 | step: 829 +epoch: 9 | 46/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.316 | time: 5:50:40.5 | step: 830 +epoch: 9 | 47/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.309 | time: 5:51:06.3 | step: 831 +epoch: 9 | 48/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.332 | time: 5:51:32.6 | step: 832 +epoch: 9 | 49/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.332 | time: 5:51:58.7 | step: 833 +epoch: 9 | 50/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.328 | time: 5:52:24.7 | step: 834 +epoch: 9 | 51/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.365 | time: 5:52:50.7 | step: 835 +epoch: 9 | 52/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.344 | time: 5:53:16.6 | step: 836 +epoch: 9 | 53/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.348 | time: 5:53:43.7 | step: 837 +epoch: 9 | 54/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.342 | time: 5:54:11.1 | step: 838 +epoch: 9 | 55/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.372 | time: 5:54:38.2 | step: 839 +epoch: 9 | 56/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.328 | time: 5:55:05.7 | step: 840 +epoch: 9 | 57/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.339 | time: 5:55:32.7 | step: 841 +epoch: 9 | 58/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.350 | time: 5:55:59.8 | step: 842 +epoch: 9 | 59/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.337 | time: 5:56:26.4 | step: 843 +epoch: 9 | 60/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.324 | time: 5:56:51.8 | step: 844 +epoch: 9 | 61/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.344 | time: 5:57:18.0 | step: 845 +epoch: 9 | 62/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.305 | time: 5:57:43.8 | step: 846 +epoch: 9 | 63/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.357 | time: 5:58:10.0 | step: 847 +epoch: 9 | 64/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.355 | time: 5:58:36.1 | step: 848 +epoch: 9 | 65/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.334 | time: 5:59:01.7 | step: 849 +epoch: 9 | 66/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.358 | time: 5:59:27.5 | step: 850 +epoch: 9 | 67/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.329 | time: 5:59:54.8 | step: 851 +epoch: 9 | 68/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.321 | time: 6:00:21.5 | step: 852 +epoch: 9 | 69/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.318 | time: 6:00:47.6 | step: 853 +epoch: 9 | 70/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.315 | time: 6:01:14.0 | step: 854 +epoch: 9 | 71/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.341 | time: 6:01:40.9 | step: 855 +epoch: 9 | 72/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.310 | time: 6:02:07.7 | step: 856 +epoch: 9 | 73/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.330 | time: 6:02:33.7 | step: 857 +epoch: 9 | 74/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.370 | time: 6:03:00.2 | step: 858 +epoch: 9 | 75/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.327 | time: 6:03:27.2 | step: 859 +epoch: 9 | 76/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.309 | time: 6:03:53.4 | step: 860 +epoch: 9 | 77/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.337 | time: 6:04:19.0 | step: 861 +epoch: 9 | 78/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.339 | time: 6:04:45.8 | step: 862 +epoch: 9 | 79/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.363 | time: 6:05:12.9 | step: 863 +epoch: 9 | 80/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.331 | time: 6:05:39.8 | step: 864 +epoch: 9 | 81/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.325 | time: 6:06:06.3 | step: 865 +epoch: 9 | 82/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.362 | time: 6:06:33.0 | step: 866 +epoch: 9 | 83/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.337 | time: 6:06:59.8 | step: 867 +epoch: 9 | 84/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.307 | time: 6:07:26.0 | step: 868 +epoch: 9 | 85/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.338 | time: 6:07:51.6 | step: 869 +epoch: 9 | 86/ 87 | exp/pretrain/combsub | batch/s: 0.09 | loss: 1.335 | time: 6:08:02.6 | step: 870 + --- --- +loss: 1.222. +epoch: 10 | 0/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.335 | time: 6:08:29.0 | step: 871 +epoch: 10 | 1/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.351 | time: 6:08:56.0 | step: 872 +epoch: 10 | 2/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.297 | time: 6:09:22.5 | step: 873 +epoch: 10 | 3/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.298 | time: 6:09:49.0 | step: 874 +epoch: 10 | 4/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.343 | time: 6:10:15.6 | step: 875 +epoch: 10 | 5/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.360 | time: 6:10:41.5 | step: 876 +epoch: 10 | 6/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.301 | time: 6:11:07.4 | step: 877 +epoch: 10 | 7/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.322 | time: 6:11:32.8 | step: 878 +epoch: 10 | 8/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.368 | time: 6:11:58.9 | step: 879 +epoch: 10 | 9/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.344 | time: 6:12:24.3 | step: 880 +epoch: 10 | 10/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.390 | time: 6:12:49.8 | step: 881 +epoch: 10 | 11/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.326 | time: 6:13:15.6 | step: 882 +epoch: 10 | 12/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.337 | time: 6:13:42.3 | step: 883 +epoch: 10 | 13/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.378 | time: 6:14:08.2 | step: 884 +epoch: 10 | 14/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.301 | time: 6:14:34.8 | step: 885 +epoch: 10 | 15/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.369 | time: 6:15:00.6 | step: 886 +epoch: 10 | 16/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.320 | time: 6:15:26.6 | step: 887 +epoch: 10 | 17/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.341 | time: 6:15:53.0 | step: 888 +epoch: 10 | 18/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.324 | time: 6:16:19.0 | step: 889 +epoch: 10 | 19/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.321 | time: 6:16:44.8 | step: 890 +epoch: 10 | 20/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.315 | time: 6:17:11.6 | step: 891 +epoch: 10 | 21/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.355 | time: 6:17:36.8 | step: 892 +epoch: 10 | 22/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.317 | time: 6:18:02.6 | step: 893 +epoch: 10 | 23/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.318 | time: 6:18:28.4 | step: 894 +epoch: 10 | 24/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.307 | time: 6:18:54.1 | step: 895 +epoch: 10 | 25/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.320 | time: 6:19:20.4 | step: 896 +epoch: 10 | 26/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.311 | time: 6:19:46.0 | step: 897 +epoch: 10 | 27/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.331 | time: 6:20:11.8 | step: 898 +epoch: 10 | 28/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.335 | time: 6:20:37.8 | step: 899 +epoch: 10 | 29/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.315 | time: 6:21:03.5 | step: 900 +epoch: 10 | 30/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.322 | time: 6:21:29.8 | step: 901 +epoch: 10 | 31/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.325 | time: 6:21:56.5 | step: 902 +epoch: 10 | 32/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.338 | time: 6:22:22.6 | step: 903 +epoch: 10 | 33/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.295 | time: 6:22:48.4 | step: 904 +epoch: 10 | 34/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.292 | time: 6:23:14.2 | step: 905 +epoch: 10 | 35/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.346 | time: 6:23:39.5 | step: 906 +epoch: 10 | 36/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.312 | time: 6:24:05.2 | step: 907 +epoch: 10 | 37/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.306 | time: 6:24:31.1 | step: 908 +epoch: 10 | 38/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.291 | time: 6:24:57.0 | step: 909 +epoch: 10 | 39/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.290 | time: 6:25:23.1 | step: 910 +epoch: 10 | 40/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.278 | time: 6:25:49.5 | step: 911 +epoch: 10 | 41/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.276 | time: 6:26:16.1 | step: 912 +epoch: 10 | 42/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.318 | time: 6:26:41.9 | step: 913 +epoch: 10 | 43/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.286 | time: 6:27:07.7 | step: 914 +epoch: 10 | 44/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.286 | time: 6:27:34.8 | step: 915 +epoch: 10 | 45/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.327 | time: 6:28:00.2 | step: 916 +epoch: 10 | 46/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.282 | time: 6:28:25.9 | step: 917 +epoch: 10 | 47/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.314 | time: 6:28:52.4 | step: 918 +epoch: 10 | 48/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.295 | time: 6:29:18.0 | step: 919 +epoch: 10 | 49/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.296 | time: 6:29:43.7 | step: 920 +epoch: 10 | 50/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.339 | time: 6:30:09.4 | step: 921 +epoch: 10 | 51/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.310 | time: 6:30:34.7 | step: 922 +epoch: 10 | 52/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.331 | time: 6:31:00.5 | step: 923 +epoch: 10 | 53/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.284 | time: 6:31:26.6 | step: 924 +epoch: 10 | 54/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.324 | time: 6:31:52.7 | step: 925 +epoch: 10 | 55/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.312 | time: 6:32:18.5 | step: 926 +epoch: 10 | 56/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.313 | time: 6:32:44.4 | step: 927 +epoch: 10 | 57/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.322 | time: 6:33:10.5 | step: 928 +epoch: 10 | 58/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.331 | time: 6:33:36.6 | step: 929 +epoch: 10 | 59/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.278 | time: 6:34:02.6 | step: 930 +epoch: 10 | 60/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.294 | time: 6:34:28.1 | step: 931 +epoch: 10 | 61/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.337 | time: 6:34:54.0 | step: 932 +epoch: 10 | 62/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.326 | time: 6:35:19.3 | step: 933 +epoch: 10 | 63/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.275 | time: 6:35:45.7 | step: 934 +epoch: 10 | 64/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.279 | time: 6:36:11.5 | step: 935 +epoch: 10 | 65/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.284 | time: 6:36:37.3 | step: 936 +epoch: 10 | 66/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.303 | time: 6:37:03.1 | step: 937 +epoch: 10 | 67/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.305 | time: 6:37:29.4 | step: 938 +epoch: 10 | 68/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.299 | time: 6:37:55.4 | step: 939 +epoch: 10 | 69/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.288 | time: 6:38:21.2 | step: 940 +epoch: 10 | 70/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.324 | time: 6:38:47.3 | step: 941 +epoch: 10 | 71/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.310 | time: 6:39:13.4 | step: 942 +epoch: 10 | 72/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.354 | time: 6:39:39.2 | step: 943 +epoch: 10 | 73/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.316 | time: 6:40:05.4 | step: 944 +epoch: 10 | 74/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.343 | time: 6:40:31.5 | step: 945 +epoch: 10 | 75/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.351 | time: 6:40:57.6 | step: 946 +epoch: 10 | 76/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.317 | time: 6:41:22.9 | step: 947 +epoch: 10 | 77/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.350 | time: 6:41:48.7 | step: 948 +epoch: 10 | 78/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.351 | time: 6:42:14.9 | step: 949 +epoch: 10 | 79/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.361 | time: 6:42:40.9 | step: 950 +epoch: 10 | 80/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.339 | time: 6:43:06.8 | step: 951 +epoch: 10 | 81/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.338 | time: 6:43:33.2 | step: 952 +epoch: 10 | 82/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.318 | time: 6:43:59.1 | step: 953 +epoch: 10 | 83/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.402 | time: 6:44:25.1 | step: 954 +epoch: 10 | 84/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.331 | time: 6:44:51.2 | step: 955 +epoch: 10 | 85/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.325 | time: 6:45:17.5 | step: 956 +epoch: 10 | 86/ 87 | exp/pretrain/combsub | batch/s: 0.09 | loss: 1.323 | time: 6:45:28.3 | step: 957 + --- --- +loss: 1.248. +epoch: 11 | 0/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.335 | time: 6:45:54.2 | step: 958 +epoch: 11 | 1/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.296 | time: 6:46:20.6 | step: 959 +epoch: 11 | 2/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.324 | time: 6:46:46.5 | step: 960 +epoch: 11 | 3/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.316 | time: 6:47:12.8 | step: 961 +epoch: 11 | 4/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.327 | time: 6:47:38.2 | step: 962 +epoch: 11 | 5/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.304 | time: 6:48:03.8 | step: 963 +epoch: 11 | 6/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.320 | time: 6:48:30.0 | step: 964 +epoch: 11 | 7/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.331 | time: 6:48:56.8 | step: 965 +epoch: 11 | 8/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.295 | time: 6:49:22.0 | step: 966 +epoch: 11 | 9/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.304 | time: 6:49:48.2 | step: 967 +epoch: 11 | 10/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.330 | time: 6:50:15.6 | step: 968 +epoch: 11 | 11/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.302 | time: 6:50:41.9 | step: 969 +epoch: 11 | 12/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.304 | time: 6:51:08.1 | step: 970 +epoch: 11 | 13/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.305 | time: 6:51:34.0 | step: 971 +epoch: 11 | 14/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.290 | time: 6:51:59.8 | step: 972 +epoch: 11 | 15/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.327 | time: 6:52:26.1 | step: 973 +epoch: 11 | 16/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.307 | time: 6:52:52.7 | step: 974 +epoch: 11 | 17/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.329 | time: 6:53:18.1 | step: 975 +epoch: 11 | 18/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.305 | time: 6:53:44.4 | step: 976 +epoch: 11 | 19/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.314 | time: 6:54:10.8 | step: 977 +epoch: 11 | 20/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.318 | time: 6:54:37.2 | step: 978 +epoch: 11 | 21/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.343 | time: 6:55:04.4 | step: 979 +epoch: 11 | 22/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.302 | time: 6:55:30.6 | step: 980 +epoch: 11 | 23/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.313 | time: 6:55:57.0 | step: 981 +epoch: 11 | 24/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.322 | time: 6:56:23.0 | step: 982 +epoch: 11 | 25/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.391 | time: 6:56:48.8 | step: 983 +epoch: 11 | 26/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.301 | time: 6:57:15.0 | step: 984 +epoch: 11 | 27/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.322 | time: 6:57:41.0 | step: 985 +epoch: 11 | 28/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.338 | time: 6:58:07.5 | step: 986 +epoch: 11 | 29/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.297 | time: 6:58:34.6 | step: 987 +epoch: 11 | 30/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.284 | time: 6:59:01.8 | step: 988 +epoch: 11 | 31/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.354 | time: 6:59:28.0 | step: 989 +epoch: 11 | 32/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.303 | time: 6:59:54.6 | step: 990 +epoch: 11 | 33/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.295 | time: 7:00:20.9 | step: 991 +epoch: 11 | 34/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.296 | time: 7:00:47.7 | step: 992 +epoch: 11 | 35/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.300 | time: 7:01:14.0 | step: 993 +epoch: 11 | 36/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.321 | time: 7:01:40.5 | step: 994 +epoch: 11 | 37/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.308 | time: 7:02:06.5 | step: 995 +epoch: 11 | 38/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.294 | time: 7:02:32.9 | step: 996 +epoch: 11 | 39/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.313 | time: 7:02:59.0 | step: 997 +epoch: 11 | 40/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.298 | time: 7:03:25.7 | step: 998 +epoch: 11 | 41/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.315 | time: 7:03:52.3 | step: 999 +epoch: 11 | 42/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.290 | time: 7:04:19.3 | step: 1000 +epoch: 11 | 43/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.293 | time: 7:04:45.9 | step: 1001 +epoch: 11 | 44/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.362 | time: 7:05:12.6 | step: 1002 +epoch: 11 | 45/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.299 | time: 7:05:39.1 | step: 1003 +epoch: 11 | 46/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.298 | time: 7:06:06.0 | step: 1004 +epoch: 11 | 47/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.342 | time: 7:06:32.7 | step: 1005 +epoch: 11 | 48/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.301 | time: 7:06:59.6 | step: 1006 +epoch: 11 | 49/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.326 | time: 7:07:26.4 | step: 1007 +epoch: 11 | 50/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.312 | time: 7:07:52.0 | step: 1008 +epoch: 11 | 51/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.294 | time: 7:08:19.0 | step: 1009 +epoch: 11 | 52/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.325 | time: 7:08:46.1 | step: 1010 +epoch: 11 | 53/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.313 | time: 7:09:12.4 | step: 1011 +epoch: 11 | 54/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.317 | time: 7:09:38.4 | step: 1012 +epoch: 11 | 55/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.381 | time: 7:10:04.7 | step: 1013 +epoch: 11 | 56/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.348 | time: 7:10:30.9 | step: 1014 +epoch: 11 | 57/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.339 | time: 7:10:57.1 | step: 1015 +epoch: 11 | 58/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.315 | time: 7:11:23.6 | step: 1016 +epoch: 11 | 59/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.302 | time: 7:11:49.8 | step: 1017 +epoch: 11 | 60/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.304 | time: 7:12:15.8 | step: 1018 +epoch: 11 | 61/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.351 | time: 7:12:41.6 | step: 1019 +epoch: 11 | 62/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.302 | time: 7:13:07.4 | step: 1020 +epoch: 11 | 63/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.281 | time: 7:13:33.6 | step: 1021 +epoch: 11 | 64/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.361 | time: 7:13:59.6 | step: 1022 +epoch: 11 | 65/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.288 | time: 7:14:25.8 | step: 1023 +epoch: 11 | 66/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.315 | time: 7:14:52.5 | step: 1024 +epoch: 11 | 67/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.321 | time: 7:15:19.4 | step: 1025 +epoch: 11 | 68/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.324 | time: 7:15:45.4 | step: 1026 +epoch: 11 | 69/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.292 | time: 7:16:13.0 | step: 1027 +epoch: 11 | 70/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.296 | time: 7:16:39.1 | step: 1028 +epoch: 11 | 71/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.287 | time: 7:17:05.8 | step: 1029 +epoch: 11 | 72/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.287 | time: 7:17:33.1 | step: 1030 +epoch: 11 | 73/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.312 | time: 7:17:59.1 | step: 1031 +epoch: 11 | 74/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.351 | time: 7:18:25.1 | step: 1032 +epoch: 11 | 75/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.315 | time: 7:18:51.9 | step: 1033 +epoch: 11 | 76/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.307 | time: 7:19:18.6 | step: 1034 +epoch: 11 | 77/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.317 | time: 7:19:45.8 | step: 1035 +epoch: 11 | 78/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.314 | time: 7:20:12.5 | step: 1036 +epoch: 11 | 79/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.324 | time: 7:20:38.5 | step: 1037 +epoch: 11 | 80/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.290 | time: 7:21:05.9 | step: 1038 +epoch: 11 | 81/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.288 | time: 7:21:32.5 | step: 1039 +epoch: 11 | 82/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.302 | time: 7:21:58.5 | step: 1040 +epoch: 11 | 83/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.268 | time: 7:22:24.7 | step: 1041 +epoch: 11 | 84/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.297 | time: 7:22:51.0 | step: 1042 +epoch: 11 | 85/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.291 | time: 7:23:17.6 | step: 1043 +epoch: 11 | 86/ 87 | exp/pretrain/combsub | batch/s: 0.09 | loss: 1.308 | time: 7:23:28.8 | step: 1044 + --- --- +loss: 1.187. +epoch: 12 | 0/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.339 | time: 7:23:55.6 | step: 1045 +epoch: 12 | 1/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.316 | time: 7:24:22.5 | step: 1046 +epoch: 12 | 2/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.342 | time: 7:24:49.2 | step: 1047 +epoch: 12 | 3/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.294 | time: 7:25:14.8 | step: 1048 +epoch: 12 | 4/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.295 | time: 7:25:41.4 | step: 1049 +epoch: 12 | 5/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.289 | time: 7:26:07.4 | step: 1050 +epoch: 12 | 6/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.285 | time: 7:26:33.8 | step: 1051 +epoch: 12 | 7/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.309 | time: 7:27:00.6 | step: 1052 +epoch: 12 | 8/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.284 | time: 7:27:26.2 | step: 1053 +epoch: 12 | 9/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.287 | time: 7:27:52.9 | step: 1054 +epoch: 12 | 10/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.318 | time: 7:28:19.6 | step: 1055 +epoch: 12 | 11/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.289 | time: 7:28:46.0 | step: 1056 +epoch: 12 | 12/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.305 | time: 7:29:12.3 | step: 1057 +epoch: 12 | 13/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.351 | time: 7:29:38.4 | step: 1058 +epoch: 12 | 14/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.311 | time: 7:30:04.2 | step: 1059 +epoch: 12 | 15/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.282 | time: 7:30:31.2 | step: 1060 +epoch: 12 | 16/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.297 | time: 7:30:58.1 | step: 1061 +epoch: 12 | 17/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.289 | time: 7:31:25.1 | step: 1062 +epoch: 12 | 18/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.300 | time: 7:31:51.0 | step: 1063 +epoch: 12 | 19/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.312 | time: 7:32:17.9 | step: 1064 +epoch: 12 | 20/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.299 | time: 7:32:43.8 | step: 1065 +epoch: 12 | 21/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.342 | time: 7:33:10.9 | step: 1066 +epoch: 12 | 22/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.324 | time: 7:33:37.8 | step: 1067 +epoch: 12 | 23/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.299 | time: 7:34:03.9 | step: 1068 +epoch: 12 | 24/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.365 | time: 7:34:30.3 | step: 1069 +epoch: 12 | 25/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.299 | time: 7:34:55.8 | step: 1070 +epoch: 12 | 26/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.332 | time: 7:35:22.5 | step: 1071 +epoch: 12 | 27/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.309 | time: 7:35:48.2 | step: 1072 +epoch: 12 | 28/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.354 | time: 7:36:13.9 | step: 1073 +epoch: 12 | 29/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.368 | time: 7:36:40.1 | step: 1074 +epoch: 12 | 30/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.329 | time: 7:37:05.9 | step: 1075 +epoch: 12 | 31/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.307 | time: 7:37:31.4 | step: 1076 +epoch: 12 | 32/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.309 | time: 7:37:57.6 | step: 1077 +epoch: 12 | 33/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.365 | time: 7:38:24.6 | step: 1078 +epoch: 12 | 34/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.310 | time: 7:38:50.5 | step: 1079 +epoch: 12 | 35/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.294 | time: 7:39:17.5 | step: 1080 +epoch: 12 | 36/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.348 | time: 7:39:43.6 | step: 1081 +epoch: 12 | 37/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.321 | time: 7:40:08.9 | step: 1082 +epoch: 12 | 38/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.314 | time: 7:40:35.2 | step: 1083 +epoch: 12 | 39/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.326 | time: 7:41:01.8 | step: 1084 +epoch: 12 | 40/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.311 | time: 7:41:28.0 | step: 1085 +epoch: 12 | 41/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.316 | time: 7:41:54.8 | step: 1086 +epoch: 12 | 42/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.308 | time: 7:42:21.3 | step: 1087 +epoch: 12 | 43/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.310 | time: 7:42:48.0 | step: 1088 +epoch: 12 | 44/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.292 | time: 7:43:14.8 | step: 1089 +epoch: 12 | 45/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.335 | time: 7:43:40.8 | step: 1090 +epoch: 12 | 46/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.307 | time: 7:44:07.7 | step: 1091 +epoch: 12 | 47/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.281 | time: 7:44:33.8 | step: 1092 +epoch: 12 | 48/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.288 | time: 7:45:00.1 | step: 1093 +epoch: 12 | 49/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.314 | time: 7:45:26.2 | step: 1094 +epoch: 12 | 50/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.326 | time: 7:45:52.4 | step: 1095 +epoch: 12 | 51/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.299 | time: 7:46:18.3 | step: 1096 +epoch: 12 | 52/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.308 | time: 7:46:44.2 | step: 1097 +epoch: 12 | 53/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.299 | time: 7:47:10.2 | step: 1098 +epoch: 12 | 54/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.286 | time: 7:47:35.4 | step: 1099 +epoch: 12 | 55/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.313 | time: 7:48:01.3 | step: 1100 +epoch: 12 | 56/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.285 | time: 7:48:27.1 | step: 1101 +epoch: 12 | 57/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.337 | time: 7:48:53.4 | step: 1102 +epoch: 12 | 58/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.291 | time: 7:49:20.3 | step: 1103 +epoch: 12 | 59/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.302 | time: 7:49:46.3 | step: 1104 +epoch: 12 | 60/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.295 | time: 7:50:12.0 | step: 1105 +epoch: 12 | 61/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.306 | time: 7:50:37.4 | step: 1106 +epoch: 12 | 62/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.320 | time: 7:51:03.0 | step: 1107 +epoch: 12 | 63/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.283 | time: 7:51:29.1 | step: 1108 +epoch: 12 | 64/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.290 | time: 7:51:55.1 | step: 1109 +epoch: 12 | 65/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.299 | time: 7:52:21.0 | step: 1110 +epoch: 12 | 66/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.328 | time: 7:52:46.5 | step: 1111 +epoch: 12 | 67/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.319 | time: 7:53:11.6 | step: 1112 +epoch: 12 | 68/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.298 | time: 7:53:37.2 | step: 1113 +epoch: 12 | 69/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.314 | time: 7:54:03.7 | step: 1114 +epoch: 12 | 70/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.356 | time: 7:54:30.5 | step: 1115 +epoch: 12 | 71/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.313 | time: 7:54:56.4 | step: 1116 +epoch: 12 | 72/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.326 | time: 7:55:22.3 | step: 1117 +epoch: 12 | 73/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.317 | time: 7:55:48.9 | step: 1118 +epoch: 12 | 74/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.288 | time: 7:56:15.4 | step: 1119 +epoch: 12 | 75/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.303 | time: 7:56:41.7 | step: 1120 +epoch: 12 | 76/ 87 | exp/pretrain/combsub | batch/s: 0.04 | loss: 1.275 | time: 7:57:08.1 | step: 1121