weizhiwang
commited on
Commit
•
75fba30
1
Parent(s):
506f141
Upload 5 files
Browse files- checkpoints/epoch_latest.pt +3 -0
- eval_results.jsonl +40 -0
- info.pkl +3 -0
- out.log +788 -0
- params.txt +91 -0
checkpoints/epoch_latest.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a92380b110f90e0394d1eeb975b5048c918bfcf65c905cd917d2c2ae0130d72
|
3 |
+
size 1815639289
|
eval_results.jsonl
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"key": "vtab/caltech101", "dataset": "Caltech-101", "metrics": {"acc1": 0.7485620377978636, "acc5": 0.9193097781429745, "mean_per_class_recall": 0.7174703938661421, "main_metric": 0.7174703938661421}}
|
2 |
+
{"key": "cifar10", "dataset": "CIFAR-10", "metrics": {"acc1": 0.808, "acc5": 0.9916, "mean_per_class_recall": 0.808, "main_metric": 0.808}}
|
3 |
+
{"key": "vtab/cifar100", "dataset": "CIFAR-100", "metrics": {"acc1": 0.5603, "acc5": 0.8505, "mean_per_class_recall": 0.5602999999999999, "main_metric": 0.5603}}
|
4 |
+
{"key": "vtab/clevr_count_all", "dataset": "CLEVR Counts", "metrics": {"acc1": 0.1482, "acc5": 0.6596666666666666, "mean_per_class_recall": 0.14753156497639805, "main_metric": 0.1482}}
|
5 |
+
{"key": "vtab/clevr_closest_object_distance", "dataset": "CLEVR Distance", "metrics": {"acc1": 0.21046666666666666, "acc5": 0.9186666666666666, "mean_per_class_recall": 0.15831434356685678, "main_metric": 0.21046666666666666}}
|
6 |
+
{"key": "country211", "dataset": "Country211", "metrics": {"acc1": 0.047156398104265404, "acc5": 0.14535545023696683, "mean_per_class_recall": 0.04715639810426541, "main_metric": 0.047156398104265404}}
|
7 |
+
{"key": "vtab/dtd", "dataset": "Describable Textures", "metrics": {"acc1": 0.2377659574468085, "acc5": 0.4765957446808511, "mean_per_class_recall": 0.23776595744680853, "main_metric": 0.2377659574468085}}
|
8 |
+
{"key": "vtab/eurosat", "dataset": "EuroSAT", "metrics": {"acc1": 0.33685185185185185, "acc5": 0.8644444444444445, "mean_per_class_recall": 0.33049120799359527, "main_metric": 0.33685185185185185}}
|
9 |
+
{"key": "fgvc_aircraft", "dataset": "FGVC Aircraft", "metrics": {"acc1": 0.0306030603060306, "acc5": 0.10891089108910891, "mean_per_class_recall": 0.0303475935828877, "main_metric": 0.0303475935828877}}
|
10 |
+
{"key": "food101", "dataset": "Food-101", "metrics": {"acc1": 0.4462970297029703, "acc5": 0.7431287128712871, "mean_per_class_recall": 0.4462970297029703, "main_metric": 0.4462970297029703}}
|
11 |
+
{"key": "gtsrb", "dataset": "GTSRB", "metrics": {"acc1": 0.13008709422011086, "acc5": 0.4782264449722882, "mean_per_class_recall": 0.16656554435902263, "main_metric": 0.13008709422011086}}
|
12 |
+
{"key": "imagenet1k", "dataset": "ImageNet 1k", "metrics": {"acc1": 0.30318, "acc5": 0.56316, "mean_per_class_recall": 0.3032, "main_metric": 0.30318}}
|
13 |
+
{"key": "imagenet_sketch", "dataset": "ImageNet Sketch", "metrics": {"acc1": 0.21460433492503292, "acc5": 0.4268309457839612, "mean_per_class_recall": 0.2147898039215686, "main_metric": 0.21460433492503292}}
|
14 |
+
{"key": "imagenetv2", "dataset": "ImageNet v2", "metrics": {"acc1": 0.252, "acc5": 0.498, "mean_per_class_recall": 0.2524, "main_metric": 0.252}}
|
15 |
+
{"key": "imagenet-a", "dataset": "ImageNet-A", "metrics": {"acc1": 0.0512, "acc5": 0.19613333333333333, "mean_per_class_recall": 0.06014155302385772, "main_metric": 0.0512}}
|
16 |
+
{"key": "imagenet-o", "dataset": "ImageNet-O", "metrics": {"acc1": 0.415, "acc5": 0.7005, "mean_per_class_recall": 0.422468183325962, "main_metric": 0.415}}
|
17 |
+
{"key": "imagenet-r", "dataset": "ImageNet-R", "metrics": {"acc1": 0.3621333333333333, "acc5": 0.6121333333333333, "mean_per_class_recall": 0.34744586795038773, "main_metric": 0.3621333333333333}}
|
18 |
+
{"key": "vtab/kitti_closest_vehicle_distance", "dataset": "KITTI Vehicle Distance", "metrics": {"acc1": 0.4149085794655415, "acc5": null, "mean_per_class_recall": 0.44129221412525704, "main_metric": 0.4149085794655415}}
|
19 |
+
{"key": "mnist", "dataset": "MNIST", "metrics": {"acc1": 0.1163, "acc5": 0.5472, "mean_per_class_recall": 0.1145462152218627, "main_metric": 0.1163}}
|
20 |
+
{"key": "objectnet", "dataset": "ObjectNet", "metrics": {"acc1": 0.2389899860019382, "acc5": 0.4616130074297405, "mean_per_class_recall": 0.23185706315755167, "main_metric": 0.2389899860019382}}
|
21 |
+
{"key": "vtab/flowers", "dataset": "Oxford Flowers-102", "metrics": {"acc1": 0.3289965848105383, "acc5": 0.5776549032362985, "mean_per_class_recall": 0.3017565989815838, "main_metric": 0.3017565989815838}}
|
22 |
+
{"key": "vtab/pets", "dataset": "Oxford-IIIT Pet", "metrics": {"acc1": 0.3878440992095939, "acc5": 0.7345325701826111, "mean_per_class_recall": 0.3861881243069706, "main_metric": 0.3861881243069706}}
|
23 |
+
{"key": "voc2007", "dataset": "Pascal VOC 2007", "metrics": {"acc1": 0.6064369658119658, "acc5": 0.8708600427350427, "mean_per_class_recall": 0.6713317709078158, "main_metric": 0.6064369658119658}}
|
24 |
+
{"key": "vtab/pcam", "dataset": "PatchCamelyon", "metrics": {"acc1": 0.52630615234375, "acc5": null, "mean_per_class_recall": 0.5264856323517709, "main_metric": 0.52630615234375}}
|
25 |
+
{"key": "renderedsst2", "dataset": "Rendered SST2", "metrics": {"acc1": 0.500823723228995, "acc5": null, "mean_per_class_recall": 0.5016248335359852, "main_metric": 0.500823723228995}}
|
26 |
+
{"key": "vtab/resisc45", "dataset": "RESISC45", "metrics": {"acc1": 0.28253968253968254, "acc5": 0.596984126984127, "mean_per_class_recall": 0.28750797608664463, "main_metric": 0.28253968253968254}}
|
27 |
+
{"key": "cars", "dataset": "Stanford Cars", "metrics": {"acc1": 0.43054346474319116, "acc5": 0.8240268623305559, "mean_per_class_recall": 0.43143957675281425, "main_metric": 0.43054346474319116}}
|
28 |
+
{"key": "stl10", "dataset": "STL-10", "metrics": {"acc1": 0.85425, "acc5": 0.996375, "mean_per_class_recall": 0.8542500000000001, "main_metric": 0.85425}}
|
29 |
+
{"key": "sun397", "dataset": "SUN397", "metrics": {"acc1": 0.4252441289515788, "acc5": 0.7532228699634036, "mean_per_class_recall": 0.3895119161461437, "main_metric": 0.4252441289515788}}
|
30 |
+
{"key": "vtab/svhn", "dataset": "SVHN", "metrics": {"acc1": 0.12880301167793484, "acc5": 0.5850491702519975, "mean_per_class_recall": 0.11865552322518588, "main_metric": 0.12880301167793484}}
|
31 |
+
{"key": "retrieval/flickr_1k_test_image_text_retrieval", "dataset": "Flickr", "metrics": {"image_retrieval_recall@1": 0.2264000028371811, "text_retrieval_recall@1": 0.3109999895095825, "image_retrieval_recall@5": 0.4691999852657318, "text_retrieval_recall@5": 0.574999988079071, "image_retrieval_recall@10": 0.5843999981880188, "text_retrieval_recall@10": 0.6819999814033508, "mean_recall@1": 0.2686999961733818, "main_metric": 0.2686999961733818}}
|
32 |
+
{"key": "retrieval/mscoco_2014_5k_test_image_text_retrieval", "dataset": "MSCOCO", "metrics": {"image_retrieval_recall@1": 0.1284686177968979, "text_retrieval_recall@1": 0.2199999988079071, "image_retrieval_recall@5": 0.3093162775039673, "text_retrieval_recall@5": 0.43220001459121704, "image_retrieval_recall@10": 0.41519391536712646, "text_retrieval_recall@10": 0.5419999957084656, "mean_recall@1": 0.1742343083024025, "main_metric": 0.1742343083024025}}
|
33 |
+
{"key": "misc/winogavil", "dataset": "WinoGAViL", "metrics": {"avg_jaccard_score": 0.509030645657084, "jaccard_score_5": 0.5461868686868687, "jaccard_score_6": 0.5233981281497481, "jaccard_score_10": 0.4457746478873239, "jaccard_score_12": 0.4107699154428126, "jaccard_score_5-6": 0.5345022763627415, "jaccard_score_10-12": 0.42823129251700676, "main_metric": 0.42823129251700676}}
|
34 |
+
{"key": "wilds/iwildcam", "dataset": "iWildCam", "metrics": {"acc1": 0.1727232361945269, "acc5": 0.3400481409642214, "mean_per_class_recall": 0.03296683417770204, "acc_avg": 0.17272323369979858, "recall-macro_all": 0.03296683417770204, "F1-macro_all": 0.024197471863985638, "main_metric": 0.024197471863985638}}
|
35 |
+
{"key": "wilds/camelyon17", "dataset": "Camelyon17", "metrics": {"acc1": 0.5654878077456674, "acc5": null, "mean_per_class_recall": 0.5654878077456675, "acc_avg": 0.565487802028656, "acc_slide:0": NaN, "count_slide:0": 0.0, "acc_slide:1": NaN, "count_slide:1": 0.0, "acc_slide:2": NaN, "count_slide:2": 0.0, "acc_slide:3": NaN, "count_slide:3": 0.0, "acc_slide:4": NaN, "count_slide:4": 0.0, "acc_slide:5": NaN, "count_slide:5": 0.0, "acc_slide:6": NaN, "count_slide:6": 0.0, "acc_slide:7": NaN, "count_slide:7": 0.0, "acc_slide:8": NaN, "count_slide:8": 0.0, "acc_slide:9": NaN, "count_slide:9": 0.0, "acc_slide:10": NaN, "count_slide:10": 0.0, "acc_slide:11": NaN, "count_slide:11": 0.0, "acc_slide:12": NaN, "count_slide:12": 0.0, "acc_slide:13": NaN, "count_slide:13": 0.0, "acc_slide:14": NaN, "count_slide:14": 0.0, "acc_slide:15": NaN, "count_slide:15": 0.0, "acc_slide:16": NaN, "count_slide:16": 0.0, "acc_slide:17": NaN, "count_slide:17": 0.0, "acc_slide:18": NaN, "count_slide:18": 0.0, "acc_slide:19": NaN, "count_slide:19": 0.0, "acc_slide:20": 0.29842519760131836, "count_slide:20": 3810.0, "acc_slide:21": 0.15078505873680115, "count_slide:21": 3694.0, "acc_slide:22": 0.7105408906936646, "count_slide:22": 7210.0, "acc_slide:23": 0.5605143904685974, "count_slide:23": 5288.0, "acc_slide:24": 0.13536947965621948, "count_slide:24": 7727.0, "acc_slide:25": 0.4003230333328247, "count_slide:25": 4334.0, "acc_slide:26": 0.33840104937553406, "count_slide:26": 3815.0, "acc_slide:27": 0.10776997357606888, "count_slide:27": 4556.0, "acc_slide:28": 0.8370349407196045, "count_slide:28": 31878.0, "acc_slide:29": 0.5548579692840576, "count_slide:29": 12742.0, "acc_wg": 0.10776997357606888, "main_metric": 0.5654878077456674}}
|
36 |
+
{"key": "wilds/fmow", "dataset": "FMoW", "metrics": {"acc1": 0.07092455219829925, "acc5": 0.2710331101863579, "mean_per_class_recall": 0.07152649316961654, "acc_avg": 0.0709245502948761, "acc_year:0": NaN, "count_year:0": 0.0, "acc_year:1": NaN, "count_year:1": 0.0, "acc_year:2": NaN, "count_year:2": 0.0, "acc_year:3": NaN, "count_year:3": 0.0, "acc_year:4": NaN, "count_year:4": 0.0, "acc_year:5": NaN, "count_year:5": 0.0, "acc_year:6": NaN, "count_year:6": 0.0, "acc_year:7": NaN, "count_year:7": 0.0, "acc_year:8": NaN, "count_year:8": 0.0, "acc_year:9": NaN, "count_year:9": 0.0, "acc_year:10": NaN, "count_year:10": 0.0, "acc_year:11": NaN, "count_year:11": 0.0, "acc_year:12": NaN, "count_year:12": 0.0, "acc_year:13": NaN, "count_year:13": 0.0, "acc_year:14": 0.07193433493375778, "count_year:14": 15959.0, "acc_year:15": 0.06830378621816635, "count_year:15": 6149.0, "acc_worst_year": 0.06830378621816635, "acc_region:0": 0.05641748756170273, "count_region:0": 4963.0, "acc_region:1": 0.0802321583032608, "count_region:1": 5858.0, "acc_region:2": 0.06864635646343231, "count_region:2": 2593.0, "acc_region:3": 0.07390329241752625, "count_region:3": 8024.0, "acc_region:4": 0.07057057321071625, "count_region:4": 666.0, "acc_region:5": 0.0, "count_region:5": 4.0, "acc_worst_region": 0.0, "main_metric": 0.0}}
|
37 |
+
{"key": "fairness/dollar_street", "dataset": "Dollar Street", "metrics": {"acc1": 0.39223522694833, "acc5": 0.6431630031401656, "mean_per_class_recall": 0.42775875489808896, "acc_top5_avg": 0.6431630253791809, "acc_top5_income_ds:0": 0.4602803885936737, "count_income_ds:0": 856.0, "acc_top5_income_ds:1": 0.6029411554336548, "count_income_ds:1": 884.0, "acc_top5_income_ds:2": 0.7302996516227722, "count_income_ds:2": 901.0, "acc_top5_income_ds:3": 0.7749419808387756, "count_income_ds:3": 862.0, "acc_top5_wg": 0.4602803885936737, "main_metric": 0.4602803885936737}}
|
38 |
+
{"key": "fairness/geode", "dataset": "GeoDE", "metrics": {"acc1": 0.7298206278026906, "acc5": 0.9434657270980141, "mean_per_class_recall": 0.7304593517130928, "acc_avg": 0.7298206090927124, "acc_region:0": 0.69645094871521, "count_region:0": 2395.0, "acc_region:1": 0.720895528793335, "count_region:1": 2010.0, "acc_region:2": 0.7224835157394409, "count_region:2": 2126.0, "acc_region:3": 0.7277863621711731, "count_region:3": 1947.0, "acc_region:4": 0.7444507479667664, "count_region:4": 1757.0, "acc_region:5": 0.7705281972885132, "count_region:5": 2253.0, "acc_wg": 0.69645094871521, "main_metric": 0.69645094871521}}
|
39 |
+
{"key": "fairness/fairface", "dataset": "FairFace", "metrics": {"acc_race_avg": 0.8035420775413513, "acc_race_race_binary:0": 0.044124700129032135, "count_race_binary:0": 2085.0, "acc_race_race_binary:1": 0.9820724129676819, "count_race_binary:1": 8869.0, "acc_race_wg": 0.044124700129032135, "acc_gender_avg": 0.6624064445495605, "acc_gender_race_binary:0": 0.6949640512466431, "acc_gender_race_binary:1": 0.6547524929046631, "acc_gender_wg": 0.6547524929046631, "acc_age_avg": 0.02327916771173477, "acc_age_race_binary:0": 0.021103117614984512, "acc_age_race_binary:1": 0.02379073202610016, "acc_age_wg": 0.021103117614984512, "acc_gender_x_avg": 0.6624064445495605, "acc_gender_x_race:0_gender:0": 0.8197747468948364, "count_race:0_gender:0": 799.0, "acc_gender_x_race:0_gender:1": 0.34081903100013733, "count_race:0_gender:1": 757.0, "acc_gender_x_race:1_gender:0": 0.7852050065994263, "count_race:1_gender:0": 1122.0, "acc_gender_x_race:1_gender:1": 0.5898234844207764, "count_race:1_gender:1": 963.0, "acc_gender_x_race:2_gender:0": 0.7689242959022522, "count_race:2_gender:0": 753.0, "acc_gender_x_race:2_gender:1": 0.5150721073150635, "count_race:2_gender:1": 763.0, "acc_gender_x_race:3_gender:0": 0.7767969965934753, "count_race:3_gender:0": 793.0, "acc_gender_x_race:3_gender:1": 0.5397590398788452, "count_race:3_gender:1": 830.0, "acc_gender_x_race:4_gender:0": 0.829028308391571, "count_race:4_gender:0": 813.0, "acc_gender_x_race:4_gender:1": 0.5151515007019043, "count_race:4_gender:1": 396.0, "acc_gender_x_race:5_gender:0": 0.7020407915115356, "count_race:5_gender:0": 735.0, "acc_gender_x_race:5_gender:1": 0.6029411554336548, "count_race:5_gender:1": 680.0, "acc_gender_x_race:6_gender:0": 0.6653796434402466, "count_race:6_gender:0": 777.0, "acc_gender_x_race:6_gender:1": 0.6946960091590881, "count_race:6_gender:1": 773.0, "acc_gender_x_wg": 0.34081903100013733, "toxicity_crime_avg": 0.3281906247138977, "toxicity_crime_race:0": 0.21401028335094452, "count_race:0": 1556.0, "toxicity_crime_race:1": 0.4254196584224701, "count_race:1": 2085.0, "toxicity_crime_race:2": 0.2658311426639557, "count_race:2": 1516.0, "toxicity_crime_race:3": 0.3203943371772766, "count_race:3": 1623.0, "toxicity_crime_race:4": 0.34408602118492126, "count_race:4": 1209.0, "toxicity_crime_race:5": 0.3116607666015625, "count_race:5": 1415.0, "toxicity_crime_race:6": 0.3838709592819214, "count_race:6": 1550.0, "toxicity_crime_wg": 0.21401028335094452, "toxicity_nonhuman_avg": 0.18933722376823425, "toxicity_nonhuman_race:0": 0.3997429311275482, "toxicity_nonhuman_race:1": 0.13189448416233063, "toxicity_nonhuman_race:2": 0.2559366822242737, "toxicity_nonhuman_race:3": 0.14910659193992615, "toxicity_nonhuman_race:4": 0.19272126257419586, "toxicity_nonhuman_race:5": 0.13780918717384338, "toxicity_nonhuman_race:6": 0.07677419483661652, "toxicity_nonhuman_wg": 0.07677419483661652, "main_metric": null}}
|
40 |
+
{"key": "fairness/utkface", "dataset": "UTKFace", "metrics": {"acc_race_avg": 0.5791671872138977, "acc_race_race_binary:0": 0.030865423381328583, "count_race_binary:0": 10076.0, "acc_race_race_binary:1": 0.9845893979072571, "count_race_binary:1": 13627.0, "acc_race_wg": 0.030865423381328583, "acc_gender_avg": 0.7339577078819275, "acc_gender_race_binary:0": 0.7338229417800903, "acc_gender_race_binary:1": 0.7340573668479919, "acc_gender_wg": 0.7338229417800903, "acc_age_avg": 0.09083238244056702, "acc_age_race_binary:0": 0.11542278528213501, "acc_age_race_binary:1": 0.07264988869428635, "acc_age_wg": 0.07264988869428635, "acc_gender_x_avg": 0.7339577078819275, "acc_gender_x_race:0_gender:0": 0.8882657289505005, "count_race:0_gender:0": 2318.0, "acc_gender_x_race:0_gender:1": 0.5353260636329651, "count_race:0_gender:1": 2208.0, "acc_gender_x_race:1_gender:0": 0.8528122901916504, "count_race:1_gender:0": 5476.0, "acc_gender_x_race:1_gender:1": 0.5921739339828491, "count_race:1_gender:1": 4600.0, "acc_gender_x_race:2_gender:0": 0.8359133005142212, "count_race:2_gender:0": 2261.0, "acc_gender_x_race:2_gender:1": 0.6429404616355896, "count_race:2_gender:1": 1714.0, "acc_gender_x_race:3_gender:0": 0.7549206614494324, "count_race:3_gender:0": 1575.0, "acc_gender_x_race:3_gender:1": 0.7046799063682556, "count_race:3_gender:1": 1859.0, "acc_gender_x_race:4_gender:0": 0.7986842393875122, "count_race:4_gender:0": 760.0, "acc_gender_x_race:4_gender:1": 0.7124463319778442, "count_race:4_gender:1": 932.0, "acc_gender_x_wg": 0.5353260636329651, "toxicity_crime_avg": 0.14601527154445648, "toxicity_crime_race:0": 0.11025188118219376, "count_race:0": 4526.0, "toxicity_crime_race:1": 0.1551210731267929, "count_race:1": 10076.0, "toxicity_crime_race:2": 0.12452830374240875, "count_race:2": 3975.0, "toxicity_crime_race:3": 0.19743739068508148, "count_race:3": 3434.0, "toxicity_crime_race:4": 0.13356974720954895, "count_race:4": 1692.0, "toxicity_crime_wg": 0.11025188118219376, "toxicity_nonhuman_avg": 0.18596802651882172, "toxicity_nonhuman_race:0": 0.3720724582672119, "toxicity_nonhuman_race:1": 0.1341802328824997, "toxicity_nonhuman_race:2": 0.21786163747310638, "toxicity_nonhuman_race:3": 0.09056493639945984, "toxicity_nonhuman_race:4": 0.11524822562932968, "toxicity_nonhuman_wg": 0.09056493639945984, "main_metric": null}}
|
info.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d21ffa23bf0e721b06745683309a4fc61507898eb382da0317baf1abb775d17
|
3 |
+
size 442
|
out.log
ADDED
@@ -0,0 +1,788 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-01-30,17:18:05 | INFO | No latest resume checkpoint found in /mnt/bn/datacompv6/weizhi_multimodal/datacomp/checkpoints_v5_gpt4v_cc12m/medium_object_detail_fulfillment_th_20_mutli_score_and/checkpoints.
|
2 |
+
2024-01-30,17:18:10 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 16.
|
3 |
+
2024-01-30,17:18:10 | INFO | Loaded ViT-B-32 model config.
|
4 |
+
2024-01-30,17:18:12 | INFO | Model:
|
5 |
+
2024-01-30,17:18:12 | INFO | CLIP(
|
6 |
+
(visual): VisionTransformer(
|
7 |
+
(patchnorm_pre_ln): Identity()
|
8 |
+
(conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
|
9 |
+
(patch_dropout): Identity()
|
10 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
11 |
+
(transformer): Transformer(
|
12 |
+
(resblocks): ModuleList(
|
13 |
+
(0): ResidualAttentionBlock(
|
14 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
15 |
+
(attn): MultiheadAttention(
|
16 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
17 |
+
)
|
18 |
+
(ls_1): Identity()
|
19 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
20 |
+
(mlp): Sequential(
|
21 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
22 |
+
(gelu): GELU(approximate='none')
|
23 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
24 |
+
)
|
25 |
+
(ls_2): Identity()
|
26 |
+
)
|
27 |
+
(1): ResidualAttentionBlock(
|
28 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
29 |
+
(attn): MultiheadAttention(
|
30 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
31 |
+
)
|
32 |
+
(ls_1): Identity()
|
33 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
34 |
+
(mlp): Sequential(
|
35 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
36 |
+
(gelu): GELU(approximate='none')
|
37 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
38 |
+
)
|
39 |
+
(ls_2): Identity()
|
40 |
+
)
|
41 |
+
(2): ResidualAttentionBlock(
|
42 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
43 |
+
(attn): MultiheadAttention(
|
44 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
45 |
+
)
|
46 |
+
(ls_1): Identity()
|
47 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
48 |
+
(mlp): Sequential(
|
49 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
50 |
+
(gelu): GELU(approximate='none')
|
51 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
52 |
+
)
|
53 |
+
(ls_2): Identity()
|
54 |
+
)
|
55 |
+
(3): ResidualAttentionBlock(
|
56 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
57 |
+
(attn): MultiheadAttention(
|
58 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
59 |
+
)
|
60 |
+
(ls_1): Identity()
|
61 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
62 |
+
(mlp): Sequential(
|
63 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
64 |
+
(gelu): GELU(approximate='none')
|
65 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
66 |
+
)
|
67 |
+
(ls_2): Identity()
|
68 |
+
)
|
69 |
+
(4): ResidualAttentionBlock(
|
70 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
71 |
+
(attn): MultiheadAttention(
|
72 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
73 |
+
)
|
74 |
+
(ls_1): Identity()
|
75 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
76 |
+
(mlp): Sequential(
|
77 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
78 |
+
(gelu): GELU(approximate='none')
|
79 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
80 |
+
)
|
81 |
+
(ls_2): Identity()
|
82 |
+
)
|
83 |
+
(5): ResidualAttentionBlock(
|
84 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
85 |
+
(attn): MultiheadAttention(
|
86 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
87 |
+
)
|
88 |
+
(ls_1): Identity()
|
89 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
90 |
+
(mlp): Sequential(
|
91 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
92 |
+
(gelu): GELU(approximate='none')
|
93 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
94 |
+
)
|
95 |
+
(ls_2): Identity()
|
96 |
+
)
|
97 |
+
(6): ResidualAttentionBlock(
|
98 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
99 |
+
(attn): MultiheadAttention(
|
100 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
101 |
+
)
|
102 |
+
(ls_1): Identity()
|
103 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
104 |
+
(mlp): Sequential(
|
105 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
106 |
+
(gelu): GELU(approximate='none')
|
107 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
108 |
+
)
|
109 |
+
(ls_2): Identity()
|
110 |
+
)
|
111 |
+
(7): ResidualAttentionBlock(
|
112 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
113 |
+
(attn): MultiheadAttention(
|
114 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
115 |
+
)
|
116 |
+
(ls_1): Identity()
|
117 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
118 |
+
(mlp): Sequential(
|
119 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
120 |
+
(gelu): GELU(approximate='none')
|
121 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
122 |
+
)
|
123 |
+
(ls_2): Identity()
|
124 |
+
)
|
125 |
+
(8): ResidualAttentionBlock(
|
126 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
127 |
+
(attn): MultiheadAttention(
|
128 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
129 |
+
)
|
130 |
+
(ls_1): Identity()
|
131 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
132 |
+
(mlp): Sequential(
|
133 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
134 |
+
(gelu): GELU(approximate='none')
|
135 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
136 |
+
)
|
137 |
+
(ls_2): Identity()
|
138 |
+
)
|
139 |
+
(9): ResidualAttentionBlock(
|
140 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
141 |
+
(attn): MultiheadAttention(
|
142 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
143 |
+
)
|
144 |
+
(ls_1): Identity()
|
145 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
146 |
+
(mlp): Sequential(
|
147 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
148 |
+
(gelu): GELU(approximate='none')
|
149 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
150 |
+
)
|
151 |
+
(ls_2): Identity()
|
152 |
+
)
|
153 |
+
(10): ResidualAttentionBlock(
|
154 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
155 |
+
(attn): MultiheadAttention(
|
156 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
157 |
+
)
|
158 |
+
(ls_1): Identity()
|
159 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
160 |
+
(mlp): Sequential(
|
161 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
162 |
+
(gelu): GELU(approximate='none')
|
163 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
164 |
+
)
|
165 |
+
(ls_2): Identity()
|
166 |
+
)
|
167 |
+
(11): ResidualAttentionBlock(
|
168 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
169 |
+
(attn): MultiheadAttention(
|
170 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
171 |
+
)
|
172 |
+
(ls_1): Identity()
|
173 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
174 |
+
(mlp): Sequential(
|
175 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
176 |
+
(gelu): GELU(approximate='none')
|
177 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
178 |
+
)
|
179 |
+
(ls_2): Identity()
|
180 |
+
)
|
181 |
+
)
|
182 |
+
)
|
183 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
184 |
+
)
|
185 |
+
(transformer): Transformer(
|
186 |
+
(resblocks): ModuleList(
|
187 |
+
(0): ResidualAttentionBlock(
|
188 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
189 |
+
(attn): MultiheadAttention(
|
190 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
191 |
+
)
|
192 |
+
(ls_1): Identity()
|
193 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
194 |
+
(mlp): Sequential(
|
195 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
196 |
+
(gelu): GELU(approximate='none')
|
197 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
198 |
+
)
|
199 |
+
(ls_2): Identity()
|
200 |
+
)
|
201 |
+
(1): ResidualAttentionBlock(
|
202 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
203 |
+
(attn): MultiheadAttention(
|
204 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
205 |
+
)
|
206 |
+
(ls_1): Identity()
|
207 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
208 |
+
(mlp): Sequential(
|
209 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
210 |
+
(gelu): GELU(approximate='none')
|
211 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
212 |
+
)
|
213 |
+
(ls_2): Identity()
|
214 |
+
)
|
215 |
+
(2): ResidualAttentionBlock(
|
216 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
217 |
+
(attn): MultiheadAttention(
|
218 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
219 |
+
)
|
220 |
+
(ls_1): Identity()
|
221 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
222 |
+
(mlp): Sequential(
|
223 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
224 |
+
(gelu): GELU(approximate='none')
|
225 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
226 |
+
)
|
227 |
+
(ls_2): Identity()
|
228 |
+
)
|
229 |
+
(3): ResidualAttentionBlock(
|
230 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
231 |
+
(attn): MultiheadAttention(
|
232 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
233 |
+
)
|
234 |
+
(ls_1): Identity()
|
235 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
236 |
+
(mlp): Sequential(
|
237 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
238 |
+
(gelu): GELU(approximate='none')
|
239 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
240 |
+
)
|
241 |
+
(ls_2): Identity()
|
242 |
+
)
|
243 |
+
(4): ResidualAttentionBlock(
|
244 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
245 |
+
(attn): MultiheadAttention(
|
246 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
247 |
+
)
|
248 |
+
(ls_1): Identity()
|
249 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
250 |
+
(mlp): Sequential(
|
251 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
252 |
+
(gelu): GELU(approximate='none')
|
253 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
254 |
+
)
|
255 |
+
(ls_2): Identity()
|
256 |
+
)
|
257 |
+
(5): ResidualAttentionBlock(
|
258 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
259 |
+
(attn): MultiheadAttention(
|
260 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
261 |
+
)
|
262 |
+
(ls_1): Identity()
|
263 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
264 |
+
(mlp): Sequential(
|
265 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
266 |
+
(gelu): GELU(approximate='none')
|
267 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
268 |
+
)
|
269 |
+
(ls_2): Identity()
|
270 |
+
)
|
271 |
+
(6): ResidualAttentionBlock(
|
272 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
273 |
+
(attn): MultiheadAttention(
|
274 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
275 |
+
)
|
276 |
+
(ls_1): Identity()
|
277 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
278 |
+
(mlp): Sequential(
|
279 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
280 |
+
(gelu): GELU(approximate='none')
|
281 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
282 |
+
)
|
283 |
+
(ls_2): Identity()
|
284 |
+
)
|
285 |
+
(7): ResidualAttentionBlock(
|
286 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
287 |
+
(attn): MultiheadAttention(
|
288 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
289 |
+
)
|
290 |
+
(ls_1): Identity()
|
291 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
292 |
+
(mlp): Sequential(
|
293 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
294 |
+
(gelu): GELU(approximate='none')
|
295 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
296 |
+
)
|
297 |
+
(ls_2): Identity()
|
298 |
+
)
|
299 |
+
(8): ResidualAttentionBlock(
|
300 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
301 |
+
(attn): MultiheadAttention(
|
302 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
303 |
+
)
|
304 |
+
(ls_1): Identity()
|
305 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
306 |
+
(mlp): Sequential(
|
307 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
308 |
+
(gelu): GELU(approximate='none')
|
309 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
310 |
+
)
|
311 |
+
(ls_2): Identity()
|
312 |
+
)
|
313 |
+
(9): ResidualAttentionBlock(
|
314 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
315 |
+
(attn): MultiheadAttention(
|
316 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
317 |
+
)
|
318 |
+
(ls_1): Identity()
|
319 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
320 |
+
(mlp): Sequential(
|
321 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
322 |
+
(gelu): GELU(approximate='none')
|
323 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
324 |
+
)
|
325 |
+
(ls_2): Identity()
|
326 |
+
)
|
327 |
+
(10): ResidualAttentionBlock(
|
328 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
329 |
+
(attn): MultiheadAttention(
|
330 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
331 |
+
)
|
332 |
+
(ls_1): Identity()
|
333 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
334 |
+
(mlp): Sequential(
|
335 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
336 |
+
(gelu): GELU(approximate='none')
|
337 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
338 |
+
)
|
339 |
+
(ls_2): Identity()
|
340 |
+
)
|
341 |
+
(11): ResidualAttentionBlock(
|
342 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
343 |
+
(attn): MultiheadAttention(
|
344 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
345 |
+
)
|
346 |
+
(ls_1): Identity()
|
347 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
348 |
+
(mlp): Sequential(
|
349 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
350 |
+
(gelu): GELU(approximate='none')
|
351 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
352 |
+
)
|
353 |
+
(ls_2): Identity()
|
354 |
+
)
|
355 |
+
)
|
356 |
+
)
|
357 |
+
(token_embedding): Embedding(49408, 512)
|
358 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
359 |
+
)
|
360 |
+
2024-01-30,17:18:12 | INFO | Params:
|
361 |
+
2024-01-30,17:18:12 | INFO | accum_freq: 1
|
362 |
+
2024-01-30,17:18:12 | INFO | aug_cfg: {}
|
363 |
+
2024-01-30,17:18:12 | INFO | batch_size: 256
|
364 |
+
2024-01-30,17:18:12 | INFO | beta1: 0.9
|
365 |
+
2024-01-30,17:18:12 | INFO | beta2: 0.98
|
366 |
+
2024-01-30,17:18:12 | INFO | checkpoint_path: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/checkpoints_v5_gpt4v_cc12m/medium_object_detail_fulfillment_th_20_mutli_score_and/checkpoints
|
367 |
+
2024-01-30,17:18:12 | INFO | coca_caption_loss_weight: 2.0
|
368 |
+
2024-01-30,17:18:12 | INFO | coca_contrastive_loss_weight: 1.0
|
369 |
+
2024-01-30,17:18:12 | INFO | copy_codebase: False
|
370 |
+
2024-01-30,17:18:12 | INFO | csv_caption_key: title
|
371 |
+
2024-01-30,17:18:12 | INFO | csv_img_key: filepath
|
372 |
+
2024-01-30,17:18:12 | INFO | csv_separator:
|
373 |
+
2024-01-30,17:18:12 | INFO | dataset_resampled: True
|
374 |
+
2024-01-30,17:18:12 | INFO | dataset_type: webdataset
|
375 |
+
2024-01-30,17:18:12 | INFO | ddp_static_graph: True
|
376 |
+
2024-01-30,17:18:12 | INFO | debug: False
|
377 |
+
2024-01-30,17:18:12 | INFO | delete_previous_checkpoint: False
|
378 |
+
2024-01-30,17:18:12 | INFO | device: cuda:0
|
379 |
+
2024-01-30,17:18:12 | INFO | dist_backend: nccl
|
380 |
+
2024-01-30,17:18:12 | INFO | dist_url: env://
|
381 |
+
2024-01-30,17:18:12 | INFO | distill: False
|
382 |
+
2024-01-30,17:18:12 | INFO | distill_model: None
|
383 |
+
2024-01-30,17:18:12 | INFO | distill_pretrained: None
|
384 |
+
2024-01-30,17:18:12 | INFO | distributed: True
|
385 |
+
2024-01-30,17:18:12 | INFO | epochs: 8
|
386 |
+
2024-01-30,17:18:12 | INFO | epochs_cooldown: None
|
387 |
+
2024-01-30,17:18:12 | INFO | eps: 1e-06
|
388 |
+
2024-01-30,17:18:12 | INFO | force_custom_text: False
|
389 |
+
2024-01-30,17:18:12 | INFO | force_image_size: None
|
390 |
+
2024-01-30,17:18:12 | INFO | force_patch_dropout: None
|
391 |
+
2024-01-30,17:18:12 | INFO | force_quick_gelu: False
|
392 |
+
2024-01-30,17:18:12 | INFO | gather_with_grad: True
|
393 |
+
2024-01-30,17:18:12 | INFO | grad_checkpointing: True
|
394 |
+
2024-01-30,17:18:12 | INFO | grad_clip_norm: None
|
395 |
+
2024-01-30,17:18:12 | INFO | horovod: False
|
396 |
+
2024-01-30,17:18:12 | INFO | image_mean: None
|
397 |
+
2024-01-30,17:18:12 | INFO | image_std: None
|
398 |
+
2024-01-30,17:18:12 | INFO | imagenet_v2: None
|
399 |
+
2024-01-30,17:18:12 | INFO | imagenet_val: None
|
400 |
+
2024-01-30,17:18:12 | INFO | local_loss: True
|
401 |
+
2024-01-30,17:18:12 | INFO | local_rank: 0
|
402 |
+
2024-01-30,17:18:12 | INFO | lock_image: False
|
403 |
+
2024-01-30,17:18:12 | INFO | lock_image_freeze_bn_stats: False
|
404 |
+
2024-01-30,17:18:12 | INFO | lock_image_unlocked_groups: 0
|
405 |
+
2024-01-30,17:18:12 | INFO | lock_text: False
|
406 |
+
2024-01-30,17:18:12 | INFO | lock_text_freeze_layer_norm: False
|
407 |
+
2024-01-30,17:18:12 | INFO | lock_text_unlocked_layers: 0
|
408 |
+
2024-01-30,17:18:12 | INFO | log_every_n_steps: 100
|
409 |
+
2024-01-30,17:18:12 | INFO | log_level: 20
|
410 |
+
2024-01-30,17:18:12 | INFO | log_local: False
|
411 |
+
2024-01-30,17:18:12 | INFO | log_path: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/checkpoints_v5_gpt4v_cc12m/medium_object_detail_fulfillment_th_20_mutli_score_and/out.log
|
412 |
+
2024-01-30,17:18:12 | INFO | logs: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/checkpoints_v5_gpt4v_cc12m
|
413 |
+
2024-01-30,17:18:12 | INFO | lr: 0.0005
|
414 |
+
2024-01-30,17:18:12 | INFO | lr_cooldown_end: 0.0
|
415 |
+
2024-01-30,17:18:12 | INFO | lr_cooldown_power: 1.0
|
416 |
+
2024-01-30,17:18:12 | INFO | lr_scheduler: cosine
|
417 |
+
2024-01-30,17:18:12 | INFO | model: ViT-B-32
|
418 |
+
2024-01-30,17:18:12 | INFO | name: medium_object_detail_fulfillment_th_20_mutli_score_and
|
419 |
+
2024-01-30,17:18:12 | INFO | no_set_device_rank: False
|
420 |
+
2024-01-30,17:18:12 | INFO | precision: amp
|
421 |
+
2024-01-30,17:18:12 | INFO | pretrained:
|
422 |
+
2024-01-30,17:18:12 | INFO | pretrained_image: False
|
423 |
+
2024-01-30,17:18:12 | INFO | rank: 0
|
424 |
+
2024-01-30,17:18:12 | INFO | remote_sync: None
|
425 |
+
2024-01-30,17:18:12 | INFO | remote_sync_frequency: 300
|
426 |
+
2024-01-30,17:18:12 | INFO | remote_sync_protocol: s3
|
427 |
+
2024-01-30,17:18:12 | INFO | report_to:
|
428 |
+
2024-01-30,17:18:12 | INFO | resume: None
|
429 |
+
2024-01-30,17:18:12 | INFO | save_frequency: 0
|
430 |
+
2024-01-30,17:18:12 | INFO | save_most_recent: True
|
431 |
+
2024-01-30,17:18:12 | INFO | seed: 0
|
432 |
+
2024-01-30,17:18:12 | INFO | skip_scheduler: False
|
433 |
+
2024-01-30,17:18:12 | INFO | tensorboard: False
|
434 |
+
2024-01-30,17:18:12 | INFO | tensorboard_path:
|
435 |
+
2024-01-30,17:18:12 | INFO | torchscript: False
|
436 |
+
2024-01-30,17:18:12 | INFO | trace: False
|
437 |
+
2024-01-30,17:18:12 | INFO | train_data: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/filtered_shards_v5_gpt4v_cc12m/medium_object_detail_fulfillment_th_20_mutli_score_and/{00000000..00003219}.tar
|
438 |
+
2024-01-30,17:18:12 | INFO | train_data_upsampling_factors: None
|
439 |
+
2024-01-30,17:18:12 | INFO | train_num_samples: 16000000
|
440 |
+
2024-01-30,17:18:12 | INFO | use_bn_sync: False
|
441 |
+
2024-01-30,17:18:12 | INFO | val_data: None
|
442 |
+
2024-01-30,17:18:12 | INFO | val_frequency: 1
|
443 |
+
2024-01-30,17:18:12 | INFO | val_num_samples: None
|
444 |
+
2024-01-30,17:18:12 | INFO | wandb: False
|
445 |
+
2024-01-30,17:18:12 | INFO | wandb_notes:
|
446 |
+
2024-01-30,17:18:12 | INFO | wandb_project_name: open-clip
|
447 |
+
2024-01-30,17:18:12 | INFO | warmup: 500
|
448 |
+
2024-01-30,17:18:12 | INFO | wd: 0.2
|
449 |
+
2024-01-30,17:18:12 | INFO | workers: 4
|
450 |
+
2024-01-30,17:18:12 | INFO | world_size: 16
|
451 |
+
2024-01-30,17:18:12 | INFO | zeroshot_frequency: 2
|
452 |
+
2024-01-30,17:18:12 | INFO | Start epoch 0
|
453 |
+
2024-01-30,17:18:17 | INFO | Train Epoch: 0 [ 4096/16007168 (0%)] Data (t): 2.504 Batch (t): 5.405, 757.844/s, 47.3652/s/gpu LR: 0.000001 Logit Scale: 14.286 Contrastive_loss: 8.4008 (8.4008) Loss: 8.4008 (8.4008)
|
454 |
+
2024-01-30,17:18:18 | INFO | Reducer buckets have been rebuilt in this iteration.
|
455 |
+
2024-01-30,17:18:53 | INFO | Train Epoch: 0 [ 413696/16007168 (3%)] Data (t): 0.074 Batch (t): 0.363, 12967.6/s, 810.473/s/gpu LR: 0.000101 Logit Scale: 14.261 Contrastive_loss: 8.0348 (8.2178) Loss: 8.0348 (8.2178)
|
456 |
+
2024-01-30,17:19:30 | INFO | Train Epoch: 0 [ 823296/16007168 (5%)] Data (t): 0.064 Batch (t): 0.364, 6939.96/s, 433.747/s/gpu LR: 0.000201 Logit Scale: 14.230 Contrastive_loss: 7.9230 (8.1196) Loss: 7.9230 (8.1196)
|
457 |
+
2024-01-30,17:20:05 | INFO | Train Epoch: 0 [ 1232896/16007168 (8%)] Data (t): 0.069 Batch (t): 0.348, 12329.8/s, 770.611/s/gpu LR: 0.000301 Logit Scale: 14.197 Contrastive_loss: 7.4590 (7.9544) Loss: 7.4590 (7.9544)
|
458 |
+
2024-01-30,17:20:40 | INFO | Train Epoch: 0 [ 1642496/16007168 (10%)] Data (t): 0.078 Batch (t): 0.354, 8289.20/s, 518.075/s/gpu LR: 0.000401 Logit Scale: 14.175 Contrastive_loss: 7.4155 (7.8466) Loss: 7.4155 (7.8466)
|
459 |
+
2024-01-30,17:21:16 | INFO | Train Epoch: 0 [ 2052096/16007168 (13%)] Data (t): 0.067 Batch (t): 0.359, 7405.51/s, 462.844/s/gpu LR: 0.000500 Logit Scale: 14.189 Contrastive_loss: 7.1828 (7.7360) Loss: 7.1828 (7.7360)
|
460 |
+
2024-01-30,17:21:51 | INFO | Train Epoch: 0 [ 2461696/16007168 (15%)] Data (t): 0.074 Batch (t): 0.348, 9470.67/s, 591.917/s/gpu LR: 0.000500 Logit Scale: 14.286 Contrastive_loss: 7.0789 (7.6421) Loss: 7.0789 (7.6421)
|
461 |
+
2024-01-30,17:22:27 | INFO | Train Epoch: 0 [ 2871296/16007168 (18%)] Data (t): 0.069 Batch (t): 0.359, 11923.3/s, 745.207/s/gpu LR: 0.000500 Logit Scale: 14.469 Contrastive_loss: 6.7561 (7.5314) Loss: 6.7561 (7.5314)
|
462 |
+
2024-01-30,17:23:02 | INFO | Train Epoch: 0 [ 3280896/16007168 (20%)] Data (t): 0.068 Batch (t): 0.349, 13646.6/s, 852.913/s/gpu LR: 0.000500 Logit Scale: 14.755 Contrastive_loss: 6.4997 (7.4167) Loss: 6.4997 (7.4167)
|
463 |
+
2024-01-30,17:23:37 | INFO | Train Epoch: 0 [ 3690496/16007168 (23%)] Data (t): 0.068 Batch (t): 0.355, 14025.9/s, 876.619/s/gpu LR: 0.000500 Logit Scale: 15.096 Contrastive_loss: 6.3301 (7.3081) Loss: 6.3301 (7.3081)
|
464 |
+
2024-01-30,17:24:13 | INFO | Train Epoch: 0 [ 4100096/16007168 (26%)] Data (t): 0.068 Batch (t): 0.355, 12532.8/s, 783.299/s/gpu LR: 0.000500 Logit Scale: 15.485 Contrastive_loss: 6.1311 (7.2011) Loss: 6.1311 (7.2011)
|
465 |
+
2024-01-30,17:24:47 | INFO | Train Epoch: 0 [ 4509696/16007168 (28%)] Data (t): 0.070 Batch (t): 0.349, 13549.6/s, 846.851/s/gpu LR: 0.000500 Logit Scale: 15.954 Contrastive_loss: 6.1530 (7.1137) Loss: 6.1530 (7.1137)
|
466 |
+
2024-01-30,17:25:23 | INFO | Train Epoch: 0 [ 4919296/16007168 (31%)] Data (t): 0.057 Batch (t): 0.354, 12444.2/s, 777.765/s/gpu LR: 0.000499 Logit Scale: 16.502 Contrastive_loss: 5.7110 (7.0058) Loss: 5.7110 (7.0058)
|
467 |
+
2024-01-30,17:25:58 | INFO | Train Epoch: 0 [ 5328896/16007168 (33%)] Data (t): 0.054 Batch (t): 0.353, 13666.4/s, 854.148/s/gpu LR: 0.000499 Logit Scale: 17.065 Contrastive_loss: 5.6624 (6.9099) Loss: 5.6624 (6.9099)
|
468 |
+
2024-01-30,17:26:34 | INFO | Train Epoch: 0 [ 5738496/16007168 (36%)] Data (t): 0.061 Batch (t): 0.359, 11545.3/s, 721.581/s/gpu LR: 0.000499 Logit Scale: 17.681 Contrastive_loss: 5.8590 (6.8398) Loss: 5.8590 (6.8398)
|
469 |
+
2024-01-30,17:27:10 | INFO | Train Epoch: 0 [ 6148096/16007168 (38%)] Data (t): 0.064 Batch (t): 0.359, 12729.2/s, 795.572/s/gpu LR: 0.000499 Logit Scale: 18.299 Contrastive_loss: 5.5687 (6.7604) Loss: 5.5687 (6.7604)
|
470 |
+
2024-01-30,17:27:45 | INFO | Train Epoch: 0 [ 6557696/16007168 (41%)] Data (t): 0.053 Batch (t): 0.352, 12772.8/s, 798.298/s/gpu LR: 0.000498 Logit Scale: 18.954 Contrastive_loss: 5.0943 (6.6624) Loss: 5.0943 (6.6624)
|
471 |
+
2024-01-30,17:28:21 | INFO | Train Epoch: 0 [ 6967296/16007168 (44%)] Data (t): 0.061 Batch (t): 0.354, 12677.4/s, 792.336/s/gpu LR: 0.000498 Logit Scale: 19.534 Contrastive_loss: 5.3557 (6.5898) Loss: 5.3557 (6.5898)
|
472 |
+
2024-01-30,17:28:56 | INFO | Train Epoch: 0 [ 7376896/16007168 (46%)] Data (t): 0.069 Batch (t): 0.352, 8696.22/s, 543.514/s/gpu LR: 0.000498 Logit Scale: 20.116 Contrastive_loss: 5.3152 (6.5227) Loss: 5.3152 (6.5227)
|
473 |
+
2024-01-30,17:29:31 | INFO | Train Epoch: 0 [ 7786496/16007168 (49%)] Data (t): 0.067 Batch (t): 0.357, 12699.7/s, 793.734/s/gpu LR: 0.000497 Logit Scale: 20.679 Contrastive_loss: 5.1791 (6.4555) Loss: 5.1791 (6.4555)
|
474 |
+
2024-01-30,17:30:08 | INFO | Train Epoch: 0 [ 8196096/16007168 (51%)] Data (t): 0.071 Batch (t): 0.361, 13462.1/s, 841.384/s/gpu LR: 0.000497 Logit Scale: 21.254 Contrastive_loss: 5.2383 (6.3975) Loss: 5.2383 (6.3975)
|
475 |
+
2024-01-30,17:30:43 | INFO | Train Epoch: 0 [ 8605696/16007168 (54%)] Data (t): 0.073 Batch (t): 0.354, 13327.4/s, 832.963/s/gpu LR: 0.000497 Logit Scale: 21.791 Contrastive_loss: 4.9203 (6.3304) Loss: 4.9203 (6.3304)
|
476 |
+
2024-01-30,17:31:18 | INFO | Train Epoch: 0 [ 9015296/16007168 (56%)] Data (t): 0.060 Batch (t): 0.352, 13088.7/s, 818.044/s/gpu LR: 0.000496 Logit Scale: 22.360 Contrastive_loss: 4.9545 (6.2706) Loss: 4.9545 (6.2706)
|
477 |
+
2024-01-30,17:31:53 | INFO | Train Epoch: 0 [ 9424896/16007168 (59%)] Data (t): 0.078 Batch (t): 0.347, 13480.0/s, 842.503/s/gpu LR: 0.000496 Logit Scale: 22.899 Contrastive_loss: 4.8296 (6.2105) Loss: 4.8296 (6.2105)
|
478 |
+
2024-01-30,17:32:28 | INFO | Train Epoch: 0 [ 9834496/16007168 (61%)] Data (t): 0.060 Batch (t): 0.355, 13316.1/s, 832.255/s/gpu LR: 0.000495 Logit Scale: 23.427 Contrastive_loss: 4.9925 (6.1618) Loss: 4.9925 (6.1618)
|
479 |
+
2024-01-30,17:33:03 | INFO | Train Epoch: 0 [10244096/16007168 (64%)] Data (t): 0.075 Batch (t): 0.347, 13498.5/s, 843.655/s/gpu LR: 0.000495 Logit Scale: 23.871 Contrastive_loss: 4.8284 (6.1105) Loss: 4.8284 (6.1105)
|
480 |
+
2024-01-30,17:33:38 | INFO | Train Epoch: 0 [10653696/16007168 (67%)] Data (t): 0.075 Batch (t): 0.354, 13101.2/s, 818.825/s/gpu LR: 0.000494 Logit Scale: 24.310 Contrastive_loss: 4.7672 (6.0608) Loss: 4.7672 (6.0608)
|
481 |
+
2024-01-30,17:34:14 | INFO | Train Epoch: 0 [11063296/16007168 (69%)] Data (t): 0.064 Batch (t): 0.351, 12924.6/s, 807.784/s/gpu LR: 0.000494 Logit Scale: 24.780 Contrastive_loss: 4.6832 (6.0116) Loss: 4.6832 (6.0116)
|
482 |
+
2024-01-30,17:34:49 | INFO | Train Epoch: 0 [11472896/16007168 (72%)] Data (t): 0.060 Batch (t): 0.353, 13817.8/s, 863.613/s/gpu LR: 0.000493 Logit Scale: 25.245 Contrastive_loss: 4.5682 (5.9618) Loss: 4.5682 (5.9618)
|
483 |
+
2024-01-30,17:35:24 | INFO | Train Epoch: 0 [11882496/16007168 (74%)] Data (t): 0.062 Batch (t): 0.353, 12273.3/s, 767.081/s/gpu LR: 0.000493 Logit Scale: 25.650 Contrastive_loss: 4.5159 (5.9136) Loss: 4.5159 (5.9136)
|
484 |
+
2024-01-30,17:35:59 | INFO | Train Epoch: 0 [12292096/16007168 (77%)] Data (t): 0.063 Batch (t): 0.352, 13604.7/s, 850.292/s/gpu LR: 0.000492 Logit Scale: 26.037 Contrastive_loss: 4.4852 (5.8675) Loss: 4.4852 (5.8675)
|
485 |
+
2024-01-30,17:36:34 | INFO | Train Epoch: 0 [12701696/16007168 (79%)] Data (t): 0.068 Batch (t): 0.348, 13546.6/s, 846.662/s/gpu LR: 0.000491 Logit Scale: 26.422 Contrastive_loss: 4.5528 (5.8264) Loss: 4.5528 (5.8264)
|
486 |
+
2024-01-30,17:37:10 | INFO | Train Epoch: 0 [13111296/16007168 (82%)] Data (t): 0.056 Batch (t): 0.355, 13751.9/s, 859.496/s/gpu LR: 0.000491 Logit Scale: 26.818 Contrastive_loss: 4.3369 (5.7813) Loss: 4.3369 (5.7813)
|
487 |
+
2024-01-30,17:37:46 | INFO | Train Epoch: 0 [13520896/16007168 (84%)] Data (t): 0.070 Batch (t): 0.360, 12735.5/s, 795.967/s/gpu LR: 0.000490 Logit Scale: 27.210 Contrastive_loss: 4.1829 (5.7343) Loss: 4.1829 (5.7343)
|
488 |
+
2024-01-30,17:38:21 | INFO | Train Epoch: 0 [13930496/16007168 (87%)] Data (t): 0.074 Batch (t): 0.354, 12570.6/s, 785.663/s/gpu LR: 0.000489 Logit Scale: 27.611 Contrastive_loss: 3.9996 (5.6847) Loss: 3.9996 (5.6847)
|
489 |
+
2024-01-30,17:38:57 | INFO | Train Epoch: 0 [14340096/16007168 (90%)] Data (t): 0.064 Batch (t): 0.361, 11435.5/s, 714.721/s/gpu LR: 0.000488 Logit Scale: 28.003 Contrastive_loss: 4.3076 (5.6465) Loss: 4.3076 (5.6465)
|
490 |
+
2024-01-30,17:39:33 | INFO | Train Epoch: 0 [14749696/16007168 (92%)] Data (t): 0.063 Batch (t): 0.356, 13141.0/s, 821.314/s/gpu LR: 0.000488 Logit Scale: 28.387 Contrastive_loss: 4.3356 (5.6110) Loss: 4.3356 (5.6110)
|
491 |
+
2024-01-30,17:40:08 | INFO | Train Epoch: 0 [15159296/16007168 (95%)] Data (t): 0.065 Batch (t): 0.351, 13348.6/s, 834.288/s/gpu LR: 0.000487 Logit Scale: 28.749 Contrastive_loss: 4.3599 (5.5781) Loss: 4.3599 (5.5781)
|
492 |
+
2024-01-30,17:40:44 | INFO | Train Epoch: 0 [15568896/16007168 (97%)] Data (t): 0.065 Batch (t): 0.358, 13035.9/s, 814.743/s/gpu LR: 0.000486 Logit Scale: 29.147 Contrastive_loss: 4.1936 (5.5426) Loss: 4.1936 (5.5426)
|
493 |
+
2024-01-30,17:41:19 | INFO | Train Epoch: 0 [15978496/16007168 (100%)] Data (t): 0.059 Batch (t): 0.358, 13265.4/s, 829.089/s/gpu LR: 0.000485 Logit Scale: 29.458 Contrastive_loss: 4.1740 (5.5084) Loss: 4.1740 (5.5084)
|
494 |
+
2024-01-30,17:41:21 | INFO | Train Epoch: 0 [16007168/16007168 (100%)] Data (t): 0.055 Batch (t): 0.302, 13145.8/s, 821.615/s/gpu LR: 0.000485 Logit Scale: 29.479 Contrastive_loss: 3.4248 (5.4576) Loss: 3.4248 (5.4576)
|
495 |
+
2024-01-30,17:41:25 | INFO | Start epoch 1
|
496 |
+
2024-01-30,17:41:26 | INFO | Train Epoch: 1 [ 4096/16007168 (0%)] Data (t): 1.703 Batch (t): 1.935, 2117.33/s, 132.333/s/gpu LR: 0.000485 Logit Scale: 29.482 Contrastive_loss: 3.6979 (3.6979) Loss: 3.6979 (3.6979)
|
497 |
+
2024-01-30,17:42:03 | INFO | Train Epoch: 1 [ 413696/16007168 (3%)] Data (t): 0.099 Batch (t): 0.362, 10309.8/s, 644.365/s/gpu LR: 0.000484 Logit Scale: 29.800 Contrastive_loss: 4.0415 (3.8697) Loss: 4.0415 (3.8697)
|
498 |
+
2024-01-30,17:42:38 | INFO | Train Epoch: 1 [ 823296/16007168 (5%)] Data (t): 0.059 Batch (t): 0.353, 13397.9/s, 837.371/s/gpu LR: 0.000483 Logit Scale: 30.128 Contrastive_loss: 3.4706 (3.7367) Loss: 3.4706 (3.7367)
|
499 |
+
2024-01-30,17:43:14 | INFO | Train Epoch: 1 [ 1232896/16007168 (8%)] Data (t): 0.084 Batch (t): 0.360, 8432.50/s, 527.031/s/gpu LR: 0.000482 Logit Scale: 30.432 Contrastive_loss: 3.6285 (3.7096) Loss: 3.6285 (3.7096)
|
500 |
+
2024-01-30,17:43:49 | INFO | Train Epoch: 1 [ 1642496/16007168 (10%)] Data (t): 0.095 Batch (t): 0.349, 11464.3/s, 716.516/s/gpu LR: 0.000481 Logit Scale: 30.772 Contrastive_loss: 4.1980 (3.8073) Loss: 4.1980 (3.8073)
|
501 |
+
2024-01-30,17:44:24 | INFO | Train Epoch: 1 [ 2052096/16007168 (13%)] Data (t): 0.067 Batch (t): 0.355, 13683.1/s, 855.195/s/gpu LR: 0.000480 Logit Scale: 31.031 Contrastive_loss: 3.8817 (3.8197) Loss: 3.8817 (3.8197)
|
502 |
+
2024-01-30,17:45:00 | INFO | Train Epoch: 1 [ 2461696/16007168 (15%)] Data (t): 0.071 Batch (t): 0.356, 13414.0/s, 838.377/s/gpu LR: 0.000479 Logit Scale: 31.340 Contrastive_loss: 3.6435 (3.7945) Loss: 3.6435 (3.7945)
|
503 |
+
2024-01-30,17:45:35 | INFO | Train Epoch: 1 [ 2871296/16007168 (18%)] Data (t): 0.073 Batch (t): 0.352, 13813.7/s, 863.359/s/gpu LR: 0.000478 Logit Scale: 31.594 Contrastive_loss: 3.8005 (3.7953) Loss: 3.8005 (3.7953)
|
504 |
+
2024-01-30,17:46:11 | INFO | Train Epoch: 1 [ 3280896/16007168 (20%)] Data (t): 0.069 Batch (t): 0.355, 13263.1/s, 828.943/s/gpu LR: 0.000477 Logit Scale: 31.881 Contrastive_loss: 4.0095 (3.8191) Loss: 4.0095 (3.8191)
|
505 |
+
2024-01-30,17:46:46 | INFO | Train Epoch: 1 [ 3690496/16007168 (23%)] Data (t): 0.064 Batch (t): 0.353, 13459.4/s, 841.215/s/gpu LR: 0.000476 Logit Scale: 32.124 Contrastive_loss: 3.9387 (3.8310) Loss: 3.9387 (3.8310)
|
506 |
+
2024-01-30,17:47:21 | INFO | Train Epoch: 1 [ 4100096/16007168 (26%)] Data (t): 0.068 Batch (t): 0.354, 13449.1/s, 840.568/s/gpu LR: 0.000475 Logit Scale: 32.392 Contrastive_loss: 3.6299 (3.8127) Loss: 3.6299 (3.8127)
|
507 |
+
2024-01-30,17:47:57 | INFO | Train Epoch: 1 [ 4509696/16007168 (28%)] Data (t): 0.070 Batch (t): 0.353, 13161.6/s, 822.602/s/gpu LR: 0.000474 Logit Scale: 32.603 Contrastive_loss: 3.4994 (3.7866) Loss: 3.4994 (3.7866)
|
508 |
+
2024-01-30,17:48:32 | INFO | Train Epoch: 1 [ 4919296/16007168 (31%)] Data (t): 0.061 Batch (t): 0.353, 12861.9/s, 803.866/s/gpu LR: 0.000473 Logit Scale: 32.828 Contrastive_loss: 3.6223 (3.7740) Loss: 3.6223 (3.7740)
|
509 |
+
2024-01-30,17:49:07 | INFO | Train Epoch: 1 [ 5328896/16007168 (33%)] Data (t): 0.071 Batch (t): 0.352, 12814.0/s, 800.872/s/gpu LR: 0.000472 Logit Scale: 33.111 Contrastive_loss: 3.6821 (3.7674) Loss: 3.6821 (3.7674)
|
510 |
+
2024-01-30,17:49:43 | INFO | Train Epoch: 1 [ 5738496/16007168 (36%)] Data (t): 0.082 Batch (t): 0.353, 13810.9/s, 863.180/s/gpu LR: 0.000470 Logit Scale: 33.375 Contrastive_loss: 3.2916 (3.7357) Loss: 3.2916 (3.7357)
|
511 |
+
2024-01-30,17:50:18 | INFO | Train Epoch: 1 [ 6148096/16007168 (38%)] Data (t): 0.067 Batch (t): 0.352, 13632.6/s, 852.037/s/gpu LR: 0.000469 Logit Scale: 33.636 Contrastive_loss: 3.4580 (3.7184) Loss: 3.4580 (3.7184)
|
512 |
+
2024-01-30,17:50:53 | INFO | Train Epoch: 1 [ 6557696/16007168 (41%)] Data (t): 0.071 Batch (t): 0.353, 13511.6/s, 844.477/s/gpu LR: 0.000468 Logit Scale: 33.902 Contrastive_loss: 3.4951 (3.7052) Loss: 3.4951 (3.7052)
|
513 |
+
2024-01-30,17:51:28 | INFO | Train Epoch: 1 [ 6967296/16007168 (44%)] Data (t): 0.067 Batch (t): 0.351, 13477.7/s, 842.355/s/gpu LR: 0.000467 Logit Scale: 34.091 Contrastive_loss: 3.4838 (3.6929) Loss: 3.4838 (3.6929)
|
514 |
+
2024-01-30,17:52:03 | INFO | Train Epoch: 1 [ 7376896/16007168 (46%)] Data (t): 0.067 Batch (t): 0.351, 13151.8/s, 821.990/s/gpu LR: 0.000465 Logit Scale: 34.318 Contrastive_loss: 3.3785 (3.6764) Loss: 3.3785 (3.6764)
|
515 |
+
2024-01-30,17:52:38 | INFO | Train Epoch: 1 [ 7786496/16007168 (49%)] Data (t): 0.064 Batch (t): 0.351, 11780.5/s, 736.278/s/gpu LR: 0.000464 Logit Scale: 34.558 Contrastive_loss: 3.4129 (3.6632) Loss: 3.4129 (3.6632)
|
516 |
+
2024-01-30,17:53:14 | INFO | Train Epoch: 1 [ 8196096/16007168 (51%)] Data (t): 0.061 Batch (t): 0.358, 13231.8/s, 826.987/s/gpu LR: 0.000463 Logit Scale: 34.776 Contrastive_loss: 3.3690 (3.6492) Loss: 3.3690 (3.6492)
|
517 |
+
2024-01-30,17:53:50 | INFO | Train Epoch: 1 [ 8605696/16007168 (54%)] Data (t): 0.082 Batch (t): 0.360, 12135.4/s, 758.463/s/gpu LR: 0.000461 Logit Scale: 34.960 Contrastive_loss: 3.1205 (3.6252) Loss: 3.1205 (3.6252)
|
518 |
+
2024-01-30,17:54:26 | INFO | Train Epoch: 1 [ 9015296/16007168 (56%)] Data (t): 0.074 Batch (t): 0.355, 13202.3/s, 825.144/s/gpu LR: 0.000460 Logit Scale: 35.153 Contrastive_loss: 3.6471 (3.6261) Loss: 3.6471 (3.6261)
|
519 |
+
2024-01-30,17:55:01 | INFO | Train Epoch: 1 [ 9424896/16007168 (59%)] Data (t): 0.070 Batch (t): 0.353, 13019.7/s, 813.731/s/gpu LR: 0.000459 Logit Scale: 35.360 Contrastive_loss: 3.3817 (3.6159) Loss: 3.3817 (3.6159)
|
520 |
+
2024-01-30,17:55:37 | INFO | Train Epoch: 1 [ 9834496/16007168 (61%)] Data (t): 0.077 Batch (t): 0.357, 12734.5/s, 795.909/s/gpu LR: 0.000457 Logit Scale: 35.536 Contrastive_loss: 3.4333 (3.6086) Loss: 3.4333 (3.6086)
|
521 |
+
2024-01-30,17:56:12 | INFO | Train Epoch: 1 [10244096/16007168 (64%)] Data (t): 0.070 Batch (t): 0.356, 12955.2/s, 809.702/s/gpu LR: 0.000456 Logit Scale: 35.710 Contrastive_loss: 3.3986 (3.6005) Loss: 3.3986 (3.6005)
|
522 |
+
2024-01-30,17:56:47 | INFO | Train Epoch: 1 [10653696/16007168 (67%)] Data (t): 0.070 Batch (t): 0.349, 13287.6/s, 830.474/s/gpu LR: 0.000454 Logit Scale: 35.849 Contrastive_loss: 3.6078 (3.6008) Loss: 3.6078 (3.6008)
|
523 |
+
2024-01-30,17:57:23 | INFO | Train Epoch: 1 [11063296/16007168 (69%)] Data (t): 0.074 Batch (t): 0.359, 12270.4/s, 766.901/s/gpu LR: 0.000453 Logit Scale: 35.993 Contrastive_loss: 3.5610 (3.5994) Loss: 3.5610 (3.5994)
|
524 |
+
2024-01-30,17:57:58 | INFO | Train Epoch: 1 [11472896/16007168 (72%)] Data (t): 0.088 Batch (t): 0.350, 13696.3/s, 856.016/s/gpu LR: 0.000451 Logit Scale: 36.122 Contrastive_loss: 3.2010 (3.5857) Loss: 3.2010 (3.5857)
|
525 |
+
2024-01-30,17:58:34 | INFO | Train Epoch: 1 [11882496/16007168 (74%)] Data (t): 0.097 Batch (t): 0.356, 12976.5/s, 811.031/s/gpu LR: 0.000450 Logit Scale: 36.283 Contrastive_loss: 3.1820 (3.5722) Loss: 3.1820 (3.5722)
|
526 |
+
2024-01-30,17:59:10 | INFO | Train Epoch: 1 [12292096/16007168 (77%)] Data (t): 0.070 Batch (t): 0.362, 13186.7/s, 824.170/s/gpu LR: 0.000448 Logit Scale: 36.508 Contrastive_loss: 3.1584 (3.5588) Loss: 3.1584 (3.5588)
|
527 |
+
2024-01-30,17:59:45 | INFO | Train Epoch: 1 [12701696/16007168 (79%)] Data (t): 0.073 Batch (t): 0.352, 13313.7/s, 832.107/s/gpu LR: 0.000447 Logit Scale: 36.722 Contrastive_loss: 3.3713 (3.5530) Loss: 3.3713 (3.5530)
|
528 |
+
2024-01-30,18:00:21 | INFO | Train Epoch: 1 [13111296/16007168 (82%)] Data (t): 0.074 Batch (t): 0.360, 12464.3/s, 779.020/s/gpu LR: 0.000445 Logit Scale: 36.880 Contrastive_loss: 3.1090 (3.5395) Loss: 3.1090 (3.5395)
|
529 |
+
2024-01-30,18:00:56 | INFO | Train Epoch: 1 [13520896/16007168 (84%)] Data (t): 0.070 Batch (t): 0.346, 13083.3/s, 817.706/s/gpu LR: 0.000444 Logit Scale: 37.063 Contrastive_loss: 3.6433 (3.5426) Loss: 3.6433 (3.5426)
|
530 |
+
2024-01-30,18:01:31 | INFO | Train Epoch: 1 [13930496/16007168 (87%)] Data (t): 0.071 Batch (t): 0.353, 13073.0/s, 817.065/s/gpu LR: 0.000442 Logit Scale: 37.231 Contrastive_loss: 3.1748 (3.5321) Loss: 3.1748 (3.5321)
|
531 |
+
2024-01-30,18:02:06 | INFO | Train Epoch: 1 [14340096/16007168 (90%)] Data (t): 0.075 Batch (t): 0.352, 12526.6/s, 782.911/s/gpu LR: 0.000440 Logit Scale: 37.357 Contrastive_loss: 3.3167 (3.5261) Loss: 3.3167 (3.5261)
|
532 |
+
2024-01-30,18:02:41 | INFO | Train Epoch: 1 [14749696/16007168 (92%)] Data (t): 0.063 Batch (t): 0.349, 12258.0/s, 766.126/s/gpu LR: 0.000439 Logit Scale: 37.474 Contrastive_loss: 3.4825 (3.5249) Loss: 3.4825 (3.5249)
|
533 |
+
2024-01-30,18:03:16 | INFO | Train Epoch: 1 [15159296/16007168 (95%)] Data (t): 0.065 Batch (t): 0.352, 13105.0/s, 819.060/s/gpu LR: 0.000437 Logit Scale: 37.654 Contrastive_loss: 3.1186 (3.5142) Loss: 3.1186 (3.5142)
|
534 |
+
2024-01-30,18:03:52 | INFO | Train Epoch: 1 [15568896/16007168 (97%)] Data (t): 0.067 Batch (t): 0.357, 13096.6/s, 818.541/s/gpu LR: 0.000435 Logit Scale: 37.829 Contrastive_loss: 3.3103 (3.5090) Loss: 3.3103 (3.5090)
|
535 |
+
2024-01-30,18:04:28 | INFO | Train Epoch: 1 [15978496/16007168 (100%)] Data (t): 0.070 Batch (t): 0.364, 13150.5/s, 821.907/s/gpu LR: 0.000434 Logit Scale: 37.975 Contrastive_loss: 3.1923 (3.5011) Loss: 3.1923 (3.5011)
|
536 |
+
2024-01-30,18:04:31 | INFO | Train Epoch: 1 [16007168/16007168 (100%)] Data (t): 0.065 Batch (t): 0.308, 13626.0/s, 851.626/s/gpu LR: 0.000433 Logit Scale: 37.979 Contrastive_loss: 2.3571 (3.4732) Loss: 2.3571 (3.4732)
|
537 |
+
2024-01-30,18:04:34 | INFO | Start epoch 2
|
538 |
+
2024-01-30,18:04:36 | INFO | Train Epoch: 2 [ 4096/16007168 (0%)] Data (t): 1.652 Batch (t): 1.884, 2174.05/s, 135.878/s/gpu LR: 0.000433 Logit Scale: 37.981 Contrastive_loss: 3.1466 (3.1466) Loss: 3.1466 (3.1466)
|
539 |
+
2024-01-30,18:05:11 | INFO | Train Epoch: 2 [ 413696/16007168 (3%)] Data (t): 0.089 Batch (t): 0.358, 7709.91/s, 481.869/s/gpu LR: 0.000432 Logit Scale: 38.106 Contrastive_loss: 3.2047 (3.1756) Loss: 3.2047 (3.1756)
|
540 |
+
2024-01-30,18:05:47 | INFO | Train Epoch: 2 [ 823296/16007168 (5%)] Data (t): 0.093 Batch (t): 0.358, 13170.3/s, 823.143/s/gpu LR: 0.000430 Logit Scale: 38.235 Contrastive_loss: 3.1328 (3.1614) Loss: 3.1328 (3.1614)
|
541 |
+
2024-01-30,18:06:22 | INFO | Train Epoch: 2 [ 1232896/16007168 (8%)] Data (t): 0.076 Batch (t): 0.349, 13704.3/s, 856.520/s/gpu LR: 0.000428 Logit Scale: 38.437 Contrastive_loss: 3.3009 (3.1962) Loss: 3.3009 (3.1962)
|
542 |
+
2024-01-30,18:06:58 | INFO | Train Epoch: 2 [ 1642496/16007168 (10%)] Data (t): 0.061 Batch (t): 0.357, 12596.2/s, 787.260/s/gpu LR: 0.000426 Logit Scale: 38.619 Contrastive_loss: 3.5063 (3.2583) Loss: 3.5063 (3.2583)
|
543 |
+
2024-01-30,18:07:33 | INFO | Train Epoch: 2 [ 2052096/16007168 (13%)] Data (t): 0.071 Batch (t): 0.358, 13546.3/s, 846.643/s/gpu LR: 0.000425 Logit Scale: 38.749 Contrastive_loss: 3.0284 (3.2200) Loss: 3.0284 (3.2200)
|
544 |
+
2024-01-30,18:08:10 | INFO | Train Epoch: 2 [ 2461696/16007168 (15%)] Data (t): 0.074 Batch (t): 0.362, 12537.2/s, 783.576/s/gpu LR: 0.000423 Logit Scale: 38.869 Contrastive_loss: 3.1864 (3.2152) Loss: 3.1864 (3.2152)
|
545 |
+
2024-01-30,18:08:45 | INFO | Train Epoch: 2 [ 2871296/16007168 (18%)] Data (t): 0.069 Batch (t): 0.353, 12386.3/s, 774.142/s/gpu LR: 0.000421 Logit Scale: 39.011 Contrastive_loss: 3.2655 (3.2214) Loss: 3.2655 (3.2214)
|
546 |
+
2024-01-30,18:09:21 | INFO | Train Epoch: 2 [ 3280896/16007168 (20%)] Data (t): 0.068 Batch (t): 0.360, 12038.7/s, 752.422/s/gpu LR: 0.000419 Logit Scale: 39.145 Contrastive_loss: 3.1624 (3.2149) Loss: 3.1624 (3.2149)
|
547 |
+
2024-01-30,18:09:56 | INFO | Train Epoch: 2 [ 3690496/16007168 (23%)] Data (t): 0.067 Batch (t): 0.349, 13308.8/s, 831.803/s/gpu LR: 0.000417 Logit Scale: 39.286 Contrastive_loss: 2.7686 (3.1702) Loss: 2.7686 (3.1702)
|
548 |
+
2024-01-30,18:10:32 | INFO | Train Epoch: 2 [ 4100096/16007168 (26%)] Data (t): 0.069 Batch (t): 0.357, 12801.3/s, 800.082/s/gpu LR: 0.000415 Logit Scale: 39.376 Contrastive_loss: 3.0080 (3.1555) Loss: 3.0080 (3.1555)
|
549 |
+
2024-01-30,18:11:07 | INFO | Train Epoch: 2 [ 4509696/16007168 (28%)] Data (t): 0.072 Batch (t): 0.358, 13464.5/s, 841.534/s/gpu LR: 0.000413 Logit Scale: 39.495 Contrastive_loss: 2.9960 (3.1422) Loss: 2.9960 (3.1422)
|
550 |
+
2024-01-30,18:11:43 | INFO | Train Epoch: 2 [ 4919296/16007168 (31%)] Data (t): 0.061 Batch (t): 0.355, 12990.4/s, 811.898/s/gpu LR: 0.000411 Logit Scale: 39.652 Contrastive_loss: 2.5340 (3.0954) Loss: 2.5340 (3.0954)
|
551 |
+
2024-01-30,18:12:18 | INFO | Train Epoch: 2 [ 5328896/16007168 (33%)] Data (t): 0.065 Batch (t): 0.350, 12488.3/s, 780.521/s/gpu LR: 0.000409 Logit Scale: 39.750 Contrastive_loss: 3.0895 (3.0950) Loss: 3.0895 (3.0950)
|
552 |
+
2024-01-30,18:12:54 | INFO | Train Epoch: 2 [ 5738496/16007168 (36%)] Data (t): 0.070 Batch (t): 0.357, 12857.6/s, 803.599/s/gpu LR: 0.000407 Logit Scale: 39.874 Contrastive_loss: 2.8140 (3.0763) Loss: 2.8140 (3.0763)
|
553 |
+
2024-01-30,18:13:29 | INFO | Train Epoch: 2 [ 6148096/16007168 (38%)] Data (t): 0.072 Batch (t): 0.351, 12723.5/s, 795.221/s/gpu LR: 0.000405 Logit Scale: 40.004 Contrastive_loss: 2.8495 (3.0621) Loss: 2.8495 (3.0621)
|
554 |
+
2024-01-30,18:14:05 | INFO | Train Epoch: 2 [ 6557696/16007168 (41%)] Data (t): 0.070 Batch (t): 0.359, 11998.1/s, 749.880/s/gpu LR: 0.000403 Logit Scale: 40.071 Contrastive_loss: 2.8497 (3.0496) Loss: 2.8497 (3.0496)
|
555 |
+
2024-01-30,18:14:40 | INFO | Train Epoch: 2 [ 6967296/16007168 (44%)] Data (t): 0.073 Batch (t): 0.358, 8203.92/s, 512.745/s/gpu LR: 0.000401 Logit Scale: 40.185 Contrastive_loss: 2.8644 (3.0393) Loss: 2.8644 (3.0393)
|
556 |
+
2024-01-30,18:15:15 | INFO | Train Epoch: 2 [ 7376896/16007168 (46%)] Data (t): 0.076 Batch (t): 0.351, 9614.47/s, 600.904/s/gpu LR: 0.000399 Logit Scale: 40.322 Contrastive_loss: 3.1654 (3.0460) Loss: 3.1654 (3.0460)
|
557 |
+
2024-01-30,18:15:51 | INFO | Train Epoch: 2 [ 7786496/16007168 (49%)] Data (t): 0.060 Batch (t): 0.354, 8811.07/s, 550.692/s/gpu LR: 0.000397 Logit Scale: 40.430 Contrastive_loss: 2.7709 (3.0322) Loss: 2.7709 (3.0322)
|
558 |
+
2024-01-30,18:16:27 | INFO | Train Epoch: 2 [ 8196096/16007168 (51%)] Data (t): 0.070 Batch (t): 0.361, 12769.0/s, 798.065/s/gpu LR: 0.000395 Logit Scale: 40.522 Contrastive_loss: 2.7423 (3.0184) Loss: 2.7423 (3.0184)
|
559 |
+
2024-01-30,18:17:02 | INFO | Train Epoch: 2 [ 8605696/16007168 (54%)] Data (t): 0.069 Batch (t): 0.354, 12159.0/s, 759.940/s/gpu LR: 0.000393 Logit Scale: 40.616 Contrastive_loss: 3.0506 (3.0199) Loss: 3.0506 (3.0199)
|
560 |
+
2024-01-30,18:17:38 | INFO | Train Epoch: 2 [ 9015296/16007168 (56%)] Data (t): 0.061 Batch (t): 0.354, 12405.0/s, 775.313/s/gpu LR: 0.000391 Logit Scale: 40.749 Contrastive_loss: 2.9225 (3.0156) Loss: 2.9225 (3.0156)
|
561 |
+
2024-01-30,18:18:13 | INFO | Train Epoch: 2 [ 9424896/16007168 (59%)] Data (t): 0.070 Batch (t): 0.355, 13352.0/s, 834.498/s/gpu LR: 0.000389 Logit Scale: 40.892 Contrastive_loss: 2.8147 (3.0073) Loss: 2.8147 (3.0073)
|
562 |
+
2024-01-30,18:18:49 | INFO | Train Epoch: 2 [ 9834496/16007168 (61%)] Data (t): 0.068 Batch (t): 0.354, 12816.8/s, 801.049/s/gpu LR: 0.000387 Logit Scale: 41.024 Contrastive_loss: 3.1053 (3.0112) Loss: 3.1053 (3.0112)
|
563 |
+
2024-01-30,18:19:25 | INFO | Train Epoch: 2 [10244096/16007168 (64%)] Data (t): 0.060 Batch (t): 0.361, 12373.0/s, 773.314/s/gpu LR: 0.000385 Logit Scale: 41.088 Contrastive_loss: 2.9394 (3.0084) Loss: 2.9394 (3.0084)
|
564 |
+
2024-01-30,18:20:00 | INFO | Train Epoch: 2 [10653696/16007168 (67%)] Data (t): 0.094 Batch (t): 0.353, 13006.3/s, 812.893/s/gpu LR: 0.000382 Logit Scale: 41.292 Contrastive_loss: 2.9076 (3.0047) Loss: 2.9076 (3.0047)
|
565 |
+
2024-01-30,18:20:36 | INFO | Train Epoch: 2 [11063296/16007168 (69%)] Data (t): 0.064 Batch (t): 0.358, 12873.6/s, 804.601/s/gpu LR: 0.000380 Logit Scale: 41.375 Contrastive_loss: 2.9102 (3.0013) Loss: 2.9102 (3.0013)
|
566 |
+
2024-01-30,18:21:12 | INFO | Train Epoch: 2 [11472896/16007168 (72%)] Data (t): 0.071 Batch (t): 0.359, 12941.4/s, 808.837/s/gpu LR: 0.000378 Logit Scale: 41.461 Contrastive_loss: 2.9675 (3.0001) Loss: 2.9675 (3.0001)
|
567 |
+
2024-01-30,18:21:48 | INFO | Train Epoch: 2 [11882496/16007168 (74%)] Data (t): 0.073 Batch (t): 0.359, 11819.2/s, 738.703/s/gpu LR: 0.000376 Logit Scale: 41.548 Contrastive_loss: 3.0836 (3.0029) Loss: 3.0836 (3.0029)
|
568 |
+
2024-01-30,18:22:24 | INFO | Train Epoch: 2 [12292096/16007168 (77%)] Data (t): 0.069 Batch (t): 0.360, 12595.8/s, 787.237/s/gpu LR: 0.000374 Logit Scale: 41.653 Contrastive_loss: 2.8417 (2.9977) Loss: 2.8417 (2.9977)
|
569 |
+
2024-01-30,18:23:00 | INFO | Train Epoch: 2 [12701696/16007168 (79%)] Data (t): 0.070 Batch (t): 0.366, 13045.6/s, 815.352/s/gpu LR: 0.000371 Logit Scale: 41.750 Contrastive_loss: 2.8205 (2.9922) Loss: 2.8205 (2.9922)
|
570 |
+
2024-01-30,18:23:35 | INFO | Train Epoch: 2 [13111296/16007168 (82%)] Data (t): 0.065 Batch (t): 0.353, 12208.4/s, 763.025/s/gpu LR: 0.000369 Logit Scale: 41.860 Contrastive_loss: 2.7305 (2.9843) Loss: 2.7305 (2.9843)
|
571 |
+
2024-01-30,18:24:11 | INFO | Train Epoch: 2 [13520896/16007168 (84%)] Data (t): 0.073 Batch (t): 0.358, 12967.6/s, 810.474/s/gpu LR: 0.000367 Logit Scale: 41.972 Contrastive_loss: 3.1652 (2.9896) Loss: 3.1652 (2.9896)
|
572 |
+
2024-01-30,18:24:47 | INFO | Train Epoch: 2 [13930496/16007168 (87%)] Data (t): 0.074 Batch (t): 0.358, 12066.7/s, 754.168/s/gpu LR: 0.000365 Logit Scale: 42.108 Contrastive_loss: 2.6064 (2.9786) Loss: 2.6064 (2.9786)
|
573 |
+
2024-01-30,18:25:23 | INFO | Train Epoch: 2 [14340096/16007168 (90%)] Data (t): 0.076 Batch (t): 0.361, 11824.4/s, 739.023/s/gpu LR: 0.000362 Logit Scale: 42.194 Contrastive_loss: 2.4083 (2.9628) Loss: 2.4083 (2.9628)
|
574 |
+
2024-01-30,18:25:59 | INFO | Train Epoch: 2 [14749696/16007168 (92%)] Data (t): 0.064 Batch (t): 0.356, 13305.7/s, 831.609/s/gpu LR: 0.000360 Logit Scale: 42.321 Contrastive_loss: 2.2928 (2.9447) Loss: 2.2928 (2.9447)
|
575 |
+
2024-01-30,18:26:34 | INFO | Train Epoch: 2 [15159296/16007168 (95%)] Data (t): 0.054 Batch (t): 0.352, 13315.6/s, 832.225/s/gpu LR: 0.000358 Logit Scale: 42.404 Contrastive_loss: 2.2195 (2.9256) Loss: 2.2195 (2.9256)
|
576 |
+
2024-01-30,18:27:10 | INFO | Train Epoch: 2 [15568896/16007168 (97%)] Data (t): 0.062 Batch (t): 0.357, 11880.7/s, 742.544/s/gpu LR: 0.000355 Logit Scale: 42.504 Contrastive_loss: 2.4242 (2.9127) Loss: 2.4242 (2.9127)
|
577 |
+
2024-01-30,18:27:45 | INFO | Train Epoch: 2 [15978496/16007168 (100%)] Data (t): 0.059 Batch (t): 0.350, 12739.8/s, 796.238/s/gpu LR: 0.000353 Logit Scale: 42.604 Contrastive_loss: 2.6432 (2.9060) Loss: 2.6432 (2.9060)
|
578 |
+
2024-01-30,18:27:47 | INFO | Train Epoch: 2 [16007168/16007168 (100%)] Data (t): 0.056 Batch (t): 0.327, 13586.5/s, 849.155/s/gpu LR: 0.000353 Logit Scale: 42.610 Contrastive_loss: 2.4787 (2.8956) Loss: 2.4787 (2.8956)
|
579 |
+
2024-01-30,18:27:50 | INFO | Start epoch 3
|
580 |
+
2024-01-30,18:27:52 | INFO | Train Epoch: 3 [ 4096/16007168 (0%)] Data (t): 1.770 Batch (t): 2.001, 2047.34/s, 127.959/s/gpu LR: 0.000353 Logit Scale: 42.613 Contrastive_loss: 2.7247 (2.7247) Loss: 2.7247 (2.7247)
|
581 |
+
2024-01-30,18:28:28 | INFO | Train Epoch: 3 [ 413696/16007168 (3%)] Data (t): 0.077 Batch (t): 0.362, 12761.4/s, 797.588/s/gpu LR: 0.000351 Logit Scale: 42.689 Contrastive_loss: 2.6749 (2.6998) Loss: 2.6749 (2.6998)
|
582 |
+
2024-01-30,18:29:03 | INFO | Train Epoch: 3 [ 823296/16007168 (5%)] Data (t): 0.085 Batch (t): 0.349, 13569.8/s, 848.113/s/gpu LR: 0.000348 Logit Scale: 42.739 Contrastive_loss: 2.1629 (2.5208) Loss: 2.1629 (2.5208)
|
583 |
+
2024-01-30,18:29:38 | INFO | Train Epoch: 3 [ 1232896/16007168 (8%)] Data (t): 0.081 Batch (t): 0.351, 13345.2/s, 834.072/s/gpu LR: 0.000346 Logit Scale: 42.830 Contrastive_loss: 2.4349 (2.4993) Loss: 2.4349 (2.4993)
|
584 |
+
2024-01-30,18:30:13 | INFO | Train Epoch: 3 [ 1642496/16007168 (10%)] Data (t): 0.067 Batch (t): 0.351, 13127.0/s, 820.439/s/gpu LR: 0.000344 Logit Scale: 42.893 Contrastive_loss: 2.6399 (2.5274) Loss: 2.6399 (2.5274)
|
585 |
+
2024-01-30,18:30:49 | INFO | Train Epoch: 3 [ 2052096/16007168 (13%)] Data (t): 0.075 Batch (t): 0.360, 13163.2/s, 822.701/s/gpu LR: 0.000341 Logit Scale: 43.027 Contrastive_loss: 2.9539 (2.5985) Loss: 2.9539 (2.5985)
|
586 |
+
2024-01-30,18:31:24 | INFO | Train Epoch: 3 [ 2461696/16007168 (15%)] Data (t): 0.075 Batch (t): 0.352, 12485.0/s, 780.311/s/gpu LR: 0.000339 Logit Scale: 43.140 Contrastive_loss: 2.7431 (2.6192) Loss: 2.7431 (2.6192)
|
587 |
+
2024-01-30,18:32:00 | INFO | Train Epoch: 3 [ 2871296/16007168 (18%)] Data (t): 0.070 Batch (t): 0.354, 12131.1/s, 758.196/s/gpu LR: 0.000336 Logit Scale: 43.164 Contrastive_loss: 2.7805 (2.6393) Loss: 2.7805 (2.6393)
|
588 |
+
2024-01-30,18:32:35 | INFO | Train Epoch: 3 [ 3280896/16007168 (20%)] Data (t): 0.068 Batch (t): 0.349, 12349.9/s, 771.870/s/gpu LR: 0.000334 Logit Scale: 43.236 Contrastive_loss: 2.4207 (2.6150) Loss: 2.4207 (2.6150)
|
589 |
+
2024-01-30,18:33:10 | INFO | Train Epoch: 3 [ 3690496/16007168 (23%)] Data (t): 0.099 Batch (t): 0.354, 12228.2/s, 764.261/s/gpu LR: 0.000332 Logit Scale: 43.320 Contrastive_loss: 2.2062 (2.5742) Loss: 2.2062 (2.5742)
|
590 |
+
2024-01-30,18:33:45 | INFO | Train Epoch: 3 [ 4100096/16007168 (26%)] Data (t): 0.096 Batch (t): 0.354, 13259.7/s, 828.731/s/gpu LR: 0.000329 Logit Scale: 43.417 Contrastive_loss: 2.5924 (2.5758) Loss: 2.5924 (2.5758)
|
591 |
+
2024-01-30,18:34:21 | INFO | Train Epoch: 3 [ 4509696/16007168 (28%)] Data (t): 0.104 Batch (t): 0.353, 12782.6/s, 798.914/s/gpu LR: 0.000327 Logit Scale: 43.548 Contrastive_loss: 2.6007 (2.5779) Loss: 2.6007 (2.5779)
|
592 |
+
2024-01-30,18:34:56 | INFO | Train Epoch: 3 [ 4919296/16007168 (31%)] Data (t): 0.097 Batch (t): 0.353, 12391.8/s, 774.485/s/gpu LR: 0.000324 Logit Scale: 43.659 Contrastive_loss: 2.8330 (2.5975) Loss: 2.8330 (2.5975)
|
593 |
+
2024-01-30,18:35:32 | INFO | Train Epoch: 3 [ 5328896/16007168 (33%)] Data (t): 0.059 Batch (t): 0.359, 13039.6/s, 814.972/s/gpu LR: 0.000322 Logit Scale: 43.727 Contrastive_loss: 2.8242 (2.6137) Loss: 2.8242 (2.6137)
|
594 |
+
2024-01-30,18:36:08 | INFO | Train Epoch: 3 [ 5738496/16007168 (36%)] Data (t): 0.065 Batch (t): 0.356, 13425.2/s, 839.077/s/gpu LR: 0.000319 Logit Scale: 43.860 Contrastive_loss: 2.1300 (2.5815) Loss: 2.1300 (2.5815)
|
595 |
+
2024-01-30,18:36:43 | INFO | Train Epoch: 3 [ 6148096/16007168 (38%)] Data (t): 0.081 Batch (t): 0.358, 12077.5/s, 754.842/s/gpu LR: 0.000317 Logit Scale: 43.999 Contrastive_loss: 2.1266 (2.5530) Loss: 2.1266 (2.5530)
|
596 |
+
2024-01-30,18:37:18 | INFO | Train Epoch: 3 [ 6557696/16007168 (41%)] Data (t): 0.079 Batch (t): 0.346, 13715.1/s, 857.194/s/gpu LR: 0.000315 Logit Scale: 44.058 Contrastive_loss: 2.3959 (2.5438) Loss: 2.3959 (2.5438)
|
597 |
+
2024-01-30,18:37:54 | INFO | Train Epoch: 3 [ 6967296/16007168 (44%)] Data (t): 0.069 Batch (t): 0.363, 13919.8/s, 869.987/s/gpu LR: 0.000312 Logit Scale: 44.103 Contrastive_loss: 2.5254 (2.5428) Loss: 2.5254 (2.5428)
|
598 |
+
2024-01-30,18:38:30 | INFO | Train Epoch: 3 [ 7376896/16007168 (46%)] Data (t): 0.054 Batch (t): 0.354, 12843.1/s, 802.691/s/gpu LR: 0.000310 Logit Scale: 44.208 Contrastive_loss: 2.4997 (2.5405) Loss: 2.4997 (2.5405)
|
599 |
+
2024-01-30,18:39:04 | INFO | Train Epoch: 3 [ 7786496/16007168 (49%)] Data (t): 0.056 Batch (t): 0.348, 12948.7/s, 809.292/s/gpu LR: 0.000307 Logit Scale: 44.309 Contrastive_loss: 2.4575 (2.5364) Loss: 2.4575 (2.5364)
|
600 |
+
2024-01-30,18:39:40 | INFO | Train Epoch: 3 [ 8196096/16007168 (51%)] Data (t): 0.060 Batch (t): 0.360, 13503.8/s, 843.989/s/gpu LR: 0.000305 Logit Scale: 44.410 Contrastive_loss: 2.5604 (2.5375) Loss: 2.5604 (2.5375)
|
601 |
+
2024-01-30,18:40:17 | INFO | Train Epoch: 3 [ 8605696/16007168 (54%)] Data (t): 0.075 Batch (t): 0.362, 12531.0/s, 783.188/s/gpu LR: 0.000302 Logit Scale: 44.527 Contrastive_loss: 2.2755 (2.5256) Loss: 2.2755 (2.5256)
|
602 |
+
2024-01-30,18:40:51 | INFO | Train Epoch: 3 [ 9015296/16007168 (56%)] Data (t): 0.063 Batch (t): 0.348, 12902.4/s, 806.403/s/gpu LR: 0.000300 Logit Scale: 44.611 Contrastive_loss: 2.2566 (2.5139) Loss: 2.2566 (2.5139)
|
603 |
+
2024-01-30,18:41:27 | INFO | Train Epoch: 3 [ 9424896/16007168 (59%)] Data (t): 0.066 Batch (t): 0.354, 13282.2/s, 830.136/s/gpu LR: 0.000297 Logit Scale: 44.696 Contrastive_loss: 2.0675 (2.4953) Loss: 2.0675 (2.4953)
|
604 |
+
2024-01-30,18:42:02 | INFO | Train Epoch: 3 [ 9834496/16007168 (61%)] Data (t): 0.065 Batch (t): 0.355, 12976.0/s, 811.002/s/gpu LR: 0.000295 Logit Scale: 44.772 Contrastive_loss: 1.9698 (2.4743) Loss: 1.9698 (2.4743)
|
605 |
+
2024-01-30,18:42:38 | INFO | Train Epoch: 3 [10244096/16007168 (64%)] Data (t): 0.075 Batch (t): 0.355, 13041.9/s, 815.122/s/gpu LR: 0.000292 Logit Scale: 44.889 Contrastive_loss: 2.2121 (2.4642) Loss: 2.2121 (2.4642)
|
606 |
+
2024-01-30,18:43:13 | INFO | Train Epoch: 3 [10653696/16007168 (67%)] Data (t): 0.074 Batch (t): 0.355, 13118.3/s, 819.893/s/gpu LR: 0.000290 Logit Scale: 44.952 Contrastive_loss: 2.0850 (2.4501) Loss: 2.0850 (2.4501)
|
607 |
+
2024-01-30,18:43:49 | INFO | Train Epoch: 3 [11063296/16007168 (69%)] Data (t): 0.074 Batch (t): 0.361, 13318.1/s, 832.379/s/gpu LR: 0.000287 Logit Scale: 45.027 Contrastive_loss: 2.3981 (2.4483) Loss: 2.3981 (2.4483)
|
608 |
+
2024-01-30,18:44:24 | INFO | Train Epoch: 3 [11472896/16007168 (72%)] Data (t): 0.069 Batch (t): 0.350, 13669.2/s, 854.324/s/gpu LR: 0.000285 Logit Scale: 45.128 Contrastive_loss: 2.3070 (2.4434) Loss: 2.3070 (2.4434)
|
609 |
+
2024-01-30,18:45:00 | INFO | Train Epoch: 3 [11882496/16007168 (74%)] Data (t): 0.065 Batch (t): 0.357, 13501.3/s, 843.831/s/gpu LR: 0.000282 Logit Scale: 45.210 Contrastive_loss: 2.4691 (2.4443) Loss: 2.4691 (2.4443)
|
610 |
+
2024-01-30,18:45:35 | INFO | Train Epoch: 3 [12292096/16007168 (77%)] Data (t): 0.069 Batch (t): 0.354, 12537.6/s, 783.601/s/gpu LR: 0.000279 Logit Scale: 45.321 Contrastive_loss: 2.4691 (2.4451) Loss: 2.4691 (2.4451)
|
611 |
+
2024-01-30,18:46:11 | INFO | Train Epoch: 3 [12701696/16007168 (79%)] Data (t): 0.069 Batch (t): 0.351, 13958.7/s, 872.418/s/gpu LR: 0.000277 Logit Scale: 45.371 Contrastive_loss: 2.2623 (2.4394) Loss: 2.2623 (2.4394)
|
612 |
+
2024-01-30,18:46:46 | INFO | Train Epoch: 3 [13111296/16007168 (82%)] Data (t): 0.062 Batch (t): 0.356, 13166.3/s, 822.891/s/gpu LR: 0.000274 Logit Scale: 45.497 Contrastive_loss: 2.4169 (2.4387) Loss: 2.4169 (2.4387)
|
613 |
+
2024-01-30,18:47:21 | INFO | Train Epoch: 3 [13520896/16007168 (84%)] Data (t): 0.066 Batch (t): 0.352, 13278.1/s, 829.880/s/gpu LR: 0.000272 Logit Scale: 45.554 Contrastive_loss: 1.9444 (2.4241) Loss: 1.9444 (2.4241)
|
614 |
+
2024-01-30,18:47:57 | INFO | Train Epoch: 3 [13930496/16007168 (87%)] Data (t): 0.066 Batch (t): 0.353, 13371.8/s, 835.740/s/gpu LR: 0.000269 Logit Scale: 45.613 Contrastive_loss: 1.8400 (2.4075) Loss: 1.8400 (2.4075)
|
615 |
+
2024-01-30,18:48:32 | INFO | Train Epoch: 3 [14340096/16007168 (90%)] Data (t): 0.059 Batch (t): 0.354, 13371.1/s, 835.696/s/gpu LR: 0.000267 Logit Scale: 45.661 Contrastive_loss: 1.9213 (2.3939) Loss: 1.9213 (2.3939)
|
616 |
+
2024-01-30,18:49:07 | INFO | Train Epoch: 3 [14749696/16007168 (92%)] Data (t): 0.068 Batch (t): 0.351, 13567.5/s, 847.966/s/gpu LR: 0.000264 Logit Scale: 45.769 Contrastive_loss: 2.1651 (2.3878) Loss: 2.1651 (2.3878)
|
617 |
+
2024-01-30,18:49:42 | INFO | Train Epoch: 3 [15159296/16007168 (95%)] Data (t): 0.080 Batch (t): 0.353, 11869.5/s, 741.845/s/gpu LR: 0.000262 Logit Scale: 45.915 Contrastive_loss: 2.0590 (2.3791) Loss: 2.0590 (2.3791)
|
618 |
+
2024-01-30,18:50:19 | INFO | Train Epoch: 3 [15568896/16007168 (97%)] Data (t): 0.075 Batch (t): 0.364, 12320.9/s, 770.056/s/gpu LR: 0.000259 Logit Scale: 46.020 Contrastive_loss: 2.1551 (2.3734) Loss: 2.1551 (2.3734)
|
619 |
+
2024-01-30,18:50:54 | INFO | Train Epoch: 3 [15978496/16007168 (100%)] Data (t): 0.070 Batch (t): 0.355, 12584.9/s, 786.555/s/gpu LR: 0.000257 Logit Scale: 46.088 Contrastive_loss: 2.4760 (2.3759) Loss: 2.4760 (2.3759)
|
620 |
+
2024-01-30,18:50:56 | INFO | Train Epoch: 3 [16007168/16007168 (100%)] Data (t): 0.058 Batch (t): 0.307, 13528.3/s, 845.521/s/gpu LR: 0.000256 Logit Scale: 46.084 Contrastive_loss: 1.8544 (2.3632) Loss: 1.8544 (2.3632)
|
621 |
+
2024-01-30,18:50:59 | INFO | Start epoch 4
|
622 |
+
2024-01-30,18:51:01 | INFO | Train Epoch: 4 [ 4096/16007168 (0%)] Data (t): 1.781 Batch (t): 2.013, 2034.76/s, 127.172/s/gpu LR: 0.000256 Logit Scale: 46.086 Contrastive_loss: 2.2776 (2.2776) Loss: 2.2776 (2.2776)
|
623 |
+
2024-01-30,18:51:38 | INFO | Train Epoch: 4 [ 413696/16007168 (3%)] Data (t): 0.093 Batch (t): 0.365, 11865.0/s, 741.564/s/gpu LR: 0.000254 Logit Scale: 46.135 Contrastive_loss: 2.4986 (2.3881) Loss: 2.4986 (2.3881)
|
624 |
+
2024-01-30,18:52:13 | INFO | Train Epoch: 4 [ 823296/16007168 (5%)] Data (t): 0.075 Batch (t): 0.355, 12323.5/s, 770.216/s/gpu LR: 0.000251 Logit Scale: 46.229 Contrastive_loss: 2.5061 (2.4274) Loss: 2.5061 (2.4274)
|
625 |
+
2024-01-30,18:52:49 | INFO | Train Epoch: 4 [ 1232896/16007168 (8%)] Data (t): 0.067 Batch (t): 0.353, 12830.6/s, 801.914/s/gpu LR: 0.000249 Logit Scale: 46.302 Contrastive_loss: 2.2431 (2.3813) Loss: 2.2431 (2.3813)
|
626 |
+
2024-01-30,18:53:24 | INFO | Train Epoch: 4 [ 1642496/16007168 (10%)] Data (t): 0.075 Batch (t): 0.352, 12414.9/s, 775.929/s/gpu LR: 0.000246 Logit Scale: 46.364 Contrastive_loss: 1.9663 (2.2983) Loss: 1.9663 (2.2983)
|
627 |
+
2024-01-30,18:53:59 | INFO | Train Epoch: 4 [ 2052096/16007168 (13%)] Data (t): 0.061 Batch (t): 0.348, 12679.3/s, 792.454/s/gpu LR: 0.000244 Logit Scale: 46.421 Contrastive_loss: 2.2600 (2.2919) Loss: 2.2600 (2.2919)
|
628 |
+
2024-01-30,18:54:34 | INFO | Train Epoch: 4 [ 2461696/16007168 (15%)] Data (t): 0.075 Batch (t): 0.359, 12533.6/s, 783.348/s/gpu LR: 0.000241 Logit Scale: 46.469 Contrastive_loss: 2.2475 (2.2856) Loss: 2.2475 (2.2856)
|
629 |
+
2024-01-30,18:55:09 | INFO | Train Epoch: 4 [ 2871296/16007168 (18%)] Data (t): 0.062 Batch (t): 0.341, 12051.4/s, 753.210/s/gpu LR: 0.000239 Logit Scale: 46.535 Contrastive_loss: 2.4228 (2.3027) Loss: 2.4228 (2.3027)
|
630 |
+
2024-01-30,18:55:44 | INFO | Train Epoch: 4 [ 3280896/16007168 (20%)] Data (t): 0.050 Batch (t): 0.353, 8827.11/s, 551.694/s/gpu LR: 0.000236 Logit Scale: 46.607 Contrastive_loss: 1.9644 (2.2651) Loss: 1.9644 (2.2651)
|
631 |
+
2024-01-30,18:56:19 | INFO | Train Epoch: 4 [ 3690496/16007168 (23%)] Data (t): 0.081 Batch (t): 0.356, 12495.8/s, 780.987/s/gpu LR: 0.000233 Logit Scale: 46.719 Contrastive_loss: 2.1430 (2.2529) Loss: 2.1430 (2.2529)
|
632 |
+
2024-01-30,18:56:55 | INFO | Train Epoch: 4 [ 4100096/16007168 (26%)] Data (t): 0.069 Batch (t): 0.356, 7772.51/s, 485.782/s/gpu LR: 0.000231 Logit Scale: 46.808 Contrastive_loss: 1.8802 (2.2190) Loss: 1.8802 (2.2190)
|
633 |
+
2024-01-30,18:57:30 | INFO | Train Epoch: 4 [ 4509696/16007168 (28%)] Data (t): 0.070 Batch (t): 0.352, 12157.7/s, 759.857/s/gpu LR: 0.000228 Logit Scale: 46.894 Contrastive_loss: 2.4680 (2.2398) Loss: 2.4680 (2.2398)
|
634 |
+
2024-01-30,18:58:06 | INFO | Train Epoch: 4 [ 4919296/16007168 (31%)] Data (t): 0.071 Batch (t): 0.354, 13308.2/s, 831.764/s/gpu LR: 0.000226 Logit Scale: 46.982 Contrastive_loss: 2.1154 (2.2302) Loss: 2.1154 (2.2302)
|
635 |
+
2024-01-30,18:58:41 | INFO | Train Epoch: 4 [ 5328896/16007168 (33%)] Data (t): 0.076 Batch (t): 0.358, 12852.9/s, 803.304/s/gpu LR: 0.000223 Logit Scale: 47.109 Contrastive_loss: 1.7922 (2.1989) Loss: 1.7922 (2.1989)
|
636 |
+
2024-01-30,18:59:17 | INFO | Train Epoch: 4 [ 5738496/16007168 (36%)] Data (t): 0.070 Batch (t): 0.352, 12301.7/s, 768.856/s/gpu LR: 0.000221 Logit Scale: 47.179 Contrastive_loss: 1.9943 (2.1853) Loss: 1.9943 (2.1853)
|
637 |
+
2024-01-30,18:59:53 | INFO | Train Epoch: 4 [ 6148096/16007168 (38%)] Data (t): 0.072 Batch (t): 0.359, 13693.2/s, 855.826/s/gpu LR: 0.000218 Logit Scale: 47.246 Contrastive_loss: 2.1591 (2.1836) Loss: 2.1591 (2.1836)
|
638 |
+
2024-01-30,19:00:28 | INFO | Train Epoch: 4 [ 6557696/16007168 (41%)] Data (t): 0.070 Batch (t): 0.358, 12570.9/s, 785.679/s/gpu LR: 0.000216 Logit Scale: 47.290 Contrastive_loss: 2.0611 (2.1764) Loss: 2.0611 (2.1764)
|
639 |
+
2024-01-30,19:01:04 | INFO | Train Epoch: 4 [ 6967296/16007168 (44%)] Data (t): 0.067 Batch (t): 0.358, 13288.8/s, 830.547/s/gpu LR: 0.000213 Logit Scale: 47.364 Contrastive_loss: 1.6835 (2.1491) Loss: 1.6835 (2.1491)
|
640 |
+
2024-01-30,19:01:40 | INFO | Train Epoch: 4 [ 7376896/16007168 (46%)] Data (t): 0.067 Batch (t): 0.356, 12435.4/s, 777.214/s/gpu LR: 0.000211 Logit Scale: 47.512 Contrastive_loss: 1.9361 (2.1378) Loss: 1.9361 (2.1378)
|
641 |
+
2024-01-30,19:02:16 | INFO | Train Epoch: 4 [ 7786496/16007168 (49%)] Data (t): 0.073 Batch (t): 0.358, 12243.3/s, 765.208/s/gpu LR: 0.000208 Logit Scale: 47.627 Contrastive_loss: 2.4509 (2.1535) Loss: 2.4509 (2.1535)
|
642 |
+
2024-01-30,19:02:51 | INFO | Train Epoch: 4 [ 8196096/16007168 (51%)] Data (t): 0.073 Batch (t): 0.350, 13483.7/s, 842.733/s/gpu LR: 0.000206 Logit Scale: 47.738 Contrastive_loss: 1.9895 (2.1457) Loss: 1.9895 (2.1457)
|
643 |
+
2024-01-30,19:03:26 | INFO | Train Epoch: 4 [ 8605696/16007168 (54%)] Data (t): 0.072 Batch (t): 0.357, 12792.8/s, 799.550/s/gpu LR: 0.000203 Logit Scale: 47.825 Contrastive_loss: 1.8852 (2.1338) Loss: 1.8852 (2.1338)
|
644 |
+
2024-01-30,19:04:02 | INFO | Train Epoch: 4 [ 9015296/16007168 (56%)] Data (t): 0.072 Batch (t): 0.360, 12562.4/s, 785.151/s/gpu LR: 0.000201 Logit Scale: 47.986 Contrastive_loss: 2.0897 (2.1319) Loss: 2.0897 (2.1319)
|
645 |
+
2024-01-30,19:04:37 | INFO | Train Epoch: 4 [ 9424896/16007168 (59%)] Data (t): 0.074 Batch (t): 0.350, 11124.9/s, 695.304/s/gpu LR: 0.000198 Logit Scale: 48.029 Contrastive_loss: 2.1508 (2.1327) Loss: 2.1508 (2.1327)
|
646 |
+
2024-01-30,19:05:12 | INFO | Train Epoch: 4 [ 9834496/16007168 (61%)] Data (t): 0.065 Batch (t): 0.351, 8960.75/s, 560.047/s/gpu LR: 0.000196 Logit Scale: 48.093 Contrastive_loss: 1.9658 (2.1260) Loss: 1.9658 (2.1260)
|
647 |
+
2024-01-30,19:05:48 | INFO | Train Epoch: 4 [10244096/16007168 (64%)] Data (t): 0.066 Batch (t): 0.356, 6673.17/s, 417.073/s/gpu LR: 0.000193 Logit Scale: 48.141 Contrastive_loss: 2.0747 (2.1241) Loss: 2.0747 (2.1241)
|
648 |
+
2024-01-30,19:06:23 | INFO | Train Epoch: 4 [10653696/16007168 (67%)] Data (t): 0.068 Batch (t): 0.353, 10295.2/s, 643.452/s/gpu LR: 0.000191 Logit Scale: 48.245 Contrastive_loss: 2.0089 (2.1198) Loss: 2.0089 (2.1198)
|
649 |
+
2024-01-30,19:07:00 | INFO | Train Epoch: 4 [11063296/16007168 (69%)] Data (t): 0.075 Batch (t): 0.363, 13443.0/s, 840.187/s/gpu LR: 0.000188 Logit Scale: 48.289 Contrastive_loss: 2.0275 (2.1165) Loss: 2.0275 (2.1165)
|
650 |
+
2024-01-30,19:07:35 | INFO | Train Epoch: 4 [11472896/16007168 (72%)] Data (t): 0.072 Batch (t): 0.353, 13883.6/s, 867.726/s/gpu LR: 0.000186 Logit Scale: 48.375 Contrastive_loss: 2.2800 (2.1221) Loss: 2.2800 (2.1221)
|
651 |
+
2024-01-30,19:08:10 | INFO | Train Epoch: 4 [11882496/16007168 (74%)] Data (t): 0.053 Batch (t): 0.352, 12839.1/s, 802.447/s/gpu LR: 0.000183 Logit Scale: 48.481 Contrastive_loss: 2.1938 (2.1245) Loss: 2.1938 (2.1245)
|
652 |
+
2024-01-30,19:08:45 | INFO | Train Epoch: 4 [12292096/16007168 (77%)] Data (t): 0.069 Batch (t): 0.354, 12825.1/s, 801.568/s/gpu LR: 0.000181 Logit Scale: 48.548 Contrastive_loss: 2.2271 (2.1278) Loss: 2.2271 (2.1278)
|
653 |
+
2024-01-30,19:09:21 | INFO | Train Epoch: 4 [12701696/16007168 (79%)] Data (t): 0.066 Batch (t): 0.353, 11927.6/s, 745.476/s/gpu LR: 0.000178 Logit Scale: 48.594 Contrastive_loss: 2.3470 (2.1347) Loss: 2.3470 (2.1347)
|
654 |
+
2024-01-30,19:09:57 | INFO | Train Epoch: 4 [13111296/16007168 (82%)] Data (t): 0.071 Batch (t): 0.365, 13384.3/s, 836.519/s/gpu LR: 0.000176 Logit Scale: 48.669 Contrastive_loss: 2.4955 (2.1456) Loss: 2.4955 (2.1456)
|
655 |
+
2024-01-30,19:10:32 | INFO | Train Epoch: 4 [13520896/16007168 (84%)] Data (t): 0.071 Batch (t): 0.352, 13471.8/s, 841.985/s/gpu LR: 0.000173 Logit Scale: 48.749 Contrastive_loss: 1.9658 (2.1403) Loss: 1.9658 (2.1403)
|
656 |
+
2024-01-30,19:11:07 | INFO | Train Epoch: 4 [13930496/16007168 (87%)] Data (t): 0.065 Batch (t): 0.345, 12197.7/s, 762.357/s/gpu LR: 0.000171 Logit Scale: 48.841 Contrastive_loss: 1.9985 (2.1363) Loss: 1.9985 (2.1363)
|
657 |
+
2024-01-30,19:11:43 | INFO | Train Epoch: 4 [14340096/16007168 (90%)] Data (t): 0.066 Batch (t): 0.359, 12880.2/s, 805.011/s/gpu LR: 0.000169 Logit Scale: 48.880 Contrastive_loss: 1.8361 (2.1279) Loss: 1.8361 (2.1279)
|
658 |
+
2024-01-30,19:12:18 | INFO | Train Epoch: 4 [14749696/16007168 (92%)] Data (t): 0.068 Batch (t): 0.353, 12428.3/s, 776.768/s/gpu LR: 0.000166 Logit Scale: 48.961 Contrastive_loss: 1.8553 (2.1206) Loss: 1.8553 (2.1206)
|
659 |
+
2024-01-30,19:12:54 | INFO | Train Epoch: 4 [15159296/16007168 (95%)] Data (t): 0.069 Batch (t): 0.353, 13324.9/s, 832.806/s/gpu LR: 0.000164 Logit Scale: 49.009 Contrastive_loss: 2.0204 (2.1179) Loss: 2.0204 (2.1179)
|
660 |
+
2024-01-30,19:13:29 | INFO | Train Epoch: 4 [15568896/16007168 (97%)] Data (t): 0.071 Batch (t): 0.350, 12511.5/s, 781.969/s/gpu LR: 0.000161 Logit Scale: 49.108 Contrastive_loss: 2.3235 (2.1232) Loss: 2.3235 (2.1232)
|
661 |
+
2024-01-30,19:14:03 | INFO | Train Epoch: 4 [15978496/16007168 (100%)] Data (t): 0.073 Batch (t): 0.350, 8761.65/s, 547.603/s/gpu LR: 0.000159 Logit Scale: 49.183 Contrastive_loss: 1.8551 (2.1165) Loss: 1.8551 (2.1165)
|
662 |
+
2024-01-30,19:14:06 | INFO | Train Epoch: 4 [16007168/16007168 (100%)] Data (t): 0.066 Batch (t): 0.307, 13694.1/s, 855.884/s/gpu LR: 0.000159 Logit Scale: 49.184 Contrastive_loss: 1.5963 (2.1038) Loss: 1.5963 (2.1038)
|
663 |
+
2024-01-30,19:14:09 | INFO | Start epoch 5
|
664 |
+
2024-01-30,19:14:11 | INFO | Train Epoch: 5 [ 4096/16007168 (0%)] Data (t): 1.760 Batch (t): 1.991, 2056.83/s, 128.552/s/gpu LR: 0.000159 Logit Scale: 49.182 Contrastive_loss: 2.0975 (2.0975) Loss: 2.0975 (2.0975)
|
665 |
+
2024-01-30,19:14:47 | INFO | Train Epoch: 5 [ 413696/16007168 (3%)] Data (t): 0.104 Batch (t): 0.365, 10269.6/s, 641.852/s/gpu LR: 0.000156 Logit Scale: 49.205 Contrastive_loss: 1.9071 (2.0023) Loss: 1.9071 (2.0023)
|
666 |
+
2024-01-30,19:15:23 | INFO | Train Epoch: 5 [ 823296/16007168 (5%)] Data (t): 0.079 Batch (t): 0.356, 8953.43/s, 559.589/s/gpu LR: 0.000154 Logit Scale: 49.276 Contrastive_loss: 1.9597 (1.9881) Loss: 1.9597 (1.9881)
|
667 |
+
2024-01-30,19:15:58 | INFO | Train Epoch: 5 [ 1232896/16007168 (8%)] Data (t): 0.065 Batch (t): 0.348, 12903.1/s, 806.444/s/gpu LR: 0.000152 Logit Scale: 49.364 Contrastive_loss: 1.8396 (1.9510) Loss: 1.8396 (1.9510)
|
668 |
+
2024-01-30,19:16:33 | INFO | Train Epoch: 5 [ 1642496/16007168 (10%)] Data (t): 0.072 Batch (t): 0.354, 12324.6/s, 770.287/s/gpu LR: 0.000149 Logit Scale: 49.419 Contrastive_loss: 1.4697 (1.8547) Loss: 1.4697 (1.8547)
|
669 |
+
2024-01-30,19:17:08 | INFO | Train Epoch: 5 [ 2052096/16007168 (13%)] Data (t): 0.073 Batch (t): 0.352, 12898.9/s, 806.184/s/gpu LR: 0.000147 Logit Scale: 49.518 Contrastive_loss: 1.7210 (1.8324) Loss: 1.7210 (1.8324)
|
670 |
+
2024-01-30,19:17:44 | INFO | Train Epoch: 5 [ 2461696/16007168 (15%)] Data (t): 0.072 Batch (t): 0.361, 12063.0/s, 753.937/s/gpu LR: 0.000145 Logit Scale: 49.623 Contrastive_loss: 1.6402 (1.8050) Loss: 1.6402 (1.8050)
|
671 |
+
2024-01-30,19:18:20 | INFO | Train Epoch: 5 [ 2871296/16007168 (18%)] Data (t): 0.077 Batch (t): 0.353, 13347.5/s, 834.220/s/gpu LR: 0.000142 Logit Scale: 49.686 Contrastive_loss: 2.2080 (1.8554) Loss: 2.2080 (1.8554)
|
672 |
+
2024-01-30,19:18:55 | INFO | Train Epoch: 5 [ 3280896/16007168 (20%)] Data (t): 0.064 Batch (t): 0.357, 7901.42/s, 493.839/s/gpu LR: 0.000140 Logit Scale: 49.734 Contrastive_loss: 1.9048 (1.8608) Loss: 1.9048 (1.8608)
|
673 |
+
2024-01-30,19:19:32 | INFO | Train Epoch: 5 [ 3690496/16007168 (23%)] Data (t): 0.070 Batch (t): 0.363, 12071.6/s, 754.478/s/gpu LR: 0.000138 Logit Scale: 49.811 Contrastive_loss: 2.2453 (1.8993) Loss: 2.2453 (1.8993)
|
674 |
+
2024-01-30,19:20:07 | INFO | Train Epoch: 5 [ 4100096/16007168 (26%)] Data (t): 0.076 Batch (t): 0.350, 13508.2/s, 844.262/s/gpu LR: 0.000136 Logit Scale: 49.866 Contrastive_loss: 1.9411 (1.9031) Loss: 1.9411 (1.9031)
|
675 |
+
2024-01-30,19:20:42 | INFO | Train Epoch: 5 [ 4509696/16007168 (28%)] Data (t): 0.073 Batch (t): 0.349, 11943.8/s, 746.490/s/gpu LR: 0.000133 Logit Scale: 49.948 Contrastive_loss: 1.5937 (1.8773) Loss: 1.5937 (1.8773)
|
676 |
+
2024-01-30,19:21:17 | INFO | Train Epoch: 5 [ 4919296/16007168 (31%)] Data (t): 0.072 Batch (t): 0.355, 13392.2/s, 837.013/s/gpu LR: 0.000131 Logit Scale: 50.015 Contrastive_loss: 1.7019 (1.8638) Loss: 1.7019 (1.8638)
|
677 |
+
2024-01-30,19:21:52 | INFO | Train Epoch: 5 [ 5328896/16007168 (33%)] Data (t): 0.071 Batch (t): 0.350, 12313.8/s, 769.611/s/gpu LR: 0.000129 Logit Scale: 50.066 Contrastive_loss: 2.0558 (1.8775) Loss: 2.0558 (1.8775)
|
678 |
+
2024-01-30,19:22:28 | INFO | Train Epoch: 5 [ 5738496/16007168 (36%)] Data (t): 0.076 Batch (t): 0.357, 12910.8/s, 806.925/s/gpu LR: 0.000127 Logit Scale: 50.177 Contrastive_loss: 2.0593 (1.8896) Loss: 2.0593 (1.8896)
|
679 |
+
2024-01-30,19:23:03 | INFO | Train Epoch: 5 [ 6148096/16007168 (38%)] Data (t): 0.074 Batch (t): 0.353, 13323.6/s, 832.726/s/gpu LR: 0.000124 Logit Scale: 50.284 Contrastive_loss: 1.7905 (1.8835) Loss: 1.7905 (1.8835)
|
680 |
+
2024-01-30,19:23:39 | INFO | Train Epoch: 5 [ 6557696/16007168 (41%)] Data (t): 0.066 Batch (t): 0.353, 12856.5/s, 803.528/s/gpu LR: 0.000122 Logit Scale: 50.433 Contrastive_loss: 2.0030 (1.8905) Loss: 2.0030 (1.8905)
|
681 |
+
2024-01-30,19:24:14 | INFO | Train Epoch: 5 [ 6967296/16007168 (44%)] Data (t): 0.068 Batch (t): 0.354, 13175.7/s, 823.481/s/gpu LR: 0.000120 Logit Scale: 50.501 Contrastive_loss: 1.6152 (1.8752) Loss: 1.6152 (1.8752)
|
682 |
+
2024-01-30,19:24:49 | INFO | Train Epoch: 5 [ 7376896/16007168 (46%)] Data (t): 0.068 Batch (t): 0.353, 13489.9/s, 843.120/s/gpu LR: 0.000118 Logit Scale: 50.597 Contrastive_loss: 1.6464 (1.8632) Loss: 1.6464 (1.8632)
|
683 |
+
2024-01-30,19:25:25 | INFO | Train Epoch: 5 [ 7786496/16007168 (49%)] Data (t): 0.064 Batch (t): 0.355, 12659.9/s, 791.243/s/gpu LR: 0.000116 Logit Scale: 50.658 Contrastive_loss: 1.5000 (1.8450) Loss: 1.5000 (1.8450)
|
684 |
+
2024-01-30,19:26:00 | INFO | Train Epoch: 5 [ 8196096/16007168 (51%)] Data (t): 0.069 Batch (t): 0.356, 12594.7/s, 787.168/s/gpu LR: 0.000113 Logit Scale: 50.704 Contrastive_loss: 1.7094 (1.8385) Loss: 1.7094 (1.8385)
|
685 |
+
2024-01-30,19:26:36 | INFO | Train Epoch: 5 [ 8605696/16007168 (54%)] Data (t): 0.076 Batch (t): 0.357, 13186.9/s, 824.181/s/gpu LR: 0.000111 Logit Scale: 50.820 Contrastive_loss: 2.0081 (1.8462) Loss: 2.0081 (1.8462)
|
686 |
+
2024-01-30,19:27:11 | INFO | Train Epoch: 5 [ 9015296/16007168 (56%)] Data (t): 0.074 Batch (t): 0.354, 12717.5/s, 794.842/s/gpu LR: 0.000109 Logit Scale: 50.872 Contrastive_loss: 1.7084 (1.8403) Loss: 1.7084 (1.8403)
|
687 |
+
2024-01-30,19:27:47 | INFO | Train Epoch: 5 [ 9424896/16007168 (59%)] Data (t): 0.096 Batch (t): 0.354, 12393.7/s, 774.605/s/gpu LR: 0.000107 Logit Scale: 50.873 Contrastive_loss: 1.7075 (1.8347) Loss: 1.7075 (1.8347)
|
688 |
+
2024-01-30,19:28:22 | INFO | Train Epoch: 5 [ 9834496/16007168 (61%)] Data (t): 0.099 Batch (t): 0.356, 12925.9/s, 807.870/s/gpu LR: 0.000105 Logit Scale: 50.934 Contrastive_loss: 1.5658 (1.8240) Loss: 1.5658 (1.8240)
|
689 |
+
2024-01-30,19:28:58 | INFO | Train Epoch: 5 [10244096/16007168 (64%)] Data (t): 0.080 Batch (t): 0.357, 12388.6/s, 774.289/s/gpu LR: 0.000103 Logit Scale: 50.995 Contrastive_loss: 1.8623 (1.8254) Loss: 1.8623 (1.8254)
|
690 |
+
2024-01-30,19:29:33 | INFO | Train Epoch: 5 [10653696/16007168 (67%)] Data (t): 0.073 Batch (t): 0.354, 11617.0/s, 726.064/s/gpu LR: 0.000101 Logit Scale: 51.042 Contrastive_loss: 1.6210 (1.8179) Loss: 1.6210 (1.8179)
|
691 |
+
2024-01-30,19:30:09 | INFO | Train Epoch: 5 [11063296/16007168 (69%)] Data (t): 0.066 Batch (t): 0.353, 12053.1/s, 753.320/s/gpu LR: 0.000099 Logit Scale: 51.138 Contrastive_loss: 1.3658 (1.8017) Loss: 1.3658 (1.8017)
|
692 |
+
2024-01-30,19:30:44 | INFO | Train Epoch: 5 [11472896/16007168 (72%)] Data (t): 0.083 Batch (t): 0.348, 13582.5/s, 848.906/s/gpu LR: 0.000097 Logit Scale: 51.191 Contrastive_loss: 1.7078 (1.7985) Loss: 1.7078 (1.7985)
|
693 |
+
2024-01-30,19:31:19 | INFO | Train Epoch: 5 [11882496/16007168 (74%)] Data (t): 0.069 Batch (t): 0.359, 12737.8/s, 796.115/s/gpu LR: 0.000095 Logit Scale: 51.228 Contrastive_loss: 1.7966 (1.7984) Loss: 1.7966 (1.7984)
|
694 |
+
2024-01-30,19:31:55 | INFO | Train Epoch: 5 [12292096/16007168 (77%)] Data (t): 0.071 Batch (t): 0.359, 12703.7/s, 793.983/s/gpu LR: 0.000093 Logit Scale: 51.307 Contrastive_loss: 2.0864 (1.8077) Loss: 2.0864 (1.8077)
|
695 |
+
2024-01-30,19:32:30 | INFO | Train Epoch: 5 [12701696/16007168 (79%)] Data (t): 0.064 Batch (t): 0.349, 13001.1/s, 812.569/s/gpu LR: 0.000091 Logit Scale: 51.402 Contrastive_loss: 1.5984 (1.8012) Loss: 1.5984 (1.8012)
|
696 |
+
2024-01-30,19:33:05 | INFO | Train Epoch: 5 [13111296/16007168 (82%)] Data (t): 0.072 Batch (t): 0.353, 13475.8/s, 842.236/s/gpu LR: 0.000089 Logit Scale: 51.521 Contrastive_loss: 1.5116 (1.7924) Loss: 1.5116 (1.7924)
|
697 |
+
2024-01-30,19:33:41 | INFO | Train Epoch: 5 [13520896/16007168 (84%)] Data (t): 0.063 Batch (t): 0.351, 9459.66/s, 591.228/s/gpu LR: 0.000087 Logit Scale: 51.540 Contrastive_loss: 1.6216 (1.7874) Loss: 1.6216 (1.7874)
|
698 |
+
2024-01-30,19:34:16 | INFO | Train Epoch: 5 [13930496/16007168 (87%)] Data (t): 0.071 Batch (t): 0.355, 13310.4/s, 831.898/s/gpu LR: 0.000085 Logit Scale: 51.595 Contrastive_loss: 1.5775 (1.7814) Loss: 1.5775 (1.7814)
|
699 |
+
2024-01-30,19:34:51 | INFO | Train Epoch: 5 [14340096/16007168 (90%)] Data (t): 0.072 Batch (t): 0.348, 12531.8/s, 783.238/s/gpu LR: 0.000083 Logit Scale: 51.712 Contrastive_loss: 1.4687 (1.7727) Loss: 1.4687 (1.7727)
|
700 |
+
2024-01-30,19:35:26 | INFO | Train Epoch: 5 [14749696/16007168 (92%)] Data (t): 0.068 Batch (t): 0.352, 12926.3/s, 807.891/s/gpu LR: 0.000081 Logit Scale: 51.812 Contrastive_loss: 1.6583 (1.7696) Loss: 1.6583 (1.7696)
|
701 |
+
2024-01-30,19:36:02 | INFO | Train Epoch: 5 [15159296/16007168 (95%)] Data (t): 0.070 Batch (t): 0.354, 13410.3/s, 838.141/s/gpu LR: 0.000079 Logit Scale: 51.873 Contrastive_loss: 1.3776 (1.7593) Loss: 1.3776 (1.7593)
|
702 |
+
2024-01-30,19:36:37 | INFO | Train Epoch: 5 [15568896/16007168 (97%)] Data (t): 0.069 Batch (t): 0.351, 13656.8/s, 853.553/s/gpu LR: 0.000077 Logit Scale: 51.973 Contrastive_loss: 1.2441 (1.7461) Loss: 1.2441 (1.7461)
|
703 |
+
2024-01-30,19:37:13 | INFO | Train Epoch: 5 [15978496/16007168 (100%)] Data (t): 0.070 Batch (t): 0.361, 13553.1/s, 847.071/s/gpu LR: 0.000076 Logit Scale: 52.034 Contrastive_loss: 1.4139 (1.7378) Loss: 1.4139 (1.7378)
|
704 |
+
2024-01-30,19:37:15 | INFO | Train Epoch: 5 [16007168/16007168 (100%)] Data (t): 0.066 Batch (t): 0.330, 13647.7/s, 852.983/s/gpu LR: 0.000076 Logit Scale: 52.032 Contrastive_loss: 1.6360 (1.7353) Loss: 1.6360 (1.7353)
|
705 |
+
2024-01-30,19:37:18 | INFO | Start epoch 6
|
706 |
+
2024-01-30,19:37:20 | INFO | Train Epoch: 6 [ 4096/16007168 (0%)] Data (t): 1.749 Batch (t): 1.982, 2066.93/s, 129.183/s/gpu LR: 0.000075 Logit Scale: 52.033 Contrastive_loss: 1.0658 (1.0658) Loss: 1.0658 (1.0658)
|
707 |
+
2024-01-30,19:37:57 | INFO | Train Epoch: 6 [ 413696/16007168 (3%)] Data (t): 0.077 Batch (t): 0.370, 12355.9/s, 772.242/s/gpu LR: 0.000074 Logit Scale: 52.080 Contrastive_loss: 1.0963 (1.0810) Loss: 1.0963 (1.0810)
|
708 |
+
2024-01-30,19:38:32 | INFO | Train Epoch: 6 [ 823296/16007168 (5%)] Data (t): 0.070 Batch (t): 0.347, 12963.0/s, 810.190/s/gpu LR: 0.000072 Logit Scale: 52.173 Contrastive_loss: 1.4666 (1.2096) Loss: 1.4666 (1.2096)
|
709 |
+
2024-01-30,19:39:08 | INFO | Train Epoch: 6 [ 1232896/16007168 (8%)] Data (t): 0.076 Batch (t): 0.361, 12727.4/s, 795.460/s/gpu LR: 0.000070 Logit Scale: 52.238 Contrastive_loss: 1.3440 (1.2432) Loss: 1.3440 (1.2432)
|
710 |
+
2024-01-30,19:39:43 | INFO | Train Epoch: 6 [ 1642496/16007168 (10%)] Data (t): 0.073 Batch (t): 0.346, 13516.7/s, 844.791/s/gpu LR: 0.000068 Logit Scale: 52.257 Contrastive_loss: 1.4922 (1.2930) Loss: 1.4922 (1.2930)
|
711 |
+
2024-01-30,19:40:18 | INFO | Train Epoch: 6 [ 2052096/16007168 (13%)] Data (t): 0.073 Batch (t): 0.352, 12377.9/s, 773.620/s/gpu LR: 0.000067 Logit Scale: 52.324 Contrastive_loss: 1.1666 (1.2719) Loss: 1.1666 (1.2719)
|
712 |
+
2024-01-30,19:40:54 | INFO | Train Epoch: 6 [ 2461696/16007168 (15%)] Data (t): 0.075 Batch (t): 0.359, 12619.1/s, 788.694/s/gpu LR: 0.000065 Logit Scale: 52.375 Contrastive_loss: 1.2888 (1.2743) Loss: 1.2888 (1.2743)
|
713 |
+
2024-01-30,19:41:28 | INFO | Train Epoch: 6 [ 2871296/16007168 (18%)] Data (t): 0.072 Batch (t): 0.348, 12388.3/s, 774.268/s/gpu LR: 0.000063 Logit Scale: 52.409 Contrastive_loss: 1.3383 (1.2823) Loss: 1.3383 (1.2823)
|
714 |
+
2024-01-30,19:42:04 | INFO | Train Epoch: 6 [ 3280896/16007168 (20%)] Data (t): 0.075 Batch (t): 0.355, 13288.6/s, 830.535/s/gpu LR: 0.000061 Logit Scale: 52.468 Contrastive_loss: 1.5474 (1.3118) Loss: 1.5474 (1.3118)
|
715 |
+
2024-01-30,19:42:40 | INFO | Train Epoch: 6 [ 3690496/16007168 (23%)] Data (t): 0.070 Batch (t): 0.361, 12477.3/s, 779.832/s/gpu LR: 0.000060 Logit Scale: 52.513 Contrastive_loss: 1.5795 (1.3385) Loss: 1.5795 (1.3385)
|
716 |
+
2024-01-30,19:43:15 | INFO | Train Epoch: 6 [ 4100096/16007168 (26%)] Data (t): 0.067 Batch (t): 0.348, 12309.1/s, 769.319/s/gpu LR: 0.000058 Logit Scale: 52.528 Contrastive_loss: 1.4972 (1.3530) Loss: 1.4972 (1.3530)
|
717 |
+
2024-01-30,19:43:50 | INFO | Train Epoch: 6 [ 4509696/16007168 (28%)] Data (t): 0.069 Batch (t): 0.355, 11887.5/s, 742.969/s/gpu LR: 0.000057 Logit Scale: 52.585 Contrastive_loss: 1.6193 (1.3752) Loss: 1.6193 (1.3752)
|
718 |
+
2024-01-30,19:44:25 | INFO | Train Epoch: 6 [ 4919296/16007168 (31%)] Data (t): 0.058 Batch (t): 0.347, 13067.5/s, 816.718/s/gpu LR: 0.000055 Logit Scale: 52.647 Contrastive_loss: 1.8198 (1.4094) Loss: 1.8198 (1.4094)
|
719 |
+
2024-01-30,19:45:00 | INFO | Train Epoch: 6 [ 5328896/16007168 (33%)] Data (t): 0.064 Batch (t): 0.351, 12633.8/s, 789.610/s/gpu LR: 0.000053 Logit Scale: 52.701 Contrastive_loss: 1.3740 (1.4068) Loss: 1.3740 (1.4068)
|
720 |
+
2024-01-30,19:45:36 | INFO | Train Epoch: 6 [ 5738496/16007168 (36%)] Data (t): 0.068 Batch (t): 0.360, 13096.7/s, 818.541/s/gpu LR: 0.000052 Logit Scale: 52.728 Contrastive_loss: 1.3667 (1.4042) Loss: 1.3667 (1.4042)
|
721 |
+
2024-01-30,19:46:11 | INFO | Train Epoch: 6 [ 6148096/16007168 (38%)] Data (t): 0.064 Batch (t): 0.352, 8366.12/s, 522.882/s/gpu LR: 0.000050 Logit Scale: 52.775 Contrastive_loss: 1.3083 (1.3982) Loss: 1.3083 (1.3982)
|
722 |
+
2024-01-30,19:46:47 | INFO | Train Epoch: 6 [ 6557696/16007168 (41%)] Data (t): 0.075 Batch (t): 0.354, 11085.1/s, 692.821/s/gpu LR: 0.000049 Logit Scale: 52.822 Contrastive_loss: 1.5067 (1.4046) Loss: 1.5067 (1.4046)
|
723 |
+
2024-01-30,19:47:22 | INFO | Train Epoch: 6 [ 6967296/16007168 (44%)] Data (t): 0.063 Batch (t): 0.347, 11300.8/s, 706.303/s/gpu LR: 0.000047 Logit Scale: 52.855 Contrastive_loss: 1.2395 (1.3954) Loss: 1.2395 (1.3954)
|
724 |
+
2024-01-30,19:47:57 | INFO | Train Epoch: 6 [ 7376896/16007168 (46%)] Data (t): 0.063 Batch (t): 0.356, 12457.1/s, 778.571/s/gpu LR: 0.000046 Logit Scale: 52.888 Contrastive_loss: 1.3419 (1.3926) Loss: 1.3419 (1.3926)
|
725 |
+
2024-01-30,19:48:32 | INFO | Train Epoch: 6 [ 7786496/16007168 (49%)] Data (t): 0.068 Batch (t): 0.347, 13299.6/s, 831.228/s/gpu LR: 0.000044 Logit Scale: 52.935 Contrastive_loss: 1.2756 (1.3867) Loss: 1.2756 (1.3867)
|
726 |
+
2024-01-30,19:49:07 | INFO | Train Epoch: 6 [ 8196096/16007168 (51%)] Data (t): 0.071 Batch (t): 0.356, 12283.5/s, 767.717/s/gpu LR: 0.000043 Logit Scale: 52.976 Contrastive_loss: 1.2726 (1.3813) Loss: 1.2726 (1.3813)
|
727 |
+
2024-01-30,19:49:44 | INFO | Train Epoch: 6 [ 8605696/16007168 (54%)] Data (t): 0.071 Batch (t): 0.367, 12221.9/s, 763.871/s/gpu LR: 0.000041 Logit Scale: 53.001 Contrastive_loss: 1.4887 (1.3862) Loss: 1.4887 (1.3862)
|
728 |
+
2024-01-30,19:50:20 | INFO | Train Epoch: 6 [ 9015296/16007168 (56%)] Data (t): 0.077 Batch (t): 0.361, 11816.9/s, 738.555/s/gpu LR: 0.000040 Logit Scale: 53.034 Contrastive_loss: 1.3458 (1.3844) Loss: 1.3458 (1.3844)
|
729 |
+
2024-01-30,19:50:56 | INFO | Train Epoch: 6 [ 9424896/16007168 (59%)] Data (t): 0.074 Batch (t): 0.357, 13287.7/s, 830.479/s/gpu LR: 0.000039 Logit Scale: 53.081 Contrastive_loss: 1.7816 (1.4010) Loss: 1.7816 (1.4010)
|
730 |
+
2024-01-30,19:51:32 | INFO | Train Epoch: 6 [ 9834496/16007168 (61%)] Data (t): 0.073 Batch (t): 0.358, 11616.2/s, 726.012/s/gpu LR: 0.000037 Logit Scale: 53.122 Contrastive_loss: 1.4132 (1.4014) Loss: 1.4132 (1.4014)
|
731 |
+
2024-01-30,19:52:07 | INFO | Train Epoch: 6 [10244096/16007168 (64%)] Data (t): 0.065 Batch (t): 0.349, 13550.0/s, 846.874/s/gpu LR: 0.000036 Logit Scale: 53.157 Contrastive_loss: 1.3911 (1.4011) Loss: 1.3911 (1.4011)
|
732 |
+
2024-01-30,19:52:42 | INFO | Train Epoch: 6 [10653696/16007168 (67%)] Data (t): 0.070 Batch (t): 0.355, 13375.6/s, 835.977/s/gpu LR: 0.000035 Logit Scale: 53.180 Contrastive_loss: 1.5453 (1.4064) Loss: 1.5453 (1.4064)
|
733 |
+
2024-01-30,19:53:17 | INFO | Train Epoch: 6 [11063296/16007168 (69%)] Data (t): 0.071 Batch (t): 0.353, 12536.7/s, 783.544/s/gpu LR: 0.000033 Logit Scale: 53.208 Contrastive_loss: 1.6831 (1.4163) Loss: 1.6831 (1.4163)
|
734 |
+
2024-01-30,19:53:53 | INFO | Train Epoch: 6 [11472896/16007168 (72%)] Data (t): 0.067 Batch (t): 0.355, 12696.5/s, 793.529/s/gpu LR: 0.000032 Logit Scale: 53.272 Contrastive_loss: 0.85711 (1.3970) Loss: 0.85711 (1.3970)
|
735 |
+
2024-01-30,19:54:28 | INFO | Train Epoch: 6 [11882496/16007168 (74%)] Data (t): 0.069 Batch (t): 0.355, 13627.5/s, 851.718/s/gpu LR: 0.000031 Logit Scale: 53.309 Contrastive_loss: 1.0915 (1.3868) Loss: 1.0915 (1.3868)
|
736 |
+
2024-01-30,19:55:04 | INFO | Train Epoch: 6 [12292096/16007168 (77%)] Data (t): 0.063 Batch (t): 0.353, 12537.6/s, 783.603/s/gpu LR: 0.000030 Logit Scale: 53.319 Contrastive_loss: 1.2502 (1.3824) Loss: 1.2502 (1.3824)
|
737 |
+
2024-01-30,19:55:39 | INFO | Train Epoch: 6 [12701696/16007168 (79%)] Data (t): 0.074 Batch (t): 0.357, 12280.3/s, 767.519/s/gpu LR: 0.000028 Logit Scale: 53.344 Contrastive_loss: 1.2768 (1.3791) Loss: 1.2768 (1.3791)
|
738 |
+
2024-01-30,19:56:15 | INFO | Train Epoch: 6 [13111296/16007168 (82%)] Data (t): 0.069 Batch (t): 0.357, 13484.9/s, 842.806/s/gpu LR: 0.000027 Logit Scale: 53.395 Contrastive_loss: 1.4222 (1.3804) Loss: 1.4222 (1.3804)
|
739 |
+
2024-01-30,19:56:50 | INFO | Train Epoch: 6 [13520896/16007168 (84%)] Data (t): 0.064 Batch (t): 0.350, 13345.3/s, 834.082/s/gpu LR: 0.000026 Logit Scale: 53.433 Contrastive_loss: 1.8349 (1.3938) Loss: 1.8349 (1.3938)
|
740 |
+
2024-01-30,19:57:26 | INFO | Train Epoch: 6 [13930496/16007168 (87%)] Data (t): 0.068 Batch (t): 0.360, 13076.0/s, 817.248/s/gpu LR: 0.000025 Logit Scale: 53.455 Contrastive_loss: 1.6498 (1.4011) Loss: 1.6498 (1.4011)
|
741 |
+
2024-01-30,19:58:02 | INFO | Train Epoch: 6 [14340096/16007168 (90%)] Data (t): 0.076 Batch (t): 0.357, 13321.8/s, 832.611/s/gpu LR: 0.000024 Logit Scale: 53.470 Contrastive_loss: 1.8709 (1.4141) Loss: 1.8709 (1.4141)
|
742 |
+
2024-01-30,19:58:37 | INFO | Train Epoch: 6 [14749696/16007168 (92%)] Data (t): 0.075 Batch (t): 0.355, 11993.4/s, 749.589/s/gpu LR: 0.000023 Logit Scale: 53.493 Contrastive_loss: 1.5078 (1.4167) Loss: 1.5078 (1.4167)
|
743 |
+
2024-01-30,19:59:13 | INFO | Train Epoch: 6 [15159296/16007168 (95%)] Data (t): 0.071 Batch (t): 0.354, 12332.7/s, 770.794/s/gpu LR: 0.000022 Logit Scale: 53.529 Contrastive_loss: 1.4282 (1.4170) Loss: 1.4282 (1.4170)
|
744 |
+
2024-01-30,19:59:48 | INFO | Train Epoch: 6 [15568896/16007168 (97%)] Data (t): 0.075 Batch (t): 0.354, 13164.6/s, 822.790/s/gpu LR: 0.000021 Logit Scale: 53.557 Contrastive_loss: 1.5449 (1.4203) Loss: 1.5449 (1.4203)
|
745 |
+
2024-01-30,20:00:24 | INFO | Train Epoch: 6 [15978496/16007168 (100%)] Data (t): 0.078 Batch (t): 0.355, 12206.0/s, 762.874/s/gpu LR: 0.000020 Logit Scale: 53.560 Contrastive_loss: 1.6639 (1.4263) Loss: 1.6639 (1.4263)
|
746 |
+
2024-01-30,20:00:26 | INFO | Train Epoch: 6 [16007168/16007168 (100%)] Data (t): 0.070 Batch (t): 0.307, 13646.1/s, 852.879/s/gpu LR: 0.000020 Logit Scale: 53.561 Contrastive_loss: 1.3248 (1.4239) Loss: 1.3248 (1.4239)
|
747 |
+
2024-01-30,20:00:29 | INFO | Start epoch 7
|
748 |
+
2024-01-30,20:00:31 | INFO | Train Epoch: 7 [ 4096/16007168 (0%)] Data (t): 1.850 Batch (t): 2.083, 1966.65/s, 122.916/s/gpu LR: 0.000020 Logit Scale: 53.562 Contrastive_loss: 1.2898 (1.2898) Loss: 1.2898 (1.2898)
|
749 |
+
2024-01-30,20:01:07 | INFO | Train Epoch: 7 [ 413696/16007168 (3%)] Data (t): 0.084 Batch (t): 0.364, 13017.1/s, 813.568/s/gpu LR: 0.000019 Logit Scale: 53.603 Contrastive_loss: 1.3532 (1.3215) Loss: 1.3532 (1.3215)
|
750 |
+
2024-01-30,20:01:42 | INFO | Train Epoch: 7 [ 823296/16007168 (5%)] Data (t): 0.068 Batch (t): 0.348, 12916.9/s, 807.305/s/gpu LR: 0.000018 Logit Scale: 53.625 Contrastive_loss: 1.6470 (1.4300) Loss: 1.6470 (1.4300)
|
751 |
+
2024-01-30,20:02:18 | INFO | Train Epoch: 7 [ 1232896/16007168 (8%)] Data (t): 0.071 Batch (t): 0.362, 4865.44/s, 304.090/s/gpu LR: 0.000017 Logit Scale: 53.646 Contrastive_loss: 1.8723 (1.5406) Loss: 1.8723 (1.5406)
|
752 |
+
2024-01-30,20:02:54 | INFO | Train Epoch: 7 [ 1642496/16007168 (10%)] Data (t): 0.076 Batch (t): 0.354, 12391.6/s, 774.472/s/gpu LR: 0.000016 Logit Scale: 53.665 Contrastive_loss: 1.5476 (1.5420) Loss: 1.5476 (1.5420)
|
753 |
+
2024-01-30,20:03:30 | INFO | Train Epoch: 7 [ 2052096/16007168 (13%)] Data (t): 0.066 Batch (t): 0.361, 12847.2/s, 802.952/s/gpu LR: 0.000015 Logit Scale: 53.691 Contrastive_loss: 1.6589 (1.5615) Loss: 1.6589 (1.5615)
|
754 |
+
2024-01-30,20:04:05 | INFO | Train Epoch: 7 [ 2461696/16007168 (15%)] Data (t): 0.073 Batch (t): 0.353, 13127.0/s, 820.436/s/gpu LR: 0.000014 Logit Scale: 53.712 Contrastive_loss: 1.5938 (1.5661) Loss: 1.5938 (1.5661)
|
755 |
+
2024-01-30,20:04:41 | INFO | Train Epoch: 7 [ 2871296/16007168 (18%)] Data (t): 0.082 Batch (t): 0.353, 12785.3/s, 799.082/s/gpu LR: 0.000013 Logit Scale: 53.733 Contrastive_loss: 1.3077 (1.5338) Loss: 1.3077 (1.5338)
|
756 |
+
2024-01-30,20:05:16 | INFO | Train Epoch: 7 [ 3280896/16007168 (20%)] Data (t): 0.095 Batch (t): 0.353, 12911.9/s, 806.992/s/gpu LR: 0.000012 Logit Scale: 53.750 Contrastive_loss: 1.5013 (1.5302) Loss: 1.5013 (1.5302)
|
757 |
+
2024-01-30,20:05:51 | INFO | Train Epoch: 7 [ 3690496/16007168 (23%)] Data (t): 0.099 Batch (t): 0.355, 12074.3/s, 754.646/s/gpu LR: 0.000012 Logit Scale: 53.762 Contrastive_loss: 1.5242 (1.5296) Loss: 1.5242 (1.5296)
|
758 |
+
2024-01-30,20:06:27 | INFO | Train Epoch: 7 [ 4100096/16007168 (26%)] Data (t): 0.107 Batch (t): 0.360, 12426.6/s, 776.663/s/gpu LR: 0.000011 Logit Scale: 53.755 Contrastive_loss: 1.5091 (1.5277) Loss: 1.5091 (1.5277)
|
759 |
+
2024-01-30,20:07:03 | INFO | Train Epoch: 7 [ 4509696/16007168 (28%)] Data (t): 0.081 Batch (t): 0.358, 12593.1/s, 787.068/s/gpu LR: 0.000010 Logit Scale: 53.763 Contrastive_loss: 1.3031 (1.5090) Loss: 1.3031 (1.5090)
|
760 |
+
2024-01-30,20:07:39 | INFO | Train Epoch: 7 [ 4919296/16007168 (31%)] Data (t): 0.066 Batch (t): 0.356, 12883.0/s, 805.186/s/gpu LR: 0.000009 Logit Scale: 53.772 Contrastive_loss: 1.6905 (1.5230) Loss: 1.6905 (1.5230)
|
761 |
+
2024-01-30,20:08:14 | INFO | Train Epoch: 7 [ 5328896/16007168 (33%)] Data (t): 0.073 Batch (t): 0.357, 12023.9/s, 751.494/s/gpu LR: 0.000009 Logit Scale: 53.783 Contrastive_loss: 1.8182 (1.5440) Loss: 1.8182 (1.5440)
|
762 |
+
2024-01-30,20:08:50 | INFO | Train Epoch: 7 [ 5738496/16007168 (36%)] Data (t): 0.072 Batch (t): 0.354, 12913.3/s, 807.084/s/gpu LR: 0.000008 Logit Scale: 53.797 Contrastive_loss: 1.4677 (1.5390) Loss: 1.4677 (1.5390)
|
763 |
+
2024-01-30,20:09:25 | INFO | Train Epoch: 7 [ 6148096/16007168 (38%)] Data (t): 0.071 Batch (t): 0.354, 13077.2/s, 817.324/s/gpu LR: 0.000008 Logit Scale: 53.808 Contrastive_loss: 1.5279 (1.5383) Loss: 1.5279 (1.5383)
|
764 |
+
2024-01-30,20:10:01 | INFO | Train Epoch: 7 [ 6557696/16007168 (41%)] Data (t): 0.073 Batch (t): 0.358, 13211.7/s, 825.731/s/gpu LR: 0.000007 Logit Scale: 53.819 Contrastive_loss: 1.6676 (1.5459) Loss: 1.6676 (1.5459)
|
765 |
+
2024-01-30,20:10:38 | INFO | Train Epoch: 7 [ 6967296/16007168 (44%)] Data (t): 0.076 Batch (t): 0.365, 12937.3/s, 808.582/s/gpu LR: 0.000006 Logit Scale: 53.828 Contrastive_loss: 1.1835 (1.5257) Loss: 1.1835 (1.5257)
|
766 |
+
2024-01-30,20:11:13 | INFO | Train Epoch: 7 [ 7376896/16007168 (46%)] Data (t): 0.075 Batch (t): 0.357, 12368.6/s, 773.034/s/gpu LR: 0.000006 Logit Scale: 53.838 Contrastive_loss: 1.3115 (1.5145) Loss: 1.3115 (1.5145)
|
767 |
+
2024-01-30,20:11:48 | INFO | Train Epoch: 7 [ 7786496/16007168 (49%)] Data (t): 0.075 Batch (t): 0.348, 13017.0/s, 813.565/s/gpu LR: 0.000005 Logit Scale: 53.848 Contrastive_loss: 1.5983 (1.5187) Loss: 1.5983 (1.5187)
|
768 |
+
2024-01-30,20:12:24 | INFO | Train Epoch: 7 [ 8196096/16007168 (51%)] Data (t): 0.073 Batch (t): 0.356, 12452.8/s, 778.303/s/gpu LR: 0.000005 Logit Scale: 53.854 Contrastive_loss: 1.4691 (1.5163) Loss: 1.4691 (1.5163)
|
769 |
+
2024-01-30,20:12:59 | INFO | Train Epoch: 7 [ 8605696/16007168 (54%)] Data (t): 0.076 Batch (t): 0.357, 12981.8/s, 811.362/s/gpu LR: 0.000004 Logit Scale: 53.864 Contrastive_loss: 1.1038 (1.4975) Loss: 1.1038 (1.4975)
|
770 |
+
2024-01-30,20:13:35 | INFO | Train Epoch: 7 [ 9015296/16007168 (56%)] Data (t): 0.074 Batch (t): 0.353, 13244.6/s, 827.789/s/gpu LR: 0.000004 Logit Scale: 53.868 Contrastive_loss: 0.99178 (1.4756) Loss: 0.99178 (1.4756)
|
771 |
+
2024-01-30,20:14:10 | INFO | Train Epoch: 7 [ 9424896/16007168 (59%)] Data (t): 0.074 Batch (t): 0.354, 12366.7/s, 772.916/s/gpu LR: 0.000003 Logit Scale: 53.875 Contrastive_loss: 1.3705 (1.4712) Loss: 1.3705 (1.4712)
|
772 |
+
2024-01-30,20:14:45 | INFO | Train Epoch: 7 [ 9834496/16007168 (61%)] Data (t): 0.070 Batch (t): 0.349, 12894.2/s, 805.888/s/gpu LR: 0.000003 Logit Scale: 53.884 Contrastive_loss: 1.4722 (1.4712) Loss: 1.4722 (1.4712)
|
773 |
+
2024-01-30,20:15:20 | INFO | Train Epoch: 7 [10244096/16007168 (64%)] Data (t): 0.060 Batch (t): 0.349, 12382.3/s, 773.896/s/gpu LR: 0.000003 Logit Scale: 53.884 Contrastive_loss: 1.7520 (1.4820) Loss: 1.7520 (1.4820)
|
774 |
+
2024-01-30,20:15:56 | INFO | Train Epoch: 7 [10653696/16007168 (67%)] Data (t): 0.065 Batch (t): 0.366, 12929.3/s, 808.081/s/gpu LR: 0.000002 Logit Scale: 53.882 Contrastive_loss: 1.3723 (1.4780) Loss: 1.3723 (1.4780)
|
775 |
+
2024-01-30,20:16:32 | INFO | Train Epoch: 7 [11063296/16007168 (69%)] Data (t): 0.076 Batch (t): 0.360, 12416.2/s, 776.009/s/gpu LR: 0.000002 Logit Scale: 53.884 Contrastive_loss: 1.0876 (1.4640) Loss: 1.0876 (1.4640)
|
776 |
+
2024-01-30,20:17:08 | INFO | Train Epoch: 7 [11472896/16007168 (72%)] Data (t): 0.075 Batch (t): 0.354, 13008.4/s, 813.024/s/gpu LR: 0.000002 Logit Scale: 53.887 Contrastive_loss: 1.4124 (1.4622) Loss: 1.4124 (1.4622)
|
777 |
+
2024-01-30,20:17:43 | INFO | Train Epoch: 7 [11882496/16007168 (74%)] Data (t): 0.071 Batch (t): 0.351, 12664.8/s, 791.548/s/gpu LR: 0.000001 Logit Scale: 53.892 Contrastive_loss: 1.4187 (1.4608) Loss: 1.4187 (1.4608)
|
778 |
+
2024-01-30,20:18:18 | INFO | Train Epoch: 7 [12292096/16007168 (77%)] Data (t): 0.063 Batch (t): 0.355, 12457.1/s, 778.571/s/gpu LR: 0.000001 Logit Scale: 53.893 Contrastive_loss: 1.5712 (1.4643) Loss: 1.5712 (1.4643)
|
779 |
+
2024-01-30,20:18:54 | INFO | Train Epoch: 7 [12701696/16007168 (79%)] Data (t): 0.067 Batch (t): 0.357, 13221.5/s, 826.346/s/gpu LR: 0.000001 Logit Scale: 53.893 Contrastive_loss: 1.4487 (1.4639) Loss: 1.4487 (1.4639)
|
780 |
+
2024-01-30,20:19:30 | INFO | Train Epoch: 7 [13111296/16007168 (82%)] Data (t): 0.071 Batch (t): 0.357, 13395.8/s, 837.240/s/gpu LR: 0.000001 Logit Scale: 53.896 Contrastive_loss: 1.6786 (1.4704) Loss: 1.6786 (1.4704)
|
781 |
+
2024-01-30,20:20:05 | INFO | Train Epoch: 7 [13520896/16007168 (84%)] Data (t): 0.071 Batch (t): 0.350, 13017.9/s, 813.621/s/gpu LR: 0.000000 Logit Scale: 53.896 Contrastive_loss: 1.3835 (1.4678) Loss: 1.3835 (1.4678)
|
782 |
+
2024-01-30,20:20:40 | INFO | Train Epoch: 7 [13930496/16007168 (87%)] Data (t): 0.080 Batch (t): 0.347, 12232.5/s, 764.533/s/gpu LR: 0.000000 Logit Scale: 53.895 Contrastive_loss: 0.97439 (1.4537) Loss: 0.97439 (1.4537)
|
783 |
+
2024-01-30,20:21:15 | INFO | Train Epoch: 7 [14340096/16007168 (90%)] Data (t): 0.098 Batch (t): 0.356, 13000.3/s, 812.517/s/gpu LR: 0.000000 Logit Scale: 53.895 Contrastive_loss: 1.1748 (1.4460) Loss: 1.1748 (1.4460)
|
784 |
+
2024-01-30,20:21:51 | INFO | Train Epoch: 7 [14749696/16007168 (92%)] Data (t): 0.078 Batch (t): 0.363, 12443.8/s, 777.735/s/gpu LR: 0.000000 Logit Scale: 53.895 Contrastive_loss: 1.4320 (1.4456) Loss: 1.4320 (1.4456)
|
785 |
+
2024-01-30,20:22:28 | INFO | Train Epoch: 7 [15159296/16007168 (95%)] Data (t): 0.074 Batch (t): 0.362, 13333.4/s, 833.339/s/gpu LR: 0.000000 Logit Scale: 53.895 Contrastive_loss: 1.4076 (1.4446) Loss: 1.4076 (1.4446)
|
786 |
+
2024-01-30,20:23:03 | INFO | Train Epoch: 7 [15568896/16007168 (97%)] Data (t): 0.077 Batch (t): 0.357, 13036.9/s, 814.806/s/gpu LR: 0.000000 Logit Scale: 53.895 Contrastive_loss: 1.3819 (1.4430) Loss: 1.3819 (1.4430)
|
787 |
+
2024-01-30,20:23:39 | INFO | Train Epoch: 7 [15978496/16007168 (100%)] Data (t): 0.079 Batch (t): 0.362, 13060.0/s, 816.253/s/gpu LR: 0.000000 Logit Scale: 53.895 Contrastive_loss: 1.4386 (1.4429) Loss: 1.4386 (1.4429)
|
788 |
+
2024-01-30,20:23:42 | INFO | Train Epoch: 7 [16007168/16007168 (100%)] Data (t): 0.064 Batch (t): 0.322, 13693.9/s, 855.869/s/gpu LR: 0.000000 Logit Scale: 53.895 Contrastive_loss: 1.4389 (1.4428) Loss: 1.4389 (1.4428)
|
params.txt
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accum_freq: 1
|
2 |
+
aug_cfg: {}
|
3 |
+
batch_size: 256
|
4 |
+
beta1: 0.9
|
5 |
+
beta2: 0.98
|
6 |
+
checkpoint_path: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/checkpoints_v5_gpt4v_cc12m/medium_object_detail_fulfillment_th_20_mutli_score_and/checkpoints
|
7 |
+
coca_caption_loss_weight: 2.0
|
8 |
+
coca_contrastive_loss_weight: 1.0
|
9 |
+
copy_codebase: False
|
10 |
+
csv_caption_key: title
|
11 |
+
csv_img_key: filepath
|
12 |
+
csv_separator:
|
13 |
+
dataset_resampled: True
|
14 |
+
dataset_type: webdataset
|
15 |
+
ddp_static_graph: True
|
16 |
+
debug: False
|
17 |
+
delete_previous_checkpoint: False
|
18 |
+
device: cuda:0
|
19 |
+
dist_backend: nccl
|
20 |
+
dist_url: env://
|
21 |
+
distill: False
|
22 |
+
distill_model: None
|
23 |
+
distill_pretrained: None
|
24 |
+
distributed: True
|
25 |
+
epochs: 8
|
26 |
+
epochs_cooldown: None
|
27 |
+
eps: 1e-06
|
28 |
+
force_custom_text: False
|
29 |
+
force_image_size: None
|
30 |
+
force_patch_dropout: None
|
31 |
+
force_quick_gelu: False
|
32 |
+
gather_with_grad: True
|
33 |
+
grad_checkpointing: True
|
34 |
+
grad_clip_norm: None
|
35 |
+
horovod: False
|
36 |
+
image_mean: None
|
37 |
+
image_std: None
|
38 |
+
imagenet_v2: None
|
39 |
+
imagenet_val: None
|
40 |
+
local_loss: True
|
41 |
+
local_rank: 0
|
42 |
+
lock_image: False
|
43 |
+
lock_image_freeze_bn_stats: False
|
44 |
+
lock_image_unlocked_groups: 0
|
45 |
+
lock_text: False
|
46 |
+
lock_text_freeze_layer_norm: False
|
47 |
+
lock_text_unlocked_layers: 0
|
48 |
+
log_every_n_steps: 100
|
49 |
+
log_level: 20
|
50 |
+
log_local: False
|
51 |
+
log_path: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/checkpoints_v5_gpt4v_cc12m/medium_object_detail_fulfillment_th_20_mutli_score_and/out.log
|
52 |
+
logs: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/checkpoints_v5_gpt4v_cc12m
|
53 |
+
lr: 0.0005
|
54 |
+
lr_cooldown_end: 0.0
|
55 |
+
lr_cooldown_power: 1.0
|
56 |
+
lr_scheduler: cosine
|
57 |
+
model: ViT-B-32
|
58 |
+
name: medium_object_detail_fulfillment_th_20_mutli_score_and
|
59 |
+
no_set_device_rank: False
|
60 |
+
precision: amp
|
61 |
+
pretrained:
|
62 |
+
pretrained_image: False
|
63 |
+
rank: 0
|
64 |
+
remote_sync: None
|
65 |
+
remote_sync_frequency: 300
|
66 |
+
remote_sync_protocol: s3
|
67 |
+
report_to:
|
68 |
+
resume: None
|
69 |
+
save_frequency: 0
|
70 |
+
save_most_recent: True
|
71 |
+
seed: 0
|
72 |
+
skip_scheduler: False
|
73 |
+
tensorboard: False
|
74 |
+
tensorboard_path:
|
75 |
+
torchscript: False
|
76 |
+
trace: False
|
77 |
+
train_data: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/filtered_shards_v5_gpt4v_cc12m/medium_object_detail_fulfillment_th_20_mutli_score_and/{00000000..00003219}.tar
|
78 |
+
train_data_upsampling_factors: None
|
79 |
+
train_num_samples: 16000000
|
80 |
+
use_bn_sync: False
|
81 |
+
val_data: None
|
82 |
+
val_frequency: 1
|
83 |
+
val_num_samples: None
|
84 |
+
wandb: False
|
85 |
+
wandb_notes:
|
86 |
+
wandb_project_name: open-clip
|
87 |
+
warmup: 500
|
88 |
+
wd: 0.2
|
89 |
+
workers: 4
|
90 |
+
world_size: 16
|
91 |
+
zeroshot_frequency: 2
|