rileybol commited on
Commit
791afe6
1 Parent(s): 0f91324

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - object-detection
6
+ - vision
7
+ widget:
8
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg
9
+ example_title: Tiger
10
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg
11
+ example_title: Teapot
12
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg
13
+ example_title: Palace
14
+ ---
15
+
16
+ # Model Trained Using AutoTrain
17
+
18
+ - Problem type: Object Detection
19
+
20
+ ## Validation Metrics
21
+ loss: 0.7565978169441223
22
+
23
+ map: 0.5582
24
+
25
+ map_50: 0.9326
26
+
27
+ map_75: 0.6075
28
+
29
+ map_small: 0.025
30
+
31
+ map_medium: 0.3937
32
+
33
+ map_large: 0.618
34
+
35
+ mar_1: 0.2923
36
+
37
+ mar_10: 0.6329
38
+
39
+ mar_100: 0.697
40
+
41
+ mar_small: 0.3
42
+
43
+ mar_medium: 0.5641
44
+
45
+ mar_large: 0.7497
checkpoint-1017/config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "hustvl/yolos-tiny",
3
+ "architectures": [
4
+ "YolosForObjectDetection"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "auxiliary_loss": false,
8
+ "bbox_cost": 5,
9
+ "bbox_loss_coefficient": 5,
10
+ "class_cost": 1,
11
+ "eos_coefficient": 0.1,
12
+ "giou_cost": 2,
13
+ "giou_loss_coefficient": 2,
14
+ "hidden_act": "gelu",
15
+ "hidden_dropout_prob": 0.0,
16
+ "hidden_size": 192,
17
+ "id2label": {
18
+ "0": "Face"
19
+ },
20
+ "image_size": [
21
+ 800,
22
+ 1333
23
+ ],
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 768,
26
+ "label2id": {
27
+ "Face": 0
28
+ },
29
+ "layer_norm_eps": 1e-12,
30
+ "model_type": "yolos",
31
+ "num_attention_heads": 3,
32
+ "num_channels": 3,
33
+ "num_detection_tokens": 100,
34
+ "num_hidden_layers": 12,
35
+ "patch_size": 16,
36
+ "qkv_bias": true,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.41.0",
39
+ "use_mid_position_embeddings": false
40
+ }
checkpoint-1017/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d302c1624aade53174907fed704a071b5ca5ceff59f3a995c1f3f61ddc66c48
3
+ size 25909400
checkpoint-1017/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdd2da5f32ea0309ee22f2083d9200b952e861182a5ef9eade966bbe7eeef4d9
3
+ size 51946362
checkpoint-1017/preprocessor_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "annotations",
5
+ "return_segmentation_masks",
6
+ "masks_path",
7
+ "do_resize",
8
+ "size",
9
+ "resample",
10
+ "do_rescale",
11
+ "rescale_factor",
12
+ "do_normalize",
13
+ "image_mean",
14
+ "image_std",
15
+ "do_convert_annotations",
16
+ "do_pad",
17
+ "format",
18
+ "return_tensors",
19
+ "data_format",
20
+ "input_data_format"
21
+ ],
22
+ "do_convert_annotations": true,
23
+ "do_normalize": true,
24
+ "do_pad": false,
25
+ "do_rescale": true,
26
+ "do_resize": false,
27
+ "format": "coco_detection",
28
+ "image_mean": [
29
+ 0.485,
30
+ 0.456,
31
+ 0.406
32
+ ],
33
+ "image_processor_type": "YolosImageProcessor",
34
+ "image_std": [
35
+ 0.229,
36
+ 0.224,
37
+ 0.225
38
+ ],
39
+ "resample": 2,
40
+ "rescale_factor": 0.00392156862745098,
41
+ "size": {
42
+ "longest_edge": 600
43
+ }
44
+ }
checkpoint-1017/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66b9c51d6cc590e777c30aca5726fb84e42b191b8887472fb23b4a2601c7e297
3
+ size 14180
checkpoint-1017/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a277adef8ec8a4f21fc75ab7804713adcdb684eb9f49d08f9078fc42e29338c9
3
+ size 1064
checkpoint-1017/trainer_state.json ADDED
@@ -0,0 +1,3796 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7565978169441223,
3
+ "best_model_checkpoint": "autotrain-80y7u-aoweu/checkpoint-1017",
4
+ "epoch": 9.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1017,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.017699115044247787,
13
+ "grad_norm": Infinity,
14
+ "learning_rate": 0.0,
15
+ "loss": 1.8501,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.035398230088495575,
20
+ "grad_norm": 45.10062026977539,
21
+ "learning_rate": 8.849557522123894e-07,
22
+ "loss": 1.857,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.05309734513274336,
27
+ "grad_norm": 58.66633605957031,
28
+ "learning_rate": 1.3274336283185841e-06,
29
+ "loss": 1.8116,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.07079646017699115,
34
+ "grad_norm": 56.49775695800781,
35
+ "learning_rate": 2.2123893805309734e-06,
36
+ "loss": 2.0345,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.08849557522123894,
41
+ "grad_norm": 58.526424407958984,
42
+ "learning_rate": 3.097345132743363e-06,
43
+ "loss": 1.9499,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.10619469026548672,
48
+ "grad_norm": 52.64912796020508,
49
+ "learning_rate": 3.982300884955752e-06,
50
+ "loss": 1.9647,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.12389380530973451,
55
+ "grad_norm": 31.514617919921875,
56
+ "learning_rate": 4.867256637168142e-06,
57
+ "loss": 1.6022,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.1415929203539823,
62
+ "grad_norm": 39.92071533203125,
63
+ "learning_rate": 5.752212389380531e-06,
64
+ "loss": 1.7882,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.1592920353982301,
69
+ "grad_norm": 57.345947265625,
70
+ "learning_rate": 6.6371681415929215e-06,
71
+ "loss": 1.7493,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.17699115044247787,
76
+ "grad_norm": 40.93531799316406,
77
+ "learning_rate": 7.52212389380531e-06,
78
+ "loss": 1.5886,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.19469026548672566,
83
+ "grad_norm": 34.53605651855469,
84
+ "learning_rate": 8.407079646017701e-06,
85
+ "loss": 1.6985,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.21238938053097345,
90
+ "grad_norm": 59.89328384399414,
91
+ "learning_rate": 9.29203539823009e-06,
92
+ "loss": 1.8046,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.23008849557522124,
97
+ "grad_norm": 36.242183685302734,
98
+ "learning_rate": 1.0176991150442479e-05,
99
+ "loss": 1.5316,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.24778761061946902,
104
+ "grad_norm": 19.723634719848633,
105
+ "learning_rate": 1.1061946902654869e-05,
106
+ "loss": 1.612,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.26548672566371684,
111
+ "grad_norm": 33.05853271484375,
112
+ "learning_rate": 1.1946902654867258e-05,
113
+ "loss": 1.7078,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.2831858407079646,
118
+ "grad_norm": 25.069561004638672,
119
+ "learning_rate": 1.2831858407079647e-05,
120
+ "loss": 1.616,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.3008849557522124,
125
+ "grad_norm": 58.163509368896484,
126
+ "learning_rate": 1.3716814159292036e-05,
127
+ "loss": 1.6574,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3185840707964602,
132
+ "grad_norm": 32.350101470947266,
133
+ "learning_rate": 1.4601769911504426e-05,
134
+ "loss": 1.3009,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.336283185840708,
139
+ "grad_norm": 36.51224899291992,
140
+ "learning_rate": 1.5486725663716813e-05,
141
+ "loss": 1.36,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.35398230088495575,
146
+ "grad_norm": 32.10768508911133,
147
+ "learning_rate": 1.6371681415929206e-05,
148
+ "loss": 1.515,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.37168141592920356,
153
+ "grad_norm": 25.605300903320312,
154
+ "learning_rate": 1.7256637168141594e-05,
155
+ "loss": 1.3663,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.3893805309734513,
160
+ "grad_norm": 146.0785369873047,
161
+ "learning_rate": 1.8141592920353983e-05,
162
+ "loss": 1.3139,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.40707964601769914,
167
+ "grad_norm": 37.9253044128418,
168
+ "learning_rate": 1.9026548672566372e-05,
169
+ "loss": 1.3613,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4247787610619469,
174
+ "grad_norm": 36.18649673461914,
175
+ "learning_rate": 1.991150442477876e-05,
176
+ "loss": 1.4165,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.4424778761061947,
181
+ "grad_norm": 22.322145462036133,
182
+ "learning_rate": 2.079646017699115e-05,
183
+ "loss": 1.3432,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.46017699115044247,
188
+ "grad_norm": 28.047632217407227,
189
+ "learning_rate": 2.1681415929203542e-05,
190
+ "loss": 1.1527,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.4778761061946903,
195
+ "grad_norm": 34.25035858154297,
196
+ "learning_rate": 2.2566371681415928e-05,
197
+ "loss": 1.0954,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.49557522123893805,
202
+ "grad_norm": 154.13543701171875,
203
+ "learning_rate": 2.345132743362832e-05,
204
+ "loss": 1.289,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5132743362831859,
209
+ "grad_norm": 34.313114166259766,
210
+ "learning_rate": 2.433628318584071e-05,
211
+ "loss": 1.4081,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.5309734513274337,
216
+ "grad_norm": 31.601171493530273,
217
+ "learning_rate": 2.5221238938053098e-05,
218
+ "loss": 1.1908,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.5486725663716814,
223
+ "grad_norm": 40.591888427734375,
224
+ "learning_rate": 2.610619469026549e-05,
225
+ "loss": 1.3728,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.5663716814159292,
230
+ "grad_norm": 39.01655578613281,
231
+ "learning_rate": 2.6991150442477875e-05,
232
+ "loss": 1.1882,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.584070796460177,
237
+ "grad_norm": 35.67778778076172,
238
+ "learning_rate": 2.7876106194690264e-05,
239
+ "loss": 0.9822,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.6017699115044248,
244
+ "grad_norm": 37.64238357543945,
245
+ "learning_rate": 2.8761061946902656e-05,
246
+ "loss": 0.9998,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.6194690265486725,
251
+ "grad_norm": 29.477062225341797,
252
+ "learning_rate": 2.9203539823008852e-05,
253
+ "loss": 1.3209,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.6371681415929203,
258
+ "grad_norm": 21.486434936523438,
259
+ "learning_rate": 3.008849557522124e-05,
260
+ "loss": 1.2007,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.6548672566371682,
265
+ "grad_norm": 32.44023513793945,
266
+ "learning_rate": 3.097345132743363e-05,
267
+ "loss": 1.2391,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.672566371681416,
272
+ "grad_norm": 87.39546203613281,
273
+ "learning_rate": 3.185840707964602e-05,
274
+ "loss": 1.2293,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.6902654867256637,
279
+ "grad_norm": 22.6693058013916,
280
+ "learning_rate": 3.274336283185841e-05,
281
+ "loss": 1.3051,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.7079646017699115,
286
+ "grad_norm": 22.473268508911133,
287
+ "learning_rate": 3.3628318584070804e-05,
288
+ "loss": 1.1976,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.7256637168141593,
293
+ "grad_norm": 27.98000144958496,
294
+ "learning_rate": 3.451327433628319e-05,
295
+ "loss": 1.172,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.7433628318584071,
300
+ "grad_norm": 30.333744049072266,
301
+ "learning_rate": 3.5398230088495574e-05,
302
+ "loss": 1.4081,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.7610619469026548,
307
+ "grad_norm": 32.933128356933594,
308
+ "learning_rate": 3.628318584070797e-05,
309
+ "loss": 1.0802,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.7787610619469026,
314
+ "grad_norm": 29.343582153320312,
315
+ "learning_rate": 3.716814159292036e-05,
316
+ "loss": 1.0651,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.7964601769911505,
321
+ "grad_norm": 28.437707901000977,
322
+ "learning_rate": 3.8053097345132744e-05,
323
+ "loss": 1.079,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.8141592920353983,
328
+ "grad_norm": 34.3442268371582,
329
+ "learning_rate": 3.893805309734514e-05,
330
+ "loss": 1.1715,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.831858407079646,
335
+ "grad_norm": 24.838363647460938,
336
+ "learning_rate": 3.982300884955752e-05,
337
+ "loss": 1.2568,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.8495575221238938,
342
+ "grad_norm": 19.97493553161621,
343
+ "learning_rate": 4.0707964601769914e-05,
344
+ "loss": 1.0946,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.8672566371681416,
349
+ "grad_norm": 26.681554794311523,
350
+ "learning_rate": 4.15929203539823e-05,
351
+ "loss": 0.9971,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 0.8849557522123894,
356
+ "grad_norm": 20.66066551208496,
357
+ "learning_rate": 4.247787610619469e-05,
358
+ "loss": 1.205,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 0.9026548672566371,
363
+ "grad_norm": 30.359079360961914,
364
+ "learning_rate": 4.3362831858407084e-05,
365
+ "loss": 1.0563,
366
+ "step": 102
367
+ },
368
+ {
369
+ "epoch": 0.9203539823008849,
370
+ "grad_norm": 42.22723388671875,
371
+ "learning_rate": 4.4247787610619477e-05,
372
+ "loss": 1.0019,
373
+ "step": 104
374
+ },
375
+ {
376
+ "epoch": 0.9380530973451328,
377
+ "grad_norm": 28.618349075317383,
378
+ "learning_rate": 4.5132743362831855e-05,
379
+ "loss": 1.1699,
380
+ "step": 106
381
+ },
382
+ {
383
+ "epoch": 0.9557522123893806,
384
+ "grad_norm": 47.17839431762695,
385
+ "learning_rate": 4.601769911504425e-05,
386
+ "loss": 1.0793,
387
+ "step": 108
388
+ },
389
+ {
390
+ "epoch": 0.9734513274336283,
391
+ "grad_norm": 28.66024398803711,
392
+ "learning_rate": 4.690265486725664e-05,
393
+ "loss": 1.1115,
394
+ "step": 110
395
+ },
396
+ {
397
+ "epoch": 0.9911504424778761,
398
+ "grad_norm": 24.186906814575195,
399
+ "learning_rate": 4.778761061946903e-05,
400
+ "loss": 0.9751,
401
+ "step": 112
402
+ },
403
+ {
404
+ "epoch": 1.0,
405
+ "eval_loss": 1.1276780366897583,
406
+ "eval_map": 0.1062,
407
+ "eval_map_50": 0.2136,
408
+ "eval_map_75": 0.0953,
409
+ "eval_map_large": 0.1451,
410
+ "eval_map_medium": 0.0275,
411
+ "eval_map_per_class": 0.1062,
412
+ "eval_map_small": 0.0,
413
+ "eval_mar_1": 0.1671,
414
+ "eval_mar_10": 0.3786,
415
+ "eval_mar_100": 0.6103,
416
+ "eval_mar_100_per_class": 0.6103,
417
+ "eval_mar_large": 0.6775,
418
+ "eval_mar_medium": 0.4422,
419
+ "eval_mar_small": 0.0,
420
+ "eval_runtime": 4.6231,
421
+ "eval_samples_per_second": 21.631,
422
+ "eval_steps_per_second": 1.514,
423
+ "step": 113
424
+ },
425
+ {
426
+ "epoch": 1.008849557522124,
427
+ "grad_norm": 29.109996795654297,
428
+ "learning_rate": 4.867256637168142e-05,
429
+ "loss": 1.0909,
430
+ "step": 114
431
+ },
432
+ {
433
+ "epoch": 1.0265486725663717,
434
+ "grad_norm": 36.879417419433594,
435
+ "learning_rate": 4.955752212389381e-05,
436
+ "loss": 1.0213,
437
+ "step": 116
438
+ },
439
+ {
440
+ "epoch": 1.0442477876106195,
441
+ "grad_norm": 25.578012466430664,
442
+ "learning_rate": 4.9950835791543757e-05,
443
+ "loss": 1.0314,
444
+ "step": 118
445
+ },
446
+ {
447
+ "epoch": 1.0619469026548674,
448
+ "grad_norm": 20.469444274902344,
449
+ "learning_rate": 4.985250737463127e-05,
450
+ "loss": 1.0589,
451
+ "step": 120
452
+ },
453
+ {
454
+ "epoch": 1.079646017699115,
455
+ "grad_norm": 18.58279037475586,
456
+ "learning_rate": 4.9754178957718786e-05,
457
+ "loss": 1.0507,
458
+ "step": 122
459
+ },
460
+ {
461
+ "epoch": 1.0973451327433628,
462
+ "grad_norm": 29.6292724609375,
463
+ "learning_rate": 4.9655850540806295e-05,
464
+ "loss": 1.0577,
465
+ "step": 124
466
+ },
467
+ {
468
+ "epoch": 1.1150442477876106,
469
+ "grad_norm": 57.019222259521484,
470
+ "learning_rate": 4.955752212389381e-05,
471
+ "loss": 1.0879,
472
+ "step": 126
473
+ },
474
+ {
475
+ "epoch": 1.1327433628318584,
476
+ "grad_norm": 39.2066650390625,
477
+ "learning_rate": 4.9459193706981325e-05,
478
+ "loss": 1.1121,
479
+ "step": 128
480
+ },
481
+ {
482
+ "epoch": 1.1504424778761062,
483
+ "grad_norm": 26.280122756958008,
484
+ "learning_rate": 4.936086529006883e-05,
485
+ "loss": 1.001,
486
+ "step": 130
487
+ },
488
+ {
489
+ "epoch": 1.168141592920354,
490
+ "grad_norm": 41.13880920410156,
491
+ "learning_rate": 4.926253687315635e-05,
492
+ "loss": 1.0486,
493
+ "step": 132
494
+ },
495
+ {
496
+ "epoch": 1.1858407079646018,
497
+ "grad_norm": 45.9874267578125,
498
+ "learning_rate": 4.9164208456243856e-05,
499
+ "loss": 1.0237,
500
+ "step": 134
501
+ },
502
+ {
503
+ "epoch": 1.2035398230088497,
504
+ "grad_norm": 31.62242317199707,
505
+ "learning_rate": 4.906588003933137e-05,
506
+ "loss": 1.0973,
507
+ "step": 136
508
+ },
509
+ {
510
+ "epoch": 1.2212389380530975,
511
+ "grad_norm": 30.55558967590332,
512
+ "learning_rate": 4.8967551622418886e-05,
513
+ "loss": 1.0251,
514
+ "step": 138
515
+ },
516
+ {
517
+ "epoch": 1.238938053097345,
518
+ "grad_norm": 40.2532844543457,
519
+ "learning_rate": 4.8869223205506394e-05,
520
+ "loss": 0.9459,
521
+ "step": 140
522
+ },
523
+ {
524
+ "epoch": 1.2566371681415929,
525
+ "grad_norm": 77.14492797851562,
526
+ "learning_rate": 4.877089478859391e-05,
527
+ "loss": 1.0721,
528
+ "step": 142
529
+ },
530
+ {
531
+ "epoch": 1.2743362831858407,
532
+ "grad_norm": 30.2625732421875,
533
+ "learning_rate": 4.867256637168142e-05,
534
+ "loss": 1.0249,
535
+ "step": 144
536
+ },
537
+ {
538
+ "epoch": 1.2920353982300885,
539
+ "grad_norm": 39.786590576171875,
540
+ "learning_rate": 4.857423795476893e-05,
541
+ "loss": 1.0735,
542
+ "step": 146
543
+ },
544
+ {
545
+ "epoch": 1.3097345132743363,
546
+ "grad_norm": 33.97883605957031,
547
+ "learning_rate": 4.847590953785645e-05,
548
+ "loss": 0.9914,
549
+ "step": 148
550
+ },
551
+ {
552
+ "epoch": 1.3274336283185841,
553
+ "grad_norm": 27.49061393737793,
554
+ "learning_rate": 4.8377581120943956e-05,
555
+ "loss": 1.2146,
556
+ "step": 150
557
+ },
558
+ {
559
+ "epoch": 1.3451327433628317,
560
+ "grad_norm": 25.95128059387207,
561
+ "learning_rate": 4.827925270403147e-05,
562
+ "loss": 1.048,
563
+ "step": 152
564
+ },
565
+ {
566
+ "epoch": 1.3628318584070795,
567
+ "grad_norm": 25.32373809814453,
568
+ "learning_rate": 4.818092428711898e-05,
569
+ "loss": 1.0952,
570
+ "step": 154
571
+ },
572
+ {
573
+ "epoch": 1.3805309734513274,
574
+ "grad_norm": 29.068500518798828,
575
+ "learning_rate": 4.8082595870206494e-05,
576
+ "loss": 1.0903,
577
+ "step": 156
578
+ },
579
+ {
580
+ "epoch": 1.3982300884955752,
581
+ "grad_norm": 30.730670928955078,
582
+ "learning_rate": 4.7984267453294e-05,
583
+ "loss": 1.0388,
584
+ "step": 158
585
+ },
586
+ {
587
+ "epoch": 1.415929203539823,
588
+ "grad_norm": 34.4339714050293,
589
+ "learning_rate": 4.788593903638152e-05,
590
+ "loss": 1.0369,
591
+ "step": 160
592
+ },
593
+ {
594
+ "epoch": 1.4336283185840708,
595
+ "grad_norm": 74.3895263671875,
596
+ "learning_rate": 4.778761061946903e-05,
597
+ "loss": 1.0317,
598
+ "step": 162
599
+ },
600
+ {
601
+ "epoch": 1.4513274336283186,
602
+ "grad_norm": 25.546483993530273,
603
+ "learning_rate": 4.768928220255654e-05,
604
+ "loss": 0.9741,
605
+ "step": 164
606
+ },
607
+ {
608
+ "epoch": 1.4690265486725664,
609
+ "grad_norm": 26.646549224853516,
610
+ "learning_rate": 4.7590953785644055e-05,
611
+ "loss": 1.1132,
612
+ "step": 166
613
+ },
614
+ {
615
+ "epoch": 1.4867256637168142,
616
+ "grad_norm": 27.06178092956543,
617
+ "learning_rate": 4.749262536873156e-05,
618
+ "loss": 0.9592,
619
+ "step": 168
620
+ },
621
+ {
622
+ "epoch": 1.504424778761062,
623
+ "grad_norm": 27.895265579223633,
624
+ "learning_rate": 4.739429695181908e-05,
625
+ "loss": 1.0061,
626
+ "step": 170
627
+ },
628
+ {
629
+ "epoch": 1.5221238938053099,
630
+ "grad_norm": 28.37946319580078,
631
+ "learning_rate": 4.729596853490659e-05,
632
+ "loss": 0.9069,
633
+ "step": 172
634
+ },
635
+ {
636
+ "epoch": 1.5398230088495575,
637
+ "grad_norm": 22.880842208862305,
638
+ "learning_rate": 4.71976401179941e-05,
639
+ "loss": 1.0585,
640
+ "step": 174
641
+ },
642
+ {
643
+ "epoch": 1.5575221238938053,
644
+ "grad_norm": 27.391437530517578,
645
+ "learning_rate": 4.7099311701081617e-05,
646
+ "loss": 1.1805,
647
+ "step": 176
648
+ },
649
+ {
650
+ "epoch": 1.575221238938053,
651
+ "grad_norm": 76.33792877197266,
652
+ "learning_rate": 4.7000983284169125e-05,
653
+ "loss": 0.93,
654
+ "step": 178
655
+ },
656
+ {
657
+ "epoch": 1.592920353982301,
658
+ "grad_norm": 32.36155700683594,
659
+ "learning_rate": 4.690265486725664e-05,
660
+ "loss": 1.066,
661
+ "step": 180
662
+ },
663
+ {
664
+ "epoch": 1.6106194690265485,
665
+ "grad_norm": 23.659805297851562,
666
+ "learning_rate": 4.680432645034415e-05,
667
+ "loss": 0.9452,
668
+ "step": 182
669
+ },
670
+ {
671
+ "epoch": 1.6283185840707963,
672
+ "grad_norm": 40.60184097290039,
673
+ "learning_rate": 4.670599803343166e-05,
674
+ "loss": 0.9825,
675
+ "step": 184
676
+ },
677
+ {
678
+ "epoch": 1.6460176991150441,
679
+ "grad_norm": 47.14199447631836,
680
+ "learning_rate": 4.660766961651918e-05,
681
+ "loss": 0.9014,
682
+ "step": 186
683
+ },
684
+ {
685
+ "epoch": 1.663716814159292,
686
+ "grad_norm": 26.055511474609375,
687
+ "learning_rate": 4.6509341199606686e-05,
688
+ "loss": 1.0034,
689
+ "step": 188
690
+ },
691
+ {
692
+ "epoch": 1.6814159292035398,
693
+ "grad_norm": 31.73177146911621,
694
+ "learning_rate": 4.64110127826942e-05,
695
+ "loss": 1.0716,
696
+ "step": 190
697
+ },
698
+ {
699
+ "epoch": 1.6991150442477876,
700
+ "grad_norm": 28.099885940551758,
701
+ "learning_rate": 4.631268436578171e-05,
702
+ "loss": 1.1829,
703
+ "step": 192
704
+ },
705
+ {
706
+ "epoch": 1.7168141592920354,
707
+ "grad_norm": 13.378188133239746,
708
+ "learning_rate": 4.6214355948869224e-05,
709
+ "loss": 0.9111,
710
+ "step": 194
711
+ },
712
+ {
713
+ "epoch": 1.7345132743362832,
714
+ "grad_norm": 22.647581100463867,
715
+ "learning_rate": 4.611602753195674e-05,
716
+ "loss": 1.0079,
717
+ "step": 196
718
+ },
719
+ {
720
+ "epoch": 1.752212389380531,
721
+ "grad_norm": 22.10202407836914,
722
+ "learning_rate": 4.601769911504425e-05,
723
+ "loss": 1.0541,
724
+ "step": 198
725
+ },
726
+ {
727
+ "epoch": 1.7699115044247788,
728
+ "grad_norm": 19.46767807006836,
729
+ "learning_rate": 4.591937069813176e-05,
730
+ "loss": 0.8407,
731
+ "step": 200
732
+ },
733
+ {
734
+ "epoch": 1.7876106194690267,
735
+ "grad_norm": 34.163822174072266,
736
+ "learning_rate": 4.582104228121927e-05,
737
+ "loss": 1.157,
738
+ "step": 202
739
+ },
740
+ {
741
+ "epoch": 1.8053097345132745,
742
+ "grad_norm": 24.074146270751953,
743
+ "learning_rate": 4.5722713864306786e-05,
744
+ "loss": 0.9934,
745
+ "step": 204
746
+ },
747
+ {
748
+ "epoch": 1.823008849557522,
749
+ "grad_norm": 150.62364196777344,
750
+ "learning_rate": 4.5624385447394294e-05,
751
+ "loss": 0.9615,
752
+ "step": 206
753
+ },
754
+ {
755
+ "epoch": 1.8407079646017699,
756
+ "grad_norm": 20.424564361572266,
757
+ "learning_rate": 4.552605703048181e-05,
758
+ "loss": 1.0251,
759
+ "step": 208
760
+ },
761
+ {
762
+ "epoch": 1.8584070796460177,
763
+ "grad_norm": 30.88136100769043,
764
+ "learning_rate": 4.5427728613569324e-05,
765
+ "loss": 0.9429,
766
+ "step": 210
767
+ },
768
+ {
769
+ "epoch": 1.8761061946902655,
770
+ "grad_norm": 25.247404098510742,
771
+ "learning_rate": 4.532940019665683e-05,
772
+ "loss": 0.9711,
773
+ "step": 212
774
+ },
775
+ {
776
+ "epoch": 1.893805309734513,
777
+ "grad_norm": 37.985774993896484,
778
+ "learning_rate": 4.523107177974435e-05,
779
+ "loss": 0.9705,
780
+ "step": 214
781
+ },
782
+ {
783
+ "epoch": 1.911504424778761,
784
+ "grad_norm": 22.748384475708008,
785
+ "learning_rate": 4.5132743362831855e-05,
786
+ "loss": 1.0017,
787
+ "step": 216
788
+ },
789
+ {
790
+ "epoch": 1.9292035398230087,
791
+ "grad_norm": 19.267290115356445,
792
+ "learning_rate": 4.503441494591937e-05,
793
+ "loss": 0.9072,
794
+ "step": 218
795
+ },
796
+ {
797
+ "epoch": 1.9469026548672566,
798
+ "grad_norm": 19.34298324584961,
799
+ "learning_rate": 4.4936086529006885e-05,
800
+ "loss": 1.0597,
801
+ "step": 220
802
+ },
803
+ {
804
+ "epoch": 1.9646017699115044,
805
+ "grad_norm": 23.347171783447266,
806
+ "learning_rate": 4.48377581120944e-05,
807
+ "loss": 0.9139,
808
+ "step": 222
809
+ },
810
+ {
811
+ "epoch": 1.9823008849557522,
812
+ "grad_norm": 20.579362869262695,
813
+ "learning_rate": 4.473942969518191e-05,
814
+ "loss": 0.8591,
815
+ "step": 224
816
+ },
817
+ {
818
+ "epoch": 2.0,
819
+ "grad_norm": 64.1324691772461,
820
+ "learning_rate": 4.464110127826942e-05,
821
+ "loss": 1.0063,
822
+ "step": 226
823
+ },
824
+ {
825
+ "epoch": 2.0,
826
+ "eval_loss": 0.9410255551338196,
827
+ "eval_map": 0.2993,
828
+ "eval_map_50": 0.536,
829
+ "eval_map_75": 0.3136,
830
+ "eval_map_large": 0.3631,
831
+ "eval_map_medium": 0.1534,
832
+ "eval_map_per_class": 0.2993,
833
+ "eval_map_small": 0.0,
834
+ "eval_mar_1": 0.2427,
835
+ "eval_mar_10": 0.5261,
836
+ "eval_mar_100": 0.6393,
837
+ "eval_mar_100_per_class": 0.6393,
838
+ "eval_mar_large": 0.7107,
839
+ "eval_mar_medium": 0.4609,
840
+ "eval_mar_small": 0.0,
841
+ "eval_runtime": 3.9582,
842
+ "eval_samples_per_second": 25.264,
843
+ "eval_steps_per_second": 1.768,
844
+ "step": 226
845
+ },
846
+ {
847
+ "epoch": 2.017699115044248,
848
+ "grad_norm": 27.324804306030273,
849
+ "learning_rate": 4.454277286135694e-05,
850
+ "loss": 0.9027,
851
+ "step": 228
852
+ },
853
+ {
854
+ "epoch": 2.0353982300884956,
855
+ "grad_norm": 20.43166732788086,
856
+ "learning_rate": 4.4444444444444447e-05,
857
+ "loss": 0.9257,
858
+ "step": 230
859
+ },
860
+ {
861
+ "epoch": 2.0530973451327434,
862
+ "grad_norm": 18.177186965942383,
863
+ "learning_rate": 4.434611602753196e-05,
864
+ "loss": 0.9379,
865
+ "step": 232
866
+ },
867
+ {
868
+ "epoch": 2.0707964601769913,
869
+ "grad_norm": 24.545679092407227,
870
+ "learning_rate": 4.4247787610619477e-05,
871
+ "loss": 1.0571,
872
+ "step": 234
873
+ },
874
+ {
875
+ "epoch": 2.088495575221239,
876
+ "grad_norm": 21.458559036254883,
877
+ "learning_rate": 4.4149459193706985e-05,
878
+ "loss": 1.0239,
879
+ "step": 236
880
+ },
881
+ {
882
+ "epoch": 2.106194690265487,
883
+ "grad_norm": 20.25364112854004,
884
+ "learning_rate": 4.40511307767945e-05,
885
+ "loss": 0.9194,
886
+ "step": 238
887
+ },
888
+ {
889
+ "epoch": 2.1238938053097347,
890
+ "grad_norm": 24.264408111572266,
891
+ "learning_rate": 4.395280235988201e-05,
892
+ "loss": 0.9601,
893
+ "step": 240
894
+ },
895
+ {
896
+ "epoch": 2.1415929203539825,
897
+ "grad_norm": 17.67040252685547,
898
+ "learning_rate": 4.385447394296952e-05,
899
+ "loss": 1.1069,
900
+ "step": 242
901
+ },
902
+ {
903
+ "epoch": 2.15929203539823,
904
+ "grad_norm": 25.185571670532227,
905
+ "learning_rate": 4.375614552605704e-05,
906
+ "loss": 0.9537,
907
+ "step": 244
908
+ },
909
+ {
910
+ "epoch": 2.1769911504424777,
911
+ "grad_norm": 39.48079299926758,
912
+ "learning_rate": 4.3657817109144546e-05,
913
+ "loss": 1.082,
914
+ "step": 246
915
+ },
916
+ {
917
+ "epoch": 2.1946902654867255,
918
+ "grad_norm": 22.608633041381836,
919
+ "learning_rate": 4.355948869223206e-05,
920
+ "loss": 0.8366,
921
+ "step": 248
922
+ },
923
+ {
924
+ "epoch": 2.2123893805309733,
925
+ "grad_norm": 57.35368347167969,
926
+ "learning_rate": 4.346116027531957e-05,
927
+ "loss": 0.8564,
928
+ "step": 250
929
+ },
930
+ {
931
+ "epoch": 2.230088495575221,
932
+ "grad_norm": 34.98164367675781,
933
+ "learning_rate": 4.3362831858407084e-05,
934
+ "loss": 0.8928,
935
+ "step": 252
936
+ },
937
+ {
938
+ "epoch": 2.247787610619469,
939
+ "grad_norm": 19.134063720703125,
940
+ "learning_rate": 4.326450344149459e-05,
941
+ "loss": 0.9219,
942
+ "step": 254
943
+ },
944
+ {
945
+ "epoch": 2.265486725663717,
946
+ "grad_norm": 33.00546646118164,
947
+ "learning_rate": 4.316617502458211e-05,
948
+ "loss": 0.8816,
949
+ "step": 256
950
+ },
951
+ {
952
+ "epoch": 2.2831858407079646,
953
+ "grad_norm": 26.70623779296875,
954
+ "learning_rate": 4.306784660766962e-05,
955
+ "loss": 0.9003,
956
+ "step": 258
957
+ },
958
+ {
959
+ "epoch": 2.3008849557522124,
960
+ "grad_norm": 16.002517700195312,
961
+ "learning_rate": 4.296951819075713e-05,
962
+ "loss": 0.9086,
963
+ "step": 260
964
+ },
965
+ {
966
+ "epoch": 2.3185840707964602,
967
+ "grad_norm": 26.876773834228516,
968
+ "learning_rate": 4.2871189773844646e-05,
969
+ "loss": 0.8936,
970
+ "step": 262
971
+ },
972
+ {
973
+ "epoch": 2.336283185840708,
974
+ "grad_norm": 19.438966751098633,
975
+ "learning_rate": 4.2772861356932154e-05,
976
+ "loss": 0.886,
977
+ "step": 264
978
+ },
979
+ {
980
+ "epoch": 2.353982300884956,
981
+ "grad_norm": 18.447446823120117,
982
+ "learning_rate": 4.267453294001967e-05,
983
+ "loss": 0.9216,
984
+ "step": 266
985
+ },
986
+ {
987
+ "epoch": 2.3716814159292037,
988
+ "grad_norm": 17.754484176635742,
989
+ "learning_rate": 4.2576204523107184e-05,
990
+ "loss": 0.8503,
991
+ "step": 268
992
+ },
993
+ {
994
+ "epoch": 2.3893805309734515,
995
+ "grad_norm": 21.90740394592285,
996
+ "learning_rate": 4.247787610619469e-05,
997
+ "loss": 0.8937,
998
+ "step": 270
999
+ },
1000
+ {
1001
+ "epoch": 2.4070796460176993,
1002
+ "grad_norm": 17.798200607299805,
1003
+ "learning_rate": 4.237954768928221e-05,
1004
+ "loss": 0.97,
1005
+ "step": 272
1006
+ },
1007
+ {
1008
+ "epoch": 2.4247787610619467,
1009
+ "grad_norm": 26.43834114074707,
1010
+ "learning_rate": 4.2281219272369715e-05,
1011
+ "loss": 0.7791,
1012
+ "step": 274
1013
+ },
1014
+ {
1015
+ "epoch": 2.442477876106195,
1016
+ "grad_norm": 18.365053176879883,
1017
+ "learning_rate": 4.218289085545723e-05,
1018
+ "loss": 0.9906,
1019
+ "step": 276
1020
+ },
1021
+ {
1022
+ "epoch": 2.4601769911504423,
1023
+ "grad_norm": 26.327442169189453,
1024
+ "learning_rate": 4.208456243854474e-05,
1025
+ "loss": 1.001,
1026
+ "step": 278
1027
+ },
1028
+ {
1029
+ "epoch": 2.47787610619469,
1030
+ "grad_norm": 22.054363250732422,
1031
+ "learning_rate": 4.1986234021632253e-05,
1032
+ "loss": 1.0239,
1033
+ "step": 280
1034
+ },
1035
+ {
1036
+ "epoch": 2.495575221238938,
1037
+ "grad_norm": 39.17606735229492,
1038
+ "learning_rate": 4.188790560471977e-05,
1039
+ "loss": 0.9184,
1040
+ "step": 282
1041
+ },
1042
+ {
1043
+ "epoch": 2.5132743362831858,
1044
+ "grad_norm": 18.545665740966797,
1045
+ "learning_rate": 4.178957718780728e-05,
1046
+ "loss": 0.8107,
1047
+ "step": 284
1048
+ },
1049
+ {
1050
+ "epoch": 2.5309734513274336,
1051
+ "grad_norm": 15.98513412475586,
1052
+ "learning_rate": 4.169124877089479e-05,
1053
+ "loss": 1.0371,
1054
+ "step": 286
1055
+ },
1056
+ {
1057
+ "epoch": 2.5486725663716814,
1058
+ "grad_norm": 23.60863494873047,
1059
+ "learning_rate": 4.15929203539823e-05,
1060
+ "loss": 0.8471,
1061
+ "step": 288
1062
+ },
1063
+ {
1064
+ "epoch": 2.566371681415929,
1065
+ "grad_norm": 39.60649108886719,
1066
+ "learning_rate": 4.1494591937069815e-05,
1067
+ "loss": 0.9184,
1068
+ "step": 290
1069
+ },
1070
+ {
1071
+ "epoch": 2.584070796460177,
1072
+ "grad_norm": 32.43935012817383,
1073
+ "learning_rate": 4.139626352015733e-05,
1074
+ "loss": 0.8087,
1075
+ "step": 292
1076
+ },
1077
+ {
1078
+ "epoch": 2.601769911504425,
1079
+ "grad_norm": 22.511394500732422,
1080
+ "learning_rate": 4.129793510324484e-05,
1081
+ "loss": 0.8604,
1082
+ "step": 294
1083
+ },
1084
+ {
1085
+ "epoch": 2.6194690265486726,
1086
+ "grad_norm": 17.795698165893555,
1087
+ "learning_rate": 4.119960668633235e-05,
1088
+ "loss": 0.9068,
1089
+ "step": 296
1090
+ },
1091
+ {
1092
+ "epoch": 2.6371681415929205,
1093
+ "grad_norm": 25.883586883544922,
1094
+ "learning_rate": 4.110127826941986e-05,
1095
+ "loss": 0.7654,
1096
+ "step": 298
1097
+ },
1098
+ {
1099
+ "epoch": 2.6548672566371683,
1100
+ "grad_norm": 17.679630279541016,
1101
+ "learning_rate": 4.1002949852507376e-05,
1102
+ "loss": 0.8956,
1103
+ "step": 300
1104
+ },
1105
+ {
1106
+ "epoch": 2.672566371681416,
1107
+ "grad_norm": 40.860904693603516,
1108
+ "learning_rate": 4.0904621435594884e-05,
1109
+ "loss": 0.9776,
1110
+ "step": 302
1111
+ },
1112
+ {
1113
+ "epoch": 2.6902654867256635,
1114
+ "grad_norm": 30.485763549804688,
1115
+ "learning_rate": 4.08062930186824e-05,
1116
+ "loss": 0.7637,
1117
+ "step": 304
1118
+ },
1119
+ {
1120
+ "epoch": 2.7079646017699117,
1121
+ "grad_norm": 20.94835090637207,
1122
+ "learning_rate": 4.0707964601769914e-05,
1123
+ "loss": 0.972,
1124
+ "step": 306
1125
+ },
1126
+ {
1127
+ "epoch": 2.725663716814159,
1128
+ "grad_norm": 28.85026741027832,
1129
+ "learning_rate": 4.060963618485742e-05,
1130
+ "loss": 1.0257,
1131
+ "step": 308
1132
+ },
1133
+ {
1134
+ "epoch": 2.7433628318584073,
1135
+ "grad_norm": 20.580015182495117,
1136
+ "learning_rate": 4.051130776794494e-05,
1137
+ "loss": 0.9242,
1138
+ "step": 310
1139
+ },
1140
+ {
1141
+ "epoch": 2.7610619469026547,
1142
+ "grad_norm": 19.40909767150879,
1143
+ "learning_rate": 4.0412979351032446e-05,
1144
+ "loss": 1.0149,
1145
+ "step": 312
1146
+ },
1147
+ {
1148
+ "epoch": 2.7787610619469025,
1149
+ "grad_norm": 17.306678771972656,
1150
+ "learning_rate": 4.031465093411996e-05,
1151
+ "loss": 0.8118,
1152
+ "step": 314
1153
+ },
1154
+ {
1155
+ "epoch": 2.7964601769911503,
1156
+ "grad_norm": 30.43448829650879,
1157
+ "learning_rate": 4.0216322517207476e-05,
1158
+ "loss": 0.8623,
1159
+ "step": 316
1160
+ },
1161
+ {
1162
+ "epoch": 2.814159292035398,
1163
+ "grad_norm": 20.09423828125,
1164
+ "learning_rate": 4.0117994100294984e-05,
1165
+ "loss": 0.8364,
1166
+ "step": 318
1167
+ },
1168
+ {
1169
+ "epoch": 2.831858407079646,
1170
+ "grad_norm": 16.069490432739258,
1171
+ "learning_rate": 4.00196656833825e-05,
1172
+ "loss": 0.812,
1173
+ "step": 320
1174
+ },
1175
+ {
1176
+ "epoch": 2.849557522123894,
1177
+ "grad_norm": 25.968656539916992,
1178
+ "learning_rate": 3.992133726647001e-05,
1179
+ "loss": 0.8953,
1180
+ "step": 322
1181
+ },
1182
+ {
1183
+ "epoch": 2.8672566371681416,
1184
+ "grad_norm": 28.93232536315918,
1185
+ "learning_rate": 3.982300884955752e-05,
1186
+ "loss": 1.0083,
1187
+ "step": 324
1188
+ },
1189
+ {
1190
+ "epoch": 2.8849557522123894,
1191
+ "grad_norm": 32.24972152709961,
1192
+ "learning_rate": 3.972468043264504e-05,
1193
+ "loss": 0.9767,
1194
+ "step": 326
1195
+ },
1196
+ {
1197
+ "epoch": 2.9026548672566372,
1198
+ "grad_norm": 32.170352935791016,
1199
+ "learning_rate": 3.9626352015732545e-05,
1200
+ "loss": 0.787,
1201
+ "step": 328
1202
+ },
1203
+ {
1204
+ "epoch": 2.920353982300885,
1205
+ "grad_norm": 19.10132598876953,
1206
+ "learning_rate": 3.952802359882006e-05,
1207
+ "loss": 0.8321,
1208
+ "step": 330
1209
+ },
1210
+ {
1211
+ "epoch": 2.938053097345133,
1212
+ "grad_norm": 30.334962844848633,
1213
+ "learning_rate": 3.9429695181907575e-05,
1214
+ "loss": 0.8306,
1215
+ "step": 332
1216
+ },
1217
+ {
1218
+ "epoch": 2.9557522123893807,
1219
+ "grad_norm": 24.146038055419922,
1220
+ "learning_rate": 3.9331366764995083e-05,
1221
+ "loss": 0.8859,
1222
+ "step": 334
1223
+ },
1224
+ {
1225
+ "epoch": 2.9734513274336285,
1226
+ "grad_norm": 24.66895294189453,
1227
+ "learning_rate": 3.92330383480826e-05,
1228
+ "loss": 0.8035,
1229
+ "step": 336
1230
+ },
1231
+ {
1232
+ "epoch": 2.991150442477876,
1233
+ "grad_norm": 18.740558624267578,
1234
+ "learning_rate": 3.9134709931170113e-05,
1235
+ "loss": 0.8284,
1236
+ "step": 338
1237
+ },
1238
+ {
1239
+ "epoch": 3.0,
1240
+ "eval_loss": 0.883878231048584,
1241
+ "eval_map": 0.4451,
1242
+ "eval_map_50": 0.8057,
1243
+ "eval_map_75": 0.399,
1244
+ "eval_map_large": 0.5228,
1245
+ "eval_map_medium": 0.2384,
1246
+ "eval_map_per_class": 0.4451,
1247
+ "eval_map_small": 0.0123,
1248
+ "eval_mar_1": 0.2662,
1249
+ "eval_mar_10": 0.5637,
1250
+ "eval_mar_100": 0.6295,
1251
+ "eval_mar_100_per_class": 0.6295,
1252
+ "eval_mar_large": 0.6976,
1253
+ "eval_mar_medium": 0.4547,
1254
+ "eval_mar_small": 0.3,
1255
+ "eval_runtime": 4.2753,
1256
+ "eval_samples_per_second": 23.39,
1257
+ "eval_steps_per_second": 1.637,
1258
+ "step": 339
1259
+ },
1260
+ {
1261
+ "epoch": 3.0088495575221237,
1262
+ "grad_norm": 17.387792587280273,
1263
+ "learning_rate": 3.903638151425762e-05,
1264
+ "loss": 0.9204,
1265
+ "step": 340
1266
+ },
1267
+ {
1268
+ "epoch": 3.0265486725663715,
1269
+ "grad_norm": 23.816524505615234,
1270
+ "learning_rate": 3.893805309734514e-05,
1271
+ "loss": 0.7563,
1272
+ "step": 342
1273
+ },
1274
+ {
1275
+ "epoch": 3.0442477876106193,
1276
+ "grad_norm": 23.070165634155273,
1277
+ "learning_rate": 3.883972468043265e-05,
1278
+ "loss": 0.6889,
1279
+ "step": 344
1280
+ },
1281
+ {
1282
+ "epoch": 3.061946902654867,
1283
+ "grad_norm": 21.282630920410156,
1284
+ "learning_rate": 3.874139626352016e-05,
1285
+ "loss": 0.7727,
1286
+ "step": 346
1287
+ },
1288
+ {
1289
+ "epoch": 3.079646017699115,
1290
+ "grad_norm": 32.04922866821289,
1291
+ "learning_rate": 3.8643067846607675e-05,
1292
+ "loss": 0.7494,
1293
+ "step": 348
1294
+ },
1295
+ {
1296
+ "epoch": 3.0973451327433628,
1297
+ "grad_norm": 22.994802474975586,
1298
+ "learning_rate": 3.854473942969518e-05,
1299
+ "loss": 0.9539,
1300
+ "step": 350
1301
+ },
1302
+ {
1303
+ "epoch": 3.1150442477876106,
1304
+ "grad_norm": 19.214599609375,
1305
+ "learning_rate": 3.84464110127827e-05,
1306
+ "loss": 0.6915,
1307
+ "step": 352
1308
+ },
1309
+ {
1310
+ "epoch": 3.1327433628318584,
1311
+ "grad_norm": 15.473613739013672,
1312
+ "learning_rate": 3.834808259587021e-05,
1313
+ "loss": 0.8087,
1314
+ "step": 354
1315
+ },
1316
+ {
1317
+ "epoch": 3.150442477876106,
1318
+ "grad_norm": 27.318443298339844,
1319
+ "learning_rate": 3.824975417895772e-05,
1320
+ "loss": 0.7237,
1321
+ "step": 356
1322
+ },
1323
+ {
1324
+ "epoch": 3.168141592920354,
1325
+ "grad_norm": 20.30460548400879,
1326
+ "learning_rate": 3.8151425762045236e-05,
1327
+ "loss": 0.8245,
1328
+ "step": 358
1329
+ },
1330
+ {
1331
+ "epoch": 3.185840707964602,
1332
+ "grad_norm": 20.886333465576172,
1333
+ "learning_rate": 3.8053097345132744e-05,
1334
+ "loss": 0.9283,
1335
+ "step": 360
1336
+ },
1337
+ {
1338
+ "epoch": 3.2035398230088497,
1339
+ "grad_norm": 19.330215454101562,
1340
+ "learning_rate": 3.795476892822026e-05,
1341
+ "loss": 0.8195,
1342
+ "step": 362
1343
+ },
1344
+ {
1345
+ "epoch": 3.2212389380530975,
1346
+ "grad_norm": 28.00650405883789,
1347
+ "learning_rate": 3.7856440511307774e-05,
1348
+ "loss": 0.8749,
1349
+ "step": 364
1350
+ },
1351
+ {
1352
+ "epoch": 3.2389380530973453,
1353
+ "grad_norm": 23.165494918823242,
1354
+ "learning_rate": 3.775811209439528e-05,
1355
+ "loss": 0.8077,
1356
+ "step": 366
1357
+ },
1358
+ {
1359
+ "epoch": 3.256637168141593,
1360
+ "grad_norm": 24.138504028320312,
1361
+ "learning_rate": 3.76597836774828e-05,
1362
+ "loss": 0.7378,
1363
+ "step": 368
1364
+ },
1365
+ {
1366
+ "epoch": 3.274336283185841,
1367
+ "grad_norm": 26.703155517578125,
1368
+ "learning_rate": 3.7561455260570306e-05,
1369
+ "loss": 0.771,
1370
+ "step": 370
1371
+ },
1372
+ {
1373
+ "epoch": 3.2920353982300883,
1374
+ "grad_norm": 19.84184455871582,
1375
+ "learning_rate": 3.746312684365782e-05,
1376
+ "loss": 0.8013,
1377
+ "step": 372
1378
+ },
1379
+ {
1380
+ "epoch": 3.309734513274336,
1381
+ "grad_norm": 23.238510131835938,
1382
+ "learning_rate": 3.736479842674533e-05,
1383
+ "loss": 0.7144,
1384
+ "step": 374
1385
+ },
1386
+ {
1387
+ "epoch": 3.327433628318584,
1388
+ "grad_norm": 27.819055557250977,
1389
+ "learning_rate": 3.7266470009832844e-05,
1390
+ "loss": 0.9366,
1391
+ "step": 376
1392
+ },
1393
+ {
1394
+ "epoch": 3.3451327433628317,
1395
+ "grad_norm": 18.144052505493164,
1396
+ "learning_rate": 3.716814159292036e-05,
1397
+ "loss": 0.8564,
1398
+ "step": 378
1399
+ },
1400
+ {
1401
+ "epoch": 3.3628318584070795,
1402
+ "grad_norm": 16.014623641967773,
1403
+ "learning_rate": 3.706981317600787e-05,
1404
+ "loss": 0.9854,
1405
+ "step": 380
1406
+ },
1407
+ {
1408
+ "epoch": 3.3805309734513274,
1409
+ "grad_norm": 19.18610191345215,
1410
+ "learning_rate": 3.697148475909538e-05,
1411
+ "loss": 0.8153,
1412
+ "step": 382
1413
+ },
1414
+ {
1415
+ "epoch": 3.398230088495575,
1416
+ "grad_norm": 28.12213134765625,
1417
+ "learning_rate": 3.687315634218289e-05,
1418
+ "loss": 0.8107,
1419
+ "step": 384
1420
+ },
1421
+ {
1422
+ "epoch": 3.415929203539823,
1423
+ "grad_norm": 42.245262145996094,
1424
+ "learning_rate": 3.6774827925270405e-05,
1425
+ "loss": 0.8498,
1426
+ "step": 386
1427
+ },
1428
+ {
1429
+ "epoch": 3.433628318584071,
1430
+ "grad_norm": 20.642005920410156,
1431
+ "learning_rate": 3.667649950835792e-05,
1432
+ "loss": 0.8426,
1433
+ "step": 388
1434
+ },
1435
+ {
1436
+ "epoch": 3.4513274336283186,
1437
+ "grad_norm": 20.72900390625,
1438
+ "learning_rate": 3.657817109144543e-05,
1439
+ "loss": 0.7979,
1440
+ "step": 390
1441
+ },
1442
+ {
1443
+ "epoch": 3.4690265486725664,
1444
+ "grad_norm": 36.909027099609375,
1445
+ "learning_rate": 3.6479842674532944e-05,
1446
+ "loss": 1.0862,
1447
+ "step": 392
1448
+ },
1449
+ {
1450
+ "epoch": 3.4867256637168142,
1451
+ "grad_norm": 34.34043884277344,
1452
+ "learning_rate": 3.638151425762045e-05,
1453
+ "loss": 0.9602,
1454
+ "step": 394
1455
+ },
1456
+ {
1457
+ "epoch": 3.504424778761062,
1458
+ "grad_norm": 17.44400978088379,
1459
+ "learning_rate": 3.628318584070797e-05,
1460
+ "loss": 0.7522,
1461
+ "step": 396
1462
+ },
1463
+ {
1464
+ "epoch": 3.52212389380531,
1465
+ "grad_norm": 29.59943962097168,
1466
+ "learning_rate": 3.6184857423795475e-05,
1467
+ "loss": 0.8881,
1468
+ "step": 398
1469
+ },
1470
+ {
1471
+ "epoch": 3.5398230088495577,
1472
+ "grad_norm": 33.33137893676758,
1473
+ "learning_rate": 3.608652900688299e-05,
1474
+ "loss": 0.9806,
1475
+ "step": 400
1476
+ },
1477
+ {
1478
+ "epoch": 3.557522123893805,
1479
+ "grad_norm": 25.366241455078125,
1480
+ "learning_rate": 3.5988200589970505e-05,
1481
+ "loss": 1.0134,
1482
+ "step": 402
1483
+ },
1484
+ {
1485
+ "epoch": 3.5752212389380533,
1486
+ "grad_norm": 28.731691360473633,
1487
+ "learning_rate": 3.588987217305801e-05,
1488
+ "loss": 0.926,
1489
+ "step": 404
1490
+ },
1491
+ {
1492
+ "epoch": 3.5929203539823007,
1493
+ "grad_norm": 24.277082443237305,
1494
+ "learning_rate": 3.579154375614553e-05,
1495
+ "loss": 0.8202,
1496
+ "step": 406
1497
+ },
1498
+ {
1499
+ "epoch": 3.6106194690265485,
1500
+ "grad_norm": 15.626072883605957,
1501
+ "learning_rate": 3.5693215339233036e-05,
1502
+ "loss": 0.8415,
1503
+ "step": 408
1504
+ },
1505
+ {
1506
+ "epoch": 3.6283185840707963,
1507
+ "grad_norm": 35.861610412597656,
1508
+ "learning_rate": 3.559488692232055e-05,
1509
+ "loss": 0.8739,
1510
+ "step": 410
1511
+ },
1512
+ {
1513
+ "epoch": 3.646017699115044,
1514
+ "grad_norm": 30.36337661743164,
1515
+ "learning_rate": 3.5496558505408066e-05,
1516
+ "loss": 0.7045,
1517
+ "step": 412
1518
+ },
1519
+ {
1520
+ "epoch": 3.663716814159292,
1521
+ "grad_norm": 25.959348678588867,
1522
+ "learning_rate": 3.5398230088495574e-05,
1523
+ "loss": 0.6826,
1524
+ "step": 414
1525
+ },
1526
+ {
1527
+ "epoch": 3.6814159292035398,
1528
+ "grad_norm": 20.39558219909668,
1529
+ "learning_rate": 3.529990167158309e-05,
1530
+ "loss": 0.8847,
1531
+ "step": 416
1532
+ },
1533
+ {
1534
+ "epoch": 3.6991150442477876,
1535
+ "grad_norm": 13.092119216918945,
1536
+ "learning_rate": 3.52015732546706e-05,
1537
+ "loss": 1.0101,
1538
+ "step": 418
1539
+ },
1540
+ {
1541
+ "epoch": 3.7168141592920354,
1542
+ "grad_norm": 32.8824462890625,
1543
+ "learning_rate": 3.510324483775811e-05,
1544
+ "loss": 0.8229,
1545
+ "step": 420
1546
+ },
1547
+ {
1548
+ "epoch": 3.734513274336283,
1549
+ "grad_norm": 52.47900390625,
1550
+ "learning_rate": 3.500491642084562e-05,
1551
+ "loss": 0.8438,
1552
+ "step": 422
1553
+ },
1554
+ {
1555
+ "epoch": 3.752212389380531,
1556
+ "grad_norm": 40.38644790649414,
1557
+ "learning_rate": 3.4906588003933136e-05,
1558
+ "loss": 0.8129,
1559
+ "step": 424
1560
+ },
1561
+ {
1562
+ "epoch": 3.769911504424779,
1563
+ "grad_norm": 24.974903106689453,
1564
+ "learning_rate": 3.480825958702065e-05,
1565
+ "loss": 0.8478,
1566
+ "step": 426
1567
+ },
1568
+ {
1569
+ "epoch": 3.7876106194690267,
1570
+ "grad_norm": 26.479198455810547,
1571
+ "learning_rate": 3.470993117010816e-05,
1572
+ "loss": 0.8803,
1573
+ "step": 428
1574
+ },
1575
+ {
1576
+ "epoch": 3.8053097345132745,
1577
+ "grad_norm": 20.01055908203125,
1578
+ "learning_rate": 3.4611602753195674e-05,
1579
+ "loss": 0.8078,
1580
+ "step": 430
1581
+ },
1582
+ {
1583
+ "epoch": 3.823008849557522,
1584
+ "grad_norm": 34.75425720214844,
1585
+ "learning_rate": 3.451327433628319e-05,
1586
+ "loss": 0.9117,
1587
+ "step": 432
1588
+ },
1589
+ {
1590
+ "epoch": 3.84070796460177,
1591
+ "grad_norm": 30.763608932495117,
1592
+ "learning_rate": 3.44149459193707e-05,
1593
+ "loss": 0.7776,
1594
+ "step": 434
1595
+ },
1596
+ {
1597
+ "epoch": 3.8584070796460175,
1598
+ "grad_norm": 38.77566909790039,
1599
+ "learning_rate": 3.431661750245821e-05,
1600
+ "loss": 0.8284,
1601
+ "step": 436
1602
+ },
1603
+ {
1604
+ "epoch": 3.8761061946902657,
1605
+ "grad_norm": 24.3690128326416,
1606
+ "learning_rate": 3.421828908554573e-05,
1607
+ "loss": 0.7673,
1608
+ "step": 438
1609
+ },
1610
+ {
1611
+ "epoch": 3.893805309734513,
1612
+ "grad_norm": 22.95048713684082,
1613
+ "learning_rate": 3.4119960668633235e-05,
1614
+ "loss": 0.7641,
1615
+ "step": 440
1616
+ },
1617
+ {
1618
+ "epoch": 3.911504424778761,
1619
+ "grad_norm": 21.6431827545166,
1620
+ "learning_rate": 3.402163225172075e-05,
1621
+ "loss": 0.832,
1622
+ "step": 442
1623
+ },
1624
+ {
1625
+ "epoch": 3.9292035398230087,
1626
+ "grad_norm": 30.03487777709961,
1627
+ "learning_rate": 3.3923303834808265e-05,
1628
+ "loss": 0.8857,
1629
+ "step": 444
1630
+ },
1631
+ {
1632
+ "epoch": 3.9469026548672566,
1633
+ "grad_norm": 32.90857696533203,
1634
+ "learning_rate": 3.3824975417895774e-05,
1635
+ "loss": 0.8063,
1636
+ "step": 446
1637
+ },
1638
+ {
1639
+ "epoch": 3.9646017699115044,
1640
+ "grad_norm": 18.506166458129883,
1641
+ "learning_rate": 3.372664700098329e-05,
1642
+ "loss": 0.8189,
1643
+ "step": 448
1644
+ },
1645
+ {
1646
+ "epoch": 3.982300884955752,
1647
+ "grad_norm": 21.59197235107422,
1648
+ "learning_rate": 3.3628318584070804e-05,
1649
+ "loss": 0.711,
1650
+ "step": 450
1651
+ },
1652
+ {
1653
+ "epoch": 4.0,
1654
+ "grad_norm": 55.65620422363281,
1655
+ "learning_rate": 3.352999016715831e-05,
1656
+ "loss": 0.8934,
1657
+ "step": 452
1658
+ },
1659
+ {
1660
+ "epoch": 4.0,
1661
+ "eval_loss": 0.8967828154563904,
1662
+ "eval_map": 0.4488,
1663
+ "eval_map_50": 0.8568,
1664
+ "eval_map_75": 0.4271,
1665
+ "eval_map_large": 0.5174,
1666
+ "eval_map_medium": 0.264,
1667
+ "eval_map_per_class": 0.4488,
1668
+ "eval_map_small": 0.0,
1669
+ "eval_mar_1": 0.2483,
1670
+ "eval_mar_10": 0.5628,
1671
+ "eval_mar_100": 0.6197,
1672
+ "eval_mar_100_per_class": 0.6197,
1673
+ "eval_mar_large": 0.6834,
1674
+ "eval_mar_medium": 0.4609,
1675
+ "eval_mar_small": 0.0,
1676
+ "eval_runtime": 3.9666,
1677
+ "eval_samples_per_second": 25.21,
1678
+ "eval_steps_per_second": 1.765,
1679
+ "step": 452
1680
+ },
1681
+ {
1682
+ "epoch": 4.017699115044247,
1683
+ "grad_norm": 27.480350494384766,
1684
+ "learning_rate": 3.343166175024583e-05,
1685
+ "loss": 0.8197,
1686
+ "step": 454
1687
+ },
1688
+ {
1689
+ "epoch": 4.035398230088496,
1690
+ "grad_norm": 17.332786560058594,
1691
+ "learning_rate": 3.3333333333333335e-05,
1692
+ "loss": 0.8171,
1693
+ "step": 456
1694
+ },
1695
+ {
1696
+ "epoch": 4.053097345132743,
1697
+ "grad_norm": 14.860098838806152,
1698
+ "learning_rate": 3.323500491642085e-05,
1699
+ "loss": 0.7993,
1700
+ "step": 458
1701
+ },
1702
+ {
1703
+ "epoch": 4.070796460176991,
1704
+ "grad_norm": 16.08732795715332,
1705
+ "learning_rate": 3.313667649950836e-05,
1706
+ "loss": 0.7328,
1707
+ "step": 460
1708
+ },
1709
+ {
1710
+ "epoch": 4.088495575221239,
1711
+ "grad_norm": 24.5137939453125,
1712
+ "learning_rate": 3.303834808259587e-05,
1713
+ "loss": 0.7629,
1714
+ "step": 462
1715
+ },
1716
+ {
1717
+ "epoch": 4.106194690265487,
1718
+ "grad_norm": 28.86885643005371,
1719
+ "learning_rate": 3.294001966568339e-05,
1720
+ "loss": 0.9142,
1721
+ "step": 464
1722
+ },
1723
+ {
1724
+ "epoch": 4.123893805309734,
1725
+ "grad_norm": 26.640892028808594,
1726
+ "learning_rate": 3.2841691248770896e-05,
1727
+ "loss": 0.8952,
1728
+ "step": 466
1729
+ },
1730
+ {
1731
+ "epoch": 4.1415929203539825,
1732
+ "grad_norm": 16.20265769958496,
1733
+ "learning_rate": 3.274336283185841e-05,
1734
+ "loss": 0.7812,
1735
+ "step": 468
1736
+ },
1737
+ {
1738
+ "epoch": 4.15929203539823,
1739
+ "grad_norm": 22.30803871154785,
1740
+ "learning_rate": 3.264503441494592e-05,
1741
+ "loss": 0.8641,
1742
+ "step": 470
1743
+ },
1744
+ {
1745
+ "epoch": 4.176991150442478,
1746
+ "grad_norm": 17.450302124023438,
1747
+ "learning_rate": 3.2546705998033434e-05,
1748
+ "loss": 0.8061,
1749
+ "step": 472
1750
+ },
1751
+ {
1752
+ "epoch": 4.1946902654867255,
1753
+ "grad_norm": 20.605792999267578,
1754
+ "learning_rate": 3.244837758112095e-05,
1755
+ "loss": 0.8872,
1756
+ "step": 474
1757
+ },
1758
+ {
1759
+ "epoch": 4.212389380530974,
1760
+ "grad_norm": 21.550548553466797,
1761
+ "learning_rate": 3.235004916420846e-05,
1762
+ "loss": 0.8599,
1763
+ "step": 476
1764
+ },
1765
+ {
1766
+ "epoch": 4.230088495575221,
1767
+ "grad_norm": 14.21965217590332,
1768
+ "learning_rate": 3.225172074729597e-05,
1769
+ "loss": 0.7412,
1770
+ "step": 478
1771
+ },
1772
+ {
1773
+ "epoch": 4.247787610619469,
1774
+ "grad_norm": 24.305540084838867,
1775
+ "learning_rate": 3.215339233038348e-05,
1776
+ "loss": 0.7362,
1777
+ "step": 480
1778
+ },
1779
+ {
1780
+ "epoch": 4.265486725663717,
1781
+ "grad_norm": 26.687883377075195,
1782
+ "learning_rate": 3.2055063913470996e-05,
1783
+ "loss": 0.8364,
1784
+ "step": 482
1785
+ },
1786
+ {
1787
+ "epoch": 4.283185840707965,
1788
+ "grad_norm": 23.61547088623047,
1789
+ "learning_rate": 3.1956735496558504e-05,
1790
+ "loss": 0.6807,
1791
+ "step": 484
1792
+ },
1793
+ {
1794
+ "epoch": 4.300884955752212,
1795
+ "grad_norm": 36.08306121826172,
1796
+ "learning_rate": 3.185840707964602e-05,
1797
+ "loss": 0.7484,
1798
+ "step": 486
1799
+ },
1800
+ {
1801
+ "epoch": 4.31858407079646,
1802
+ "grad_norm": 24.663066864013672,
1803
+ "learning_rate": 3.1760078662733534e-05,
1804
+ "loss": 0.7722,
1805
+ "step": 488
1806
+ },
1807
+ {
1808
+ "epoch": 4.336283185840708,
1809
+ "grad_norm": 25.614458084106445,
1810
+ "learning_rate": 3.166175024582104e-05,
1811
+ "loss": 0.9276,
1812
+ "step": 490
1813
+ },
1814
+ {
1815
+ "epoch": 4.353982300884955,
1816
+ "grad_norm": 22.014490127563477,
1817
+ "learning_rate": 3.156342182890856e-05,
1818
+ "loss": 0.8552,
1819
+ "step": 492
1820
+ },
1821
+ {
1822
+ "epoch": 4.371681415929204,
1823
+ "grad_norm": 16.620935440063477,
1824
+ "learning_rate": 3.1465093411996065e-05,
1825
+ "loss": 0.8883,
1826
+ "step": 494
1827
+ },
1828
+ {
1829
+ "epoch": 4.389380530973451,
1830
+ "grad_norm": 19.76045036315918,
1831
+ "learning_rate": 3.136676499508358e-05,
1832
+ "loss": 0.5994,
1833
+ "step": 496
1834
+ },
1835
+ {
1836
+ "epoch": 4.407079646017699,
1837
+ "grad_norm": 27.53520393371582,
1838
+ "learning_rate": 3.1268436578171095e-05,
1839
+ "loss": 0.7127,
1840
+ "step": 498
1841
+ },
1842
+ {
1843
+ "epoch": 4.424778761061947,
1844
+ "grad_norm": 17.578168869018555,
1845
+ "learning_rate": 3.1170108161258604e-05,
1846
+ "loss": 0.8606,
1847
+ "step": 500
1848
+ },
1849
+ {
1850
+ "epoch": 4.442477876106195,
1851
+ "grad_norm": 25.594207763671875,
1852
+ "learning_rate": 3.107177974434612e-05,
1853
+ "loss": 0.736,
1854
+ "step": 502
1855
+ },
1856
+ {
1857
+ "epoch": 4.460176991150442,
1858
+ "grad_norm": 17.3150691986084,
1859
+ "learning_rate": 3.097345132743363e-05,
1860
+ "loss": 0.8871,
1861
+ "step": 504
1862
+ },
1863
+ {
1864
+ "epoch": 4.477876106194691,
1865
+ "grad_norm": 34.62860107421875,
1866
+ "learning_rate": 3.087512291052114e-05,
1867
+ "loss": 0.8176,
1868
+ "step": 506
1869
+ },
1870
+ {
1871
+ "epoch": 4.495575221238938,
1872
+ "grad_norm": 21.978734970092773,
1873
+ "learning_rate": 3.077679449360865e-05,
1874
+ "loss": 0.8523,
1875
+ "step": 508
1876
+ },
1877
+ {
1878
+ "epoch": 4.513274336283186,
1879
+ "grad_norm": 37.72077941894531,
1880
+ "learning_rate": 3.0678466076696165e-05,
1881
+ "loss": 0.6939,
1882
+ "step": 510
1883
+ },
1884
+ {
1885
+ "epoch": 4.530973451327434,
1886
+ "grad_norm": 29.62040901184082,
1887
+ "learning_rate": 3.058013765978368e-05,
1888
+ "loss": 0.9062,
1889
+ "step": 512
1890
+ },
1891
+ {
1892
+ "epoch": 4.548672566371682,
1893
+ "grad_norm": 43.172821044921875,
1894
+ "learning_rate": 3.048180924287119e-05,
1895
+ "loss": 0.9389,
1896
+ "step": 514
1897
+ },
1898
+ {
1899
+ "epoch": 4.566371681415929,
1900
+ "grad_norm": 19.374061584472656,
1901
+ "learning_rate": 3.0383480825958703e-05,
1902
+ "loss": 0.8577,
1903
+ "step": 516
1904
+ },
1905
+ {
1906
+ "epoch": 4.584070796460177,
1907
+ "grad_norm": 18.854230880737305,
1908
+ "learning_rate": 3.0285152409046215e-05,
1909
+ "loss": 0.7861,
1910
+ "step": 518
1911
+ },
1912
+ {
1913
+ "epoch": 4.601769911504425,
1914
+ "grad_norm": 27.53508949279785,
1915
+ "learning_rate": 3.018682399213373e-05,
1916
+ "loss": 0.8935,
1917
+ "step": 520
1918
+ },
1919
+ {
1920
+ "epoch": 4.619469026548672,
1921
+ "grad_norm": 25.085142135620117,
1922
+ "learning_rate": 3.008849557522124e-05,
1923
+ "loss": 0.6619,
1924
+ "step": 522
1925
+ },
1926
+ {
1927
+ "epoch": 4.6371681415929205,
1928
+ "grad_norm": 22.648717880249023,
1929
+ "learning_rate": 2.9990167158308753e-05,
1930
+ "loss": 0.7065,
1931
+ "step": 524
1932
+ },
1933
+ {
1934
+ "epoch": 4.654867256637168,
1935
+ "grad_norm": 19.692138671875,
1936
+ "learning_rate": 2.9891838741396268e-05,
1937
+ "loss": 0.8524,
1938
+ "step": 526
1939
+ },
1940
+ {
1941
+ "epoch": 4.672566371681416,
1942
+ "grad_norm": 17.703353881835938,
1943
+ "learning_rate": 2.9793510324483776e-05,
1944
+ "loss": 0.8405,
1945
+ "step": 528
1946
+ },
1947
+ {
1948
+ "epoch": 4.6902654867256635,
1949
+ "grad_norm": 19.434688568115234,
1950
+ "learning_rate": 2.969518190757129e-05,
1951
+ "loss": 0.7932,
1952
+ "step": 530
1953
+ },
1954
+ {
1955
+ "epoch": 4.707964601769912,
1956
+ "grad_norm": 19.711090087890625,
1957
+ "learning_rate": 2.95968534906588e-05,
1958
+ "loss": 0.6708,
1959
+ "step": 532
1960
+ },
1961
+ {
1962
+ "epoch": 4.725663716814159,
1963
+ "grad_norm": 32.07950973510742,
1964
+ "learning_rate": 2.9498525073746314e-05,
1965
+ "loss": 0.8732,
1966
+ "step": 534
1967
+ },
1968
+ {
1969
+ "epoch": 4.743362831858407,
1970
+ "grad_norm": 26.682344436645508,
1971
+ "learning_rate": 2.940019665683383e-05,
1972
+ "loss": 0.8234,
1973
+ "step": 536
1974
+ },
1975
+ {
1976
+ "epoch": 4.761061946902655,
1977
+ "grad_norm": 20.226280212402344,
1978
+ "learning_rate": 2.9301868239921338e-05,
1979
+ "loss": 0.6903,
1980
+ "step": 538
1981
+ },
1982
+ {
1983
+ "epoch": 4.778761061946903,
1984
+ "grad_norm": 28.748722076416016,
1985
+ "learning_rate": 2.9203539823008852e-05,
1986
+ "loss": 0.8606,
1987
+ "step": 540
1988
+ },
1989
+ {
1990
+ "epoch": 4.79646017699115,
1991
+ "grad_norm": 21.78730583190918,
1992
+ "learning_rate": 2.910521140609636e-05,
1993
+ "loss": 0.7924,
1994
+ "step": 542
1995
+ },
1996
+ {
1997
+ "epoch": 4.814159292035399,
1998
+ "grad_norm": 24.920272827148438,
1999
+ "learning_rate": 2.9006882989183876e-05,
2000
+ "loss": 0.7917,
2001
+ "step": 544
2002
+ },
2003
+ {
2004
+ "epoch": 4.831858407079646,
2005
+ "grad_norm": 32.964298248291016,
2006
+ "learning_rate": 2.890855457227139e-05,
2007
+ "loss": 0.8017,
2008
+ "step": 546
2009
+ },
2010
+ {
2011
+ "epoch": 4.849557522123893,
2012
+ "grad_norm": 27.990001678466797,
2013
+ "learning_rate": 2.88102261553589e-05,
2014
+ "loss": 0.8392,
2015
+ "step": 548
2016
+ },
2017
+ {
2018
+ "epoch": 4.867256637168142,
2019
+ "grad_norm": 24.096538543701172,
2020
+ "learning_rate": 2.8711897738446414e-05,
2021
+ "loss": 0.9032,
2022
+ "step": 550
2023
+ },
2024
+ {
2025
+ "epoch": 4.88495575221239,
2026
+ "grad_norm": 22.516420364379883,
2027
+ "learning_rate": 2.8613569321533922e-05,
2028
+ "loss": 0.8411,
2029
+ "step": 552
2030
+ },
2031
+ {
2032
+ "epoch": 4.902654867256637,
2033
+ "grad_norm": 32.133506774902344,
2034
+ "learning_rate": 2.8515240904621437e-05,
2035
+ "loss": 0.824,
2036
+ "step": 554
2037
+ },
2038
+ {
2039
+ "epoch": 4.920353982300885,
2040
+ "grad_norm": 20.993709564208984,
2041
+ "learning_rate": 2.8416912487708945e-05,
2042
+ "loss": 0.6233,
2043
+ "step": 556
2044
+ },
2045
+ {
2046
+ "epoch": 4.938053097345133,
2047
+ "grad_norm": 17.405899047851562,
2048
+ "learning_rate": 2.831858407079646e-05,
2049
+ "loss": 0.8157,
2050
+ "step": 558
2051
+ },
2052
+ {
2053
+ "epoch": 4.95575221238938,
2054
+ "grad_norm": 18.684974670410156,
2055
+ "learning_rate": 2.8220255653883975e-05,
2056
+ "loss": 0.889,
2057
+ "step": 560
2058
+ },
2059
+ {
2060
+ "epoch": 4.9734513274336285,
2061
+ "grad_norm": 22.140762329101562,
2062
+ "learning_rate": 2.8121927236971483e-05,
2063
+ "loss": 0.7555,
2064
+ "step": 562
2065
+ },
2066
+ {
2067
+ "epoch": 4.991150442477876,
2068
+ "grad_norm": 93.78765869140625,
2069
+ "learning_rate": 2.8023598820059e-05,
2070
+ "loss": 0.8166,
2071
+ "step": 564
2072
+ },
2073
+ {
2074
+ "epoch": 5.0,
2075
+ "eval_loss": 0.8204176425933838,
2076
+ "eval_map": 0.4963,
2077
+ "eval_map_50": 0.8784,
2078
+ "eval_map_75": 0.5214,
2079
+ "eval_map_large": 0.56,
2080
+ "eval_map_medium": 0.3309,
2081
+ "eval_map_per_class": 0.4963,
2082
+ "eval_map_small": 0.0209,
2083
+ "eval_mar_1": 0.2598,
2084
+ "eval_mar_10": 0.5983,
2085
+ "eval_mar_100": 0.6581,
2086
+ "eval_mar_100_per_class": 0.6581,
2087
+ "eval_mar_large": 0.7207,
2088
+ "eval_mar_medium": 0.4969,
2089
+ "eval_mar_small": 0.4,
2090
+ "eval_runtime": 4.466,
2091
+ "eval_samples_per_second": 22.392,
2092
+ "eval_steps_per_second": 1.567,
2093
+ "step": 565
2094
+ },
2095
+ {
2096
+ "epoch": 5.008849557522124,
2097
+ "grad_norm": 17.966142654418945,
2098
+ "learning_rate": 2.792527040314651e-05,
2099
+ "loss": 0.7743,
2100
+ "step": 566
2101
+ },
2102
+ {
2103
+ "epoch": 5.0265486725663715,
2104
+ "grad_norm": 17.44831657409668,
2105
+ "learning_rate": 2.782694198623402e-05,
2106
+ "loss": 0.8002,
2107
+ "step": 568
2108
+ },
2109
+ {
2110
+ "epoch": 5.04424778761062,
2111
+ "grad_norm": 16.89792823791504,
2112
+ "learning_rate": 2.7728613569321537e-05,
2113
+ "loss": 0.8238,
2114
+ "step": 570
2115
+ },
2116
+ {
2117
+ "epoch": 5.061946902654867,
2118
+ "grad_norm": 15.499351501464844,
2119
+ "learning_rate": 2.7630285152409048e-05,
2120
+ "loss": 0.5594,
2121
+ "step": 572
2122
+ },
2123
+ {
2124
+ "epoch": 5.079646017699115,
2125
+ "grad_norm": 26.665563583374023,
2126
+ "learning_rate": 2.753195673549656e-05,
2127
+ "loss": 0.7994,
2128
+ "step": 574
2129
+ },
2130
+ {
2131
+ "epoch": 5.097345132743363,
2132
+ "grad_norm": 19.839536666870117,
2133
+ "learning_rate": 2.743362831858407e-05,
2134
+ "loss": 0.6578,
2135
+ "step": 576
2136
+ },
2137
+ {
2138
+ "epoch": 5.115044247787611,
2139
+ "grad_norm": 24.534902572631836,
2140
+ "learning_rate": 2.7335299901671586e-05,
2141
+ "loss": 0.7748,
2142
+ "step": 578
2143
+ },
2144
+ {
2145
+ "epoch": 5.132743362831858,
2146
+ "grad_norm": 23.61812400817871,
2147
+ "learning_rate": 2.7236971484759095e-05,
2148
+ "loss": 0.7771,
2149
+ "step": 580
2150
+ },
2151
+ {
2152
+ "epoch": 5.150442477876107,
2153
+ "grad_norm": 17.933223724365234,
2154
+ "learning_rate": 2.713864306784661e-05,
2155
+ "loss": 0.7349,
2156
+ "step": 582
2157
+ },
2158
+ {
2159
+ "epoch": 5.168141592920354,
2160
+ "grad_norm": 18.23980712890625,
2161
+ "learning_rate": 2.7040314650934125e-05,
2162
+ "loss": 0.7771,
2163
+ "step": 584
2164
+ },
2165
+ {
2166
+ "epoch": 5.185840707964601,
2167
+ "grad_norm": 20.185016632080078,
2168
+ "learning_rate": 2.6941986234021633e-05,
2169
+ "loss": 0.8009,
2170
+ "step": 586
2171
+ },
2172
+ {
2173
+ "epoch": 5.20353982300885,
2174
+ "grad_norm": 26.6129093170166,
2175
+ "learning_rate": 2.6843657817109148e-05,
2176
+ "loss": 0.8614,
2177
+ "step": 588
2178
+ },
2179
+ {
2180
+ "epoch": 5.221238938053097,
2181
+ "grad_norm": 27.28291893005371,
2182
+ "learning_rate": 2.6745329400196656e-05,
2183
+ "loss": 0.8594,
2184
+ "step": 590
2185
+ },
2186
+ {
2187
+ "epoch": 5.238938053097345,
2188
+ "grad_norm": 19.851408004760742,
2189
+ "learning_rate": 2.664700098328417e-05,
2190
+ "loss": 0.6753,
2191
+ "step": 592
2192
+ },
2193
+ {
2194
+ "epoch": 5.256637168141593,
2195
+ "grad_norm": 15.925873756408691,
2196
+ "learning_rate": 2.6548672566371686e-05,
2197
+ "loss": 0.7364,
2198
+ "step": 594
2199
+ },
2200
+ {
2201
+ "epoch": 5.274336283185841,
2202
+ "grad_norm": 16.969070434570312,
2203
+ "learning_rate": 2.6450344149459194e-05,
2204
+ "loss": 0.7607,
2205
+ "step": 596
2206
+ },
2207
+ {
2208
+ "epoch": 5.292035398230088,
2209
+ "grad_norm": 29.575332641601562,
2210
+ "learning_rate": 2.635201573254671e-05,
2211
+ "loss": 0.6162,
2212
+ "step": 598
2213
+ },
2214
+ {
2215
+ "epoch": 5.3097345132743365,
2216
+ "grad_norm": 27.425867080688477,
2217
+ "learning_rate": 2.6253687315634217e-05,
2218
+ "loss": 0.7873,
2219
+ "step": 600
2220
+ },
2221
+ {
2222
+ "epoch": 5.327433628318584,
2223
+ "grad_norm": 36.430233001708984,
2224
+ "learning_rate": 2.6155358898721732e-05,
2225
+ "loss": 0.7635,
2226
+ "step": 602
2227
+ },
2228
+ {
2229
+ "epoch": 5.345132743362832,
2230
+ "grad_norm": 23.08193588256836,
2231
+ "learning_rate": 2.605703048180924e-05,
2232
+ "loss": 0.8998,
2233
+ "step": 604
2234
+ },
2235
+ {
2236
+ "epoch": 5.3628318584070795,
2237
+ "grad_norm": 19.06671714782715,
2238
+ "learning_rate": 2.5958702064896756e-05,
2239
+ "loss": 0.6907,
2240
+ "step": 606
2241
+ },
2242
+ {
2243
+ "epoch": 5.380530973451328,
2244
+ "grad_norm": 17.793241500854492,
2245
+ "learning_rate": 2.586037364798427e-05,
2246
+ "loss": 0.7653,
2247
+ "step": 608
2248
+ },
2249
+ {
2250
+ "epoch": 5.398230088495575,
2251
+ "grad_norm": 18.47113800048828,
2252
+ "learning_rate": 2.576204523107178e-05,
2253
+ "loss": 0.7344,
2254
+ "step": 610
2255
+ },
2256
+ {
2257
+ "epoch": 5.415929203539823,
2258
+ "grad_norm": 23.754230499267578,
2259
+ "learning_rate": 2.5663716814159294e-05,
2260
+ "loss": 0.7098,
2261
+ "step": 612
2262
+ },
2263
+ {
2264
+ "epoch": 5.433628318584071,
2265
+ "grad_norm": 17.286996841430664,
2266
+ "learning_rate": 2.5565388397246802e-05,
2267
+ "loss": 0.7242,
2268
+ "step": 614
2269
+ },
2270
+ {
2271
+ "epoch": 5.451327433628318,
2272
+ "grad_norm": 33.41328811645508,
2273
+ "learning_rate": 2.5467059980334317e-05,
2274
+ "loss": 0.8577,
2275
+ "step": 616
2276
+ },
2277
+ {
2278
+ "epoch": 5.469026548672566,
2279
+ "grad_norm": 23.295421600341797,
2280
+ "learning_rate": 2.5368731563421832e-05,
2281
+ "loss": 0.765,
2282
+ "step": 618
2283
+ },
2284
+ {
2285
+ "epoch": 5.486725663716814,
2286
+ "grad_norm": 29.50783348083496,
2287
+ "learning_rate": 2.527040314650934e-05,
2288
+ "loss": 0.7021,
2289
+ "step": 620
2290
+ },
2291
+ {
2292
+ "epoch": 5.504424778761062,
2293
+ "grad_norm": 20.60035514831543,
2294
+ "learning_rate": 2.5172074729596855e-05,
2295
+ "loss": 0.8331,
2296
+ "step": 622
2297
+ },
2298
+ {
2299
+ "epoch": 5.522123893805309,
2300
+ "grad_norm": 28.808502197265625,
2301
+ "learning_rate": 2.5073746312684367e-05,
2302
+ "loss": 0.9017,
2303
+ "step": 624
2304
+ },
2305
+ {
2306
+ "epoch": 5.539823008849558,
2307
+ "grad_norm": 63.102195739746094,
2308
+ "learning_rate": 2.4975417895771878e-05,
2309
+ "loss": 0.7724,
2310
+ "step": 626
2311
+ },
2312
+ {
2313
+ "epoch": 5.557522123893805,
2314
+ "grad_norm": 31.407791137695312,
2315
+ "learning_rate": 2.4877089478859393e-05,
2316
+ "loss": 0.775,
2317
+ "step": 628
2318
+ },
2319
+ {
2320
+ "epoch": 5.575221238938053,
2321
+ "grad_norm": 27.927444458007812,
2322
+ "learning_rate": 2.4778761061946905e-05,
2323
+ "loss": 0.6807,
2324
+ "step": 630
2325
+ },
2326
+ {
2327
+ "epoch": 5.592920353982301,
2328
+ "grad_norm": 31.49755096435547,
2329
+ "learning_rate": 2.4680432645034416e-05,
2330
+ "loss": 0.7617,
2331
+ "step": 632
2332
+ },
2333
+ {
2334
+ "epoch": 5.610619469026549,
2335
+ "grad_norm": 24.49897003173828,
2336
+ "learning_rate": 2.4582104228121928e-05,
2337
+ "loss": 0.7275,
2338
+ "step": 634
2339
+ },
2340
+ {
2341
+ "epoch": 5.628318584070796,
2342
+ "grad_norm": 29.279388427734375,
2343
+ "learning_rate": 2.4483775811209443e-05,
2344
+ "loss": 0.6349,
2345
+ "step": 636
2346
+ },
2347
+ {
2348
+ "epoch": 5.646017699115045,
2349
+ "grad_norm": 18.994455337524414,
2350
+ "learning_rate": 2.4385447394296955e-05,
2351
+ "loss": 0.6773,
2352
+ "step": 638
2353
+ },
2354
+ {
2355
+ "epoch": 5.663716814159292,
2356
+ "grad_norm": 21.28229522705078,
2357
+ "learning_rate": 2.4287118977384466e-05,
2358
+ "loss": 0.8142,
2359
+ "step": 640
2360
+ },
2361
+ {
2362
+ "epoch": 5.68141592920354,
2363
+ "grad_norm": 28.956829071044922,
2364
+ "learning_rate": 2.4188790560471978e-05,
2365
+ "loss": 0.7215,
2366
+ "step": 642
2367
+ },
2368
+ {
2369
+ "epoch": 5.699115044247788,
2370
+ "grad_norm": 35.786991119384766,
2371
+ "learning_rate": 2.409046214355949e-05,
2372
+ "loss": 0.8148,
2373
+ "step": 644
2374
+ },
2375
+ {
2376
+ "epoch": 5.716814159292035,
2377
+ "grad_norm": 17.956323623657227,
2378
+ "learning_rate": 2.3992133726647e-05,
2379
+ "loss": 0.9103,
2380
+ "step": 646
2381
+ },
2382
+ {
2383
+ "epoch": 5.734513274336283,
2384
+ "grad_norm": 21.570697784423828,
2385
+ "learning_rate": 2.3893805309734516e-05,
2386
+ "loss": 0.6904,
2387
+ "step": 648
2388
+ },
2389
+ {
2390
+ "epoch": 5.752212389380531,
2391
+ "grad_norm": 13.703173637390137,
2392
+ "learning_rate": 2.3795476892822028e-05,
2393
+ "loss": 0.6716,
2394
+ "step": 650
2395
+ },
2396
+ {
2397
+ "epoch": 5.769911504424779,
2398
+ "grad_norm": 19.16945457458496,
2399
+ "learning_rate": 2.369714847590954e-05,
2400
+ "loss": 0.8447,
2401
+ "step": 652
2402
+ },
2403
+ {
2404
+ "epoch": 5.787610619469026,
2405
+ "grad_norm": 15.37072467803955,
2406
+ "learning_rate": 2.359882005899705e-05,
2407
+ "loss": 0.7729,
2408
+ "step": 654
2409
+ },
2410
+ {
2411
+ "epoch": 5.8053097345132745,
2412
+ "grad_norm": 16.8475341796875,
2413
+ "learning_rate": 2.3500491642084562e-05,
2414
+ "loss": 0.8331,
2415
+ "step": 656
2416
+ },
2417
+ {
2418
+ "epoch": 5.823008849557522,
2419
+ "grad_norm": 23.530323028564453,
2420
+ "learning_rate": 2.3402163225172074e-05,
2421
+ "loss": 0.7384,
2422
+ "step": 658
2423
+ },
2424
+ {
2425
+ "epoch": 5.84070796460177,
2426
+ "grad_norm": 29.106124877929688,
2427
+ "learning_rate": 2.330383480825959e-05,
2428
+ "loss": 0.7302,
2429
+ "step": 660
2430
+ },
2431
+ {
2432
+ "epoch": 5.8584070796460175,
2433
+ "grad_norm": 13.785088539123535,
2434
+ "learning_rate": 2.32055063913471e-05,
2435
+ "loss": 0.6624,
2436
+ "step": 662
2437
+ },
2438
+ {
2439
+ "epoch": 5.876106194690266,
2440
+ "grad_norm": 24.307811737060547,
2441
+ "learning_rate": 2.3107177974434612e-05,
2442
+ "loss": 0.8129,
2443
+ "step": 664
2444
+ },
2445
+ {
2446
+ "epoch": 5.893805309734513,
2447
+ "grad_norm": 32.765541076660156,
2448
+ "learning_rate": 2.3008849557522124e-05,
2449
+ "loss": 0.7463,
2450
+ "step": 666
2451
+ },
2452
+ {
2453
+ "epoch": 5.911504424778761,
2454
+ "grad_norm": 15.36600112915039,
2455
+ "learning_rate": 2.2910521140609635e-05,
2456
+ "loss": 0.8945,
2457
+ "step": 668
2458
+ },
2459
+ {
2460
+ "epoch": 5.929203539823009,
2461
+ "grad_norm": 21.934234619140625,
2462
+ "learning_rate": 2.2812192723697147e-05,
2463
+ "loss": 0.7991,
2464
+ "step": 670
2465
+ },
2466
+ {
2467
+ "epoch": 5.946902654867257,
2468
+ "grad_norm": 18.248523712158203,
2469
+ "learning_rate": 2.2713864306784662e-05,
2470
+ "loss": 0.7406,
2471
+ "step": 672
2472
+ },
2473
+ {
2474
+ "epoch": 5.964601769911504,
2475
+ "grad_norm": 18.006677627563477,
2476
+ "learning_rate": 2.2615535889872174e-05,
2477
+ "loss": 0.7947,
2478
+ "step": 674
2479
+ },
2480
+ {
2481
+ "epoch": 5.982300884955752,
2482
+ "grad_norm": 34.55229568481445,
2483
+ "learning_rate": 2.2517207472959685e-05,
2484
+ "loss": 0.7464,
2485
+ "step": 676
2486
+ },
2487
+ {
2488
+ "epoch": 6.0,
2489
+ "grad_norm": 36.384239196777344,
2490
+ "learning_rate": 2.24188790560472e-05,
2491
+ "loss": 0.6889,
2492
+ "step": 678
2493
+ },
2494
+ {
2495
+ "epoch": 6.0,
2496
+ "eval_loss": 0.7955648303031921,
2497
+ "eval_map": 0.5081,
2498
+ "eval_map_50": 0.8984,
2499
+ "eval_map_75": 0.5276,
2500
+ "eval_map_large": 0.5756,
2501
+ "eval_map_medium": 0.3127,
2502
+ "eval_map_per_class": 0.5081,
2503
+ "eval_map_small": 0.0226,
2504
+ "eval_mar_1": 0.2658,
2505
+ "eval_mar_10": 0.5991,
2506
+ "eval_mar_100": 0.6538,
2507
+ "eval_mar_100_per_class": 0.6538,
2508
+ "eval_mar_large": 0.713,
2509
+ "eval_mar_medium": 0.5016,
2510
+ "eval_mar_small": 0.4,
2511
+ "eval_runtime": 4.0138,
2512
+ "eval_samples_per_second": 24.914,
2513
+ "eval_steps_per_second": 1.744,
2514
+ "step": 678
2515
+ },
2516
+ {
2517
+ "epoch": 6.017699115044247,
2518
+ "grad_norm": 30.158843994140625,
2519
+ "learning_rate": 2.232055063913471e-05,
2520
+ "loss": 0.7447,
2521
+ "step": 680
2522
+ },
2523
+ {
2524
+ "epoch": 6.035398230088496,
2525
+ "grad_norm": 21.382171630859375,
2526
+ "learning_rate": 2.2222222222222223e-05,
2527
+ "loss": 0.7444,
2528
+ "step": 682
2529
+ },
2530
+ {
2531
+ "epoch": 6.053097345132743,
2532
+ "grad_norm": 16.47271156311035,
2533
+ "learning_rate": 2.2123893805309738e-05,
2534
+ "loss": 0.7564,
2535
+ "step": 684
2536
+ },
2537
+ {
2538
+ "epoch": 6.070796460176991,
2539
+ "grad_norm": 26.326805114746094,
2540
+ "learning_rate": 2.202556538839725e-05,
2541
+ "loss": 0.7716,
2542
+ "step": 686
2543
+ },
2544
+ {
2545
+ "epoch": 6.088495575221239,
2546
+ "grad_norm": 21.000934600830078,
2547
+ "learning_rate": 2.192723697148476e-05,
2548
+ "loss": 0.9769,
2549
+ "step": 688
2550
+ },
2551
+ {
2552
+ "epoch": 6.106194690265487,
2553
+ "grad_norm": 14.34850025177002,
2554
+ "learning_rate": 2.1828908554572273e-05,
2555
+ "loss": 0.6075,
2556
+ "step": 690
2557
+ },
2558
+ {
2559
+ "epoch": 6.123893805309734,
2560
+ "grad_norm": 14.81210994720459,
2561
+ "learning_rate": 2.1730580137659785e-05,
2562
+ "loss": 0.6219,
2563
+ "step": 692
2564
+ },
2565
+ {
2566
+ "epoch": 6.1415929203539825,
2567
+ "grad_norm": 15.356616973876953,
2568
+ "learning_rate": 2.1632251720747296e-05,
2569
+ "loss": 0.6604,
2570
+ "step": 694
2571
+ },
2572
+ {
2573
+ "epoch": 6.15929203539823,
2574
+ "grad_norm": 25.215105056762695,
2575
+ "learning_rate": 2.153392330383481e-05,
2576
+ "loss": 0.8016,
2577
+ "step": 696
2578
+ },
2579
+ {
2580
+ "epoch": 6.176991150442478,
2581
+ "grad_norm": 29.563343048095703,
2582
+ "learning_rate": 2.1435594886922323e-05,
2583
+ "loss": 0.6862,
2584
+ "step": 698
2585
+ },
2586
+ {
2587
+ "epoch": 6.1946902654867255,
2588
+ "grad_norm": 21.926513671875,
2589
+ "learning_rate": 2.1337266470009834e-05,
2590
+ "loss": 0.7859,
2591
+ "step": 700
2592
+ },
2593
+ {
2594
+ "epoch": 6.212389380530974,
2595
+ "grad_norm": 23.07330322265625,
2596
+ "learning_rate": 2.1238938053097346e-05,
2597
+ "loss": 0.6345,
2598
+ "step": 702
2599
+ },
2600
+ {
2601
+ "epoch": 6.230088495575221,
2602
+ "grad_norm": 17.459535598754883,
2603
+ "learning_rate": 2.1140609636184858e-05,
2604
+ "loss": 0.7799,
2605
+ "step": 704
2606
+ },
2607
+ {
2608
+ "epoch": 6.247787610619469,
2609
+ "grad_norm": 16.414749145507812,
2610
+ "learning_rate": 2.104228121927237e-05,
2611
+ "loss": 0.8234,
2612
+ "step": 706
2613
+ },
2614
+ {
2615
+ "epoch": 6.265486725663717,
2616
+ "grad_norm": 30.402633666992188,
2617
+ "learning_rate": 2.0943952802359884e-05,
2618
+ "loss": 0.8438,
2619
+ "step": 708
2620
+ },
2621
+ {
2622
+ "epoch": 6.283185840707965,
2623
+ "grad_norm": 36.2429084777832,
2624
+ "learning_rate": 2.0845624385447396e-05,
2625
+ "loss": 0.8062,
2626
+ "step": 710
2627
+ },
2628
+ {
2629
+ "epoch": 6.300884955752212,
2630
+ "grad_norm": 29.136394500732422,
2631
+ "learning_rate": 2.0747295968534907e-05,
2632
+ "loss": 0.7717,
2633
+ "step": 712
2634
+ },
2635
+ {
2636
+ "epoch": 6.31858407079646,
2637
+ "grad_norm": 22.905851364135742,
2638
+ "learning_rate": 2.064896755162242e-05,
2639
+ "loss": 0.7359,
2640
+ "step": 714
2641
+ },
2642
+ {
2643
+ "epoch": 6.336283185840708,
2644
+ "grad_norm": 20.239849090576172,
2645
+ "learning_rate": 2.055063913470993e-05,
2646
+ "loss": 0.6981,
2647
+ "step": 716
2648
+ },
2649
+ {
2650
+ "epoch": 6.353982300884955,
2651
+ "grad_norm": 24.594646453857422,
2652
+ "learning_rate": 2.0452310717797442e-05,
2653
+ "loss": 0.6381,
2654
+ "step": 718
2655
+ },
2656
+ {
2657
+ "epoch": 6.371681415929204,
2658
+ "grad_norm": 26.90498161315918,
2659
+ "learning_rate": 2.0353982300884957e-05,
2660
+ "loss": 0.8903,
2661
+ "step": 720
2662
+ },
2663
+ {
2664
+ "epoch": 6.389380530973451,
2665
+ "grad_norm": 29.696321487426758,
2666
+ "learning_rate": 2.025565388397247e-05,
2667
+ "loss": 0.6345,
2668
+ "step": 722
2669
+ },
2670
+ {
2671
+ "epoch": 6.407079646017699,
2672
+ "grad_norm": 30.505664825439453,
2673
+ "learning_rate": 2.015732546705998e-05,
2674
+ "loss": 0.7814,
2675
+ "step": 724
2676
+ },
2677
+ {
2678
+ "epoch": 6.424778761061947,
2679
+ "grad_norm": 36.33932876586914,
2680
+ "learning_rate": 2.0058997050147492e-05,
2681
+ "loss": 0.7754,
2682
+ "step": 726
2683
+ },
2684
+ {
2685
+ "epoch": 6.442477876106195,
2686
+ "grad_norm": 18.5018310546875,
2687
+ "learning_rate": 1.9960668633235004e-05,
2688
+ "loss": 0.7061,
2689
+ "step": 728
2690
+ },
2691
+ {
2692
+ "epoch": 6.460176991150442,
2693
+ "grad_norm": 16.27545166015625,
2694
+ "learning_rate": 1.986234021632252e-05,
2695
+ "loss": 0.7759,
2696
+ "step": 730
2697
+ },
2698
+ {
2699
+ "epoch": 6.477876106194691,
2700
+ "grad_norm": 19.90315055847168,
2701
+ "learning_rate": 1.976401179941003e-05,
2702
+ "loss": 0.7227,
2703
+ "step": 732
2704
+ },
2705
+ {
2706
+ "epoch": 6.495575221238938,
2707
+ "grad_norm": 23.48577880859375,
2708
+ "learning_rate": 1.9665683382497542e-05,
2709
+ "loss": 0.7967,
2710
+ "step": 734
2711
+ },
2712
+ {
2713
+ "epoch": 6.513274336283186,
2714
+ "grad_norm": 25.51618003845215,
2715
+ "learning_rate": 1.9567354965585057e-05,
2716
+ "loss": 0.7768,
2717
+ "step": 736
2718
+ },
2719
+ {
2720
+ "epoch": 6.530973451327434,
2721
+ "grad_norm": 17.899826049804688,
2722
+ "learning_rate": 1.946902654867257e-05,
2723
+ "loss": 0.6657,
2724
+ "step": 738
2725
+ },
2726
+ {
2727
+ "epoch": 6.548672566371682,
2728
+ "grad_norm": 19.610185623168945,
2729
+ "learning_rate": 1.937069813176008e-05,
2730
+ "loss": 0.8086,
2731
+ "step": 740
2732
+ },
2733
+ {
2734
+ "epoch": 6.566371681415929,
2735
+ "grad_norm": 25.385841369628906,
2736
+ "learning_rate": 1.927236971484759e-05,
2737
+ "loss": 0.6468,
2738
+ "step": 742
2739
+ },
2740
+ {
2741
+ "epoch": 6.584070796460177,
2742
+ "grad_norm": 15.946438789367676,
2743
+ "learning_rate": 1.9174041297935107e-05,
2744
+ "loss": 0.676,
2745
+ "step": 744
2746
+ },
2747
+ {
2748
+ "epoch": 6.601769911504425,
2749
+ "grad_norm": 17.454030990600586,
2750
+ "learning_rate": 1.9075712881022618e-05,
2751
+ "loss": 0.7586,
2752
+ "step": 746
2753
+ },
2754
+ {
2755
+ "epoch": 6.619469026548672,
2756
+ "grad_norm": 24.09688949584961,
2757
+ "learning_rate": 1.897738446411013e-05,
2758
+ "loss": 0.5866,
2759
+ "step": 748
2760
+ },
2761
+ {
2762
+ "epoch": 6.6371681415929205,
2763
+ "grad_norm": 20.896167755126953,
2764
+ "learning_rate": 1.887905604719764e-05,
2765
+ "loss": 0.6377,
2766
+ "step": 750
2767
+ },
2768
+ {
2769
+ "epoch": 6.654867256637168,
2770
+ "grad_norm": 13.922218322753906,
2771
+ "learning_rate": 1.8780727630285153e-05,
2772
+ "loss": 0.6401,
2773
+ "step": 752
2774
+ },
2775
+ {
2776
+ "epoch": 6.672566371681416,
2777
+ "grad_norm": 22.96921157836914,
2778
+ "learning_rate": 1.8682399213372664e-05,
2779
+ "loss": 0.7318,
2780
+ "step": 754
2781
+ },
2782
+ {
2783
+ "epoch": 6.6902654867256635,
2784
+ "grad_norm": 24.575101852416992,
2785
+ "learning_rate": 1.858407079646018e-05,
2786
+ "loss": 0.7185,
2787
+ "step": 756
2788
+ },
2789
+ {
2790
+ "epoch": 6.707964601769912,
2791
+ "grad_norm": 19.46567726135254,
2792
+ "learning_rate": 1.848574237954769e-05,
2793
+ "loss": 0.7333,
2794
+ "step": 758
2795
+ },
2796
+ {
2797
+ "epoch": 6.725663716814159,
2798
+ "grad_norm": 16.00038719177246,
2799
+ "learning_rate": 1.8387413962635203e-05,
2800
+ "loss": 0.6284,
2801
+ "step": 760
2802
+ },
2803
+ {
2804
+ "epoch": 6.743362831858407,
2805
+ "grad_norm": 19.083911895751953,
2806
+ "learning_rate": 1.8289085545722714e-05,
2807
+ "loss": 0.7064,
2808
+ "step": 762
2809
+ },
2810
+ {
2811
+ "epoch": 6.761061946902655,
2812
+ "grad_norm": 21.226545333862305,
2813
+ "learning_rate": 1.8190757128810226e-05,
2814
+ "loss": 0.8701,
2815
+ "step": 764
2816
+ },
2817
+ {
2818
+ "epoch": 6.778761061946903,
2819
+ "grad_norm": 20.60919189453125,
2820
+ "learning_rate": 1.8092428711897737e-05,
2821
+ "loss": 0.8677,
2822
+ "step": 766
2823
+ },
2824
+ {
2825
+ "epoch": 6.79646017699115,
2826
+ "grad_norm": 26.225242614746094,
2827
+ "learning_rate": 1.7994100294985252e-05,
2828
+ "loss": 0.7429,
2829
+ "step": 768
2830
+ },
2831
+ {
2832
+ "epoch": 6.814159292035399,
2833
+ "grad_norm": 23.108654022216797,
2834
+ "learning_rate": 1.7895771878072764e-05,
2835
+ "loss": 0.6417,
2836
+ "step": 770
2837
+ },
2838
+ {
2839
+ "epoch": 6.831858407079646,
2840
+ "grad_norm": 21.556283950805664,
2841
+ "learning_rate": 1.7797443461160276e-05,
2842
+ "loss": 0.6965,
2843
+ "step": 772
2844
+ },
2845
+ {
2846
+ "epoch": 6.849557522123893,
2847
+ "grad_norm": 14.243925094604492,
2848
+ "learning_rate": 1.7699115044247787e-05,
2849
+ "loss": 0.6269,
2850
+ "step": 774
2851
+ },
2852
+ {
2853
+ "epoch": 6.867256637168142,
2854
+ "grad_norm": 25.8636417388916,
2855
+ "learning_rate": 1.76007866273353e-05,
2856
+ "loss": 0.707,
2857
+ "step": 776
2858
+ },
2859
+ {
2860
+ "epoch": 6.88495575221239,
2861
+ "grad_norm": 15.025083541870117,
2862
+ "learning_rate": 1.750245821042281e-05,
2863
+ "loss": 0.5885,
2864
+ "step": 778
2865
+ },
2866
+ {
2867
+ "epoch": 6.902654867256637,
2868
+ "grad_norm": 12.319948196411133,
2869
+ "learning_rate": 1.7404129793510325e-05,
2870
+ "loss": 0.6618,
2871
+ "step": 780
2872
+ },
2873
+ {
2874
+ "epoch": 6.920353982300885,
2875
+ "grad_norm": 18.02228355407715,
2876
+ "learning_rate": 1.7305801376597837e-05,
2877
+ "loss": 0.7721,
2878
+ "step": 782
2879
+ },
2880
+ {
2881
+ "epoch": 6.938053097345133,
2882
+ "grad_norm": 13.130449295043945,
2883
+ "learning_rate": 1.720747295968535e-05,
2884
+ "loss": 0.7447,
2885
+ "step": 784
2886
+ },
2887
+ {
2888
+ "epoch": 6.95575221238938,
2889
+ "grad_norm": 30.51601219177246,
2890
+ "learning_rate": 1.7109144542772864e-05,
2891
+ "loss": 0.7415,
2892
+ "step": 786
2893
+ },
2894
+ {
2895
+ "epoch": 6.9734513274336285,
2896
+ "grad_norm": 26.357257843017578,
2897
+ "learning_rate": 1.7010816125860375e-05,
2898
+ "loss": 0.6869,
2899
+ "step": 788
2900
+ },
2901
+ {
2902
+ "epoch": 6.991150442477876,
2903
+ "grad_norm": 23.887100219726562,
2904
+ "learning_rate": 1.6912487708947887e-05,
2905
+ "loss": 0.673,
2906
+ "step": 790
2907
+ },
2908
+ {
2909
+ "epoch": 7.0,
2910
+ "eval_loss": 0.7656337022781372,
2911
+ "eval_map": 0.5423,
2912
+ "eval_map_50": 0.9234,
2913
+ "eval_map_75": 0.5491,
2914
+ "eval_map_large": 0.6107,
2915
+ "eval_map_medium": 0.361,
2916
+ "eval_map_per_class": 0.5423,
2917
+ "eval_map_small": 0.025,
2918
+ "eval_mar_1": 0.2774,
2919
+ "eval_mar_10": 0.6252,
2920
+ "eval_mar_100": 0.6808,
2921
+ "eval_mar_100_per_class": 0.6808,
2922
+ "eval_mar_large": 0.7473,
2923
+ "eval_mar_medium": 0.5094,
2924
+ "eval_mar_small": 0.4,
2925
+ "eval_runtime": 3.6757,
2926
+ "eval_samples_per_second": 27.206,
2927
+ "eval_steps_per_second": 1.904,
2928
+ "step": 791
2929
+ },
2930
+ {
2931
+ "epoch": 7.008849557522124,
2932
+ "grad_norm": 16.251768112182617,
2933
+ "learning_rate": 1.6814159292035402e-05,
2934
+ "loss": 0.6063,
2935
+ "step": 792
2936
+ },
2937
+ {
2938
+ "epoch": 7.0265486725663715,
2939
+ "grad_norm": 15.13443660736084,
2940
+ "learning_rate": 1.6715830875122913e-05,
2941
+ "loss": 0.7639,
2942
+ "step": 794
2943
+ },
2944
+ {
2945
+ "epoch": 7.04424778761062,
2946
+ "grad_norm": 18.05428123474121,
2947
+ "learning_rate": 1.6617502458210425e-05,
2948
+ "loss": 0.6947,
2949
+ "step": 796
2950
+ },
2951
+ {
2952
+ "epoch": 7.061946902654867,
2953
+ "grad_norm": 18.904369354248047,
2954
+ "learning_rate": 1.6519174041297937e-05,
2955
+ "loss": 0.6764,
2956
+ "step": 798
2957
+ },
2958
+ {
2959
+ "epoch": 7.079646017699115,
2960
+ "grad_norm": 25.891990661621094,
2961
+ "learning_rate": 1.6420845624385448e-05,
2962
+ "loss": 0.6985,
2963
+ "step": 800
2964
+ },
2965
+ {
2966
+ "epoch": 7.097345132743363,
2967
+ "grad_norm": 21.057519912719727,
2968
+ "learning_rate": 1.632251720747296e-05,
2969
+ "loss": 0.7379,
2970
+ "step": 802
2971
+ },
2972
+ {
2973
+ "epoch": 7.115044247787611,
2974
+ "grad_norm": 19.18006706237793,
2975
+ "learning_rate": 1.6224188790560475e-05,
2976
+ "loss": 0.6897,
2977
+ "step": 804
2978
+ },
2979
+ {
2980
+ "epoch": 7.132743362831858,
2981
+ "grad_norm": 18.494470596313477,
2982
+ "learning_rate": 1.6125860373647986e-05,
2983
+ "loss": 0.6482,
2984
+ "step": 806
2985
+ },
2986
+ {
2987
+ "epoch": 7.150442477876107,
2988
+ "grad_norm": 30.54767608642578,
2989
+ "learning_rate": 1.6027531956735498e-05,
2990
+ "loss": 0.7792,
2991
+ "step": 808
2992
+ },
2993
+ {
2994
+ "epoch": 7.168141592920354,
2995
+ "grad_norm": 26.467973709106445,
2996
+ "learning_rate": 1.592920353982301e-05,
2997
+ "loss": 0.6576,
2998
+ "step": 810
2999
+ },
3000
+ {
3001
+ "epoch": 7.185840707964601,
3002
+ "grad_norm": 24.232133865356445,
3003
+ "learning_rate": 1.583087512291052e-05,
3004
+ "loss": 0.7487,
3005
+ "step": 812
3006
+ },
3007
+ {
3008
+ "epoch": 7.20353982300885,
3009
+ "grad_norm": 22.034992218017578,
3010
+ "learning_rate": 1.5732546705998033e-05,
3011
+ "loss": 0.7208,
3012
+ "step": 814
3013
+ },
3014
+ {
3015
+ "epoch": 7.221238938053097,
3016
+ "grad_norm": 17.85079574584961,
3017
+ "learning_rate": 1.5634218289085548e-05,
3018
+ "loss": 0.7738,
3019
+ "step": 816
3020
+ },
3021
+ {
3022
+ "epoch": 7.238938053097345,
3023
+ "grad_norm": 27.755950927734375,
3024
+ "learning_rate": 1.553588987217306e-05,
3025
+ "loss": 0.6782,
3026
+ "step": 818
3027
+ },
3028
+ {
3029
+ "epoch": 7.256637168141593,
3030
+ "grad_norm": 28.554393768310547,
3031
+ "learning_rate": 1.543756145526057e-05,
3032
+ "loss": 0.8659,
3033
+ "step": 820
3034
+ },
3035
+ {
3036
+ "epoch": 7.274336283185841,
3037
+ "grad_norm": 17.34409523010254,
3038
+ "learning_rate": 1.5339233038348082e-05,
3039
+ "loss": 0.6944,
3040
+ "step": 822
3041
+ },
3042
+ {
3043
+ "epoch": 7.292035398230088,
3044
+ "grad_norm": 17.795570373535156,
3045
+ "learning_rate": 1.5240904621435596e-05,
3046
+ "loss": 0.804,
3047
+ "step": 824
3048
+ },
3049
+ {
3050
+ "epoch": 7.3097345132743365,
3051
+ "grad_norm": 23.85445213317871,
3052
+ "learning_rate": 1.5142576204523107e-05,
3053
+ "loss": 0.7226,
3054
+ "step": 826
3055
+ },
3056
+ {
3057
+ "epoch": 7.327433628318584,
3058
+ "grad_norm": 22.983423233032227,
3059
+ "learning_rate": 1.504424778761062e-05,
3060
+ "loss": 0.7246,
3061
+ "step": 828
3062
+ },
3063
+ {
3064
+ "epoch": 7.345132743362832,
3065
+ "grad_norm": 22.66045570373535,
3066
+ "learning_rate": 1.4945919370698134e-05,
3067
+ "loss": 0.596,
3068
+ "step": 830
3069
+ },
3070
+ {
3071
+ "epoch": 7.3628318584070795,
3072
+ "grad_norm": 21.034364700317383,
3073
+ "learning_rate": 1.4847590953785646e-05,
3074
+ "loss": 0.709,
3075
+ "step": 832
3076
+ },
3077
+ {
3078
+ "epoch": 7.380530973451328,
3079
+ "grad_norm": 22.207504272460938,
3080
+ "learning_rate": 1.4749262536873157e-05,
3081
+ "loss": 0.6881,
3082
+ "step": 834
3083
+ },
3084
+ {
3085
+ "epoch": 7.398230088495575,
3086
+ "grad_norm": 32.008148193359375,
3087
+ "learning_rate": 1.4650934119960669e-05,
3088
+ "loss": 0.5796,
3089
+ "step": 836
3090
+ },
3091
+ {
3092
+ "epoch": 7.415929203539823,
3093
+ "grad_norm": 20.24285125732422,
3094
+ "learning_rate": 1.455260570304818e-05,
3095
+ "loss": 0.6238,
3096
+ "step": 838
3097
+ },
3098
+ {
3099
+ "epoch": 7.433628318584071,
3100
+ "grad_norm": 21.326202392578125,
3101
+ "learning_rate": 1.4454277286135695e-05,
3102
+ "loss": 0.7368,
3103
+ "step": 840
3104
+ },
3105
+ {
3106
+ "epoch": 7.451327433628318,
3107
+ "grad_norm": 30.127544403076172,
3108
+ "learning_rate": 1.4355948869223207e-05,
3109
+ "loss": 0.7563,
3110
+ "step": 842
3111
+ },
3112
+ {
3113
+ "epoch": 7.469026548672566,
3114
+ "grad_norm": 21.667497634887695,
3115
+ "learning_rate": 1.4257620452310719e-05,
3116
+ "loss": 0.7823,
3117
+ "step": 844
3118
+ },
3119
+ {
3120
+ "epoch": 7.486725663716814,
3121
+ "grad_norm": 25.011871337890625,
3122
+ "learning_rate": 1.415929203539823e-05,
3123
+ "loss": 0.7403,
3124
+ "step": 846
3125
+ },
3126
+ {
3127
+ "epoch": 7.504424778761062,
3128
+ "grad_norm": 22.891185760498047,
3129
+ "learning_rate": 1.4060963618485742e-05,
3130
+ "loss": 0.765,
3131
+ "step": 848
3132
+ },
3133
+ {
3134
+ "epoch": 7.522123893805309,
3135
+ "grad_norm": 20.289926528930664,
3136
+ "learning_rate": 1.3962635201573255e-05,
3137
+ "loss": 0.6539,
3138
+ "step": 850
3139
+ },
3140
+ {
3141
+ "epoch": 7.539823008849558,
3142
+ "grad_norm": 34.45566177368164,
3143
+ "learning_rate": 1.3864306784660768e-05,
3144
+ "loss": 0.6781,
3145
+ "step": 852
3146
+ },
3147
+ {
3148
+ "epoch": 7.557522123893805,
3149
+ "grad_norm": 16.83544921875,
3150
+ "learning_rate": 1.376597836774828e-05,
3151
+ "loss": 0.6451,
3152
+ "step": 854
3153
+ },
3154
+ {
3155
+ "epoch": 7.575221238938053,
3156
+ "grad_norm": 23.303050994873047,
3157
+ "learning_rate": 1.3667649950835793e-05,
3158
+ "loss": 0.6929,
3159
+ "step": 856
3160
+ },
3161
+ {
3162
+ "epoch": 7.592920353982301,
3163
+ "grad_norm": 22.088911056518555,
3164
+ "learning_rate": 1.3569321533923305e-05,
3165
+ "loss": 0.5961,
3166
+ "step": 858
3167
+ },
3168
+ {
3169
+ "epoch": 7.610619469026549,
3170
+ "grad_norm": 14.59677791595459,
3171
+ "learning_rate": 1.3470993117010816e-05,
3172
+ "loss": 0.6993,
3173
+ "step": 860
3174
+ },
3175
+ {
3176
+ "epoch": 7.628318584070796,
3177
+ "grad_norm": 23.7571964263916,
3178
+ "learning_rate": 1.3372664700098328e-05,
3179
+ "loss": 0.7123,
3180
+ "step": 862
3181
+ },
3182
+ {
3183
+ "epoch": 7.646017699115045,
3184
+ "grad_norm": 22.422454833984375,
3185
+ "learning_rate": 1.3274336283185843e-05,
3186
+ "loss": 0.8684,
3187
+ "step": 864
3188
+ },
3189
+ {
3190
+ "epoch": 7.663716814159292,
3191
+ "grad_norm": 18.472415924072266,
3192
+ "learning_rate": 1.3176007866273355e-05,
3193
+ "loss": 0.7778,
3194
+ "step": 866
3195
+ },
3196
+ {
3197
+ "epoch": 7.68141592920354,
3198
+ "grad_norm": 18.021146774291992,
3199
+ "learning_rate": 1.3077679449360866e-05,
3200
+ "loss": 0.8397,
3201
+ "step": 868
3202
+ },
3203
+ {
3204
+ "epoch": 7.699115044247788,
3205
+ "grad_norm": 19.535381317138672,
3206
+ "learning_rate": 1.2979351032448378e-05,
3207
+ "loss": 0.7257,
3208
+ "step": 870
3209
+ },
3210
+ {
3211
+ "epoch": 7.716814159292035,
3212
+ "grad_norm": 20.843671798706055,
3213
+ "learning_rate": 1.288102261553589e-05,
3214
+ "loss": 0.7802,
3215
+ "step": 872
3216
+ },
3217
+ {
3218
+ "epoch": 7.734513274336283,
3219
+ "grad_norm": 14.444415092468262,
3220
+ "learning_rate": 1.2782694198623401e-05,
3221
+ "loss": 0.6855,
3222
+ "step": 874
3223
+ },
3224
+ {
3225
+ "epoch": 7.752212389380531,
3226
+ "grad_norm": 15.192424774169922,
3227
+ "learning_rate": 1.2684365781710916e-05,
3228
+ "loss": 0.7157,
3229
+ "step": 876
3230
+ },
3231
+ {
3232
+ "epoch": 7.769911504424779,
3233
+ "grad_norm": 14.531706809997559,
3234
+ "learning_rate": 1.2586037364798428e-05,
3235
+ "loss": 0.7733,
3236
+ "step": 878
3237
+ },
3238
+ {
3239
+ "epoch": 7.787610619469026,
3240
+ "grad_norm": 22.154911041259766,
3241
+ "learning_rate": 1.2487708947885939e-05,
3242
+ "loss": 0.6404,
3243
+ "step": 880
3244
+ },
3245
+ {
3246
+ "epoch": 7.8053097345132745,
3247
+ "grad_norm": 18.85184669494629,
3248
+ "learning_rate": 1.2389380530973452e-05,
3249
+ "loss": 0.5891,
3250
+ "step": 882
3251
+ },
3252
+ {
3253
+ "epoch": 7.823008849557522,
3254
+ "grad_norm": 18.25426483154297,
3255
+ "learning_rate": 1.2291052114060964e-05,
3256
+ "loss": 0.8318,
3257
+ "step": 884
3258
+ },
3259
+ {
3260
+ "epoch": 7.84070796460177,
3261
+ "grad_norm": 15.124105453491211,
3262
+ "learning_rate": 1.2192723697148477e-05,
3263
+ "loss": 0.728,
3264
+ "step": 886
3265
+ },
3266
+ {
3267
+ "epoch": 7.8584070796460175,
3268
+ "grad_norm": 18.860633850097656,
3269
+ "learning_rate": 1.2094395280235989e-05,
3270
+ "loss": 0.8461,
3271
+ "step": 888
3272
+ },
3273
+ {
3274
+ "epoch": 7.876106194690266,
3275
+ "grad_norm": 21.146745681762695,
3276
+ "learning_rate": 1.19960668633235e-05,
3277
+ "loss": 0.7089,
3278
+ "step": 890
3279
+ },
3280
+ {
3281
+ "epoch": 7.893805309734513,
3282
+ "grad_norm": 19.12373924255371,
3283
+ "learning_rate": 1.1897738446411014e-05,
3284
+ "loss": 0.668,
3285
+ "step": 892
3286
+ },
3287
+ {
3288
+ "epoch": 7.911504424778761,
3289
+ "grad_norm": 12.939081192016602,
3290
+ "learning_rate": 1.1799410029498525e-05,
3291
+ "loss": 0.8069,
3292
+ "step": 894
3293
+ },
3294
+ {
3295
+ "epoch": 7.929203539823009,
3296
+ "grad_norm": 14.852071762084961,
3297
+ "learning_rate": 1.1701081612586037e-05,
3298
+ "loss": 0.6977,
3299
+ "step": 896
3300
+ },
3301
+ {
3302
+ "epoch": 7.946902654867257,
3303
+ "grad_norm": 16.5046329498291,
3304
+ "learning_rate": 1.160275319567355e-05,
3305
+ "loss": 0.6836,
3306
+ "step": 898
3307
+ },
3308
+ {
3309
+ "epoch": 7.964601769911504,
3310
+ "grad_norm": 27.28510284423828,
3311
+ "learning_rate": 1.1504424778761062e-05,
3312
+ "loss": 0.7441,
3313
+ "step": 900
3314
+ },
3315
+ {
3316
+ "epoch": 7.982300884955752,
3317
+ "grad_norm": 20.414901733398438,
3318
+ "learning_rate": 1.1406096361848573e-05,
3319
+ "loss": 0.7189,
3320
+ "step": 902
3321
+ },
3322
+ {
3323
+ "epoch": 8.0,
3324
+ "grad_norm": 15.717076301574707,
3325
+ "learning_rate": 1.1307767944936087e-05,
3326
+ "loss": 0.7197,
3327
+ "step": 904
3328
+ },
3329
+ {
3330
+ "epoch": 8.0,
3331
+ "eval_loss": 0.7693684101104736,
3332
+ "eval_map": 0.5471,
3333
+ "eval_map_50": 0.9308,
3334
+ "eval_map_75": 0.5913,
3335
+ "eval_map_large": 0.6096,
3336
+ "eval_map_medium": 0.3715,
3337
+ "eval_map_per_class": 0.5471,
3338
+ "eval_map_small": 0.0188,
3339
+ "eval_mar_1": 0.2833,
3340
+ "eval_mar_10": 0.6235,
3341
+ "eval_mar_100": 0.6761,
3342
+ "eval_mar_100_per_class": 0.6761,
3343
+ "eval_mar_large": 0.7278,
3344
+ "eval_mar_medium": 0.5453,
3345
+ "eval_mar_small": 0.3,
3346
+ "eval_runtime": 3.669,
3347
+ "eval_samples_per_second": 27.255,
3348
+ "eval_steps_per_second": 1.908,
3349
+ "step": 904
3350
+ },
3351
+ {
3352
+ "epoch": 8.017699115044248,
3353
+ "grad_norm": 19.057044982910156,
3354
+ "learning_rate": 1.12094395280236e-05,
3355
+ "loss": 0.6378,
3356
+ "step": 906
3357
+ },
3358
+ {
3359
+ "epoch": 8.035398230088495,
3360
+ "grad_norm": 26.174707412719727,
3361
+ "learning_rate": 1.1111111111111112e-05,
3362
+ "loss": 0.6774,
3363
+ "step": 908
3364
+ },
3365
+ {
3366
+ "epoch": 8.053097345132743,
3367
+ "grad_norm": 23.515504837036133,
3368
+ "learning_rate": 1.1012782694198625e-05,
3369
+ "loss": 0.7632,
3370
+ "step": 910
3371
+ },
3372
+ {
3373
+ "epoch": 8.070796460176991,
3374
+ "grad_norm": 32.522216796875,
3375
+ "learning_rate": 1.0914454277286137e-05,
3376
+ "loss": 0.7866,
3377
+ "step": 912
3378
+ },
3379
+ {
3380
+ "epoch": 8.08849557522124,
3381
+ "grad_norm": 17.90611457824707,
3382
+ "learning_rate": 1.0816125860373648e-05,
3383
+ "loss": 0.6607,
3384
+ "step": 914
3385
+ },
3386
+ {
3387
+ "epoch": 8.106194690265486,
3388
+ "grad_norm": 17.308259963989258,
3389
+ "learning_rate": 1.0717797443461161e-05,
3390
+ "loss": 0.9028,
3391
+ "step": 916
3392
+ },
3393
+ {
3394
+ "epoch": 8.123893805309734,
3395
+ "grad_norm": 23.00870704650879,
3396
+ "learning_rate": 1.0619469026548673e-05,
3397
+ "loss": 0.6444,
3398
+ "step": 918
3399
+ },
3400
+ {
3401
+ "epoch": 8.141592920353983,
3402
+ "grad_norm": 23.513565063476562,
3403
+ "learning_rate": 1.0521140609636185e-05,
3404
+ "loss": 0.7422,
3405
+ "step": 920
3406
+ },
3407
+ {
3408
+ "epoch": 8.15929203539823,
3409
+ "grad_norm": 22.787883758544922,
3410
+ "learning_rate": 1.0422812192723698e-05,
3411
+ "loss": 0.8156,
3412
+ "step": 922
3413
+ },
3414
+ {
3415
+ "epoch": 8.176991150442477,
3416
+ "grad_norm": 26.117023468017578,
3417
+ "learning_rate": 1.032448377581121e-05,
3418
+ "loss": 0.538,
3419
+ "step": 924
3420
+ },
3421
+ {
3422
+ "epoch": 8.194690265486726,
3423
+ "grad_norm": 21.87485122680664,
3424
+ "learning_rate": 1.0226155358898721e-05,
3425
+ "loss": 0.6871,
3426
+ "step": 926
3427
+ },
3428
+ {
3429
+ "epoch": 8.212389380530974,
3430
+ "grad_norm": 21.752988815307617,
3431
+ "learning_rate": 1.0127826941986234e-05,
3432
+ "loss": 0.7181,
3433
+ "step": 928
3434
+ },
3435
+ {
3436
+ "epoch": 8.230088495575222,
3437
+ "grad_norm": 24.260828018188477,
3438
+ "learning_rate": 1.0029498525073746e-05,
3439
+ "loss": 0.7211,
3440
+ "step": 930
3441
+ },
3442
+ {
3443
+ "epoch": 8.247787610619469,
3444
+ "grad_norm": 12.92242431640625,
3445
+ "learning_rate": 9.93117010816126e-06,
3446
+ "loss": 0.8347,
3447
+ "step": 932
3448
+ },
3449
+ {
3450
+ "epoch": 8.265486725663717,
3451
+ "grad_norm": 16.984403610229492,
3452
+ "learning_rate": 9.832841691248771e-06,
3453
+ "loss": 0.7955,
3454
+ "step": 934
3455
+ },
3456
+ {
3457
+ "epoch": 8.283185840707965,
3458
+ "grad_norm": 36.490318298339844,
3459
+ "learning_rate": 9.734513274336284e-06,
3460
+ "loss": 0.7464,
3461
+ "step": 936
3462
+ },
3463
+ {
3464
+ "epoch": 8.300884955752213,
3465
+ "grad_norm": 16.81266975402832,
3466
+ "learning_rate": 9.636184857423796e-06,
3467
+ "loss": 0.5796,
3468
+ "step": 938
3469
+ },
3470
+ {
3471
+ "epoch": 8.31858407079646,
3472
+ "grad_norm": 14.971821784973145,
3473
+ "learning_rate": 9.537856440511309e-06,
3474
+ "loss": 0.6221,
3475
+ "step": 940
3476
+ },
3477
+ {
3478
+ "epoch": 8.336283185840708,
3479
+ "grad_norm": 21.590499877929688,
3480
+ "learning_rate": 9.43952802359882e-06,
3481
+ "loss": 0.6271,
3482
+ "step": 942
3483
+ },
3484
+ {
3485
+ "epoch": 8.353982300884956,
3486
+ "grad_norm": 31.352338790893555,
3487
+ "learning_rate": 9.341199606686332e-06,
3488
+ "loss": 0.7592,
3489
+ "step": 944
3490
+ },
3491
+ {
3492
+ "epoch": 8.371681415929203,
3493
+ "grad_norm": 23.651779174804688,
3494
+ "learning_rate": 9.242871189773846e-06,
3495
+ "loss": 0.6352,
3496
+ "step": 946
3497
+ },
3498
+ {
3499
+ "epoch": 8.389380530973451,
3500
+ "grad_norm": 20.58877182006836,
3501
+ "learning_rate": 9.144542772861357e-06,
3502
+ "loss": 0.6383,
3503
+ "step": 948
3504
+ },
3505
+ {
3506
+ "epoch": 8.4070796460177,
3507
+ "grad_norm": 24.20329475402832,
3508
+ "learning_rate": 9.046214355948869e-06,
3509
+ "loss": 0.7669,
3510
+ "step": 950
3511
+ },
3512
+ {
3513
+ "epoch": 8.424778761061948,
3514
+ "grad_norm": 18.367490768432617,
3515
+ "learning_rate": 8.947885939036382e-06,
3516
+ "loss": 0.6304,
3517
+ "step": 952
3518
+ },
3519
+ {
3520
+ "epoch": 8.442477876106194,
3521
+ "grad_norm": 27.963157653808594,
3522
+ "learning_rate": 8.849557522123894e-06,
3523
+ "loss": 0.7402,
3524
+ "step": 954
3525
+ },
3526
+ {
3527
+ "epoch": 8.460176991150442,
3528
+ "grad_norm": 19.442922592163086,
3529
+ "learning_rate": 8.751229105211405e-06,
3530
+ "loss": 0.6278,
3531
+ "step": 956
3532
+ },
3533
+ {
3534
+ "epoch": 8.47787610619469,
3535
+ "grad_norm": 13.04371166229248,
3536
+ "learning_rate": 8.652900688298919e-06,
3537
+ "loss": 0.7028,
3538
+ "step": 958
3539
+ },
3540
+ {
3541
+ "epoch": 8.495575221238939,
3542
+ "grad_norm": 25.378374099731445,
3543
+ "learning_rate": 8.554572271386432e-06,
3544
+ "loss": 0.6889,
3545
+ "step": 960
3546
+ },
3547
+ {
3548
+ "epoch": 8.513274336283185,
3549
+ "grad_norm": 19.712556838989258,
3550
+ "learning_rate": 8.456243854473943e-06,
3551
+ "loss": 0.581,
3552
+ "step": 962
3553
+ },
3554
+ {
3555
+ "epoch": 8.530973451327434,
3556
+ "grad_norm": 20.15717315673828,
3557
+ "learning_rate": 8.357915437561457e-06,
3558
+ "loss": 0.6877,
3559
+ "step": 964
3560
+ },
3561
+ {
3562
+ "epoch": 8.548672566371682,
3563
+ "grad_norm": 24.568340301513672,
3564
+ "learning_rate": 8.259587020648968e-06,
3565
+ "loss": 0.5734,
3566
+ "step": 966
3567
+ },
3568
+ {
3569
+ "epoch": 8.56637168141593,
3570
+ "grad_norm": 20.580244064331055,
3571
+ "learning_rate": 8.16125860373648e-06,
3572
+ "loss": 0.695,
3573
+ "step": 968
3574
+ },
3575
+ {
3576
+ "epoch": 8.584070796460177,
3577
+ "grad_norm": 25.639179229736328,
3578
+ "learning_rate": 8.062930186823993e-06,
3579
+ "loss": 0.7367,
3580
+ "step": 970
3581
+ },
3582
+ {
3583
+ "epoch": 8.601769911504425,
3584
+ "grad_norm": 14.882777214050293,
3585
+ "learning_rate": 7.964601769911505e-06,
3586
+ "loss": 0.7085,
3587
+ "step": 972
3588
+ },
3589
+ {
3590
+ "epoch": 8.619469026548673,
3591
+ "grad_norm": 25.450916290283203,
3592
+ "learning_rate": 7.866273352999016e-06,
3593
+ "loss": 0.7158,
3594
+ "step": 974
3595
+ },
3596
+ {
3597
+ "epoch": 8.63716814159292,
3598
+ "grad_norm": 17.252824783325195,
3599
+ "learning_rate": 7.76794493608653e-06,
3600
+ "loss": 0.722,
3601
+ "step": 976
3602
+ },
3603
+ {
3604
+ "epoch": 8.654867256637168,
3605
+ "grad_norm": 17.026081085205078,
3606
+ "learning_rate": 7.669616519174041e-06,
3607
+ "loss": 0.7374,
3608
+ "step": 978
3609
+ },
3610
+ {
3611
+ "epoch": 8.672566371681416,
3612
+ "grad_norm": 17.954788208007812,
3613
+ "learning_rate": 7.571288102261554e-06,
3614
+ "loss": 0.7943,
3615
+ "step": 980
3616
+ },
3617
+ {
3618
+ "epoch": 8.690265486725664,
3619
+ "grad_norm": 14.818877220153809,
3620
+ "learning_rate": 7.472959685349067e-06,
3621
+ "loss": 0.687,
3622
+ "step": 982
3623
+ },
3624
+ {
3625
+ "epoch": 8.70796460176991,
3626
+ "grad_norm": 14.548257827758789,
3627
+ "learning_rate": 7.374631268436579e-06,
3628
+ "loss": 0.6824,
3629
+ "step": 984
3630
+ },
3631
+ {
3632
+ "epoch": 8.725663716814159,
3633
+ "grad_norm": 26.486650466918945,
3634
+ "learning_rate": 7.27630285152409e-06,
3635
+ "loss": 0.7428,
3636
+ "step": 986
3637
+ },
3638
+ {
3639
+ "epoch": 8.743362831858407,
3640
+ "grad_norm": 14.338964462280273,
3641
+ "learning_rate": 7.1779744346116035e-06,
3642
+ "loss": 0.6337,
3643
+ "step": 988
3644
+ },
3645
+ {
3646
+ "epoch": 8.761061946902656,
3647
+ "grad_norm": 11.065580368041992,
3648
+ "learning_rate": 7.079646017699115e-06,
3649
+ "loss": 0.7577,
3650
+ "step": 990
3651
+ },
3652
+ {
3653
+ "epoch": 8.778761061946902,
3654
+ "grad_norm": 20.75477409362793,
3655
+ "learning_rate": 6.9813176007866275e-06,
3656
+ "loss": 0.8219,
3657
+ "step": 992
3658
+ },
3659
+ {
3660
+ "epoch": 8.79646017699115,
3661
+ "grad_norm": 29.077869415283203,
3662
+ "learning_rate": 6.88298918387414e-06,
3663
+ "loss": 0.6982,
3664
+ "step": 994
3665
+ },
3666
+ {
3667
+ "epoch": 8.814159292035399,
3668
+ "grad_norm": 19.636323928833008,
3669
+ "learning_rate": 6.784660766961652e-06,
3670
+ "loss": 0.7256,
3671
+ "step": 996
3672
+ },
3673
+ {
3674
+ "epoch": 8.831858407079647,
3675
+ "grad_norm": 34.08713912963867,
3676
+ "learning_rate": 6.686332350049164e-06,
3677
+ "loss": 0.6156,
3678
+ "step": 998
3679
+ },
3680
+ {
3681
+ "epoch": 8.849557522123893,
3682
+ "grad_norm": 21.24959945678711,
3683
+ "learning_rate": 6.588003933136677e-06,
3684
+ "loss": 0.7501,
3685
+ "step": 1000
3686
+ },
3687
+ {
3688
+ "epoch": 8.867256637168142,
3689
+ "grad_norm": 24.227680206298828,
3690
+ "learning_rate": 6.489675516224189e-06,
3691
+ "loss": 0.7819,
3692
+ "step": 1002
3693
+ },
3694
+ {
3695
+ "epoch": 8.88495575221239,
3696
+ "grad_norm": 19.83379364013672,
3697
+ "learning_rate": 6.3913470993117005e-06,
3698
+ "loss": 0.6253,
3699
+ "step": 1004
3700
+ },
3701
+ {
3702
+ "epoch": 8.902654867256636,
3703
+ "grad_norm": 17.381193161010742,
3704
+ "learning_rate": 6.293018682399214e-06,
3705
+ "loss": 0.7019,
3706
+ "step": 1006
3707
+ },
3708
+ {
3709
+ "epoch": 8.920353982300885,
3710
+ "grad_norm": 19.214319229125977,
3711
+ "learning_rate": 6.194690265486726e-06,
3712
+ "loss": 0.7062,
3713
+ "step": 1008
3714
+ },
3715
+ {
3716
+ "epoch": 8.938053097345133,
3717
+ "grad_norm": 13.928613662719727,
3718
+ "learning_rate": 6.096361848574239e-06,
3719
+ "loss": 0.6825,
3720
+ "step": 1010
3721
+ },
3722
+ {
3723
+ "epoch": 8.955752212389381,
3724
+ "grad_norm": 29.10097885131836,
3725
+ "learning_rate": 5.99803343166175e-06,
3726
+ "loss": 0.6878,
3727
+ "step": 1012
3728
+ },
3729
+ {
3730
+ "epoch": 8.973451327433628,
3731
+ "grad_norm": 24.121362686157227,
3732
+ "learning_rate": 5.899705014749263e-06,
3733
+ "loss": 0.7148,
3734
+ "step": 1014
3735
+ },
3736
+ {
3737
+ "epoch": 8.991150442477876,
3738
+ "grad_norm": 23.926654815673828,
3739
+ "learning_rate": 5.801376597836775e-06,
3740
+ "loss": 0.7509,
3741
+ "step": 1016
3742
+ },
3743
+ {
3744
+ "epoch": 9.0,
3745
+ "eval_loss": 0.7565978169441223,
3746
+ "eval_map": 0.5582,
3747
+ "eval_map_50": 0.9326,
3748
+ "eval_map_75": 0.6075,
3749
+ "eval_map_large": 0.618,
3750
+ "eval_map_medium": 0.3937,
3751
+ "eval_map_per_class": 0.5582,
3752
+ "eval_map_small": 0.025,
3753
+ "eval_mar_1": 0.2923,
3754
+ "eval_mar_10": 0.6329,
3755
+ "eval_mar_100": 0.697,
3756
+ "eval_mar_100_per_class": 0.697,
3757
+ "eval_mar_large": 0.7497,
3758
+ "eval_mar_medium": 0.5641,
3759
+ "eval_mar_small": 0.3,
3760
+ "eval_runtime": 3.7908,
3761
+ "eval_samples_per_second": 26.38,
3762
+ "eval_steps_per_second": 1.847,
3763
+ "step": 1017
3764
+ }
3765
+ ],
3766
+ "logging_steps": 2,
3767
+ "max_steps": 1130,
3768
+ "num_input_tokens_seen": 0,
3769
+ "num_train_epochs": 10,
3770
+ "save_steps": 500,
3771
+ "stateful_callbacks": {
3772
+ "EarlyStoppingCallback": {
3773
+ "args": {
3774
+ "early_stopping_patience": 5,
3775
+ "early_stopping_threshold": 0.01
3776
+ },
3777
+ "attributes": {
3778
+ "early_stopping_patience_counter": 0
3779
+ }
3780
+ },
3781
+ "TrainerControl": {
3782
+ "args": {
3783
+ "should_epoch_stop": false,
3784
+ "should_evaluate": false,
3785
+ "should_log": false,
3786
+ "should_save": true,
3787
+ "should_training_stop": false
3788
+ },
3789
+ "attributes": {}
3790
+ }
3791
+ },
3792
+ "total_flos": 3.3929164854288e+17,
3793
+ "train_batch_size": 8,
3794
+ "trial_name": null,
3795
+ "trial_params": null
3796
+ }
checkpoint-1017/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0579e6b0a09cc4a284085d274800ba6a1303a61f0c60385d6fbf85907b7c1865
3
+ size 5176
config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "hustvl/yolos-tiny",
3
+ "architectures": [
4
+ "YolosForObjectDetection"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "auxiliary_loss": false,
8
+ "bbox_cost": 5,
9
+ "bbox_loss_coefficient": 5,
10
+ "class_cost": 1,
11
+ "eos_coefficient": 0.1,
12
+ "giou_cost": 2,
13
+ "giou_loss_coefficient": 2,
14
+ "hidden_act": "gelu",
15
+ "hidden_dropout_prob": 0.0,
16
+ "hidden_size": 192,
17
+ "id2label": {
18
+ "0": "Face"
19
+ },
20
+ "image_size": [
21
+ 800,
22
+ 1333
23
+ ],
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 768,
26
+ "label2id": {
27
+ "Face": 0
28
+ },
29
+ "layer_norm_eps": 1e-12,
30
+ "model_type": "yolos",
31
+ "num_attention_heads": 3,
32
+ "num_channels": 3,
33
+ "num_detection_tokens": 100,
34
+ "num_hidden_layers": 12,
35
+ "patch_size": 16,
36
+ "qkv_bias": true,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.41.0",
39
+ "use_mid_position_embeddings": false
40
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d302c1624aade53174907fed704a071b5ca5ceff59f3a995c1f3f61ddc66c48
3
+ size 25909400
preprocessor_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "annotations",
5
+ "return_segmentation_masks",
6
+ "masks_path",
7
+ "do_resize",
8
+ "size",
9
+ "resample",
10
+ "do_rescale",
11
+ "rescale_factor",
12
+ "do_normalize",
13
+ "image_mean",
14
+ "image_std",
15
+ "do_convert_annotations",
16
+ "do_pad",
17
+ "format",
18
+ "return_tensors",
19
+ "data_format",
20
+ "input_data_format"
21
+ ],
22
+ "do_convert_annotations": true,
23
+ "do_normalize": true,
24
+ "do_pad": false,
25
+ "do_rescale": true,
26
+ "do_resize": false,
27
+ "format": "coco_detection",
28
+ "image_mean": [
29
+ 0.485,
30
+ 0.456,
31
+ 0.406
32
+ ],
33
+ "image_processor_type": "YolosImageProcessor",
34
+ "image_std": [
35
+ 0.229,
36
+ 0.224,
37
+ 0.225
38
+ ],
39
+ "resample": 2,
40
+ "rescale_factor": 0.00392156862745098,
41
+ "size": {
42
+ "longest_edge": 600
43
+ }
44
+ }
runs/May25_15-08-52_r-rileybol-face-detection-v1-02a2uktc-0be61-3ycbl/events.out.tfevents.1716649733.r-rileybol-face-detection-v1-02a2uktc-0be61-3ycbl.157.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6b3a1d34b6962ce9a3a92acbd132f38291c6c43cc7794a5f330268bd817c145
3
- size 5070
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dbfd164b4ae0dae88ea0c35c623135889fdb574e8d86cb57e3d2d04a866eeb6
3
+ size 134428
runs/May25_15-08-52_r-rileybol-face-detection-v1-02a2uktc-0be61-3ycbl/events.out.tfevents.1716650137.r-rileybol-face-detection-v1-02a2uktc-0be61-3ycbl.157.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7f093fe89adf06f17a3343003a0884b63cbb5a52d332ad614fff38f79e7369e
3
+ size 1094
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0579e6b0a09cc4a284085d274800ba6a1303a61f0c60385d6fbf85907b7c1865
3
+ size 5176
training_params.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "autotrain-80y7u-aoweu/autotrain-data",
3
+ "model": "hustvl/yolos-tiny",
4
+ "username": "rileybol",
5
+ "lr": 5e-05,
6
+ "epochs": 10,
7
+ "batch_size": 8,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "logging_steps": -1,
18
+ "project_name": "autotrain-80y7u-aoweu",
19
+ "auto_find_batch_size": false,
20
+ "mixed_precision": "fp16",
21
+ "save_total_limit": 1,
22
+ "push_to_hub": true,
23
+ "evaluation_strategy": "epoch",
24
+ "image_column": "autotrain_image",
25
+ "objects_column": "autotrain_objects",
26
+ "log": "tensorboard",
27
+ "image_square_size": 600,
28
+ "early_stopping_patience": 5,
29
+ "early_stopping_threshold": 0.01
30
+ }