flozi00 committed on
Commit
6e1cd2e
1 Parent(s): fc88da5

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 8.709332525719078e-05,
4
- "train_runtime": 189.6957,
5
- "train_samples": 21538,
6
- "train_samples_per_second": 113.54,
7
- "train_steps_per_second": 1.777
8
  }
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.009268463998593412,
4
+ "train_runtime": 186.2906,
5
+ "train_samples": 21673,
6
+ "train_samples_per_second": 116.34,
7
+ "train_steps_per_second": 1.82
8
  }
config.json CHANGED
@@ -51,7 +51,7 @@
51
  }
52
  },
53
  "torch_dtype": "float32",
54
- "transformers_version": "4.12.0.dev0",
55
  "use_cache": true,
56
  "vocab_size": 32100
57
  }
51
  }
52
  },
53
  "torch_dtype": "float32",
54
+ "transformers_version": "4.12.5",
55
  "use_cache": true,
56
  "vocab_size": 32100
57
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6557f7e7707f045eb67eed0b29690171c23fbd92515f5215c978de1af11d2103
3
  size 891650871
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff4db044f0e2ae34edf783091398f37420df1649eae1f2f0ffaca225f65b7be0
3
  size 891650871
runs/Nov25_17-40-02_DESKTOP-FPB11SM/1637858565.932426/events.out.tfevents.1637858565.DESKTOP-FPB11SM.22208.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85f1deba7e0f7476bd25e4b4ac66f543e2ad61febb47dc088bb6f411249f2b16
3
+ size 4740
runs/Nov25_17-40-02_DESKTOP-FPB11SM/events.out.tfevents.1637858565.DESKTOP-FPB11SM.22208.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e9c8127b7bfe608f84667fe18518eea026745ab8c2dc025506fe71a7628370d
3
+ size 5321
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 8.709332525719078e-05,
4
- "train_runtime": 189.6957,
5
- "train_samples": 21538,
6
- "train_samples_per_second": 113.54,
7
- "train_steps_per_second": 1.777
8
  }
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.009268463998593412,
4
+ "train_runtime": 186.2906,
5
+ "train_samples": 21673,
6
+ "train_samples_per_second": 116.34,
7
+ "train_steps_per_second": 1.82
8
  }
trainer_state.json CHANGED
@@ -2,60 +2,60 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
- "global_step": 337,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.15,
12
- "learning_rate": 4.258160237388724e-05,
13
- "loss": 0.0002,
14
  "step": 50
15
  },
16
  {
17
- "epoch": 0.3,
18
- "learning_rate": 3.516320474777448e-05,
19
- "loss": 0.0002,
20
  "step": 100
21
  },
22
  {
23
- "epoch": 0.45,
24
- "learning_rate": 2.774480712166172e-05,
25
- "loss": 0.0,
26
  "step": 150
27
  },
28
  {
29
  "epoch": 0.59,
30
- "learning_rate": 2.0326409495548962e-05,
31
- "loss": 0.0001,
32
  "step": 200
33
  },
34
  {
35
  "epoch": 0.74,
36
- "learning_rate": 1.29080118694362e-05,
37
- "loss": 0.0,
38
  "step": 250
39
  },
40
  {
41
- "epoch": 0.89,
42
- "learning_rate": 5.489614243323442e-06,
43
- "loss": 0.0,
44
  "step": 300
45
  },
46
  {
47
  "epoch": 1.0,
48
- "step": 337,
49
- "total_flos": 2801087359488000.0,
50
- "train_loss": 8.709332525719078e-05,
51
- "train_runtime": 189.6957,
52
- "train_samples_per_second": 113.54,
53
- "train_steps_per_second": 1.777
54
  }
55
  ],
56
- "max_steps": 337,
57
  "num_train_epochs": 1,
58
- "total_flos": 2801087359488000.0,
59
  "trial_name": null,
60
  "trial_params": null
61
  }
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
+ "global_step": 339,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.15,
12
+ "learning_rate": 4.262536873156342e-05,
13
+ "loss": 0.0544,
14
  "step": 50
15
  },
16
  {
17
+ "epoch": 0.29,
18
+ "learning_rate": 3.5250737463126844e-05,
19
+ "loss": 0.0046,
20
  "step": 100
21
  },
22
  {
23
+ "epoch": 0.44,
24
+ "learning_rate": 2.7876106194690264e-05,
25
+ "loss": 0.0017,
26
  "step": 150
27
  },
28
  {
29
  "epoch": 0.59,
30
+ "learning_rate": 2.0501474926253688e-05,
31
+ "loss": 0.0008,
32
  "step": 200
33
  },
34
  {
35
  "epoch": 0.74,
36
+ "learning_rate": 1.3126843657817109e-05,
37
+ "loss": 0.0003,
38
  "step": 250
39
  },
40
  {
41
+ "epoch": 0.88,
42
+ "learning_rate": 5.752212389380531e-06,
43
+ "loss": 0.0005,
44
  "step": 300
45
  },
46
  {
47
  "epoch": 1.0,
48
+ "step": 339,
49
+ "total_flos": 2741653307243520.0,
50
+ "train_loss": 0.009268463998593412,
51
+ "train_runtime": 186.2906,
52
+ "train_samples_per_second": 116.34,
53
+ "train_steps_per_second": 1.82
54
  }
55
  ],
56
+ "max_steps": 339,
57
  "num_train_epochs": 1,
58
+ "total_flos": 2741653307243520.0,
59
  "trial_name": null,
60
  "trial_params": null
61
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb7add8ff853cd2132d39756e4551133929cae92cc5bcd1c7db0103a7b3d00e1
3
  size 2991
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b331deea65e8e800146e5b236d9284b85aec2aeed33a1d4d1f8096aa29f4c51a
3
  size 2991