regisss HF staff committed on
Commit
cd2b2ec
1 Parent(s): 10198e2

End of training

Browse files
README.md CHANGED
@@ -4,7 +4,7 @@ base_model: BridgeTower/bridgetower-large-itm-mlm-itc
4
  tags:
5
  - generated_from_trainer
6
  datasets:
7
- - newyorker_caption_contest
8
  model-index:
9
  - name: test-bridgetower
10
  results: []
@@ -15,7 +15,12 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # test-bridgetower
17
 
18
- This model is a fine-tuned version of [BridgeTower/bridgetower-large-itm-mlm-itc](https://huggingface.co/BridgeTower/bridgetower-large-itm-mlm-itc) on the newyorker_caption_contest dataset.
 
 
 
 
 
19
 
20
  ## Model description
21
 
 
4
  tags:
5
  - generated_from_trainer
6
  datasets:
7
+ - jmhessel/newyorker_caption_contest
8
  model-index:
9
  - name: test-bridgetower
10
  results: []
 
15
 
16
  # test-bridgetower
17
 
18
+ This model is a fine-tuned version of [BridgeTower/bridgetower-large-itm-mlm-itc](https://huggingface.co/BridgeTower/bridgetower-large-itm-mlm-itc) on the jmhessel/newyorker_caption_contest matching dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.1163
21
+ - Memory Allocated (gb): 17.72
22
+ - Max Memory Allocated (gb): 94.43
23
+ - Total Memory Available (gb): 94.61
24
 
25
  ## Model description
26
 
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_loss": 0.11634299904108047,
4
+ "eval_runtime": 0.4543,
5
+ "eval_samples_per_second": 1162.27,
6
+ "eval_steps_per_second": 11.006,
7
+ "max_memory_allocated (GB)": 94.43,
8
+ "memory_allocated (GB)": 17.72,
9
+ "total_memory_available (GB)": 94.61,
10
+ "train_loss": 0.08891021746855515,
11
+ "train_runtime": 341.352,
12
+ "train_samples_per_second": 429.544,
13
+ "train_steps_per_second": 1.141
14
+ }
test_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_loss": 0.11634299904108047,
4
+ "eval_runtime": 0.4543,
5
+ "eval_samples_per_second": 1162.27,
6
+ "eval_steps_per_second": 11.006,
7
+ "max_memory_allocated (GB)": 94.43,
8
+ "memory_allocated (GB)": 17.72,
9
+ "total_memory_available (GB)": 94.61
10
+ }
tokenizer.json CHANGED
@@ -1,21 +1,7 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 128,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
- "padding": {
10
- "strategy": {
11
- "Fixed": 128
12
- },
13
- "direction": "Right",
14
- "pad_to_multiple_of": null,
15
- "pad_id": 1,
16
- "pad_type_id": 0,
17
- "pad_token": "<pad>"
18
- },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
train_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "max_memory_allocated (GB)": 94.43,
4
+ "memory_allocated (GB)": 17.74,
5
+ "total_memory_available (GB)": 94.61,
6
+ "train_loss": 0.08891021746855515,
7
+ "train_runtime": 341.352,
8
+ "train_samples_per_second": 429.544,
9
+ "train_steps_per_second": 1.141
10
+ }
trainer_state.json ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 130,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.38,
13
+ "learning_rate": 9.230769230769232e-06,
14
+ "loss": 0.2471,
15
+ "max_memory_allocated (GB)": 94.4,
16
+ "memory_allocated (GB)": 17.88,
17
+ "step": 10,
18
+ "total_memory_available (GB)": 94.61
19
+ },
20
+ {
21
+ "epoch": 0.77,
22
+ "learning_rate": 8.461538461538462e-06,
23
+ "loss": 0.1034,
24
+ "max_memory_allocated (GB)": 94.42,
25
+ "memory_allocated (GB)": 17.88,
26
+ "step": 20,
27
+ "total_memory_available (GB)": 94.61
28
+ },
29
+ {
30
+ "epoch": 1.15,
31
+ "learning_rate": 7.692307692307694e-06,
32
+ "loss": 0.0834,
33
+ "max_memory_allocated (GB)": 94.42,
34
+ "memory_allocated (GB)": 17.88,
35
+ "step": 30,
36
+ "total_memory_available (GB)": 94.61
37
+ },
38
+ {
39
+ "epoch": 1.54,
40
+ "learning_rate": 6.923076923076923e-06,
41
+ "loss": 0.0813,
42
+ "max_memory_allocated (GB)": 94.42,
43
+ "memory_allocated (GB)": 17.88,
44
+ "step": 40,
45
+ "total_memory_available (GB)": 94.61
46
+ },
47
+ {
48
+ "epoch": 1.92,
49
+ "learning_rate": 6.153846153846155e-06,
50
+ "loss": 0.0771,
51
+ "max_memory_allocated (GB)": 94.42,
52
+ "memory_allocated (GB)": 17.83,
53
+ "step": 50,
54
+ "total_memory_available (GB)": 94.61
55
+ },
56
+ {
57
+ "epoch": 2.31,
58
+ "learning_rate": 5.384615384615385e-06,
59
+ "loss": 0.0732,
60
+ "max_memory_allocated (GB)": 94.42,
61
+ "memory_allocated (GB)": 17.88,
62
+ "step": 60,
63
+ "total_memory_available (GB)": 94.61
64
+ },
65
+ {
66
+ "epoch": 2.69,
67
+ "learning_rate": 4.615384615384616e-06,
68
+ "loss": 0.0714,
69
+ "max_memory_allocated (GB)": 94.43,
70
+ "memory_allocated (GB)": 17.89,
71
+ "step": 70,
72
+ "total_memory_available (GB)": 94.61
73
+ },
74
+ {
75
+ "epoch": 3.08,
76
+ "learning_rate": 3.846153846153847e-06,
77
+ "loss": 0.0715,
78
+ "max_memory_allocated (GB)": 94.43,
79
+ "memory_allocated (GB)": 17.89,
80
+ "step": 80,
81
+ "total_memory_available (GB)": 94.61
82
+ },
83
+ {
84
+ "epoch": 3.46,
85
+ "learning_rate": 3.0769230769230774e-06,
86
+ "loss": 0.0693,
87
+ "max_memory_allocated (GB)": 94.43,
88
+ "memory_allocated (GB)": 17.88,
89
+ "step": 90,
90
+ "total_memory_available (GB)": 94.61
91
+ },
92
+ {
93
+ "epoch": 3.85,
94
+ "learning_rate": 2.307692307692308e-06,
95
+ "loss": 0.0691,
96
+ "max_memory_allocated (GB)": 94.43,
97
+ "memory_allocated (GB)": 17.88,
98
+ "step": 100,
99
+ "total_memory_available (GB)": 94.61
100
+ },
101
+ {
102
+ "epoch": 4.23,
103
+ "learning_rate": 1.5384615384615387e-06,
104
+ "loss": 0.0691,
105
+ "max_memory_allocated (GB)": 94.43,
106
+ "memory_allocated (GB)": 17.88,
107
+ "step": 110,
108
+ "total_memory_available (GB)": 94.61
109
+ },
110
+ {
111
+ "epoch": 4.62,
112
+ "learning_rate": 7.692307692307694e-07,
113
+ "loss": 0.0702,
114
+ "max_memory_allocated (GB)": 94.43,
115
+ "memory_allocated (GB)": 17.88,
116
+ "step": 120,
117
+ "total_memory_available (GB)": 94.61
118
+ },
119
+ {
120
+ "epoch": 5.0,
121
+ "learning_rate": 0.0,
122
+ "loss": 0.0698,
123
+ "max_memory_allocated (GB)": 94.43,
124
+ "memory_allocated (GB)": 17.74,
125
+ "step": 130,
126
+ "total_memory_available (GB)": 94.61
127
+ },
128
+ {
129
+ "epoch": 5.0,
130
+ "max_memory_allocated (GB)": 94.43,
131
+ "memory_allocated (GB)": 17.74,
132
+ "step": 130,
133
+ "total_flos": 3.117345829342413e+16,
134
+ "total_memory_available (GB)": 94.61,
135
+ "train_loss": 0.08891021746855515,
136
+ "train_runtime": 341.352,
137
+ "train_samples_per_second": 429.544,
138
+ "train_steps_per_second": 1.141
139
+ }
140
+ ],
141
+ "logging_steps": 10,
142
+ "max_steps": 130,
143
+ "num_train_epochs": 5,
144
+ "save_steps": 500,
145
+ "total_flos": 3.117345829342413e+16,
146
+ "trial_name": null,
147
+ "trial_params": null
148
+ }
validation_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_loss": 0.11108700931072235,
4
+ "eval_runtime": 7.8437,
5
+ "eval_samples_per_second": 67.697,
6
+ "eval_steps_per_second": 0.637,
7
+ "max_memory_allocated (GB)": 94.43,
8
+ "memory_allocated (GB)": 17.71,
9
+ "total_memory_available (GB)": 94.61
10
+ }