SoyGema committed
Commit 68a09c0 (1 parent: 2214f8e)

End of training

README.md CHANGED
@@ -1,13 +1,31 @@
 ---
+language:
+- en
+- es
 license: apache-2.0
 base_model: t5-small
 tags:
 - generated_from_trainer
 datasets:
 - opus100
+metrics:
+- bleu
 model-index:
 - name: english-spanish
-  results: []
+  results:
+  - task:
+      name: Translation
+      type: translation
+    dataset:
+      name: opus100 en-es
+      type: opus100
+      config: en-es
+      split: validation
+      args: en-es
+    metrics:
+    - name: Bleu
+      type: bleu
+      value: 15.8604
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,7 +33,11 @@ should probably proofread and complete it, then remove this comment. -->
 
 # english-spanish
 
-This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on the opus100 dataset.
+This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on the opus100 en-es dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.1056
+- Bleu: 15.8604
+- Gen Len: 40.6875
 
 ## Model description
 
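The updated card describes an English-to-Spanish translation model fine-tuned from t5-small. A minimal usage sketch with Hugging Face Transformers follows; the repository id `SoyGema/english-spanish` and the T5-style task prefix are assumptions based on the card's name, not something stated in this commit.

```python
# Hedged sketch: load the fine-tuned checkpoint and translate one sentence.
# The model id "SoyGema/english-spanish" is assumed from the card's name;
# adjust it to the actual repository if it differs.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_id = "SoyGema/english-spanish"  # assumed repository id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# T5 checkpoints expect a text-to-text task prefix on the source sentence.
text = "translate English to Spanish: The model was trained on opus100."
inputs = tokenizer(text, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```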
all_results.json CHANGED
@@ -1,15 +1,15 @@
 {
   "epoch": 3.0,
-  "eval_bleu": 4.7921,
-  "eval_gen_len": 46.875,
-  "eval_loss": 0.959338903427124,
-  "eval_runtime": 168.4301,
-  "eval_samples": 8,
-  "eval_samples_per_second": 0.047,
-  "eval_steps_per_second": 0.012,
-  "train_loss": 1.504677425346778,
-  "train_runtime": 1613.5134,
-  "train_samples": 3907,
-  "train_samples_per_second": 7.264,
-  "train_steps_per_second": 1.817
+  "eval_bleu": 15.8604,
+  "eval_gen_len": 40.6875,
+  "eval_loss": 1.1056389808654785,
+  "eval_runtime": 217.338,
+  "eval_samples": 16,
+  "eval_samples_per_second": 0.074,
+  "eval_steps_per_second": 0.018,
+  "train_loss": 1.430499308184933,
+  "train_runtime": 3735.0404,
+  "train_samples": 7813,
+  "train_samples_per_second": 6.275,
+  "train_steps_per_second": 1.569
 }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
 {
   "epoch": 3.0,
-  "eval_bleu": 4.7921,
-  "eval_gen_len": 46.875,
-  "eval_loss": 0.959338903427124,
-  "eval_runtime": 168.4301,
-  "eval_samples": 8,
-  "eval_samples_per_second": 0.047,
-  "eval_steps_per_second": 0.012
+  "eval_bleu": 15.8604,
+  "eval_gen_len": 40.6875,
+  "eval_loss": 1.1056389808654785,
+  "eval_runtime": 217.338,
+  "eval_samples": 16,
+  "eval_samples_per_second": 0.074,
+  "eval_steps_per_second": 0.018
 }
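The eval_bleu figure is a corpus-level BLEU score. A small sketch of how such a score can be reproduced with the `evaluate` library's `sacrebleu` metric; the exact metric implementation used by this training run is an assumption, since the training script is not part of this commit.

```python
# Sketch of a corpus-level BLEU computation, assuming the run used
# evaluate's "sacrebleu" metric (not confirmed by this diff).
import evaluate

bleu = evaluate.load("sacrebleu")
predictions = ["El tiempo es agradable hoy."]      # model outputs (illustrative)
references = [["Hoy hace un tiempo agradable."]]   # one list of references per prediction
result = bleu.compute(predictions=predictions, references=references)
print(round(result["score"], 4))  # same scale as the eval_bleu value above
```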
generation_config.json CHANGED
@@ -1,5 +1,4 @@
 {
-  "_from_model_config": true,
   "decoder_start_token_id": 0,
   "eos_token_id": 1,
   "pad_token_id": 0,
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
   "epoch": 3.0,
-  "train_loss": 1.504677425346778,
-  "train_runtime": 1613.5134,
-  "train_samples": 3907,
-  "train_samples_per_second": 7.264,
-  "train_steps_per_second": 1.817
+  "train_loss": 1.430499308184933,
+  "train_runtime": 3735.0404,
+  "train_samples": 7813,
+  "train_samples_per_second": 6.275,
+  "train_steps_per_second": 1.569
 }
trainer_state.json CHANGED
@@ -3,56 +3,92 @@
   "best_model_checkpoint": null,
   "epoch": 3.0,
   "eval_steps": 500,
-  "global_step": 2931,
+  "global_step": 5862,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
+    {
+      "epoch": 0.26,
+      "learning_rate": 4.57352439440464e-05,
+      "loss": 1.6481,
+      "step": 500
+    },
     {
       "epoch": 0.51,
       "learning_rate": 4.14704878880928e-05,
-      "loss": 1.6585,
-      "step": 500
+      "loss": 1.5182,
+      "step": 1000
+    },
+    {
+      "epoch": 0.77,
+      "learning_rate": 3.72057318321392e-05,
+      "loss": 1.478,
+      "step": 1500
     },
     {
       "epoch": 1.02,
       "learning_rate": 3.294097577618561e-05,
-      "loss": 1.534,
-      "step": 1000
+      "loss": 1.4709,
+      "step": 2000
+    },
+    {
+      "epoch": 1.28,
+      "learning_rate": 2.8676219720232007e-05,
+      "loss": 1.4008,
+      "step": 2500
     },
     {
       "epoch": 1.54,
       "learning_rate": 2.4411463664278406e-05,
-      "loss": 1.504,
-      "step": 1500
+      "loss": 1.4038,
+      "step": 3000
+    },
+    {
+      "epoch": 1.79,
+      "learning_rate": 2.0146707608324806e-05,
+      "loss": 1.3903,
+      "step": 3500
     },
     {
       "epoch": 2.05,
       "learning_rate": 1.5881951552371206e-05,
-      "loss": 1.4679,
-      "step": 2000
+      "loss": 1.4114,
+      "step": 4000
+    },
+    {
+      "epoch": 2.3,
+      "learning_rate": 1.1617195496417605e-05,
+      "loss": 1.3706,
+      "step": 4500
     },
     {
       "epoch": 2.56,
       "learning_rate": 7.3524394404640055e-06,
-      "loss": 1.442,
-      "step": 2500
+      "loss": 1.3448,
+      "step": 5000
+    },
+    {
+      "epoch": 2.81,
+      "learning_rate": 3.087683384510406e-06,
+      "loss": 1.3314,
+      "step": 5500
     },
     {
       "epoch": 3.0,
-      "step": 2931,
-      "total_flos": 118513687363584.0,
-      "train_loss": 1.504677425346778,
-      "train_runtime": 1613.5134,
-      "train_samples_per_second": 7.264,
-      "train_steps_per_second": 1.817
+      "step": 5862,
+      "total_flos": 237909211152384.0,
+      "train_loss": 1.430499308184933,
+      "train_runtime": 3735.0404,
+      "train_samples_per_second": 6.275,
+      "train_steps_per_second": 1.569
     }
   ],
   "logging_steps": 500,
-  "max_steps": 2931,
+  "max_steps": 5862,
   "num_train_epochs": 3,
   "save_steps": 500,
-  "total_flos": 118513687363584.0,
+  "total_flos": 237909211152384.0,
   "trial_name": null,
   "trial_params": null
 }
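The learning_rate values in the new log_history are consistent with a linear decay from 5e-5 to 0 over the 5,862 training steps. The initial learning rate and the absence of warmup are assumptions, since the training arguments are not part of this commit; a small sketch that reproduces the logged values:

```python
# Sketch: reproduce the logged learning rates under an assumed linear schedule
# (initial_lr = 5e-5, no warmup, decaying to 0 at max_steps = 5862).
initial_lr = 5e-5
max_steps = 5862

def linear_lr(step: int) -> float:
    return initial_lr * (1 - step / max_steps)

for step in (500, 1000, 1500, 5500):
    print(step, linear_lr(step))
# Matches the learning_rate entries in log_history (up to float rounding),
# e.g. step 500 -> ~4.5735e-05 and step 5500 -> ~3.0877e-06.
```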