SoyGema committed
Commit d2e8c7a
1 parent: fa2493a

End of training

README.md CHANGED
@@ -1,4 +1,7 @@
 ---
+language:
+- en
+- hi
 license: apache-2.0
 base_model: t5-small
 tags:
@@ -15,7 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # english-hindi
 
-This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on the opus100 dataset.
+This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on the opus100 en-hi dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.0816
 
 ## Model description
 
all_results.json CHANGED
@@ -1,14 +1,14 @@
 {
     "epoch": 3.0,
     "eval_bleu": 0.0,
-    "eval_loss": 0.07966260612010956,
-    "eval_runtime": 114.5298,
+    "eval_loss": 0.08162859082221985,
+    "eval_runtime": 0.5668,
     "eval_samples": 8,
-    "eval_samples_per_second": 0.07,
-    "eval_steps_per_second": 0.017,
-    "train_loss": 0.27012521614669077,
-    "train_runtime": 925.0114,
+    "eval_samples_per_second": 14.114,
+    "eval_steps_per_second": 3.529,
+    "train_loss": 0.2691089805515333,
+    "train_runtime": 914.55,
     "train_samples": 2088,
-    "train_samples_per_second": 6.772,
-    "train_steps_per_second": 1.693
+    "train_samples_per_second": 6.849,
+    "train_steps_per_second": 1.712
 }
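The updated throughput figures are consistent with the runtimes and sample counts in this commit; a quick arithmetic check, using only values that appear in the diffs (the step count 1566 comes from trainer_state.json further down):

```python
# Sanity check on the updated metrics; all inputs are taken from this commit.
eval_samples, eval_runtime = 8, 0.5668
train_samples, epochs, train_runtime, train_steps = 2088, 3, 914.55, 1566

print(eval_samples / eval_runtime)             # ~14.11 -> eval_samples_per_second
print(train_samples * epochs / train_runtime)  # ~6.85  -> train_samples_per_second
print(train_steps / train_runtime)             # ~1.71  -> train_steps_per_second
```

Both the train ratio (6264 examples over 1566 steps) and the eval ratio (14.114 / 3.529) work out to 4.0, which suggests a batch size of 4 per step.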
eval_results.json CHANGED
@@ -1,9 +1,8 @@
 {
     "epoch": 3.0,
-    "eval_bleu": 0.0,
-    "eval_loss": 0.07966260612010956,
-    "eval_runtime": 114.5298,
+    "eval_loss": 0.08162859082221985,
+    "eval_runtime": 0.5668,
     "eval_samples": 8,
-    "eval_samples_per_second": 0.07,
-    "eval_steps_per_second": 0.017
+    "eval_samples_per_second": 14.114,
+    "eval_steps_per_second": 3.529
 }
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 3.0,
-    "train_loss": 0.27012521614669077,
-    "train_runtime": 925.0114,
+    "train_loss": 0.2691089805515333,
+    "train_runtime": 914.55,
     "train_samples": 2088,
-    "train_samples_per_second": 6.772,
-    "train_steps_per_second": 1.693
+    "train_samples_per_second": 6.849,
+    "train_steps_per_second": 1.712
 }
trainer_state.json CHANGED
@@ -11,29 +11,29 @@
     {
       "epoch": 0.96,
       "learning_rate": 3.403575989782887e-05,
-      "loss": 0.3117,
+      "loss": 0.3102,
       "step": 500
     },
     {
       "epoch": 1.92,
       "learning_rate": 1.8071519795657726e-05,
-      "loss": 0.2609,
+      "loss": 0.2601,
       "step": 1000
     },
     {
       "epoch": 2.87,
       "learning_rate": 2.1072796934865904e-06,
-      "loss": 0.2413,
+      "loss": 0.2405,
       "step": 1500
     },
     {
       "epoch": 3.0,
       "step": 1566,
       "total_flos": 73168104062976.0,
-      "train_loss": 0.27012521614669077,
-      "train_runtime": 925.0114,
-      "train_samples_per_second": 6.772,
-      "train_steps_per_second": 1.693
+      "train_loss": 0.2691089805515333,
+      "train_runtime": 914.55,
+      "train_samples_per_second": 6.849,
+      "train_steps_per_second": 1.712
     }
   ],
   "logging_steps": 500,