searde committed on
Commit
2817661
1 Parent(s): a9fba0d

Reset model

Browse files
README.md CHANGED
@@ -21,7 +21,7 @@ model-index:
21
  metrics:
22
  - name: Rouge1
23
  type: rouge
24
- value: 89.6649
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -31,12 +31,12 @@ should probably proofread and complete it, then remove this comment. -->
31
 
32
  This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on the searde/dataset-financial-documents-3 3.0.0 dataset.
33
  It achieves the following results on the evaluation set:
34
- - Loss: 0.0967
35
- - Rouge1: 89.6649
36
- - Rouge2: 68.3919
37
- - Rougel: 89.4415
38
- - Rougelsum: 89.5589
39
- - Gen Len: 39.3769
40
 
41
  ## Model description
42
 
 
21
  metrics:
22
  - name: Rouge1
23
  type: rouge
24
+ value: 14.9574
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
31
 
32
  This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on the searde/dataset-financial-documents-3 3.0.0 dataset.
33
  It achieves the following results on the evaluation set:
34
+ - Loss: 3.0505
35
+ - Rouge1: 14.9574
36
+ - Rouge2: 0.0
37
+ - Rougel: 8.4517
38
+ - Rougelsum: 12.4858
39
+ - Gen Len: 63.0
40
 
41
  ## Model description
42
 
all_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_gen_len": 39.37688442211055,
4
- "eval_loss": 0.09672751277685165,
5
- "eval_rouge1": 89.6649,
6
- "eval_rouge2": 68.3919,
7
- "eval_rougeL": 89.4415,
8
- "eval_rougeLsum": 89.5589,
9
- "eval_runtime": 169.8618,
10
- "eval_samples": 199,
11
- "eval_samples_per_second": 1.172,
12
- "eval_steps_per_second": 0.294,
13
- "train_loss": 0.19669942220052083,
14
- "train_runtime": 181.9658,
15
- "train_samples": 199,
16
- "train_samples_per_second": 3.281,
17
- "train_steps_per_second": 0.824
18
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_gen_len": 63.0,
4
+ "eval_loss": 3.0504558086395264,
5
+ "eval_rouge1": 14.9574,
6
+ "eval_rouge2": 0.0,
7
+ "eval_rougeL": 8.4517,
8
+ "eval_rougeLsum": 12.4858,
9
+ "eval_runtime": 13.4834,
10
+ "eval_samples": 2,
11
+ "eval_samples_per_second": 0.148,
12
+ "eval_steps_per_second": 0.074,
13
+ "train_loss": 3.8336060841878257,
14
+ "train_runtime": 23.7681,
15
+ "train_samples": 8,
16
+ "train_samples_per_second": 1.01,
17
+ "train_steps_per_second": 0.252
18
  }
eval_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_gen_len": 39.37688442211055,
4
- "eval_loss": 0.09672751277685165,
5
- "eval_rouge1": 89.6649,
6
- "eval_rouge2": 68.3919,
7
- "eval_rougeL": 89.4415,
8
- "eval_rougeLsum": 89.5589,
9
- "eval_runtime": 169.8618,
10
- "eval_samples": 199,
11
- "eval_samples_per_second": 1.172,
12
- "eval_steps_per_second": 0.294
13
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_gen_len": 63.0,
4
+ "eval_loss": 3.0504558086395264,
5
+ "eval_rouge1": 14.9574,
6
+ "eval_rouge2": 0.0,
7
+ "eval_rougeL": 8.4517,
8
+ "eval_rougeLsum": 12.4858,
9
+ "eval_runtime": 13.4834,
10
+ "eval_samples": 2,
11
+ "eval_samples_per_second": 0.148,
12
+ "eval_steps_per_second": 0.074
13
  }
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 128,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 0.19669942220052083,
4
- "train_runtime": 181.9658,
5
- "train_samples": 199,
6
- "train_samples_per_second": 3.281,
7
- "train_steps_per_second": 0.824
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 3.8336060841878257,
4
+ "train_runtime": 23.7681,
5
+ "train_samples": 8,
6
+ "train_samples_per_second": 1.01,
7
+ "train_steps_per_second": 0.252
8
  }
trainer_state.json CHANGED
@@ -2,24 +2,24 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
- "global_step": 150,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 3.0,
12
- "step": 150,
13
- "total_flos": 44938500538368.0,
14
- "train_loss": 0.19669942220052083,
15
- "train_runtime": 181.9658,
16
- "train_samples_per_second": 3.281,
17
- "train_steps_per_second": 0.824
18
  }
19
  ],
20
- "max_steps": 150,
21
  "num_train_epochs": 3,
22
- "total_flos": 44938500538368.0,
23
  "trial_name": null,
24
  "trial_params": null
25
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
+ "global_step": 6,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 3.0,
12
+ "step": 6,
13
+ "total_flos": 6496406470656.0,
14
+ "train_loss": 3.8336060841878257,
15
+ "train_runtime": 23.7681,
16
+ "train_samples_per_second": 1.01,
17
+ "train_steps_per_second": 0.252
18
  }
19
  ],
20
+ "max_steps": 6,
21
  "num_train_epochs": 3,
22
+ "total_flos": 6496406470656.0,
23
  "trial_name": null,
24
  "trial_params": null
25
  }
training_args.bin CHANGED
Binary files a/training_args.bin and b/training_args.bin differ