gc394 committed on
Commit
2a4d062
·
verified ·
1 Parent(s): f8ed993

End of training

Browse files
Files changed (7) hide show
  1. README.md +68 -0
  2. config.json +1 -2
  3. model.safetensors +2 -2
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +2 -2
  6. training_args.bin +2 -2
  7. vocab.txt +0 -0
README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: gc394/da_distilbert
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: ft_da_distilbert_effective_rate
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # ft_da_distilbert_effective_rate
15
+
16
+ This model is a fine-tuned version of [gc394/da_distilbert](https://huggingface.co/gc394/da_distilbert) on the None dataset.
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 0.0625
19
+ - Mape: 21050161102848.0
20
+ - Rmse: 0.2500
21
+
22
+ ## Model description
23
+
24
+ More information needed
25
+
26
+ ## Intended uses & limitations
27
+
28
+ More information needed
29
+
30
+ ## Training and evaluation data
31
+
32
+ More information needed
33
+
34
+ ## Training procedure
35
+
36
+ ### Training hyperparameters
37
+
38
+ The following hyperparameters were used during training:
39
+ - learning_rate: 2e-05
40
+ - train_batch_size: 16
41
+ - eval_batch_size: 16
42
+ - seed: 42
43
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
+ - lr_scheduler_type: linear
45
+ - num_epochs: 10
46
+
47
+ ### Training results
48
+
49
+ | Training Loss | Epoch | Step | Validation Loss | Mape | Rmse |
50
+ |:-------------:|:-----:|:----:|:---------------:|:----------------:|:------:|
51
+ | No log | 1.0 | 105 | 0.0634 | 7549680615424.0 | 0.2519 |
52
+ | No log | 2.0 | 210 | 0.0625 | 21050161102848.0 | 0.2500 |
53
+ | No log | 3.0 | 315 | 0.0651 | 15955784630272.0 | 0.2552 |
54
+ | No log | 4.0 | 420 | 0.0676 | 16507671150592.0 | 0.2599 |
55
+ | 0.0129 | 5.0 | 525 | 0.0729 | 35525666799616.0 | 0.2700 |
56
+ | 0.0129 | 6.0 | 630 | 0.0669 | 30705371316224.0 | 0.2586 |
57
+ | 0.0129 | 7.0 | 735 | 0.0686 | 32481740849152.0 | 0.2619 |
58
+ | 0.0129 | 8.0 | 840 | 0.0703 | 40486999949312.0 | 0.2652 |
59
+ | 0.0129 | 9.0 | 945 | 0.0708 | 35813152784384.0 | 0.2661 |
60
+ | 0.005 | 10.0 | 1050 | 0.0704 | 38553111232512.0 | 0.2653 |
61
+
62
+
63
+ ### Framework versions
64
+
65
+ - Transformers 4.40.1
66
+ - Pytorch 2.4.0.dev20240502
67
+ - Datasets 2.19.0
68
+ - Tokenizers 0.19.1
config.json CHANGED
@@ -19,7 +19,6 @@
19
  "model_type": "distilbert",
20
  "n_heads": 12,
21
  "n_layers": 6,
22
- "output_past": true,
23
  "pad_token_id": 0,
24
  "problem_type": "regression",
25
  "qa_dropout": 0.1,
@@ -28,5 +27,5 @@
28
  "tie_weights_": true,
29
  "torch_dtype": "float32",
30
  "transformers_version": "4.40.1",
31
- "vocab_size": 28996
32
  }
 
19
  "model_type": "distilbert",
20
  "n_heads": 12,
21
  "n_layers": 6,
 
22
  "pad_token_id": 0,
23
  "problem_type": "regression",
24
  "qa_dropout": 0.1,
 
27
  "tie_weights_": true,
28
  "torch_dtype": "float32",
29
  "transformers_version": "4.40.1",
30
+ "vocab_size": 30522
31
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3147c7f98b8a793903d30e2a0d61001219ca9457021a09b61e9944679e143202
3
- size 263141604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edb810cb38427f8ceecb5f3c5a5341c7bab2ff2b4b67c9a71ce23d76f6377566
3
+ size 267829484
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -43,9 +43,9 @@
43
  },
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
46
- "do_lower_case": false,
47
  "mask_token": "[MASK]",
48
- "model_max_length": 1000000000000000019884624838656,
49
  "pad_token": "[PAD]",
50
  "sep_token": "[SEP]",
51
  "strip_accents": null,
 
43
  },
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
  "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
  "pad_token": "[PAD]",
50
  "sep_token": "[SEP]",
51
  "strip_accents": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1792e4ea57dcf08b3341c14edb699c0644361483dfe48c04f66d036ba3fa3432
3
- size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3df69f12f2527db3804d7bb60dd132ac1194afc54f61d6f735eae248d23427ad
3
+ size 4984
vocab.txt CHANGED
The diff for this file is too large to render. See raw diff