1-13-am committed on
Commit
020928c
1 Parent(s): 8569d77

Training completed!

Browse files
Files changed (4) hide show
  1. README.md +13 -12
  2. config.json +27 -27
  3. model.safetensors +2 -2
  4. training_args.bin +1 -1
README.md CHANGED
@@ -18,10 +18,10 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on an unspecified dataset (the auto-generated card recorded the dataset name as `None`).
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.0015
22
- - F Beta: 0.8072
23
- - Precision: 0.9817
24
- - Recall: 0.8015
25
 
26
  ## Model description
27
 
@@ -44,23 +44,24 @@ The following hyperparameters were used during training:
44
  - train_batch_size: 8
45
  - eval_batch_size: 16
46
  - seed: 42
47
- - gradient_accumulation_steps: 3
48
- - total_train_batch_size: 24
49
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
50
  - lr_scheduler_type: cosine
51
  - lr_scheduler_warmup_ratio: 0.05
52
- - num_epochs: 1
53
  - mixed_precision_training: Native AMP
54
 
55
  ### Training results
56
 
57
  | Training Loss | Epoch | Step | Validation Loss | F Beta | Precision | Recall |
58
  |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|
59
- | 0.0169 | 0.19 | 150 | 0.0026 | 0.8985 | 0.9836 | 0.8954 |
60
- | 0.0036 | 0.37 | 300 | 0.0019 | 0.8693 | 0.9778 | 0.8655 |
61
- | 0.0036 | 0.56 | 450 | 0.0017 | 0.8120 | 0.9741 | 0.8066 |
62
- | 0.0053 | 0.74 | 600 | 0.0016 | 0.7912 | 0.9796 | 0.7851 |
63
- | 0.0059 | 0.93 | 750 | 0.0015 | 0.8072 | 0.9817 | 0.8015 |
 
64
 
65
 
66
  ### Framework versions
 
18
 
19
  This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on an unspecified dataset (the auto-generated card recorded the dataset name as `None`).
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.0065
22
+ - F Beta: 0.9611
23
+ - Precision: 0.9932
24
+ - Recall: 0.9598
25
 
26
  ## Model description
27
 
 
44
  - train_batch_size: 8
45
  - eval_batch_size: 16
46
  - seed: 42
47
+ - gradient_accumulation_steps: 4
48
+ - total_train_batch_size: 32
49
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
50
  - lr_scheduler_type: cosine
51
  - lr_scheduler_warmup_ratio: 0.05
52
+ - num_epochs: 3
53
  - mixed_precision_training: Native AMP
54
 
55
  ### Training results
56
 
57
  | Training Loss | Epoch | Step | Validation Loss | F Beta | Precision | Recall |
58
  |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|
59
+ | 0.0291 | 0.46 | 300 | 0.0104 | 0.9756 | 0.9854 | 0.9752 |
60
+ | 0.0062 | 0.93 | 600 | 0.0041 | 0.9830 | 0.9901 | 0.9827 |
61
+ | 0.0044 | 1.39 | 900 | 0.0057 | 0.9713 | 0.9895 | 0.9706 |
62
+ | 0.0258 | 1.85 | 1200 | 0.0040 | 0.9799 | 0.9920 | 0.9794 |
63
+ | 0.0135 | 2.32 | 1500 | 0.0050 | 0.9845 | 0.9943 | 0.9841 |
64
+ | 0.0023 | 2.78 | 1800 | 0.0065 | 0.9611 | 0.9932 | 0.9598 |
65
 
66
 
67
  ### Framework versions
config.json CHANGED
@@ -1,43 +1,43 @@
1
  {
2
  "_name_or_path": "microsoft/deberta-v3-base",
3
  "architectures": [
4
- "DebertaForTokenClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
- "0": "O",
12
- "1": "B-NAME_STUDENT",
13
- "2": "B-URL_PERSONAL",
14
- "3": "B-ID_NUM",
15
- "4": "B-STREET_ADDRESS",
16
- "5": "B-PHONE_NUM",
17
- "6": "B-EMAIL",
18
- "7": "B-USERNAME",
19
- "8": "I-NAME_STUDENT",
20
- "9": "I-URL_PERSONAL",
21
- "10": "I-ID_NUM",
22
- "11": "I-STREET_ADDRESS",
23
- "12": "I-PHONE_NUM"
24
  },
25
  "initializer_range": 0.02,
26
  "intermediate_size": 3072,
27
  "label2id": {
28
- "B-EMAIL": 6,
29
- "B-ID_NUM": 3,
30
- "B-NAME_STUDENT": 1,
31
- "B-PHONE_NUM": 5,
32
- "B-STREET_ADDRESS": 4,
33
- "B-URL_PERSONAL": 2,
34
- "B-USERNAME": 7,
35
- "I-ID_NUM": 10,
36
- "I-NAME_STUDENT": 8,
37
- "I-PHONE_NUM": 12,
38
- "I-STREET_ADDRESS": 11,
39
- "I-URL_PERSONAL": 9,
40
- "O": 0
41
  },
42
  "layer_norm_eps": 1e-07,
43
  "max_position_embeddings": 512,
 
1
  {
2
  "_name_or_path": "microsoft/deberta-v3-base",
3
  "architectures": [
4
+ "DebertaV2ForTokenClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
+ "0": "B-NAME_STUDENT",
12
+ "1": "B-URL_PERSONAL",
13
+ "2": "B-ID_NUM",
14
+ "3": "B-STREET_ADDRESS",
15
+ "4": "B-PHONE_NUM",
16
+ "5": "B-EMAIL",
17
+ "6": "B-USERNAME",
18
+ "7": "I-NAME_STUDENT",
19
+ "8": "I-URL_PERSONAL",
20
+ "9": "I-ID_NUM",
21
+ "10": "I-STREET_ADDRESS",
22
+ "11": "I-PHONE_NUM",
23
+ "12": "O"
24
  },
25
  "initializer_range": 0.02,
26
  "intermediate_size": 3072,
27
  "label2id": {
28
+ "B-EMAIL": 5,
29
+ "B-ID_NUM": 2,
30
+ "B-NAME_STUDENT": 0,
31
+ "B-PHONE_NUM": 4,
32
+ "B-STREET_ADDRESS": 3,
33
+ "B-URL_PERSONAL": 1,
34
+ "B-USERNAME": 6,
35
+ "I-ID_NUM": 9,
36
+ "I-NAME_STUDENT": 7,
37
+ "I-PHONE_NUM": 11,
38
+ "I-STREET_ADDRESS": 10,
39
+ "I-URL_PERSONAL": 8,
40
+ "O": 12
41
  },
42
  "layer_norm_eps": 1e-07,
43
  "max_position_embeddings": 512,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34447759db9417cbc162db7781412e40bf51daf43bd819a83aab067bbc1e5b18
3
- size 737753016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d9fef5246efc1d9faa5b336b2beba637f1f9305c8838a7d776c68fd34de4dbe
3
+ size 735390572
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa66f07f19802685670b87f42045c629cb3b2ec946120717686b93d620b135a4
3
  size 4219
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:123496ed5e2b5a7009aed827cfd8eab76419269d2e734e57f08a9e641702879b
3
  size 4219