juandalibaba committed on
Commit
63acbf9
1 Parent(s): de93609

Training in progress epoch 0

Browse files
Files changed (4) hide show
  1. README.md +7 -13
  2. config.json +1 -31
  3. tf_model.h5 +2 -2
  4. tokenizer.json +12 -3
README.md CHANGED
@@ -15,13 +15,9 @@ probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Train Loss: 0.1022
19
- - Validation Loss: 0.2518
20
- - Train Precision: 0.6732
21
- - Train Recall: 0.4533
22
- - Train F1: 0.5418
23
- - Train Accuracy: 0.9482
24
- - Epoch: 2
25
 
26
  ## Model description
27
 
@@ -40,16 +36,14 @@ More information needed
40
  ### Training hyperparameters
41
 
42
  The following hyperparameters were used during training:
43
- - optimizer: {'name': 'AdamWeightDecay', 'learning_rate': {'class_name': 'PolynomialDecay', 'config': {'initial_learning_rate': 2e-05, 'decay_steps': 636, 'end_learning_rate': 0.0, 'power': 1.0, 'cycle': False, 'name': None}}, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08, 'amsgrad': False, 'weight_decay_rate': 0.01}
44
  - training_precision: float32
45
 
46
  ### Training results
47
 
48
- | Train Loss | Validation Loss | Train Precision | Train Recall | Train F1 | Train Accuracy | Epoch |
49
- |:----------:|:---------------:|:---------------:|:------------:|:--------:|:--------------:|:-----:|
50
- | 0.1026 | 0.2518 | 0.6732 | 0.4533 | 0.5418 | 0.9482 | 0 |
51
- | 0.1016 | 0.2518 | 0.6732 | 0.4533 | 0.5418 | 0.9482 | 1 |
52
- | 0.1022 | 0.2518 | 0.6732 | 0.4533 | 0.5418 | 0.9482 | 2 |
53
 
54
 
55
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Train Loss: 2.7876
19
+ - Validation Loss: 1.9931
20
+ - Epoch: 0
 
 
 
 
21
 
22
  ## Model description
23
 
 
36
  ### Training hyperparameters
37
 
38
  The following hyperparameters were used during training:
39
+ - optimizer: {'name': 'Adam', 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'jit_compile': True, 'is_legacy_optimizer': False, 'learning_rate': {'class_name': 'PolynomialDecay', 'config': {'initial_learning_rate': 2e-05, 'decay_steps': 500, 'end_learning_rate': 0.0, 'power': 1.0, 'cycle': False, 'name': None}}, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08, 'amsgrad': False}
40
  - training_precision: float32
41
 
42
  ### Training results
43
 
44
+ | Train Loss | Validation Loss | Epoch |
45
+ |:----------:|:---------------:|:-----:|
46
+ | 2.7876 | 1.9931 | 0 |
 
 
47
 
48
 
49
  ### Framework versions
config.json CHANGED
@@ -2,43 +2,13 @@
2
  "_name_or_path": "distilbert-base-uncased",
3
  "activation": "gelu",
4
  "architectures": [
5
- "DistilBertForTokenClassification"
6
  ],
7
  "attention_dropout": 0.1,
8
  "dim": 768,
9
  "dropout": 0.1,
10
  "hidden_dim": 3072,
11
- "id2label": {
12
- "0": "O",
13
- "1": "B-corporation",
14
- "2": "I-corporation",
15
- "3": "B-creative-work",
16
- "4": "I-creative-work",
17
- "5": "B-group",
18
- "6": "I-group",
19
- "7": "B-location",
20
- "8": "I-location",
21
- "9": "B-person",
22
- "10": "I-person",
23
- "11": "B-product",
24
- "12": "I-product"
25
- },
26
  "initializer_range": 0.02,
27
- "label2id": {
28
- "B-corporation": 1,
29
- "B-creative-work": 3,
30
- "B-group": 5,
31
- "B-location": 7,
32
- "B-person": 9,
33
- "B-product": 11,
34
- "I-corporation": 2,
35
- "I-creative-work": 4,
36
- "I-group": 6,
37
- "I-location": 8,
38
- "I-person": 10,
39
- "I-product": 12,
40
- "O": 0
41
- },
42
  "max_position_embeddings": 512,
43
  "model_type": "distilbert",
44
  "n_heads": 12,
 
2
  "_name_or_path": "distilbert-base-uncased",
3
  "activation": "gelu",
4
  "architectures": [
5
+ "DistilBertForQuestionAnswering"
6
  ],
7
  "attention_dropout": 0.1,
8
  "dim": 768,
9
  "dropout": 0.1,
10
  "hidden_dim": 3072,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  "initializer_range": 0.02,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  "max_position_embeddings": 512,
13
  "model_type": "distilbert",
14
  "n_heads": 12,
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a71559104acdc481e131da99aff0a91cb22bb6985b8c9f08bc285de89c4bc29
3
- size 265618704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a27ec4db071c288fbd3aeebac8c58fab03e71dc3f133e9cb5f94cb0dfd09efa
3
+ size 265583592
tokenizer.json CHANGED
@@ -2,11 +2,20 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 512,
6
- "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
- "padding": null,
 
 
 
 
 
 
 
 
 
10
  "added_tokens": [
11
  {
12
  "id": 0,
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 384,
6
+ "strategy": "OnlySecond",
7
  "stride": 0
8
  },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 384
12
+ },
13
+ "direction": "Right",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 0,
16
+ "pad_type_id": 0,
17
+ "pad_token": "[PAD]"
18
+ },
19
  "added_tokens": [
20
  {
21
  "id": 0,