joseph10 committed on
Commit
4218450
1 Parent(s): eec69da

Training in progress, epoch 1

Browse files
Files changed (4) hide show
  1. README.md +62 -0
  2. config.json +14 -2
  3. model.safetensors +2 -2
  4. training_args.bin +1 -1
README.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: google/bert_uncased_L-2_H-128_A-2
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
+ model-index:
9
+ - name: berttiny-hateXplain-parentpretrained
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # berttiny-hateXplain-parentpretrained
17
+
18
+ This model is a fine-tuned version of [google/bert_uncased_L-2_H-128_A-2](https://huggingface.co/google/bert_uncased_L-2_H-128_A-2) on an unknown dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.2286
21
+ - Accuracy: 0.7601
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 0.0001286744242350192
41
+ - train_batch_size: 128
42
+ - eval_batch_size: 128
43
+ - seed: 33
44
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
+ - lr_scheduler_type: linear
46
+ - num_epochs: 7
47
+
48
+ ### Training results
49
+
50
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
51
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
52
+ | 0.236 | 1.0 | 121 | 0.2296 | 0.7581 |
53
+ | 0.2239 | 2.0 | 242 | 0.2274 | 0.7591 |
54
+ | 0.2174 | 3.0 | 363 | 0.2286 | 0.7601 |
55
+
56
+
57
+ ### Framework versions
58
+
59
+ - Transformers 4.36.0.dev0
60
+ - PyTorch 2.1.1
61
+ - Datasets 2.15.0
62
+ - Tokenizers 0.15.0
config.json CHANGED
@@ -2,12 +2,17 @@
2
  "_name_or_path": "agvidit1/DistilledBert_HateSpeech_pretrain",
3
  "activation": "gelu",
4
  "architectures": [
5
- "DistilBertForSequenceClassification"
6
  ],
7
  "attention_dropout": 0.1,
 
 
8
  "dim": 768,
9
  "dropout": 0.1,
 
10
  "hidden_dim": 3072,
 
 
11
  "id2label": {
12
  "0": 0,
13
  "1": 1,
@@ -15,17 +20,22 @@
15
  "3": 3
16
  },
17
  "initializer_range": 0.02,
 
18
  "label2id": {
19
  "0": 0,
20
  "1": 1,
21
  "2": 2,
22
  "3": 3
23
  },
 
24
  "max_position_embeddings": 512,
25
- "model_type": "distilbert",
26
  "n_heads": 12,
27
  "n_layers": 6,
 
 
28
  "pad_token_id": 0,
 
29
  "problem_type": "single_label_classification",
30
  "qa_dropout": 0.1,
31
  "seq_classif_dropout": 0.2,
@@ -33,5 +43,7 @@
33
  "tie_weights_": true,
34
  "torch_dtype": "float32",
35
  "transformers_version": "4.36.0.dev0",
 
 
36
  "vocab_size": 30522
37
  }
 
2
  "_name_or_path": "agvidit1/DistilledBert_HateSpeech_pretrain",
3
  "activation": "gelu",
4
  "architectures": [
5
+ "BertForSequenceClassification"
6
  ],
7
  "attention_dropout": 0.1,
8
+ "attention_probs_dropout_prob": 0.1,
9
+ "classifier_dropout": null,
10
  "dim": 768,
11
  "dropout": 0.1,
12
+ "hidden_act": "gelu",
13
  "hidden_dim": 3072,
14
+ "hidden_dropout_prob": 0.1,
15
+ "hidden_size": 768,
16
  "id2label": {
17
  "0": 0,
18
  "1": 1,
 
20
  "3": 3
21
  },
22
  "initializer_range": 0.02,
23
+ "intermediate_size": 3072,
24
  "label2id": {
25
  "0": 0,
26
  "1": 1,
27
  "2": 2,
28
  "3": 3
29
  },
30
+ "layer_norm_eps": 1e-12,
31
  "max_position_embeddings": 512,
32
+ "model_type": "bert",
33
  "n_heads": 12,
34
  "n_layers": 6,
35
+ "num_attention_heads": 12,
36
+ "num_hidden_layers": 12,
37
  "pad_token_id": 0,
38
+ "position_embedding_type": "absolute",
39
  "problem_type": "single_label_classification",
40
  "qa_dropout": 0.1,
41
  "seq_classif_dropout": 0.2,
 
43
  "tie_weights_": true,
44
  "torch_dtype": "float32",
45
  "transformers_version": "4.36.0.dev0",
46
+ "type_vocab_size": 2,
47
+ "use_cache": true,
48
  "vocab_size": 30522
49
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa2ee73f8ef5e2aba8dc8552e2c60d53e640beb1de82bde1dd2ef5494cffb85b
3
- size 267838720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5f2b07901305af79e865683b356eb4ab715f6de4a310edcafd9a9f4f108274c
3
+ size 437964800
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d867fd6fc4a219ef921faa090af65c09ea1d32483558946d8c240940c28b9f0
3
  size 4792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd291d568e2c39552c50319c78392acde4dc20f897814064a3491ade1c9c0893
3
  size 4792