CVR123 committed on
Commit
0a4dff2
1 Parent(s): f866fc7

Telugubert for TeluguQC

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  tags:
3
  - generated_from_trainer
4
  metrics:
@@ -15,13 +17,13 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # output
17
 
18
- This model was trained from scratch on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.7144
21
- - Precision: 0.9059
22
- - Recall: 0.9049
23
- - Accuracy: 0.9049
24
- - F1-score: 0.9053
25
 
26
  ## Model description
27
 
@@ -46,20 +48,19 @@ The following hyperparameters were used during training:
46
  - seed: 42
47
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
48
  - lr_scheduler_type: linear
49
- - num_epochs: 8
50
 
51
  ### Training results
52
 
53
  | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | Accuracy | F1-score |
54
  |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:--------:|:--------:|
55
- | 0.6607 | 1.0 | 309 | 0.3826 | 0.8915 | 0.8907 | 0.8907 | 0.8905 |
56
- | 0.2673 | 2.0 | 618 | 0.4694 | 0.8886 | 0.8866 | 0.8866 | 0.8860 |
57
- | 0.1819 | 3.0 | 927 | 0.4766 | 0.9001 | 0.8988 | 0.8988 | 0.8989 |
58
- | 0.102 | 4.0 | 1236 | 0.6096 | 0.8945 | 0.8927 | 0.8927 | 0.8930 |
59
- | 0.0607 | 5.0 | 1545 | 0.6537 | 0.8971 | 0.8947 | 0.8947 | 0.8955 |
60
- | 0.0326 | 6.0 | 1854 | 0.6568 | 0.9127 | 0.9109 | 0.9109 | 0.9116 |
61
- | 0.0221 | 7.0 | 2163 | 0.7081 | 0.9045 | 0.9028 | 0.9028 | 0.9035 |
62
- | 0.0133 | 8.0 | 2472 | 0.7144 | 0.9059 | 0.9049 | 0.9049 | 0.9053 |
63
 
64
 
65
  ### Framework versions
 
1
  ---
2
+ license: cc-by-4.0
3
+ base_model: l3cube-pune/telugu-bert
4
  tags:
5
  - generated_from_trainer
6
  metrics:
 
17
 
18
  # output
19
 
20
+ This model is a fine-tuned version of [l3cube-pune/telugu-bert](https://huggingface.co/l3cube-pune/telugu-bert) on an unknown dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 1.6017
23
+ - Precision: 0.7825
24
+ - Recall: 0.7870
25
+ - Accuracy: 0.7870
26
+ - F1-score: 0.7809
27
 
28
  ## Model description
29
 
 
48
  - seed: 42
49
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
50
  - lr_scheduler_type: linear
51
+ - num_epochs: 7
52
 
53
  ### Training results
54
 
55
  | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | Accuracy | F1-score |
56
  |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:--------:|:--------:|
57
+ | 1.7849 | 1.0 | 32 | 1.7769 | 0.0454 | 0.2130 | 0.2130 | 0.0748 |
58
+ | 1.7491 | 2.0 | 64 | 1.7436 | 0.1414 | 0.3306 | 0.3306 | 0.1933 |
59
+ | 1.7032 | 3.0 | 96 | 1.7059 | 0.2918 | 0.3043 | 0.3043 | 0.2076 |
60
+ | 1.6498 | 4.0 | 128 | 1.6571 | 0.7274 | 0.6572 | 0.6572 | 0.6571 |
61
+ | 1.6029 | 5.0 | 160 | 1.6236 | 0.7687 | 0.7627 | 0.7627 | 0.7576 |
62
+ | 1.5724 | 6.0 | 192 | 1.6049 | 0.7894 | 0.7951 | 0.7951 | 0.7893 |
63
+ | 1.556 | 7.0 | 224 | 1.6017 | 0.7825 | 0.7870 | 0.7870 | 0.7809 |
 
64
 
65
 
66
  ### Framework versions
config.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "_name_or_path": "google/muril-large-cased",
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "classifier_dropout": null,
8
- "embedding_size": 1024,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
- "hidden_size": 1024,
12
  "id2label": {
13
  "0": "Abbreviation",
14
  "1": "Description",
@@ -18,7 +18,7 @@
18
  "5": "Numeric"
19
  },
20
  "initializer_range": 0.02,
21
- "intermediate_size": 4096,
22
  "label2id": {
23
  "Abbreviation": 0,
24
  "Description": 1,
@@ -30,8 +30,8 @@
30
  "layer_norm_eps": 1e-12,
31
  "max_position_embeddings": 512,
32
  "model_type": "bert",
33
- "num_attention_heads": 16,
34
- "num_hidden_layers": 24,
35
  "pad_token_id": 0,
36
  "position_embedding_type": "absolute",
37
  "problem_type": "single_label_classification",
 
1
  {
2
+ "_name_or_path": "l3cube-pune/telugu-bert",
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "classifier_dropout": null,
8
+ "embedding_size": 768,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
  "id2label": {
13
  "0": "Abbreviation",
14
  "1": "Description",
 
18
  "5": "Numeric"
19
  },
20
  "initializer_range": 0.02,
21
+ "intermediate_size": 3072,
22
  "label2id": {
23
  "Abbreviation": 0,
24
  "Description": 1,
 
30
  "layer_norm_eps": 1e-12,
31
  "max_position_embeddings": 512,
32
  "model_type": "bert",
33
+ "num_attention_heads": 12,
34
+ "num_hidden_layers": 12,
35
  "pad_token_id": 0,
36
  "position_embedding_type": "absolute",
37
  "problem_type": "single_label_classification",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bc64af6f109935cc9432bb9b419416bdff7fe1f4140f2e1c74c90915f17ffe7
3
- size 2023700848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aab7a751f02eea3422dd153d9de73285a746a4c7df81a6f7f561c7dd835eeb38
3
+ size 950266896
runs/Apr08_23-42-26_8dcdf4d4de1d/events.out.tfevents.1712619768.8dcdf4d4de1d.555.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:530973ef616e4b8a0934258eed9ed878425c31c1a1da9b061bcf565fa0cf2c82
3
+ size 10661
runs/Apr08_23-49-01_8dcdf4d4de1d/events.out.tfevents.1712620152.8dcdf4d4de1d.555.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28651a948d707300107b8f9efa6f8ebcd5f74e8b7d361a42e49f8be061f43638
3
+ size 9972
special_tokens_map.json CHANGED
@@ -1,7 +1,37 @@
1
  {
2
- "cls_token": "[CLS]",
3
- "mask_token": "[MASK]",
4
- "pad_token": "[PAD]",
5
- "sep_token": "[SEP]",
6
- "unk_token": "[UNK]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  }
 
1
  {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
  }
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
tokenizer_config.json CHANGED
@@ -47,7 +47,7 @@
47
  "do_lower_case": false,
48
  "lowercase": false,
49
  "mask_token": "[MASK]",
50
- "model_max_length": 1000000000000000019884624838656,
51
  "never_split": null,
52
  "pad_token": "[PAD]",
53
  "sep_token": "[SEP]",
 
47
  "do_lower_case": false,
48
  "lowercase": false,
49
  "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
  "never_split": null,
52
  "pad_token": "[PAD]",
53
  "sep_token": "[SEP]",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80da42a4639d0fa5d3d50362fd788f0a9d4f3d24530f49c46737ec07dcddb243
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bca09a3f3f95451faf3e17d022867f8113ccd6ab0ca2e5d2d6e73464c3890360
3
  size 4856