fgaim committed on
Commit
56201f9
1 Parent(s): e94878a
.gitignore CHANGED
@@ -1,2 +1,3 @@
 
1
  checkpoint-*/
2
- run/
1
+ .ipynb_checkpoints/
2
  checkpoint-*/
3
+ runs/
README.txt → README.md RENAMED
@@ -3,16 +3,38 @@ language: ti
3
  widget:
4
  - text: "ድምጻዊ ኣብርሃም ኣፈወርቂ ንዘልኣለም ህያው ኮይኑ ኣብ ልብና ይነብር"
5
  datasets:
6
- - tiposd_sera.py
 
 
 
 
 
 
7
  model-index:
8
- - name: tipos-tiroberta
9
- results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  ---
11
 
12
 
13
  # Tigrinya POS tagging with TiRoBERTa
14
 
15
- This model is a fine-tuned version of [TiRoBERTa](https://huggingface.co//content/tiroberta) on the NTC tiposd dataset.
16
 
17
  ## Training
18
 
@@ -29,7 +51,7 @@ The following hyperparameters were used during training:
29
 
30
  ### Results
31
 
32
- It achieves the following results on the evaluation set:
33
  - Loss: 0.3194
34
  - Adj Precision: 0.9219
35
  - Adj Recall: 0.9335
@@ -122,3 +144,26 @@ It achieves the following results on the evaluation set:
122
  - Pytorch 1.9.0+cu111
123
  - Datasets 1.13.3
124
  - Tokenizers 0.10.3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  widget:
4
  - text: "ድምጻዊ ኣብርሃም ኣፈወርቂ ንዘልኣለም ህያው ኮይኑ ኣብ ልብና ይነብር"
5
  datasets:
6
+ - TLMD
7
+ - NTC
8
+ metrics:
9
+ - f1
10
+ - precision
11
+ - recall
12
+ - accuracy
13
  model-index:
14
+ - name: tiroberta-base-pos
15
+ results:
16
+ - task:
17
+ name: Token Classification
18
+ type: token-classification
19
+ metrics:
20
+ - name: F1
21
+ type: f1
22
+ value: 0.9562
23
+ - name: Precision
24
+ type: precision
25
+ value: 0.9562
26
+ - name: Recall
27
+ type: recall
28
+ value: 0.9562
29
+ - name: Accuracy
30
+ type: accuracy
31
+ value: 0.9562
32
  ---
33
 
34
 
35
  # Tigrinya POS tagging with TiRoBERTa
36
 
37
+ This model is a fine-tuned version of [TiRoBERTa](https://huggingface.co/fgaim/tiroberta) on the NTC-v1 dataset (Tedla et al. 2016).
38
 
39
  ## Training
40
 
51
 
52
  ### Results
53
 
54
+ The model achieves the following results on the test set:
55
  - Loss: 0.3194
56
  - Adj Precision: 0.9219
57
  - Adj Recall: 0.9335
144
  - Pytorch 1.9.0+cu111
145
  - Datasets 1.13.3
146
  - Tokenizers 0.10.3
147
+
148
+
149
+ ## Citation
150
+
151
+ If you use this model in your product or research, please cite as follows:
152
+
153
+ ```
154
+ @article{Fitsum2021TiPLMs,
155
+ author={Fitsum Gaim and Wonsuk Yang and Jong C. Park},
156
+ title={Monolingual Pre-trained Language Models for Tigrinya},
157
+ year=2021,
158
+ publisher={WiNLP 2021/EMNLP 2021}
159
+ }
160
+ ```
161
+
162
+
163
+ ## References
164
+
165
+ ```
166
+ Tedla, Y., Yamamoto, K. & Marasinghe, A. 2016.
167
+ Tigrinya Part-of-Speech Tagging with Morphological Patterns and the New Nagaoka Tigrinya Corpus.
168
+ International Journal of Computer Applications 146 pp. 33-41 (2016).
169
+ ```
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/content/tiroberta",
3
  "architectures": [
4
  "RobertaForTokenClassification"
5
  ],
@@ -12,50 +12,50 @@
12
  "hidden_dropout_prob": 0.1,
13
  "hidden_size": 768,
14
  "id2label": {
15
- "0": 0,
16
- "1": 1,
17
- "2": 2,
18
- "3": 3,
19
- "4": 4,
20
- "5": 5,
21
- "6": 6,
22
- "7": 7,
23
- "8": 8,
24
- "9": 9,
25
- "10": 10,
26
- "11": 11,
27
- "12": 12,
28
- "13": 13,
29
- "14": 14,
30
- "15": 15,
31
- "16": 16,
32
- "17": 17,
33
- "18": 18,
34
- "19": 19
35
  },
36
  "initializer_range": 0.02,
37
  "intermediate_size": 3072,
38
  "label2id": {
39
- "0": 0,
40
- "1": 1,
41
- "2": 2,
42
- "3": 3,
43
- "4": 4,
44
- "5": 5,
45
- "6": 6,
46
- "7": 7,
47
- "8": 8,
48
- "9": 9,
49
- "10": 10,
50
- "11": 11,
51
- "12": 12,
52
- "13": 13,
53
- "14": 14,
54
- "15": 15,
55
- "16": 16,
56
- "17": 17,
57
- "18": 18,
58
- "19": 19
59
  },
60
  "layer_norm_eps": 1e-05,
61
  "max_position_embeddings": 514,
1
  {
2
+ "_name_or_path": "tiroberta-pos",
3
  "architectures": [
4
  "RobertaForTokenClassification"
5
  ],
12
  "hidden_dropout_prob": 0.1,
13
  "hidden_size": 768,
14
  "id2label": {
15
+ "0": "ADJ",
16
+ "1": "ADV",
17
+ "2": "CON",
18
+ "3": "FW",
19
+ "4": "INT",
20
+ "5": "N",
21
+ "6": "NUM",
22
+ "7": "N_PRP",
23
+ "8": "N_V",
24
+ "9": "PRE",
25
+ "10": "PRO",
26
+ "11": "PUN",
27
+ "12": "UNC",
28
+ "13": "V",
29
+ "14": "V_AUX",
30
+ "15": "V_GER",
31
+ "16": "V_IMF",
32
+ "17": "V_IMV",
33
+ "18": "V_PRF",
34
+ "19": "V_REL"
35
  },
36
  "initializer_range": 0.02,
37
  "intermediate_size": 3072,
38
  "label2id": {
39
+ "ADJ": 0,
40
+ "ADV": 1,
41
+ "CON": 2,
42
+ "FW": 3,
43
+ "INT": 4,
44
+ "N": 5,
45
+ "NUM": 6,
46
+ "N_PRP": 7,
47
+ "N_V": 8,
48
+ "PRE": 9,
49
+ "PRO": 10,
50
+ "PUN": 11,
51
+ "UNC": 12,
52
+ "V": 13,
53
+ "V_AUX": 14,
54
+ "V_GER": 15,
55
+ "V_IMF": 16,
56
+ "V_IMV": 17,
57
+ "V_PRF": 18,
58
+ "V_REL": 19
59
  },
60
  "layer_norm_eps": 1e-05,
61
  "max_position_embeddings": 514,
runs/Oct17_10-28-48_6a22a682a44e/1634466547.3715649/events.out.tfevents.1634466547.6a22a682a44e.3256.1 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea18e60b3a0bd59e3b7369aba4da5ae92532aa2e241f973748f6b58ca8dc5667
3
- size 4528
 
 
 
runs/Oct17_10-28-48_6a22a682a44e/events.out.tfevents.1634466547.6a22a682a44e.3256.0 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c065648c5f56f8300b841f262506d12d772ab58cf0b9893fa612f9b4a7afb41f
3
- size 5559
 
 
 
runs/Oct17_10-28-48_6a22a682a44e/events.out.tfevents.1634468593.6a22a682a44e.3256.2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7723f601a06a5253ffa02a77503160dae1b3615fcbf2e2b1da5c8ed951986582
3
- size 4880
 
 
 
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": true, "errors": "replace", "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "special_tokens_map_file": null, "name_or_path": "/content/tiroberta", "tokenizer_class": "RobertaTokenizer"}
1
+ {"unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": true, "errors": "replace", "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "special_tokens_map_file": null, "name_or_path": "./", "tokenizer_class": "RobertaTokenizer"}