Shaltiel committed on
Commit
75209df
•
1 Parent(s): cb3a4de

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +187 -0
README.md CHANGED
@@ -1,3 +1,190 @@
1
  ---
2
  license: cc-by-4.0
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: cc-by-4.0
3
+ language:
4
+ - he
5
  ---
6
+ # DictaBERT: A State-of-the-Art BERT Suite for Modern Hebrew
7
+
8
+ State-of-the-art language model for Hebrew, as released [here](https://arxiv.org/abs/2308.16687).
9
+
10
+ This is the fine-tuned model for the morphological tagging task.
11
+
12
+ Sample usage:
13
+
14
+ ```python
15
+ from transformers import AutoModel, AutoTokenizer
16
+
17
+ tokenizer = AutoTokenizer.from_pretrained('dicta-il/dictabert-morph')
18
+ model = AutoModel.from_pretrained('dicta-il/dictabert-morph', trust_remote_code=True)
19
+
20
+ model.eval()
21
+
22
+ sentence = 'בשנת 1948 השלים אפרים קישון את לימודיו בפיסול מתכת ובתולדות האמנות והחל לפרסם מאמרים הומוריסטיים'
23
+ print(model.predict([sentence], tokenizer))
24
+
25
+ ```
26
+
27
+ Output:
28
+ ```json
29
+ [{
30
+ "text": "בשנת 1948 השלים אפרים קישון את לימודיו בפיסול מתכת ובתולדות האמנות והחל לפרסם מאמרים הומוריסטיים",
31
+ "tokens": [{
32
+ "token": "בשנת",
33
+ "pos": "NOUN",
34
+ "feats": {
35
+ "Gender": "Fem",
36
+ "Number": "Sing"
37
+ },
38
+ "prefixes": ["ADP"],
39
+ "suffix": false
40
+ }, {
41
+ "token": "1948",
42
+ "pos": "NUM",
43
+ "feats": {},
44
+ "prefixes": [],
45
+ "suffix": false
46
+ }, {
47
+ "token": "השלים",
48
+ "pos": "VERB",
49
+ "feats": {
50
+ "Gender": "Masc",
51
+ "Number": "Sing",
52
+ "Person": "3",
53
+ "Tense": "Past"
54
+ },
55
+ "prefixes": [],
56
+ "suffix": false
57
+ }, {
58
+ "token": "אפרים",
59
+ "pos": "PROPN",
60
+ "feats": {},
61
+ "prefixes": [],
62
+ "suffix": false
63
+ }, {
64
+ "token": "קישון",
65
+ "pos": "PROPN",
66
+ "feats": {},
67
+ "prefixes": [],
68
+ "suffix": false
69
+ }, {
70
+ "token": "את",
71
+ "pos": "ADP",
72
+ "feats": {},
73
+ "prefixes": [],
74
+ "suffix": false
75
+ }, {
76
+ "token": "לימודיו",
77
+ "pos": "NOUN",
78
+ "feats": {
79
+ "Gender": "Masc",
80
+ "Number": "Plur"
81
+ },
82
+ "prefixes": [],
83
+ "suffix": "PRON",
84
+ "suffix_feats": {
85
+ "Gender": "Masc",
86
+ "Number": "Sing",
87
+ "Person": "3"
88
+ }
89
+ }, {
90
+ "token": "בפיסול",
91
+ "pos": "NOUN",
92
+ "feats": {
93
+ "Gender": "Masc",
94
+ "Number": "Sing"
95
+ },
96
+ "prefixes": ["ADP"],
97
+ "suffix": false
98
+ }, {
99
+ "token": "מתכת",
100
+ "pos": "NOUN",
101
+ "feats": {
102
+ "Gender": "Fem",
103
+ "Number": "Sing"
104
+ },
105
+ "prefixes": [],
106
+ "suffix": false
107
+ }, {
108
+ "token": "ובתולדות",
109
+ "pos": "NOUN",
110
+ "feats": {
111
+ "Gender": "Fem",
112
+ "Number": "Plur"
113
+ },
114
+ "prefixes": ["CCONJ", "ADP"],
115
+ "suffix": false
116
+ }, {
117
+ "token": "האמנות",
118
+ "pos": "NOUN",
119
+ "feats": {
120
+ "Gender": "Fem",
121
+ "Number": "Sing"
122
+ },
123
+ "prefixes": ["DET"],
124
+ "suffix": false
125
+ }, {
126
+ "token": "והחל",
127
+ "pos": "VERB",
128
+ "feats": {
129
+ "Gender": "Masc",
130
+ "Number": "Sing",
131
+ "Person": "3",
132
+ "Tense": "Past"
133
+ },
134
+ "prefixes": ["CCONJ"],
135
+ "suffix": false
136
+ }, {
137
+ "token": "לפרסם",
138
+ "pos": "VERB",
139
+ "feats": {},
140
+ "prefixes": [],
141
+ "suffix": false
142
+ }, {
143
+ "token": "מאמרים",
144
+ "pos": "NOUN",
145
+ "feats": {
146
+ "Gender": "Masc",
147
+ "Number": "Plur"
148
+ },
149
+ "prefixes": [],
150
+ "suffix": false
151
+ }, {
152
+ "token": "הומוריסטיים",
153
+ "pos": "ADJ",
154
+ "feats": {
155
+ "Gender": "Masc",
156
+ "Number": "Plur"
157
+ },
158
+ "prefixes": [],
159
+ "suffix": false
160
+ }]
161
+ }]
162
+ ```
163
+
164
+
165
+ ## Citation
166
+
167
+ If you use DictaBERT in your research, please cite ```DictaBERT: A State-of-the-Art BERT Suite for Modern Hebrew```
168
+
169
+ **BibTeX:**
170
+
171
+ To add
172
+
173
+ ## License
174
+
175
+ Shield: [![CC BY 4.0][cc-by-shield]][cc-by]
176
+
177
+ This work is licensed under a
178
+ [Creative Commons Attribution 4.0 International License][cc-by].
179
+
180
+ [![CC BY 4.0][cc-by-image]][cc-by]
181
+
182
+ [cc-by]: http://creativecommons.org/licenses/by/4.0/
183
+ [cc-by-image]: https://i.creativecommons.org/l/by/4.0/88x31.png
184
+ [cc-by-shield]: https://img.shields.io/badge/License-CC%20BY%204.0-lightgrey.svg
185
+
186
+
187
+
188
+
189
+
190
+