truongphan
committed on
Commit
•
33f44dc
1
Parent(s):
d270279
Update README.md
Browse files
README.md
CHANGED
@@ -9,11 +9,21 @@ We fine-tuned BERT to train Vietnam tourism dataset for a question answering sys
|
|
9 |
|
10 |
model = AutoModelForTokenClassification.from_pretrained("truongphan/vntourismNER")
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
line = "King Garden is located in Thanh Thuy, Phu Tho"
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
|
|
|
|
17 |
|
18 |
|
19 |
# Authors
|
|
|
9 |
|
10 |
model = AutoModelForTokenClassification.from_pretrained("truongphan/vntourismNER")
|
11 |
|
12 |
+
custom_labels = [
|
13 |
+
"O", "B-TA", "I-TA", "B-PRO", "I-PRO", "B-TEM", "I-TEM", "B-COM", "I-COM", "B-PAR", "I-PAR", "B-CIT", "I-CIT",
|
14 |
+
"B-MOU", "I-MOU", "B-HAM", "I-HAM", "B-AWA", "I-AWA", "B-VIS", "I-VIS", "B-FES", "I-FES", "B-ISL", "I-ISL",
|
15 |
+
"B-TOW", "I-TOW", "B-VIL", "I-VIL", "B-CHU", "I-CHU", "B-PAG", "I-PAG", "B-BEA", "I-BEA", "B-WAR", "I-WAR",
|
16 |
+
"B-WAT", "I-WAT", "B-SA", "I-SA", "B-SER", "I-SER", "B-STR", "I-STR", "B-NUN", "I-NUN", "B-PAL", "I-PAL",
|
17 |
+
"B-VOL", "I-VOL", "B-HIL", "I-HIL", "B-MAR", "I-MAR", "B-VAL", "I-VAL", "B-PROD", "I-PROD", "B-DIS", "I-DIS",
|
18 |
+
"B-FOO", "I-FOO", "B-DISH", "I-DISH", "B-DRI", "I-DRI"
|
19 |
+
]
|
20 |
line = "King Garden is located in Thanh Thuy, Phu Tho"
|
21 |
+
|
22 |
+
nlp = pipeline('ner', model=model, tokenizer=tokenizer)
|
23 |
+
|
24 |
+
ner_rs = nlp(line)
|
25 |
+
for k in ner_rs:
|
26 |
+
print(custom_labels[int(str(k['entity']).replace('LABEL_',''))], '-', k['word'])
|
27 |
|
28 |
|
29 |
# Authors
|