Thao Pham
committed on
Commit
•
97bb53e
1
Parent(s):
28fd7c1
Adding pre-trained models
Browse files
- .gitattributes +5 -0
- .gitignore +0 -1
- model.py +2 -2
- pretrained-models/ner-vietnamese-electra-base.pt +3 -0
- pretrained-models/phobert.pt +3 -0
- pretrained-models/vncorenlp/VnCoreNLP-1.2.jar +3 -0
- pretrained-models/vncorenlp/models/dep/vi-dep.xz +3 -0
- pretrained-models/vncorenlp/models/ner/vi-500brownclusters.xz +3 -0
- pretrained-models/vncorenlp/models/ner/vi-ner.xz +3 -0
- pretrained-models/vncorenlp/models/ner/vi-pretrainedembeddings.xz +3 -0
- pretrained-models/vncorenlp/models/postagger/vi-tagger +3 -0
- pretrained-models/vncorenlp/models/wordsegmenter/vi-vocab +0 -0
- pretrained-models/vncorenlp/models/wordsegmenter/wordsegmenter.rdr +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
vncorenlp filter=lfs diff=lfs merge=lfs -text
|
37 |
+
pretrained-models/phobert.pt filter=lfs diff=lfs merge=lfs -text
|
38 |
+
pretrained-models/ner-vietnamese-electra-base.pt filter=lfs diff=lfs merge=lfs -text
|
39 |
+
pretrained-models/vncorenlp/models/postagger/vi-tagger filter=lfs diff=lfs merge=lfs -text
|
40 |
+
pretrained-models/vncorenlp/VnCoreNLP-1.2.jar filter=lfs diff=lfs merge=lfs -text
|
.gitignore
CHANGED
@@ -1,3 +1,2 @@
|
|
1 |
venv
|
2 |
-
pretrained-models
|
3 |
test_file.txt
|
|
|
1 |
venv
|
|
|
2 |
test_file.txt
|
model.py
CHANGED
@@ -43,14 +43,14 @@ class KeyBERTVi:
|
|
43 |
dir_path = os.path.dirname(os.path.realpath(__file__))
|
44 |
if __name__ == "__main__":
|
45 |
# args
|
46 |
-
print(dir_path)
|
47 |
|
48 |
stopwords_file_path = f'{dir_path}/vietnamese-stopwords-dash.txt'
|
49 |
|
50 |
text_file_path = sys.argv[1]
|
51 |
with open(f'{dir_path}/{text_file_path}', 'r') as f:
|
52 |
text = ' '.join([ln.strip() for ln in f.readlines()])
|
53 |
-
print(text)
|
54 |
|
55 |
kw_model = KeyBERTVi(stopwords_file_path)
|
56 |
title = None
|
|
|
43 |
dir_path = os.path.dirname(os.path.realpath(__file__))
|
44 |
if __name__ == "__main__":
|
45 |
# args
|
46 |
+
# print(dir_path)
|
47 |
|
48 |
stopwords_file_path = f'{dir_path}/vietnamese-stopwords-dash.txt'
|
49 |
|
50 |
text_file_path = sys.argv[1]
|
51 |
with open(f'{dir_path}/{text_file_path}', 'r') as f:
|
52 |
text = ' '.join([ln.strip() for ln in f.readlines()])
|
53 |
+
# print(text)
|
54 |
|
55 |
kw_model = KeyBERTVi(stopwords_file_path)
|
56 |
title = None
|
pretrained-models/ner-vietnamese-electra-base.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af136f7fcb73fba5edee9021032227ede597c2882f39d910b3b830cf49bf5d52
|
3 |
+
size 532423140
|
pretrained-models/phobert.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:984b4f9b49a06331113974f50c8dc96b845bf808034b79993c3ddbf4a946d872
|
3 |
+
size 540111904
|
pretrained-models/vncorenlp/VnCoreNLP-1.2.jar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e2811cdbc2ddfc71d04be5dc36e185c88dcd1ad4d5d69e4ff2e1369dccf7793
|
3 |
+
size 27412703
|
pretrained-models/vncorenlp/models/dep/vi-dep.xz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:266e4a3a55d5edd1607d5f036c2f95b70c0a6c80f58b57fd9962677a6ef331b7
|
3 |
+
size 16048864
|
pretrained-models/vncorenlp/models/ner/vi-500brownclusters.xz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d30f9cfdf0af193a69e185d1acda0306a9fbe1321f8a700f7c66557a90f92b8c
|
3 |
+
size 5599844
|
pretrained-models/vncorenlp/models/ner/vi-ner.xz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f04c5e752d7f99a6313b758fc2607a2c3906e58b1d60a37eb0192aead73d61f7
|
3 |
+
size 9956876
|
pretrained-models/vncorenlp/models/ner/vi-pretrainedembeddings.xz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00d3d034f1b23a8bfe5168195741fde845808c212e6dfcd4c94bead1665eb0fc
|
3 |
+
size 57313672
|
pretrained-models/vncorenlp/models/postagger/vi-tagger
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a95608a5982db89c11353b451154ec396eccc0ff1f5b22874935ecdf4e0ace01
|
3 |
+
size 29709468
|
pretrained-models/vncorenlp/models/wordsegmenter/vi-vocab
ADDED
Binary file (527 kB). View file
|
|
pretrained-models/vncorenlp/models/wordsegmenter/wordsegmenter.rdr
ADDED
The diff for this file is too large to render.
See raw diff
|
|