kardosdrur
committed on
Commit
•
9dd3049
1
Parent(s):
da72807
Upload folder using huggingface_hub
Browse files
- README.md +24 -0
- config.cfg +20 -0
- model.bin +3 -0
- tokenizer.bin +3 -0
README.md
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
---
|
3 |
+
language:
|
4 |
+
- en
|
5 |
+
tags:
|
6 |
+
- embeddings
|
7 |
+
- tokenizers
|
8 |
+
library_name: yasep
|
9 |
+
---
|
10 |
+
|
11 |
+
# kardosdrur/en_20-newsgroups_wordlevel_glove-50
|
12 |
+
|
13 |
+
This repository contains an embedding pipeline that has been trained using [yasep](https://github.com/x-tabdeveloping/yasep).
|
14 |
+
|
15 |
+
## Usage
|
16 |
+
```python
|
17 |
+
# pip install yasep
|
18 |
+
|
19 |
+
from yasep.pipeline import Pipeline
|
20 |
+
|
21 |
+
nlp = Pipeline.from_hub('kardosdrur/en_20-newsgroups_wordlevel_glove-50')
|
22 |
+
nlp("A text you want to process")
|
23 |
+
|
24 |
+
```
|
config.cfg
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[model]
|
2 |
+
@models = "glove.v1"
|
3 |
+
vector_size = 50
|
4 |
+
alpha = 0.75
|
5 |
+
window = 15
|
6 |
+
symmetric = true
|
7 |
+
distance_weighting = true
|
8 |
+
iter = 25
|
9 |
+
initial_learning_rate = 0.05
|
10 |
+
n_jobs = 8
|
11 |
+
memory = 4.0
|
12 |
+
|
13 |
+
[tokenizer]
|
14 |
+
@tokenizers = "wordlevel_tokenizer.v1"
|
15 |
+
vocab_size = 30000
|
16 |
+
min_frequency = 0
|
17 |
+
clean_text = true
|
18 |
+
handle_chinese_chars = true
|
19 |
+
strip_accents = null
|
20 |
+
lowercase = true
|
model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c45f6b00d369d9e9c96dc72852e6ae3dd8e8478ff28305ec8adca0fc61dcabb3
|
3 |
+
size 6469141
|
tokenizer.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3438ae8da0e745a64d9f5cc474b1933ceb66fde953b6cb0c6955c090ab0d462
|
3 |
+
size 454258
|