96abhishekarora
committed on
Commit
•
e228287
1
Parent(s):
eb3a6ff
Add new LinkTransformer model.
Browse files
- .gitattributes +1 -0
- 1_Pooling/config.json +3 -1
- LT_training_config.json +8 -8
- README.md +8 -8
- config.json +1 -1
- model.safetensors +1 -1
.gitattributes
CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
model.safetensors filter=lfs diff=lfs merge=lfs -text
|
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
model.safetensors filter=lfs diff=lfs merge=lfs -text
|
37 |
+
.git/lfs/objects/50/c5/50c55d19867e26fc83db98c43d59b1d3af837a78e24a9d131a4953a572074379 filter=lfs diff=lfs merge=lfs -text
|
1_Pooling/config.json
CHANGED
@@ -3,5 +3,7 @@
|
|
3 |
"pooling_mode_cls_token": false,
|
4 |
"pooling_mode_mean_tokens": true,
|
5 |
"pooling_mode_max_tokens": false,
|
6 |
-
"pooling_mode_mean_sqrt_len_tokens": false
|
|
|
|
|
7 |
}
|
|
|
3 |
"pooling_mode_cls_token": false,
|
4 |
"pooling_mode_mean_tokens": true,
|
5 |
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false
|
9 |
}
|
LT_training_config.json
CHANGED
@@ -1,18 +1,18 @@
|
|
1 |
{
|
2 |
"model_save_dir": "models",
|
3 |
-
"model_save_name": "lt-
|
4 |
-
"opt_model_description": "This
|
5 |
"opt_model_lang": "ja",
|
6 |
"train_batch_size": 64,
|
7 |
-
"num_epochs":
|
8 |
"warm_up_perc": 1,
|
9 |
"learning_rate": 2e-05,
|
10 |
-
"loss_type": "
|
11 |
"val_perc": 0.2,
|
12 |
"wandb_names": {
|
13 |
-
"id": "econabhishek",
|
14 |
-
"run": "lt-historicjapanesecompanies-comp-prod-ind_onlinecontrastive_full",
|
15 |
"project": "linkage",
|
|
|
|
|
16 |
"entity": "econabhishek"
|
17 |
},
|
18 |
"add_pooling_layer": false,
|
@@ -22,8 +22,8 @@
|
|
22 |
"save_val_test_pickles": true,
|
23 |
"val_query_prop": 0.5,
|
24 |
"loss_params": {},
|
25 |
-
"eval_type": "
|
26 |
"training_dataset": "dataframe",
|
27 |
"base_model_path": "oshizo/sbert-jsnli-luke-japanese-base-lite",
|
28 |
-
"best_model_path": "models/lt-
|
29 |
}
|
|
|
1 |
{
|
2 |
"model_save_dir": "models",
|
3 |
+
"model_save_name": "lt-wikidata-comp-prod-ind-ja",
|
4 |
+
"opt_model_description": "This is a (Modern) Japanese Link Transformer model - trained on Company <SEP> Product <SEP> Industry from wiki data.",
|
5 |
"opt_model_lang": "ja",
|
6 |
"train_batch_size": 64,
|
7 |
+
"num_epochs": 70,
|
8 |
"warm_up_perc": 1,
|
9 |
"learning_rate": 2e-05,
|
10 |
+
"loss_type": "supcon",
|
11 |
"val_perc": 0.2,
|
12 |
"wandb_names": {
|
|
|
|
|
13 |
"project": "linkage",
|
14 |
+
"id": "econabhishek",
|
15 |
+
"run": "lt-wikidata-comp-prod-ind-ja",
|
16 |
"entity": "econabhishek"
|
17 |
},
|
18 |
"add_pooling_layer": false,
|
|
|
22 |
"save_val_test_pickles": true,
|
23 |
"val_query_prop": 0.5,
|
24 |
"loss_params": {},
|
25 |
+
"eval_type": "retrieval",
|
26 |
"training_dataset": "dataframe",
|
27 |
"base_model_path": "oshizo/sbert-jsnli-luke-japanese-base-lite",
|
28 |
+
"best_model_path": "models/lt-wikidata-comp-prod-ind-ja"
|
29 |
}
|
README.md
CHANGED
@@ -22,7 +22,7 @@ Take a look at the documentation of [sentence-transformers](https://www.sbert.ne
|
|
22 |
This model has been fine-tuned on the model : oshizo/sbert-jsnli-luke-japanese-base-lite. It is pretrained for the language : - ja.
|
23 |
|
24 |
|
25 |
-
This
|
26 |
|
27 |
## Usage (LinkTransformer)
|
28 |
|
@@ -95,20 +95,20 @@ The model was trained with the parameters:
|
|
95 |
|
96 |
**DataLoader**:
|
97 |
|
98 |
-
`torch.utils.data.dataloader.DataLoader` of length
|
99 |
```
|
100 |
-
{'batch_size': 64, 'sampler': 'torch.utils.data.
|
101 |
```
|
102 |
|
103 |
**Loss**:
|
104 |
|
105 |
-
`linktransformer.modified_sbert.losses.
|
106 |
|
107 |
Parameters of the fit()-Method:
|
108 |
```
|
109 |
{
|
110 |
-
"epochs":
|
111 |
-
"evaluation_steps":
|
112 |
"evaluator": "sentence_transformers.evaluation.SequentialEvaluator.SequentialEvaluator",
|
113 |
"max_grad_norm": 1,
|
114 |
"optimizer_class": "<class 'torch.optim.adamw.AdamW'>",
|
@@ -117,7 +117,7 @@ Parameters of the fit()-Method:
|
|
117 |
},
|
118 |
"scheduler": "WarmupLinear",
|
119 |
"steps_per_epoch": null,
|
120 |
-
"warmup_steps":
|
121 |
"weight_decay": 0.01
|
122 |
}
|
123 |
```
|
@@ -127,7 +127,7 @@ Parameters of the fit()-Method:
|
|
127 |
|
128 |
LinkTransformer(
|
129 |
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: LukeModel
|
130 |
-
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
|
131 |
)
|
132 |
```
|
133 |
|
|
|
22 |
This model has been fine-tuned on the model : oshizo/sbert-jsnli-luke-japanese-base-lite. It is pretrained for the language : - ja.
|
23 |
|
24 |
|
25 |
+
This is a (Modern) Japanese Link Transformer model - trained on Company <SEP> Product <SEP> Industry from wiki data.
|
26 |
|
27 |
## Usage (LinkTransformer)
|
28 |
|
|
|
95 |
|
96 |
**DataLoader**:
|
97 |
|
98 |
+
`torch.utils.data.dataloader.DataLoader` of length 57 with parameters:
|
99 |
```
|
100 |
+
{'batch_size': 64, 'sampler': 'torch.utils.data.dataloader._InfiniteConstantSampler', 'batch_sampler': 'torch.utils.data.sampler.BatchSampler'}
|
101 |
```
|
102 |
|
103 |
**Loss**:
|
104 |
|
105 |
+
`linktransformer.modified_sbert.losses.SupConLoss_wandb`
|
106 |
|
107 |
Parameters of the fit()-Method:
|
108 |
```
|
109 |
{
|
110 |
+
"epochs": 70,
|
111 |
+
"evaluation_steps": 29,
|
112 |
"evaluator": "sentence_transformers.evaluation.SequentialEvaluator.SequentialEvaluator",
|
113 |
"max_grad_norm": 1,
|
114 |
"optimizer_class": "<class 'torch.optim.adamw.AdamW'>",
|
|
|
117 |
},
|
118 |
"scheduler": "WarmupLinear",
|
119 |
"steps_per_epoch": null,
|
120 |
+
"warmup_steps": 3990,
|
121 |
"weight_decay": 0.01
|
122 |
}
|
123 |
```
|
|
|
127 |
|
128 |
LinkTransformer(
|
129 |
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: LukeModel
|
130 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False})
|
131 |
)
|
132 |
```
|
133 |
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"LukeModel"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "models/lt-wikidata-comp-prod-ind-ja",
|
3 |
"architectures": [
|
4 |
"LukeModel"
|
5 |
],
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 532299592
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d4a325fec886bed0d30b8007d07638a460b83f1ce43ab481dd762e350b0f13b
|
3 |
size 532299592
|