96abhishekarora
committed on
Commit
·
c15228a
1
Parent(s):
fe59494
Updated model with better training and evaluation. Test and val data included as pickle files. Older Legacy files were removed to avoid confusion.
Browse files
- .gitattributes +4 -39
- Information-Retrieval_evaluation_eval_results.csv +6 -0
- Information-Retrieval_evaluation_test_results.csv +5 -0
- README.md +1 -1
- config.json +1 -1
- eval/Information-Retrieval_evaluation_eval_results.csv +0 -0
- special_tokens_map.json +6 -42
- pytorch_model.bin → test_data.pickle +2 -2
- tokenizer.json +0 -0
- tokenizer_config.json +0 -7
- val_data.pickle +3 -0
.gitattributes
CHANGED
@@ -1,41 +1,6 @@
|
|
1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
-
pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
|
37 |
model.safetensors filter=lfs diff=lfs merge=lfs -text
|
|
|
38 |
.git/lfs/objects/10/5b/105b8df0ad58b8f0280d55ee92bd528df20c93351ed578775f427b8022581ec0 filter=lfs diff=lfs merge=lfs -text
|
39 |
-
.
|
40 |
-
.
|
41 |
-
.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
model.safetensors filter=lfs diff=lfs merge=lfs -text
|
2 |
+
.git/lfs/objects/97/f4/97f4992b52a0621c15f4b731a5d0aa12f93a8136dfefe9da6d15efe200de2025 filter=lfs diff=lfs merge=lfs -text
|
3 |
.git/lfs/objects/10/5b/105b8df0ad58b8f0280d55ee92bd528df20c93351ed578775f427b8022581ec0 filter=lfs diff=lfs merge=lfs -text
|
4 |
+
test_data.pickle filter=lfs diff=lfs merge=lfs -text
|
5 |
+
val_data.pickle filter=lfs diff=lfs merge=lfs -text
|
6 |
+
sentencepiece.bpe.model filter=lfs diff=lfs merge=lfs -text
|
Information-Retrieval_evaluation_eval_results.csv
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,cos_sim-Accuracy@1,cos_sim-Accuracy@3,cos_sim-Accuracy@5,cos_sim-Accuracy@10,cos_sim-Precision@1,cos_sim-Recall@1,cos_sim-Precision@3,cos_sim-Recall@3,cos_sim-Precision@5,cos_sim-Recall@5,cos_sim-Precision@10,cos_sim-Recall@10,cos_sim-MRR@10,cos_sim-NDCG@10,cos_sim-MAP@100
|
2 |
+
0,0,0.7432432432432432,0.9054054054054054,0.9459459459459459,0.9594594594594594,0.7432432432432432,0.7274774774774774,0.3018018018018018,0.8896396396396395,0.19189189189189185,0.9346846846846846,0.09729729729729729,0.9481981981981981,0.8277027027027029,0.8510838939890354,0.8163342935605461
|
3 |
+
0,0,0.7432432432432432,0.9054054054054054,0.9459459459459459,0.9594594594594594,0.7432432432432432,0.7274774774774774,0.3018018018018018,0.8896396396396395,0.19189189189189185,0.9346846846846846,0.09729729729729729,0.9481981981981981,0.8277027027027029,0.8510838939890354,0.8163342935605461
|
4 |
+
0,0,0.3772455089820359,0.564870259481038,0.6946107784431138,0.8483033932135728,0.3772455089820359,0.3772455089820359,0.1882900864936793,0.564870259481038,0.13892215568862273,0.6946107784431138,0.08483033932135728,0.8483033932135728,0.5086089725311275,0.5891511561097007,0.5148648590536671
|
5 |
+
0,0,0.3772455089820359,0.564870259481038,0.6946107784431138,0.8483033932135728,0.3772455089820359,0.3772455089820359,0.1882900864936793,0.564870259481038,0.13892215568862273,0.6946107784431138,0.08483033932135728,0.8483033932135728,0.5086089725311275,0.5891511561097007,0.5148648590536671
|
6 |
+
0,0,0.3772455089820359,0.564870259481038,0.6946107784431138,0.8483033932135728,0.3772455089820359,0.3772455089820359,0.1882900864936793,0.564870259481038,0.13892215568862273,0.6946107784431138,0.08483033932135728,0.8483033932135728,0.5086089725311275,0.5891511561097007,0.5148648590536671
|
Information-Retrieval_evaluation_test_results.csv
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,cos_sim-Accuracy@1,cos_sim-Accuracy@3,cos_sim-Accuracy@5,cos_sim-Accuracy@10,cos_sim-Precision@1,cos_sim-Recall@1,cos_sim-Precision@3,cos_sim-Recall@3,cos_sim-Precision@5,cos_sim-Recall@5,cos_sim-Precision@10,cos_sim-Recall@10,cos_sim-MRR@10,cos_sim-NDCG@10,cos_sim-MAP@100
|
2 |
+
0,0,0.4947565543071161,0.6910112359550562,0.804119850187266,0.9108614232209737,0.4947565543071161,0.4534398965578741,0.24269662921348314,0.6424451578384162,0.17617977528089887,0.7637154449794898,0.10408239700374532,0.8804012841091494,0.6226166696391399,0.6721939572015778,0.6041564193432677
|
3 |
+
0,0,0.49887640449438203,0.7138576779026217,0.8307116104868913,0.9258426966292135,0.49887640449438203,0.4541889602282861,0.2545568039950063,0.6692460317460317,0.18337078651685393,0.7918958444801142,0.10606741573033708,0.8973154093097913,0.6346632185958015,0.68553627425209,0.6149989655561907
|
4 |
+
0,0,0.6821192052980133,0.890728476821192,0.9370860927152318,0.9668874172185431,0.6821192052980133,0.6821192052980133,0.29690949227373065,0.890728476821192,0.18741721854304633,0.9370860927152318,0.0966887417218543,0.9668874172185431,0.7943918847892356,0.8374009368138124,0.796834833672582
|
5 |
+
0,0,0.7185430463576159,0.9238410596026491,0.956953642384106,0.9801324503311258,0.7185430463576159,0.7185430463576159,0.30794701986754963,0.9238410596026491,0.19139072847682115,0.956953642384106,0.09801324503311257,0.9801324503311258,0.8196100073583515,0.8596148051808452,0.8212238276807813
|
README.md
CHANGED
@@ -10,7 +10,7 @@ tags:
|
|
10 |
|
11 |
---
|
12 |
|
13 |
-
#
|
14 |
|
15 |
This is a [LinkTransformer](https://linktransformer.github.io/) model. At its core, this model is a sentence transformer ([sentence-transformers](https://www.SBERT.net)) model - it just wraps around the class.
|
16 |
It is designed for quick and easy record linkage (entity-matching) through the LinkTransformer package. The tasks include clustering, deduplication, linking, aggregation and more.
|
|
|
10 |
|
11 |
---
|
12 |
|
13 |
+
# {MODEL_NAME}
|
14 |
|
15 |
This is a [LinkTransformer](https://linktransformer.github.io/) model. At its core, this model is a sentence transformer ([sentence-transformers](https://www.SBERT.net)) model - it just wraps around the class.
|
16 |
It is designed for quick and easy record linkage (entity-matching) through the LinkTransformer package. The tasks include clustering, deduplication, linking, aggregation and more.
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"CamembertModel"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "dangvantuan/sentence-camembert-large",
|
3 |
"architectures": [
|
4 |
"CamembertModel"
|
5 |
],
|
eval/Information-Retrieval_evaluation_eval_results.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
special_tokens_map.json
CHANGED
@@ -3,27 +3,9 @@
|
|
3 |
"<s>NOTUSED",
|
4 |
"</s>NOTUSED"
|
5 |
],
|
6 |
-
"bos_token":
|
7 |
-
|
8 |
-
|
9 |
-
"normalized": false,
|
10 |
-
"rstrip": false,
|
11 |
-
"single_word": false
|
12 |
-
},
|
13 |
-
"cls_token": {
|
14 |
-
"content": "<s>",
|
15 |
-
"lstrip": false,
|
16 |
-
"normalized": false,
|
17 |
-
"rstrip": false,
|
18 |
-
"single_word": false
|
19 |
-
},
|
20 |
-
"eos_token": {
|
21 |
-
"content": "</s>",
|
22 |
-
"lstrip": false,
|
23 |
-
"normalized": false,
|
24 |
-
"rstrip": false,
|
25 |
-
"single_word": false
|
26 |
-
},
|
27 |
"mask_token": {
|
28 |
"content": "<mask>",
|
29 |
"lstrip": true,
|
@@ -31,25 +13,7 @@
|
|
31 |
"rstrip": false,
|
32 |
"single_word": false
|
33 |
},
|
34 |
-
"pad_token":
|
35 |
-
|
36 |
-
|
37 |
-
"normalized": false,
|
38 |
-
"rstrip": false,
|
39 |
-
"single_word": false
|
40 |
-
},
|
41 |
-
"sep_token": {
|
42 |
-
"content": "</s>",
|
43 |
-
"lstrip": false,
|
44 |
-
"normalized": false,
|
45 |
-
"rstrip": false,
|
46 |
-
"single_word": false
|
47 |
-
},
|
48 |
-
"unk_token": {
|
49 |
-
"content": "<unk>",
|
50 |
-
"lstrip": false,
|
51 |
-
"normalized": false,
|
52 |
-
"rstrip": false,
|
53 |
-
"single_word": false
|
54 |
-
}
|
55 |
}
|
|
|
3 |
"<s>NOTUSED",
|
4 |
"</s>NOTUSED"
|
5 |
],
|
6 |
+
"bos_token": "<s>",
|
7 |
+
"cls_token": "<s>",
|
8 |
+
"eos_token": "</s>",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
"mask_token": {
|
10 |
"content": "<mask>",
|
11 |
"lstrip": true,
|
|
|
13 |
"rstrip": false,
|
14 |
"single_word": false
|
15 |
},
|
16 |
+
"pad_token": "<pad>",
|
17 |
+
"sep_token": "</s>",
|
18 |
+
"unk_token": "<unk>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
}
|
pytorch_model.bin → test_data.pickle
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cadd8cf5f67df026618234c3f3a709d22fc6d454829cdc08d307c49ad9b1c859
|
3 |
+
size 44811
|
tokenizer.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
CHANGED
@@ -66,17 +66,10 @@
|
|
66 |
"cls_token": "<s>",
|
67 |
"eos_token": "</s>",
|
68 |
"mask_token": "<mask>",
|
69 |
-
"max_length": 514,
|
70 |
"model_max_length": 1000000000000000019884624838656,
|
71 |
-
"pad_to_multiple_of": null,
|
72 |
"pad_token": "<pad>",
|
73 |
-
"pad_token_type_id": 0,
|
74 |
-
"padding_side": "right",
|
75 |
"sep_token": "</s>",
|
76 |
"sp_model_kwargs": {},
|
77 |
-
"stride": 0,
|
78 |
"tokenizer_class": "CamembertTokenizer",
|
79 |
-
"truncation_side": "right",
|
80 |
-
"truncation_strategy": "longest_first",
|
81 |
"unk_token": "<unk>"
|
82 |
}
|
|
|
66 |
"cls_token": "<s>",
|
67 |
"eos_token": "</s>",
|
68 |
"mask_token": "<mask>",
|
|
|
69 |
"model_max_length": 1000000000000000019884624838656,
|
|
|
70 |
"pad_token": "<pad>",
|
|
|
|
|
71 |
"sep_token": "</s>",
|
72 |
"sp_model_kwargs": {},
|
|
|
73 |
"tokenizer_class": "CamembertTokenizer",
|
|
|
|
|
74 |
"unk_token": "<unk>"
|
75 |
}
|
val_data.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:626904ea85eb65a3f91a22272d4a0b234d2aa2ca4f8502a1c7a70355b17b5aaa
|
3 |
+
size 169613
|