96abhishekarora committed on
Commit
c15228a
·
1 Parent(s): fe59494

Updated model with better training and evaluation. Test and val data included as pickle files. Older Legacy files were removed to avoid confusion.

Browse files
.gitattributes CHANGED
@@ -1,41 +1,6 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
37
  model.safetensors filter=lfs diff=lfs merge=lfs -text
 
38
  .git/lfs/objects/10/5b/105b8df0ad58b8f0280d55ee92bd528df20c93351ed578775f427b8022581ec0 filter=lfs diff=lfs merge=lfs -text
39
- .git/lfs/objects/d8/9e/d89eb57d2145088f1078af291109f8d3033eccffb4526b2562deb93d4239263a filter=lfs diff=lfs merge=lfs -text
40
- .git/lfs/objects/8f/13/8f1325becf59c50fad5dca8ed3851bb8f7710dea27f965f2162c8236eb0d4184 filter=lfs diff=lfs merge=lfs -text
41
- .git/lfs/objects/99/44/994418f47770d3b34ecf225929a10feb591a4d18025804cf2051af4827e5e4ec filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  model.safetensors filter=lfs diff=lfs merge=lfs -text
2
+ .git/lfs/objects/97/f4/97f4992b52a0621c15f4b731a5d0aa12f93a8136dfefe9da6d15efe200de2025 filter=lfs diff=lfs merge=lfs -text
3
  .git/lfs/objects/10/5b/105b8df0ad58b8f0280d55ee92bd528df20c93351ed578775f427b8022581ec0 filter=lfs diff=lfs merge=lfs -text
4
+ test_data.pickle filter=lfs diff=lfs merge=lfs -text
5
+ val_data.pickle filter=lfs diff=lfs merge=lfs -text
6
+ sentencepiece.bpe.model filter=lfs diff=lfs merge=lfs -text
Information-Retrieval_evaluation_eval_results.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ epoch,steps,cos_sim-Accuracy@1,cos_sim-Accuracy@3,cos_sim-Accuracy@5,cos_sim-Accuracy@10,cos_sim-Precision@1,cos_sim-Recall@1,cos_sim-Precision@3,cos_sim-Recall@3,cos_sim-Precision@5,cos_sim-Recall@5,cos_sim-Precision@10,cos_sim-Recall@10,cos_sim-MRR@10,cos_sim-NDCG@10,cos_sim-MAP@100
2
+ 0,0,0.7432432432432432,0.9054054054054054,0.9459459459459459,0.9594594594594594,0.7432432432432432,0.7274774774774774,0.3018018018018018,0.8896396396396395,0.19189189189189185,0.9346846846846846,0.09729729729729729,0.9481981981981981,0.8277027027027029,0.8510838939890354,0.8163342935605461
3
+ 0,0,0.7432432432432432,0.9054054054054054,0.9459459459459459,0.9594594594594594,0.7432432432432432,0.7274774774774774,0.3018018018018018,0.8896396396396395,0.19189189189189185,0.9346846846846846,0.09729729729729729,0.9481981981981981,0.8277027027027029,0.8510838939890354,0.8163342935605461
4
+ 0,0,0.3772455089820359,0.564870259481038,0.6946107784431138,0.8483033932135728,0.3772455089820359,0.3772455089820359,0.1882900864936793,0.564870259481038,0.13892215568862273,0.6946107784431138,0.08483033932135728,0.8483033932135728,0.5086089725311275,0.5891511561097007,0.5148648590536671
5
+ 0,0,0.3772455089820359,0.564870259481038,0.6946107784431138,0.8483033932135728,0.3772455089820359,0.3772455089820359,0.1882900864936793,0.564870259481038,0.13892215568862273,0.6946107784431138,0.08483033932135728,0.8483033932135728,0.5086089725311275,0.5891511561097007,0.5148648590536671
6
+ 0,0,0.3772455089820359,0.564870259481038,0.6946107784431138,0.8483033932135728,0.3772455089820359,0.3772455089820359,0.1882900864936793,0.564870259481038,0.13892215568862273,0.6946107784431138,0.08483033932135728,0.8483033932135728,0.5086089725311275,0.5891511561097007,0.5148648590536671
Information-Retrieval_evaluation_test_results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ epoch,steps,cos_sim-Accuracy@1,cos_sim-Accuracy@3,cos_sim-Accuracy@5,cos_sim-Accuracy@10,cos_sim-Precision@1,cos_sim-Recall@1,cos_sim-Precision@3,cos_sim-Recall@3,cos_sim-Precision@5,cos_sim-Recall@5,cos_sim-Precision@10,cos_sim-Recall@10,cos_sim-MRR@10,cos_sim-NDCG@10,cos_sim-MAP@100
2
+ 0,0,0.4947565543071161,0.6910112359550562,0.804119850187266,0.9108614232209737,0.4947565543071161,0.4534398965578741,0.24269662921348314,0.6424451578384162,0.17617977528089887,0.7637154449794898,0.10408239700374532,0.8804012841091494,0.6226166696391399,0.6721939572015778,0.6041564193432677
3
+ 0,0,0.49887640449438203,0.7138576779026217,0.8307116104868913,0.9258426966292135,0.49887640449438203,0.4541889602282861,0.2545568039950063,0.6692460317460317,0.18337078651685393,0.7918958444801142,0.10606741573033708,0.8973154093097913,0.6346632185958015,0.68553627425209,0.6149989655561907
4
+ 0,0,0.6821192052980133,0.890728476821192,0.9370860927152318,0.9668874172185431,0.6821192052980133,0.6821192052980133,0.29690949227373065,0.890728476821192,0.18741721854304633,0.9370860927152318,0.0966887417218543,0.9668874172185431,0.7943918847892356,0.8374009368138124,0.796834833672582
5
+ 0,0,0.7185430463576159,0.9238410596026491,0.956953642384106,0.9801324503311258,0.7185430463576159,0.7185430463576159,0.30794701986754963,0.9238410596026491,0.19139072847682115,0.956953642384106,0.09801324503311257,0.9801324503311258,0.8196100073583515,0.8596148051808452,0.8212238276807813
README.md CHANGED
@@ -10,7 +10,7 @@ tags:
10
 
11
  ---
12
 
13
- # dell-research-harvard/lt-un-data-fine-industry-fr
14
 
15
  This is a [LinkTransformer](https://linktransformer.github.io/) model. At its core, this model is a [sentence-transformers](https://www.SBERT.net) model; it just wraps around the class.
16
  It is designed for quick and easy record linkage (entity-matching) through the LinkTransformer package. The tasks include clustering, deduplication, linking, aggregation and more.
 
10
 
11
  ---
12
 
13
+ # dell-research-harvard/lt-un-data-fine-industry-fr
14
 
15
  This is a [LinkTransformer](https://linktransformer.github.io/) model. At its core, this model is a [sentence-transformers](https://www.SBERT.net) model; it just wraps around the class.
16
  It is designed for quick and easy record linkage (entity-matching) through the LinkTransformer package. The tasks include clustering, deduplication, linking, aggregation and more.
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "models/linkage_un_data_fr_fine_industry",
3
  "architectures": [
4
  "CamembertModel"
5
  ],
 
1
  {
2
+ "_name_or_path": "dangvantuan/sentence-camembert-large",
3
  "architectures": [
4
  "CamembertModel"
5
  ],
eval/Information-Retrieval_evaluation_eval_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -3,27 +3,9 @@
3
  "<s>NOTUSED",
4
  "</s>NOTUSED"
5
  ],
6
- "bos_token": {
7
- "content": "<s>",
8
- "lstrip": false,
9
- "normalized": false,
10
- "rstrip": false,
11
- "single_word": false
12
- },
13
- "cls_token": {
14
- "content": "<s>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false
19
- },
20
- "eos_token": {
21
- "content": "</s>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false
26
- },
27
  "mask_token": {
28
  "content": "<mask>",
29
  "lstrip": true,
@@ -31,25 +13,7 @@
31
  "rstrip": false,
32
  "single_word": false
33
  },
34
- "pad_token": {
35
- "content": "<pad>",
36
- "lstrip": false,
37
- "normalized": false,
38
- "rstrip": false,
39
- "single_word": false
40
- },
41
- "sep_token": {
42
- "content": "</s>",
43
- "lstrip": false,
44
- "normalized": false,
45
- "rstrip": false,
46
- "single_word": false
47
- },
48
- "unk_token": {
49
- "content": "<unk>",
50
- "lstrip": false,
51
- "normalized": false,
52
- "rstrip": false,
53
- "single_word": false
54
- }
55
  }
 
3
  "<s>NOTUSED",
4
  "</s>NOTUSED"
5
  ],
6
+ "bos_token": "<s>",
7
+ "cls_token": "<s>",
8
+ "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "mask_token": {
10
  "content": "<mask>",
11
  "lstrip": true,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "<pad>",
17
+ "sep_token": "</s>",
18
+ "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
pytorch_model.bin → test_data.pickle RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:105b8df0ad58b8f0280d55ee92bd528df20c93351ed578775f427b8022581ec0
3
- size 1346772393
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cadd8cf5f67df026618234c3f3a709d22fc6d454829cdc08d307c49ad9b1c859
3
+ size 44811
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -66,17 +66,10 @@
66
  "cls_token": "<s>",
67
  "eos_token": "</s>",
68
  "mask_token": "<mask>",
69
- "max_length": 514,
70
  "model_max_length": 1000000000000000019884624838656,
71
- "pad_to_multiple_of": null,
72
  "pad_token": "<pad>",
73
- "pad_token_type_id": 0,
74
- "padding_side": "right",
75
  "sep_token": "</s>",
76
  "sp_model_kwargs": {},
77
- "stride": 0,
78
  "tokenizer_class": "CamembertTokenizer",
79
- "truncation_side": "right",
80
- "truncation_strategy": "longest_first",
81
  "unk_token": "<unk>"
82
  }
 
66
  "cls_token": "<s>",
67
  "eos_token": "</s>",
68
  "mask_token": "<mask>",
 
69
  "model_max_length": 1000000000000000019884624838656,
 
70
  "pad_token": "<pad>",
 
 
71
  "sep_token": "</s>",
72
  "sp_model_kwargs": {},
 
73
  "tokenizer_class": "CamembertTokenizer",
 
 
74
  "unk_token": "<unk>"
75
  }
val_data.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:626904ea85eb65a3f91a22272d4a0b234d2aa2ca4f8502a1c7a70355b17b5aaa
3
+ size 169613