96abhishekarora committed on
Commit
702d2db
1 Parent(s): 9bf83e2

Updated the model with better training and evaluation. Test and validation data are included as pickle files. Older legacy files were removed to avoid confusion.

Browse files
.gitattributes CHANGED
@@ -1,41 +1,5 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
37
  model.safetensors filter=lfs diff=lfs merge=lfs -text
 
38
  .git/lfs/objects/af/c3/afc308c30b585f63df930d326b7d368bd14b8099d8440c3165dc0568c17e891a filter=lfs diff=lfs merge=lfs -text
39
- .git/lfs/objects/17/88/17884b92bb03fce9b0e3de84052aa11df38e25aec5a1e7be3fd563d047fcaa1c filter=lfs diff=lfs merge=lfs -text
40
- .git/lfs/objects/a7/fa/a7fa346be10b7a4615f8feb12a01c6ce7b28687d60f2e03b5b1d9dd8f6dcfbe1 filter=lfs diff=lfs merge=lfs -text
41
- .git/lfs/objects/8d/21/8d21b3dceaaae41322410838def31118f35c04b6af37a706486922998826717c filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  model.safetensors filter=lfs diff=lfs merge=lfs -text
2
+ .git/lfs/objects/ea/87/ea87e6be6f2431df3121c921b9042a59bb8f61f4d4a4e264c95833658c9a0954 filter=lfs diff=lfs merge=lfs -text
3
  .git/lfs/objects/af/c3/afc308c30b585f63df930d326b7d368bd14b8099d8440c3165dc0568c17e891a filter=lfs diff=lfs merge=lfs -text
4
+ test_data.pickle filter=lfs diff=lfs merge=lfs -text
5
+ val_data.pickle filter=lfs diff=lfs merge=lfs -text
 
Information-Retrieval_evaluation_eval_results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ epoch,steps,cos_sim-Accuracy@1,cos_sim-Accuracy@3,cos_sim-Accuracy@5,cos_sim-Accuracy@10,cos_sim-Precision@1,cos_sim-Recall@1,cos_sim-Precision@3,cos_sim-Recall@3,cos_sim-Precision@5,cos_sim-Recall@5,cos_sim-Precision@10,cos_sim-Recall@10,cos_sim-MRR@10,cos_sim-NDCG@10,cos_sim-MAP@100
2
+ 0,0,0.4670846394984326,0.7210031347962382,0.8087774294670846,0.8871473354231975,0.4670846394984326,0.4592476489028213,0.24555903866248693,0.719435736677116,0.16489028213166143,0.8072100313479624,0.09059561128526646,0.8871473354231975,0.6158630641389262,0.6819885172678376,0.6206111557050049
3
+ 0,0,0.4670846394984326,0.7210031347962382,0.8087774294670846,0.8871473354231975,0.4670846394984326,0.4592476489028213,0.24555903866248693,0.719435736677116,0.16489028213166143,0.8072100313479624,0.09059561128526646,0.8871473354231975,0.6158630641389262,0.6819885172678376,0.6206111557050049
4
+ 0,0,0.2717391304347826,0.4692028985507246,0.5452898550724637,0.6304347826086957,0.2717391304347826,0.2717391304347826,0.15640096618357488,0.4692028985507246,0.10905797101449274,0.5452898550724637,0.06304347826086956,0.6304347826086957,0.38255046583850943,0.4420897495949504,0.3955524278192065
5
+ 0,0,0.2717391304347826,0.4692028985507246,0.5452898550724637,0.6304347826086957,0.2717391304347826,0.2717391304347826,0.15640096618357488,0.4692028985507246,0.10905797101449274,0.5452898550724637,0.06304347826086956,0.6304347826086957,0.38255046583850943,0.4420897495949504,0.3955524278192065
Information-Retrieval_evaluation_test_results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ epoch,steps,cos_sim-Accuracy@1,cos_sim-Accuracy@3,cos_sim-Accuracy@5,cos_sim-Accuracy@10,cos_sim-Precision@1,cos_sim-Recall@1,cos_sim-Precision@3,cos_sim-Recall@3,cos_sim-Precision@5,cos_sim-Recall@5,cos_sim-Precision@10,cos_sim-Recall@10,cos_sim-MRR@10,cos_sim-NDCG@10,cos_sim-MAP@100
2
+ 0,0,0.780952380952381,0.9666666666666667,0.9904761904761905,1.0,0.780952380952381,0.7738095238095238,0.32698412698412693,0.9666666666666667,0.20095238095238097,0.9904761904761905,0.10142857142857144,1.0,0.8719160997732425,0.9042782621683655,0.8719160997732426
3
+ 0,0,0.7142857142857143,0.9476190476190476,0.9952380952380953,1.0,0.7142857142857143,0.7095238095238096,0.32063492063492066,0.9476190476190476,0.20190476190476192,0.9952380952380953,0.10142857142857144,1.0,0.8349206349206347,0.8771466669219199,0.8353174603174605
4
+ 0,0,0.6298200514138818,0.794344473007712,0.87146529562982,0.922879177377892,0.6298200514138818,0.6298200514138818,0.2647814910025707,0.794344473007712,0.17429305912596396,0.87146529562982,0.09228791773778919,0.922879177377892,0.7290345615538417,0.7762103137584817,0.7344871530925516
5
+ 0,0,0.7994858611825193,0.8920308483290489,0.9383033419023136,0.9717223650385605,0.7994858611825193,0.7994858611825193,0.2973436161096829,0.8920308483290489,0.1876606683804627,0.9383033419023136,0.09717223650385604,0.9717223650385605,0.8560543926225159,0.8840081774709585,0.8582293823504317
README.md CHANGED
@@ -10,7 +10,7 @@ tags:
10
 
11
  ---
12
 
13
- # dell-research-harvard/lt-un-data-fine-coarse-es
14
 
15
  This is a [LinkTransformer](https://linktransformer.github.io/) model. At its core, this model is a [sentence-transformers](https://www.SBERT.net) model; LinkTransformer simply wraps around that class.
16
  It is designed for quick and easy record linkage (entity-matching) through the LinkTransformer package. The tasks include clustering, deduplication, linking, aggregation and more.
 
10
 
11
  ---
12
 
13
+ # {MODEL_NAME}
14
 
15
  This is a [LinkTransformer](https://linktransformer.github.io/) model. At its core, this model is a [sentence-transformers](https://www.SBERT.net) model; LinkTransformer simply wraps around that class.
16
  It is designed for quick and easy record linkage (entity-matching) through the LinkTransformer package. The tasks include clustering, deduplication, linking, aggregation and more.
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "models/linkage_un_data_es_fine_coarse",
3
  "architectures": [
4
  "BertModel"
5
  ],
 
1
  {
2
+ "_name_or_path": "hiiamsid/sentence_similarity_spanish_es",
3
  "architectures": [
4
  "BertModel"
5
  ],
eval/Information-Retrieval_evaluation_eval_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -1,37 +1,7 @@
1
  {
2
- "cls_token": {
3
- "content": "[CLS]",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "mask_token": {
10
- "content": "[MASK]",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "pad_token": {
17
- "content": "[PAD]",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "sep_token": {
24
- "content": "[SEP]",
25
- "lstrip": false,
26
- "normalized": false,
27
- "rstrip": false,
28
- "single_word": false
29
- },
30
- "unk_token": {
31
- "content": "[UNK]",
32
- "lstrip": false,
33
- "normalized": false,
34
- "rstrip": false,
35
- "single_word": false
36
- }
37
  }
 
1
  {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  }
pytorch_model.bin → test_data.pickle RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afc308c30b585f63df930d326b7d368bd14b8099d8440c3165dc0568c17e891a
3
- size 439467497
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20dc4b792591440c2a9965e364b081dda4a272662c869ab9d8062ce76871926b
3
+ size 42438
val_data.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d513cbf6df1d1f178551323bc879605c0e5447c1d3baedf20a55c0979b5cae5
3
+ size 231709