Pristinenlp committed on
Commit
1d2cb86
1 Parent(s): 4433fab
1_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "word_embedding_dimension": 1024,
+   "pooling_mode_cls_token": true,
+   "pooling_mode_mean_tokens": false,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false
+ }
README.md CHANGED
@@ -1,3 +1,1082 @@
  ---
- license: mit
2
+ tags:
3
+ - mteb
4
+ model-index:
5
+ - name: alime-embedding-large-zh
6
+ results:
7
+ - task:
8
+ type: STS
9
+ dataset:
10
+ type: C-MTEB/AFQMC
11
+ name: MTEB AFQMC
12
+ config: default
13
+ split: validation
14
+ revision: None
15
+ metrics:
16
+ - type: cos_sim_pearson
17
+ value: 49.6479989785073
18
+ - type: cos_sim_spearman
19
+ value: 54.733173049795425
20
+ - type: euclidean_pearson
21
+ value: 53.06330391299694
22
+ - type: euclidean_spearman
23
+ value: 54.73321325021156
24
+ - type: manhattan_pearson
25
+ value: 53.0477915350307
26
+ - type: manhattan_spearman
27
+ value: 54.728508847750845
28
+ - task:
29
+ type: STS
30
+ dataset:
31
+ type: C-MTEB/ATEC
32
+ name: MTEB ATEC
33
+ config: default
34
+ split: test
35
+ revision: None
36
+ metrics:
37
+ - type: cos_sim_pearson
38
+ value: 48.658812679136325
39
+ - type: cos_sim_spearman
40
+ value: 55.125070901329146
41
+ - type: euclidean_pearson
42
+ value: 55.73373519622172
43
+ - type: euclidean_spearman
44
+ value: 55.12506864911728
45
+ - type: manhattan_pearson
46
+ value: 55.71155132206361
47
+ - type: manhattan_spearman
48
+ value: 55.121598723227905
49
+ - task:
50
+ type: Classification
51
+ dataset:
52
+ type: mteb/amazon_reviews_multi
53
+ name: MTEB AmazonReviewsClassification (zh)
54
+ config: zh
55
+ split: test
56
+ revision: 1399c76144fd37290681b995c656ef9b2e06e26d
57
+ metrics:
58
+ - type: accuracy
59
+ value: 46.95
60
+ - type: f1
61
+ value: 45.34383964066362
62
+ - task:
63
+ type: STS
64
+ dataset:
65
+ type: C-MTEB/BQ
66
+ name: MTEB BQ
67
+ config: default
68
+ split: test
69
+ revision: None
70
+ metrics:
71
+ - type: cos_sim_pearson
72
+ value: 62.92731050834033
73
+ - type: cos_sim_spearman
74
+ value: 64.8881453551134
75
+ - type: euclidean_pearson
76
+ value: 63.31447523186855
77
+ - type: euclidean_spearman
78
+ value: 64.88814189042776
79
+ - type: manhattan_pearson
80
+ value: 63.222442228527996
81
+ - type: manhattan_spearman
82
+ value: 64.79818263591122
83
+ - task:
84
+ type: Clustering
85
+ dataset:
86
+ type: C-MTEB/CLSClusteringP2P
87
+ name: MTEB CLSClusteringP2P
88
+ config: default
89
+ split: test
90
+ revision: None
91
+ metrics:
92
+ - type: v_measure
93
+ value: 42.518811360488925
94
+ - task:
95
+ type: Clustering
96
+ dataset:
97
+ type: C-MTEB/CLSClusteringS2S
98
+ name: MTEB CLSClusteringS2S
99
+ config: default
100
+ split: test
101
+ revision: None
102
+ metrics:
103
+ - type: v_measure
104
+ value: 39.72890397315954
105
+ - task:
106
+ type: Reranking
107
+ dataset:
108
+ type: C-MTEB/CMedQAv1-reranking
109
+ name: MTEB CMedQAv1
110
+ config: default
111
+ split: test
112
+ revision: None
113
+ metrics:
114
+ - type: map
115
+ value: 86.51852576014969
116
+ - type: mrr
117
+ value: 89.02047619047619
118
+ - task:
119
+ type: Reranking
120
+ dataset:
121
+ type: C-MTEB/CMedQAv2-reranking
122
+ name: MTEB CMedQAv2
123
+ config: default
124
+ split: test
125
+ revision: None
126
+ metrics:
127
+ - type: map
128
+ value: 87.11415162833914
129
+ - type: mrr
130
+ value: 89.6338492063492
131
+ - task:
132
+ type: Retrieval
133
+ dataset:
134
+ type: C-MTEB/CmedqaRetrieval
135
+ name: MTEB CmedqaRetrieval
136
+ config: default
137
+ split: dev
138
+ revision: None
139
+ metrics:
140
+ - type: map_at_1
141
+ value: 24.883
142
+ - type: map_at_10
143
+ value: 37.246
144
+ - type: map_at_100
145
+ value: 39.11
146
+ - type: map_at_1000
147
+ value: 39.222
148
+ - type: map_at_3
149
+ value: 32.956
150
+ - type: map_at_5
151
+ value: 35.411
152
+ - type: mrr_at_1
153
+ value: 37.834
154
+ - type: mrr_at_10
155
+ value: 46.031
156
+ - type: mrr_at_100
157
+ value: 47.033
158
+ - type: mrr_at_1000
159
+ value: 47.077000000000005
160
+ - type: mrr_at_3
161
+ value: 43.415
162
+ - type: mrr_at_5
163
+ value: 44.938
164
+ - type: ndcg_at_1
165
+ value: 37.834
166
+ - type: ndcg_at_10
167
+ value: 43.928
168
+ - type: ndcg_at_100
169
+ value: 51.312999999999995
170
+ - type: ndcg_at_1000
171
+ value: 53.23
172
+ - type: ndcg_at_3
173
+ value: 38.397
174
+ - type: ndcg_at_5
175
+ value: 40.848
176
+ - type: precision_at_1
177
+ value: 37.834
178
+ - type: precision_at_10
179
+ value: 9.782
180
+ - type: precision_at_100
181
+ value: 1.583
182
+ - type: precision_at_1000
183
+ value: 0.183
184
+ - type: precision_at_3
185
+ value: 21.664
186
+ - type: precision_at_5
187
+ value: 15.934000000000001
188
+ - type: recall_at_1
189
+ value: 24.883
190
+ - type: recall_at_10
191
+ value: 54.911
192
+ - type: recall_at_100
193
+ value: 85.419
194
+ - type: recall_at_1000
195
+ value: 98.16
196
+ - type: recall_at_3
197
+ value: 38.416
198
+ - type: recall_at_5
199
+ value: 45.778
200
+ - task:
201
+ type: PairClassification
202
+ dataset:
203
+ type: C-MTEB/CMNLI
204
+ name: MTEB Cmnli
205
+ config: default
206
+ split: validation
207
+ revision: None
208
+ metrics:
209
+ - type: cos_sim_accuracy
210
+ value: 82.5616355983163
211
+ - type: cos_sim_ap
212
+ value: 89.3612977679186
213
+ - type: cos_sim_f1
214
+ value: 83.93428161870108
215
+ - type: cos_sim_precision
216
+ value: 79.42404006677796
217
+ - type: cos_sim_recall
218
+ value: 88.98760813654431
219
+ - type: dot_accuracy
220
+ value: 82.5616355983163
221
+ - type: dot_ap
222
+ value: 89.38168095374776
223
+ - type: dot_f1
224
+ value: 83.93428161870108
225
+ - type: dot_precision
226
+ value: 79.42404006677796
227
+ - type: dot_recall
228
+ value: 88.98760813654431
229
+ - type: euclidean_accuracy
230
+ value: 82.5616355983163
231
+ - type: euclidean_ap
232
+ value: 89.36129603693611
233
+ - type: euclidean_f1
234
+ value: 83.93428161870108
235
+ - type: euclidean_precision
236
+ value: 79.42404006677796
237
+ - type: euclidean_recall
238
+ value: 88.98760813654431
239
+ - type: manhattan_accuracy
240
+ value: 82.42934455802767
241
+ - type: manhattan_ap
242
+ value: 89.36577661305246
243
+ - type: manhattan_f1
244
+ value: 83.94765539803707
245
+ - type: manhattan_precision
246
+ value: 78.66339668914776
247
+ - type: manhattan_recall
248
+ value: 89.99298573766659
249
+ - type: max_accuracy
250
+ value: 82.5616355983163
251
+ - type: max_ap
252
+ value: 89.38168095374776
253
+ - type: max_f1
254
+ value: 83.94765539803707
255
+ - task:
256
+ type: Retrieval
257
+ dataset:
258
+ type: C-MTEB/CovidRetrieval
259
+ name: MTEB CovidRetrieval
260
+ config: default
261
+ split: dev
262
+ revision: None
263
+ metrics:
264
+ - type: map_at_1
265
+ value: 77.608
266
+ - type: map_at_10
267
+ value: 85.1
268
+ - type: map_at_100
269
+ value: 85.215
270
+ - type: map_at_1000
271
+ value: 85.217
272
+ - type: map_at_3
273
+ value: 83.97
274
+ - type: map_at_5
275
+ value: 84.638
276
+ - type: mrr_at_1
277
+ value: 77.97699999999999
278
+ - type: mrr_at_10
279
+ value: 85.173
280
+ - type: mrr_at_100
281
+ value: 85.28
282
+ - type: mrr_at_1000
283
+ value: 85.282
284
+ - type: mrr_at_3
285
+ value: 84.089
286
+ - type: mrr_at_5
287
+ value: 84.726
288
+ - type: ndcg_at_1
289
+ value: 77.871
290
+ - type: ndcg_at_10
291
+ value: 88.141
292
+ - type: ndcg_at_100
293
+ value: 88.612
294
+ - type: ndcg_at_1000
295
+ value: 88.68
296
+ - type: ndcg_at_3
297
+ value: 85.9
298
+ - type: ndcg_at_5
299
+ value: 87.06
300
+ - type: precision_at_1
301
+ value: 77.871
302
+ - type: precision_at_10
303
+ value: 9.841999999999999
304
+ - type: precision_at_100
305
+ value: 1.005
306
+ - type: precision_at_1000
307
+ value: 0.101
308
+ - type: precision_at_3
309
+ value: 30.698999999999998
310
+ - type: precision_at_5
311
+ value: 19.009
312
+ - type: recall_at_1
313
+ value: 77.608
314
+ - type: recall_at_10
315
+ value: 97.418
316
+ - type: recall_at_100
317
+ value: 99.473
318
+ - type: recall_at_1000
319
+ value: 100.0
320
+ - type: recall_at_3
321
+ value: 91.307
322
+ - type: recall_at_5
323
+ value: 94.125
324
+ - task:
325
+ type: Retrieval
326
+ dataset:
327
+ type: C-MTEB/DuRetrieval
328
+ name: MTEB DuRetrieval
329
+ config: default
330
+ split: dev
331
+ revision: None
332
+ metrics:
333
+ - type: map_at_1
334
+ value: 26.104
335
+ - type: map_at_10
336
+ value: 78.62
337
+ - type: map_at_100
338
+ value: 81.417
339
+ - type: map_at_1000
340
+ value: 81.46600000000001
341
+ - type: map_at_3
342
+ value: 55.077
343
+ - type: map_at_5
344
+ value: 69.18900000000001
345
+ - type: mrr_at_1
346
+ value: 90.55
347
+ - type: mrr_at_10
348
+ value: 93.42200000000001
349
+ - type: mrr_at_100
350
+ value: 93.46900000000001
351
+ - type: mrr_at_1000
352
+ value: 93.472
353
+ - type: mrr_at_3
354
+ value: 93.108
355
+ - type: mrr_at_5
356
+ value: 93.318
357
+ - type: ndcg_at_1
358
+ value: 90.55
359
+ - type: ndcg_at_10
360
+ value: 86.227
361
+ - type: ndcg_at_100
362
+ value: 89.201
363
+ - type: ndcg_at_1000
364
+ value: 89.655
365
+ - type: ndcg_at_3
366
+ value: 85.89099999999999
367
+ - type: ndcg_at_5
368
+ value: 84.443
369
+ - type: precision_at_1
370
+ value: 90.55
371
+ - type: precision_at_10
372
+ value: 40.915
373
+ - type: precision_at_100
374
+ value: 4.749
375
+ - type: precision_at_1000
376
+ value: 0.486
377
+ - type: precision_at_3
378
+ value: 76.9
379
+ - type: precision_at_5
380
+ value: 64.56
381
+ - type: recall_at_1
382
+ value: 26.104
383
+ - type: recall_at_10
384
+ value: 86.924
385
+ - type: recall_at_100
386
+ value: 96.52
387
+ - type: recall_at_1000
388
+ value: 98.83800000000001
389
+ - type: recall_at_3
390
+ value: 57.196999999999996
391
+ - type: recall_at_5
392
+ value: 73.595
393
+ - task:
394
+ type: Retrieval
395
+ dataset:
396
+ type: C-MTEB/EcomRetrieval
397
+ name: MTEB EcomRetrieval
398
+ config: default
399
+ split: dev
400
+ revision: None
401
+ metrics:
402
+ - type: map_at_1
403
+ value: 51.9
404
+ - type: map_at_10
405
+ value: 62.446
406
+ - type: map_at_100
407
+ value: 62.922
408
+ - type: map_at_1000
409
+ value: 62.934999999999995
410
+ - type: map_at_3
411
+ value: 59.933
412
+ - type: map_at_5
413
+ value: 61.548
414
+ - type: mrr_at_1
415
+ value: 51.9
416
+ - type: mrr_at_10
417
+ value: 62.446
418
+ - type: mrr_at_100
419
+ value: 62.922
420
+ - type: mrr_at_1000
421
+ value: 62.934999999999995
422
+ - type: mrr_at_3
423
+ value: 59.933
424
+ - type: mrr_at_5
425
+ value: 61.548
426
+ - type: ndcg_at_1
427
+ value: 51.9
428
+ - type: ndcg_at_10
429
+ value: 67.561
430
+ - type: ndcg_at_100
431
+ value: 69.87400000000001
432
+ - type: ndcg_at_1000
433
+ value: 70.19800000000001
434
+ - type: ndcg_at_3
435
+ value: 62.474
436
+ - type: ndcg_at_5
437
+ value: 65.391
438
+ - type: precision_at_1
439
+ value: 51.9
440
+ - type: precision_at_10
441
+ value: 8.36
442
+ - type: precision_at_100
443
+ value: 0.9440000000000001
444
+ - type: precision_at_1000
445
+ value: 0.097
446
+ - type: precision_at_3
447
+ value: 23.267
448
+ - type: precision_at_5
449
+ value: 15.379999999999999
450
+ - type: recall_at_1
451
+ value: 51.9
452
+ - type: recall_at_10
453
+ value: 83.6
454
+ - type: recall_at_100
455
+ value: 94.39999999999999
456
+ - type: recall_at_1000
457
+ value: 96.89999999999999
458
+ - type: recall_at_3
459
+ value: 69.8
460
+ - type: recall_at_5
461
+ value: 76.9
462
+ - task:
463
+ type: Classification
464
+ dataset:
465
+ type: C-MTEB/IFlyTek-classification
466
+ name: MTEB IFlyTek
467
+ config: default
468
+ split: validation
469
+ revision: None
470
+ metrics:
471
+ - type: accuracy
472
+ value: 49.672951135051946
473
+ - type: f1
474
+ value: 38.246634605142084
475
+ - task:
476
+ type: Classification
477
+ dataset:
478
+ type: C-MTEB/JDReview-classification
479
+ name: MTEB JDReview
480
+ config: default
481
+ split: test
482
+ revision: None
483
+ metrics:
484
+ - type: accuracy
485
+ value: 86.52908067542214
486
+ - type: ap
487
+ value: 55.415146961759135
488
+ - type: f1
489
+ value: 81.38343036361825
490
+ - task:
491
+ type: STS
492
+ dataset:
493
+ type: C-MTEB/LCQMC
494
+ name: MTEB LCQMC
495
+ config: default
496
+ split: test
497
+ revision: None
498
+ metrics:
499
+ - type: cos_sim_pearson
500
+ value: 70.15572724302896
501
+ - type: cos_sim_spearman
502
+ value: 75.11630463239744
503
+ - type: euclidean_pearson
504
+ value: 74.2927184018677
505
+ - type: euclidean_spearman
506
+ value: 75.11630463089752
507
+ - type: manhattan_pearson
508
+ value: 74.27724224882166
509
+ - type: manhattan_spearman
510
+ value: 75.10012699894408
511
+ - task:
512
+ type: Reranking
513
+ dataset:
514
+ type: C-MTEB/Mmarco-reranking
515
+ name: MTEB MMarcoReranking
516
+ config: default
517
+ split: dev
518
+ revision: None
519
+ metrics:
520
+ - type: map
521
+ value: 30.62934327678744
522
+ - type: mrr
523
+ value: 29.48730158730159
524
+ - task:
525
+ type: Retrieval
526
+ dataset:
527
+ type: C-MTEB/MMarcoRetrieval
528
+ name: MTEB MMarcoRetrieval
529
+ config: default
530
+ split: dev
531
+ revision: None
532
+ metrics:
533
+ - type: map_at_1
534
+ value: 65.33
535
+ - type: map_at_10
536
+ value: 74.524
537
+ - type: map_at_100
538
+ value: 74.851
539
+ - type: map_at_1000
540
+ value: 74.86500000000001
541
+ - type: map_at_3
542
+ value: 72.748
543
+ - type: map_at_5
544
+ value: 73.896
545
+ - type: mrr_at_1
546
+ value: 67.593
547
+ - type: mrr_at_10
548
+ value: 75.19
549
+ - type: mrr_at_100
550
+ value: 75.472
551
+ - type: mrr_at_1000
552
+ value: 75.484
553
+ - type: mrr_at_3
554
+ value: 73.634
555
+ - type: mrr_at_5
556
+ value: 74.638
557
+ - type: ndcg_at_1
558
+ value: 67.593
559
+ - type: ndcg_at_10
560
+ value: 78.254
561
+ - type: ndcg_at_100
562
+ value: 79.727
563
+ - type: ndcg_at_1000
564
+ value: 80.09100000000001
565
+ - type: ndcg_at_3
566
+ value: 74.892
567
+ - type: ndcg_at_5
568
+ value: 76.835
569
+ - type: precision_at_1
570
+ value: 67.593
571
+ - type: precision_at_10
572
+ value: 9.46
573
+ - type: precision_at_100
574
+ value: 1.02
575
+ - type: precision_at_1000
576
+ value: 0.105
577
+ - type: precision_at_3
578
+ value: 28.227999999999998
579
+ - type: precision_at_5
580
+ value: 17.965999999999998
581
+ - type: recall_at_1
582
+ value: 65.33
583
+ - type: recall_at_10
584
+ value: 89.048
585
+ - type: recall_at_100
586
+ value: 95.732
587
+ - type: recall_at_1000
588
+ value: 98.598
589
+ - type: recall_at_3
590
+ value: 80.209
591
+ - type: recall_at_5
592
+ value: 84.824
593
+ - task:
594
+ type: Classification
595
+ dataset:
596
+ type: mteb/amazon_massive_intent
597
+ name: MTEB MassiveIntentClassification (zh-CN)
598
+ config: zh-CN
599
+ split: test
600
+ revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
601
+ metrics:
602
+ - type: accuracy
603
+ value: 73.38937457969065
604
+ - type: f1
605
+ value: 70.87692475465195
606
+ - task:
607
+ type: Classification
608
+ dataset:
609
+ type: mteb/amazon_massive_scenario
610
+ name: MTEB MassiveScenarioClassification (zh-CN)
611
+ config: zh-CN
612
+ split: test
613
+ revision: 7d571f92784cd94a019292a1f45445077d0ef634
614
+ metrics:
615
+ - type: accuracy
616
+ value: 76.04236718224612
617
+ - type: f1
618
+ value: 75.52425703483891
619
+ - task:
620
+ type: Retrieval
621
+ dataset:
622
+ type: C-MTEB/MedicalRetrieval
623
+ name: MTEB MedicalRetrieval
624
+ config: default
625
+ split: dev
626
+ revision: None
627
+ metrics:
628
+ - type: map_at_1
629
+ value: 53.1
630
+ - type: map_at_10
631
+ value: 60.24
632
+ - type: map_at_100
633
+ value: 60.781
634
+ - type: map_at_1000
635
+ value: 60.81999999999999
636
+ - type: map_at_3
637
+ value: 58.733000000000004
638
+ - type: map_at_5
639
+ value: 59.618
640
+ - type: mrr_at_1
641
+ value: 53.0
642
+ - type: mrr_at_10
643
+ value: 60.195
644
+ - type: mrr_at_100
645
+ value: 60.736000000000004
646
+ - type: mrr_at_1000
647
+ value: 60.775
648
+ - type: mrr_at_3
649
+ value: 58.68299999999999
650
+ - type: mrr_at_5
651
+ value: 59.573
652
+ - type: ndcg_at_1
653
+ value: 53.1
654
+ - type: ndcg_at_10
655
+ value: 63.568999999999996
656
+ - type: ndcg_at_100
657
+ value: 66.401
658
+ - type: ndcg_at_1000
659
+ value: 67.597
660
+ - type: ndcg_at_3
661
+ value: 60.455000000000005
662
+ - type: ndcg_at_5
663
+ value: 62.05500000000001
664
+ - type: precision_at_1
665
+ value: 53.1
666
+ - type: precision_at_10
667
+ value: 7.3999999999999995
668
+ - type: precision_at_100
669
+ value: 0.877
670
+ - type: precision_at_1000
671
+ value: 0.097
672
+ - type: precision_at_3
673
+ value: 21.8
674
+ - type: precision_at_5
675
+ value: 13.86
676
+ - type: recall_at_1
677
+ value: 53.1
678
+ - type: recall_at_10
679
+ value: 74.0
680
+ - type: recall_at_100
681
+ value: 87.7
682
+ - type: recall_at_1000
683
+ value: 97.39999999999999
684
+ - type: recall_at_3
685
+ value: 65.4
686
+ - type: recall_at_5
687
+ value: 69.3
688
+ - task:
689
+ type: Classification
690
+ dataset:
691
+ type: C-MTEB/MultilingualSentiment-classification
692
+ name: MTEB MultilingualSentiment
693
+ config: default
694
+ split: validation
695
+ revision: None
696
+ metrics:
697
+ - type: accuracy
698
+ value: 76.40333333333332
699
+ - type: f1
700
+ value: 76.40924131087777
701
+ - task:
702
+ type: PairClassification
703
+ dataset:
704
+ type: C-MTEB/OCNLI
705
+ name: MTEB Ocnli
706
+ config: default
707
+ split: validation
708
+ revision: None
709
+ metrics:
710
+ - type: cos_sim_accuracy
711
+ value: 77.15213860314023
712
+ - type: cos_sim_ap
713
+ value: 79.30594584166899
714
+ - type: cos_sim_f1
715
+ value: 80.25889967637539
716
+ - type: cos_sim_precision
717
+ value: 71.38157894736842
718
+ - type: cos_sim_recall
719
+ value: 91.65786694825766
720
+ - type: dot_accuracy
721
+ value: 77.15213860314023
722
+ - type: dot_ap
723
+ value: 79.30594584166899
724
+ - type: dot_f1
725
+ value: 80.25889967637539
726
+ - type: dot_precision
727
+ value: 71.38157894736842
728
+ - type: dot_recall
729
+ value: 91.65786694825766
730
+ - type: euclidean_accuracy
731
+ value: 77.15213860314023
732
+ - type: euclidean_ap
733
+ value: 79.30594584166899
734
+ - type: euclidean_f1
735
+ value: 80.25889967637539
736
+ - type: euclidean_precision
737
+ value: 71.38157894736842
738
+ - type: euclidean_recall
739
+ value: 91.65786694825766
740
+ - type: manhattan_accuracy
741
+ value: 77.36870600974554
742
+ - type: manhattan_ap
743
+ value: 79.23401219102254
744
+ - type: manhattan_f1
745
+ value: 80.44901777362021
746
+ - type: manhattan_precision
747
+ value: 72.20822837951302
748
+ - type: manhattan_recall
749
+ value: 90.8130939809926
750
+ - type: max_accuracy
751
+ value: 77.36870600974554
752
+ - type: max_ap
753
+ value: 79.30594584166899
754
+ - type: max_f1
755
+ value: 80.44901777362021
756
+ - task:
757
+ type: Classification
758
+ dataset:
759
+ type: C-MTEB/OnlineShopping-classification
760
+ name: MTEB OnlineShopping
761
+ config: default
762
+ split: test
763
+ revision: None
764
+ metrics:
765
+ - type: accuracy
766
+ value: 92.6
767
+ - type: ap
768
+ value: 90.78779333103819
769
+ - type: f1
770
+ value: 92.59253441654515
771
+ - task:
772
+ type: STS
773
+ dataset:
774
+ type: C-MTEB/PAWSX
775
+ name: MTEB PAWSX
776
+ config: default
777
+ split: test
778
+ revision: None
779
+ metrics:
780
+ - type: cos_sim_pearson
781
+ value: 34.4442917065113
782
+ - type: cos_sim_spearman
783
+ value: 37.93070836936766
784
+ - type: euclidean_pearson
785
+ value: 38.35141108502335
786
+ - type: euclidean_spearman
787
+ value: 37.936378767247106
788
+ - type: manhattan_pearson
789
+ value: 38.357078125497566
790
+ - type: manhattan_spearman
791
+ value: 37.94413026678537
792
+ - task:
793
+ type: STS
794
+ dataset:
795
+ type: C-MTEB/QBQTC
796
+ name: MTEB QBQTC
797
+ config: default
798
+ split: test
799
+ revision: None
800
+ metrics:
801
+ - type: cos_sim_pearson
802
+ value: 32.84777948741198
803
+ - type: cos_sim_spearman
804
+ value: 34.212129449696285
805
+ - type: euclidean_pearson
806
+ value: 32.69161407750465
807
+ - type: euclidean_spearman
808
+ value: 34.21178008084197
809
+ - type: manhattan_pearson
810
+ value: 32.675418316752506
811
+ - type: manhattan_spearman
812
+ value: 34.178590557249
813
+ - task:
814
+ type: STS
815
+ dataset:
816
+ type: mteb/sts22-crosslingual-sts
817
+ name: MTEB STS22 (zh)
818
+ config: zh
819
+ split: test
820
+ revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
821
+ metrics:
822
+ - type: cos_sim_pearson
823
+ value: 64.65903821549742
824
+ - type: cos_sim_spearman
825
+ value: 64.54376284777354
826
+ - type: euclidean_pearson
827
+ value: 63.70022677799055
828
+ - type: euclidean_spearman
829
+ value: 64.54376284777354
830
+ - type: manhattan_pearson
831
+ value: 64.46392290759724
832
+ - type: manhattan_spearman
833
+ value: 65.2496975447815
834
+ - task:
835
+ type: STS
836
+ dataset:
837
+ type: C-MTEB/STSB
838
+ name: MTEB STSB
839
+ config: default
840
+ split: test
841
+ revision: None
842
+ metrics:
843
+ - type: cos_sim_pearson
844
+ value: 80.05773088991484
845
+ - type: cos_sim_spearman
846
+ value: 80.71550237522008
847
+ - type: euclidean_pearson
848
+ value: 80.31115977415573
849
+ - type: euclidean_spearman
850
+ value: 80.71510951779365
851
+ - type: manhattan_pearson
852
+ value: 80.25235514937249
853
+ - type: manhattan_spearman
854
+ value: 80.65958309383224
855
+ - task:
856
+ type: Reranking
857
+ dataset:
858
+ type: C-MTEB/T2Reranking
859
+ name: MTEB T2Reranking
860
+ config: default
861
+ split: dev
862
+ revision: None
863
+ metrics:
864
+ - type: map
865
+ value: 66.18255262304848
866
+ - type: mrr
867
+ value: 75.95393252087565
868
+ - task:
869
+ type: Retrieval
870
+ dataset:
871
+ type: C-MTEB/T2Retrieval
872
+ name: MTEB T2Retrieval
873
+ config: default
874
+ split: dev
875
+ revision: None
876
+ metrics:
877
+ - type: map_at_1
878
+ value: 28.651
879
+ - type: map_at_10
880
+ value: 76.281
881
+ - type: map_at_100
882
+ value: 80.018
883
+ - type: map_at_1000
884
+ value: 80.098
885
+ - type: map_at_3
886
+ value: 54.783
887
+ - type: map_at_5
888
+ value: 66.508
889
+ - type: mrr_at_1
890
+ value: 90.99199999999999
891
+ - type: mrr_at_10
892
+ value: 93.812
893
+ - type: mrr_at_100
894
+ value: 93.87100000000001
895
+ - type: mrr_at_1000
896
+ value: 93.87299999999999
897
+ - type: mrr_at_3
898
+ value: 93.415
899
+ - type: mrr_at_5
900
+ value: 93.685
901
+ - type: ndcg_at_1
902
+ value: 90.99199999999999
903
+ - type: ndcg_at_10
904
+ value: 84.57900000000001
905
+ - type: ndcg_at_100
906
+ value: 88.474
907
+ - type: ndcg_at_1000
908
+ value: 89.172
909
+ - type: ndcg_at_3
910
+ value: 86.56099999999999
911
+ - type: ndcg_at_5
912
+ value: 84.811
913
+ - type: precision_at_1
914
+ value: 90.99199999999999
915
+ - type: precision_at_10
916
+ value: 40.969
917
+ - type: precision_at_100
918
+ value: 4.97
919
+ - type: precision_at_1000
920
+ value: 0.515
921
+ - type: precision_at_3
922
+ value: 74.734
923
+ - type: precision_at_5
924
+ value: 61.980999999999995
925
+ - type: recall_at_1
926
+ value: 28.651
927
+ - type: recall_at_10
928
+ value: 83.321
929
+ - type: recall_at_100
930
+ value: 95.498
931
+ - type: recall_at_1000
932
+ value: 98.759
933
+ - type: recall_at_3
934
+ value: 56.708000000000006
935
+ - type: recall_at_5
936
+ value: 70.25200000000001
937
+ - task:
938
+ type: Classification
939
+ dataset:
940
+ type: C-MTEB/TNews-classification
941
+ name: MTEB TNews
942
+ config: default
943
+ split: validation
944
+ revision: None
945
+ metrics:
946
+ - type: accuracy
947
+ value: 52.037
948
+ - type: f1
949
+ value: 50.3832093595745
950
+ - task:
951
+ type: Clustering
952
+ dataset:
953
+ type: C-MTEB/ThuNewsClusteringP2P
954
+ name: MTEB ThuNewsClusteringP2P
955
+ config: default
956
+ split: test
957
+ revision: None
958
+ metrics:
959
+ - type: v_measure
960
+ value: 70.09793315196697
961
+ - task:
962
+ type: Clustering
963
+ dataset:
964
+ type: C-MTEB/ThuNewsClusteringS2S
965
+ name: MTEB ThuNewsClusteringS2S
966
+ config: default
967
+ split: test
968
+ revision: None
969
+ metrics:
970
+ - type: v_measure
971
+ value: 63.66930246094367
972
+ - task:
973
+ type: Retrieval
974
+ dataset:
975
+ type: C-MTEB/VideoRetrieval
976
+ name: MTEB VideoRetrieval
977
+ config: default
978
+ split: dev
979
+ revision: None
980
+ metrics:
981
+ - type: map_at_1
982
+ value: 60.4
983
+ - type: map_at_10
984
+ value: 69.878
985
+ - type: map_at_100
986
+ value: 70.285
987
+ - type: map_at_1000
988
+ value: 70.295
989
+ - type: map_at_3
990
+ value: 68.033
991
+ - type: map_at_5
992
+ value: 69.233
993
+ - type: mrr_at_1
994
+ value: 60.3
995
+ - type: mrr_at_10
996
+ value: 69.828
997
+ - type: mrr_at_100
998
+ value: 70.235
999
+ - type: mrr_at_1000
1000
+ value: 70.245
1001
+ - type: mrr_at_3
1002
+ value: 67.983
1003
+ - type: mrr_at_5
1004
+ value: 69.18299999999999
1005
+ - type: ndcg_at_1
1006
+ value: 60.4
1007
+ - type: ndcg_at_10
1008
+ value: 74.155
1009
+ - type: ndcg_at_100
1010
+ value: 76.173
1011
+ - type: ndcg_at_1000
1012
+ value: 76.44800000000001
1013
+ - type: ndcg_at_3
1014
+ value: 70.44500000000001
1015
+ - type: ndcg_at_5
1016
+ value: 72.61800000000001
1017
+ - type: precision_at_1
1018
+ value: 60.4
1019
+ - type: precision_at_10
1020
+ value: 8.74
1021
+ - type: precision_at_100
1022
+ value: 0.9690000000000001
1023
+ - type: precision_at_1000
1024
+ value: 0.099
1025
+ - type: precision_at_3
1026
+ value: 25.8
1027
+ - type: precision_at_5
1028
+ value: 16.54
1029
+ - type: recall_at_1
1030
+ value: 60.4
1031
+ - type: recall_at_10
1032
+ value: 87.4
1033
+ - type: recall_at_100
1034
+ value: 96.89999999999999
1035
+ - type: recall_at_1000
1036
+ value: 99.1
1037
+ - type: recall_at_3
1038
+ value: 77.4
1039
+ - type: recall_at_5
1040
+ value: 82.69999999999999
1041
+ - task:
1042
+ type: Classification
1043
+ dataset:
1044
+ type: C-MTEB/waimai-classification
1045
+ name: MTEB Waimai
1046
+ config: default
1047
+ split: test
1048
+ revision: None
1049
+ metrics:
1050
+ - type: accuracy
1051
+ value: 88.49000000000001
1052
+ - type: ap
1053
+ value: 73.5441395538586
1054
+ - type: f1
1055
+ value: 86.88114969870975
  ---
+
+ # alime-embedding-large-zh
+
+ alime-embedding-large-zh is a Chinese sentence-embedding model that maps sentences to 1024-dimensional, L2-normalized vectors, for tasks such as semantic similarity, retrieval, clustering, and classification.
+
+ ## Usage (Sentence-Transformers)
+
+ Using this model is straightforward once [sentence-transformers](https://www.SBERT.net) is installed:
+
+ ```
+ pip install -U sentence-transformers
+ ```
+
+ Then you can use the model like this:
+
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ sentences = ["西湖在哪?", "西湖风景名胜区位于浙江省杭州市"]
+
+ model = SentenceTransformer('Pristinenlp/alime-embedding-large-zh')
+ embeddings = model.encode(sentences)
+ print(embeddings)
+ ```
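
The snippet above prints raw embedding vectors. For loading the weights directly with HuggingFace Transformers rather than sentence-transformers, the sketch below reproduces the exported pipeline under the assumptions recorded elsewhere in this commit: CLS-token pooling (`1_Pooling/config.json`), L2 normalization (`modules.json`), and a 512-token limit (`sentence_bert_config.json`).

```python
import torch
from transformers import AutoTokenizer, AutoModel

sentences = ["西湖在哪?", "西湖风景名胜区位于浙江省杭州市"]

tokenizer = AutoTokenizer.from_pretrained("Pristinenlp/alime-embedding-large-zh")
model = AutoModel.from_pretrained("Pristinenlp/alime-embedding-large-zh")
model.eval()

# Tokenize with the same 512-token limit declared in sentence_bert_config.json.
inputs = tokenizer(sentences, padding=True, truncation=True, max_length=512, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

# CLS-token pooling ("pooling_mode_cls_token": true), then L2 normalization (the 2_Normalize module).
embeddings = outputs.last_hidden_state[:, 0]
embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)

# With normalized vectors, the dot product equals cosine similarity.
print(embeddings @ embeddings.T)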
config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "architectures": [
+     "BertModel"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "directionality": "bidi",
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_fc_size": 768,
+   "pooler_num_attention_heads": 12,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 128,
+   "pooler_type": "first_token_transform",
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.31.0",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 21128
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "__version__": {
+     "sentence_transformers": "2.2.2",
+     "transformers": "4.31.0",
+     "pytorch": "2.0.1"
+   }
+ }
modules.json ADDED
@@ -0,0 +1,20 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   },
+   {
+     "idx": 2,
+     "name": "2",
+     "path": "2_Normalize",
+     "type": "sentence_transformers.models.Normalize"
+   }
+ ]
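
`modules.json` above declares the three-stage sentence-transformers pipeline: Transformer encoder, pooling, then normalization. As a sketch, the same pipeline can be assembled explicitly, assuming the CLS-pooling settings from `1_Pooling/config.json` and the 512-token limit from `sentence_bert_config.json`:

```python
from sentence_transformers import SentenceTransformer, models

# Stage 0: the BERT encoder (path "" in modules.json, i.e. the repository root).
word_embedding_model = models.Transformer(
    "Pristinenlp/alime-embedding-large-zh", max_seq_length=512
)

# Stage 1: CLS-token pooling, per 1_Pooling/config.json.
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),
    pooling_mode_cls_token=True,
    pooling_mode_mean_tokens=False,
)

# Stage 2: L2-normalize the sentence embedding.
normalize_model = models.Normalize()

model = SentenceTransformer(modules=[word_embedding_model, pooling_model, normalize_model])
print(model.encode(["西湖在哪?"]).shape)  # (1, 1024)
```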
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3bc38380a162eb4296219f38cba0df2f2bba407d9bd366a1f445c669f5162b78
+ size 1302216105
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 512,
+   "do_lower_case": false
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 1000000000000000019884624838656,
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff