---
# Hugging Face model card metadata: MTEB benchmark results reported for
# lim_base_zh_v0.
# NOTE(review): every `revision: None` below is parsed by YAML as the string
# "None", not as a null value - confirm this is what the consumer expects.
license: apache-2.0
tags:
- mteb
model-index:
- name: lim_base_zh_v0
  results:
  - task:
      type: Classification
    dataset:
      type: mteb/amazon_reviews_multi
      name: MTEB AmazonReviewsClassification (zh)
      config: zh
      split: test
      revision: 1399c76144fd37290681b995c656ef9b2e06e26d
    metrics:
    - type: accuracy
      value: 46.66600000000001
    - type: f1
      value: 43.88121213919628
  - task:
      type: Clustering
    dataset:
      type: C-MTEB/CLSClusteringP2P
      name: MTEB CLSClusteringP2P
      config: default
      split: test
      revision: None
    metrics:
    - type: v_measure
      value: 33.55469933811146
  - task:
      type: Clustering
    dataset:
      type: C-MTEB/CLSClusteringS2S
      name: MTEB CLSClusteringS2S
      config: default
      split: test
      revision: None
    metrics:
    - type: v_measure
      value: 36.17977796122646
  - task:
      type: Reranking
    dataset:
      type: C-MTEB/CMedQAv1-reranking
      name: MTEB CMedQAv1
      config: default
      split: test
      revision: None
    metrics:
    - type: map
      value: 83.84687250720238
    - type: mrr
      value: 86.34579365079364
  - task:
      type: Reranking
    dataset:
      type: C-MTEB/CMedQAv2-reranking
      name: MTEB CMedQAv2
      config: default
      split: test
      revision: None
    metrics:
    - type: map
      value: 84.7457752094449
    - type: mrr
      value: 87.41591269841268
  - task:
      type: PairClassification
    dataset:
      type: C-MTEB/CMNLI
      name: MTEB Cmnli
      config: default
      split: validation
      revision: None
    metrics:
    - type: cos_sim_accuracy
      value: 70.99218280216476
    - type: cos_sim_ap
      value: 79.5838273070596
    - type: cos_sim_f1
      value: 73.01215092730762
    - type: cos_sim_precision
      value: 67.09108716944172
    - type: cos_sim_recall
      value: 80.07949497311199
    - type: dot_accuracy
      value: 70.99218280216476
    - type: dot_ap
      value: 79.58744690895374
    - type: dot_f1
      value: 73.01215092730762
    - type: dot_precision
      value: 67.09108716944172
    - type: dot_recall
      value: 80.07949497311199
    - type: euclidean_accuracy
      value: 70.99218280216476
    - type: euclidean_ap
      value: 79.5838273070596
    - type: euclidean_f1
      value: 73.01215092730762
    - type: euclidean_precision
      value: 67.09108716944172
    - type: euclidean_recall
      value: 80.07949497311199
    - type: manhattan_accuracy
      value: 70.88394467829224
    - type: manhattan_ap
      value: 79.42301231718942
    - type: manhattan_f1
      value: 72.72536687631029
    - type: manhattan_precision
      value: 65.91297738932168
    - type: manhattan_recall
      value: 81.10825344867898
    - type: max_accuracy
      value: 70.99218280216476
    - type: max_ap
      value: 79.58744690895374
    - type: max_f1
      value: 73.01215092730762
  - task:
      type: Classification
    dataset:
      type: C-MTEB/IFlyTek-classification
      name: MTEB IFlyTek
      config: default
      split: validation
      revision: None
    metrics:
    - type: accuracy
      value: 47.34128510965756
    - type: f1
      value: 35.49963469301016
  - task:
      type: Classification
    dataset:
      type: C-MTEB/JDReview-classification
      name: MTEB JDReview
      config: default
      split: test
      revision: None
    metrics:
    - type: accuracy
      value: 85.66604127579738
    - type: ap
      value: 53.038152290755555
    - type: f1
      value: 80.14685686902159
  - task:
      type: Reranking
    dataset:
      type: C-MTEB/Mmarco-reranking
      name: MTEB MMarcoReranking
      config: default
      split: dev
      revision: None
    metrics:
    - type: map
      value: 20.56449688140155
    - type: mrr
      value: 19.60753968253968
  - task:
      type: Classification
    dataset:
      type: mteb/amazon_massive_intent
      name: MTEB MassiveIntentClassification (zh-CN)
      config: zh-CN
      split: test
      revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
    metrics:
    - type: accuracy
      value: 72.38399462004035
    - type: f1
      value: 70.33023134666634
  - task:
      type: Classification
    dataset:
      type: mteb/amazon_massive_scenario
      name: MTEB MassiveScenarioClassification (zh-CN)
      config: zh-CN
      split: test
      revision: 7d571f92784cd94a019292a1f45445077d0ef634
    metrics:
    - type: accuracy
      value: 74.87222595830531
    - type: f1
      value: 74.25722751562503
  - task:
      type: Classification
    dataset:
      type: C-MTEB/MultilingualSentiment-classification
      name: MTEB MultilingualSentiment
      config: default
      split: validation
      revision: None
    metrics:
    - type: accuracy
      value: 76.27000000000001
    - type: f1
      value: 75.9660773461064
  - task:
      type: PairClassification
    dataset:
      type: C-MTEB/OCNLI
      name: MTEB Ocnli
      config: default
      split: validation
      revision: None
    metrics:
    - type: cos_sim_accuracy
      value: 67.35246345425013
    - type: cos_sim_ap
      value: 69.69618171375657
    - type: cos_sim_f1
      value: 71.70665459483928
    - type: cos_sim_precision
      value: 62.75752773375595
    - type: cos_sim_recall
      value: 83.6325237592397
    - type: dot_accuracy
      value: 67.35246345425013
    - type: dot_ap
      value: 69.69618171375657
    - type: dot_f1
      value: 71.70665459483928
    - type: dot_precision
      value: 62.75752773375595
    - type: dot_recall
      value: 83.6325237592397
    - type: euclidean_accuracy
      value: 67.35246345425013
    - type: euclidean_ap
      value: 69.69618171375657
    - type: euclidean_f1
      value: 71.70665459483928
    - type: euclidean_precision
      value: 62.75752773375595
    - type: euclidean_recall
      value: 83.6325237592397
    - type: manhattan_accuracy
      value: 66.81104493773688
    - type: manhattan_ap
      value: 69.33781930832232
    - type: manhattan_f1
      value: 71.6342082980525
    - type: manhattan_precision
      value: 59.78798586572438
    - type: manhattan_recall
      value: 89.33474128827878
    - type: max_accuracy
      value: 67.35246345425013
    - type: max_ap
      value: 69.69618171375657
    - type: max_f1
      value: 71.70665459483928
  - task:
      type: Classification
    dataset:
      type: C-MTEB/OnlineShopping-classification
      name: MTEB OnlineShopping
      config: default
      split: test
      revision: None
    metrics:
    - type: accuracy
      value: 93.05
    - type: ap
      value: 91.26069801777923
    - type: f1
      value: 93.04149818231389
  - task:
      type: Reranking
    dataset:
      type: C-MTEB/T2Reranking
      name: MTEB T2Reranking
      config: default
      split: dev
      revision: None
    metrics:
    - type: map
      value: 65.74883739850293
    - type: mrr
      value: 75.47326869136282
  - task:
      type: Classification
    dataset:
      type: C-MTEB/TNews-classification
      name: MTEB TNews
      config: default
      split: validation
      revision: None
    metrics:
    - type: accuracy
      value: 53.269999999999996
    - type: f1
      value: 51.410630382886445
  - task:
      type: Clustering
    dataset:
      type: C-MTEB/ThuNewsClusteringP2P
      name: MTEB ThuNewsClusteringP2P
      config: default
      split: test
      revision: None
    metrics:
    - type: v_measure
      value: 63.344532225921434
  - task:
      type: Clustering
    dataset:
      type: C-MTEB/ThuNewsClusteringS2S
      name: MTEB ThuNewsClusteringS2S
      config: default
      split: test
      revision: None
    metrics:
    - type: v_measure
      value: 60.33437882010517
  - task:
      type: Classification
    dataset:
      type: C-MTEB/waimai-classification
      name: MTEB Waimai
      config: default
      split: test
      revision: None
    metrics:
    - type: accuracy
      value: 87.96000000000002
    - type: ap
      value: 72.43737061465443
    - type: f1
      value: 86.48668399738767
---
## Model Details

Lim is a general-purpose text embedding model for Chinese. We are continuously optimizing it.

## History

『2023-12-22』 Published the lim_base_zh_v0 model.

## Usage (Sentence-Transformers)

Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:

```
pip install -U sentence-transformers
```

Then you can use the model like this:

```python
from sentence_transformers import SentenceTransformer

# Model identifier handed to SentenceTransformer below.
# NOTE(review): the model card metadata and history call the model
# `lim_base_zh_v0`; confirm that this identifier is the intended one.
model_name = "liujiarik/lim_base_zh"

# Two short Chinese example sentences to embed.
sentences = ['我换手机号了', '如果我换手机怎么办?']

model = SentenceTransformer(model_name)
# Encode the example sentences and print the resulting embeddings.
embeddings = model.encode(sentences)
print(embeddings)
```