Matryoshka Representation Learning
Paper • 2205.13147 • Published • 27
How to use ArnavKewalram/bge-small-code-v1 with sentence-transformers:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("ArnavKewalram/bge-small-code-v1")
sentences = [
"Sets the global variables $rects and $origRectSpecs",
"def modify_ranking(tournament):\n database = TinyDB('db.json')\n # recuperation de tous les joueurs du tournoi\n players_table = database.table('players')\n list_players = get_player_list(tournament)\n # Modification du rang joueur par joueur\n for player in list_players:\n new_ranking = view.modify_ranking_view(player)\n players_table.update({'Classement': new_ranking}, doc_ids=[player.doc_id])",
"function setConstants() {\n const wrapItems = \".image-analysis-wrapper .face-wrap, .image-analysis-wrapper .score-wrap, .image-analysis-wrapper .attribute-wrap, .image-analysis-wrapper .region-block, .image-analysis-wrapper .region-block .word-block .word-wrap\";\n\n $rects = jQuery(\".image-analysis-wrapper .rectangle\");\n\n // Iterate over each rectangle and save the width, height, top position,\n // left position, closest stats block element, and position of the closest\n // stats block element to an object. Each object is then added to the \n // $origRectSpecs array for global use.\n $origRectSpecs = $rects.map(function () {\n closestWrapItems = jQuery(this).siblings(wrapItems);\n\n const stats = closestWrapItems.map(function () {\n return {\n origStatTop: jQuery(this).position().top || parseInt(jQuery(this).css(\"top\")),\n origStatLeft: jQuery(this).position().left || parseInt(jQuery(this).css(\"left\"))\n }\n })\n\n return {\n origRectWidth: jQuery(this).width(),\n origRectHeight: jQuery(this).height(),\n origRectTop: jQuery(this).position().top || parseInt(jQuery(this).css(\"top\")), // if the rect is on a tab that is currently not displayed it has a position of 0, so this check gets the css instead so we don't lose the value\n origRectLeft: jQuery(this).position().left || parseInt(jQuery(this).css(\"left\")),\n statBlock: closestWrapItems[0],\n statPosition: stats[0]\n }\n })\n }",
"reset() {\n\n // Set the initial crop to match any given fixed aspect ratio (or\n // default to a square crop 1:1).\n let aspectRatio = this._initialAspectRatio\n\n // Calculate the initial crop size such that it fits within the bounds\n let width = getWidth(this.bounds)\n let height = getWidth(this.bounds) / aspectRatio\n\n if (aspectRatio < width / getHeight(this.bounds)) {\n width = getHeight(this.bounds) * aspectRatio\n height = getHeight(this.bounds)\n }\n\n // Calculate the initial crop position to be central to the bounds\n const x = (getWidth(this.bounds) - width) / 2\n const y = (getHeight(this.bounds) - height) / 2\n\n // Set the region\n this.region = [\n [x, y],\n [x + width, y + height]\n ]\n }"
]
embeddings = model.encode(sentences)
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [4, 4]

This is a sentence-transformers model finetuned from BAAI/bge-small-en-v1.5. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for retrieval.
SentenceTransformer(
(0): Transformer({'transformer_task': 'feature-extraction', 'modality_config': {'text': {'method': 'forward', 'method_output_name': 'last_hidden_state'}}, 'module_output_name': 'token_embeddings', 'architecture': 'BertModel'})
(1): Pooling({'embedding_dimension': 384, 'pooling_mode': 'cls', 'include_prompt': True})
(2): Normalize({})
)
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import SentenceTransformer
# Download from the 🤗 Hub
model = SentenceTransformer("ArnavKewalram/bge-small-code-v1")
# Run inference
queries = [
'For component xyz, returns "xyz[]"',
]
documents = [
'private static final String getArrayTypeName(String typeName) {\n final String arrayTypeName = builtInArrayComponentName2ArrayTypeName.get(typeName);\n return (null == arrayTypeName) ? typeName + ARRAY_TYPE_SUFFIX : arrayTypeName;\n }',
'public String toString() {\n\t \tif(size == 0) {\n\t \t\treturn "[]";\n\t \t}else {\n\t \t\t\n\t \t\tString result = "[" + elementData[0];\n\t \t\tfor(int i = 1; i < size; i++) {\n\t \t\t\tresult += ", " + elementData[i];\n\t \t\t}\n\t \t\t\n\t \t\tresult += "]";\n\t \t\t\n\t \t\treturn result;\n\t \t}\n\t }',
]
query_embeddings = model.encode_query(queries)
document_embeddings = model.encode_document(documents)
print(query_embeddings.shape, document_embeddings.shape)
# [1, 384] [2, 384]
# Get the similarity scores for the embeddings
similarities = model.similarity(query_embeddings, document_embeddings)
print(similarities)
# tensor([[0.4896, 0.4966]])
Dataset: cornstack_eval — evaluated with InformationRetrievalEvaluator

| Metric | Value |
|---|---|
| cosine_accuracy@1 | 0.726 |
| cosine_accuracy@5 | 0.886 |
| cosine_accuracy@10 | 0.918 |
| cosine_precision@1 | 0.726 |
| cosine_precision@3 | 0.282 |
| cosine_precision@5 | 0.1772 |
| cosine_precision@10 | 0.0918 |
| cosine_recall@1 | 0.726 |
| cosine_recall@3 | 0.846 |
| cosine_recall@5 | 0.886 |
| cosine_recall@10 | 0.918 |
| cosine_ndcg@10 | 0.8243 |
| cosine_mrr@1 | 0.726 |
| cosine_mrr@5 | 0.7893 |
| cosine_mrr@10 | 0.7939 |
| cosine_map@100 | 0.7966 |
Columns: anchor, positive, and negative

| | anchor | positive | negative |
|---|---|---|---|
| type | string | string | string |
| modality | text | text | text |
| details | | | |
| anchor | positive | negative |
|---|---|---|
| Fades all outputs to the given color and waits for it to complete. | `def FadeOutputs(box, color, steps=50):` | `def _colour_loop(self, colours, seconds=None, milliseconds=None, fade=True):` |
| Sets the additional element count if buffer resize is required, defaults to initialElementCount of factory method. | `public void setResizeElementCount(int v) { vboSet.setResizeElementCount(v); }` | `public int getResizeElementCount() { return vboSet.getResizeElementCount(); }` |
| delete a specific incident | `def delete_specific_incident(self, incident_id):` | `def delete(openstack_resource):` |
MatryoshkaLoss with these parameters:
{
"loss": "MultipleNegativesRankingLoss",
"matryoshka_dims": [
384,
256,
128,
64
],
"matryoshka_weights": [
1.0,
1.0,
1.0,
1.0
],
"n_dims_per_step": -1
}
Non-default hyperparameters:

- per_device_train_batch_size: 32
- gradient_accumulation_steps: 2
- learning_rate: 2e-05
- num_train_epochs: 1
- warmup_ratio: 0.05
- fp16: True
- batch_sampler: no_duplicates

All hyperparameters:

- overwrite_output_dir: False
- do_predict: False
- prediction_loss_only: True
- per_device_train_batch_size: 32
- per_device_eval_batch_size: 8
- per_gpu_train_batch_size: None
- per_gpu_eval_batch_size: None
- gradient_accumulation_steps: 2
- eval_accumulation_steps: None
- torch_empty_cache_steps: None
- learning_rate: 2e-05
- weight_decay: 0.0
- adam_beta1: 0.9
- adam_beta2: 0.999
- adam_epsilon: 1e-08
- max_grad_norm: 1.0
- num_train_epochs: 1
- max_steps: -1
- lr_scheduler_type: linear
- lr_scheduler_kwargs: None
- warmup_ratio: 0.05
- warmup_steps: 0
- log_level: passive
- log_level_replica: warning
- log_on_each_node: True
- logging_nan_inf_filter: True
- save_safetensors: True
- save_on_each_node: False
- save_only_model: False
- restore_callback_states_from_checkpoint: False
- no_cuda: False
- use_cpu: False
- use_mps_device: False
- seed: 42
- data_seed: None
- jit_mode_eval: False
- bf16: False
- fp16: True
- fp16_opt_level: O1
- half_precision_backend: auto
- bf16_full_eval: False
- fp16_full_eval: False
- tf32: None
- local_rank: 0
- ddp_backend: None
- tpu_num_cores: None
- tpu_metrics_debug: False
- debug: []
- dataloader_drop_last: False
- dataloader_num_workers: 0
- dataloader_prefetch_factor: None
- past_index: -1
- disable_tqdm: False
- remove_unused_columns: True
- label_names: None
- load_best_model_at_end: False
- ignore_data_skip: False
- fsdp: []
- fsdp_min_num_params: 0
- fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- fsdp_transformer_layer_cls_to_wrap: None
- accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- parallelism_config: None
- deepspeed: None
- label_smoothing_factor: 0.0
- optim: adamw_torch_fused
- optim_args: None
- adafactor: False
- group_by_length: False
- length_column_name: length
- project: huggingface
- trackio_space_id: trackio
- ddp_find_unused_parameters: None
- ddp_bucket_cap_mb: None
- ddp_broadcast_buffers: False
- dataloader_pin_memory: True
- dataloader_persistent_workers: False
- skip_memory_metrics: True
- use_legacy_prediction_loop: False
- push_to_hub: False
- resume_from_checkpoint: None
- hub_model_id: None
- hub_strategy: every_save
- hub_private_repo: None
- hub_always_push: False
- hub_revision: None
- gradient_checkpointing: False
- gradient_checkpointing_kwargs: None
- include_inputs_for_metrics: False
- include_for_metrics: []
- eval_do_concat_batches: True
- fp16_backend: auto
- push_to_hub_model_id: None
- push_to_hub_organization: None
- mp_parameters:
- auto_find_batch_size: False
- full_determinism: False
- torchdynamo: None
- ray_scope: last
- ddp_timeout: 1800
- torch_compile: False
- torch_compile_backend: None
- torch_compile_mode: None
- include_tokens_per_second: False
- include_num_input_tokens_seen: no
- neftune_noise_alpha: None
- optim_target_modules: None
- batch_eval_metrics: False
- eval_on_start: False
- use_liger_kernel: False
- liger_kernel_config: None
- eval_use_gather_object: False
- average_tokens_across_devices: True
- prompts: None
- batch_sampler: no_duplicates
- multi_dataset_batch_sampler: proportional
- router_mapping: {}
- learning_rate_mapping: {}

| Epoch | Step | Training Loss | cornstack_eval_cosine_ndcg@10 |
|---|---|---|---|
| 0.016 | 50 | 6.8957 | - |
| 0.032 | 100 | 5.8104 | - |
| 0.048 | 150 | 5.3881 | - |
| 0.064 | 200 | 5.1643 | - |
| 0.08 | 250 | 5.1469 | - |
| 0.096 | 300 | 4.8455 | - |
| 0.112 | 350 | 4.9429 | - |
| 0.128 | 400 | 5.0664 | - |
| 0.144 | 450 | 4.6627 | - |
| 0.16 | 500 | 4.686 | - |
| 0.176 | 550 | 4.643 | - |
| 0.192 | 600 | 4.4053 | - |
| 0.208 | 650 | 4.5371 | - |
| 0.224 | 700 | 4.5435 | - |
| 0.24 | 750 | 4.432 | - |
| 0.256 | 800 | 4.4243 | - |
| 0.272 | 850 | 4.2231 | - |
| 0.288 | 900 | 4.2622 | - |
| 0.304 | 950 | 4.3597 | - |
| 0.32 | 1000 | 4.2547 | 0.8176 |
| 0.336 | 1050 | 4.2443 | - |
| 0.352 | 1100 | 4.4695 | - |
| 0.368 | 1150 | 4.3728 | - |
| 0.384 | 1200 | 4.3351 | - |
| 0.4 | 1250 | 3.9853 | - |
| 0.416 | 1300 | 4.2823 | - |
| 0.432 | 1350 | 4.1293 | - |
| 0.448 | 1400 | 4.1029 | - |
| 0.464 | 1450 | 4.1758 | - |
| 0.48 | 1500 | 4.1655 | - |
| 0.496 | 1550 | 4.0803 | - |
| 0.512 | 1600 | 4.1985 | - |
| 0.528 | 1650 | 4.0523 | - |
| 0.544 | 1700 | 4.1011 | - |
| 0.56 | 1750 | 4.2448 | - |
| 0.576 | 1800 | 4.0936 | - |
| 0.592 | 1850 | 3.9888 | - |
| 0.608 | 1900 | 4.1434 | - |
| 0.624 | 1950 | 3.9789 | - |
| 0.64 | 2000 | 3.9967 | 0.8271 |
| 0.656 | 2050 | 4.0894 | - |
| 0.672 | 2100 | 3.8938 | - |
| 0.688 | 2150 | 4.0384 | - |
| 0.704 | 2200 | 4.1308 | - |
| 0.72 | 2250 | 3.864 | - |
| 0.736 | 2300 | 4.0325 | - |
| 0.752 | 2350 | 3.8263 | - |
| 0.768 | 2400 | 3.9559 | - |
| 0.784 | 2450 | 3.7323 | - |
| 0.8 | 2500 | 3.7366 | - |
| 0.816 | 2550 | 3.9768 | - |
| 0.832 | 2600 | 3.9144 | - |
| 0.848 | 2650 | 3.9013 | - |
| 0.864 | 2700 | 3.9211 | - |
| 0.88 | 2750 | 3.9616 | - |
| 0.896 | 2800 | 3.9926 | - |
| 0.912 | 2850 | 3.9388 | - |
| 0.928 | 2900 | 3.8664 | - |
| 0.944 | 2950 | 3.8747 | - |
| 0.96 | 3000 | 4.0419 | 0.8243 |
| 0.976 | 3050 | 3.9493 | - |
| 0.992 | 3100 | 3.8626 | - |
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
@misc{kusupati2024matryoshka,
title={Matryoshka Representation Learning},
author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
year={2024},
eprint={2205.13147},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{oord2019representationlearningcontrastivepredictive,
title={Representation Learning with Contrastive Predictive Coding},
author={Aaron van den Oord and Yazhe Li and Oriol Vinyals},
year={2019},
eprint={1807.03748},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/1807.03748},
}
Base model
BAAI/bge-small-en-v1.5