benjamintli
/

modernbert-code

+{
+    "word_embedding_dimension": 768,
+    "pooling_mode_cls_token": false,
+    "pooling_mode_mean_tokens": true,
+    "pooling_mode_max_tokens": false,
+    "pooling_mode_mean_sqrt_len_tokens": false,
+    "pooling_mode_weightedmean_tokens": false,
+    "pooling_mode_lasttoken": false,
+    "include_prompt": true
+}

README.md ADDED Viewed

	@@ -0,0 +1,593 @@

+---
+tags:
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
+- dense
+- generated_from_trainer
+- dataset_size:193623
+- loss:CachedMultipleNegativesRankingLoss
+base_model: answerdotai/ModernBERT-base
+widget:
+- source_sentence: "@Override\n    public void encode(final OtpOutputStream buf) {\n\
+    \        final int arity = elems.length;\n\n        buf.write_tuple_head(arity);\n\
+    \n        for (int i = 0; i < arity; i++) {\n            buf.write_any(elems[i]);\n\
+    \        }\n    }"
+  sentences:
+  - fetch function with the same interface than in cozy-client-js
+  - 'Convert this tuple to the equivalent Erlang external representation.
+    @param buf
+    an output stream to which the encoded tuple should be written.'
+  - 'Delete a customer by it''s id.
+    @param int $id The id
+    @return bool
+    @throws \Throwable in case something went wrong when deleting.'
+- source_sentence: "func (md *RootMetadata) KeyGenerationsToUpdate() (kbfsmd.KeyGen,\
+    \ kbfsmd.KeyGen) {\n\treturn md.bareMd.KeyGenerationsToUpdate()\n}"
+  sentences:
+  - 'Return a mapping of table to alias for the primary table and joins.
+    @return array'
+  - // KeyGenerationsToUpdate wraps the respective method of the underlying BareRootMetadata
+    for convenience.
+  - " Platform.valueOf(platformName);\n        DesiredCapabilities desiredCapabilities\
+    \ = new DesiredCapabilities(browser, version, platform);\n        desiredCapabilities.setVersion(version);\n\
+    \        return createAndSetRemoteDriver(url, desiredCapabilities);\n    }"
+- source_sentence: "func (f *fsClient) GetAccess() (access string, policyJSON string,\
+    \ err *probe.Error) {\n\t// For windows this feature is not implemented.\n\tif\
+    \ runtime.GOOS == \"windows\" {\n\t\treturn \"\", \"\", probe.NewError(APINotImplemented{API:\
+    \ \"GetAccess\", APIType: \"filesystem\"})\n\t}\n\tst, err := f.fsStat(false)\n\
+    \tif err != nil {\n"
+  sentences:
+  - "\t\treturn \"\", \"\", err.Trace(f.PathURL.String())\n\t}\n\tif !st.Mode().IsDir()\
+    \ {\n\t\treturn \"\", \"\", probe.NewError(APINotImplemented{API: \"GetAccess\"\
+    , APIType: \"filesystem\"})\n\t}\n\t// Mask with os.ModePerm to get only inode\
+    \ permissions\n\tswitch st.Mode() & os.ModePerm {\n\tcase os.FileMode(0777):\n\
+    \t\treturn \"readwrite\", \"\", nil\n\tcase os.FileMode(0555):\n\t\treturn \"\
+    readonly\", \"\", nil\n\tcase os.FileMode(0333):\n\t\treturn \"writeonly\", \"\
+    \", nil\n\t}\n\treturn \"none\", \"\", nil\n}"
+  - // DeleteOperator deletes the specified operator.
+  - " foreach ($files as $storedfile) {\n            $fs->import_external_file($storedfile);\n\
+    \        }\n    }"
+- source_sentence: "def close_database_session(session):\n    \"\"\"Close connection\
+    \ with the database\"\"\"\n\n    try:\n        session.close()\n    except OperationalError\
+    \ as e:\n        raise DatabaseError(error=e.orig.args[1], code=e.orig.args[0])"
+  sentences:
+  - "        if (is_array($this->data)) {\n                    $this->data[$attributeKey]\
+    \ = is_callable($attributeValue) ? $attributeValue($this->rawData) : $attributeValue;\n\
+    \                } else {\n                    $this->data->$attributeKey = is_callable($attributeValue)\
+    \ ? $attributeValue($this->rawData) : $attributeValue;\n                }\n  \
+    \          }\n            return $this;\n        }\n\n        if (is_array($this->data))\
+    \ {\n            $this->data[$name] = is_callable($value) ? $value($this->rawData)\
+    \ : $value;\n        } else {\n            $this->data->$name = is_callable($value)\
+    \ ? $value($this->rawData) : $value;\n        }\n\n        return $this;\n   \
+    \ }"
+  - 'Waits for the timeout duration until the url responds with correct status code
+    @param routeUrl    URL to check (usually a route one)
+    @param timeout     Max timeout value to await for route readiness.
+    If not set, default timeout value is set to 5.
+    @param timeoutUnit TimeUnit used for timeout duration.
+    If not set, Minutes is used as default TimeUnit.
+    @param repetitions How many times in a row the route must respond successfully
+    to be considered available.
+    @param statusCodes list of status code that might return that service is up and
+    running.
+    It is used as OR, so if one returns true, then the route is considered valid.
+    If not set, then only 200 status code is used.'
+  - Close connection with the database
+- source_sentence: "function onActiveEditorChanged(event, current, previous) {\n \
+    \       if (current && !current._codeMirror._lineFolds) {\n            enableFoldingInEditor(current);\n\
+    \   "
+  sentences:
+  - Get playback settings such as shuffle and repeat.
+  - 'Save config data.
+    @param string $path
+    @param string $value
+    @param string $scope
+    @param int $scopeId
+    @return null'
+  - "     }\n        if (previous) {\n            saveLineFolds(previous);\n     \
+    \   }\n    }"
+datasets:
+- benjamintli/code-retrieval-combined
+pipeline_tag: sentence-similarity
+library_name: sentence-transformers
+metrics:
+- cosine_accuracy@1
+- cosine_accuracy@3
+- cosine_accuracy@5
+- cosine_accuracy@10
+- cosine_precision@1
+- cosine_precision@3
+- cosine_precision@5
+- cosine_precision@10
+- cosine_recall@1
+- cosine_recall@3
+- cosine_recall@5
+- cosine_recall@10
+- cosine_ndcg@10
+- cosine_mrr@10
+- cosine_map@100
+model-index:
+- name: SentenceTransformer based on answerdotai/ModernBERT-base
+  results:
+  - task:
+      type: information-retrieval
+      name: Information Retrieval
+    dataset:
+      name: eval
+      type: eval
+    metrics:
+    - type: cosine_accuracy@1
+      value: 0.9167054011341452
+      name: Cosine Accuracy@1
+    - type: cosine_accuracy@3
+      value: 0.9643023147717765
+      name: Cosine Accuracy@3
+    - type: cosine_accuracy@5
+      value: 0.9737845124105233
+      name: Cosine Accuracy@5
+    - type: cosine_accuracy@10
+      value: 0.9822441201078368
+      name: Cosine Accuracy@10
+    - type: cosine_precision@1
+      value: 0.9167054011341452
+      name: Cosine Precision@1
+    - type: cosine_precision@3
+      value: 0.32143410492392543
+      name: Cosine Precision@3
+    - type: cosine_precision@5
+      value: 0.19475690248210473
+      name: Cosine Precision@5
+    - type: cosine_precision@10
+      value: 0.09822441201078369
+      name: Cosine Precision@10
+    - type: cosine_recall@1
+      value: 0.9167054011341452
+      name: Cosine Recall@1
+    - type: cosine_recall@3
+      value: 0.9643023147717765
+      name: Cosine Recall@3
+    - type: cosine_recall@5
+      value: 0.9737845124105233
+      name: Cosine Recall@5
+    - type: cosine_recall@10
+      value: 0.9822441201078368
+      name: Cosine Recall@10
+    - type: cosine_ndcg@10
+      value: 0.9519116805931805
+      name: Cosine Ndcg@10
+    - type: cosine_mrr@10
+      value: 0.9419304852801657
+      name: Cosine Mrr@10
+    - type: cosine_map@100
+      value: 0.9425514042279245
+      name: Cosine Map@100
+---
+# SentenceTransformer based on answerdotai/ModernBERT-base
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on the [code-retrieval-combined](https://huggingface.co/datasets/benjamintli/code-retrieval-combined) dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+## Model Details
+### Model Description
+- **Model Type:** Sentence Transformer
+- **Base model:** [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) <!-- at revision 8949b909ec900327062f0ebf497f51aef5e6f0c8 -->
+- **Maximum Sequence Length:** 1024 tokens
+- **Output Dimensionality:** 768 dimensions
+- **Similarity Function:** Cosine Similarity
+- **Training Dataset:**
+    - [code-retrieval-combined](https://huggingface.co/datasets/benjamintli/code-retrieval-combined)
+<!-- - **Language:** Unknown -->
+<!-- - **License:** Unknown -->
+### Model Sources
+- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+- **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
+- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+### Full Model Architecture
+```
+SentenceTransformer(
+  (0): Transformer({'max_seq_length': 1024, 'do_lower_case': False, 'architecture': 'OptimizedModule'})
+  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+)
+```
+## Usage
+### Direct Usage (Sentence Transformers)
+First install the Sentence Transformers library:
+```bash
+pip install -U sentence-transformers
+```
+Then you can load this model and run inference.
+```python
+from sentence_transformers import SentenceTransformer
+# Download from the 🤗 Hub
+model = SentenceTransformer("benjamintli/modernbert-code")
+# Run inference
+queries = [
+    "function onActiveEditorChanged(event, current, previous) {\n        if (current && !current._codeMirror._lineFolds) {\n            enableFoldingInEditor(current);\n   ",
+]
+documents = [
+    '     }\n        if (previous) {\n            saveLineFolds(previous);\n        }\n    }',
+    'Save config data.\n\n@param string $path\n@param string $value\n@param string $scope\n@param int $scopeId\n\n@return null',
+    'Get playback settings such as shuffle and repeat.',
+]
+query_embeddings = model.encode_query(queries)
+document_embeddings = model.encode_document(documents)
+print(query_embeddings.shape, document_embeddings.shape)
+# (1, 768) (3, 768)
+# Get the similarity scores for the embeddings
+similarities = model.similarity(query_embeddings, document_embeddings)
+print(similarities)
+# tensor([[0.6443, 0.0381, 0.0291]])
+```
+<!--
+### Direct Usage (Transformers)
+<details><summary>Click to see the direct usage in Transformers</summary>
+</details>
+-->
+<!--
+### Downstream Usage (Sentence Transformers)
+You can finetune this model on your own dataset.
+<details><summary>Click to expand</summary>
+</details>
+-->
+<!--
+### Out-of-Scope Use
+*List how the model may foreseeably be misused and address what users ought not to do with the model.*
+-->
+## Evaluation
+### Metrics
+#### Information Retrieval
+* Dataset: `eval`
+* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
+| Metric              | Value      |
+|:--------------------|:-----------|
+| cosine_accuracy@1   | 0.9167     |
+| cosine_accuracy@3   | 0.9643     |
+| cosine_accuracy@5   | 0.9738     |
+| cosine_accuracy@10  | 0.9822     |
+| cosine_precision@1  | 0.9167     |
+| cosine_precision@3  | 0.3214     |
+| cosine_precision@5  | 0.1948     |
+| cosine_precision@10 | 0.0982     |
+| cosine_recall@1     | 0.9167     |
+| cosine_recall@3     | 0.9643     |
+| cosine_recall@5     | 0.9738     |
+| cosine_recall@10    | 0.9822     |
+| **cosine_ndcg@10**  | **0.9519** |
+| cosine_mrr@10       | 0.9419     |
+| cosine_map@100      | 0.9426     |
+<!--
+## Bias, Risks and Limitations
+*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+-->
+<!--
+### Recommendations
+*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+-->
+## Training Details
+### Training Dataset
+#### code-retrieval-combined
+* Dataset: [code-retrieval-combined](https://huggingface.co/datasets/benjamintli/code-retrieval-combined) at [4403b52](https://huggingface.co/datasets/benjamintli/code-retrieval-combined/tree/4403b525f5962df8374b128e0863482e07cb1dc9)
+* Size: 193,623 training samples
+* Columns: <code>query</code> and <code>positive</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | query                                                                                | positive                                                                           |
+  |:--------|:-------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
+  | type    | string                                                                               | string                                                                             |
+  | details | <ul><li>min: 6 tokens</li><li>mean: 143.24 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 64.75 tokens</li><li>max: 937 tokens</li></ul> |
+* Samples:
+  | query                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        | positive                                                                                                                                              |
+  |:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------|
+  | <code>protected function sendMusicMsgToJsonString(WxSendMusicMsg $msg)<br>    {<br>        $formatStr = '{<br>                        "touser":"%s",<br>                        "msgtype":"%s",<br>                        "music":<br>                        {<br>                          "title":"%s",<br>                          "description":"%s",<br>                          "musicurl":"%s",<br>                          "hqmusicurl":"%s",<br>                          "thumb_media_id":"%s"<br>                        }<br>                    }';<br>        $result = sprintf($formatStr,   $msg->getToUserName(),<br>                                        $msg->getMsgType(),<br>                                        $msg->getTitle(),<br>                                        $msg->getDescription(),<br>                                        $msg->getMusicUrl(),<br>                                        $msg->getHQMusicUrl(),<br>                                        $msg->getThumbMediaId()<br>                            );<br><br>        return $result;<br>    }</code> | <code>formatter WxSendMusicMsg to Json string<br>@param WxSendMusicMsg $msg<br>@return string</code>                                                  |
+  | <code>def getBlocks(self):<br>        """<br>        Get the blocks that need to be migrated<br>        """<br>        try:<br>            conn = self.dbi.connection()<br>            result =</code>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       | <code> self.buflistblks.execute(conn)<br>            return result<br>        finally:<br>            if conn:<br>                conn.close()</code> |
+  | <code>function obj(/*key,value, key,value ...*/) {<br>    var result = {}<br>    for(var n=0; n<arguments.length; n+=2) {<br>        result[arguments[n]] = arguments[n+1]<br>    }<br>    return result<br>}</code>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | <code>builds an object immediate where keys can be expressions</code>                                                                                 |
+* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
+  ```json
+  {
+      "scale": 20.0,
+      "similarity_fct": "cos_sim",
+      "mini_batch_size": 128,
+      "gather_across_devices": false,
+      "directions": [
+          "query_to_doc"
+      ],
+      "partition_mode": "joint",
+      "hardness_mode": null,
+      "hardness_strength": 0.0
+  }
+  ```
+### Evaluation Dataset
+#### code-retrieval-combined
+* Dataset: [code-retrieval-combined](https://huggingface.co/datasets/benjamintli/code-retrieval-combined) at [4403b52](https://huggingface.co/datasets/benjamintli/code-retrieval-combined/tree/4403b525f5962df8374b128e0863482e07cb1dc9)
+* Size: 21,514 evaluation samples
+* Columns: <code>query</code> and <code>positive</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | query                                                                                | positive                                                                            |
+  |:--------|:-------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
+  | type    | string                                                                               | string                                                                              |
+  | details | <ul><li>min: 7 tokens</li><li>mean: 140.91 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 71.36 tokens</li><li>max: 1024 tokens</li></ul> |
+* Samples:
+  | query                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          | positive                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
+  |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+  | <code>def save<br>      self.attributes.stringify_keys!<br>      self.attributes.delete('customer')<br>      self.attributes.delete('product')<br>      self.attributes.delete('credit_card')<br>      self.attributes.delete('bank_account')<br>      self.attributes.delete('paypal_account')<br><br>  </code>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               | <code>    self.attributes, options = extract_uniqueness_token(attributes)<br>      self.prefix_options.merge!(options)<br>      super<br>    end</code>                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
+  | <code>def _update_summary(self, summary=None):<br>        """Update all parts of the summary or clear when no summary."""<br>        board_image_label = self._parts['board image label']<br>        # get content for update or use blanks when no summary<br>        if summary:<br>            # make a board image with the swap drawn on it<br>            # board, action, text = summary.board, summary.action, summary.text<br>            board_image_cv = self._create_board_image_cv(summary.board)<br>            self._draw_swap_cv(board_image_cv, summary.action)<br>            board_image_tk = self._convert_cv_to_tk(board_image_cv)<br>            text = ''<br>            if not summary.score is None:<br>                text += 'Score: {:3.1f}'.format(summary.score)<br>            if (not summary.mana_drain_leaves is None) and\<br>                    (not summary.total_leaves is None):<br>                text += '       Mana Drains: {}/{}' \<br>                        ''.format(summary.mana_drain_leaves,<br> </code> | <code>                                 summary.total_leaves)<br>        else:<br>            #clear any stored state image and use the blank<br>            board_image_tk = board_image_label._blank_image<br>            text = ''<br>        # update the UI parts with the content<br>        board_image_label._board_image = board_image_tk<br>        board_image_label.config(image=board_image_tk)<br>        # update the summary text<br>        summary_label = self._parts['summary label']<br>        summary_label.config(text=text)<br>        # refresh the UI<br>        self._base.update()</code> |
+  | <code>def chi_p(mass1, mass2, spin1x, spin1y, spin2x, spin2y):<br>    """Returns the effective precession spin from mass1, mass2, spin1x,<br>    spin1y, spin2x, and spin2y.<br>    """<br>    xi1 = secondary_xi(mass1, mass2, spin1x, spin1y, spin2x, spin2y)<br>    xi2 = primary_xi(mass1, mass2, spin1x, spin1y, spin2x, spin2y)<br>    return chi_p_from_xi1_xi2(xi1, xi2)</code>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        | <code>Returns the effective precession spin from mass1, mass2, spin1x,<br>    spin1y, spin2x, and spin2y.</code>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |
+* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
+  ```json
+  {
+      "scale": 20.0,
+      "similarity_fct": "cos_sim",
+      "mini_batch_size": 128,
+      "gather_across_devices": false,
+      "directions": [
+          "query_to_doc"
+      ],
+      "partition_mode": "joint",
+      "hardness_mode": null,
+      "hardness_strength": 0.0
+  }
+  ```
+### Training Hyperparameters
+#### Non-Default Hyperparameters
+- `per_device_train_batch_size`: 1024
+- `num_train_epochs`: 1
+- `learning_rate`: 8e-05
+- `warmup_steps`: 0.05
+- `bf16`: True
+- `eval_strategy`: steps
+- `per_device_eval_batch_size`: 1024
+- `push_to_hub`: True
+- `hub_model_id`: modernbert-code
+- `load_best_model_at_end`: True
+- `dataloader_num_workers`: 4
+- `batch_sampler`: no_duplicates
+#### All Hyperparameters
+<details><summary>Click to expand</summary>
+- `per_device_train_batch_size`: 1024
+- `num_train_epochs`: 1
+- `max_steps`: -1
+- `learning_rate`: 8e-05
+- `lr_scheduler_type`: linear
+- `lr_scheduler_kwargs`: None
+- `warmup_steps`: 0.05
+- `optim`: adamw_torch_fused
+- `optim_args`: None
+- `weight_decay`: 0.0
+- `adam_beta1`: 0.9
+- `adam_beta2`: 0.999
+- `adam_epsilon`: 1e-08
+- `optim_target_modules`: None
+- `gradient_accumulation_steps`: 1
+- `average_tokens_across_devices`: True
+- `max_grad_norm`: 1.0
+- `label_smoothing_factor`: 0.0
+- `bf16`: True
+- `fp16`: False
+- `bf16_full_eval`: False
+- `fp16_full_eval`: False
+- `tf32`: None
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
+- `torch_compile`: False
+- `torch_compile_backend`: None
+- `torch_compile_mode`: None
+- `use_liger_kernel`: False
+- `liger_kernel_config`: None
+- `use_cache`: False
+- `neftune_noise_alpha`: None
+- `torch_empty_cache_steps`: None
+- `auto_find_batch_size`: False
+- `log_on_each_node`: True
+- `logging_nan_inf_filter`: True
+- `include_num_input_tokens_seen`: no
+- `log_level`: passive
+- `log_level_replica`: warning
+- `disable_tqdm`: False
+- `project`: huggingface
+- `trackio_space_id`: trackio
+- `eval_strategy`: steps
+- `per_device_eval_batch_size`: 1024
+- `prediction_loss_only`: True
+- `eval_on_start`: False
+- `eval_do_concat_batches`: True
+- `eval_use_gather_object`: False
+- `eval_accumulation_steps`: None
+- `include_for_metrics`: []
+- `batch_eval_metrics`: False
+- `save_only_model`: False
+- `save_on_each_node`: False
+- `enable_jit_checkpoint`: False
+- `push_to_hub`: True
+- `hub_private_repo`: None
+- `hub_model_id`: modernbert-code
+- `hub_strategy`: every_save
+- `hub_always_push`: False
+- `hub_revision`: None
+- `load_best_model_at_end`: True
+- `ignore_data_skip`: False
+- `restore_callback_states_from_checkpoint`: False
+- `full_determinism`: False
+- `seed`: 42
+- `data_seed`: None
+- `use_cpu`: False
+- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+- `parallelism_config`: None
+- `dataloader_drop_last`: False
+- `dataloader_num_workers`: 4
+- `dataloader_pin_memory`: True
+- `dataloader_persistent_workers`: False
+- `dataloader_prefetch_factor`: None
+- `remove_unused_columns`: True
+- `label_names`: None
+- `train_sampling_strategy`: random
+- `length_column_name`: length
+- `ddp_find_unused_parameters`: None
+- `ddp_bucket_cap_mb`: None
+- `ddp_broadcast_buffers`: False
+- `ddp_backend`: None
+- `ddp_timeout`: 1800
+- `fsdp`: []
+- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+- `deepspeed`: None
+- `debug`: []
+- `skip_memory_metrics`: True
+- `do_predict`: False
+- `resume_from_checkpoint`: None
+- `warmup_ratio`: None
+- `local_rank`: -1
+- `prompts`: None
+- `batch_sampler`: no_duplicates
+- `multi_dataset_batch_sampler`: proportional
+- `router_mapping`: {}
+- `learning_rate_mapping`: {}
+</details>
+### Training Logs
+| Epoch   | Step    | Training Loss | Validation Loss | eval_cosine_ndcg@10 |
+|:-------:|:-------:|:-------------:|:---------------:|:-------------------:|
+| 0.0526  | 10      | 5.2457        | 2.4469          | 0.4195              |
+| 0.1053  | 20      | 1.3973        | 0.6956          | 0.7742              |
+| 0.1579  | 30      | 0.5500        | 0.4000          | 0.8560              |
+| 0.2105  | 40      | 0.3429        | 0.2878          | 0.8891              |
+| 0.2632  | 50      | 0.2487        | 0.2250          | 0.9104              |
+| 0.3158  | 60      | 0.2080        | 0.1872          | 0.9256              |
+| 0.3684  | 70      | 0.1768        | 0.1656          | 0.9312              |
+| 0.4211  | 80      | 0.1525        | 0.1501          | 0.9352              |
+| 0.4737  | 90      | 0.1402        | 0.1374          | 0.9397              |
+| 0.5263  | 100     | 0.1343        | 0.1317          | 0.9413              |
+| 0.5789  | 110     | 0.1217        | 0.1242          | 0.9444              |
+| 0.6316  | 120     | 0.1180        | 0.1199          | 0.9454              |
+| 0.6842  | 130     | 0.1164        | 0.1149          | 0.9476              |
+| 0.7368  | 140     | 0.1146        | 0.1106          | 0.9494              |
+| 0.7895  | 150     | 0.1091        | 0.1080          | 0.9494              |
+| 0.8421  | 160     | 0.1085        | 0.1055          | 0.9506              |
+| 0.8947  | 170     | 0.1062        | 0.1041          | 0.9511              |
+| 0.9474  | 180     | 0.1130        | 0.1030          | 0.9517              |
+| **1.0** | **190** | **0.0924**    | **0.1024**      | **0.9519**          |
+* The bold row denotes the saved checkpoint.
+### Framework Versions
+- Python: 3.12.12
+- Sentence Transformers: 5.3.0
+- Transformers: 5.3.0
+- PyTorch: 2.10.0+cu128
+- Accelerate: 1.13.0
+- Datasets: 4.8.3
+- Tokenizers: 0.22.2
+## Citation
+### BibTeX
+#### Sentence Transformers
+```bibtex
+@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}
+```
+#### CachedMultipleNegativesRankingLoss
+```bibtex
+@misc{gao2021scaling,
+    title={Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup},
+    author={Luyu Gao and Yunyi Zhang and Jiawei Han and Jamie Callan},
+    year={2021},
+    eprint={2101.06983},
+    archivePrefix={arXiv},
+    primaryClass={cs.LG}
+}
+```
+<!--
+## Glossary
+*Clearly define terms in order to be accessible across audiences.*
+-->
+<!--
+## Model Card Authors
+*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+-->
+<!--
+## Model Card Contact
+*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+-->

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "model_type": "SentenceTransformer",
+  "__version__": {
+    "sentence_transformers": "5.3.0",
+    "transformers": "5.3.0",
+    "pytorch": "2.10.0+cu128"
+  },
+  "prompts": {
+    "query": "",
+    "document": ""
+  },
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}

modules.json ADDED Viewed

	@@ -0,0 +1,14 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  }
+]

sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "max_seq_length": 1024,
+    "do_lower_case": false
+}